Classes in this File | Line Coverage | Branch Coverage | Complexity | ||||
XmlMessageProtocol |
|
| 3.0;3 |
1 | /* | |
2 | * $Id: XmlMessageProtocol.java 19877 2010-10-12 12:50:49Z aperepel $ | |
3 | * -------------------------------------------------------------------------------------- | |
4 | * Copyright (c) MuleSoft, Inc. All rights reserved. http://www.mulesoft.com | |
5 | * | |
6 | * The software in this package is published under the terms of the CPAL v1.0 | |
7 | * license, a copy of which has been included with this distribution in the | |
8 | * LICENSE.txt file. | |
9 | */ | |
10 | ||
11 | package org.mule.transport.tcp.protocols; | |
12 | ||
13 | import java.io.IOException; | |
14 | import java.io.InputStream; | |
15 | import java.io.PushbackInputStream; | |
16 | ||
17 | import edu.emory.mathcs.backport.java.util.concurrent.ConcurrentHashMap; | |
18 | import edu.emory.mathcs.backport.java.util.concurrent.ConcurrentMap; | |
19 | ||
20 | /** | |
21 | * <p> | |
22 | * The XmlMessageProtocol is an application level tcp protocol that can be used to | |
23 | * read streaming xml documents. The only requirement is that each document include | |
24 | * an xml declaration at the beginning of the document of the form "<?xml...". In | |
25 | * section 2.8, the xml 1.0 standard contains "Definition: XML documents | |
26 | * <strong>SHOULD</strong> begin with an XML declaration which specifies the version | |
27 | * of XML being used" while the xml 1.1 standard contains "Definition: XML 1.1 | |
28 | * documents <strong>MUST</strong> begin with an XML declaration which specifies the | |
29 | * version of XML being used". The SHOULD indicates a recommendation that, if not | |
30 | * followed, needs to be carefully checked for unintended consequences. MUST | |
31 | * indicates a mandatory requirement for a well-formed document. Please make sure | |
32 | * that the xml documents being streamed begin with an xml declaration when using | |
33 | * this class. | |
34 | * </p> | |
35 | * <p> | |
36 | * Data are read until a new document is found or there are no more data | |
37 | * (momentarily). For slower networks, | |
38 | * {@link org.mule.transport.tcp.protocols.XmlMessageEOFProtocol} may be more reliable. | |
39 | * </p> | |
40 | * <p> | |
41 | * Also, the default character encoding for the platform is used to decode the | |
42 | * message bytes when looking for the XML declaration. Some caution with message | |
43 | * character encodings is warranted. | |
44 | * </p> | |
45 | * <p> | |
46 | * Finally, this class uses a PushbackInputStream to enable parsing of individual | |
47 | * messages. The stream stores any pushed-back bytes into it's own internal buffer | |
48 | * and not the original stream. Therefore, the read buffer size is intentionally | |
49 | * limited to insure that unread characters remain on the stream so that all data may | |
50 | * be read later. | |
51 | * </p> | |
52 | */ | |
53 | public class XmlMessageProtocol extends AbstractByteProtocol | |
54 | { | |
55 | ||
56 | private static final String XML_PATTERN = "<?xml"; | |
57 | ||
58 | private static final int READ_BUFFER_SIZE = 4096; | |
59 | private static final int PUSHBACK_BUFFER_SIZE = READ_BUFFER_SIZE * 2; | |
60 | ||
61 | 0 | private ConcurrentMap pbMap = new ConcurrentHashMap(); |
62 | ||
63 | public XmlMessageProtocol() | |
64 | { | |
65 | 0 | super(STREAM_OK); |
66 | 0 | } |
67 | ||
68 | public Object read(InputStream is) throws IOException | |
69 | { | |
70 | 0 | PushbackInputStream pbis = (PushbackInputStream) pbMap.get(is); |
71 | 0 | if (null == pbis) |
72 | { | |
73 | 0 | pbis = new PushbackInputStream(is, PUSHBACK_BUFFER_SIZE); |
74 | 0 | PushbackInputStream prev = (PushbackInputStream) pbMap.putIfAbsent(is, pbis); |
75 | 0 | pbis = null == prev ? pbis : prev; |
76 | } | |
77 | ||
78 | 0 | int len = -1; |
79 | try | |
80 | { | |
81 | // read until xml pattern is seen (and then pushed back) or no more data | |
82 | // to read. return all data as message | |
83 | 0 | byte[] buffer = new byte[READ_BUFFER_SIZE]; |
84 | 0 | StringBuffer message = new StringBuffer(READ_BUFFER_SIZE); |
85 | 0 | int patternIndex = -1; |
86 | boolean repeat; | |
87 | do | |
88 | { | |
89 | 0 | len = safeRead(pbis, buffer); |
90 | 0 | if (len >= 0) |
91 | { | |
92 | // TODO take encoding into account, ideally from the incoming XML | |
93 | 0 | message.append(new String(buffer, 0, len)); |
94 | // start search at 2nd character in buffer (index=1) to | |
95 | // indicate whether we have reached a new document. | |
96 | 0 | patternIndex = message.toString().indexOf(XML_PATTERN, 1); |
97 | 0 | repeat = isRepeat(patternIndex, len, pbis.available()); |
98 | } | |
99 | else | |
100 | { | |
101 | // never repeat on closed stream (and avoid calling available) | |
102 | 0 | repeat = false; |
103 | } | |
104 | ||
105 | } | |
106 | 0 | while (repeat); |
107 | ||
108 | 0 | if (patternIndex > 0) |
109 | { | |
110 | // push back the start of the next message and | |
111 | // ignore the pushed-back characters in the return buffer | |
112 | 0 | pbis.unread(message.substring(patternIndex, message.length()).getBytes()); |
113 | 0 | message.setLength(patternIndex); |
114 | } | |
115 | ||
116 | // TODO encoding here, too... | |
117 | 0 | return nullEmptyArray(message.toString().getBytes()); |
118 | ||
119 | } | |
120 | finally | |
121 | { | |
122 | // TODO - this doesn't seem very reliable, since loop above can end | |
123 | // without EOF. On the other hand, what else can we do? Entire logic | |
124 | // is not very dependable, IMHO. XmlMessageEOFProtocol is more likely | |
125 | // to be correct here, I think. | |
126 | ||
127 | // clear from map if stream has ended | |
128 | 0 | if (len < 0) |
129 | { | |
130 | 0 | pbMap.remove(is); |
131 | } | |
132 | } | |
133 | } | |
134 | ||
135 | /** | |
136 | * Show we continue reading? This class, following previous implementations, only | |
137 | * reads while input is saturated. | |
138 | * @see XmlMessageEOFProtocol | |
139 | * | |
140 | * @param patternIndex The index of the xml tag (or -1 if the next message not found) | |
141 | * @param len The amount of data read this loop (or -1 if EOF) | |
142 | * @param available The amount of data available to read | |
143 | * @return true if the read should continue | |
144 | */ | |
145 | protected boolean isRepeat(int patternIndex, int len, int available) | |
146 | { | |
147 | 0 | return patternIndex < 0 && len == READ_BUFFER_SIZE && available > 0; |
148 | } | |
149 | } |