Classes in this File | Line Coverage | Branch Coverage | Complexity | ||||
XmlMessageProtocol |
|
| 3.0;3 |
1 | /* | |
2 | * $Id: XmlMessageProtocol.java 7976 2007-08-21 14:26:13Z dirk.olmes $ | |
3 | * -------------------------------------------------------------------------------------- | |
4 | * Copyright (c) MuleSource, Inc. All rights reserved. http://www.mulesource.com | |
5 | * | |
6 | * The software in this package is published under the terms of the CPAL v1.0 | |
7 | * license, a copy of which has been included with this distribution in the | |
8 | * LICENSE.txt file. | |
9 | */ | |
10 | ||
11 | package org.mule.providers.tcp.protocols; | |
12 | ||
13 | import java.io.IOException; | |
14 | import java.io.InputStream; | |
15 | import java.io.PushbackInputStream; | |
16 | ||
17 | import edu.emory.mathcs.backport.java.util.concurrent.ConcurrentHashMap; | |
18 | ||
19 | /** | |
20 | * <p> | |
21 | * The XmlMessageProtocol is an application level tcp protocol that can be used to | |
22 | * read streaming xml documents. The only requirement is that each document include | |
23 | * an xml declaration at the beginning of the document of the form "<?xml...". In | |
24 | * section 2.8, the xml 1.0 standard contains "Definition: XML documents | |
25 | * <strong>SHOULD</strong> begin with an XML declaration which specifies the version | |
26 | * of XML being used" while the xml 1.1 standard contains "Definition: XML 1.1 | |
27 | * documents <strong>MUST</strong> begin with an XML declaration which specifies the | |
28 | * version of XML being used". The SHOULD indicates a recommendation that, if not | |
29 | * followed, needs to be carefully checked for unintended consequences. MUST | |
30 | * indicates a mandatory requirement for a well-formed document. Please make sure | |
31 | * that the xml documents being streamed begin with an xml declaration when using | |
32 | * this class. | |
33 | * </p> | |
34 | * <p> | |
35 | * Data are read until a new document is found or there are no more data | |
36 | * (momentarily). For slower networks, | |
37 | * {@link org.mule.providers.tcp.protocols.XmlMessageEOFProtocol} may be more reliable. | |
38 | * </p> | |
39 | * <p> | |
40 | * Also, the default character encoding for the platform is used to decode the | |
41 | * message bytes when looking for the XML declaration. Some caution with message | |
42 | * character encodings is warranted. | |
43 | * </p> | |
44 | * <p> | |
45 | * Finally, this class uses a PushbackInputStream to enable parsing of individual | |
46 | * messages. The stream stores any pushed-back bytes into it's own internal buffer | |
47 | * and not the original stream. Therefore, the read buffer size is intentionally | |
48 | * limited to insure that unread characters remain on the stream so that all data may | |
49 | * be read later. | |
50 | * </p> | |
51 | */ | |
52 | public class XmlMessageProtocol extends ByteProtocol | |
53 | { | |
54 | private static final String XML_PATTERN = "<?xml"; | |
55 | ||
56 | private static final int READ_BUFFER_SIZE = 4096; | |
57 | private static final int PUSHBACK_BUFFER_SIZE = READ_BUFFER_SIZE * 2; | |
58 | ||
59 | 0 | private ConcurrentHashMap pbMap = new ConcurrentHashMap(); |
60 | ||
61 | public XmlMessageProtocol() | |
62 | { | |
63 | 0 | super(STREAM_OK); |
64 | 0 | } |
65 | ||
66 | public Object read(InputStream is) throws IOException | |
67 | { | |
68 | 0 | PushbackInputStream pbis = (PushbackInputStream) pbMap.get(is); |
69 | 0 | if (null == pbis) |
70 | { | |
71 | 0 | pbis = new PushbackInputStream(is, PUSHBACK_BUFFER_SIZE); |
72 | 0 | PushbackInputStream prev = (PushbackInputStream) pbMap.putIfAbsent(is, pbis); |
73 | 0 | pbis = null == prev ? pbis : prev; |
74 | } | |
75 | ||
76 | 0 | int len = -1; |
77 | try | |
78 | { | |
79 | // read until xml pattern is seen (and then pushed back) or no more data | |
80 | // to read. return all data as message | |
81 | 0 | byte[] buffer = new byte[READ_BUFFER_SIZE]; |
82 | 0 | StringBuffer message = new StringBuffer(READ_BUFFER_SIZE); |
83 | 0 | int patternIndex = -1; |
84 | boolean repeat; | |
85 | do | |
86 | { | |
87 | 0 | len = safeRead(pbis, buffer); |
88 | 0 | if (len >= 0) |
89 | { | |
90 | // TODO take encoding into account, ideally from the incoming XML | |
91 | 0 | message.append(new String(buffer, 0, len)); |
92 | // start search at 2nd character in buffer (index=1) to | |
93 | // indicate whether we have reached a new document. | |
94 | 0 | patternIndex = message.toString().indexOf(XML_PATTERN, 1); |
95 | 0 | repeat = isRepeat(patternIndex, len, pbis.available()); |
96 | } | |
97 | else | |
98 | { | |
99 | // never repeat on closed stream (and avoid calling available) | |
100 | 0 | repeat = false; |
101 | } | |
102 | ||
103 | } | |
104 | 0 | while (repeat); |
105 | ||
106 | 0 | if (patternIndex > 0) |
107 | { | |
108 | // push back the start of the next message and | |
109 | // ignore the pushed-back characters in the return buffer | |
110 | 0 | pbis.unread(message.substring(patternIndex, message.length()).getBytes()); |
111 | 0 | message.setLength(patternIndex); |
112 | } | |
113 | ||
114 | // TODO encoding here, too... | |
115 | 0 | return nullEmptyArray(message.toString().getBytes()); |
116 | ||
117 | } | |
118 | finally | |
119 | { | |
120 | // TODO - this doesn't seem very reliable, since loop above can end | |
121 | // without EOF. On the other hand, what else can we do? Entire logic | |
122 | // is not very dependable, IMHO. XmlMessageEOFProtocol is more likely | |
123 | // to be correct here, I think. | |
124 | ||
125 | // clear from map if stream has ended | |
126 | 0 | if (len < 0) |
127 | { | |
128 | 0 | pbMap.remove(is); |
129 | } | |
130 | } | |
131 | } | |
132 | ||
133 | /** | |
134 | * Show we continue reading? This class, following previous implementations, only | |
135 | * reads while input is saturated. | |
136 | * @see XmlMessageEOFProtocol | |
137 | * | |
138 | * @param patternIndex The index of the xml tag (or -1 if the next message not found) | |
139 | * @param len The amount of data read this loop (or -1 if EOF) | |
140 | * @param available The amount of data available to read | |
141 | * @return true if the read should continue | |
142 | */ | |
143 | protected boolean isRepeat(int patternIndex, int len, int available) | |
144 | { | |
145 | 0 | return patternIndex < 0 && len == READ_BUFFER_SIZE && available > 0; |
146 | } | |
147 | ||
148 | } |