Classes in this File | Line Coverage | Branch Coverage | Complexity | ||||
XmlMessageProtocol |
|
| 3.0;3 |
1 | /* | |
2 | * $Id: XmlMessageProtocol.java 10489 2008-01-23 17:53:38Z dfeist $ | |
3 | * -------------------------------------------------------------------------------------- | |
4 | * Copyright (c) MuleSource, Inc. All rights reserved. http://www.mulesource.com | |
5 | * | |
6 | * The software in this package is published under the terms of the CPAL v1.0 | |
7 | * license, a copy of which has been included with this distribution in the | |
8 | * LICENSE.txt file. | |
9 | */ | |
10 | ||
11 | package org.mule.transport.tcp.protocols; | |
12 | ||
13 | import java.io.IOException; | |
14 | import java.io.InputStream; | |
15 | import java.io.PushbackInputStream; | |
16 | ||
17 | import edu.emory.mathcs.backport.java.util.concurrent.ConcurrentHashMap; | |
18 | ||
19 | /** | |
20 | * <p> | |
21 | * The XmlMessageProtocol is an application level tcp protocol that can be used to | |
22 | * read streaming xml documents. The only requirement is that each document include | |
23 | * an xml declaration at the beginning of the document of the form "<?xml...". In | |
24 | * section 2.8, the xml 1.0 standard contains "Definition: XML documents | |
25 | * <strong>SHOULD</strong> begin with an XML declaration which specifies the version | |
26 | * of XML being used" while the xml 1.1 standard contains "Definition: XML 1.1 | |
27 | * documents <strong>MUST</strong> begin with an XML declaration which specifies the | |
28 | * version of XML being used". The SHOULD indicates a recommendation that, if not | |
29 | * followed, needs to be carefully checked for unintended consequences. MUST | |
30 | * indicates a mandatory requirement for a well-formed document. Please make sure | |
31 | * that the xml documents being streamed begin with an xml declaration when using | |
32 | * this class. | |
33 | * </p> | |
34 | * <p> | |
35 | * Data are read until a new document is found or there are no more data | |
36 | * (momentarily). For slower networks, | |
37 | * {@link org.mule.transport.tcp.protocols.XmlMessageEOFProtocol} may be more reliable. | |
38 | * </p> | |
39 | * <p> | |
40 | * Also, the default character encoding for the platform is used to decode the | |
41 | * message bytes when looking for the XML declaration. Some caution with message | |
42 | * character encodings is warranted. | |
43 | * </p> | |
44 | * <p> | |
45 | * Finally, this class uses a PushbackInputStream to enable parsing of individual | |
46 | * messages. The stream stores any pushed-back bytes into it's own internal buffer | |
47 | * and not the original stream. Therefore, the read buffer size is intentionally | |
48 | * limited to insure that unread characters remain on the stream so that all data may | |
49 | * be read later. | |
50 | * </p> | |
51 | */ | |
52 | public class XmlMessageProtocol extends AbstractByteProtocol | |
53 | { | |
54 | ||
55 | private static final String XML_PATTERN = "<?xml"; | |
56 | ||
57 | private static final int READ_BUFFER_SIZE = 4096; | |
58 | private static final int PUSHBACK_BUFFER_SIZE = READ_BUFFER_SIZE * 2; | |
59 | ||
60 | 16 | private ConcurrentHashMap pbMap = new ConcurrentHashMap(); |
61 | ||
62 | public XmlMessageProtocol() | |
63 | { | |
64 | 16 | super(STREAM_OK); |
65 | 16 | } |
66 | ||
67 | public Object read(InputStream is) throws IOException | |
68 | { | |
69 | 88 | PushbackInputStream pbis = (PushbackInputStream) pbMap.get(is); |
70 | 88 | if (null == pbis) |
71 | { | |
72 | 22 | pbis = new PushbackInputStream(is, PUSHBACK_BUFFER_SIZE); |
73 | 22 | PushbackInputStream prev = (PushbackInputStream) pbMap.putIfAbsent(is, pbis); |
74 | 22 | pbis = null == prev ? pbis : prev; |
75 | } | |
76 | ||
77 | 88 | int len = -1; |
78 | try | |
79 | { | |
80 | // read until xml pattern is seen (and then pushed back) or no more data | |
81 | // to read. return all data as message | |
82 | 88 | byte[] buffer = new byte[READ_BUFFER_SIZE]; |
83 | 88 | StringBuffer message = new StringBuffer(READ_BUFFER_SIZE); |
84 | 88 | int patternIndex = -1; |
85 | boolean repeat; | |
86 | do | |
87 | { | |
88 | 172 | len = safeRead(pbis, buffer); |
89 | 172 | if (len >= 0) |
90 | { | |
91 | // TODO take encoding into account, ideally from the incoming XML | |
92 | 152 | message.append(new String(buffer, 0, len)); |
93 | // start search at 2nd character in buffer (index=1) to | |
94 | // indicate whether we have reached a new document. | |
95 | 152 | patternIndex = message.toString().indexOf(XML_PATTERN, 1); |
96 | 152 | repeat = isRepeat(patternIndex, len, pbis.available()); |
97 | } | |
98 | else | |
99 | { | |
100 | // never repeat on closed stream (and avoid calling available) | |
101 | 20 | repeat = false; |
102 | } | |
103 | ||
104 | } | |
105 | 172 | while (repeat); |
106 | ||
107 | 88 | if (patternIndex > 0) |
108 | { | |
109 | // push back the start of the next message and | |
110 | // ignore the pushed-back characters in the return buffer | |
111 | 60 | pbis.unread(message.substring(patternIndex, message.length()).getBytes()); |
112 | 60 | message.setLength(patternIndex); |
113 | } | |
114 | ||
115 | // TODO encoding here, too... | |
116 | 88 | return nullEmptyArray(message.toString().getBytes()); |
117 | ||
118 | } | |
119 | finally | |
120 | { | |
121 | // TODO - this doesn't seem very reliable, since loop above can end | |
122 | // without EOF. On the other hand, what else can we do? Entire logic | |
123 | // is not very dependable, IMHO. XmlMessageEOFProtocol is more likely | |
124 | // to be correct here, I think. | |
125 | ||
126 | // clear from map if stream has ended | |
127 | 88 | if (len < 0) |
128 | { | |
129 | 20 | pbMap.remove(is); |
130 | } | |
131 | } | |
132 | } | |
133 | ||
134 | /** | |
135 | * Show we continue reading? This class, following previous implementations, only | |
136 | * reads while input is saturated. | |
137 | * @see XmlMessageEOFProtocol | |
138 | * | |
139 | * @param patternIndex The index of the xml tag (or -1 if the next message not found) | |
140 | * @param len The amount of data read this loop (or -1 if EOF) | |
141 | * @param available The amount of data available to read | |
142 | * @return true if the read should continue | |
143 | */ | |
144 | protected boolean isRepeat(int patternIndex, int len, int available) | |
145 | { | |
146 | 38 | return patternIndex < 0 && len == READ_BUFFER_SIZE && available > 0; |
147 | } | |
148 | } |