1 /*
2 * $Id: XmlMessageProtocol.java 19877 2010-10-12 12:50:49Z aperepel $
3 * --------------------------------------------------------------------------------------
4 * Copyright (c) MuleSoft, Inc. All rights reserved. http://www.mulesoft.com
5 *
6 * The software in this package is published under the terms of the CPAL v1.0
7 * license, a copy of which has been included with this distribution in the
8 * LICENSE.txt file.
9 */
10
11 package org.mule.transport.tcp.protocols;
12
13 import java.io.IOException;
14 import java.io.InputStream;
15 import java.io.PushbackInputStream;
16
17 import edu.emory.mathcs.backport.java.util.concurrent.ConcurrentHashMap;
18 import edu.emory.mathcs.backport.java.util.concurrent.ConcurrentMap;
19
20 /**
21 * <p>
22 * The XmlMessageProtocol is an application level tcp protocol that can be used to
23 * read streaming xml documents. The only requirement is that each document include
24 * an xml declaration at the beginning of the document of the form "<?xml...". In
25 * section 2.8, the xml 1.0 standard contains "Definition: XML documents
26 * <strong>SHOULD</strong> begin with an XML declaration which specifies the version
27 * of XML being used" while the xml 1.1 standard contains "Definition: XML 1.1
28 * documents <strong>MUST</strong> begin with an XML declaration which specifies the
29 * version of XML being used". The SHOULD indicates a recommendation that, if not
30 * followed, needs to be carefully checked for unintended consequences. MUST
31 * indicates a mandatory requirement for a well-formed document. Please make sure
32 * that the xml documents being streamed begin with an xml declaration when using
33 * this class.
34 * </p>
35 * <p>
36 * Data are read until a new document is found or there are no more data
37 * (momentarily). For slower networks,
38 * {@link org.mule.transport.tcp.protocols.XmlMessageEOFProtocol} may be more reliable.
39 * </p>
40 * <p>
41 * Also, the default character encoding for the platform is used to decode the
42 * message bytes when looking for the XML declaration. Some caution with message
43 * character encodings is warranted.
44 * </p>
45 * <p>
46 * Finally, this class uses a PushbackInputStream to enable parsing of individual
47 * messages. The stream stores any pushed-back bytes into it's own internal buffer
48 * and not the original stream. Therefore, the read buffer size is intentionally
49 * limited to insure that unread characters remain on the stream so that all data may
50 * be read later.
51 * </p>
52 */
53 public class XmlMessageProtocol extends AbstractByteProtocol
54 {
55
56 private static final String XML_PATTERN = "<?xml";
57
58 private static final int READ_BUFFER_SIZE = 4096;
59 private static final int PUSHBACK_BUFFER_SIZE = READ_BUFFER_SIZE * 2;
60
61 private ConcurrentMap pbMap = new ConcurrentHashMap();
62
63 public XmlMessageProtocol()
64 {
65 super(STREAM_OK);
66 }
67
68 public Object read(InputStream is) throws IOException
69 {
70 PushbackInputStream pbis = (PushbackInputStream) pbMap.get(is);
71 if (null == pbis)
72 {
73 pbis = new PushbackInputStream(is, PUSHBACK_BUFFER_SIZE);
74 PushbackInputStream prev = (PushbackInputStream) pbMap.putIfAbsent(is, pbis);
75 pbis = null == prev ? pbis : prev;
76 }
77
78 int len = -1;
79 try
80 {
81 // read until xml pattern is seen (and then pushed back) or no more data
82 // to read. return all data as message
83 byte[] buffer = new byte[READ_BUFFER_SIZE];
84 StringBuffer message = new StringBuffer(READ_BUFFER_SIZE);
85 int patternIndex = -1;
86 boolean repeat;
87 do
88 {
89 len = safeRead(pbis, buffer);
90 if (len >= 0)
91 {
92 // TODO take encoding into account, ideally from the incoming XML
93 message.append(new String(buffer, 0, len));
94 // start search at 2nd character in buffer (index=1) to
95 // indicate whether we have reached a new document.
96 patternIndex = message.toString().indexOf(XML_PATTERN, 1);
97 repeat = isRepeat(patternIndex, len, pbis.available());
98 }
99 else
100 {
101 // never repeat on closed stream (and avoid calling available)
102 repeat = false;
103 }
104
105 }
106 while (repeat);
107
108 if (patternIndex > 0)
109 {
110 // push back the start of the next message and
111 // ignore the pushed-back characters in the return buffer
112 pbis.unread(message.substring(patternIndex, message.length()).getBytes());
113 message.setLength(patternIndex);
114 }
115
116 // TODO encoding here, too...
117 return nullEmptyArray(message.toString().getBytes());
118
119 }
120 finally
121 {
122 // TODO - this doesn't seem very reliable, since loop above can end
123 // without EOF. On the other hand, what else can we do? Entire logic
124 // is not very dependable, IMHO. XmlMessageEOFProtocol is more likely
125 // to be correct here, I think.
126
127 // clear from map if stream has ended
128 if (len < 0)
129 {
130 pbMap.remove(is);
131 }
132 }
133 }
134
135 /**
136 * Show we continue reading? This class, following previous implementations, only
137 * reads while input is saturated.
138 * @see XmlMessageEOFProtocol
139 *
140 * @param patternIndex The index of the xml tag (or -1 if the next message not found)
141 * @param len The amount of data read this loop (or -1 if EOF)
142 * @param available The amount of data available to read
143 * @return true if the read should continue
144 */
145 protected boolean isRepeat(int patternIndex, int len, int available)
146 {
147 return patternIndex < 0 && len == READ_BUFFER_SIZE && available > 0;
148 }
149 }