1 /*
2 * Copyright (c) MuleSoft, Inc. All rights reserved. http://www.mulesoft.com
3 * The software in this package is published under the terms of the CPAL v1.0
4 * license, a copy of which has been included with this distribution in the
5 * LICENSE.txt file.
6 */
7 package org.mule.transport.tcp.protocols;
8
9 import java.io.IOException;
10 import java.io.InputStream;
11 import java.io.PushbackInputStream;
12
13 import edu.emory.mathcs.backport.java.util.concurrent.ConcurrentHashMap;
14 import edu.emory.mathcs.backport.java.util.concurrent.ConcurrentMap;
15
16 /**
17 * <p>
18 * The XmlMessageProtocol is an application level tcp protocol that can be used to
19 * read streaming xml documents. The only requirement is that each document include
20 * an xml declaration at the beginning of the document of the form "<?xml...". In
21 * section 2.8, the xml 1.0 standard contains "Definition: XML documents
22 * <strong>SHOULD</strong> begin with an XML declaration which specifies the version
23 * of XML being used" while the xml 1.1 standard contains "Definition: XML 1.1
24 * documents <strong>MUST</strong> begin with an XML declaration which specifies the
25 * version of XML being used". The SHOULD indicates a recommendation that, if not
26 * followed, needs to be carefully checked for unintended consequences. MUST
27 * indicates a mandatory requirement for a well-formed document. Please make sure
28 * that the xml documents being streamed begin with an xml declaration when using
29 * this class.
30 * </p>
31 * <p>
32 * Data are read until a new document is found or there are no more data
33 * (momentarily). For slower networks,
34 * {@link org.mule.transport.tcp.protocols.XmlMessageEOFProtocol} may be more reliable.
35 * </p>
36 * <p>
37 * Also, the default character encoding for the platform is used to decode the
38 * message bytes when looking for the XML declaration. Some caution with message
39 * character encodings is warranted.
40 * </p>
41 * <p>
42 * Finally, this class uses a PushbackInputStream to enable parsing of individual
43 * messages. The stream stores any pushed-back bytes into it's own internal buffer
44 * and not the original stream. Therefore, the read buffer size is intentionally
45 * limited to insure that unread characters remain on the stream so that all data may
46 * be read later.
47 * </p>
48 */
49 public class XmlMessageProtocol extends AbstractByteProtocol
50 {
51
52 private static final String XML_PATTERN = "<?xml";
53
54 private static final int READ_BUFFER_SIZE = 4096;
55 private static final int PUSHBACK_BUFFER_SIZE = READ_BUFFER_SIZE * 2;
56
57 private ConcurrentMap pbMap = new ConcurrentHashMap();
58
59 public XmlMessageProtocol()
60 {
61 super(STREAM_OK);
62 }
63
64 public Object read(InputStream is) throws IOException
65 {
66 PushbackInputStream pbis = (PushbackInputStream) pbMap.get(is);
67 if (null == pbis)
68 {
69 pbis = new PushbackInputStream(is, PUSHBACK_BUFFER_SIZE);
70 PushbackInputStream prev = (PushbackInputStream) pbMap.putIfAbsent(is, pbis);
71 pbis = null == prev ? pbis : prev;
72 }
73
74 int len = -1;
75 try
76 {
77 // read until xml pattern is seen (and then pushed back) or no more data
78 // to read. return all data as message
79 byte[] buffer = new byte[READ_BUFFER_SIZE];
80 StringBuffer message = new StringBuffer(READ_BUFFER_SIZE);
81 int patternIndex = -1;
82 boolean repeat;
83 do
84 {
85 len = safeRead(pbis, buffer);
86 if (len >= 0)
87 {
88 // TODO take encoding into account, ideally from the incoming XML
89 message.append(new String(buffer, 0, len));
90 // start search at 2nd character in buffer (index=1) to
91 // indicate whether we have reached a new document.
92 patternIndex = message.toString().indexOf(XML_PATTERN, 1);
93 repeat = isRepeat(patternIndex, len, pbis.available());
94 }
95 else
96 {
97 // never repeat on closed stream (and avoid calling available)
98 repeat = false;
99 }
100
101 }
102 while (repeat);
103
104 if (patternIndex > 0)
105 {
106 // push back the start of the next message and
107 // ignore the pushed-back characters in the return buffer
108 pbis.unread(message.substring(patternIndex, message.length()).getBytes());
109 message.setLength(patternIndex);
110 }
111
112 // TODO encoding here, too...
113 return nullEmptyArray(message.toString().getBytes());
114
115 }
116 finally
117 {
118 // TODO - this doesn't seem very reliable, since loop above can end
119 // without EOF. On the other hand, what else can we do? Entire logic
120 // is not very dependable, IMHO. XmlMessageEOFProtocol is more likely
121 // to be correct here, I think.
122
123 // clear from map if stream has ended
124 if (len < 0)
125 {
126 pbMap.remove(is);
127 }
128 }
129 }
130
131 /**
132 * Show we continue reading? This class, following previous implementations, only
133 * reads while input is saturated.
134 * @see XmlMessageEOFProtocol
135 *
136 * @param patternIndex The index of the xml tag (or -1 if the next message not found)
137 * @param len The amount of data read this loop (or -1 if EOF)
138 * @param available The amount of data available to read
139 * @return true if the read should continue
140 */
141 protected boolean isRepeat(int patternIndex, int len, int available)
142 {
143 return patternIndex < 0 && len == READ_BUFFER_SIZE && available > 0;
144 }
145 }