1 /*
2 * $Id: XmlMessageProtocol.java 19191 2010-08-25 21:05:23Z tcarlson $
3 * --------------------------------------------------------------------------------------
4 * Copyright (c) MuleSoft, Inc. All rights reserved. http://www.mulesoft.com
5 *
6 * The software in this package is published under the terms of the CPAL v1.0
7 * license, a copy of which has been included with this distribution in the
8 * LICENSE.txt file.
9 */
10
11 package org.mule.transport.tcp.protocols;
12
13 import java.io.IOException;
14 import java.io.InputStream;
15 import java.io.PushbackInputStream;
16
17 import edu.emory.mathcs.backport.java.util.concurrent.ConcurrentHashMap;
18
19 /**
20 * <p>
21 * The XmlMessageProtocol is an application level tcp protocol that can be used to
22 * read streaming xml documents. The only requirement is that each document include
23 * an xml declaration at the beginning of the document of the form "<?xml...". In
24 * section 2.8, the xml 1.0 standard contains "Definition: XML documents
25 * <strong>SHOULD</strong> begin with an XML declaration which specifies the version
26 * of XML being used" while the xml 1.1 standard contains "Definition: XML 1.1
27 * documents <strong>MUST</strong> begin with an XML declaration which specifies the
28 * version of XML being used". The SHOULD indicates a recommendation that, if not
29 * followed, needs to be carefully checked for unintended consequences. MUST
30 * indicates a mandatory requirement for a well-formed document. Please make sure
31 * that the xml documents being streamed begin with an xml declaration when using
32 * this class.
33 * </p>
34 * <p>
35 * Data are read until a new document is found or there are no more data
36 * (momentarily). For slower networks,
37 * {@link org.mule.transport.tcp.protocols.XmlMessageEOFProtocol} may be more reliable.
38 * </p>
39 * <p>
40 * Also, the default character encoding for the platform is used to decode the
41 * message bytes when looking for the XML declaration. Some caution with message
42 * character encodings is warranted.
43 * </p>
44 * <p>
45 * Finally, this class uses a PushbackInputStream to enable parsing of individual
46 * messages. The stream stores any pushed-back bytes into it's own internal buffer
47 * and not the original stream. Therefore, the read buffer size is intentionally
48 * limited to insure that unread characters remain on the stream so that all data may
49 * be read later.
50 * </p>
51 */
52 public class XmlMessageProtocol extends AbstractByteProtocol
53 {
54
55 private static final String XML_PATTERN = "<?xml";
56
57 private static final int READ_BUFFER_SIZE = 4096;
58 private static final int PUSHBACK_BUFFER_SIZE = READ_BUFFER_SIZE * 2;
59
60 private ConcurrentHashMap pbMap = new ConcurrentHashMap();
61
62 public XmlMessageProtocol()
63 {
64 super(STREAM_OK);
65 }
66
67 public Object read(InputStream is) throws IOException
68 {
69 PushbackInputStream pbis = (PushbackInputStream) pbMap.get(is);
70 if (null == pbis)
71 {
72 pbis = new PushbackInputStream(is, PUSHBACK_BUFFER_SIZE);
73 PushbackInputStream prev = (PushbackInputStream) pbMap.putIfAbsent(is, pbis);
74 pbis = null == prev ? pbis : prev;
75 }
76
77 int len = -1;
78 try
79 {
80 // read until xml pattern is seen (and then pushed back) or no more data
81 // to read. return all data as message
82 byte[] buffer = new byte[READ_BUFFER_SIZE];
83 StringBuffer message = new StringBuffer(READ_BUFFER_SIZE);
84 int patternIndex = -1;
85 boolean repeat;
86 do
87 {
88 len = safeRead(pbis, buffer);
89 if (len >= 0)
90 {
91 // TODO take encoding into account, ideally from the incoming XML
92 message.append(new String(buffer, 0, len));
93 // start search at 2nd character in buffer (index=1) to
94 // indicate whether we have reached a new document.
95 patternIndex = message.toString().indexOf(XML_PATTERN, 1);
96 repeat = isRepeat(patternIndex, len, pbis.available());
97 }
98 else
99 {
100 // never repeat on closed stream (and avoid calling available)
101 repeat = false;
102 }
103
104 }
105 while (repeat);
106
107 if (patternIndex > 0)
108 {
109 // push back the start of the next message and
110 // ignore the pushed-back characters in the return buffer
111 pbis.unread(message.substring(patternIndex, message.length()).getBytes());
112 message.setLength(patternIndex);
113 }
114
115 // TODO encoding here, too...
116 return nullEmptyArray(message.toString().getBytes());
117
118 }
119 finally
120 {
121 // TODO - this doesn't seem very reliable, since loop above can end
122 // without EOF. On the other hand, what else can we do? Entire logic
123 // is not very dependable, IMHO. XmlMessageEOFProtocol is more likely
124 // to be correct here, I think.
125
126 // clear from map if stream has ended
127 if (len < 0)
128 {
129 pbMap.remove(is);
130 }
131 }
132 }
133
134 /**
135 * Show we continue reading? This class, following previous implementations, only
136 * reads while input is saturated.
137 * @see XmlMessageEOFProtocol
138 *
139 * @param patternIndex The index of the xml tag (or -1 if the next message not found)
140 * @param len The amount of data read this loop (or -1 if EOF)
141 * @param available The amount of data available to read
142 * @return true if the read should continue
143 */
144 protected boolean isRepeat(int patternIndex, int len, int available)
145 {
146 return patternIndex < 0 && len == READ_BUFFER_SIZE && available > 0;
147 }
148 }