View Javadoc
1   /*
2    * Copyright (c) MuleSoft, Inc.  All rights reserved.  http://www.mulesoft.com
3    * The software in this package is published under the terms of the CPAL v1.0
4    * license, a copy of which has been included with this distribution in the
5    * LICENSE.txt file.
6    */
7   package org.mule.transport.tcp.protocols;
8   
9   import java.io.IOException;
10  import java.io.InputStream;
11  import java.io.PushbackInputStream;
12  
13  import edu.emory.mathcs.backport.java.util.concurrent.ConcurrentHashMap;
14  import edu.emory.mathcs.backport.java.util.concurrent.ConcurrentMap;
15  
16  /**
17   * <p>
18   * The XmlMessageProtocol is an application level tcp protocol that can be used to
19   * read streaming xml documents. The only requirement is that each document include
20   * an xml declaration at the beginning of the document of the form "<?xml...". In
21   * section 2.8, the xml 1.0 standard contains "Definition: XML documents
22   * <strong>SHOULD</strong> begin with an XML declaration which specifies the version
23   * of XML being used" while the xml 1.1 standard contains "Definition: XML 1.1
24   * documents <strong>MUST</strong> begin with an XML declaration which specifies the
25   * version of XML being used". The SHOULD indicates a recommendation that, if not
26   * followed, needs to be carefully checked for unintended consequences. MUST
27   * indicates a mandatory requirement for a well-formed document. Please make sure
28   * that the xml documents being streamed begin with an xml declaration when using
29   * this class.
30   * </p>
31   * <p>
32   * Data are read until a new document is found or there are no more data
33   * (momentarily).  For slower networks,
34   * {@link org.mule.transport.tcp.protocols.XmlMessageEOFProtocol} may be more reliable.
35   * </p>
36   * <p>
37   * Also, the default character encoding for the platform is used to decode the
38   * message bytes when looking for the XML declaration. Some caution with message
39   * character encodings is warranted.
40   * </p>
41   * <p>
42   * Finally, this class uses a PushbackInputStream to enable parsing of individual
43   * messages. The stream stores any pushed-back bytes into it's own internal buffer
44   * and not the original stream. Therefore, the read buffer size is intentionally
45   * limited to insure that unread characters remain on the stream so that all data may
46   * be read later.
47   * </p>
48   */
49  public class XmlMessageProtocol extends AbstractByteProtocol
50  {
51      
52      private static final String XML_PATTERN = "<?xml";
53  
54      private static final int READ_BUFFER_SIZE = 4096;
55      private static final int PUSHBACK_BUFFER_SIZE = READ_BUFFER_SIZE * 2;
56  
57      private ConcurrentMap pbMap = new ConcurrentHashMap();
58  
59      public XmlMessageProtocol()
60      {
61          super(STREAM_OK);
62      }
63  
64      public Object read(InputStream is) throws IOException
65      {
66          PushbackInputStream pbis = (PushbackInputStream) pbMap.get(is);
67          if (null == pbis)
68          {
69              pbis = new PushbackInputStream(is, PUSHBACK_BUFFER_SIZE);
70              PushbackInputStream prev = (PushbackInputStream) pbMap.putIfAbsent(is, pbis);
71              pbis = null == prev ? pbis : prev;
72          }
73  
74          int len = -1;
75          try
76          {
77              // read until xml pattern is seen (and then pushed back) or no more data
78              // to read. return all data as message
79              byte[] buffer = new byte[READ_BUFFER_SIZE];
80              StringBuffer message = new StringBuffer(READ_BUFFER_SIZE);
81              int patternIndex = -1;
82              boolean repeat;
83              do
84              {
85                  len = safeRead(pbis, buffer);
86                  if (len >= 0)
87                  {
88                      // TODO take encoding into account, ideally from the incoming XML
89                      message.append(new String(buffer, 0, len));
90                      // start search at 2nd character in buffer (index=1) to
91                      // indicate whether we have reached a new document.
92                      patternIndex = message.toString().indexOf(XML_PATTERN, 1);
93                      repeat = isRepeat(patternIndex, len, pbis.available());
94                  }
95                  else
96                  {
97                      // never repeat on closed stream (and avoid calling available)
98                      repeat = false;
99                  }
100 
101             }
102             while (repeat);
103 
104             if (patternIndex > 0)
105             {
106                 // push back the start of the next message and
107                 // ignore the pushed-back characters in the return buffer
108                 pbis.unread(message.substring(patternIndex, message.length()).getBytes());
109                 message.setLength(patternIndex);
110             }
111 
112             // TODO encoding here, too...
113             return nullEmptyArray(message.toString().getBytes());
114 
115         }
116         finally
117         {
118             // TODO - this doesn't seem very reliable, since loop above can end
119             // without EOF.  On the other hand, what else can we do?  Entire logic
120             // is not very dependable, IMHO.  XmlMessageEOFProtocol is more likely
121             // to be correct here, I think.
122 
123             // clear from map if stream has ended
124             if (len < 0)
125             {
126                 pbMap.remove(is);
127             }
128         }
129     }
130 
131     /**
132      * Show we continue reading?  This class, following previous implementations, only
133      * reads while input is saturated.
134      * @see XmlMessageEOFProtocol
135      *
136      * @param patternIndex The index of the xml tag (or -1 if the next message not found)
137      * @param len The amount of data read this loop (or -1 if EOF)
138      * @param available The amount of data available to read
139      * @return true if the read should continue
140      */
141     protected boolean isRepeat(int patternIndex, int len, int available)
142     {
143         return patternIndex < 0 && len == READ_BUFFER_SIZE && available > 0;
144     }
145 }