View Javadoc

1   /*
2    * $Id: XmlMessageProtocol.java 19877 2010-10-12 12:50:49Z aperepel $
3    * --------------------------------------------------------------------------------------
4    * Copyright (c) MuleSoft, Inc.  All rights reserved.  http://www.mulesoft.com
5    *
6    * The software in this package is published under the terms of the CPAL v1.0
7    * license, a copy of which has been included with this distribution in the
8    * LICENSE.txt file.
9    */
10  
11  package org.mule.transport.tcp.protocols;
12  
13  import java.io.IOException;
14  import java.io.InputStream;
15  import java.io.PushbackInputStream;
16  
17  import edu.emory.mathcs.backport.java.util.concurrent.ConcurrentHashMap;
18  import edu.emory.mathcs.backport.java.util.concurrent.ConcurrentMap;
19  
20  /**
21   * <p>
22   * The XmlMessageProtocol is an application level tcp protocol that can be used to
23   * read streaming xml documents. The only requirement is that each document include
24   * an xml declaration at the beginning of the document of the form "<?xml...". In
25   * section 2.8, the xml 1.0 standard contains "Definition: XML documents
26   * <strong>SHOULD</strong> begin with an XML declaration which specifies the version
27   * of XML being used" while the xml 1.1 standard contains "Definition: XML 1.1
28   * documents <strong>MUST</strong> begin with an XML declaration which specifies the
29   * version of XML being used". The SHOULD indicates a recommendation that, if not
30   * followed, needs to be carefully checked for unintended consequences. MUST
31   * indicates a mandatory requirement for a well-formed document. Please make sure
32   * that the xml documents being streamed begin with an xml declaration when using
33   * this class.
34   * </p>
35   * <p>
36   * Data are read until a new document is found or there are no more data
37   * (momentarily).  For slower networks,
38   * {@link org.mule.transport.tcp.protocols.XmlMessageEOFProtocol} may be more reliable.
39   * </p>
40   * <p>
41   * Also, the default character encoding for the platform is used to decode the
42   * message bytes when looking for the XML declaration. Some caution with message
43   * character encodings is warranted.
44   * </p>
45   * <p>
46   * Finally, this class uses a PushbackInputStream to enable parsing of individual
47   * messages. The stream stores any pushed-back bytes into it's own internal buffer
48   * and not the original stream. Therefore, the read buffer size is intentionally
49   * limited to insure that unread characters remain on the stream so that all data may
50   * be read later.
51   * </p>
52   */
53  public class XmlMessageProtocol extends AbstractByteProtocol
54  {
55      
56      private static final String XML_PATTERN = "<?xml";
57  
58      private static final int READ_BUFFER_SIZE = 4096;
59      private static final int PUSHBACK_BUFFER_SIZE = READ_BUFFER_SIZE * 2;
60  
61      private ConcurrentMap pbMap = new ConcurrentHashMap();
62  
63      public XmlMessageProtocol()
64      {
65          super(STREAM_OK);
66      }
67  
68      public Object read(InputStream is) throws IOException
69      {
70          PushbackInputStream pbis = (PushbackInputStream) pbMap.get(is);
71          if (null == pbis)
72          {
73              pbis = new PushbackInputStream(is, PUSHBACK_BUFFER_SIZE);
74              PushbackInputStream prev = (PushbackInputStream) pbMap.putIfAbsent(is, pbis);
75              pbis = null == prev ? pbis : prev;
76          }
77  
78          int len = -1;
79          try
80          {
81              // read until xml pattern is seen (and then pushed back) or no more data
82              // to read. return all data as message
83              byte[] buffer = new byte[READ_BUFFER_SIZE];
84              StringBuffer message = new StringBuffer(READ_BUFFER_SIZE);
85              int patternIndex = -1;
86              boolean repeat;
87              do
88              {
89                  len = safeRead(pbis, buffer);
90                  if (len >= 0)
91                  {
92                      // TODO take encoding into account, ideally from the incoming XML
93                      message.append(new String(buffer, 0, len));
94                      // start search at 2nd character in buffer (index=1) to
95                      // indicate whether we have reached a new document.
96                      patternIndex = message.toString().indexOf(XML_PATTERN, 1);
97                      repeat = isRepeat(patternIndex, len, pbis.available());
98                  }
99                  else
100                 {
101                     // never repeat on closed stream (and avoid calling available)
102                     repeat = false;
103                 }
104 
105             }
106             while (repeat);
107 
108             if (patternIndex > 0)
109             {
110                 // push back the start of the next message and
111                 // ignore the pushed-back characters in the return buffer
112                 pbis.unread(message.substring(patternIndex, message.length()).getBytes());
113                 message.setLength(patternIndex);
114             }
115 
116             // TODO encoding here, too...
117             return nullEmptyArray(message.toString().getBytes());
118 
119         }
120         finally
121         {
122             // TODO - this doesn't seem very reliable, since loop above can end
123             // without EOF.  On the other hand, what else can we do?  Entire logic
124             // is not very dependable, IMHO.  XmlMessageEOFProtocol is more likely
125             // to be correct here, I think.
126 
127             // clear from map if stream has ended
128             if (len < 0)
129             {
130                 pbMap.remove(is);
131             }
132         }
133     }
134 
135     /**
136      * Show we continue reading?  This class, following previous implementations, only
137      * reads while input is saturated.
138      * @see XmlMessageEOFProtocol
139      *
140      * @param patternIndex The index of the xml tag (or -1 if the next message not found)
141      * @param len The amount of data read this loop (or -1 if EOF)
142      * @param available The amount of data available to read
143      * @return true if the read should continue
144      */
145     protected boolean isRepeat(int patternIndex, int len, int available)
146     {
147         return patternIndex < 0 && len == READ_BUFFER_SIZE && available > 0;
148     }
149 }