View Javadoc
1   /*
2    * Copyright (c) MuleSoft, Inc.  All rights reserved.  http://www.mulesoft.com
3    * The software in this package is published under the terms of the CPAL v1.0
4    * license, a copy of which has been included with this distribution in the
5    * LICENSE.txt file.
6    */
7   package org.mule.module.xml.util;
8   
9   import org.mule.RequestContext;
10  import org.mule.api.MuleContext;
11  import org.mule.api.transport.OutputHandler;
12  import org.mule.module.xml.stax.DelegateXMLStreamReader;
13  import org.mule.module.xml.stax.StaxSource;
14  import org.mule.module.xml.transformer.DelayedResult;
15  import org.mule.module.xml.transformer.XmlToDomDocument;
16  import org.mule.transformer.types.DataTypeFactory;
17  import org.mule.util.IOUtils;
18  
19  import java.io.ByteArrayInputStream;
20  import java.io.File;
21  import java.io.FileReader;
22  import java.io.IOException;
23  import java.io.InputStream;
24  import java.io.InputStreamReader;
25  import java.io.Reader;
26  import java.io.StringReader;
27  import java.util.ArrayList;
28  import java.util.Iterator;
29  import java.util.List;
30  
31  import javax.xml.namespace.NamespaceContext;
32  import javax.xml.parsers.DocumentBuilderFactory;
33  import javax.xml.stream.XMLStreamConstants;
34  import javax.xml.stream.XMLStreamException;
35  import javax.xml.stream.XMLStreamReader;
36  import javax.xml.stream.XMLStreamWriter;
37  import javax.xml.transform.Source;
38  import javax.xml.transform.Transformer;
39  import javax.xml.transform.TransformerConfigurationException;
40  import javax.xml.transform.TransformerFactory;
41  import javax.xml.transform.TransformerFactoryConfigurationError;
42  import javax.xml.transform.dom.DOMResult;
43  import javax.xml.transform.dom.DOMSource;
44  import javax.xml.transform.sax.SAXSource;
45  import javax.xml.transform.stream.StreamSource;
46  import javax.xml.xpath.XPath;
47  import javax.xml.xpath.XPathConstants;
48  import javax.xml.xpath.XPathExpressionException;
49  import javax.xml.xpath.XPathFactory;
50  
51  import org.apache.commons.io.output.ByteArrayOutputStream;
52  import org.apache.commons.lang.StringUtils;
53  import org.dom4j.DocumentException;
54  import org.dom4j.io.DOMReader;
55  import org.dom4j.io.DOMWriter;
56  import org.dom4j.io.DocumentSource;
57  import org.w3c.dom.Document;
58  import org.w3c.dom.Node;
59  import org.w3c.dom.NodeList;
60  import org.xml.sax.InputSource;
61  
62  /**
63   * General utility methods for working with XML.
64   */
65  public class XMLUtils extends org.mule.util.XMLUtils
66  {
67      public static final String TRANSFORMER_FACTORY_JDK5 = "com.sun.org.apache.xalan.internal.xsltc.trax.TransformerFactoryImpl";
68  
69      // xml parser feature names for optional XSD validation
70      public static final String APACHE_XML_FEATURES_VALIDATION_SCHEMA = "http://apache.org/xml/features/validation/schema";
71      public static final String APACHE_XML_FEATURES_VALIDATION_SCHEMA_FULL_CHECKING = "http://apache.org/xml/features/validation/schema-full-checking";
72  
73      // JAXP property for specifying external XSD location
74      public static final String JAXP_PROPERTIES_SCHEMA_SOURCE = "http://java.sun.com/xml/jaxp/properties/schemaSource";
75  
76      // JAXP properties for specifying external XSD language (as required by newer
77      // JAXP implementation)
78      public static final String JAXP_PROPERTIES_SCHEMA_LANGUAGE = "http://java.sun.com/xml/jaxp/properties/schemaLanguage";
79      public static final String JAXP_PROPERTIES_SCHEMA_LANGUAGE_VALUE = "http://www.w3.org/2001/XMLSchema";
80  
81      /**
82       * Converts a DOM to an XML string.
83       * @param dom the dome object to convert
84       * @return A string representation of the document
85       */
86      public static String toXml(Document dom)
87      {
88          return new DOMReader().read(dom).asXML();
89      }
90  
91      /**
92       * @return a new XSLT transformer
93       * @throws TransformerConfigurationException if no TransformerFactory can be located in the
94       * runtime environment.
95       */
96      public static Transformer getTransformer() throws TransformerConfigurationException
97      {
98          TransformerFactory tf;
99          try
100         {
101             tf = TransformerFactory.newInstance();
102         }
103         catch (TransformerFactoryConfigurationError e)
104         {
105             System.setProperty("javax.xml.transform.TransformerFactory", TRANSFORMER_FACTORY_JDK5);
106             tf = TransformerFactory.newInstance();
107         }
108         if (tf != null)
109         {
110             return tf.newTransformer();
111         }
112         else
113         {
114             throw new TransformerConfigurationException("Unable to instantiate a TransformerFactory");
115         }
116     }
117 
118     public static org.dom4j.Document toDocument(Object obj, MuleContext muleContext) throws Exception
119     {
120         return toDocument(obj, null, muleContext);
121     }
122     
123     /**
124      * Converts an object of unknown type to an org.dom4j.Document if possible.
125      * @return null if object cannot be converted
126      * @throws DocumentException if an error occurs while parsing
127      */
128     public static org.dom4j.Document toDocument(Object obj, String externalSchemaLocation, MuleContext muleContext) throws Exception
129     {
130         org.dom4j.io.SAXReader reader = new org.dom4j.io.SAXReader();
131         if (externalSchemaLocation != null)
132         {
133             reader.setValidation(true);
134             reader.setFeature(APACHE_XML_FEATURES_VALIDATION_SCHEMA, true);
135             reader.setFeature(APACHE_XML_FEATURES_VALIDATION_SCHEMA_FULL_CHECKING, true);
136             
137             InputStream xsdAsStream = IOUtils.getResourceAsStream(externalSchemaLocation, XMLUtils.class);
138             if (xsdAsStream == null)
139             {
140                 throw new IllegalArgumentException("Couldn't find schema at " + externalSchemaLocation);
141             }
142     
143             // Set schema language property (must be done before the schemaSource
144             // is set)
145             reader.setProperty(JAXP_PROPERTIES_SCHEMA_LANGUAGE, JAXP_PROPERTIES_SCHEMA_LANGUAGE_VALUE);
146     
147             // Need this one to map schemaLocation to a physical location
148             reader.setProperty(JAXP_PROPERTIES_SCHEMA_SOURCE, xsdAsStream);
149         }
150 
151 
152         if (obj instanceof org.dom4j.Document)
153         {
154             return (org.dom4j.Document) obj;
155         }
156         else if (obj instanceof org.w3c.dom.Document)
157         {
158             org.dom4j.io.DOMReader domReader = new org.dom4j.io.DOMReader();
159             return domReader.read((org.w3c.dom.Document) obj);
160         }
161         else if (obj instanceof org.xml.sax.InputSource)
162         {                
163             return reader.read((org.xml.sax.InputSource) obj);
164         }
165         else if (obj instanceof javax.xml.transform.Source || obj instanceof javax.xml.stream.XMLStreamReader)
166         {                
167             // TODO Find a more direct way to do this
168             XmlToDomDocument tr = new XmlToDomDocument();
169             tr.setMuleContext(muleContext);
170             tr.setReturnDataType(DataTypeFactory.create(org.dom4j.Document.class));
171             return (org.dom4j.Document) tr.transform(obj);
172         }
173         else if (obj instanceof java.io.InputStream)
174         {                
175             return reader.read((java.io.InputStream) obj);
176         }
177         else if (obj instanceof String)
178         {
179             return reader.read(new StringReader((String) obj));
180         }
181         else if (obj instanceof byte[])
182         {
183             // TODO Handle encoding/charset somehow
184             return reader.read(new StringReader(new String((byte[]) obj)));
185         }
186         else if (obj instanceof File)
187         {                
188             return reader.read((File) obj);
189         }
190         else
191         {
192             return null;
193         }
194     }
195 
196     /**
197      * Converts a payload to a {@link org.w3c.dom.Document} representation.
198      * <p> Reproduces the behavior from {@link org.mule.module.xml.util.XMLUtils#toDocument(Object, MuleContext)}
199      * which works converting to {@link org.dom4j.Document}.
200      *
201      * @param payload the payload to convert.
202      * @return a document from the payload or null if the payload is not a valid XML document.
203      */
204     public static org.w3c.dom.Document toW3cDocument(Object payload) throws Exception
205     {
206         if (payload instanceof org.dom4j.Document)
207         {
208             DOMWriter writer = new DOMWriter();
209             org.w3c.dom.Document w3cDocument = writer.write((org.dom4j.Document) payload);
210 
211             return w3cDocument;
212         }
213         else if (payload instanceof org.w3c.dom.Document)
214         {
215             return (org.w3c.dom.Document) payload;
216         }
217         else if (payload instanceof org.xml.sax.InputSource)
218         {
219             return parseXML((InputSource) payload);
220         }
221         else if (payload instanceof javax.xml.transform.Source || payload instanceof javax.xml.stream.XMLStreamReader)
222         {
223             DOMResult result = new DOMResult();
224             Transformer idTransformer = getTransformer();
225             Source source = (payload instanceof Source) ? (Source)payload : toXmlSource(null, true, payload);
226             idTransformer.transform(source, result);
227             return (Document) result.getNode();
228         }
229         else if (payload instanceof java.io.InputStream)
230         {
231             InputStreamReader input = new InputStreamReader((InputStream) payload);
232             return parseXML(input);
233         }
234         else if (payload instanceof String)
235         {
236             Reader input = new StringReader((String) payload);
237 
238             return parseXML(input);
239         }
240         else if (payload instanceof byte[])
241         {
242             // TODO Handle encoding/charset somehow
243             Reader input = new StringReader(new String((byte[]) payload));
244             return parseXML(input);
245         }
246         else if (payload instanceof File)
247         {
248             Reader input = new FileReader((File) payload);
249             return parseXML(input);
250         }
251         else
252         {
253             return null;
254         }
255     }
256 
257     private static org.w3c.dom.Document parseXML(Reader source) throws Exception
258     {
259         return parseXML(new InputSource(source));
260     }
261 
262     private static org.w3c.dom.Document parseXML(InputSource source) throws Exception
263     {
264         DocumentBuilderFactory factory =
265                 DocumentBuilderFactory.newInstance();
266 
267         return factory.newDocumentBuilder().parse(source);
268     }
269 
270     /**
271      * Returns an XMLStreamReader for an object of unknown type if possible.
272      * @return null if no XMLStreamReader can be created for the object type
273      * @throws XMLStreamException
274      */
275     public static javax.xml.stream.XMLStreamReader toXMLStreamReader(javax.xml.stream.XMLInputFactory factory, Object obj) throws XMLStreamException
276     {
277         if (obj instanceof javax.xml.stream.XMLStreamReader)
278         {
279             return (javax.xml.stream.XMLStreamReader) obj;
280         }
281         else if (obj instanceof org.mule.module.xml.stax.StaxSource)
282         {
283             return ((org.mule.module.xml.stax.StaxSource) obj).getXMLStreamReader();
284         }
285         else if (obj instanceof javax.xml.transform.Source)
286         {
287             return factory.createXMLStreamReader((javax.xml.transform.Source) obj);
288         }
289         else if (obj instanceof org.xml.sax.InputSource)
290         {
291             return factory.createXMLStreamReader(((org.xml.sax.InputSource) obj).getByteStream());
292         }
293         else if (obj instanceof org.w3c.dom.Document)
294         {
295             return factory.createXMLStreamReader(new javax.xml.transform.dom.DOMSource((org.w3c.dom.Document) obj));
296         }
297         else if (obj instanceof org.dom4j.Document)
298         {
299             return factory.createXMLStreamReader(new org.dom4j.io.DocumentSource((org.dom4j.Document) obj));
300         }
301         else if (obj instanceof java.io.InputStream)
302         {
303             final InputStream is = (java.io.InputStream) obj;
304             
305             XMLStreamReader xsr = factory.createXMLStreamReader(is);
306             return new DelegateXMLStreamReader(xsr) 
307             {
308                 @Override
309                 public void close() throws XMLStreamException
310                 {
311                     super.close();
312                     
313                     try
314                     {
315                         is.close();
316                     }
317                     catch (IOException e)
318                     {
319                         throw new XMLStreamException(e);
320                     }
321                 }
322                 
323             };
324         }
325         else if (obj instanceof String)
326         {
327             return factory.createXMLStreamReader(new StringReader((String) obj));
328         }
329         else if (obj instanceof byte[])
330         {
331             // TODO Handle encoding/charset?
332             return factory.createXMLStreamReader(new ByteArrayInputStream((byte[]) obj));
333         }
334         else
335         {
336             return null;
337         }
338     }
339     
340     /**
341      * Convert our object to a Source type efficiently.
342      */ 
343     public static javax.xml.transform.Source toXmlSource(javax.xml.stream.XMLInputFactory xmlInputFactory, boolean useStaxSource, Object src) throws Exception
344     {
345         if (src instanceof javax.xml.transform.Source)
346         {
347             return (Source) src;
348         }
349         else if (src instanceof byte[])
350         {
351             ByteArrayInputStream stream = new ByteArrayInputStream((byte[]) src);
352             return toStreamSource(xmlInputFactory, useStaxSource, stream);
353         }
354         else if (src instanceof InputStream)
355         {
356             return toStreamSource(xmlInputFactory, useStaxSource, (InputStream) src);
357         }
358         else if (src instanceof String)
359         {
360             if (useStaxSource)
361             {
362                 return new StaxSource(xmlInputFactory.createXMLStreamReader(new StringReader((String) src)));
363             }
364             else
365             {
366                 return new StreamSource(new StringReader((String) src));
367             }
368         }
369         else if (src instanceof org.dom4j.Document)
370         {
371             return new DocumentSource((org.dom4j.Document) src);
372         }
373         else if (src instanceof org.xml.sax.InputSource)
374         {
375             return new SAXSource((InputSource) src);
376         }
377         // TODO MULE-3555
378         else if (src instanceof XMLStreamReader)
379         {
380             XMLStreamReader xsr = (XMLStreamReader) src;
381             
382             // StaxSource requires that we advance to a start element/document event
383             if (!xsr.isStartElement() && 
384                             xsr.getEventType() != XMLStreamConstants.START_DOCUMENT) 
385             {
386                 xsr.nextTag();
387             }
388             
389             return new StaxSource((XMLStreamReader) src);
390         }
391         else if (src instanceof org.w3c.dom.Document || src instanceof org.w3c.dom.Element)
392         {
393             return new DOMSource((org.w3c.dom.Node) src);
394         }
395         else if (src instanceof DelayedResult) 
396         {
397             DelayedResult result = ((DelayedResult) src);
398             DOMResult domResult = new DOMResult();
399             result.write(domResult);
400             return new DOMSource(domResult.getNode());
401         }
402         else if (src instanceof OutputHandler) 
403         {
404             OutputHandler handler = ((OutputHandler) src);
405             ByteArrayOutputStream output = new ByteArrayOutputStream();
406             
407             handler.write(RequestContext.getEvent(), output);
408             
409             return toStreamSource(xmlInputFactory, useStaxSource, new ByteArrayInputStream(output.toByteArray()));
410         }
411         else
412         {
413             return null;
414         }
415     }
416 
417     public static javax.xml.transform.Source toStreamSource(javax.xml.stream.XMLInputFactory xmlInputFactory, boolean useStaxSource, InputStream stream) throws XMLStreamException
418     {
419         if (useStaxSource)
420         {
421             return new org.mule.module.xml.stax.StaxSource(xmlInputFactory.createXMLStreamReader(stream));
422         }
423         else 
424         {
425             return new javax.xml.transform.stream.StreamSource(stream);
426         }
427     }
428     
429     /**
430      * Copies the reader to the writer. The start and end document methods must
431      * be handled on the writer manually. TODO: if the namespace on the reader
432      * has been declared previously to where we are in the stream, this probably
433      * won't work.
434      * 
435      * @param reader
436      * @param writer
437      * @throws XMLStreamException
438      */
439     public static void copy(XMLStreamReader reader, XMLStreamWriter writer) throws XMLStreamException {
440         copy(reader, writer, false);
441     }
442     public static void copy(XMLStreamReader reader, XMLStreamWriter writer,
443                             boolean fragment) throws XMLStreamException {
444         // number of elements read in
445         int read = 0;
446         int event = reader.getEventType();
447 
448         while (reader.hasNext()) {
449             switch (event) {
450             case XMLStreamConstants.START_ELEMENT:
451                 read++;
452                 writeStartElement(reader, writer);
453                 break;
454             case XMLStreamConstants.END_ELEMENT:
455                 writer.writeEndElement();
456                 read--;
457                 if (read <= 0 && !fragment) {
458                     return;
459                 }
460                 break;
461             case XMLStreamConstants.CHARACTERS:
462                 writer.writeCharacters(reader.getText());
463                 break;
464             case XMLStreamConstants.START_DOCUMENT:
465             case XMLStreamConstants.END_DOCUMENT:
466             case XMLStreamConstants.ATTRIBUTE:
467             case XMLStreamConstants.NAMESPACE:
468                 break;
469             default:
470                 break;
471             }
472             event = reader.next();
473         }
474     }
475 
476     private static void writeStartElement(XMLStreamReader reader, XMLStreamWriter writer)
477         throws XMLStreamException {
478         String local = reader.getLocalName();
479         String uri = reader.getNamespaceURI();
480         String prefix = reader.getPrefix();
481         if (prefix == null) {
482             prefix = "";
483         }
484 
485         
486 //        System.out.println("STAXUTILS:writeStartElement : node name : " + local +  " namespace URI" + uri);
487         boolean writeElementNS = false;
488         if (uri != null) {
489             String boundPrefix = writer.getPrefix(uri);
490             if (boundPrefix == null || !prefix.equals(boundPrefix)) {
491                 writeElementNS = true;
492             }
493         }
494 
495         // Write out the element name
496         if (uri != null) {
497             if (prefix.length() == 0 && StringUtils.isEmpty(uri)) {
498                 writer.writeStartElement(local);
499                 writer.setDefaultNamespace(uri);
500 
501             } else {
502                 writer.writeStartElement(prefix, local, uri);
503                 writer.setPrefix(prefix, uri);
504             }
505         } else {
506             writer.writeStartElement(local);
507         }
508 
509         // Write out the namespaces
510         for (int i = 0; i < reader.getNamespaceCount(); i++) {
511             String nsURI = reader.getNamespaceURI(i);
512             String nsPrefix = reader.getNamespacePrefix(i);
513             if (nsPrefix == null) {
514                 nsPrefix = "";
515             }
516 
517             if (nsPrefix.length() == 0) {
518                 writer.writeDefaultNamespace(nsURI);
519             } else {
520                 writer.writeNamespace(nsPrefix, nsURI);
521             }
522 
523             if (nsURI.equals(uri) && nsPrefix.equals(prefix)) {
524                 writeElementNS = false;
525             }
526         }
527 
528         // Check if the namespace still needs to be written.
529         // We need this check because namespace writing works
530         // different on Woodstox and the RI.
531         if (writeElementNS) {
532             if (prefix.length() == 0) {
533                 writer.writeDefaultNamespace(uri);
534             } else {
535                 writer.writeNamespace(prefix, uri);
536             }
537         }        
538         
539         // Write out attributes
540         for (int i = 0; i < reader.getAttributeCount(); i++) {
541             String ns = reader.getAttributeNamespace(i);
542             String nsPrefix = reader.getAttributePrefix(i);
543             if (ns == null || ns.length() == 0) {
544                 writer.writeAttribute(reader.getAttributeLocalName(i), reader.getAttributeValue(i));
545             } else if (nsPrefix == null || nsPrefix.length() == 0) {
546                 writer.writeAttribute(reader.getAttributeNamespace(i), reader.getAttributeLocalName(i),
547                                       reader.getAttributeValue(i));
548             } else {
549                 writer.writeAttribute(reader.getAttributePrefix(i), reader.getAttributeNamespace(i), reader
550                     .getAttributeLocalName(i), reader.getAttributeValue(i));
551             }
552 
553         }
554     }
555 
556     /**
557      * Creates an XPath object with a custom NamespaceContext given the Node to operate on
558      * @param node the Node or document to operate on.  Note that namespace handling will not work if a Node fragment is passed in
559      * @return a new XPath object
560      */
561     private static XPath createXPath(Node node)
562     {
563         XPath xp = XPathFactory.newInstance().newXPath();
564         if (node instanceof Document)
565         {
566             xp.setNamespaceContext(new XPathNamespaceContext((Document) node));
567         }
568         return xp;
569     }
570 
571     /**
572      * Select a single XML node using an Xpath
573      * @param xpath the XPath expression to evaluate
574      * @param node the node (or document) to exaluate on
575      * @return the result of the evaluation.
576      * @throws XPathExpressionException if the XPath expression is malformed and cannot be parsed
577      */
578     public static Node selectOne(String xpath, Node node) throws XPathExpressionException
579     {
580             XPath xp = createXPath(node);
581             return (Node) xp.evaluate(xpath, node, XPathConstants.NODE);
582     }
583 
584     /**
585      * Select a single XML String value using an Xpath
586      * @param xpath the XPath expression to evaluate
587      * @param node the node (or document) to evaluate on
588      * @return the result of the evaluation.
589      * @throws XPathExpressionException if the XPath expression is malformed and cannot be parsed
590      */
591     public static String selectValue(String xpath, Node node) throws XPathExpressionException
592     {
593             XPath xp = createXPath(node);
594             return (String) xp.evaluate(xpath, node, XPathConstants.STRING);
595     }
596 
597     /**
598      * Select a set of Node objects using the Xpath expression
599      * @param xpath the XPath expression to evaluate
600      * @param node the node (or document) to evaluate on
601      * @return the result of the evaluation. 
602      * @throws XPathExpressionException if the XPath expression is malformed and cannot be parsed
603      */
604     public static List<Node> select(String xpath, Node node) throws XPathExpressionException
605     {
606             XPath xp = createXPath(node);
607             NodeList nl = (NodeList) xp.evaluate(xpath, node, XPathConstants.NODESET);
608             List<Node> nodeList = new ArrayList<Node>(nl.getLength());
609             for (int i = 0; i < nl.getLength(); i++)
610             {
611                 nodeList.add(nl.item(i));
612             }
613             return nodeList;
614     }
615 
616 
617 
618     /**
619      * The default namespace context that will read namespaces from the current document if the
620      * Node being processed is a Document
621      */
622     private static class XPathNamespaceContext implements NamespaceContext
623     {
624         private Document document;
625 
626         public XPathNamespaceContext(Document document)
627         {
628             this.document = document;
629         }
630 
631         public String getNamespaceURI(String prefix)
632         {
633             if (prefix == null || prefix.equals(""))
634             {
635                 return document.getDocumentElement().getNamespaceURI();
636             }
637             else
638             {
639                 return document.lookupNamespaceURI(prefix);
640             }
641         }
642 
643         public String getPrefix(String namespaceURI)
644         {
645             return document.lookupPrefix(namespaceURI);
646         }
647 
648         public Iterator<String> getPrefixes(String namespaceURI)
649         {
650             List<String> list = new ArrayList<String>();
651             list.add(getPrefix(namespaceURI));
652             return list.iterator();
653         }
654     }
655 }