/*
 * $Id: XMLUtils.java 19191 2010-08-25 21:05:23Z tcarlson $
 * --------------------------------------------------------------------------------------
 * Copyright (c) MuleSoft, Inc.  All rights reserved.  http://www.mulesoft.com
 *
 * The software in this package is published under the terms of the CPAL v1.0
 * license, a copy of which has been included with this distribution in the
 * LICENSE.txt file.
 */
10  
11  package org.mule.module.xml.util;
12  
13  import org.mule.DefaultMuleMessage;
14  import org.mule.RequestContext;
15  import org.mule.api.MuleContext;
16  import org.mule.api.MuleMessage;
17  import org.mule.api.transport.OutputHandler;
18  import org.mule.module.xml.stax.DelegateXMLStreamReader;
19  import org.mule.module.xml.stax.StaxSource;
20  import org.mule.module.xml.transformer.DelayedResult;
21  import org.mule.module.xml.transformer.XmlToDomDocument;
22  import org.mule.transformer.types.DataTypeFactory;
23  import org.mule.util.IOUtils;
24  
25  import java.io.ByteArrayInputStream;
26  import java.io.File;
27  import java.io.FileReader;
28  import java.io.IOException;
29  import java.io.InputStream;
30  import java.io.InputStreamReader;
31  import java.io.Reader;
32  import java.io.StringReader;
33  import java.util.ArrayList;
34  import java.util.Iterator;
35  import java.util.List;
36  
37  import javax.xml.parsers.DocumentBuilderFactory;
38  import javax.xml.namespace.NamespaceContext;
39  import javax.xml.stream.XMLStreamConstants;
40  import javax.xml.stream.XMLStreamException;
41  import javax.xml.stream.XMLStreamReader;
42  import javax.xml.stream.XMLStreamWriter;
43  import javax.xml.transform.Source;
44  import javax.xml.transform.Transformer;
45  import javax.xml.transform.TransformerConfigurationException;
46  import javax.xml.transform.TransformerFactory;
47  import javax.xml.transform.TransformerFactoryConfigurationError;
48  import javax.xml.transform.dom.DOMResult;
49  import javax.xml.transform.dom.DOMSource;
50  import javax.xml.transform.sax.SAXSource;
51  import javax.xml.transform.stream.StreamSource;
52  import javax.xml.xpath.XPath;
53  import javax.xml.xpath.XPathConstants;
54  import javax.xml.xpath.XPathExpressionException;
55  import javax.xml.xpath.XPathFactory;
56  
57  import org.apache.commons.io.output.ByteArrayOutputStream;
58  import org.apache.commons.lang.StringUtils;
59  import org.dom4j.DocumentException;
60  import org.dom4j.io.DOMReader;
61  import org.dom4j.io.DOMWriter;
62  import org.dom4j.io.DocumentSource;
63  import org.w3c.dom.Document;
64  import org.w3c.dom.Node;
65  import org.w3c.dom.NodeList;
66  import org.xml.sax.InputSource;
67  
68  /**
69   * General utility methods for working with XML.
70   */
71  public class XMLUtils extends org.mule.util.XMLUtils
72  {
73      public static final String TRANSFORMER_FACTORY_JDK5 = "com.sun.org.apache.xalan.internal.xsltc.trax.TransformerFactoryImpl";
74  
75      // xml parser feature names for optional XSD validation
76      public static final String APACHE_XML_FEATURES_VALIDATION_SCHEMA = "http://apache.org/xml/features/validation/schema";
77      public static final String APACHE_XML_FEATURES_VALIDATION_SCHEMA_FULL_CHECKING = "http://apache.org/xml/features/validation/schema-full-checking";
78  
79      // JAXP property for specifying external XSD location
80      public static final String JAXP_PROPERTIES_SCHEMA_SOURCE = "http://java.sun.com/xml/jaxp/properties/schemaSource";
81  
82      // JAXP properties for specifying external XSD language (as required by newer
83      // JAXP implementation)
84      public static final String JAXP_PROPERTIES_SCHEMA_LANGUAGE = "http://java.sun.com/xml/jaxp/properties/schemaLanguage";
85      public static final String JAXP_PROPERTIES_SCHEMA_LANGUAGE_VALUE = "http://www.w3.org/2001/XMLSchema";
86  
87      /**
88       * Converts a DOM to an XML string.
89       * @param dom the dome object to convert
90       * @return A string representation of the document
91       */
92      public static String toXml(Document dom)
93      {
94          return new DOMReader().read(dom).asXML();
95      }
96  
97      /**
98       * @return a new XSLT transformer
99       * @throws TransformerConfigurationException if no TransformerFactory can be located in the
100      * runtime environment.
101      */
102     public static Transformer getTransformer() throws TransformerConfigurationException
103     {
104         TransformerFactory tf;
105         try
106         {
107             tf = TransformerFactory.newInstance();
108         }
109         catch (TransformerFactoryConfigurationError e)
110         {
111             System.setProperty("javax.xml.transform.TransformerFactory", TRANSFORMER_FACTORY_JDK5);
112             tf = TransformerFactory.newInstance();
113         }
114         if (tf != null)
115         {
116             return tf.newTransformer();
117         }
118         else
119         {
120             throw new TransformerConfigurationException("Unable to instantiate a TransformerFactory");
121         }
122     }
123 
124     public static org.dom4j.Document toDocument(Object obj, MuleContext muleContext) throws Exception
125     {
126         return toDocument(obj, null, muleContext);
127     }
128     
129     /**
130      * Converts an object of unknown type to an org.dom4j.Document if possible.
131      * @return null if object cannot be converted
132      * @throws DocumentException if an error occurs while parsing
133      */
134     public static org.dom4j.Document toDocument(Object obj, String externalSchemaLocation, MuleContext muleContext) throws Exception
135     {
136         org.dom4j.io.SAXReader reader = new org.dom4j.io.SAXReader();
137         if (externalSchemaLocation != null)
138         {
139             reader.setValidation(true);
140             reader.setFeature(APACHE_XML_FEATURES_VALIDATION_SCHEMA, true);
141             reader.setFeature(APACHE_XML_FEATURES_VALIDATION_SCHEMA_FULL_CHECKING, true);
142             
143             InputStream xsdAsStream = IOUtils.getResourceAsStream(externalSchemaLocation, XMLUtils.class);
144             if (xsdAsStream == null)
145             {
146                 throw new IllegalArgumentException("Couldn't find schema at " + externalSchemaLocation);
147             }
148     
149             // Set schema language property (must be done before the schemaSource
150             // is set)
151             reader.setProperty(JAXP_PROPERTIES_SCHEMA_LANGUAGE, JAXP_PROPERTIES_SCHEMA_LANGUAGE_VALUE);
152     
153             // Need this one to map schemaLocation to a physical location
154             reader.setProperty(JAXP_PROPERTIES_SCHEMA_SOURCE, xsdAsStream);
155         }
156 
157 
158         if (obj instanceof org.dom4j.Document)
159         {
160             return (org.dom4j.Document) obj;
161         }
162         else if (obj instanceof org.w3c.dom.Document)
163         {
164             org.dom4j.io.DOMReader domReader = new org.dom4j.io.DOMReader();
165             return domReader.read((org.w3c.dom.Document) obj);
166         }
167         else if (obj instanceof org.xml.sax.InputSource)
168         {                
169             return reader.read((org.xml.sax.InputSource) obj);
170         }
171         else if (obj instanceof javax.xml.transform.Source || obj instanceof javax.xml.stream.XMLStreamReader)
172         {                
173             // TODO Find a more direct way to do this
174             XmlToDomDocument tr = new XmlToDomDocument();
175             tr.setMuleContext(muleContext);
176             tr.setReturnDataType(DataTypeFactory.create(org.dom4j.Document.class));
177             return (org.dom4j.Document) tr.transform(obj);
178         }
179         else if (obj instanceof java.io.InputStream)
180         {                
181             return reader.read((java.io.InputStream) obj);
182         }
183         else if (obj instanceof String)
184         {
185             return reader.read(new StringReader((String) obj));
186         }
187         else if (obj instanceof byte[])
188         {
189             // TODO Handle encoding/charset somehow
190             return reader.read(new StringReader(new String((byte[]) obj)));
191         }
192         else if (obj instanceof File)
193         {                
194             return reader.read((File) obj);
195         }
196         else
197         {
198             return null;
199         }
200     }
201 
202     /**
203      * Converts a payload to a {@link org.w3c.dom.Document} representation.
204      * <p> Reproduces the behavior from {@link org.mule.module.xml.util.XMLUtils#toDocument(Object, MuleContext)}
205      * which works converting to {@link org.dom4j.Document}.
206      *
207      * @param payload the payload to convert.
208      * @return a document from the payload or null if the payload is not a valid XML document.
209      */
210     public static org.w3c.dom.Document toW3cDocument(Object payload) throws Exception
211     {
212         if (payload instanceof org.dom4j.Document)
213         {
214             DOMWriter writer = new DOMWriter();
215             org.w3c.dom.Document w3cDocument = writer.write((org.dom4j.Document) payload);
216 
217             return w3cDocument;
218         }
219         else if (payload instanceof org.w3c.dom.Document)
220         {
221             return (org.w3c.dom.Document) payload;
222         }
223         else if (payload instanceof org.xml.sax.InputSource)
224         {
225             return parseXML((InputSource) payload);
226         }
227         else if (payload instanceof javax.xml.transform.Source || payload instanceof javax.xml.stream.XMLStreamReader)
228         {
229             DOMResult result = new DOMResult();
230             Transformer idTransformer = getTransformer();
231             Source source = (payload instanceof Source) ? (Source)payload : toXmlSource(null, true, payload);
232             idTransformer.transform(source, result);
233             return (Document) result.getNode();
234         }
235         else if (payload instanceof java.io.InputStream)
236         {
237             InputStreamReader input = new InputStreamReader((InputStream) payload);
238             return parseXML(input);
239         }
240         else if (payload instanceof String)
241         {
242             Reader input = new StringReader((String) payload);
243 
244             return parseXML(input);
245         }
246         else if (payload instanceof byte[])
247         {
248             // TODO Handle encoding/charset somehow
249             Reader input = new StringReader(new String((byte[]) payload));
250             return parseXML(input);
251         }
252         else if (payload instanceof File)
253         {
254             Reader input = new FileReader((File) payload);
255             return parseXML(input);
256         }
257         else
258         {
259             return null;
260         }
261     }
262 
263     private static org.w3c.dom.Document parseXML(Reader source) throws Exception
264     {
265         return parseXML(new InputSource(source));
266     }
267 
268     private static org.w3c.dom.Document parseXML(InputSource source) throws Exception
269     {
270         DocumentBuilderFactory factory =
271                 DocumentBuilderFactory.newInstance();
272 
273         return factory.newDocumentBuilder().parse(source);
274     }
275 
276     /**
277      * Returns an XMLStreamReader for an object of unknown type if possible.
278      * @return null if no XMLStreamReader can be created for the object type
279      * @throws XMLStreamException
280      */
281     public static javax.xml.stream.XMLStreamReader toXMLStreamReader(javax.xml.stream.XMLInputFactory factory, Object obj) throws XMLStreamException
282     {
283         if (obj instanceof javax.xml.stream.XMLStreamReader)
284         {
285             return (javax.xml.stream.XMLStreamReader) obj;
286         }
287         else if (obj instanceof org.mule.module.xml.stax.StaxSource)
288         {
289             return ((org.mule.module.xml.stax.StaxSource) obj).getXMLStreamReader();
290         }
291         else if (obj instanceof javax.xml.transform.Source)
292         {
293             return factory.createXMLStreamReader((javax.xml.transform.Source) obj);
294         }
295         else if (obj instanceof org.xml.sax.InputSource)
296         {
297             return factory.createXMLStreamReader(((org.xml.sax.InputSource) obj).getByteStream());
298         }
299         else if (obj instanceof org.w3c.dom.Document)
300         {
301             return factory.createXMLStreamReader(new javax.xml.transform.dom.DOMSource((org.w3c.dom.Document) obj));
302         }
303         else if (obj instanceof org.dom4j.Document)
304         {
305             return factory.createXMLStreamReader(new org.dom4j.io.DocumentSource((org.dom4j.Document) obj));
306         }
307         else if (obj instanceof java.io.InputStream)
308         {
309             final InputStream is = (java.io.InputStream) obj;
310             
311             XMLStreamReader xsr = factory.createXMLStreamReader(is);
312             return new DelegateXMLStreamReader(xsr) 
313             {
314                 @Override
315                 public void close() throws XMLStreamException
316                 {
317                     super.close();
318                     
319                     try
320                     {
321                         is.close();
322                     }
323                     catch (IOException e)
324                     {
325                         throw new XMLStreamException(e);
326                     }
327                 }
328                 
329             };
330         }
331         else if (obj instanceof String)
332         {
333             return factory.createXMLStreamReader(new StringReader((String) obj));
334         }
335         else if (obj instanceof byte[])
336         {
337             // TODO Handle encoding/charset?
338             return factory.createXMLStreamReader(new ByteArrayInputStream((byte[]) obj));
339         }
340         else
341         {
342             return null;
343         }
344     }
345     
346     /**
347      * Convert our object to a Source type efficiently.
348      */ 
349     public static javax.xml.transform.Source toXmlSource(javax.xml.stream.XMLInputFactory xmlInputFactory, boolean useStaxSource, Object src) throws Exception
350     {
351         if (src instanceof javax.xml.transform.Source)
352         {
353             return (Source) src;
354         }
355         else if (src instanceof byte[])
356         {
357             ByteArrayInputStream stream = new ByteArrayInputStream((byte[]) src);
358             return toStreamSource(xmlInputFactory, useStaxSource, stream);
359         }
360         else if (src instanceof InputStream)
361         {
362             return toStreamSource(xmlInputFactory, useStaxSource, (InputStream) src);
363         }
364         else if (src instanceof String)
365         {
366             if (useStaxSource)
367             {
368                 return new StaxSource(xmlInputFactory.createXMLStreamReader(new StringReader((String) src)));
369             }
370             else
371             {
372                 return new StreamSource(new StringReader((String) src));
373             }
374         }
375         else if (src instanceof org.dom4j.Document)
376         {
377             return new DocumentSource((org.dom4j.Document) src);
378         }
379         else if (src instanceof org.xml.sax.InputSource)
380         {
381             return new SAXSource((InputSource) src);
382         }
383         // TODO MULE-3555
384         else if (src instanceof XMLStreamReader)
385         {
386             XMLStreamReader xsr = (XMLStreamReader) src;
387             
388             // StaxSource requires that we advance to a start element/document event
389             if (!xsr.isStartElement() && 
390                             xsr.getEventType() != XMLStreamConstants.START_DOCUMENT) 
391             {
392                 xsr.nextTag();
393             }
394             
395             return new StaxSource((XMLStreamReader) src);
396         }
397         else if (src instanceof org.w3c.dom.Document || src instanceof org.w3c.dom.Element)
398         {
399             return new DOMSource((org.w3c.dom.Node) src);
400         }
401         else if (src instanceof DelayedResult) 
402         {
403             DelayedResult result = ((DelayedResult) src);
404             DOMResult domResult = new DOMResult();
405             result.write(domResult);
406             return new DOMSource(domResult.getNode());
407         }
408         else if (src instanceof OutputHandler) 
409         {
410             OutputHandler handler = ((OutputHandler) src);
411             ByteArrayOutputStream output = new ByteArrayOutputStream();
412             
413             handler.write(RequestContext.getEvent(), output);
414             
415             return toStreamSource(xmlInputFactory, useStaxSource, new ByteArrayInputStream(output.toByteArray()));
416         }
417         else
418         {
419             return null;
420         }
421     }
422 
423     public static javax.xml.transform.Source toStreamSource(javax.xml.stream.XMLInputFactory xmlInputFactory, boolean useStaxSource, InputStream stream) throws XMLStreamException
424     {
425         if (useStaxSource)
426         {
427             return new org.mule.module.xml.stax.StaxSource(xmlInputFactory.createXMLStreamReader(stream));
428         }
429         else 
430         {
431             return new javax.xml.transform.stream.StreamSource(stream);
432         }
433     }
434     
435     /**
436      * Copies the reader to the writer. The start and end document methods must
437      * be handled on the writer manually. TODO: if the namespace on the reader
438      * has been declared previously to where we are in the stream, this probably
439      * won't work.
440      * 
441      * @param reader
442      * @param writer
443      * @throws XMLStreamException
444      */
445     public static void copy(XMLStreamReader reader, XMLStreamWriter writer) throws XMLStreamException {
446         copy(reader, writer, false);
447     }
448     public static void copy(XMLStreamReader reader, XMLStreamWriter writer,
449                             boolean fragment) throws XMLStreamException {
450         // number of elements read in
451         int read = 0;
452         int event = reader.getEventType();
453 
454         while (reader.hasNext()) {
455             switch (event) {
456             case XMLStreamConstants.START_ELEMENT:
457                 read++;
458                 writeStartElement(reader, writer);
459                 break;
460             case XMLStreamConstants.END_ELEMENT:
461                 writer.writeEndElement();
462                 read--;
463                 if (read <= 0 && !fragment) {
464                     return;
465                 }
466                 break;
467             case XMLStreamConstants.CHARACTERS:
468                 writer.writeCharacters(reader.getText());
469                 break;
470             case XMLStreamConstants.START_DOCUMENT:
471             case XMLStreamConstants.END_DOCUMENT:
472             case XMLStreamConstants.ATTRIBUTE:
473             case XMLStreamConstants.NAMESPACE:
474                 break;
475             default:
476                 break;
477             }
478             event = reader.next();
479         }
480     }
481 
482     private static void writeStartElement(XMLStreamReader reader, XMLStreamWriter writer)
483         throws XMLStreamException {
484         String local = reader.getLocalName();
485         String uri = reader.getNamespaceURI();
486         String prefix = reader.getPrefix();
487         if (prefix == null) {
488             prefix = "";
489         }
490 
491         
492 //        System.out.println("STAXUTILS:writeStartElement : node name : " + local +  " namespace URI" + uri);
493         boolean writeElementNS = false;
494         if (uri != null) {
495             String boundPrefix = writer.getPrefix(uri);
496             if (boundPrefix == null || !prefix.equals(boundPrefix)) {
497                 writeElementNS = true;
498             }
499         }
500 
501         // Write out the element name
502         if (uri != null) {
503             if (prefix.length() == 0 && StringUtils.isEmpty(uri)) {
504                 writer.writeStartElement(local);
505                 writer.setDefaultNamespace(uri);
506 
507             } else {
508                 writer.writeStartElement(prefix, local, uri);
509                 writer.setPrefix(prefix, uri);
510             }
511         } else {
512             writer.writeStartElement(local);
513         }
514 
515         // Write out the namespaces
516         for (int i = 0; i < reader.getNamespaceCount(); i++) {
517             String nsURI = reader.getNamespaceURI(i);
518             String nsPrefix = reader.getNamespacePrefix(i);
519             if (nsPrefix == null) {
520                 nsPrefix = "";
521             }
522 
523             if (nsPrefix.length() == 0) {
524                 writer.writeDefaultNamespace(nsURI);
525             } else {
526                 writer.writeNamespace(nsPrefix, nsURI);
527             }
528 
529             if (nsURI.equals(uri) && nsPrefix.equals(prefix)) {
530                 writeElementNS = false;
531             }
532         }
533 
534         // Check if the namespace still needs to be written.
535         // We need this check because namespace writing works
536         // different on Woodstox and the RI.
537         if (writeElementNS) {
538             if (prefix.length() == 0) {
539                 writer.writeDefaultNamespace(uri);
540             } else {
541                 writer.writeNamespace(prefix, uri);
542             }
543         }        
544         
545         // Write out attributes
546         for (int i = 0; i < reader.getAttributeCount(); i++) {
547             String ns = reader.getAttributeNamespace(i);
548             String nsPrefix = reader.getAttributePrefix(i);
549             if (ns == null || ns.length() == 0) {
550                 writer.writeAttribute(reader.getAttributeLocalName(i), reader.getAttributeValue(i));
551             } else if (nsPrefix == null || nsPrefix.length() == 0) {
552                 writer.writeAttribute(reader.getAttributeNamespace(i), reader.getAttributeLocalName(i),
553                                       reader.getAttributeValue(i));
554             } else {
555                 writer.writeAttribute(reader.getAttributePrefix(i), reader.getAttributeNamespace(i), reader
556                     .getAttributeLocalName(i), reader.getAttributeValue(i));
557             }
558 
559         }
560     }
561 
562     /**
563      * Creates an XPath object with a custom NamespaceContext given the Node to operate on
564      * @param node the Node or document to operate on.  Note that namespace handling will not work if a Node fragment is passed in
565      * @return a new XPath object
566      */
567     private static XPath createXPath(Node node)
568     {
569         XPath xp = XPathFactory.newInstance().newXPath();
570         if (node instanceof Document)
571         {
572             xp.setNamespaceContext(new XPathNamespaceContext((Document) node));
573         }
574         return xp;
575     }
576 
577     /**
578      * Select a single XML node using an Xpath
579      * @param xpath the XPath expression to evaluate
580      * @param node the node (or document) to exaluate on
581      * @return the result of the evaluation.
582      * @throws XPathExpressionException if the XPath expression is malformed and cannot be parsed
583      */
584     public static Node selectOne(String xpath, Node node) throws XPathExpressionException
585     {
586             XPath xp = createXPath(node);
587             return (Node) xp.evaluate(xpath, node, XPathConstants.NODE);
588     }
589 
590     /**
591      * Select a single XML String value using an Xpath
592      * @param xpath the XPath expression to evaluate
593      * @param node the node (or document) to evaluate on
594      * @return the result of the evaluation.
595      * @throws XPathExpressionException if the XPath expression is malformed and cannot be parsed
596      */
597     public static String selectValue(String xpath, Node node) throws XPathExpressionException
598     {
599             XPath xp = createXPath(node);
600             return (String) xp.evaluate(xpath, node, XPathConstants.STRING);
601     }
602 
603     /**
604      * Select a set of Node objects using the Xpath expression
605      * @param xpath the XPath expression to evaluate
606      * @param node the node (or document) to evaluate on
607      * @return the result of the evaluation. 
608      * @throws XPathExpressionException if the XPath expression is malformed and cannot be parsed
609      */
610     public static List<Node> select(String xpath, Node node) throws XPathExpressionException
611     {
612             XPath xp = createXPath(node);
613             NodeList nl = (NodeList) xp.evaluate(xpath, node, XPathConstants.NODESET);
614             List<Node> nodeList = new ArrayList<Node>(nl.getLength());
615             for (int i = 0; i < nl.getLength(); i++)
616             {
617                 nodeList.add(nl.item(i));
618             }
619             return nodeList;
620     }
621 
622 
623 
624     /**
625      * The default namespace context that will read namespaces from the current document if the
626      * Node being processed is a Document
627      */
628     private static class XPathNamespaceContext implements NamespaceContext
629     {
630         private Document document;
631 
632         public XPathNamespaceContext(Document document)
633         {
634             this.document = document;
635         }
636 
637         public String getNamespaceURI(String prefix)
638         {
639             if (prefix == null || prefix.equals(""))
640             {
641                 return document.getDocumentElement().getNamespaceURI();
642             }
643             else
644             {
645                 return document.lookupNamespaceURI(prefix);
646             }
647         }
648 
649         public String getPrefix(String namespaceURI)
650         {
651             return document.lookupPrefix(namespaceURI);
652         }
653 
654         public Iterator<String> getPrefixes(String namespaceURI)
655         {
656             List<String> list = new ArrayList<String>();
657             list.add(getPrefix(namespaceURI));
658             return list.iterator();
659         }
660     }
661 }