View Javadoc

1   /*
2    * $Id: XMLUtils.java 20321 2010-11-24 15:21:24Z dfeist $
3    * --------------------------------------------------------------------------------------
4    * Copyright (c) MuleSoft, Inc.  All rights reserved.  http://www.mulesoft.com
5    *
6    * The software in this package is published under the terms of the CPAL v1.0
7    * license, a copy of which has been included with this distribution in the
8    * LICENSE.txt file.
9    */
10  
11  package org.mule.module.xml.util;
12  
13  import org.mule.RequestContext;
14  import org.mule.api.MuleContext;
15  import org.mule.api.transport.OutputHandler;
16  import org.mule.module.xml.stax.DelegateXMLStreamReader;
17  import org.mule.module.xml.stax.StaxSource;
18  import org.mule.module.xml.transformer.DelayedResult;
19  import org.mule.module.xml.transformer.XmlToDomDocument;
20  import org.mule.transformer.types.DataTypeFactory;
21  import org.mule.util.IOUtils;
22  
23  import java.io.ByteArrayInputStream;
24  import java.io.File;
25  import java.io.FileReader;
26  import java.io.IOException;
27  import java.io.InputStream;
28  import java.io.InputStreamReader;
29  import java.io.Reader;
30  import java.io.StringReader;
31  import java.util.ArrayList;
32  import java.util.Iterator;
33  import java.util.List;
34  
35  import javax.xml.namespace.NamespaceContext;
36  import javax.xml.parsers.DocumentBuilderFactory;
37  import javax.xml.stream.XMLStreamConstants;
38  import javax.xml.stream.XMLStreamException;
39  import javax.xml.stream.XMLStreamReader;
40  import javax.xml.stream.XMLStreamWriter;
41  import javax.xml.transform.Source;
42  import javax.xml.transform.Transformer;
43  import javax.xml.transform.TransformerConfigurationException;
44  import javax.xml.transform.TransformerFactory;
45  import javax.xml.transform.TransformerFactoryConfigurationError;
46  import javax.xml.transform.dom.DOMResult;
47  import javax.xml.transform.dom.DOMSource;
48  import javax.xml.transform.sax.SAXSource;
49  import javax.xml.transform.stream.StreamSource;
50  import javax.xml.xpath.XPath;
51  import javax.xml.xpath.XPathConstants;
52  import javax.xml.xpath.XPathExpressionException;
53  import javax.xml.xpath.XPathFactory;
54  
55  import org.apache.commons.io.output.ByteArrayOutputStream;
56  import org.apache.commons.lang.StringUtils;
57  import org.dom4j.DocumentException;
58  import org.dom4j.io.DOMReader;
59  import org.dom4j.io.DOMWriter;
60  import org.dom4j.io.DocumentSource;
61  import org.w3c.dom.Document;
62  import org.w3c.dom.Node;
63  import org.w3c.dom.NodeList;
64  import org.xml.sax.InputSource;
65  
66  /**
67   * General utility methods for working with XML.
68   */
69  public class XMLUtils extends org.mule.util.XMLUtils
70  {
    /**
     * Class name of the XSLTC transformer factory shipped inside the JDK. {@link #getTransformer()}
     * installs it as the <code>javax.xml.transform.TransformerFactory</code> system property when the
     * default factory lookup fails.
     */
    public static final String TRANSFORMER_FACTORY_JDK5 = "com.sun.org.apache.xalan.internal.xsltc.trax.TransformerFactoryImpl";

    // Apache XML parser feature names that switch on optional XSD validation
    public static final String APACHE_XML_FEATURES_VALIDATION_SCHEMA = "http://apache.org/xml/features/validation/schema";
    public static final String APACHE_XML_FEATURES_VALIDATION_SCHEMA_FULL_CHECKING = "http://apache.org/xml/features/validation/schema-full-checking";

    // JAXP property naming the external XSD to validate against
    public static final String JAXP_PROPERTIES_SCHEMA_SOURCE = "http://java.sun.com/xml/jaxp/properties/schemaSource";

    // JAXP property (and its value) naming the schema language of the external XSD;
    // newer JAXP implementations require it to be set before the schema source
    public static final String JAXP_PROPERTIES_SCHEMA_LANGUAGE = "http://java.sun.com/xml/jaxp/properties/schemaLanguage";
    public static final String JAXP_PROPERTIES_SCHEMA_LANGUAGE_VALUE = "http://www.w3.org/2001/XMLSchema";
84  
85      /**
86       * Converts a DOM to an XML string.
87       * @param dom the dome object to convert
88       * @return A string representation of the document
89       */
90      public static String toXml(Document dom)
91      {
92          return new DOMReader().read(dom).asXML();
93      }
94  
95      /**
96       * @return a new XSLT transformer
97       * @throws TransformerConfigurationException if no TransformerFactory can be located in the
98       * runtime environment.
99       */
100     public static Transformer getTransformer() throws TransformerConfigurationException
101     {
102         TransformerFactory tf;
103         try
104         {
105             tf = TransformerFactory.newInstance();
106         }
107         catch (TransformerFactoryConfigurationError e)
108         {
109             System.setProperty("javax.xml.transform.TransformerFactory", TRANSFORMER_FACTORY_JDK5);
110             tf = TransformerFactory.newInstance();
111         }
112         if (tf != null)
113         {
114             return tf.newTransformer();
115         }
116         else
117         {
118             throw new TransformerConfigurationException("Unable to instantiate a TransformerFactory");
119         }
120     }
121 
122     public static org.dom4j.Document toDocument(Object obj, MuleContext muleContext) throws Exception
123     {
124         return toDocument(obj, null, muleContext);
125     }
126     
127     /**
128      * Converts an object of unknown type to an org.dom4j.Document if possible.
129      * @return null if object cannot be converted
130      * @throws DocumentException if an error occurs while parsing
131      */
132     public static org.dom4j.Document toDocument(Object obj, String externalSchemaLocation, MuleContext muleContext) throws Exception
133     {
134         org.dom4j.io.SAXReader reader = new org.dom4j.io.SAXReader();
135         if (externalSchemaLocation != null)
136         {
137             reader.setValidation(true);
138             reader.setFeature(APACHE_XML_FEATURES_VALIDATION_SCHEMA, true);
139             reader.setFeature(APACHE_XML_FEATURES_VALIDATION_SCHEMA_FULL_CHECKING, true);
140             
141             InputStream xsdAsStream = IOUtils.getResourceAsStream(externalSchemaLocation, XMLUtils.class);
142             if (xsdAsStream == null)
143             {
144                 throw new IllegalArgumentException("Couldn't find schema at " + externalSchemaLocation);
145             }
146     
147             // Set schema language property (must be done before the schemaSource
148             // is set)
149             reader.setProperty(JAXP_PROPERTIES_SCHEMA_LANGUAGE, JAXP_PROPERTIES_SCHEMA_LANGUAGE_VALUE);
150     
151             // Need this one to map schemaLocation to a physical location
152             reader.setProperty(JAXP_PROPERTIES_SCHEMA_SOURCE, xsdAsStream);
153         }
154 
155 
156         if (obj instanceof org.dom4j.Document)
157         {
158             return (org.dom4j.Document) obj;
159         }
160         else if (obj instanceof org.w3c.dom.Document)
161         {
162             org.dom4j.io.DOMReader domReader = new org.dom4j.io.DOMReader();
163             return domReader.read((org.w3c.dom.Document) obj);
164         }
165         else if (obj instanceof org.xml.sax.InputSource)
166         {                
167             return reader.read((org.xml.sax.InputSource) obj);
168         }
169         else if (obj instanceof javax.xml.transform.Source || obj instanceof javax.xml.stream.XMLStreamReader)
170         {                
171             // TODO Find a more direct way to do this
172             XmlToDomDocument tr = new XmlToDomDocument();
173             tr.setMuleContext(muleContext);
174             tr.setReturnDataType(DataTypeFactory.create(org.dom4j.Document.class));
175             return (org.dom4j.Document) tr.transform(obj);
176         }
177         else if (obj instanceof java.io.InputStream)
178         {                
179             return reader.read((java.io.InputStream) obj);
180         }
181         else if (obj instanceof String)
182         {
183             return reader.read(new StringReader((String) obj));
184         }
185         else if (obj instanceof byte[])
186         {
187             // TODO Handle encoding/charset somehow
188             return reader.read(new StringReader(new String((byte[]) obj)));
189         }
190         else if (obj instanceof File)
191         {                
192             return reader.read((File) obj);
193         }
194         else
195         {
196             return null;
197         }
198     }
199 
200     /**
201      * Converts a payload to a {@link org.w3c.dom.Document} representation.
202      * <p> Reproduces the behavior from {@link org.mule.module.xml.util.XMLUtils#toDocument(Object, MuleContext)}
203      * which works converting to {@link org.dom4j.Document}.
204      *
205      * @param payload the payload to convert.
206      * @return a document from the payload or null if the payload is not a valid XML document.
207      */
208     public static org.w3c.dom.Document toW3cDocument(Object payload) throws Exception
209     {
210         if (payload instanceof org.dom4j.Document)
211         {
212             DOMWriter writer = new DOMWriter();
213             org.w3c.dom.Document w3cDocument = writer.write((org.dom4j.Document) payload);
214 
215             return w3cDocument;
216         }
217         else if (payload instanceof org.w3c.dom.Document)
218         {
219             return (org.w3c.dom.Document) payload;
220         }
221         else if (payload instanceof org.xml.sax.InputSource)
222         {
223             return parseXML((InputSource) payload);
224         }
225         else if (payload instanceof javax.xml.transform.Source || payload instanceof javax.xml.stream.XMLStreamReader)
226         {
227             DOMResult result = new DOMResult();
228             Transformer idTransformer = getTransformer();
229             Source source = (payload instanceof Source) ? (Source)payload : toXmlSource(null, true, payload);
230             idTransformer.transform(source, result);
231             return (Document) result.getNode();
232         }
233         else if (payload instanceof java.io.InputStream)
234         {
235             InputStreamReader input = new InputStreamReader((InputStream) payload);
236             return parseXML(input);
237         }
238         else if (payload instanceof String)
239         {
240             Reader input = new StringReader((String) payload);
241 
242             return parseXML(input);
243         }
244         else if (payload instanceof byte[])
245         {
246             // TODO Handle encoding/charset somehow
247             Reader input = new StringReader(new String((byte[]) payload));
248             return parseXML(input);
249         }
250         else if (payload instanceof File)
251         {
252             Reader input = new FileReader((File) payload);
253             return parseXML(input);
254         }
255         else
256         {
257             return null;
258         }
259     }
260 
261     private static org.w3c.dom.Document parseXML(Reader source) throws Exception
262     {
263         return parseXML(new InputSource(source));
264     }
265 
266     private static org.w3c.dom.Document parseXML(InputSource source) throws Exception
267     {
268         DocumentBuilderFactory factory =
269                 DocumentBuilderFactory.newInstance();
270 
271         return factory.newDocumentBuilder().parse(source);
272     }
273 
274     /**
275      * Returns an XMLStreamReader for an object of unknown type if possible.
276      * @return null if no XMLStreamReader can be created for the object type
277      * @throws XMLStreamException
278      */
279     public static javax.xml.stream.XMLStreamReader toXMLStreamReader(javax.xml.stream.XMLInputFactory factory, Object obj) throws XMLStreamException
280     {
281         if (obj instanceof javax.xml.stream.XMLStreamReader)
282         {
283             return (javax.xml.stream.XMLStreamReader) obj;
284         }
285         else if (obj instanceof org.mule.module.xml.stax.StaxSource)
286         {
287             return ((org.mule.module.xml.stax.StaxSource) obj).getXMLStreamReader();
288         }
289         else if (obj instanceof javax.xml.transform.Source)
290         {
291             return factory.createXMLStreamReader((javax.xml.transform.Source) obj);
292         }
293         else if (obj instanceof org.xml.sax.InputSource)
294         {
295             return factory.createXMLStreamReader(((org.xml.sax.InputSource) obj).getByteStream());
296         }
297         else if (obj instanceof org.w3c.dom.Document)
298         {
299             return factory.createXMLStreamReader(new javax.xml.transform.dom.DOMSource((org.w3c.dom.Document) obj));
300         }
301         else if (obj instanceof org.dom4j.Document)
302         {
303             return factory.createXMLStreamReader(new org.dom4j.io.DocumentSource((org.dom4j.Document) obj));
304         }
305         else if (obj instanceof java.io.InputStream)
306         {
307             final InputStream is = (java.io.InputStream) obj;
308             
309             XMLStreamReader xsr = factory.createXMLStreamReader(is);
310             return new DelegateXMLStreamReader(xsr) 
311             {
312                 @Override
313                 public void close() throws XMLStreamException
314                 {
315                     super.close();
316                     
317                     try
318                     {
319                         is.close();
320                     }
321                     catch (IOException e)
322                     {
323                         throw new XMLStreamException(e);
324                     }
325                 }
326                 
327             };
328         }
329         else if (obj instanceof String)
330         {
331             return factory.createXMLStreamReader(new StringReader((String) obj));
332         }
333         else if (obj instanceof byte[])
334         {
335             // TODO Handle encoding/charset?
336             return factory.createXMLStreamReader(new ByteArrayInputStream((byte[]) obj));
337         }
338         else
339         {
340             return null;
341         }
342     }
343     
344     /**
345      * Convert our object to a Source type efficiently.
346      */ 
347     public static javax.xml.transform.Source toXmlSource(javax.xml.stream.XMLInputFactory xmlInputFactory, boolean useStaxSource, Object src) throws Exception
348     {
349         if (src instanceof javax.xml.transform.Source)
350         {
351             return (Source) src;
352         }
353         else if (src instanceof byte[])
354         {
355             ByteArrayInputStream stream = new ByteArrayInputStream((byte[]) src);
356             return toStreamSource(xmlInputFactory, useStaxSource, stream);
357         }
358         else if (src instanceof InputStream)
359         {
360             return toStreamSource(xmlInputFactory, useStaxSource, (InputStream) src);
361         }
362         else if (src instanceof String)
363         {
364             if (useStaxSource)
365             {
366                 return new StaxSource(xmlInputFactory.createXMLStreamReader(new StringReader((String) src)));
367             }
368             else
369             {
370                 return new StreamSource(new StringReader((String) src));
371             }
372         }
373         else if (src instanceof org.dom4j.Document)
374         {
375             return new DocumentSource((org.dom4j.Document) src);
376         }
377         else if (src instanceof org.xml.sax.InputSource)
378         {
379             return new SAXSource((InputSource) src);
380         }
381         // TODO MULE-3555
382         else if (src instanceof XMLStreamReader)
383         {
384             XMLStreamReader xsr = (XMLStreamReader) src;
385             
386             // StaxSource requires that we advance to a start element/document event
387             if (!xsr.isStartElement() && 
388                             xsr.getEventType() != XMLStreamConstants.START_DOCUMENT) 
389             {
390                 xsr.nextTag();
391             }
392             
393             return new StaxSource((XMLStreamReader) src);
394         }
395         else if (src instanceof org.w3c.dom.Document || src instanceof org.w3c.dom.Element)
396         {
397             return new DOMSource((org.w3c.dom.Node) src);
398         }
399         else if (src instanceof DelayedResult) 
400         {
401             DelayedResult result = ((DelayedResult) src);
402             DOMResult domResult = new DOMResult();
403             result.write(domResult);
404             return new DOMSource(domResult.getNode());
405         }
406         else if (src instanceof OutputHandler) 
407         {
408             OutputHandler handler = ((OutputHandler) src);
409             ByteArrayOutputStream output = new ByteArrayOutputStream();
410             
411             handler.write(RequestContext.getEvent(), output);
412             
413             return toStreamSource(xmlInputFactory, useStaxSource, new ByteArrayInputStream(output.toByteArray()));
414         }
415         else
416         {
417             return null;
418         }
419     }
420 
421     public static javax.xml.transform.Source toStreamSource(javax.xml.stream.XMLInputFactory xmlInputFactory, boolean useStaxSource, InputStream stream) throws XMLStreamException
422     {
423         if (useStaxSource)
424         {
425             return new org.mule.module.xml.stax.StaxSource(xmlInputFactory.createXMLStreamReader(stream));
426         }
427         else 
428         {
429             return new javax.xml.transform.stream.StreamSource(stream);
430         }
431     }
432     
433     /**
434      * Copies the reader to the writer. The start and end document methods must
435      * be handled on the writer manually. TODO: if the namespace on the reader
436      * has been declared previously to where we are in the stream, this probably
437      * won't work.
438      * 
439      * @param reader
440      * @param writer
441      * @throws XMLStreamException
442      */
443     public static void copy(XMLStreamReader reader, XMLStreamWriter writer) throws XMLStreamException {
444         copy(reader, writer, false);
445     }
446     public static void copy(XMLStreamReader reader, XMLStreamWriter writer,
447                             boolean fragment) throws XMLStreamException {
448         // number of elements read in
449         int read = 0;
450         int event = reader.getEventType();
451 
452         while (reader.hasNext()) {
453             switch (event) {
454             case XMLStreamConstants.START_ELEMENT:
455                 read++;
456                 writeStartElement(reader, writer);
457                 break;
458             case XMLStreamConstants.END_ELEMENT:
459                 writer.writeEndElement();
460                 read--;
461                 if (read <= 0 && !fragment) {
462                     return;
463                 }
464                 break;
465             case XMLStreamConstants.CHARACTERS:
466                 writer.writeCharacters(reader.getText());
467                 break;
468             case XMLStreamConstants.START_DOCUMENT:
469             case XMLStreamConstants.END_DOCUMENT:
470             case XMLStreamConstants.ATTRIBUTE:
471             case XMLStreamConstants.NAMESPACE:
472                 break;
473             default:
474                 break;
475             }
476             event = reader.next();
477         }
478     }
479 
480     private static void writeStartElement(XMLStreamReader reader, XMLStreamWriter writer)
481         throws XMLStreamException {
482         String local = reader.getLocalName();
483         String uri = reader.getNamespaceURI();
484         String prefix = reader.getPrefix();
485         if (prefix == null) {
486             prefix = "";
487         }
488 
489         
490 //        System.out.println("STAXUTILS:writeStartElement : node name : " + local +  " namespace URI" + uri);
491         boolean writeElementNS = false;
492         if (uri != null) {
493             String boundPrefix = writer.getPrefix(uri);
494             if (boundPrefix == null || !prefix.equals(boundPrefix)) {
495                 writeElementNS = true;
496             }
497         }
498 
499         // Write out the element name
500         if (uri != null) {
501             if (prefix.length() == 0 && StringUtils.isEmpty(uri)) {
502                 writer.writeStartElement(local);
503                 writer.setDefaultNamespace(uri);
504 
505             } else {
506                 writer.writeStartElement(prefix, local, uri);
507                 writer.setPrefix(prefix, uri);
508             }
509         } else {
510             writer.writeStartElement(local);
511         }
512 
513         // Write out the namespaces
514         for (int i = 0; i < reader.getNamespaceCount(); i++) {
515             String nsURI = reader.getNamespaceURI(i);
516             String nsPrefix = reader.getNamespacePrefix(i);
517             if (nsPrefix == null) {
518                 nsPrefix = "";
519             }
520 
521             if (nsPrefix.length() == 0) {
522                 writer.writeDefaultNamespace(nsURI);
523             } else {
524                 writer.writeNamespace(nsPrefix, nsURI);
525             }
526 
527             if (nsURI.equals(uri) && nsPrefix.equals(prefix)) {
528                 writeElementNS = false;
529             }
530         }
531 
532         // Check if the namespace still needs to be written.
533         // We need this check because namespace writing works
534         // different on Woodstox and the RI.
535         if (writeElementNS) {
536             if (prefix.length() == 0) {
537                 writer.writeDefaultNamespace(uri);
538             } else {
539                 writer.writeNamespace(prefix, uri);
540             }
541         }        
542         
543         // Write out attributes
544         for (int i = 0; i < reader.getAttributeCount(); i++) {
545             String ns = reader.getAttributeNamespace(i);
546             String nsPrefix = reader.getAttributePrefix(i);
547             if (ns == null || ns.length() == 0) {
548                 writer.writeAttribute(reader.getAttributeLocalName(i), reader.getAttributeValue(i));
549             } else if (nsPrefix == null || nsPrefix.length() == 0) {
550                 writer.writeAttribute(reader.getAttributeNamespace(i), reader.getAttributeLocalName(i),
551                                       reader.getAttributeValue(i));
552             } else {
553                 writer.writeAttribute(reader.getAttributePrefix(i), reader.getAttributeNamespace(i), reader
554                     .getAttributeLocalName(i), reader.getAttributeValue(i));
555             }
556 
557         }
558     }
559 
560     /**
561      * Creates an XPath object with a custom NamespaceContext given the Node to operate on
562      * @param node the Node or document to operate on.  Note that namespace handling will not work if a Node fragment is passed in
563      * @return a new XPath object
564      */
565     private static XPath createXPath(Node node)
566     {
567         XPath xp = XPathFactory.newInstance().newXPath();
568         if (node instanceof Document)
569         {
570             xp.setNamespaceContext(new XPathNamespaceContext((Document) node));
571         }
572         return xp;
573     }
574 
575     /**
576      * Select a single XML node using an Xpath
577      * @param xpath the XPath expression to evaluate
578      * @param node the node (or document) to exaluate on
579      * @return the result of the evaluation.
580      * @throws XPathExpressionException if the XPath expression is malformed and cannot be parsed
581      */
582     public static Node selectOne(String xpath, Node node) throws XPathExpressionException
583     {
584             XPath xp = createXPath(node);
585             return (Node) xp.evaluate(xpath, node, XPathConstants.NODE);
586     }
587 
588     /**
589      * Select a single XML String value using an Xpath
590      * @param xpath the XPath expression to evaluate
591      * @param node the node (or document) to evaluate on
592      * @return the result of the evaluation.
593      * @throws XPathExpressionException if the XPath expression is malformed and cannot be parsed
594      */
595     public static String selectValue(String xpath, Node node) throws XPathExpressionException
596     {
597             XPath xp = createXPath(node);
598             return (String) xp.evaluate(xpath, node, XPathConstants.STRING);
599     }
600 
601     /**
602      * Select a set of Node objects using the Xpath expression
603      * @param xpath the XPath expression to evaluate
604      * @param node the node (or document) to evaluate on
605      * @return the result of the evaluation. 
606      * @throws XPathExpressionException if the XPath expression is malformed and cannot be parsed
607      */
608     public static List<Node> select(String xpath, Node node) throws XPathExpressionException
609     {
610             XPath xp = createXPath(node);
611             NodeList nl = (NodeList) xp.evaluate(xpath, node, XPathConstants.NODESET);
612             List<Node> nodeList = new ArrayList<Node>(nl.getLength());
613             for (int i = 0; i < nl.getLength(); i++)
614             {
615                 nodeList.add(nl.item(i));
616             }
617             return nodeList;
618     }
619 
620 
621 
622     /**
623      * The default namespace context that will read namespaces from the current document if the
624      * Node being processed is a Document
625      */
626     private static class XPathNamespaceContext implements NamespaceContext
627     {
628         private Document document;
629 
630         public XPathNamespaceContext(Document document)
631         {
632             this.document = document;
633         }
634 
635         public String getNamespaceURI(String prefix)
636         {
637             if (prefix == null || prefix.equals(""))
638             {
639                 return document.getDocumentElement().getNamespaceURI();
640             }
641             else
642             {
643                 return document.lookupNamespaceURI(prefix);
644             }
645         }
646 
647         public String getPrefix(String namespaceURI)
648         {
649             return document.lookupPrefix(namespaceURI);
650         }
651 
652         public Iterator<String> getPrefixes(String namespaceURI)
653         {
654             List<String> list = new ArrayList<String>();
655             list.add(getPrefix(namespaceURI));
656             return list.iterator();
657         }
658     }
659 }