View Javadoc

1   /*
2    * Copyright 2010-2013 smartics, Kronseder & Reiner GmbH
3    *
4    * Licensed under the Apache License, Version 2.0 (the "License");
5    * you may not use this file except in compliance with the License.
6    * You may obtain a copy of the License at
7    *
8    *     http://www.apache.org/licenses/LICENSE-2.0
9    *
10   * Unless required by applicable law or agreed to in writing, software
11   * distributed under the License is distributed on an "AS IS" BASIS,
12   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13   * See the License for the specific language governing permissions and
14   * limitations under the License.
15   */
16  package de.smartics.xml.encoding;
17  
18  import java.io.IOException;
19  import java.io.InputStream;
20  
21  import org.apache.commons.lang.StringUtils;
22  import org.xml.sax.Attributes;
23  import org.xml.sax.InputSource;
24  import org.xml.sax.Locator;
25  import org.xml.sax.SAXException;
26  import org.xml.sax.SAXNotRecognizedException;
27  import org.xml.sax.SAXNotSupportedException;
28  import org.xml.sax.SAXParseException;
29  import org.xml.sax.XMLReader;
30  import org.xml.sax.ext.Locator2;
31  import org.xml.sax.helpers.DefaultHandler;
32  import org.xml.sax.helpers.XMLReaderFactory;
33  
34  /**
35   * Detects the {@link XmlDescriptor descriptor} of an XML document.
36   * <p>
37   * The implementation requires a parser implementing
38   * <code>org.xml.sax.ext.Locator2</code>. This is provided by the <a
39   * href="http://xerces.apache.org/xerces2-j">Xerces</a> from version 2.0.2 on
40   * (please refer to <a
41   * href="http://xerces.apache.org/xerces2-j/faq-sax.html#faq-6">Xerces SAX
42   * FAQ</a> for details).
43   * </p>
44   * <p>
45   * This library does not force a special Xerces version to be used. Maven users
46   * may want to add the following dependency to their POM:
47   * </p>
48   *
49   * <pre>
50   * {@markupExample "POM Dependency"
51   * &lt;dependency&gt;
52   *   &lt;groupId&gt;xerces&lt;/groupId&gt;
53   *   &lt;artifactId&gt;xercesImpl&lt;/artifactId&gt;
54   *   &lt;version&gt;2.10.0&lt;/version&gt;
55   * &lt;/dependency&gt;
56   * }
57   * </pre>
58   *
59   * {@stickyNote Most implementations only provide the version and encoding
60   * information if a stream is passed instead of a input source.}
61   */
62  public class XmlDescriptorDetector
63  { // NOPMD due content handler delegation
64    // ********************************* Fields *********************************
65  
66    // --- constants ------------------------------------------------------------
67  
68    /**
69     * The message text to be added as prefix to the SAX exception that signals to
70     * abort the parsing.
71     */
72    private static final String ABORTION_MESSAGE_TEXT =
73        "Abort parsing. XML document descriptor detected: ";
74  
75    // --- members --------------------------------------------------------------
76  
77    /**
78     * The delegate to fetch additional information from the stream.
79     */
80    private final XmlParsingHandler delegate;
81  
82    // ****************************** Initializer *******************************
83  
84    // ****************************** Constructors ******************************
85  
86    /**
87     * Convenience constructor with no delegate that stops parsing after document
88     * start event has been detected.
89     */
90    public XmlDescriptorDetector()
91    {
92      this(null);
93    }
94  
95    /**
96     * <p>
97     * Default constructor.
98     * </p>
99     * {@stickyNote Please consider to throw an {@link AbortException in your
100    * content handler delegate to abort the parsing as soon as all information is
101    * collected.}
102    *
103    * @param delegate the delegate to fetch additional information from the
104    *          stream.
105    */
106   public XmlDescriptorDetector(final XmlParsingHandler delegate)
107   {
108     this.delegate = delegate;
109   }
110 
111   // ****************************** Inner Classes *****************************
112 
113   /**
114    * Handles the XML document to detect the descriptor information (including
115    * the encoding).
116    */
117   private final class DescriptorHandler extends DefaultHandler
118   { // NOPMD due content handler delegation
119     // ******************************** Fields ********************************
120 
121     // --- constants ----------------------------------------------------------
122 
123     // --- members ------------------------------------------------------------
124 
125     /**
126      * The public identifier as reported by the input source. May be
127      * <code>null</code>.
128      */
129     private final String publicId;
130 
131     /**
132      * The system identifier as reported by the input source. May be
133      * <code>null</code>.
134      */
135     private final String systemId;
136 
137     /**
138      * The XML information fetched from the document.
139      */
140     private XmlDescriptor descriptor;
141 
142     /**
143      * The locator of the XML document being parsed.
144      */
145     private Locator locator;
146 
147     // ***************************** Initializer ******************************
148 
149     // ***************************** Constructors *****************************
150 
151     /**
152      * Default constructor.
153      *
154      * @param publicId the public identifier as reported by the input source.
155      * @param systemId the system identifier as reported by the input source.
156      */
157     private DescriptorHandler(final String publicId, final String systemId)
158     {
159       this.publicId = publicId;
160       this.systemId = systemId;
161     }
162 
163     // ***************************** Inner Classes ****************************
164 
165     // ******************************** Methods *******************************
166 
167     // --- init ---------------------------------------------------------------
168 
169     // --- get&set ------------------------------------------------------------
170 
171     // --- business -----------------------------------------------------------
172 
173     /**
174      * {@inheritDoc}
175      *
176      * @see org.xml.sax.helpers.DefaultHandler#setDocumentLocator(org.xml.sax.Locator)
177      */
178     @Override
179     public void setDocumentLocator(final Locator locator)
180     {
181       this.locator = locator;
182 
183       if (delegate != null)
184       {
185         delegate.setDocumentLocator(locator);
186       }
187     }
188 
189     /**
190      * {@inheritDoc}
191      *
192      * @see org.xml.sax.helpers.DefaultHandler#startDocument()
193      */
194     @Override
195     public void startDocument() throws SAXException
196     {
197       final XmlDescriptor.Builder builder = new XmlDescriptor.Builder();
198 
199       if (locator instanceof Locator2)
200       {
201         final Locator2 locator2 = (Locator2) locator;
202         builder.withVersion(locator2.getXMLVersion());
203         builder.withEncoding(locator2.getEncoding());
204       }
205 
206       if (locator != null)
207       {
208         builder.withPublicId(get(locator.getPublicId(), publicId));
209         builder.withSystemId(get(locator.getSystemId(), systemId));
210       }
211 
212       this.descriptor = builder.build();
213 
214       if (delegate == null)
215       {
216         throw new AbortException(ABORTION_MESSAGE_TEXT + descriptor);
217       }
218       else
219       {
220         delegate.startDocument();
221       }
222     }
223 
224     private String get(final String value, final String defaultValue)
225     {
226       if (StringUtils.isBlank(value))
227       {
228         return defaultValue;
229       }
230       return value;
231     }
232 
233     // ... delegation only ....................................................
234 
235     /**
236      * {@inheritDoc}
237      * <p>
238      * Delegates to the {@link XmlDescriptorDetector#getDelegate() content
239      * handler delegate}.
240      * </p>
241      *
242      * @see org.xml.sax.helpers.DefaultHandler#resolveEntity(java.lang.String,
243      *      java.lang.String)
244      */
245     @Override
246     public InputSource resolveEntity(final String publicId,
247         final String systemId) throws IOException, SAXException
248     {
249       return delegate.resolveEntity(publicId, systemId);
250     }
251 
252     /**
253      * {@inheritDoc}
254      * <p>
255      * Delegates to the {@link XmlDescriptorDetector#getDelegate() content
256      * handler delegate}.
257      * </p>
258      *
259      * @see org.xml.sax.helpers.DefaultHandler#notationDecl(java.lang.String,
260      *      java.lang.String, java.lang.String)
261      */
262     @Override
263     public void notationDecl(final String name, final String publicId,
264         final String systemId) throws SAXException
265     {
266       delegate.notationDecl(name, publicId, systemId);
267     }
268 
269     /**
270      * {@inheritDoc}
271      * <p>
272      * Delegates to the {@link XmlDescriptorDetector#getDelegate() content
273      * handler delegate}.
274      * </p>
275      *
276      * @see org.xml.sax.helpers.DefaultHandler#unparsedEntityDecl(java.lang.String,
277      *      java.lang.String, java.lang.String, java.lang.String)
278      */
279     @Override
280     public void unparsedEntityDecl(final String name, final String publicId,
281         final String systemId, final String notationName) throws SAXException
282     {
283       delegate.unparsedEntityDecl(name, publicId, systemId, notationName);
284     }
285 
286     /**
287      * {@inheritDoc}
288      * <p>
289      * Delegates to the {@link XmlDescriptorDetector#getDelegate() content
290      * handler delegate}.
291      * </p>
292      *
293      * @see org.xml.sax.helpers.DefaultHandler#endDocument()
294      */
295     @Override
296     public void endDocument() throws SAXException
297     {
298       delegate.endDocument();
299     }
300 
301     /**
302      * {@inheritDoc}
303      * <p>
304      * Delegates to the {@link XmlDescriptorDetector#getDelegate() content
305      * handler delegate}.
306      * </p>
307      *
308      * @see org.xml.sax.helpers.DefaultHandler#startPrefixMapping(java.lang.String,
309      *      java.lang.String)
310      */
311     @Override
312     public void startPrefixMapping(final String prefix, final String uri)
313       throws SAXException
314     {
315       delegate.startPrefixMapping(prefix, uri);
316     }
317 
318     /**
319      * {@inheritDoc}
320      * <p>
321      * Delegates to the {@link XmlDescriptorDetector#getDelegate() content
322      * handler delegate}.
323      * </p>
324      *
325      * @see org.xml.sax.helpers.DefaultHandler#endPrefixMapping(java.lang.String)
326      */
327     @Override
328     public void endPrefixMapping(final String prefix) throws SAXException
329     {
330       delegate.endPrefixMapping(prefix);
331     }
332 
333     /**
334      * {@inheritDoc}
335      * <p>
336      * Delegates to the {@link XmlDescriptorDetector#getDelegate() content
337      * handler delegate}.
338      * </p>
339      *
340      * @see org.xml.sax.helpers.DefaultHandler#startElement(java.lang.String,
341      *      java.lang.String, java.lang.String, org.xml.sax.Attributes)
342      */
343     @Override
344     public void startElement(final String uri, final String localName,
345         final String qName, final Attributes attributes) throws SAXException
346     {
347       delegate.startElement(uri, localName, qName, attributes);
348     }
349 
350     /**
351      * {@inheritDoc}
352      * <p>
353      * Delegates to the {@link XmlDescriptorDetector#getDelegate() content
354      * handler delegate}.
355      * </p>
356      *
357      * @see org.xml.sax.helpers.DefaultHandler#endElement(java.lang.String,
358      *      java.lang.String, java.lang.String)
359      */
360     @Override
361     public void endElement(final String uri, final String localName,
362         final String qName) throws SAXException
363     {
364       delegate.endElement(uri, localName, qName);
365     }
366 
367     /**
368      * {@inheritDoc}
369      * <p>
370      * Delegates to the {@link XmlDescriptorDetector#getDelegate() content
371      * handler delegate}.
372      * </p>
373      *
374      * @see org.xml.sax.helpers.DefaultHandler#characters(char[], int, int)
375      */
376     @Override
377     public void characters(final char[] ch, final int start, final int length)
378       throws SAXException
379     {
380       delegate.characters(ch, start, length);
381     }
382 
383     /**
384      * {@inheritDoc}
385      * <p>
386      * Delegates to the {@link XmlDescriptorDetector#getDelegate() content
387      * handler delegate}.
388      * </p>
389      *
390      * @see org.xml.sax.helpers.DefaultHandler#ignorableWhitespace(char[], int,
391      *      int)
392      */
393     @Override
394     public void ignorableWhitespace(final char[] ch, final int start,
395         final int length) throws SAXException
396     {
397       delegate.ignorableWhitespace(ch, start, length);
398     }
399 
400     /**
401      * {@inheritDoc}
402      * <p>
403      * Delegates to the {@link XmlDescriptorDetector#getDelegate() content
404      * handler delegate}.
405      * </p>
406      *
407      * @see org.xml.sax.helpers.DefaultHandler#processingInstruction(java.lang.String,
408      *      java.lang.String)
409      */
410     @Override
411     public void processingInstruction(final String target, final String data)
412       throws SAXException
413     {
414       delegate.processingInstruction(target, data);
415     }
416 
417     /**
418      * {@inheritDoc}
419      * <p>
420      * Delegates to the {@link XmlDescriptorDetector#getDelegate() content
421      * handler delegate}.
422      * </p>
423      *
424      * @see org.xml.sax.helpers.DefaultHandler#skippedEntity(java.lang.String)
425      */
426     @Override
427     public void skippedEntity(final String name) throws SAXException
428     {
429       delegate.skippedEntity(name);
430     }
431 
432     /**
433      * {@inheritDoc}
434      * <p>
435      * Delegates to the {@link XmlDescriptorDetector#getDelegate() content
436      * handler delegate}.
437      * </p>
438      *
439      * @see org.xml.sax.helpers.DefaultHandler#warning(org.xml.sax.SAXParseException)
440      */
441     @Override
442     public void warning(final SAXParseException e) throws SAXException
443     {
444       delegate.warning(e);
445     }
446 
447     /**
448      * {@inheritDoc}
449      * <p>
450      * Delegates to the {@link XmlDescriptorDetector#getDelegate() content
451      * handler delegate}.
452      * </p>
453      *
454      * @see org.xml.sax.helpers.DefaultHandler#error(org.xml.sax.SAXParseException)
455      */
456     @Override
457     public void error(final SAXParseException e) throws SAXException
458     {
459       delegate.error(e);
460     }
461 
462     /**
463      * {@inheritDoc}
464      * <p>
465      * Delegates to the {@link XmlDescriptorDetector#getDelegate() content
466      * handler delegate}.
467      * </p>
468      *
469      * @see org.xml.sax.helpers.DefaultHandler#error(org.xml.sax.SAXParseException)
470      */
471     @Override
472     public void fatalError(final SAXParseException e) throws SAXException
473     {
474       delegate.fatalError(e);
475     }
476 
477     // --- object basics ------------------------------------------------------
478   }
479 
480   // ********************************* Methods ********************************
481 
482   // --- init -----------------------------------------------------------------
483 
484   // --- get&set --------------------------------------------------------------
485 
486   /**
487    * Returns the delegate to fetch additional information from the stream.
488    *
489    * @return the delegate to fetch additional information from the stream.
490    */
491   public XmlParsingHandler getDelegate()
492   {
493     return delegate;
494   }
495 
496   // --- business -------------------------------------------------------------
497 
498   /**
499    * Creates and configures a parser instance.
500    *
501    * @return the new parser instance.
502    * @throws SAXException on any problem creating the parser.
503    * @throws SAXNotRecognizedException on any problem configuring the parser.
504    * @throws SAXNotSupportedException on any problem configuring the parser.
505    */
506   protected XMLReader createParser() throws SAXException,
507     SAXNotRecognizedException, SAXNotSupportedException
508   {
509     final XMLReader parser = XMLReaderFactory.createXMLReader();
510     return parser;
511   }
512 
513   /**
514    * Reads the XML descriptor of the file provided by the input stream.
515    * <p>
516    * This will only detect XML version and encoding.
517    * </p>
518    * {@stickyNote Most implementations only provide the version and encoding
519    * information if a stream is passed instead of a input source.}
520    *
521    * @param input the XML document whose descriptor is to be read.
522    * @return the descriptor of the document. Never <code>null</code>.
523    * @throws SAXException on any problem reading the XML document.
524    * @throws IOException on any problem reading from the input source.
525    */
526   public XmlDescriptor readDescriptor(final InputStream input)
527     throws SAXException, IOException
528   {
529     final InputSource source = new InputSource(input);
530     return readDescriptor(source);
531   }
532 
533   /**
534    * Reads the XML descriptor of the file provided by the input source.
535    * <p>
536    * As soon as all relevant information is read, the paring is aborted.
537    * </p>
538    * {@stickyNote Most implementations read the public and system ID from the
539    * input source as defaults and return them if no such information can be
540    * determined from the {@code input}.}
541    *
542    * @param input the XML document whose descriptor is to be read.
543    * @return the descriptor of the document. Never <code>null</code>.
544    * @throws SAXException on any problem reading the XML document.
545    * @throws IOException on any problem reading from the input source.
546    */
547   public XmlDescriptor readDescriptor(final InputSource input)
548     throws SAXException, IOException
549   {
550     final XMLReader parser = createParser();
551 
552     final DescriptorHandler handler =
553         new DescriptorHandler(input.getPublicId(), input.getSystemId());
554     parser.setContentHandler(handler);
555 
556     try
557     {
558       parser.parse(input);
559     }
560     catch (final AbortException e)
561     {
562       // OK, requested abort of the XML parsing from one of the content
563       // handlers.
564     }
565 
566     return handler.descriptor;
567   }
568 
569   // --- object basics --------------------------------------------------------
570 
571 }