1 /*
2 * Copyright 2010-2013 smartics, Kronseder & Reiner GmbH
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 * http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
16 package de.smartics.xml.encoding;
17
18 import java.io.IOException;
19 import java.io.InputStream;
20
21 import org.apache.commons.lang.StringUtils;
22 import org.xml.sax.Attributes;
23 import org.xml.sax.InputSource;
24 import org.xml.sax.Locator;
25 import org.xml.sax.SAXException;
26 import org.xml.sax.SAXNotRecognizedException;
27 import org.xml.sax.SAXNotSupportedException;
28 import org.xml.sax.SAXParseException;
29 import org.xml.sax.XMLReader;
30 import org.xml.sax.ext.Locator2;
31 import org.xml.sax.helpers.DefaultHandler;
32 import org.xml.sax.helpers.XMLReaderFactory;
33
34 /**
35 * Detects the {@link XmlDescriptor descriptor} of an XML document.
36 * <p>
37 * The implementation requires a parser implementing
38 * <code>org.xml.sax.ext.Locator2</code>. This is provided by the <a
39 * href="http://xerces.apache.org/xerces2-j">Xerces</a> from version 2.0.2 on
40 * (please refer to <a
41 * href="http://xerces.apache.org/xerces2-j/faq-sax.html#faq-6">Xerces SAX
42 * FAQ</a> for details).
43 * </p>
44 * <p>
45 * This library does not force a special Xerces version to be used. Maven users
46 * may want to add the following dependency to their POM:
47 * </p>
48 *
49 * <pre>
50 * {@markupExample "POM Dependency"
51 * <dependency>
52 * <groupId>xerces</groupId>
53 * <artifactId>xercesImpl</artifactId>
54 * <version>2.10.0</version>
55 * </dependency>
56 * }
57 * </pre>
58 *
59 * {@stickyNote Most implementations only provide the version and encoding
60 * information if a stream is passed instead of a input source.}
61 */
62 public class XmlDescriptorDetector
63 { // NOPMD due content handler delegation
64 // ********************************* Fields *********************************
65
66 // --- constants ------------------------------------------------------------
67
68 /**
69 * The message text to be added as prefix to the SAX exception that signals to
70 * abort the parsing.
71 */
72 private static final String ABORTION_MESSAGE_TEXT =
73 "Abort parsing. XML document descriptor detected: ";
74
75 // --- members --------------------------------------------------------------
76
77 /**
78 * The delegate to fetch additional information from the stream.
79 */
80 private final XmlParsingHandler delegate;
81
82 // ****************************** Initializer *******************************
83
84 // ****************************** Constructors ******************************
85
86 /**
87 * Convenience constructor with no delegate that stops parsing after document
88 * start event has been detected.
89 */
90 public XmlDescriptorDetector()
91 {
92 this(null);
93 }
94
95 /**
96 * <p>
97 * Default constructor.
98 * </p>
99 * {@stickyNote Please consider to throw an {@link AbortException in your
100 * content handler delegate to abort the parsing as soon as all information is
101 * collected.}
102 *
103 * @param delegate the delegate to fetch additional information from the
104 * stream.
105 */
106 public XmlDescriptorDetector(final XmlParsingHandler delegate)
107 {
108 this.delegate = delegate;
109 }
110
111 // ****************************** Inner Classes *****************************
112
113 /**
114 * Handles the XML document to detect the descriptor information (including
115 * the encoding).
116 */
117 private final class DescriptorHandler extends DefaultHandler
118 { // NOPMD due content handler delegation
119 // ******************************** Fields ********************************
120
121 // --- constants ----------------------------------------------------------
122
123 // --- members ------------------------------------------------------------
124
125 /**
126 * The public identifier as reported by the input source. May be
127 * <code>null</code>.
128 */
129 private final String publicId;
130
131 /**
132 * The system identifier as reported by the input source. May be
133 * <code>null</code>.
134 */
135 private final String systemId;
136
137 /**
138 * The XML information fetched from the document.
139 */
140 private XmlDescriptor descriptor;
141
142 /**
143 * The locator of the XML document being parsed.
144 */
145 private Locator locator;
146
147 // ***************************** Initializer ******************************
148
149 // ***************************** Constructors *****************************
150
151 /**
152 * Default constructor.
153 *
154 * @param publicId the public identifier as reported by the input source.
155 * @param systemId the system identifier as reported by the input source.
156 */
157 private DescriptorHandler(final String publicId, final String systemId)
158 {
159 this.publicId = publicId;
160 this.systemId = systemId;
161 }
162
163 // ***************************** Inner Classes ****************************
164
165 // ******************************** Methods *******************************
166
167 // --- init ---------------------------------------------------------------
168
169 // --- get&set ------------------------------------------------------------
170
171 // --- business -----------------------------------------------------------
172
173 /**
174 * {@inheritDoc}
175 *
176 * @see org.xml.sax.helpers.DefaultHandler#setDocumentLocator(org.xml.sax.Locator)
177 */
178 @Override
179 public void setDocumentLocator(final Locator locator)
180 {
181 this.locator = locator;
182
183 if (delegate != null)
184 {
185 delegate.setDocumentLocator(locator);
186 }
187 }
188
189 /**
190 * {@inheritDoc}
191 *
192 * @see org.xml.sax.helpers.DefaultHandler#startDocument()
193 */
194 @Override
195 public void startDocument() throws SAXException
196 {
197 final XmlDescriptor.Builder builder = new XmlDescriptor.Builder();
198
199 if (locator instanceof Locator2)
200 {
201 final Locator2 locator2 = (Locator2) locator;
202 builder.withVersion(locator2.getXMLVersion());
203 builder.withEncoding(locator2.getEncoding());
204 }
205
206 if (locator != null)
207 {
208 builder.withPublicId(get(locator.getPublicId(), publicId));
209 builder.withSystemId(get(locator.getSystemId(), systemId));
210 }
211
212 this.descriptor = builder.build();
213
214 if (delegate == null)
215 {
216 throw new AbortException(ABORTION_MESSAGE_TEXT + descriptor);
217 }
218 else
219 {
220 delegate.startDocument();
221 }
222 }
223
224 private String get(final String value, final String defaultValue)
225 {
226 if (StringUtils.isBlank(value))
227 {
228 return defaultValue;
229 }
230 return value;
231 }
232
233 // ... delegation only ....................................................
234
235 /**
236 * {@inheritDoc}
237 * <p>
238 * Delegates to the {@link XmlDescriptorDetector#getDelegate() content
239 * handler delegate}.
240 * </p>
241 *
242 * @see org.xml.sax.helpers.DefaultHandler#resolveEntity(java.lang.String,
243 * java.lang.String)
244 */
245 @Override
246 public InputSource resolveEntity(final String publicId,
247 final String systemId) throws IOException, SAXException
248 {
249 return delegate.resolveEntity(publicId, systemId);
250 }
251
252 /**
253 * {@inheritDoc}
254 * <p>
255 * Delegates to the {@link XmlDescriptorDetector#getDelegate() content
256 * handler delegate}.
257 * </p>
258 *
259 * @see org.xml.sax.helpers.DefaultHandler#notationDecl(java.lang.String,
260 * java.lang.String, java.lang.String)
261 */
262 @Override
263 public void notationDecl(final String name, final String publicId,
264 final String systemId) throws SAXException
265 {
266 delegate.notationDecl(name, publicId, systemId);
267 }
268
269 /**
270 * {@inheritDoc}
271 * <p>
272 * Delegates to the {@link XmlDescriptorDetector#getDelegate() content
273 * handler delegate}.
274 * </p>
275 *
276 * @see org.xml.sax.helpers.DefaultHandler#unparsedEntityDecl(java.lang.String,
277 * java.lang.String, java.lang.String, java.lang.String)
278 */
279 @Override
280 public void unparsedEntityDecl(final String name, final String publicId,
281 final String systemId, final String notationName) throws SAXException
282 {
283 delegate.unparsedEntityDecl(name, publicId, systemId, notationName);
284 }
285
286 /**
287 * {@inheritDoc}
288 * <p>
289 * Delegates to the {@link XmlDescriptorDetector#getDelegate() content
290 * handler delegate}.
291 * </p>
292 *
293 * @see org.xml.sax.helpers.DefaultHandler#endDocument()
294 */
295 @Override
296 public void endDocument() throws SAXException
297 {
298 delegate.endDocument();
299 }
300
301 /**
302 * {@inheritDoc}
303 * <p>
304 * Delegates to the {@link XmlDescriptorDetector#getDelegate() content
305 * handler delegate}.
306 * </p>
307 *
308 * @see org.xml.sax.helpers.DefaultHandler#startPrefixMapping(java.lang.String,
309 * java.lang.String)
310 */
311 @Override
312 public void startPrefixMapping(final String prefix, final String uri)
313 throws SAXException
314 {
315 delegate.startPrefixMapping(prefix, uri);
316 }
317
318 /**
319 * {@inheritDoc}
320 * <p>
321 * Delegates to the {@link XmlDescriptorDetector#getDelegate() content
322 * handler delegate}.
323 * </p>
324 *
325 * @see org.xml.sax.helpers.DefaultHandler#endPrefixMapping(java.lang.String)
326 */
327 @Override
328 public void endPrefixMapping(final String prefix) throws SAXException
329 {
330 delegate.endPrefixMapping(prefix);
331 }
332
333 /**
334 * {@inheritDoc}
335 * <p>
336 * Delegates to the {@link XmlDescriptorDetector#getDelegate() content
337 * handler delegate}.
338 * </p>
339 *
340 * @see org.xml.sax.helpers.DefaultHandler#startElement(java.lang.String,
341 * java.lang.String, java.lang.String, org.xml.sax.Attributes)
342 */
343 @Override
344 public void startElement(final String uri, final String localName,
345 final String qName, final Attributes attributes) throws SAXException
346 {
347 delegate.startElement(uri, localName, qName, attributes);
348 }
349
350 /**
351 * {@inheritDoc}
352 * <p>
353 * Delegates to the {@link XmlDescriptorDetector#getDelegate() content
354 * handler delegate}.
355 * </p>
356 *
357 * @see org.xml.sax.helpers.DefaultHandler#endElement(java.lang.String,
358 * java.lang.String, java.lang.String)
359 */
360 @Override
361 public void endElement(final String uri, final String localName,
362 final String qName) throws SAXException
363 {
364 delegate.endElement(uri, localName, qName);
365 }
366
367 /**
368 * {@inheritDoc}
369 * <p>
370 * Delegates to the {@link XmlDescriptorDetector#getDelegate() content
371 * handler delegate}.
372 * </p>
373 *
374 * @see org.xml.sax.helpers.DefaultHandler#characters(char[], int, int)
375 */
376 @Override
377 public void characters(final char[] ch, final int start, final int length)
378 throws SAXException
379 {
380 delegate.characters(ch, start, length);
381 }
382
383 /**
384 * {@inheritDoc}
385 * <p>
386 * Delegates to the {@link XmlDescriptorDetector#getDelegate() content
387 * handler delegate}.
388 * </p>
389 *
390 * @see org.xml.sax.helpers.DefaultHandler#ignorableWhitespace(char[], int,
391 * int)
392 */
393 @Override
394 public void ignorableWhitespace(final char[] ch, final int start,
395 final int length) throws SAXException
396 {
397 delegate.ignorableWhitespace(ch, start, length);
398 }
399
400 /**
401 * {@inheritDoc}
402 * <p>
403 * Delegates to the {@link XmlDescriptorDetector#getDelegate() content
404 * handler delegate}.
405 * </p>
406 *
407 * @see org.xml.sax.helpers.DefaultHandler#processingInstruction(java.lang.String,
408 * java.lang.String)
409 */
410 @Override
411 public void processingInstruction(final String target, final String data)
412 throws SAXException
413 {
414 delegate.processingInstruction(target, data);
415 }
416
417 /**
418 * {@inheritDoc}
419 * <p>
420 * Delegates to the {@link XmlDescriptorDetector#getDelegate() content
421 * handler delegate}.
422 * </p>
423 *
424 * @see org.xml.sax.helpers.DefaultHandler#skippedEntity(java.lang.String)
425 */
426 @Override
427 public void skippedEntity(final String name) throws SAXException
428 {
429 delegate.skippedEntity(name);
430 }
431
432 /**
433 * {@inheritDoc}
434 * <p>
435 * Delegates to the {@link XmlDescriptorDetector#getDelegate() content
436 * handler delegate}.
437 * </p>
438 *
439 * @see org.xml.sax.helpers.DefaultHandler#warning(org.xml.sax.SAXParseException)
440 */
441 @Override
442 public void warning(final SAXParseException e) throws SAXException
443 {
444 delegate.warning(e);
445 }
446
447 /**
448 * {@inheritDoc}
449 * <p>
450 * Delegates to the {@link XmlDescriptorDetector#getDelegate() content
451 * handler delegate}.
452 * </p>
453 *
454 * @see org.xml.sax.helpers.DefaultHandler#error(org.xml.sax.SAXParseException)
455 */
456 @Override
457 public void error(final SAXParseException e) throws SAXException
458 {
459 delegate.error(e);
460 }
461
462 /**
463 * {@inheritDoc}
464 * <p>
465 * Delegates to the {@link XmlDescriptorDetector#getDelegate() content
466 * handler delegate}.
467 * </p>
468 *
469 * @see org.xml.sax.helpers.DefaultHandler#error(org.xml.sax.SAXParseException)
470 */
471 @Override
472 public void fatalError(final SAXParseException e) throws SAXException
473 {
474 delegate.fatalError(e);
475 }
476
477 // --- object basics ------------------------------------------------------
478 }
479
480 // ********************************* Methods ********************************
481
482 // --- init -----------------------------------------------------------------
483
484 // --- get&set --------------------------------------------------------------
485
486 /**
487 * Returns the delegate to fetch additional information from the stream.
488 *
489 * @return the delegate to fetch additional information from the stream.
490 */
491 public XmlParsingHandler getDelegate()
492 {
493 return delegate;
494 }
495
496 // --- business -------------------------------------------------------------
497
498 /**
499 * Creates and configures a parser instance.
500 *
501 * @return the new parser instance.
502 * @throws SAXException on any problem creating the parser.
503 * @throws SAXNotRecognizedException on any problem configuring the parser.
504 * @throws SAXNotSupportedException on any problem configuring the parser.
505 */
506 protected XMLReader createParser() throws SAXException,
507 SAXNotRecognizedException, SAXNotSupportedException
508 {
509 final XMLReader parser = XMLReaderFactory.createXMLReader();
510 return parser;
511 }
512
513 /**
514 * Reads the XML descriptor of the file provided by the input stream.
515 * <p>
516 * This will only detect XML version and encoding.
517 * </p>
518 * {@stickyNote Most implementations only provide the version and encoding
519 * information if a stream is passed instead of a input source.}
520 *
521 * @param input the XML document whose descriptor is to be read.
522 * @return the descriptor of the document. Never <code>null</code>.
523 * @throws SAXException on any problem reading the XML document.
524 * @throws IOException on any problem reading from the input source.
525 */
526 public XmlDescriptor readDescriptor(final InputStream input)
527 throws SAXException, IOException
528 {
529 final InputSource source = new InputSource(input);
530 return readDescriptor(source);
531 }
532
533 /**
534 * Reads the XML descriptor of the file provided by the input source.
535 * <p>
536 * As soon as all relevant information is read, the paring is aborted.
537 * </p>
538 * {@stickyNote Most implementations read the public and system ID from the
539 * input source as defaults and return them if no such information can be
540 * determined from the {@code input}.}
541 *
542 * @param input the XML document whose descriptor is to be read.
543 * @return the descriptor of the document. Never <code>null</code>.
544 * @throws SAXException on any problem reading the XML document.
545 * @throws IOException on any problem reading from the input source.
546 */
547 public XmlDescriptor readDescriptor(final InputSource input)
548 throws SAXException, IOException
549 {
550 final XMLReader parser = createParser();
551
552 final DescriptorHandler handler =
553 new DescriptorHandler(input.getPublicId(), input.getSystemId());
554 parser.setContentHandler(handler);
555
556 try
557 {
558 parser.parse(input);
559 }
560 catch (final AbortException e)
561 {
562 // OK, requested abort of the XML parsing from one of the content
563 // handlers.
564 }
565
566 return handler.descriptor;
567 }
568
569 // --- object basics --------------------------------------------------------
570
571 }