Author: jukka
Date: Mon Jun 1 22:45:05 2009
New Revision: 780859
URL: http://svn.apache.org/viewvc?rev=780859&view=rev
Log:
TIKA-237: Better distinction between SAXException and TikaException
Use the new TaggedContentHandler mechanism in CompositeParser to catch and
convert illegal SAXExceptions.
Modified:
lucene/tika/trunk/CHANGES.txt
lucene/tika/trunk/tika-core/src/main/java/org/apache/tika/parser/CompositeParser.java
lucene/tika/trunk/tika-core/src/main/java/org/apache/tika/sax/TaggedSAXException.java
Modified: lucene/tika/trunk/CHANGES.txt
URL:
http://svn.apache.org/viewvc/lucene/tika/trunk/CHANGES.txt?rev=780859&r1=780858&r2=780859&view=diff
==============================================================================
--- lucene/tika/trunk/CHANGES.txt (original)
+++ lucene/tika/trunk/CHANGES.txt Mon Jun 1 22:45:05 2009
@@ -36,9 +36,9 @@
in Tika to avoid the dependency to the large ICU4J jar. (TIKA-229)
* Composite parsers like the AutoDetectParser now make sure that any
- RuntimeExceptions or IOExceptions unrelated to the given document
- stream are converted to TikaExceptions before being passed to the
- client. (TIKA-198)
+ RuntimeExceptions, IOExceptions or SAXExceptions unrelated to the given
+ document stream or content handler are converted to TikaExceptions
+ before being passed to the client. (TIKA-198, TIKA-237)
Release 0.3 - 03/09/2009
------------------------
Modified:
lucene/tika/trunk/tika-core/src/main/java/org/apache/tika/parser/CompositeParser.java
URL:
http://svn.apache.org/viewvc/lucene/tika/trunk/tika-core/src/main/java/org/apache/tika/parser/CompositeParser.java?rev=780859&r1=780858&r2=780859&view=diff
==============================================================================
--- lucene/tika/trunk/tika-core/src/main/java/org/apache/tika/parser/CompositeParser.java (original)
+++ lucene/tika/trunk/tika-core/src/main/java/org/apache/tika/parser/CompositeParser.java Mon Jun 1 22:45:05 2009
@@ -24,6 +24,7 @@
import org.apache.tika.exception.TikaException;
import org.apache.tika.io.TaggedInputStream;
import org.apache.tika.metadata.Metadata;
+import org.apache.tika.sax.TaggedContentHandler;
import org.xml.sax.ContentHandler;
import org.xml.sax.SAXException;
@@ -101,25 +102,32 @@
}
/**
- * Delegates the call to the matching component parser. Potential
- * {@link RuntimeException}s and {@link IOException}s unrelated to the
- * given input stream are automatically wrapped into
- * {@link TikaException}s to better honor the {@link Parser} contract.
+ * Delegates the call to the matching component parser.
+ * <p>
+ * Potential {@link RuntimeException}s, {@link IOException}s and
+ * {@link SAXException}s unrelated to the given input stream and content
+ * handler are automatically wrapped into {@link TikaException}s to better
+ * honor the {@link Parser} contract.
*/
public void parse(
InputStream stream, ContentHandler handler, Metadata metadata)
throws IOException, SAXException, TikaException {
- TaggedInputStream tagged = new TaggedInputStream(stream);
+ Parser parser = getParser(metadata);
+ TaggedInputStream taggedStream = new TaggedInputStream(stream);
+ TaggedContentHandler taggedHandler = new TaggedContentHandler(handler);
try {
- getParser(metadata).parse(tagged, handler, metadata);
+ parser.parse(taggedStream, taggedHandler, metadata);
} catch (RuntimeException e) {
- throw new TikaException("Unexpected parse error", e);
+ throw new TikaException(
+ "Unexpected RuntimeException from " + parser, e);
} catch (IOException e) {
- tagged.throwIfCauseOf(e);
-
- // The IOException was caused by the parser instead of the stream,
- // convert the exception to a TikaException
- throw new TikaException("Parse error", e);
+ taggedStream.throwIfCauseOf(e);
+ throw new TikaException(
+ "TIKA-198: Illegal IOException from " + parser, e);
+ } catch (SAXException e) {
+ taggedHandler.throwIfCauseOf(e);
+ throw new TikaException(
+ "TIKA-237: Illegal SAXException from " + parser, e);
}
}
Modified:
lucene/tika/trunk/tika-core/src/main/java/org/apache/tika/sax/TaggedSAXException.java
URL:
http://svn.apache.org/viewvc/lucene/tika/trunk/tika-core/src/main/java/org/apache/tika/sax/TaggedSAXException.java?rev=780859&r1=780858&r2=780859&view=diff
==============================================================================
--- lucene/tika/trunk/tika-core/src/main/java/org/apache/tika/sax/TaggedSAXException.java (original)
+++ lucene/tika/trunk/tika-core/src/main/java/org/apache/tika/sax/TaggedSAXException.java Mon Jun 1 22:45:05 2009
@@ -38,6 +38,7 @@
*/
public TaggedSAXException(SAXException original, Object tag) {
super(original.getMessage(), original);
+ initCause(original); // SAXException has its own chaining mechanism!
this.tag = tag;
}