Author: jukka
Date: Mon Jun  1 22:45:05 2009
New Revision: 780859

URL: http://svn.apache.org/viewvc?rev=780859&view=rev
Log:
TIKA-237: Better distinction between SAXException and TikaException

Use the new TaggedContentHandler mechanism in CompositeParser to catch and 
convert illegal SAXExceptions.

Modified:
    lucene/tika/trunk/CHANGES.txt
    lucene/tika/trunk/tika-core/src/main/java/org/apache/tika/parser/CompositeParser.java
    lucene/tika/trunk/tika-core/src/main/java/org/apache/tika/sax/TaggedSAXException.java

Modified: lucene/tika/trunk/CHANGES.txt
URL: 
http://svn.apache.org/viewvc/lucene/tika/trunk/CHANGES.txt?rev=780859&r1=780858&r2=780859&view=diff
==============================================================================
--- lucene/tika/trunk/CHANGES.txt (original)
+++ lucene/tika/trunk/CHANGES.txt Mon Jun  1 22:45:05 2009
@@ -36,9 +36,9 @@
     in Tika to avoid the dependency to the large ICU4J jar. (TIKA-229)
 
   * Composite parsers like the AutoDetectParser now make sure that any
-    RuntimeExceptions or IOExceptions unrelated to the given document
-    stream are converted to TikaExceptions before being passed to the
-    client. (TIKA-198)
+    RuntimeExceptions, IOExceptions or SAXExceptions unrelated to the given
+    document stream or content handler are converted to TikaExceptions
+    before being passed to the client. (TIKA-198, TIKA-237)
 
 Release 0.3 - 03/09/2009
 ------------------------

Modified: 
lucene/tika/trunk/tika-core/src/main/java/org/apache/tika/parser/CompositeParser.java
URL: 
http://svn.apache.org/viewvc/lucene/tika/trunk/tika-core/src/main/java/org/apache/tika/parser/CompositeParser.java?rev=780859&r1=780858&r2=780859&view=diff
==============================================================================
--- lucene/tika/trunk/tika-core/src/main/java/org/apache/tika/parser/CompositeParser.java (original)
+++ lucene/tika/trunk/tika-core/src/main/java/org/apache/tika/parser/CompositeParser.java Mon Jun  1 22:45:05 2009
@@ -24,6 +24,7 @@
 import org.apache.tika.exception.TikaException;
 import org.apache.tika.io.TaggedInputStream;
 import org.apache.tika.metadata.Metadata;
+import org.apache.tika.sax.TaggedContentHandler;
 import org.xml.sax.ContentHandler;
 import org.xml.sax.SAXException;
 
@@ -101,25 +102,32 @@
     }
 
     /**
-     * Delegates the call to the matching component parser. Potential
-     * {@link RuntimeException}s and {@link IOException}s unrelated to the
-     * given input stream are automatically wrapped into
-     * {@link TikaException}s to better honor the {@link Parser} contract.
+     * Delegates the call to the matching component parser.
+     * <p>
+     * Potential {@link RuntimeException}s, {@link IOException}s and
+     * {@link SAXException}s unrelated to the given input stream and content
+     * handler are automatically wrapped into {@link TikaException}s to better
+     * honor the {@link Parser} contract.
      */
     public void parse(
             InputStream stream, ContentHandler handler, Metadata metadata)
             throws IOException, SAXException, TikaException {
-        TaggedInputStream tagged = new TaggedInputStream(stream);
+        Parser parser = getParser(metadata);
+        TaggedInputStream taggedStream = new TaggedInputStream(stream);
+        TaggedContentHandler taggedHandler = new TaggedContentHandler(handler);
         try {
-            getParser(metadata).parse(tagged, handler, metadata);
+            parser.parse(taggedStream, taggedHandler, metadata);
         } catch (RuntimeException e) {
-            throw new TikaException("Unexpected parse error", e);
+            throw new TikaException(
+                    "Unexpected RuntimeException from " + parser, e);
         } catch (IOException e) {
-            tagged.throwIfCauseOf(e);
-
-            // The IOException was caused by the parser instead of the stream,
-            // convert the exception to a TikaException
-            throw new TikaException("Parse error", e);
+            taggedStream.throwIfCauseOf(e);
+            throw new TikaException(
+                    "TIKA-198: Illegal IOException from " + parser, e);
+        } catch (SAXException e) {
+            taggedHandler.throwIfCauseOf(e);
+            throw new TikaException(
+                    "TIKA-237: Illegal SAXException from " + parser, e);
         }
     }
 

Modified: 
lucene/tika/trunk/tika-core/src/main/java/org/apache/tika/sax/TaggedSAXException.java
URL: 
http://svn.apache.org/viewvc/lucene/tika/trunk/tika-core/src/main/java/org/apache/tika/sax/TaggedSAXException.java?rev=780859&r1=780858&r2=780859&view=diff
==============================================================================
--- lucene/tika/trunk/tika-core/src/main/java/org/apache/tika/sax/TaggedSAXException.java (original)
+++ lucene/tika/trunk/tika-core/src/main/java/org/apache/tika/sax/TaggedSAXException.java Mon Jun  1 22:45:05 2009
@@ -38,6 +38,7 @@
      */
     public TaggedSAXException(SAXException original, Object tag) {
         super(original.getMessage(), original);
+        initCause(original); // SAXException has its own chaining mechanism!
         this.tag = tag;
     }
 


Reply via email to