Author: jukka
Date: Sun Apr 26 21:07:42 2009
New Revision: 768792

URL: http://svn.apache.org/viewvc?rev=768792&view=rev
Log:
TIKA-216: Zip bomb prevention

Convert the SAXException that reports a zip bomb to a more appropriate 
TikaException.

Modified:
    lucene/tika/trunk/src/main/java/org/apache/tika/parser/AutoDetectParser.java
    lucene/tika/trunk/src/main/java/org/apache/tika/sax/SecureContentHandler.java
    lucene/tika/trunk/src/test/java/org/apache/tika/parser/AutoDetectParserTest.java

Modified: 
lucene/tika/trunk/src/main/java/org/apache/tika/parser/AutoDetectParser.java
URL: 
http://svn.apache.org/viewvc/lucene/tika/trunk/src/main/java/org/apache/tika/parser/AutoDetectParser.java?rev=768792&r1=768791&r2=768792&view=diff
==============================================================================
--- lucene/tika/trunk/src/main/java/org/apache/tika/parser/AutoDetectParser.java (original)
+++ lucene/tika/trunk/src/main/java/org/apache/tika/parser/AutoDetectParser.java Sun Apr 26 21:07:42 2009
@@ -81,7 +81,13 @@
         SecureContentHandler secure = new SecureContentHandler(handler, count);
 
         // Parse the document
-        super.parse(count, secure, metadata);
+        try {
+            super.parse(count, secure, metadata);
+        } catch (SAXException e) {
+            // Convert zip bomb exceptions to TikaExceptions
+            secure.throwIfCauseOf(e);
+            throw e;
+        }
     }
 
 }

Modified: 
lucene/tika/trunk/src/main/java/org/apache/tika/sax/SecureContentHandler.java
URL: 
http://svn.apache.org/viewvc/lucene/tika/trunk/src/main/java/org/apache/tika/sax/SecureContentHandler.java?rev=768792&r1=768791&r2=768792&view=diff
==============================================================================
--- lucene/tika/trunk/src/main/java/org/apache/tika/sax/SecureContentHandler.java (original)
+++ lucene/tika/trunk/src/main/java/org/apache/tika/sax/SecureContentHandler.java Sun Apr 26 21:07:42 2009
@@ -17,6 +17,7 @@
 package org.apache.tika.sax;
 
 import org.apache.commons.io.input.CountingInputStream;
+import org.apache.tika.exception.TikaException;
 import org.xml.sax.ContentHandler;
 import org.xml.sax.SAXException;
 
@@ -114,6 +115,20 @@
         this.ratio = ratio;
     }
 
+    /**
+     * Converts the given {@link SAXException} to a corresponding
+     * {@link TikaException} if it's caused by this instance detecting
+     * a zip bomb.
+     *
+     * @param e SAX exception
+     * @throws TikaException zip bomb exception
+     */
+    public void throwIfCauseOf(SAXException e) throws TikaException {
+        if (e instanceof SecureSAXException
+                && ((SecureSAXException) e).isCausedBy(this)) {
+            throw new TikaException("Zip bomb detected!", e);
+        }
+    }
 
     /**
      * Records the given number of output characters (or more accurately
@@ -127,7 +142,7 @@
         characterCount += length;
         if (characterCount > threshold
                 && characterCount > stream.getByteCount() * ratio) {
-            throw new SAXException("Zip Bomb detected!");
+            throw new SecureSAXException();
         }
     }
 
@@ -145,4 +160,23 @@
         super.ignorableWhitespace(ch, start, length);
     }
 
+    /**
+     * Private exception class used to indicate a suspected zip bomb.
+     *
+     * @see SecureContentHandler#throwIfCauseOf(SAXException)
+     */
+    private class SecureSAXException extends SAXException {
+
+        public SecureSAXException() {
+            super("Suspected zip bomb: "
+                    + stream.getByteCount() + " input bytes produced "
+                    + characterCount + " output characters");
+        }
+
+        public boolean isCausedBy(SecureContentHandler handler) {
+            return SecureContentHandler.this == handler;
+        }
+
+    }
+
 }

Modified: 
lucene/tika/trunk/src/test/java/org/apache/tika/parser/AutoDetectParserTest.java
URL: 
http://svn.apache.org/viewvc/lucene/tika/trunk/src/test/java/org/apache/tika/parser/AutoDetectParserTest.java?rev=768792&r1=768791&r2=768792&view=diff
==============================================================================
--- lucene/tika/trunk/src/test/java/org/apache/tika/parser/AutoDetectParserTest.java (original)
+++ lucene/tika/trunk/src/test/java/org/apache/tika/parser/AutoDetectParserTest.java Sun Apr 26 21:07:42 2009
@@ -21,6 +21,7 @@
 
 import org.apache.commons.lang.builder.ReflectionToStringBuilder;
 import org.apache.commons.lang.builder.ToStringStyle;
+import org.apache.tika.exception.TikaException;
 import org.apache.tika.metadata.Metadata;
 import org.apache.tika.sax.BodyContentHandler;
 import org.xml.sax.ContentHandler;
@@ -177,7 +178,7 @@
             ContentHandler handler = new BodyContentHandler();
             new AutoDetectParser().parse(tgz, handler, metadata);
             fail("Zip bomb was not detected");
-        } catch (SAXException e) {
+        } catch (TikaException e) {
             // expected
         } finally {
             tgz.close();


Reply via email to