Author: jukka
Date: Sat Apr 25 22:41:29 2009
New Revision: 768619

URL: http://svn.apache.org/viewvc?rev=768619&view=rev
Log:
TIKA-216: Zip bomb prevention

Use the SecureContentHandler in AutoDetectParser.

Modified:
    lucene/tika/trunk/src/main/java/org/apache/tika/parser/AutoDetectParser.java
    
lucene/tika/trunk/src/test/java/org/apache/tika/parser/AutoDetectParserTest.java

Modified: 
lucene/tika/trunk/src/main/java/org/apache/tika/parser/AutoDetectParser.java
URL: 
http://svn.apache.org/viewvc/lucene/tika/trunk/src/main/java/org/apache/tika/parser/AutoDetectParser.java?rev=768619&r1=768618&r2=768619&view=diff
==============================================================================
--- lucene/tika/trunk/src/main/java/org/apache/tika/parser/AutoDetectParser.java (original)
+++ lucene/tika/trunk/src/main/java/org/apache/tika/parser/AutoDetectParser.java Sat Apr 25 22:41:29 2009
@@ -20,11 +20,13 @@
 import java.io.IOException;
 import java.io.InputStream;
 
+import org.apache.commons.io.input.CountingInputStream;
 import org.apache.tika.config.TikaConfig;
 import org.apache.tika.exception.TikaException;
 import org.apache.tika.metadata.Metadata;
 import org.apache.tika.mime.MediaType;
 import org.apache.tika.mime.MimeTypes;
+import org.apache.tika.sax.SecureContentHandler;
 import org.xml.sax.ContentHandler;
 import org.xml.sax.SAXException;
 
@@ -74,8 +76,12 @@
         MediaType type = types.detect(stream, metadata);
         metadata.set(Metadata.CONTENT_TYPE, type.toString());
 
+        // TIKA-216: Zip bomb prevention
+        CountingInputStream count = new CountingInputStream(stream);
+        SecureContentHandler secure = new SecureContentHandler(handler, count);
+
         // Parse the document
-        super.parse(stream, handler, metadata);
+        super.parse(count, secure, metadata);
     }
 
 }

Modified: 
lucene/tika/trunk/src/test/java/org/apache/tika/parser/AutoDetectParserTest.java
URL: 
http://svn.apache.org/viewvc/lucene/tika/trunk/src/test/java/org/apache/tika/parser/AutoDetectParserTest.java?rev=768619&r1=768618&r2=768619&view=diff
==============================================================================
--- lucene/tika/trunk/src/test/java/org/apache/tika/parser/AutoDetectParserTest.java (original)
+++ lucene/tika/trunk/src/test/java/org/apache/tika/parser/AutoDetectParserTest.java Sat Apr 25 22:41:29 2009
@@ -24,6 +24,7 @@
 import org.apache.tika.metadata.Metadata;
 import org.apache.tika.sax.BodyContentHandler;
 import org.xml.sax.ContentHandler;
+import org.xml.sax.SAXException;
 
 import junit.framework.TestCase;
 
@@ -164,6 +165,27 @@
     }
 
     /**
+     * Make sure that zip bomb attacks are prevented.
+     *
+     * @see <a href="https://issues.apache.org/jira/browse/TIKA-216">TIKA-216</a>
+     */
+    public void testZipBombPrevention() throws Exception {
+        InputStream tgz = AutoDetectParserTest.class.getResourceAsStream(
+                "/test-documents/TIKA-216.tgz");
+        try {
+            Metadata metadata = new Metadata();
+            ContentHandler handler = new BodyContentHandler();
+            new AutoDetectParser().parse(tgz, handler, metadata);
+            fail("Zip bomb was not detected");
+        } catch (SAXException e) {
+            // expected
+        } finally {
+            tgz.close();
+        }
+    
+    }
+
+    /**
      * Minimal class to encapsulate all parameters -- the main reason for
      * its existence is to aid in debugging via its toString() method.
      *


Reply via email to