Author: jukka
Date: Sat Apr 25 22:41:29 2009
New Revision: 768619
URL: http://svn.apache.org/viewvc?rev=768619&view=rev
Log:
TIKA-216: Zip bomb prevention
Use the SecureContentHandler in AutoDetectParser.
Modified:
lucene/tika/trunk/src/main/java/org/apache/tika/parser/AutoDetectParser.java
lucene/tika/trunk/src/test/java/org/apache/tika/parser/AutoDetectParserTest.java
Modified:
lucene/tika/trunk/src/main/java/org/apache/tika/parser/AutoDetectParser.java
URL: http://svn.apache.org/viewvc/lucene/tika/trunk/src/main/java/org/apache/tika/parser/AutoDetectParser.java?rev=768619&r1=768618&r2=768619&view=diff
==============================================================================
--- lucene/tika/trunk/src/main/java/org/apache/tika/parser/AutoDetectParser.java (original)
+++ lucene/tika/trunk/src/main/java/org/apache/tika/parser/AutoDetectParser.java Sat Apr 25 22:41:29 2009
@@ -20,11 +20,13 @@
import java.io.IOException;
import java.io.InputStream;
+import org.apache.commons.io.input.CountingInputStream;
import org.apache.tika.config.TikaConfig;
import org.apache.tika.exception.TikaException;
import org.apache.tika.metadata.Metadata;
import org.apache.tika.mime.MediaType;
import org.apache.tika.mime.MimeTypes;
+import org.apache.tika.sax.SecureContentHandler;
import org.xml.sax.ContentHandler;
import org.xml.sax.SAXException;
@@ -74,8 +76,12 @@
MediaType type = types.detect(stream, metadata);
metadata.set(Metadata.CONTENT_TYPE, type.toString());
+ // TIKA-216: Zip bomb prevention
+ CountingInputStream count = new CountingInputStream(stream);
+ SecureContentHandler secure = new SecureContentHandler(handler, count);
+
// Parse the document
- super.parse(stream, handler, metadata);
+ super.parse(count, secure, metadata);
}
}
Modified:
lucene/tika/trunk/src/test/java/org/apache/tika/parser/AutoDetectParserTest.java
URL: http://svn.apache.org/viewvc/lucene/tika/trunk/src/test/java/org/apache/tika/parser/AutoDetectParserTest.java?rev=768619&r1=768618&r2=768619&view=diff
==============================================================================
--- lucene/tika/trunk/src/test/java/org/apache/tika/parser/AutoDetectParserTest.java (original)
+++ lucene/tika/trunk/src/test/java/org/apache/tika/parser/AutoDetectParserTest.java Sat Apr 25 22:41:29 2009
@@ -24,6 +24,7 @@
import org.apache.tika.metadata.Metadata;
import org.apache.tika.sax.BodyContentHandler;
import org.xml.sax.ContentHandler;
+import org.xml.sax.SAXException;
import junit.framework.TestCase;
@@ -164,6 +165,27 @@
}
/**
+ * Make sure that zip bomb attacks are prevented.
+ *
+ * @see <a href="https://issues.apache.org/jira/browse/TIKA-216">TIKA-216</a>
+ */
+ public void testZipBombPrevention() throws Exception {
+ InputStream tgz = AutoDetectParserTest.class.getResourceAsStream(
+ "/test-documents/TIKA-216.tgz");
+ try {
+ Metadata metadata = new Metadata();
+ ContentHandler handler = new BodyContentHandler();
+ new AutoDetectParser().parse(tgz, handler, metadata);
+ fail("Zip bomb was not detected");
+ } catch (SAXException e) {
+ // expected
+ } finally {
+ tgz.close();
+ }
+
+ }
+
+ /**
* Minimal class to encapsulate all parameters -- the main reason for
* its existence is to aid in debugging via its toString() method.
*