Author: jukka
Date: Sun Apr 26 21:07:42 2009
New Revision: 768792
URL: http://svn.apache.org/viewvc?rev=768792&view=rev
Log:
TIKA-216: Zip bomb prevention
Convert the SAXException that reports a zip bomb to a more appropriate
TikaException.
Modified:
lucene/tika/trunk/src/main/java/org/apache/tika/parser/AutoDetectParser.java
lucene/tika/trunk/src/main/java/org/apache/tika/sax/SecureContentHandler.java
lucene/tika/trunk/src/test/java/org/apache/tika/parser/AutoDetectParserTest.java
Modified:
lucene/tika/trunk/src/main/java/org/apache/tika/parser/AutoDetectParser.java
URL:
http://svn.apache.org/viewvc/lucene/tika/trunk/src/main/java/org/apache/tika/parser/AutoDetectParser.java?rev=768792&r1=768791&r2=768792&view=diff
==============================================================================
---
lucene/tika/trunk/src/main/java/org/apache/tika/parser/AutoDetectParser.java
(original)
+++
lucene/tika/trunk/src/main/java/org/apache/tika/parser/AutoDetectParser.java
Sun Apr 26 21:07:42 2009
@@ -81,7 +81,13 @@
SecureContentHandler secure = new SecureContentHandler(handler, count);
// Parse the document
- super.parse(count, secure, metadata);
+ try {
+ super.parse(count, secure, metadata);
+ } catch (SAXException e) {
+ // Convert zip bomb exceptions to TikaExceptions
+ secure.throwIfCauseOf(e);
+ throw e;
+ }
}
}
Modified:
lucene/tika/trunk/src/main/java/org/apache/tika/sax/SecureContentHandler.java
URL:
http://svn.apache.org/viewvc/lucene/tika/trunk/src/main/java/org/apache/tika/sax/SecureContentHandler.java?rev=768792&r1=768791&r2=768792&view=diff
==============================================================================
---
lucene/tika/trunk/src/main/java/org/apache/tika/sax/SecureContentHandler.java
(original)
+++
lucene/tika/trunk/src/main/java/org/apache/tika/sax/SecureContentHandler.java
Sun Apr 26 21:07:42 2009
@@ -17,6 +17,7 @@
package org.apache.tika.sax;
import org.apache.commons.io.input.CountingInputStream;
+import org.apache.tika.exception.TikaException;
import org.xml.sax.ContentHandler;
import org.xml.sax.SAXException;
@@ -114,6 +115,20 @@
this.ratio = ratio;
}
+ /**
+ * Converts the given {@link SAXException} to a corresponding
+ * {@link TikaException} if it's caused by this instance detecting
+ * a zip bomb.
+ *
+ * @param e SAX exception
+ * @throws TikaException zip bomb exception
+ */
+ public void throwIfCauseOf(SAXException e) throws TikaException {
+ if (e instanceof SecureSAXException
+ && ((SecureSAXException) e).isCausedBy(this)) {
+ throw new TikaException("Zip bomb detected!", e);
+ }
+ }
/**
* Records the given number of output characters (or more accurately
@@ -127,7 +142,7 @@
characterCount += length;
if (characterCount > threshold
&& characterCount > stream.getByteCount() * ratio) {
- throw new SAXException("Zip Bomb detected!");
+ throw new SecureSAXException();
}
}
@@ -145,4 +160,23 @@
super.ignorableWhitespace(ch, start, length);
}
+ /**
+ * Private exception class used to indicate a suspected zip bomb.
+ *
+ * @see SecureContentHandler#throwIfCauseOf(SAXException)
+ */
+ private class SecureSAXException extends SAXException {
+
+ public SecureSAXException() {
+ super("Suspected zip bomb: "
+ + stream.getByteCount() + " input bytes produced "
+ + characterCount + " output characters");
+ }
+
+ public boolean isCausedBy(SecureContentHandler handler) {
+ return SecureContentHandler.this == handler;
+ }
+
+ }
+
}
Modified:
lucene/tika/trunk/src/test/java/org/apache/tika/parser/AutoDetectParserTest.java
URL:
http://svn.apache.org/viewvc/lucene/tika/trunk/src/test/java/org/apache/tika/parser/AutoDetectParserTest.java?rev=768792&r1=768791&r2=768792&view=diff
==============================================================================
---
lucene/tika/trunk/src/test/java/org/apache/tika/parser/AutoDetectParserTest.java
(original)
+++
lucene/tika/trunk/src/test/java/org/apache/tika/parser/AutoDetectParserTest.java
Sun Apr 26 21:07:42 2009
@@ -21,6 +21,7 @@
import org.apache.commons.lang.builder.ReflectionToStringBuilder;
import org.apache.commons.lang.builder.ToStringStyle;
+import org.apache.tika.exception.TikaException;
import org.apache.tika.metadata.Metadata;
import org.apache.tika.sax.BodyContentHandler;
import org.xml.sax.ContentHandler;
@@ -177,7 +178,7 @@
ContentHandler handler = new BodyContentHandler();
new AutoDetectParser().parse(tgz, handler, metadata);
fail("Zip bomb was not detected");
- } catch (SAXException e) {
+ } catch (TikaException e) {
// expected
} finally {
tgz.close();