Author: tallison
Date: Wed Jan 28 18:57:00 2015
New Revision: 1655431
URL: http://svn.apache.org/r1655431
Log:
TIKA-1521: follow commons-compress and require the JCE Unlimited Strength
files to be installed before testing the password on a 7z file
Modified:
tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/pkg/PackageParser.java
tika/trunk/tika-parsers/src/test/java/org/apache/tika/parser/pkg/Seven7ParserTest.java
Modified:
tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/pkg/PackageParser.java
URL:
http://svn.apache.org/viewvc/tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/pkg/PackageParser.java?rev=1655431&r1=1655430&r2=1655431&view=diff
==============================================================================
---
tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/pkg/PackageParser.java
(original)
+++
tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/pkg/PackageParser.java
Wed Jan 28 18:57:00 2015
@@ -16,6 +16,8 @@
*/
package org.apache.tika.parser.pkg;
+import static org.apache.tika.metadata.HttpHeaders.CONTENT_TYPE;
+
import java.io.BufferedInputStream;
import java.io.IOException;
import java.io.InputStream;
@@ -54,13 +56,16 @@ import org.xml.sax.ContentHandler;
import org.xml.sax.SAXException;
import org.xml.sax.helpers.AttributesImpl;
-import static org.apache.tika.metadata.HttpHeaders.CONTENT_TYPE;
-
/**
* Parser for various packaging formats. Package entries will be written to
* the XHTML event stream as <div class="package-entry"> elements that
* contain the (optional) entry name as a <h1> element and the full
* structured body content of the parsed entry.
+ * <p>
+ * User must have JCE Unlimited Strength jars installed for encryption to
+ * work with 7Z files (see: COMPRESS-299 and TIKA-1521). If the jars
+ * are not installed, an IOException will be thrown, and potentially
+ * wrapped in a TikaException.
*/
public class PackageParser extends AbstractParser {
@@ -161,7 +166,6 @@ public class PackageParser extends Abstr
if (!type.equals(MediaType.OCTET_STREAM)) {
metadata.set(CONTENT_TYPE, type.toString());
}
-
// Use the delegate parser to parse the contained document
EmbeddedDocumentExtractor extractor = context.get(
EmbeddedDocumentExtractor.class,
@@ -185,12 +189,13 @@ public class PackageParser extends Abstr
}
// Otherwise fall through to raise the exception as normal
} catch (IOException ie) {
- // Is this a password protection error?
+ // Is this a password protection error?
// (COMPRESS-298 should give a nicer way when implemented, see
TIKA-1525)
if ("Cannot read encrypted files without a
password".equals(ie.getMessage())) {
throw new EncryptedDocumentException();
}
// Otherwise fall through to raise the exception as normal
+ throw ie;
} finally {
ais.close();
tmp.close();
Modified:
tika/trunk/tika-parsers/src/test/java/org/apache/tika/parser/pkg/Seven7ParserTest.java
URL:
http://svn.apache.org/viewvc/tika/trunk/tika-parsers/src/test/java/org/apache/tika/parser/pkg/Seven7ParserTest.java?rev=1655431&r1=1655430&r2=1655431&view=diff
==============================================================================
---
tika/trunk/tika-parsers/src/test/java/org/apache/tika/parser/pkg/Seven7ParserTest.java
(original)
+++
tika/trunk/tika-parsers/src/test/java/org/apache/tika/parser/pkg/Seven7ParserTest.java
Wed Jan 28 18:57:00 2015
@@ -22,9 +22,13 @@ import static org.junit.Assert.assertNul
import static org.junit.Assert.assertTrue;
import static org.junit.Assert.fail;
+import javax.crypto.Cipher;
+
import java.io.InputStream;
+import java.security.NoSuchAlgorithmException;
import org.apache.tika.exception.EncryptedDocumentException;
+import org.apache.tika.exception.TikaException;
import org.apache.tika.metadata.Metadata;
import org.apache.tika.mime.MediaType;
import org.apache.tika.parser.AutoDetectParser;
@@ -134,15 +138,20 @@ public class Seven7ParserTest extends Ab
// No password, will fail with EncryptedDocumentException
InputStream stream = Seven7ParserTest.class.getResourceAsStream(
"/test-documents/test7Z_protected_passTika.7z");
+ boolean ex = false;
try {
parser.parse(stream, handler, metadata, recursingContext);
fail("Shouldn't be able to read a password protected 7z without
the password");
} catch (EncryptedDocumentException e) {
// Good
+ ex = true;
} finally {
stream.close();
}
+ assertTrue("test no password", ex);
+
+ ex = false;
// Wrong password currently silently gives no content
// Ideally we'd like Commons Compress to give an error, but it
doesn't...
@@ -157,42 +166,72 @@ public class Seven7ParserTest extends Ab
"/test-documents/test7Z_protected_passTika.7z");
try {
parser.parse(stream, handler, metadata, recursingContext);
-// fail("Shouldn't be able to read a password protected 7z with
wrong password");
-// } catch (EncryptedDocumentException e) {
+ fail("Shouldn't be able to read a password protected 7z with wrong
password");
+ } catch (TikaException e) {
+ //if JCE is installed, the cause will be: Caused by:
org.tukaani.xz.CorruptedInputException: Compressed data is corrupt
+ //if JCE is not installed, the message will include
+ // "(do you have the JCE Unlimited Strength Jurisdiction Policy
Files installed?)"
+ ex = true;
} finally {
stream.close();
}
-
+ assertTrue("TikaException for bad password", ex);
// Will be empty
assertEquals("", handler.toString());
-
-
- // Right password works fine
- recursingContext.set(PasswordProvider.class, new PasswordProvider() {
- @Override
- public String getPassword(Metadata metadata) {
- return "Tika";
+
+ ex = false;
+ // Right password works fine if JCE Unlimited Strength has been
installed!!!
+ if (isStrongCryptoAvailable()) {
+ recursingContext.set(PasswordProvider.class, new
PasswordProvider() {
+ @Override
+ public String getPassword(Metadata metadata) {
+ return "Tika";
+ }
+ });
+ handler = new BodyContentHandler();
+ stream = Seven7ParserTest.class.getResourceAsStream(
+ "/test-documents/test7Z_protected_passTika.7z");
+ try {
+ parser.parse(stream, handler, metadata, recursingContext);
+ } finally {
+ stream.close();
}
- });
- handler = new BodyContentHandler();
- stream = Seven7ParserTest.class.getResourceAsStream(
- "/test-documents/test7Z_protected_passTika.7z");
- try {
- parser.parse(stream, handler, metadata, recursingContext);
- } finally {
- stream.close();
+
+ assertEquals(TYPE_7ZIP.toString(),
metadata.get(Metadata.CONTENT_TYPE));
+ String content = handler.toString();
+
+ // Should get filename
+ assertContains("text.txt", content);
+
+ // Should get contents from the text file in the 7z file
+ assertContains("TEST DATA FOR TIKA.", content);
+ assertContains("This is text inside an encrypted 7zip (7z) file.",
content);
+ assertContains("It should be processed by Tika just fine!",
content);
+ assertContains("TIKA-1521", content);
+ } else {
+ //if jce is not installed, test for IOException wrapped in
TikaException
+ boolean ioe = false;
+ recursingContext.set(PasswordProvider.class, new
PasswordProvider() {
+ @Override
+ public String getPassword(Metadata metadata) {
+ return "Tika";
+ }
+ });
+ handler = new BodyContentHandler();
+ stream = Seven7ParserTest.class.getResourceAsStream(
+ "/test-documents/test7Z_protected_passTika.7z");
+ try {
+ parser.parse(stream, handler, metadata, recursingContext);
+ } catch (TikaException e) {
+ ioe = true;
+ } finally {
+ stream.close();
+ }
+ assertTrue("IOException because JCE was not installed", ioe);
}
+ }
- assertEquals(TYPE_7ZIP.toString(),
metadata.get(Metadata.CONTENT_TYPE));
- String content = handler.toString();
-
- // Should get filename
- assertContains("text.txt", content);
-
- // Should get contents from the text file in the 7z file
- assertContains("TEST DATA FOR TIKA.", content);
- assertContains("This is text inside an encrypted 7zip (7z) file.",
content);
- assertContains("It should be processed by Tika just fine!", content);
- assertContains("TIKA-1521", content);
+ private static boolean isStrongCryptoAvailable() throws
NoSuchAlgorithmException {
+ return Cipher.getMaxAllowedKeyLength("AES/ECB/PKCS5Padding") >= 256;
}
}