Author: tallison
Date: Wed Jan 28 18:57:00 2015
New Revision: 1655431

URL: http://svn.apache.org/r1655431
Log:
TIKA-1521: follow commons-compress and require installation of jce before testing password on 7z file

Modified: tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/pkg/PackageParser.java tika/trunk/tika-parsers/src/test/java/org/apache/tika/parser/pkg/Seven7ParserTest.java Modified: tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/pkg/PackageParser.java URL: http://svn.apache.org/viewvc/tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/pkg/PackageParser.java?rev=1655431&r1=1655430&r2=1655431&view=diff ============================================================================== --- tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/pkg/PackageParser.java (original) +++ tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/pkg/PackageParser.java Wed Jan 28 18:57:00 2015 @@ -16,6 +16,8 @@ */ package org.apache.tika.parser.pkg; +import static org.apache.tika.metadata.HttpHeaders.CONTENT_TYPE; + import java.io.BufferedInputStream; import java.io.IOException; import java.io.InputStream; @@ -54,13 +56,16 @@ import org.xml.sax.ContentHandler; import org.xml.sax.SAXException; import org.xml.sax.helpers.AttributesImpl; -import static org.apache.tika.metadata.HttpHeaders.CONTENT_TYPE; - /** * Parser for various packaging formats. Package entries will be written to * the XHTML event stream as <div class="package-entry"> elements that * contain the (optional) entry name as a <h1> element and the full * structured body content of the parsed entry. + * <p> + * User must have JCE Unlimited Strength jars installed for encryption to + * work with 7Z files (see: COMPRESS-299 and TIKA-1521). If the jars + * are not installed, an IOException will be thrown, and potentially + * wrapped in a TikaException. 
*/ public class PackageParser extends AbstractParser { @@ -161,7 +166,6 @@ public class PackageParser extends Abstr if (!type.equals(MediaType.OCTET_STREAM)) { metadata.set(CONTENT_TYPE, type.toString()); } - // Use the delegate parser to parse the contained document EmbeddedDocumentExtractor extractor = context.get( EmbeddedDocumentExtractor.class, @@ -185,12 +189,13 @@ public class PackageParser extends Abstr } // Otherwise fall through to raise the exception as normal } catch (IOException ie) { - // Is this a password protection error? + // Is this a password protection error? // (COMPRESS-298 should give a nicer way when implemented, see TIKA-1525) if ("Cannot read encrypted files without a password".equals(ie.getMessage())) { throw new EncryptedDocumentException(); } // Otherwise fall through to raise the exception as normal + throw ie; } finally { ais.close(); tmp.close(); Modified: tika/trunk/tika-parsers/src/test/java/org/apache/tika/parser/pkg/Seven7ParserTest.java URL: http://svn.apache.org/viewvc/tika/trunk/tika-parsers/src/test/java/org/apache/tika/parser/pkg/Seven7ParserTest.java?rev=1655431&r1=1655430&r2=1655431&view=diff ============================================================================== --- tika/trunk/tika-parsers/src/test/java/org/apache/tika/parser/pkg/Seven7ParserTest.java (original) +++ tika/trunk/tika-parsers/src/test/java/org/apache/tika/parser/pkg/Seven7ParserTest.java Wed Jan 28 18:57:00 2015 @@ -22,9 +22,13 @@ import static org.junit.Assert.assertNul import static org.junit.Assert.assertTrue; import static org.junit.Assert.fail; +import javax.crypto.Cipher; + import java.io.InputStream; +import java.security.NoSuchAlgorithmException; import org.apache.tika.exception.EncryptedDocumentException; +import org.apache.tika.exception.TikaException; import org.apache.tika.metadata.Metadata; import org.apache.tika.mime.MediaType; import org.apache.tika.parser.AutoDetectParser; @@ -134,15 +138,20 @@ public class Seven7ParserTest extends Ab 
// No password, will fail with EncryptedDocumentException InputStream stream = Seven7ParserTest.class.getResourceAsStream( "/test-documents/test7Z_protected_passTika.7z"); + boolean ex = false; try { parser.parse(stream, handler, metadata, recursingContext); fail("Shouldn't be able to read a password protected 7z without the password"); } catch (EncryptedDocumentException e) { // Good + ex = true; } finally { stream.close(); } + assertTrue("test no password", ex); + + ex = false; // Wrong password currently silently gives no content // Ideally we'd like Commons Compress to give an error, but it doesn't... @@ -157,42 +166,72 @@ public class Seven7ParserTest extends Ab "/test-documents/test7Z_protected_passTika.7z"); try { parser.parse(stream, handler, metadata, recursingContext); -// fail("Shouldn't be able to read a password protected 7z with wrong password"); -// } catch (EncryptedDocumentException e) { + fail("Shouldn't be able to read a password protected 7z with wrong password"); + } catch (TikaException e) { + //if JCE is installed, the cause will be: Caused by: org.tukaani.xz.CorruptedInputException: Compressed data is corrupt + //if JCE is not installed, the message will include + // "(do you have the JCE Unlimited Strength Jurisdiction Policy Files installed?") + ex = true; } finally { stream.close(); } - + assertTrue("TikaException for bad password", ex); // Will be empty assertEquals("", handler.toString()); - - - // Right password works fine - recursingContext.set(PasswordProvider.class, new PasswordProvider() { - @Override - public String getPassword(Metadata metadata) { - return "Tika"; + + ex = false; + // Right password works fine if JCE Unlimited Strength has been installed!!! 
+ if (isStrongCryptoAvailable()) { + recursingContext.set(PasswordProvider.class, new PasswordProvider() { + @Override + public String getPassword(Metadata metadata) { + return "Tika"; + } + }); + handler = new BodyContentHandler(); + stream = Seven7ParserTest.class.getResourceAsStream( + "/test-documents/test7Z_protected_passTika.7z"); + try { + parser.parse(stream, handler, metadata, recursingContext); + } finally { + stream.close(); } - }); - handler = new BodyContentHandler(); - stream = Seven7ParserTest.class.getResourceAsStream( - "/test-documents/test7Z_protected_passTika.7z"); - try { - parser.parse(stream, handler, metadata, recursingContext); - } finally { - stream.close(); + + assertEquals(TYPE_7ZIP.toString(), metadata.get(Metadata.CONTENT_TYPE)); + String content = handler.toString(); + + // Should get filename + assertContains("text.txt", content); + + // Should get contents from the text file in the 7z file + assertContains("TEST DATA FOR TIKA.", content); + assertContains("This is text inside an encrypted 7zip (7z) file.", content); + assertContains("It should be processed by Tika just fine!", content); + assertContains("TIKA-1521", content); + } else { + //if jce is not installed, test for IOException wrapped in TikaException + boolean ioe = false; + recursingContext.set(PasswordProvider.class, new PasswordProvider() { + @Override + public String getPassword(Metadata metadata) { + return "Tika"; + } + }); + handler = new BodyContentHandler(); + stream = Seven7ParserTest.class.getResourceAsStream( + "/test-documents/test7Z_protected_passTika.7z"); + try { + parser.parse(stream, handler, metadata, recursingContext); + } catch (TikaException e) { + ioe = true; + } finally { + stream.close(); + } + assertTrue("IOException because JCE was not installed", ioe); } + } - assertEquals(TYPE_7ZIP.toString(), metadata.get(Metadata.CONTENT_TYPE)); - String content = handler.toString(); - - // Should get filename - assertContains("text.txt", content); - - // 
Should get contents from the text file in the 7z file - assertContains("TEST DATA FOR TIKA.", content); - assertContains("This is text inside an encrypted 7zip (7z) file.", content); - assertContains("It should be processed by Tika just fine!", content); - assertContains("TIKA-1521", content); + private static boolean isStrongCryptoAvailable() throws NoSuchAlgorithmException { + return Cipher.getMaxAllowedKeyLength("AES/ECB/PKCS5Padding") >= 256; } }