Author: tallison
Date: Wed Jan 28 18:57:00 2015
New Revision: 1655431

URL: http://svn.apache.org/r1655431
Log:
TIKA-1521: follow commons-compress and require installation of jce before 
testing password on 7z file

Modified:
    
tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/pkg/PackageParser.java
    
tika/trunk/tika-parsers/src/test/java/org/apache/tika/parser/pkg/Seven7ParserTest.java

Modified: 
tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/pkg/PackageParser.java
URL: 
http://svn.apache.org/viewvc/tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/pkg/PackageParser.java?rev=1655431&r1=1655430&r2=1655431&view=diff
==============================================================================
--- 
tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/pkg/PackageParser.java
 (original)
+++ 
tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/pkg/PackageParser.java
 Wed Jan 28 18:57:00 2015
@@ -16,6 +16,8 @@
  */
 package org.apache.tika.parser.pkg;
 
+import static org.apache.tika.metadata.HttpHeaders.CONTENT_TYPE;
+
 import java.io.BufferedInputStream;
 import java.io.IOException;
 import java.io.InputStream;
@@ -54,13 +56,16 @@ import org.xml.sax.ContentHandler;
 import org.xml.sax.SAXException;
 import org.xml.sax.helpers.AttributesImpl;
 
-import static org.apache.tika.metadata.HttpHeaders.CONTENT_TYPE;
-
 /**
  * Parser for various packaging formats. Package entries will be written to
  * the XHTML event stream as <div class="package-entry"> elements that
  * contain the (optional) entry name as a <h1> element and the full
  * structured body content of the parsed entry.
+ * <p>
+ * Users must have the JCE Unlimited Strength Jurisdiction Policy Files
+ * installed for decryption of password-protected 7Z files to work (see:
+ * COMPRESS-299 and TIKA-1521).  If they are not installed, an IOException
+ * will be thrown, and potentially wrapped in a TikaException.
  */
 public class PackageParser extends AbstractParser {
 
@@ -161,7 +166,6 @@ public class PackageParser extends Abstr
         if (!type.equals(MediaType.OCTET_STREAM)) {
             metadata.set(CONTENT_TYPE, type.toString());
         }
-
         // Use the delegate parser to parse the contained document
         EmbeddedDocumentExtractor extractor = context.get(
                 EmbeddedDocumentExtractor.class,
@@ -185,12 +189,13 @@ public class PackageParser extends Abstr
             }
             // Otherwise fall through to raise the exception as normal
         } catch (IOException ie) {
-            // Is this a password protection error? 
+            // Is this a password protection error?
             // (COMPRESS-298 should give a nicer way when implemented, see 
TIKA-1525)
             if ("Cannot read encrypted files without a 
password".equals(ie.getMessage())) {
                 throw new EncryptedDocumentException();
             }
             // Otherwise fall through to raise the exception as normal
+            throw ie;
         } finally {
             ais.close();
             tmp.close();

Modified: 
tika/trunk/tika-parsers/src/test/java/org/apache/tika/parser/pkg/Seven7ParserTest.java
URL: 
http://svn.apache.org/viewvc/tika/trunk/tika-parsers/src/test/java/org/apache/tika/parser/pkg/Seven7ParserTest.java?rev=1655431&r1=1655430&r2=1655431&view=diff
==============================================================================
--- 
tika/trunk/tika-parsers/src/test/java/org/apache/tika/parser/pkg/Seven7ParserTest.java
 (original)
+++ 
tika/trunk/tika-parsers/src/test/java/org/apache/tika/parser/pkg/Seven7ParserTest.java
 Wed Jan 28 18:57:00 2015
@@ -22,9 +22,13 @@ import static org.junit.Assert.assertNul
 import static org.junit.Assert.assertTrue;
 import static org.junit.Assert.fail;
 
+import javax.crypto.Cipher;
+
 import java.io.InputStream;
+import java.security.NoSuchAlgorithmException;
 
 import org.apache.tika.exception.EncryptedDocumentException;
+import org.apache.tika.exception.TikaException;
 import org.apache.tika.metadata.Metadata;
 import org.apache.tika.mime.MediaType;
 import org.apache.tika.parser.AutoDetectParser;
@@ -134,15 +138,20 @@ public class Seven7ParserTest extends Ab
         // No password, will fail with EncryptedDocumentException
         InputStream stream = Seven7ParserTest.class.getResourceAsStream(
                 "/test-documents/test7Z_protected_passTika.7z");
+        boolean ex = false;
         try {
             parser.parse(stream, handler, metadata, recursingContext);
             fail("Shouldn't be able to read a password protected 7z without 
the password");
         } catch (EncryptedDocumentException e) {
             // Good
+            ex = true;
         } finally {
             stream.close();
         }
         
+        assertTrue("test no password", ex);
+
+        ex = false;
         
         // Wrong password currently silently gives no content
         // Ideally we'd like Commons Compress to give an error, but it 
doesn't...
@@ -157,42 +166,72 @@ public class Seven7ParserTest extends Ab
                 "/test-documents/test7Z_protected_passTika.7z");
         try {
             parser.parse(stream, handler, metadata, recursingContext);
-//            fail("Shouldn't be able to read a password protected 7z with 
wrong password");
-//        } catch (EncryptedDocumentException e) {
+            fail("Shouldn't be able to read a password protected 7z with wrong 
password");
+        } catch (TikaException e) {
+            //if JCE is installed, the cause will be: Caused by: 
org.tukaani.xz.CorruptedInputException: Compressed data is corrupt
+            //if JCE is not installed, the message will include
+            // "(do you have the JCE Unlimited Strength Jurisdiction Policy 
Files installed?)"
+            ex = true;
         } finally {
             stream.close();
         }
-        
+        assertTrue("TikaException for bad password", ex);
         // Will be empty
         assertEquals("", handler.toString());
-        
-        
-        // Right password works fine
-        recursingContext.set(PasswordProvider.class, new PasswordProvider() {
-            @Override
-            public String getPassword(Metadata metadata) {
-                return "Tika";
+
+        ex = false;
+        // Right password works fine if JCE Unlimited Strength has been 
installed!!!
+        if (isStrongCryptoAvailable()) {
+            recursingContext.set(PasswordProvider.class, new 
PasswordProvider() {
+                @Override
+                public String getPassword(Metadata metadata) {
+                    return "Tika";
+                }
+            });
+            handler = new BodyContentHandler();
+            stream = Seven7ParserTest.class.getResourceAsStream(
+                    "/test-documents/test7Z_protected_passTika.7z");
+            try {
+                parser.parse(stream, handler, metadata, recursingContext);
+            } finally {
+                stream.close();
             }
-        });
-        handler = new BodyContentHandler();
-        stream = Seven7ParserTest.class.getResourceAsStream(
-                "/test-documents/test7Z_protected_passTika.7z");
-        try {
-            parser.parse(stream, handler, metadata, recursingContext);
-        } finally {
-            stream.close();
+
+            assertEquals(TYPE_7ZIP.toString(), 
metadata.get(Metadata.CONTENT_TYPE));
+            String content = handler.toString();
+
+            // Should get filename
+            assertContains("text.txt", content);
+
+            // Should get contents from the text file in the 7z file
+            assertContains("TEST DATA FOR TIKA.", content);
+            assertContains("This is text inside an encrypted 7zip (7z) file.", 
content);
+            assertContains("It should be processed by Tika just fine!", 
content);
+            assertContains("TIKA-1521", content);
+        } else {
+            //if jce is not installed, test for IOException wrapped in 
TikaException
+            boolean ioe = false;
+            recursingContext.set(PasswordProvider.class, new 
PasswordProvider() {
+                @Override
+                public String getPassword(Metadata metadata) {
+                    return "Tika";
+                }
+            });
+            handler = new BodyContentHandler();
+            stream = Seven7ParserTest.class.getResourceAsStream(
+                    "/test-documents/test7Z_protected_passTika.7z");
+            try {
+                parser.parse(stream, handler, metadata, recursingContext);
+            } catch (TikaException e) {
+                ioe = true;
+            } finally {
+                stream.close();
+            }
+            assertTrue("IOException because JCE was not installed", ioe);
         }
+    }
 
-        assertEquals(TYPE_7ZIP.toString(), 
metadata.get(Metadata.CONTENT_TYPE));
-        String content = handler.toString();
-        
-        // Should get filename
-        assertContains("text.txt", content);
-        
-        // Should get contents from the text file in the 7z file
-        assertContains("TEST DATA FOR TIKA.", content);
-        assertContains("This is text inside an encrypted 7zip (7z) file.", 
content);
-        assertContains("It should be processed by Tika just fine!", content);
-        assertContains("TIKA-1521", content);
+    private static boolean isStrongCryptoAvailable() throws 
NoSuchAlgorithmException {
+        return Cipher.getMaxAllowedKeyLength("AES/ECB/PKCS5Padding") >= 256;
     }
 }


Reply via email to