Author: nick
Date: Sun Jan 18 23:15:48 2015
New Revision: 1652869

URL: http://svn.apache.org/r1652869
Log:
TIKA-1521 Support password protected 7zip files

Added:
    
tika/trunk/tika-parsers/src/test/resources/test-documents/test7Z_protected_passTika.7z
   (with props)
Modified:
    tika/trunk/CHANGES.txt
    
tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/pkg/PackageParser.java
    
tika/trunk/tika-parsers/src/test/java/org/apache/tika/parser/pkg/Seven7ParserTest.java

Modified: tika/trunk/CHANGES.txt
URL: 
http://svn.apache.org/viewvc/tika/trunk/CHANGES.txt?rev=1652869&r1=1652868&r2=1652869&view=diff
==============================================================================
--- tika/trunk/CHANGES.txt (original)
+++ tika/trunk/CHANGES.txt Sun Jan 18 23:15:48 2015
@@ -1,5 +1,10 @@
 Release 1.8 - Current Development
 
+  * Support password protected 7zip files (using a PasswordProvider,
+    in keeping with the other password supporting formats) (TIKA-1521)
+
+  * Password protected Zip files should not trigger an exception (TIKA-1028)
+
 Release 1.7 - 1/9/2015
 
   * Fixed resource leak in OutlookPSTParser that caused TikaException 

Modified: 
tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/pkg/PackageParser.java
URL: 
http://svn.apache.org/viewvc/tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/pkg/PackageParser.java?rev=1652869&r1=1652868&r2=1652869&view=diff
==============================================================================
--- tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/pkg/PackageParser.java (original)
+++ tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/pkg/PackageParser.java Sun Jan 18 23:15:48 2015
@@ -48,6 +48,7 @@ import org.apache.tika.metadata.TikaCore
 import org.apache.tika.mime.MediaType;
 import org.apache.tika.parser.AbstractParser;
 import org.apache.tika.parser.ParseContext;
+import org.apache.tika.parser.PasswordProvider;
 import org.apache.tika.sax.XHTMLContentHandler;
 import org.xml.sax.ContentHandler;
 import org.xml.sax.SAXException;
@@ -131,9 +132,22 @@ public class PackageParser extends Abstr
                 stream.reset();
                 TikaInputStream tstream = TikaInputStream.get(stream, tmp);
                 
-                // Pending a fix for COMPRESS-269, this bit is a little nasty
-                ais = new SevenZWrapper(new SevenZFile(tstream.getFile()));
+                // Seven Zip supports passwords, was one given?
+                String password = null;
+                PasswordProvider provider = context.get(PasswordProvider.class);
+                if (provider != null) {
+                    password = provider.getPassword(metadata);
+                }
+                
+                SevenZFile sevenz;
+                if (password == null) {
+                    sevenz = new SevenZFile(tstream.getFile());
+                } else {
+                    sevenz = new SevenZFile(tstream.getFile(), password.getBytes("UnicodeLittleUnmarked"));
+                }
                 
+                // Pending a fix for COMPRESS-269, this bit is a little nasty
+                ais = new SevenZWrapper(sevenz);
             } else {
                 tmp.close();
                 throw new TikaException("Unknown non-streaming format " + sne.getFormat(), sne);
@@ -170,6 +184,13 @@ public class PackageParser extends Abstr
                 throw new EncryptedDocumentException(zfe);
             }
             // Otherwise fall through to raise the exception as normal
+        } catch (IOException ie) {
+            // Is this a password protection error? 
+            // (COMPRESS-298 should give a nicer way when implemented)
+            if ("Cannot read encrypted files without a password".equals(ie.getMessage())) {
+                throw new EncryptedDocumentException();
+            }
+            // Otherwise fall through to raise the exception as normal
         } finally {
             ais.close();
             tmp.close();

Modified: 
tika/trunk/tika-parsers/src/test/java/org/apache/tika/parser/pkg/Seven7ParserTest.java
URL: 
http://svn.apache.org/viewvc/tika/trunk/tika-parsers/src/test/java/org/apache/tika/parser/pkg/Seven7ParserTest.java?rev=1652869&r1=1652868&r2=1652869&view=diff
==============================================================================
--- tika/trunk/tika-parsers/src/test/java/org/apache/tika/parser/pkg/Seven7ParserTest.java (original)
+++ tika/trunk/tika-parsers/src/test/java/org/apache/tika/parser/pkg/Seven7ParserTest.java Sun Jan 18 23:15:48 2015
@@ -20,13 +20,16 @@ import static org.junit.Assert.assertEqu
 import static org.junit.Assert.assertNotNull;
 import static org.junit.Assert.assertNull;
 import static org.junit.Assert.assertTrue;
+import static org.junit.Assert.fail;
 
 import java.io.InputStream;
 
+import org.apache.tika.exception.EncryptedDocumentException;
 import org.apache.tika.metadata.Metadata;
 import org.apache.tika.mime.MediaType;
 import org.apache.tika.parser.AutoDetectParser;
 import org.apache.tika.parser.Parser;
+import org.apache.tika.parser.PasswordProvider;
 import org.apache.tika.sax.BodyContentHandler;
 import org.junit.Test;
 import org.xml.sax.ContentHandler;
@@ -48,7 +51,7 @@ public class Seven7ParserTest extends Ab
                 parser.getSupportedTypes(recursingContext).contains(TYPE_7ZIP));
         
         // Parse
-        InputStream stream = TarParserTest.class.getResourceAsStream(
+        InputStream stream = Seven7ParserTest.class.getResourceAsStream(
                 "/test-documents/test-documents.7z");
         try {
             parser.parse(stream, handler, metadata, recursingContext);
@@ -88,7 +91,7 @@ public class Seven7ParserTest extends Ab
        ContentHandler handler = new BodyContentHandler();
        Metadata metadata = new Metadata();
 
-       InputStream stream = ZipParserTest.class.getResourceAsStream(
+       InputStream stream = Seven7ParserTest.class.getResourceAsStream(
                "/test-documents/test-documents.7z");
        try {
            parser.parse(stream, handler, metadata, trackingContext);
@@ -121,4 +124,75 @@ public class Seven7ParserTest extends Ab
            assertTrue("Modified at " + mod, mod.startsWith("20"));
        }
     }
+    
+    @Test
+    public void testPasswordProtected() throws Exception {
+        Parser parser = new AutoDetectParser();
+        ContentHandler handler = new BodyContentHandler();
+        Metadata metadata = new Metadata();
+        
+        // No password, will fail with EncryptedDocumentException
+        InputStream stream = Seven7ParserTest.class.getResourceAsStream(
+                "/test-documents/test7Z_protected_passTika.7z");
+        try {
+            parser.parse(stream, handler, metadata, recursingContext);
+            fail("Shouldn't be able to read a password protected 7z without the password");
+        } catch (EncryptedDocumentException e) {
+            // Good
+        } finally {
+            stream.close();
+        }
+        
+        
+        // Wrong password currently silently gives no content
+        // Ideally we'd like Commons Compress to give an error, but it doesn't...
+        recursingContext.set(PasswordProvider.class, new PasswordProvider() {
+            @Override
+            public String getPassword(Metadata metadata) {
+                return "wrong";
+            }
+        });
+        handler = new BodyContentHandler();
+        stream = Seven7ParserTest.class.getResourceAsStream(
+                "/test-documents/test7Z_protected_passTika.7z");
+        try {
+            parser.parse(stream, handler, metadata, recursingContext);
+//            fail("Shouldn't be able to read a password protected 7z with wrong password");
+//        } catch (EncryptedDocumentException e) {
+        } finally {
+            stream.close();
+        }
+        
+        // Will be empty
+        assertEquals("", handler.toString());
+        
+        
+        // Right password works fine
+        recursingContext.set(PasswordProvider.class, new PasswordProvider() {
+            @Override
+            public String getPassword(Metadata metadata) {
+                return "Tika";
+            }
+        });
+        handler = new BodyContentHandler();
+        stream = Seven7ParserTest.class.getResourceAsStream(
+                "/test-documents/test7Z_protected_passTika.7z");
+        try {
+            parser.parse(stream, handler, metadata, recursingContext);
+        } finally {
+            stream.close();
+        }
+
+        assertEquals(TYPE_7ZIP.toString(), metadata.get(Metadata.CONTENT_TYPE));
+        String content = handler.toString();
+        
+        // Should get filename
+        assertContains("text.txt", content);
+        
+        // Should get contents from the text file in the 7z file
+        assertContains("TEST DATA FOR TIKA.", content);
+        assertContains("This is text inside an encrypted 7zip (7z) file.", content);
+        assertContains("It should be processed by Tika just fine!", content);
+        assertContains("TIKA-1521", content);
+    }
 }

Added: 
tika/trunk/tika-parsers/src/test/resources/test-documents/test7Z_protected_passTika.7z
URL: 
http://svn.apache.org/viewvc/tika/trunk/tika-parsers/src/test/resources/test-documents/test7Z_protected_passTika.7z?rev=1652869&view=auto
==============================================================================
Binary file - no diff available.

Propchange: 
tika/trunk/tika-parsers/src/test/resources/test-documents/test7Z_protected_passTika.7z
------------------------------------------------------------------------------
    svn:mime-type = application/octet-stream


Reply via email to