Author: tallison
Date: Tue Jul 21 12:44:59 2015
New Revision: 1692099

URL: http://svn.apache.org/r1692099
Log:
TIKA-1683 -- add encryption support for Jackcess

Modified:
    tika/trunk/tika-parsers/pom.xml
    tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/microsoft/JackcessParser.java
    tika/trunk/tika-parsers/src/test/java/org/apache/tika/parser/microsoft/JackcessParserTest.java

Modified: tika/trunk/tika-parsers/pom.xml
URL: http://svn.apache.org/viewvc/tika/trunk/tika-parsers/pom.xml?rev=1692099&r1=1692098&r2=1692099&view=diff
==============================================================================
--- tika/trunk/tika-parsers/pom.xml (original)
+++ tika/trunk/tika-parsers/pom.xml Tue Jul 21 12:44:59 2015
@@ -77,11 +77,22 @@
       <artifactId>vorbis-java-tika</artifactId>
       <version>${vorbis.version}</version>
     </dependency>
-<dependency>
+    <dependency>
       <groupId>com.healthmarketscience.jackcess</groupId>
       <artifactId>jackcess</artifactId>
       <version>2.1.2</version>
     </dependency>
+    <dependency>
+      <groupId>com.healthmarketscience.jackcess</groupId>
+      <artifactId>jackcess-encrypt</artifactId>
+      <version>2.1.0</version>
+      <exclusions>
+        <exclusion>
+          <groupId>org.bouncycastle</groupId>
+          <artifactId>bcprov-jdk15</artifactId>
+        </exclusion>
+      </exclusions>
+    </dependency>
 
     <!-- Optional OSGi dependencies, used only when running within OSGi -->
     <dependency>

Modified: tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/microsoft/JackcessParser.java
URL: http://svn.apache.org/viewvc/tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/microsoft/JackcessParser.java?rev=1692099&r1=1692098&r2=1692099&view=diff
==============================================================================
--- tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/microsoft/JackcessParser.java (original)
+++ tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/microsoft/JackcessParser.java Tue Jul 21 12:44:59 2015
@@ -24,9 +24,11 @@ import java.util.Collections;
 import java.util.Locale;
 import java.util.Set;
 
+import com.healthmarketscience.jackcess.CryptCodecProvider;
 import com.healthmarketscience.jackcess.Database;
 import com.healthmarketscience.jackcess.DatabaseBuilder;
 import com.healthmarketscience.jackcess.util.LinkResolver;
+import org.apache.tika.exception.EncryptedDocumentException;
 import org.apache.tika.exception.TikaException;
 import org.apache.tika.io.TikaInputStream;
 import org.apache.tika.metadata.Metadata;
@@ -34,6 +36,7 @@ import org.apache.tika.metadata.Property
 import org.apache.tika.mime.MediaType;
 import org.apache.tika.parser.AbstractParser;
 import org.apache.tika.parser.ParseContext;
+import org.apache.tika.parser.PasswordProvider;
 import org.apache.tika.sax.XHTMLContentHandler;
 import org.xml.sax.ContentHandler;
 import org.xml.sax.SAXException;
@@ -76,11 +79,32 @@ public class JackcessParser extends Abst
         Database db = null;
         XHTMLContentHandler xhtml = new XHTMLContentHandler(handler, metadata);
         xhtml.startDocument();
+
+        String password = null;
+        PasswordProvider passwordProvider = context.get(PasswordProvider.class);
+        if (passwordProvider != null) {
+            password = passwordProvider.getPassword(metadata);
+        }
         try {
-            db = new DatabaseBuilder(tis.getFile()).setReadOnly(true).open();
+            if (password == null) {
+                //do this to ensure encryption/wrong password exception vs. more generic
+                //"need right codec" error message.
+                db = new DatabaseBuilder(tis.getFile())
+                        .setCodecProvider(new CryptCodecProvider())
+                        .setReadOnly(true).open();
+            } else {
+                db = new DatabaseBuilder(tis.getFile())
+                        .setCodecProvider(new CryptCodecProvider(password))
+                        .setReadOnly(true).open();
+            }
             db.setLinkResolver(IGNORE_LINK_RESOLVER);//just in case
             JackcessExtractor ex = new JackcessExtractor(context, locale);
             ex.parse(db, xhtml, metadata);
+        } catch (IllegalStateException e) {
+            if (e.getMessage() != null && e.getMessage().contains("Incorrect password")) {
+                throw new EncryptedDocumentException(e);
+            }
+            throw e;
         } finally {
             if (db != null) {
                 try {
@@ -102,4 +126,4 @@ public class JackcessParser extends Abst
             throw new AssertionError("DO NOT ALLOW RESOLVING OF LINKS!!!");
         }
     }
-}
\ No newline at end of file
+}

Modified: tika/trunk/tika-parsers/src/test/java/org/apache/tika/parser/microsoft/JackcessParserTest.java
URL: http://svn.apache.org/viewvc/tika/trunk/tika-parsers/src/test/java/org/apache/tika/parser/microsoft/JackcessParserTest.java?rev=1692099&r1=1692098&r2=1692099&view=diff
==============================================================================
--- tika/trunk/tika-parsers/src/test/java/org/apache/tika/parser/microsoft/JackcessParserTest.java (original)
+++ tika/trunk/tika-parsers/src/test/java/org/apache/tika/parser/microsoft/JackcessParserTest.java Tue Jul 21 12:44:59 2015
@@ -18,11 +18,14 @@
 package org.apache.tika.parser.microsoft;
 
 import static org.junit.Assert.assertEquals;
+import static org.junit.Assert.assertFalse;
+import static org.junit.Assert.assertTrue;
 
 import java.io.InputStream;
 import java.util.List;
 
 import org.apache.tika.TikaTest;
+import org.apache.tika.exception.EncryptedDocumentException;
 import org.apache.tika.io.IOUtils;
 import org.apache.tika.metadata.Metadata;
 import org.apache.tika.metadata.OfficeOpenXMLExtended;
@@ -30,6 +33,7 @@ import org.apache.tika.metadata.TikaCore
 import org.apache.tika.parser.AutoDetectParser;
 import org.apache.tika.parser.ParseContext;
 import org.apache.tika.parser.Parser;
+import org.apache.tika.parser.PasswordProvider;
 import org.apache.tika.parser.RecursiveParserWrapper;
 import org.apache.tika.sax.BasicContentHandlerFactory;
 import org.junit.Test;
@@ -85,6 +89,94 @@ public class JackcessParserTest extends
     }
 
     @Test
+    public void testPassword() throws Exception {
+        ParseContext c = new ParseContext();
+        c.set(PasswordProvider.class, new PasswordProvider() {
+            @Override
+            public String getPassword(Metadata metadata) {
+                return "tika";
+            }
+        });
+        Parser p = new AutoDetectParser();
+        String content = null;
+        try (InputStream is =
+                     this.getResourceAsStream(
+                             "/test-documents/testAccess2_encrypted.accdb")){
+            content = getText(is, p, c);
+        }
+        assertContains("red and brown", content);
+
+        //now try wrong password
+        c.set(PasswordProvider.class, new PasswordProvider() {
+            @Override
+            public String getPassword(Metadata metadata) {
+                return "WRONG";
+            }
+        });
+
+        boolean ex = false;
+        try (InputStream is =
+                     this.getResourceAsStream(
+                             "/test-documents/testAccess2_encrypted.accdb")){
+            getText(is, p, c);
+        } catch (EncryptedDocumentException e) {
+            ex = true;
+        }
+        assertTrue("failed to throw encrypted document exception for wrong password", ex);
+
+        //now try null
+        c.set(PasswordProvider.class, new PasswordProvider() {
+            @Override
+            public String getPassword(Metadata metadata) {
+                return null;
+            }
+        });
+
+        ex = false;
+        try (InputStream is =
+                     this.getResourceAsStream(
+                             "/test-documents/testAccess2_encrypted.accdb")){
+            getText(is, p, c);
+        } catch (EncryptedDocumentException e) {
+            ex = true;
+        }
+        assertTrue("failed to throw encrypted document exception for null password", ex);
+
+
+        //now try missing password provider
+        c = new ParseContext();
+        ex = false;
+        try (InputStream is =
+                     this.getResourceAsStream(
+                             "/test-documents/testAccess2_encrypted.accdb")){
+            getText(is, p, c);
+        } catch (EncryptedDocumentException e) {
+            ex = true;
+        }
+        assertTrue("failed to throw encrypted document exception for missing password provider", ex);
+
+        //now try password on file that doesn't need a password
+        c = new ParseContext();
+        c.set(PasswordProvider.class, new PasswordProvider() {
+            @Override
+            public String getPassword(Metadata metadata) {
+                return "tika";
+            }
+        });
+        ex = false;
+        try (InputStream is =
+                     this.getResourceAsStream(
+                             "/test-documents/testAccess2.accdb")){
+            content = getText(is, p, c);
+        } catch (EncryptedDocumentException e) {
+            ex = true;
+        }
+        assertFalse("shouldn't have thrown encrypted document exception for "+
+                        "opening unencrypted file that doesn't need passowrd", ex);
+        assertContains("red and brown", content);
+    }
+
+    @Test
     public void testReadOnly() throws Exception {
         //TIKA-1681: just make sure an exception is not thrown
         XMLResult r = getXML("testAccess_V1997.mdb");


Reply via email to