Author: tallison
Date: Tue Jul 21 12:44:59 2015
New Revision: 1692099
URL: http://svn.apache.org/r1692099
Log:
TIKA-1683 -- add encryption support for Jackcess
Modified:
tika/trunk/tika-parsers/pom.xml
tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/microsoft/JackcessParser.java
tika/trunk/tika-parsers/src/test/java/org/apache/tika/parser/microsoft/JackcessParserTest.java
Modified: tika/trunk/tika-parsers/pom.xml
URL:
http://svn.apache.org/viewvc/tika/trunk/tika-parsers/pom.xml?rev=1692099&r1=1692098&r2=1692099&view=diff
==============================================================================
--- tika/trunk/tika-parsers/pom.xml (original)
+++ tika/trunk/tika-parsers/pom.xml Tue Jul 21 12:44:59 2015
@@ -77,11 +77,22 @@
<artifactId>vorbis-java-tika</artifactId>
<version>${vorbis.version}</version>
</dependency>
-<dependency>
+ <dependency>
<groupId>com.healthmarketscience.jackcess</groupId>
<artifactId>jackcess</artifactId>
<version>2.1.2</version>
</dependency>
+ <dependency>
+ <groupId>com.healthmarketscience.jackcess</groupId>
+ <artifactId>jackcess-encrypt</artifactId>
+ <version>2.1.0</version>
+ <exclusions>
+ <exclusion>
+ <groupId>org.bouncycastle</groupId>
+ <artifactId>bcprov-jdk15</artifactId>
+ </exclusion>
+ </exclusions>
+ </dependency>
<!-- Optional OSGi dependencies, used only when running within OSGi -->
<dependency>
Modified: tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/microsoft/JackcessParser.java
URL:
http://svn.apache.org/viewvc/tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/microsoft/JackcessParser.java?rev=1692099&r1=1692098&r2=1692099&view=diff
==============================================================================
--- tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/microsoft/JackcessParser.java (original)
+++ tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/microsoft/JackcessParser.java Tue Jul 21 12:44:59 2015
@@ -24,9 +24,11 @@ import java.util.Collections;
import java.util.Locale;
import java.util.Set;
+import com.healthmarketscience.jackcess.CryptCodecProvider;
import com.healthmarketscience.jackcess.Database;
import com.healthmarketscience.jackcess.DatabaseBuilder;
import com.healthmarketscience.jackcess.util.LinkResolver;
+import org.apache.tika.exception.EncryptedDocumentException;
import org.apache.tika.exception.TikaException;
import org.apache.tika.io.TikaInputStream;
import org.apache.tika.metadata.Metadata;
@@ -34,6 +36,7 @@ import org.apache.tika.metadata.Property
import org.apache.tika.mime.MediaType;
import org.apache.tika.parser.AbstractParser;
import org.apache.tika.parser.ParseContext;
+import org.apache.tika.parser.PasswordProvider;
import org.apache.tika.sax.XHTMLContentHandler;
import org.xml.sax.ContentHandler;
import org.xml.sax.SAXException;
@@ -76,11 +79,32 @@ public class JackcessParser extends Abst
Database db = null;
XHTMLContentHandler xhtml = new XHTMLContentHandler(handler, metadata);
xhtml.startDocument();
+
+ String password = null;
+ PasswordProvider passwordProvider = context.get(PasswordProvider.class);
+ if (passwordProvider != null) {
+ password = passwordProvider.getPassword(metadata);
+ }
try {
- db = new DatabaseBuilder(tis.getFile()).setReadOnly(true).open();
+ if (password == null) {
+ //do this to ensure encryption/wrong password exception vs. more generic
+ //"need right codec" error message.
+ db = new DatabaseBuilder(tis.getFile())
+ .setCodecProvider(new CryptCodecProvider())
+ .setReadOnly(true).open();
+ } else {
+ db = new DatabaseBuilder(tis.getFile())
+ .setCodecProvider(new CryptCodecProvider(password))
+ .setReadOnly(true).open();
+ }
db.setLinkResolver(IGNORE_LINK_RESOLVER);//just in case
JackcessExtractor ex = new JackcessExtractor(context, locale);
ex.parse(db, xhtml, metadata);
+ } catch (IllegalStateException e) {
+ if (e.getMessage() != null && e.getMessage().contains("Incorrect password")) {
+ throw new EncryptedDocumentException(e);
+ }
+ throw e;
} finally {
if (db != null) {
try {
@@ -102,4 +126,4 @@ public class JackcessParser extends Abst
throw new AssertionError("DO NOT ALLOW RESOLVING OF LINKS!!!");
}
}
-}
\ No newline at end of file
+}
Modified: tika/trunk/tika-parsers/src/test/java/org/apache/tika/parser/microsoft/JackcessParserTest.java
URL:
http://svn.apache.org/viewvc/tika/trunk/tika-parsers/src/test/java/org/apache/tika/parser/microsoft/JackcessParserTest.java?rev=1692099&r1=1692098&r2=1692099&view=diff
==============================================================================
--- tika/trunk/tika-parsers/src/test/java/org/apache/tika/parser/microsoft/JackcessParserTest.java (original)
+++ tika/trunk/tika-parsers/src/test/java/org/apache/tika/parser/microsoft/JackcessParserTest.java Tue Jul 21 12:44:59 2015
@@ -18,11 +18,14 @@
package org.apache.tika.parser.microsoft;
import static org.junit.Assert.assertEquals;
+import static org.junit.Assert.assertFalse;
+import static org.junit.Assert.assertTrue;
import java.io.InputStream;
import java.util.List;
import org.apache.tika.TikaTest;
+import org.apache.tika.exception.EncryptedDocumentException;
import org.apache.tika.io.IOUtils;
import org.apache.tika.metadata.Metadata;
import org.apache.tika.metadata.OfficeOpenXMLExtended;
@@ -30,6 +33,7 @@ import org.apache.tika.metadata.TikaCore
import org.apache.tika.parser.AutoDetectParser;
import org.apache.tika.parser.ParseContext;
import org.apache.tika.parser.Parser;
+import org.apache.tika.parser.PasswordProvider;
import org.apache.tika.parser.RecursiveParserWrapper;
import org.apache.tika.sax.BasicContentHandlerFactory;
import org.junit.Test;
@@ -85,6 +89,94 @@ public class JackcessParserTest extends
}
@Test
+ public void testPassword() throws Exception {
+ ParseContext c = new ParseContext();
+ c.set(PasswordProvider.class, new PasswordProvider() {
+ @Override
+ public String getPassword(Metadata metadata) {
+ return "tika";
+ }
+ });
+ Parser p = new AutoDetectParser();
+ String content = null;
+ try (InputStream is =
+ this.getResourceAsStream(
+ "/test-documents/testAccess2_encrypted.accdb")){
+ content = getText(is, p, c);
+ }
+ assertContains("red and brown", content);
+
+ //now try wrong password
+ c.set(PasswordProvider.class, new PasswordProvider() {
+ @Override
+ public String getPassword(Metadata metadata) {
+ return "WRONG";
+ }
+ });
+
+ boolean ex = false;
+ try (InputStream is =
+ this.getResourceAsStream(
+ "/test-documents/testAccess2_encrypted.accdb")){
+ getText(is, p, c);
+ } catch (EncryptedDocumentException e) {
+ ex = true;
+ }
+ assertTrue("failed to throw encrypted document exception for wrong password", ex);
+
+ //now try null
+ c.set(PasswordProvider.class, new PasswordProvider() {
+ @Override
+ public String getPassword(Metadata metadata) {
+ return null;
+ }
+ });
+
+ ex = false;
+ try (InputStream is =
+ this.getResourceAsStream(
+ "/test-documents/testAccess2_encrypted.accdb")){
+ getText(is, p, c);
+ } catch (EncryptedDocumentException e) {
+ ex = true;
+ }
+ assertTrue("failed to throw encrypted document exception for null password", ex);
+
+
+ //now try missing password provider
+ c = new ParseContext();
+ ex = false;
+ try (InputStream is =
+ this.getResourceAsStream(
+ "/test-documents/testAccess2_encrypted.accdb")){
+ getText(is, p, c);
+ } catch (EncryptedDocumentException e) {
+ ex = true;
+ }
+ assertTrue("failed to throw encrypted document exception for missing password provider", ex);
+
+ //now try password on file that doesn't need a password
+ c = new ParseContext();
+ c.set(PasswordProvider.class, new PasswordProvider() {
+ @Override
+ public String getPassword(Metadata metadata) {
+ return "tika";
+ }
+ });
+ ex = false;
+ try (InputStream is =
+ this.getResourceAsStream(
+ "/test-documents/testAccess2.accdb")){
+ content = getText(is, p, c);
+ } catch (EncryptedDocumentException e) {
+ ex = true;
+ }
+ assertFalse("shouldn't have thrown encrypted document exception for "+
+ "opening unencrypted file that doesn't need passowrd", ex);
+ assertContains("red and brown", content);
+ }
+
+ @Test
public void testReadOnly() throws Exception {
//TIKA-1681: just make sure an exception is not thrown
XMLResult r = getXML("testAccess_V1997.mdb");