This is an automated email from the ASF dual-hosted git repository.

tallison pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/tika.git


The following commit(s) were added to refs/heads/master by this push:
     new 0d9dacd  TIKA-2873 -- workaround for newly re-discovered bug in POI's ChunkedCipherInputStream - bug 63431
0d9dacd is described below

commit 0d9dacdc6ca153572a7570f7934ec82f1c2ea92e
Author: TALLISON <[email protected]>
AuthorDate: Tue May 14 16:12:34 2019 -0400

    TIKA-2873 -- workaround for newly re-discovered bug in POI's ChunkedCipherInputStream - bug 63431
---
 .../org/apache/tika/parser/microsoft/OfficeParser.java  |   9 +++++----
 .../tika/parser/microsoft/ooxml/OOXMLParserTest.java    |   6 ++++++
 .../test-documents/testEXCEL_protected_passtika_2.xlsx  | Bin 0 -> 15872 bytes
 3 files changed, 11 insertions(+), 4 deletions(-)

diff --git a/tika-parsers/src/main/java/org/apache/tika/parser/microsoft/OfficeParser.java b/tika-parsers/src/main/java/org/apache/tika/parser/microsoft/OfficeParser.java
index 517db05..7fa7bb7 100644
--- a/tika-parsers/src/main/java/org/apache/tika/parser/microsoft/OfficeParser.java
+++ b/tika-parsers/src/main/java/org/apache/tika/parser/microsoft/OfficeParser.java
@@ -224,10 +224,11 @@ public class OfficeParser extends AbstractOfficeParser {
                     // Decrypt the OLE2 stream, and delegate the resulting OOXML
                     //  file to the regular OOXML parser for normal handling
                     OOXMLParser parser = new OOXMLParser();
-
-                    parser.parse(d.getDataStream(root), new EmbeddedContentHandler(
-                                    new BodyContentHandler(xhtml)),
-                            metadata, context);
+                    try (TikaInputStream tis = TikaInputStream.get(d.getDataStream(root))) {
+                        parser.parse(tis, new EmbeddedContentHandler(
+                                        new BodyContentHandler(xhtml)),
+                                metadata, context);
+                    }
                 } catch (GeneralSecurityException ex) {
                     throw new EncryptedDocumentException(ex);
                 }
diff --git a/tika-parsers/src/test/java/org/apache/tika/parser/microsoft/ooxml/OOXMLParserTest.java b/tika-parsers/src/test/java/org/apache/tika/parser/microsoft/ooxml/OOXMLParserTest.java
index 3cb1cf6..1edd89b 100644
--- a/tika-parsers/src/test/java/org/apache/tika/parser/microsoft/ooxml/OOXMLParserTest.java
+++ b/tika-parsers/src/test/java/org/apache/tika/parser/microsoft/ooxml/OOXMLParserTest.java
@@ -1204,12 +1204,18 @@ public class OOXMLParserTest extends TikaTest {
     @Test
     public void testEncrypted() throws Exception {
         Map<String, String> tests = new HashMap<String, String>();
+        //the first three contain javax.crypto.CipherInputStream
         tests.put("testWORD_protected_passtika.docx",
                 "This is an encrypted Word 2007 File");
         tests.put("testPPT_protected_passtika.pptx",
                 "This is an encrypted PowerPoint 2007 slide.");
         tests.put("testEXCEL_protected_passtika.xlsx",
                 "This is an Encrypted Excel spreadsheet.");
+        //TIKA-2873 this one contains a ChunkedCipherInputStream
+        //that is buggy at the POI level...can unwrap TikaInputStream in OfficeParser
+        //once https://bz.apache.org/bugzilla/show_bug.cgi?id=63431 is fixed.
+        tests.put("testEXCEL_protected_passtika_2.xlsx",
+                "This is an Encrypted Excel spreadsheet with a ChunkedCipherInputStream.");
 
         Parser parser = new AutoDetectParser();
         Metadata m = new Metadata();
diff --git a/tika-parsers/src/test/resources/test-documents/testEXCEL_protected_passtika_2.xlsx b/tika-parsers/src/test/resources/test-documents/testEXCEL_protected_passtika_2.xlsx
new file mode 100644
index 0000000..82ef3e3
Binary files /dev/null and b/tika-parsers/src/test/resources/test-documents/testEXCEL_protected_passtika_2.xlsx differ

Reply via email to