This is an automated email from the ASF dual-hosted git repository.
tallison pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/tika.git
The following commit(s) were added to refs/heads/master by this push:
new 0d9dacd TIKA-2873 -- workaround for newly re-discovered bug in POI's
ChunkedCipherInputStream - bug 63431
0d9dacd is described below
commit 0d9dacdc6ca153572a7570f7934ec82f1c2ea92e
Author: TALLISON <[email protected]>
AuthorDate: Tue May 14 16:12:34 2019 -0400
TIKA-2873 -- workaround for newly re-discovered bug in POI's
ChunkedCipherInputStream - bug 63431
---
.../org/apache/tika/parser/microsoft/OfficeParser.java | 9 +++++----
.../tika/parser/microsoft/ooxml/OOXMLParserTest.java | 6 ++++++
.../test-documents/testEXCEL_protected_passtika_2.xlsx | Bin 0 -> 15872 bytes
3 files changed, 11 insertions(+), 4 deletions(-)
diff --git a/tika-parsers/src/main/java/org/apache/tika/parser/microsoft/OfficeParser.java b/tika-parsers/src/main/java/org/apache/tika/parser/microsoft/OfficeParser.java
index 517db05..7fa7bb7 100644
--- a/tika-parsers/src/main/java/org/apache/tika/parser/microsoft/OfficeParser.java
+++ b/tika-parsers/src/main/java/org/apache/tika/parser/microsoft/OfficeParser.java
@@ -224,10 +224,11 @@ public class OfficeParser extends AbstractOfficeParser {
// Decrypt the OLE2 stream, and delegate the resulting OOXML
// file to the regular OOXML parser for normal handling
OOXMLParser parser = new OOXMLParser();
-
- parser.parse(d.getDataStream(root), new EmbeddedContentHandler(
- new BodyContentHandler(xhtml)),
- metadata, context);
+ try (TikaInputStream tis = TikaInputStream.get(d.getDataStream(root))) {
+ parser.parse(tis, new EmbeddedContentHandler(
+ new BodyContentHandler(xhtml)),
+ metadata, context);
+ }
} catch (GeneralSecurityException ex) {
throw new EncryptedDocumentException(ex);
}
diff --git a/tika-parsers/src/test/java/org/apache/tika/parser/microsoft/ooxml/OOXMLParserTest.java b/tika-parsers/src/test/java/org/apache/tika/parser/microsoft/ooxml/OOXMLParserTest.java
index 3cb1cf6..1edd89b 100644
--- a/tika-parsers/src/test/java/org/apache/tika/parser/microsoft/ooxml/OOXMLParserTest.java
+++ b/tika-parsers/src/test/java/org/apache/tika/parser/microsoft/ooxml/OOXMLParserTest.java
@@ -1204,12 +1204,18 @@ public class OOXMLParserTest extends TikaTest {
@Test
public void testEncrypted() throws Exception {
Map<String, String> tests = new HashMap<String, String>();
+ //the first three contain javax.crypto.CipherInputStream
tests.put("testWORD_protected_passtika.docx",
"This is an encrypted Word 2007 File");
tests.put("testPPT_protected_passtika.pptx",
"This is an encrypted PowerPoint 2007 slide.");
tests.put("testEXCEL_protected_passtika.xlsx",
"This is an Encrypted Excel spreadsheet.");
+ //TIKA-2873 this one contains a ChunkedCipherInputStream
+ //that is buggy at the POI level...can unwrap TikaInputStream in OfficeParser
+ //once https://bz.apache.org/bugzilla/show_bug.cgi?id=63431 is fixed.
+ tests.put("testEXCEL_protected_passtika_2.xlsx",
+ "This is an Encrypted Excel spreadsheet with a ChunkedCipherInputStream.");
Parser parser = new AutoDetectParser();
Metadata m = new Metadata();
diff --git a/tika-parsers/src/test/resources/test-documents/testEXCEL_protected_passtika_2.xlsx b/tika-parsers/src/test/resources/test-documents/testEXCEL_protected_passtika_2.xlsx
new file mode 100644
index 0000000..82ef3e3
Binary files /dev/null and b/tika-parsers/src/test/resources/test-documents/testEXCEL_protected_passtika_2.xlsx differ