This is an automated email from the ASF dual-hosted git repository.
tallison pushed a commit to branch branch_1x
in repository https://gitbox.apache.org/repos/asf/tika.git
The following commit(s) were added to refs/heads/branch_1x by this push:
new ceee42a TIKA-2530 -- temporary workaround -- check for zero length
byte array in rtf body to avoid buffer underflow from POI, via Pascal Essiembre.
ceee42a is described below
commit ceee42a3982574704ecb30b1ac2e7e658d7d18b3
Author: tballison <[email protected]>
AuthorDate: Thu Mar 8 14:10:36 2018 -0500
TIKA-2530 -- temporary workaround -- check for zero length byte array in
rtf body to avoid buffer underflow from POI, via Pascal Essiembre.
---
.../tika/parser/microsoft/OutlookExtractor.java | 30 +++++++++++++---------
1 file changed, 18 insertions(+), 12 deletions(-)
diff --git a/tika-parsers/src/main/java/org/apache/tika/parser/microsoft/OutlookExtractor.java b/tika-parsers/src/main/java/org/apache/tika/parser/microsoft/OutlookExtractor.java
index 649de67..dc355ae 100644
--- a/tika-parsers/src/main/java/org/apache/tika/parser/microsoft/OutlookExtractor.java
+++ b/tika-parsers/src/main/java/org/apache/tika/parser/microsoft/OutlookExtractor.java
@@ -74,6 +74,7 @@ import org.apache.tika.parser.txt.CharsetMatch;
import org.apache.tika.sax.BodyContentHandler;
import org.apache.tika.sax.EmbeddedContentHandler;
import org.apache.tika.sax.XHTMLContentHandler;
+import org.bouncycastle.cms.Recipient;
import org.xml.sax.SAXException;
/**
@@ -321,19 +322,24 @@ public class OutlookExtractor extends AbstractPOIFSExtractor {
}
if (rtfChunk != null && (extractAllAlternatives || !doneBody)) {
ByteChunk chunk = (ByteChunk) rtfChunk;
- MAPIRtfAttribute rtf = new MAPIRtfAttribute(
- MAPIProperty.RTF_COMPRESSED, Types.BINARY.getId(),
chunk.getValue()
- );
- Parser rtfParser =
-
EmbeddedDocumentUtil.tryToFindExistingLeafParser(RTFParser.class, parseContext);
- if (rtfParser == null) {
- rtfParser = new RTFParser();
+ //avoid buffer underflow TIKA-2530
+ //TODO -- would be good to find an example triggering file and
+ //figure out if this is a bug in POI or a genuine 0 length chunk
+ if (chunk.getValue() != null && chunk.getValue().length > 0) {
+ MAPIRtfAttribute rtf = new MAPIRtfAttribute(
+ MAPIProperty.RTF_COMPRESSED, Types.BINARY.getId(),
chunk.getValue()
+ );
+ Parser rtfParser =
+
EmbeddedDocumentUtil.tryToFindExistingLeafParser(RTFParser.class, parseContext);
+ if (rtfParser == null) {
+ rtfParser = new RTFParser();
+ }
+ rtfParser.parse(
+ new ByteArrayInputStream(rtf.getData()),
+ new EmbeddedContentHandler(new
BodyContentHandler(xhtml)),
+ new Metadata(), parseContext);
+ doneBody = true;
}
- rtfParser.parse(
- new ByteArrayInputStream(rtf.getData()),
- new EmbeddedContentHandler(new BodyContentHandler(xhtml)),
- new Metadata(), parseContext);
- doneBody = true;
}
if (textChunk != null && (extractAllAlternatives || !doneBody)) {
xhtml.element("p", ((StringChunk) textChunk).getValue());
--
To stop receiving notification emails like this one, please contact
[email protected].