This is an automated email from the ASF dual-hosted git repository.

tallison pushed a commit to branch branch_1x
in repository https://gitbox.apache.org/repos/asf/tika.git


The following commit(s) were added to refs/heads/branch_1x by this push:
     new ceee42a  TIKA-2530 -- temporary workaround -- check for zero length byte array in rtf body to avoid buffer underflow from POI, via Pascal Essiembre.
ceee42a is described below

commit ceee42a3982574704ecb30b1ac2e7e658d7d18b3
Author: tballison <[email protected]>
AuthorDate: Thu Mar 8 14:10:36 2018 -0500

    TIKA-2530 -- temporary workaround -- check for zero length byte array in
    rtf body to avoid buffer underflow from POI, via Pascal Essiembre.
---
 .../tika/parser/microsoft/OutlookExtractor.java    | 30 +++++++++++++---------
 1 file changed, 18 insertions(+), 12 deletions(-)

diff --git a/tika-parsers/src/main/java/org/apache/tika/parser/microsoft/OutlookExtractor.java b/tika-parsers/src/main/java/org/apache/tika/parser/microsoft/OutlookExtractor.java
index 649de67..dc355ae 100644
--- a/tika-parsers/src/main/java/org/apache/tika/parser/microsoft/OutlookExtractor.java
+++ b/tika-parsers/src/main/java/org/apache/tika/parser/microsoft/OutlookExtractor.java
@@ -74,6 +74,7 @@ import org.apache.tika.parser.txt.CharsetMatch;
 import org.apache.tika.sax.BodyContentHandler;
 import org.apache.tika.sax.EmbeddedContentHandler;
 import org.apache.tika.sax.XHTMLContentHandler;
+import org.bouncycastle.cms.Recipient;
 import org.xml.sax.SAXException;
 
 /**
@@ -321,19 +322,24 @@ public class OutlookExtractor extends AbstractPOIFSExtractor {
         }
         if (rtfChunk != null && (extractAllAlternatives || !doneBody)) {
             ByteChunk chunk = (ByteChunk) rtfChunk;
-            MAPIRtfAttribute rtf = new MAPIRtfAttribute(
-                    MAPIProperty.RTF_COMPRESSED, Types.BINARY.getId(), chunk.getValue()
-            );
-            Parser rtfParser =
-                    EmbeddedDocumentUtil.tryToFindExistingLeafParser(RTFParser.class, parseContext);
-            if (rtfParser == null) {
-                rtfParser = new RTFParser();
+            //avoid buffer underflow TIKA-2530
+            //TODO -- would be good to find an example triggering file and
+            //figure out if this is a bug in POI or a genuine 0 length chunk
+            if (chunk.getValue() != null && chunk.getValue().length > 0) {
+                MAPIRtfAttribute rtf = new MAPIRtfAttribute(
+                        MAPIProperty.RTF_COMPRESSED, Types.BINARY.getId(), chunk.getValue()
+                );
+                Parser rtfParser =
+                        EmbeddedDocumentUtil.tryToFindExistingLeafParser(RTFParser.class, parseContext);
+                if (rtfParser == null) {
+                    rtfParser = new RTFParser();
+                }
+                rtfParser.parse(
+                        new ByteArrayInputStream(rtf.getData()),
+                        new EmbeddedContentHandler(new BodyContentHandler(xhtml)),
+                        new Metadata(), parseContext);
+                doneBody = true;
             }
-            rtfParser.parse(
-                    new ByteArrayInputStream(rtf.getData()),
-                    new EmbeddedContentHandler(new BodyContentHandler(xhtml)),
-                    new Metadata(), parseContext);
-            doneBody = true;
         }
         if (textChunk != null && (extractAllAlternatives || !doneBody)) {
             xhtml.element("p", ((StringChunk) textChunk).getValue());

-- 
To stop receiving notification emails like this one, please contact
[email protected].

Reply via email to