This is an automated email from the ASF dual-hosted git repository.

tallison pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/tika.git


The following commit(s) were added to refs/heads/main by this push:
     new 5b2b17de9 TIKA-4490 (#2350)
5b2b17de9 is described below

commit 5b2b17de9ec47602425d7dcc8d40d1605cfe5ba1
Author: Tim Allison <[email protected]>
AuthorDate: Thu Oct 2 18:10:31 2025 -0400

    TIKA-4490 (#2350)
    
    * TIKA-4490 -- move the check to the parser level
---
 .../org/apache/tika/parser/mail/MailContentHandler.java     |  7 -------
 .../main/java/org/apache/tika/parser/mail/RFC822Parser.java | 13 +++++++++++++
 .../src/test/java/org/apache/tika/ossfuzz/ParserFuzzer.java |  1 -
 3 files changed, 13 insertions(+), 8 deletions(-)

diff --git a/tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-mail-module/src/main/java/org/apache/tika/parser/mail/MailContentHandler.java b/tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-mail-module/src/main/java/org/apache/tika/parser/mail/MailContentHandler.java
index 69ec3f598..9af23d004 100644
--- a/tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-mail-module/src/main/java/org/apache/tika/parser/mail/MailContentHandler.java
+++ b/tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-mail-module/src/main/java/org/apache/tika/parser/mail/MailContentHandler.java
@@ -128,10 +128,6 @@ class MailContentHandler implements ContentHandler {
         if (!extractAllAlternatives && alternativePartBuffer.size() > 0) {
             UnsynchronizedByteArrayOutputStream bos = UnsynchronizedByteArrayOutputStream.builder().get();
             IOUtils.copy(is, bos);
-            byte[] bytes = bos.toByteArray();
-            if (bytes.length == 0) {
-                return;
-            }
             alternativePartBuffer.peek().children.add(new BodyContents(submd, bos.toByteArray()));
         } else if (!extractAllAlternatives && parts.size() < 2) {
             //if you're at the first level of embedding
@@ -141,9 +137,6 @@ class MailContentHandler implements ContentHandler {
             UnsynchronizedByteArrayOutputStream bos = UnsynchronizedByteArrayOutputStream.builder().get();
             IOUtils.copy(is, bos);
             final byte[] bytes = bos.toByteArray();
-            if (bytes.length == 0) {
-                return;
-            }
             if (detectInlineTextOrHtml(submd, bytes)) {
                 handleInlineBodyPart(new BodyContents(submd, bytes));
             } else {
diff --git a/tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-mail-module/src/main/java/org/apache/tika/parser/mail/RFC822Parser.java b/tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-mail-module/src/main/java/org/apache/tika/parser/mail/RFC822Parser.java
index bd964db4a..b2b552122 100644
--- a/tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-mail-module/src/main/java/org/apache/tika/parser/mail/RFC822Parser.java
+++ b/tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-mail-module/src/main/java/org/apache/tika/parser/mail/RFC822Parser.java
@@ -31,6 +31,7 @@ import org.xml.sax.SAXException;
 import org.apache.tika.config.Field;
 import org.apache.tika.detect.Detector;
 import org.apache.tika.exception.TikaException;
+import org.apache.tika.exception.ZeroByteFileException;
 import org.apache.tika.extractor.EmbeddedDocumentUtil;
 import org.apache.tika.io.TikaInputStream;
 import org.apache.tika.metadata.Metadata;
@@ -95,6 +96,7 @@ public class RFC822Parser implements Parser {
         parser.setNoRecurse();
         xhtml.startDocument();
         TikaInputStream tstream = TikaInputStream.get(stream);
+        checkForZeroByte(tstream);//avoid stackoverflow
         try {
             parser.parse(tstream);
         } catch (IOException e) {
@@ -114,6 +116,17 @@ public class RFC822Parser implements Parser {
         xhtml.endDocument();
     }
 
+    private void checkForZeroByte(TikaInputStream tstream) throws IOException, ZeroByteFileException {
+        tstream.mark(1);
+        try {
+            if (tstream.read() < 0) {
+                throw new ZeroByteFileException("rfc822 parser found zero bytes");
+            }
+        } finally {
+            tstream.reset();
+        }
+    }
+
     /**
      * Until version 1.17, Tika handled all body parts as embedded objects (see TIKA-2478).
      * In 1.17, we modified the parser to select only the best alternative body
diff --git a/tika-parsers/tika-parsers-standard/tika-parsers-standard-package/src/test/java/org/apache/tika/ossfuzz/ParserFuzzer.java b/tika-parsers/tika-parsers-standard/tika-parsers-standard-package/src/test/java/org/apache/tika/ossfuzz/ParserFuzzer.java
index d13a4748f..129b964d6 100644
--- a/tika-parsers/tika-parsers-standard/tika-parsers-standard-package/src/test/java/org/apache/tika/ossfuzz/ParserFuzzer.java
+++ b/tika-parsers/tika-parsers-standard/tika-parsers-standard-package/src/test/java/org/apache/tika/ossfuzz/ParserFuzzer.java
@@ -27,7 +27,6 @@ import org.apache.tika.parser.Parser;
 import org.apache.tika.parser.RecursiveParserWrapper;
 import org.apache.tika.sax.BasicContentHandlerFactory;
 import org.apache.tika.sax.RecursiveParserWrapperHandler;
-
 import org.apache.tika.sax.ToTextContentHandler;
 
 

Reply via email to