This is an automated email from the ASF dual-hosted git repository.

tallison pushed a commit to branch TIKA-4490
in repository https://gitbox.apache.org/repos/asf/tika.git

commit b545fd387fc3a49443b5daf811b7b6a7f576a8b4
Author: tallison <[email protected]>
AuthorDate: Thu Oct 2 16:45:42 2025 -0400

    TIKA-4490 -- move the check to the parser level
---
 .../org/apache/tika/parser/mail/MailContentHandler.java     |  7 -------
 .../main/java/org/apache/tika/parser/mail/RFC822Parser.java | 13 +++++++++++++
 2 files changed, 13 insertions(+), 7 deletions(-)

diff --git a/tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-mail-module/src/main/java/org/apache/tika/parser/mail/MailContentHandler.java b/tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-mail-module/src/main/java/org/apache/tika/parser/mail/MailContentHandler.java
index 69ec3f598..9af23d004 100644
--- a/tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-mail-module/src/main/java/org/apache/tika/parser/mail/MailContentHandler.java
+++ b/tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-mail-module/src/main/java/org/apache/tika/parser/mail/MailContentHandler.java
@@ -128,10 +128,6 @@ class MailContentHandler implements ContentHandler {
         if (!extractAllAlternatives && alternativePartBuffer.size() > 0) {
             UnsynchronizedByteArrayOutputStream bos = UnsynchronizedByteArrayOutputStream.builder().get();
             IOUtils.copy(is, bos);
-            byte[] bytes = bos.toByteArray();
-            if (bytes.length == 0) {
-                return;
-            }
             alternativePartBuffer.peek().children.add(new BodyContents(submd, bos.toByteArray()));
         } else if (!extractAllAlternatives && parts.size() < 2) {
             //if you're at the first level of embedding
@@ -141,9 +137,6 @@ class MailContentHandler implements ContentHandler {
             UnsynchronizedByteArrayOutputStream bos = UnsynchronizedByteArrayOutputStream.builder().get();
             IOUtils.copy(is, bos);
             final byte[] bytes = bos.toByteArray();
-            if (bytes.length == 0) {
-                return;
-            }
             if (detectInlineTextOrHtml(submd, bytes)) {
                 handleInlineBodyPart(new BodyContents(submd, bytes));
             } else {
diff --git a/tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-mail-module/src/main/java/org/apache/tika/parser/mail/RFC822Parser.java b/tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-mail-module/src/main/java/org/apache/tika/parser/mail/RFC822Parser.java
index bd964db4a..b2b552122 100644
--- a/tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-mail-module/src/main/java/org/apache/tika/parser/mail/RFC822Parser.java
+++ b/tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-mail-module/src/main/java/org/apache/tika/parser/mail/RFC822Parser.java
@@ -31,6 +31,7 @@ import org.xml.sax.SAXException;
 import org.apache.tika.config.Field;
 import org.apache.tika.detect.Detector;
 import org.apache.tika.exception.TikaException;
+import org.apache.tika.exception.ZeroByteFileException;
 import org.apache.tika.extractor.EmbeddedDocumentUtil;
 import org.apache.tika.io.TikaInputStream;
 import org.apache.tika.metadata.Metadata;
@@ -95,6 +96,7 @@ public class RFC822Parser implements Parser {
         parser.setNoRecurse();
         xhtml.startDocument();
         TikaInputStream tstream = TikaInputStream.get(stream);
+        checkForZeroByte(tstream);//avoid stackoverflow
         try {
             parser.parse(tstream);
         } catch (IOException e) {
@@ -114,6 +116,17 @@ public class RFC822Parser implements Parser {
         xhtml.endDocument();
     }
 
+    private void checkForZeroByte(TikaInputStream tstream) throws IOException, ZeroByteFileException {
+        tstream.mark(1);
+        try {
+            if (tstream.read() < 0) {
+                throw new ZeroByteFileException("rfc822 parser found zero bytes");
+            }
+        } finally {
+            tstream.reset();
+        }
+    }
+
     /**
      * Until version 1.17, Tika handled all body parts as embedded objects (see TIKA-2478).
      * In 1.17, we modified the parser to select only the best alternative body

Reply via email to