This is an automated email from the ASF dual-hosted git repository. tallison pushed a commit to branch TIKA-4490 in repository https://gitbox.apache.org/repos/asf/tika.git
commit d2d427063539d5f406edb4f498cce8371930fb23 Author: tallison <[email protected]> AuthorDate: Thu Oct 2 16:26:57 2025 -0400 TIKA-4490 -- fixes for problems found via ossfuzz integration --- .../tika/parser/mail/MailContentHandler.java | 7 ++ .../org/apache/tika/ossfuzz/OssFuzzReplicator.java | 32 +++++++++ .../java/org/apache/tika/ossfuzz/ParserFuzzer.java | 77 ++++++++++++++++++++++ 3 files changed, 116 insertions(+) diff --git a/tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-mail-module/src/main/java/org/apache/tika/parser/mail/MailContentHandler.java b/tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-mail-module/src/main/java/org/apache/tika/parser/mail/MailContentHandler.java index 9af23d004..69ec3f598 100644 --- a/tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-mail-module/src/main/java/org/apache/tika/parser/mail/MailContentHandler.java +++ b/tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-mail-module/src/main/java/org/apache/tika/parser/mail/MailContentHandler.java @@ -128,6 +128,10 @@ class MailContentHandler implements ContentHandler { if (!extractAllAlternatives && alternativePartBuffer.size() > 0) { UnsynchronizedByteArrayOutputStream bos = UnsynchronizedByteArrayOutputStream.builder().get(); IOUtils.copy(is, bos); + byte[] bytes = bos.toByteArray(); + if (bytes.length == 0) { + return; + } alternativePartBuffer.peek().children.add(new BodyContents(submd, bos.toByteArray())); } else if (!extractAllAlternatives && parts.size() < 2) { //if you're at the first level of embedding @@ -137,6 +141,9 @@ class MailContentHandler implements ContentHandler { UnsynchronizedByteArrayOutputStream bos = UnsynchronizedByteArrayOutputStream.builder().get(); IOUtils.copy(is, bos); final byte[] bytes = bos.toByteArray(); + if (bytes.length == 0) { + return; + } if (detectInlineTextOrHtml(submd, bytes)) { handleInlineBodyPart(new BodyContents(submd, bytes)); } else { diff --git a/tika-parsers/tika-parsers-standard/tika-parsers-standard-package/src/test/java/org/apache/tika/ossfuzz/OssFuzzReplicator.java b/tika-parsers/tika-parsers-standard/tika-parsers-standard-package/src/test/java/org/apache/tika/ossfuzz/OssFuzzReplicator.java new file mode 100644 index 000000000..83e168e1a --- /dev/null +++ b/tika-parsers/tika-parsers-standard/tika-parsers-standard-package/src/test/java/org/apache/tika/ossfuzz/OssFuzzReplicator.java @@ -0,0 +1,32 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.tika.ossfuzz; + +import org.junit.jupiter.api.Disabled; +import org.junit.jupiter.api.Test; + +import org.apache.tika.parser.mail.RFC822Parser; + +public class OssFuzzReplicator { + + @Test + @Disabled("turn this on for debugging ossfuzz findings") + public void testOne() throws Throwable { + byte[] bytes = new byte[0];//specify bytes here...probably from a path + ParserFuzzer.parseOne(new RFC822Parser(), bytes); + } +} diff --git a/tika-parsers/tika-parsers-standard/tika-parsers-standard-package/src/test/java/org/apache/tika/ossfuzz/ParserFuzzer.java b/tika-parsers/tika-parsers-standard/tika-parsers-standard-package/src/test/java/org/apache/tika/ossfuzz/ParserFuzzer.java new file mode 100644 index 000000000..d13a4748f --- /dev/null +++ b/tika-parsers/tika-parsers-standard/tika-parsers-standard-package/src/test/java/org/apache/tika/ossfuzz/ParserFuzzer.java @@ -0,0 +1,77 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.tika.ossfuzz; + +import java.io.InputStream; + +import org.xml.sax.ContentHandler; + +import org.apache.tika.io.TikaInputStream; +import org.apache.tika.metadata.Metadata; +import org.apache.tika.parser.ParseContext; +import org.apache.tika.parser.Parser; +import org.apache.tika.parser.RecursiveParserWrapper; +import org.apache.tika.sax.BasicContentHandlerFactory; +import org.apache.tika.sax.RecursiveParserWrapperHandler; + +import org.apache.tika.sax.ToTextContentHandler; + + +class ParserFuzzer { + + public static void parseOne(Parser parser, byte[] bytes, ParseContext parseContext) throws Throwable { + parseBytes(parser, bytes, parseContext); + parseFile(parser, bytes, parseContext); + } + + + public static void parseOne(Parser parser, byte[] bytes) throws Throwable { + parseBytes(parser, bytes, new ParseContext()); + parseFile(parser, bytes, new ParseContext()); + } + + public static void parseRMetaFile(Parser parser, byte[] bytes) throws Throwable { + RecursiveParserWrapper wrapper = new RecursiveParserWrapper(parser); + RecursiveParserWrapperHandler rpwh = new RecursiveParserWrapperHandler( + new BasicContentHandlerFactory(BasicContentHandlerFactory.HANDLER_TYPE.XML, -1)); + try (TikaInputStream tis = TikaInputStream.get(bytes)) { + tis.getPath(); + wrapper.parse(tis, rpwh, new Metadata(), new ParseContext()); + } + } + + public static void parseBytes(Parser parser, byte[] bytes, ParseContext parseContext) throws Throwable { + ContentHandler handler = new ToTextContentHandler(); + //make sure that other parsers cannot be invoked + parseContext.set(Parser.class, parser); + //try first with bytes + try (InputStream is = TikaInputStream.get(bytes)) { + parser.parse(is, handler, new Metadata(), parseContext); + } + } + + public static void parseFile(Parser parser, byte[] bytes, ParseContext parseContext) throws Throwable { + ContentHandler handler = new ToTextContentHandler(); + //make sure that other parsers cannot be invoked + parseContext.set(Parser.class, parser); + try (TikaInputStream tis = TikaInputStream.get(bytes)) { + //force writing to tmp file + tis.getPath(); + parser.parse(tis, handler, new Metadata(), parseContext); + } + } +}
