This is an automated email from the ASF dual-hosted git repository. tallison pushed a commit to branch main in repository https://gitbox.apache.org/repos/asf/tika.git
commit 0bffbb44c34b6ccbbd2eda8b0e813e51750cf857
Author: tballison <talli...@apache.org>
AuthorDate: Fri Nov 4 15:44:03 2022 -0400

    TIKA-3918 -- special handling when spoolToDisk == 0
---
 .../org/apache/tika/parser/AutoDetectParser.java   | 44 +++++++++++++---------
 1 file changed, 27 insertions(+), 17 deletions(-)

diff --git a/tika-core/src/main/java/org/apache/tika/parser/AutoDetectParser.java b/tika-core/src/main/java/org/apache/tika/parser/AutoDetectParser.java
index 4d870d771..b8a0cb8aa 100644
--- a/tika-core/src/main/java/org/apache/tika/parser/AutoDetectParser.java
+++ b/tika-core/src/main/java/org/apache/tika/parser/AutoDetectParser.java
@@ -30,6 +30,7 @@ import org.apache.tika.exception.ZeroByteFileException;
 import org.apache.tika.extractor.EmbeddedDocumentExtractor;
 import org.apache.tika.io.TemporaryResources;
 import org.apache.tika.io.TikaInputStream;
+import org.apache.tika.metadata.HttpHeaders;
 import org.apache.tika.metadata.Metadata;
 import org.apache.tika.metadata.TikaCoreProperties;
 import org.apache.tika.mime.MediaType;
@@ -137,7 +138,8 @@ public class AutoDetectParser extends CompositeParser {
     public void parse(InputStream stream, ContentHandler handler, Metadata metadata,
                       ParseContext context) throws IOException, SAXException, TikaException {
         if (autoDetectParserConfig.getMetadataWriteFilterFactory() != null) {
-            metadata.setMetadataWriteFilter(autoDetectParserConfig.getMetadataWriteFilterFactory().newInstance());
+            metadata.setMetadataWriteFilter(
+                    autoDetectParserConfig.getMetadataWriteFilterFactory().newInstance());
         }
         TemporaryResources tmp = new TemporaryResources();
         try {
@@ -164,9 +166,8 @@ public class AutoDetectParser extends CompositeParser {
             }
             handler = decorateHandler(handler, metadata, context, autoDetectParserConfig);
             // TIKA-216: Zip bomb prevention
-            SecureContentHandler sch =
-                    handler != null ?
-                    createSecureContentHandler(handler, tis, autoDetectParserConfig) : null;
+            SecureContentHandler sch = handler != null ?
+                    createSecureContentHandler(handler, tis, autoDetectParserConfig) : null;
 
             initializeEmbeddedDocumentExtractor(metadata, context);
 
@@ -183,13 +184,12 @@ public class AutoDetectParser extends CompositeParser {
         }
     }
 
-    private ContentHandler decorateHandler(ContentHandler handler,
-                                           Metadata metadata, ParseContext context,
+    private ContentHandler decorateHandler(ContentHandler handler, Metadata metadata,
+                                           ParseContext context,
                                            AutoDetectParserConfig autoDetectParserConfig) {
         if (context.get(RecursiveParserWrapper.RecursivelySecureContentHandler.class) != null) {
             //using the recursiveparserwrapper. we should decorate this handler
-            return autoDetectParserConfig
-                    .getContentHandlerDecoratorFactory()
+            return autoDetectParserConfig.getContentHandlerDecoratorFactory()
                     .decorate(handler, metadata, context);
         }
         ParseRecord parseRecord = context.get(ParseRecord.class);
@@ -203,16 +203,26 @@ public class AutoDetectParser extends CompositeParser {
 
     private void maybeSpool(TikaInputStream tis, AutoDetectParserConfig autoDetectParserConfig,
                             Metadata metadata) throws IOException {
-        if (! tis.hasFile() && //if there's already a file, stop now
-                autoDetectParserConfig.getSpoolToDisk() != null && //if this is not
-                // configured, stop now
-                autoDetectParserConfig.getSpoolToDisk() > -1 &&
-                metadata.get(Metadata.CONTENT_LENGTH) != null) {
+        if (tis.hasFile()) {
+            return;
+        }
+        if (autoDetectParserConfig.getSpoolToDisk() == null) {
+            return;
+        }
+        //whether or not a content-length has been sent in,
+        //if spoolToDisk == 0, spool it
+        if (autoDetectParserConfig.getSpoolToDisk() == 0) {
+            tis.getPath();
+            metadata.set(HttpHeaders.CONTENT_LENGTH, Long.toString(tis.getLength()));
+            return;
+        }
+        if (metadata.get(Metadata.CONTENT_LENGTH) != null) {
             long len = -1l;
             try {
                 len = Long.parseLong(metadata.get(Metadata.CONTENT_LENGTH));
                 if (len > autoDetectParserConfig.getSpoolToDisk()) {
                     tis.getPath();
+                    metadata.set(HttpHeaders.CONTENT_LENGTH, Long.toString(tis.getLength()));
                 }
             } catch (NumberFormatException e) {
                 //swallow...maybe log?
@@ -230,9 +240,8 @@ public class AutoDetectParser extends CompositeParser {
         if (p == null) {
             context.set(Parser.class, this);
         }
-        EmbeddedDocumentExtractor edx =
-                autoDetectParserConfig.getEmbeddedDocumentExtractorFactory()
-                        .newInstance(metadata, context);
+        EmbeddedDocumentExtractor edx = autoDetectParserConfig.getEmbeddedDocumentExtractorFactory()
+                .newInstance(metadata, context);
         context.set(EmbeddedDocumentExtractor.class, edx);
     }
 
@@ -243,7 +252,8 @@ public class AutoDetectParser extends CompositeParser {
         parse(stream, handler, metadata, context);
     }
 
-    private SecureContentHandler createSecureContentHandler(ContentHandler handler, TikaInputStream tis,
+    private SecureContentHandler createSecureContentHandler(ContentHandler handler,
+                                                            TikaInputStream tis,
                                                             AutoDetectParserConfig config) {
         SecureContentHandler sch = new SecureContentHandler(handler, tis);
         if (config == null) {