This is an automated email from the ASF dual-hosted git repository. tallison pushed a commit to branch branch_1x in repository https://gitbox.apache.org/repos/asf/tika.git
commit b9a26e7e9dfcf3bf7102c6fd85e725325ecd7428 Author: tallison <[email protected]> AuthorDate: Thu Oct 24 12:38:50 2019 -0400 TIKA-2974 -- RecursiveParserWrapper shouldn't close inputstream --- .../apache/tika/parser/RecursiveParserWrapper.java | 13 +++--- .../tika/parser/RecursiveParserWrapperTest.java | 47 +++++++++++++++++++++- 2 files changed, 53 insertions(+), 7 deletions(-) diff --git a/tika-core/src/main/java/org/apache/tika/parser/RecursiveParserWrapper.java b/tika-core/src/main/java/org/apache/tika/parser/RecursiveParserWrapper.java index 87dc332..3f38e32 100644 --- a/tika-core/src/main/java/org/apache/tika/parser/RecursiveParserWrapper.java +++ b/tika-core/src/main/java/org/apache/tika/parser/RecursiveParserWrapper.java @@ -20,13 +20,13 @@ package org.apache.tika.parser; import org.apache.tika.exception.CorruptedFileException; import org.apache.tika.exception.TikaException; import org.apache.tika.io.FilenameUtils; +import org.apache.tika.io.TemporaryResources; import org.apache.tika.io.TikaInputStream; import org.apache.tika.metadata.Metadata; import org.apache.tika.metadata.Property; import org.apache.tika.metadata.TikaCoreProperties; import org.apache.tika.mime.MediaType; import org.apache.tika.sax.AbstractRecursiveParserWrapperHandler; -import org.apache.tika.sax.ContentHandlerDecorator; import org.apache.tika.sax.ContentHandlerFactory; import org.apache.tika.sax.RecursiveParserWrapperHandler; import org.apache.tika.sax.SecureContentHandler; @@ -224,13 +224,13 @@ public class RecursiveParserWrapper extends ParserDecorator { ContentHandler localHandler = parserState.recursiveParserWrapperHandler.getNewContentHandler(); long started = System.currentTimeMillis(); parserState.recursiveParserWrapperHandler.startDocument(); + TemporaryResources tmp = new TemporaryResources(); try { - try (TikaInputStream tis = TikaInputStream.get(stream)) { - RecursivelySecureContentHandler secureContentHandler = + TikaInputStream tis = TikaInputStream.get(stream, tmp); + 
RecursivelySecureContentHandler secureContentHandler = new RecursivelySecureContentHandler(localHandler, tis); - context.set(RecursivelySecureContentHandler.class, secureContentHandler); - getWrappedParser().parse(tis, secureContentHandler, metadata, context); - } + context.set(RecursivelySecureContentHandler.class, secureContentHandler); + getWrappedParser().parse(tis, secureContentHandler, metadata, context); } catch (SAXException e) { boolean wlr = isWriteLimitReached(e); if (wlr == false) { @@ -244,6 +244,7 @@ public class RecursiveParserWrapper extends ParserDecorator { metadata.add(RecursiveParserWrapperHandler.CONTAINER_EXCEPTION, stackTrace); throw e; } finally { + tmp.dispose(); long elapsedMillis = System.currentTimeMillis() - started; metadata.set(RecursiveParserWrapperHandler.PARSE_TIME_MILLIS, Long.toString(elapsedMillis)); parserState.recursiveParserWrapperHandler.endDocument(localHandler, metadata); diff --git a/tika-parsers/src/test/java/org/apache/tika/parser/RecursiveParserWrapperTest.java b/tika-parsers/src/test/java/org/apache/tika/parser/RecursiveParserWrapperTest.java index ff6f8ef..549f749 100644 --- a/tika-parsers/src/test/java/org/apache/tika/parser/RecursiveParserWrapperTest.java +++ b/tika-parsers/src/test/java/org/apache/tika/parser/RecursiveParserWrapperTest.java @@ -24,13 +24,17 @@ import static org.junit.Assert.assertEquals; import static org.junit.Assert.assertNull; import static org.junit.Assert.assertTrue; +import java.io.IOException; import java.io.InputStream; +import java.nio.file.Files; import java.util.HashSet; import java.util.List; import java.util.Set; import org.apache.commons.io.IOUtils; import org.apache.tika.exception.TikaException; +import org.apache.tika.io.ClosedInputStream; +import org.apache.tika.io.ProxyInputStream; import org.apache.tika.io.TikaInputStream; import org.apache.tika.metadata.Metadata; import org.apache.tika.metadata.TikaCoreProperties; @@ -345,6 +349,29 @@ public class RecursiveParserWrapperTest { 
assertEquals("a869bf6432ebd14e19fc79416274e0c9", list.get(7).get(md5Key)); } + @Test + public void testStreamNotClosed() throws Exception { + //TIKA-2974 + ParseContext context = new ParseContext(); + Parser wrapped = new AutoDetectParser(); + Metadata metadata = new Metadata(); + RecursiveParserWrapper wrapper = new RecursiveParserWrapper(wrapped, true); + String path = "/test-documents/test_recursive_embedded.docx"; + ContentHandlerFactory contentHandlerFactory = + new BasicContentHandlerFactory(BasicContentHandlerFactory.HANDLER_TYPE.TEXT, -1); + + CloseCountingInputStream stream = null; + RecursiveParserWrapperHandler handler = new RecursiveParserWrapperHandler(contentHandlerFactory); + try { + stream = new CloseCountingInputStream(RecursiveParserWrapperTest.class.getResourceAsStream(path)); + wrapper.parse(stream, handler, metadata, context); + assertEquals(0, stream.counter); + } finally { + IOUtils.closeQuietly(stream); + } + + } + private List<Metadata> getMetadata(Metadata metadata, ContentHandlerFactory contentHandlerFactory, boolean catchEmbeddedExceptions, DigestingParser.Digester digester) throws Exception { @@ -369,11 +396,29 @@ public class RecursiveParserWrapperTest { IOUtils.closeQuietly(stream); } return handler.getMetadataList(); - } private List<Metadata> getMetadata(Metadata metadata, ContentHandlerFactory contentHandlerFactory) throws Exception { return getMetadata(metadata, contentHandlerFactory, true, null); } + + private static class CloseCountingInputStream extends ProxyInputStream { + int counter = 0; + + public CloseCountingInputStream(InputStream in) { + super(in); + } + + /** + * Closes the wrapped input stream and then increments {@code counter}, so a + * test can check how many times this stream was closed. Unlike a close-shielding + * proxy, the wrapped stream is not replaced with a {@link ClosedInputStream} sentinel. + */ + @Override + public void close() throws IOException { + in.close(); + counter++; + } + } }
