This is an automated email from the ASF dual-hosted git repository.
tallison pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/tika.git
The following commit(s) were added to refs/heads/master by this push:
new 5cb880d TIKA-2974 -- RecursiveParserWrapper shouldn't close
inputstream
5cb880d is described below
commit 5cb880d6cbee6fd49580d11d8b472c05b04ff5f1
Author: tallison <[email protected]>
AuthorDate: Thu Oct 24 12:38:50 2019 -0400
TIKA-2974 -- RecursiveParserWrapper shouldn't close inputstream
---
.../apache/tika/parser/RecursiveParserWrapper.java | 13 +++---
.../tika/parser/RecursiveParserWrapperTest.java | 47 +++++++++++++++++++++-
2 files changed, 53 insertions(+), 7 deletions(-)
diff --git
a/tika-core/src/main/java/org/apache/tika/parser/RecursiveParserWrapper.java
b/tika-core/src/main/java/org/apache/tika/parser/RecursiveParserWrapper.java
index 37ed63c..2c94ae0 100644
--- a/tika-core/src/main/java/org/apache/tika/parser/RecursiveParserWrapper.java
+++ b/tika-core/src/main/java/org/apache/tika/parser/RecursiveParserWrapper.java
@@ -20,13 +20,13 @@ package org.apache.tika.parser;
import org.apache.tika.exception.CorruptedFileException;
import org.apache.tika.exception.TikaException;
import org.apache.tika.io.FilenameUtils;
+import org.apache.tika.io.TemporaryResources;
import org.apache.tika.io.TikaInputStream;
import org.apache.tika.metadata.Metadata;
import org.apache.tika.metadata.Property;
import org.apache.tika.metadata.TikaCoreProperties;
import org.apache.tika.mime.MediaType;
import org.apache.tika.sax.AbstractRecursiveParserWrapperHandler;
-import org.apache.tika.sax.ContentHandlerDecorator;
import org.apache.tika.sax.ContentHandlerFactory;
import org.apache.tika.sax.RecursiveParserWrapperHandler;
import org.apache.tika.sax.SecureContentHandler;
@@ -224,13 +224,13 @@ public class RecursiveParserWrapper extends
ParserDecorator {
ContentHandler localHandler =
parserState.recursiveParserWrapperHandler.getNewContentHandler();
long started = System.currentTimeMillis();
parserState.recursiveParserWrapperHandler.startDocument();
+ TemporaryResources tmp = new TemporaryResources();
try {
- try (TikaInputStream tis = TikaInputStream.get(stream)) {
- RecursivelySecureContentHandler secureContentHandler =
+ TikaInputStream tis = TikaInputStream.get(stream, tmp);
+ RecursivelySecureContentHandler secureContentHandler =
new RecursivelySecureContentHandler(localHandler, tis);
- context.set(RecursivelySecureContentHandler.class,
secureContentHandler);
- getWrappedParser().parse(tis, secureContentHandler, metadata,
context);
- }
+ context.set(RecursivelySecureContentHandler.class,
secureContentHandler);
+ getWrappedParser().parse(tis, secureContentHandler, metadata,
context);
} catch (SAXException e) {
boolean wlr = isWriteLimitReached(e);
if (wlr == false) {
@@ -244,6 +244,7 @@ public class RecursiveParserWrapper extends ParserDecorator
{
metadata.add(RecursiveParserWrapperHandler.CONTAINER_EXCEPTION,
stackTrace);
throw e;
} finally {
+ tmp.dispose();
long elapsedMillis = System.currentTimeMillis() - started;
metadata.set(RecursiveParserWrapperHandler.PARSE_TIME_MILLIS,
Long.toString(elapsedMillis));
parserState.recursiveParserWrapperHandler.endDocument(localHandler, metadata);
diff --git
a/tika-parsers/src/test/java/org/apache/tika/parser/RecursiveParserWrapperTest.java
b/tika-parsers/src/test/java/org/apache/tika/parser/RecursiveParserWrapperTest.java
index 0e50d7c..e48c7d6 100644
---
a/tika-parsers/src/test/java/org/apache/tika/parser/RecursiveParserWrapperTest.java
+++
b/tika-parsers/src/test/java/org/apache/tika/parser/RecursiveParserWrapperTest.java
@@ -24,13 +24,17 @@ import static org.junit.Assert.assertEquals;
import static org.junit.Assert.assertNull;
import static org.junit.Assert.assertTrue;
+import java.io.IOException;
import java.io.InputStream;
+import java.nio.file.Files;
import java.util.HashSet;
import java.util.List;
import java.util.Set;
import org.apache.commons.io.IOUtils;
import org.apache.tika.exception.TikaException;
+import org.apache.tika.io.ClosedInputStream;
+import org.apache.tika.io.ProxyInputStream;
import org.apache.tika.io.TikaInputStream;
import org.apache.tika.metadata.Metadata;
import org.apache.tika.metadata.TikaCoreProperties;
@@ -345,6 +349,29 @@ public class RecursiveParserWrapperTest {
assertEquals("a869bf6432ebd14e19fc79416274e0c9",
list.get(7).get(md5Key));
}
+ @Test
+ public void testStreamNotClosed() throws Exception {
+ // TIKA-2974: after wrapper.parse() the caller-supplied stream must not have been closed (close count == 0)
+ ParseContext context = new ParseContext();
+ Parser wrapped = new AutoDetectParser();
+ Metadata metadata = new Metadata();
+ RecursiveParserWrapper wrapper = new RecursiveParserWrapper(wrapped,
true);
+ String path = "/test-documents/test_recursive_embedded.docx";
+ ContentHandlerFactory contentHandlerFactory =
+ new
BasicContentHandlerFactory(BasicContentHandlerFactory.HANDLER_TYPE.TEXT, -1);
+
+ CloseCountingInputStream stream = null;
+ RecursiveParserWrapperHandler handler = new
RecursiveParserWrapperHandler(contentHandlerFactory);
+ try {
+ stream = new
CloseCountingInputStream(RecursiveParserWrapperTest.class.getResourceAsStream(path));
+ wrapper.parse(stream, handler, metadata, context);
+ assertEquals(0, stream.counter);
+ } finally {
+ IOUtils.closeQuietly(stream);
+ }
+
+ }
+
private List<Metadata> getMetadata(Metadata metadata,
ContentHandlerFactory contentHandlerFactory,
boolean catchEmbeddedExceptions,
DigestingParser.Digester digester)
throws Exception {
@@ -369,11 +396,29 @@ public class RecursiveParserWrapperTest {
IOUtils.closeQuietly(stream);
}
return handler.getMetadataList();
-
}
private List<Metadata> getMetadata(Metadata metadata,
ContentHandlerFactory contentHandlerFactory)
throws Exception {
return getMetadata(metadata, contentHandlerFactory, true, null);
}
+
+ private static class CloseCountingInputStream extends ProxyInputStream {
+ int counter = 0;
+
+ public CloseCountingInputStream(InputStream in) {
+ super(in);
+ }
+
+ /**
+ * Closes the underlying input stream and then increments {@link #counter},
+ * so a test can check how many times close() was called on this stream.
+ * The counter is not incremented if the underlying close() throws.
+ */
+ @Override
+ public void close() throws IOException {
+ in.close();
+ counter++;
+ }
+ }
}