[
https://issues.apache.org/jira/browse/TIKA-2395?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel&focusedCommentId=16354133#comment-16354133
]
Thomas Mortagne edited comment on TIKA-2395 at 2/6/18 5:27 PM:
---------------------------------------------------------------
So I finally have more information (I tried to upgrade again), in particular
which kind of InputStream is no longer supported since 1.15:
org.apache.commons.io.input.AutoCloseInputStream (from Apache commons-io).
You can reproduce with the following:
{code}
new Tika().parseToString(new
AutoCloseInputStream(getClass().getResourceAsStream("/pdf.pdf")))
{code}
and the stack trace:
{noformat}
org.apache.tika.io.TaggedIOException: mark/reset not supported
at
org.apache.tika.io.TaggedInputStream.handleIOException(TaggedInputStream.java:133)
at org.apache.tika.io.ProxyInputStream.reset(ProxyInputStream.java:170)
at org.apache.tika.io.TikaInputStream.reset(TikaInputStream.java:673)
at org.apache.tika.mime.MimeTypes.detect(MimeTypes.java:496)
at
org.apache.tika.detect.CompositeDetector.detect(CompositeDetector.java:84)
at
org.apache.tika.parser.AutoDetectParser.parse(AutoDetectParser.java:116)
at org.apache.tika.Tika.parseToString(Tika.java:527)
at org.apache.tika.Tika.parseToString(Tika.java:602)
at org.xwiki.tika.internal.TikaUtils.parseToString(TikaUtils.java:157)
at
org.xwiki.tika.internal.TikaUtilsTest.parseAutoclosable(TikaUtilsTest.java:47)
at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)
at
sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62)
at
sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)
at java.lang.reflect.Method.invoke(Method.java:498)
at
org.junit.platform.commons.util.ReflectionUtils.invokeMethod(ReflectionUtils.java:389)
at
org.junit.jupiter.engine.execution.ExecutableInvoker.invoke(ExecutableInvoker.java:115)
at
org.junit.jupiter.engine.descriptor.TestMethodTestDescriptor.lambda$invokeTestMethod$6(TestMethodTestDescriptor.java:167)
at
org.junit.jupiter.engine.execution.ThrowableCollector.execute(ThrowableCollector.java:40)
at
org.junit.jupiter.engine.descriptor.TestMethodTestDescriptor.invokeTestMethod(TestMethodTestDescriptor.java:163)
at
org.junit.jupiter.engine.descriptor.TestMethodTestDescriptor.execute(TestMethodTestDescriptor.java:110)
at
org.junit.jupiter.engine.descriptor.TestMethodTestDescriptor.execute(TestMethodTestDescriptor.java:57)
at
org.junit.platform.engine.support.hierarchical.HierarchicalTestExecutor.lambda$execute$3(HierarchicalTestExecutor.java:83)
at
org.junit.platform.engine.support.hierarchical.SingleTestExecutor.executeSafely(SingleTestExecutor.java:66)
at
org.junit.platform.engine.support.hierarchical.HierarchicalTestExecutor.execute(HierarchicalTestExecutor.java:77)
at
org.junit.platform.engine.support.hierarchical.HierarchicalTestExecutor.lambda$null$2(HierarchicalTestExecutor.java:92)
at
java.util.stream.ForEachOps$ForEachOp$OfRef.accept(ForEachOps.java:184)
at
java.util.stream.ReferencePipeline$2$1.accept(ReferencePipeline.java:175)
at java.util.Iterator.forEachRemaining(Iterator.java:116)
at
java.util.Spliterators$IteratorSpliterator.forEachRemaining(Spliterators.java:1801)
at java.util.stream.AbstractPipeline.copyInto(AbstractPipeline.java:481)
at
java.util.stream.AbstractPipeline.wrapAndCopyInto(AbstractPipeline.java:471)
at
java.util.stream.ForEachOps$ForEachOp.evaluateSequential(ForEachOps.java:151)
at
java.util.stream.ForEachOps$ForEachOp$OfRef.evaluateSequential(ForEachOps.java:174)
at java.util.stream.AbstractPipeline.evaluate(AbstractPipeline.java:234)
at
java.util.stream.ReferencePipeline.forEach(ReferencePipeline.java:418)
at
org.junit.platform.engine.support.hierarchical.HierarchicalTestExecutor.lambda$execute$3(HierarchicalTestExecutor.java:92)
at
org.junit.platform.engine.support.hierarchical.SingleTestExecutor.executeSafely(SingleTestExecutor.java:66)
at
org.junit.platform.engine.support.hierarchical.HierarchicalTestExecutor.execute(HierarchicalTestExecutor.java:77)
at
org.junit.platform.engine.support.hierarchical.HierarchicalTestExecutor.lambda$null$2(HierarchicalTestExecutor.java:92)
at
java.util.stream.ForEachOps$ForEachOp$OfRef.accept(ForEachOps.java:184)
at
java.util.stream.ReferencePipeline$2$1.accept(ReferencePipeline.java:175)
at java.util.Iterator.forEachRemaining(Iterator.java:116)
at
java.util.Spliterators$IteratorSpliterator.forEachRemaining(Spliterators.java:1801)
at java.util.stream.AbstractPipeline.copyInto(AbstractPipeline.java:481)
at
java.util.stream.AbstractPipeline.wrapAndCopyInto(AbstractPipeline.java:471)
at
java.util.stream.ForEachOps$ForEachOp.evaluateSequential(ForEachOps.java:151)
at
java.util.stream.ForEachOps$ForEachOp$OfRef.evaluateSequential(ForEachOps.java:174)
at java.util.stream.AbstractPipeline.evaluate(AbstractPipeline.java:234)
at
java.util.stream.ReferencePipeline.forEach(ReferencePipeline.java:418)
at
org.junit.platform.engine.support.hierarchical.HierarchicalTestExecutor.lambda$execute$3(HierarchicalTestExecutor.java:92)
at
org.junit.platform.engine.support.hierarchical.SingleTestExecutor.executeSafely(SingleTestExecutor.java:66)
at
org.junit.platform.engine.support.hierarchical.HierarchicalTestExecutor.execute(HierarchicalTestExecutor.java:77)
at
org.junit.platform.engine.support.hierarchical.HierarchicalTestExecutor.execute(HierarchicalTestExecutor.java:51)
at
org.junit.platform.engine.support.hierarchical.HierarchicalTestEngine.execute(HierarchicalTestEngine.java:43)
at
org.junit.platform.launcher.core.DefaultLauncher.execute(DefaultLauncher.java:170)
at
org.junit.platform.launcher.core.DefaultLauncher.execute(DefaultLauncher.java:154)
at
org.junit.platform.launcher.core.DefaultLauncher.execute(DefaultLauncher.java:90)
at
org.eclipse.jdt.internal.junit5.runner.JUnit5TestReference.run(JUnit5TestReference.java:86)
at
org.eclipse.jdt.internal.junit.runner.TestExecution.run(TestExecution.java:38)
at
org.eclipse.jdt.internal.junit.runner.RemoteTestRunner.runTests(RemoteTestRunner.java:538)
at
org.eclipse.jdt.internal.junit.runner.RemoteTestRunner.runTests(RemoteTestRunner.java:760)
at
org.eclipse.jdt.internal.junit.runner.RemoteTestRunner.run(RemoteTestRunner.java:460)
at
org.eclipse.jdt.internal.junit.runner.RemoteTestRunner.main(RemoteTestRunner.java:206)
Caused by: java.io.IOException: mark/reset not supported
at java.io.InputStream.reset(InputStream.java:348)
at
org.apache.commons.io.input.ProxyInputStream.reset(ProxyInputStream.java:168)
at org.apache.tika.io.ProxyInputStream.reset(ProxyInputStream.java:168)
... 61 more
{noformat}
was (Author: tmortagne):
So I finally have more information (I tried to upgrade again), in particular
which kind of InputStream is no longer supported since 1.15:
org.apache.commons.io.input.AutoCloseInputStream (from Apache commons-io).
You can reproduce with the following:
{code}
new Tika().parseToString(new
AutoCloseInputStream(getClass().getResourceAsStream("/pdf.pdf")))
{code}
> The parser does not support InputStream without built in mark/reset support
> anymore
> -----------------------------------------------------------------------------------
>
> Key: TIKA-2395
> URL: https://issues.apache.org/jira/browse/TIKA-2395
> Project: Tika
> Issue Type: Bug
> Components: detector, parser
> Affects Versions: 1.15
> Reporter: Thomas Mortagne
> Priority: Blocker
>
> After upgrade to 1.5 (from 1.4) it seems that the detector does not properly
> support all kinds of InputStream like it used to.
> I get tons of:
> {noformat}
> org.apache.tika.io.TaggedIOException: mark/reset not supported
> at
> org.apache.tika.io.TaggedInputStream.handleIOException(TaggedInputStream.java:133)
> at org.apache.tika.io.ProxyInputStream.reset(ProxyInputStream.java:170)
> at org.apache.tika.io.TikaInputStream.reset(TikaInputStream.java:673)
> at org.apache.tika.mime.MimeTypes.detect(MimeTypes.java:474)
> at
> org.apache.tika.detect.CompositeDetector.detect(CompositeDetector.java:77)
> at
> org.apache.tika.parser.AutoDetectParser.parse(AutoDetectParser.java:115)
> at org.apache.tika.Tika.parseToString(Tika.java:527)
> at
> org.xwiki.search.solr.internal.metadata.AbstractSolrMetadataExtractor.getContentAsText(AbstractSolrMetadataExtractor.java:509)
> at
> org.xwiki.search.solr.internal.metadata.AttachmentSolrMetadataExtractor.setLocaleAndContentFields(AttachmentSolrMetadataExtractor.java:111)
> at
> org.xwiki.search.solr.internal.metadata.AttachmentSolrMetadataExtractor.setFieldsInternal(AttachmentSolrMetadataExtractor.java:93)
> at
> org.xwiki.search.solr.internal.metadata.AbstractSolrMetadataExtractor.getSolrDocument(AbstractSolrMetadataExtractor.java:133)
> at
> org.xwiki.search.solr.internal.DefaultSolrIndexer.getSolrDocument(DefaultSolrIndexer.java:504)
> at
> org.xwiki.search.solr.internal.DefaultSolrIndexer.processBatch(DefaultSolrIndexer.java:411)
> at
> org.xwiki.search.solr.internal.DefaultSolrIndexer.run(DefaultSolrIndexer.java:377)
> at java.lang.Thread.run(Thread.java:748)
> Caused by: java.io.IOException: mark/reset not supported
> at java.io.InputStream.reset(InputStream.java:348)
> at
> org.apache.commons.io.input.ProxyInputStream.reset(ProxyInputStream.java:169)
> at org.apache.tika.io.ProxyInputStream.reset(ProxyInputStream.java:168)
> ... 13 common frames omitted
> {noformat}
> This regression makes Tika unusable for us.
--
This message was sent by Atlassian JIRA
(v7.6.3#76005)