[ 
https://issues.apache.org/jira/browse/TIKA-2395?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel&focusedCommentId=16354133#comment-16354133
 ] 

Thomas Mortagne edited comment on TIKA-2395 at 2/6/18 5:27 PM:
---------------------------------------------------------------

So I finally have more information (I tried to upgrade again), in particular which
kind of InputStream is no longer supported since 1.15:
org.apache.commons.io.input.AutoCloseInputStream (from Apache commons-io).

You can reproduce with the following:

{code}
new Tika().parseToString(new 
AutoCloseInputStream(getClass().getResourceAsStream("/pdf.pdf")))
{code}

and the stack trace:

{noformat}
org.apache.tika.io.TaggedIOException: mark/reset not supported
        at 
org.apache.tika.io.TaggedInputStream.handleIOException(TaggedInputStream.java:133)
        at org.apache.tika.io.ProxyInputStream.reset(ProxyInputStream.java:170)
        at org.apache.tika.io.TikaInputStream.reset(TikaInputStream.java:673)
        at org.apache.tika.mime.MimeTypes.detect(MimeTypes.java:496)
        at 
org.apache.tika.detect.CompositeDetector.detect(CompositeDetector.java:84)
        at 
org.apache.tika.parser.AutoDetectParser.parse(AutoDetectParser.java:116)
        at org.apache.tika.Tika.parseToString(Tika.java:527)
        at org.apache.tika.Tika.parseToString(Tika.java:602)
        at org.xwiki.tika.internal.TikaUtils.parseToString(TikaUtils.java:157)
        at 
org.xwiki.tika.internal.TikaUtilsTest.parseAutoclosable(TikaUtilsTest.java:47)
        at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)
        at 
sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62)
        at 
sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)
        at java.lang.reflect.Method.invoke(Method.java:498)
        at 
org.junit.platform.commons.util.ReflectionUtils.invokeMethod(ReflectionUtils.java:389)
        at 
org.junit.jupiter.engine.execution.ExecutableInvoker.invoke(ExecutableInvoker.java:115)
        at 
org.junit.jupiter.engine.descriptor.TestMethodTestDescriptor.lambda$invokeTestMethod$6(TestMethodTestDescriptor.java:167)
        at 
org.junit.jupiter.engine.execution.ThrowableCollector.execute(ThrowableCollector.java:40)
        at 
org.junit.jupiter.engine.descriptor.TestMethodTestDescriptor.invokeTestMethod(TestMethodTestDescriptor.java:163)
        at 
org.junit.jupiter.engine.descriptor.TestMethodTestDescriptor.execute(TestMethodTestDescriptor.java:110)
        at 
org.junit.jupiter.engine.descriptor.TestMethodTestDescriptor.execute(TestMethodTestDescriptor.java:57)
        at 
org.junit.platform.engine.support.hierarchical.HierarchicalTestExecutor.lambda$execute$3(HierarchicalTestExecutor.java:83)
        at 
org.junit.platform.engine.support.hierarchical.SingleTestExecutor.executeSafely(SingleTestExecutor.java:66)
        at 
org.junit.platform.engine.support.hierarchical.HierarchicalTestExecutor.execute(HierarchicalTestExecutor.java:77)
        at 
org.junit.platform.engine.support.hierarchical.HierarchicalTestExecutor.lambda$null$2(HierarchicalTestExecutor.java:92)
        at 
java.util.stream.ForEachOps$ForEachOp$OfRef.accept(ForEachOps.java:184)
        at 
java.util.stream.ReferencePipeline$2$1.accept(ReferencePipeline.java:175)
        at java.util.Iterator.forEachRemaining(Iterator.java:116)
        at 
java.util.Spliterators$IteratorSpliterator.forEachRemaining(Spliterators.java:1801)
        at java.util.stream.AbstractPipeline.copyInto(AbstractPipeline.java:481)
        at 
java.util.stream.AbstractPipeline.wrapAndCopyInto(AbstractPipeline.java:471)
        at 
java.util.stream.ForEachOps$ForEachOp.evaluateSequential(ForEachOps.java:151)
        at 
java.util.stream.ForEachOps$ForEachOp$OfRef.evaluateSequential(ForEachOps.java:174)
        at java.util.stream.AbstractPipeline.evaluate(AbstractPipeline.java:234)
        at 
java.util.stream.ReferencePipeline.forEach(ReferencePipeline.java:418)
        at 
org.junit.platform.engine.support.hierarchical.HierarchicalTestExecutor.lambda$execute$3(HierarchicalTestExecutor.java:92)
        at 
org.junit.platform.engine.support.hierarchical.SingleTestExecutor.executeSafely(SingleTestExecutor.java:66)
        at 
org.junit.platform.engine.support.hierarchical.HierarchicalTestExecutor.execute(HierarchicalTestExecutor.java:77)
        at 
org.junit.platform.engine.support.hierarchical.HierarchicalTestExecutor.lambda$null$2(HierarchicalTestExecutor.java:92)
        at 
java.util.stream.ForEachOps$ForEachOp$OfRef.accept(ForEachOps.java:184)
        at 
java.util.stream.ReferencePipeline$2$1.accept(ReferencePipeline.java:175)
        at java.util.Iterator.forEachRemaining(Iterator.java:116)
        at 
java.util.Spliterators$IteratorSpliterator.forEachRemaining(Spliterators.java:1801)
        at java.util.stream.AbstractPipeline.copyInto(AbstractPipeline.java:481)
        at 
java.util.stream.AbstractPipeline.wrapAndCopyInto(AbstractPipeline.java:471)
        at 
java.util.stream.ForEachOps$ForEachOp.evaluateSequential(ForEachOps.java:151)
        at 
java.util.stream.ForEachOps$ForEachOp$OfRef.evaluateSequential(ForEachOps.java:174)
        at java.util.stream.AbstractPipeline.evaluate(AbstractPipeline.java:234)
        at 
java.util.stream.ReferencePipeline.forEach(ReferencePipeline.java:418)
        at 
org.junit.platform.engine.support.hierarchical.HierarchicalTestExecutor.lambda$execute$3(HierarchicalTestExecutor.java:92)
        at 
org.junit.platform.engine.support.hierarchical.SingleTestExecutor.executeSafely(SingleTestExecutor.java:66)
        at 
org.junit.platform.engine.support.hierarchical.HierarchicalTestExecutor.execute(HierarchicalTestExecutor.java:77)
        at 
org.junit.platform.engine.support.hierarchical.HierarchicalTestExecutor.execute(HierarchicalTestExecutor.java:51)
        at 
org.junit.platform.engine.support.hierarchical.HierarchicalTestEngine.execute(HierarchicalTestEngine.java:43)
        at 
org.junit.platform.launcher.core.DefaultLauncher.execute(DefaultLauncher.java:170)
        at 
org.junit.platform.launcher.core.DefaultLauncher.execute(DefaultLauncher.java:154)
        at 
org.junit.platform.launcher.core.DefaultLauncher.execute(DefaultLauncher.java:90)
        at 
org.eclipse.jdt.internal.junit5.runner.JUnit5TestReference.run(JUnit5TestReference.java:86)
        at 
org.eclipse.jdt.internal.junit.runner.TestExecution.run(TestExecution.java:38)
        at 
org.eclipse.jdt.internal.junit.runner.RemoteTestRunner.runTests(RemoteTestRunner.java:538)
        at 
org.eclipse.jdt.internal.junit.runner.RemoteTestRunner.runTests(RemoteTestRunner.java:760)
        at 
org.eclipse.jdt.internal.junit.runner.RemoteTestRunner.run(RemoteTestRunner.java:460)
        at 
org.eclipse.jdt.internal.junit.runner.RemoteTestRunner.main(RemoteTestRunner.java:206)
Caused by: java.io.IOException: mark/reset not supported
        at java.io.InputStream.reset(InputStream.java:348)
        at 
org.apache.commons.io.input.ProxyInputStream.reset(ProxyInputStream.java:168)
        at org.apache.tika.io.ProxyInputStream.reset(ProxyInputStream.java:168)
        ... 61 more
{noformat}


was (Author: tmortagne):
So I finally have more information (I tried to upgrade again), in particular which
kind of InputStream is no longer supported since 1.15:
org.apache.commons.io.input.AutoCloseInputStream (from Apache commons-io).

You can reproduce with the following:

{code}
new Tika().parseToString(new 
AutoCloseInputStream(getClass().getResourceAsStream("/pdf.pdf")))
{code}

> The parser does not support InputStream without built in mark/reset support 
> anymore
> -----------------------------------------------------------------------------------
>
>                 Key: TIKA-2395
>                 URL: https://issues.apache.org/jira/browse/TIKA-2395
>             Project: Tika
>          Issue Type: Bug
>          Components: detector, parser
>    Affects Versions: 1.15
>            Reporter: Thomas Mortagne
>            Priority: Blocker
>
> After upgrading to 1.15 (from 1.14), it seems that the detector no longer properly
> supports all kinds of InputStream like it used to.
> I get tons of:
> {noformat}
> org.apache.tika.io.TaggedIOException: mark/reset not supported
>       at 
> org.apache.tika.io.TaggedInputStream.handleIOException(TaggedInputStream.java:133)
>       at org.apache.tika.io.ProxyInputStream.reset(ProxyInputStream.java:170)
>       at org.apache.tika.io.TikaInputStream.reset(TikaInputStream.java:673)
>       at org.apache.tika.mime.MimeTypes.detect(MimeTypes.java:474)
>       at 
> org.apache.tika.detect.CompositeDetector.detect(CompositeDetector.java:77)
>       at 
> org.apache.tika.parser.AutoDetectParser.parse(AutoDetectParser.java:115)
>       at org.apache.tika.Tika.parseToString(Tika.java:527)
>       at 
> org.xwiki.search.solr.internal.metadata.AbstractSolrMetadataExtractor.getContentAsText(AbstractSolrMetadataExtractor.java:509)
>       at 
> org.xwiki.search.solr.internal.metadata.AttachmentSolrMetadataExtractor.setLocaleAndContentFields(AttachmentSolrMetadataExtractor.java:111)
>       at 
> org.xwiki.search.solr.internal.metadata.AttachmentSolrMetadataExtractor.setFieldsInternal(AttachmentSolrMetadataExtractor.java:93)
>       at 
> org.xwiki.search.solr.internal.metadata.AbstractSolrMetadataExtractor.getSolrDocument(AbstractSolrMetadataExtractor.java:133)
>       at 
> org.xwiki.search.solr.internal.DefaultSolrIndexer.getSolrDocument(DefaultSolrIndexer.java:504)
>       at 
> org.xwiki.search.solr.internal.DefaultSolrIndexer.processBatch(DefaultSolrIndexer.java:411)
>       at 
> org.xwiki.search.solr.internal.DefaultSolrIndexer.run(DefaultSolrIndexer.java:377)
>       at java.lang.Thread.run(Thread.java:748)
> Caused by: java.io.IOException: mark/reset not supported
>       at java.io.InputStream.reset(InputStream.java:348)
>       at 
> org.apache.commons.io.input.ProxyInputStream.reset(ProxyInputStream.java:169)
>       at org.apache.tika.io.ProxyInputStream.reset(ProxyInputStream.java:168)
>       ... 13 common frames omitted
> {noformat}
> This regression makes Tika unusable for us.



--
This message was sent by Atlassian JIRA
(v7.6.3#76005)

Reply via email to