Carina created TIKA-3060: ---------------------------- Summary: Unpack file .ppt leads to TikaException Key: TIKA-3060 URL: https://issues.apache.org/jira/browse/TIKA-3060 Project: Tika Issue Type: Bug Components: server Affects Versions: 1.23 Environment: Tika server: docker image apache/tika:1.23 with command:
java -jar /tika-server-1.23.jar -spawnChild -JXmx4g -JXms512m -maxFiles 10000 -h 0.0.0.0 -log info Reporter: Carina Attachments: LGT_CIEMAT_CONTRIBUTION_TO_CERN_20161011.ppt Processing the attached file leads to a TikaException: {{WARN unpack/all: Text extraction failed (b'data')}} {{org.apache.tika.exception.TikaException: Unexpected RuntimeException from org.apache.tika.parser.microsoft.OfficeParser@46a07b2b}} {{ at org.apache.tika.parser.CompositeParser.parse(CompositeParser.java:282)}} {{ at org.apache.tika.parser.CompositeParser.parse(CompositeParser.java:280)}} {{ at org.apache.tika.parser.AutoDetectParser.parse(AutoDetectParser.java:143)}} {{ at org.apache.tika.server.resource.TikaResource.parse(TikaResource.java:409)}} {{ at org.apache.tika.server.resource.UnpackerResource.process(UnpackerResource.java:144)}} {{ at org.apache.tika.server.resource.UnpackerResource.unpackAll(UnpackerResource.java:110)}} {{ at java.base/jdk.internal.reflect.NativeMethodAccessorImpl.invoke0(Native Method)}} {{ at java.base/jdk.internal.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62)}} {{ at java.base/jdk.internal.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)}} {{ at java.base/java.lang.reflect.Method.invoke(Method.java:566)}} {{ at org.apache.cxf.service.invoker.AbstractInvoker.performInvocation(AbstractInvoker.java:179)}} {{ at org.apache.cxf.service.invoker.AbstractInvoker.invoke(AbstractInvoker.java:96)}} {{ at org.apache.cxf.jaxrs.JAXRSInvoker.invoke(JAXRSInvoker.java:201)}} {{ at org.apache.cxf.jaxrs.JAXRSInvoker.invoke(JAXRSInvoker.java:104)}} {{ at org.apache.cxf.interceptor.ServiceInvokerInterceptor$1.run(ServiceInvokerInterceptor.java:59)}} {{ at org.apache.cxf.interceptor.ServiceInvokerInterceptor.handleMessage(ServiceInvokerInterceptor.java:96)}} {{ at org.apache.cxf.phase.PhaseInterceptorChain.doIntercept(PhaseInterceptorChain.java:308)}} {{ at
org.apache.cxf.transport.ChainInitiationObserver.onMessage(ChainInitiationObserver.java:121)}} {{ at org.apache.cxf.transport.http.AbstractHTTPDestination.invoke(AbstractHTTPDestination.java:267)}} {{ at org.apache.cxf.transport.http_jetty.JettyHTTPDestination.doService(JettyHTTPDestination.java:247)}} {{ at org.apache.cxf.transport.http_jetty.JettyHTTPHandler.handle(JettyHTTPHandler.java:79)}} {{ at org.eclipse.jetty.server.handler.HandlerWrapper.handle(HandlerWrapper.java:127)}} {{ at org.eclipse.jetty.server.handler.ScopedHandler.nextHandle(ScopedHandler.java:235)}} {{ at org.eclipse.jetty.server.handler.ContextHandler.doHandle(ContextHandler.java:1296)}} {{ at org.eclipse.jetty.server.handler.ScopedHandler.nextScope(ScopedHandler.java:190)}} {{ at org.eclipse.jetty.server.handler.ContextHandler.doScope(ContextHandler.java:1211)}} {{ at org.eclipse.jetty.server.handler.ScopedHandler.handle(ScopedHandler.java:141)}} {{ at org.eclipse.jetty.server.handler.ContextHandlerCollection.handle(ContextHandlerCollection.java:221)}} {{ at org.eclipse.jetty.server.handler.HandlerWrapper.handle(HandlerWrapper.java:127)}} {{ at org.eclipse.jetty.server.Server.handle(Server.java:500)}} {{ at org.eclipse.jetty.server.HttpChannel.lambda$handle$1(HttpChannel.java:386)}} {{ at org.eclipse.jetty.server.HttpChannel.dispatch(HttpChannel.java:560)}} {{ at org.eclipse.jetty.server.HttpChannel.handle(HttpChannel.java:378)}} {{ at org.eclipse.jetty.server.HttpConnection.onFillable(HttpConnection.java:268)}} {{ at org.eclipse.jetty.io.AbstractConnection$ReadCallback.succeeded(AbstractConnection.java:311)}} {{ at org.eclipse.jetty.io.FillInterest.fillable(FillInterest.java:103)}} {{ at org.eclipse.jetty.io.ChannelEndPoint$2.run(ChannelEndPoint.java:117)}} {{ at org.eclipse.jetty.util.thread.strategy.EatWhatYouKill.runTask(EatWhatYouKill.java:336)}} {{ at org.eclipse.jetty.util.thread.strategy.EatWhatYouKill.doProduce(EatWhatYouKill.java:313)}} {{ at 
org.eclipse.jetty.util.thread.strategy.EatWhatYouKill.tryProduce(EatWhatYouKill.java:171)}} {{ at org.eclipse.jetty.util.thread.strategy.EatWhatYouKill.run(EatWhatYouKill.java:129)}} {{ at org.eclipse.jetty.util.thread.ReservedThreadExecutor$ReservedThread.run(ReservedThreadExecutor.java:367)}} {{ at org.eclipse.jetty.util.thread.QueuedThreadPool.runJob(QueuedThreadPool.java:782)}} {{ at org.eclipse.jetty.util.thread.QueuedThreadPool$Runner.run(QueuedThreadPool.java:914)}} {{ at java.base/java.lang.Thread.run(Thread.java:834)}} {{Caused by: java.lang.IndexOutOfBoundsException: Block 69124 not found}} {{ at org.apache.poi.poifs.filesystem.POIFSFileSystem.getBlockAt(POIFSFileSystem.java:429)}} {{ at org.apache.poi.poifs.filesystem.POIFSFileSystem.readBAT(POIFSFileSystem.java:399)}} {{ at org.apache.poi.poifs.filesystem.POIFSFileSystem.readCoreContents(POIFSFileSystem.java:373)}} {{ at org.apache.poi.poifs.filesystem.POIFSFileSystem.<init>(POIFSFileSystem.java:232)}} {{ at org.apache.poi.poifs.filesystem.POIFSFileSystem.<init>(POIFSFileSystem.java:170)}} {{ at org.apache.tika.parser.microsoft.OfficeParser.parse(OfficeParser.java:121)}} {{ at org.apache.tika.parser.CompositeParser.parse(CompositeParser.java:280)}} {{ ... 44 more}} {{Caused by: java.lang.IndexOutOfBoundsException: Position 35392000 past the end of the file}} {{ at org.apache.poi.poifs.nio.FileBackedDataSource.read(FileBackedDataSource.java:84)}} {{ at org.apache.poi.poifs.filesystem.POIFSFileSystem.getBlockAt(POIFSFileSystem.java:427)}} {{ ... 50 more}} -- This message was sent by Atlassian Jira (v8.3.4#803005)