If I truncate the test file with a hexeditor, I see this: INFO [main] 12:04:20,641 org.apache.tika.server.core.TikaServerProcess Starting Apache Tika 2.4.2-SNAPSHOT server INFO [main] 12:04:20,823 org.apache.tika.server.core.TikaServerProcess loading resource from SPI: class org.apache.tika.server.standard.resource.XMPMetadataResource INFO [main] 12:04:21,044 org.apache.cxf.endpoint.ServerImpl Setting the server's publish address to be http://localhost:9998/ INFO [main] 12:04:21,111 org.eclipse.jetty.util.log Logging initialized @1675ms to org.eclipse.jetty.util.log.Slf4jLog INFO [main] 12:04:21,169 org.eclipse.jetty.server.Server jetty-9.4.48.v20220622; built: 2022-06-21T20:42:25.880Z; git: 6b67c5719d1f4371b33655ff2d047d24e171e49a; jvm 11.0.11+9 INFO [main] 12:04:21,205 org.eclipse.jetty.server.AbstractConnector Started ServerConnector@352e787a{HTTP/1.1, (http/1.1)}{localhost:9998} INFO [main] 12:04:21,205 org.eclipse.jetty.server.Server Started @1771ms WARN [main] 12:04:21,212 org.eclipse.jetty.server.handler.ContextHandler Empty contextPath INFO [main] 12:04:21,226 org.eclipse.jetty.server.handler.ContextHandler Started o.e.j.s.h.ContextHandler@408b87aa{/,null,AVAILABLE} INFO [main] 12:04:21,232 org.apache.tika.server.core.TikaServerProcess Started Apache Tika server fabf267b-a86c-43d7-9845-e15f36d032e2 at http://localhost:9998/ INFO [qtp499951827-28] 12:04:24,324 org.apache.tika.server.core.resource.TikaResource /tika (autodetecting type) WARN [qtp499951827-28] 12:04:24,683 org.apache.pdfbox.pdfparser.COSParser Skipped incomplete object stream:108 0 R at 67085 WARN [qtp499951827-28] 12:04:24,688 org.apache.tika.server.core.resource.TikaResource tika: Text extraction failed (null) org.apache.tika.exception.TikaException: TIKA-198: Illegal IOException from org.apache.tika.parser.pdf.PDFParser@5ec70124 at org.apache.tika.parser.CompositeParser.parse(CompositeParser.java:304) ~[tika-server-standard-2.4.2-SNAPSHOT.jar:2.4.2-SNAPSHOT] at 
org.apache.tika.parser.CompositeParser.parse(CompositeParser.java:298) ~[tika-server-standard-2.4.2-SNAPSHOT.jar:2.4.2-SNAPSHOT] at org.apache.tika.parser.AutoDetectParser.parse(AutoDetectParser.java:167) ~[tika-server-standard-2.4.2-SNAPSHOT.jar:2.4.2-SNAPSHOT] at org.apache.tika.server.core.resource.TikaResource.parse(TikaResource.java:352) ~[tika-server-standard-2.4.2-SNAPSHOT.jar:2.4.2-SNAPSHOT] at org.apache.tika.server.core.resource.TikaResource.lambda$produceOutput$2(TikaResource.java:680) ~[tika-server-standard-2.4.2-SNAPSHOT.jar:2.4.2-SNAPSHOT] at org.apache.cxf.jaxrs.provider.BinaryDataProvider.writeTo(BinaryDataProvider.java:177) ~[tika-server-standard-2.4.2-SNAPSHOT.jar:2.4.2-SNAPSHOT] at org.apache.cxf.jaxrs.utils.JAXRSUtils.writeMessageBody(JAXRSUtils.java:1616) ~[tika-server-standard-2.4.2-SNAPSHOT.jar:2.4.2-SNAPSHOT] at org.apache.cxf.jaxrs.interceptor.JAXRSOutInterceptor.serializeMessage(JAXRSOutInterceptor.java:249) ~[tika-server-standard-2.4.2-SNAPSHOT.jar:2.4.2-SNAPSHOT] at org.apache.cxf.jaxrs.interceptor.JAXRSOutInterceptor.processResponse(JAXRSOutInterceptor.java:122) ~[tika-server-standard-2.4.2-SNAPSHOT.jar:2.4.2-SNAPSHOT] at org.apache.cxf.jaxrs.interceptor.JAXRSOutInterceptor.handleMessage(JAXRSOutInterceptor.java:84) ~[tika-server-standard-2.4.2-SNAPSHOT.jar:2.4.2-SNAPSHOT] at org.apache.cxf.phase.PhaseInterceptorChain.doIntercept(PhaseInterceptorChain.java:307) ~[tika-server-standard-2.4.2-SNAPSHOT.jar:2.4.2-SNAPSHOT] at org.apache.cxf.interceptor.OutgoingChainInterceptor.handleMessage(OutgoingChainInterceptor.java:90) ~[tika-server-standard-2.4.2-SNAPSHOT.jar:2.4.2-SNAPSHOT] at org.apache.cxf.phase.PhaseInterceptorChain.doIntercept(PhaseInterceptorChain.java:307) ~[tika-server-standard-2.4.2-SNAPSHOT.jar:2.4.2-SNAPSHOT] at org.apache.cxf.transport.ChainInitiationObserver.onMessage(ChainInitiationObserver.java:121) ~[tika-server-standard-2.4.2-SNAPSHOT.jar:2.4.2-SNAPSHOT] at 
org.apache.cxf.transport.http.AbstractHTTPDestination.invoke(AbstractHTTPDestination.java:265) ~[tika-server-standard-2.4.2-SNAPSHOT.jar:2.4.2-SNAPSHOT] at org.apache.cxf.transport.http_jetty.JettyHTTPDestination.doService(JettyHTTPDestination.java:247) ~[tika-server-standard-2.4.2-SNAPSHOT.jar:2.4.2-SNAPSHOT] at org.apache.cxf.transport.http_jetty.JettyHTTPHandler.handle(JettyHTTPHandler.java:79) ~[tika-server-standard-2.4.2-SNAPSHOT.jar:2.4.2-SNAPSHOT] at org.eclipse.jetty.server.handler.HandlerWrapper.handle(HandlerWrapper.java:127) ~[tika-server-standard-2.4.2-SNAPSHOT.jar:2.4.2-SNAPSHOT] at org.eclipse.jetty.server.handler.ScopedHandler.nextHandle(ScopedHandler.java:235) ~[tika-server-standard-2.4.2-SNAPSHOT.jar:2.4.2-SNAPSHOT] at org.eclipse.jetty.server.handler.ContextHandler.doHandle(ContextHandler.java:1440) ~[tika-server-standard-2.4.2-SNAPSHOT.jar:2.4.2-SNAPSHOT] at org.eclipse.jetty.server.handler.ScopedHandler.nextScope(ScopedHandler.java:190) ~[tika-server-standard-2.4.2-SNAPSHOT.jar:2.4.2-SNAPSHOT] at org.eclipse.jetty.server.handler.ContextHandler.doScope(ContextHandler.java:1355) ~[tika-server-standard-2.4.2-SNAPSHOT.jar:2.4.2-SNAPSHOT] at org.eclipse.jetty.server.handler.ScopedHandler.handle(ScopedHandler.java:141) ~[tika-server-standard-2.4.2-SNAPSHOT.jar:2.4.2-SNAPSHOT] at org.eclipse.jetty.server.handler.ContextHandlerCollection.handle(ContextHandlerCollection.java:191) ~[tika-server-standard-2.4.2-SNAPSHOT.jar:2.4.2-SNAPSHOT] at org.eclipse.jetty.server.handler.HandlerWrapper.handle(HandlerWrapper.java:127) ~[tika-server-standard-2.4.2-SNAPSHOT.jar:2.4.2-SNAPSHOT] at org.eclipse.jetty.server.Server.handle(Server.java:516) ~[tika-server-standard-2.4.2-SNAPSHOT.jar:2.4.2-SNAPSHOT] at org.eclipse.jetty.server.HttpChannel.lambda$handle$1(HttpChannel.java:487) ~[tika-server-standard-2.4.2-SNAPSHOT.jar:2.4.2-SNAPSHOT] at org.eclipse.jetty.server.HttpChannel.dispatch(HttpChannel.java:732) ~[tika-server-standard-2.4.2-SNAPSHOT.jar:2.4.2-SNAPSHOT] at 
org.eclipse.jetty.server.HttpChannel.handle(HttpChannel.java:479) ~[tika-server-standard-2.4.2-SNAPSHOT.jar:2.4.2-SNAPSHOT] at org.eclipse.jetty.server.HttpConnection.onFillable(HttpConnection.java:277) ~[tika-server-standard-2.4.2-SNAPSHOT.jar:2.4.2-SNAPSHOT] at org.eclipse.jetty.io.AbstractConnection$ReadCallback.succeeded(AbstractConnection.java:311) ~[tika-server-standard-2.4.2-SNAPSHOT.jar:2.4.2-SNAPSHOT] at org.eclipse.jetty.io.FillInterest.fillable(FillInterest.java:105) ~[tika-server-standard-2.4.2-SNAPSHOT.jar:2.4.2-SNAPSHOT] at org.eclipse.jetty.io.ChannelEndPoint$1.run(ChannelEndPoint.java:104) ~[tika-server-standard-2.4.2-SNAPSHOT.jar:2.4.2-SNAPSHOT] at org.eclipse.jetty.util.thread.QueuedThreadPool.runJob(QueuedThreadPool.java:883) ~[tika-server-standard-2.4.2-SNAPSHOT.jar:2.4.2-SNAPSHOT] at org.eclipse.jetty.util.thread.QueuedThreadPool$Runner.run(QueuedThreadPool.java:1034) ~[tika-server-standard-2.4.2-SNAPSHOT.jar:2.4.2-SNAPSHOT] at java.lang.Thread.run(Thread.java:829) ~[?:?] Caused by: java.io.IOException: Page tree root must be a dictionary at org.apache.pdfbox.pdfparser.PDFParser.initialParse(PDFParser.java:198) ~[tika-server-standard-2.4.2-SNAPSHOT.jar:2.4.2-SNAPSHOT] at org.apache.pdfbox.pdfparser.PDFParser.parse(PDFParser.java:226) ~[tika-server-standard-2.4.2-SNAPSHOT.jar:2.4.2-SNAPSHOT] at org.apache.pdfbox.pdmodel.PDDocument.load(PDDocument.java:1230) ~[tika-server-standard-2.4.2-SNAPSHOT.jar:2.4.2-SNAPSHOT] at org.apache.pdfbox.pdmodel.PDDocument.load(PDDocument.java:1204) ~[tika-server-standard-2.4.2-SNAPSHOT.jar:2.4.2-SNAPSHOT] at org.apache.tika.parser.pdf.PDFParser.getPDDocument(PDFParser.java:284) ~[tika-server-standard-2.4.2-SNAPSHOT.jar:2.4.2-SNAPSHOT] at org.apache.tika.parser.pdf.PDFParser.parse(PDFParser.java:171) ~[tika-server-standard-2.4.2-SNAPSHOT.jar:2.4.2-SNAPSHOT] at org.apache.tika.parser.CompositeParser.parse(CompositeParser.java:298) ~[tika-server-standard-2.4.2-SNAPSHOT.jar:2.4.2-SNAPSHOT] ... 
35 more ERROR [qtp499951827-28] 12:04:24,705 org.apache.cxf.jaxrs.utils.JAXRSUtils Problem with writing the data, class org.apache.tika.server.core.resource.TikaResource$$Lambda$349/0x0000000800394c40, ContentType: text/xml
On Fri, Jul 15, 2022 at 12:01 PM Tim Allison <[email protected]> wrote: > If you curl the test file (GetStartedWithSmallpdf.pdf) against your > tika-server, what do you see? The test file works for me with > 2.4.2-SNAPSHOT at least. Are the files getting truncated somehow? > > > > On Fri, Jul 15, 2022 at 9:41 AM PGNet Dev <[email protected]> wrote: > >> i'm running tika-server 2.4.1 on a linux box, >> >> lsb_release -rd >> Description: Fedora release 36 (Thirty Six) >> Release: 36 >> >> uname -rm >> 5.18.11-200.fc36.x86_64 x86_64 >> >> java -version >> Picked up JAVA_TOOL_OPTIONS: -Xmx512M >> openjdk version "18.0.1" 2022-04-19 >> OpenJDK Runtime Environment 22.3 (build 18.0.1+10) >> OpenJDK 64-Bit Server VM 22.3 (build 18.0.1+10, mixed >> mode, sharing) >> >> >> ps ax | grep tika-server >> 1003 ? Ssl 0:12 /usr/bin/java -jar >> /srv/webapps/tika/tika-server.jar -c >> /usr/local/etc/tika/tika-server-config-custom.xml >> 1143 ? Sl 0:37 /usr/bin/java -Xms1g -Xmx1g >> -Dpdfbox.fontcache=/var/tika -Dlog4j2.info -Djava.awt.headless=true -cp >> /srv/webapps/tika/tika-server.jar -Dtika.server.id= >> org.apache.tika.server.core.TikaServerProcess -h 127.0.0.1 -p 9998 -i -c >> /usr/local/etc/tika/tika-server-config-custom.xml -forkedStatusFile >> /tmp/apache-tika-server-forked-tmp-9638775429532759882 -numRestarts 0 >> >> it's invoked from a dovecot imap server instance, for attachment parsing, >> >> dovecot --version >> 2.3.19.1 (9b53102964) >> >> cat dovecot/conf.d/10-master.com >> ... >> plugin { >> ... >> fts_tika = http://127.0.0.1:9998/tika/ >> } >> ... >> >> on receipt of an email with a standard attachment/example -- e.g. 
the >> example pdf @ >> >> https://smallpdf.com/edit-pdf >> >> , per journal logs, the message is submitted to tika, but fails due to a >> 'corrupt stream' >> >> Jul 15 08:41:27 mx tika[1143]: INFO [qtp1837533591-27] >> 08:41:27,224 org.apache.tika.server.core.resource.TikaResource /tika >> (application/pdf) >> Jul 15 08:41:27 mx tika[1143]: WARN [qtp1837533591-27] >> 08:41:27,453 org.apache.pdfbox.pdfparser.COSParser The end of the stream >> doesn't point to the correct offset, using workaround to read the stream, >> stream start position: 104315, length: 356, expected end position: 104671 >> Jul 15 08:41:27 mx tika[1143]: ERROR [qtp1837533591-27] >> 08:41:27,457 org.apache.pdfbox.filter.FlateFilter FlateFilter: stop reading >> corrupt stream due to a DataFormatException >> Jul 15 08:41:27 mx tika[1143]: WARN [qtp1837533591-27] >> 08:41:27,730 org.apache.pdfbox.pdfparser.COSParser The end of the stream >> doesn't point to the correct offset, using workaround to read the stream, >> stream start position: 101699, length: 1472, expected end position: 103171 >> Jul 15 08:41:27 mx tika[1143]: ERROR [qtp1837533591-27] >> 08:41:27,735 org.apache.pdfbox.filter.FlateFilter FlateFilter: stop reading >> corrupt stream due to a DataFormatException >> Jul 15 08:41:27 mx tika[1143]: WARN [qtp1837533591-27] >> 08:41:27,742 org.apache.pdfbox.pdfparser.COSParser The end of the stream >> doesn't point to the correct offset, using workaround to read the stream, >> stream start position: 101509, length: 66, expected end position: 101575 >> Jul 15 08:41:27 mx tika[1143]: ERROR [qtp1837533591-27] >> 08:41:27,744 org.apache.pdfbox.filter.FlateFilter FlateFilter: stop reading >> corrupt stream due to a DataFormatException >> Jul 15 08:41:27 mx tika[1143]: WARN [qtp1837533591-27] >> 08:41:27,748 org.apache.pdfbox.pdfparser.COSParser The end of the stream >> doesn't point to the correct offset, using workaround to read the stream, >> stream start position: 2011, length: 2482, expected end 
position: 4493 >> Jul 15 08:41:27 mx tika[1143]: WARN [qtp1837533591-27] >> 08:41:27,752 org.apache.tika.server.core.resource.TikaResource tika/: Text >> extraction failed (test.pdf) >> Jul 15 08:41:27 mx tika[1143]: >> org.apache.tika.exception.TikaException: TIKA-198: Illegal IOException from >> org.apache.tika.parser.pdf.PDFParser@356fdbd7 >> Jul 15 08:41:27 mx tika[1143]: at >> org.apache.tika.parser.CompositeParser.parse(CompositeParser.java:304) >> ~[tika-server-standard-2.4.1.jar:2.4.1] >> Jul 15 08:41:27 mx tika[1143]: at >> org.apache.tika.parser.CompositeParser.parse(CompositeParser.java:298) >> ~[tika-server-standard-2.4.1.jar:2.4.1] >> Jul 15 08:41:27 mx tika[1143]: at >> org.apache.tika.parser.AutoDetectParser.parse(AutoDetectParser.java:167) >> ~[tika-server-standard-2.4.1.jar:2.4.1] >> Jul 15 08:41:27 mx tika[1143]: at >> org.apache.tika.parser.ParserDecorator.parse(ParserDecorator.java:152) >> ~[tika-server-standard-2.4.1.jar:2.4.1] >> Jul 15 08:41:27 mx tika[1143]: at >> org.apache.tika.parser.DigestingParser.parse(DigestingParser.java:55) >> ~[tika-server-standard-2.4.1.jar:2.4.1] >> Jul 15 08:41:27 mx tika[1143]: at >> org.apache.tika.server.core.resource.TikaResource.parse(TikaResource.java:352) >> ~[tika-server-standard-2.4.1.jar:2.4.1] >> Jul 15 08:41:27 mx tika[1143]: at >> org.apache.tika.server.core.resource.TikaResource.lambda$produceText$1(TikaResource.java:502) >> ~[tika-server-standard-2.4.1.jar:2.4.1] >> Jul 15 08:41:27 mx tika[1143]: at >> org.apache.cxf.jaxrs.provider.BinaryDataProvider.writeTo(BinaryDataProvider.java:177) >> ~[tika-server-standard-2.4.1.jar:2.4.1] >> Jul 15 08:41:27 mx tika[1143]: at >> org.apache.cxf.jaxrs.utils.JAXRSUtils.writeMessageBody(JAXRSUtils.java:1616) >> ~[tika-server-standard-2.4.1.jar:2.4.1] >> Jul 15 08:41:27 mx tika[1143]: at >> org.apache.cxf.jaxrs.interceptor.JAXRSOutInterceptor.serializeMessage(JAXRSOutInterceptor.java:249) >> ~[tika-server-standard-2.4.1.jar:2.4.1] >> Jul 15 08:41:27 mx 
tika[1143]: at >> org.apache.cxf.jaxrs.interceptor.JAXRSOutInterceptor.processResponse(JAXRSOutInterceptor.java:122) >> ~[tika-server-standard-2.4.1.jar:2.4.1] >> Jul 15 08:41:27 mx tika[1143]: at >> org.apache.cxf.jaxrs.interceptor.JAXRSOutInterceptor.handleMessage(JAXRSOutInterceptor.java:84) >> ~[tika-server-standard-2.4.1.jar:2.4.1] >> Jul 15 08:41:27 mx tika[1143]: at >> org.apache.cxf.phase.PhaseInterceptorChain.doIntercept(PhaseInterceptorChain.java:307) >> ~[tika-server-standard-2.4.1.jar:2.4.1] >> Jul 15 08:41:27 mx tika[1143]: at >> org.apache.cxf.interceptor.OutgoingChainInterceptor.handleMessage(OutgoingChainInterceptor.java:90) >> ~[tika-server-standard-2.4.1.jar:2.4.1] >> Jul 15 08:41:27 mx tika[1143]: at >> org.apache.cxf.phase.PhaseInterceptorChain.doIntercept(PhaseInterceptorChain.java:307) >> ~[tika-server-standard-2.4.1.jar:2.4.1] >> Jul 15 08:41:27 mx tika[1143]: at >> org.apache.cxf.transport.ChainInitiationObserver.onMessage(ChainInitiationObserver.java:121) >> ~[tika-server-standard-2.4.1.jar:2.4.1] >> Jul 15 08:41:27 mx tika[1143]: at >> org.apache.cxf.transport.http.AbstractHTTPDestination.invoke(AbstractHTTPDestination.java:265) >> ~[tika-server-standard-2.4.1.jar:2.4.1] >> Jul 15 08:41:27 mx tika[1143]: at >> org.apache.cxf.transport.http_jetty.JettyHTTPDestination.doService(JettyHTTPDestination.java:247) >> ~[tika-server-standard-2.4.1.jar:2.4.1] >> Jul 15 08:41:27 mx tika[1143]: at >> org.apache.cxf.transport.http_jetty.JettyHTTPHandler.handle(JettyHTTPHandler.java:79) >> ~[tika-server-standard-2.4.1.jar:2.4.1] >> Jul 15 08:41:27 mx tika[1143]: at >> org.eclipse.jetty.server.handler.HandlerWrapper.handle(HandlerWrapper.java:127) >> ~[tika-server-standard-2.4.1.jar:2.4.1] >> Jul 15 08:41:27 mx tika[1143]: at >> org.eclipse.jetty.server.handler.ScopedHandler.nextHandle(ScopedHandler.java:235) >> ~[tika-server-standard-2.4.1.jar:2.4.1] >> Jul 15 08:41:27 mx tika[1143]: at >> 
org.eclipse.jetty.server.handler.ContextHandler.doHandle(ContextHandler.java:1440) >> ~[tika-server-standard-2.4.1.jar:2.4.1] >> Jul 15 08:41:27 mx tika[1143]: at >> org.eclipse.jetty.server.handler.ScopedHandler.nextScope(ScopedHandler.java:190) >> ~[tika-server-standard-2.4.1.jar:2.4.1] >> Jul 15 08:41:27 mx tika[1143]: at >> org.eclipse.jetty.server.handler.ContextHandler.doScope(ContextHandler.java:1355) >> ~[tika-server-standard-2.4.1.jar:2.4.1] >> Jul 15 08:41:27 mx tika[1143]: at >> org.eclipse.jetty.server.handler.ScopedHandler.handle(ScopedHandler.java:141) >> ~[tika-server-standard-2.4.1.jar:2.4.1] >> Jul 15 08:41:27 mx tika[1143]: at >> org.eclipse.jetty.server.handler.ContextHandlerCollection.handle(ContextHandlerCollection.java:191) >> ~[tika-server-standard-2.4.1.jar:2.4.1] >> Jul 15 08:41:27 mx tika[1143]: at >> org.eclipse.jetty.server.handler.HandlerWrapper.handle(HandlerWrapper.java:127) >> ~[tika-server-standard-2.4.1.jar:2.4.1] >> Jul 15 08:41:27 mx tika[1143]: at >> org.eclipse.jetty.server.Server.handle(Server.java:516) >> ~[tika-server-standard-2.4.1.jar:2.4.1] >> Jul 15 08:41:27 mx tika[1143]: at >> org.eclipse.jetty.server.HttpChannel.lambda$handle$1(HttpChannel.java:487) >> ~[tika-server-standard-2.4.1.jar:2.4.1] >> Jul 15 08:41:27 mx tika[1143]: at >> org.eclipse.jetty.server.HttpChannel.dispatch(HttpChannel.java:732) >> ~[tika-server-standard-2.4.1.jar:2.4.1] >> Jul 15 08:41:27 mx tika[1143]: at >> org.eclipse.jetty.server.HttpChannel.handle(HttpChannel.java:479) >> ~[tika-server-standard-2.4.1.jar:2.4.1] >> Jul 15 08:41:27 mx tika[1143]: at >> org.eclipse.jetty.server.HttpConnection.onFillable(HttpConnection.java:277) >> ~[tika-server-standard-2.4.1.jar:2.4.1] >> Jul 15 08:41:27 mx tika[1143]: at >> org.eclipse.jetty.io.AbstractConnection$ReadCallback.succeeded(AbstractConnection.java:311) >> ~[tika-server-standard-2.4.1.jar:2.4.1] >> Jul 15 08:41:27 mx tika[1143]: at >> org.eclipse.jetty.io.FillInterest.fillable(FillInterest.java:105) 
>> ~[tika-server-standard-2.4.1.jar:2.4.1] >> Jul 15 08:41:27 mx tika[1143]: at >> org.eclipse.jetty.io.ChannelEndPoint$1.run(ChannelEndPoint.java:104) >> ~[tika-server-standard-2.4.1.jar:2.4.1] >> Jul 15 08:41:27 mx tika[1143]: at >> org.eclipse.jetty.util.thread.QueuedThreadPool.runJob(QueuedThreadPool.java:883) >> ~[tika-server-standard-2.4.1.jar:2.4.1] >> Jul 15 08:41:27 mx tika[1143]: at >> org.eclipse.jetty.util.thread.QueuedThreadPool$Runner.run(QueuedThreadPool.java:1034) >> ~[tika-server-standard-2.4.1.jar:2.4.1] >> Jul 15 08:41:27 mx tika[1143]: at >> java.lang.Thread.run(Thread.java:833) ~[?:?] >> Jul 15 08:41:27 mx tika[1143]: Caused by: java.io.IOException: >> Page tree root must be a dictionary >> Jul 15 08:41:27 mx tika[1143]: at >> org.apache.pdfbox.pdfparser.PDFParser.initialParse(PDFParser.java:198) >> ~[tika-server-standard-2.4.1.jar:2.4.1] >> Jul 15 08:41:27 mx tika[1143]: at >> org.apache.pdfbox.pdfparser.PDFParser.parse(PDFParser.java:226) >> ~[tika-server-standard-2.4.1.jar:2.4.1] >> Jul 15 08:41:27 mx tika[1143]: at >> org.apache.pdfbox.pdmodel.PDDocument.load(PDDocument.java:1230) >> ~[tika-server-standard-2.4.1.jar:2.4.1] >> Jul 15 08:41:27 mx tika[1143]: at >> org.apache.pdfbox.pdmodel.PDDocument.load(PDDocument.java:1204) >> ~[tika-server-standard-2.4.1.jar:2.4.1] >> Jul 15 08:41:27 mx tika[1143]: at >> org.apache.tika.parser.pdf.PDFParser.getPDDocument(PDFParser.java:284) >> ~[tika-server-standard-2.4.1.jar:2.4.1] >> Jul 15 08:41:27 mx tika[1143]: at >> org.apache.tika.parser.pdf.PDFParser.parse(PDFParser.java:171) >> ~[tika-server-standard-2.4.1.jar:2.4.1] >> Jul 15 08:41:27 mx tika[1143]: at >> org.apache.tika.parser.CompositeParser.parse(CompositeParser.java:298) >> ~[tika-server-standard-2.4.1.jar:2.4.1] >> Jul 15 08:41:27 mx tika[1143]: ... 
37 more >> Jul 15 08:41:27 mx tika[1143]: ERROR [qtp1837533591-27] >> 08:41:27,767 org.apache.cxf.jaxrs.utils.JAXRSUtils Problem with writing the >> data, class >> org.apache.tika.server.core.resource.TikaResource$$Lambda$337/0x0000000800eabbf8, >> ContentType: text/plain >> >> Is this likely an issue with tika-server itself? &/or java/dovecot? >> >> What additional diagnostics can help narrow it down? >> >
