Hello!
While indexing PDFs with Solr I stumbled upon one copy which threw an
"Unexpected RuntimeException from
org.apache.tika.parser.pdf.PDFParser@b9b618".
Should I upload that PDF somewhere? If so, where?
regards
alex
org.apache.tika.exception.TikaException: Unexpected RuntimeException from
org.apache.tika.parser.pdf.PDFParser@b9b618
at
org.apache.tika.parser.CompositeParser.parse(CompositeParser.java:199)
at
org.apache.tika.parser.CompositeParser.parse(CompositeParser.java:197)
at
org.apache.tika.parser.AutoDetectParser.parse(AutoDetectParser.java:135)
at org.apache.tika.gui.TikaGUI.importStream(TikaGUI.java:186)
at
org.apache.tika.gui.ParsingTransferHandler.importData(ParsingTransferHandler.java:99)
at javax.swing.TransferHandler.importData(Unknown Source)
at javax.swing.TransferHandler$DropHandler.drop(Unknown Source)
at java.awt.dnd.DropTarget.drop(Unknown Source)
at javax.swing.TransferHandler$SwingDropTarget.drop(Unknown Source)
at sun.awt.dnd.SunDropTargetContextPeer.processDropMessage(Unknown
Source)
at
sun.awt.dnd.SunDropTargetContextPeer$EventDispatcher.dispatchDropEvent(Unknown
Source)
at
sun.awt.dnd.SunDropTargetContextPeer$EventDispatcher.dispatchEvent(Unknown
Source)
at sun.awt.dnd.SunDropTargetEvent.dispatch(Unknown Source)
at java.awt.Component.dispatchEventImpl(Unknown Source)
at java.awt.Container.dispatchEventImpl(Unknown Source)
at java.awt.Component.dispatchEvent(Unknown Source)
at java.awt.LightweightDispatcher.retargetMouseEvent(Unknown Source)
at java.awt.LightweightDispatcher.processDropTargetEvent(Unknown Source)
at java.awt.LightweightDispatcher.dispatchEvent(Unknown Source)
at java.awt.Container.dispatchEventImpl(Unknown Source)
at java.awt.Window.dispatchEventImpl(Unknown Source)
at java.awt.Component.dispatchEvent(Unknown Source)
at java.awt.EventQueue.dispatchEventImpl(Unknown Source)
at java.awt.EventQueue.access$000(Unknown Source)
at java.awt.EventQueue$1.run(Unknown Source)
at java.awt.EventQueue$1.run(Unknown Source)
at java.security.AccessController.doPrivileged(Native Method)
at java.security.AccessControlContext$1.doIntersectionPrivilege(Unknown
Source)
at java.security.AccessControlContext$1.doIntersectionPrivilege(Unknown
Source)
at java.awt.EventQueue$2.run(Unknown Source)
at java.awt.EventQueue$2.run(Unknown Source)
at java.security.AccessController.doPrivileged(Native Method)
at java.security.AccessControlContext$1.doIntersectionPrivilege(Unknown
Source)
at java.awt.EventQueue.dispatchEvent(Unknown Source)
at java.awt.EventDispatchThread.pumpOneEventForFilters(Unknown Source)
at java.awt.EventDispatchThread.pumpEventsForFilter(Unknown Source)
at java.awt.EventDispatchThread.pumpEventsForHierarchy(Unknown Source)
at java.awt.EventDispatchThread.pumpEvents(Unknown Source)
at java.awt.EventDispatchThread.pumpEvents(Unknown Source)
at java.awt.EventDispatchThread.run(Unknown Source)
Caused by: java.lang.RuntimeException: java.io.IOException: Value is not an
integer: 150140324056065666235
at
org.apache.pdfbox.pdfparser.PDFStreamParser$1.tryNext(PDFStreamParser.java:178)
at
org.apache.pdfbox.pdfparser.PDFStreamParser$1.hasNext(PDFStreamParser.java:187)
at
org.apache.pdfbox.util.PDFStreamEngine.processSubStream(PDFStreamEngine.java:266)
at
org.apache.pdfbox.util.PDFStreamEngine.processSubStream(PDFStreamEngine.java:251)
at
org.apache.pdfbox.util.PDFStreamEngine.processStream(PDFStreamEngine.java:225)
at
org.apache.pdfbox.util.PDFTextStripper.processPage(PDFTextStripper.java:442)
at
org.apache.pdfbox.util.PDFTextStripper.processPages(PDFTextStripper.java:366)
at
org.apache.pdfbox.util.PDFTextStripper.writeText(PDFTextStripper.java:322)
at org.apache.tika.parser.pdf.PDF2XHTML.process(PDF2XHTML.java:56)
at org.apache.tika.parser.pdf.PDFParser.parse(PDFParser.java:89)
at
org.apache.tika.parser.CompositeParser.parse(CompositeParser.java:197)
... 39 more
Caused by: java.io.IOException: Value is not an integer: 150140324056065666235
at org.apache.pdfbox.cos.COSNumber.get(COSNumber.java:96)
at
org.apache.pdfbox.pdfparser.PDFStreamParser.parseNextToken(PDFStreamParser.java:347)
at
org.apache.pdfbox.pdfparser.PDFStreamParser.access$000(PDFStreamParser.java:46)
at
org.apache.pdfbox.pdfparser.PDFStreamParser$1.tryNext(PDFStreamParser.java:175)
... 49 more