[ 
https://issues.apache.org/jira/browse/PDFBOX-5737?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel&focusedCommentId=17797008#comment-17797008
 ] 

ASF subversion and git services commented on PDFBOX-5737:
---------------------------------------------------------

Commit 1914669 from Tilman Hausherr in branch 'pdfbox/trunk'
[ https://svn.apache.org/r1914669 ]

PDFBOX-5737: catch EOF

> java.lang.ArrayIndexOutOfBoundsException Bug Report
> ---------------------------------------------------
>
>                 Key: PDFBOX-5737
>                 URL: https://issues.apache.org/jira/browse/PDFBOX-5737
>             Project: PDFBox
>          Issue Type: Bug
>    Affects Versions: 3.0.0 PDFBox
>            Reporter: Xiaohan Zhang
>            Priority: Major
>         Attachments: crash-38ee70b5cb74519b642c150694f601239f492168
>
>
> Recently we discovered a bug in the latest PDFBox release (3.0.0).
> Because we lack contextual knowledge of the PDFBox library, we cannot
> thoroughly fix some of these bugs ourselves; hence we look forward to any
> plan the developers propose for fixing them.
>  
> # Test Program
>  
> package com.test;
> import java.io.File;
> import java.io.IOException;
> import java.io.InputStream;
> import org.apache.pdfbox.pdmodel.PDDocument;
> import org.apache.pdfbox.pdmodel.PDPage;
> import org.apache.pdfbox.text.PDFTextStripper;
> import org.apache.pdfbox.pdmodel.PDDocumentInformation;
> import org.apache.pdfbox.Loader;
>  
> public class Entry {
>    public static void main (String args[]) throws IOException {
>       assert args.length == 1;
>       try {
>            File file = new File(args[0]);
>            PDDocument document = Loader.loadPDF(file);
>            PDDocumentInformation pdd = document.getDocumentInformation();
>            System.out.println("Author of the document is :"+ pdd.getAuthor());
>            System.out.println("Title of the document is :"+ pdd.getTitle());
>            System.out.println("Subject of the document is :"+ 
> pdd.getSubject());
>            int noOfPages= document.getNumberOfPages();
>            for (int i = 0; i < noOfPages; i++) {
>                PDPage page_doc = document.getPage(i);
>                System.out.println("Page:"+ i + ". Content: " + 
> page_doc.getContents());
>            }
>            PDFTextStripper pdfStripper = new PDFTextStripper();
>            String text = pdfStripper.getText(document);
>            System.out.println("Full Content:"+ text);
>            document.close();
>       } catch (java.io.IOException ignore) {
>       }
>       System.out.println("end test, no crash");
>    }
> }
>  
> # POC file
> See the attachments.
>  
> # Crash Stack
> Exception in thread "main" java.lang.ArrayIndexOutOfBoundsException: arraycopy: length -1 is negative
> at java.base/java.lang.System.arraycopy(Native Method)
> at java.base/java.io.PushbackInputStream.unread(PushbackInputStream.java:232)
> at org.apache.pdfbox.filter.CCITTFaxFilter.decode(CCITTFaxFilter.java:75)
> at org.apache.pdfbox.filter.Filter.decode(Filter.java:96)
> at org.apache.pdfbox.filter.Filter.decode(Filter.java:238)
> at org.apache.pdfbox.cos.COSStream.createView(COSStream.java:196)
> at org.apache.pdfbox.pdfparser.PDFObjectStreamParser.<init>(PDFObjectStreamParser.java:51)
> at org.apache.pdfbox.pdfparser.BruteForceParser.bfSearchForObjStreams(BruteForceParser.java:336)
> at org.apache.pdfbox.pdfparser.BruteForceParser.rebuildTrailer(BruteForceParser.java:838)
> at org.apache.pdfbox.pdfparser.COSParser.retrieveTrailer(COSParser.java:250)
> at org.apache.pdfbox.pdfparser.PDFParser.initialParse(PDFParser.java:127)
> at org.apache.pdfbox.pdfparser.PDFParser.parse(PDFParser.java:184)
> at org.apache.pdfbox.pdfparser.PDFParser.parse(PDFParser.java:156)
> at org.apache.pdfbox.Loader.loadPDF(Loader.java:466)
> at org.apache.pdfbox.Loader.loadPDF(Loader.java:348)
> at org.apache.pdfbox.Loader.loadPDF(Loader.java:303)
> at org.apache.pdfbox.Loader.loadPDF(Loader.java:246)
> at com.test.Entry.main(Entry.java:21)



--
This message was sent by Atlassian Jira
(v8.20.10#820010)

---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to