[ 
https://issues.apache.org/jira/browse/PDFBOX-4623?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel
 ]

Alex Rebert updated PDFBOX-4623:
--------------------------------
    Description: 
Parsing an invalid PDF can lead to an infinite recursion in COSParser, which 
results in a StackOverflowError.

*Steps to repro*
 # Download malformed PDF (attached)
 # {{Run: java -jar pdfbox-app-2.0.16.jar ExtractText infinite-recursion.pdf}}

*Stacktrace*
{noformat}
Exception in thread "main" java.lang.StackOverflowError [1005/1916]
 at java.base/sun.nio.cs.UTF_8.updatePositions(UTF_8.java:79)
 at java.base/sun.nio.cs.UTF_8$Decoder.xflow(UTF_8.java:210)
 at java.base/sun.nio.cs.UTF_8$Decoder.decodeArrayLoop(UTF_8.java:321)
 at java.base/sun.nio.cs.UTF_8$Decoder.decodeLoop(UTF_8.java:414)
 at java.base/java.nio.charset.CharsetDecoder.decode(CharsetDecoder.java:578)
 at java.base/java.nio.charset.CharsetDecoder.decode(CharsetDecoder.java:801)
 at org.apache.pdfbox.pdfparser.BaseParser.isValidUTF8(BaseParser.java:787)
 at org.apache.pdfbox.pdfparser.BaseParser.parseCOSName(BaseParser.java:768)
 at org.apache.pdfbox.pdfparser.BaseParser.parseDirObject(BaseParser.java:887)
 at org.apache.pdfbox.pdfparser.BaseParser.parseCOSDictionaryValue(BaseParser.java:154)
 at org.apache.pdfbox.pdfparser.BaseParser.parseCOSDictionaryNameValuePair(BaseParser.java:283)
 at org.apache.pdfbox.pdfparser.BaseParser.parseCOSDictionary(BaseParser.java:216)
 at org.apache.pdfbox.pdfparser.BaseParser.parseDirObject(BaseParser.java:867)
 at org.apache.pdfbox.pdfparser.COSParser.parseFileObject(COSParser.java:912)
 at org.apache.pdfbox.pdfparser.COSParser.parseObjectDynamically(COSParser.java:881)
 at org.apache.pdfbox.pdfparser.COSParser.parseObjectDynamically(COSParser.java:801)
 at org.apache.pdfbox.pdfparser.COSParser.getLength(COSParser.java:1055)
 at org.apache.pdfbox.pdfparser.COSParser.parseCOSStream(COSParser.java:1114)
 at org.apache.pdfbox.pdfparser.COSParser.parseFileObject(COSParser.java:920)
 at org.apache.pdfbox.pdfparser.COSParser.parseObjectDynamically(COSParser.java:881)
 at org.apache.pdfbox.pdfparser.COSParser.parseObjectDynamically(COSParser.java:801)
 at org.apache.pdfbox.pdfparser.COSParser.getLength(COSParser.java:1055)
 at org.apache.pdfbox.pdfparser.COSParser.parseCOSStream(COSParser.java:1114)
 at org.apache.pdfbox.pdfparser.COSParser.parseFileObject(COSParser.java:920)
 at org.apache.pdfbox.pdfparser.COSParser.parseObjectDynamically(COSParser.java:881)
 at org.apache.pdfbox.pdfparser.COSParser.parseObjectDynamically(COSParser.java:801)
 at org.apache.pdfbox.pdfparser.COSParser.getLength(COSParser.java:1055)
 at org.apache.pdfbox.pdfparser.COSParser.parseCOSStream(COSParser.java:1114)
 at org.apache.pdfbox.pdfparser.COSParser.parseFileObject(COSParser.java:920)
 at org.apache.pdfbox.pdfparser.COSParser.parseObjectDynamically(COSParser.java:881)
 at org.apache.pdfbox.pdfparser.COSParser.parseObjectDynamically(COSParser.java:801)
 at org.apache.pdfbox.pdfparser.COSParser.getLength(COSParser.java:1055)
 at org.apache.pdfbox.pdfparser.COSParser.parseCOSStream(COSParser.java:1114)
 ...
{noformat}
The file was generated by fuzzing and is (probably) not a valid PDF file.

 

  was:
Parsing an invalid PDF can lead to an infinite recursion in COSParser, which 
results in a StackOverflowError.

*Steps to repro*
 # Download malformed PDF (attached)
 # {{Run: java -jar pdfbox-app-2.0.16.jar ExtractText infinite-recursion.pdf}}

*Stacktrace*

 
{noformat}
Exception in thread "main" java.lang.StackOverflowError [1005/1916]
 at java.base/sun.nio.cs.UTF_8.updatePositions(UTF_8.java:79)
 at java.base/sun.nio.cs.UTF_8$Decoder.xflow(UTF_8.java:210)
 at java.base/sun.nio.cs.UTF_8$Decoder.decodeArrayLoop(UTF_8.java:321)
 at java.base/sun.nio.cs.UTF_8$Decoder.decodeLoop(UTF_8.java:414)
 at java.base/java.nio.charset.CharsetDecoder.decode(CharsetDecoder.java:578)
 at java.base/java.nio.charset.CharsetDecoder.decode(CharsetDecoder.java:801)
 at org.apache.pdfbox.pdfparser.BaseParser.isValidUTF8(BaseParser.java:787)
 at org.apache.pdfbox.pdfparser.BaseParser.parseCOSName(BaseParser.java:768)
 at org.apache.pdfbox.pdfparser.BaseParser.parseDirObject(BaseParser.java:887)
 at org.apache.pdfbox.pdfparser.BaseParser.parseCOSDictionaryValue(BaseParser.java:154)
 at org.apache.pdfbox.pdfparser.BaseParser.parseCOSDictionaryNameValuePair(BaseParser.java:283)
 at org.apache.pdfbox.pdfparser.BaseParser.parseCOSDictionary(BaseParser.java:216)
 at org.apache.pdfbox.pdfparser.BaseParser.parseDirObject(BaseParser.java:867)
 at org.apache.pdfbox.pdfparser.COSParser.parseFileObject(COSParser.java:912)
 at org.apache.pdfbox.pdfparser.COSParser.parseObjectDynamically(COSParser.java:881)
 at org.apache.pdfbox.pdfparser.COSParser.parseObjectDynamically(COSParser.java:801)
 at org.apache.pdfbox.pdfparser.COSParser.getLength(COSParser.java:1055)
 at org.apache.pdfbox.pdfparser.COSParser.parseCOSStream(COSParser.java:1114)
 at org.apache.pdfbox.pdfparser.COSParser.parseFileObject(COSParser.java:920)
 at org.apache.pdfbox.pdfparser.COSParser.parseObjectDynamically(COSParser.java:881)
 at org.apache.pdfbox.pdfparser.COSParser.parseObjectDynamically(COSParser.java:801)
 at org.apache.pdfbox.pdfparser.COSParser.getLength(COSParser.java:1055)
 at org.apache.pdfbox.pdfparser.COSParser.parseCOSStream(COSParser.java:1114)
 at org.apache.pdfbox.pdfparser.COSParser.parseFileObject(COSParser.java:920)
 at org.apache.pdfbox.pdfparser.COSParser.parseObjectDynamically(COSParser.java:881)
 at org.apache.pdfbox.pdfparser.COSParser.parseObjectDynamically(COSParser.java:801)
 at org.apache.pdfbox.pdfparser.COSParser.getLength(COSParser.java:1055)
 at org.apache.pdfbox.pdfparser.COSParser.parseCOSStream(COSParser.java:1114)
 at org.apache.pdfbox.pdfparser.COSParser.parseFileObject(COSParser.java:920)
 at org.apache.pdfbox.pdfparser.COSParser.parseObjectDynamically(COSParser.java:881)
 at org.apache.pdfbox.pdfparser.COSParser.parseObjectDynamically(COSParser.java:801)
 at org.apache.pdfbox.pdfparser.COSParser.getLength(COSParser.java:1055)
 at org.apache.pdfbox.pdfparser.COSParser.parseCOSStream(COSParser.java:1114)
 ...
{noformat}
The files was generated by fuzzing and is (probably) not a valid PDF file.

 


> COSParser: Infinite recursion
> -----------------------------
>
>                 Key: PDFBOX-4623
>                 URL: https://issues.apache.org/jira/browse/PDFBOX-4623
>             Project: PDFBox
>          Issue Type: Bug
>          Components: Parsing
>    Affects Versions: 2.0.16
>         Environment: java version "12" 2019-03-19
> Java(TM) SE Runtime Environment (build 12+33)
> Java HotSpot(TM) 64-Bit Server VM (build 12+33, mixed mode, sharing) 
> MacOS Mojave
>            Reporter: Alex Rebert
>            Priority: Minor
>         Attachments: infinite-recursion.pdf
>
>
> Parsing an invalid PDF can lead to an infinite recursion in COSParser, which 
> results in a StackOverflowError.
> *Steps to repro*
>  # Download malformed PDF (attached)
>  # {{Run: java -jar pdfbox-app-2.0.16.jar ExtractText infinite-recursion.pdf}}
> *Stacktrace*
> {noformat}
> Exception in thread "main" java.lang.StackOverflowError [1005/1916]
>  at java.base/sun.nio.cs.UTF_8.updatePositions(UTF_8.java:79)
>  at java.base/sun.nio.cs.UTF_8$Decoder.xflow(UTF_8.java:210)
>  at java.base/sun.nio.cs.UTF_8$Decoder.decodeArrayLoop(UTF_8.java:321)
>  at java.base/sun.nio.cs.UTF_8$Decoder.decodeLoop(UTF_8.java:414)
>  at java.base/java.nio.charset.CharsetDecoder.decode(CharsetDecoder.java:578)
>  at java.base/java.nio.charset.CharsetDecoder.decode(CharsetDecoder.java:801)
>  at org.apache.pdfbox.pdfparser.BaseParser.isValidUTF8(BaseParser.java:787)
>  at org.apache.pdfbox.pdfparser.BaseParser.parseCOSName(BaseParser.java:768)
>  at org.apache.pdfbox.pdfparser.BaseParser.parseDirObject(BaseParser.java:887)
>  at org.apache.pdfbox.pdfparser.BaseParser.parseCOSDictionaryValue(BaseParser.java:154)
>  at org.apache.pdfbox.pdfparser.BaseParser.parseCOSDictionaryNameValuePair(BaseParser.java:283)
>  at org.apache.pdfbox.pdfparser.BaseParser.parseCOSDictionary(BaseParser.java:216)
>  at org.apache.pdfbox.pdfparser.BaseParser.parseDirObject(BaseParser.java:867)
>  at org.apache.pdfbox.pdfparser.COSParser.parseFileObject(COSParser.java:912)
>  at org.apache.pdfbox.pdfparser.COSParser.parseObjectDynamically(COSParser.java:881)
>  at org.apache.pdfbox.pdfparser.COSParser.parseObjectDynamically(COSParser.java:801)
>  at org.apache.pdfbox.pdfparser.COSParser.getLength(COSParser.java:1055)
>  at org.apache.pdfbox.pdfparser.COSParser.parseCOSStream(COSParser.java:1114)
>  at org.apache.pdfbox.pdfparser.COSParser.parseFileObject(COSParser.java:920)
>  at org.apache.pdfbox.pdfparser.COSParser.parseObjectDynamically(COSParser.java:881)
>  at org.apache.pdfbox.pdfparser.COSParser.parseObjectDynamically(COSParser.java:801)
>  at org.apache.pdfbox.pdfparser.COSParser.getLength(COSParser.java:1055)
>  at org.apache.pdfbox.pdfparser.COSParser.parseCOSStream(COSParser.java:1114)
>  at org.apache.pdfbox.pdfparser.COSParser.parseFileObject(COSParser.java:920)
>  at org.apache.pdfbox.pdfparser.COSParser.parseObjectDynamically(COSParser.java:881)
>  at org.apache.pdfbox.pdfparser.COSParser.parseObjectDynamically(COSParser.java:801)
>  at org.apache.pdfbox.pdfparser.COSParser.getLength(COSParser.java:1055)
>  at org.apache.pdfbox.pdfparser.COSParser.parseCOSStream(COSParser.java:1114)
>  at org.apache.pdfbox.pdfparser.COSParser.parseFileObject(COSParser.java:920)
>  at org.apache.pdfbox.pdfparser.COSParser.parseObjectDynamically(COSParser.java:881)
>  at org.apache.pdfbox.pdfparser.COSParser.parseObjectDynamically(COSParser.java:801)
>  at org.apache.pdfbox.pdfparser.COSParser.getLength(COSParser.java:1055)
>  at org.apache.pdfbox.pdfparser.COSParser.parseCOSStream(COSParser.java:1114)
>  ...
> {noformat}
> The file was generated by fuzzing and is (probably) not a valid PDF file.
>  



--
This message was sent by Atlassian JIRA
(v7.6.14#76016)

---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to