Author: lehmi
Date: Sun Oct  5 11:34:39 2025
New Revision: 1928953

Log:
PDFBOX-6041: limit recursion depth to avoid a stack overflow exception as 
proposed by Davia Justamante

Added:
   pdfbox/trunk/pdfbox/src/test/resources/org/apache/pdfbox/pdfparser/PDFBOX-6041-example.pdf
   (contents, props changed)
Modified:
   pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdfparser/BaseParser.java
   pdfbox/trunk/pdfbox/src/test/java/org/apache/pdfbox/pdfparser/TestBaseParser.java

Modified: pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdfparser/BaseParser.java
==============================================================================
--- pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdfparser/BaseParser.java    Sun Oct  5 10:05:12 2025        (r1928952)
+++ pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdfparser/BaseParser.java    Sun Oct  5 11:34:39 2025        (r1928953)
@@ -65,6 +65,11 @@ public abstract class BaseParser
     private static final int MAX_LENGTH_LONG = Long.toString(Long.MAX_VALUE).length();
 
     private static final Charset ALTERNATIVE_CHARSET;
+    private static final int MAX_RECURSION_DEPTH = 500;
+    private static final String MAX_RECUSRION_MSG = //
+            "Reached maximum recursion depth " + Integer.toString(MAX_RECURSION_DEPTH);
+    
+    private int recursionDepth = 0;
 
     private final Map<Long, COSObjectKey> keyCache = new HashMap<>();
 
@@ -280,51 +285,63 @@ public abstract class BaseParser
      */
     protected COSDictionary parseCOSDictionary(boolean isDirect) throws IOException
     {
-        readExpectedChar('<');
-        readExpectedChar('<');
-        skipSpaces();
-        COSDictionary obj = new COSDictionary();
-        obj.setDirect(isDirect);
-        while (true)
+        try
         {
-            skipSpaces();
-            char c = (char) source.peek();
-            if (c == '>')
+            recursionDepth++;
+            if (recursionDepth > MAX_RECURSION_DEPTH)
             {
-                break;
+                throw new IOException(MAX_RECUSRION_MSG);
             }
-            else if (c == '/')
+            readExpectedChar('<');
+            readExpectedChar('<');
+            skipSpaces();
+            COSDictionary obj = new COSDictionary();
+            obj.setDirect(isDirect);
+            while (true)
             {
-                // something went wrong, most likely the dictionary is 
corrupted
-                // stop immediately and return everything read so far
-                if (!parseCOSDictionaryNameValuePair(obj))
+                skipSpaces();
+                char c = (char) source.peek();
+                if (c == '>')
+                {
+                    break;
+                }
+                else if (c == '/')
                 {
-                    return obj;
+                    // something went wrong, most likely the dictionary is 
corrupted
+                    // stop immediately and return everything read so far
+                    if (!parseCOSDictionaryNameValuePair(obj))
+                    {
+                        return obj;
+                    }
+                }
+                else
+                {
+                    // invalid dictionary, we were expecting a /Name, read 
until the end or until we can recover
+                    LOG.warn("Invalid dictionary, found: '{}' but expected: 
'/' at offset {}", c,
+                            source.getPosition());
+                    if (readUntilEndOfCOSDictionary())
+                    {
+                        // we couldn't recover
+                        return obj;
+                    }
                 }
             }
-            else
+            try
+            {
+                readExpectedChar('>');
+                readExpectedChar('>');
+            }
+            catch (IOException exception)
             {
-                // invalid dictionary, we were expecting a /Name, read until 
the end or until we can recover
-                LOG.warn("Invalid dictionary, found: '{}' but expected: '/' at 
offset {}", c,
+                LOG.warn("Invalid dictionary, can't find end of dictionary at 
offset {}",
                         source.getPosition());
-                if (readUntilEndOfCOSDictionary())
-                {
-                    // we couldn't recover
-                    return obj;
-                }
             }
+            return obj;
         }
-        try
-        {
-            readExpectedChar('>');
-            readExpectedChar('>');
-        }
-        catch (IOException exception)
+        finally
         {
-            LOG.warn("Invalid dictionary, can't find end of dictionary at 
offset {}",
-                    source.getPosition());
+            recursionDepth--;
         }
-        return obj;
     }
 
     /**
@@ -754,71 +771,83 @@ public abstract class BaseParser
      */
     protected COSArray parseCOSArray() throws IOException
     {
-        long startPosition = source.getPosition();
-        readExpectedChar('[');
-        COSArray po = new COSArray();
-        COSBase pbo;
-        skipSpaces();
-        int i;
-        while (((i = source.peek()) > 0) && ((char) i != ']'))
+        try
         {
-            pbo = parseDirObject();
-            if( pbo instanceof COSObject )
+            recursionDepth++;
+            if (recursionDepth > MAX_RECURSION_DEPTH)
             {
-                // the current empty COSObject is replaced with the correct one
-                pbo = null;
-                // We have to check if the expected values are there or not 
PDFBOX-385
-                if (po.size() > 1 && po.get(po.size() - 1) instanceof 
COSInteger)
+                throw new IOException(MAX_RECUSRION_MSG);
+            }
+            long startPosition = source.getPosition();
+            readExpectedChar('[');
+            COSArray po = new COSArray();
+            COSBase pbo;
+            skipSpaces();
+            int i;
+            while (((i = source.peek()) > 0) && ((char) i != ']'))
+            {
+                pbo = parseDirObject();
+                if (pbo instanceof COSObject)
                 {
-                    COSInteger genNumber = (COSInteger)po.remove( po.size() -1 
);
-                    if (po.size() > 0 && po.get(po.size() - 1) instanceof 
COSInteger)
+                    // the current empty COSObject is replaced with the 
correct one
+                    pbo = null;
+                    // We have to check if the expected values are there or 
not PDFBOX-385
+                    if (po.size() > 1 && po.get(po.size() - 1) instanceof 
COSInteger)
                     {
-                        COSInteger number = (COSInteger)po.remove( po.size() 
-1 );
-                        if (number.longValue() >= 0 && genNumber.intValue() >= 
0)
-                        {
-                            COSObjectKey key = getObjectKey(number.longValue(),
-                                    genNumber.intValue());
-                            pbo = getObjectFromPool(key);
-                        }
-                        else
+                        COSInteger genNumber = (COSInteger) 
po.remove(po.size() - 1);
+                        if (po.size() > 0 && po.get(po.size() - 1) instanceof 
COSInteger)
                         {
-                            LOG.warn("Invalid value(s) for an object key {} 
{}", number.longValue(),
-                                    genNumber.intValue());
+                            COSInteger number = (COSInteger) 
po.remove(po.size() - 1);
+                            if (number.longValue() >= 0 && 
genNumber.intValue() >= 0)
+                            {
+                                COSObjectKey key = 
getObjectKey(number.longValue(),
+                                        genNumber.intValue());
+                                pbo = getObjectFromPool(key);
+                            }
+                            else
+                            {
+                                LOG.warn("Invalid value(s) for an object key 
{} {}", number.longValue(),
+                                        genNumber.intValue());
+                            }
                         }
                     }
                 }
-            }
-            // something went wrong
-            if (pbo == null)
-            {
-                //it could be a bad object in the array which is just skipped
-                LOG.warn("Corrupt array element at offset {}, start offset: 
{}",
-                        source.getPosition(), startPosition);
-                String isThisTheEnd = readString();
-                // return immediately if a corrupt element is followed by 
another array
-                // to avoid a possible infinite recursion as most likely the 
whole array is corrupted
-                if (isThisTheEnd.isEmpty() && source.peek() == '[')
-                {
-                    return po;
-                }
-                
source.rewind(isThisTheEnd.getBytes(StandardCharsets.ISO_8859_1).length);
-                // This could also be an "endobj" or "endstream" which means 
we can assume that
-                // the array has ended.
-                if(ENDOBJ_STRING.equals(isThisTheEnd) || 
ENDSTREAM_STRING.equals(isThisTheEnd))
+                // something went wrong
+                if (pbo == null)
                 {
-                    return po;
+                    //it could be a bad object in the array which is just 
skipped
+                    LOG.warn("Corrupt array element at offset {}, start 
offset: {}",
+                            source.getPosition(), startPosition);
+                    String isThisTheEnd = readString();
+                    // return immediately if a corrupt element is followed by 
another array
+                    // to avoid a possible infinite recursion as most likely 
the whole array is corrupted
+                    if (isThisTheEnd.isEmpty() && source.peek() == '[')
+                    {
+                        return po;
+                    }
+                    
source.rewind(isThisTheEnd.getBytes(StandardCharsets.ISO_8859_1).length);
+                    // This could also be an "endobj" or "endstream" which 
means we can assume that
+                    // the array has ended.
+                    if (ENDOBJ_STRING.equals(isThisTheEnd) || 
ENDSTREAM_STRING.equals(isThisTheEnd))
+                    {
+                        return po;
+                    }
                 }
+                else
+                {
+                    po.add(pbo);
+                }
+                skipSpaces();
             }
-            else
-            {
-                po.add(pbo);
-            }
+            // read ']'
+            source.read();
             skipSpaces();
+            return po;
+        }
+        finally
+        {
+            recursionDepth--;
         }
-        // read ']'
-        source.read();
-        skipSpaces();
-        return po;
     }
 
     /**
@@ -947,72 +976,84 @@ public abstract class BaseParser
      */
     protected COSBase parseDirObject() throws IOException
     {
-        skipSpaces();
-        char c = (char) source.peek();
-        switch(c)
+        try
         {
-        case '<':
-            // pull off first left bracket
-            source.read();
-            // check for second left bracket
-            c = (char) source.peek();
-            source.rewind(1);
-            return c == '<' ? parseCOSDictionary(true) : parseCOSString();
-        case '[':
-            // array
-            return parseCOSArray();
-        case '(':
-            return parseCOSString();
-        case '/':   
-            // name
-            return parseCOSName();
-        case 'n':   
-            // null
-            readExpectedString(NULL, false);
-            return COSNull.NULL;
-        case 't':
-            readExpectedString(TRUE, false);
-            return COSBoolean.TRUE;
-        case 'f':
-            readExpectedString(FALSE, false);
-            return COSBoolean.FALSE;
-        case 'R':
-            source.read();
-            return new COSObject(null);
-        case (char)-1:
-            return null;
-        default:
-            if (isDigit(c) || c == '-' || c == '+' || c == '.')
+            recursionDepth++;
+            if (recursionDepth > MAX_RECURSION_DEPTH)
             {
-                return parseCOSNumber();
+                throw new IOException(MAX_RECUSRION_MSG);
             }
-            // This is not suppose to happen, but we will allow for it
-            // so we are more compatible with POS writers that don't
-            // follow the spec
-            long startOffset = source.getPosition();
-            String badString = readString();
-            if (badString.isEmpty())
+            skipSpaces();
+            char c = (char) source.peek();
+            switch (c)
             {
-                int peek = source.peek();
-                // we can end up in an infinite loop otherwise
-                throw new IOException("Unknown dir object c='" + c + "' cInt=" 
+ (int) c + " peek='"
-                        + (char) peek + "' peekInt=" + peek + " at offset " + 
source.getPosition()
-                        + " (start offset: " + startOffset + ")");
-            }
+            case '<':
+                // pull off first left bracket
+                source.read();
+                // check for second left bracket
+                c = (char) source.peek();
+                source.rewind(1);
+                return c == '<' ? parseCOSDictionary(true) : parseCOSString();
+            case '[':
+                // array
+                return parseCOSArray();
+            case '(':
+                return parseCOSString();
+            case '/':
+                // name
+                return parseCOSName();
+            case 'n':
+                // null
+                readExpectedString(NULL, false);
+                return COSNull.NULL;
+            case 't':
+                readExpectedString(TRUE, false);
+                return COSBoolean.TRUE;
+            case 'f':
+                readExpectedString(FALSE, false);
+                return COSBoolean.FALSE;
+            case 'R':
+                source.read();
+                return new COSObject(null);
+            case (char) -1:
+                return null;
+            default:
+                if (isDigit(c) || c == '-' || c == '+' || c == '.')
+                {
+                    return parseCOSNumber();
+                }
+                // This is not suppose to happen, but we will allow for it
+                // so we are more compatible with POS writers that don't
+                // follow the spec
+                long startOffset = source.getPosition();
+                String badString = readString();
+                if (badString.isEmpty())
+                {
+                    int peek = source.peek();
+                    // we can end up in an infinite loop otherwise
+                    throw new IOException("Unknown dir object c='" + c + "' 
cInt=" + (int) c + " peek='"
+                            + (char) peek + "' peekInt=" + peek + " at offset 
" + source.getPosition()
+                            + " (start offset: " + startOffset + ")");
+                }
 
-            // if it's an endstream/endobj, we want to put it back so the 
caller will see it
-            if (ENDOBJ_STRING.equals(badString) || 
ENDSTREAM_STRING.equals(badString))
-            {
-                
source.rewind(badString.getBytes(StandardCharsets.ISO_8859_1).length);
-            }
-            else
-            {
-                LOG.warn("Skipped unexpected dir object = '{}' at offset {} 
(start offset: {})",
-                        badString, source.getPosition(), startOffset);
-                return this instanceof PDFStreamParser ? null : COSNull.NULL;
+                // if it's an endstream/endobj, we want to put it back so the 
caller will see it
+                if (ENDOBJ_STRING.equals(badString) || 
ENDSTREAM_STRING.equals(badString))
+                {
+                    
source.rewind(badString.getBytes(StandardCharsets.ISO_8859_1).length);
+                }
+                else
+                {
+                    LOG.warn("Skipped unexpected dir object = '{}' at offset 
{} (start offset: {})",
+                            badString, source.getPosition(), startOffset);
+                    return this instanceof PDFStreamParser ? null : 
COSNull.NULL;
+                }
             }
+            return null;
+        }
+        finally
+        {
+            recursionDepth--;
         }
-        return null;
     }
 
     private COSNumber parseCOSNumber() throws IOException

Modified: pdfbox/trunk/pdfbox/src/test/java/org/apache/pdfbox/pdfparser/TestBaseParser.java
==============================================================================
--- pdfbox/trunk/pdfbox/src/test/java/org/apache/pdfbox/pdfparser/TestBaseParser.java    Sun Oct  5 10:05:12 2025        (r1928952)
+++ pdfbox/trunk/pdfbox/src/test/java/org/apache/pdfbox/pdfparser/TestBaseParser.java    Sun Oct  5 11:34:39 2025        (r1928953)
@@ -18,9 +18,12 @@
 package org.apache.pdfbox.pdfparser;
 
 import static org.junit.jupiter.api.Assertions.assertEquals;
+import static org.junit.jupiter.api.Assertions.fail;
 
 import java.io.IOException;
+import java.io.InputStream;
 
+import org.apache.pdfbox.Loader;
 import org.apache.pdfbox.cos.COSString;
 import org.apache.pdfbox.io.RandomAccessReadBuffer;
 import org.junit.jupiter.api.Test;
@@ -88,4 +91,23 @@ class TestBaseParser
         assertEquals(output, cosString.getString());
     }
 
+    @Test
+    void testBaseParserStackOverflow()
+    {
+        // PDFBOX-6041
+        try (InputStream is = 
TestPDFParser.class.getResourceAsStream("PDFBOX-6041-example.pdf"))
+        {
+            Loader.loadPDF(new RandomAccessReadBuffer(is)).close();
+        }
+        catch (IOException exception)
+        {
+            assertEquals("Missing root object specification in trailer.", 
exception.getMessage());
+        }
+        catch (Exception exception)
+        {
+            fail("Unexpected Exception");
+        }
+
+    }
+
 }

Added: pdfbox/trunk/pdfbox/src/test/resources/org/apache/pdfbox/pdfparser/PDFBOX-6041-example.pdf
==============================================================================
Binary file. No diff available.

Reply via email to