Author: lehmi
Date: Sun Oct 5 11:34:39 2025
New Revision: 1928953
Log:
PDFBOX-6041: limit recursion depth to avoid a StackOverflowError, as
proposed by Davia Justamante
Added:
pdfbox/trunk/pdfbox/src/test/resources/org/apache/pdfbox/pdfparser/PDFBOX-6041-example.pdf
(contents, props changed)
Modified:
pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdfparser/BaseParser.java
pdfbox/trunk/pdfbox/src/test/java/org/apache/pdfbox/pdfparser/TestBaseParser.java
Modified:
pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdfparser/BaseParser.java
==============================================================================
---
pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdfparser/BaseParser.java
Sun Oct 5 10:05:12 2025 (r1928952)
+++
pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdfparser/BaseParser.java
Sun Oct 5 11:34:39 2025 (r1928953)
@@ -65,6 +65,11 @@ public abstract class BaseParser
private static final int MAX_LENGTH_LONG =
Long.toString(Long.MAX_VALUE).length();
private static final Charset ALTERNATIVE_CHARSET;
+ private static final int MAX_RECURSION_DEPTH = 500;
+ private static final String MAX_RECUSRION_MSG = //
+ "Reached maximum recursion depth " +
Integer.toString(MAX_RECURSION_DEPTH);
+
+ private int recursionDepth = 0;
private final Map<Long, COSObjectKey> keyCache = new HashMap<>();
@@ -280,51 +285,63 @@ public abstract class BaseParser
*/
protected COSDictionary parseCOSDictionary(boolean isDirect) throws
IOException
{
- readExpectedChar('<');
- readExpectedChar('<');
- skipSpaces();
- COSDictionary obj = new COSDictionary();
- obj.setDirect(isDirect);
- while (true)
+ try
{
- skipSpaces();
- char c = (char) source.peek();
- if (c == '>')
+ recursionDepth++;
+ if (recursionDepth > MAX_RECURSION_DEPTH)
{
- break;
+ throw new IOException(MAX_RECUSRION_MSG);
}
- else if (c == '/')
+ readExpectedChar('<');
+ readExpectedChar('<');
+ skipSpaces();
+ COSDictionary obj = new COSDictionary();
+ obj.setDirect(isDirect);
+ while (true)
{
- // something went wrong, most likely the dictionary is
corrupted
- // stop immediately and return everything read so far
- if (!parseCOSDictionaryNameValuePair(obj))
+ skipSpaces();
+ char c = (char) source.peek();
+ if (c == '>')
+ {
+ break;
+ }
+ else if (c == '/')
{
- return obj;
+ // something went wrong, most likely the dictionary is
corrupted
+ // stop immediately and return everything read so far
+ if (!parseCOSDictionaryNameValuePair(obj))
+ {
+ return obj;
+ }
+ }
+ else
+ {
+ // invalid dictionary, we were expecting a /Name, read
until the end or until we can recover
+ LOG.warn("Invalid dictionary, found: '{}' but expected:
'/' at offset {}", c,
+ source.getPosition());
+ if (readUntilEndOfCOSDictionary())
+ {
+ // we couldn't recover
+ return obj;
+ }
}
}
- else
+ try
+ {
+ readExpectedChar('>');
+ readExpectedChar('>');
+ }
+ catch (IOException exception)
{
- // invalid dictionary, we were expecting a /Name, read until
the end or until we can recover
- LOG.warn("Invalid dictionary, found: '{}' but expected: '/' at
offset {}", c,
+ LOG.warn("Invalid dictionary, can't find end of dictionary at
offset {}",
source.getPosition());
- if (readUntilEndOfCOSDictionary())
- {
- // we couldn't recover
- return obj;
- }
}
+ return obj;
}
- try
- {
- readExpectedChar('>');
- readExpectedChar('>');
- }
- catch (IOException exception)
+ finally
{
- LOG.warn("Invalid dictionary, can't find end of dictionary at
offset {}",
- source.getPosition());
+ recursionDepth--;
}
- return obj;
}
/**
@@ -754,71 +771,83 @@ public abstract class BaseParser
*/
protected COSArray parseCOSArray() throws IOException
{
- long startPosition = source.getPosition();
- readExpectedChar('[');
- COSArray po = new COSArray();
- COSBase pbo;
- skipSpaces();
- int i;
- while (((i = source.peek()) > 0) && ((char) i != ']'))
+ try
{
- pbo = parseDirObject();
- if( pbo instanceof COSObject )
+ recursionDepth++;
+ if (recursionDepth > MAX_RECURSION_DEPTH)
{
- // the current empty COSObject is replaced with the correct one
- pbo = null;
- // We have to check if the expected values are there or not
PDFBOX-385
- if (po.size() > 1 && po.get(po.size() - 1) instanceof
COSInteger)
+ throw new IOException(MAX_RECUSRION_MSG);
+ }
+ long startPosition = source.getPosition();
+ readExpectedChar('[');
+ COSArray po = new COSArray();
+ COSBase pbo;
+ skipSpaces();
+ int i;
+ while (((i = source.peek()) > 0) && ((char) i != ']'))
+ {
+ pbo = parseDirObject();
+ if (pbo instanceof COSObject)
{
- COSInteger genNumber = (COSInteger)po.remove( po.size() -1
);
- if (po.size() > 0 && po.get(po.size() - 1) instanceof
COSInteger)
+ // the current empty COSObject is replaced with the
correct one
+ pbo = null;
+ // We have to check if the expected values are there or
not PDFBOX-385
+ if (po.size() > 1 && po.get(po.size() - 1) instanceof
COSInteger)
{
- COSInteger number = (COSInteger)po.remove( po.size()
-1 );
- if (number.longValue() >= 0 && genNumber.intValue() >=
0)
- {
- COSObjectKey key = getObjectKey(number.longValue(),
- genNumber.intValue());
- pbo = getObjectFromPool(key);
- }
- else
+ COSInteger genNumber = (COSInteger)
po.remove(po.size() - 1);
+ if (po.size() > 0 && po.get(po.size() - 1) instanceof
COSInteger)
{
- LOG.warn("Invalid value(s) for an object key {}
{}", number.longValue(),
- genNumber.intValue());
+ COSInteger number = (COSInteger)
po.remove(po.size() - 1);
+ if (number.longValue() >= 0 &&
genNumber.intValue() >= 0)
+ {
+ COSObjectKey key =
getObjectKey(number.longValue(),
+ genNumber.intValue());
+ pbo = getObjectFromPool(key);
+ }
+ else
+ {
+ LOG.warn("Invalid value(s) for an object key
{} {}", number.longValue(),
+ genNumber.intValue());
+ }
}
}
}
- }
- // something went wrong
- if (pbo == null)
- {
- //it could be a bad object in the array which is just skipped
- LOG.warn("Corrupt array element at offset {}, start offset:
{}",
- source.getPosition(), startPosition);
- String isThisTheEnd = readString();
- // return immediately if a corrupt element is followed by
another array
- // to avoid a possible infinite recursion as most likely the
whole array is corrupted
- if (isThisTheEnd.isEmpty() && source.peek() == '[')
- {
- return po;
- }
-
source.rewind(isThisTheEnd.getBytes(StandardCharsets.ISO_8859_1).length);
- // This could also be an "endobj" or "endstream" which means
we can assume that
- // the array has ended.
- if(ENDOBJ_STRING.equals(isThisTheEnd) ||
ENDSTREAM_STRING.equals(isThisTheEnd))
+ // something went wrong
+ if (pbo == null)
{
- return po;
+ //it could be a bad object in the array which is just
skipped
+ LOG.warn("Corrupt array element at offset {}, start
offset: {}",
+ source.getPosition(), startPosition);
+ String isThisTheEnd = readString();
+ // return immediately if a corrupt element is followed by
another array
+ // to avoid a possible infinite recursion as most likely
the whole array is corrupted
+ if (isThisTheEnd.isEmpty() && source.peek() == '[')
+ {
+ return po;
+ }
+
source.rewind(isThisTheEnd.getBytes(StandardCharsets.ISO_8859_1).length);
+ // This could also be an "endobj" or "endstream" which
means we can assume that
+ // the array has ended.
+ if (ENDOBJ_STRING.equals(isThisTheEnd) ||
ENDSTREAM_STRING.equals(isThisTheEnd))
+ {
+ return po;
+ }
}
+ else
+ {
+ po.add(pbo);
+ }
+ skipSpaces();
}
- else
- {
- po.add(pbo);
- }
+ // read ']'
+ source.read();
skipSpaces();
+ return po;
+ }
+ finally
+ {
+ recursionDepth--;
}
- // read ']'
- source.read();
- skipSpaces();
- return po;
}
/**
@@ -947,72 +976,84 @@ public abstract class BaseParser
*/
protected COSBase parseDirObject() throws IOException
{
- skipSpaces();
- char c = (char) source.peek();
- switch(c)
+ try
{
- case '<':
- // pull off first left bracket
- source.read();
- // check for second left bracket
- c = (char) source.peek();
- source.rewind(1);
- return c == '<' ? parseCOSDictionary(true) : parseCOSString();
- case '[':
- // array
- return parseCOSArray();
- case '(':
- return parseCOSString();
- case '/':
- // name
- return parseCOSName();
- case 'n':
- // null
- readExpectedString(NULL, false);
- return COSNull.NULL;
- case 't':
- readExpectedString(TRUE, false);
- return COSBoolean.TRUE;
- case 'f':
- readExpectedString(FALSE, false);
- return COSBoolean.FALSE;
- case 'R':
- source.read();
- return new COSObject(null);
- case (char)-1:
- return null;
- default:
- if (isDigit(c) || c == '-' || c == '+' || c == '.')
+ recursionDepth++;
+ if (recursionDepth > MAX_RECURSION_DEPTH)
{
- return parseCOSNumber();
+ throw new IOException(MAX_RECUSRION_MSG);
}
- // This is not suppose to happen, but we will allow for it
- // so we are more compatible with POS writers that don't
- // follow the spec
- long startOffset = source.getPosition();
- String badString = readString();
- if (badString.isEmpty())
+ skipSpaces();
+ char c = (char) source.peek();
+ switch (c)
{
- int peek = source.peek();
- // we can end up in an infinite loop otherwise
- throw new IOException("Unknown dir object c='" + c + "' cInt="
+ (int) c + " peek='"
- + (char) peek + "' peekInt=" + peek + " at offset " +
source.getPosition()
- + " (start offset: " + startOffset + ")");
- }
+ case '<':
+ // pull off first left bracket
+ source.read();
+ // check for second left bracket
+ c = (char) source.peek();
+ source.rewind(1);
+ return c == '<' ? parseCOSDictionary(true) : parseCOSString();
+ case '[':
+ // array
+ return parseCOSArray();
+ case '(':
+ return parseCOSString();
+ case '/':
+ // name
+ return parseCOSName();
+ case 'n':
+ // null
+ readExpectedString(NULL, false);
+ return COSNull.NULL;
+ case 't':
+ readExpectedString(TRUE, false);
+ return COSBoolean.TRUE;
+ case 'f':
+ readExpectedString(FALSE, false);
+ return COSBoolean.FALSE;
+ case 'R':
+ source.read();
+ return new COSObject(null);
+ case (char) -1:
+ return null;
+ default:
+ if (isDigit(c) || c == '-' || c == '+' || c == '.')
+ {
+ return parseCOSNumber();
+ }
+ // This is not suppose to happen, but we will allow for it
+ // so we are more compatible with POS writers that don't
+ // follow the spec
+ long startOffset = source.getPosition();
+ String badString = readString();
+ if (badString.isEmpty())
+ {
+ int peek = source.peek();
+ // we can end up in an infinite loop otherwise
+ throw new IOException("Unknown dir object c='" + c + "'
cInt=" + (int) c + " peek='"
+ + (char) peek + "' peekInt=" + peek + " at offset
" + source.getPosition()
+ + " (start offset: " + startOffset + ")");
+ }
- // if it's an endstream/endobj, we want to put it back so the
caller will see it
- if (ENDOBJ_STRING.equals(badString) ||
ENDSTREAM_STRING.equals(badString))
- {
-
source.rewind(badString.getBytes(StandardCharsets.ISO_8859_1).length);
- }
- else
- {
- LOG.warn("Skipped unexpected dir object = '{}' at offset {}
(start offset: {})",
- badString, source.getPosition(), startOffset);
- return this instanceof PDFStreamParser ? null : COSNull.NULL;
+ // if it's an endstream/endobj, we want to put it back so the
caller will see it
+ if (ENDOBJ_STRING.equals(badString) ||
ENDSTREAM_STRING.equals(badString))
+ {
+
source.rewind(badString.getBytes(StandardCharsets.ISO_8859_1).length);
+ }
+ else
+ {
+ LOG.warn("Skipped unexpected dir object = '{}' at offset
{} (start offset: {})",
+ badString, source.getPosition(), startOffset);
+ return this instanceof PDFStreamParser ? null :
COSNull.NULL;
+ }
}
+ return null;
+ }
+ finally
+ {
+ recursionDepth--;
}
- return null;
}
private COSNumber parseCOSNumber() throws IOException
Modified:
pdfbox/trunk/pdfbox/src/test/java/org/apache/pdfbox/pdfparser/TestBaseParser.java
==============================================================================
---
pdfbox/trunk/pdfbox/src/test/java/org/apache/pdfbox/pdfparser/TestBaseParser.java
Sun Oct 5 10:05:12 2025 (r1928952)
+++
pdfbox/trunk/pdfbox/src/test/java/org/apache/pdfbox/pdfparser/TestBaseParser.java
Sun Oct 5 11:34:39 2025 (r1928953)
@@ -18,9 +18,12 @@
package org.apache.pdfbox.pdfparser;
import static org.junit.jupiter.api.Assertions.assertEquals;
+import static org.junit.jupiter.api.Assertions.fail;
import java.io.IOException;
+import java.io.InputStream;
+import org.apache.pdfbox.Loader;
import org.apache.pdfbox.cos.COSString;
import org.apache.pdfbox.io.RandomAccessReadBuffer;
import org.junit.jupiter.api.Test;
@@ -88,4 +91,23 @@ class TestBaseParser
assertEquals(output, cosString.getString());
}
+ @Test
+ void testBaseParserStackOverflow()
+ {
+ // PDFBOX-6041
+ try (InputStream is =
TestPDFParser.class.getResourceAsStream("PDFBOX-6041-example.pdf"))
+ {
+ Loader.loadPDF(new RandomAccessReadBuffer(is)).close();
+ }
+ catch (IOException exception)
+ {
+ assertEquals("Missing root object specification in trailer.",
exception.getMessage());
+ }
+ catch (Exception exception)
+ {
+ fail("Unexpected Exception");
+ }
+
+ }
+
}
Added:
pdfbox/trunk/pdfbox/src/test/resources/org/apache/pdfbox/pdfparser/PDFBOX-6041-example.pdf
==============================================================================
Binary file. No diff available.