Author: leleueri
Date: Sun Jun 16 12:25:35 2013
New Revision: 1493503
URL: http://svn.apache.org/r1493503
Log:
[PDFBOX-1639] Fix infinite loop on the PDFParser by managing 10 digits Object
Number
Modified:
pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdfparser/BaseParser.java
pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdfparser/NonSequentialPDFParser.java
pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdfparser/PDFObjectStreamParser.java
pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdfparser/PDFParser.java
pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdfparser/VisualSignatureParser.java
pdfbox/trunk/preflight/src/main/java/org/apache/pdfbox/preflight/parser/PreflightParser.java
Modified:
pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdfparser/BaseParser.java
URL:
http://svn.apache.org/viewvc/pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdfparser/BaseParser.java?rev=1493503&r1=1493502&r2=1493503&view=diff
==============================================================================
---
pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdfparser/BaseParser.java
(original)
+++
pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdfparser/BaseParser.java
Sun Jun 16 12:25:35 2013
@@ -19,15 +19,12 @@ package org.apache.pdfbox.pdfparser;
import java.io.BufferedInputStream;
import java.io.ByteArrayInputStream;
import java.io.ByteArrayOutputStream;
-import java.io.InputStream;
import java.io.IOException;
+import java.io.InputStream;
import java.io.OutputStream;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
-import org.apache.pdfbox.io.PushBackInputStream;
-import org.apache.pdfbox.io.RandomAccess;
-
import org.apache.pdfbox.cos.COSArray;
import org.apache.pdfbox.cos.COSBase;
import org.apache.pdfbox.cos.COSBoolean;
@@ -41,7 +38,8 @@ import org.apache.pdfbox.cos.COSObject;
import org.apache.pdfbox.cos.COSStream;
import org.apache.pdfbox.cos.COSString;
import org.apache.pdfbox.exceptions.WrappedIOException;
-
+import org.apache.pdfbox.io.PushBackInputStream;
+import org.apache.pdfbox.io.RandomAccess;
import org.apache.pdfbox.persistence.util.COSObjectKey;
/**
@@ -54,6 +52,10 @@ import org.apache.pdfbox.persistence.uti
public abstract class BaseParser
{
+ private static final long OBJECT_NUMBER_THRESHOLD = 10000000000L;
+
+ private static final long GENERATION_NUMBER_THRESHOLD = 65535;
+
/**
* system property allowing to define size of push back buffer.
*/
@@ -1585,6 +1587,36 @@ public abstract class BaseParser
}
/**
+ * This will read a long from the stream and throw an {@link IOException}
+ * if the value is negative or has more than 10 digits (i.e. is not lower
+ * than {@link #OBJECT_NUMBER_THRESHOLD}).
+ *
+ * @return the object number that was read from the stream.
+ * @throws IOException if the value cannot be read as a long or is out of range.
+ */
+ protected long readObjectNumber() throws IOException
+ {
+ long retval = readLong();
+ if(retval < 0 || retval >= OBJECT_NUMBER_THRESHOLD) {
+ throw new IOException("Object Number '" + retval + "' has more
than 10 digits or is negative");
+ }
+ return retval;
+ }
+
+ /**
+ * This will read an integer from the stream and throw an {@link IOException}
+ * if the value is negative or bigger than {@link #GENERATION_NUMBER_THRESHOLD}.
+ * The threshold itself (65535) is accepted: it is the maximum generation
+ * number permitted by the PDF specification.
+ *
+ * @return the generation number that was read from the stream.
+ * @throws IOException if the value cannot be read as an integer or is out of range.
+ */
+ protected int readGenerationNumber() throws IOException
+ {
+ int retval = readInt();
+ // 65535 is a legal generation number, so only values above it are rejected
+ if(retval < 0 || retval > GENERATION_NUMBER_THRESHOLD) {
+ throw new IOException("Generation Number '" + retval + "' is negative or greater than " + GENERATION_NUMBER_THRESHOLD);
+ }
+ return retval;
+ }
+
+ /**
* This will read an integer from the stream.
*
* @return The integer that was read from the stream.
@@ -1596,8 +1628,57 @@ public abstract class BaseParser
skipSpaces();
int retval = 0;
+ StringBuilder intBuffer = readStringNumber();
+
+ try
+ {
+ retval = Integer.parseInt( intBuffer.toString() );
+ }
+ catch( NumberFormatException e )
+ {
+ pdfSource.unread(intBuffer.toString().getBytes("ISO-8859-1"));
+ throw new IOException( "Error: Expected an integer type, actual='"
+ intBuffer + "'" );
+ }
+ return retval;
+ }
+
+
+ /**
+ * This will read a long from the stream.
+ *
+ * @return The long that was read from the stream.
+ *
+ * @throws IOException If there is an error reading from the stream or the token read is not a valid long.
+ */
+ protected long readLong() throws IOException
+ {
+ skipSpaces();
+ long retval = 0;
+
+ StringBuilder longBuffer = readStringNumber();
+
+ try
+ {
+ retval = Long.parseLong( longBuffer.toString() );
+ }
+ catch( NumberFormatException e )
+ {
+ pdfSource.unread(longBuffer.toString().getBytes("ISO-8859-1")); // push the consumed token back onto the source before reporting the failure
+ throw new IOException( "Error: Expected a long type, actual='" +
longBuffer + "'" );
+ }
+ return retval;
+ }
+
+ /**
+ * This method is used by the {@linkplain #readInt()} method and the
{@linkplain #readLong()} method to read a token.
+ *
+ * @return the token to parse as integer or long by the calling method.
+ * @throws IOException thrown by the {@link #pdfSource} methods.
+ */
+ protected final StringBuilder readStringNumber() throws IOException
+ {
int lastByte = 0;
- StringBuffer intBuffer = new StringBuffer();
+ StringBuilder buffer = new StringBuilder();
while( (lastByte = pdfSource.read() ) != 32 &&
lastByte != 10 &&
lastByte != 13 &&
@@ -1605,22 +1686,13 @@ public abstract class BaseParser
lastByte != 0 && //See sourceforge bug 853328
lastByte != -1 )
{
- intBuffer.append( (char)lastByte );
+ buffer.append( (char)lastByte );
}
if( lastByte != -1 )
{
pdfSource.unread( lastByte );
}
-
- try
- {
- retval = Integer.parseInt( intBuffer.toString() );
- }
- catch( NumberFormatException e )
- {
- pdfSource.unread(intBuffer.toString().getBytes("ISO-8859-1"));
- throw new IOException( "Error: Expected an integer type, actual='"
+ intBuffer + "'" );
- }
- return retval;
+ return buffer;
}
+
}
Modified:
pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdfparser/NonSequentialPDFParser.java
URL:
http://svn.apache.org/viewvc/pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdfparser/NonSequentialPDFParser.java?rev=1493503&r1=1493502&r2=1493503&view=diff
==============================================================================
---
pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdfparser/NonSequentialPDFParser.java
(original)
+++
pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdfparser/NonSequentialPDFParser.java
Sun Jun 16 12:25:35 2013
@@ -455,8 +455,8 @@ public class NonSequentialPDFParser exte
private long parseXrefObjStream(long objByteOffset) throws IOException
{
// ---- parse indirect object head
- readInt();
- readInt();
+ readObjectNumber();
+ readGenerationNumber();
readPattern(OBJ_MARKER);
COSDictionary dict = parseCOSDictionary();
@@ -1180,8 +1180,8 @@ public class NonSequentialPDFParser exte
setPdfSource(offsetOrObjstmObNr);
// ---- we must have an indirect object
- final int readObjNr = readInt();
- final int readObjGen = readInt();
+ final long readObjNr = readObjectNumber();
+ final long readObjGen = readGenerationNumber();
readPattern(OBJ_MARKER);
// ---- consistency check
Modified:
pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdfparser/PDFObjectStreamParser.java
URL:
http://svn.apache.org/viewvc/pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdfparser/PDFObjectStreamParser.java?rev=1493503&r1=1493502&r2=1493503&view=diff
==============================================================================
---
pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdfparser/PDFObjectStreamParser.java
(original)
+++
pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdfparser/PDFObjectStreamParser.java
Sun Jun 16 12:25:35 2013
@@ -44,7 +44,7 @@ public class PDFObjectStreamParser exten
LogFactory.getLog(PDFObjectStreamParser.class);
private List<COSObject> streamObjects = null;
- private List<Integer> objectNumbers = null;
+ private List<Long> objectNumbers = null;
private COSStream stream;
/**
@@ -92,13 +92,13 @@ public class PDFObjectStreamParser exten
{
//need to first parse the header.
int numberOfObjects = stream.getInt( "N" );
- objectNumbers = new ArrayList<Integer>( numberOfObjects );
+ objectNumbers = new ArrayList<Long>( numberOfObjects );
streamObjects = new ArrayList<COSObject>( numberOfObjects );
for( int i=0; i<numberOfObjects; i++ )
{
- int objectNumber = readInt();
- int offset = readInt();
- objectNumbers.add( new Integer( objectNumber ) );
+ long objectNumber = readObjectNumber();
+ long offset = readLong();
+ objectNumbers.add( new Long( objectNumber ) );
}
COSObject object = null;
COSBase cosObject = null;
Modified:
pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdfparser/PDFParser.java
URL:
http://svn.apache.org/viewvc/pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdfparser/PDFParser.java?rev=1493503&r1=1493502&r2=1493503&view=diff
==============================================================================
---
pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdfparser/PDFParser.java
(original)
+++
pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdfparser/PDFParser.java
Sun Jun 16 12:25:35 2013
@@ -183,6 +183,7 @@ public class PDFParser extends BaseParse
{
break;
}
+
try
{
wasLastParsedObjectEOF = parseObject();
@@ -204,7 +205,19 @@ public class PDFParser extends BaseParse
* we skipped over an object
*/
LOG.warn("Parsing Error, Skipping Object", e);
+
+ skipSpaces();
+ long lastOffset = pdfSource.getOffset();
skipToNextObj();
+
+ /* The next object is the one we want to skip,
+ * so read its 'Object Number' without interpreting it
+ * in order to force the skip.
+ */
+ if (lastOffset == pdfSource.getOffset()) {
+ readStringNumber();
+ skipToNextObj();
+ }
}
else
{
@@ -507,7 +520,7 @@ public class PDFParser extends BaseParse
//we are going to parse an normal object
else
{
- int number = -1;
+ long number = -1;
int genNum = -1;
String objectKey = null;
boolean missingObjectNumber = false;
@@ -520,7 +533,7 @@ public class PDFParser extends BaseParse
}
else
{
- number = readInt();
+ number = readObjectNumber();
}
}
catch( IOException e )
@@ -529,12 +542,12 @@ public class PDFParser extends BaseParse
//statements after an object, of course this is nonsense
//but because we want to support as many PDFs as possible
//we will simply try again
- number = readInt();
+ number = readObjectNumber();
}
if( !missingObjectNumber )
{
skipSpaces();
- genNum = readInt();
+ genNum = readGenerationNumber();
objectKey = readString( 3 );
//System.out.println( "parseObject() num=" + number +
@@ -676,7 +689,7 @@ public class PDFParser extends BaseParse
/* This integer is the byte offset of the first object referenced by
the xref or xref stream
* Needed for the incremental update (PREV)
*/
- getDocument().setStartXref(readInt());
+ getDocument().setStartXref(readLong());
return true;
}
@@ -709,8 +722,8 @@ public class PDFParser extends BaseParse
*/
while(true)
{
- int currObjID = readInt(); // first obj id
- int count = readInt(); // the number of objects in the xref table
+ long currObjID = readObjectNumber(); // first obj id
+ long count = readLong(); // the number of objects in the xref table
skipSpaces();
for(int i = 0; i < count; i++)
{
Modified:
pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdfparser/VisualSignatureParser.java
URL:
http://svn.apache.org/viewvc/pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdfparser/VisualSignatureParser.java?rev=1493503&r1=1493502&r2=1493503&view=diff
==============================================================================
---
pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdfparser/VisualSignatureParser.java
(original)
+++
pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdfparser/VisualSignatureParser.java
Sun Jun 16 12:25:35 2013
@@ -176,7 +176,7 @@ public class VisualSignatureParser exten
else
{
//we are going to parse an normal object
- int number = -1;
+ long number = -1;
int genNum = -1;
String objectKey = null;
boolean missingObjectNumber = false;
@@ -189,7 +189,7 @@ public class VisualSignatureParser exten
}
else
{
- number = readInt();
+ number = readObjectNumber();
}
}
catch(IOException e)
@@ -198,12 +198,12 @@ public class VisualSignatureParser exten
//statements after an object, of course this is nonsense
//but because we want to support as many PDFs as possible
//we will simply try again
- number = readInt();
+ number = readObjectNumber();
}
if(!missingObjectNumber)
{
skipSpaces();
- genNum = readInt();
+ genNum = readGenerationNumber();
objectKey = readString(3);
//System.out.println( "parseObject() num=" + number +
Modified:
pdfbox/trunk/preflight/src/main/java/org/apache/pdfbox/preflight/parser/PreflightParser.java
URL:
http://svn.apache.org/viewvc/pdfbox/trunk/preflight/src/main/java/org/apache/pdfbox/preflight/parser/PreflightParser.java?rev=1493503&r1=1493502&r2=1493503&view=diff
==============================================================================
---
pdfbox/trunk/preflight/src/main/java/org/apache/pdfbox/preflight/parser/PreflightParser.java
(original)
+++
pdfbox/trunk/preflight/src/main/java/org/apache/pdfbox/preflight/parser/PreflightParser.java
Sun Jun 16 12:25:35 2013
@@ -352,8 +352,8 @@ public class PreflightParser extends Non
while (true)
{
// just after the xref<EOL> there are an integer
- int currObjID = 0; // first obj id
- int count = 0; // the number of objects in the xref table
+ long currObjID = 0; // first obj id
+ long count = 0; // the number of objects in the xref table
long offset = pdfSource.getOffset();
String line = readLine();
@@ -370,8 +370,8 @@ public class PreflightParser extends Non
"Cross reference subsection header is invalid"));
// reset pdfSource cursor to read xref information
pdfSource.seek(offset);
- currObjID = readInt(); // first obj id
- count = readInt(); // the number of objects in the xref table
+ currObjID = readObjectNumber(); // first obj id
+ count = readLong(); // the number of objects in the xref table
}
skipSpaces();
@@ -669,7 +669,7 @@ public class PreflightParser extends Non
// ---- go to object start
setPdfSource(offsetOrObjstmObNr);
// ---- we must have an indirect object
- int readObjNr = 0;
+ long readObjNr = 0;
int readObjGen = 0;
long offset = pdfSource.getOffset();
@@ -687,8 +687,8 @@ public class PreflightParser extends Non
addValidationError(new
ValidationError(ERROR_SYNTAX_OBJ_DELIMITER, "Single space expected"));
// reset pdfSource cursor to read object information
pdfSource.seek(offset);
- readObjNr = readInt();
- readObjGen = readInt();
+ readObjNr = readObjectNumber();
+ readObjGen = readGenerationNumber();
for (char c : OBJ_MARKER)
{
if (pdfSource.read() != c)