Author: lehmi
Date: Sat Jan  4 16:50:31 2020
New Revision: 1872321

URL: http://svn.apache.org/viewvc?rev=1872321&view=rev
Log:
PDFBOX-4569: refactor preflight parser, split parseObjectDynamically

Modified:
    pdfbox/branches/issue4569/preflight/src/main/java/org/apache/pdfbox/preflight/parser/PreflightParser.java

Modified: pdfbox/branches/issue4569/preflight/src/main/java/org/apache/pdfbox/preflight/parser/PreflightParser.java
URL: http://svn.apache.org/viewvc/pdfbox/branches/issue4569/preflight/src/main/java/org/apache/pdfbox/preflight/parser/PreflightParser.java?rev=1872321&r1=1872320&r2=1872321&view=diff
==============================================================================
--- pdfbox/branches/issue4569/preflight/src/main/java/org/apache/pdfbox/preflight/parser/PreflightParser.java (original)
+++ pdfbox/branches/issue4569/preflight/src/main/java/org/apache/pdfbox/preflight/parser/PreflightParser.java Sat Jan  4 16:50:31 2020
@@ -681,131 +681,146 @@ public class PreflightParser extends PDF
             else if (offsetOrObjstmObNr > 0)
             {
                 // offset of indirect object in file
-                // ---- go to object start
-                source.seek(offsetOrObjstmObNr);
-                // ---- we must have an indirect object
-                long readObjNr;
-                int readObjGen;
-
-                long offset = source.getPosition();
-                String line = readLine();
-                Pattern pattern = Pattern.compile("(\\d+)\\s(\\d+)\\sobj");
-                Matcher matcher = pattern.matcher(line);
-                if (matcher.matches())
-                {
-                    readObjNr = Long.parseLong(matcher.group(1));
-                    readObjGen = Integer.parseInt(matcher.group(2));
-                }
-                else
-                {
-
-                    addValidationError(new 
ValidationError(ERROR_SYNTAX_OBJ_DELIMITER, "Single space expected 
[offset="+offset+"; key="+offsetOrObjstmObNr.toString()+"; line="+line+"; 
object="+pdfObject.toString()+"]"));
+                referencedObject = parseFileObject(offsetOrObjstmObNr, objKey);
+            }
+            else
+            {
+                // xref value is object nr of object stream containing object to be parsed
+                // since our object was not found it means object stream was not parsed so far
+                referencedObject = parseObjectStreamObject((int) -offsetOrObjstmObNr, objKey);
+            }
+            if (referencedObject != null && referencedObject != COSNull.NULL)
+            {
+                pdfObject.setObject(referencedObject);
+            }
+            else
+            {
+                pdfObject.setToNull();
+            }
+        }
+        return referencedObject;
+    }
 
-                    // reset source cursor to read object information
-                    source.seek(offset);
-                    readObjNr = readObjectNumber();
-                    readObjGen = readGenerationNumber();
-                    skipSpaces(); // skip spaces between Object Generation 
number and the 'obj' keyword 
-                    for (char c : OBJ_MARKER)
-                    {
-                        if (source.read() != c)
-                        {
-                            addValidationError(new 
ValidationError(ERROR_SYNTAX_OBJ_DELIMITER, "Expected pattern '"
-                                    + new String(OBJ_MARKER) + " but missed at 
character '" + c + "'"));
-                            throw new SyntaxValidationException("Expected 
pattern '" + new String(OBJ_MARKER)
-                                            + " but missed at character '" + c 
+ "'",
-                                    validationResult);
-                        }
-                    }
-                }
+    private COSBase parseFileObject(Long offsetOrObjstmObNr, final 
COSObjectKey objKey)
+            throws IOException
+    {
+        // offset of indirect object in file
+        // ---- go to object start
+        source.seek(offsetOrObjstmObNr);
+        // ---- we must have an indirect object
+        long readObjNr;
+        int readObjGen;
+
+        long offset = source.getPosition();
+        String line = readLine();
+        Pattern pattern = Pattern.compile("(\\d+)\\s(\\d+)\\sobj");
+        Matcher matcher = pattern.matcher(line);
+        if (matcher.matches())
+        {
+            readObjNr = Long.parseLong(matcher.group(1));
+            readObjGen = Integer.parseInt(matcher.group(2));
+        }
+        else
+        {
 
-                // ---- consistency check
-                if ((readObjNr != objKey.getNumber()) || (readObjGen != 
objKey.getGeneration()))
+            addValidationError(new ValidationError(ERROR_SYNTAX_OBJ_DELIMITER,
+                    "Single space expected [offset=" + offset + "; key="
+                            + offsetOrObjstmObNr.toString() + "; line=" + line 
+ "; object="
+                            + objKey.getNumber() + " " + 
objKey.getGeneration() + "]"));
+
+            // reset source cursor to read object information
+            source.seek(offset);
+            readObjNr = readObjectNumber();
+            readObjGen = readGenerationNumber();
+            skipSpaces(); // skip spaces between Object Generation number and 
the 'obj' keyword
+            for (char c : OBJ_MARKER)
+            {
+                if (source.read() != c)
                 {
-                    throw new IOException("XREF for " + objKey.getNumber() + 
":" + objKey.getGeneration()
-                            + " points to wrong object: " + readObjNr + ":" + 
readObjGen);
+                    addValidationError(new 
ValidationError(ERROR_SYNTAX_OBJ_DELIMITER,
+                            "Expected pattern '" + new String(OBJ_MARKER)
+                                    + " but missed at character '" + c + "'"));
+                    throw new SyntaxValidationException("Expected pattern '"
+                            + new String(OBJ_MARKER) + " but missed at 
character '" + c + "'",
+                            validationResult);
                 }
+            }
+        }
 
-                skipSpaces();
-                referencedObject = parseDirObject();
-                skipSpaces();
-                long endObjectOffset = source.getPosition();
-                String endObjectKey = readString();
-
-                if (endObjectKey.equals("stream"))
-                {
-                    source.seek(endObjectOffset);
-                    if (referencedObject instanceof COSDictionary)
-                    {
-                        COSStream stream = parseCOSStream((COSDictionary) 
referencedObject);
-                        if (securityHandler != null)
-                        {
-                            securityHandler.decryptStream(stream, objNr, 
objGenNr);
-                        }
-                        referencedObject = stream;
-                    }
-                    else
-                    {
-                        // this is not legal
-                        // the combination of a dict and the stream/endstream 
forms a complete stream object
-                        throw new IOException("Stream not preceded by 
dictionary (offset: " + offsetOrObjstmObNr + ").");
-                    }
-                    skipSpaces();
-                    endObjectOffset = source.getPosition();
-                    endObjectKey = readString();
-
-                    // we have case with a second 'endstream' before endobj
-                    if (!endObjectKey.startsWith("endobj") && 
endObjectKey.startsWith("endstream"))
-                    {
-                        endObjectKey = endObjectKey.substring(9).trim();
-                        if (endObjectKey.length() == 0)
-                        {
-                            // no other characters in extra endstream line
-                            endObjectKey = readString(); // read next line
-                        }
-                    }
-                }
-                else if (securityHandler != null)
-                {
-                    securityHandler.decrypt(referencedObject, objNr, objGenNr);
-                }
-                if (!endObjectKey.startsWith("endobj"))
-                {
-                    throw new IOException("Object (" + readObjNr + ":" + 
readObjGen + ") at offset "
-                            + offsetOrObjstmObNr + " does not end with 
'endobj'.");
-                }
-                else
-                {
-                    offset = source.getPosition();
-                    source.seek(endObjectOffset - 1);
-                    if (!nextIsEOL())
-                    {
-                        addValidationError(new 
ValidationError(PreflightConstants.ERROR_SYNTAX_OBJ_DELIMITER,
-                                "EOL expected before the 'endobj' keyword at 
offset "+source.getPosition()));
-                    }
-                    source.seek(offset);
-                }
+        // ---- consistency check
+        if ((readObjNr != objKey.getNumber()) || (readObjGen != 
objKey.getGeneration()))
+        {
+            throw new IOException("XREF for " + objKey.getNumber() + ":" + 
objKey.getGeneration()
+                    + " points to wrong object: " + readObjNr + ":" + 
readObjGen);
+        }
 
-                if (!nextIsEOL())
+        skipSpaces();
+        COSBase referencedObject = parseDirObject();
+        skipSpaces();
+        long endObjectOffset = source.getPosition();
+        String endObjectKey = readString();
+
+        if (endObjectKey.equals("stream"))
+        {
+            source.seek(endObjectOffset);
+            if (referencedObject instanceof COSDictionary)
+            {
+                COSStream stream = parseCOSStream((COSDictionary) 
referencedObject);
+                if (securityHandler != null)
                 {
-                    addValidationError(new 
ValidationError(PreflightConstants.ERROR_SYNTAX_OBJ_DELIMITER,
-                            "EOL expected after the 'endobj' keyword at offset 
"+source.getPosition()));
+                    securityHandler.decryptStream(stream, readObjNr, 
readObjGen);
                 }
+                referencedObject = stream;
             }
             else
             {
-                // xref value is object nr of object stream containing object to be parsed
-                // since our object was not found it means object stream was not parsed so far
-                referencedObject = parseObjectStreamObject((int) -offsetOrObjstmObNr, objKey);
+                // this is not legal
+                // the combination of a dict and the stream/endstream forms a 
complete stream object
+                throw new IOException(
+                        "Stream not preceded by dictionary (offset: " + 
offsetOrObjstmObNr + ").");
             }
-            if (referencedObject != null && referencedObject != COSNull.NULL)
+            skipSpaces();
+            endObjectOffset = source.getPosition();
+            endObjectKey = readString();
+
+            // we have case with a second 'endstream' before endobj
+            if (!endObjectKey.startsWith("endobj") && 
endObjectKey.startsWith("endstream"))
             {
-                pdfObject.setObject(referencedObject);
+                endObjectKey = endObjectKey.substring(9).trim();
+                if (endObjectKey.length() == 0)
+                {
+                    // no other characters in extra endstream line
+                    endObjectKey = readString(); // read next line
+                }
             }
-            else
+        }
+        else if (securityHandler != null)
+        {
+            securityHandler.decrypt(referencedObject, readObjNr, readObjGen);
+        }
+        if (!endObjectKey.startsWith("endobj"))
+        {
+            throw new IOException("Object (" + readObjNr + ":" + readObjGen + 
") at offset "
+                    + offsetOrObjstmObNr + " does not end with 'endobj'.");
+        }
+        else
+        {
+            offset = source.getPosition();
+            source.seek(endObjectOffset - 1);
+            if (!nextIsEOL())
             {
-                pdfObject.setToNull();
+                addValidationError(
+                        new 
ValidationError(PreflightConstants.ERROR_SYNTAX_OBJ_DELIMITER,
+                                "EOL expected before the 'endobj' keyword at 
offset "
+                                        + source.getPosition()));
             }
+            source.seek(offset);
+        }
+
+        if (!nextIsEOL())
+        {
+            addValidationError(new 
ValidationError(PreflightConstants.ERROR_SYNTAX_OBJ_DELIMITER,
+                    "EOL expected after the 'endobj' keyword at offset " + 
source.getPosition()));
         }
         return referencedObject;
     }


Reply via email to