Author: lehmi
Date: Sat Jan 4 16:50:31 2020
New Revision: 1872321

URL: http://svn.apache.org/viewvc?rev=1872321&view=rev
Log:
PDFBOX-4569: refactor preflight parser, split parseObjectDynamically
Modified: pdfbox/branches/issue4569/preflight/src/main/java/org/apache/pdfbox/preflight/parser/PreflightParser.java Modified: pdfbox/branches/issue4569/preflight/src/main/java/org/apache/pdfbox/preflight/parser/PreflightParser.java URL: http://svn.apache.org/viewvc/pdfbox/branches/issue4569/preflight/src/main/java/org/apache/pdfbox/preflight/parser/PreflightParser.java?rev=1872321&r1=1872320&r2=1872321&view=diff ============================================================================== --- pdfbox/branches/issue4569/preflight/src/main/java/org/apache/pdfbox/preflight/parser/PreflightParser.java (original) +++ pdfbox/branches/issue4569/preflight/src/main/java/org/apache/pdfbox/preflight/parser/PreflightParser.java Sat Jan 4 16:50:31 2020 @@ -681,131 +681,146 @@ public class PreflightParser extends PDF else if (offsetOrObjstmObNr > 0) { // offset of indirect object in file - // ---- go to object start - source.seek(offsetOrObjstmObNr); - // ---- we must have an indirect object - long readObjNr; - int readObjGen; - - long offset = source.getPosition(); - String line = readLine(); - Pattern pattern = Pattern.compile("(\\d+)\\s(\\d+)\\sobj"); - Matcher matcher = pattern.matcher(line); - if (matcher.matches()) - { - readObjNr = Long.parseLong(matcher.group(1)); - readObjGen = Integer.parseInt(matcher.group(2)); - } - else - { - - addValidationError(new ValidationError(ERROR_SYNTAX_OBJ_DELIMITER, "Single space expected [offset="+offset+"; key="+offsetOrObjstmObNr.toString()+"; line="+line+"; object="+pdfObject.toString()+"]")); + referencedObject = parseFileObject(offsetOrObjstmObNr, objKey); + } + else + { + // xref value is object nr of object stream containing object to be parsed + // since our object was not found it means object stream was not parsed so far + referencedObject = parseObjectStreamObject((int) -offsetOrObjstmObNr, objKey); + } + if (referencedObject != null && referencedObject != COSNull.NULL) + { + pdfObject.setObject(referencedObject); + } + else + 
{ + pdfObject.setToNull(); + } + } + return referencedObject; + } - // reset source cursor to read object information - source.seek(offset); - readObjNr = readObjectNumber(); - readObjGen = readGenerationNumber(); - skipSpaces(); // skip spaces between Object Generation number and the 'obj' keyword - for (char c : OBJ_MARKER) - { - if (source.read() != c) - { - addValidationError(new ValidationError(ERROR_SYNTAX_OBJ_DELIMITER, "Expected pattern '" - + new String(OBJ_MARKER) + " but missed at character '" + c + "'")); - throw new SyntaxValidationException("Expected pattern '" + new String(OBJ_MARKER) - + " but missed at character '" + c + "'", - validationResult); - } - } - } + private COSBase parseFileObject(Long offsetOrObjstmObNr, final COSObjectKey objKey) + throws IOException + { + // offset of indirect object in file + // ---- go to object start + source.seek(offsetOrObjstmObNr); + // ---- we must have an indirect object + long readObjNr; + int readObjGen; + + long offset = source.getPosition(); + String line = readLine(); + Pattern pattern = Pattern.compile("(\\d+)\\s(\\d+)\\sobj"); + Matcher matcher = pattern.matcher(line); + if (matcher.matches()) + { + readObjNr = Long.parseLong(matcher.group(1)); + readObjGen = Integer.parseInt(matcher.group(2)); + } + else + { - // ---- consistency check - if ((readObjNr != objKey.getNumber()) || (readObjGen != objKey.getGeneration())) + addValidationError(new ValidationError(ERROR_SYNTAX_OBJ_DELIMITER, + "Single space expected [offset=" + offset + "; key=" + + offsetOrObjstmObNr.toString() + "; line=" + line + "; object=" + + objKey.getNumber() + " " + objKey.getGeneration() + "]")); + + // reset source cursor to read object information + source.seek(offset); + readObjNr = readObjectNumber(); + readObjGen = readGenerationNumber(); + skipSpaces(); // skip spaces between Object Generation number and the 'obj' keyword + for (char c : OBJ_MARKER) + { + if (source.read() != c) { - throw new IOException("XREF for " + 
objKey.getNumber() + ":" + objKey.getGeneration() - + " points to wrong object: " + readObjNr + ":" + readObjGen); + addValidationError(new ValidationError(ERROR_SYNTAX_OBJ_DELIMITER, + "Expected pattern '" + new String(OBJ_MARKER) + + " but missed at character '" + c + "'")); + throw new SyntaxValidationException("Expected pattern '" + + new String(OBJ_MARKER) + " but missed at character '" + c + "'", + validationResult); } + } + } - skipSpaces(); - referencedObject = parseDirObject(); - skipSpaces(); - long endObjectOffset = source.getPosition(); - String endObjectKey = readString(); - - if (endObjectKey.equals("stream")) - { - source.seek(endObjectOffset); - if (referencedObject instanceof COSDictionary) - { - COSStream stream = parseCOSStream((COSDictionary) referencedObject); - if (securityHandler != null) - { - securityHandler.decryptStream(stream, objNr, objGenNr); - } - referencedObject = stream; - } - else - { - // this is not legal - // the combination of a dict and the stream/endstream forms a complete stream object - throw new IOException("Stream not preceded by dictionary (offset: " + offsetOrObjstmObNr + ")."); - } - skipSpaces(); - endObjectOffset = source.getPosition(); - endObjectKey = readString(); - - // we have case with a second 'endstream' before endobj - if (!endObjectKey.startsWith("endobj") && endObjectKey.startsWith("endstream")) - { - endObjectKey = endObjectKey.substring(9).trim(); - if (endObjectKey.length() == 0) - { - // no other characters in extra endstream line - endObjectKey = readString(); // read next line - } - } - } - else if (securityHandler != null) - { - securityHandler.decrypt(referencedObject, objNr, objGenNr); - } - if (!endObjectKey.startsWith("endobj")) - { - throw new IOException("Object (" + readObjNr + ":" + readObjGen + ") at offset " - + offsetOrObjstmObNr + " does not end with 'endobj'."); - } - else - { - offset = source.getPosition(); - source.seek(endObjectOffset - 1); - if (!nextIsEOL()) - { - 
addValidationError(new ValidationError(PreflightConstants.ERROR_SYNTAX_OBJ_DELIMITER, - "EOL expected before the 'endobj' keyword at offset "+source.getPosition())); - } - source.seek(offset); - } + // ---- consistency check + if ((readObjNr != objKey.getNumber()) || (readObjGen != objKey.getGeneration())) + { + throw new IOException("XREF for " + objKey.getNumber() + ":" + objKey.getGeneration() + + " points to wrong object: " + readObjNr + ":" + readObjGen); + } - if (!nextIsEOL()) + skipSpaces(); + COSBase referencedObject = parseDirObject(); + skipSpaces(); + long endObjectOffset = source.getPosition(); + String endObjectKey = readString(); + + if (endObjectKey.equals("stream")) + { + source.seek(endObjectOffset); + if (referencedObject instanceof COSDictionary) + { + COSStream stream = parseCOSStream((COSDictionary) referencedObject); + if (securityHandler != null) { - addValidationError(new ValidationError(PreflightConstants.ERROR_SYNTAX_OBJ_DELIMITER, - "EOL expected after the 'endobj' keyword at offset "+source.getPosition())); + securityHandler.decryptStream(stream, readObjNr, readObjGen); } + referencedObject = stream; } else { - // xref value is object nr of object stream containing object to be parsed - // since our object was not found it means object stream was not parsed so far - referencedObject = parseObjectStreamObject((int) -offsetOrObjstmObNr, objKey); + // this is not legal + // the combination of a dict and the stream/endstream forms a complete stream object + throw new IOException( + "Stream not preceded by dictionary (offset: " + offsetOrObjstmObNr + ")."); } - if (referencedObject != null && referencedObject != COSNull.NULL) + skipSpaces(); + endObjectOffset = source.getPosition(); + endObjectKey = readString(); + + // we have case with a second 'endstream' before endobj + if (!endObjectKey.startsWith("endobj") && endObjectKey.startsWith("endstream")) { - pdfObject.setObject(referencedObject); + endObjectKey = 
endObjectKey.substring(9).trim(); + if (endObjectKey.length() == 0) + { + // no other characters in extra endstream line + endObjectKey = readString(); // read next line + } } - else + } + else if (securityHandler != null) + { + securityHandler.decrypt(referencedObject, readObjNr, readObjGen); + } + if (!endObjectKey.startsWith("endobj")) + { + throw new IOException("Object (" + readObjNr + ":" + readObjGen + ") at offset " + + offsetOrObjstmObNr + " does not end with 'endobj'."); + } + else + { + offset = source.getPosition(); + source.seek(endObjectOffset - 1); + if (!nextIsEOL()) { - pdfObject.setToNull(); + addValidationError( + new ValidationError(PreflightConstants.ERROR_SYNTAX_OBJ_DELIMITER, + "EOL expected before the 'endobj' keyword at offset " + + source.getPosition())); } + source.seek(offset); + } + + if (!nextIsEOL()) + { + addValidationError(new ValidationError(PreflightConstants.ERROR_SYNTAX_OBJ_DELIMITER, + "EOL expected after the 'endobj' keyword at offset " + source.getPosition())); } return referencedObject; }