s...

leleueri Mon, 24 Sep 2012 14:29:24 -0700

Author: leleueri
Date: Mon Sep 24 21:28:34 2012
New Revision: 1389604

URL: http://svn.apache.org/viewvc?rev=1389604&view=rev
Log:
[https://issues.apache.org/jira/browse/PDFBOX-1373] Syntax validation is done 
by the preflight parser


Added:
    
pdfbox/trunk/preflight/src/main/java/org/apache/pdfbox/preflight/exception/SyntaxValidationException.java
   (with props)
Modified:
    pdfbox/trunk/preflight/pom.xml
    
pdfbox/trunk/preflight/src/main/java/org/apache/pdfbox/preflight/PreflightConstants.java
    
pdfbox/trunk/preflight/src/main/java/org/apache/pdfbox/preflight/PreflightContext.java
    
pdfbox/trunk/preflight/src/main/java/org/apache/pdfbox/preflight/Validator_A1b.java
    
pdfbox/trunk/preflight/src/main/java/org/apache/pdfbox/preflight/parser/PreflightParser.java
    
pdfbox/trunk/preflight/src/main/java/org/apache/pdfbox/preflight/process/TrailerValidationProcess.java
    
pdfbox/trunk/preflight/src/test/java/org/apache/pdfbox/preflight/TestInvalidDirectory.java
    
pdfbox/trunk/preflight/src/test/java/org/apache/pdfbox/preflight/TestIsartorValidationFromClasspath.java
    
pdfbox/trunk/preflight/src/test/java/org/apache/pdfbox/preflight/TestValidDirectory.java
    pdfbox/trunk/preflight/src/test/resources/expected_errors.txt

Modified: pdfbox/trunk/preflight/pom.xml
URL: 
http://svn.apache.org/viewvc/pdfbox/trunk/preflight/pom.xml?rev=1389604&r1=1389603&r2=1389604&view=diff
==============================================================================
--- pdfbox/trunk/preflight/pom.xml (original)
+++ pdfbox/trunk/preflight/pom.xml Mon Sep 24 21:28:34 2012
@@ -235,6 +235,18 @@
         <scope>test</scope>
     </dependency>
   -->
+      <dependency>
+      <groupId>org.bouncycastle</groupId>
+      <artifactId>bcmail-jdk15</artifactId>
+      <version>1.44</version>
+      <optional>true</optional>
+    </dependency>
+    <dependency>
+      <groupId>org.bouncycastle</groupId>
+      <artifactId>bcprov-jdk15</artifactId>
+      <version>1.44</version>
+      <optional>true</optional>
+    </dependency>
   </dependencies>
 
   <reporting>

Modified: 
pdfbox/trunk/preflight/src/main/java/org/apache/pdfbox/preflight/PreflightConstants.java
URL: 
http://svn.apache.org/viewvc/pdfbox/trunk/preflight/src/main/java/org/apache/pdfbox/preflight/PreflightConstants.java?rev=1389604&r1=1389603&r2=1389604&view=diff
==============================================================================
--- 
pdfbox/trunk/preflight/src/main/java/org/apache/pdfbox/preflight/PreflightConstants.java
 (original)
+++ 
pdfbox/trunk/preflight/src/main/java/org/apache/pdfbox/preflight/PreflightConstants.java
 Mon Sep 24 21:28:34 2012
@@ -339,6 +339,18 @@ public interface PreflightConstants {
         * CID too long
         */
        String ERROR_SYNTAX_CID_RANGE = "1.0.10";
+       /**
+        * Hexa string shall contain an even number of non-whitespace characters
+        */
+       String ERROR_SYNTAX_HEXA_STRING_EVEN_NUMBER = "1.0.11";
+       /**
+        * Hexa string contains non-hexadecimal characters
+        */
+       String ERROR_SYNTAX_HEXA_STRING_INVALID= "1.0.12";      
+       /**
+        * An object is missing from the document (offset is negative)
+        */
+       String ERROR_SYNTAX_NEGATIVE_OFFSET = "1.0.13";
 
        String ERROR_SYNTAX_HEADER = "1.1";
        
@@ -444,6 +456,10 @@ public interface PreflightConstants {
         * Errors in the Outlines dictionary
         */
        String ERROR_SYNTAX_TRAILER_OUTLINES_INVALID = "1.4.9";
+       /**
+        * Last %%EOF sequence is followed by data
+        */
+       String ERROR_SYNTAX_TRAILER_EOF = "1.4.10";
 
        // -----------------------------------------------------------
        // ---- GRAPHIC ERRORS 2.x...

Modified: 
pdfbox/trunk/preflight/src/main/java/org/apache/pdfbox/preflight/PreflightContext.java
URL: 
http://svn.apache.org/viewvc/pdfbox/trunk/preflight/src/main/java/org/apache/pdfbox/preflight/PreflightContext.java?rev=1389604&r1=1389603&r2=1389604&view=diff
==============================================================================
--- 
pdfbox/trunk/preflight/src/main/java/org/apache/pdfbox/preflight/PreflightContext.java
 (original)
+++ 
pdfbox/trunk/preflight/src/main/java/org/apache/pdfbox/preflight/PreflightContext.java
 Mon Sep 24 21:28:34 2012
@@ -24,15 +24,16 @@ package org.apache.pdfbox.preflight;
 import java.util.HashMap;
 import java.util.List;
 import java.util.Map;
+import java.util.SortedMap;
 
 import javax.activation.DataSource;
 
 import org.apache.padaf.xmpbox.XMPMetadata;
 import org.apache.pdfbox.cos.COSBase;
+import org.apache.pdfbox.pdfparser.XrefTrailerResolver;
 import org.apache.pdfbox.preflight.ValidationResult.ValidationError;
 import org.apache.pdfbox.preflight.font.container.FontContainer;
 import org.apache.pdfbox.preflight.graphic.ICCProfileWrapper;
-import org.apache.pdfbox.preflight.javacc.extractor.ExtractorTokenManager;
 import org.apache.pdfbox.preflight.utils.COSUtils;
 
 public class PreflightContext {
@@ -50,13 +51,18 @@ public class PreflightContext {
         * The datasource to load the document from
         */
        protected DataSource source = null;
-
-       /**
-        * JavaCC Token Manager used to get some content of the PDF file as 
string (ex
-        * : Trailers)
-        */
-       protected ExtractorTokenManager pdfExtractor = null;
-
+//
+//     /**
+//      * JavaCC Token Manager used to get some content of the PDF file as 
string (ex
+//      * : Trailers)
+//      */
+//     protected ExtractorTokenManager pdfExtractor = null;
+
+  /** Contains all Xref/trailer objects and resolves them into a single
+   *  object using the startxref reference.
+   */
+       private XrefTrailerResolver xrefTableResolver;
+       
        /**
         * This wrapper contains the ICCProfile used by the PDF file.
         */
@@ -103,21 +109,21 @@ public class PreflightContext {
                this.metadata = metadata;
        }
        
-       /**
-        * @return the value of the pdfExtractor attribute.
-        */
-       public ExtractorTokenManager getPdfExtractor() {
-               return pdfExtractor;
-       }
-
-       /**
-        * Initialize the pdfExtractor attribute.
-        * 
-        * @param pdfExtractor
-        */
-       public void setPdfExtractor(ExtractorTokenManager pdfExtractor) {
-               this.pdfExtractor = pdfExtractor;
-       }
+//     /**
+//      * @return the value of the pdfExtractor attribute.
+//      */
+//     public ExtractorTokenManager getPdfExtractor() {
+//             return pdfExtractor;
+//     }
+//
+//     /**
+//      * Initialize the pdfExtractor attribute.
+//      * 
+//      * @param pdfExtractor
+//      */
+//     public void setPdfExtractor(ExtractorTokenManager pdfExtractor) {
+//             this.pdfExtractor = pdfExtractor;
+//     }
 
        /**
         * @return the PDFBox object representation of the document
@@ -126,6 +132,14 @@ public class PreflightContext {
                return document;
        }
 
+       public XrefTrailerResolver getXrefTableResolver() {
+       return xrefTableResolver;
+  }
+
+       public void setXrefTableResolver(XrefTrailerResolver xrefTableResolver) 
{
+       this.xrefTableResolver = xrefTableResolver;
+  }
+
        /**
         * Initialize the PDFBox object which present the PDF File.
         * 

Modified: 
pdfbox/trunk/preflight/src/main/java/org/apache/pdfbox/preflight/Validator_A1b.java
URL: 
http://svn.apache.org/viewvc/pdfbox/trunk/preflight/src/main/java/org/apache/pdfbox/preflight/Validator_A1b.java?rev=1389604&r1=1389603&r2=1389604&view=diff
==============================================================================
--- 
pdfbox/trunk/preflight/src/main/java/org/apache/pdfbox/preflight/Validator_A1b.java
 (original)
+++ 
pdfbox/trunk/preflight/src/main/java/org/apache/pdfbox/preflight/Validator_A1b.java
 Mon Sep 24 21:28:34 2012
@@ -25,6 +25,7 @@ import javax.activation.FileDataSource;
 
 import org.apache.pdfbox.Version;
 import org.apache.pdfbox.preflight.ValidationResult.ValidationError;
+import org.apache.pdfbox.preflight.exception.SyntaxValidationException;
 import org.apache.pdfbox.preflight.parser.PreflightParser;
 
 
@@ -48,16 +49,20 @@ public class Validator_A1b {
                        System.exit(1);
                }
 
+               ValidationResult result = null;
                FileDataSource fd = new FileDataSource(args[0]);
-               
                PreflightParser parser = new PreflightParser(fd);
-               parser.parse();
-               PreflightDocument document = 
(PreflightDocument)parser.getPDDocument();
-               document.validate();
+               try {
+                       parser.parse();
+                       PreflightDocument document = 
parser.getPreflightDocument();
+                       document.validate();
+                       result = document.getResult();
+                       document.close();
+               } catch (SyntaxValidationException e) {
+                       result = e.getResult();
+               }
 
-               ValidationResult result = document.getResult();
                if (result.isValid()) {
-                       document.close();
                        System.out.println("The file " + args[0] + " is a valid 
PDF/A-1b file");
                        System.exit(0);
                } else {
@@ -66,7 +71,6 @@ public class Validator_A1b {
                                System.out.println(error.getErrorCode() + " : " 
+ error.getDetails());
                        }
 
-                       document.close();
                        System.exit(-1);
                }
        }

Added: 
pdfbox/trunk/preflight/src/main/java/org/apache/pdfbox/preflight/exception/SyntaxValidationException.java
URL: 
http://svn.apache.org/viewvc/pdfbox/trunk/preflight/src/main/java/org/apache/pdfbox/preflight/exception/SyntaxValidationException.java?rev=1389604&view=auto
==============================================================================
--- 
pdfbox/trunk/preflight/src/main/java/org/apache/pdfbox/preflight/exception/SyntaxValidationException.java
 (added)
+++ 
pdfbox/trunk/preflight/src/main/java/org/apache/pdfbox/preflight/exception/SyntaxValidationException.java
 Mon Sep 24 21:28:34 2012
@@ -0,0 +1,49 @@
+/*****************************************************************************
+ * 
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ * 
+ * http://www.apache.org/licenses/LICENSE-2.0
+ * 
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ * 
+ ****************************************************************************/
+
+package org.apache.pdfbox.preflight.exception;
+
+import org.apache.pdfbox.preflight.ValidationResult;
+
+public class SyntaxValidationException extends ValidationException {
+
+       private final ValidationResult result;
+       
+       public SyntaxValidationException(String message, Throwable cause, 
ValidationResult result) {
+         super(message, cause);
+         this.result = result;
+  }
+
+       public SyntaxValidationException(String message, ValidationResult 
result) {
+         super(message);
+         this.result = result;
+  }
+
+       public SyntaxValidationException(Throwable cause, ValidationResult 
result) {
+         super(cause);
+         this.result = result;
+  }
+
+       public ValidationResult getResult() {
+       return result;
+  }
+
+}

Propchange: 
pdfbox/trunk/preflight/src/main/java/org/apache/pdfbox/preflight/exception/SyntaxValidationException.java
------------------------------------------------------------------------------
    svn:eol-style = native

Modified: 
pdfbox/trunk/preflight/src/main/java/org/apache/pdfbox/preflight/parser/PreflightParser.java
URL: 
http://svn.apache.org/viewvc/pdfbox/trunk/preflight/src/main/java/org/apache/pdfbox/preflight/parser/PreflightParser.java?rev=1389604&r1=1389603&r2=1389604&view=diff
==============================================================================
--- 
pdfbox/trunk/preflight/src/main/java/org/apache/pdfbox/preflight/parser/PreflightParser.java
 (original)
+++ 
pdfbox/trunk/preflight/src/main/java/org/apache/pdfbox/preflight/parser/PreflightParser.java
 Mon Sep 24 21:28:34 2012
@@ -21,17 +21,61 @@
 
 package org.apache.pdfbox.preflight.parser;
 
+import static org.apache.pdfbox.preflight.PreflightConstants.*;
+import static 
org.apache.pdfbox.preflight.PreflightConstants.ERROR_SYNTAX_CROSS_REF;
+import static 
org.apache.pdfbox.preflight.PreflightConstants.ERROR_SYNTAX_HEXA_STRING_EVEN_NUMBER;
+import static 
org.apache.pdfbox.preflight.PreflightConstants.ERROR_SYNTAX_HEXA_STRING_INVALID;
+import static 
org.apache.pdfbox.preflight.PreflightConstants.ERROR_SYNTAX_HEXA_STRING_TOO_LONG;
+import static 
org.apache.pdfbox.preflight.PreflightConstants.ERROR_SYNTAX_NAME_TOO_LONG;
+import static 
org.apache.pdfbox.preflight.PreflightConstants.ERROR_SYNTAX_NUMERIC_RANGE;
+import static 
org.apache.pdfbox.preflight.PreflightConstants.ERROR_SYNTAX_OBJ_DELIMITER;
+import static 
org.apache.pdfbox.preflight.PreflightConstants.ERROR_SYNTAX_STREAM_DELIMITER;
+import static 
org.apache.pdfbox.preflight.PreflightConstants.ERROR_SYNTAX_TOO_MANY_ENTRIES;
+import static 
org.apache.pdfbox.preflight.PreflightConstants.ERROR_SYNTAX_TRAILER_EOF;
+import static 
org.apache.pdfbox.preflight.PreflightConstants.MAX_ARRAY_ELEMENTS;
+import static org.apache.pdfbox.preflight.PreflightConstants.MAX_DICT_ENTRIES;
+import static org.apache.pdfbox.preflight.PreflightConstants.MAX_NAME_SIZE;
+import static 
org.apache.pdfbox.preflight.PreflightConstants.MAX_NEGATIVE_FLOAT;
+import static 
org.apache.pdfbox.preflight.PreflightConstants.MAX_POSITIVE_FLOAT;
+import static org.apache.pdfbox.preflight.PreflightConstants.MAX_STRING_LENGTH;
+
+import java.io.BufferedReader;
+import java.io.File;
+import java.io.FileInputStream;
 import java.io.IOException;
 import java.io.InputStreamReader;
 import java.nio.charset.Charset;
+import java.util.Arrays;
 import java.util.List;
+import java.util.Map;
+import java.util.Map.Entry;
+import java.util.Set;
+import java.util.regex.Matcher;
+import java.util.regex.Pattern;
 
 import javax.activation.DataSource;
+import javax.activation.FileDataSource;
 
+import org.apache.pdfbox.cos.COSArray;
+import org.apache.pdfbox.cos.COSBase;
+import org.apache.pdfbox.cos.COSDictionary;
+import org.apache.pdfbox.cos.COSDocument;
+import org.apache.pdfbox.cos.COSFloat;
+import org.apache.pdfbox.cos.COSName;
+import org.apache.pdfbox.cos.COSNull;
+import org.apache.pdfbox.cos.COSNumber;
+import org.apache.pdfbox.cos.COSObject;
+import org.apache.pdfbox.cos.COSStream;
+import org.apache.pdfbox.cos.COSString;
+import org.apache.pdfbox.exceptions.CryptographyException;
 import org.apache.pdfbox.io.IOUtils;
 import org.apache.pdfbox.io.RandomAccess;
+import org.apache.pdfbox.pdfparser.BaseParser;
+import org.apache.pdfbox.pdfparser.NonSequentialPDFParser;
+import org.apache.pdfbox.pdfparser.PDFObjectStreamParser;
 import org.apache.pdfbox.pdfparser.PDFParser;
 import org.apache.pdfbox.pdmodel.PDDocument;
+import org.apache.pdfbox.persistence.util.COSObjectKey;
 import org.apache.pdfbox.preflight.Format;
 import org.apache.pdfbox.preflight.PreflightConfiguration;
 import org.apache.pdfbox.preflight.PreflightConstants;
@@ -39,13 +83,9 @@ import org.apache.pdfbox.preflight.Prefl
 import org.apache.pdfbox.preflight.PreflightDocument;
 import org.apache.pdfbox.preflight.ValidationResult;
 import org.apache.pdfbox.preflight.ValidationResult.ValidationError;
-import org.apache.pdfbox.preflight.exception.PdfParseException;
-import org.apache.pdfbox.preflight.exception.ValidationException;
-import org.apache.pdfbox.preflight.javacc.ParseException;
-import org.apache.pdfbox.preflight.javacc.extractor.ExtractorTokenManager;
-import org.apache.pdfbox.preflight.javacc.extractor.SimpleCharStream;
+import org.apache.pdfbox.preflight.exception.SyntaxValidationException;
 
-public class PreflightParser extends PDFParser {
+public class PreflightParser extends NonSequentialPDFParser {
        /**
         * Define a one byte encoding that hasn't specific encoding in UTF-8 
charset.
         * Avoid unexpected error when the encoding is Cp5816
@@ -56,18 +96,20 @@ public class PreflightParser extends PDF
 
        protected ValidationResult validationResult;
 
-       protected PreflightDocument document;
+       protected PreflightDocument preflightDocument;
 
        protected PreflightContext ctx;
 
-       public PreflightParser(DataSource input, RandomAccess rafi, boolean 
force)      throws IOException {
-               super(input.getInputStream(), rafi, force);
-               this.originalDocument = input;
+       public PreflightParser(File file, RandomAccess rafi) throws IOException 
{
+               super(file, rafi);
+               this.originalDocument = new FileDataSource(file);
        }
 
-       public PreflightParser(DataSource input, RandomAccess rafi) throws 
IOException {
-               super(input.getInputStream(), rafi);
-               this.originalDocument = input;
+       public PreflightParser(File file) throws IOException {
+               this(file, null);
+       }
+       public PreflightParser(String filename) throws IOException {
+               this(new File(filename), null);
        }
 
        public PreflightParser(DataSource input) throws IOException {
@@ -75,29 +117,6 @@ public class PreflightParser extends PDF
                this.originalDocument = input;
        }
 
-
-       /**
-        * Create an instance of ValidationResult. This object contains an 
instance of
-        * ValidationError. If the ParseException is an instance of 
PdfParseException,
-        * the embedded validation error is initialized with the error code of 
the
-        * exception, otherwise it is an UnknownError.
-        * 
-        * @param e
-        * @return
-        */
-       protected static ValidationResult createErrorResult(ParseException e) {
-               if (e instanceof PdfParseException) {
-                       if (e.getCause()==null) {
-                               return new ValidationResult(new 
ValidationError(((PdfParseException)e).getErrorCode()));
-                       } else if (e.getCause().getMessage()==null) {
-                               return new ValidationResult(new 
ValidationError(((PdfParseException)e).getErrorCode()));
-                       } else {
-                               return new ValidationResult(new 
ValidationError(((PdfParseException)e).getErrorCode(),e.getCause().getMessage()));
-                       }
-               }
-               return createUnknownErrorResult();
-       }
-
        /**
         * Create an instance of ValidationResult with a
         * ValidationError(UNKNOWN_ERROR)
@@ -152,58 +171,578 @@ public class PreflightParser extends PDF
         * @throws IOException
         */
        public void parse(Format format, PreflightConfiguration config) throws 
IOException {
-               checkFileSyntax();
-               // run PDFBox Parser
-               super.parse();
+               checkPdfHeader();
+               try {
+                       super.parse();
+               } catch (IOException e) {
+                       addValidationError(new 
ValidationError(PreflightConstants.ERROR_SYNTAX_COMMON, e.getMessage()));
+                       throw new SyntaxValidationException(e, 
this.validationResult);
+               }
                Format formatToUse = (format == null ? Format.PDF_A1B : format);
                createPdfADocument(formatToUse, config);
                createContext();
-               extractTrailers();
        }
-       
+
+       protected void createPdfADocument(Format format, PreflightConfiguration 
config) throws IOException {
+               COSDocument cosDocument = getDocument();
+               this.preflightDocument = new PreflightDocument(cosDocument, 
format, config);
+       }
+
+       /**
+        * Create a validation context.
+        * This context is set to the PreflightDocument.
+        */
+       protected void createContext() {
+               this.ctx = new PreflightContext(this.originalDocument);
+               ctx.setDocument(preflightDocument);
+               preflightDocument.setContext(ctx);
+               ctx.setXrefTableResolver(xrefTrailerResolver);
+       }
+
+       @Override
+       public PDDocument getPDDocument() throws IOException {
+               preflightDocument.setResult(validationResult);
+               // Add XMP MetaData
+               return preflightDocument;
+       }
+
+       public PreflightDocument getPreflightDocument() throws IOException {
+               return (PreflightDocument)getPDDocument();
+       }
+
+
+       // --------------------------------------------------------
+       // - Below: all methods that add controls on the PDF syntax
+       // --------------------------------------------------------
+
+       @Override
+       /**
+        * Fill the COSDocument with the objects that aren't set by the 
NonSequentialPDFParser
+        */
+       protected void initialParse() throws IOException {
+               super.initialParse();
+
+               // fill xref table
+               document.addXRefTable(xrefTrailerResolver.getXrefTable());
+
+               // Trailer entries are useful in the preflight document
+               for (COSBase trailerEntry : 
getDocument().getTrailer().getValues()) {
+                       if ( trailerEntry instanceof COSObject )
+                       {
+                               COSObject tmpObj = (COSObject) trailerEntry;
+                               parseObjectDynamically( tmpObj, true );
+                       }
+               }
+
+               // For each ObjectKey, we check if the object has been loaded
+               Map<COSObjectKey, Long> xrefTable = document.getXrefTable();
+               for (Entry<COSObjectKey, Long> entry : xrefTable.entrySet()) {
+                       COSObject co = 
document.getObjectFromPool(entry.getKey());
+                       if ( co.getObject() == null) {
+                               // object isn't loaded - parse the object to 
load its content
+                               parseObjectDynamically( co, true );             
                
+                       }
+               }
+       }
+
        /**
-        * Run the JavaCC parser to check the PDF syntax.
-        * @throws ValidationException
+        * Check that the PDF header matches the rules of the PDF/A specification.
+        * First line (offset 0) must be a comment with the PDF version 
(version 1.0 does not conform to the PDF/A specification).
+        * Second line must be a comment with at least 4 bytes greater than 127.
         */
-       protected void checkFileSyntax() throws ValidationException {
-               // syntax (javacc) validation
+       protected void checkPdfHeader() {
+               BufferedReader reader = null;
                try {
-                       InputStreamReader reader = new 
InputStreamReader(this.originalDocument.getInputStream(), encoding); 
-                       org.apache.pdfbox.preflight.javacc.PDFParser 
javaCCParser = new org.apache.pdfbox.preflight.javacc.PDFParser(reader);
-                       javaCCParser.PDF();
-                       IOUtils.closeQuietly(reader);
+                       reader = new BufferedReader(new InputStreamReader(new 
FileInputStream(getPdfFile()), "ISO-8859-1"));
+                       String firstLine = reader.readLine();
+                       if (firstLine == null || (firstLine != null && 
!firstLine.matches("%PDF-1\\.[1-9]"))) {
+                               addValidationError(new 
ValidationError(PreflightConstants.ERROR_SYNTAX_HEADER, "First line must match 
%PDF-1.\\d"));    
+                       }
+
+                       String secondLine = reader.readLine(); 
+                       if (secondLine != null && secondLine.getBytes().length 
>= 5) {
+                               for (int i = 0; i < 
secondLine.getBytes().length; ++i ) {
+                                       byte b = secondLine.getBytes()[i]; 
+                                       if (i == 0 && ((char)b != '%')) {
+                                               addValidationError(new 
ValidationError(PreflightConstants.ERROR_SYNTAX_HEADER, "Second line must 
contains at least 4 bytes greater than 127"));
+                                               break;
+                                       } else if (i > 0 && ((b & 0xFF) < 
0x80)) {
+                                               addValidationError(new 
ValidationError(PreflightConstants.ERROR_SYNTAX_HEADER, "Second line must 
contains at least 4 bytes greater than 127"));
+                                               break;
+                                       }
+                               }
+                       } else {
+                               addValidationError(new 
ValidationError(PreflightConstants.ERROR_SYNTAX_HEADER ,"Second line must 
contains at least 4 bytes greater than 127"));
+                       }
+
                } catch (IOException e) {
-                       throw new ValidationException("Failed to parse 
datasource due to : " + e.getMessage(), e);
-               } catch (ParseException e) {
-                       this.validationResult = createErrorResult(e);
+                       addValidationError(new 
ValidationError(PreflightConstants.ERROR_SYNTAX_HEADER, "Unable to read the PDF 
file : " + e.getMessage()));
+               } finally {
+                       IOUtils.closeQuietly(reader);
                }
        }
 
-       protected void createPdfADocument(Format format, PreflightConfiguration 
config) throws IOException {
-               this.document = new PreflightDocument(getDocument(), format, 
config);
+       /**
+        * Same method as {@linkplain PDFParser#parseXrefTable(long)}, 
with additional controls:
+        * - EOL mandatory after the 'xref' keyword
+        * - Cross reference subsection header uses single white space as 
separator
+        * - and so on
+        */
+       protected boolean parseXrefTable( long startByteOffset ) throws 
IOException
+       {
+               if(pdfSource.peek() != 'x')
+               {
+                       return false;
+               }
+               String xref = readString();
+               if( !xref.equals( "xref" ) )
+               {
+                       addValidationError(new 
ValidationError(PreflightConstants.ERROR_SYNTAX_CROSS_REF, "xref must be 
followed by a EOL character"));
+                       return false;
+               }
+               if (!nextIsEOL()) {
+                       addValidationError(new 
ValidationError(PreflightConstants.ERROR_SYNTAX_CROSS_REF, "xref must be 
followed by EOL"));
+               }
+
+               // signal start of new XRef
+               xrefTrailerResolver.nextXrefObj( startByteOffset );
+
+               /*
+                * Xref tables can have multiple sections.
+                * Each starts with a starting object id and a count.
+                */
+               while(true)
+               {
+                       // just after the xref<EOL> there is an integer
+                       int currObjID = 0; // first obj id
+                       int count = 0; // the number of objects in the xref 
table
+
+                       long offset = pdfSource.getOffset();
+                       String line = readLine();
+                       Pattern pattern = 
Pattern.compile("(\\d+)\\s(\\d+)(\\s*)");
+                       Matcher matcher = pattern.matcher(line);
+                       if (matcher.matches()) {
+                               currObjID = Integer.parseInt(matcher.group(1));
+                               count = Integer.parseInt(matcher.group(2));
+                       } else {
+                               addValidationError(new 
ValidationError(ERROR_SYNTAX_CROSS_REF, "Cross reference subsection header is 
invalid"));
+                               // reset pdfSource cursor to read xref 
information
+                               pdfSource.seek(offset);
+                               currObjID = readInt(); // first obj id
+                               count = readInt(); // the number of objects in 
the xref table
+                       }
+
+                       skipSpaces();
+                       for(int i = 0; i < count; i++)
+                       {
+                               if(pdfSource.isEOF() || 
isEndOfName((char)pdfSource.peek()))
+                               {
+                                       break;
+                               }
+                               if(pdfSource.peek() == 't')
+                               {
+                                       addValidationError(new 
ValidationError(PreflightConstants.ERROR_SYNTAX_CROSS_REF, "Expected xref line 
but 't' found"));
+                                       break;
+                               }
+                               //Ignore table contents
+                               String currentLine = readLine();
+                               String[] splitString = currentLine.split(" ");
+                               if (splitString.length < 3)
+                               {
+                                       addValidationError(new 
ValidationError(PreflightConstants.ERROR_SYNTAX_CROSS_REF, "invalid xref line: 
" + currentLine));
+                                       break;
+                               }
+                               /* This supports the corrupt table as reported 
in
+                                * PDFBOX-474 (XXXX XXX XX n) */
+                               
if(splitString[splitString.length-1].equals("n"))
+                               {
+                                       try
+                                       {
+                                               long currOffset = 
Long.parseLong(splitString[0]);
+                                               int currGenID = 
Integer.parseInt(splitString[1]);
+                                               COSObjectKey objKey = new 
COSObjectKey(currObjID, currGenID);
+                                               
xrefTrailerResolver.setXRef(objKey, currOffset);
+                                       }
+                                       catch(NumberFormatException e)
+                                       {
+                                               addValidationError(new 
ValidationError(PreflightConstants.ERROR_SYNTAX_CROSS_REF, "offset or genid 
can't be read as number " + e.getMessage()));
+                                       }
+                               }
+                               else if(!splitString[2].equals("f"))
+                               {
+                                       addValidationError(new 
ValidationError(PreflightConstants.ERROR_SYNTAX_CROSS_REF, "Corrupt XRefTable 
Entry - ObjID:" + currObjID));
+                               }
+                               currObjID++;
+                               skipSpaces();
+                       }
+                       skipSpaces();
+                       char c = (char)pdfSource.peek();
+                       if(c < '0' || c > '9')
+                       {
+                               break;
+                       }
+               }
+               return true;
        }
 
-       protected void createContext() {
-               this.ctx = new PreflightContext(this.originalDocument);
-               ctx.setDocument(document);
-               document.setContext(ctx);
+       /**
+        * Parses a stream object and validates the keywords that delimit it.
+        * {@link #checkStreamKeyWord()} is run first, then the parsing is delegated to
+        * {@link NonSequentialPDFParser#parseCOSStream} and finally {@link #checkEndstreamKeyWord()} is run.
+        *
+        * @param dic the dictionary handed over to the parent implementation
+        * @param file the random access handed over to the parent implementation
+        * @return the stream returned by the parent implementation
+        * @throws IOException if one of the keyword checks or the parent parser fails
+        */
+       protected COSStream parseCOSStream( COSDictionary dic, RandomAccess file ) throws IOException
+       {
+               // the order matters: each check works on the current position of the source
+               checkStreamKeyWord();
+               final COSStream parsedStream = super.parseCOSStream(dic, file);
+               checkEndstreamKeyWord();
+               return parsedStream;
+       }
+
+       /**
+        * Checks the 'stream' keyword found at the current position of the source:
+        * the token must be {@code stream} and it must be followed by {@code CR LF} or by a
+        * single {@code LF}. Each failure is recorded as a validation error.
+        * When the method returns, the source is back at the offset it had on entry so that
+        * the stream can be parsed afterwards.
+        *
+        * @throws IOException if the source can't be read or repositioned
+        */
+       protected void checkStreamKeyWord() throws IOException {
+               // Remember the entry offset instead of rewinding a fixed 7 bytes: the fixed
+               // rewind is only right when the token is exactly 'stream' followed by one byte,
+               // it lands at a wrong position for any other token or at the end of the file.
+               // NOTE(review): the caller in parseObjectDynamically seeks to the keyword itself
+               // before calling, so the entry offset is the start of the keyword - confirm for other callers.
+               final long startOffset = pdfSource.getOffset();
+
+               String streamV = readString();
+               if (!streamV.equals("stream")) {
+                       addValidationError(new ValidationError(ERROR_SYNTAX_STREAM_DELIMITER, "Expected 'stream' keyword but found '" + streamV +"'"));
+               }
+               // 13 = CR, 10 = LF
+               int nextChar = pdfSource.read();
+               if ( !((nextChar == 13 && pdfSource.peek() == 10) || nextChar == 10)) {
+                       addValidationError(new ValidationError(ERROR_SYNTAX_STREAM_DELIMITER, "Expected 'EOL' after the stream keyword"));
+               }
+               // set the offset before stream
+               pdfSource.seek(startOffset);
+       }
+       
+       /**
+        * Checks that the 'endstream' keyword is preceded by an EOL.
+        * The source is moved 10 bytes back from its current offset, an EOL is expected at
+        * that position and the token read after it must be {@code endstream}.
+        * Each failure is recorded as a validation error.
+        *
+        * @throws IOException if the source can't be read or repositioned
+        */
+       protected void checkEndstreamKeyWord() throws IOException {
+               // presumably 10 = length of 'endstream' + the byte in front of it, which assumes
+               // that the source currently stands right after the keyword - TODO confirm
+               final long beforeKeyword = pdfSource.getOffset() - 10;
+               pdfSource.seek(beforeKeyword);
+
+               final boolean eolFound = nextIsEOL();
+               if (!eolFound) {
+                       addValidationError(new ValidationError(ERROR_SYNTAX_STREAM_DELIMITER, "Expected 'EOL' before the endstream keyword"));
+               }
+
+               final String keyword = readString();
+               final boolean isEndstream = keyword.equals("endstream");
+               if (!isEndstream) {
+                       addValidationError(new ValidationError(ERROR_SYNTAX_STREAM_DELIMITER, "Expected 'endstream' keyword but found '" + keyword +"'"));
+               }
        }
 
-       protected void extractTrailers() throws IOException {
-               SimpleCharStream scs = new 
SimpleCharStream(this.originalDocument.getInputStream());
-               ExtractorTokenManager extractor = new 
ExtractorTokenManager(scs);
-               extractor.parse();
-               ctx.setPdfExtractor(extractor);
+       /**
+        * Reads the next character and tells whether an EOL starts there.
+        * CR (13), LF (10) and CR LF are accepted; for CR LF both characters are consumed,
+        * in every other case exactly one character is consumed.
+        *
+        * @return true if the character read is CR or LF
+        * @throws IOException if the source can't be read
+        */
+       protected boolean nextIsEOL() throws IOException {
+               final int first = pdfSource.read();
+               if ( first == 10 ) {
+                       return true;
+               }
+               if ( first != 13 ) {
+                       return false;
+               }
+               // CR found: swallow the LF of a CR LF pair when there is one
+               if ( pdfSource.peek() == 10 ) {
+                       pdfSource.read();
+               }
+               return true;
+       }
+
+       /**
+        * Reads one character from the source and compares it with the space character.
+        * The character is consumed whatever the result is.
+        *
+        * @return true if the character read is a space
+        * @throws IOException if the source can't be read
+        */
+       protected boolean nextIsSpace() throws IOException {
+               final int next = pdfSource.read();
+               return next == ' ';
        }
 
        @Override
-       public PDDocument getPDDocument() throws IOException {
-               document.setResult(validationResult);
-               // Add XMP MetaData
-               return document;
+       /**
+        * Delegates to {@link BaseParser#parseCOSArray()} and records a validation error
+        * when the parsed array contains more than MAX_ARRAY_ELEMENTS elements.
+        * The array is returned as the parent implementation produced it.
+        */
+       protected COSArray parseCOSArray() throws IOException {
+               final COSArray array = super.parseCOSArray();
+               if (array == null) {
+                       return null;
+               }
+
+               final int elementCount = array.size();
+               if (elementCount > MAX_ARRAY_ELEMENTS) {
+                       addValidationError(new ValidationError(ERROR_SYNTAX_ARRAY_TOO_LONG, "Array too long : " + elementCount));
+               }
+               return array;
        }
-       
-       public PreflightDocument getPreflightDocument() throws IOException {
-               return (PreflightDocument)getPDDocument();
+
+       /**
+        * Delegates to {@link BaseParser#parseCOSName()} and records a validation error
+        * when the byte length of the parsed name is greater than MAX_NAME_SIZE.
+        * The name is returned as the parent implementation produced it.
+        */
+       @Override
+       protected COSName parseCOSName() throws IOException {
+               final COSName name = super.parseCOSName();
+               if (name != null) {
+                       // NOTE(review): getBytes() encodes with the platform default charset, so the
+                       // measured length may vary between platforms for non ASCII names - confirm
+                       final int byteLength = name.getName().getBytes().length;
+                       if (byteLength > MAX_NAME_SIZE) {
+                               addValidationError(new ValidationError(ERROR_SYNTAX_NAME_TOO_LONG, "Name too long"));
+                       }
+               }
+               return name;
+       }
+
+       /**
+        * Checks a hexadecimal string and then parses it with {@link BaseParser#parseCOSString()}.
+        * When the string starts with '&lt;', its content is scanned up to the closing '&gt;':
+        * white space is ignored, every other character must be a hexadecimal digit and the
+        * number of digits must be even. Once it is done, the offset is reset to the beginning
+        * of the string and the parent implementation does the real parsing. The length of
+        * the parsed string is finally compared with MAX_STRING_LENGTH.
+        * Each failure is recorded as a validation error.
+        *
+        * @return the string returned by the parent implementation
+        * @throws IOException if the source can't be read or repositioned, or if the parent parser fails
+        */
+       protected COSString parseCOSString() throws IOException
+       {
+               // offset reminder
+               final long offset = pdfSource.getOffset();
+               int count = 0;
+               if (pdfSource.read() == '<') {
+                       int nextChar;
+                       while ((nextChar = pdfSource.read()) != '>') {
+                               if (isHexaStringWhiteSpace(nextChar)) {
+                                       // white space is allowed between the digits and isn't counted
+                                       continue;
+                               }
+                               if (Character.digit((char)nextChar, 16) >= 0) {
+                                       count++;
+                               } else {
+                                       // also reached at the end of the file (read() == -1), which stops the scan
+                                       addValidationError(new ValidationError(ERROR_SYNTAX_HEXA_STRING_INVALID, "Hexa String must have only Hexadecimal Characters (found '" + (char)nextChar +"')" ));
+                                       break;
+                               }
+                       }
+               }
+
+               if (count % 2 != 0) {
+                       addValidationError(new ValidationError(ERROR_SYNTAX_HEXA_STRING_EVEN_NUMBER, "Hexa string shall contain even number of non white space char"));
+               }
+
+               // reset the offset to parse the COSString
+               pdfSource.seek(offset);
+               COSString result = super.parseCOSString();
+
+               if ( result.getString().length() > MAX_STRING_LENGTH) {
+                       addValidationError(new ValidationError(ERROR_SYNTAX_HEXA_STRING_TOO_LONG, "Hexa string is too long"));
+               }
+               return result;
+       }
+
+       /**
+        * @return true if the given character is one of the PDF white space characters
+        * (NUL, HT, LF, FF, CR or SPACE)
+        */
+       private static boolean isHexaStringWhiteSpace(int c)
+       {
+               return c == 0 || c == 9 || c == 10 || c == 12 || c == 13 || c == 32;
+       }
+
+       /**
+        * Calls {@link BaseParser#parseDirObject()} and checks the parsed object against limits:
+        * <ul>
+        * <li>a COSFloat must lie between MAX_NEGATIVE_FLOAT and MAX_POSITIVE_FLOAT</li>
+        * <li>any other COSNumber must fit in the range of an int</li>
+        * <li>a COSDictionary must not have more than MAX_DICT_ENTRIES entries</li>
+        * </ul>
+        * Each violation is recorded as a validation error, the parsed object is returned unchanged.
+        *
+        * @return the object returned by the parent implementation
+        * @throws IOException if the parent parser fails
+        */
+       protected COSBase parseDirObject() throws IOException
+       {
+               final COSBase result = super.parseDirObject();
+
+               if (result instanceof COSNumber) {
+                       final COSNumber number = (COSNumber)result;
+                       if (number instanceof COSFloat) {
+                               // primitive on purpose: the value is only compared and printed, no need to box it
+                               final double real = number.doubleValue();
+                               if (real > MAX_POSITIVE_FLOAT || real < MAX_NEGATIVE_FLOAT) {
+                                       addValidationError(new ValidationError(ERROR_SYNTAX_NUMERIC_RANGE, "Float is too long or too small: " + real));
+                               }
+                       } else {
+                               final long numAsLong = number.longValue();
+                               if (numAsLong > Integer.MAX_VALUE || numAsLong < Integer.MIN_VALUE) {
+                                       addValidationError(new ValidationError(ERROR_SYNTAX_NUMERIC_RANGE, "Numeric is too long or too small: " + numAsLong));
+                               }
+                       }
+               }
+
+               if (result instanceof COSDictionary) {
+                       final COSDictionary dic = (COSDictionary)result;
+                       if (dic.size() > MAX_DICT_ENTRIES) {
+                               addValidationError(new ValidationError(ERROR_SYNTAX_TOO_MANY_ENTRIES, "Too Many Entries In Dictionary"));
+                       }
+               }
+               return result;
+       }
+
+       /**
+        * Returns the object identified by the given object and generation numbers, parsing
+        * it on demand when the pool entry of the document doesn't hold an object yet.
+        * The xref entry of the object decides what is done:
+        * <ul>
+        * <li>no entry: the object is set to COSNull.NULL</li>
+        * <li>positive value: it is used as the file offset of an indirect object. The
+        * 'N G obj' header, the optional stream and the 'endobj' keyword are parsed there and
+        * syntax violations are recorded as validation errors</li>
+        * <li>any other value: its negation is used as the number of an object stream, which
+        * is parsed and whose referenced objects are registered in the pool</li>
+        * </ul>
+        *
+        * @param objNr object number
+        * @param objGenNr generation number
+        * @param requireExistingNotCompressedObj if true, an xref entry that is missing or
+        * not greater than zero is reported as a validation error and aborts the parsing
+        * @return the object held by the pool entry once the method is done
+        * @throws IOException if the object found doesn't match the xref entry, if a stream
+        * isn't preceded by a dictionary, if 'endobj' is missing or if a stream can't be
+        * decrypted; a SyntaxValidationException is thrown for a rejected xref entry and
+        * for a broken 'obj' marker
+        */
+       protected COSBase parseObjectDynamically( int objNr, int objGenNr, 
boolean requireExistingNotCompressedObj ) throws IOException {
+               // ---- create object key and get object (container) from pool
+               final COSObjectKey objKey    = new COSObjectKey( objNr, 
objGenNr );
+               final COSObject    pdfObject = document.getObjectFromPool( 
objKey );
+
+               if ( pdfObject.getObject() == null )
+               {
+                       // not previously parsed
+                       // ---- read offset or object stream object number from 
xref table
+                       Long offsetOrObjstmObNr = 
xrefTrailerResolver.getXrefTable().get( objKey );
+
+                       // sanity test to circumvent loops with broken documents
+                       if ( requireExistingNotCompressedObj && ( ( 
offsetOrObjstmObNr == null ) || ( offsetOrObjstmObNr <= 0 ) ) )     {   
+                               addValidationError(new 
ValidationError(ERROR_SYNTAX_NEGATIVE_OFFSET, "Object must be defined and must 
not be compressed object: " +     objKey.getNumber() + ":" + 
objKey.getGeneration()));
+                               throw new SyntaxValidationException( "Object 
must be defined and must not be compressed object: " +     objKey.getNumber() + 
":" + objKey.getGeneration(), validationResult);
+                       }
+
+                       if ( offsetOrObjstmObNr == null )       {
+                               // not defined object -> NULL object (Spec. 
1.7, chap. 3.2.9)
+                               pdfObject.setObject( COSNull.NULL );
+                       }       else if ( offsetOrObjstmObNr > 0 )      {
+                               // offset of indirect object in file
+                               // ---- go to object start
+                               setPdfSource( offsetOrObjstmObNr );
+                               // ---- we must have an indirect object
+                               int readObjNr  = 0;
+                               int readObjGen = 0;
+
+                               // strict check first: the whole line must be '<number> <generation> obj'
+                               // with exactly one white space character between the three parts
+                               long offset = pdfSource.getOffset();
+                               String line = readLine();
+                               Pattern pattern = 
Pattern.compile("(\\d+)\\s(\\d+)\\sobj");
+                               Matcher matcher = pattern.matcher(line);
+                               if (matcher.matches()) {
+                                       readObjNr = 
Integer.parseInt(matcher.group(1));
+                                       readObjGen = 
Integer.parseInt(matcher.group(2));
+                               } else {
+
+                                       // the strict form failed: report it, then read the header again
+                                       // from the same offset with the tolerant readInt() calls
+                                       addValidationError(new 
ValidationError(ERROR_SYNTAX_OBJ_DELIMITER, "Single space expected"));
+                                       // reset pdfSource cursor to read 
object information
+                                       pdfSource.seek(offset);
+                                       readObjNr  = readInt();
+                                       readObjGen = readInt();
+                                       for ( char c : OBJ_MARKER )
+                                       {
+                                               if ( pdfSource.read() != c )
+                                               {
+                                                       addValidationError(new 
ValidationError(ERROR_SYNTAX_OBJ_DELIMITER, "Expected pattern '" + new String( 
OBJ_MARKER )  +   " but missed at character '" + c + "'" ));
+                                                       throw new 
SyntaxValidationException( "Expected pattern '" + new String( OBJ_MARKER )  + " 
but missed at character '" + c + "'" , validationResult);
+                                               }
+                                       }
+                               }
+
+                               // ---- consistency check
+                               if ( ( readObjNr != objKey.getNumber() ) ||     
( readObjGen != objKey.getGeneration() ) ) 
+                               {
+                                       throw new IOException( "XREF for " + 
objKey.getNumber() + ":" + objKey.getGeneration() +" points to wrong object: " 
+ readObjNr + ":" + readObjGen );
+                               }
+
+                               skipSpaces();
+                               COSBase pb           = parseDirObject();
+                               skipSpaces();                           
+                               // offset of the token that follows the object, used below to
+                               // re-read the stream and to look at the byte in front of 'endobj'
+                               long endObjectOffset = pdfSource.getOffset();
+                               String  endObjectKey = readString();
+
+                               if ( endObjectKey.equals( "stream" ) ) 
+                               {
+                                       // go back to the 'stream' keyword, parseCOSStream reads it again
+                                       pdfSource.seek(endObjectOffset);
+                                       if( pb instanceof COSDictionary )
+                                       {
+                                               COSStream stream = 
parseCOSStream( (COSDictionary)pb, getDocument().getScratchFile() );
+                                               if ( securityHandler != null )
+                                               {
+                                                       try 
+                                                       {
+                                                               
securityHandler.decryptStream(stream, objNr, objGenNr );
+                                                       } 
+                                                       catch ( 
CryptographyException ce ) 
+                                                       {
+                                                               throw new 
IOException( "Error decrypting stream object " + objNr + ": " + ce.getMessage()
+                                                                               
/*, ce // TODO: remove remark with Java 1.6 */ );
+                                                       }
+                                               }
+                                               pb = stream;
+                                       }
+                                       else
+                                       {
+                                               // this is not legal
+                                               // the combination of a dict 
and the stream/endstream forms a complete stream object
+                                               throw new IOException( "Stream 
not preceded by dictionary (offset: " + offsetOrObjstmObNr + ")." );
+                                       }
+                                       skipSpaces();
+                                       endObjectOffset = pdfSource.getOffset();
+                                       endObjectKey = readString();
+
+                                       // we have case with a second 
'endstream' before endobj
+                                       if ( ! endObjectKey.startsWith( 
"endobj" ) )
+                                       {
+                                               if ( endObjectKey.startsWith( 
"endstream" ) ) 
+                                               {
+                                                       endObjectKey = 
endObjectKey.substring( 9 ).trim();
+                                                       if ( 
endObjectKey.length() == 0 )
+                                                       {
+                                                               // no other 
characters in extra endstream line
+                                                               // NOTE(review): endObjectOffset isn't refreshed here, so the
+                                                               // 'EOL before endobj' check below looks in front of this extra
+                                                               // 'endstream' and not in front of 'endobj' - confirm intent
+                                                               endObjectKey = 
readString();    // read next line 
+                                                       }
+                                               }
+                                       }
+                               } else if ( securityHandler != null )
+                               {
+                                       // decrypt
+                                       // only strings held directly by the object are handled:
+                                       // the object itself, dictionary values and array elements
+                                       if ( pb instanceof COSString )
+                                       {
+                                               decrypt( (COSString) pb, objNr, 
objGenNr );
+                                       }
+                                       else if ( pb instanceof COSDictionary )
+                                       {
+                                               for( Entry<COSName,COSBase> 
entry : ((COSDictionary) pb).entrySet() )
+                                               {
+                                                       // TODO: specially 
handle 'Contents' entry of signature dictionary like in 
SecurityHandler#decryptDictionary
+                                                       if ( entry.getValue() 
instanceof COSString )
+                                                       {
+                                                               decrypt( 
(COSString) entry.getValue(), objNr, objGenNr );
+                                                       }
+                                               }
+                                       }
+                                       else if ( pb instanceof COSArray )
+                                       {
+                                               final COSArray array = 
(COSArray) pb;
+                                               for( int aIdx = 0, len = 
array.size(); aIdx < len; aIdx++ )
+                                               {
+                                                       if ( array.get( aIdx ) 
instanceof COSString )
+                                                       {
+                                                               decrypt( 
(COSString) array.get( aIdx ), objNr, objGenNr );
+                                                       }
+                                               }
+                                       }
+                               }
+
+                               pdfObject.setObject( pb );
+
+                               if ( ! endObjectKey.startsWith( "endobj" ) )
+                               {
+                                       throw new IOException( "Object (" + 
readObjNr + ":" + readObjGen + ") at offset " + offsetOrObjstmObNr + " does not 
end with 'endobj'." );
+                               } else {
+                                       // look at the byte in front of the recorded token offset,
+                                       // then restore the cursor to where it was
+                                       offset = pdfSource.getOffset();
+                                       pdfSource.seek(endObjectOffset-1);
+                                       if (!nextIsEOL()) {
+                                               addValidationError(new 
ValidationError(PreflightConstants.ERROR_SYNTAX_OBJ_DELIMITER, "EOL expected 
before the 'endobj' keyword"));
+                                       }
+                                       pdfSource.seek(offset);
+                               }
+
+                               // the cursor is back where the last readString() left it
+                               if (!nextIsEOL()) {
+                                       addValidationError(new 
ValidationError(PreflightConstants.ERROR_SYNTAX_OBJ_DELIMITER, "EOL expected 
after the 'endobj' keyword"));
+                               }
+
+                               releasePdfSourceInputStream();
+                       }       else {
+                               // xref value is object nr of object stream 
containing object to be parsed;
+                               // since our object was not found it means 
object stream was not parsed so far
+                               final int     objstmObjNr   = (int) ( - 
offsetOrObjstmObNr );
+                               final COSBase objstmBaseObj = 
parseObjectDynamically( objstmObjNr, 0, true );
+                               if ( objstmBaseObj instanceof COSStream )
+                               {
+                                       // parse object stream
+                                       PDFObjectStreamParser parser =  new 
PDFObjectStreamParser( (COSStream) objstmBaseObj, document, forceParsing );
+                                       parser.parse();
+
+                                       // get set of object numbers referenced 
for this object stream
+                                       final Set<Long> refObjNrs = 
xrefTrailerResolver.getContainedObjectNumbers( objstmObjNr );
+
+                                       // register all objects which are 
referenced to be contained in object stream
+                                       for( COSObject next : 
parser.getObjects() )
+                                       {
+                                               COSObjectKey stmObjKey = new 
COSObjectKey( next );
+                                               if ( refObjNrs.contains( 
stmObjKey.getNumber() ) )
+                                               {
+                                                       COSObject stmObj = 
document.getObjectFromPool( stmObjKey );
+                                                       stmObj.setObject( 
next.getObject() );
+                                               }
+                                       }
+                               }
+                       }
+               }   
+               return pdfObject.getObject();
+       }
+
+       /**
+        * Delegates the search to the parent implementation. When the searched pattern is the
+        * EOF_MARKER and it has been found, the bytes of the buffer located after the marker
+        * are checked as well: nothing but a single EOL (CR, LF or CR LF) may follow the
+        * last %%EOF sequence, anything else is recorded as a validation error.
+        *
+        * @param pattern the pattern to look for
+        * @param buf the buffer to search in
+        * @param endOff the end offset handed over to the parent implementation
+        * @return the offset returned by the parent implementation
+        */
+       protected int lastIndexOf( final char[] pattern, final byte[] buf, final int endOff )
+       {
+               final int offset = super.lastIndexOf(pattern, buf, endOff);
+               if (offset > 0 && Arrays.equals(pattern, EOF_MARKER)) {
+                       // this is the offset of the last %%EOF sequence.
+                       // nothing should be present after this sequence.
+                       final int tmpOffset = offset + pattern.length;
+                       final int trailing = buf.length - tmpOffset;
+                       // EOL is authorized: 10 = LF, 13 = CR.
+                       // The number of trailing bytes is tested before any byte is read, so
+                       // buf[tmpOffset+1] is never accessed when only one byte follows the marker.
+                       final boolean nothingOrSingleEol = trailing == 0
+                                       || (trailing == 1 && (buf[tmpOffset] == 10 || buf[tmpOffset] == 13))
+                                       || (trailing == 2 && buf[tmpOffset] == 13 && buf[tmpOffset+1] == 10);
+                       if (!nothingOrSingleEol) {
+                               addValidationError(new ValidationError(ERROR_SYNTAX_TRAILER_EOF,"File contains data after the last %%EOF sequence"));
+                       }
+               }
+               return offset;
        }
 }
\ No newline at end of file

Modified: 
pdfbox/trunk/preflight/src/main/java/org/apache/pdfbox/preflight/process/TrailerValidationProcess.java
URL: 
http://svn.apache.org/viewvc/pdfbox/trunk/preflight/src/main/java/org/apache/pdfbox/preflight/process/TrailerValidationProcess.java?rev=1389604&r1=1389603&r2=1389604&view=diff
==============================================================================
--- 
pdfbox/trunk/preflight/src/main/java/org/apache/pdfbox/preflight/process/TrailerValidationProcess.java
 (original)
+++ 
pdfbox/trunk/preflight/src/main/java/org/apache/pdfbox/preflight/process/TrailerValidationProcess.java
 Mon Sep 24 21:28:34 2012
@@ -53,7 +53,6 @@ import org.apache.pdfbox.preflight.Prefl
 import org.apache.pdfbox.preflight.ValidationResult.ValidationError;
 import org.apache.pdfbox.preflight.exception.ValidationException;
 import org.apache.pdfbox.preflight.utils.COSUtils;
-import org.apache.pdfbox.preflight.utils.PdfElementParser;
 
 public class TrailerValidationProcess extends AbstractProcess {
 
@@ -89,26 +88,14 @@ public class TrailerValidationProcess ex
         * @param result
         */
        protected void checkTrailersForLinearizedPDF14(PreflightContext ctx) {
-               List<String> lTrailers = ctx.getPdfExtractor().getAllTrailers();
-
-               if (lTrailers.isEmpty()) {
+               COSDictionary first = 
ctx.getXrefTableResolver().getFirstTrailer();
+               if (first == null) {
                        addValidationError(ctx, new 
ValidationError(ERROR_SYNTAX_TRAILER, "There are no trailer in the PDF file"));
-               } else {
-                       String firstTrailer = lTrailers.get(0);
-                       String lastTrailer = lTrailers.get(lTrailers.size() - 
1);
-
-                       COSDictionary first = null;
-                       COSDictionary last = null;
+               } else {        
+                       COSDictionary last = 
ctx.getXrefTableResolver().getLastTrailer();
                        COSDocument cosDoc = null;
                        try {
                                cosDoc = new COSDocument();
-
-                               PdfElementParser parser1 = new 
PdfElementParser(cosDoc, firstTrailer.getBytes());
-                               first = parser1.parseAsDictionary();
-
-                               PdfElementParser parser2 = new 
PdfElementParser(cosDoc, lastTrailer.getBytes());
-                               last = parser2.parseAsDictionary();
-
                                checkMainTrailer(ctx, first);
                                if (!compareIds(first, last, cosDoc)) {
                                        addValidationError(ctx, new 
ValidationError(
@@ -194,7 +181,7 @@ public class TrailerValidationProcess ex
                if (idFirst == null || idLast == null) {
                        return false;
                }
-               
+
                // ---- cast two COSBase to COSArray.
                COSArray af = COSUtils.getAsArray(idFirst, cosDocument);
                COSArray al = COSUtils.getAsArray(idLast, cosDocument);
@@ -260,7 +247,7 @@ public class TrailerValidationProcess ex
                                id = true;
                        }
                }
-               
+
                COSDocument cosDocument = ctx.getDocument().getDocument();
                // PDF/A Trailer dictionary must contain the ID key
                if (!id) {

Modified: 
pdfbox/trunk/preflight/src/test/java/org/apache/pdfbox/preflight/TestInvalidDirectory.java
URL: 
http://svn.apache.org/viewvc/pdfbox/trunk/preflight/src/test/java/org/apache/pdfbox/preflight/TestInvalidDirectory.java?rev=1389604&r1=1389603&r2=1389604&view=diff
==============================================================================
--- 
pdfbox/trunk/preflight/src/test/java/org/apache/pdfbox/preflight/TestInvalidDirectory.java
 (original)
+++ 
pdfbox/trunk/preflight/src/test/java/org/apache/pdfbox/preflight/TestInvalidDirectory.java
 Mon Sep 24 21:28:34 2012
@@ -30,6 +30,7 @@ import javax.activation.FileDataSource;
 
 import junit.framework.Assert;
 
+import org.apache.pdfbox.preflight.exception.SyntaxValidationException;
 import org.apache.pdfbox.preflight.parser.PreflightParser;
 import org.junit.Test;
 import org.junit.runner.RunWith;
@@ -51,13 +52,25 @@ public class TestInvalidDirectory {
 
        @Test
        public void validate () throws Exception {
+               PreflightDocument document = null;
+
                System.out.println(target);
-               PreflightParser parser = new PreflightParser(new 
FileDataSource(target));
-               parser.parse();
-               PreflightDocument document = (PreflightDocument) 
parser.getPDDocument();
-               document.validate();
-               Assert.assertFalse(document.getResult().isValid());
-               document.close();
+               ValidationResult result = null;
+               try {
+                       PreflightParser parser = new PreflightParser(new 
FileDataSource(target));
+                       parser.parse();
+                       document = (PreflightDocument)parser.getPDDocument();
+                       document.validate();
+                       result = document.getResult();
+               } catch (SyntaxValidationException e) {
+                       result = e.getResult();
+               } finally {
+                       if (document != null) {
+                               document.close();
+                       }
+               }
+               Assert.assertFalse("Test of " + target, result.isValid());
+
        }
 
        @Parameters

Modified: 
pdfbox/trunk/preflight/src/test/java/org/apache/pdfbox/preflight/TestIsartorValidationFromClasspath.java
URL: 
http://svn.apache.org/viewvc/pdfbox/trunk/preflight/src/test/java/org/apache/pdfbox/preflight/TestIsartorValidationFromClasspath.java?rev=1389604&r1=1389603&r2=1389604&view=diff
==============================================================================
--- 
pdfbox/trunk/preflight/src/test/java/org/apache/pdfbox/preflight/TestIsartorValidationFromClasspath.java
 (original)
+++ 
pdfbox/trunk/preflight/src/test/java/org/apache/pdfbox/preflight/TestIsartorValidationFromClasspath.java
 Mon Sep 24 21:28:34 2012
@@ -37,6 +37,7 @@ import junit.framework.Assert;
 
 import org.apache.commons.io.IOUtils;
 import org.apache.pdfbox.preflight.ValidationResult.ValidationError;
+import org.apache.pdfbox.preflight.exception.SyntaxValidationException;
 import org.apache.pdfbox.preflight.exception.ValidationException;
 import org.apache.pdfbox.preflight.parser.PreflightParser;
 import org.junit.AfterClass;
@@ -90,13 +91,21 @@ public class TestIsartorValidationFromCl
                try {
                        System.out.println(path);
                        InputStream input = 
this.getClass().getResourceAsStream(path);
-                       PreflightParser parser = new PreflightParser(new 
org.apache.pdfbox.preflight.utils.ByteArrayDataSource(input));
-                       parser.parse();
-                       document = (PreflightDocument)parser.getPDDocument();
-                       document.validate();
-                       ValidationResult result = document.getResult();
+
+                       ValidationResult result = null;
+                       try {
+                               PreflightParser parser = new 
PreflightParser(new 
org.apache.pdfbox.preflight.utils.ByteArrayDataSource(input));
+                               parser.parse();
+                               document = 
(PreflightDocument)parser.getPDDocument();
+                               document.validate();
+                               result = document.getResult();
+                       } catch (SyntaxValidationException e) {
+                               result = e.getResult();
+                       }
+
                        Assert.assertFalse(path + " : Isartor file should be 
invalid (" + path + ")", result.isValid());
                        Assert.assertTrue(path + " : Should find at least one 
error", result.getErrorsList().size() > 0);
+
                        // could contain more than one error
                        boolean found = false;
                        for (ValidationError error : result.getErrorsList()) {
@@ -104,8 +113,7 @@ public class TestIsartorValidationFromCl
                                        found = true;
                                }
                                if (isartorResultFile != null) {
-                                       String log = path.replace(".pdf", "") + 
"#" 
-                                                       
+error.getErrorCode()+"#"+error.getDetails()+"\n";
+                                       String log = path.replace(".pdf", "") + 
"#" + error.getErrorCode()+"#"+error.getDetails()+"\n";
                                        isartorResultFile.write(log.getBytes());
                                }
                        }
@@ -145,7 +153,7 @@ public class TestIsartorValidationFromCl
                IOUtils.closeQuietly(expected);
                // prepare config
                List<Object[]> data = new ArrayList<Object[]>();
-        InputStream is = Class.class.getResourceAsStream("/Isartor testsuite.list");
+               InputStream is = Class.class.getResourceAsStream("/Isartor testsuite.list");
                if (is != null)
                {
                        BufferedReader reader = new BufferedReader(new InputStreamReader(is));

Modified: pdfbox/trunk/preflight/src/test/java/org/apache/pdfbox/preflight/TestValidDirectory.java
URL: http://svn.apache.org/viewvc/pdfbox/trunk/preflight/src/test/java/org/apache/pdfbox/preflight/TestValidDirectory.java?rev=1389604&r1=1389603&r2=1389604&view=diff
==============================================================================
--- pdfbox/trunk/preflight/src/test/java/org/apache/pdfbox/preflight/TestValidDirectory.java (original)
+++ pdfbox/trunk/preflight/src/test/java/org/apache/pdfbox/preflight/TestValidDirectory.java Mon Sep 24 21:28:34 2012
@@ -30,6 +30,7 @@ import javax.activation.FileDataSource;
 
 import junit.framework.Assert;
 
+import org.apache.pdfbox.preflight.exception.SyntaxValidationException;
 import org.apache.pdfbox.preflight.parser.PreflightParser;
 import org.junit.Test;
 import org.junit.runner.RunWith;
@@ -38,58 +39,62 @@ import org.junit.runners.Parameterized.P
 
 @RunWith(Parameterized.class)
 public class TestValidDirectory {
-  
-  protected File target = null;
-  
-  public TestValidDirectory (File file) {
-    this.target = file;
-  }
-  
-  @Test
-  public void validate () throws Exception {
-       PreflightDocument document = null;
-       try {
-               System.out.println(target);
-               PreflightParser parser = new PreflightParser(new FileDataSource(target));
-               parser.parse();
-               document = (PreflightDocument) parser.getPDDocument();
-               document.validate();
-               Assert.assertTrue("Validation of " + target ,document.getResult().isValid());
-       } finally {
-               if (document != null) {
-                       document.close();
-               }
-       }
-  }
-  
-  @Parameters
-  public static Collection<Object[]> initializeParameters() throws Exception {
-    // check directory
-    File directory = null;
-    String pdfPath = System.getProperty("pdfa.valid", null);
-    if ("${user.pdfa.valid}".equals(pdfPath)) {pdfPath=null;}
-    if (pdfPath!=null) {
-      directory = new File(pdfPath);
-      if (!directory.exists()) throw new Exception ("directory does not exists : "+directory.getAbsolutePath());
-      if (!directory.isDirectory()) throw new Exception ("not a directory : "+directory.getAbsolutePath());
-    } else {
-      System.err.println("System property 'pdfa.valid' not defined, will not run TestValidaDirectory");
-    }
-    // create list
-    if (directory==null) {
-      return new ArrayList<Object[]>(0);
-    } else {
-      File [] files = directory.listFiles();
-      List<Object[]> data = new ArrayList<Object[]>(files.length);
-      for (File file : files) {
-        if (file.isFile()) {
-          data.add(new Object [] {file});
-        }
-      }
-      return data;
-    }
-  }
-  
-  
-  
+
+       protected File target = null;
+
+       public TestValidDirectory (File file) {
+               this.target = file;
+       }
+
+       @Test
+       public void validate () throws Exception {
+               PreflightDocument document = null;
+               System.out.println(target);
+               ValidationResult result = null;
+               try {
+                       PreflightParser parser = new PreflightParser(new FileDataSource(target));
+                       parser.parse();
+                       document = (PreflightDocument)parser.getPDDocument();
+                       document.validate();
+                       result = document.getResult();
+               } catch (SyntaxValidationException e) {
+                       result = e.getResult();
+               } finally {
+                       if (document != null) {
+                               document.close();
+                       }
+               }
+               Assert.assertTrue("Validation of " + target , result.isValid());
+       }
+
+       @Parameters
+       public static Collection<Object[]> initializeParameters() throws Exception {
+               // check directory
+               File directory = null;
+               String pdfPath = System.getProperty("pdfa.valid", null);
+               if ("${user.pdfa.valid}".equals(pdfPath)) {pdfPath=null;}
+               if (pdfPath!=null) {
+                       directory = new File(pdfPath);
+                       if (!directory.exists()) throw new Exception ("directory does not exists : "+directory.getAbsolutePath());
+                       if (!directory.isDirectory()) throw new Exception ("not a directory : "+directory.getAbsolutePath());
+               } else {
+                       System.err.println("System property 'pdfa.valid' not defined, will not run TestValidaDirectory");
+               }
+               // create list
+               if (directory==null) {
+                       return new ArrayList<Object[]>(0);
+               } else {
+                       File [] files = directory.listFiles();
+                       List<Object[]> data = new ArrayList<Object[]>(files.length);
+                       for (File file : files) {
+                               if (file.isFile()) {
+                                       data.add(new Object [] {file});
+                               }
+                       }
+                       return data;
+               }
+       }
+
+
+
 }

Modified: pdfbox/trunk/preflight/src/test/resources/expected_errors.txt
URL: http://svn.apache.org/viewvc/pdfbox/trunk/preflight/src/test/resources/expected_errors.txt?rev=1389604&r1=1389603&r2=1389604&view=diff
==============================================================================
--- pdfbox/trunk/preflight/src/test/resources/expected_errors.txt (original)
+++ pdfbox/trunk/preflight/src/test/resources/expected_errors.txt Mon Sep 24 21:28:34 2012
@@ -23,12 +23,12 @@ isartor-6-1-2-t01-fail-a.pdf=1.1
 isartor-6-1-2-t02-fail-a.pdf=1.1
 isartor-6-1-3-t01-fail-a.pdf=1.4.1
 isartor-6-1-3-t02-fail-a.pdf=1.4.2
-isartor-6-1-3-t03-fail-a.pdf=1.4
+isartor-6-1-3-t03-fail-a.pdf=1.4.10 // 1.4 due to JavaCC
 isartor-6-1-3-t04-fail-a.pdf=1.4.6 // Revoir le parser pour les linearized file (LIGNE VIDE qui encadre le body?????)
 isartor-6-1-4-t01-fail-a.pdf=1.3
 isartor-6-1-4-t02-fail-a.pdf=1.3
-isartor-6-1-6-t01-fail-a.pdf=1.2
-isartor-6-1-7-t01-fail-a.pdf=1.2
+isartor-6-1-6-t01-fail-a.pdf=1.0.11 // String Hex error - before was 1.2 due to JavaCC validation
+isartor-6-1-7-t01-fail-a.pdf=1.2.2 // Stream keyword must be followed by CR&LF or LF only- before was 1.2 due to JavaCC validation
 isartor-6-1-7-t02-fail-a.pdf=1.2.2
 isartor-6-1-7-t03-fail-a.pdf=1.2.5
 isartor-6-1-7-t04-fail-a.pdf=1.2.6
@@ -36,7 +36,7 @@ isartor-6-1-7-t04-fail-b.pdf=1.2.6
 isartor-6-1-7-t04-fail-c.pdf=1.2.6
 isartor-6-1-8-t01-fail-a.pdf=1.2.1
 isartor-6-1-8-t02-fail-a.pdf=1.2.1
-isartor-6-1-8-t03-fail-a.pdf=1.2
+isartor-6-1-8-t03-fail-a.pdf=1.2.1 // before was 1.2 due to JavaCC
 isartor-6-1-8-t04-fail-a.pdf=1.2.1
 isartor-6-1-8-t05-fail-a.pdf=1.2.1
 isartor-6-1-8-t06-fail-a.pdf=1.2.1

svn commit: r1389604 - in /pdfbox/trunk/preflight: ./ src/main/java/org/apache/pdfbox/preflight/ src/main/java/org/apache/pdfbox/preflight/exception/ src/main/java/org/apache/pdfbox/preflight/parser/ src/main/java/org/apache/pdfbox/preflight/process/ s...

Reply via email to