Author: lehmi
Date: Wed Apr  4 17:05:42 2018
New Revision: 1828365

URL: http://svn.apache.org/viewvc?rev=1828365&view=rev
Log:
PDFBOX-4097: try to decrypt encrypted object streams when rebuilding the trailer

Modified:
    pdfbox/branches/2.0/pdfbox/src/main/java/org/apache/pdfbox/pdfparser/COSParser.java
    pdfbox/branches/2.0/pdfbox/src/main/java/org/apache/pdfbox/pdfparser/PDFParser.java

Modified: pdfbox/branches/2.0/pdfbox/src/main/java/org/apache/pdfbox/pdfparser/COSParser.java
URL: http://svn.apache.org/viewvc/pdfbox/branches/2.0/pdfbox/src/main/java/org/apache/pdfbox/pdfparser/COSParser.java?rev=1828365&r1=1828364&r2=1828365&view=diff
==============================================================================
--- pdfbox/branches/2.0/pdfbox/src/main/java/org/apache/pdfbox/pdfparser/COSParser.java (original)
+++ pdfbox/branches/2.0/pdfbox/src/main/java/org/apache/pdfbox/pdfparser/COSParser.java Wed Apr  4 17:05:42 2018
@@ -17,7 +17,9 @@
 package org.apache.pdfbox.pdfparser;
 
 import java.io.IOException;
+import java.io.InputStream;
 import java.io.OutputStream;
+import java.security.KeyStore;
 import java.util.ArrayList;
 import java.util.Arrays;
 import java.util.Collection;
@@ -45,10 +47,16 @@ import org.apache.pdfbox.cos.COSNumber;
 import org.apache.pdfbox.cos.COSObject;
 import org.apache.pdfbox.cos.COSObjectKey;
 import org.apache.pdfbox.cos.COSStream;
+import org.apache.pdfbox.io.IOUtils;
 import org.apache.pdfbox.io.RandomAccessRead;
 import org.apache.pdfbox.pdfparser.XrefTrailerResolver.XRefType;
+import org.apache.pdfbox.pdmodel.encryption.AccessPermission;
+import org.apache.pdfbox.pdmodel.encryption.DecryptionMaterial;
+import org.apache.pdfbox.pdmodel.encryption.InvalidPasswordException;
+import org.apache.pdfbox.pdmodel.encryption.PDEncryption;
+import org.apache.pdfbox.pdmodel.encryption.PublicKeyDecryptionMaterial;
 import org.apache.pdfbox.pdmodel.encryption.SecurityHandler;
-
+import org.apache.pdfbox.pdmodel.encryption.StandardDecryptionMaterial;
 
 import static org.apache.pdfbox.util.Charsets.ISO_8859_1;
 
@@ -85,7 +93,12 @@ public class COSParser extends BaseParse
     private final byte[] strmBuf    = new byte[ STRMBUFLEN ];
 
     protected final RandomAccessRead source;
-    
+
+    private AccessPermission accessPermission;
+    private InputStream keyStoreInputStream = null;
+    private String password = "";
+    private String keyAlias = null;
+
     /**
      * Only parse the PDF file minimally allowing access to basic information.
      */
@@ -144,6 +157,7 @@ public class COSParser extends BaseParse
     private Long lastEOFMarker = null;
     private List<Long> bfSearchXRefTablesOffsets = null;
     private List<Long> bfSearchXRefStreamsOffsets = null;
+    private PDEncryption encryption = null;
 
     /**
      * The security handler.
@@ -179,6 +193,25 @@ public class COSParser extends BaseParse
     }
 
     /**
+     * Constructor for encrypted pdfs.
+     * 
+     * @param source input representing the pdf.
+     * @param password password to be used for decryption.
+     * @param keyStore key store to be used for decryption when using public key security
+     * @param keyAlias alias to be used for decryption when using public key security
+     * 
+     */
+    public COSParser(RandomAccessRead source, String password, InputStream keyStore,
+            String keyAlias)
+    {
+        super(new RandomAccessSource(source));
+        this.source = source;
+        this.password = password;
+        this.keyAlias = keyAlias;
+        keyStoreInputStream = keyStore;
+    }
+
+    /**
      * Sets how many trailing bytes of PDF file are searched for EOF marker and 'startxref' marker. If not set we use
      * default value {@link #DEFAULT_TRAIL_BYTECOUNT}.
      * 
@@ -245,6 +278,15 @@ public class COSParser extends BaseParse
         {
             trailer = rebuildTrailer();
         }
+        else
+        {
+            // prepare decryption if necessary
+            prepareDecryption();
+            if (bfSearchCOSObjectKeyOffsets != null && !bfSearchCOSObjectKeyOffsets.isEmpty())
+            {
+                bfSearchForObjStreams();
+            }
+        }
         return trailer;
     }
 
@@ -1580,7 +1622,6 @@ public class COSParser extends BaseParse
                 bfSearchCOSObjectKeyOffsets.put(new COSObjectKey(lastObjectId, lastGenID),
                         lastObjOffset);
             }
-            bfSearchForObjStreams();
             // reestablish origin position
             source.seek(originOffset);
         }
@@ -1941,7 +1982,7 @@ public class COSParser extends BaseParse
             {
                 source.seek(offset);
                 long stmObjNumber = readObjectNumber();
-                readGenerationNumber();
+                int stmGenNumber = readGenerationNumber();
                 readExpectedString(OBJ_MARKER, true);
                 int nrOfObjects = 0;
                 byte[] numbersBytes = null;
@@ -1958,6 +1999,10 @@ public class COSParser extends BaseParse
                         continue;
                     }
                     stream = parseCOSStream(dict);
+                    if (securityHandler != null)
+                    {
+                        securityHandler.decryptStream(stream, stmObjNumber, stmGenNumber);
+                    }
                     is = stream.createInputStream();
                     numbersBytes = new byte[offsetFirstStream];
                     is.read(numbersBytes);
@@ -1995,6 +2040,7 @@ public class COSParser extends BaseParse
                             "Skipped corrupt stream: (" + stmObjNumber + " 0 at offset " + offset);
                     continue;
                 }
+                Map<COSObjectKey, Long> xrefOffset = xrefTrailerResolver.getXrefTable();
                 for (int i = 0; i < nrOfObjects; i++)
                 {
                     long objNumber = Long.parseLong(numbers[i * 2]);
@@ -2003,6 +2049,7 @@ public class COSParser extends BaseParse
                     if (existingOffset == null || offset > existingOffset)
                     {
                         bfSearchCOSObjectKeyOffsets.put(objKey, -stmObjNumber);
+                        xrefOffset.put(objKey, -stmObjNumber);
                     }
                 }
             }
@@ -2150,36 +2197,57 @@ public class COSParser extends BaseParse
             xrefTrailerResolver.setStartxref(0);
             trailer = xrefTrailerResolver.getTrailer();
             getDocument().setTrailer(trailer);
+            boolean searchForObjStreamsDone = false;
             if (!bfSearchForTrailer(trailer))
             {
                 // search for the different parts of the trailer dictionary
-                for (Entry<COSObjectKey, Long> entry : bfSearchCOSObjectKeyOffsets.entrySet())
+                if (!searchForTrailerItems(trailer))
                 {
-                    COSDictionary dictionary = retrieveCOSDictionary(entry.getKey(),
-                            entry.getValue());
-                    if (dictionary == null)
-                    {
-                        continue;
-                    }
-                    // document catalog
-                    if (isCatalog(dictionary))
-                    {
-                        trailer.setItem(COSName.ROOT, document.getObjectFromPool(entry.getKey()));
-                    }
-                    // info dictionary
-                    else if (isInfo(dictionary))
-                    {
-                        trailer.setItem(COSName.INFO, document.getObjectFromPool(entry.getKey()));
-                    }
-                    // encryption dictionary, if existing, is lost
-                    // We can't run "Algorithm 2" from PDF specification because of missing ID
+                    // root entry wasn't found, maybe it is part of an object stream
+                    bfSearchForObjStreams();
+                    searchForObjStreamsDone = true;
+                    // search again for the root entry
+                    searchForTrailerItems(trailer);
                 }
             }
+            // prepare decryption if necessary
+            prepareDecryption();
+            if (!searchForObjStreamsDone)
+            {
+                bfSearchForObjStreams();
+            }
         }
         trailerWasRebuild = true;
         return trailer;
     }
 
+    private boolean searchForTrailerItems(COSDictionary trailer) throws IOException
+    {
+        boolean rootFound = false;
+        for (Entry<COSObjectKey, Long> entry : bfSearchCOSObjectKeyOffsets.entrySet())
+        {
+            COSDictionary dictionary = retrieveCOSDictionary(entry.getKey(), entry.getValue());
+            if (dictionary == null)
+            {
+                continue;
+            }
+            // document catalog
+            if (isCatalog(dictionary))
+            {
+                trailer.setItem(COSName.ROOT, document.getObjectFromPool(entry.getKey()));
+                rootFound = true;
+            }
+            // info dictionary
+            else if (isInfo(dictionary))
+            {
+                trailer.setItem(COSName.INFO, document.getObjectFromPool(entry.getKey()));
+            }
+            // encryption dictionary, if existing, is lost
+            // We can't run "Algorithm 2" from PDF specification because of missing ID
+        }
+        return rootFound;
+    }
+
     private COSDictionary retrieveCOSDictionary(COSObject object) throws IOException
     {
         COSObjectKey key = new COSObjectKey((COSObject) object);
@@ -2688,9 +2756,8 @@ public class COSParser extends BaseParse
     }
 
     /**
-     * This will get the document that was parsed.  parse() must be called before this is called.
-     * When you are done with this document you must call close() on it to release
-     * resources.
+     * This will get the document that was parsed. The document must be parsed before this is called. When you are done
+     * with this document you must call close() on it to release resources.
      *
      * @return The document that was parsed.
      *
@@ -2700,18 +2767,51 @@ public class COSParser extends BaseParse
     {
         if( document == null )
         {
-            throw new IOException( "You must call parse() before calling getDocument()" );
+            throw new IOException("You must parse the document first before calling getDocument()");
         }
         return document;
     }
 
     /**
+     * This will get the encryption dictionary. The document must be parsed before this is called.
+     *
+     * @return The encryption dictionary of the document that was parsed.
+     *
+     * @throws IOException If there is an error getting the document.
+     */
+    public PDEncryption getEncryption() throws IOException
+    {
+        if (document == null)
+        {
+            throw new IOException(
+                    "You must parse the document first before calling getEncryption()");
+        }
+        return encryption;
+    }
+
+    /**
+     * This will get the AccessPermission. The document must be parsed before this is called.
+     *
+     * @return The access permission of document that was parsed.
+     *
+     * @throws IOException If there is an error getting the document.
+     */
+    public AccessPermission getAccessPermission() throws IOException
+    {
+        if (document == null)
+        {
+            throw new IOException(
+                    "You must parse the document first before calling getAccessPermission()");
+        }
+        return accessPermission;
+    }
+
+    /**
      * Parse the values of the trailer dictionary and return the root object.
      *
      * @param trailer The trailer dictionary.
      * @return The parsed root object.
-     * @throws IOException If an IO error occurs or if the root object is
-     * missing in the trailer dictionary.
+     * @throws IOException If an IO error occurs or if the root object is missing in the trailer dictionary.
      */
     protected COSBase parseTrailerValuesDynamically(COSDictionary trailer) throws IOException
     {
@@ -2734,4 +2834,88 @@ public class COSParser extends BaseParse
         return root.getObject();
     }
 
+    /**
+     * Prepare for decryption.
+     * 
+     * @throws InvalidPasswordException If the password is incorrect.
+     * @throws IOException if something went wrong
+     */
+    private void prepareDecryption() throws InvalidPasswordException, IOException
+    {
+        if (encryption == null)
+        {
+            COSBase trailerEncryptItem = document.getTrailer().getItem(COSName.ENCRYPT);
+            if (trailerEncryptItem != null && !(trailerEncryptItem instanceof COSNull))
+            {
+                if (trailerEncryptItem instanceof COSObject)
+                {
+                    COSObject trailerEncryptObj = (COSObject) trailerEncryptItem;
+                    parseDictionaryRecursive(trailerEncryptObj);
+                }
+                try
+                {
+                    encryption = new PDEncryption(document.getEncryptionDictionary());
+                    DecryptionMaterial decryptionMaterial;
+                    if (keyStoreInputStream != null)
+                    {
+                        KeyStore ks = KeyStore.getInstance("PKCS12");
+                        ks.load(keyStoreInputStream, password.toCharArray());
+
+                        decryptionMaterial = new PublicKeyDecryptionMaterial(ks, keyAlias,
+                                password);
+                    }
+                    else
+                    {
+                        decryptionMaterial = new StandardDecryptionMaterial(password);
+                    }
+
+                    securityHandler = encryption.getSecurityHandler();
+                    securityHandler.prepareForDecryption(encryption, document.getDocumentID(),
+                            decryptionMaterial);
+                    accessPermission = securityHandler.getCurrentAccessPermission();
+                }
+                catch (IOException e)
+                {
+                    throw e;
+                }
+                catch (Exception e)
+                {
+                    throw new IOException("Error (" + e.getClass().getSimpleName()
+                            + ") while creating security handler for decryption", e);
+                }
+                finally
+                {
+                    if (keyStoreInputStream != null)
+                    {
+                        IOUtils.closeQuietly(keyStoreInputStream);
+                    }
+                }
+            }
+        }
+    }
+
+    /**
+     * Resolves all not already parsed objects of a dictionary recursively.
+     * 
+     * @param dictionaryObject dictionary to be parsed
+     * @throws IOException if something went wrong
+     * 
+     */
+    private void parseDictionaryRecursive(COSObject dictionaryObject) throws IOException
+    {
+        parseObjectDynamically(dictionaryObject, true);
+        COSDictionary dictionary = (COSDictionary) dictionaryObject.getObject();
+        for (COSBase value : dictionary.getValues())
+        {
+            if (value instanceof COSObject)
+            {
+                COSObject object = (COSObject) value;
+                if (object.getObject() == null)
+                {
+                    parseDictionaryRecursive(object);
+                }
+            }
+        }
+    }
+
 }

Modified: pdfbox/branches/2.0/pdfbox/src/main/java/org/apache/pdfbox/pdfparser/PDFParser.java
URL: http://svn.apache.org/viewvc/pdfbox/branches/2.0/pdfbox/src/main/java/org/apache/pdfbox/pdfparser/PDFParser.java?rev=1828365&r1=1828364&r2=1828365&view=diff
==============================================================================
--- pdfbox/branches/2.0/pdfbox/src/main/java/org/apache/pdfbox/pdfparser/PDFParser.java (original)
+++ pdfbox/branches/2.0/pdfbox/src/main/java/org/apache/pdfbox/pdfparser/PDFParser.java Wed Apr  4 17:05:42 2018
@@ -18,7 +18,6 @@ package org.apache.pdfbox.pdfparser;
 
 import java.io.IOException;
 import java.io.InputStream;
-import java.security.KeyStore;
 
 import org.apache.commons.logging.Log;
 import org.apache.commons.logging.LogFactory;
@@ -26,30 +25,16 @@ import org.apache.pdfbox.cos.COSBase;
 import org.apache.pdfbox.cos.COSDictionary;
 import org.apache.pdfbox.cos.COSDocument;
 import org.apache.pdfbox.cos.COSName;
-import org.apache.pdfbox.cos.COSNull;
-import org.apache.pdfbox.cos.COSObject;
 import org.apache.pdfbox.io.IOUtils;
 import org.apache.pdfbox.io.RandomAccessRead;
 import org.apache.pdfbox.io.ScratchFile;
 import org.apache.pdfbox.pdmodel.PDDocument;
-import org.apache.pdfbox.pdmodel.encryption.AccessPermission;
-import org.apache.pdfbox.pdmodel.encryption.DecryptionMaterial;
 import org.apache.pdfbox.pdmodel.encryption.InvalidPasswordException;
-import org.apache.pdfbox.pdmodel.encryption.PDEncryption;
-import org.apache.pdfbox.pdmodel.encryption.PublicKeyDecryptionMaterial;
-import org.apache.pdfbox.pdmodel.encryption.StandardDecryptionMaterial;
 
 public class PDFParser extends COSParser
 {
     private static final Log LOG = LogFactory.getLog(PDFParser.class);
 
-    private String password = "";
-    private InputStream keyStoreInputStream = null;
-    private String keyAlias = null;
-
-    private PDEncryption encryption = null;    
-    private AccessPermission accessPermission;
-
     /**
      * Constructor.
      * Unrestricted main memory will be used for buffering PDF streams.
@@ -135,11 +120,8 @@ public class PDFParser extends COSParser
     public PDFParser(RandomAccessRead source, String decryptionPassword, InputStream keyStore,
                      String alias, ScratchFile scratchFile) throws IOException
     {
-        super(source);
+        super(source, decryptionPassword, keyStore, alias);
         fileLen = source.length();
-        password = decryptionPassword;
-        keyStoreInputStream = keyStore;
-        keyAlias = alias;
         init(scratchFile);
     }
     
@@ -171,8 +153,8 @@ public class PDFParser extends COSParser
      */
     public PDDocument getPDDocument() throws IOException
     {
-        PDDocument doc = new PDDocument(getDocument(), source, accessPermission);
-        doc.setEncryptionDictionary(encryption);
+        PDDocument doc = new PDDocument(getDocument(), source, getAccessPermission());
+        doc.setEncryptionDictionary(getEncryption());
         return doc;
     }
 
@@ -187,8 +169,6 @@ public class PDFParser extends COSParser
     protected void initialParse() throws InvalidPasswordException, IOException
     {
         COSDictionary trailer = retrieveTrailer();
-        // prepare decryption if necessary
-        prepareDecryption();
     
         COSBase base = parseTrailerValuesDynamically(trailer);
         if (!(base instanceof COSDictionary))
@@ -243,8 +223,6 @@ public class PDFParser extends COSParser
         }
         finally
         {
-            IOUtils.closeQuietly(keyStoreInputStream);
-    
             if (exceptionOccurred && document != null)
             {
                 IOUtils.closeQuietly(document);
@@ -252,78 +230,5 @@ public class PDFParser extends COSParser
             }
         }
     }
-
-    /**
-     * Prepare for decryption.
-     * 
-     * @throws InvalidPasswordException If the password is incorrect.
-     * @throws IOException if something went wrong
-     */
-    private void prepareDecryption() throws InvalidPasswordException, IOException
-    {
-        COSBase trailerEncryptItem = document.getTrailer().getItem(COSName.ENCRYPT);
-        if (trailerEncryptItem != null && !(trailerEncryptItem instanceof COSNull))
-        {
-            if (trailerEncryptItem instanceof COSObject)
-            {
-                COSObject trailerEncryptObj = (COSObject) trailerEncryptItem;
-                parseDictionaryRecursive(trailerEncryptObj);
-            }
-            try
-            {
-                encryption = new PDEncryption(document.getEncryptionDictionary());
-                DecryptionMaterial decryptionMaterial;
-                if (keyStoreInputStream != null)
-                {
-                    KeyStore ks = KeyStore.getInstance("PKCS12");
-                    ks.load(keyStoreInputStream, password.toCharArray());
-    
-                    decryptionMaterial = new PublicKeyDecryptionMaterial(ks, keyAlias, password);
-                }
-                else
-                {
-                    decryptionMaterial = new StandardDecryptionMaterial(password);
-                }
-    
-                securityHandler = encryption.getSecurityHandler();
-                securityHandler.prepareForDecryption(encryption, document.getDocumentID(),
-                        decryptionMaterial);
-                accessPermission = securityHandler.getCurrentAccessPermission();
-            }
-            catch (IOException e)
-            {
-                throw e;
-            }
-            catch (Exception e)
-            {
-                throw new IOException("Error (" + e.getClass().getSimpleName()
-                        + ") while creating security handler for decryption", e);
-            }
-        }
-    }
-
-    /**
-     * Resolves all not already parsed objects of a dictionary recursively.
-     * 
-     * @param dictionaryObject dictionary to be parsed
-     * @throws IOException if something went wrong
-     * 
-     */
-    private void parseDictionaryRecursive(COSObject dictionaryObject) throws IOException
-    {
-        parseObjectDynamically(dictionaryObject, true);
-        COSDictionary dictionary = (COSDictionary)dictionaryObject.getObject();
-        for(COSBase value : dictionary.getValues())
-        {
-            if (value instanceof COSObject)
-            {
-                COSObject object = (COSObject)value;
-                if (object.getObject() == null)
-                {
-                    parseDictionaryRecursive(object);
-                }
-            }
-        }
-    }
 
 }


Reply via email to