Author: lehmi
Date: Wed Apr 4 17:05:42 2018
New Revision: 1828365
URL: http://svn.apache.org/viewvc?rev=1828365&view=rev
Log: PDFBOX-4097: try to decrypt encrypted object streams when rebuilding the trailer
Modified: pdfbox/branches/2.0/pdfbox/src/main/java/org/apache/pdfbox/pdfparser/COSParser.java pdfbox/branches/2.0/pdfbox/src/main/java/org/apache/pdfbox/pdfparser/PDFParser.java Modified: pdfbox/branches/2.0/pdfbox/src/main/java/org/apache/pdfbox/pdfparser/COSParser.java URL: http://svn.apache.org/viewvc/pdfbox/branches/2.0/pdfbox/src/main/java/org/apache/pdfbox/pdfparser/COSParser.java?rev=1828365&r1=1828364&r2=1828365&view=diff ============================================================================== --- pdfbox/branches/2.0/pdfbox/src/main/java/org/apache/pdfbox/pdfparser/COSParser.java (original) +++ pdfbox/branches/2.0/pdfbox/src/main/java/org/apache/pdfbox/pdfparser/COSParser.java Wed Apr 4 17:05:42 2018 @@ -17,7 +17,9 @@ package org.apache.pdfbox.pdfparser; import java.io.IOException; +import java.io.InputStream; import java.io.OutputStream; +import java.security.KeyStore; import java.util.ArrayList; import java.util.Arrays; import java.util.Collection; @@ -45,10 +47,16 @@ import org.apache.pdfbox.cos.COSNumber; import org.apache.pdfbox.cos.COSObject; import org.apache.pdfbox.cos.COSObjectKey; import org.apache.pdfbox.cos.COSStream; +import org.apache.pdfbox.io.IOUtils; import org.apache.pdfbox.io.RandomAccessRead; import org.apache.pdfbox.pdfparser.XrefTrailerResolver.XRefType; +import org.apache.pdfbox.pdmodel.encryption.AccessPermission; +import org.apache.pdfbox.pdmodel.encryption.DecryptionMaterial; +import org.apache.pdfbox.pdmodel.encryption.InvalidPasswordException; +import org.apache.pdfbox.pdmodel.encryption.PDEncryption; +import org.apache.pdfbox.pdmodel.encryption.PublicKeyDecryptionMaterial; import org.apache.pdfbox.pdmodel.encryption.SecurityHandler; - +import org.apache.pdfbox.pdmodel.encryption.StandardDecryptionMaterial; import static org.apache.pdfbox.util.Charsets.ISO_8859_1; @@ -85,7 +93,12 @@ public class COSParser extends BaseParse private final byte[] strmBuf = new byte[ STRMBUFLEN ]; protected final RandomAccessRead source; - + + 
private AccessPermission accessPermission; + private InputStream keyStoreInputStream = null; + private String password = ""; + private String keyAlias = null; + /** * Only parse the PDF file minimally allowing access to basic information. */ @@ -144,6 +157,7 @@ public class COSParser extends BaseParse private Long lastEOFMarker = null; private List<Long> bfSearchXRefTablesOffsets = null; private List<Long> bfSearchXRefStreamsOffsets = null; + private PDEncryption encryption = null; /** * The security handler. @@ -179,6 +193,25 @@ public class COSParser extends BaseParse } /** + * Constructor for encrypted pdfs. + * + * @param source input representing the pdf. + * @param password password to be used for decryption. + * @param keyStore key store to be used for decryption when using public key security + * @param keyAlias alias to be used for decryption when using public key security + * + */ + public COSParser(RandomAccessRead source, String password, InputStream keyStore, + String keyAlias) + { + super(new RandomAccessSource(source)); + this.source = source; + this.password = password; + this.keyAlias = keyAlias; + keyStoreInputStream = keyStore; + } + + /** * Sets how many trailing bytes of PDF file are searched for EOF marker and 'startxref' marker. If not set we use * default value {@link #DEFAULT_TRAIL_BYTECOUNT}. 
* @@ -245,6 +278,15 @@ public class COSParser extends BaseParse { trailer = rebuildTrailer(); } + else + { + // prepare decryption if necessary + prepareDecryption(); + if (bfSearchCOSObjectKeyOffsets != null && !bfSearchCOSObjectKeyOffsets.isEmpty()) + { + bfSearchForObjStreams(); + } + } return trailer; } @@ -1580,7 +1622,6 @@ public class COSParser extends BaseParse bfSearchCOSObjectKeyOffsets.put(new COSObjectKey(lastObjectId, lastGenID), lastObjOffset); } - bfSearchForObjStreams(); // reestablish origin position source.seek(originOffset); } @@ -1941,7 +1982,7 @@ public class COSParser extends BaseParse { source.seek(offset); long stmObjNumber = readObjectNumber(); - readGenerationNumber(); + int stmGenNumber = readGenerationNumber(); readExpectedString(OBJ_MARKER, true); int nrOfObjects = 0; byte[] numbersBytes = null; @@ -1958,6 +1999,10 @@ public class COSParser extends BaseParse continue; } stream = parseCOSStream(dict); + if (securityHandler != null) + { + securityHandler.decryptStream(stream, stmObjNumber, stmGenNumber); + } is = stream.createInputStream(); numbersBytes = new byte[offsetFirstStream]; is.read(numbersBytes); @@ -1995,6 +2040,7 @@ public class COSParser extends BaseParse "Skipped corrupt stream: (" + stmObjNumber + " 0 at offset " + offset); continue; } + Map<COSObjectKey, Long> xrefOffset = xrefTrailerResolver.getXrefTable(); for (int i = 0; i < nrOfObjects; i++) { long objNumber = Long.parseLong(numbers[i * 2]); @@ -2003,6 +2049,7 @@ public class COSParser extends BaseParse if (existingOffset == null || offset > existingOffset) { bfSearchCOSObjectKeyOffsets.put(objKey, -stmObjNumber); + xrefOffset.put(objKey, -stmObjNumber); } } } @@ -2150,36 +2197,57 @@ public class COSParser extends BaseParse xrefTrailerResolver.setStartxref(0); trailer = xrefTrailerResolver.getTrailer(); getDocument().setTrailer(trailer); + boolean searchForObjStreamsDone = false; if (!bfSearchForTrailer(trailer)) { // search for the different parts of the trailer 
dictionary - for (Entry<COSObjectKey, Long> entry : bfSearchCOSObjectKeyOffsets.entrySet()) + if (!searchForTrailerItems(trailer)) { - COSDictionary dictionary = retrieveCOSDictionary(entry.getKey(), - entry.getValue()); - if (dictionary == null) - { - continue; - } - // document catalog - if (isCatalog(dictionary)) - { - trailer.setItem(COSName.ROOT, document.getObjectFromPool(entry.getKey())); - } - // info dictionary - else if (isInfo(dictionary)) - { - trailer.setItem(COSName.INFO, document.getObjectFromPool(entry.getKey())); - } - // encryption dictionary, if existing, is lost - // We can't run "Algorithm 2" from PDF specification because of missing ID + // root entry wasn't found, maybe it is part of an object stream + bfSearchForObjStreams(); + searchForObjStreamsDone = true; + // search again for the root entry + searchForTrailerItems(trailer); } } + // prepare decryption if necessary + prepareDecryption(); + if (!searchForObjStreamsDone) + { + bfSearchForObjStreams(); + } } trailerWasRebuild = true; return trailer; } + private boolean searchForTrailerItems(COSDictionary trailer) throws IOException + { + boolean rootFound = false; + for (Entry<COSObjectKey, Long> entry : bfSearchCOSObjectKeyOffsets.entrySet()) + { + COSDictionary dictionary = retrieveCOSDictionary(entry.getKey(), entry.getValue()); + if (dictionary == null) + { + continue; + } + // document catalog + if (isCatalog(dictionary)) + { + trailer.setItem(COSName.ROOT, document.getObjectFromPool(entry.getKey())); + rootFound = true; + } + // info dictionary + else if (isInfo(dictionary)) + { + trailer.setItem(COSName.INFO, document.getObjectFromPool(entry.getKey())); + } + // encryption dictionary, if existing, is lost + // We can't run "Algorithm 2" from PDF specification because of missing ID + } + return rootFound; + } + private COSDictionary retrieveCOSDictionary(COSObject object) throws IOException { COSObjectKey key = new COSObjectKey((COSObject) object); @@ -2688,9 +2756,8 @@ public class 
COSParser extends BaseParse } /** - * This will get the document that was parsed. parse() must be called before this is called. - * When you are done with this document you must call close() on it to release - * resources. + * This will get the document that was parsed. The document must be parsed before this is called. When you are done + * with this document you must call close() on it to release resources. * * @return The document that was parsed. * @@ -2700,18 +2767,51 @@ public class COSParser extends BaseParse { if( document == null ) { - throw new IOException( "You must call parse() before calling getDocument()" ); + throw new IOException("You must parse the document first before calling getDocument()"); } return document; } /** + * This will get the encryption dictionary. The document must be parsed before this is called. + * + * @return The encryption dictionary of the document that was parsed. + * + * @throws IOException If there is an error getting the document. + */ + public PDEncryption getEncryption() throws IOException + { + if (document == null) + { + throw new IOException( + "You must parse the document first before calling getEncryption()"); + } + return encryption; + } + + /** + * This will get the AccessPermission. The document must be parsed before this is called. + * + * @return The access permission of document that was parsed. + * + * @throws IOException If there is an error getting the document. + */ + public AccessPermission getAccessPermission() throws IOException + { + if (document == null) + { + throw new IOException( + "You must parse the document first before calling getAccessPermission()"); + } + return accessPermission; + } + + /** * Parse the values of the trailer dictionary and return the root object. * * @param trailer The trailer dictionary. * @return The parsed root object. - * @throws IOException If an IO error occurs or if the root object is - * missing in the trailer dictionary. 
+ * @throws IOException If an IO error occurs or if the root object is missing in the trailer dictionary. */ protected COSBase parseTrailerValuesDynamically(COSDictionary trailer) throws IOException { @@ -2734,4 +2834,88 @@ public class COSParser extends BaseParse return root.getObject(); } + /** + * Prepare for decryption. + * + * @throws InvalidPasswordException If the password is incorrect. + * @throws IOException if something went wrong + */ + private void prepareDecryption() throws InvalidPasswordException, IOException + { + if (encryption == null) + { + COSBase trailerEncryptItem = document.getTrailer().getItem(COSName.ENCRYPT); + if (trailerEncryptItem != null && !(trailerEncryptItem instanceof COSNull)) + { + if (trailerEncryptItem instanceof COSObject) + { + COSObject trailerEncryptObj = (COSObject) trailerEncryptItem; + parseDictionaryRecursive(trailerEncryptObj); + } + try + { + encryption = new PDEncryption(document.getEncryptionDictionary()); + DecryptionMaterial decryptionMaterial; + if (keyStoreInputStream != null) + { + KeyStore ks = KeyStore.getInstance("PKCS12"); + ks.load(keyStoreInputStream, password.toCharArray()); + + decryptionMaterial = new PublicKeyDecryptionMaterial(ks, keyAlias, + password); + } + else + { + decryptionMaterial = new StandardDecryptionMaterial(password); + } + + securityHandler = encryption.getSecurityHandler(); + securityHandler.prepareForDecryption(encryption, document.getDocumentID(), + decryptionMaterial); + accessPermission = securityHandler.getCurrentAccessPermission(); + } + catch (IOException e) + { + throw e; + } + catch (Exception e) + { + throw new IOException("Error (" + e.getClass().getSimpleName() + + ") while creating security handler for decryption", e); + } + finally + { + if (keyStoreInputStream != null) + { + IOUtils.closeQuietly(keyStoreInputStream); + } + } + } + } + } + + /** + * Resolves all not already parsed objects of a dictionary recursively. 
+ * + * @param dictionaryObject dictionary to be parsed + * @throws IOException if something went wrong + * + */ + private void parseDictionaryRecursive(COSObject dictionaryObject) throws IOException + { + parseObjectDynamically(dictionaryObject, true); + COSDictionary dictionary = (COSDictionary) dictionaryObject.getObject(); + for (COSBase value : dictionary.getValues()) + { + if (value instanceof COSObject) + { + COSObject object = (COSObject) value; + if (object.getObject() == null) + { + parseDictionaryRecursive(object); + } + } + } + } + } Modified: pdfbox/branches/2.0/pdfbox/src/main/java/org/apache/pdfbox/pdfparser/PDFParser.java URL: http://svn.apache.org/viewvc/pdfbox/branches/2.0/pdfbox/src/main/java/org/apache/pdfbox/pdfparser/PDFParser.java?rev=1828365&r1=1828364&r2=1828365&view=diff ============================================================================== --- pdfbox/branches/2.0/pdfbox/src/main/java/org/apache/pdfbox/pdfparser/PDFParser.java (original) +++ pdfbox/branches/2.0/pdfbox/src/main/java/org/apache/pdfbox/pdfparser/PDFParser.java Wed Apr 4 17:05:42 2018 @@ -18,7 +18,6 @@ package org.apache.pdfbox.pdfparser; import java.io.IOException; import java.io.InputStream; -import java.security.KeyStore; import org.apache.commons.logging.Log; import org.apache.commons.logging.LogFactory; @@ -26,30 +25,16 @@ import org.apache.pdfbox.cos.COSBase; import org.apache.pdfbox.cos.COSDictionary; import org.apache.pdfbox.cos.COSDocument; import org.apache.pdfbox.cos.COSName; -import org.apache.pdfbox.cos.COSNull; -import org.apache.pdfbox.cos.COSObject; import org.apache.pdfbox.io.IOUtils; import org.apache.pdfbox.io.RandomAccessRead; import org.apache.pdfbox.io.ScratchFile; import org.apache.pdfbox.pdmodel.PDDocument; -import org.apache.pdfbox.pdmodel.encryption.AccessPermission; -import org.apache.pdfbox.pdmodel.encryption.DecryptionMaterial; import org.apache.pdfbox.pdmodel.encryption.InvalidPasswordException; -import 
org.apache.pdfbox.pdmodel.encryption.PDEncryption; -import org.apache.pdfbox.pdmodel.encryption.PublicKeyDecryptionMaterial; -import org.apache.pdfbox.pdmodel.encryption.StandardDecryptionMaterial; public class PDFParser extends COSParser { private static final Log LOG = LogFactory.getLog(PDFParser.class); - private String password = ""; - private InputStream keyStoreInputStream = null; - private String keyAlias = null; - - private PDEncryption encryption = null; - private AccessPermission accessPermission; - /** * Constructor. * Unrestricted main memory will be used for buffering PDF streams. @@ -135,11 +120,8 @@ public class PDFParser extends COSParser public PDFParser(RandomAccessRead source, String decryptionPassword, InputStream keyStore, String alias, ScratchFile scratchFile) throws IOException { - super(source); + super(source, decryptionPassword, keyStore, alias); fileLen = source.length(); - password = decryptionPassword; - keyStoreInputStream = keyStore; - keyAlias = alias; init(scratchFile); } @@ -171,8 +153,8 @@ public class PDFParser extends COSParser */ public PDDocument getPDDocument() throws IOException { - PDDocument doc = new PDDocument(getDocument(), source, accessPermission); - doc.setEncryptionDictionary(encryption); + PDDocument doc = new PDDocument(getDocument(), source, getAccessPermission()); + doc.setEncryptionDictionary(getEncryption()); return doc; } @@ -187,8 +169,6 @@ public class PDFParser extends COSParser protected void initialParse() throws InvalidPasswordException, IOException { COSDictionary trailer = retrieveTrailer(); - // prepare decryption if necessary - prepareDecryption(); COSBase base = parseTrailerValuesDynamically(trailer); if (!(base instanceof COSDictionary)) @@ -243,8 +223,6 @@ public class PDFParser extends COSParser } finally { - IOUtils.closeQuietly(keyStoreInputStream); - if (exceptionOccurred && document != null) { IOUtils.closeQuietly(document); @@ -252,78 +230,5 @@ public class PDFParser extends COSParser } } } 
- - /** - * Prepare for decryption. - * - * @throws InvalidPasswordException If the password is incorrect. - * @throws IOException if something went wrong - */ - private void prepareDecryption() throws InvalidPasswordException, IOException - { - COSBase trailerEncryptItem = document.getTrailer().getItem(COSName.ENCRYPT); - if (trailerEncryptItem != null && !(trailerEncryptItem instanceof COSNull)) - { - if (trailerEncryptItem instanceof COSObject) - { - COSObject trailerEncryptObj = (COSObject) trailerEncryptItem; - parseDictionaryRecursive(trailerEncryptObj); - } - try - { - encryption = new PDEncryption(document.getEncryptionDictionary()); - DecryptionMaterial decryptionMaterial; - if (keyStoreInputStream != null) - { - KeyStore ks = KeyStore.getInstance("PKCS12"); - ks.load(keyStoreInputStream, password.toCharArray()); - - decryptionMaterial = new PublicKeyDecryptionMaterial(ks, keyAlias, password); - } - else - { - decryptionMaterial = new StandardDecryptionMaterial(password); - } - - securityHandler = encryption.getSecurityHandler(); - securityHandler.prepareForDecryption(encryption, document.getDocumentID(), - decryptionMaterial); - accessPermission = securityHandler.getCurrentAccessPermission(); - } - catch (IOException e) - { - throw e; - } - catch (Exception e) - { - throw new IOException("Error (" + e.getClass().getSimpleName() - + ") while creating security handler for decryption", e); - } - } - } - - /** - * Resolves all not already parsed objects of a dictionary recursively. 
- * - * @param dictionaryObject dictionary to be parsed - * @throws IOException if something went wrong - * - */ - private void parseDictionaryRecursive(COSObject dictionaryObject) throws IOException - { - parseObjectDynamically(dictionaryObject, true); - COSDictionary dictionary = (COSDictionary)dictionaryObject.getObject(); - for(COSBase value : dictionary.getValues()) - { - if (value instanceof COSObject) - { - COSObject object = (COSObject)value; - if (object.getObject() == null) - { - parseDictionaryRecursive(object); - } - } - } - } }