Author: msahyoun Date: Sat Apr 14 17:20:11 2018 New Revision: 1829154 URL: http://svn.apache.org/viewvc?rev=1829154&view=rev Log: PDFBOX-4182, PDFBOX-4188: add new merge mode which closes the source PDDocument after the individual merge; early implementation
Modified: pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/multipdf/PDFMergerUtility.java Modified: pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/multipdf/PDFMergerUtility.java URL: http://svn.apache.org/viewvc/pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/multipdf/PDFMergerUtility.java?rev=1829154&r1=1829153&r2=1829154&view=diff ============================================================================== --- pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/multipdf/PDFMergerUtility.java (original) +++ pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/multipdf/PDFMergerUtility.java Sat Apr 14 17:20:11 2018 @@ -87,9 +87,32 @@ public class PDFMergerUtility private boolean ignoreAcroFormErrors = false; private PDDocumentInformation destinationDocumentInformation = null; private PDMetadata destinationMetadata = null; + + private DocumentMergeMode documentMergeMode = DocumentMergeMode.PDFBOX_LEGACY_MODE; private AcroFormMergeMode acroFormMergeMode = AcroFormMergeMode.PDFBOX_LEGACY_MODE; /** + * The mode to use when merging documents. + * + * <p><ul> + * <li>{@link DocumentMergeMode#OPTIMIZE_RESOURCES_MODE} Optimizes resource handling such as + * closing documents early. <strong>Not all document elements are merged</strong> compared to + * the PDFBOX_LEGACY_MODE. Currently supported are: + * <ul> + * <li>Page content and resources + * </ul> + * <li>{@link DocumentMergeMode#PDFBOX_LEGACY_MODE} fields with the same fully qualified name + * will be renamed and treated as independent. This mode was used in versions + * of PDFBox up to 2.x. + * </ul> + */ + public enum DocumentMergeMode + { + OPTIMIZE_RESOURCES_MODE, + PDFBOX_LEGACY_MODE + } + + /** * The mode to use when merging AcroForm between documents. * * <p><ul> @@ -115,6 +138,26 @@ public class PDFMergerUtility sources = new ArrayList<>(); fileInputStreams = new ArrayList<>(); } + + /** + * Get the mode to be used for merging the documents + * + * {@link DocumentMergeMode} + */ + public DocumentMergeMode getDocumentMergeMode() + { + return documentMergeMode; + } + + /** + * Set the mode to be used for merging the documents + * + * {@link DocumentMergeMode} + */ + public void setAcroFormMergeMode(DocumentMergeMode theDocumentMergeMode) + { + this.documentMergeMode = theDocumentMergeMode; + } /** * Get the name of the destination file. @@ -270,6 +313,71 @@ public class PDFMergerUtility */ public void mergeDocuments(MemoryUsageSetting memUsageSetting) throws IOException { + if (documentMergeMode == DocumentMergeMode.PDFBOX_LEGACY_MODE) + { + legacyMergeDocuments(memUsageSetting); + } + else if (documentMergeMode == DocumentMergeMode.OPTIMIZE_RESOURCES_MODE) + { + optimizedMergeDocuments(memUsageSetting, sources); + } + } + + private void optimizedMergeDocuments(MemoryUsageSetting memUsageSetting, List<InputStream> sourceDocuments) throws IOException + { + try (PDDocument destination = new PDDocument(memUsageSetting)) + { + PDFCloneUtility cloner = new PDFCloneUtility(destination); + for (InputStream sourceInputStream : sources) + { + try (PDDocument sourceDoc = PDDocument.load(sourceInputStream, memUsageSetting)) + { + for (PDPage page : sourceDoc.getPages()) + { + PDPage newPage = new PDPage((COSDictionary) cloner.cloneForNewDocument(page.getCOSObject())); + newPage.setCropBox(page.getCropBox()); + newPage.setMediaBox(page.getMediaBox()); + newPage.setRotation(page.getRotation()); + PDResources resources = page.getResources(); + if (resources != null) + { + // this is smart enough to just create references for resources that are used on multiple pages + newPage.setResources(new PDResources((COSDictionary) cloner.cloneForNewDocument(resources))); + } + else + { + newPage.setResources(new PDResources()); + } + destination.addPage(newPage); + } + sourceDoc.close(); + } + sourceInputStream.close(); + } + + if (destinationStream == null) + { + destination.save(destinationFileName); + } + else + { + destination.save(destinationStream); + } + } + } + + + /** + * Merge the list of source documents, saving the result in the destination + * file. + * + * @param memUsageSetting defines how memory is used for buffering PDF streams; + * in case of <code>null</code> unrestricted main memory is used + * + * @throws IOException If there is an error saving the document. + */ + private void legacyMergeDocuments(MemoryUsageSetting memUsageSetting) throws IOException + { if (sources != null && !sources.isEmpty()) { // Make sure that: