PDFMergerUtility.java

msahyoun Sat, 14 Apr 2018 10:21:11 -0700

Author: msahyoun
Date: Sat Apr 14 17:20:11 2018
New Revision: 1829154

URL: http://svn.apache.org/viewvc?rev=1829154&view=rev
Log:
PDFBOX-4182, PDFBOX-4188: add new merge mode which closes the source PDDocument 
after the individual merge; early implementation


Modified:
    
pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/multipdf/PDFMergerUtility.java

Modified: 
pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/multipdf/PDFMergerUtility.java
URL: 
http://svn.apache.org/viewvc/pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/multipdf/PDFMergerUtility.java?rev=1829154&r1=1829153&r2=1829154&view=diff
==============================================================================
--- 
pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/multipdf/PDFMergerUtility.java
 (original)
+++ 
pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/multipdf/PDFMergerUtility.java
 Sat Apr 14 17:20:11 2018
@@ -87,9 +87,32 @@ public class PDFMergerUtility
     private boolean ignoreAcroFormErrors = false;
     private PDDocumentInformation destinationDocumentInformation = null;
     private PDMetadata destinationMetadata = null;
+
+    private DocumentMergeMode documentMergeMode = 
DocumentMergeMode.PDFBOX_LEGACY_MODE;
     private AcroFormMergeMode acroFormMergeMode = 
AcroFormMergeMode.PDFBOX_LEGACY_MODE;
     
     /**
+     * The mode to use when merging documents.
+     * 
+     * <p><ul>
+     * <li>{@link DocumentMergeMode#OPTIMIZE_RESOURCES_MODE} Optimizes resource handling such as
+     *      closing documents early. <strong>Not all document elements are merged</strong> compared to
+     *      the PDFBOX_LEGACY_MODE. Currently supported are:
+     *      <ul>
+     *          <li>Page content and resources
+     *      </ul>  
+     * <li>{@link DocumentMergeMode#PDFBOX_LEGACY_MODE} The merge behaviour that predates
+     *      OPTIMIZE_RESOURCES_MODE; it is the default. This mode was used in versions
+     *      of PDFBox up to 2.x.
+     *      NOTE(review): this entry previously described renaming of form fields with the
+     *      same fully qualified name, which reads like text copied from the AcroForm merge
+     *      mode documentation - confirm the intended wording.
+     * </ul>
+     */
+    public enum DocumentMergeMode
+    {
+        OPTIMIZE_RESOURCES_MODE,
+        PDFBOX_LEGACY_MODE
+    }
+    
+    /**
      * The mode to use when merging AcroForm between documents.
      * 
      * <p><ul>
@@ -115,6 +138,26 @@ public class PDFMergerUtility
         sources = new ArrayList<>();
         fileInputStreams = new ArrayList<>();
     }
+    
+    /**
+     * Get the mode to be used for merging the documents.
+     * 
+     * @return the currently configured {@link DocumentMergeMode}
+     */
+    public DocumentMergeMode getDocumentMergeMode()
+    {
+        return documentMergeMode;
+    }
+    
+    /**
+     * Set the mode to be used for merging the documents.
+     * 
+     * @param theDocumentMergeMode the {@link DocumentMergeMode} that
+     *                             {@link #mergeDocuments(MemoryUsageSetting)} dispatches on
+     */
+    public void setDocumentMergeMode(DocumentMergeMode theDocumentMergeMode)
+    {
+        this.documentMergeMode = theDocumentMergeMode;
+    }
+
+    /**
+     * Set the mode to be used for merging the documents.
+     * 
+     * @param theDocumentMergeMode the {@link DocumentMergeMode} to use
+     * 
+     * @deprecated misnamed: this sets the document merge mode, not the AcroForm merge mode.
+     *             Use {@link #setDocumentMergeMode(DocumentMergeMode)} instead.
+     */
+    @Deprecated
+    public void setAcroFormMergeMode(DocumentMergeMode theDocumentMergeMode)
+    {
+        // kept only so that callers of the original (misnamed) overload keep compiling
+        setDocumentMergeMode(theDocumentMergeMode);
+    }
 
     /**
      * Get the name of the destination file.
@@ -270,6 +313,71 @@ public class PDFMergerUtility
      */
     public void mergeDocuments(MemoryUsageSetting memUsageSetting) throws 
IOException
     {
+        if (documentMergeMode == DocumentMergeMode.PDFBOX_LEGACY_MODE)
+        {
+            legacyMergeDocuments(memUsageSetting);
+        }
+        else if (documentMergeMode == 
DocumentMergeMode.OPTIMIZE_RESOURCES_MODE)
+        {
+            optimizedMergeDocuments(memUsageSetting, sources);
+        }
+    }
+    
+    /**
+     * Merge the given source documents one at a time and save the result to the destination
+     * stream or, if no destination stream is set, to the destination file.
+     * 
+     * Each source document is loaded, its pages are cloned into the destination together with
+     * their crop box, media box, rotation and resources, and the source document and its input
+     * stream are closed before the next source is opened.
+     *
+     * @param memUsageSetting defines how memory is used for buffering PDF streams
+     * @param sourceDocuments the input streams of the documents to merge, in merge order
+     * 
+     * @throws IOException If there is an error loading a source document or saving the result.
+     */
+    private void optimizedMergeDocuments(MemoryUsageSetting memUsageSetting,
+            List<InputStream> sourceDocuments) throws IOException
+    {
+        try (PDDocument destination = new PDDocument(memUsageSetting))
+        {
+            PDFCloneUtility cloner = new PDFCloneUtility(destination);
+            for (InputStream sourceDocument : sourceDocuments)
+            {
+                // Resources are closed in reverse order: the document first, then its stream.
+                // Owning the stream here also closes it when loading or cloning fails.
+                try (InputStream sourceInputStream = sourceDocument;
+                     PDDocument sourceDoc = PDDocument.load(sourceInputStream, memUsageSetting))
+                {
+                    for (PDPage page : sourceDoc.getPages())
+                    {
+                        PDPage newPage = new PDPage(
+                                (COSDictionary) cloner.cloneForNewDocument(page.getCOSObject()));
+                        newPage.setCropBox(page.getCropBox());
+                        newPage.setMediaBox(page.getMediaBox());
+                        newPage.setRotation(page.getRotation());
+                        PDResources resources = page.getResources();
+                        if (resources != null)
+                        {
+                            // this is smart enough to just create references for resources
+                            // that are used on multiple pages
+                            newPage.setResources(new PDResources(
+                                    (COSDictionary) cloner.cloneForNewDocument(resources)));
+                        }
+                        else
+                        {
+                            newPage.setResources(new PDResources());
+                        }
+                        destination.addPage(newPage);
+                    }
+                }
+            }
+            
+            if (destinationStream == null)
+            {
+                destination.save(destinationFileName);
+            }
+            else
+            {
+                destination.save(destinationStream);
+            }
+        }
+    }
+    
+    
+    /**
+     * Merge the list of source documents, saving the result in the destination
+     * file.
+     *
+     * @param memUsageSetting defines how memory is used for buffering PDF 
streams;
+     *                        in case of <code>null</code> unrestricted main 
memory is used 
+     * 
+     * @throws IOException If there is an error saving the document.
+     */
+    private void legacyMergeDocuments(MemoryUsageSetting memUsageSetting) 
throws IOException
+    {
         if (sources != null && !sources.isEmpty())
         {
             // Make sure that:

svn commit: r1829154 - /pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/multipdf/PDFMergerUtility.java

Reply via email to