Author: tilman Date: Wed Feb 12 08:17:53 2025 New Revision: 1923747 URL: http://svn.apache.org/viewvc?rev=1923747&view=rev Log: PDFBOX-5950: use clone instead of setItem of source; improve logging; remove dead code
Modified: pdfbox/branches/3.0/pdfbox/src/main/java/org/apache/pdfbox/multipdf/PDFMergerUtility.java Modified: pdfbox/branches/3.0/pdfbox/src/main/java/org/apache/pdfbox/multipdf/PDFMergerUtility.java URL: http://svn.apache.org/viewvc/pdfbox/branches/3.0/pdfbox/src/main/java/org/apache/pdfbox/multipdf/PDFMergerUtility.java?rev=1923747&r1=1923746&r2=1923747&view=diff ============================================================================== --- pdfbox/branches/3.0/pdfbox/src/main/java/org/apache/pdfbox/multipdf/PDFMergerUtility.java (original) +++ pdfbox/branches/3.0/pdfbox/src/main/java/org/apache/pdfbox/multipdf/PDFMergerUtility.java Wed Feb 12 08:17:53 2025 @@ -512,6 +512,7 @@ public class PDFMergerUtility */ public void appendDocument(PDDocument destination, PDDocument source) throws IOException { + PDFCloneUtility cloner = new PDFCloneUtility(destination); if (source.getDocument().isClosed()) { throw new IOException("Error: source PDF is closed."); @@ -529,7 +530,7 @@ public class PDFMergerUtility PDDocumentInformation destInfo = destination.getDocumentInformation(); PDDocumentInformation srcInfo = source.getDocumentInformation(); - mergeInto(srcInfo.getCOSObject(), destInfo.getCOSObject(), Collections.emptySet()); + mergeInto(srcInfo.getCOSObject(), destInfo.getCOSObject(), cloner, Collections.emptySet()); // use the highest version number for the resulting pdf float destVersion = destination.getVersion(); @@ -541,8 +542,6 @@ public class PDFMergerUtility } PDDocumentCatalog destCatalog = destination.getDocumentCatalog(); - PDFCloneUtility cloner = new PDFCloneUtility(destination); - mergeAcroForm(cloner, destCatalog, srcCatalog); COSArray destThreads = destCatalog.getCOSObject().getCOSArray(COSName.THREADS); @@ -687,7 +686,7 @@ public class PDFMergerUtility try { PDStream newStream = new PDStream(destination, srcMetadata.createInputStream(), (COSName) null); - mergeInto(srcMetadata, newStream.getCOSObject(), + mergeInto(srcMetadata, newStream.getCOSObject(), cloner, new HashSet<>(Arrays.asList(COSName.FILTER, COSName.LENGTH))); destCatalog.getCOSObject().setItem(COSName.METADATA, newStream); } @@ -709,7 +708,7 @@ public class PDFMergerUtility cloner.cloneMerge(srcOCP, destOCP); } - mergeOutputIntents(cloner, srcCatalog, destCatalog); + mergeOutputIntents(srcCatalog, destCatalog, cloner); // merge logical structure hierarchy boolean mergeStructTree = false; @@ -769,7 +768,6 @@ public class PDFMergerUtility } Map<COSDictionary, COSDictionary> objMapping = new HashMap<>(); - int pageIndex = 0; PDPageTree destinationPageTree = destination.getPages(); // cache PageTree for (PDPage page : srcCatalog.getPages()) { @@ -812,7 +810,6 @@ public class PDFMergerUtility // TODO update mapping for XObjects } destinationPageTree.add(newPage); - ++pageIndex; } mergeOpenAction(srcCatalog, destCatalog, cloner); if (mergeStructTree) @@ -831,7 +828,7 @@ public class PDFMergerUtility // Note that all elements are stored flatly. This could become a problem for large files // when these are opened in a viewer that uses the tagging information. - // If this happens, then âPDNumberTreeNode should be improved with a convenience method that + // If this happens, then PDNumberTreeNode should be improved with a convenience method that // stores the map into a B+Tree, see https://en.wikipedia.org/wiki/B+_tree newParentTreeNode.setNumbers(destNumberTreeAsMap); @@ -839,11 +836,11 @@ public class PDFMergerUtility destStructTree.setParentTreeNextKey(destParentTreeNextKey); mergeKEntries(cloner, srcStructTree, destStructTree); - mergeRoleMap(srcStructTree, destStructTree); + mergeRoleMap(srcStructTree, destStructTree, cloner); mergeIDTree(cloner, srcStructTree, destStructTree); mergeMarkInfo(destCatalog, srcCatalog); mergeLanguage(destCatalog, srcCatalog); - mergeViewerPreferences(destCatalog, srcCatalog); + mergeViewerPreferences(destCatalog, srcCatalog, cloner); } } @@ -892,7 +889,8 @@ public class PDFMergerUtility } } - private void mergeViewerPreferences(PDDocumentCatalog destCatalog, PDDocumentCatalog srcCatalog) + private void mergeViewerPreferences(PDDocumentCatalog destCatalog, PDDocumentCatalog srcCatalog, + PDFCloneUtility cloner) throws IOException { PDViewerPreferences srcViewerPreferences = srcCatalog.getViewerPreferences(); if (srcViewerPreferences == null) @@ -905,7 +903,7 @@ public class PDFMergerUtility destViewerPreferences = new PDViewerPreferences(); destCatalog.setViewerPreferences(destViewerPreferences); } - mergeInto(srcViewerPreferences.getCOSObject(), destViewerPreferences.getCOSObject(), + mergeInto(srcViewerPreferences.getCOSObject(), destViewerPreferences.getCOSObject(), cloner, Collections.emptySet()); // check the booleans - set to true if one is set and true @@ -1105,7 +1103,7 @@ public class PDFMergerUtility { if (destNames.containsKey(entry.getKey())) { - LOG.warn("key " + entry.getKey() + " already exists in destination IDTree"); + LOG.warn("key '" + entry.getKey() + "' already exists in destination IDTree"); } else { @@ -1182,7 +1180,8 @@ public class PDFMergerUtility return numbers; } - private void mergeRoleMap(PDStructureTreeRoot srcStructTree, PDStructureTreeRoot destStructTree) + private void mergeRoleMap(PDStructureTreeRoot srcStructTree, PDStructureTreeRoot destStructTree, + PDFCloneUtility cloner) throws IOException { COSDictionary srcDict = srcStructTree.getCOSObject().getCOSDictionary(COSName.ROLE_MAP); if (srcDict == null) @@ -1192,7 +1191,7 @@ public class PDFMergerUtility COSDictionary destDict = destStructTree.getCOSObject().getCOSDictionary(COSName.ROLE_MAP); if (destDict == null) { - destStructTree.getCOSObject().setItem(COSName.ROLE_MAP, srcDict); // clone not needed + destStructTree.getCOSObject().setItem(COSName.ROLE_MAP, cloner.cloneForNewDocument(srcDict)); return; } for (Map.Entry<COSName, COSBase> entry : srcDict.entrySet()) @@ -1205,11 +1204,11 @@ public class PDFMergerUtility } if (destDict.containsKey(entry.getKey())) { - LOG.warn("key " + entry.getKey() + " already exists in destination RoleMap"); + LOG.warn("key '" + entry.getKey() + "' already exists in destination RoleMap"); } else { - destDict.setItem(entry.getKey(), entry.getValue()); + destDict.setItem(entry.getKey(), cloner.cloneForNewDocument(entry.getValue())); } } } @@ -1331,11 +1330,9 @@ public class PDFMergerUtility } } - // copy outputIntents to destination, but avoid duplicate OutputConditionIdentifier, // except when it is missing or is named "Custom". - private void mergeOutputIntents(PDFCloneUtility cloner, - PDDocumentCatalog srcCatalog, PDDocumentCatalog destCatalog) throws IOException + private void mergeOutputIntents(PDDocumentCatalog srcCatalog, PDDocumentCatalog destCatalog, PDFCloneUtility cloner) throws IOException { List<PDOutputIntent> srcOutputIntents = srcCatalog.getOutputIntents(); List<PDOutputIntent> dstOutputIntents = destCatalog.getOutputIntents(); @@ -1537,13 +1534,13 @@ public class PDFMergerUtility * @param dst The destination dictionary to merge the keys/values into. * @param exclude Names of keys that shall be skipped. */ - private void mergeInto(COSDictionary src, COSDictionary dst, Set<COSName> exclude) + private void mergeInto(COSDictionary src, COSDictionary dst, PDFCloneUtility cloner, Set<COSName> exclude) throws IOException { for (Map.Entry<COSName, COSBase> entry : src.entrySet()) { if (!exclude.contains(entry.getKey()) && !dst.containsKey(entry.getKey())) { - dst.setItem(entry.getKey(), entry.getValue()); + dst.setItem(entry.getKey(), cloner.cloneForNewDocument(entry.getValue())); } } }