[ https://issues.apache.org/jira/browse/PDFBOX-5945?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel&focusedCommentId=17924385#comment-17924385 ]
ASF subversion and git services commented on PDFBOX-5945: --------------------------------------------------------- Commit 1923601 from le...@apache.org in branch 'pdfbox/branches/3.0' [ https://svn.apache.org/r1923601 ] PDFBOX-5945: fix SIZE entry in trailer dictionary, add test provided by Lahcen FTIH > Wrong size entry in trailer after incremental save > -------------------------------------------------- > > Key: PDFBOX-5945 > URL: https://issues.apache.org/jira/browse/PDFBOX-5945 > Project: PDFBox > Issue Type: Bug > Components: Writing > Affects Versions: 2.0.33, 3.0.4 PDFBox, 4.0.0 > Reporter: Andreas Lehmkühler > Assignee: Andreas Lehmkühler > Priority: Major > > Lahcen FTIH reports the following issue on users@pdfbox > > *Issue Description* > When performing an incremental update on a PDF document that has a > cross-reference table (not a cross-reference stream) without creating any > new object references, the *trailer’s size entry* is set incorrectly. > Specifically, it becomes 1 greater than the highest object number in the > *current* trailer—rather than 1 greater than the highest object number in > the *entire* document. This leads to problems because: > 1. Some PDF editors determine the next reference to use based on this > size value, leading to inconsistencies. > 2. Some PDF readers may consider the file corrupt since, according to > the PDF specification, any object in a cross-reference section whose number > is greater than the *size* value “shall be ignored and defined to be > missing.” > Below is the minimal code example to reproduce this issue. As you can see > in the log output, the input PDF has a last trailer size of 10, but after > the incremental save, the last trailer size becomes 6, even though the > highest object number remains 9. 
> {code:java} > package org.apache.pdfbox; > import org.apache.pdfbox.cos.COSDocument;import > org.apache.pdfbox.cos.COSName;import > org.apache.pdfbox.cos.COSObjectKey;import > org.apache.pdfbox.pdfwriter.compress.CompressParameters;import > org.apache.pdfbox.pdmodel.PDDocument;import > org.apache.pdfbox.pdmodel.PDPage;import > org.apache.pdfbox.pdmodel.PDResources;import > org.apache.pdfbox.pdmodel.common.PDRectangle;import > org.apache.pdfbox.pdmodel.font.PDFont;import > org.apache.pdfbox.pdmodel.font.PDType1Font;import > org.apache.pdfbox.pdmodel.font.Standard14Fonts;import > org.apache.pdfbox.pdmodel.interactive.annotation.PDAnnotationWidget;import > org.apache.pdfbox.pdmodel.interactive.form.PDAcroForm;import > org.apache.pdfbox.pdmodel.interactive.form.PDTextField; > import java.io.ByteArrayOutputStream;import java.io.IOException; > public class ReproduceBug { > private static final String FIELD_NAME = "textFieldName"; > public static void main(String[] args) { > byte[] input = create(); > printInfo(input, "input"); > byte[] output = edit(input); > printInfo(output, "output"); > } > private static byte[] create() { > try (PDDocument pdDocument = new PDDocument()) { > PDAcroForm acroForm = addAcroForm(pdDocument); > addFonts(acroForm); > PDPage page = addPage(pdDocument); > addTextField(acroForm, page); > ByteArrayOutputStream out = new ByteArrayOutputStream(); > pdDocument.save(out, CompressParameters.NO_COMPRESSION); > return out.toByteArray(); > } catch (IOException e) { > throw new RuntimeException(e); > } > } > private static byte[] edit(byte[] input) { > try (PDDocument pdDocument = loadPDF(input)) { > PDTextField textField = (PDTextField) > pdDocument.getDocumentCatalog().getAcroForm().getField(FIELD_NAME); > assert textField != null; > textField.setMultiline(true); > ByteArrayOutputStream out = new ByteArrayOutputStream(); > pdDocument.saveIncremental(out); > return out.toByteArray(); > } catch (IOException e) { > throw new RuntimeException(e); > } > 
} > private static PDDocument loadPDF(byte[] content) throws IOException { > return org.apache.pdfbox.Loader.loadPDF(content); > } > private static void addFonts(PDAcroForm acroForm) throws IOException { > PDFont font1 = new PDType1Font(Standard14Fonts.FontName.HELVETICA); > PDFont font2 = new > PDType1Font(Standard14Fonts.FontName.ZAPF_DINGBATS); > PDResources resources = new PDResources(); > resources.put(COSName.getPDFName("Helv"), font1); > resources.put(COSName.getPDFName("ZaDb"), font2); > acroForm.setDefaultResources(resources); > } > private static PDAcroForm addAcroForm(PDDocument pdDocument) > throws IOException { > PDAcroForm acroForm = new PDAcroForm(pdDocument); > pdDocument.getDocumentCatalog().setAcroForm(acroForm); > return acroForm; > } > private static PDPage addPage(PDDocument pdDocument) throws IOException { > PDPage page = new PDPage(PDRectangle.A4); > pdDocument.addPage(page); > return page; > } > private static void addTextField(PDAcroForm acroForm, PDPage page) > throws IOException { > PDTextField textField = new PDTextField(acroForm); > textField.setPartialName(FIELD_NAME); > acroForm.getFields().add(textField); > PDAnnotationWidget widget = textField.getWidgets().get(0); > widget.setPage(page); > page.getAnnotations().add(widget); > PDRectangle rectangle = new PDRectangle(10, 200, 200, 15); > widget.setRectangle(rectangle); > } > private static void printInfo(byte[] content, String name) { > > System.out.println("------------------------------------------------------"); > System.out.println("-- " + name); > try (PDDocument pdDocument = loadPDF(content)) { > COSDocument cosDocument = pdDocument.getDocument(); > System.out.println("getXrefTable.keySet.maxNumber: " + > cosDocument.getXrefTable().keySet().stream() > .mapToLong(COSObjectKey::getNumber) > .max().getAsLong()); > System.out.println("lastTrailer.size: " + > cosDocument.getTrailer().getLong(COSName.SIZE)); > } catch (IOException e) { > throw new RuntimeException(e); > } > > 
System.out.println("------------------------------------------------------"); > } > } > {code} > *Console Output* > ------------------------------------------------------ > -- input > getXrefTable.keySet.maxNumber: 9 > lastTrailer.size: 10 > ------------------------------------------------------ > ** lastEntry is: 5 > ------------------------------------------------------ > -- output > getXrefTable.keySet.maxNumber: 9 > lastTrailer.size: 6 > ------------------------------------------------------ > As the output shows: > - The highest object number remains 9. > - The final trailer size is incorrectly set to 6 after the incremental > save, whereas it should remain 10. > *Impact* > - PDF editors that use the trailer’s size to determine the next free > object reference can fail to handle the PDF correctly. > - Some readers may reject or treat the PDF as corrupted if they strictly > follow the specification about ignored objects. > *Request* > Could you please confirm whether this is indeed a regression? I would > appreciate any workaround suggestions or information on whether a fix is > planned in upcoming releases. -- This message was sent by Atlassian Jira (v8.20.10#820010) --------------------------------------------------------------------- To unsubscribe, e-mail: dev-unsubscr...@pdfbox.apache.org For additional commands, e-mail: dev-h...@pdfbox.apache.org