Author: msahyoun Date: Mon Aug 15 19:27:14 2016 New Revision: 1756411 URL: http://svn.apache.org/viewvc?rev=1756411&view=rev Log: PDFBOX-3461: add handling for empty paragraphs; enhance unit tests; compare streams
Added: pdfbox/trunk/pdfbox/src/test/java/org/apache/pdfbox/pdmodel/interactive/form/PlainTextTest.java (with props) Modified: pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/interactive/form/PlainText.java pdfbox/trunk/pdfbox/src/test/java/org/apache/pdfbox/pdmodel/interactive/form/ControlCharacterTest.java Modified: pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/interactive/form/PlainText.java URL: http://svn.apache.org/viewvc/pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/interactive/form/PlainText.java?rev=1756411&r1=1756410&r2=1756411&view=diff ============================================================================== --- pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/interactive/form/PlainText.java (original) +++ pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/interactive/form/PlainText.java Mon Aug 15 19:27:14 2016 @@ -55,6 +55,11 @@ class PlainText paragraphs = new ArrayList<Paragraph>(); for (String part : parts) { + // Acrobat prints a space for an empty paragraph + if (part.length() == 0) + { + part = " "; + } paragraphs.add(new Paragraph(part)); } } Modified: pdfbox/trunk/pdfbox/src/test/java/org/apache/pdfbox/pdmodel/interactive/form/ControlCharacterTest.java URL: http://svn.apache.org/viewvc/pdfbox/trunk/pdfbox/src/test/java/org/apache/pdfbox/pdmodel/interactive/form/ControlCharacterTest.java?rev=1756411&r1=1756410&r2=1756411&view=diff ============================================================================== --- pdfbox/trunk/pdfbox/src/test/java/org/apache/pdfbox/pdmodel/interactive/form/ControlCharacterTest.java (original) +++ pdfbox/trunk/pdfbox/src/test/java/org/apache/pdfbox/pdmodel/interactive/form/ControlCharacterTest.java Mon Aug 15 19:27:14 2016 @@ -16,10 +16,22 @@ */ package org.apache.pdfbox.pdmodel.interactive.form; +import static org.junit.Assert.assertEquals; + import java.io.File; import java.io.IOException; +import java.io.InputStream; +import java.util.ArrayDeque; +import java.util.ArrayList; +import java.util.Arrays; +import java.util.Deque; +import java.util.List; +import org.apache.pdfbox.cos.COSString; +import org.apache.pdfbox.pdfparser.PDFStreamParser; import org.apache.pdfbox.pdmodel.PDDocument; +import org.apache.pdfbox.pdmodel.interactive.annotation.PDAnnotationWidget; +import org.apache.pdfbox.pdmodel.interactive.annotation.PDAppearanceStream; import org.junit.After; import org.junit.Before; import org.junit.Test; @@ -78,45 +90,85 @@ public class ControlCharacterTest { @Test public void characterSPACE() throws IOException { - acroForm.getField("pdfbox-space").setValue("SPACE SPACE"); + PDField field = acroForm.getField("pdfbox-space"); + field.setValue("SPACE SPACE"); + + List<String> pdfboxValues = getStringsFromStream(field); + List<String> acrobatValues = getStringsFromStream(acroForm.getField("acrobat-space")); + + assertEquals(pdfboxValues, acrobatValues); } @Test public void characterCR() throws IOException { - acroForm.getField("pdfbox-cr").setValue("CR\rCR"); + PDField field = acroForm.getField("pdfbox-cr"); + field.setValue("CR\rCR"); + + List<String> pdfboxValues = getStringsFromStream(field); + List<String> acrobatValues = getStringsFromStream(acroForm.getField("acrobat-cr")); + + assertEquals(pdfboxValues, acrobatValues); } @Test public void characterLF() throws IOException { - acroForm.getField("pdfbox-lf").setValue("LF\nLF"); + PDField field = acroForm.getField("pdfbox-lf"); + field.setValue("LF\nLF"); + + List<String> pdfboxValues = getStringsFromStream(field); + List<String> acrobatValues = getStringsFromStream(acroForm.getField("acrobat-lf")); + + assertEquals(pdfboxValues, acrobatValues); } @Test public void characterCRLF() throws IOException { - acroForm.getField("pdfbox-crlf").setValue("CRLF\r\nCRLF"); + PDField field = acroForm.getField("pdfbox-crlf"); + field.setValue("CRLF\r\nCRLF"); + + List<String> pdfboxValues = getStringsFromStream(field); + List<String> acrobatValues = getStringsFromStream(acroForm.getField("acrobat-crlf")); + + assertEquals(pdfboxValues, acrobatValues); } @Test public void characterLFCR() throws IOException { - acroForm.getField("pdfbox-lfcr").setValue("LFCR\r\nLFCR"); + PDField field = acroForm.getField("pdfbox-lfcr"); + field.setValue("LFCR\n\rLFCR"); + + List<String> pdfboxValues = getStringsFromStream(field); + List<String> acrobatValues = getStringsFromStream(acroForm.getField("acrobat-lfcr")); + + assertEquals(pdfboxValues, acrobatValues); } @Test public void characterUnicodeLinebreak() throws IOException { - acroForm.getField("pdfbox-linebreak").setValue("linebreak\u2028linebreak"); + PDField field = acroForm.getField("pdfbox-linebreak"); + field.setValue("linebreak\u2028linebreak"); + List<String> pdfboxValues = getStringsFromStream(field); + List<String> acrobatValues = getStringsFromStream(acroForm.getField("acrobat-linebreak")); + + assertEquals(pdfboxValues, acrobatValues); } @Test public void characterUnicodeParagraphbreak() throws IOException { - acroForm.getField("pdfbox-paragraphbreak").setValue("paragraphbreak\u2029paragraphbreak"); + PDField field = acroForm.getField("pdfbox-paragraphbreak"); + field.setValue("paragraphbreak\u2029paragraphbreak"); + List<String> pdfboxValues = getStringsFromStream(field); + List<String> acrobatValues = getStringsFromStream(acroForm.getField("acrobat-paragraphbreak")); + + assertEquals(pdfboxValues, acrobatValues); } @After @@ -124,4 +176,27 @@ public class ControlCharacterTest { { document.close(); } + + private List<String> getStringsFromStream(PDField field) throws IOException + { + PDAnnotationWidget widget = field.getWidgets().get(0); + PDFStreamParser parser = new PDFStreamParser(widget.getNormalAppearanceStream()); + + Object token = parser.parseNextToken(); + + List<String> stringValues = new ArrayList<String>(); + + while (token != null) + { + if (token instanceof COSString) + { + // TODO: improve the string output to better match + // trimming as Acrobat adds spaces to strings + // where we don't + stringValues.add(((COSString) token).getString().trim()); + } + token = parser.parseNextToken(); + } + return stringValues; + } } Added: pdfbox/trunk/pdfbox/src/test/java/org/apache/pdfbox/pdmodel/interactive/form/PlainTextTest.java URL: http://svn.apache.org/viewvc/pdfbox/trunk/pdfbox/src/test/java/org/apache/pdfbox/pdmodel/interactive/form/PlainTextTest.java?rev=1756411&view=auto ============================================================================== --- pdfbox/trunk/pdfbox/src/test/java/org/apache/pdfbox/pdmodel/interactive/form/PlainTextTest.java (added) +++ pdfbox/trunk/pdfbox/src/test/java/org/apache/pdfbox/pdmodel/interactive/form/PlainTextTest.java Mon Aug 15 19:27:14 2016 @@ -0,0 +1,72 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.pdfbox.pdmodel.interactive.form; + +import static org.junit.Assert.assertEquals; + +import org.junit.Test; + +/** + * Test if a sequence of linebreak/paragraph characters produces the same + * number of paragraphs as Adobe Acrobat produces when setting the value + * via JavaScript. + * + */ +public class PlainTextTest { + @Test + public void characterCR() + { + PlainText text = new PlainText("CR\rCR"); + assertEquals(2,text.getParagraphs().size()); + } + + @Test + public void characterLF() + { + PlainText text = new PlainText("LF\nLF"); + assertEquals(2,text.getParagraphs().size()); + } + + @Test + public void characterCRLF() + { + PlainText text = new PlainText("CRLF\r\nCRLF"); + assertEquals(2,text.getParagraphs().size()); + } + + @Test + public void characterLFCR() + { + PlainText text = new PlainText("LFCR\n\rLFCR"); + assertEquals(3,text.getParagraphs().size()); + } + + @Test + public void characterUnicodeLinebreak() + { + PlainText text = new PlainText("linebreak\u2028linebreak"); + assertEquals(2,text.getParagraphs().size()); + } + + @Test + public void characterUnicodeParagraphbreak() + { + PlainText text = new PlainText("paragraphbreak\u2029paragraphbreak"); + assertEquals(2,text.getParagraphs().size()); + } + +} Propchange: pdfbox/trunk/pdfbox/src/test/java/org/apache/pdfbox/pdmodel/interactive/form/PlainTextTest.java ------------------------------------------------------------------------------ svn:eol-style = native Propchange: pdfbox/trunk/pdfbox/src/test/java/org/apache/pdfbox/pdmodel/interactive/form/PlainTextTest.java ------------------------------------------------------------------------------ svn:mime-type = text/plain