Author: msahyoun
Date: Mon Aug 15 19:31:58 2016
New Revision: 1756416

URL: http://svn.apache.org/viewvc?rev=1756416&view=rev
Log:
PDFBOX-3461: add handling for empty paragraphs; enhance unit tests; compare 
streams

Added:
    
pdfbox/branches/2.0/pdfbox/src/test/java/org/apache/pdfbox/pdmodel/interactive/form/PlainTextTest.java
      - copied unchanged from r1756411, 
pdfbox/trunk/pdfbox/src/test/java/org/apache/pdfbox/pdmodel/interactive/form/PlainTextTest.java
Modified:
    
pdfbox/branches/2.0/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/interactive/form/PlainText.java
    
pdfbox/branches/2.0/pdfbox/src/test/java/org/apache/pdfbox/pdmodel/interactive/form/ControlCharacterTest.java

Modified: 
pdfbox/branches/2.0/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/interactive/form/PlainText.java
URL: 
http://svn.apache.org/viewvc/pdfbox/branches/2.0/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/interactive/form/PlainText.java?rev=1756416&r1=1756415&r2=1756416&view=diff
==============================================================================
--- pdfbox/branches/2.0/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/interactive/form/PlainText.java (original)
+++ pdfbox/branches/2.0/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/interactive/form/PlainText.java Mon Aug 15 19:31:58 2016
@@ -55,6 +55,11 @@ class PlainText
         paragraphs = new ArrayList<Paragraph>();
         for (String part : parts)
         {
+               // Acrobat prints a space for an empty paragraph
+               if (part.length() == 0)
+               {
+                       part = " ";
+               }
             paragraphs.add(new Paragraph(part));
         }
     }

Modified: 
pdfbox/branches/2.0/pdfbox/src/test/java/org/apache/pdfbox/pdmodel/interactive/form/ControlCharacterTest.java
URL: 
http://svn.apache.org/viewvc/pdfbox/branches/2.0/pdfbox/src/test/java/org/apache/pdfbox/pdmodel/interactive/form/ControlCharacterTest.java?rev=1756416&r1=1756415&r2=1756416&view=diff
==============================================================================
--- pdfbox/branches/2.0/pdfbox/src/test/java/org/apache/pdfbox/pdmodel/interactive/form/ControlCharacterTest.java (original)
+++ pdfbox/branches/2.0/pdfbox/src/test/java/org/apache/pdfbox/pdmodel/interactive/form/ControlCharacterTest.java Mon Aug 15 19:31:58 2016
@@ -16,10 +16,22 @@
  */
 package org.apache.pdfbox.pdmodel.interactive.form;
 
+import static org.junit.Assert.assertEquals;
+
 import java.io.File;
 import java.io.IOException;
+import java.io.InputStream;
+import java.util.ArrayDeque;
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.Deque;
+import java.util.List;
 
+import org.apache.pdfbox.cos.COSString;
+import org.apache.pdfbox.pdfparser.PDFStreamParser;
 import org.apache.pdfbox.pdmodel.PDDocument;
+import org.apache.pdfbox.pdmodel.interactive.annotation.PDAnnotationWidget;
+import org.apache.pdfbox.pdmodel.interactive.annotation.PDAppearanceStream;
 import org.junit.After;
 import org.junit.Before;
 import org.junit.Test;
@@ -78,45 +90,85 @@ public class ControlCharacterTest {
     @Test
     public void characterSPACE() throws IOException
     {
-       acroForm.getField("pdfbox-space").setValue("SPACE SPACE");
+       PDField field = acroForm.getField("pdfbox-space");
+       field.setValue("SPACE SPACE");
+
+       List<String> pdfboxValues = getStringsFromStream(field);
+       List<String> acrobatValues = getStringsFromStream(acroForm.getField("acrobat-space"));
+
+       assertEquals(pdfboxValues, acrobatValues);
     }
 
     @Test
     public void characterCR() throws IOException
     {
-       acroForm.getField("pdfbox-cr").setValue("CR\rCR");
+       PDField field = acroForm.getField("pdfbox-cr");
+       field.setValue("CR\rCR");
+
+       List<String> pdfboxValues = getStringsFromStream(field);
+       List<String> acrobatValues = getStringsFromStream(acroForm.getField("acrobat-cr"));
+
+       assertEquals(pdfboxValues, acrobatValues);
     }
 
     @Test
     public void characterLF() throws IOException
     {
-       acroForm.getField("pdfbox-lf").setValue("LF\nLF");
+       PDField field = acroForm.getField("pdfbox-lf");
+       field.setValue("LF\nLF");
+
+       List<String> pdfboxValues = getStringsFromStream(field);
+       List<String> acrobatValues = getStringsFromStream(acroForm.getField("acrobat-lf"));
+
+       assertEquals(pdfboxValues, acrobatValues);
     }
     
     @Test
     public void characterCRLF() throws IOException
     {
-       acroForm.getField("pdfbox-crlf").setValue("CRLF\r\nCRLF");
+       PDField field = acroForm.getField("pdfbox-crlf");
+       field.setValue("CRLF\r\nCRLF");
+
+       List<String> pdfboxValues = getStringsFromStream(field);
+       List<String> acrobatValues = getStringsFromStream(acroForm.getField("acrobat-crlf"));
+
+       assertEquals(pdfboxValues, acrobatValues);
     }
 
     @Test
     public void characterLFCR() throws IOException
     {
-       acroForm.getField("pdfbox-lfcr").setValue("LFCR\r\nLFCR");
+       PDField field = acroForm.getField("pdfbox-lfcr");
+       field.setValue("LFCR\n\rLFCR");
+       
+       List<String> pdfboxValues = getStringsFromStream(field);
+       List<String> acrobatValues = getStringsFromStream(acroForm.getField("acrobat-lfcr"));
+
+       assertEquals(pdfboxValues, acrobatValues);
     }
     
     @Test
     public void characterUnicodeLinebreak() throws IOException
     {
-       acroForm.getField("pdfbox-linebreak").setValue("linebreak\u2028linebreak");
+       PDField field = acroForm.getField("pdfbox-linebreak");
+       field.setValue("linebreak\u2028linebreak");
        
+       List<String> pdfboxValues = getStringsFromStream(field);
+       List<String> acrobatValues = getStringsFromStream(acroForm.getField("acrobat-linebreak"));
+
+       assertEquals(pdfboxValues, acrobatValues);
     }
     
     @Test
     public void characterUnicodeParagraphbreak() throws IOException
     {
-       acroForm.getField("pdfbox-paragraphbreak").setValue("paragraphbreak\u2029paragraphbreak");
+       PDField field = acroForm.getField("pdfbox-paragraphbreak");
+       field.setValue("paragraphbreak\u2029paragraphbreak");
        
+       List<String> pdfboxValues = getStringsFromStream(field);
+       List<String> acrobatValues = getStringsFromStream(acroForm.getField("acrobat-paragraphbreak"));
+
+       assertEquals(pdfboxValues, acrobatValues);
     }
     
     @After
@@ -124,4 +176,27 @@ public class ControlCharacterTest {
     {
         document.close();
     }
+    
+    private List<String> getStringsFromStream(PDField field) throws IOException
+    {
+       PDAnnotationWidget widget = field.getWidgets().get(0);
+       PDFStreamParser parser = new PDFStreamParser(widget.getNormalAppearanceStream());
+       
+       Object token = parser.parseNextToken();
+       
+       List<String> stringValues = new ArrayList<String>();
+       
+       while (token != null)
+       {
+               if (token instanceof COSString)
+               {
+                       // TODO: improve the string output to better match
+                       // trimming as Acrobat adds spaces to strings
+                       // where we don't
+                       stringValues.add(((COSString) token).getString().trim());
+               }
+               token = parser.parseNextToken();
+       }
+       return stringValues;    
+    }
 }


Reply via email to