Author: tilman
Date: Fri Oct 19 17:59:07 2018
New Revision: 1844361

URL: http://svn.apache.org/viewvc?rev=1844361&view=rev
Log:
PDFBOX-3646, PDFBOX-4345: fix problems with missing text and improper handling 
of special characters, by Kai Keggenhoff:
- Instead of traversing the children of an element with the XPath "*" 
expression, simply iterate the children obtained from Node.getChildNodes(), 
process Text and CDATASection nodes directly and call richContentsToString for 
any elements
- escape "<" and "&" as "&lt;" and "&amp;" in the text read from Text nodes
- escape " as &quot; in the attribute values to avoid possible corruption

Modified:
    
pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/fdf/FDFAnnotation.java

Modified: 
pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/fdf/FDFAnnotation.java
URL: 
http://svn.apache.org/viewvc/pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/fdf/FDFAnnotation.java?rev=1844361&r1=1844360&r2=1844361&view=diff
==============================================================================
--- 
pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/fdf/FDFAnnotation.java
 (original)
+++ 
pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/fdf/FDFAnnotation.java
 Fri Oct 19 17:59:07 2018
@@ -37,10 +37,12 @@ import org.apache.pdfbox.pdmodel.common.
 import 
org.apache.pdfbox.pdmodel.interactive.annotation.PDBorderEffectDictionary;
 import 
org.apache.pdfbox.pdmodel.interactive.annotation.PDBorderStyleDictionary;
 import org.apache.pdfbox.util.DateConverter;
+import org.w3c.dom.CDATASection;
 import org.w3c.dom.Element;
 import org.w3c.dom.NamedNodeMap;
 import org.w3c.dom.Node;
 import org.w3c.dom.NodeList;
+import org.w3c.dom.Text;
 
 /**
  * This represents an FDF annotation that is part of the FDF document.
@@ -952,43 +954,49 @@ public abstract class FDFAnnotation impl
 
     private String richContentsToString(Node node, boolean root)
     {
-        String retval = "";
-        XPath xpath = XPathFactory.newInstance().newXPath();
-        try
+        String subString = "";
+
+        NodeList nodelist = node.getChildNodes();
+        for (int i = 0; i < nodelist.getLength(); i++)
         {
-            NodeList nodelist = (NodeList) xpath.evaluate("*", node, 
XPathConstants.NODESET);
-            String subString = "";
-            if (nodelist.getLength() == 0)
-            {
-                subString = node.getFirstChild().getNodeValue();
-            }
-            for (int i = 0; i < nodelist.getLength(); i++)
+            Node child = nodelist.item(i);
+            if (child instanceof Element)
             {
-                Node child = nodelist.item(i);
-                if (child instanceof Element)
-                {
-                    subString += richContentsToString(child, false);
-                }
+                subString += richContentsToString(child, false);
             }
-            NamedNodeMap attributes = node.getAttributes();
-            StringBuilder builder = new StringBuilder();
-            for (int i = 0; i < attributes.getLength(); i++)
+            else if (child instanceof CDATASection)
             {
-                Node attribute = attributes.item(i);
-                builder.append(String.format(" %s=\"%s\"", 
attribute.getNodeName(),
-                        attribute.getNodeValue()));
+               subString += "<![CDATA[" + ((CDATASection) child).getData() + 
"]]>";
             }
-            if (root)
+            else if (child instanceof Text)
             {
-                return subString;
+               String cdata = ((Text) child).getData();
+               if (cdata!=null)
+               {
+                       cdata = cdata.replace("&", "&amp;").replace("<", 
"&lt;");
+               }
+               subString += cdata;
             }
-            retval = String.format("<%s%s>%s</%s>", node.getNodeName(), 
builder.toString(),
-                    subString, node.getNodeName());
         }
-        catch (XPathExpressionException e)
+        if (root)
         {
-            LOG.debug("Error while evaluating XPath expression for richtext 
contents", e);
+            return subString;
         }
-        return retval;
+
+        NamedNodeMap attributes = node.getAttributes();
+        StringBuilder builder = new StringBuilder();
+        for (int i = 0; i < attributes.getLength(); i++)
+        {
+            Node attribute = attributes.item(i);
+            String attributeNodeValue = attribute.getNodeValue();
+            if (attributeNodeValue!=null)
+            {
+               attributeNodeValue = attributeNodeValue.replace("\"", "&quot;");
+            }
+            builder.append(String.format(" %s=\"%s\"", attribute.getNodeName(),
+                    attributeNodeValue));
+        }
+        return String.format("<%s%s>%s</%s>", node.getNodeName(), 
builder.toString(),
+                subString, node.getNodeName());
     }
-}
+}
\ No newline at end of file


Reply via email to