Author: gbailleul
Date: Fri Jun 20 20:58:41 2014
New Revision: 1604276

URL: http://svn.apache.org/r1604276
Log:
PDFBOX-1995: Do not trim the text in a node if this text is the only child of
the node. Reworked removeComments. Added a test.

Added:
    pdfbox/trunk/xmpbox/src/test/resources/validxmp/only_space_fields.xmp
Modified:
    pdfbox/trunk/xmpbox/src/main/java/org/apache/xmpbox/xml/DomXmpParser.java
    
pdfbox/trunk/xmpbox/src/test/java/org/apache/xmpbox/parser/DeserializationTest.java

Modified: 
pdfbox/trunk/xmpbox/src/main/java/org/apache/xmpbox/xml/DomXmpParser.java
URL: 
http://svn.apache.org/viewvc/pdfbox/trunk/xmpbox/src/main/java/org/apache/xmpbox/xml/DomXmpParser.java?rev=1604276&r1=1604275&r2=1604276&view=diff
==============================================================================
--- pdfbox/trunk/xmpbox/src/main/java/org/apache/xmpbox/xml/DomXmpParser.java 
(original)
+++ pdfbox/trunk/xmpbox/src/main/java/org/apache/xmpbox/xml/DomXmpParser.java 
Fri Jun 20 20:58:41 2014
@@ -125,7 +125,7 @@ public class DomXmpParser
         XMPMetadata xmp = null;
 
         // Start reading
-        removeComments(document.getFirstChild());
+        removeComments(document);
         Node node = document.getFirstChild();
 
         // expect xpacket processing instruction
@@ -716,37 +716,36 @@ public class DomXmpParser
     /**
      * Remove all the comments node in the parent element of the parameter
      * 
-     * @param node
+     * @param root
      *            the first node of an element or document to clear
      */
     private void removeComments(Node root)
     {
-        Node node = root;
-        while (node != null)
-        {
-            Node next = node.getNextSibling();
+        if (root.getChildNodes().getLength()<=1) {
+            // There is only one node so we do not remove it
+            return;
+        }
+        NodeList nl = root.getChildNodes();
+        for (int i=0; i < nl.getLength() ; i ++) {
+            Node node = nl.item(i);
             if (node instanceof Comment)
             {
                 // remove the comment
-                node.getParentNode().removeChild(node);
+                root.removeChild(node);
             }
             else if (node instanceof Text)
             {
-                Text t = (Text) node;
-                if (t.getTextContent().trim().length() == 0)
+                if (((Text)node).getTextContent().trim().length() == 0)
                 {
-                    // XXX is there a better way to remove useless Text ?
-                    node.getParentNode().removeChild(node);
+                        root.removeChild(node);
                 }
             }
             else if (node instanceof Element)
             {
                 // clean child
-                removeComments(node.getFirstChild());
+                removeComments(node);
             } // else do nothing
-            node = next;
         }
-        // end of document
     }
 
     private AbstractStructuredType instanciateStructured(TypeMapping tm, Types type, String name,

Modified: 
pdfbox/trunk/xmpbox/src/test/java/org/apache/xmpbox/parser/DeserializationTest.java
URL: 
http://svn.apache.org/viewvc/pdfbox/trunk/xmpbox/src/test/java/org/apache/xmpbox/parser/DeserializationTest.java?rev=1604276&r1=1604275&r2=1604276&view=diff
==============================================================================
--- 
pdfbox/trunk/xmpbox/src/test/java/org/apache/xmpbox/parser/DeserializationTest.java
 (original)
+++ 
pdfbox/trunk/xmpbox/src/test/java/org/apache/xmpbox/parser/DeserializationTest.java
 Fri Jun 20 20:58:41 2014
@@ -340,4 +340,17 @@ public class DeserializationTest
 
     }
 
+    @Test
+    public void testSpaceTextValues () throws Exception {
+        // check values with spaces at start or end
+        // in this case, the value should not be trimmed
+        InputStream is = DomXmpParser.class.getResourceAsStream("/validxmp/only_space_fields.xmp");
+        DomXmpParser xdb = new DomXmpParser();
+        XMPMetadata meta = xdb.parse(is);
+        // check producer
+        Assert.assertEquals(" ", meta.getAdobePDFSchema().getProducer());
+        // check creator tool
+        Assert.assertEquals("Canon ",meta.getXMPBasicSchema().getCreatorTool());
+
+    }
 }

Added: pdfbox/trunk/xmpbox/src/test/resources/validxmp/only_space_fields.xmp
URL: 
http://svn.apache.org/viewvc/pdfbox/trunk/xmpbox/src/test/resources/validxmp/only_space_fields.xmp?rev=1604276&view=auto
==============================================================================
--- pdfbox/trunk/xmpbox/src/test/resources/validxmp/only_space_fields.xmp 
(added)
+++ pdfbox/trunk/xmpbox/src/test/resources/validxmp/only_space_fields.xmp Fri 
Jun 20 20:58:41 2014
@@ -0,0 +1,28 @@
+<!-- ! Licensed to the Apache Software Foundation (ASF) under one or more
+       ! contributor license agreements. See the NOTICE file distributed with !
+       this work for additional information regarding copyright ownership. ! The
+       ASF licenses this file to You under the Apache License, Version 2.0 ! (the
+       "License"); you may not use this file except in compliance with ! the License.
+       You may obtain a copy of the License at ! ! http://www.apache.org/licenses/LICENSE-2.0
+       ! ! Unless required by applicable law or agreed to in writing, software !
+       distributed under the License is distributed on an "AS IS" BASIS, ! WITHOUT
+       WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. ! See the
+       License for the specific language governing permissions and ! limitations
+       under the License. ! -->
+<?xpacket begin="" id="W5M0MpCehiHzreSzNTczkc9d"?>
+<x:xmpmeta xmlns:x="adobe:ns:meta/">
+    <rdf:RDF xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#">
+        <rdf:Description rdf:about="" xmlns:xap="http://ns.adobe.com/xap/1.0/">
+            <xap:CreatorTool>Canon </xap:CreatorTool>
+            <xap:CreateDate>2014-01-23T20:09:45+01:00</xap:CreateDate>
+        </rdf:Description>
+        <rdf:Description rdf:about="" xmlns:pdf="http://ns.adobe.com/pdf/1.3/">
+            <pdf:Producer> </pdf:Producer>
+        </rdf:Description>
+        <rdf:Description rdf:about="" xmlns:pdfaid="http://www.aiim.org/pdfa/ns/id/">
+            <pdfaid:part>1</pdfaid:part>
+            <pdfaid:conformance>B</pdfaid:conformance>
+        </rdf:Description>
+    </rdf:RDF>
+</x:xmpmeta>
+<?xpacket end="w"?>
\ No newline at end of file


Reply via email to