Author: gbailleul Date: Fri Jun 20 20:58:41 2014 New Revision: 1604276 URL: http://svn.apache.org/r1604276 Log: PDFBOX-1995: Do not trim the text in a node if this text is the only child of the node. Reworked removeComments. Test added.
Added: pdfbox/trunk/xmpbox/src/test/resources/validxmp/only_space_fields.xmp Modified: pdfbox/trunk/xmpbox/src/main/java/org/apache/xmpbox/xml/DomXmpParser.java pdfbox/trunk/xmpbox/src/test/java/org/apache/xmpbox/parser/DeserializationTest.java Modified: pdfbox/trunk/xmpbox/src/main/java/org/apache/xmpbox/xml/DomXmpParser.java URL: http://svn.apache.org/viewvc/pdfbox/trunk/xmpbox/src/main/java/org/apache/xmpbox/xml/DomXmpParser.java?rev=1604276&r1=1604275&r2=1604276&view=diff ============================================================================== --- pdfbox/trunk/xmpbox/src/main/java/org/apache/xmpbox/xml/DomXmpParser.java (original) +++ pdfbox/trunk/xmpbox/src/main/java/org/apache/xmpbox/xml/DomXmpParser.java Fri Jun 20 20:58:41 2014 @@ -125,7 +125,7 @@ public class DomXmpParser XMPMetadata xmp = null; // Start reading - removeComments(document.getFirstChild()); + removeComments(document); Node node = document.getFirstChild(); // expect xpacket processing instruction @@ -716,37 +716,36 @@ public class DomXmpParser /** * Remove all the comments node in the parent element of the parameter * - * @param node + * @param root * the first node of an element or document to clear */ private void removeComments(Node root) { - Node node = root; - while (node != null) - { - Node next = node.getNextSibling(); + if (root.getChildNodes().getLength()<=1) { + // There is only one node so we do not remove it + return; + } + NodeList nl = root.getChildNodes(); + for (int i=0; i < nl.getLength() ; i ++) { + Node node = nl.item(i); if (node instanceof Comment) { // remove the comment - node.getParentNode().removeChild(node); + root.removeChild(node); } else if (node instanceof Text) { - Text t = (Text) node; - if (t.getTextContent().trim().length() == 0) + if (((Text)node).getTextContent().trim().length() == 0) { - // XXX is there a better way to remove useless Text ? 
- node.getParentNode().removeChild(node); + root.removeChild(node); } } else if (node instanceof Element) { // clean child - removeComments(node.getFirstChild()); + removeComments(node); } // else do nothing - node = next; } - // end of document } private AbstractStructuredType instanciateStructured(TypeMapping tm, Types type, String name, Modified: pdfbox/trunk/xmpbox/src/test/java/org/apache/xmpbox/parser/DeserializationTest.java URL: http://svn.apache.org/viewvc/pdfbox/trunk/xmpbox/src/test/java/org/apache/xmpbox/parser/DeserializationTest.java?rev=1604276&r1=1604275&r2=1604276&view=diff ============================================================================== --- pdfbox/trunk/xmpbox/src/test/java/org/apache/xmpbox/parser/DeserializationTest.java (original) +++ pdfbox/trunk/xmpbox/src/test/java/org/apache/xmpbox/parser/DeserializationTest.java Fri Jun 20 20:58:41 2014 @@ -340,4 +340,17 @@ public class DeserializationTest } + @Test + public void testSpaceTextValues () throws Exception { + // check values with spaces at start or end + // in this case, the value should not be trimmed + InputStream is = DomXmpParser.class.getResourceAsStream("/validxmp/only_space_fields.xmp"); + DomXmpParser xdb = new DomXmpParser(); + XMPMetadata meta = xdb.parse(is); + // check producer + Assert.assertEquals(" ", meta.getAdobePDFSchema().getProducer()); + // check creator tool + Assert.assertEquals("Canon ",meta.getXMPBasicSchema().getCreatorTool()); + + } } Added: pdfbox/trunk/xmpbox/src/test/resources/validxmp/only_space_fields.xmp URL: http://svn.apache.org/viewvc/pdfbox/trunk/xmpbox/src/test/resources/validxmp/only_space_fields.xmp?rev=1604276&view=auto ============================================================================== --- pdfbox/trunk/xmpbox/src/test/resources/validxmp/only_space_fields.xmp (added) +++ pdfbox/trunk/xmpbox/src/test/resources/validxmp/only_space_fields.xmp Fri Jun 20 20:58:41 2014 @@ -0,0 +1,28 @@ +<!-- ! 
Licensed to the Apache Software Foundation (ASF) under one or more + ! contributor license agreements. See the NOTICE file distributed with ! + this work for additional information regarding copyright ownership. ! The + ASF licenses this file to You under the Apache License, Version 2.0 ! (the + "License"); you may not use this file except in compliance with ! the License. + You may obtain a copy of the License at ! ! http://www.apache.org/licenses/LICENSE-2.0 + ! ! Unless required by applicable law or agreed to in writing, software ! + distributed under the License is distributed on an "AS IS" BASIS, ! WITHOUT + WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. ! See the + License for the specific language governing permissions and ! limitations + under the License. ! --> +<?xpacket begin="" id="W5M0MpCehiHzreSzNTczkc9d"?> +<x:xmpmeta xmlns:x="adobe:ns:meta/"> + <rdf:RDF xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#"> + <rdf:Description rdf:about="" xmlns:xap="http://ns.adobe.com/xap/1.0/"> + <xap:CreatorTool>Canon </xap:CreatorTool> + <xap:CreateDate>2014-01-23T20:09:45+01:00</xap:CreateDate> + </rdf:Description> + <rdf:Description rdf:about="" xmlns:pdf="http://ns.adobe.com/pdf/1.3/"> + <pdf:Producer> </pdf:Producer> + </rdf:Description> + <rdf:Description rdf:about="" xmlns:pdfaid="http://www.aiim.org/pdfa/ns/id/"> + <pdfaid:part>1</pdfaid:part> + <pdfaid:conformance>B</pdfaid:conformance> + </rdf:Description> + </rdf:RDF> +</x:xmpmeta> +<?xpacket end="w"?> \ No newline at end of file