Author: tilman
Date: Sat Dec 20 18:36:40 2025
New Revision: 1930767
Log:
PDFBOX-6130: revert some changes in findDescriptionsParent(); fix removeComments()
and rename it to removeCommentsAndBlanks(), because a Document with only 1 child
node hit the early return and was never cleaned; remove todo
Modified:
pdfbox/branches/3.0/xmpbox/src/main/java/org/apache/xmpbox/xml/DomXmpParser.java
Modified:
pdfbox/branches/3.0/xmpbox/src/main/java/org/apache/xmpbox/xml/DomXmpParser.java
==============================================================================
---
pdfbox/branches/3.0/xmpbox/src/main/java/org/apache/xmpbox/xml/DomXmpParser.java
Sat Dec 20 17:40:07 2025 (r1930766)
+++
pdfbox/branches/3.0/xmpbox/src/main/java/org/apache/xmpbox/xml/DomXmpParser.java
Sat Dec 20 18:36:40 2025 (r1930767)
@@ -143,7 +143,7 @@ public class DomXmpParser
XMPMetadata xmp = null;
// Start reading
- removeComments(document);
+ removeCommentsAndBlanks(document);
Node node = document.getFirstChild();
// expect xpacket processing instruction
@@ -991,25 +991,17 @@ public class DomXmpParser
// empty description
throw new XmpParsingException(ErrorType.Format, "No rdf
description found in xmp");
}
- else if (nl.getLength() > 1 && strictParsing)
+ else if (nl.getLength() > 1)
{
// only expect one element
throw new XmpParsingException(ErrorType.Format, "More than one
element found in x:xmpmeta");
}
- // find element (there may be a text before the element)
- for (int i = 0; i < nl.getLength(); ++i)
- {
- if (nl.item(i) instanceof Element)
- {
- rdfRdf = (Element) nl.item(i);
- break;
- }
- }
- if (rdfRdf == null)
+ else if (!(root.getFirstChild() instanceof Element))
{
// should be an element
- throw new XmpParsingException(ErrorType.Format, "x:xmpmeta
does not contains rdf:RDF element");
+ throw new XmpParsingException(ErrorType.Format, "x:xmpmeta
does not contains rdf:RDF element but " + root.getFirstChild());
} // else let's parse
+ rdfRdf = (Element) root.getFirstChild();
}
else
{
@@ -1043,45 +1035,43 @@ public class DomXmpParser
}
/**
- * Remove all the comments node in the parent element of the parameter
- *
- * @param root
- * the first node of an element or document to clear
+ * Remove all the comments and blank nodes in the parent element of the
parameter
+ *
+ * @param root the first node of an element or document to clear
*/
- private void removeComments(Node root)
+ private void removeCommentsAndBlanks(Node root)
{
- // will hold the nodes which are to be deleted
- List<Node> forDeletion = new ArrayList<>();
-
- NodeList nl = root.getChildNodes();
-
- if (nl.getLength()<=1)
+ // will hold the nodes which are to be deleted
+ List<Node> forDeletion = new ArrayList<>();
+
+ NodeList nl = root.getChildNodes();
+
+ if (!(root instanceof Document) && nl.getLength() <= 1)
{
- // There is only one node so we do not remove it
+ // There is only one node so we're done, except when Document
return;
}
-
+
for (int i = 0; i < nl.getLength(); i++)
{
Node node = nl.item(i);
if (node instanceof Comment)
{
// comments to be deleted
- forDeletion.add(node);
+ forDeletion.add(node);
}
else if (node instanceof Text)
{
if (node.getTextContent().trim().isEmpty())
{
- // TODO: verify why this is necessary
- // empty text nodes to be deleted
- forDeletion.add(node);
+ // empty text nodes to be deleted
+ forDeletion.add(node);
}
}
else if (node instanceof Element)
{
// clean child
- removeComments(node);
+ removeCommentsAndBlanks(node);
} // else do nothing
}