Author: tilman
Date: Sat Dec 20 10:41:24 2025
New Revision: 1930751
Log:
PDFBOX-6129: pass strict mode to PDFA Helper; be lenient when missing property;
add test
Modified:
pdfbox/branches/2.0/xmpbox/src/main/java/org/apache/xmpbox/xml/DomXmpParser.java
pdfbox/branches/2.0/xmpbox/src/main/java/org/apache/xmpbox/xml/PdfaExtensionHelper.java
pdfbox/branches/2.0/xmpbox/src/test/java/org/apache/xmpbox/xml/DomXmpParserTest.java
Modified:
pdfbox/branches/2.0/xmpbox/src/main/java/org/apache/xmpbox/xml/DomXmpParser.java
==============================================================================
---
pdfbox/branches/2.0/xmpbox/src/main/java/org/apache/xmpbox/xml/DomXmpParser.java
Sat Dec 20 08:30:16 2025 (r1930750)
+++
pdfbox/branches/2.0/xmpbox/src/main/java/org/apache/xmpbox/xml/DomXmpParser.java
Sat Dec 20 10:41:24 2025 (r1930751)
@@ -202,7 +202,7 @@ public class DomXmpParser
}
// find schema description
- PdfaExtensionHelper.populateSchemaMapping(xmp);
+ PdfaExtensionHelper.populateSchemaMapping(xmp, strictParsing);
// parse data description
for (Element description : descriptions)
Modified:
pdfbox/branches/2.0/xmpbox/src/main/java/org/apache/xmpbox/xml/PdfaExtensionHelper.java
==============================================================================
---
pdfbox/branches/2.0/xmpbox/src/main/java/org/apache/xmpbox/xml/PdfaExtensionHelper.java
Sat Dec 20 08:30:16 2025 (r1930750)
+++
pdfbox/branches/2.0/xmpbox/src/main/java/org/apache/xmpbox/xml/PdfaExtensionHelper.java
Sat Dec 20 10:41:24 2025 (r1930751)
@@ -94,8 +94,20 @@ public final class PdfaExtensionHelper
}
}
+ /**
+ * Populates the schema mapping of the given metadata using strict parsing.
+ * <p>
+ * This overload only delegates to
+ * {@code populateSchemaMapping(meta, true)}; callers that need lenient
+ * handling must use the two-argument variant and pass {@code false}.
+ *
+ * @param meta the XMP metadata whose schemas are inspected.
+ * @throws XmpParsingException if the delegated strict-mode processing fails;
+ * for example, strict mode rejects a PDF/A schema type definition that has
+ * no properties.
+ * @deprecated use {@link #populateSchemaMapping(org.apache.xmpbox.XMPMetadata, boolean)}
+ */
+ @Deprecated
public static void populateSchemaMapping(XMPMetadata meta) throws
XmpParsingException
{
+ populateSchemaMapping(meta, true);
+ }
+
+ public static void populateSchemaMapping(XMPMetadata meta, boolean
strictParsing) throws XmpParsingException
+ {
List<XMPSchema> schems = meta.getAllSchemas();
TypeMapping tm = meta.getTypeMapping();
StructuredType stPdfaExt =
PDFAExtensionSchema.class.getAnnotation(StructuredType.class);
@@ -117,14 +129,14 @@ public final class PdfaExtensionHelper
{
if (af instanceof PDFASchemaType)
{
- populatePDFASchemaType(meta, (PDFASchemaType) af, tm);
+ populatePDFASchemaType(meta, (PDFASchemaType) af, tm,
strictParsing);
} // TODO unmanaged ?
}
}
}
}
- private static void populatePDFASchemaType(XMPMetadata meta,
PDFASchemaType st, TypeMapping tm)
+ private static void populatePDFASchemaType(XMPMetadata meta,
PDFASchemaType st, TypeMapping tm, boolean strictParsing)
throws XmpParsingException
{
String namespaceUri = st.getNamespaceURI();
@@ -158,6 +170,10 @@ public final class PdfaExtensionHelper
}
}
// populate properties
+ if (properties == null && !strictParsing)
+ {
+ return;
+ }
if (properties == null)
{
throw new XmpParsingException(ErrorType.RequiredProperty,
Modified:
pdfbox/branches/2.0/xmpbox/src/test/java/org/apache/xmpbox/xml/DomXmpParserTest.java
==============================================================================
---
pdfbox/branches/2.0/xmpbox/src/test/java/org/apache/xmpbox/xml/DomXmpParserTest.java
Sat Dec 20 08:30:16 2025 (r1930750)
+++
pdfbox/branches/2.0/xmpbox/src/test/java/org/apache/xmpbox/xml/DomXmpParserTest.java
Sat Dec 20 10:41:24 2025 (r1930751)
@@ -48,6 +48,7 @@ import org.apache.xmpbox.type.ResourceEv
import org.apache.xmpbox.type.ResourceRefType;
import org.apache.xmpbox.type.TextType;
import static org.junit.Assert.assertEquals;
+import static org.junit.Assert.assertFalse;
import static org.junit.Assert.assertNull;
import static org.junit.Assert.assertTrue;
import static org.junit.Assert.fail;
@@ -1234,4 +1235,96 @@ public class DomXmpParserTest
assertEquals("created", firstHistoryEntry.getAction());
assertEquals("original PDF file", firstHistoryEntry.getParameters());
}
-}
+
+ @Test
+ public void testLenientPdfaExtension() throws XmpParsingException,
UnsupportedEncodingException
+ {
+ // The first rdf:li of the pdfaExtension:schemas bag is incomplete: it has no pdfaSchema:property.
+ final String s =
+ "<?xpacket begin=\"\" id=\"W5M0MpCehiHzreSzNTczkc9d\"?>\n" +
+ "<x:xmpmeta xmlns:x=\"adobe:ns:meta/\"\n" +
+ " x:xmptk=\"Adobe XMP Core 4.2.1-c043 52.372728,
2009/01/18-15:08:04\">\n" +
+ " <rdf:RDF
xmlns:rdf=\"http://www.w3.org/1999/02/22-rdf-syntax-ns#\">\n" +
+ " <rdf:Description rdf:about=\"\"\n" +
+ "
xmlns:xmpMM=\"http://ns.adobe.com/xap/1.0/mm/\">\n" +
+ "
<xmpMM:DocumentID>uuid:0b306144-6a43-dcbd-6b3e-c6b6b1df873d</xmpMM:DocumentID>\n"
+
+ "
<xmpMM:InstanceID>uuid:0b306144-6a43-dcbd-6b3e-c6b6b1df873d</xmpMM:InstanceID>\n"
+
+ " </rdf:Description>\n" +
+ " <rdf:Description rdf:about=\"\"\n" +
+ "
xmlns:pdfaExtension=\"http://www.aiim.org/pdfa/ns/extension/\"\n" +
+ "
xmlns:pdfaSchema=\"http://www.aiim.org/pdfa/ns/schema#\"\n" +
+ "
xmlns:pdfaProperty=\"http://www.aiim.org/pdfa/ns/property#\">\n" +
+ " <pdfaExtension:schemas>\n" +
+ " <rdf:Bag>\n" +
+ " <rdf:li
rdf:parseType=\"Resource\">\n" +
+ "
<pdfaSchema:namespaceURI>http://ns.adobe.com/pdf/1.3/</pdfaSchema:namespaceURI>\n"
+
+ "
<pdfaSchema:prefix>pdf</pdfaSchema:prefix>\n" +
+ "
<pdfaSchema:schema>Adobe PDF Schema</pdfaSchema:schema>\n" +
+ " </rdf:li>\n" +
+ " <rdf:li
rdf:parseType=\"Resource\">\n" +
+ "
<pdfaSchema:namespaceURI>http://ns.adobe.com/xap/1.0/mm/</pdfaSchema:namespaceURI>\n"
+
+ "
<pdfaSchema:prefix>xmpMM</pdfaSchema:prefix>\n" +
+ " <pdfaSchema:schema>XMP
Media Management Schema</pdfaSchema:schema>\n" +
+ "
<pdfaSchema:property>\n" +
+ " <rdf:Seq>\n" +
+ " <rdf:li
rdf:parseType=\"Resource\">\n" +
+ "
<pdfaProperty:category>internal</pdfaProperty:category>\n" +
+ "
<pdfaProperty:description>UUID based identifier for specific incarnation of a
document</pdfaProperty:description>\n" +
+ "
<pdfaProperty:name>InstanceID</pdfaProperty:name>\n" +
+ "
<pdfaProperty:valueType>URI</pdfaProperty:valueType>\n" +
+ "
</rdf:li>\n" +
+ " </rdf:Seq>\n" +
+ "
</pdfaSchema:property>\n" +
+ " </rdf:li>\n" +
+ " <rdf:li
rdf:parseType=\"Resource\">\n" +
+ "
<pdfaSchema:namespaceURI>http://www.aiim.org/pdfa/ns/id/</pdfaSchema:namespaceURI>\n"
+
+ "
<pdfaSchema:prefix>pdfaid</pdfaSchema:prefix>\n" +
+ "
<pdfaSchema:schema>PDF/A ID Schema</pdfaSchema:schema>\n" +
+ "
<pdfaSchema:property>\n" +
+ " <rdf:Seq>\n" +
+ " <rdf:li
rdf:parseType=\"Resource\">\n" +
+ "
<pdfaProperty:category>internal</pdfaProperty:category>\n" +
+ "
<pdfaProperty:description>Part of PDF/A standard</pdfaProperty:description>\n" +
+ "
<pdfaProperty:name>part</pdfaProperty:name>\n" +
+ "
<pdfaProperty:valueType>Integer</pdfaProperty:valueType>\n" +
+ "
</rdf:li>\n" +
+ " <rdf:li
rdf:parseType=\"Resource\">\n" +
+ "
<pdfaProperty:category>internal</pdfaProperty:category>\n" +
+ "
<pdfaProperty:description>Amendment of PDF/A
standard</pdfaProperty:description>\n" +
+ "
<pdfaProperty:name>amd</pdfaProperty:name>\n" +
+ "
<pdfaProperty:valueType>Text</pdfaProperty:valueType>\n" +
+ "
</rdf:li>\n" +
+ " <rdf:li
rdf:parseType=\"Resource\">\n" +
+ "
<pdfaProperty:category>internal</pdfaProperty:category>\n" +
+ "
<pdfaProperty:description>Conformance level of PDF/A
standard</pdfaProperty:description>\n" +
+ "
<pdfaProperty:name>conformance</pdfaProperty:name>\n" +
+ "
<pdfaProperty:valueType>Text</pdfaProperty:valueType>\n" +
+ "
</rdf:li>\n" +
+ " </rdf:Seq>\n" +
+ "
</pdfaSchema:property>\n" +
+ " </rdf:li>\n" +
+ " </rdf:Bag>\n" +
+ " </pdfaExtension:schemas>\n" +
+ " </rdf:Description>\n" +
+ " </rdf:RDF>\n" +
+ "</x:xmpmeta>\n" +
+ "<?xpacket end=\"w\"?>";
+ try
+ {
+ new DomXmpParser().parse(s.getBytes("utf-8"));
+ fail("XmpParsingException expected");
+ }
+ catch (XmpParsingException ex)
+ {
+ assertEquals("Missing pdfaSchema:property in type definition",
ex.getMessage());
+ }
+ DomXmpParser xmpParser2 = new DomXmpParser();
+ assertTrue(xmpParser2.isStrictParsing());
+ xmpParser2.setStrictParsing(false);
+ assertFalse(xmpParser2.isStrictParsing());
+ XMPMetadata xmp2 = xmpParser2.parse(s.getBytes("utf-8"));
+ XMPMediaManagementSchema xmpMediaManagementSchema =
xmp2.getXMPMediaManagementSchema();
+ assertEquals("uuid:0b306144-6a43-dcbd-6b3e-c6b6b1df873d",
xmpMediaManagementSchema.getInstanceID());
+ assertEquals("uuid:0b306144-6a43-dcbd-6b3e-c6b6b1df873d",
xmpMediaManagementSchema.getDocumentID());
+ }
+}
\ No newline at end of file