Author: tilman
Date: Sun Dec 28 12:47:20 2025
New Revision: 1930916
Log:
PDFBOX-6133: try type namespace after failing schema namespace, while making
sure to keep old behavior when failing; pass local name because there can be
several types for the same namespace; add tests
Modified:
pdfbox/trunk/xmpbox/src/main/java/org/apache/xmpbox/type/TypeMapping.java
pdfbox/trunk/xmpbox/src/main/java/org/apache/xmpbox/xml/DomXmpParser.java
pdfbox/trunk/xmpbox/src/test/java/org/apache/xmpbox/xml/DomXmpParserTest.java
Modified:
pdfbox/trunk/xmpbox/src/main/java/org/apache/xmpbox/type/TypeMapping.java
==============================================================================
--- pdfbox/trunk/xmpbox/src/main/java/org/apache/xmpbox/type/TypeMapping.java
Sun Dec 28 12:23:15 2025 (r1930915)
+++ pdfbox/trunk/xmpbox/src/main/java/org/apache/xmpbox/type/TypeMapping.java
Sun Dec 28 12:47:20 2025 (r1930916)
@@ -25,9 +25,11 @@ import java.lang.annotation.Annotation;
import java.lang.reflect.Constructor;
import java.lang.reflect.Field;
import java.lang.reflect.InvocationTargetException;
+import java.util.ArrayList;
import java.util.Calendar;
import java.util.EnumMap;
import java.util.HashMap;
+import java.util.List;
import java.util.Map;
import javax.xml.namespace.QName;
@@ -43,24 +45,37 @@ import org.apache.xmpbox.schema.TiffSche
import org.apache.xmpbox.schema.XMPBasicJobTicketSchema;
import org.apache.xmpbox.schema.XMPBasicSchema;
import org.apache.xmpbox.schema.XMPMediaManagementSchema;
+import org.apache.xmpbox.schema.XMPPageTextSchema;
import org.apache.xmpbox.schema.XMPRightsManagementSchema;
import org.apache.xmpbox.schema.XMPSchema;
import org.apache.xmpbox.schema.XMPSchemaFactory;
-import org.apache.xmpbox.schema.XMPPageTextSchema;
+import org.apache.xmpbox.schema.XmpSchemaException;
public final class TypeMapping
{
+ // type -> property
private Map<Types, PropertiesDescription> structuredMappings;
// ns -> type
+ // filled during init
+ @Deprecated
private Map<String, Types> structuredNamespaces;
- // ns -> type
+ // ns -> typeName
+ @Deprecated
private Map<String, String> definedStructuredNamespaces;
+ // ns -> list of property descriptions
+ private Map<String, List<PropertiesDescription>>
definedStructuredNamespaces2;
+
+ // typeName -> property
private Map<String, PropertiesDescription> definedStructuredMappings;
+ // ns -> list of types
+ // filled during init
+ private Map<String, List<Types>> structuredNamespaces2;
+
private final XMPMetadata metadata;
private Map<String, XMPSchemaFactory> schemaMap;
@@ -79,6 +94,7 @@ public final class TypeMapping
// structured types
structuredMappings = new EnumMap<>(Types.class);
structuredNamespaces = new HashMap<>();
+ structuredNamespaces2 = new HashMap<>();
for (Types type : Types.values())
{
if (type.isStructured())
@@ -89,6 +105,17 @@ public final class TypeMapping
String ns = st.namespace();
PropertiesDescription pm = initializePropMapping(clz);
structuredNamespaces.put(ns, type);
+ List<Types> list = structuredNamespaces2.get(ns);
+ if (list != null)
+ {
+ list.add(type);
+ }
+ else
+ {
+ list = new ArrayList<>();
+ list.add(type);
+ structuredNamespaces2.put(ns, list);
+ }
structuredMappings.put(type, pm);
}
}
@@ -96,6 +123,7 @@ public final class TypeMapping
// define structured types
definedStructuredNamespaces = new HashMap<>();
definedStructuredMappings = new HashMap<>();
+ definedStructuredNamespaces2 = new HashMap<>();
// schema
schemaMap = new HashMap<>();
@@ -115,14 +143,49 @@ public final class TypeMapping
public void addToDefinedStructuredTypes(String typeName, String ns,
PropertiesDescription pm)
{
+ List<PropertiesDescription> list =
definedStructuredNamespaces2.get(ns);
+ if (list != null)
+ {
+ list.add(pm);
+ }
+ else
+ {
+ list = new ArrayList<>();
+ list.add(pm);
+ definedStructuredNamespaces2.put(ns, list);
+ }
definedStructuredNamespaces.put(ns, typeName);
definedStructuredMappings.put(typeName, pm);
}
+ @Deprecated
public PropertiesDescription getDefinedDescriptionByNamespace(String
namespace)
{
String dt = definedStructuredNamespaces.get(namespace);
- return this.definedStructuredMappings.get(dt);
+ return definedStructuredMappings.get(dt);
+ }
+
+ /**
+ * Get a property description based on namespace and field name. Both are
needed because there
+ * can be several property descriptions for one namespace.
+ *
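+ * @param namespace the namespace URI under which the descriptions were registered
+ * @param pdfaFieldName the field name that the description must contain
+ * @return the first matching description registered for the namespace, or null if none contains the field name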
+ */
+ public PropertiesDescription getDefinedDescriptionByNamespace(String
namespace, String pdfaFieldName)
+ {
+ List<PropertiesDescription> propDescList =
definedStructuredNamespaces2.get(namespace);
+ // list of the property descriptions registered for this namespace, one per
+ // registered type (type names are not to be confused with field names / property names)
+ for (PropertiesDescription propDesc : propDescList)
+ {
+ // check whether one of these field names matches
+ if (propDesc.getPropertiesNames().contains(pdfaFieldName))
+ {
+ return propDesc;
+ }
+ }
+ return null;
}
public AbstractStructuredType instanciateStructuredType(Types type, String
propertyName)
@@ -189,17 +252,17 @@ public final class TypeMapping
*/
public boolean isStructuredTypeNamespace(String namespace)
{
- return structuredNamespaces.containsKey(namespace);
+ return structuredNamespaces2.containsKey(namespace);
}
public boolean isDefinedTypeNamespace(String namespace)
{
- return definedStructuredNamespaces.containsKey(namespace);
+ return definedStructuredNamespaces2.containsKey(namespace);
}
public boolean isDefinedType(String name)
{
- return this.definedStructuredMappings.containsKey(name);
+ return definedStructuredMappings.containsKey(name);
}
private void addNameSpace(Class<? extends XMPSchema> classSchem)
@@ -220,6 +283,37 @@ public final class TypeMapping
return structuredMappings.get(type);
}
+ /**
+ * Return the specialized schema class representation if it's known
(create and add it to metadata). In other cases,
+ * return null
+ *
+ * @param metadata
+ * Metadata to link the new schema
+ * @param namespace
+ * The namespace URI
+ * @param prefix The namespace prefix
+ * @return Schema representation
+ * @throws XmpSchemaException
+ * When Instancing specified Object Schema failed
+ *
+ * @deprecated This method will be removed in 4.0. If you need it, let us
know.
+ */
+ @Deprecated
+ public XMPSchema getAssociatedSchemaObject(XMPMetadata metadata, String
namespace, String prefix)
+ throws XmpSchemaException
+ {
+ if (schemaMap.containsKey(namespace))
+ {
+ XMPSchemaFactory factory = schemaMap.get(namespace);
+ return factory.createXMPSchema(metadata, prefix);
+ }
+ else
+ {
+ XMPSchemaFactory factory = getSchemaFactory(namespace);
+ return factory != null ? factory.createXMPSchema(metadata, prefix)
: null;
+ }
+ }
+
public XMPSchemaFactory getSchemaFactory(String namespace)
{
return schemaMap.get(namespace);
@@ -257,30 +351,73 @@ public final class TypeMapping
if (factory != null)
{
// found in schema
- return factory.getPropertyType(qName.getLocalPart());
+ PropertyType propertyType =
factory.getPropertyType(qName.getLocalPart());
+ if (propertyType != null)
+ {
+ return propertyType;
+ }
}
- else
+ // try in structured
+ List<Types> list = structuredNamespaces2.get(qName.getNamespaceURI());
+ Types st;
+ if (list != null)
{
- // try in structured
- Types st = structuredNamespaces.get(qName.getNamespaceURI());
- if (st != null)
+ st = list.get(0);
+ if (list.size() == 1)
{
- return createPropertyType(st, Cardinality.Simple);
+ PropertiesDescription propDesc = structuredMappings.get(st);
+ if (factory == null ||
propDesc.getPropertiesNames().contains(qName.getLocalPart()))
+ {
+ return createPropertyType(st, Cardinality.Simple);
+ }
+ return null;
}
- else
+ if (list.size() > 1)
{
- // try in defined
- String dt =
definedStructuredNamespaces.get(qName.getNamespaceURI());
- if (dt == null)
+ for (Types type : list)
{
- // not found
- throw new BadFieldValueException("No descriptor found for
" + qName);
+ if (type.name().equals(parentTypeName))
+ {
+ return createPropertyType(type, Cardinality.Simple);
+ }
}
- else
+ for (Types type : list)
{
- return createPropertyType(Types.DefinedType,
Cardinality.Simple);
+ PropertiesDescription propDesc =
structuredMappings.get(type);
+ if
(propDesc.getPropertiesNames().contains(qName.getLocalPart()))
+ {
+ st = type;
+ break;
+ }
}
}
+
+ PropertyType propertyType = createPropertyType(st,
Cardinality.Simple);
+ PropertiesDescription propertiesDescription =
getStructuredPropMapping(propertyType.type());
+ // PDFBOX-6133: do an additional check to make sure that the name
exists.
+ // This can happen with photoshop and exif because the namespace
exists as a schema and as a type
+ if
(propertiesDescription.getPropertiesNames().contains(qName.getLocalPart()))
+ {
+ return propertyType;
+ }
+ return null;
+ }
+ else
+ {
+ // try in defined
+ if
(!definedStructuredNamespaces2.containsKey(qName.getNamespaceURI()))
+ {
+ // not found
+ if (factory != null)
+ {
+ return null; // pre PDFBOX-6133 behavior
+ }
+ throw new BadFieldValueException("No descriptor found for " +
qName);
+ }
+ else
+ {
+ return createPropertyType(Types.DefinedType,
Cardinality.Simple);
+ }
}
}
Modified:
pdfbox/trunk/xmpbox/src/main/java/org/apache/xmpbox/xml/DomXmpParser.java
==============================================================================
--- pdfbox/trunk/xmpbox/src/main/java/org/apache/xmpbox/xml/DomXmpParser.java
Sun Dec 28 12:23:15 2025 (r1930915)
+++ pdfbox/trunk/xmpbox/src/main/java/org/apache/xmpbox/xml/DomXmpParser.java
Sun Dec 28 12:47:20 2025 (r1930916)
@@ -737,7 +737,7 @@ public class DomXmpParser
}
else
{
- pm =
tm.getDefinedDescriptionByNamespace(liElement.getNamespaceURI());
+ pm =
tm.getDefinedDescriptionByNamespace(liElement.getNamespaceURI(),
liElement.getLocalName());
}
af = tryParseAttributesAsProperties(xmp, liElement, tm,
(AbstractStructuredType) af, pm, null);
}
@@ -815,7 +815,7 @@ public class DomXmpParser
}
else
{
- pm =
tm.getDefinedDescriptionByNamespace(firstLiDescriptionElementChild.getNamespaceURI());
+ pm =
tm.getDefinedDescriptionByNamespace(firstLiDescriptionElementChild.getNamespaceURI(),
firstLiDescriptionElementChild.getLocalName());
}
for (Element liDescriptionElementChild : liDescriptionElementChildren)
{
@@ -1126,7 +1126,7 @@ public class DomXmpParser
}
try
{
- return tm.getSpecifiedPropertyType(qName, null);
+ return tm.getSpecifiedPropertyType(qName, parentTypeName);
}
catch (BadFieldValueException e)
{
@@ -1200,7 +1200,7 @@ public class DomXmpParser
}
else
{
- pm =
tm.getDefinedDescriptionByNamespace(attr.getNamespaceURI());
+ pm =
tm.getDefinedDescriptionByNamespace(attr.getNamespaceURI(),
attr.getLocalName());
}
}
if (ast != null && pm != null && attr.getNamespaceURI() !=
null)
Modified:
pdfbox/trunk/xmpbox/src/test/java/org/apache/xmpbox/xml/DomXmpParserTest.java
==============================================================================
---
pdfbox/trunk/xmpbox/src/test/java/org/apache/xmpbox/xml/DomXmpParserTest.java
Sun Dec 28 12:23:15 2025 (r1930915)
+++
pdfbox/trunk/xmpbox/src/test/java/org/apache/xmpbox/xml/DomXmpParserTest.java
Sun Dec 28 12:47:20 2025 (r1930916)
@@ -43,8 +43,10 @@ import org.apache.xmpbox.schema.XMPSchem
import org.apache.xmpbox.type.AbstractField;
import org.apache.xmpbox.type.ArrayProperty;
import org.apache.xmpbox.type.BadFieldValueException;
+import org.apache.xmpbox.type.CFAPatternType;
import org.apache.xmpbox.type.DefinedStructuredType;
import org.apache.xmpbox.type.DimensionsType;
+import org.apache.xmpbox.type.FlashType;
import org.apache.xmpbox.type.LayerType;
import org.apache.xmpbox.type.PDFASchemaType;
import org.apache.xmpbox.type.ResourceEventType;
@@ -744,6 +746,7 @@ class DomXmpParserTest
void testPropertyNotDefined() throws XmpParsingException
{
// While "Fired" does exist as a type, it's not the correct syntax,
the PDFLib XMP validator complains too.
+ // Surprisingly, it works since PDFBOX-6133
String s = "<?xml version=\"1.0\" encoding=\"UTF-8\"
standalone=\"no\"?>\n" +
"<?xpacket begin='' id='W5M0MpCehiHzreSzNTczkc9d'?>\n" +
"<x:xmpmeta xmlns:x=\"adobe:ns:meta/\"\n" +
@@ -759,10 +762,9 @@ class DomXmpParserTest
" </rdf:RDF>\n" +
"</x:xmpmeta><?xpacket end='w'?>";
final DomXmpParser xmpParser = new DomXmpParser();
- XmpParsingException ex = assertThrows(
- XmpParsingException.class,
- () -> xmpParser.parse(s.getBytes(StandardCharsets.UTF_8)));
- assertEquals("Property 'exif:Fired' not defined in
http://ns.adobe.com/exif/1.0/", ex.getMessage());
+ XMPMetadata xmp = xmpParser.parse(s.getBytes(StandardCharsets.UTF_8));
+ FlashType flash = (FlashType)
xmp.getSchema(ExifSchema.class).getProperty(ExifSchema.FLASH);
+ assertEquals("[Fired=BooleanType:False]",
flash.getProperty(FlashType.FIRED).toString());
}
@Test
@@ -1492,4 +1494,54 @@ class DomXmpParserTest
assertEquals(1, xmp.getPDFAIdentificationSchema().getPart());
}
}
+
+ @Test
+ void testPDFBox6133() throws IOException, XmpParsingException,
BadFieldValueException
+ {
+ // The namespace is used both for the schema and for the type,
+ // and there are two types with the same namespace
+ try (InputStream is =
DomXmpParser.class.getResourceAsStream("/org/apache/xmpbox/xml/PDFBOX-6133-0064638.xml"))
+ {
+ DomXmpParser xmpParser = new DomXmpParser();
+ XMPMetadata xmp = xmpParser.parse(is);
+ XMPSchema epaSchema =
xmp.getSchema("http://www.epo.org/patent-bibliographic-data/1.0/");
+ assertEquals("[TotalNumberOfPages=RealType:47.0]",
epaSchema.getProperty("TotalNumberOfPages").toString());
+ DefinedStructuredType pub = (DefinedStructuredType)
epaSchema.getProperty("Publication");
+ assertEquals("[CountryCode=TextType:EP]",
pub.getProperty("CountryCode").toString());
+ }
+ }
+
+ @Test
+ void testPropertyNotDefined2() throws XmpParsingException
+ {
+ // from file 089448.pdf, page 2, image 4
+ String s =
+ "<?xml version=\"1.0\" encoding=\"UTF-8\"
standalone=\"no\"?>\n" +
+ "<?xpacket begin='' id='W5M0MpCehiHzreSzNTczkc9d'?>\n" +
+ "<x:xmpmeta xmlns:x=\"adobe:ns:meta/\"\n" +
+ " x:xmptk=\"Adobe XMP Core 4.0-c006 1.236519, Wed
Jun 14 2006 08:31:24\">\n" +
+ " <rdf:RDF
xmlns:rdf=\"http://www.w3.org/1999/02/22-rdf-syntax-ns#\">\n" +
+ " <rdf:Description
xmlns:dc=\"http://purl.org/dc/elements/1.1/\"\n" +
+ "
xmlns:exif=\"http://ns.adobe.com/exif/1.0/\">\n" +
+ " <exif:CFAPattern>\n" +
+ " <rdf:Description>\n" +
+ " <exif:Values>\n" +
+ " <rdf:Seq>\n" +
+ "
<rdf:li>1</rdf:li>\n" +
+ "
<rdf:li>2</rdf:li>\n" +
+ "
<rdf:li>0</rdf:li>\n" +
+ "
<rdf:li>1</rdf:li>\n" +
+ " </rdf:Seq>\n" +
+ " </exif:Values>\n" +
+ " </rdf:Description>\n" +
+ " </exif:CFAPattern>\n" +
+ " </rdf:Description>\n" +
+ " </rdf:RDF>\n" +
+ "</x:xmpmeta><?xpacket end='w'?>";
+ final DomXmpParser xmpParser = new DomXmpParser();
+ XMPMetadata xmp = xmpParser.parse(s.getBytes(StandardCharsets.UTF_8));
+ CFAPatternType cfa = (CFAPatternType)
xmp.getSchema(ExifSchema.class).getProperty(ExifSchema.CFA_PATTERN);
+ ArrayProperty ap = (ArrayProperty)
cfa.getProperty(CFAPatternType.VALUES);
+ assertEquals("[1, 2, 0, 1]", ap.getElementsAsString().toString());
+ }
}