Author: tilman
Date: Sun Dec 28 12:47:29 2025
New Revision: 1930918
Log:
PDFBOX-6133: when a property is not found through the schema namespace, retry
the lookup through the structured type namespace, keeping the old behavior if
that fails as well; pass the local name because several types can share one
namespace; add tests
Modified:
pdfbox/branches/2.0/xmpbox/src/main/java/org/apache/xmpbox/type/TypeMapping.java
pdfbox/branches/2.0/xmpbox/src/main/java/org/apache/xmpbox/xml/DomXmpParser.java
pdfbox/branches/2.0/xmpbox/src/test/java/org/apache/xmpbox/xml/DomXmpParserTest.java
Modified:
pdfbox/branches/2.0/xmpbox/src/main/java/org/apache/xmpbox/type/TypeMapping.java
==============================================================================
---
pdfbox/branches/2.0/xmpbox/src/main/java/org/apache/xmpbox/type/TypeMapping.java
Sun Dec 28 12:47:25 2025 (r1930917)
+++
pdfbox/branches/2.0/xmpbox/src/main/java/org/apache/xmpbox/type/TypeMapping.java
Sun Dec 28 12:47:29 2025 (r1930918)
@@ -25,9 +25,11 @@ import java.lang.annotation.Annotation;
import java.lang.reflect.Constructor;
import java.lang.reflect.Field;
import java.lang.reflect.InvocationTargetException;
+import java.util.ArrayList;
import java.util.Calendar;
import java.util.EnumMap;
import java.util.HashMap;
+import java.util.List;
import java.util.Map;
import javax.xml.namespace.QName;
@@ -52,16 +54,28 @@ import org.apache.xmpbox.schema.XmpSchem
public final class TypeMapping
{
+ // type -> property
private Map<Types, PropertiesDescription> structuredMappings;
// ns -> type
+ // filled during init
+ @Deprecated
private Map<String, Types> structuredNamespaces;
- // ns -> type
+ // ns -> typeName
+ @Deprecated
private Map<String, String> definedStructuredNamespaces;
+ // ns -> list of property descriptions
+ private Map<String, List<PropertiesDescription>>
definedStructuredNamespaces2;
+
+ // typeName -> property
private Map<String, PropertiesDescription> definedStructuredMappings;
+ // ns -> list of types
+ // filled during init
+ private Map<String, List<Types>> structuredNamespaces2;
+
private final XMPMetadata metadata;
private Map<String, XMPSchemaFactory> schemaMap;
@@ -80,6 +94,7 @@ public final class TypeMapping
// structured types
structuredMappings = new EnumMap<Types,
PropertiesDescription>(Types.class);
structuredNamespaces = new HashMap<String, Types>();
+ structuredNamespaces2 = new HashMap<String, List<Types>>();
for (Types type : Types.values())
{
if (type.isStructured())
@@ -90,6 +105,17 @@ public final class TypeMapping
String ns = st.namespace();
PropertiesDescription pm = initializePropMapping(clz);
structuredNamespaces.put(ns, type);
+ List<Types> list = structuredNamespaces2.get(ns);
+ if (list != null)
+ {
+ list.add(type);
+ }
+ else
+ {
+ list = new ArrayList<Types>();
+ list.add(type);
+ structuredNamespaces2.put(ns, list);
+ }
structuredMappings.put(type, pm);
}
}
@@ -97,6 +123,7 @@ public final class TypeMapping
// define structured types
definedStructuredNamespaces = new HashMap<String, String>();
definedStructuredMappings = new HashMap<String,
PropertiesDescription>();
+ definedStructuredNamespaces2 = new HashMap<String,
List<PropertiesDescription>>();
// schema
schemaMap = new HashMap<String, XMPSchemaFactory>();
@@ -116,14 +143,49 @@ public final class TypeMapping
public void addToDefinedStructuredTypes(String typeName, String ns,
PropertiesDescription pm)
{
+ List<PropertiesDescription> list =
definedStructuredNamespaces2.get(ns);
+ if (list != null)
+ {
+ list.add(pm);
+ }
+ else
+ {
+ list = new ArrayList<PropertiesDescription>();
+ list.add(pm);
+ definedStructuredNamespaces2.put(ns, list);
+ }
definedStructuredNamespaces.put(ns, typeName);
definedStructuredMappings.put(typeName, pm);
}
+ @Deprecated
public PropertiesDescription getDefinedDescriptionByNamespace(String
namespace)
{
String dt = definedStructuredNamespaces.get(namespace);
- return this.definedStructuredMappings.get(dt);
+ return definedStructuredMappings.get(dt);
+ }
+
+ /**
+ * Get a property description based on namespace and field name. Both are needed because there
+ * can be several property descriptions for one namespace. NOTE(review): for a namespace that was never registered (or a null namespace) the map lookup returns null and the loop below throws a NullPointerException, whereas the deprecated one-argument variant returned null; verify that no caller relies on the old behavior.
+ *
+ * @param namespace namespace URI under which the descriptions were registered
+ * @param pdfaFieldName field name that the wanted description must contain
+ * @return the first description registered for the namespace that contains the field name, or null if none of them does
+ */
+ public PropertiesDescription getDefinedDescriptionByNamespace(String
namespace, String pdfaFieldName)
+ {
+ List<PropertiesDescription> propDescList =
definedStructuredNamespaces2.get(namespace);
+ // list of descriptions, one per type registered for this namespace (not to be confused with field names /
property names)
+ for (PropertiesDescription propDesc : propDescList)
+ {
+ // check whether this description contains the requested field name
+ if (propDesc.getPropertiesNames().contains(pdfaFieldName))
+ {
+ return propDesc;
+ }
+ }
+ return null;
}
public AbstractStructuredType instanciateStructuredType(Types type, String
propertyName)
@@ -231,17 +293,17 @@ public final class TypeMapping
*/
public boolean isStructuredTypeNamespace(String namespace)
{
- return structuredNamespaces.containsKey(namespace);
+ return structuredNamespaces2.containsKey(namespace);
}
public boolean isDefinedTypeNamespace(String namespace)
{
- return definedStructuredNamespaces.containsKey(namespace);
+ return definedStructuredNamespaces2.containsKey(namespace);
}
public boolean isDefinedType(String name)
{
- return this.definedStructuredMappings.containsKey(name);
+ return definedStructuredMappings.containsKey(name);
}
private void addNameSpace(Class<? extends XMPSchema> classSchem)
@@ -336,29 +398,72 @@ public final class TypeMapping
if (factory != null)
{
// found in schema
- return factory.getPropertyType(qName.getLocalPart());
+ PropertyType propertyType =
factory.getPropertyType(qName.getLocalPart());
+ if (propertyType != null)
+ {
+ return propertyType;
+ }
}
- else
+ // try in structured
+ List<Types> list = structuredNamespaces2.get(qName.getNamespaceURI());
+ Types st;
+ if (list != null)
{
- // try in structured
- Types st = structuredNamespaces.get(qName.getNamespaceURI());
- if (st != null)
+ st = list.get(0);
+ if (list.size() == 1)
{
- return createPropertyType(st, Cardinality.Simple);
+ PropertiesDescription propDesc = structuredMappings.get(st);
+ if (factory == null ||
propDesc.getPropertiesNames().contains(qName.getLocalPart()))
+ {
+ return createPropertyType(st, Cardinality.Simple);
+ }
+ return null;
}
- else
+ if (list.size() > 1)
{
- // try in defined
- String dt =
definedStructuredNamespaces.get(qName.getNamespaceURI());
- if (dt == null)
+ for (Types type : list)
{
- // not found
- throw new BadFieldValueException("No descriptor found for
" + qName);
+ if (type.name().equals(parentTypeName))
+ {
+ return createPropertyType(type, Cardinality.Simple);
+ }
}
- else
+ for (Types type : list)
+ {
+ PropertiesDescription propDesc =
structuredMappings.get(type);
+ if
(propDesc.getPropertiesNames().contains(qName.getLocalPart()))
+ {
+ st = type;
+ break;
+ }
+ }
+ }
+
+ PropertyType propertyType = createPropertyType(st,
Cardinality.Simple);
+ PropertiesDescription propertiesDescription =
getStructuredPropMapping(propertyType.type());
+ // PDFBOX-6133: additionally check that the local name exists in this type.
+ // It can be missing with photoshop and exif because the namespace exists as a schema and as a type
+ if
(propertiesDescription.getPropertiesNames().contains(qName.getLocalPart()))
+ {
+ return propertyType;
+ }
+ return null;
+ }
+ else
+ {
+ // try in defined
+ if
(!definedStructuredNamespaces2.containsKey(qName.getNamespaceURI()))
+ {
+ // not found
+ if (factory != null)
{
- return createPropertyType(Types.DefinedType,
Cardinality.Simple);
+ return null; // pre PDFBOX-6133 behavior
}
+ throw new BadFieldValueException("No descriptor found for " +
qName);
+ }
+ else
+ {
+ return createPropertyType(Types.DefinedType,
Cardinality.Simple);
}
}
}
Modified:
pdfbox/branches/2.0/xmpbox/src/main/java/org/apache/xmpbox/xml/DomXmpParser.java
==============================================================================
---
pdfbox/branches/2.0/xmpbox/src/main/java/org/apache/xmpbox/xml/DomXmpParser.java
Sun Dec 28 12:47:25 2025 (r1930917)
+++
pdfbox/branches/2.0/xmpbox/src/main/java/org/apache/xmpbox/xml/DomXmpParser.java
Sun Dec 28 12:47:29 2025 (r1930918)
@@ -706,7 +706,7 @@ public class DomXmpParser
}
else
{
- pm =
tm.getDefinedDescriptionByNamespace(liElement.getNamespaceURI());
+ pm =
tm.getDefinedDescriptionByNamespace(liElement.getNamespaceURI(),
liElement.getLocalName());
}
af = tryParseAttributesAsProperties(xmp, liElement, tm,
(AbstractStructuredType) af, pm, null);
}
@@ -784,7 +784,7 @@ public class DomXmpParser
}
else
{
- pm =
tm.getDefinedDescriptionByNamespace(firstLiDescriptionElementChild.getNamespaceURI());
+ pm =
tm.getDefinedDescriptionByNamespace(firstLiDescriptionElementChild.getNamespaceURI(),
firstLiDescriptionElementChild.getLocalName());
}
for (Element liDescriptionElementChild : liDescriptionElementChildren)
{
@@ -1101,7 +1101,7 @@ public class DomXmpParser
}
try
{
- return tm.getSpecifiedPropertyType(qName, null);
+ return tm.getSpecifiedPropertyType(qName, parentTypeName);
}
catch (BadFieldValueException e)
{
@@ -1175,7 +1175,7 @@ public class DomXmpParser
}
else
{
- pm =
tm.getDefinedDescriptionByNamespace(attr.getNamespaceURI());
+ pm =
tm.getDefinedDescriptionByNamespace(attr.getNamespaceURI(),
attr.getLocalName());
}
}
if (ast != null && pm != null && attr.getNamespaceURI() !=
null)
Modified:
pdfbox/branches/2.0/xmpbox/src/test/java/org/apache/xmpbox/xml/DomXmpParserTest.java
==============================================================================
---
pdfbox/branches/2.0/xmpbox/src/test/java/org/apache/xmpbox/xml/DomXmpParserTest.java
Sun Dec 28 12:47:25 2025 (r1930917)
+++
pdfbox/branches/2.0/xmpbox/src/test/java/org/apache/xmpbox/xml/DomXmpParserTest.java
Sun Dec 28 12:47:29 2025 (r1930918)
@@ -42,8 +42,10 @@ import org.apache.xmpbox.schema.XMPageTe
import org.apache.xmpbox.type.AbstractField;
import org.apache.xmpbox.type.ArrayProperty;
import org.apache.xmpbox.type.BadFieldValueException;
+import org.apache.xmpbox.type.CFAPatternType;
import org.apache.xmpbox.type.DefinedStructuredType;
import org.apache.xmpbox.type.DimensionsType;
+import org.apache.xmpbox.type.FlashType;
import org.apache.xmpbox.type.LayerType;
import org.apache.xmpbox.type.PDFASchemaType;
import org.apache.xmpbox.type.ResourceEventType;
@@ -776,6 +778,7 @@ public class DomXmpParserTest
public void testPropertyNotDefined() throws XmpParsingException,
UnsupportedEncodingException
{
// While "Fired" does exist as a type, it's not the correct syntax,
the PDFLib XMP validator complains too.
+ // Surprisingly, it works since PDFBOX-6133
String s = "<?xml version=\"1.0\" encoding=\"UTF-8\"
standalone=\"no\"?>\n" +
"<?xpacket begin='' id='W5M0MpCehiHzreSzNTczkc9d'?>\n" +
"<x:xmpmeta xmlns:x=\"adobe:ns:meta/\"\n" +
@@ -790,15 +793,10 @@ public class DomXmpParserTest
" </rdf:Description>\n" +
" </rdf:RDF>\n" +
"</x:xmpmeta><?xpacket end='w'?>";
- try
- {
- new DomXmpParser().parse(s.getBytes("utf-8"));
- fail("XmpParsingException expected");
- }
- catch (XmpParsingException ex)
- {
- assertEquals("Property 'exif:Fired' not defined in
http://ns.adobe.com/exif/1.0/", ex.getMessage());
- }
+ final DomXmpParser xmpParser = new DomXmpParser();
+ XMPMetadata xmp = xmpParser.parse(s.getBytes("utf-8"));
+ FlashType flash = (FlashType)
xmp.getSchema(ExifSchema.class).getProperty(ExifSchema.FLASH);
+ assertEquals("[Fired=BooleanType:False]",
flash.getProperty(FlashType.FIRED).toString());
}
@Test
@@ -1578,4 +1576,53 @@ public class DomXmpParserTest
assertEquals((Integer) 1, xmp.getPDFIdentificationSchema().getPart());
is.close();
}
-}
\ No newline at end of file
+
+ @Test
+ public void testPDFBox6133() throws IOException, XmpParsingException,
BadFieldValueException
+ {
+ // The namespace is used both for the schema and for a type,
+ // and there are two types with the same namespace
+ InputStream is =
DomXmpParser.class.getResourceAsStream("/org/apache/xmpbox/xml/PDFBOX-6133-0064638.xml");
+ DomXmpParser xmpParser = new DomXmpParser();
+ XMPMetadata xmp = xmpParser.parse(is);
+ XMPSchema epaSchema =
xmp.getSchema("http://www.epo.org/patent-bibliographic-data/1.0/");
+ assertEquals("[TotalNumberOfPages=RealType:47.0]",
epaSchema.getProperty("TotalNumberOfPages").toString());
+ DefinedStructuredType pub = (DefinedStructuredType)
epaSchema.getProperty("Publication");
+ assertEquals("[CountryCode=TextType:EP]",
pub.getProperty("CountryCode").toString());
+ is.close();
+ }
+
+ @Test
+ public void testPropertyNotDefined2() throws XmpParsingException,
UnsupportedEncodingException
+ {
+ // from file 089448.pdf, page 2, image 4
+ String s =
+ "<?xml version=\"1.0\" encoding=\"UTF-8\"
standalone=\"no\"?>\n" +
+ "<?xpacket begin='' id='W5M0MpCehiHzreSzNTczkc9d'?>\n" +
+ "<x:xmpmeta xmlns:x=\"adobe:ns:meta/\"\n" +
+ " x:xmptk=\"Adobe XMP Core 4.0-c006 1.236519, Wed
Jun 14 2006 08:31:24\">\n" +
+ " <rdf:RDF
xmlns:rdf=\"http://www.w3.org/1999/02/22-rdf-syntax-ns#\">\n" +
+ " <rdf:Description
xmlns:dc=\"http://purl.org/dc/elements/1.1/\"\n" +
+ "
xmlns:exif=\"http://ns.adobe.com/exif/1.0/\">\n" +
+ " <exif:CFAPattern>\n" +
+ " <rdf:Description>\n" +
+ " <exif:Values>\n" +
+ " <rdf:Seq>\n" +
+ "
<rdf:li>1</rdf:li>\n" +
+ "
<rdf:li>2</rdf:li>\n" +
+ "
<rdf:li>0</rdf:li>\n" +
+ "
<rdf:li>1</rdf:li>\n" +
+ " </rdf:Seq>\n" +
+ " </exif:Values>\n" +
+ " </rdf:Description>\n" +
+ " </exif:CFAPattern>\n" +
+ " </rdf:Description>\n" +
+ " </rdf:RDF>\n" +
+ "</x:xmpmeta><?xpacket end='w'?>";
+ final DomXmpParser xmpParser = new DomXmpParser();
+ XMPMetadata xmp = xmpParser.parse(s.getBytes("utf-8"));
+ CFAPatternType cfa = (CFAPatternType)
xmp.getSchema(ExifSchema.class).getProperty(ExifSchema.CFA_PATTERN);
+ ArrayProperty ap = (ArrayProperty)
cfa.getProperty(CFAPatternType.VALUES);
+ assertEquals("[1, 2, 0, 1]", ap.getElementsAsString().toString());
+ }
+}