This is an automated email from the ASF dual-hosted git repository.
tilman pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/tika.git
The following commit(s) were added to refs/heads/main by this push:
new 522e6ef84 TIKA-4551: support xmpMM:InstanceID
522e6ef84 is described below
commit 522e6ef842cf4f6f89a404b986c0b7950efa5874
Author: Tilman Hausherr <[email protected]>
AuthorDate: Thu Dec 4 16:55:39 2025 +0100
TIKA-4551: support xmpMM:InstanceID
---
.../main/java/org/apache/tika/parser/xmp/JempboxExtractor.java | 6 ++++--
.../java/org/apache/tika/parser/xmp/JempboxExtractorTest.java | 10 ++++++++++
2 files changed, 14 insertions(+), 2 deletions(-)
diff --git a/tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-xmp-commons/src/main/java/org/apache/tika/parser/xmp/JempboxExtractor.java b/tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-xmp-commons/src/main/java/org/apache/tika/parser/xmp/JempboxExtractor.java
index e51784733..dcabc2641 100644
--- a/tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-xmp-commons/src/main/java/org/apache/tika/parser/xmp/JempboxExtractor.java
+++ b/tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-xmp-commons/src/main/java/org/apache/tika/parser/xmp/JempboxExtractor.java
@@ -131,8 +131,10 @@ public class JempboxExtractor {
}
if (mmSchema != null) {
addMetadata(metadata, XMPMM.DOCUMENTID, mmSchema.getDocumentID());
- //not currently supported by JempBox...
-// metadata.set(XMPMM.INSTANCEID, mmSchema.getInstanceID());
+ // not currently supported by JempBox...
+ // but might be in 1.8.18 if ever released, see PDFBOX-6116
+ // until then use workaround (won't work if non standard prefix is used)
+ metadata.set(XMPMM.INSTANCEID,
mmSchema.getTextProperty("xmpMM:InstanceID" ));
ResourceRef derivedFrom = mmSchema.getDerivedFrom();
if (derivedFrom != null) {
diff --git a/tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-xmp-commons/src/test/java/org/apache/tika/parser/xmp/JempboxExtractorTest.java b/tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-xmp-commons/src/test/java/org/apache/tika/parser/xmp/JempboxExtractorTest.java
index f43d83077..e5abfa572 100644
--- a/tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-xmp-commons/src/test/java/org/apache/tika/parser/xmp/JempboxExtractorTest.java
+++ b/tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-xmp-commons/src/test/java/org/apache/tika/parser/xmp/JempboxExtractorTest.java
@@ -144,4 +144,14 @@ public class JempboxExtractorTest extends TikaTest {
assertEquals("2014-03-04T22:50:41Z", m.get(XMPMM.HISTORY_WHEN));
}
+ @Test
+ public void testXMPMMMisc() throws Exception {
+ Metadata m = new Metadata();
+ JempboxExtractor ex = new JempboxExtractor(m);
+ try (InputStream is =
getResourceAsStream("/test-documents/testXMP.xmp")) {
+ ex.parse(is);
+ }
+ assertEquals("uuid:cccee1fc-51b3-4b52-ac86-672af3974d25",
m.getValues(XMPMM.DOCUMENTID)[0]);
+ assertEquals("uuid:afa71b09-7cc5-48ac-8664-ac6dcf8b5ab4",
m.getValues(XMPMM.INSTANCEID)[0]);
+ }
}