This is an automated email from the ASF dual-hosted git repository.

tilman pushed a commit to branch branch_3x
in repository https://gitbox.apache.org/repos/asf/tika.git


The following commit(s) were added to refs/heads/branch_3x by this push:
     new 586e361c3 TIKA-4444: get subject from xmp + add tests
586e361c3 is described below

commit 586e361c3e6fcf07f804f243117b0e35b3ef6d38
Author: Tilman Hausherr <[email protected]>
AuthorDate: Wed Jun 25 15:08:50 2025 +0200

    TIKA-4444: get subject from xmp + add tests
---
 .../java/org/apache/tika/parser/pdf/PDMetadataExtractor.java     | 1 +
 .../test/java/org/apache/tika/parser/pdf/CustomTikaXMPTest.java  | 9 +++++++++
 2 files changed, 10 insertions(+)

diff --git a/tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-pdf-module/src/main/java/org/apache/tika/parser/pdf/PDMetadataExtractor.java b/tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-pdf-module/src/main/java/org/apache/tika/parser/pdf/PDMetadataExtractor.java
index a3fac7728..a8d35e1ba 100644
--- a/tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-pdf-module/src/main/java/org/apache/tika/parser/pdf/PDMetadataExtractor.java
+++ b/tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-pdf-module/src/main/java/org/apache/tika/parser/pdf/PDMetadataExtractor.java
@@ -124,6 +124,7 @@ public class PDMetadataExtractor {
             extractDublinCoreListItems(metadata, TikaCoreProperties.PUBLISHER, dcSchema);
             extractDublinCoreListItems(metadata, TikaCoreProperties.RELATION, dcSchema);
             extractDublinCoreSimpleItem(metadata, TikaCoreProperties.SOURCE, dcSchema);
+            extractDublinCoreListItems(metadata, TikaCoreProperties.SUBJECT, dcSchema);
         }
     }
 
diff --git a/tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-pdf-module/src/test/java/org/apache/tika/parser/pdf/CustomTikaXMPTest.java b/tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-pdf-module/src/test/java/org/apache/tika/parser/pdf/CustomTikaXMPTest.java
index 9b7907b4d..dc6e3b3b4 100644
--- a/tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-pdf-module/src/test/java/org/apache/tika/parser/pdf/CustomTikaXMPTest.java
+++ b/tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-pdf-module/src/test/java/org/apache/tika/parser/pdf/CustomTikaXMPTest.java
@@ -59,6 +59,9 @@ public class CustomTikaXMPTest extends TikaTest {
     public void testPDFUA() throws Exception {
         Metadata metadata = extract("testPDFUA.xmp");
         assertEquals(1, metadata.getInt(PDF.PDFUAID_PART));
+        String[] subjects = metadata.getValues(TikaCoreProperties.SUBJECT);
+        assertEquals("keywords", subjects[0]);
+        assertEquals("subject", subjects[1]);
     }
 
     @Test
@@ -83,6 +86,12 @@ public class CustomTikaXMPTest extends TikaTest {
         assertEquals("International Union of Thinkology", metadata.get(TikaCoreProperties.PUBLISHER));
         assertEquals("Relation", metadata.get(TikaCoreProperties.RELATION));
         assertEquals("Journal of Thinkology", metadata.get(TikaCoreProperties.SOURCE));
+        String[] subjects = metadata.getValues(TikaCoreProperties.SUBJECT);
+        assertEquals("THOUGHTS", subjects[0]);
+        assertEquals("HAPPINESS", subjects[1]);
+        assertEquals("FEAR", subjects[2]);
+        assertEquals("ANGER", subjects[3]);
+        assertEquals("DESPAIR", subjects[4]);
     }
 
     private Metadata extract(String xmpFileName) throws IOException, TikaException, SAXException {

Reply via email to