This is an automated email from the ASF dual-hosted git repository.

tilman pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/tika.git


The following commit(s) were added to refs/heads/main by this push:
     new a57e49f2c TIKA-4442: collect 6 more dublin core properties
a57e49f2c is described below

commit a57e49f2cfe7ebb8f4b85ee138d47ffec06017bc
Author: Tilman Hausherr <[email protected]>
AuthorDate: Wed Jun 25 09:43:18 2025 +0200

    TIKA-4442: collect 6 more dublin core properties
---
 .../tika/parser/pdf/PDMetadataExtractor.java       | 23 ++++++++++++++++++++++
 1 file changed, 23 insertions(+)

diff --git 
a/tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-pdf-module/src/main/java/org/apache/tika/parser/pdf/PDMetadataExtractor.java
 
b/tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-pdf-module/src/main/java/org/apache/tika/parser/pdf/PDMetadataExtractor.java
index ce1109fc1..a3fac7728 100644
--- 
a/tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-pdf-module/src/main/java/org/apache/tika/parser/pdf/PDMetadataExtractor.java
+++ 
b/tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-pdf-module/src/main/java/org/apache/tika/parser/pdf/PDMetadataExtractor.java
@@ -118,6 +118,12 @@ public class PDMetadataExtractor {
             extractDublinCoreListItems(metadata, 
TikaCoreProperties.CONTRIBUTOR, dcSchema);
             extractDublinCoreListItems(metadata, TikaCoreProperties.CREATOR, 
dcSchema);
             extractMultilingualItems(metadata, TikaCoreProperties.TITLE, null, 
dcSchema);
+            extractDublinCoreListItems(metadata, TikaCoreProperties.TYPE, 
dcSchema); // finds only the first one?!
+            extractDublinCoreSimpleItem(metadata, 
TikaCoreProperties.IDENTIFIER, dcSchema);
+            extractDublinCoreListItems(metadata, TikaCoreProperties.LANGUAGE, 
dcSchema);
+            extractDublinCoreListItems(metadata, TikaCoreProperties.PUBLISHER, 
dcSchema);
+            extractDublinCoreListItems(metadata, TikaCoreProperties.RELATION, 
dcSchema);
+            extractDublinCoreSimpleItem(metadata, TikaCoreProperties.SOURCE, 
dcSchema);
         }
     }
 
@@ -437,6 +443,23 @@ public class PDMetadataExtractor {
         }
     }
 
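+    /**
+     * Tries to read a single string value for a property from XMPSchemaDublinCore.
+     * <p>
+     * This relies on the property having a DublinCore compliant getName().
+     *
+     * @param metadata the metadata to which the value that was read is added
+     * @param property the property to read; its getName() is used for the lookup
+     * @param dc the Dublin Core schema to read from; if null, nothing is done
+     */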
+    private static void extractDublinCoreSimpleItem(Metadata metadata, 
Property property,
+                                                   XMPSchemaDublinCore dc) {
+        if (dc == null) {
+            return;
+        }
+        String textProperty = dc.getTextProperty(property.getName());
+        addMetadata(metadata, property, textProperty);
+    }
 
     static void addMetadata(Metadata metadata, Property property, String 
value) {
         if (value != null) {

Reply via email to