This is an automated email from the ASF dual-hosted git repository.

tallison pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/tika.git


The following commit(s) were added to refs/heads/master by this push:
     new f4626a6  TIKA-3101 -- extract metadata from XMP basic schema, cleanup
f4626a6 is described below

commit f4626a60be540fc0038858d6226c54912c41d5f9
Author: tallison <[email protected]>
AuthorDate: Mon Jun 1 10:12:19 2020 -0400

    TIKA-3101 -- extract metadata from XMP basic schema, cleanup
---
 tika-core/src/main/java/org/apache/tika/metadata/XMP.java    | 12 +++++++-----
 .../java/org/apache/tika/parser/pdf/PDMetadataExtractor.java |  5 +++++
 2 files changed, 12 insertions(+), 5 deletions(-)

diff --git a/tika-core/src/main/java/org/apache/tika/metadata/XMP.java b/tika-core/src/main/java/org/apache/tika/metadata/XMP.java
index d8f81b2..14cc5ba 100644
--- a/tika-core/src/main/java/org/apache/tika/metadata/XMP.java
+++ b/tika-core/src/main/java/org/apache/tika/metadata/XMP.java
@@ -26,10 +26,12 @@ public interface XMP {
     String PREFIX_ = PREFIX + TikaCoreProperties.NAMESPACE_PREFIX_DELIMITER;
 
     /**
-     * An unordered array of text strings that unambiguously identify the resource
-     * within a given context. An array item may be qualified with xmpidq:Scheme
-     * (see 8.7, “xmpidq namespace”) to denote the formal identification system to
-     * which that identifier conforms.
+     * Unordered text strings of advisories.
+     */
+    Property ABOUT = Property.externalTextBag(PREFIX_ + "About");
+
+    /**
+     * Unordered text strings of advisories.
      */
     Property ADVISORY = Property.externalTextBag(PREFIX_ + "Advisory");
 
@@ -71,7 +73,7 @@ public interface XMP {
     Property MODIFY_DATE = Property.externalDate(PREFIX_ + "ModifyDate");
 
     /**
-     * A word or short phrase that identifies a resource as a member of a userdefined collection.
+     * A word or short phrase that represents the nick name fo the file
      */
     Property NICKNAME = Property.externalText(PREFIX_ + "NickName");
 
diff --git a/tika-parsers/src/main/java/org/apache/tika/parser/pdf/PDMetadataExtractor.java b/tika-parsers/src/main/java/org/apache/tika/parser/pdf/PDMetadataExtractor.java
index 16605cb..0d3f59d 100644
--- a/tika-parsers/src/main/java/org/apache/tika/parser/pdf/PDMetadataExtractor.java
+++ b/tika-parsers/src/main/java/org/apache/tika/parser/pdf/PDMetadataExtractor.java
@@ -36,6 +36,7 @@ import org.apache.pdfbox.pdmodel.common.PDMetadata;
 import org.apache.poi.util.IOUtils;
 import org.apache.tika.exception.TikaException;
 import org.apache.tika.extractor.EmbeddedDocumentUtil;
+import org.apache.tika.metadata.DublinCore;
 import org.apache.tika.metadata.Metadata;
 import org.apache.tika.metadata.PDF;
 import org.apache.tika.metadata.Property;
@@ -119,6 +120,8 @@ class PDMetadataExtractor {
         //add the elements from the basic schema if they haven't already
         //been extracted from dublin core
         setNotNull(XMP.CREATOR_TOOL, basic.getCreatorTool(), metadata);
+        setNotNull(DublinCore.TITLE, basic.getTitle(), metadata);
+        setNotNull(XMP.ABOUT, basic.getAbout(), metadata);
         setNotNull(XMP.LABEL, basic.getLabel(), metadata);
         try {
             setNotNull(XMP.CREATE_DATE, basic.getCreateDate(), metadata);
@@ -147,6 +150,8 @@ class PDMetadataExtractor {
         }
         setNotNull(XMP.NICKNAME, basic.getNickname(), metadata);
         setNotNull(XMP.RATING, basic.getRating(), metadata);
+        //TODO: find an example where basic.getThumbNail is not null
+        //and figure out how to add that info
     }
 
     private static void setNotNull(Property property, String value, Metadata metadata) {

Reply via email to