Author: nick
Date: Thu Jul 15 10:37:20 2010
New Revision: 964373

URL: http://svn.apache.org/viewvc?rev=964373&view=rev
Log:
Update parsers to fix problems with new style Date properties, for TIKA-451

Modified:
    tika/trunk/tika-core/src/main/java/org/apache/tika/metadata/DublinCore.java
    tika/trunk/tika-core/src/main/java/org/apache/tika/metadata/MSOffice.java
    tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/microsoft/ooxml/MetadataExtractor.java
    tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/odf/OpenDocumentMetaParser.java
    tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/pdf/PDFParser.java

Modified: 
tika/trunk/tika-core/src/main/java/org/apache/tika/metadata/DublinCore.java
URL: 
http://svn.apache.org/viewvc/tika/trunk/tika-core/src/main/java/org/apache/tika/metadata/DublinCore.java?rev=964373&r1=964372&r2=964373&view=diff
==============================================================================
--- tika/trunk/tika-core/src/main/java/org/apache/tika/metadata/DublinCore.java (original)
+++ tika/trunk/tika-core/src/main/java/org/apache/tika/metadata/DublinCore.java Thu Jul 15 10:37:20 2010
@@ -45,6 +45,7 @@ public interface DublinCore {
 
     /**
      * Date on which the resource was changed.
+     * TODO Make me a Date Property
      */
     String MODIFIED = "modified";
 
@@ -81,6 +82,7 @@ public interface DublinCore {
      * the resource. Recommended best practice for encoding the date value is
      * defined in a profile of ISO 8601 [W3CDTF] and follows the YYYY-MM-DD
      * format.
+     * TODO Make me a Date Property
      */
     String DATE = "date";
 

Modified: 
tika/trunk/tika-core/src/main/java/org/apache/tika/metadata/MSOffice.java
URL: 
http://svn.apache.org/viewvc/tika/trunk/tika-core/src/main/java/org/apache/tika/metadata/MSOffice.java?rev=964373&r1=964372&r2=964373&view=diff
==============================================================================
--- tika/trunk/tika-core/src/main/java/org/apache/tika/metadata/MSOffice.java (original)
+++ tika/trunk/tika-core/src/main/java/org/apache/tika/metadata/MSOffice.java Thu Jul 15 10:37:20 2010
@@ -73,9 +73,10 @@ public interface MSOffice {
 
     String SECURITY = "Security";
 
-    Property EDIT_TIME = 
-        Property.internalDate("Edit-Time");
+    /** How long has been spent editing the document? */ 
+    String EDIT_TIME = "Edit-Time"; 
 
+    /** When was the document created? */
     Property CREATION_DATE = 
         Property.internalDate("Creation-Date");
 

Modified: 
tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/microsoft/ooxml/MetadataExtractor.java
URL: 
http://svn.apache.org/viewvc/tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/microsoft/ooxml/MetadataExtractor.java?rev=964373&r1=964372&r2=964373&view=diff
==============================================================================
--- tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/microsoft/ooxml/MetadataExtractor.java (original)
+++ tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/microsoft/ooxml/MetadataExtractor.java Thu Jul 15 10:37:20 2010
@@ -16,6 +16,8 @@
  */
 package org.apache.tika.parser.microsoft.ooxml;
 
+import java.util.Date;
+
 import org.apache.poi.POIXMLTextExtractor;
 import org.apache.poi.POIXMLProperties.CoreProperties;
 import org.apache.poi.POIXMLProperties.ExtendedProperties;
@@ -23,6 +25,7 @@ import org.apache.poi.openxml4j.opc.inte
 import org.apache.poi.openxml4j.util.Nullable;
 import org.apache.tika.exception.TikaException;
 import org.apache.tika.metadata.Metadata;
+import org.apache.tika.metadata.Property;
 import org.openxmlformats.schemas.officeDocument.x2006.extendedProperties.CTProperties;
 
 /**
@@ -58,6 +61,8 @@ public class MetadataExtractor {
                 .getContentStatusProperty());
         addProperty(metadata, Metadata.DATE, propsHolder
                 .getCreatedPropertyString());
+        addProperty(metadata, Metadata.CREATION_DATE, propsHolder
+                .getCreatedProperty());
         addProperty(metadata, Metadata.CREATOR, propsHolder
                 .getCreatorProperty());
         addProperty(metadata, Metadata.AUTHOR, propsHolder
@@ -75,7 +80,7 @@ public class MetadataExtractor {
         addProperty(metadata, Metadata.LAST_PRINTED, propsHolder
                 .getLastPrintedPropertyString());
         addProperty(metadata, Metadata.LAST_MODIFIED, propsHolder
-                .getModifiedPropertyString());
+                .getModifiedProperty());
         addProperty(metadata, Metadata.REVISION_NUMBER, propsHolder
                 .getRevisionProperty());
         addProperty(metadata, Metadata.SUBJECT, propsHolder
@@ -110,6 +115,12 @@ public class MetadataExtractor {
         addProperty(metadata, Metadata.WORD_COUNT, propsHolder.getWords());
     }
 
+    private void addProperty(Metadata metadata, Property property, Nullable<Date> value) {
+        if (value.getValue() != null) {
+            metadata.set(property, value.getValue());
+        }
+    }
+
     private void addProperty(Metadata metadata, String name, Nullable<?> value) {
         if (value.getValue() != null) {
             addProperty(metadata, name, value.getValue().toString());

Modified: 
tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/odf/OpenDocumentMetaParser.java
URL: 
http://svn.apache.org/viewvc/tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/odf/OpenDocumentMetaParser.java?rev=964373&r1=964372&r2=964373&view=diff
==============================================================================
--- tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/odf/OpenDocumentMetaParser.java (original)
+++ tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/odf/OpenDocumentMetaParser.java Thu Jul 15 10:37:20 2010
@@ -71,7 +71,7 @@ public class OpenDocumentMetaParser exte
         // Process the Dublin Core Attributes 
         ch = super.getContentHandler(ch, md);
         // Process the OO Meta Attributes
-        ch = getMeta(ch, md, Metadata.CREATION_DATE, "creation-date");
+        ch = getMeta(ch, md, Metadata.CREATION_DATE.getName(), "creation-date");
         ch = getMeta(ch, md, Metadata.KEYWORDS, "keyword");
         ch = getMeta(ch, md, Metadata.EDIT_TIME, "editing-duration");
         ch = getMeta(ch, md, "editing-cycles", "editing-cycles");

Modified: 
tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/pdf/PDFParser.java
URL: 
http://svn.apache.org/viewvc/tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/pdf/PDFParser.java?rev=964373&r1=964372&r2=964373&view=diff
==============================================================================
--- tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/pdf/PDFParser.java (original)
+++ tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/pdf/PDFParser.java Thu Jul 15 10:37:20 2010
@@ -32,6 +32,7 @@ import org.apache.pdfbox.pdmodel.PDDocum
 import org.apache.pdfbox.pdmodel.PDDocumentInformation;
 import org.apache.tika.exception.TikaException;
 import org.apache.tika.metadata.Metadata;
+import org.apache.tika.metadata.Property;
 import org.apache.tika.mime.MediaType;
 import org.apache.tika.parser.ParseContext;
 import org.apache.tika.parser.Parser;
@@ -146,6 +147,12 @@ public class PDFParser implements Parser
         }
     }
 
+    private void addMetadata(Metadata metadata, Property property, Calendar value) {
+        if (value != null) {
+            metadata.set(property, value.getTime());
+        }
+    }
+
     /**
      * Used when processing custom metadata entries, as PDFBox won't do
      *  the conversion for us in the way it does for the standard ones


Reply via email to