Author: nick
Date: Wed Jul 14 22:46:50 2010
New Revision: 964235

URL: http://svn.apache.org/viewvc?rev=964235&view=rev
Log:
TIKA-451 - Inconsistent date format for Metadata.CREATION_DATE and 
Metadata.LAST_MODIFIED
Make CREATION_DATE and LAST_MODIFIED Date property instances, and add support 
for getting and setting Dates (+getting ints), as discussed in TIKA-451
Unit tests for getting and setting ints and dates are included. Work to update 
the existing parsers to make use of the new Date setter is still outstanding

Added:
    
tika/trunk/tika-core/src/main/java/org/apache/tika/metadata/PropertyTypeException.java
   (with props)
Modified:
    tika/trunk/tika-core/src/main/java/org/apache/tika/metadata/HttpHeaders.java
    tika/trunk/tika-core/src/main/java/org/apache/tika/metadata/MSOffice.java
    tika/trunk/tika-core/src/main/java/org/apache/tika/metadata/Metadata.java
    
tika/trunk/tika-core/src/test/java/org/apache/tika/metadata/TestMetadata.java
    
tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/iwork/PagesContentHandler.java

Modified: 
tika/trunk/tika-core/src/main/java/org/apache/tika/metadata/HttpHeaders.java
URL: 
http://svn.apache.org/viewvc/tika/trunk/tika-core/src/main/java/org/apache/tika/metadata/HttpHeaders.java?rev=964235&r1=964234&r2=964235&view=diff
==============================================================================
--- 
tika/trunk/tika-core/src/main/java/org/apache/tika/metadata/HttpHeaders.java 
(original)
+++ 
tika/trunk/tika-core/src/main/java/org/apache/tika/metadata/HttpHeaders.java 
Wed Jul 14 22:46:50 2010
@@ -38,7 +38,8 @@ public interface HttpHeaders {
 
     String CONTENT_TYPE = "Content-Type";
 
-    String LAST_MODIFIED = "Last-Modified";
+    Property LAST_MODIFIED = 
+        Property.internalDate("Last-Modified");
 
     String LOCATION = "Location";
 

Modified: 
tika/trunk/tika-core/src/main/java/org/apache/tika/metadata/MSOffice.java
URL: 
http://svn.apache.org/viewvc/tika/trunk/tika-core/src/main/java/org/apache/tika/metadata/MSOffice.java?rev=964235&r1=964234&r2=964235&view=diff
==============================================================================
--- tika/trunk/tika-core/src/main/java/org/apache/tika/metadata/MSOffice.java 
(original)
+++ tika/trunk/tika-core/src/main/java/org/apache/tika/metadata/MSOffice.java 
Wed Jul 14 22:46:50 2010
@@ -73,8 +73,10 @@ public interface MSOffice {
 
     String SECURITY = "Security";
 
-    String EDIT_TIME = "Edit-Time";
+    Property EDIT_TIME = 
+        Property.internalDate("Edit-Time");
 
-    String CREATION_DATE = "Creation-Date";
+    Property CREATION_DATE = 
+        Property.internalDate("Creation-Date");
 
 }

Modified: 
tika/trunk/tika-core/src/main/java/org/apache/tika/metadata/Metadata.java
URL: 
http://svn.apache.org/viewvc/tika/trunk/tika-core/src/main/java/org/apache/tika/metadata/Metadata.java?rev=964235&r1=964234&r2=964235&view=diff
==============================================================================
--- tika/trunk/tika-core/src/main/java/org/apache/tika/metadata/Metadata.java 
(original)
+++ tika/trunk/tika-core/src/main/java/org/apache/tika/metadata/Metadata.java 
Wed Jul 14 22:46:50 2010
@@ -16,8 +16,13 @@
  */
 package org.apache.tika.metadata;
 
+import java.text.DateFormatSymbols;
+import java.text.ParseException;
+import java.text.SimpleDateFormat;
+import java.util.Date;
 import java.util.Enumeration;
 import java.util.HashMap;
+import java.util.Locale;
 import java.util.Map;
 import java.util.Properties;
 
@@ -31,6 +36,12 @@ public class Metadata implements Creativ
      * A map of all metadata attributes.
      */
     private Map<String, String[]> metadata = null;
+    
+    /**
+     * The ISO-8601 format string we use for Dates
+     */
+    private SimpleDateFormat iso8601Format = new SimpleDateFormat(
+            "yyyy-MM-dd'T'HH:mm:ss'Z'Z", new DateFormatSymbols(Locale.US));
 
     /**
      * Constructs a new, empty metadata.
@@ -86,6 +97,54 @@ public class Metadata implements Creativ
     public String get(Property property) {
         return get(property.getName());
     }
+    
+    /**
+     * Returns the value of the identified Integer based metadata property.
+     * 
+     * @since Apache Tika 0.8
+     * @param property simple integer property definition
+     * @return property value as a Integer, or <code>null</code> if the property is not set, or not a valid Integer
+     */
+    public Integer getInt(Property property) {
+        if(property.getPropertyType() != Property.PropertyType.SIMPLE)
+            return null;
+        if(property.getValueType() != Property.ValueType.INTEGER)
+            return null;
+        
+        String v = get(property);
+        if(v == null) {
+            return null;
+        }
+        try {
+            return new Integer(v);
+        } catch(NumberFormatException e) {
+            return null;
+        }
+    }
+
+    /**
+     * Returns the value of the identified Date based metadata property.
+     * 
+     * @since Apache Tika 0.8
+     * @param property simple date property definition
+     * @return property value as a Date, or <code>null</code> if the property is not set, or not a valid Date
+     */
+    public Date getDate(Property property) {
+        if(property.getPropertyType() != Property.PropertyType.SIMPLE)
+            return null;
+        if(property.getValueType() != Property.ValueType.DATE)
+            return null;
+        
+        String v = get(property);
+        if(v == null) {
+            return null;
+        }
+        try {
+            return iso8601Format.parse(v);
+        } catch(ParseException e) {
+            return null;
+        }
+    }
 
     /**
      * Get the values associated to a metadata name.
@@ -176,12 +235,29 @@ public class Metadata implements Creativ
      * @param value    property value
      */
     public void set(Property property, int value) {
-        assert property.getPropertyType() == Property.PropertyType.SIMPLE;
-        assert property.getValueType() == Property.ValueType.INTEGER;
+        if(property.getPropertyType() != Property.PropertyType.SIMPLE)
+            throw new PropertyTypeException(Property.PropertyType.SIMPLE, property.getPropertyType());
+        if(property.getValueType() != Property.ValueType.INTEGER)
+            throw new PropertyTypeException(Property.ValueType.INTEGER, property.getValueType());
         set(property.getName(), Integer.toString(value));
     }
 
     /**
+     * Sets the date value of the identified metadata property.
+     *
+     * @since Apache Tika 0.8
+     * @param property simple integer property definition
+     * @param value    property value
+     */
+    public void set(Property property, Date date) {
+        if(property.getPropertyType() != Property.PropertyType.SIMPLE)
+            throw new PropertyTypeException(Property.PropertyType.SIMPLE, property.getPropertyType());
+        if(property.getValueType() != Property.ValueType.DATE)
+            throw new PropertyTypeException(Property.ValueType.DATE, property.getValueType());
+        set(property.getName(), iso8601Format.format(date));
+    }
+
+    /**
      * Remove a metadata and all its associated values.
      * 
      * @param name

Added: 
tika/trunk/tika-core/src/main/java/org/apache/tika/metadata/PropertyTypeException.java
URL: 
http://svn.apache.org/viewvc/tika/trunk/tika-core/src/main/java/org/apache/tika/metadata/PropertyTypeException.java?rev=964235&view=auto
==============================================================================
--- 
tika/trunk/tika-core/src/main/java/org/apache/tika/metadata/PropertyTypeException.java
 (added)
+++ 
tika/trunk/tika-core/src/main/java/org/apache/tika/metadata/PropertyTypeException.java
 Wed Jul 14 22:46:50 2010
@@ -0,0 +1,37 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.tika.metadata;
+
+import org.apache.tika.metadata.Property.PropertyType;
+import org.apache.tika.metadata.Property.ValueType;
+
+
+/**
+ * XMP property definition violation exception. This is thrown when
+ * you try to set a {@link Property} value with an incorrect type,
+ * such as storing an Integer when the property is of type Date.
+ *
+ * @since Apache Tika 0.8
+ */
+public final class PropertyTypeException extends IllegalArgumentException {
+    public PropertyTypeException(PropertyType expected, PropertyType found) {
+        super("Expected a property of type " + expected + ", but received " + found);
+    }
+    public PropertyTypeException(ValueType expected, ValueType found) {
+        super("Expected a property with a " + expected + " value, but received a " + found);
+    }
+}

Propchange: 
tika/trunk/tika-core/src/main/java/org/apache/tika/metadata/PropertyTypeException.java
------------------------------------------------------------------------------
    svn:eol-style = native

Modified: 
tika/trunk/tika-core/src/test/java/org/apache/tika/metadata/TestMetadata.java
URL: 
http://svn.apache.org/viewvc/tika/trunk/tika-core/src/test/java/org/apache/tika/metadata/TestMetadata.java?rev=964235&r1=964234&r2=964235&view=diff
==============================================================================
--- 
tika/trunk/tika-core/src/test/java/org/apache/tika/metadata/TestMetadata.java 
(original)
+++ 
tika/trunk/tika-core/src/test/java/org/apache/tika/metadata/TestMetadata.java 
Wed Jul 14 22:46:50 2010
@@ -17,6 +17,7 @@
 package org.apache.tika.metadata;
 
 //JDK imports
+import java.util.Date;
 import java.util.Properties;
 
 //Junit imports
@@ -208,4 +209,85 @@ public class TestMetadata extends TestCa
         assertFalse(meta1.equals(meta2));
     }
 
+    /**
+     * Tests for getting and setting integer
+     *  based properties
+     */
+    public void testGetSetInt() {
+        Metadata meta = new Metadata();
+        
+        // Isn't initially set, will get null back
+        assertEquals(null, meta.get(Metadata.IMAGE_WIDTH));
+        assertEquals(null, meta.getInt(Metadata.IMAGE_WIDTH));
+        
+        // Can only set as a single valued int
+        try {
+            meta.set(Metadata.BITS_PER_SAMPLE, 1);
+            fail("Shouldn't be able to set a multi valued property as an int");
+        } catch(PropertyTypeException e) {}
+        try {
+            meta.set(Metadata.CREATION_DATE, 1);
+            fail("Shouldn't be able to set a date property as an int");
+        } catch(PropertyTypeException e) {}
+        
+        // Can set it and retrieve it
+        meta.set(Metadata.IMAGE_WIDTH, 22);
+        assertEquals("22", meta.get(Metadata.IMAGE_WIDTH));
+        assertEquals(22, meta.getInt(Metadata.IMAGE_WIDTH).intValue());
+        
+        // If you save a non int value, you get null
+        meta.set(Metadata.IMAGE_WIDTH, "INVALID");
+        assertEquals("INVALID", meta.get(Metadata.IMAGE_WIDTH));
+        assertEquals(null, meta.getInt(Metadata.IMAGE_WIDTH));
+        
+        // If you try to retrieve a non simple int value, you get null
+        meta.set(Metadata.IMAGE_WIDTH, 22);
+        assertEquals(22, meta.getInt(Metadata.IMAGE_WIDTH).intValue());
+        assertEquals(null, meta.getInt(Metadata.BITS_PER_SAMPLE));
+        assertEquals(null, meta.getInt(Metadata.CREATION_DATE));
+    }
+    
+    /**
+     * Tests for getting and setting date
+     *  based properties
+     */
+    public void testGetSetDate() {
+        Metadata meta = new Metadata();
+        
+        // Isn't initially set, will get null back
+        assertEquals(null, meta.get(Metadata.CREATION_DATE));
+        assertEquals(null, meta.getInt(Metadata.CREATION_DATE));
+        
+        // Can only set as a single valued date
+        try {
+            meta.set(Metadata.BITS_PER_SAMPLE, new Date(1000));
+            fail("Shouldn't be able to set a multi valued property as a date");
+        } catch(PropertyTypeException e) {}
+        try {
+            meta.set(Metadata.IMAGE_WIDTH, new Date(1000));
+            fail("Shouldn't be able to set an int property as an date");
+        } catch(PropertyTypeException e) {}
+        
+        // Can set it and retrieve it
+        meta.set(Metadata.CREATION_DATE, new Date(1000));
+        assertEquals("1970-01-01T00:00:01Z+0000", meta.get(Metadata.CREATION_DATE));
+        assertEquals(1000, meta.getDate(Metadata.CREATION_DATE).getTime());
+        
+        // If you save a non date value, you get null
+        meta.set(Metadata.CREATION_DATE, "INVALID");
+        assertEquals("INVALID", meta.get(Metadata.CREATION_DATE));
+        assertEquals(null, meta.getDate(Metadata.CREATION_DATE));
+        
+        // If you try to retrieve a non simple date value, you get null
+        meta.set(Metadata.CREATION_DATE, new Date(1000));
+        assertEquals(1000, meta.getDate(Metadata.CREATION_DATE).getTime());
+        assertEquals(null, meta.getInt(Metadata.BITS_PER_SAMPLE));
+        assertEquals(null, meta.getInt(Metadata.CREATION_DATE));
+        
+        // Our format doesn't include milliseconds
+        // This means things get rounded 
+        meta.set(Metadata.CREATION_DATE, new Date(1050));
+        assertEquals("1970-01-01T00:00:01Z+0000", meta.get(Metadata.CREATION_DATE));
+        assertEquals(1000, meta.getDate(Metadata.CREATION_DATE).getTime());
+    }
 }

Modified: 
tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/iwork/PagesContentHandler.java
URL: 
http://svn.apache.org/viewvc/tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/iwork/PagesContentHandler.java?rev=964235&r1=964234&r2=964235&view=diff
==============================================================================
--- 
tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/iwork/PagesContentHandler.java
 (original)
+++ 
tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/iwork/PagesContentHandler.java
 Wed Jul 14 22:46:50 2010
@@ -17,6 +17,7 @@
 package org.apache.tika.parser.iwork;
 
 import org.apache.tika.metadata.Metadata;
+import org.apache.tika.metadata.Property;
 import org.apache.tika.sax.XHTMLContentHandler;
 import org.xml.sax.Attributes;
 import org.xml.sax.SAXException;
@@ -66,8 +67,12 @@ class PagesContentHandler extends Defaul
         if (parseProperty) {
             String value = parsePrimitiveElementValue(qName, attributes);
             if (value != null) {
-                String metaDataKey = resolveMetaDataKey(metaDataLocalName);
-                metadata.add(metaDataKey, value);
+                Object metaDataKey = resolveMetaDataKey(metaDataLocalName);
+                if(metaDataKey instanceof Property) {
+                    metadata.set((Property)metaDataKey, value);
+                } else {
+                    metadata.add((String)metaDataKey, value);
+                }
             }
         }
 
@@ -165,12 +170,13 @@ class PagesContentHandler extends Defaul
     /**
      * Returns a resolved key that is common in other document types or
      * returns the specified metaDataLocalName if no common key could be found.
+     * The key could be a simple String key, or could be a {@link Property}
      *
      * @param metaDataLocalName The localname of the element containing 
metadata
      * @return a resolved key that is common in other document types
      */
-    private String resolveMetaDataKey(String metaDataLocalName) {
-        String metaDataKey = metaDataLocalName;
+    private Object resolveMetaDataKey(String metaDataLocalName) {
+        Object metaDataKey = metaDataLocalName;
         if ("sf:authors".equals(metaDataQName)) {
             metaDataKey = Metadata.AUTHOR;
         } else if ("sf:title".equals(metaDataQName)) {


Reply via email to