Author: nick
Date: Wed Jul 14 22:46:50 2010
New Revision: 964235
URL: http://svn.apache.org/viewvc?rev=964235&view=rev
Log:
TIKA-451 - Inconsistent date format for Metadata.CREATION_DATE and
Metadata.LAST_MODIFIED
Make CREATION_DATE and LAST_MODIFIED Date property instances, and add support
for getting and setting Dates (+getting ints), as discussed in TIKA-451
Unit tests for getting and setting ints and dates are included. Work to update
the existing parsers to make use of the new Date setter is still outstanding
Added:
tika/trunk/tika-core/src/main/java/org/apache/tika/metadata/PropertyTypeException.java
(with props)
Modified:
tika/trunk/tika-core/src/main/java/org/apache/tika/metadata/HttpHeaders.java
tika/trunk/tika-core/src/main/java/org/apache/tika/metadata/MSOffice.java
tika/trunk/tika-core/src/main/java/org/apache/tika/metadata/Metadata.java
tika/trunk/tika-core/src/test/java/org/apache/tika/metadata/TestMetadata.java
tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/iwork/PagesContentHandler.java
Modified:
tika/trunk/tika-core/src/main/java/org/apache/tika/metadata/HttpHeaders.java
URL:
http://svn.apache.org/viewvc/tika/trunk/tika-core/src/main/java/org/apache/tika/metadata/HttpHeaders.java?rev=964235&r1=964234&r2=964235&view=diff
==============================================================================
---
tika/trunk/tika-core/src/main/java/org/apache/tika/metadata/HttpHeaders.java
(original)
+++
tika/trunk/tika-core/src/main/java/org/apache/tika/metadata/HttpHeaders.java
Wed Jul 14 22:46:50 2010
@@ -38,7 +38,8 @@ public interface HttpHeaders {
String CONTENT_TYPE = "Content-Type";
- String LAST_MODIFIED = "Last-Modified";
+ Property LAST_MODIFIED =
+ Property.internalDate("Last-Modified");
String LOCATION = "Location";
Modified:
tika/trunk/tika-core/src/main/java/org/apache/tika/metadata/MSOffice.java
URL:
http://svn.apache.org/viewvc/tika/trunk/tika-core/src/main/java/org/apache/tika/metadata/MSOffice.java?rev=964235&r1=964234&r2=964235&view=diff
==============================================================================
--- tika/trunk/tika-core/src/main/java/org/apache/tika/metadata/MSOffice.java
(original)
+++ tika/trunk/tika-core/src/main/java/org/apache/tika/metadata/MSOffice.java
Wed Jul 14 22:46:50 2010
@@ -73,8 +73,10 @@ public interface MSOffice {
String SECURITY = "Security";
- String EDIT_TIME = "Edit-Time";
+ Property EDIT_TIME =
+ Property.internalDate("Edit-Time");
- String CREATION_DATE = "Creation-Date";
+ Property CREATION_DATE =
+ Property.internalDate("Creation-Date");
}
Modified:
tika/trunk/tika-core/src/main/java/org/apache/tika/metadata/Metadata.java
URL:
http://svn.apache.org/viewvc/tika/trunk/tika-core/src/main/java/org/apache/tika/metadata/Metadata.java?rev=964235&r1=964234&r2=964235&view=diff
==============================================================================
--- tika/trunk/tika-core/src/main/java/org/apache/tika/metadata/Metadata.java
(original)
+++ tika/trunk/tika-core/src/main/java/org/apache/tika/metadata/Metadata.java
Wed Jul 14 22:46:50 2010
@@ -16,8 +16,13 @@
*/
package org.apache.tika.metadata;
+import java.text.DateFormatSymbols;
+import java.text.ParseException;
+import java.text.SimpleDateFormat;
+import java.util.Date;
import java.util.Enumeration;
import java.util.HashMap;
+import java.util.Locale;
import java.util.Map;
import java.util.Properties;
@@ -31,6 +36,12 @@ public class Metadata implements Creativ
* A map of all metadata attributes.
*/
private Map<String, String[]> metadata = null;
+
+ /**
+ * The ISO-8601 format string we use for Dates
+ */
+ private SimpleDateFormat iso8601Format = new SimpleDateFormat(
+ "yyyy-MM-dd'T'HH:mm:ss'Z'Z", new DateFormatSymbols(Locale.US));
/**
* Constructs a new, empty metadata.
@@ -86,6 +97,54 @@ public class Metadata implements Creativ
public String get(Property property) {
return get(property.getName());
}
+
+ /**
+ * Returns the value of the identified Integer based metadata property.
+ *
+ * @since Apache Tika 0.8
+ * @param property simple integer property definition
+ * @return property value as an Integer, or <code>null</code> if the property is not set, or not a valid Integer
+ */
+ public Integer getInt(Property property) {
+ if(property.getPropertyType() != Property.PropertyType.SIMPLE)
+ return null;
+ if(property.getValueType() != Property.ValueType.INTEGER)
+ return null;
+
+ String v = get(property);
+ if(v == null) {
+ return null;
+ }
+ try {
+ return new Integer(v);
+ } catch(NumberFormatException e) {
+ return null;
+ }
+ }
+
+ /**
+ * Returns the value of the identified Date based metadata property.
+ *
+ * @since Apache Tika 0.8
+ * @param property simple date property definition
+ * @return property value as a Date, or <code>null</code> if the property is not set, or not a valid Date
+ */
+ public Date getDate(Property property) {
+ if(property.getPropertyType() != Property.PropertyType.SIMPLE)
+ return null;
+ if(property.getValueType() != Property.ValueType.DATE)
+ return null;
+
+ String v = get(property);
+ if(v == null) {
+ return null;
+ }
+ try {
+ return iso8601Format.parse(v);
+ } catch(ParseException e) {
+ return null;
+ }
+ }
/**
* Get the values associated to a metadata name.
@@ -176,12 +235,29 @@ public class Metadata implements Creativ
* @param value property value
*/
public void set(Property property, int value) {
- assert property.getPropertyType() == Property.PropertyType.SIMPLE;
- assert property.getValueType() == Property.ValueType.INTEGER;
+ if(property.getPropertyType() != Property.PropertyType.SIMPLE)
+ throw new PropertyTypeException(Property.PropertyType.SIMPLE,
property.getPropertyType());
+ if(property.getValueType() != Property.ValueType.INTEGER)
+ throw new PropertyTypeException(Property.ValueType.INTEGER,
property.getValueType());
set(property.getName(), Integer.toString(value));
}
/**
+ * Sets the date value of the identified metadata property.
+ *
+ * @since Apache Tika 0.8
+ * @param property simple date property definition
+ * @param date property value
+ */
+ public void set(Property property, Date date) {
+ if(property.getPropertyType() != Property.PropertyType.SIMPLE)
+ throw new PropertyTypeException(Property.PropertyType.SIMPLE,
property.getPropertyType());
+ if(property.getValueType() != Property.ValueType.DATE)
+ throw new PropertyTypeException(Property.ValueType.DATE,
property.getValueType());
+ set(property.getName(), iso8601Format.format(date));
+ }
+
+ /**
* Remove a metadata and all its associated values.
*
* @param name
Added:
tika/trunk/tika-core/src/main/java/org/apache/tika/metadata/PropertyTypeException.java
URL:
http://svn.apache.org/viewvc/tika/trunk/tika-core/src/main/java/org/apache/tika/metadata/PropertyTypeException.java?rev=964235&view=auto
==============================================================================
---
tika/trunk/tika-core/src/main/java/org/apache/tika/metadata/PropertyTypeException.java
(added)
+++
tika/trunk/tika-core/src/main/java/org/apache/tika/metadata/PropertyTypeException.java
Wed Jul 14 22:46:50 2010
@@ -0,0 +1,37 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.tika.metadata;
+
+import org.apache.tika.metadata.Property.PropertyType;
+import org.apache.tika.metadata.Property.ValueType;
+
+
+/**
+ * XMP property definition violation exception. This is thrown when
+ * you try to set a {@link Property} value with an incorrect type,
+ * such as storing an Integer when the property is of type Date.
+ *
+ * @since Apache Tika 0.8
+ */
+public final class PropertyTypeException extends IllegalArgumentException {
+ public PropertyTypeException(PropertyType expected, PropertyType found) {
+ super("Expected a property of type " + expected + ", but received " +
found);
+ }
+ public PropertyTypeException(ValueType expected, ValueType found) {
+ super("Expected a property with a " + expected + " value, but received
a " + found);
+ }
+}
Propchange:
tika/trunk/tika-core/src/main/java/org/apache/tika/metadata/PropertyTypeException.java
------------------------------------------------------------------------------
svn:eol-style = native
Modified:
tika/trunk/tika-core/src/test/java/org/apache/tika/metadata/TestMetadata.java
URL:
http://svn.apache.org/viewvc/tika/trunk/tika-core/src/test/java/org/apache/tika/metadata/TestMetadata.java?rev=964235&r1=964234&r2=964235&view=diff
==============================================================================
---
tika/trunk/tika-core/src/test/java/org/apache/tika/metadata/TestMetadata.java
(original)
+++
tika/trunk/tika-core/src/test/java/org/apache/tika/metadata/TestMetadata.java
Wed Jul 14 22:46:50 2010
@@ -17,6 +17,7 @@
package org.apache.tika.metadata;
//JDK imports
+import java.util.Date;
import java.util.Properties;
//Junit imports
@@ -208,4 +209,85 @@ public class TestMetadata extends TestCa
assertFalse(meta1.equals(meta2));
}
+ /**
+ * Tests for getting and setting integer
+ * based properties
+ */
+ public void testGetSetInt() {
+ Metadata meta = new Metadata();
+
+ // Isn't initially set, will get null back
+ assertEquals(null, meta.get(Metadata.IMAGE_WIDTH));
+ assertEquals(null, meta.getInt(Metadata.IMAGE_WIDTH));
+
+ // Can only set as a single valued int
+ try {
+ meta.set(Metadata.BITS_PER_SAMPLE, 1);
+ fail("Shouldn't be able to set a multi valued property as an int");
+ } catch(PropertyTypeException e) {}
+ try {
+ meta.set(Metadata.CREATION_DATE, 1);
+ fail("Shouldn't be able to set a date property as an int");
+ } catch(PropertyTypeException e) {}
+
+ // Can set it and retrieve it
+ meta.set(Metadata.IMAGE_WIDTH, 22);
+ assertEquals("22", meta.get(Metadata.IMAGE_WIDTH));
+ assertEquals(22, meta.getInt(Metadata.IMAGE_WIDTH).intValue());
+
+ // If you save a non int value, you get null
+ meta.set(Metadata.IMAGE_WIDTH, "INVALID");
+ assertEquals("INVALID", meta.get(Metadata.IMAGE_WIDTH));
+ assertEquals(null, meta.getInt(Metadata.IMAGE_WIDTH));
+
+ // If you try to retrieve a non simple int value, you get null
+ meta.set(Metadata.IMAGE_WIDTH, 22);
+ assertEquals(22, meta.getInt(Metadata.IMAGE_WIDTH).intValue());
+ assertEquals(null, meta.getInt(Metadata.BITS_PER_SAMPLE));
+ assertEquals(null, meta.getInt(Metadata.CREATION_DATE));
+ }
+
+ /**
+ * Tests for getting and setting date
+ * based properties
+ */
+ public void testGetSetDate() {
+ Metadata meta = new Metadata();
+
+ // Isn't initially set, will get null back
+ assertEquals(null, meta.get(Metadata.CREATION_DATE));
+ assertEquals(null, meta.getInt(Metadata.CREATION_DATE));
+
+ // Can only set as a single valued date
+ try {
+ meta.set(Metadata.BITS_PER_SAMPLE, new Date(1000));
+ fail("Shouldn't be able to set a multi valued property as a date");
+ } catch(PropertyTypeException e) {}
+ try {
+ meta.set(Metadata.IMAGE_WIDTH, new Date(1000));
+ fail("Shouldn't be able to set an int property as an date");
+ } catch(PropertyTypeException e) {}
+
+ // Can set it and retrieve it
+ meta.set(Metadata.CREATION_DATE, new Date(1000));
+ assertEquals("1970-01-01T00:00:01Z+0000",
meta.get(Metadata.CREATION_DATE));
+ assertEquals(1000, meta.getDate(Metadata.CREATION_DATE).getTime());
+
+ // If you save a non date value, you get null
+ meta.set(Metadata.CREATION_DATE, "INVALID");
+ assertEquals("INVALID", meta.get(Metadata.CREATION_DATE));
+ assertEquals(null, meta.getDate(Metadata.CREATION_DATE));
+
+ // If you try to retrieve a non simple date value, you get null
+ meta.set(Metadata.CREATION_DATE, new Date(1000));
+ assertEquals(1000, meta.getDate(Metadata.CREATION_DATE).getTime());
+ assertEquals(null, meta.getInt(Metadata.BITS_PER_SAMPLE));
+ assertEquals(null, meta.getInt(Metadata.CREATION_DATE));
+
+ // Our format doesn't include milliseconds
+ // This means things get rounded
+ meta.set(Metadata.CREATION_DATE, new Date(1050));
+ assertEquals("1970-01-01T00:00:01Z+0000",
meta.get(Metadata.CREATION_DATE));
+ assertEquals(1000, meta.getDate(Metadata.CREATION_DATE).getTime());
+ }
}
Modified:
tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/iwork/PagesContentHandler.java
URL:
http://svn.apache.org/viewvc/tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/iwork/PagesContentHandler.java?rev=964235&r1=964234&r2=964235&view=diff
==============================================================================
---
tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/iwork/PagesContentHandler.java
(original)
+++
tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/iwork/PagesContentHandler.java
Wed Jul 14 22:46:50 2010
@@ -17,6 +17,7 @@
package org.apache.tika.parser.iwork;
import org.apache.tika.metadata.Metadata;
+import org.apache.tika.metadata.Property;
import org.apache.tika.sax.XHTMLContentHandler;
import org.xml.sax.Attributes;
import org.xml.sax.SAXException;
@@ -66,8 +67,12 @@ class PagesContentHandler extends Defaul
if (parseProperty) {
String value = parsePrimitiveElementValue(qName, attributes);
if (value != null) {
- String metaDataKey = resolveMetaDataKey(metaDataLocalName);
- metadata.add(metaDataKey, value);
+ Object metaDataKey = resolveMetaDataKey(metaDataLocalName);
+ if(metaDataKey instanceof Property) {
+ metadata.set((Property)metaDataKey, value);
+ } else {
+ metadata.add((String)metaDataKey, value);
+ }
}
}
@@ -165,12 +170,13 @@ class PagesContentHandler extends Defaul
/**
* Returns a resolved key that is common in other document types or
* returns the specified metaDataLocalName if no common key could be found.
+ * The key could be a simple String key, or could be a {@link Property}
*
* @param metaDataLocalName The localname of the element containing metadata
* @return a resolved key that is common in other document types
*/
- private String resolveMetaDataKey(String metaDataLocalName) {
- String metaDataKey = metaDataLocalName;
+ private Object resolveMetaDataKey(String metaDataLocalName) {
+ Object metaDataKey = metaDataLocalName;
if ("sf:authors".equals(metaDataQName)) {
metaDataKey = Metadata.AUTHOR;
} else if ("sf:title".equals(metaDataQName)) {