Author: nick
Date: Thu Jul 15 13:00:57 2010
New Revision: 964411
URL: http://svn.apache.org/viewvc?rev=964411&view=rev
Log:
Accept a wider range of ISO8601 date formats when turning a Property from a
String into a Date, for parsers which do set(Property,String) - for TIKA-451
Modified:
tika/trunk/tika-core/src/main/java/org/apache/tika/metadata/Metadata.java
tika/trunk/tika-core/src/test/java/org/apache/tika/metadata/TestMetadata.java
Modified:
tika/trunk/tika-core/src/main/java/org/apache/tika/metadata/Metadata.java
URL:
http://svn.apache.org/viewvc/tika/trunk/tika-core/src/main/java/org/apache/tika/metadata/Metadata.java?rev=964411&r1=964410&r2=964411&view=diff
==============================================================================
--- tika/trunk/tika-core/src/main/java/org/apache/tika/metadata/Metadata.java (original)
+++ tika/trunk/tika-core/src/main/java/org/apache/tika/metadata/Metadata.java Thu Jul 15 13:00:57 2010
@@ -44,9 +44,34 @@ public class Metadata implements Creativ
*/
private SimpleDateFormat iso8601Format = new SimpleDateFormat(
"yyyy-MM-dd'T'HH:mm:ss'Z'", new DateFormatSymbols(Locale.US));
+ private SimpleDateFormat iso8601SpaceFormat = new SimpleDateFormat(
+ "yyyy-MM-dd' 'HH:mm:ss'Z'", new DateFormatSymbols(Locale.US));
{
iso8601Format.setTimeZone(TimeZone.getTimeZone("UTC"));
+ iso8601SpaceFormat.setTimeZone(TimeZone.getTimeZone("UTC"));
}
+ /**
+ * Some parsers will have the date as an ISO-8601 string
+ * already, and will set that into the Metadata object.
+ * So that we can return Date objects for these, this is
+ * the list (in preference order) of the various ISO-8601
+ * variants that we try when processing a date-based
+ * property.
+ */
+ private SimpleDateFormat[] iso8601InputFormats = new SimpleDateFormat[] {
+ // yyyy-mm-ddThh...
+ iso8601Format, // UTC/Zulu
+ new SimpleDateFormat(
+ "yyyy-MM-dd'T'HH:mm:ssZ", new DateFormatSymbols(Locale.US)), // With timezone
+ new SimpleDateFormat(
+ "yyyy-MM-dd'T'HH:mm:ss", new DateFormatSymbols(Locale.US)), // Without timezone
+ // yyyy-mm-dd hh...
+ iso8601SpaceFormat, // UTC/Zulu
+ new SimpleDateFormat(
+ "yyyy-MM-dd' 'HH:mm:ssZ", new DateFormatSymbols(Locale.US)), // With timezone
+ new SimpleDateFormat(
+ "yyyy-MM-dd' 'HH:mm:ss", new DateFormatSymbols(Locale.US)), // Without timezone
+ };
/**
* Constructs a new, empty metadata.
@@ -144,11 +169,22 @@ public class Metadata implements Creativ
if(v == null) {
return null;
}
- try {
- return iso8601Format.parse(v);
- } catch(ParseException e) {
- return null;
+ // Java doesn't like timezones in the form ss+hh:mm
+ // It only likes the hhmm form, without the colon
+ if(v.charAt(v.length()-3) == ':' &&
+ (v.charAt(v.length()-6) == '+' ||
+ v.charAt(v.length()-6) == '-')) {
+ v = v.substring(0, v.length()-3) + v.substring(v.length()-2);
+ }
+
+ // Try several different ISO-8601 variants
+ for(SimpleDateFormat format : iso8601InputFormats) {
+ try {
+ return format.parse(v);
+ } catch(ParseException e) {}
}
+ // It isn't in a supported date format, sorry
+ return null;
}
/**
Modified:
tika/trunk/tika-core/src/test/java/org/apache/tika/metadata/TestMetadata.java
URL:
http://svn.apache.org/viewvc/tika/trunk/tika-core/src/test/java/org/apache/tika/metadata/TestMetadata.java?rev=964411&r1=964410&r2=964411&view=diff
==============================================================================
--- tika/trunk/tika-core/src/test/java/org/apache/tika/metadata/TestMetadata.java (original)
+++ tika/trunk/tika-core/src/test/java/org/apache/tika/metadata/TestMetadata.java Thu Jul 15 13:00:57 2010
@@ -289,5 +289,24 @@ public class TestMetadata extends TestCa
meta.set(Metadata.CREATION_DATE, new Date(1050));
assertEquals("1970-01-01T00:00:01Z", meta.get(Metadata.CREATION_DATE));
assertEquals(1000, meta.getDate(Metadata.CREATION_DATE).getTime());
+
+ // We can accept a number of different ISO-8601 variants
+ meta.set(Metadata.CREATION_DATE, "1970-01-01T00:00:01Z");
+ assertEquals(1000, meta.getDate(Metadata.CREATION_DATE).getTime());
+
+ meta.set(Metadata.CREATION_DATE, "1970-01-01 00:00:01Z");
+ assertEquals(1000, meta.getDate(Metadata.CREATION_DATE).getTime());
+
+ meta.set(Metadata.CREATION_DATE, "1970-01-01T01:00:01+01:00");
+ assertEquals(1000, meta.getDate(Metadata.CREATION_DATE).getTime());
+
+ meta.set(Metadata.CREATION_DATE, "1970-01-01 01:00:01+01:00");
+ assertEquals(1000, meta.getDate(Metadata.CREATION_DATE).getTime());
+
+ meta.set(Metadata.CREATION_DATE, "1970-01-01T12:00:01+12:00");
+ assertEquals(1000, meta.getDate(Metadata.CREATION_DATE).getTime());
+
+ meta.set(Metadata.CREATION_DATE, "1969-12-31T12:00:01-12:00");
+ assertEquals(1000, meta.getDate(Metadata.CREATION_DATE).getTime());
}
}