Author: nick
Date: Thu Jul 15 13:00:57 2010
New Revision: 964411

URL: http://svn.apache.org/viewvc?rev=964411&view=rev
Log:
Accept a wider range of ISO8601 date formats when turning a Property from a 
String into a Date, for parsers which do set(Property,String) - for TIKA-451

Modified:
    tika/trunk/tika-core/src/main/java/org/apache/tika/metadata/Metadata.java
    tika/trunk/tika-core/src/test/java/org/apache/tika/metadata/TestMetadata.java

Modified: tika/trunk/tika-core/src/main/java/org/apache/tika/metadata/Metadata.java
URL: http://svn.apache.org/viewvc/tika/trunk/tika-core/src/main/java/org/apache/tika/metadata/Metadata.java?rev=964411&r1=964410&r2=964411&view=diff
==============================================================================
--- tika/trunk/tika-core/src/main/java/org/apache/tika/metadata/Metadata.java (original)
+++ tika/trunk/tika-core/src/main/java/org/apache/tika/metadata/Metadata.java Thu Jul 15 13:00:57 2010
@@ -44,9 +44,34 @@ public class Metadata implements Creativ
      */
     private SimpleDateFormat iso8601Format = new SimpleDateFormat(
             "yyyy-MM-dd'T'HH:mm:ss'Z'", new DateFormatSymbols(Locale.US));
+    private SimpleDateFormat iso8601SpaceFormat = new SimpleDateFormat(
+                  "yyyy-MM-dd' 'HH:mm:ss'Z'", new DateFormatSymbols(Locale.US));
     {
        iso8601Format.setTimeZone(TimeZone.getTimeZone("UTC"));
+       iso8601SpaceFormat.setTimeZone(TimeZone.getTimeZone("UTC"));
     }
+    /**
+     * Some parsers will have the date as a ISO-8601 string
+     *  already, and will set that into the Metadata object.
+     * So we can return Date objects for these, this is the
+     *  list (in preference order) of the various ISO-8601
+     *  variants that we try when processing a date based
+     *  property.
+     */
+    private SimpleDateFormat[] iso8601InputFormats = new SimpleDateFormat[] {
+       // yyyy-mm-ddThh...
+        iso8601Format, // UTC/Zulu
+        new SimpleDateFormat(
+           "yyyy-MM-dd'T'HH:mm:ssZ", new DateFormatSymbols(Locale.US)), // With timezone
+        new SimpleDateFormat(
+           "yyyy-MM-dd'T'HH:mm:ss", new DateFormatSymbols(Locale.US)), // Without timezone
+       // yyyy-mm-dd hh...
+        iso8601SpaceFormat, // UTC/Zulu
+        new SimpleDateFormat(
+           "yyyy-MM-dd' 'HH:mm:ssZ", new DateFormatSymbols(Locale.US)), // With timezone
+        new SimpleDateFormat(
+           "yyyy-MM-dd' 'HH:mm:ss", new DateFormatSymbols(Locale.US)), // Without timezone
+    };
 
     /**
      * Constructs a new, empty metadata.
@@ -144,11 +169,22 @@ public class Metadata implements Creativ
         if(v == null) {
             return null;
         }
-        try {
-            return iso8601Format.parse(v);
-        } catch(ParseException e) {
-            return null;
+        // Java doesn't like timezones in the form ss+hh:mm
+        // It only likes the hhmm form, without the colon
+        if(v.charAt(v.length()-3) == ':' && 
+            (v.charAt(v.length()-6) == '+' ||
+             v.charAt(v.length()-6) == '-')) {
+            v = v.substring(0, v.length()-3) + v.substring(v.length()-2);
+        }
+        
+        // Try several different ISO-8601 variants
+        for(SimpleDateFormat format : iso8601InputFormats) {
+            try {
+                return format.parse(v);
+            } catch(ParseException e) {}
         }
+        // It isn't in a supported date format, sorry
+        return null;
     }
 
     /**

Modified: tika/trunk/tika-core/src/test/java/org/apache/tika/metadata/TestMetadata.java
URL: http://svn.apache.org/viewvc/tika/trunk/tika-core/src/test/java/org/apache/tika/metadata/TestMetadata.java?rev=964411&r1=964410&r2=964411&view=diff
==============================================================================
--- tika/trunk/tika-core/src/test/java/org/apache/tika/metadata/TestMetadata.java (original)
+++ tika/trunk/tika-core/src/test/java/org/apache/tika/metadata/TestMetadata.java Thu Jul 15 13:00:57 2010
@@ -289,5 +289,24 @@ public class TestMetadata extends TestCa
         meta.set(Metadata.CREATION_DATE, new Date(1050));
         assertEquals("1970-01-01T00:00:01Z", meta.get(Metadata.CREATION_DATE));
         assertEquals(1000, meta.getDate(Metadata.CREATION_DATE).getTime());
+        
+        // We can accept a number of different ISO-8601 variants
+        meta.set(Metadata.CREATION_DATE, "1970-01-01T00:00:01Z");
+        assertEquals(1000, meta.getDate(Metadata.CREATION_DATE).getTime());
+        
+        meta.set(Metadata.CREATION_DATE, "1970-01-01 00:00:01Z");
+        assertEquals(1000, meta.getDate(Metadata.CREATION_DATE).getTime());
+        
+        meta.set(Metadata.CREATION_DATE, "1970-01-01T01:00:01+01:00");
+        assertEquals(1000, meta.getDate(Metadata.CREATION_DATE).getTime());
+        
+        meta.set(Metadata.CREATION_DATE, "1970-01-01 01:00:01+01:00");
+        assertEquals(1000, meta.getDate(Metadata.CREATION_DATE).getTime());
+        
+        meta.set(Metadata.CREATION_DATE, "1970-01-01T12:00:01+12:00");
+        assertEquals(1000, meta.getDate(Metadata.CREATION_DATE).getTime());
+        
+        meta.set(Metadata.CREATION_DATE, "1969-12-31T12:00:01-12:00");
+        assertEquals(1000, meta.getDate(Metadata.CREATION_DATE).getTime());
     }
 }


Reply via email to