Author: olga
Date: Mon Oct 12 18:22:12 2009
New Revision: 824446

URL: http://svn.apache.org/viewvc?rev=824446&view=rev
Log:
PIG-1015: [piggybank] DateExtractor should take into account timezones
(dryaboy via  olgan)

Modified:
    hadoop/pig/trunk/contrib/CHANGES.txt
    hadoop/pig/trunk/contrib/piggybank/java/src/main/java/org/apache/pig/piggybank/evaluation/util/apachelogparser/DateExtractor.java
    hadoop/pig/trunk/contrib/piggybank/java/src/test/java/org/apache/pig/piggybank/test/evaluation/util/apachelogparser/TestDateExtractor.java

Modified: hadoop/pig/trunk/contrib/CHANGES.txt
URL: http://svn.apache.org/viewvc/hadoop/pig/trunk/contrib/CHANGES.txt?rev=824446&r1=824445&r2=824446&view=diff
==============================================================================
--- hadoop/pig/trunk/contrib/CHANGES.txt (original)
+++ hadoop/pig/trunk/contrib/CHANGES.txt Mon Oct 12 18:22:12 2009
@@ -1,3 +1,5 @@
+PIG-1015: [piggybank] DateExtractor should take into account timezones
+(dryaboy via  olgan)
 PIG-911: Added SequenceFileLoader (dryaboy via gates)
 PIG-885: New UDFs for piggybank (Bin, Decode, LookupInFiles, RegexExtract, RegexMatch, HashFVN, DiffDate) (daijy)
 PIG-868: added strin manipulation functions (bennies via olgan)

Modified: hadoop/pig/trunk/contrib/piggybank/java/src/main/java/org/apache/pig/piggybank/evaluation/util/apachelogparser/DateExtractor.java
URL: http://svn.apache.org/viewvc/hadoop/pig/trunk/contrib/piggybank/java/src/main/java/org/apache/pig/piggybank/evaluation/util/apachelogparser/DateExtractor.java?rev=824446&r1=824445&r2=824446&view=diff
==============================================================================
--- hadoop/pig/trunk/contrib/piggybank/java/src/main/java/org/apache/pig/piggybank/evaluation/util/apachelogparser/DateExtractor.java (original)
+++ hadoop/pig/trunk/contrib/piggybank/java/src/main/java/org/apache/pig/piggybank/evaluation/util/apachelogparser/DateExtractor.java Mon Oct 12 18:22:12 2009
@@ -19,6 +19,7 @@
 import java.util.ArrayList;
 import java.util.Date;
 import java.util.List;
+import java.util.TimeZone;
 
 import org.apache.pig.EvalFunc;
 import org.apache.pig.FuncSpec;
@@ -29,9 +30,9 @@
 import org.apache.pig.impl.util.WrappedIOException;
 
 /**
- * DateExtractor has three different constructors which each allow for different functionality. The
- * incomingDateFormat (yyyy-MM-dd by default) is used to match the date string that gets passed in from the
- * log. The outgoingDateFormat (dd/MMM/yyyy:HH:mm:ss Z by default) is used to format the returned string.
+ * DateExtractor has four different constructors which each allow for different functionality. The
+ * incomingDateFormat ("dd/MMM/yyyy:HH:mm:ss Z" by default) is used to match the date string that gets passed in from the
+ * log. The outgoingDateFormat ("yyyy-MM-dd" by default) is used to format the returned string.
  * 
  * Different constructors exist for each combination; please use the appropriate respective constructor.
  * 
@@ -46,12 +47,14 @@
  * A = FOREACH row GENERATE DateExtractor(dayTime);
  * 
  * If a string cannot be parsed, null will be returned and an error message printed to stderr.
- * 
+ *
+ * By default, the DateExtractor uses the GMT timezone. You can use the three-parameter constructor to override the
+ * timezone.
  */
 public class DateExtractor extends EvalFunc<String> {
-    private static SimpleDateFormat DEFAULT_INCOMING_DATE_FORMAT = new SimpleDateFormat("dd/MMM/yyyy:HH:mm:ss Z");
-    private static SimpleDateFormat DEFAULT_OUTGOING_DATE_FORMAT = new SimpleDateFormat("yyyy-MM-dd");
-
+    private static String DEFAULT_INCOMING_DATE_FORMAT = "dd/MMM/yyyy:HH:mm:ss Z";
+    private static String DEFAULT_OUTGOING_DATE_FORMAT = "yyyy-MM-dd";
+    private static String DEFAULT_TZ_ID="GMT";
     private SimpleDateFormat incomingDateFormat;
     private SimpleDateFormat outgoingDateFormat;
 
@@ -61,8 +64,7 @@
      * @param outgoingDateString outgoingDateFormat is based on outgoingDateString
      */
     public DateExtractor() {
-        incomingDateFormat = DEFAULT_INCOMING_DATE_FORMAT;
-        outgoingDateFormat = DEFAULT_OUTGOING_DATE_FORMAT;
+        this(DEFAULT_INCOMING_DATE_FORMAT, DEFAULT_OUTGOING_DATE_FORMAT, DEFAULT_TZ_ID);
     }
 
     /**
@@ -71,8 +73,7 @@
      * @param outgoingDateString outgoingDateFormat is based on outgoingDateString
      */
     public DateExtractor(String outgoingDateString) {
-        incomingDateFormat = DEFAULT_INCOMING_DATE_FORMAT;
-        outgoingDateFormat = new SimpleDateFormat(outgoingDateString);
+        this(DEFAULT_INCOMING_DATE_FORMAT, outgoingDateString, "GMT");
     }
 
     /**
@@ -83,10 +84,25 @@
      * 
      */
     public DateExtractor(String incomingDateString, String outgoingDateString) {
+        this(incomingDateString, outgoingDateString, DEFAULT_TZ_ID);
+    }
+
+    /**
+     * forms the formats based on passed incomingDateString and outgoingDateString
+     * 
+     * @param incomingDateString incomingDateFormat is based on incomingDateString
+     * @param outgoingDateString outgoingDateFormat is based on outgoingDateString
+     * @param timeZoneID time zone id in which dates should be expressed.
+     * 
+     */
+    public DateExtractor(String incomingDateString, String outgoingDateString, String timeZoneID) {
+        TimeZone tz = TimeZone.getTimeZone(timeZoneID);
         incomingDateFormat = new SimpleDateFormat(incomingDateString);
         outgoingDateFormat = new SimpleDateFormat(outgoingDateString);
+        incomingDateFormat.setTimeZone(tz);
+        outgoingDateFormat.setTimeZone(tz);
     }
-
+    
     @Override
     public String exec(Tuple input) throws IOException {
       if (input == null || input.size() == 0)
@@ -96,6 +112,7 @@
         str = (String)input.get(0);
         Date date = incomingDateFormat.parse(str);
         return outgoingDateFormat.format(date);
+        
       } catch (ParseException pe) {
         System.err.println("piggybank.evaluation.util.apachelogparser.DateExtractor: unable to parse date "+str);
         return null;

Modified: hadoop/pig/trunk/contrib/piggybank/java/src/test/java/org/apache/pig/piggybank/test/evaluation/util/apachelogparser/TestDateExtractor.java
URL: http://svn.apache.org/viewvc/hadoop/pig/trunk/contrib/piggybank/java/src/test/java/org/apache/pig/piggybank/test/evaluation/util/apachelogparser/TestDateExtractor.java?rev=824446&r1=824445&r2=824446&view=diff
==============================================================================
--- hadoop/pig/trunk/contrib/piggybank/java/src/test/java/org/apache/pig/piggybank/test/evaluation/util/apachelogparser/TestDateExtractor.java (original)
+++ hadoop/pig/trunk/contrib/piggybank/java/src/test/java/org/apache/pig/piggybank/test/evaluation/util/apachelogparser/TestDateExtractor.java Mon Oct 12 18:22:12 2009
@@ -33,23 +33,35 @@
     @Test
     public void testDefaultFormatters() throws Exception {
         DateExtractor dayExtractor = new DateExtractor();
+        // test that GMT conversion moves the day
         input.set(0, "20/Sep/2008:23:53:04 -0600");
+        assertEquals("2008-09-21", dayExtractor.exec(input));
+        
+        // test that if the string is already in GMT, nothing moves
+        input.set(0, "20/Sep/2008:23:53:04 -0000");
         assertEquals("2008-09-20", dayExtractor.exec(input));
     }
 
     @Test
+    public void testMZFormatters() throws Exception {
+        DateExtractor extractor = new DateExtractor("dd/MMM/yyyy:HH:mm:ss Z", "yyyy-MM-dd", "PST");
+        input.set(0, "20/Sep/2008:23:53:04 -0700");
+        assertEquals("2008-09-20", extractor.exec(input));
+    }
+    
+    @Test
     public void testFailureThenSuccess() throws Exception {
         DateExtractor dayExtractor = new DateExtractor();
         input.set(0,"dud");
         assertEquals(null, dayExtractor.exec(input));
-        input.set(0,"20/Sep/2008:23:53:04 -0600");
+        input.set(0,"20/Sep/2008:23:53:04 -0000");
         assertEquals("2008-09-20", dayExtractor.exec(input));
     }
 
     @Test
     public void testPassedOutputFormatter() throws Exception {
         DateExtractor dayExtractor = new DateExtractor("MM-dd-yyyy");
-        input.set(0,"20/Sep/2008:23:53:04 -0600");
+        input.set(0,"20/Sep/2008:23:53:04 -0000");
         assertEquals("09-20-2008", dayExtractor.exec(input));
     }
 


Reply via email to