Author: olga
Date: Mon Oct 12 18:22:12 2009
New Revision: 824446
URL: http://svn.apache.org/viewvc?rev=824446&view=rev
Log:
PIG-1015: [piggybank] DateExtractor should take into account timezones
(dryaboy via olgan)
Modified:
hadoop/pig/trunk/contrib/CHANGES.txt
hadoop/pig/trunk/contrib/piggybank/java/src/main/java/org/apache/pig/piggybank/evaluation/util/apachelogparser/DateExtractor.java
hadoop/pig/trunk/contrib/piggybank/java/src/test/java/org/apache/pig/piggybank/test/evaluation/util/apachelogparser/TestDateExtractor.java
Modified: hadoop/pig/trunk/contrib/CHANGES.txt
URL:
http://svn.apache.org/viewvc/hadoop/pig/trunk/contrib/CHANGES.txt?rev=824446&r1=824445&r2=824446&view=diff
==============================================================================
--- hadoop/pig/trunk/contrib/CHANGES.txt (original)
+++ hadoop/pig/trunk/contrib/CHANGES.txt Mon Oct 12 18:22:12 2009
@@ -1,3 +1,5 @@
+PIG-1015: [piggybank] DateExtractor should take into account timezones
+(dryaboy via olgan)
PIG-911: Added SequenceFileLoader (dryaboy via gates)
PIG-885: New UDFs for piggybank (Bin, Decode, LookupInFiles, RegexExtract,
RegexMatch, HashFVN, DiffDate) (daijy)
PIG-868: added string manipulation functions (bennies via olgan)
Modified:
hadoop/pig/trunk/contrib/piggybank/java/src/main/java/org/apache/pig/piggybank/evaluation/util/apachelogparser/DateExtractor.java
URL:
http://svn.apache.org/viewvc/hadoop/pig/trunk/contrib/piggybank/java/src/main/java/org/apache/pig/piggybank/evaluation/util/apachelogparser/DateExtractor.java?rev=824446&r1=824445&r2=824446&view=diff
==============================================================================
---
hadoop/pig/trunk/contrib/piggybank/java/src/main/java/org/apache/pig/piggybank/evaluation/util/apachelogparser/DateExtractor.java
(original)
+++
hadoop/pig/trunk/contrib/piggybank/java/src/main/java/org/apache/pig/piggybank/evaluation/util/apachelogparser/DateExtractor.java
Mon Oct 12 18:22:12 2009
@@ -19,6 +19,7 @@
import java.util.ArrayList;
import java.util.Date;
import java.util.List;
+import java.util.TimeZone;
import org.apache.pig.EvalFunc;
import org.apache.pig.FuncSpec;
@@ -29,9 +30,9 @@
import org.apache.pig.impl.util.WrappedIOException;
/**
- * DateExtractor has three different constructors which each allow for
different functionality. The
- * incomingDateFormat (yyyy-MM-dd by default) is used to match the date string
that gets passed in from the
- * log. The outgoingDateFormat (dd/MMM/yyyy:HH:mm:ss Z by default) is used to
format the returned string.
+ * DateExtractor has four different constructors which each allow for
different functionality. The
+ * incomingDateFormat ("dd/MMM/yyyy:HH:mm:ss Z" by default) is used to match
the date string that gets passed in from the
+ * log. The outgoingDateFormat ("yyyy-MM-dd" by default) is used to format the
returned string.
*
* Different constructors exist for each combination; please use the
appropriate respective constructor.
*
@@ -46,12 +47,14 @@
* A = FOREACH row GENERATE DateExtractor(dayTime);
*
* If a string cannot be parsed, null will be returned and an error message
printed to stderr.
- *
+ *
+ * By default, the DateExtractor uses the GMT timezone. You can use the
three-parameter constructor to override the
+ * timezone.
*/
public class DateExtractor extends EvalFunc<String> {
- private static SimpleDateFormat DEFAULT_INCOMING_DATE_FORMAT = new
SimpleDateFormat("dd/MMM/yyyy:HH:mm:ss Z");
- private static SimpleDateFormat DEFAULT_OUTGOING_DATE_FORMAT = new
SimpleDateFormat("yyyy-MM-dd");
-
+ private static String DEFAULT_INCOMING_DATE_FORMAT = "dd/MMM/yyyy:HH:mm:ss Z";
+ private static String DEFAULT_OUTGOING_DATE_FORMAT = "yyyy-MM-dd";
+ private static String DEFAULT_TZ_ID="GMT";
private SimpleDateFormat incomingDateFormat;
private SimpleDateFormat outgoingDateFormat;
@@ -61,8 +64,7 @@
* @param outgoingDateString outgoingDateFormat is based on
outgoingDateString
*/
public DateExtractor() {
- incomingDateFormat = DEFAULT_INCOMING_DATE_FORMAT;
- outgoingDateFormat = DEFAULT_OUTGOING_DATE_FORMAT;
+ this(DEFAULT_INCOMING_DATE_FORMAT, DEFAULT_OUTGOING_DATE_FORMAT,
DEFAULT_TZ_ID);
}
/**
@@ -71,8 +73,7 @@
* @param outgoingDateString outgoingDateFormat is based on
outgoingDateString
*/
public DateExtractor(String outgoingDateString) {
- incomingDateFormat = DEFAULT_INCOMING_DATE_FORMAT;
- outgoingDateFormat = new SimpleDateFormat(outgoingDateString);
+ this(DEFAULT_INCOMING_DATE_FORMAT, outgoingDateString, "GMT");
}
/**
@@ -83,10 +84,25 @@
*
*/
public DateExtractor(String incomingDateString, String outgoingDateString)
{
+ this(incomingDateString, outgoingDateString, DEFAULT_TZ_ID);
+ }
+
+ /**
+ * forms the formats based on passed incomingDateString and
outgoingDateString
+ *
+ * @param incomingDateString incomingDateFormat is based on
incomingDateString
+ * @param outgoingDateString outgoingDateFormat is based on
outgoingDateString
+ * @param timeZoneID time zone id in which dates should be expressed.
+ *
+ */
+ public DateExtractor(String incomingDateString, String outgoingDateString,
String timeZoneID) {
+ TimeZone tz = TimeZone.getTimeZone(timeZoneID);
incomingDateFormat = new SimpleDateFormat(incomingDateString);
outgoingDateFormat = new SimpleDateFormat(outgoingDateString);
+ incomingDateFormat.setTimeZone(tz);
+ outgoingDateFormat.setTimeZone(tz);
}
-
+
@Override
public String exec(Tuple input) throws IOException {
if (input == null || input.size() == 0)
@@ -96,6 +112,7 @@
str = (String)input.get(0);
Date date = incomingDateFormat.parse(str);
return outgoingDateFormat.format(date);
+
} catch (ParseException pe) {
System.err.println("piggybank.evaluation.util.apachelogparser.DateExtractor:
unable to parse date "+str);
return null;
Modified:
hadoop/pig/trunk/contrib/piggybank/java/src/test/java/org/apache/pig/piggybank/test/evaluation/util/apachelogparser/TestDateExtractor.java
URL:
http://svn.apache.org/viewvc/hadoop/pig/trunk/contrib/piggybank/java/src/test/java/org/apache/pig/piggybank/test/evaluation/util/apachelogparser/TestDateExtractor.java?rev=824446&r1=824445&r2=824446&view=diff
==============================================================================
---
hadoop/pig/trunk/contrib/piggybank/java/src/test/java/org/apache/pig/piggybank/test/evaluation/util/apachelogparser/TestDateExtractor.java
(original)
+++
hadoop/pig/trunk/contrib/piggybank/java/src/test/java/org/apache/pig/piggybank/test/evaluation/util/apachelogparser/TestDateExtractor.java
Mon Oct 12 18:22:12 2009
@@ -33,23 +33,35 @@
@Test
public void testDefaultFormatters() throws Exception {
DateExtractor dayExtractor = new DateExtractor();
+ // test that GMT conversion moves the day
input.set(0, "20/Sep/2008:23:53:04 -0600");
+ assertEquals("2008-09-21", dayExtractor.exec(input));
+
+ // test that if the string is already in GMT, nothing moves
+ input.set(0, "20/Sep/2008:23:53:04 -0000");
assertEquals("2008-09-20", dayExtractor.exec(input));
}
@Test
+ public void testMZFormatters() throws Exception {
+ DateExtractor extractor = new DateExtractor("dd/MMM/yyyy:HH:mm:ss Z",
"yyyy-MM-dd", "PST");
+ input.set(0, "20/Sep/2008:23:53:04 -0700");
+ assertEquals("2008-09-20", extractor.exec(input));
+ }
+
+ @Test
public void testFailureThenSuccess() throws Exception {
DateExtractor dayExtractor = new DateExtractor();
input.set(0,"dud");
assertEquals(null, dayExtractor.exec(input));
- input.set(0,"20/Sep/2008:23:53:04 -0600");
+ input.set(0,"20/Sep/2008:23:53:04 -0000");
assertEquals("2008-09-20", dayExtractor.exec(input));
}
@Test
public void testPassedOutputFormatter() throws Exception {
DateExtractor dayExtractor = new DateExtractor("MM-dd-yyyy");
- input.set(0,"20/Sep/2008:23:53:04 -0600");
+ input.set(0,"20/Sep/2008:23:53:04 -0000");
assertEquals("09-20-2008", dayExtractor.exec(input));
}