Repository: hive Updated Branches: refs/heads/branch-3 7a2b69588 -> c95136a07
HIVE-19370: Issue: ADD Months function on timestamp datatype fields in hive (Bharathkrishna Guruvayoor Murali, reviewed by Peter Vary) Project: http://git-wip-us.apache.org/repos/asf/hive/repo Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/95f19a13 Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/95f19a13 Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/95f19a13 Branch: refs/heads/branch-3 Commit: 95f19a137638b450f21381c1d114dce993163297 Parents: 7a2b695 Author: Bharathkrishna Guruvayoor Murali <[email protected]> Authored: Fri Jun 1 05:51:35 2018 -0500 Committer: Jesus Camacho Rodriguez <[email protected]> Committed: Fri Jun 22 10:20:13 2018 -0700 ---------------------------------------------------------------------- .../ql/udf/generic/GenericUDFAddMonths.java | 73 ++++++++++--- .../ql/udf/generic/TestGenericUDFAddMonths.java | 109 +++++++++++++++++++ .../queries/clientpositive/udf_add_months.q | 2 + .../results/clientpositive/udf_add_months.q.out | 20 +++- 4 files changed, 181 insertions(+), 23 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/hive/blob/95f19a13/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFAddMonths.java ---------------------------------------------------------------------- diff --git a/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFAddMonths.java b/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFAddMonths.java index dae4b97..ea1544f 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFAddMonths.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFAddMonths.java @@ -22,11 +22,13 @@ import static org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveO import static org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorUtils.PrimitiveGrouping.STRING_GROUP; import static org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorUtils.PrimitiveGrouping.VOID_GROUP; +import java.text.SimpleDateFormat; import java.util.Calendar; import java.util.Date; import org.apache.hadoop.hive.ql.exec.Description; import org.apache.hadoop.hive.ql.exec.UDFArgumentException; +import org.apache.hadoop.hive.ql.exec.UDFArgumentTypeException; import org.apache.hadoop.hive.ql.metadata.HiveException; import org.apache.hadoop.hive.serde2.objectinspector.ConstantObjectInspector; import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector; @@ -44,32 +46,61 @@ import org.apache.hive.common.util.DateUtils; * */ @Description(name = "add_months", - value = "_FUNC_(start_date, num_months) - Returns the date that is num_months after start_date.", - extended = "start_date is a string in the format 'yyyy-MM-dd HH:mm:ss' or" - + " 'yyyy-MM-dd'. num_months is a number. The time part of start_date is " - + "ignored.\n" - + "Example:\n " + " > SELECT _FUNC_('2009-08-31', 1) FROM src LIMIT 1;\n" + " '2009-09-30'") + value = "_FUNC_(start_date, num_months, output_date_format) - " + + "Returns the date that is num_months after start_date.", + extended = "start_date is a string or timestamp indicating a valid date. " + + "num_months is a number. output_date_format is an optional String which specifies the format for output.\n" + + "The default output format is 'YYYY-MM-dd'.\n" + + "Example:\n > SELECT _FUNC_('2009-08-31', 1) FROM src LIMIT 1;\n" + " '2009-09-30'." + + "\n > SELECT _FUNC_('2017-12-31 14:15:16', 2, 'YYYY-MM-dd HH:mm:ss') LIMIT 1;\n" + + "'2018-02-28 14:15:16'.\n") @NDV(maxNdv = 250) // 250 seems to be reasonable upper limit for this public class GenericUDFAddMonths extends GenericUDF { - private transient Converter[] converters = new Converter[2]; - private transient PrimitiveCategory[] inputTypes = new PrimitiveCategory[2]; - private final Calendar calendar = Calendar.getInstance(); + private transient Converter[] tsConverters = new Converter[3]; + private transient PrimitiveCategory[] tsInputTypes = new PrimitiveCategory[3]; + private transient Converter[] dtConverters = new Converter[3]; + private transient PrimitiveCategory[] dtInputTypes = new PrimitiveCategory[3]; private final Text output = new Text(); + private transient SimpleDateFormat formatter = null; + private final Calendar calendar = Calendar.getInstance(); private transient Integer numMonthsConst; private transient boolean isNumMonthsConst; @Override public ObjectInspector initialize(ObjectInspector[] arguments) throws UDFArgumentException { - checkArgsSize(arguments, 2, 2); + checkArgsSize(arguments, 2, 3); checkArgPrimitive(arguments, 0); checkArgPrimitive(arguments, 1); - checkArgGroups(arguments, 0, inputTypes, STRING_GROUP, DATE_GROUP, VOID_GROUP); - checkArgGroups(arguments, 1, inputTypes, NUMERIC_GROUP, VOID_GROUP); + if (arguments.length == 3) { + if (arguments[2] instanceof ConstantObjectInspector) { + checkArgPrimitive(arguments, 2); + checkArgGroups(arguments, 2, tsInputTypes, STRING_GROUP); + String fmtStr = getConstantStringValue(arguments, 2); + if (fmtStr != null) { + formatter = new SimpleDateFormat(fmtStr); + } + } else { + throw new UDFArgumentTypeException(2, getFuncName() + " only takes constant as " + + getArgOrder(2) + " argument"); + } + } + if (formatter == null) { + //If the DateFormat is not provided by the user or is invalid, use the default format YYYY-MM-dd + formatter = DateUtils.getDateFormat(); + } + + // the function should support both short date and full timestamp format + // time part of the timestamp should not be skipped + checkArgGroups(arguments, 0, tsInputTypes, STRING_GROUP, DATE_GROUP, VOID_GROUP); + checkArgGroups(arguments, 0, dtInputTypes, STRING_GROUP, DATE_GROUP, VOID_GROUP); - obtainDateConverter(arguments, 0, inputTypes, converters); - obtainIntConverter(arguments, 1, inputTypes, converters); + obtainTimestampConverter(arguments, 0, tsInputTypes, tsConverters); + obtainDateConverter(arguments, 0, dtInputTypes, dtConverters); + + checkArgGroups(arguments, 1, tsInputTypes, NUMERIC_GROUP, VOID_GROUP); + obtainIntConverter(arguments, 1, tsInputTypes, tsConverters); if (arguments[1] instanceof ConstantObjectInspector) { numMonthsConst = getConstantIntValue(arguments, 1); @@ -86,7 +117,7 @@ public class GenericUDFAddMonths extends GenericUDF { if (isNumMonthsConst) { numMonthV = numMonthsConst; } else { - numMonthV = getIntValue(arguments, 1, converters); + numMonthV = getIntValue(arguments, 1, tsConverters); } if (numMonthV == null) { @@ -94,14 +125,22 @@ public class GenericUDFAddMonths extends GenericUDF { } int numMonthInt = numMonthV.intValue(); - Date date = getDateValue(arguments, 0, inputTypes, converters); + + // the function should support both short date and full timestamp format + // time part of the timestamp should not be skipped + Date date = getTimestampValue(arguments, 0, tsConverters); if (date == null) { - return null; + date = getDateValue(arguments, 0, dtInputTypes, dtConverters); + if (date == null) { + return null; + } } addMonth(date, numMonthInt); Date newDate = calendar.getTime(); - output.set(DateUtils.getDateFormat().format(newDate)); + String res = formatter.format(newDate); + + output.set(res); return output; } http://git-wip-us.apache.org/repos/asf/hive/blob/95f19a13/ql/src/test/org/apache/hadoop/hive/ql/udf/generic/TestGenericUDFAddMonths.java ---------------------------------------------------------------------- diff --git a/ql/src/test/org/apache/hadoop/hive/ql/udf/generic/TestGenericUDFAddMonths.java b/ql/src/test/org/apache/hadoop/hive/ql/udf/generic/TestGenericUDFAddMonths.java index af9b6c4..0db9370 100644 --- a/ql/src/test/org/apache/hadoop/hive/ql/udf/generic/TestGenericUDFAddMonths.java +++ b/ql/src/test/org/apache/hadoop/hive/ql/udf/generic/TestGenericUDFAddMonths.java @@ -25,13 +25,21 @@ import org.apache.hadoop.hive.ql.udf.generic.GenericUDF.DeferredJavaObject; import org.apache.hadoop.hive.ql.udf.generic.GenericUDF.DeferredObject; import org.apache.hadoop.hive.serde2.io.ByteWritable; import org.apache.hadoop.hive.serde2.io.ShortWritable; +import org.apache.hadoop.hive.serde2.io.TimestampWritable; import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector; import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory; +import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory; import org.apache.hadoop.io.IntWritable; import org.apache.hadoop.io.Text; +import java.sql.Timestamp; public class TestGenericUDFAddMonths extends TestCase { + private final Text fmtTextWithTime = new Text("YYYY-MM-dd HH:mm:ss"); + private final Text fmtTextWithTimeAndms = new Text("YYYY-MM-dd HH:mm:ss.SSS"); + private final Text fmtTextWithoutTime = new Text("YYYY-MM-dd"); + private final Text fmtTextInvalid = new Text("YYYY-abcdz"); + public void testAddMonthsInt() throws HiveException { GenericUDFAddMonths udf = new GenericUDFAddMonths(); ObjectInspector valueOI0 = PrimitiveObjectInspectorFactory.writableStringObjectInspector; @@ -61,6 +69,85 @@ public class TestGenericUDFAddMonths extends TestCase { runAndVerify("2016-02-29 10:30:00", -12, "2015-02-28", udf); runAndVerify("2016-01-29 10:30:00", 1, "2016-02-29", udf); runAndVerify("2016-02-29 10:30:00", -1, "2016-01-31", udf); + runAndVerify("2016-02-29 10:30:00", -1, fmtTextWithoutTime, "2016-01-31", udf); + } + + public void testAddMonthsStringWithTime() throws HiveException { + GenericUDFAddMonths udf = new GenericUDFAddMonths(); + ObjectInspector valueOI0 = PrimitiveObjectInspectorFactory.writableStringObjectInspector; + ObjectInspector valueOI1 = PrimitiveObjectInspectorFactory.writableIntObjectInspector; + ObjectInspector valueOI2 = PrimitiveObjectInspectorFactory + .getPrimitiveWritableConstantObjectInspector(TypeInfoFactory.stringTypeInfo, + fmtTextWithTime); + + ObjectInspector[] arguments = {valueOI0, valueOI1, valueOI2}; + udf.initialize(arguments); + runAndVerify("2018-05-10 08:15:12", -1, fmtTextWithTime, "2018-04-10 08:15:12", udf); + runAndVerify("2017-12-31 14:15:16", 2, fmtTextWithTime, "2018-02-28 14:15:16", udf); + runAndVerify("2017-12-31 14:15:16.001", 2, fmtTextWithTime, "2018-02-28 14:15:16", udf); + } + + public void testAddMonthsInvalidFormatter() throws HiveException { + GenericUDFAddMonths udf = new GenericUDFAddMonths(); + ObjectInspector valueOI0 = PrimitiveObjectInspectorFactory.writableStringObjectInspector; + ObjectInspector valueOI1 = PrimitiveObjectInspectorFactory.writableIntObjectInspector; + ObjectInspector valueOI2 = PrimitiveObjectInspectorFactory + .getPrimitiveWritableConstantObjectInspector(TypeInfoFactory.stringTypeInfo, + fmtTextInvalid); + + ObjectInspector[] arguments = {valueOI0, valueOI1, valueOI2}; + try { + udf.initialize(arguments); + fail("Expected to throw an exception for invalid DateFormat"); + } catch (IllegalArgumentException e) { + //test success if exception caught + } + } + public void testAddMonthsStringWithTimeWithms() throws HiveException { + GenericUDFAddMonths udf = new GenericUDFAddMonths(); + ObjectInspector valueOI0 = PrimitiveObjectInspectorFactory.writableStringObjectInspector; + ObjectInspector valueOI1 = PrimitiveObjectInspectorFactory.writableIntObjectInspector; + ObjectInspector valueOI2 = PrimitiveObjectInspectorFactory + .getPrimitiveWritableConstantObjectInspector(TypeInfoFactory.stringTypeInfo, + fmtTextWithTimeAndms); + + ObjectInspector[] arguments = {valueOI0, valueOI1, valueOI2}; + udf.initialize(arguments); + runAndVerify("2017-12-31 14:15:16.350", 2, fmtTextWithTimeAndms, "2018-02-28 14:15:16.350", + udf); + runAndVerify("2017-12-31 14:15:16.001", 2, fmtTextWithTimeAndms, "2018-02-28 14:15:16.001", + udf); + //Try to parse ms where there is no millisecond part in input, expected to return .000 as ms + runAndVerify("2017-12-31 14:15:16", 2, fmtTextWithTimeAndms, "2018-02-28 14:15:16.000", udf); + } + + public void testAddMonthsWithNullFormatter() throws HiveException { + GenericUDFAddMonths udf = new GenericUDFAddMonths(); + ObjectInspector valueOI0 = PrimitiveObjectInspectorFactory.writableStringObjectInspector; + ObjectInspector valueOI1 = PrimitiveObjectInspectorFactory.writableIntObjectInspector; + ObjectInspector valueOI2 = PrimitiveObjectInspectorFactory + .getPrimitiveWritableConstantObjectInspector(TypeInfoFactory.stringTypeInfo, + null); + + ObjectInspector[] arguments = {valueOI0, valueOI1, valueOI2}; + udf.initialize(arguments); + runAndVerify("2017-12-31 14:15:16.350", 2, null, "2018-02-28", + udf); + runAndVerify("2017-12-31", 2, null, "2018-02-28", + udf); + } + public void testAddMonthsTimestamp() throws HiveException { + GenericUDFAddMonths udf = new GenericUDFAddMonths(); + ObjectInspector valueOI0 = PrimitiveObjectInspectorFactory.writableTimestampObjectInspector; + ObjectInspector valueOI1 = PrimitiveObjectInspectorFactory.writableIntObjectInspector; + + ObjectInspector valueOI2 = PrimitiveObjectInspectorFactory + .getPrimitiveWritableConstantObjectInspector(TypeInfoFactory.stringTypeInfo, fmtTextWithTime); + ObjectInspector[] arguments = {valueOI0, valueOI1, valueOI2}; + + udf.initialize(arguments); + runAndVerify(Timestamp.valueOf("2018-05-10 08:15:12"), 1, fmtTextWithTime, "2018-06-10 08:15:12", udf); + runAndVerify(Timestamp.valueOf("2017-12-31 14:15:16"), 2, fmtTextWithTime, "2018-02-28 14:15:16", udf); } public void testWrongDateStr() throws HiveException { @@ -150,6 +237,28 @@ public class TestGenericUDFAddMonths extends TestCase { assertEquals("add_months() test ", expResult, output != null ? output.toString() : null); } + private void runAndVerify(String str, int months, Text dateFormat, String expResult, + GenericUDF udf) throws HiveException { + DeferredObject valueObj0 = new DeferredJavaObject(new Text(str)); + DeferredObject valueObj1 = new DeferredJavaObject(new IntWritable(months)); + DeferredObject valueObj2 = new DeferredJavaObject(dateFormat); + DeferredObject[] args = {valueObj0, valueObj1, valueObj2}; + Text output = (Text) udf.evaluate(args); + assertEquals("add_months() test with time part", expResult, + output != null ? output.toString() : null); + } + + private void runAndVerify(Timestamp ts, int months, Text dateFormat, String expResult, GenericUDF udf) + throws HiveException { + DeferredObject valueObj0 = new DeferredJavaObject(new TimestampWritable(ts)); + DeferredObject valueObj1 = new DeferredJavaObject(new IntWritable(months)); + DeferredObject valueObj2 = new DeferredJavaObject(dateFormat); + DeferredObject[] args = {valueObj0, valueObj1, valueObj2}; + Text output = (Text) udf.evaluate(args); + assertEquals("add_months() test for timestamp", expResult, output != null ? output.toString() : null); + } + + private void runAndVerify(String str, short months, String expResult, GenericUDF udf) throws HiveException { DeferredObject valueObj0 = new DeferredJavaObject(new Text(str)); http://git-wip-us.apache.org/repos/asf/hive/blob/95f19a13/ql/src/test/queries/clientpositive/udf_add_months.q ---------------------------------------------------------------------- diff --git a/ql/src/test/queries/clientpositive/udf_add_months.q b/ql/src/test/queries/clientpositive/udf_add_months.q index 0b8eeee..0ef7c3e 100644 --- a/ql/src/test/queries/clientpositive/udf_add_months.q +++ b/ql/src/test/queries/clientpositive/udf_add_months.q @@ -25,6 +25,7 @@ add_months('2014-04-30 10:30:00', -2), add_months('2015-02-28 10:30:00', 12), add_months('2016-02-29 10:30:00', -12), add_months('2016-01-29 10:30:00', 1), +add_months('2016-02-29 10:30:12', -12, 'YYYY-MM-dd HH:mm:ss'), add_months('2016-02-29 10:30:00', -1); select @@ -37,4 +38,5 @@ add_months(cast('2015-02-28 10:30:00' as timestamp), 12), add_months(cast('2016-02-29 10:30:00' as timestamp), -12), add_months(cast('2016-01-29 10:30:00' as timestamp), 1), add_months(cast('2016-02-29 10:30:00' as timestamp), -1), +add_months(cast('2016-02-29 10:30:12' as timestamp), 2, 'YYYY-MM-dd HH:mm:ss'), add_months(cast(null as timestamp), 1); http://git-wip-us.apache.org/repos/asf/hive/blob/95f19a13/ql/src/test/results/clientpositive/udf_add_months.q.out ---------------------------------------------------------------------- diff --git a/ql/src/test/results/clientpositive/udf_add_months.q.out b/ql/src/test/results/clientpositive/udf_add_months.q.out index 5ba720a..69b0c4f 100644 --- a/ql/src/test/results/clientpositive/udf_add_months.q.out +++ b/ql/src/test/results/clientpositive/udf_add_months.q.out @@ -2,16 +2,20 @@ PREHOOK: query: DESCRIBE FUNCTION add_months PREHOOK: type: DESCFUNCTION POSTHOOK: query: DESCRIBE FUNCTION add_months POSTHOOK: type: DESCFUNCTION -add_months(start_date, num_months) - Returns the date that is num_months after start_date. +add_months(start_date, num_months, output_date_format) - Returns the date that is num_months after start_date. PREHOOK: query: DESCRIBE FUNCTION EXTENDED add_months PREHOOK: type: DESCFUNCTION POSTHOOK: query: DESCRIBE FUNCTION EXTENDED add_months POSTHOOK: type: DESCFUNCTION -add_months(start_date, num_months) - Returns the date that is num_months after start_date. -start_date is a string in the format 'yyyy-MM-dd HH:mm:ss' or 'yyyy-MM-dd'. num_months is a number. The time part of start_date is ignored. +add_months(start_date, num_months, output_date_format) - Returns the date that is num_months after start_date. +start_date is a string or timestamp indicating a valid date. num_months is a number. output_date_format is an optional String which specifies the format for output. +The default output format is 'YYYY-MM-dd'. Example: > SELECT add_months('2009-08-31', 1) FROM src LIMIT 1; - '2009-09-30' + '2009-09-30'. + > SELECT add_months('2017-12-31 14:15:16', 2, 'YYYY-MM-dd HH:mm:ss') LIMIT 1; +'2018-02-28 14:15:16'. + Function class:org.apache.hadoop.hive.ql.udf.generic.GenericUDFAddMonths Function type:BUILTIN PREHOOK: query: explain select add_months('2014-01-14', 1) @@ -76,6 +80,7 @@ add_months('2014-04-30 10:30:00', -2), add_months('2015-02-28 10:30:00', 12), add_months('2016-02-29 10:30:00', -12), add_months('2016-01-29 10:30:00', 1), +add_months('2016-02-29 10:30:12', -12, 'YYYY-MM-dd HH:mm:ss'), add_months('2016-02-29 10:30:00', -1) PREHOOK: type: QUERY PREHOOK: Input: _dummy_database@_dummy_table @@ -89,11 +94,12 @@ add_months('2014-04-30 10:30:00', -2), add_months('2015-02-28 10:30:00', 12), add_months('2016-02-29 10:30:00', -12), add_months('2016-01-29 10:30:00', 1), +add_months('2016-02-29 10:30:12', -12, 'YYYY-MM-dd HH:mm:ss'), add_months('2016-02-29 10:30:00', -1) POSTHOOK: type: QUERY POSTHOOK: Input: _dummy_database@_dummy_table #### A masked pattern was here #### -2014-02-14 2014-02-28 2014-01-31 2014-04-30 2014-02-28 2016-02-29 2015-02-28 2016-02-29 2016-01-31 +2014-02-14 2014-02-28 2014-01-31 2014-04-30 2014-02-28 2016-02-29 2015-02-28 2016-02-29 2015-02-28 10:30:12 2016-01-31 PREHOOK: query: select add_months(cast('2014-01-14 10:30:00' as timestamp), 1), add_months(cast('2014-01-31 10:30:00' as timestamp), 1), @@ -104,6 +110,7 @@ add_months(cast('2015-02-28 10:30:00' as timestamp), 12), add_months(cast('2016-02-29 10:30:00' as timestamp), -12), add_months(cast('2016-01-29 10:30:00' as timestamp), 1), add_months(cast('2016-02-29 10:30:00' as timestamp), -1), +add_months(cast('2016-02-29 10:30:12' as timestamp), 2, 'YYYY-MM-dd HH:mm:ss'), add_months(cast(null as timestamp), 1) PREHOOK: type: QUERY PREHOOK: Input: _dummy_database@_dummy_table @@ -118,8 +125,9 @@ add_months(cast('2015-02-28 10:30:00' as timestamp), 12), add_months(cast('2016-02-29 10:30:00' as timestamp), -12), add_months(cast('2016-01-29 10:30:00' as timestamp), 1), add_months(cast('2016-02-29 10:30:00' as timestamp), -1), +add_months(cast('2016-02-29 10:30:12' as timestamp), 2, 'YYYY-MM-dd HH:mm:ss'), add_months(cast(null as timestamp), 1) POSTHOOK: type: QUERY POSTHOOK: Input: _dummy_database@_dummy_table #### A masked pattern was here #### -2014-02-14 2014-02-28 2014-01-31 2014-04-30 2014-02-28 2016-02-29 2015-02-28 2016-02-29 2016-01-31 NULL +2014-02-14 2014-02-28 2014-01-31 2014-04-30 2014-02-28 2016-02-29 2015-02-28 2016-02-29 2016-01-31 2016-04-30 10:30:12 NULL
