HIVE-10728 : deprecate unix_timestamp(void) and make it deterministic (Sergey Shelukhin, reviewed by Ashutosh Chauhan)
Project: http://git-wip-us.apache.org/repos/asf/hive/repo Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/24d3307b Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/24d3307b Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/24d3307b Branch: refs/heads/hbase-metastore Commit: 24d3307be79d35d3a34c49014dfdd597112f9106 Parents: 7556361 Author: Sergey Shelukhin <ser...@apache.org> Authored: Tue Jun 2 18:37:48 2015 -0700 Committer: Sergey Shelukhin <ser...@apache.org> Committed: Tue Jun 2 18:37:48 2015 -0700 ---------------------------------------------------------------------- .../hadoop/hive/ql/session/SessionState.java | 4 +-- .../udf/generic/GenericUDFToUnixTimeStamp.java | 6 +++- .../ql/udf/generic/GenericUDFUnixTimeStamp.java | 36 ++++++++++++++------ .../queries/clientpositive/autogen_colalias.q | 4 +-- .../queries/clientpositive/udf_unix_timestamp.q | 8 +++-- .../clientpositive/autogen_colalias.q.out | 12 +++---- ql/src/test/results/clientpositive/udf5.q.out | 30 +++++----------- .../clientpositive/udf_unix_timestamp.q.out | 32 +++++++++++++++-- 8 files changed, 85 insertions(+), 47 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/hive/blob/24d3307b/ql/src/java/org/apache/hadoop/hive/ql/session/SessionState.java ---------------------------------------------------------------------- diff --git a/ql/src/java/org/apache/hadoop/hive/ql/session/SessionState.java b/ql/src/java/org/apache/hadoop/hive/ql/session/SessionState.java index 37b6d6f..7930298 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/session/SessionState.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/session/SessionState.java @@ -903,12 +903,12 @@ public class SessionState { return ((ss != null) && (ss.out != null)) ? ss.out : System.out; } - public PrintStream getInfoStream() { + public static PrintStream getInfoStream() { SessionState ss = SessionState.get(); return ((ss != null) && (ss.info != null)) ? 
ss.info : getErrStream(); } - public PrintStream getErrStream() { + public static PrintStream getErrStream() { SessionState ss = SessionState.get(); return ((ss != null) && (ss.err != null)) ? ss.err : System.err; } http://git-wip-us.apache.org/repos/asf/hive/blob/24d3307b/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFToUnixTimeStamp.java ---------------------------------------------------------------------- diff --git a/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFToUnixTimeStamp.java b/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFToUnixTimeStamp.java index 65a2297..4ab5389 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFToUnixTimeStamp.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFToUnixTimeStamp.java @@ -152,10 +152,14 @@ public class GenericUDFToUnixTimeStamp extends GenericUDF { return retValue; } Timestamp timestamp = inputTimestampOI.getPrimitiveJavaObject(arguments[0].get()); - retValue.set(timestamp.getTime() / 1000); + setValueFromTs(retValue, timestamp); return retValue; } + protected static void setValueFromTs(LongWritable value, Timestamp timestamp) { + value.set(timestamp.getTime() / 1000); + } + @Override public String getDisplayString(String[] children) { StringBuilder sb = new StringBuilder(32); http://git-wip-us.apache.org/repos/asf/hive/blob/24d3307b/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFUnixTimeStamp.java ---------------------------------------------------------------------- diff --git a/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFUnixTimeStamp.java b/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFUnixTimeStamp.java index 0720c55..c1b2a01 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFUnixTimeStamp.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFUnixTimeStamp.java @@ -18,23 +18,43 @@ package org.apache.hadoop.hive.ql.udf.generic; +import 
java.io.PrintStream; + +import org.apache.commons.logging.Log; +import org.apache.commons.logging.LogFactory; import org.apache.hadoop.hive.ql.exec.Description; import org.apache.hadoop.hive.ql.exec.UDFArgumentException; import org.apache.hadoop.hive.ql.metadata.HiveException; +import org.apache.hadoop.hive.ql.session.SessionState; +import org.apache.hadoop.hive.ql.session.SessionState.LogHelper; import org.apache.hadoop.hive.ql.udf.UDFType; import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector; +import org.apache.hadoop.io.LongWritable; + -@UDFType(deterministic = false) +@UDFType(deterministic = true) @Description(name = "unix_timestamp", - value = "_FUNC_([date[, pattern]]) - Returns the UNIX timestamp", - extended = "Converts the current or specified time to number of seconds " - + "since 1970-01-01.") + value = "_FUNC_(date[, pattern]) - Converts the time to a number", + extended = "Converts the specified time to number of seconds " + + "since 1970-01-01. The _FUNC_(void) overload is deprecated, use current_timestamp.") public class GenericUDFUnixTimeStamp extends GenericUDFToUnixTimeStamp { - + private static final Log LOG = LogFactory.getLog(GenericUDFUnixTimeStamp.class); + private LongWritable currentTimestamp; // retValue is transient so store this separately. @Override protected void initializeInput(ObjectInspector[] arguments) throws UDFArgumentException { if (arguments.length > 0) { super.initializeInput(arguments); + } else { + if (currentTimestamp == null) { + currentTimestamp = new LongWritable(0); + setValueFromTs(currentTimestamp, SessionState.get().getQueryCurrentTimestamp()); + String msg = "unix_timestamp(void) is deprecated. 
Use current_timestamp instead."; + LOG.warn(msg); + PrintStream stream = LogHelper.getInfoStream(); + if (stream != null) { + stream.println(msg); + } + } } } @@ -45,10 +65,6 @@ public class GenericUDFUnixTimeStamp extends GenericUDFToUnixTimeStamp { @Override public Object evaluate(DeferredObject[] arguments) throws HiveException { - if (arguments.length == 0) { - retValue.set(System.currentTimeMillis() / 1000); - return retValue; - } - return super.evaluate(arguments); + return (arguments.length == 0) ? currentTimestamp : super.evaluate(arguments); } } http://git-wip-us.apache.org/repos/asf/hive/blob/24d3307b/ql/src/test/queries/clientpositive/autogen_colalias.q ---------------------------------------------------------------------- diff --git a/ql/src/test/queries/clientpositive/autogen_colalias.q b/ql/src/test/queries/clientpositive/autogen_colalias.q index e247a73..dac0bf7 100644 --- a/ql/src/test/queries/clientpositive/autogen_colalias.q +++ b/ql/src/test/queries/clientpositive/autogen_colalias.q @@ -1,6 +1,6 @@ CREATE TEMPORARY FUNCTION test_max AS 'org.apache.hadoop.hive.ql.udf.UDAFTestMax'; -create table dest_grouped_old1 as select 1+1, 2+2 as zz, src.key, test_max(length(src.value)), count(src.value), sin(count(src.value)), count(sin(src.value)), unix_timestamp(), CAST(SUM(IF(value > 10, value, 1)) AS INT), if(src.key > 1, +create table dest_grouped_old1 as select 1+1, 2+2 as zz, src.key, test_max(length(src.value)), count(src.value), sin(count(src.value)), count(sin(src.value)), current_timestamp(), CAST(SUM(IF(value > 10, value, 1)) AS INT), if(src.key > 1, 1, 0) from src group by src.key; @@ -12,7 +12,7 @@ describe dest_grouped_old2; set hive.autogen.columnalias.prefix.label=column_; set hive.autogen.columnalias.prefix.includefuncname=true; -create table dest_grouped_new1 as select 1+1, 2+2 as zz, ((src.key % 2)+2)/2, test_max(length(src.value)), count(src.value), sin(count(src.value)), count(sin(src.value)), unix_timestamp(), CAST(SUM(IF(value > 10, 
value, 1)) AS INT), if(src.key > 10, +create table dest_grouped_new1 as select 1+1, 2+2 as zz, ((src.key % 2)+2)/2, test_max(length(src.value)), count(src.value), sin(count(src.value)), count(sin(src.value)), current_timestamp(), CAST(SUM(IF(value > 10, value, 1)) AS INT), if(src.key > 10, (src.key +5) % 2, 0) from src group by src.key; http://git-wip-us.apache.org/repos/asf/hive/blob/24d3307b/ql/src/test/queries/clientpositive/udf_unix_timestamp.q ---------------------------------------------------------------------- diff --git a/ql/src/test/queries/clientpositive/udf_unix_timestamp.q b/ql/src/test/queries/clientpositive/udf_unix_timestamp.q index 1664329..bb598c2 100644 --- a/ql/src/test/queries/clientpositive/udf_unix_timestamp.q +++ b/ql/src/test/queries/clientpositive/udf_unix_timestamp.q @@ -21,9 +21,13 @@ SELECT unix_timestamp('2009 Mar 20 11:30:01 am', 'yyyy MMM dd h:mm:ss a') FROM oneline; +create table foo as SELECT + 'deprecated' as a, + unix_timestamp() as b +FROM oneline; +drop table foo; + SELECT 'random_string', unix_timestamp('random_string') FROM oneline; - - http://git-wip-us.apache.org/repos/asf/hive/blob/24d3307b/ql/src/test/results/clientpositive/autogen_colalias.q.out ---------------------------------------------------------------------- diff --git a/ql/src/test/results/clientpositive/autogen_colalias.q.out b/ql/src/test/results/clientpositive/autogen_colalias.q.out index 7257aaa..5569b46 100644 --- a/ql/src/test/results/clientpositive/autogen_colalias.q.out +++ b/ql/src/test/results/clientpositive/autogen_colalias.q.out @@ -4,7 +4,7 @@ PREHOOK: Output: test_max POSTHOOK: query: CREATE TEMPORARY FUNCTION test_max AS 'org.apache.hadoop.hive.ql.udf.UDAFTestMax' POSTHOOK: type: CREATEFUNCTION POSTHOOK: Output: test_max -PREHOOK: query: create table dest_grouped_old1 as select 1+1, 2+2 as zz, src.key, test_max(length(src.value)), count(src.value), sin(count(src.value)), count(sin(src.value)), unix_timestamp(), CAST(SUM(IF(value > 10, value, 1)) AS 
INT), if(src.key > 1, +PREHOOK: query: create table dest_grouped_old1 as select 1+1, 2+2 as zz, src.key, test_max(length(src.value)), count(src.value), sin(count(src.value)), count(sin(src.value)), current_timestamp(), CAST(SUM(IF(value > 10, value, 1)) AS INT), if(src.key > 1, 1, 0) from src group by src.key @@ -12,7 +12,7 @@ PREHOOK: type: CREATETABLE_AS_SELECT PREHOOK: Input: default@src PREHOOK: Output: database:default PREHOOK: Output: default@dest_grouped_old1 -POSTHOOK: query: create table dest_grouped_old1 as select 1+1, 2+2 as zz, src.key, test_max(length(src.value)), count(src.value), sin(count(src.value)), count(sin(src.value)), unix_timestamp(), CAST(SUM(IF(value > 10, value, 1)) AS INT), if(src.key > 1, +POSTHOOK: query: create table dest_grouped_old1 as select 1+1, 2+2 as zz, src.key, test_max(length(src.value)), count(src.value), sin(count(src.value)), count(sin(src.value)), current_timestamp(), CAST(SUM(IF(value > 10, value, 1)) AS INT), if(src.key > 1, 1, 0) from src group by src.key @@ -33,7 +33,7 @@ c3 int c4 bigint c5 double c6 bigint -c7 bigint +c7 timestamp c8 int c9 int PREHOOK: query: create table dest_grouped_old2 as select distinct src.key from src @@ -53,7 +53,7 @@ POSTHOOK: query: describe dest_grouped_old2 POSTHOOK: type: DESCTABLE POSTHOOK: Input: default@dest_grouped_old2 key string -PREHOOK: query: create table dest_grouped_new1 as select 1+1, 2+2 as zz, ((src.key % 2)+2)/2, test_max(length(src.value)), count(src.value), sin(count(src.value)), count(sin(src.value)), unix_timestamp(), CAST(SUM(IF(value > 10, value, 1)) AS INT), if(src.key > 10, +PREHOOK: query: create table dest_grouped_new1 as select 1+1, 2+2 as zz, ((src.key % 2)+2)/2, test_max(length(src.value)), count(src.value), sin(count(src.value)), count(sin(src.value)), current_timestamp(), CAST(SUM(IF(value > 10, value, 1)) AS INT), if(src.key > 10, (src.key +5) % 2, 0) from src group by src.key @@ -61,7 +61,7 @@ PREHOOK: type: CREATETABLE_AS_SELECT PREHOOK: Input: 
default@src PREHOOK: Output: database:default PREHOOK: Output: default@dest_grouped_new1 -POSTHOOK: query: create table dest_grouped_new1 as select 1+1, 2+2 as zz, ((src.key % 2)+2)/2, test_max(length(src.value)), count(src.value), sin(count(src.value)), count(sin(src.value)), unix_timestamp(), CAST(SUM(IF(value > 10, value, 1)) AS INT), if(src.key > 10, +POSTHOOK: query: create table dest_grouped_new1 as select 1+1, 2+2 as zz, ((src.key % 2)+2)/2, test_max(length(src.value)), count(src.value), sin(count(src.value)), count(sin(src.value)), current_timestamp(), CAST(SUM(IF(value > 10, value, 1)) AS INT), if(src.key > 10, (src.key +5) % 2, 0) from src group by src.key @@ -82,7 +82,7 @@ test_max_length_src__3 int count_src_value_4 bigint sin_count_src_value_5 double count_sin_src_value_6 bigint -unix_timestamp_7 bigint +current_timestamp_7 timestamp sum_if_value_10_valu_8 int if_src_key_10_src_ke_9 double PREHOOK: query: create table dest_grouped_new2 as select distinct src.key from src http://git-wip-us.apache.org/repos/asf/hive/blob/24d3307b/ql/src/test/results/clientpositive/udf5.q.out ---------------------------------------------------------------------- diff --git a/ql/src/test/results/clientpositive/udf5.q.out b/ql/src/test/results/clientpositive/udf5.q.out index 26cf3f1..860ebcb 100644 --- a/ql/src/test/results/clientpositive/udf5.q.out +++ b/ql/src/test/results/clientpositive/udf5.q.out @@ -54,33 +54,21 @@ POSTHOOK: query: EXPLAIN SELECT from_unixtime(unix_timestamp('2010-01-13 11:57:40', 'yyyy-MM-dd HH:mm:ss'), 'MM/dd/yy HH:mm:ss'), from_unixtime(unix_timestamp('2010-01-13 11:57:40')) from dest1 POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-1 is a root stage - Stage-0 depends on stages: Stage-1 + Stage-0 is a root stage STAGE PLANS: - Stage: Stage-1 - Map Reduce - Map Operator Tree: - TableScan - alias: dest1 - Statistics: Num rows: 1 Data size: 7 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: 
from_unixtime(unix_timestamp('2010-01-13 11:57:40','yyyy-MM-dd HH:mm:ss'), 'MM/dd/yy HH:mm:ss') (type: string), from_unixtime(unix_timestamp('2010-01-13 11:57:40')) (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 368 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 368 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Stage: Stage-0 Fetch Operator limit: -1 Processor Tree: - ListSink + TableScan + alias: dest1 + Statistics: Num rows: 1 Data size: 7 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: '01/13/10 11:57:40' (type: string), '2010-01-13 11:57:40' (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 204 Basic stats: COMPLETE Column stats: COMPLETE + ListSink PREHOOK: query: SELECT from_unixtime(unix_timestamp('2010-01-13 11:57:40', 'yyyy-MM-dd HH:mm:ss'), 'MM/dd/yy HH:mm:ss'), from_unixtime(unix_timestamp('2010-01-13 11:57:40')) from dest1 PREHOOK: type: QUERY http://git-wip-us.apache.org/repos/asf/hive/blob/24d3307b/ql/src/test/results/clientpositive/udf_unix_timestamp.q.out ---------------------------------------------------------------------- diff --git a/ql/src/test/results/clientpositive/udf_unix_timestamp.q.out b/ql/src/test/results/clientpositive/udf_unix_timestamp.q.out index e4ffa1a..c64379d 100644 --- a/ql/src/test/results/clientpositive/udf_unix_timestamp.q.out +++ b/ql/src/test/results/clientpositive/udf_unix_timestamp.q.out @@ -2,13 +2,13 @@ PREHOOK: query: DESCRIBE FUNCTION unix_timestamp PREHOOK: type: DESCFUNCTION POSTHOOK: query: DESCRIBE FUNCTION unix_timestamp POSTHOOK: type: DESCFUNCTION -unix_timestamp([date[, pattern]]) - Returns the UNIX timestamp 
+unix_timestamp(date[, pattern]) - Converts the time to a number PREHOOK: query: DESCRIBE FUNCTION EXTENDED unix_timestamp PREHOOK: type: DESCFUNCTION POSTHOOK: query: DESCRIBE FUNCTION EXTENDED unix_timestamp POSTHOOK: type: DESCFUNCTION -unix_timestamp([date[, pattern]]) - Returns the UNIX timestamp -Converts the current or specified time to number of seconds since 1970-01-01. +unix_timestamp(date[, pattern]) - Converts the time to a number +Converts the specified time to number of seconds since 1970-01-01. The unix_timestamp(void) overload is deprecated, use current_timestamp. PREHOOK: query: create table oneline(key int, value string) PREHOOK: type: CREATETABLE PREHOOK: Output: database:default @@ -70,6 +70,32 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@oneline #### A masked pattern was here #### 2009 Mar 20 11:30:01 am 1237573801 +unix_timestamp(void) is deprecated. Use current_timestamp instead. +unix_timestamp(void) is deprecated. Use current_timestamp instead. +PREHOOK: query: create table foo as SELECT + 'deprecated' as a, + unix_timestamp() as b +FROM oneline +PREHOOK: type: CREATETABLE_AS_SELECT +PREHOOK: Input: default@oneline +PREHOOK: Output: database:default +PREHOOK: Output: default@foo +POSTHOOK: query: create table foo as SELECT + 'deprecated' as a, + unix_timestamp() as b +FROM oneline +POSTHOOK: type: CREATETABLE_AS_SELECT +POSTHOOK: Input: default@oneline +POSTHOOK: Output: database:default +POSTHOOK: Output: default@foo +PREHOOK: query: drop table foo +PREHOOK: type: DROPTABLE +PREHOOK: Input: default@foo +PREHOOK: Output: default@foo +POSTHOOK: query: drop table foo +POSTHOOK: type: DROPTABLE +POSTHOOK: Input: default@foo +POSTHOOK: Output: default@foo PREHOOK: query: SELECT 'random_string', unix_timestamp('random_string')