This is an automated email from the ASF dual-hosted git repository. sankarh pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/hive.git
The following commit(s) were added to refs/heads/master by this push: new 2b181dc HIVE-21283: Create Synonym mid for substr, position for locate (Mani M, reviewed by Sankar Hariappan) 2b181dc is described below commit 2b181dc7aeccf3d92968e68be0d3a9aabd0350e2 Author: Mani M <rmsm...@gmail.com> AuthorDate: Fri Mar 22 09:46:43 2019 +0530 HIVE-21283: Create Synonym mid for substr, position for locate (Mani M, reviewed by Sankar Hariappan) Signed-off-by: Sankar Hariappan <sank...@apache.org> --- .../hadoop/hive/ql/exec/FunctionRegistry.java | 2 + .../org/apache/hadoop/hive/ql/udf/UDFSubstr.java | 2 +- .../hive/ql/udf/generic/GenericUDFLocate.java | 2 +- ql/src/test/queries/clientpositive/udf_mid.q | 3 + ql/src/test/queries/clientpositive/udf_position.q | 44 ++++++++ .../results/clientpositive/show_functions.q.out | 3 + .../test/results/clientpositive/udf_locate.q.out | 1 + ql/src/test/results/clientpositive/udf_mid.q.out | 21 ++++ .../test/results/clientpositive/udf_position.q.out | 121 +++++++++++++++++++++ .../test/results/clientpositive/udf_substr.q.out | 2 +- .../results/clientpositive/udf_substring.q.out | 2 +- 11 files changed, 199 insertions(+), 4 deletions(-) diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/FunctionRegistry.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/FunctionRegistry.java index 3e4aa74..ed41bef 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/exec/FunctionRegistry.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/FunctionRegistry.java @@ -193,6 +193,7 @@ public final class FunctionRegistry { static { system.registerGenericUDF("concat", GenericUDFConcat.class); + system.registerUDF("mid", UDFSubstr.class, false); system.registerUDF("substr", UDFSubstr.class, false); system.registerUDF("substring", UDFSubstr.class, false); system.registerGenericUDF("substring_index", GenericUDFSubstringIndex.class); @@ -498,6 +499,7 @@ public final class FunctionRegistry { system.registerGenericUDF("in_file", GenericUDFInFile.class); system.registerGenericUDF("instr", GenericUDFInstr.class); system.registerGenericUDF("locate", GenericUDFLocate.class); + system.registerGenericUDF("position", GenericUDFLocate.class); system.registerGenericUDF("elt", GenericUDFElt.class); system.registerGenericUDF("concat_ws", GenericUDFConcatWS.class); system.registerGenericUDF("sort_array", GenericUDFSortArray.class); diff --git a/ql/src/java/org/apache/hadoop/hive/ql/udf/UDFSubstr.java b/ql/src/java/org/apache/hadoop/hive/ql/udf/UDFSubstr.java index 4136cc7..5b1964c 100755 --- a/ql/src/java/org/apache/hadoop/hive/ql/udf/UDFSubstr.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/udf/UDFSubstr.java @@ -33,7 +33,7 @@ import org.apache.hadoop.io.Text; * UDFSubstr. * */ -@Description(name = "substr,substring", +@Description(name = "substr,substring,mid", value = "_FUNC_(str, pos[, len]) - returns the substring of str that" + " starts at pos and is of length len or" + "_FUNC_(bin, pos[, len]) - returns the slice of byte array that" diff --git a/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFLocate.java b/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFLocate.java index cebc752..a077222 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFLocate.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFLocate.java @@ -42,7 +42,7 @@ import org.apache.hadoop.io.Text; * </pre> * <p> */ -@Description(name = "locate", +@Description(name = "locate,position", value = "_FUNC_(substr, str[, pos]) - Returns the position of the first " + "occurance of substr in str after position pos", extended = "Example:\n" diff --git a/ql/src/test/queries/clientpositive/udf_mid.q b/ql/src/test/queries/clientpositive/udf_mid.q new file mode 100644 index 0000000..a2a6526 --- /dev/null +++ b/ql/src/test/queries/clientpositive/udf_mid.q @@ -0,0 +1,3 @@ +-- Synonym. See udf_substr.q +DESCRIBE FUNCTION mid; +DESCRIBE FUNCTION EXTENDED mid; diff --git a/ql/src/test/queries/clientpositive/udf_position.q b/ql/src/test/queries/clientpositive/udf_position.q new file mode 100644 index 0000000..232ed77 --- /dev/null +++ b/ql/src/test/queries/clientpositive/udf_position.q @@ -0,0 +1,44 @@ +--! qt:dataset:src +set hive.fetch.task.conversion=more; + +DESCRIBE FUNCTION position; +DESCRIBE FUNCTION EXTENDED position; + +EXPLAIN +SELECT position('abc', 'abcd'), + position('ccc', 'abcabc'), + position('23', 123), + position(23, 123), + position('abc', 'abcabc', 2), + position('abc', 'abcabc', '2'), + position(1, TRUE), + position(1, FALSE), + position(CAST('2' AS TINYINT), '12345'), + position('34', CAST('12345' AS SMALLINT)), + position('456', CAST('123456789012' AS BIGINT)), + position('.25', CAST(1.25 AS FLOAT)), + position('.0', CAST(16.0 AS DOUBLE)), + position(null, 'abc'), + position('abc', null), + position('abc', 'abcd', null), + position('abc', 'abcd', 'invalid number') +FROM src tablesample (1 rows); + +SELECT position('abc', 'abcd'), + position('ccc', 'abcabc'), + position('23', 123), + position(23, 123), + position('abc', 'abcabc', 2), + position('abc', 'abcabc', '2'), + position(1, TRUE), + position(1, FALSE), + position(CAST('2' AS TINYINT), '12345'), + position('34', CAST('12345' AS SMALLINT)), + position('456', CAST('123456789012' AS BIGINT)), + position('.25', CAST(1.25 AS FLOAT)), + position('.0', CAST(16.0 AS DOUBLE)), + position(null, 'abc'), + position('abc', null), + position('abc', 'abcd', null), + position('abc', 'abcd', 'invalid number') +FROM src tablesample (1 rows); diff --git a/ql/src/test/results/clientpositive/show_functions.q.out b/ql/src/test/results/clientpositive/show_functions.q.out index dc2b436..0b772f0 100644 --- a/ql/src/test/results/clientpositive/show_functions.q.out +++ b/ql/src/test/results/clientpositive/show_functions.q.out @@ -168,6 +168,7 @@ mask_show_last_n matchpath max md5 +mid min minute mod @@ -196,6 +197,7 @@ percentile_approx pi pmod posexplode +position positive pow power @@ -437,6 +439,7 @@ mask_show_last_n matchpath max md5 +mid min minute mod diff --git a/ql/src/test/results/clientpositive/udf_locate.q.out b/ql/src/test/results/clientpositive/udf_locate.q.out index 63213b3..cb3f63b 100644 --- a/ql/src/test/results/clientpositive/udf_locate.q.out +++ b/ql/src/test/results/clientpositive/udf_locate.q.out @@ -8,6 +8,7 @@ PREHOOK: type: DESCFUNCTION POSTHOOK: query: DESCRIBE FUNCTION EXTENDED locate POSTHOOK: type: DESCFUNCTION locate(substr, str[, pos]) - Returns the position of the first occurance of substr in str after position pos +Synonyms: position Example: > SELECT locate('bar', 'foobarbar', 5) FROM src LIMIT 1; 7 diff --git a/ql/src/test/results/clientpositive/udf_mid.q.out b/ql/src/test/results/clientpositive/udf_mid.q.out new file mode 100644 index 0000000..0e28452 --- /dev/null +++ b/ql/src/test/results/clientpositive/udf_mid.q.out @@ -0,0 +1,21 @@ +PREHOOK: query: DESCRIBE FUNCTION mid +PREHOOK: type: DESCFUNCTION +POSTHOOK: query: DESCRIBE FUNCTION mid +POSTHOOK: type: DESCFUNCTION +mid(str, pos[, len]) - returns the substring of str that starts at pos and is of length len ormid(bin, pos[, len]) - returns the slice of byte array that starts at pos and is of length len +PREHOOK: query: DESCRIBE FUNCTION EXTENDED mid +PREHOOK: type: DESCFUNCTION +POSTHOOK: query: DESCRIBE FUNCTION EXTENDED mid +POSTHOOK: type: DESCFUNCTION +mid(str, pos[, len]) - returns the substring of str that starts at pos and is of length len ormid(bin, pos[, len]) - returns the slice of byte array that starts at pos and is of length len +Synonyms: substr, substring +pos is a 1-based index. If pos<0 the starting position is determined by counting backwards from the end of str. +Example: + > SELECT mid('Facebook', 5) FROM src LIMIT 1; + 'book' + > SELECT mid('Facebook', -5) FROM src LIMIT 1; + 'ebook' + > SELECT mid('Facebook', 5, 1) FROM src LIMIT 1; + 'b' +Function class:org.apache.hadoop.hive.ql.udf.UDFSubstr +Function type:BUILTIN diff --git a/ql/src/test/results/clientpositive/udf_position.q.out b/ql/src/test/results/clientpositive/udf_position.q.out new file mode 100644 index 0000000..ac5f6c0 --- /dev/null +++ b/ql/src/test/results/clientpositive/udf_position.q.out @@ -0,0 +1,121 @@ +PREHOOK: query: DESCRIBE FUNCTION position +PREHOOK: type: DESCFUNCTION +POSTHOOK: query: DESCRIBE FUNCTION position +POSTHOOK: type: DESCFUNCTION +position(substr, str[, pos]) - Returns the position of the first occurance of substr in str after position pos +PREHOOK: query: DESCRIBE FUNCTION EXTENDED position +PREHOOK: type: DESCFUNCTION +POSTHOOK: query: DESCRIBE FUNCTION EXTENDED position +POSTHOOK: type: DESCFUNCTION +position(substr, str[, pos]) - Returns the position of the first occurance of substr in str after position pos +Synonyms: locate +Example: + > SELECT position('bar', 'foobarbar', 5) FROM src LIMIT 1; + 7 +Function class:org.apache.hadoop.hive.ql.udf.generic.GenericUDFLocate +Function type:BUILTIN +PREHOOK: query: EXPLAIN +SELECT position('abc', 'abcd'), + position('ccc', 'abcabc'), + position('23', 123), + position(23, 123), + position('abc', 'abcabc', 2), + position('abc', 'abcabc', '2'), + position(1, TRUE), + position(1, FALSE), + position(CAST('2' AS TINYINT), '12345'), + position('34', CAST('12345' AS SMALLINT)), + position('456', CAST('123456789012' AS BIGINT)), + position('.25', CAST(1.25 AS FLOAT)), + position('.0', CAST(16.0 AS DOUBLE)), + position(null, 'abc'), + position('abc', null), + position('abc', 'abcd', null), + position('abc', 'abcd', 'invalid number') +FROM src tablesample (1 rows) +PREHOOK: type: QUERY +PREHOOK: Input: default@src +#### A masked pattern was here #### +POSTHOOK: query: EXPLAIN +SELECT position('abc', 'abcd'), + position('ccc', 'abcabc'), + position('23', 123), + position(23, 123), + position('abc', 'abcabc', 2), + position('abc', 'abcabc', '2'), + position(1, TRUE), + position(1, FALSE), + position(CAST('2' AS TINYINT), '12345'), + position('34', CAST('12345' AS SMALLINT)), + position('456', CAST('123456789012' AS BIGINT)), + position('.25', CAST(1.25 AS FLOAT)), + position('.0', CAST(16.0 AS DOUBLE)), + position(null, 'abc'), + position('abc', null), + position('abc', 'abcd', null), + position('abc', 'abcd', 'invalid number') +FROM src tablesample (1 rows) +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +#### A masked pattern was here #### +STAGE DEPENDENCIES: + Stage-0 is a root stage + +STAGE PLANS: + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + TableScan + alias: src + Row Limit Per Split: 1 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: 1 (type: int), 0 (type: int), 2 (type: int), 2 (type: int), 4 (type: int), 4 (type: int), 0 (type: int), 0 (type: int), 2 (type: int), 3 (type: int), 4 (type: int), 2 (type: int), 3 (type: int), null (type: int), null (type: int), 0 (type: int), 0 (type: int) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16 + Statistics: Num rows: 500 Data size: 30008 Basic stats: COMPLETE Column stats: COMPLETE + ListSink + +PREHOOK: query: SELECT position('abc', 'abcd'), + position('ccc', 'abcabc'), + position('23', 123), + position(23, 123), + position('abc', 'abcabc', 2), + position('abc', 'abcabc', '2'), + position(1, TRUE), + position(1, FALSE), + position(CAST('2' AS TINYINT), '12345'), + position('34', CAST('12345' AS SMALLINT)), + position('456', CAST('123456789012' AS BIGINT)), + position('.25', CAST(1.25 AS FLOAT)), + position('.0', CAST(16.0 AS DOUBLE)), + position(null, 'abc'), + position('abc', null), + position('abc', 'abcd', null), + position('abc', 'abcd', 'invalid number') +FROM src tablesample (1 rows) +PREHOOK: type: QUERY +PREHOOK: Input: default@src +#### A masked pattern was here #### +POSTHOOK: query: SELECT position('abc', 'abcd'), + position('ccc', 'abcabc'), + position('23', 123), + position(23, 123), + position('abc', 'abcabc', 2), + position('abc', 'abcabc', '2'), + position(1, TRUE), + position(1, FALSE), + position(CAST('2' AS TINYINT), '12345'), + position('34', CAST('12345' AS SMALLINT)), + position('456', CAST('123456789012' AS BIGINT)), + position('.25', CAST(1.25 AS FLOAT)), + position('.0', CAST(16.0 AS DOUBLE)), + position(null, 'abc'), + position('abc', null), + position('abc', 'abcd', null), + position('abc', 'abcd', 'invalid number') +FROM src tablesample (1 rows) +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +#### A masked pattern was here #### +1 0 2 2 4 4 0 0 2 3 4 2 3 NULL NULL 0 0 diff --git a/ql/src/test/results/clientpositive/udf_substr.q.out b/ql/src/test/results/clientpositive/udf_substr.q.out index 4489006..00fa606 100644 --- a/ql/src/test/results/clientpositive/udf_substr.q.out +++ b/ql/src/test/results/clientpositive/udf_substr.q.out @@ -8,7 +8,7 @@ PREHOOK: type: DESCFUNCTION POSTHOOK: query: DESCRIBE FUNCTION EXTENDED substr POSTHOOK: type: DESCFUNCTION substr(str, pos[, len]) - returns the substring of str that starts at pos and is of length len orsubstr(bin, pos[, len]) - returns the slice of byte array that starts at pos and is of length len -Synonyms: substring +Synonyms: mid, substring pos is a 1-based index. If pos<0 the starting position is determined by counting backwards from the end of str. Example: > SELECT substr('Facebook', 5) FROM src LIMIT 1; diff --git a/ql/src/test/results/clientpositive/udf_substring.q.out b/ql/src/test/results/clientpositive/udf_substring.q.out index 72898e6..d6b1c4a 100644 --- a/ql/src/test/results/clientpositive/udf_substring.q.out +++ b/ql/src/test/results/clientpositive/udf_substring.q.out @@ -8,7 +8,7 @@ PREHOOK: type: DESCFUNCTION POSTHOOK: query: DESCRIBE FUNCTION EXTENDED substring POSTHOOK: type: DESCFUNCTION substring(str, pos[, len]) - returns the substring of str that starts at pos and is of length len orsubstring(bin, pos[, len]) - returns the slice of byte array that starts at pos and is of length len -Synonyms: substr +Synonyms: mid, substr pos is a 1-based index. If pos<0 the starting position is determined by counting backwards from the end of str. Example: > SELECT substring('Facebook', 5) FROM src LIMIT 1;