HIVE-14580: Introduce || operator (Zoltan Haindrich reviewed by Pengcheng Xiong)
Project: http://git-wip-us.apache.org/repos/asf/hive/repo Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/efa39eab Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/efa39eab Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/efa39eab Branch: refs/heads/hive-14535 Commit: efa39eab3a480aba4a3a4a0b3d56b3121382fe9b Parents: 0049a21 Author: Pengcheng Xiong <[email protected]> Authored: Sat Oct 22 14:14:34 2016 -0700 Committer: Pengcheng Xiong <[email protected]> Committed: Sat Oct 22 14:25:11 2016 -0700 ---------------------------------------------------------------------- .../org/apache/hadoop/hive/ql/parse/HiveLexer.g | 1 + .../hadoop/hive/ql/parse/IdentifiersParser.g | 16 +- ql/src/test/queries/clientpositive/concat_op.q | 45 +++ .../test/results/clientpositive/concat_op.q.out | 301 +++++++++++++++++++ 4 files changed, 362 insertions(+), 1 deletion(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/hive/blob/efa39eab/ql/src/java/org/apache/hadoop/hive/ql/parse/HiveLexer.g ---------------------------------------------------------------------- diff --git a/ql/src/java/org/apache/hadoop/hive/ql/parse/HiveLexer.g b/ql/src/java/org/apache/hadoop/hive/ql/parse/HiveLexer.g index 17985d2..a0ff65d 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/parse/HiveLexer.g +++ b/ql/src/java/org/apache/hadoop/hive/ql/parse/HiveLexer.g @@ -370,6 +370,7 @@ DIV : 'DIV'; AMPERSAND : '&'; TILDE : '~'; BITWISEOR : '|'; +CONCATENATE : '||'; BITWISEXOR : '^'; QUESTION : '?'; DOLLAR : '$'; http://git-wip-us.apache.org/repos/asf/hive/blob/efa39eab/ql/src/java/org/apache/hadoop/hive/ql/parse/IdentifiersParser.g ---------------------------------------------------------------------- diff --git a/ql/src/java/org/apache/hadoop/hive/ql/parse/IdentifiersParser.g b/ql/src/java/org/apache/hadoop/hive/ql/parse/IdentifiersParser.g index f79960a..13e2d17 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/parse/IdentifiersParser.g +++ b/ql/src/java/org/apache/hadoop/hive/ql/parse/IdentifiersParser.g @@ -437,6 +437,20 @@ precedencePlusExpression precedenceStarExpression (precedencePlusOperator^ precedenceStarExpression)* ; +precedenceConcatenateOperator + : + CONCATENATE + ; + +precedenceConcatenateExpression + : + (precedencePlusExpression -> precedencePlusExpression) + ( + precedenceConcatenateOperator plus=precedencePlusExpression + -> ^(TOK_FUNCTION {adaptor.create(Identifier, "concat")} {$precedenceConcatenateExpression.tree} $plus) + )* + -> {$precedenceConcatenateExpression.tree} + ; precedenceAmpersandOperator : @@ -445,7 +459,7 @@ precedenceAmpersandOperator precedenceAmpersandExpression : - precedencePlusExpression (precedenceAmpersandOperator^ precedencePlusExpression)* + precedenceConcatenateExpression (precedenceAmpersandOperator^ precedenceConcatenateExpression)* ; http://git-wip-us.apache.org/repos/asf/hive/blob/efa39eab/ql/src/test/queries/clientpositive/concat_op.q ---------------------------------------------------------------------- diff --git a/ql/src/test/queries/clientpositive/concat_op.q b/ql/src/test/queries/clientpositive/concat_op.q new file mode 100644 index 0000000..8bbbb92 --- /dev/null +++ b/ql/src/test/queries/clientpositive/concat_op.q @@ -0,0 +1,45 @@ +explain select key || value from src; + +select concat('a','b','c'); +select 'a' || 'b' || 'c'; + +select '1' || 2+3; +select 1+2 || '7'; + +select 1 || 1 || 1; +select 1.2 || 1.7; +select 1 + 1 || 1 + 1; +select 9 + 9 || 9 + 9; +select 1 + 1 || 1 + 1 || 1 + 1; + +-- || has higher precedence than bitwise ops...so () is neccessary +select '1' || 4 / 2 || 1 + 2 * 1 || (6 & 4) || (1 | 4); + +-- however ^ is different from the other bitwise ops: +select 0 ^ 1 || '2' || 1 ^ 2; + +create table ct1 (c int); +create table ct2 (c int); + +insert into ct1 values (7),(5),(3),(1); +insert into ct2 values (8),(6),(4),(2); + +create view ct_v1 as select * from ct1 union all select * from ct2 order by c; + +select c,c * c + c || 'x', 'c+c=' || c+c || ', c*c=' || c*c || ', (c&c)=' || (c & c) from ct_v1; + + +select *, 'x' || (c&3) , 'a' || c*c+c || 'b' from ct_v1 + order by 'a' || c*c+c || 'b'; + +select 'x' || (c&3),collect_list(c) from ct_v1 + group by 'x' || (c&3); + +explain select concat('a','b','c'); +explain select 'a' || 'b' || 'c'; + +-- check and/or precedence relation; should be true +-- (true and false) or (false and true) or true => true psql/mysql/ora/hive +-- true and (false or false) and (true or true) => false should not happen +select true and false or false and true or true; + http://git-wip-us.apache.org/repos/asf/hive/blob/efa39eab/ql/src/test/results/clientpositive/concat_op.q.out ---------------------------------------------------------------------- diff --git a/ql/src/test/results/clientpositive/concat_op.q.out b/ql/src/test/results/clientpositive/concat_op.q.out new file mode 100644 index 0000000..3d872fc --- /dev/null +++ b/ql/src/test/results/clientpositive/concat_op.q.out @@ -0,0 +1,301 @@ +PREHOOK: query: explain select key || value from src +PREHOOK: type: QUERY +POSTHOOK: query: explain select key || value from src +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: src + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: concat(key, value) (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select concat('a','b','c') +PREHOOK: type: QUERY +PREHOOK: Input: _dummy_database@_dummy_table +#### A masked pattern was here #### +POSTHOOK: query: select concat('a','b','c') +POSTHOOK: type: QUERY +POSTHOOK: Input: _dummy_database@_dummy_table +#### A masked pattern was here #### +abc +PREHOOK: query: select 'a' || 'b' || 'c' +PREHOOK: type: QUERY +PREHOOK: Input: _dummy_database@_dummy_table +#### A masked pattern was here #### +POSTHOOK: query: select 'a' || 'b' || 'c' +POSTHOOK: type: QUERY +POSTHOOK: Input: _dummy_database@_dummy_table +#### A masked pattern was here #### +abc +PREHOOK: query: select '1' || 2+3 +PREHOOK: type: QUERY +PREHOOK: Input: _dummy_database@_dummy_table +#### A masked pattern was here #### +POSTHOOK: query: select '1' || 2+3 +POSTHOOK: type: QUERY +POSTHOOK: Input: _dummy_database@_dummy_table +#### A masked pattern was here #### +15 +PREHOOK: query: select 1+2 || '7' +PREHOOK: type: QUERY +PREHOOK: Input: _dummy_database@_dummy_table +#### A masked pattern was here #### +POSTHOOK: query: select 1+2 || '7' +POSTHOOK: type: QUERY +POSTHOOK: Input: _dummy_database@_dummy_table +#### A masked pattern was here #### +37 +PREHOOK: query: select 1 || 1 || 1 +PREHOOK: type: QUERY +PREHOOK: Input: _dummy_database@_dummy_table +#### A masked pattern was here #### +POSTHOOK: query: select 1 || 1 || 1 +POSTHOOK: type: QUERY +POSTHOOK: Input: _dummy_database@_dummy_table +#### A masked pattern was here #### +111 +PREHOOK: query: select 1.2 || 1.7 +PREHOOK: type: QUERY +PREHOOK: Input: _dummy_database@_dummy_table +#### A masked pattern was here #### +POSTHOOK: query: select 1.2 || 1.7 +POSTHOOK: type: QUERY +POSTHOOK: Input: _dummy_database@_dummy_table +#### A masked pattern was here #### +1.21.7 +PREHOOK: query: select 1 + 1 || 1 + 1 +PREHOOK: type: QUERY +PREHOOK: Input: _dummy_database@_dummy_table +#### A masked pattern was here #### +POSTHOOK: query: select 1 + 1 || 1 + 1 +POSTHOOK: type: QUERY +POSTHOOK: Input: _dummy_database@_dummy_table +#### A masked pattern was here #### +22 +PREHOOK: query: select 9 + 9 || 9 + 9 +PREHOOK: type: QUERY +PREHOOK: Input: _dummy_database@_dummy_table +#### A masked pattern was here #### +POSTHOOK: query: select 9 + 9 || 9 + 9 +POSTHOOK: type: QUERY +POSTHOOK: Input: _dummy_database@_dummy_table +#### A masked pattern was here #### +1818 +PREHOOK: query: select 1 + 1 || 1 + 1 || 1 + 1 +PREHOOK: type: QUERY +PREHOOK: Input: _dummy_database@_dummy_table +#### A masked pattern was here #### +POSTHOOK: query: select 1 + 1 || 1 + 1 || 1 + 1 +POSTHOOK: type: QUERY +POSTHOOK: Input: _dummy_database@_dummy_table +#### A masked pattern was here #### +222 +PREHOOK: query: -- || has higher precedence than bitwise ops...so () is neccessary +select '1' || 4 / 2 || 1 + 2 * 1 || (6 & 4) || (1 | 4) +PREHOOK: type: QUERY +PREHOOK: Input: _dummy_database@_dummy_table +#### A masked pattern was here #### +POSTHOOK: query: -- || has higher precedence than bitwise ops...so () is neccessary +select '1' || 4 / 2 || 1 + 2 * 1 || (6 & 4) || (1 | 4) +POSTHOOK: type: QUERY +POSTHOOK: Input: _dummy_database@_dummy_table +#### A masked pattern was here #### +12.0345 +PREHOOK: query: -- however ^ is different from the other bitwise ops: +select 0 ^ 1 || '2' || 1 ^ 2 +PREHOOK: type: QUERY +PREHOOK: Input: _dummy_database@_dummy_table +#### A masked pattern was here #### +POSTHOOK: query: -- however ^ is different from the other bitwise ops: +select 0 ^ 1 || '2' || 1 ^ 2 +POSTHOOK: type: QUERY +POSTHOOK: Input: _dummy_database@_dummy_table +#### A masked pattern was here #### +123 +PREHOOK: query: create table ct1 (c int) +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@ct1 +POSTHOOK: query: create table ct1 (c int) +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@ct1 +PREHOOK: query: create table ct2 (c int) +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@ct2 +POSTHOOK: query: create table ct2 (c int) +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@ct2 +PREHOOK: query: insert into ct1 values (7),(5),(3),(1) +PREHOOK: type: QUERY +PREHOOK: Input: default@values__tmp__table__1 +PREHOOK: Output: default@ct1 +POSTHOOK: query: insert into ct1 values (7),(5),(3),(1) +POSTHOOK: type: QUERY +POSTHOOK: Input: default@values__tmp__table__1 +POSTHOOK: Output: default@ct1 +POSTHOOK: Lineage: ct1.c EXPRESSION [(values__tmp__table__1)values__tmp__table__1.FieldSchema(name:tmp_values_col1, type:string, comment:), ] +PREHOOK: query: insert into ct2 values (8),(6),(4),(2) +PREHOOK: type: QUERY +PREHOOK: Input: default@values__tmp__table__2 +PREHOOK: Output: default@ct2 +POSTHOOK: query: insert into ct2 values (8),(6),(4),(2) +POSTHOOK: type: QUERY +POSTHOOK: Input: default@values__tmp__table__2 +POSTHOOK: Output: default@ct2 +POSTHOOK: Lineage: ct2.c EXPRESSION [(values__tmp__table__2)values__tmp__table__2.FieldSchema(name:tmp_values_col1, type:string, comment:), ] +PREHOOK: query: create view ct_v1 as select * from ct1 union all select * from ct2 order by c +PREHOOK: type: CREATEVIEW +PREHOOK: Input: default@ct1 +PREHOOK: Input: default@ct2 +PREHOOK: Output: database:default +PREHOOK: Output: default@ct_v1 +POSTHOOK: query: create view ct_v1 as select * from ct1 union all select * from ct2 order by c +POSTHOOK: type: CREATEVIEW +POSTHOOK: Input: default@ct1 +POSTHOOK: Input: default@ct2 +POSTHOOK: Output: database:default +POSTHOOK: Output: default@ct_v1 +PREHOOK: query: select c,c * c + c || 'x', 'c+c=' || c+c || ', c*c=' || c*c || ', (c&c)=' || (c & c) from ct_v1 +PREHOOK: type: QUERY +PREHOOK: Input: default@ct1 +PREHOOK: Input: default@ct2 +PREHOOK: Input: default@ct_v1 +#### A masked pattern was here #### +POSTHOOK: query: select c,c * c + c || 'x', 'c+c=' || c+c || ', c*c=' || c*c || ', (c&c)=' || (c & c) from ct_v1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@ct1 +POSTHOOK: Input: default@ct2 +POSTHOOK: Input: default@ct_v1 +#### A masked pattern was here #### +1 2x c+c=2, c*c=1, (c&c)=1 +2 6x c+c=4, c*c=4, (c&c)=2 +3 12x c+c=6, c*c=9, (c&c)=3 +4 20x c+c=8, c*c=16, (c&c)=4 +5 30x c+c=10, c*c=25, (c&c)=5 +6 42x c+c=12, c*c=36, (c&c)=6 +7 56x c+c=14, c*c=49, (c&c)=7 +8 72x c+c=16, c*c=64, (c&c)=8 +PREHOOK: query: select *, 'x' || (c&3) , 'a' || c*c+c || 'b' from ct_v1 + order by 'a' || c*c+c || 'b' +PREHOOK: type: QUERY +PREHOOK: Input: default@ct1 +PREHOOK: Input: default@ct2 +PREHOOK: Input: default@ct_v1 +#### A masked pattern was here #### +POSTHOOK: query: select *, 'x' || (c&3) , 'a' || c*c+c || 'b' from ct_v1 + order by 'a' || c*c+c || 'b' +POSTHOOK: type: QUERY +POSTHOOK: Input: default@ct1 +POSTHOOK: Input: default@ct2 +POSTHOOK: Input: default@ct_v1 +#### A masked pattern was here #### +3 x3 a12b +4 x0 a20b +1 x1 a2b +5 x1 a30b +6 x2 a42b +7 x3 a56b +2 x2 a6b +8 x0 a72b +PREHOOK: query: select 'x' || (c&3),collect_list(c) from ct_v1 + group by 'x' || (c&3) +PREHOOK: type: QUERY +PREHOOK: Input: default@ct1 +PREHOOK: Input: default@ct2 +PREHOOK: Input: default@ct_v1 +#### A masked pattern was here #### +POSTHOOK: query: select 'x' || (c&3),collect_list(c) from ct_v1 + group by 'x' || (c&3) +POSTHOOK: type: QUERY +POSTHOOK: Input: default@ct1 +POSTHOOK: Input: default@ct2 +POSTHOOK: Input: default@ct_v1 +#### A masked pattern was here #### +x0 [4,8] +x1 [1,5] +x2 [2,6] +x3 [3,7] +PREHOOK: query: explain select concat('a','b','c') +PREHOOK: type: QUERY +POSTHOOK: query: explain select concat('a','b','c') +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-0 is a root stage + +STAGE PLANS: + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + TableScan + alias: _dummy_table + Row Limit Per Split: 1 + Statistics: Num rows: 1 Data size: 1 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: 'abc' (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 87 Basic stats: COMPLETE Column stats: COMPLETE + ListSink + +PREHOOK: query: explain select 'a' || 'b' || 'c' +PREHOOK: type: QUERY +POSTHOOK: query: explain select 'a' || 'b' || 'c' +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-0 is a root stage + +STAGE PLANS: + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + TableScan + alias: _dummy_table + Row Limit Per Split: 1 + Statistics: Num rows: 1 Data size: 1 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: 'abc' (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 87 Basic stats: COMPLETE Column stats: COMPLETE + ListSink + +PREHOOK: query: -- check and/or precedence relation; should be true +-- (true and false) or (false and true) or true => true psql/mysql/ora/hive +-- true and (false or false) and (true or true) => false should not happen +select true and false or false and true or true +PREHOOK: type: QUERY +PREHOOK: Input: _dummy_database@_dummy_table +#### A masked pattern was here #### +POSTHOOK: query: -- check and/or precedence relation; should be true +-- (true and false) or (false and true) or true => true psql/mysql/ora/hive +-- true and (false or false) and (true or true) => false should not happen +select true and false or false and true or true +POSTHOOK: type: QUERY +POSTHOOK: Input: _dummy_database@_dummy_table +#### A masked pattern was here #### +true
