HIVE-15042: Support intersect/except without distinct keyword (Pengcheng Xiong, reviewed by Ashutosh Chauhan)
Project: http://git-wip-us.apache.org/repos/asf/hive/repo Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/8a6d8186 Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/8a6d8186 Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/8a6d8186 Branch: refs/heads/hive-14535 Commit: 8a6d8186c7e03d9958824ce4907766eaeb79921f Parents: f99a6e8 Author: Pengcheng Xiong <[email protected]> Authored: Tue Oct 25 20:52:35 2016 +0900 Committer: Pengcheng Xiong <[email protected]> Committed: Tue Oct 25 20:52:35 2016 +0900 ---------------------------------------------------------------------- .../apache/hadoop/hive/ql/parse/HiveParser.g | 6 +- .../queries/clientpositive/setop_no_distinct.q | 51 ++++ .../clientpositive/setop_no_distinct.q.out | 237 +++++++++++++++++++ 3 files changed, 291 insertions(+), 3 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/hive/blob/8a6d8186/ql/src/java/org/apache/hadoop/hive/ql/parse/HiveParser.g ---------------------------------------------------------------------- diff --git a/ql/src/java/org/apache/hadoop/hive/ql/parse/HiveParser.g b/ql/src/java/org/apache/hadoop/hive/ql/parse/HiveParser.g index 7bf02bb..7b56be5 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/parse/HiveParser.g +++ b/ql/src/java/org/apache/hadoop/hive/ql/parse/HiveParser.g @@ -2309,11 +2309,11 @@ setOperator : KW_UNION KW_ALL -> ^(TOK_UNIONALL) | KW_UNION KW_DISTINCT? -> ^(TOK_UNIONDISTINCT) | KW_INTERSECT KW_ALL -> ^(TOK_INTERSECTALL) - | KW_INTERSECT KW_DISTINCT -> ^(TOK_INTERSECTDISTINCT) + | KW_INTERSECT KW_DISTINCT? -> ^(TOK_INTERSECTDISTINCT) | KW_EXCEPT KW_ALL -> ^(TOK_EXCEPTALL) - | KW_EXCEPT KW_DISTINCT -> ^(TOK_EXCEPTDISTINCT) + | KW_EXCEPT KW_DISTINCT? -> ^(TOK_EXCEPTDISTINCT) | KW_MINUS KW_ALL -> ^(TOK_EXCEPTALL) - | KW_MINUS KW_DISTINCT -> ^(TOK_EXCEPTDISTINCT) + | KW_MINUS KW_DISTINCT? -> ^(TOK_EXCEPTDISTINCT) ; queryStatementExpression http://git-wip-us.apache.org/repos/asf/hive/blob/8a6d8186/ql/src/test/queries/clientpositive/setop_no_distinct.q ---------------------------------------------------------------------- diff --git a/ql/src/test/queries/clientpositive/setop_no_distinct.q b/ql/src/test/queries/clientpositive/setop_no_distinct.q new file mode 100644 index 0000000..207954a --- /dev/null +++ b/ql/src/test/queries/clientpositive/setop_no_distinct.q @@ -0,0 +1,51 @@ +set hive.mapred.mode=nonstrict; +set hive.cbo.enable=true; + +create table a(key int, value int); + +insert into table a values (1,2),(1,2),(1,2),(1,3),(2,3); + +create table b(key int, value int); + +insert into table b values (1,2),(1,2),(2,3); + +select * from a intersect select * from b; + +(select * from b intersect (select * from a)) intersect select * from b; + +select * from b intersect all select * from a intersect select * from b; + +(select * from b) intersect all ((select * from a) intersect select * from b); + +select * from (select a.key, b.value from a join b on a.key=b.key)sub1 +intersect +select * from (select a.key, b.value from a join b on a.key=b.key)sub2; + +drop table a; + +drop table b; + +create table a(key int); + +insert into table a values (0),(1),(2),(2),(2),(2),(3),(NULL),(NULL),(NULL),(NULL),(NULL); + +create table b(key bigint); + +insert into table b values (1),(2),(2),(3),(5),(5),(NULL),(NULL),(NULL); + +select * from a except select * from b; + +(select * from a) minus select * from b union (select * from a) minus select * from b; + +(select * from a) minus select * from b union all ((select * from a) minus select * from b); + +(select * from a) minus select * from b union all (select * from a) minus all select * from b; + +select * from a minus select * from b minus (select * from a minus select * from b); + +(select * from a) minus (select * from b minus (select * from a minus select * from b)); + +drop table a; + +drop table b; + http://git-wip-us.apache.org/repos/asf/hive/blob/8a6d8186/ql/src/test/results/clientpositive/setop_no_distinct.q.out ---------------------------------------------------------------------- diff --git a/ql/src/test/results/clientpositive/setop_no_distinct.q.out b/ql/src/test/results/clientpositive/setop_no_distinct.q.out new file mode 100644 index 0000000..55e9ba7 --- /dev/null +++ b/ql/src/test/results/clientpositive/setop_no_distinct.q.out @@ -0,0 +1,237 @@ +PREHOOK: query: create table a(key int, value int) +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@a +POSTHOOK: query: create table a(key int, value int) +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@a +PREHOOK: query: insert into table a values (1,2),(1,2),(1,2),(1,3),(2,3) +PREHOOK: type: QUERY +PREHOOK: Input: default@values__tmp__table__1 +PREHOOK: Output: default@a +POSTHOOK: query: insert into table a values (1,2),(1,2),(1,2),(1,3),(2,3) +POSTHOOK: type: QUERY +POSTHOOK: Input: default@values__tmp__table__1 +POSTHOOK: Output: default@a +POSTHOOK: Lineage: a.key EXPRESSION [(values__tmp__table__1)values__tmp__table__1.FieldSchema(name:tmp_values_col1, type:string, comment:), ] +POSTHOOK: Lineage: a.value EXPRESSION [(values__tmp__table__1)values__tmp__table__1.FieldSchema(name:tmp_values_col2, type:string, comment:), ] +PREHOOK: query: create table b(key int, value int) +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@b +POSTHOOK: query: create table b(key int, value int) +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@b +PREHOOK: query: insert into table b values (1,2),(1,2),(2,3) +PREHOOK: type: QUERY +PREHOOK: Input: default@values__tmp__table__2 +PREHOOK: Output: default@b +POSTHOOK: query: insert into table b values (1,2),(1,2),(2,3) +POSTHOOK: type: QUERY +POSTHOOK: Input: default@values__tmp__table__2 +POSTHOOK: Output: default@b +POSTHOOK: Lineage: b.key EXPRESSION [(values__tmp__table__2)values__tmp__table__2.FieldSchema(name:tmp_values_col1, type:string, comment:), ] +POSTHOOK: Lineage: b.value EXPRESSION [(values__tmp__table__2)values__tmp__table__2.FieldSchema(name:tmp_values_col2, type:string, comment:), ] +PREHOOK: query: select * from a intersect select * from b +PREHOOK: type: QUERY +PREHOOK: Input: default@a +PREHOOK: Input: default@b +#### A masked pattern was here #### +POSTHOOK: query: select * from a intersect select * from b +POSTHOOK: type: QUERY +POSTHOOK: Input: default@a +POSTHOOK: Input: default@b +#### A masked pattern was here #### +1 2 +2 3 +PREHOOK: query: (select * from b intersect (select * from a)) intersect select * from b +PREHOOK: type: QUERY +PREHOOK: Input: default@a +PREHOOK: Input: default@b +#### A masked pattern was here #### +POSTHOOK: query: (select * from b intersect (select * from a)) intersect select * from b +POSTHOOK: type: QUERY +POSTHOOK: Input: default@a +POSTHOOK: Input: default@b +#### A masked pattern was here #### +1 2 +2 3 +PREHOOK: query: select * from b intersect all select * from a intersect select * from b +PREHOOK: type: QUERY +PREHOOK: Input: default@a +PREHOOK: Input: default@b +#### A masked pattern was here #### +POSTHOOK: query: select * from b intersect all select * from a intersect select * from b +POSTHOOK: type: QUERY +POSTHOOK: Input: default@a +POSTHOOK: Input: default@b +#### A masked pattern was here #### +1 2 +2 3 +PREHOOK: query: (select * from b) intersect all ((select * from a) intersect select * from b) +PREHOOK: type: QUERY +PREHOOK: Input: default@a +PREHOOK: Input: default@b +#### A masked pattern was here #### +POSTHOOK: query: (select * from b) intersect all ((select * from a) intersect select * from b) +POSTHOOK: type: QUERY +POSTHOOK: Input: default@a +POSTHOOK: Input: default@b +#### A masked pattern was here #### +1 2 +2 3 +PREHOOK: query: select * from (select a.key, b.value from a join b on a.key=b.key)sub1 +intersect +select * from (select a.key, b.value from a join b on a.key=b.key)sub2 +PREHOOK: type: QUERY +PREHOOK: Input: default@a +PREHOOK: Input: default@b +#### A masked pattern was here #### +POSTHOOK: query: select * from (select a.key, b.value from a join b on a.key=b.key)sub1 +intersect +select * from (select a.key, b.value from a join b on a.key=b.key)sub2 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@a +POSTHOOK: Input: default@b +#### A masked pattern was here #### +1 2 +2 3 +PREHOOK: query: drop table a +PREHOOK: type: DROPTABLE +PREHOOK: Input: default@a +PREHOOK: Output: default@a +POSTHOOK: query: drop table a +POSTHOOK: type: DROPTABLE +POSTHOOK: Input: default@a +POSTHOOK: Output: default@a +PREHOOK: query: drop table b +PREHOOK: type: DROPTABLE +PREHOOK: Input: default@b +PREHOOK: Output: default@b +POSTHOOK: query: drop table b +POSTHOOK: type: DROPTABLE +POSTHOOK: Input: default@b +POSTHOOK: Output: default@b +PREHOOK: query: create table a(key int) +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@a +POSTHOOK: query: create table a(key int) +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@a +PREHOOK: query: insert into table a values (0),(1),(2),(2),(2),(2),(3),(NULL),(NULL),(NULL),(NULL),(NULL) +PREHOOK: type: QUERY +PREHOOK: Input: default@values__tmp__table__3 +PREHOOK: Output: default@a +POSTHOOK: query: insert into table a values (0),(1),(2),(2),(2),(2),(3),(NULL),(NULL),(NULL),(NULL),(NULL) +POSTHOOK: type: QUERY +POSTHOOK: Input: default@values__tmp__table__3 +POSTHOOK: Output: default@a +POSTHOOK: Lineage: a.key EXPRESSION [(values__tmp__table__3)values__tmp__table__3.FieldSchema(name:tmp_values_col1, type:string, comment:), ] +PREHOOK: query: create table b(key bigint) +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@b +POSTHOOK: query: create table b(key bigint) +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@b +PREHOOK: query: insert into table b values (1),(2),(2),(3),(5),(5),(NULL),(NULL),(NULL) +PREHOOK: type: QUERY +PREHOOK: Input: default@values__tmp__table__4 +PREHOOK: Output: default@b +POSTHOOK: query: insert into table b values (1),(2),(2),(3),(5),(5),(NULL),(NULL),(NULL) +POSTHOOK: type: QUERY +POSTHOOK: Input: default@values__tmp__table__4 +POSTHOOK: Output: default@b +POSTHOOK: Lineage: b.key EXPRESSION [(values__tmp__table__4)values__tmp__table__4.FieldSchema(name:tmp_values_col1, type:string, comment:), ] +PREHOOK: query: select * from a except select * from b +PREHOOK: type: QUERY +PREHOOK: Input: default@a +PREHOOK: Input: default@b +#### A masked pattern was here #### +POSTHOOK: query: select * from a except select * from b +POSTHOOK: type: QUERY +POSTHOOK: Input: default@a +POSTHOOK: Input: default@b +#### A masked pattern was here #### +0 +PREHOOK: query: (select * from a) minus select * from b union (select * from a) minus select * from b +PREHOOK: type: QUERY +PREHOOK: Input: default@a +PREHOOK: Input: default@b +#### A masked pattern was here #### +POSTHOOK: query: (select * from a) minus select * from b union (select * from a) minus select * from b +POSTHOOK: type: QUERY +POSTHOOK: Input: default@a +POSTHOOK: Input: default@b +#### A masked pattern was here #### +0 +PREHOOK: query: (select * from a) minus select * from b union all ((select * from a) minus select * from b) +PREHOOK: type: QUERY +PREHOOK: Input: default@a +PREHOOK: Input: default@b +#### A masked pattern was here #### +POSTHOOK: query: (select * from a) minus select * from b union all ((select * from a) minus select * from b) +POSTHOOK: type: QUERY +POSTHOOK: Input: default@a +POSTHOOK: Input: default@b +#### A masked pattern was here #### +0 +0 +PREHOOK: query: (select * from a) minus select * from b union all (select * from a) minus all select * from b +PREHOOK: type: QUERY +PREHOOK: Input: default@a +PREHOOK: Input: default@b +#### A masked pattern was here #### +POSTHOOK: query: (select * from a) minus select * from b union all (select * from a) minus all select * from b +POSTHOOK: type: QUERY +POSTHOOK: Input: default@a +POSTHOOK: Input: default@b +#### A masked pattern was here #### +NULL +NULL +0 +0 +2 +2 +PREHOOK: query: select * from a minus select * from b minus (select * from a minus select * from b) +PREHOOK: type: QUERY +PREHOOK: Input: default@a +PREHOOK: Input: default@b +#### A masked pattern was here #### +POSTHOOK: query: select * from a minus select * from b minus (select * from a minus select * from b) +POSTHOOK: type: QUERY +POSTHOOK: Input: default@a +POSTHOOK: Input: default@b +#### A masked pattern was here #### +PREHOOK: query: (select * from a) minus (select * from b minus (select * from a minus select * from b)) +PREHOOK: type: QUERY +PREHOOK: Input: default@a +PREHOOK: Input: default@b +#### A masked pattern was here #### +POSTHOOK: query: (select * from a) minus (select * from b minus (select * from a minus select * from b)) +POSTHOOK: type: QUERY +POSTHOOK: Input: default@a +POSTHOOK: Input: default@b +#### A masked pattern was here #### +0 +PREHOOK: query: drop table a +PREHOOK: type: DROPTABLE +PREHOOK: Input: default@a +PREHOOK: Output: default@a +POSTHOOK: query: drop table a +POSTHOOK: type: DROPTABLE +POSTHOOK: Input: default@a +POSTHOOK: Output: default@a +PREHOOK: query: drop table b +PREHOOK: type: DROPTABLE +PREHOOK: Input: default@b +PREHOOK: Output: default@b +POSTHOOK: query: drop table b +POSTHOOK: type: DROPTABLE +POSTHOOK: Input: default@b +POSTHOOK: Output: default@b
