HIVE-16040 : union column expansion should take aliases from the leftmost
branch (Sergey Shelukhin, reviewed by Ashutosh Chauhan)
Conflicts:
ql/src/test/results/clientpositive/perf/query14.q.out
Project: http://git-wip-us.apache.org/repos/asf/hive/repo
Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/ba565411
Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/ba565411
Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/ba565411
Branch: refs/heads/branch-2.2
Commit: ba56541134c1b32d3e593030c8768dcdcf27575f
Parents: 4bcb077
Author: Sergey Shelukhin <[email protected]>
Authored: Wed Mar 1 19:21:27 2017 -0800
Committer: Owen O'Malley <[email protected]>
Committed: Tue Mar 28 15:27:55 2017 -0700
----------------------------------------------------------------------
.../apache/hadoop/hive/ql/parse/ParseUtils.java | 32 +-
.../queries/clientpositive/union_pos_alias.q | 20 +-
.../clientpositive/union_pos_alias.q.out | 306 +++++++++++++++++++
3 files changed, 353 insertions(+), 5 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/hive/blob/ba565411/ql/src/java/org/apache/hadoop/hive/ql/parse/ParseUtils.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/parse/ParseUtils.java
b/ql/src/java/org/apache/hadoop/hive/ql/parse/ParseUtils.java
index 0023649..845305e 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/parse/ParseUtils.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/parse/ParseUtils.java
@@ -51,6 +51,8 @@ import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory;
import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoUtils;
import org.apache.hadoop.hive.serde2.typeinfo.VarcharTypeInfo;
+import com.google.common.base.Preconditions;
+
/**
* Library of utility functions used in the parse code.
@@ -402,10 +404,9 @@ public final class ParseUtils {
}
}
}
- // We find the SELECT closest to the top. This assumes there's only one
FROM or FROM-s
- // are all equivalent (union case). Also, this assumption could be false
for an already
- // malformed query; we don't check for that here - it will fail later
anyway.
- // TODO: Maybe we should find ALL the SELECT-s not nested in another
from, and compare.
+ // Note: we assume that this isn't an already malformed query;
+ // we don't check for that here - it will fail later anyway.
+ // First, we find the SELECT closest to the top.
ASTNode select = searcher.simpleBreadthFirstSearchAny((ASTNode)fromNode,
HiveParser.TOK_SELECT, HiveParser.TOK_SELECTDI);
if (select == null) {
@@ -414,6 +415,29 @@ public final class ParseUtils {
setCols.token.setType(HiveParser.TOK_ALLCOLREF);
return;
}
+
+ // Then, find the leftmost logical sibling select, because that's what
Hive uses for aliases.
+ while (true) {
+ CommonTree queryOfSelect = select.parent;
+ while (queryOfSelect != null && queryOfSelect.getType() !=
HiveParser.TOK_QUERY) {
+ queryOfSelect = queryOfSelect.parent;
+ }
+ // We should have some QUERY; and also its parent because by
supposition we are in subq.
+ if (queryOfSelect == null || queryOfSelect.parent == null) {
+ LOG.debug("Replacing SETCOLREF with ALLCOLREF because we couldn't
find the QUERY");
+ setCols.token.setType(HiveParser.TOK_ALLCOLREF);
+ return;
+ }
+ if (queryOfSelect.childIndex == 0) break; // We are the left-most
child.
+ Tree moreToTheLeft = queryOfSelect.parent.getChild(0);
+ Preconditions.checkState(moreToTheLeft != queryOfSelect);
+ ASTNode newSelect =
searcher.simpleBreadthFirstSearchAny((ASTNode)moreToTheLeft,
+ HiveParser.TOK_SELECT, HiveParser.TOK_SELECTDI);
+ Preconditions.checkState(newSelect != select);
+ select = newSelect;
+ // Repeat the procedure for the new select.
+ }
+
// Found the proper columns.
List<ASTNode> newChildren = new ArrayList<>(select.getChildCount());
HashSet<String> aliases = new HashSet<>();
http://git-wip-us.apache.org/repos/asf/hive/blob/ba565411/ql/src/test/queries/clientpositive/union_pos_alias.q
----------------------------------------------------------------------
diff --git a/ql/src/test/queries/clientpositive/union_pos_alias.q
b/ql/src/test/queries/clientpositive/union_pos_alias.q
index c4eca68..60fea8f 100644
--- a/ql/src/test/queries/clientpositive/union_pos_alias.q
+++ b/ql/src/test/queries/clientpositive/union_pos_alias.q
@@ -27,4 +27,22 @@ UNION ALL
select 'test', value from src_10 s3
order by 2, 1 desc;
-drop table src_10;
\ No newline at end of file
+drop table src_10;
+
+
+drop view v;
+create view v as select key as k from src intersect all select key as k1 from
src;
+desc formatted v;
+
+set hive.mapred.mode=nonstrict;
+set
hive.security.authorization.manager=org.apache.hadoop.hive.ql.security.authorization.plugin.sqlstd.SQLStdHiveAuthorizerFactoryForTest;
+
+create table masking_test as select cast(key as int) as key, value from src;
+
+explain
+select * from masking_test union all select * from masking_test ;
+select * from masking_test union all select * from masking_test ;
+
+explain
+select key as k1, value as v1 from masking_test where key > 0 intersect all
select key as k2, value as v2 from masking_test where key > 0;
+select key as k1, value as v1 from masking_test where key > 0 intersect all
select key as k2, value as v2 from masking_test where key > 0;
http://git-wip-us.apache.org/repos/asf/hive/blob/ba565411/ql/src/test/results/clientpositive/union_pos_alias.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/union_pos_alias.q.out
b/ql/src/test/results/clientpositive/union_pos_alias.q.out
index 8eddbd9..6d2f018 100644
--- a/ql/src/test/results/clientpositive/union_pos_alias.q.out
+++ b/ql/src/test/results/clientpositive/union_pos_alias.q.out
@@ -306,3 +306,309 @@ POSTHOOK: query: drop table src_10
POSTHOOK: type: DROPTABLE
POSTHOOK: Input: default@src_10
POSTHOOK: Output: default@src_10
+PREHOOK: query: drop view v
+PREHOOK: type: DROPVIEW
+POSTHOOK: query: drop view v
+POSTHOOK: type: DROPVIEW
+PREHOOK: query: create view v as select key as k from src intersect all select
key as k1 from src
+PREHOOK: type: CREATEVIEW
+PREHOOK: Input: default@src
+PREHOOK: Output: database:default
+PREHOOK: Output: default@v
+POSTHOOK: query: create view v as select key as k from src intersect all
select key as k1 from src
+POSTHOOK: type: CREATEVIEW
+POSTHOOK: Input: default@src
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@v
+POSTHOOK: Lineage: v.k SCRIPT [(src)src.null, (src)src.FieldSchema(name:key,
type:string, comment:default), ]
+PREHOOK: query: desc formatted v
+PREHOOK: type: DESCTABLE
+PREHOOK: Input: default@v
+POSTHOOK: query: desc formatted v
+POSTHOOK: type: DESCTABLE
+POSTHOOK: Input: default@v
+# col_name data_type comment
+
+k string
+
+# Detailed Table Information
+Database: default
+#### A masked pattern was here ####
+Retention: 0
+Table Type: VIRTUAL_VIEW
+Table Parameters:
+#### A masked pattern was here ####
+
+# Storage Information
+SerDe Library: null
+InputFormat: org.apache.hadoop.mapred.TextInputFormat
+OutputFormat:
org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+Compressed: No
+Num Buckets: -1
+Bucket Columns: []
+Sort Columns: []
+
+# View Information
+View Original Text: select key as k from src intersect all select key as k1
from src
+View Expanded Text: select `src`.`key` as `k` from `default`.`src`
intersect all select `src`.`key` as `k1` from `default`.`src`
+PREHOOK: query: create table masking_test as select cast(key as int) as key,
value from src
+PREHOOK: type: CREATETABLE_AS_SELECT
+PREHOOK: Input: default@src
+PREHOOK: Output: database:default
+PREHOOK: Output: default@masking_test
+POSTHOOK: query: create table masking_test as select cast(key as int) as key,
value from src
+POSTHOOK: type: CREATETABLE_AS_SELECT
+POSTHOOK: Input: default@src
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@masking_test
+POSTHOOK: Lineage: masking_test.key EXPRESSION [(src)src.FieldSchema(name:key,
type:string, comment:default), ]
+POSTHOOK: Lineage: masking_test.value SIMPLE [(src)src.FieldSchema(name:value,
type:string, comment:default), ]
+PREHOOK: query: explain
+select * from masking_test union all select * from masking_test
+PREHOOK: type: QUERY
+POSTHOOK: query: explain
+select * from masking_test union all select * from masking_test
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+ Stage-1 is a root stage
+ Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+ Stage: Stage-1
+ Map Reduce
+ Map Operator Tree:
+ TableScan
+ alias: masking_test
+ Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE
Column stats: NONE
+ Filter Operator
+ predicate: (((key % 2) = 0) and (key < 10)) (type: boolean)
+ Statistics: Num rows: 83 Data size: 881 Basic stats: COMPLETE
Column stats: NONE
+ Select Operator
+ expressions: key (type: int), reverse(value) (type: string)
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 83 Data size: 881 Basic stats: COMPLETE
Column stats: NONE
+ Union
+ Statistics: Num rows: 166 Data size: 1762 Basic stats:
COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 166 Data size: 1762 Basic stats:
COMPLETE Column stats: NONE
+ table:
+ input format:
org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format:
org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde:
org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ TableScan
+ alias: masking_test
+ Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE
Column stats: NONE
+ Filter Operator
+ predicate: (((key % 2) = 0) and (key < 10)) (type: boolean)
+ Statistics: Num rows: 83 Data size: 881 Basic stats: COMPLETE
Column stats: NONE
+ Select Operator
+ expressions: key (type: int), reverse(value) (type: string)
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 83 Data size: 881 Basic stats: COMPLETE
Column stats: NONE
+ Union
+ Statistics: Num rows: 166 Data size: 1762 Basic stats:
COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 166 Data size: 1762 Basic stats:
COMPLETE Column stats: NONE
+ table:
+ input format:
org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format:
org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde:
org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+ Stage: Stage-0
+ Fetch Operator
+ limit: -1
+ Processor Tree:
+ ListSink
+
+PREHOOK: query: select * from masking_test union all select * from
masking_test
+PREHOOK: type: QUERY
+PREHOOK: Input: default@masking_test
+#### A masked pattern was here ####
+POSTHOOK: query: select * from masking_test union all select * from
masking_test
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@masking_test
+#### A masked pattern was here ####
+0 0_lav
+0 0_lav
+4 4_lav
+4 4_lav
+8 8_lav
+8 8_lav
+0 0_lav
+0 0_lav
+0 0_lav
+0 0_lav
+2 2_lav
+2 2_lav
+PREHOOK: query: explain
+select key as k1, value as v1 from masking_test where key > 0 intersect all
select key as k2, value as v2 from masking_test where key > 0
+PREHOOK: type: QUERY
+POSTHOOK: query: explain
+select key as k1, value as v1 from masking_test where key > 0 intersect all
select key as k2, value as v2 from masking_test where key > 0
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+ Stage-1 is a root stage
+ Stage-2 depends on stages: Stage-1, Stage-3
+ Stage-3 is a root stage
+ Stage-0 depends on stages: Stage-2
+
+STAGE PLANS:
+ Stage: Stage-1
+ Map Reduce
+ Map Operator Tree:
+ TableScan
+ alias: masking_test
+ Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE
Column stats: NONE
+ Filter Operator
+ predicate: (((key % 2) = 0) and (key < 10) and (key > 0)) (type:
boolean)
+ Statistics: Num rows: 27 Data size: 286 Basic stats: COMPLETE
Column stats: NONE
+ Select Operator
+ expressions: key (type: int), reverse(value) (type: string)
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 27 Data size: 286 Basic stats: COMPLETE
Column stats: NONE
+ Group By Operator
+ aggregations: count(1)
+ keys: _col0 (type: int), _col1 (type: string)
+ mode: hash
+ outputColumnNames: _col0, _col1, _col2
+ Statistics: Num rows: 27 Data size: 286 Basic stats:
COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: int), _col1 (type: string)
+ sort order: ++
+ Map-reduce partition columns: _col0 (type: int), _col1
(type: string)
+ Statistics: Num rows: 27 Data size: 286 Basic stats:
COMPLETE Column stats: NONE
+ value expressions: _col2 (type: bigint)
+ Reduce Operator Tree:
+ Group By Operator
+ aggregations: count(VALUE._col0)
+ keys: KEY._col0 (type: int), KEY._col1 (type: string)
+ mode: mergepartial
+ outputColumnNames: _col0, _col1, _col2
+ Statistics: Num rows: 13 Data size: 137 Basic stats: COMPLETE Column
stats: NONE
+ File Output Operator
+ compressed: false
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format:
org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+
+ Stage: Stage-2
+ Map Reduce
+ Map Operator Tree:
+ TableScan
+ Union
+ Statistics: Num rows: 26 Data size: 274 Basic stats: COMPLETE
Column stats: NONE
+ Group By Operator
+ aggregations: min(_col2), count(_col2)
+ keys: _col0 (type: int), _col1 (type: string)
+ mode: hash
+ outputColumnNames: _col0, _col1, _col2, _col3
+ Statistics: Num rows: 26 Data size: 274 Basic stats: COMPLETE
Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: int), _col1 (type: string)
+ sort order: ++
+ Map-reduce partition columns: _col0 (type: int), _col1
(type: string)
+ Statistics: Num rows: 26 Data size: 274 Basic stats:
COMPLETE Column stats: NONE
+ value expressions: _col2 (type: bigint), _col3 (type: bigint)
+ TableScan
+ Union
+ Statistics: Num rows: 26 Data size: 274 Basic stats: COMPLETE
Column stats: NONE
+ Group By Operator
+ aggregations: min(_col2), count(_col2)
+ keys: _col0 (type: int), _col1 (type: string)
+ mode: hash
+ outputColumnNames: _col0, _col1, _col2, _col3
+ Statistics: Num rows: 26 Data size: 274 Basic stats: COMPLETE
Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: int), _col1 (type: string)
+ sort order: ++
+ Map-reduce partition columns: _col0 (type: int), _col1
(type: string)
+ Statistics: Num rows: 26 Data size: 274 Basic stats:
COMPLETE Column stats: NONE
+ value expressions: _col2 (type: bigint), _col3 (type: bigint)
+ Reduce Operator Tree:
+ Group By Operator
+ aggregations: min(VALUE._col0), count(VALUE._col1)
+ keys: KEY._col0 (type: int), KEY._col1 (type: string)
+ mode: mergepartial
+ outputColumnNames: _col0, _col1, _col2, _col3
+ Statistics: Num rows: 13 Data size: 137 Basic stats: COMPLETE Column
stats: NONE
+ Filter Operator
+ predicate: (_col3 = 2) (type: boolean)
+ Statistics: Num rows: 1 Data size: 10 Basic stats: COMPLETE Column
stats: NONE
+ Select Operator
+ expressions: _col2 (type: bigint), _col0 (type: int), _col1
(type: string)
+ outputColumnNames: _col0, _col1, _col2
+ Statistics: Num rows: 1 Data size: 10 Basic stats: COMPLETE
Column stats: NONE
+ UDTF Operator
+ Statistics: Num rows: 1 Data size: 10 Basic stats: COMPLETE
Column stats: NONE
+ function name: UDTFReplicateRows
+ Select Operator
+ expressions: col1 (type: int), col2 (type: string)
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 1 Data size: 10 Basic stats: COMPLETE
Column stats: NONE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 1 Data size: 10 Basic stats:
COMPLETE Column stats: NONE
+ table:
+ input format:
org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format:
org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde:
org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+ Stage: Stage-3
+ Map Reduce
+ Map Operator Tree:
+ TableScan
+ alias: masking_test
+ Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE
Column stats: NONE
+ Filter Operator
+ predicate: (((key % 2) = 0) and (key < 10) and (key > 0)) (type:
boolean)
+ Statistics: Num rows: 27 Data size: 286 Basic stats: COMPLETE
Column stats: NONE
+ Select Operator
+ expressions: key (type: int), reverse(value) (type: string)
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 27 Data size: 286 Basic stats: COMPLETE
Column stats: NONE
+ Group By Operator
+ aggregations: count(1)
+ keys: _col0 (type: int), _col1 (type: string)
+ mode: hash
+ outputColumnNames: _col0, _col1, _col2
+ Statistics: Num rows: 27 Data size: 286 Basic stats:
COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: int), _col1 (type: string)
+ sort order: ++
+ Map-reduce partition columns: _col0 (type: int), _col1
(type: string)
+ Statistics: Num rows: 27 Data size: 286 Basic stats:
COMPLETE Column stats: NONE
+ value expressions: _col2 (type: bigint)
+ Reduce Operator Tree:
+ Group By Operator
+ aggregations: count(VALUE._col0)
+ keys: KEY._col0 (type: int), KEY._col1 (type: string)
+ mode: mergepartial
+ outputColumnNames: _col0, _col1, _col2
+ Statistics: Num rows: 13 Data size: 137 Basic stats: COMPLETE Column
stats: NONE
+ File Output Operator
+ compressed: false
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format:
org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+
+ Stage: Stage-0
+ Fetch Operator
+ limit: -1
+ Processor Tree:
+ ListSink
+
+PREHOOK: query: select key as k1, value as v1 from masking_test where key > 0
intersect all select key as k2, value as v2 from masking_test where key > 0
+PREHOOK: type: QUERY
+PREHOOK: Input: default@masking_test
+#### A masked pattern was here ####
+POSTHOOK: query: select key as k1, value as v1 from masking_test where key > 0
intersect all select key as k2, value as v2 from masking_test where key > 0
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@masking_test
+#### A masked pattern was here ####
+2 2_lav
+4 4_lav
+8 8_lav