Repository: hive Updated Branches: refs/heads/master 32b899448 -> 39f1e82ad
HIVE-18590 : Assertion error on transitive join inference in the presence of NOT NULL constraint (Jesus Camacho Rodriguez via Ashutosh Chauhan) Signed-off-by: Ashutosh Chauhan <hashut...@apache.org> Project: http://git-wip-us.apache.org/repos/asf/hive/repo Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/39f1e82a Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/39f1e82a Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/39f1e82a Branch: refs/heads/master Commit: 39f1e82ad2c974357dfc0d38ed776e3e25a2c4db Parents: 32b8994 Author: Jesus Camacho Rodriguez <jcama...@apache.org> Authored: Fri Feb 2 00:07:16 2018 -0800 Committer: Ashutosh Chauhan <hashut...@apache.org> Committed: Fri Feb 2 00:07:16 2018 -0800 ---------------------------------------------------------------------- .../HiveJoinPushTransitivePredicatesRule.java | 23 ++- .../nullability_transitive_inference.q | 40 ++++ .../nullability_transitive_inference.q.out | 181 +++++++++++++++++++ 3 files changed, 240 insertions(+), 4 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/hive/blob/39f1e82a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveJoinPushTransitivePredicatesRule.java ---------------------------------------------------------------------- diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveJoinPushTransitivePredicatesRule.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveJoinPushTransitivePredicatesRule.java index 3cd00f4..48b7765 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveJoinPushTransitivePredicatesRule.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveJoinPushTransitivePredicatesRule.java @@ -34,6 +34,7 @@ import org.apache.calcite.rex.RexBuilder; import org.apache.calcite.rex.RexCall; import org.apache.calcite.rex.RexInputRef; import org.apache.calcite.rex.RexNode; +import org.apache.calcite.rex.RexShuttle; import org.apache.calcite.rex.RexUtil; import org.apache.calcite.rex.RexVisitorImpl; import org.apache.calcite.sql.type.SqlTypeName; @@ -103,13 +104,15 @@ public class HiveJoinPushTransitivePredicatesRule extends RelOptRule { if (!newLeftPredicate.isAlwaysTrue()) { RelNode curr = lChild; - lChild = filterFactory.createFilter(lChild, newLeftPredicate); + lChild = filterFactory.createFilter( + lChild, newLeftPredicate.accept(new RexReplacer(lChild))); call.getPlanner().onCopy(curr, lChild); } if (!newRightPredicate.isAlwaysTrue()) { RelNode curr = rChild; - rChild = filterFactory.createFilter(rChild, newRightPredicate); + rChild = filterFactory.createFilter( + rChild, newRightPredicate.accept(new RexReplacer(rChild))); call.getPlanner().onCopy(curr, rChild); } @@ -161,7 +164,7 @@ public class HiveJoinPushTransitivePredicatesRule extends RelOptRule { return typeSafeRex; } - private static class InputRefValidator extends RexVisitorImpl<Void> { + private static class InputRefValidator extends RexVisitorImpl<Void> { private final List<RelDataTypeField> types; protected InputRefValidator(List<RelDataTypeField> types) { @@ -201,5 +204,17 @@ public class HiveJoinPushTransitivePredicatesRule extends RelOptRule { return false; } } -} + /* Changes the type of the input references to adjust nullability */ + private static class RexReplacer extends RexShuttle { + private final RelNode input; + + RexReplacer(RelNode input) { + this.input = input; + } + + @Override public RexNode visitInputRef(RexInputRef inputRef) { + return RexInputRef.of(inputRef.getIndex(), input.getRowType()); + } + } +} http://git-wip-us.apache.org/repos/asf/hive/blob/39f1e82a/ql/src/test/queries/clientpositive/nullability_transitive_inference.q ---------------------------------------------------------------------- diff --git a/ql/src/test/queries/clientpositive/nullability_transitive_inference.q b/ql/src/test/queries/clientpositive/nullability_transitive_inference.q new file mode 100644 index 0000000..ff0ce3a --- /dev/null +++ b/ql/src/test/queries/clientpositive/nullability_transitive_inference.q @@ -0,0 +1,40 @@ +-- SORT_QUERY_RESULTS + +set hive.support.concurrency=true; +set hive.txn.manager=org.apache.hadoop.hive.ql.lockmgr.DbTxnManager; +set hive.strict.checks.cartesian.product=false; +set hive.stats.fetch.column.stats=true; +set hive.materializedview.rewriting=true; + +create table emps ( + empid int, + deptno int, + name varchar(256), + salary float, + commission int) +stored as orc TBLPROPERTIES ('transactional'='true'); +insert into emps values (100, 10, 'Bill', 10000, 1000), (200, 20, 'Eric', 8000, 500), + (150, 10, 'Sebastian', 7000, null), (110, 10, 'Theodore', 10000, 250), (110, 10, 'Bill', 10000, 250); +analyze table emps compute statistics for columns; + +create table depts ( + deptno int, + name varchar(256), + locationid int) +stored as orc TBLPROPERTIES ('transactional'='true'); +insert into depts values (10, 'Sales', 10), (30, 'Marketing', null), (20, 'HR', 20); +analyze table depts compute statistics for columns; + +alter table emps add constraint pk1 primary key (empid) disable novalidate rely; +alter table depts add constraint pk2 primary key (deptno) disable novalidate rely; + +alter table emps add constraint fk1 foreign key (deptno) references depts(deptno) disable novalidate rely; + +explain +select empid from emps +join depts using (deptno) where depts.deptno >= 20 +group by empid, depts.deptno; + +select empid from emps +join depts using (deptno) where depts.deptno >= 20 +group by empid, depts.deptno; http://git-wip-us.apache.org/repos/asf/hive/blob/39f1e82a/ql/src/test/results/clientpositive/nullability_transitive_inference.q.out ---------------------------------------------------------------------- diff --git a/ql/src/test/results/clientpositive/nullability_transitive_inference.q.out b/ql/src/test/results/clientpositive/nullability_transitive_inference.q.out new file mode 100644 index 0000000..37cec81 --- /dev/null +++ b/ql/src/test/results/clientpositive/nullability_transitive_inference.q.out @@ -0,0 +1,181 @@ +PREHOOK: query: create table emps ( + empid int, + deptno int, + name varchar(256), + salary float, + commission int) +stored as orc TBLPROPERTIES ('transactional'='true') +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@emps +POSTHOOK: query: create table emps ( + empid int, + deptno int, + name varchar(256), + salary float, + commission int) +stored as orc TBLPROPERTIES ('transactional'='true') +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@emps +PREHOOK: query: insert into emps values (100, 10, 'Bill', 10000, 1000), (200, 20, 'Eric', 8000, 500), + (150, 10, 'Sebastian', 7000, null), (110, 10, 'Theodore', 10000, 250), (110, 10, 'Bill', 10000, 250) +PREHOOK: type: QUERY +PREHOOK: Input: _dummy_database@_dummy_table +PREHOOK: Output: default@emps +POSTHOOK: query: insert into emps values (100, 10, 'Bill', 10000, 1000), (200, 20, 'Eric', 8000, 500), + (150, 10, 'Sebastian', 7000, null), (110, 10, 'Theodore', 10000, 250), (110, 10, 'Bill', 10000, 250) +POSTHOOK: type: QUERY +POSTHOOK: Input: _dummy_database@_dummy_table +POSTHOOK: Output: default@emps +POSTHOOK: Lineage: emps.commission SCRIPT [] +POSTHOOK: Lineage: emps.deptno SCRIPT [] +POSTHOOK: Lineage: emps.empid SCRIPT [] +POSTHOOK: Lineage: emps.name SCRIPT [] +POSTHOOK: Lineage: emps.salary SCRIPT [] +PREHOOK: query: analyze table emps compute statistics for columns +PREHOOK: type: QUERY +PREHOOK: Input: default@emps +PREHOOK: Output: default@emps +#### A masked pattern was here #### +POSTHOOK: query: analyze table emps compute statistics for columns +POSTHOOK: type: QUERY +POSTHOOK: Input: default@emps +POSTHOOK: Output: default@emps +#### A masked pattern was here #### +PREHOOK: query: create table depts ( + deptno int, + name varchar(256), + locationid int) +stored as orc TBLPROPERTIES ('transactional'='true') +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@depts +POSTHOOK: query: create table depts ( + deptno int, + name varchar(256), + locationid int) +stored as orc TBLPROPERTIES ('transactional'='true') +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@depts +PREHOOK: query: insert into depts values (10, 'Sales', 10), (30, 'Marketing', null), (20, 'HR', 20) +PREHOOK: type: QUERY +PREHOOK: Input: _dummy_database@_dummy_table +PREHOOK: Output: default@depts +POSTHOOK: query: insert into depts values (10, 'Sales', 10), (30, 'Marketing', null), (20, 'HR', 20) +POSTHOOK: type: QUERY +POSTHOOK: Input: _dummy_database@_dummy_table +POSTHOOK: Output: default@depts +POSTHOOK: Lineage: depts.deptno SCRIPT [] +POSTHOOK: Lineage: depts.locationid SCRIPT [] +POSTHOOK: Lineage: depts.name SCRIPT [] +PREHOOK: query: analyze table depts compute statistics for columns +PREHOOK: type: QUERY +PREHOOK: Input: default@depts +PREHOOK: Output: default@depts +#### A masked pattern was here #### +POSTHOOK: query: analyze table depts compute statistics for columns +POSTHOOK: type: QUERY +POSTHOOK: Input: default@depts +POSTHOOK: Output: default@depts +#### A masked pattern was here #### +PREHOOK: query: alter table emps add constraint pk1 primary key (empid) disable novalidate rely +PREHOOK: type: ALTERTABLE_ADDCONSTRAINT +POSTHOOK: query: alter table emps add constraint pk1 primary key (empid) disable novalidate rely +POSTHOOK: type: ALTERTABLE_ADDCONSTRAINT +PREHOOK: query: alter table depts add constraint pk2 primary key (deptno) disable novalidate rely +PREHOOK: type: ALTERTABLE_ADDCONSTRAINT +POSTHOOK: query: alter table depts add constraint pk2 primary key (deptno) disable novalidate rely +POSTHOOK: type: ALTERTABLE_ADDCONSTRAINT +PREHOOK: query: alter table emps add constraint fk1 foreign key (deptno) references depts(deptno) disable novalidate rely +PREHOOK: type: ALTERTABLE_ADDCONSTRAINT +POSTHOOK: query: alter table emps add constraint fk1 foreign key (deptno) references depts(deptno) disable novalidate rely +POSTHOOK: type: ALTERTABLE_ADDCONSTRAINT +PREHOOK: query: explain +select empid from emps +join depts using (deptno) where depts.deptno >= 20 +group by empid, depts.deptno +PREHOOK: type: QUERY +POSTHOOK: query: explain +select empid from emps +join depts using (deptno) where depts.deptno >= 20 +group by empid, depts.deptno +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: emps + Statistics: Num rows: 5 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + predicate: (deptno >= 20) (type: boolean) + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: empid (type: int), deptno (type: int) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col1 (type: int) + sort order: + + Map-reduce partition columns: _col1 (type: int) + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int) + TableScan + alias: depts + Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + predicate: (deptno >= 20) (type: boolean) + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: deptno (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col1 (type: int) + 1 _col0 (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select empid from emps +join depts using (deptno) where depts.deptno >= 20 +group by empid, depts.deptno +PREHOOK: type: QUERY +PREHOOK: Input: default@depts +PREHOOK: Input: default@emps +#### A masked pattern was here #### +POSTHOOK: query: select empid from emps +join depts using (deptno) where depts.deptno >= 20 +group by empid, depts.deptno +POSTHOOK: type: QUERY +POSTHOOK: Input: default@depts +POSTHOOK: Input: default@emps +#### A masked pattern was here #### +200