This is an automated email from the ASF dual-hosted git repository. krisztiankasa pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/hive.git
The following commit(s) were added to refs/heads/master by this push: new d6b1d5fa784 HIVE-26762: Remove operand pruning in HiveFilterSetOpTransposeRule (Alessandro Solimando, reviewed by Krisztian Kasa) d6b1d5fa784 is described below commit d6b1d5fa784789d7aa0461adc9676a0489f2e3ea Author: Alessandro Solimando <alessandro.solima...@gmail.com> AuthorDate: Mon Dec 5 20:17:14 2022 +0100 HIVE-26762: Remove operand pruning in HiveFilterSetOpTransposeRule (Alessandro Solimando, reviewed by Krisztian Kasa) --- .../rules/HiveFilterSetOpTransposeRule.java | 64 ++-------- .../union_all_filter_transpose_pruned_operands.q | 45 +++++++ ...nion_all_filter_transpose_pruned_operands.q.out | 140 +++++++++++++++++++++ .../perf/tpcds30tb/tez/cbo_query11.q.out | 8 +- .../perf/tpcds30tb/tez/cbo_query4.q.out | 12 +- .../perf/tpcds30tb/tez/cbo_query74.q.out | 8 +- .../perf/tpcds30tb/tez/query11.q.out | 44 ++++--- .../clientpositive/perf/tpcds30tb/tez/query4.q.out | 58 +++++---- .../perf/tpcds30tb/tez/query74.q.out | 46 +++---- 9 files changed, 291 insertions(+), 134 deletions(-) diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveFilterSetOpTransposeRule.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveFilterSetOpTransposeRule.java index 192fb682e13..8f6bb61b833 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveFilterSetOpTransposeRule.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveFilterSetOpTransposeRule.java @@ -20,25 +20,17 @@ package org.apache.hadoop.hive.ql.optimizer.calcite.rules; import java.util.ArrayList; import java.util.List; -import org.apache.calcite.plan.RelOptPredicateList; import org.apache.calcite.plan.RelOptRuleCall; import org.apache.calcite.plan.RelOptUtil; import org.apache.calcite.rel.RelNode; import org.apache.calcite.rel.core.Filter; import org.apache.calcite.rel.core.SetOp; -import org.apache.calcite.rel.core.Union; -import org.apache.calcite.rel.metadata.RelMetadataQuery; import org.apache.calcite.rel.rules.FilterSetOpTransposeRule; import org.apache.calcite.rel.type.RelDataTypeField; import org.apache.calcite.rex.RexBuilder; -import org.apache.calcite.rex.RexExecutor; import org.apache.calcite.rex.RexNode; -import org.apache.calcite.rex.RexSimplify; -import org.apache.calcite.rex.RexUnknownAs; -import org.apache.calcite.rex.RexUtil; import org.apache.calcite.tools.RelBuilder; import org.apache.calcite.tools.RelBuilderFactory; -import org.apache.calcite.util.Util; import org.apache.hadoop.hive.ql.optimizer.calcite.HiveCalciteUtil; import org.apache.hadoop.hive.ql.optimizer.calcite.HiveRelFactories; @@ -55,17 +47,12 @@ public class HiveFilterSetOpTransposeRule extends FilterSetOpTransposeRule { * Union * / \ * Op1 Op2 - * * to * Union * /\ * FIL * | | * Op1 Op2 - * - * - * It additionally can remove branch(es) of filter if it's able to determine - * that they are going to generate an empty result set. */ private HiveFilterSetOpTransposeRule(RelBuilderFactory relBuilderFactory) { super(relBuilderFactory); @@ -85,57 +72,30 @@ public class HiveFilterSetOpTransposeRule extends FilterSetOpTransposeRule { //~ Methods ---------------------------------------------------------------- - // implement RelOptRule - // We override the rule in order to do union all branch elimination + @Override public void onMatch(RelOptRuleCall call) { - Filter filterRel = call.rel(0); - SetOp setOp = call.rel(1); + final Filter filterRel = call.rel(0); + final SetOp setOp = call.rel(1); - RexNode condition = filterRel.getCondition(); + final RexNode condition = filterRel.getCondition(); // create filters on top of each setop child, modifying the filter // condition to reference each setop child - RexBuilder rexBuilder = filterRel.getCluster().getRexBuilder(); + final RexBuilder rexBuilder = filterRel.getCluster().getRexBuilder(); final RelBuilder relBuilder = call.builder(); - List<RelDataTypeField> origFields = setOp.getRowType().getFieldList(); - int[] adjustments = new int[origFields.size()]; + final List<RelDataTypeField> origFields = setOp.getRowType().getFieldList(); + final int[] adjustments = new int[origFields.size()]; final List<RelNode> newSetOpInputs = new ArrayList<>(); - RelNode lastInput = null; + for (int index = 0; index < setOp.getInputs().size(); index++) { RelNode input = setOp.getInput(index); RexNode newCondition = condition.accept(new RelOptUtil.RexInputConverter(rexBuilder, origFields, input.getRowType().getFieldList(), adjustments)); - if (setOp instanceof Union && setOp.all) { - final RelMetadataQuery mq = call.getMetadataQuery(); - final RelOptPredicateList predicates = mq.getPulledUpPredicates(input); - if (predicates != null) { - final RexExecutor executor = - Util.first(filterRel.getCluster().getPlanner().getExecutor(), RexUtil.EXECUTOR); - final RexSimplify simplify = new RexSimplify(rexBuilder, predicates, executor); - final RexNode x = simplify.simplifyUnknownAs(newCondition, RexUnknownAs.FALSE); - if (x.isAlwaysFalse()) { - // this is the last branch, and it is always false - // We assume alwaysFalse filter will get pushed down to TS so this - // branch won't read any data. - if (index == setOp.getInputs().size() - 1) { - lastInput = relBuilder.push(input).filter(newCondition).build(); - } - // remove this branch - continue; - } - } - } newSetOpInputs.add(relBuilder.push(input).filter(newCondition).build()); } - if (newSetOpInputs.size() > 1) { - // create a new setop whose children are the filters created above - SetOp newSetOp = setOp.copy(setOp.getTraitSet(), newSetOpInputs); - call.transformTo(newSetOp); - } else { - // We have to keep at least a branch before we support empty values() in Hive - RelNode result = newSetOpInputs.size() == 1 ? newSetOpInputs.get(0) : lastInput; - call.transformTo( - relBuilder.push(result).convert(filterRel.getRowType(), false).build()); - } + + // create a new setop whose children are the filters created above + SetOp newSetOp = setOp.copy(setOp.getTraitSet(), newSetOpInputs); + call.transformTo(newSetOp); } } diff --git a/ql/src/test/queries/clientpositive/union_all_filter_transpose_pruned_operands.q b/ql/src/test/queries/clientpositive/union_all_filter_transpose_pruned_operands.q new file mode 100644 index 00000000000..1410ee8c015 --- /dev/null +++ b/ql/src/test/queries/clientpositive/union_all_filter_transpose_pruned_operands.q @@ -0,0 +1,45 @@ +CREATE EXTERNAL TABLE t (a string, b string); + +INSERT INTO t VALUES ('1000', 'b1'); +INSERT INTO t VALUES ('1001', 'b1'); +INSERT INTO t VALUES ('1002', 'b1'); +INSERT INTO t VALUES ('2000', 'b2'); + +SELECT * FROM ( + SELECT + a, + b + FROM t + UNION ALL + SELECT + a, + b + FROM t + WHERE a = '1001' + UNION ALL + SELECT + a, + b + FROM t + WHERE a = '1002') AS t2 +WHERE a = '1000'; + +EXPLAIN CBO +SELECT * FROM ( + SELECT + a, + b + FROM t + UNION ALL + SELECT + a, + b + FROM t + WHERE a = '1001' + UNION ALL + SELECT + a, + b + FROM t + WHERE a = '1002') AS t2 +WHERE a = '1000'; diff --git a/ql/src/test/results/clientpositive/llap/union_all_filter_transpose_pruned_operands.q.out b/ql/src/test/results/clientpositive/llap/union_all_filter_transpose_pruned_operands.q.out new file mode 100644 index 00000000000..b1f8efc5300 --- /dev/null +++ b/ql/src/test/results/clientpositive/llap/union_all_filter_transpose_pruned_operands.q.out @@ -0,0 +1,140 @@ +PREHOOK: query: CREATE EXTERNAL TABLE t (a string, b string) +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@t +POSTHOOK: query: CREATE EXTERNAL TABLE t (a string, b string) +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@t +PREHOOK: query: INSERT INTO t VALUES ('1000', 'b1') +PREHOOK: type: QUERY +PREHOOK: Input: _dummy_database@_dummy_table +PREHOOK: Output: default@t +POSTHOOK: query: INSERT INTO t VALUES ('1000', 'b1') +POSTHOOK: type: QUERY +POSTHOOK: Input: _dummy_database@_dummy_table +POSTHOOK: Output: default@t +POSTHOOK: Lineage: t.a SCRIPT [] +POSTHOOK: Lineage: t.b SCRIPT [] +PREHOOK: query: INSERT INTO t VALUES ('1001', 'b1') +PREHOOK: type: QUERY +PREHOOK: Input: _dummy_database@_dummy_table +PREHOOK: Output: default@t +POSTHOOK: query: INSERT INTO t VALUES ('1001', 'b1') +POSTHOOK: type: QUERY +POSTHOOK: Input: _dummy_database@_dummy_table +POSTHOOK: Output: default@t +POSTHOOK: Lineage: t.a SCRIPT [] +POSTHOOK: Lineage: t.b SCRIPT [] +PREHOOK: query: INSERT INTO t VALUES ('1002', 'b1') +PREHOOK: type: QUERY +PREHOOK: Input: _dummy_database@_dummy_table +PREHOOK: Output: default@t +POSTHOOK: query: INSERT INTO t VALUES ('1002', 'b1') +POSTHOOK: type: QUERY +POSTHOOK: Input: _dummy_database@_dummy_table +POSTHOOK: Output: default@t +POSTHOOK: Lineage: t.a SCRIPT [] +POSTHOOK: Lineage: t.b SCRIPT [] +PREHOOK: query: INSERT INTO t VALUES ('2000', 'b2') +PREHOOK: type: QUERY +PREHOOK: Input: _dummy_database@_dummy_table +PREHOOK: Output: default@t +POSTHOOK: query: INSERT INTO t VALUES ('2000', 'b2') +POSTHOOK: type: QUERY +POSTHOOK: Input: _dummy_database@_dummy_table +POSTHOOK: Output: default@t +POSTHOOK: Lineage: t.a SCRIPT [] +POSTHOOK: Lineage: t.b SCRIPT [] +PREHOOK: query: SELECT * FROM ( + SELECT + a, + b + FROM t + UNION ALL + SELECT + a, + b + FROM t + WHERE a = '1001' + UNION ALL + SELECT + a, + b + FROM t + WHERE a = '1002') AS t2 +WHERE a = '1000' +PREHOOK: type: QUERY +PREHOOK: Input: default@t +#### A masked pattern was here #### +POSTHOOK: query: SELECT * FROM ( + SELECT + a, + b + FROM t + UNION ALL + SELECT + a, + b + FROM t + WHERE a = '1001' + UNION ALL + SELECT + a, + b + FROM t + WHERE a = '1002') AS t2 +WHERE a = '1000' +POSTHOOK: type: QUERY +POSTHOOK: Input: default@t +#### A masked pattern was here #### +1000 b1 +PREHOOK: query: EXPLAIN CBO +SELECT * FROM ( + SELECT + a, + b + FROM t + UNION ALL + SELECT + a, + b + FROM t + WHERE a = '1001' + UNION ALL + SELECT + a, + b + FROM t + WHERE a = '1002') AS t2 +WHERE a = '1000' +PREHOOK: type: QUERY +PREHOOK: Input: default@t +#### A masked pattern was here #### +POSTHOOK: query: EXPLAIN CBO +SELECT * FROM ( + SELECT + a, + b + FROM t + UNION ALL + SELECT + a, + b + FROM t + WHERE a = '1001' + UNION ALL + SELECT + a, + b + FROM t + WHERE a = '1002') AS t2 +WHERE a = '1000' +POSTHOOK: type: QUERY +POSTHOOK: Input: default@t +#### A masked pattern was here #### +CBO PLAN: +HiveProject(a=[CAST(_UTF-16LE'1000':VARCHAR(2147483647) CHARACTER SET "UTF-16LE"):VARCHAR(2147483647) CHARACTER SET "UTF-16LE"], b=[$1]) + HiveFilter(condition=[=($0, _UTF-16LE'1000')]) + HiveTableScan(table=[[default, t]], table:alias=[t]) + diff --git a/ql/src/test/results/clientpositive/perf/tpcds30tb/tez/cbo_query11.q.out b/ql/src/test/results/clientpositive/perf/tpcds30tb/tez/cbo_query11.q.out index 5af734f10a7..ade152ee71c 100644 --- a/ql/src/test/results/clientpositive/perf/tpcds30tb/tez/cbo_query11.q.out +++ b/ql/src/test/results/clientpositive/perf/tpcds30tb/tez/cbo_query11.q.out @@ -3,8 +3,8 @@ HiveSortLimit(sort0=[$0], sort1=[$1], sort2=[$2], sort3=[$3], dir0=[ASC], dir1=[ HiveProject(customer_id=[$0], customer_first_name=[$2], customer_last_name=[$3], customer_birth_country=[$4]) HiveJoin(condition=[=($0, $1)], joinType=[inner], algorithm=[none], cost=[not available]) HiveProject(customer_id=[$0]) - HiveJoin(condition=[AND(CASE(IS NOT NULL($4), CASE($9, >(/($6, $8), /($2, $4)), >(0:DECIMAL(1, 0), /($2, $4))), CASE($9, >(/($6, $8), 0:DECIMAL(1, 0)), false)), =($0, $7))], joinType=[inner], algorithm=[none], cost=[not available]) - HiveJoin(condition=[=($0, $5)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveJoin(condition=[AND(CASE($5, CASE($10, >(/($7, $9), /($2, $4)), >(0:DECIMAL(1, 0), /($2, $4))), CASE($10, >(/($7, $9), 0:DECIMAL(1, 0)), false)), =($0, $8))], joinType=[inner], algorithm=[none], cost=[not available]) + HiveJoin(condition=[=($0, $6)], joinType=[inner], algorithm=[none], cost=[not available]) HiveJoin(condition=[=($0, $3)], joinType=[inner], algorithm=[none], cost=[not available]) HiveProject(c_customer_id=[$0], c_birth_country=[$1], $f2=[$2]) HiveAggregate(group=[{5, 8}], agg#0=[sum($2)]) @@ -20,7 +20,7 @@ HiveSortLimit(sort0=[$0], sort1=[$1], sort2=[$2], sort3=[$3], dir0=[ASC], dir1=[ HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) HiveProject(c_customer_sk=[$0], c_customer_id=[$1], c_first_name=[$8], c_last_name=[$9], c_birth_country=[$14]) HiveTableScan(table=[[default, customer]], table:alias=[customer]) - HiveProject($f0=[$0], $f1=[$1]) + HiveProject(customer_id=[$0], year_total=[$1], >=[>($1, 0:DECIMAL(1, 0))]) HiveFilter(condition=[>($1, 0:DECIMAL(1, 0))]) HiveAggregate(group=[{5}], agg#0=[sum($2)]) HiveJoin(condition=[=($4, $0)], joinType=[inner], algorithm=[none], cost=[not available]) @@ -49,7 +49,7 @@ HiveSortLimit(sort0=[$0], sort1=[$1], sort2=[$2], sort3=[$3], dir0=[ASC], dir1=[ HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) HiveProject(c_customer_sk=[$0], c_customer_id=[$1]) HiveTableScan(table=[[default, customer]], table:alias=[customer]) - HiveProject(customer_id=[$0], year_total=[$1], CAST=[CAST(IS NOT NULL($1)):BOOLEAN]) + HiveProject(customer_id=[$0], year_total=[$1], >=[>($1, 0:DECIMAL(1, 0))]) HiveFilter(condition=[>($1, 0:DECIMAL(1, 0))]) HiveAggregate(group=[{5}], agg#0=[sum($2)]) HiveJoin(condition=[=($4, $0)], joinType=[inner], algorithm=[none], cost=[not available]) diff --git a/ql/src/test/results/clientpositive/perf/tpcds30tb/tez/cbo_query4.q.out b/ql/src/test/results/clientpositive/perf/tpcds30tb/tez/cbo_query4.q.out index e84ab82c70e..d8acd2b4921 100644 --- a/ql/src/test/results/clientpositive/perf/tpcds30tb/tez/cbo_query4.q.out +++ b/ql/src/test/results/clientpositive/perf/tpcds30tb/tez/cbo_query4.q.out @@ -3,7 +3,7 @@ HiveSortLimit(sort0=[$0], sort1=[$1], sort2=[$2], sort3=[$3], dir0=[ASC], dir1=[ HiveProject(customer_id=[$0], customer_first_name=[$2], customer_last_name=[$3], customer_birth_country=[$4]) HiveJoin(condition=[=($0, $1)], joinType=[inner], algorithm=[none], cost=[not available]) HiveProject(customer_id=[$2]) - HiveJoin(condition=[AND(=($2, $12), CASE($14, CASE($11, >(/($8, $10), /($1, $13)), false), false))], joinType=[inner], algorithm=[none], cost=[not available]) + HiveJoin(condition=[AND(=($2, $13), CASE($15, CASE($12, >(/($9, $11), /($1, $14)), false), false))], joinType=[inner], algorithm=[none], cost=[not available]) HiveJoin(condition=[=($2, $0)], joinType=[inner], algorithm=[none], cost=[not available]) HiveProject(c_customer_id=[$0], $f1=[$1]) HiveAggregate(group=[{5}], agg#0=[sum($2)]) @@ -19,8 +19,8 @@ HiveSortLimit(sort0=[$0], sort1=[$1], sort2=[$2], sort3=[$3], dir0=[ASC], dir1=[ HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) HiveProject(c_customer_sk=[$0], c_customer_id=[$1]) HiveTableScan(table=[[default, customer]], table:alias=[customer]) - HiveJoin(condition=[AND(CASE(IS NOT NULL($4), CASE($9, >(/($6, $8), /($2, $4)), false), false), =($0, $7))], joinType=[inner], algorithm=[none], cost=[not available]) - HiveJoin(condition=[=($0, $5)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveJoin(condition=[AND(CASE($5, CASE($10, >(/($7, $9), /($2, $4)), false), false), =($0, $8))], joinType=[inner], algorithm=[none], cost=[not available]) + HiveJoin(condition=[=($0, $6)], joinType=[inner], algorithm=[none], cost=[not available]) HiveJoin(condition=[=($0, $3)], joinType=[inner], algorithm=[none], cost=[not available]) HiveProject(c_customer_id=[$0], c_birth_country=[$1], $f2=[$2]) HiveAggregate(group=[{5, 8}], agg#0=[sum($2)]) @@ -36,7 +36,7 @@ HiveSortLimit(sort0=[$0], sort1=[$1], sort2=[$2], sort3=[$3], dir0=[ASC], dir1=[ HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) HiveProject(c_customer_sk=[$0], c_customer_id=[$1], c_first_name=[$8], c_last_name=[$9], c_birth_country=[$14]) HiveTableScan(table=[[default, customer]], table:alias=[customer]) - HiveProject($f0=[$0], $f1=[$1]) + HiveProject(customer_id=[$0], year_total=[$1], >=[>($1, 0:DECIMAL(1, 0))]) HiveFilter(condition=[>($1, 0:DECIMAL(1, 0))]) HiveAggregate(group=[{5}], agg#0=[sum($2)]) HiveJoin(condition=[=($4, $0)], joinType=[inner], algorithm=[none], cost=[not available]) @@ -65,7 +65,7 @@ HiveSortLimit(sort0=[$0], sort1=[$1], sort2=[$2], sort3=[$3], dir0=[ASC], dir1=[ HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) HiveProject(c_customer_sk=[$0], c_customer_id=[$1]) HiveTableScan(table=[[default, customer]], table:alias=[customer]) - HiveProject(customer_id=[$0], year_total=[$1], CAST=[CAST(IS NOT NULL($1)):BOOLEAN]) + HiveProject(customer_id=[$0], year_total=[$1], >=[>($1, 0:DECIMAL(1, 0))]) HiveFilter(condition=[>($1, 0:DECIMAL(1, 0))]) HiveAggregate(group=[{5}], agg#0=[sum($2)]) HiveJoin(condition=[=($4, $0)], joinType=[inner], algorithm=[none], cost=[not available]) @@ -80,7 +80,7 @@ HiveSortLimit(sort0=[$0], sort1=[$1], sort2=[$2], sort3=[$3], dir0=[ASC], dir1=[ HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) HiveProject(c_customer_sk=[$0], c_customer_id=[$1]) HiveTableScan(table=[[default, customer]], table:alias=[customer]) - HiveProject(customer_id=[$0], year_total=[$1], CAST=[CAST(IS NOT NULL($1)):BOOLEAN]) + HiveProject(customer_id=[$0], year_total=[$1], >=[>($1, 0:DECIMAL(1, 0))]) HiveFilter(condition=[>($1, 0:DECIMAL(1, 0))]) HiveAggregate(group=[{5}], agg#0=[sum($2)]) HiveJoin(condition=[=($4, $0)], joinType=[inner], algorithm=[none], cost=[not available]) diff --git a/ql/src/test/results/clientpositive/perf/tpcds30tb/tez/cbo_query74.q.out b/ql/src/test/results/clientpositive/perf/tpcds30tb/tez/cbo_query74.q.out index f1e24a57ec8..162ca9c8111 100644 --- a/ql/src/test/results/clientpositive/perf/tpcds30tb/tez/cbo_query74.q.out +++ b/ql/src/test/results/clientpositive/perf/tpcds30tb/tez/cbo_query74.q.out @@ -1,8 +1,8 @@ CBO PLAN: HiveSortLimit(sort0=[$2], sort1=[$0], sort2=[$1], dir0=[ASC], dir1=[ASC], dir2=[ASC], fetch=[100]) HiveProject(customer_id=[$0], customer_first_name=[$1], customer_last_name=[$2]) - HiveJoin(condition=[AND(CASE(IS NOT NULL($5), CASE($10, >(/($7, $9), /($3, $5)), false), false), =($0, $8))], joinType=[inner], algorithm=[none], cost=[not available]) - HiveJoin(condition=[=($0, $6)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveJoin(condition=[AND(CASE($6, CASE($11, >(/($8, $10), /($3, $5)), false), false), =($0, $9))], joinType=[inner], algorithm=[none], cost=[not available]) + HiveJoin(condition=[=($0, $7)], joinType=[inner], algorithm=[none], cost=[not available]) HiveJoin(condition=[=($0, $4)], joinType=[inner], algorithm=[none], cost=[not available]) HiveProject(c_customer_id=[$0], c_first_name=[$1], c_last_name=[$2], $f3=[$3]) HiveAggregate(group=[{5, 6, 7}], agg#0=[sum($1)]) @@ -16,7 +16,7 @@ HiveSortLimit(sort0=[$2], sort1=[$0], sort2=[$1], dir0=[ASC], dir1=[ASC], dir2=[ HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) HiveProject(c_customer_sk=[$0], c_customer_id=[$1], c_first_name=[$8], c_last_name=[$9]) HiveTableScan(table=[[default, customer]], table:alias=[customer]) - HiveProject($f0=[$0], $f1=[$1]) + HiveProject(customer_id=[$0], year_total=[$1], >=[>($1, 0:DECIMAL(1, 0))]) HiveFilter(condition=[>($1, 0:DECIMAL(1, 0))]) HiveAggregate(group=[{5}], agg#0=[sum($1)]) HiveJoin(condition=[=($4, $0)], joinType=[inner], algorithm=[none], cost=[not available]) @@ -41,7 +41,7 @@ HiveSortLimit(sort0=[$2], sort1=[$0], sort2=[$1], dir0=[ASC], dir1=[ASC], dir2=[ HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) HiveProject(c_customer_sk=[$0], c_customer_id=[$1], c_first_name=[$8], c_last_name=[$9]) HiveTableScan(table=[[default, customer]], table:alias=[customer]) - HiveProject(customer_id=[$0], year_total=[$1], CAST=[CAST(IS NOT NULL($1)):BOOLEAN]) + HiveProject(customer_id=[$0], year_total=[$1], >=[>($1, 0:DECIMAL(1, 0))]) HiveFilter(condition=[>($1, 0:DECIMAL(1, 0))]) HiveAggregate(group=[{5}], agg#0=[sum($1)]) HiveJoin(condition=[=($4, $0)], joinType=[inner], algorithm=[none], cost=[not available]) diff --git a/ql/src/test/results/clientpositive/perf/tpcds30tb/tez/query11.q.out b/ql/src/test/results/clientpositive/perf/tpcds30tb/tez/query11.q.out index b3ae8c6ce6c..d569bd484af 100644 --- a/ql/src/test/results/clientpositive/perf/tpcds30tb/tez/query11.q.out +++ b/ql/src/test/results/clientpositive/perf/tpcds30tb/tez/query11.q.out @@ -446,7 +446,7 @@ STAGE PLANS: predicate: (_col1 > 0) (type: boolean) Statistics: Num rows: 26666666 Data size: 5653333192 Basic stats: COMPLETE Column stats: COMPLETE Select Operator - expressions: _col0 (type: char(16)), _col1 (type: decimal(18,2)), _col1 is not null (type: boolean) + expressions: _col0 (type: char(16)), _col1 (type: decimal(18,2)), (_col1 > 0) (type: boolean) outputColumnNames: _col0, _col1, _col2 Statistics: Num rows: 26666666 Data size: 5759999856 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator @@ -513,13 +513,17 @@ STAGE PLANS: Filter Operator predicate: (_col1 > 0) (type: boolean) Statistics: Num rows: 26666666 Data size: 5653333192 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: char(16)) - null sort order: z - sort order: + - Map-reduce partition columns: _col0 (type: char(16)) - Statistics: Num rows: 26666666 Data size: 5653333192 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col1 (type: decimal(18,2)) + Select Operator + expressions: _col0 (type: char(16)), _col1 (type: decimal(18,2)), (_col1 > 0) (type: boolean) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 26666666 Data size: 5759999856 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: char(16)) + null sort order: z + sort order: + + Map-reduce partition columns: _col0 (type: char(16)) + Statistics: Num rows: 26666666 Data size: 5759999856 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col1 (type: decimal(18,2)), _col2 (type: boolean) Reducer 5 Execution mode: vectorized, llap Reduce Operator Tree: @@ -577,18 +581,18 @@ STAGE PLANS: keys: 0 KEY.reducesinkkey0 (type: char(16)) 1 KEY.reducesinkkey0 (type: char(16)) - outputColumnNames: _col0, _col2, _col4 + outputColumnNames: _col0, _col2, _col4, _col5 input vertices: 1 Reducer 4 - Statistics: Num rows: 26666666 Data size: 8639999784 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 26666666 Data size: 8746666448 Basic stats: COMPLETE Column stats: COMPLETE DynamicPartitionHashJoin: true Reduce Output Operator key expressions: _col0 (type: char(16)) null sort order: z sort order: + Map-reduce partition columns: _col0 (type: char(16)) - Statistics: Num rows: 26666666 Data size: 8639999784 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col2 (type: decimal(18,2)), _col4 (type: decimal(18,2)) + Statistics: Num rows: 26666666 Data size: 8746666448 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col2 (type: decimal(18,2)), _col4 (type: decimal(18,2)), _col5 (type: boolean) Reducer 8 Execution mode: vectorized, llap Reduce Operator Tree: @@ -598,18 +602,18 @@ STAGE PLANS: keys: 0 KEY.reducesinkkey0 (type: char(16)) 1 KEY.reducesinkkey0 (type: char(16)) - outputColumnNames: _col0, _col2, _col4, _col6 + outputColumnNames: _col0, _col2, _col4, _col5, _col7 input vertices: 1 Reducer 15 - Statistics: Num rows: 26666666 Data size: 11626666376 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 26666666 Data size: 11733333040 Basic stats: COMPLETE Column stats: COMPLETE DynamicPartitionHashJoin: true Reduce Output Operator key expressions: _col0 (type: char(16)) null sort order: z sort order: + Map-reduce partition columns: _col0 (type: char(16)) - Statistics: Num rows: 26666666 Data size: 11626666376 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col2 (type: decimal(18,2)), _col4 (type: decimal(18,2)), _col6 (type: decimal(18,2)) + Statistics: Num rows: 26666666 Data size: 11733333040 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col2 (type: decimal(18,2)), _col4 (type: decimal(18,2)), _col5 (type: boolean), _col7 (type: decimal(18,2)) Reducer 9 Execution mode: vectorized, llap Reduce Operator Tree: @@ -619,14 +623,14 @@ STAGE PLANS: keys: 0 KEY.reducesinkkey0 (type: char(16)) 1 KEY.reducesinkkey0 (type: char(16)) - outputColumnNames: _col0, _col2, _col4, _col6, _col8, _col9 + outputColumnNames: _col0, _col2, _col4, _col5, _col7, _col9, _col10 input vertices: 1 Reducer 17 - Statistics: Num rows: 26666666 Data size: 14719999632 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 26666666 Data size: 14826666296 Basic stats: COMPLETE Column stats: COMPLETE DynamicPartitionHashJoin: true Filter Operator - predicate: if(_col4 is not null, if(_col9, ((_col6 / _col8) > (_col2 / _col4)), (0 > (_col2 / _col4))), if(_col9, ((_col6 / _col8) > 0), false)) (type: boolean) - Statistics: Num rows: 13333333 Data size: 7359999816 Basic stats: COMPLETE Column stats: COMPLETE + predicate: if(_col5, if(_col10, ((_col7 / _col9) > (_col2 / _col4)), (0 > (_col2 / _col4))), if(_col10, ((_col7 / _col9) > 0), false)) (type: boolean) + Statistics: Num rows: 13333333 Data size: 7413333148 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: _col0 (type: char(16)) outputColumnNames: _col0 diff --git a/ql/src/test/results/clientpositive/perf/tpcds30tb/tez/query4.q.out b/ql/src/test/results/clientpositive/perf/tpcds30tb/tez/query4.q.out index d2d87dc9f22..5ae35bed472 100644 --- a/ql/src/test/results/clientpositive/perf/tpcds30tb/tez/query4.q.out +++ b/ql/src/test/results/clientpositive/perf/tpcds30tb/tez/query4.q.out @@ -474,13 +474,17 @@ STAGE PLANS: Filter Operator predicate: (_col1 > 0) (type: boolean) Statistics: Num rows: 26666666 Data size: 5653333192 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: char(16)) - null sort order: z - sort order: + - Map-reduce partition columns: _col0 (type: char(16)) - Statistics: Num rows: 26666666 Data size: 5653333192 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col1 (type: decimal(24,6)) + Select Operator + expressions: _col0 (type: char(16)), _col1 (type: decimal(24,6)), (_col1 > 0) (type: boolean) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 26666666 Data size: 5759999856 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: char(16)) + null sort order: z + sort order: + + Map-reduce partition columns: _col0 (type: char(16)) + Statistics: Num rows: 26666666 Data size: 5759999856 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col1 (type: decimal(24,6)), _col2 (type: boolean) Reducer 12 Execution mode: vectorized, llap Reduce Operator Tree: @@ -566,7 +570,7 @@ STAGE PLANS: predicate: (_col1 > 0) (type: boolean) Statistics: Num rows: 26666666 Data size: 5653333192 Basic stats: COMPLETE Column stats: COMPLETE Select Operator - expressions: _col0 (type: char(16)), _col1 (type: decimal(24,6)), _col1 is not null (type: boolean) + expressions: _col0 (type: char(16)), _col1 (type: decimal(24,6)), (_col1 > 0) (type: boolean) outputColumnNames: _col0, _col1, _col2 Statistics: Num rows: 26666666 Data size: 5759999856 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator @@ -689,7 +693,7 @@ STAGE PLANS: predicate: (_col1 > 0) (type: boolean) Statistics: Num rows: 26666666 Data size: 5653333192 Basic stats: COMPLETE Column stats: COMPLETE Select Operator - expressions: _col0 (type: char(16)), _col1 (type: decimal(24,6)), _col1 is not null (type: boolean) + expressions: _col0 (type: char(16)), _col1 (type: decimal(24,6)), (_col1 > 0) (type: boolean) outputColumnNames: _col0, _col1, _col2 Statistics: Num rows: 26666666 Data size: 5759999856 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator @@ -781,18 +785,18 @@ STAGE PLANS: keys: 0 KEY.reducesinkkey0 (type: char(16)) 1 KEY.reducesinkkey0 (type: char(16)) - outputColumnNames: _col0, _col2, _col4 + outputColumnNames: _col0, _col2, _col4, _col5 input vertices: 1 Reducer 10 - Statistics: Num rows: 26666666 Data size: 8639999784 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 26666666 Data size: 8746666448 Basic stats: COMPLETE Column stats: COMPLETE DynamicPartitionHashJoin: true Reduce Output Operator key expressions: _col0 (type: char(16)) null sort order: z sort order: + Map-reduce partition columns: _col0 (type: char(16)) - Statistics: Num rows: 26666666 Data size: 8639999784 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col2 (type: decimal(24,6)), _col4 (type: decimal(24,6)) + Statistics: Num rows: 26666666 Data size: 8746666448 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col2 (type: decimal(24,6)), _col4 (type: decimal(24,6)), _col5 (type: boolean) Reducer 5 Execution mode: vectorized, llap Reduce Operator Tree: @@ -802,18 +806,18 @@ STAGE PLANS: keys: 0 KEY.reducesinkkey0 (type: char(16)) 1 KEY.reducesinkkey0 (type: char(16)) - outputColumnNames: _col0, _col2, _col4, _col6 + outputColumnNames: _col0, _col2, _col4, _col5, _col7 input vertices: 1 Reducer 13 - Statistics: Num rows: 26666666 Data size: 11626666376 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 26666666 Data size: 11733333040 Basic stats: COMPLETE Column stats: COMPLETE DynamicPartitionHashJoin: true Reduce Output Operator key expressions: _col0 (type: char(16)) null sort order: z sort order: + Map-reduce partition columns: _col0 (type: char(16)) - Statistics: Num rows: 26666666 Data size: 11626666376 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col2 (type: decimal(24,6)), _col4 (type: decimal(24,6)), _col6 (type: decimal(24,6)) + Statistics: Num rows: 26666666 Data size: 11733333040 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col2 (type: decimal(24,6)), _col4 (type: decimal(24,6)), _col5 (type: boolean), _col7 (type: decimal(24,6)) Reducer 6 Execution mode: vectorized, llap Reduce Operator Tree: @@ -823,21 +827,21 @@ STAGE PLANS: keys: 0 KEY.reducesinkkey0 (type: char(16)) 1 KEY.reducesinkkey0 (type: char(16)) - outputColumnNames: _col0, _col2, _col4, _col6, _col8, _col9 + outputColumnNames: _col0, _col2, _col4, _col5, _col7, _col9, _col10 input vertices: 1 Reducer 15 - Statistics: Num rows: 26666666 Data size: 14719999632 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 26666666 Data size: 14826666296 Basic stats: COMPLETE Column stats: COMPLETE DynamicPartitionHashJoin: true Filter Operator - predicate: if(_col4 is not null, if(_col9, ((_col6 / _col8) > (_col2 / _col4)), false), false) (type: boolean) - Statistics: Num rows: 13333333 Data size: 7359999816 Basic stats: COMPLETE Column stats: COMPLETE + predicate: if(_col5, if(_col10, ((_col7 / _col9) > (_col2 / _col4)), false), false) (type: boolean) + Statistics: Num rows: 13333333 Data size: 7413333148 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: char(16)) null sort order: z sort order: + Map-reduce partition columns: _col0 (type: char(16)) - Statistics: Num rows: 13333333 Data size: 7359999816 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col6 (type: decimal(24,6)), _col8 (type: decimal(24,6)), _col9 (type: boolean) + Statistics: Num rows: 13333333 Data size: 7413333148 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col7 (type: decimal(24,6)), _col9 (type: decimal(24,6)), _col10 (type: boolean) Reducer 7 Execution mode: vectorized, llap Reduce Operator Tree: @@ -847,7 +851,7 @@ STAGE PLANS: keys: 0 KEY.reducesinkkey0 (type: char(16)) 1 KEY.reducesinkkey0 (type: char(16)) - outputColumnNames: _col0, _col6, _col8, _col9, _col11 + outputColumnNames: _col0, _col7, _col9, _col10, _col12 input vertices: 1 Reducer 19 Statistics: Num rows: 13333333 Data size: 5866666520 Basic stats: COMPLETE Column stats: COMPLETE @@ -858,7 +862,7 @@ STAGE PLANS: sort order: + Map-reduce partition columns: _col0 (type: char(16)) Statistics: Num rows: 13333333 Data size: 5866666520 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col6 (type: decimal(24,6)), _col8 (type: decimal(24,6)), _col9 (type: boolean), _col11 (type: decimal(24,6)) + value expressions: _col7 (type: decimal(24,6)), _col9 (type: decimal(24,6)), _col10 (type: boolean), _col12 (type: decimal(24,6)) Reducer 8 Execution mode: vectorized, llap Reduce Operator Tree: @@ -868,13 +872,13 @@ STAGE PLANS: keys: 0 KEY.reducesinkkey0 (type: char(16)) 1 KEY.reducesinkkey0 (type: char(16)) - outputColumnNames: _col0, _col6, _col8, _col9, _col11, _col13, _col14 + outputColumnNames: _col0, _col7, _col9, _col10, _col12, _col14, _col15 input vertices: 1 Reducer 21 Statistics: Num rows: 13333333 Data size: 7413333148 Basic stats: COMPLETE Column stats: COMPLETE DynamicPartitionHashJoin: true Filter Operator - predicate: if(_col14, if(_col9, ((_col6 / _col8) > (_col11 / _col13)), false), false) (type: boolean) + predicate: if(_col15, if(_col10, ((_col7 / _col9) > (_col12 / _col14)), false), false) (type: boolean) Statistics: Num rows: 6666666 Data size: 3706666296 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: _col0 (type: char(16)) diff --git a/ql/src/test/results/clientpositive/perf/tpcds30tb/tez/query74.q.out b/ql/src/test/results/clientpositive/perf/tpcds30tb/tez/query74.q.out index ed8f381dfa0..bf65d327cbd 100644 --- a/ql/src/test/results/clientpositive/perf/tpcds30tb/tez/query74.q.out +++ b/ql/src/test/results/clientpositive/perf/tpcds30tb/tez/query74.q.out @@ -351,7 +351,7 @@ STAGE PLANS: predicate: (_col1 > 0) (type: boolean) Statistics: Num rows: 80000000 Data size: 16960000000 Basic stats: COMPLETE Column stats: COMPLETE Select Operator - expressions: _col0 (type: char(16)), _col1 (type: decimal(17,2)), _col1 is not null (type: boolean) + expressions: _col0 (type: char(16)), _col1 (type: decimal(17,2)), (_col1 > 0) (type: boolean) outputColumnNames: _col0, _col1, _col2 Statistics: Num rows: 80000000 Data size: 17280000000 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator @@ -469,13 +469,17 @@ STAGE PLANS: Filter Operator predicate: (_col1 > 0) (type: boolean) Statistics: Num rows: 80000000 Data size: 16960000000 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: char(16)) - null sort order: z - sort order: + - Map-reduce partition columns: _col0 (type: char(16)) - Statistics: Num rows: 80000000 Data size: 16960000000 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col1 (type: decimal(17,2)) + Select Operator + expressions: _col0 (type: char(16)), _col1 (type: decimal(17,2)), (_col1 > 0) (type: boolean) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 80000000 Data size: 17280000000 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: char(16)) + null sort order: z + sort order: + + Map-reduce partition columns: _col0 (type: char(16)) + Statistics: Num rows: 80000000 Data size: 17280000000 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col1 (type: decimal(17,2)), _col2 (type: boolean) Reducer 6 Execution mode: vectorized, llap Reduce Operator Tree: @@ -485,18 +489,18 @@ STAGE PLANS: keys: 0 KEY.reducesinkkey0 (type: char(16)) 1 KEY.reducesinkkey0 (type: char(16)) - outputColumnNames: _col0, _col1, _col2, _col3, _col5 + outputColumnNames: _col0, _col1, _col2, _col3, _col5, _col6 input vertices: 0 Reducer 3 - Statistics: Num rows: 80000000 Data size: 40320000000 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 80000000 Data size: 40640000000 Basic stats: COMPLETE Column stats: COMPLETE DynamicPartitionHashJoin: true Reduce Output Operator key expressions: _col0 (type: char(16)) null sort order: z sort order: + Map-reduce partition columns: _col0 (type: char(16)) - Statistics: Num rows: 80000000 Data size: 40320000000 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col1 (type: char(20)), _col2 (type: char(30)), _col3 (type: decimal(17,2)), _col5 (type: decimal(17,2)) + Statistics: Num rows: 80000000 Data size: 40640000000 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col1 (type: char(20)), _col2 (type: char(30)), _col3 (type: decimal(17,2)), _col5 (type: decimal(17,2)), _col6 (type: boolean) Reducer 7 Execution mode: vectorized, llap Reduce Operator Tree: @@ -506,18 +510,18 @@ STAGE PLANS: keys: 0 KEY.reducesinkkey0 (type: char(16)) 1 KEY.reducesinkkey0 (type: char(16)) - outputColumnNames: _col0, _col1, _col2, _col3, _col5, _col7 + outputColumnNames: _col0, _col1, _col2, _col3, _col5, _col6, _col8 input vertices: 1 Reducer 12 - Statistics: Num rows: 80000000 Data size: 49280000000 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 80000000 Data size: 49600000000 Basic stats: COMPLETE Column stats: COMPLETE DynamicPartitionHashJoin: true Reduce Output Operator key expressions: _col0 (type: char(16)) null sort order: z sort order: + Map-reduce partition columns: _col0 (type: char(16)) - Statistics: Num rows: 80000000 Data size: 49280000000 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col1 (type: char(20)), _col2 (type: char(30)), _col3 (type: decimal(17,2)), _col5 (type: decimal(17,2)), _col7 (type: decimal(17,2)) + Statistics: Num rows: 80000000 Data size: 49600000000 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col1 (type: char(20)), _col2 (type: char(30)), _col3 (type: decimal(17,2)), _col5 (type: decimal(17,2)), _col6 (type: boolean), _col8 (type: decimal(17,2)) Reducer 8 Execution mode: vectorized, llap Reduce Operator Tree: @@ -527,19 +531,19 @@ STAGE PLANS: keys: 0 KEY.reducesinkkey0 (type: char(16)) 1 KEY.reducesinkkey0 (type: char(16)) - outputColumnNames: _col0, _col1, _col2, _col3, _col5, _col7, _col9, _col10 + outputColumnNames: _col0, _col1, _col2, _col3, _col5, _col6, _col8, _col10, _col11 input vertices: 1 Reducer 14 - Statistics: Num rows: 80000000 Data size: 58560000000 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 80000000 Data size: 58880000000 Basic stats: COMPLETE Column stats: COMPLETE DynamicPartitionHashJoin: true Filter Operator - predicate: if(_col5 is not null, if(_col10, ((_col7 / _col9) > (_col3 / _col5)), false), false) (type: boolean) - Statistics: Num rows: 40000000 Data size: 29280000000 Basic stats: COMPLETE Column stats: COMPLETE + predicate: if(_col6, if(_col11, ((_col8 / _col10) > (_col3 / _col5)), false), false) (type: boolean) + Statistics: Num rows: 40000000 Data size: 29440000000 Basic stats: COMPLETE Column stats: COMPLETE Top N Key Operator sort order: +++ keys: _col2 (type: char(30)), _col0 (type: char(16)), _col1 (type: char(20)) null sort order: zzz - Statistics: Num rows: 40000000 Data size: 29280000000 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 40000000 Data size: 29440000000 Basic stats: COMPLETE Column stats: COMPLETE top n: 100 Select Operator expressions: _col0 (type: char(16)), _col1 (type: char(20)), _col2 (type: char(30))