HIVE-20835: Interaction between constraints and MV rewriting may create loop in Calcite planner (Jesus Camacho Rodriguez, reviewed by Ashutosh Chauhan)
Project: http://git-wip-us.apache.org/repos/asf/hive/repo Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/f4a48fdb Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/f4a48fdb Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/f4a48fdb Branch: refs/heads/master Commit: f4a48fdbbdbd56aa0e5765fa899361f0669d9865 Parents: 353c55e Author: Jesus Camacho Rodriguez <[email protected]> Authored: Wed Oct 31 15:05:51 2018 -0700 Committer: Jesus Camacho Rodriguez <[email protected]> Committed: Tue Nov 6 07:27:48 2018 -0800 ---------------------------------------------------------------------- .../test/resources/testconfiguration.properties | 5 +- .../hadoop/hive/cli/control/CliConfigs.java | 6 + .../rules/HiveProjectJoinTransposeRule.java | 120 ++- .../hadoop/hive/ql/parse/CalcitePlanner.java | 2 +- .../queries/clientpositive/perf/mv_query44.q | 47 + .../perf/tez/constraints/cbo_query11.q.out | 6 +- .../perf/tez/constraints/cbo_query2.q.out | 6 +- .../perf/tez/constraints/cbo_query23.q.out | 150 ++-- .../perf/tez/constraints/cbo_query31.q.out | 70 +- .../perf/tez/constraints/cbo_query4.q.out | 10 +- .../perf/tez/constraints/cbo_query58.q.out | 10 +- .../perf/tez/constraints/cbo_query59.q.out | 6 +- .../perf/tez/constraints/cbo_query61.q.out | 6 +- .../perf/tez/constraints/cbo_query74.q.out | 6 +- .../perf/tez/constraints/cbo_query75.q.out | 8 +- .../perf/tez/constraints/cbo_query83.q.out | 10 +- .../perf/tez/constraints/cbo_query90.q.out | 6 +- .../perf/tez/constraints/mv_query44.q.out | 214 +++++ .../perf/tez/constraints/query11.q.out | 20 +- .../perf/tez/constraints/query2.q.out | 34 +- .../perf/tez/constraints/query23.q.out | 630 +++++++------ .../perf/tez/constraints/query31.q.out | 442 +++++----- .../perf/tez/constraints/query4.q.out | 32 +- .../perf/tez/constraints/query44.q.out | 2 +- .../perf/tez/constraints/query58.q.out | 216 +++-- .../perf/tez/constraints/query59.q.out | 4 +- .../perf/tez/constraints/query61.q.out | 482 +++++----- .../perf/tez/constraints/query74.q.out | 20 +- .../perf/tez/constraints/query75.q.out | 882 +++++++++---------- .../perf/tez/constraints/query83.q.out | 160 ++-- .../perf/tez/constraints/query90.q.out | 342 ++++--- 31 files changed, 2145 insertions(+), 1809 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/hive/blob/f4a48fdb/itests/src/test/resources/testconfiguration.properties ---------------------------------------------------------------------- diff --git a/itests/src/test/resources/testconfiguration.properties b/itests/src/test/resources/testconfiguration.properties index da2091a..eeca1c9 100644 --- a/itests/src/test/resources/testconfiguration.properties +++ b/itests/src/test/resources/testconfiguration.properties @@ -1731,6 +1731,8 @@ spark.only.query.negative.files=spark_job_max_tasks.q,\ spark_submit_negative_executor_cores.q,\ spark_submit_negative_executor_memory.q +tez.perf.disabled.query.files=mv_query44.q + spark.perf.disabled.query.files=query14.q,\ query64.q,\ cbo_query1.q,\ @@ -1829,7 +1831,8 @@ spark.perf.disabled.query.files=query14.q,\ cbo_query96.q,\ cbo_query97.q,\ cbo_query98.q,\ - cbo_query99.q + cbo_query99.q,\ + mv_query44.q druid.query.files=druidmini_test1.q,\ druidmini_test_ts.q,\ http://git-wip-us.apache.org/repos/asf/hive/blob/f4a48fdb/itests/util/src/main/java/org/apache/hadoop/hive/cli/control/CliConfigs.java ---------------------------------------------------------------------- diff --git a/itests/util/src/main/java/org/apache/hadoop/hive/cli/control/CliConfigs.java b/itests/util/src/main/java/org/apache/hadoop/hive/cli/control/CliConfigs.java index afff0df..df058ea 100644 --- a/itests/util/src/main/java/org/apache/hadoop/hive/cli/control/CliConfigs.java +++ b/itests/util/src/main/java/org/apache/hadoop/hive/cli/control/CliConfigs.java @@ -285,6 +285,12 @@ public class CliConfigs { try { setQueryDir("ql/src/test/queries/clientpositive/perf"); + if (useConstraints) { + excludesFrom(testConfigProps, "tez.perf.constraints.disabled.query.files"); + } else { + excludesFrom(testConfigProps, "tez.perf.disabled.query.files"); + } + excludesFrom(testConfigProps, "minimr.query.files"); excludesFrom(testConfigProps, "minitez.query.files"); excludesFrom(testConfigProps, "encrypted.query.files"); http://git-wip-us.apache.org/repos/asf/hive/blob/f4a48fdb/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveProjectJoinTransposeRule.java ---------------------------------------------------------------------- diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveProjectJoinTransposeRule.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveProjectJoinTransposeRule.java index 43c7896..38759c0 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveProjectJoinTransposeRule.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveProjectJoinTransposeRule.java @@ -17,17 +17,131 @@ */ package org.apache.hadoop.hive.ql.optimizer.calcite.rules; -import org.apache.calcite.rel.rules.ProjectJoinTransposeRule; +import java.util.ArrayList; +import java.util.List; +import org.apache.calcite.plan.RelOptRule; +import org.apache.calcite.plan.RelOptRuleCall; +import org.apache.calcite.rel.RelNode; +import org.apache.calcite.rel.core.Join; +import org.apache.calcite.rel.core.Project; +import org.apache.calcite.rel.core.SemiJoin; +import org.apache.calcite.rel.rules.PushProjector; +import org.apache.calcite.rel.type.RelDataTypeField; +import org.apache.calcite.rex.RexNode; import org.apache.calcite.tools.RelBuilderFactory; import org.apache.hadoop.hive.ql.optimizer.calcite.HiveRelFactories; -public class HiveProjectJoinTransposeRule extends ProjectJoinTransposeRule { +/** + * Planner rule that pushes a {@link org.apache.calcite.rel.core.Project} + * past a {@link org.apache.calcite.rel.core.Join} + * by splitting the projection into a projection on top of each child of + * the join. + * TODO: Use Calcite rule once we can pass the matching operand as a parameter + */ +public class HiveProjectJoinTransposeRule extends RelOptRule { public static final HiveProjectJoinTransposeRule INSTANCE = new HiveProjectJoinTransposeRule(HiveRelFactories.HIVE_BUILDER); + /** + * Condition for expressions that should be preserved in the projection. + */ + private final PushProjector.ExprCondition preserveExprCondition; + private HiveProjectJoinTransposeRule(RelBuilderFactory relBuilderFactory) { - super(expr -> true, relBuilderFactory); + super( + operand(Project.class, + operand(Join.class, + operand(RelNode.class, any()), + operand(RelNode.class, any()))), + relBuilderFactory, "HiveProjectJoinTransposeRule"); + this.preserveExprCondition = expr -> true; + } + + @Override + public boolean matches(RelOptRuleCall call) { + final RelNode leftInput = call.rel(2); + final RelNode rightInput = call.rel(3); + + if (leftInput instanceof Project && rightInput instanceof Project) { + return false; + } + + return true; + } + + //~ Methods ---------------------------------------------------------------- + + // implement RelOptRule + public void onMatch(RelOptRuleCall call) { + Project origProj = call.rel(0); + final Join join = call.rel(1); + + if (join instanceof SemiJoin) { + return; // TODO: support SemiJoin + } + // locate all fields referenced in the projection and join condition; + // determine which inputs are referenced in the projection and + // join condition; if all fields are being referenced and there are no + // special expressions, no point in proceeding any further + PushProjector pushProject = + new PushProjector( + origProj, + join.getCondition(), + join, + preserveExprCondition, + call.builder()); + if (pushProject.locateAllRefs()) { + return; + } + + // create left and right projections, projecting only those + // fields referenced on each side + RelNode leftProjRel = + pushProject.createProjectRefsAndExprs( + join.getLeft(), + true, + false); + RelNode rightProjRel = + pushProject.createProjectRefsAndExprs( + join.getRight(), + true, + true); + + // convert the join condition to reference the projected columns + RexNode newJoinFilter = null; + int[] adjustments = pushProject.getAdjustments(); + if (join.getCondition() != null) { + List<RelDataTypeField> projJoinFieldList = new ArrayList<>(); + projJoinFieldList.addAll( + join.getSystemFieldList()); + projJoinFieldList.addAll( + leftProjRel.getRowType().getFieldList()); + projJoinFieldList.addAll( + rightProjRel.getRowType().getFieldList()); + newJoinFilter = + pushProject.convertRefsAndExprs( + join.getCondition(), + projJoinFieldList, + adjustments); + } + + // create a new join with the projected children + Join newJoinRel = + join.copy( + join.getTraitSet(), + newJoinFilter, + leftProjRel, + rightProjRel, + join.getJoinType(), + join.isSemiJoinDone()); + + // put the original project on top of the join, converting it to + // reference the modified projection list + RelNode topProject = + pushProject.createNewProject(newJoinRel, adjustments); + + call.transformTo(topProject); } } http://git-wip-us.apache.org/repos/asf/hive/blob/f4a48fdb/ql/src/java/org/apache/hadoop/hive/ql/parse/CalcitePlanner.java ---------------------------------------------------------------------- diff --git a/ql/src/java/org/apache/hadoop/hive/ql/parse/CalcitePlanner.java b/ql/src/java/org/apache/hadoop/hive/ql/parse/CalcitePlanner.java index ab63ce2..11c8f5f 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/parse/CalcitePlanner.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/parse/CalcitePlanner.java @@ -2250,7 +2250,7 @@ public class CalcitePlanner extends SemanticAnalyzer { perfLogger.PerfLogEnd(this.getClass().getName(), PerfLogger.OPTIMIZER, "Calcite: View-based rewriting"); - if (calcitePreMVRewritingPlan != basePlan) { + if (!RelOptUtil.toString(calcitePreMVRewritingPlan).equals(RelOptUtil.toString(basePlan))) { // A rewriting was produced, we will check whether it was part of an incremental rebuild // to try to replace INSERT OVERWRITE by INSERT if (mvRebuildMode == MaterializationRebuildMode.INSERT_OVERWRITE_REBUILD && http://git-wip-us.apache.org/repos/asf/hive/blob/f4a48fdb/ql/src/test/queries/clientpositive/perf/mv_query44.q ---------------------------------------------------------------------- diff --git a/ql/src/test/queries/clientpositive/perf/mv_query44.q b/ql/src/test/queries/clientpositive/perf/mv_query44.q new file mode 100644 index 0000000..7415f5e --- /dev/null +++ b/ql/src/test/queries/clientpositive/perf/mv_query44.q @@ -0,0 +1,47 @@ +set hive.mapred.mode=nonstrict; +set hive.materializedview.rewriting.time.window=-1; + +-- start query 1 in stream 0 using template query44.tpl and seed 1819994127 + +CREATE MATERIALIZED VIEW mv_store_sales_item_customer PARTITIONED ON (ss_sold_date_sk) +AS + select ss_item_sk, ss_store_sk, ss_customer_sk, ss_sold_date_sk, count(*) cnt, sum(ss_quantity) as ss_quantity, sum(ss_ext_wholesale_cost) as ss_ext_wholesale_cost,sum(ss_net_paid) as ss_net_paid,sum(ss_net_profit) as ss_net_profit, sum(ss_ext_sales_price) as ss_ext_sales_price, sum(ss_coupon_amt) amt, sum(ss_sales_price) ss_sales_price, sum(ss_quantity*ss_sales_price) ssales + from store_sales + group by ss_store_sk, + ss_item_sk, ss_customer_sk, ss_sold_date_sk; + +explain +select asceding.rnk, i1.i_product_name best_performing, i2.i_product_name worst_performing +from(select * + from (select item_sk,rank() over (order by rank_col asc) rnk + from (select ss_item_sk item_sk,avg(ss_net_profit) rank_col + from store_sales ss1 + where ss_store_sk = 410 + group by ss_item_sk + having avg(ss_net_profit) > 0.9*(select avg(ss_net_profit) rank_col + from store_sales + where ss_store_sk = 410 + and ss_hdemo_sk is null + group by ss_store_sk))V1)V11 + where rnk < 11) asceding, + (select * + from (select item_sk,rank() over (order by rank_col desc) rnk + from (select ss_item_sk item_sk,avg(ss_net_profit) rank_col + from store_sales ss1 + where ss_store_sk = 410 + group by ss_item_sk + having avg(ss_net_profit) > 0.9*(select avg(ss_net_profit) rank_col + from store_sales + where ss_store_sk = 410 + and ss_hdemo_sk is null + group by ss_store_sk))V2)V21 + where rnk < 11) descending, +item i1, +item i2 +where asceding.rnk = descending.rnk + and i1.i_item_sk=asceding.item_sk + and i2.i_item_sk=descending.item_sk +order by asceding.rnk +limit 100; + +-- end query 1 in stream 0 using template query44.tpl http://git-wip-us.apache.org/repos/asf/hive/blob/f4a48fdb/ql/src/test/results/clientpositive/perf/tez/constraints/cbo_query11.q.out ---------------------------------------------------------------------- diff --git a/ql/src/test/results/clientpositive/perf/tez/constraints/cbo_query11.q.out b/ql/src/test/results/clientpositive/perf/tez/constraints/cbo_query11.q.out index cd1eb71..24bd6fd 100644 --- a/ql/src/test/results/clientpositive/perf/tez/constraints/cbo_query11.q.out +++ b/ql/src/test/results/clientpositive/perf/tez/constraints/cbo_query11.q.out @@ -159,7 +159,7 @@ POSTHOOK: Output: hdfs://### HDFS PATH ### CBO PLAN: HiveSortLimit(sort0=[$0], dir0=[ASC], fetch=[100]) HiveProject(c_preferred_cust_flag=[$1]) - HiveJoin(condition=[AND(=($0, $5), CASE($7, CASE($10, >(/($4, $9), /($2, $6)), >(null, /($2, $6))), CASE($10, >(/($4, $9), null), null)))], joinType=[inner], algorithm=[none], cost=[not available]) + HiveJoin(condition=[AND(=($0, $5), CASE(CAST(IS NOT NULL($6)):BOOLEAN, CASE($9, >(/($4, $8), /($2, $6)), >(null, /($2, $6))), CASE($9, >(/($4, $8), null), null)))], joinType=[inner], algorithm=[none], cost=[not available]) HiveProject($f0=[$0], $f4=[$3], $f9=[$7]) HiveAggregate(group=[{1, 2, 3, 4, 5, 6, 7}], agg#0=[sum($10)]) HiveJoin(condition=[=($0, $9)], joinType=[inner], algorithm=[none], cost=[not available]) @@ -172,7 +172,7 @@ HiveSortLimit(sort0=[$0], dir0=[ASC], fetch=[100]) HiveProject(d_date_sk=[$0]) HiveFilter(condition=[=($6, 2002)]) HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) - HiveJoin(condition=[=($2, $5)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveJoin(condition=[=($2, $4)], joinType=[inner], algorithm=[none], cost=[not available]) HiveJoin(condition=[=($2, $0)], joinType=[inner], algorithm=[none], cost=[not available]) HiveProject($f0=[$0], $f8=[$7]) HiveAggregate(group=[{1, 2, 3, 4, 5, 6, 7}], agg#0=[sum($10)]) @@ -186,7 +186,7 @@ HiveSortLimit(sort0=[$0], dir0=[ASC], fetch=[100]) HiveProject(d_date_sk=[$0]) HiveFilter(condition=[=($6, 2002)]) HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) - HiveProject(customer_id=[$0], year_total=[$7], CAST=[CAST(IS NOT NULL($7)):BOOLEAN]) + HiveProject($f0=[$0], $f9=[$7]) HiveFilter(condition=[>($7, 0)]) HiveAggregate(group=[{1, 2, 3, 4, 5, 6, 7}], agg#0=[sum($10)]) HiveJoin(condition=[=($0, $9)], joinType=[inner], algorithm=[none], cost=[not available]) http://git-wip-us.apache.org/repos/asf/hive/blob/f4a48fdb/ql/src/test/results/clientpositive/perf/tez/constraints/cbo_query2.q.out ---------------------------------------------------------------------- diff --git a/ql/src/test/results/clientpositive/perf/tez/constraints/cbo_query2.q.out b/ql/src/test/results/clientpositive/perf/tez/constraints/cbo_query2.q.out index c245b9b..07ab7a1 100644 --- a/ql/src/test/results/clientpositive/perf/tez/constraints/cbo_query2.q.out +++ b/ql/src/test/results/clientpositive/perf/tez/constraints/cbo_query2.q.out @@ -126,8 +126,8 @@ POSTHOOK: Input: default@web_sales POSTHOOK: Output: hdfs://### HDFS PATH ### CBO PLAN: HiveSortLimit(sort0=[$0], dir0=[ASC]) - HiveProject(d_week_seq1=[$0], _o__c1=[round(/($1, $9), 2)], _o__c2=[round(/($2, $10), 2)], _o__c3=[round(/($3, $11), 2)], _o__c4=[round(/($4, $12), 2)], _o__c5=[round(/($5, $13), 2)], _o__c6=[round(/($6, $14), 2)], _o__c7=[round(/($7, $15), 2)]) - HiveJoin(condition=[=($0, $16)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(d_week_seq1=[$0], _o__c1=[round(/($1, $10), 2)], _o__c2=[round(/($2, $11), 2)], _o__c3=[round(/($3, $12), 2)], _o__c4=[round(/($4, $13), 2)], _o__c5=[round(/($5, $14), 2)], _o__c6=[round(/($6, $15), 2)], _o__c7=[round(/($7, $16), 2)]) + HiveJoin(condition=[=($0, -($9, 53))], joinType=[inner], algorithm=[none], cost=[not available]) HiveJoin(condition=[=($8, $0)], joinType=[inner], algorithm=[none], cost=[not available]) HiveProject($f0=[$0], $f1=[$1], $f2=[$2], $f3=[$3], $f4=[$4], $f5=[$5], $f6=[$6], $f7=[$7]) HiveAggregate(group=[{0}], agg#0=[sum($1)], agg#1=[sum($2)], agg#2=[sum($3)], agg#3=[sum($4)], agg#4=[sum($5)], agg#5=[sum($6)], agg#6=[sum($7)]) @@ -147,7 +147,7 @@ HiveSortLimit(sort0=[$0], dir0=[ASC]) HiveProject(d_week_seq=[$4]) HiveFilter(condition=[AND(=($6, 2001), IS NOT NULL($4))]) HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) - HiveProject(sun_sales2=[$1], mon_sales2=[$2], tue_sales2=[$3], wed_sales2=[$4], thu_sales2=[$5], fri_sales2=[$6], sat_sales2=[$7], -=[-($0, 53)]) + HiveProject(d_week_seq2=[$0], sun_sales2=[$1], mon_sales2=[$2], tue_sales2=[$3], wed_sales2=[$4], thu_sales2=[$5], fri_sales2=[$6], sat_sales2=[$7]) HiveJoin(condition=[=($8, $0)], joinType=[inner], algorithm=[none], cost=[not available]) HiveProject($f0=[$0], $f1=[$1], $f2=[$2], $f3=[$3], $f4=[$4], $f5=[$5], $f6=[$6], $f7=[$7]) HiveAggregate(group=[{0}], agg#0=[sum($1)], agg#1=[sum($2)], agg#2=[sum($3)], agg#3=[sum($4)], agg#4=[sum($5)], agg#5=[sum($6)], agg#6=[sum($7)]) http://git-wip-us.apache.org/repos/asf/hive/blob/f4a48fdb/ql/src/test/results/clientpositive/perf/tez/constraints/cbo_query23.q.out ---------------------------------------------------------------------- diff --git a/ql/src/test/results/clientpositive/perf/tez/constraints/cbo_query23.q.out b/ql/src/test/results/clientpositive/perf/tez/constraints/cbo_query23.q.out index 9629a71..1d2e0df 100644 --- a/ql/src/test/results/clientpositive/perf/tez/constraints/cbo_query23.q.out +++ b/ql/src/test/results/clientpositive/perf/tez/constraints/cbo_query23.q.out @@ -1,7 +1,7 @@ -Warning: Shuffle Join MERGEJOIN[445][tables = [$hdt$_1, $hdt$_2]] in Stage 'Reducer 19' is a cross product -Warning: Shuffle Join MERGEJOIN[446][tables = [$hdt$_1, $hdt$_2, $hdt$_0]] in Stage 'Reducer 20' is a cross product -Warning: Shuffle Join MERGEJOIN[448][tables = [$hdt$_2, $hdt$_3]] in Stage 'Reducer 24' is a cross product -Warning: Shuffle Join MERGEJOIN[449][tables = [$hdt$_2, $hdt$_3, $hdt$_1]] in Stage 'Reducer 25' is a cross product +Warning: Shuffle Join MERGEJOIN[437][tables = [$hdt$_1, $hdt$_2]] in Stage 'Reducer 19' is a cross product +Warning: Shuffle Join MERGEJOIN[438][tables = [$hdt$_1, $hdt$_2, $hdt$_0]] in Stage 'Reducer 20' is a cross product +Warning: Shuffle Join MERGEJOIN[440][tables = [$hdt$_2, $hdt$_3]] in Stage 'Reducer 23' is a cross product +Warning: Shuffle Join MERGEJOIN[441][tables = [$hdt$_2, $hdt$_3, $hdt$_1]] in Stage 'Reducer 24' is a cross product PREHOOK: query: explain cbo with frequent_ss_items as (select substr(i_item_desc,1,30) itemdesc,i_item_sk item_sk,d_date solddate,count(*) cnt @@ -127,42 +127,41 @@ HiveSortLimit(fetch=[100]) HiveProject(sales=[*(CAST($4):DECIMAL(10, 0), $5)]) HiveJoin(condition=[=($3, $7)], joinType=[inner], algorithm=[none], cost=[not available]) HiveJoin(condition=[=($2, $0)], joinType=[inner], algorithm=[none], cost=[not available]) - HiveProject(ss_customer_sk=[$0]) - HiveAggregate(group=[{0}]) - HiveJoin(condition=[>($1, $3)], joinType=[inner], algorithm=[none], cost=[not available]) - HiveProject(ss_customer_sk=[$0], $f1=[$1]) - HiveAggregate(group=[{0}], agg#0=[sum($1)]) - HiveProject(ss_customer_sk=[CAST($3):INTEGER NOT NULL], *=[*(CAST($10):DECIMAL(10, 0), $13)]) - HiveFilter(condition=[IS NOT NULL($3)]) - HiveTableScan(table=[[default, store_sales]], table:alias=[store_sales]) - HiveJoin(condition=[true], joinType=[inner], algorithm=[none], cost=[not available]) - HiveProject(cnt=[$0]) - HiveFilter(condition=[<=(sq_count_check($0), 1)]) - HiveProject(cnt=[$0]) - HiveAggregate(group=[{}], cnt=[COUNT()]) - HiveProject - HiveProject($f0=[$0]) - HiveAggregate(group=[{}], agg#0=[count($0)]) - HiveProject(ss_customer_sk=[$0], $f1=[$1]) - HiveAggregate(group=[{1}], agg#0=[sum($2)]) - HiveJoin(condition=[=($0, $3)], joinType=[inner], algorithm=[none], cost=[not available]) - HiveProject(ss_sold_date_sk=[$0], ss_customer_sk=[CAST($3):INTEGER NOT NULL], *=[*(CAST($10):DECIMAL(10, 0), $13)]) - HiveFilter(condition=[AND(IS NOT NULL($3), IS NOT NULL($0))]) - HiveTableScan(table=[[default, store_sales]], table:alias=[store_sales]) - HiveProject(d_date_sk=[$0]) - HiveFilter(condition=[IN($6, 1999, 2000, 2001, 2002)]) - HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) - HiveProject(*=[*(0.95, $0)]) - HiveAggregate(group=[{}], agg#0=[max($1)]) - HiveProject(ss_customer_sk=[$0], $f1=[$1]) - HiveAggregate(group=[{1}], agg#0=[sum($2)]) - HiveJoin(condition=[=($0, $3)], joinType=[inner], algorithm=[none], cost=[not available]) - HiveProject(ss_sold_date_sk=[$0], ss_customer_sk=[CAST($3):INTEGER NOT NULL], *=[*(CAST($10):DECIMAL(10, 0), $13)]) - HiveFilter(condition=[AND(IS NOT NULL($3), IS NOT NULL($0))]) - HiveTableScan(table=[[default, store_sales]], table:alias=[store_sales]) - HiveProject(d_date_sk=[$0]) - HiveFilter(condition=[IN($6, 1999, 2000, 2001, 2002)]) - HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) + HiveProject($f0=[$0]) + HiveJoin(condition=[>($1, *(0.95, $3))], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(ss_customer_sk=[$0], $f1=[$1]) + HiveAggregate(group=[{0}], agg#0=[sum($1)]) + HiveProject(ss_customer_sk=[CAST($3):INTEGER NOT NULL], *=[*(CAST($10):DECIMAL(10, 0), $13)]) + HiveFilter(condition=[IS NOT NULL($3)]) + HiveTableScan(table=[[default, store_sales]], table:alias=[store_sales]) + HiveJoin(condition=[true], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(cnt=[$0]) + HiveFilter(condition=[<=(sq_count_check($0), 1)]) + HiveProject(cnt=[$0]) + HiveAggregate(group=[{}], cnt=[COUNT()]) + HiveProject + HiveProject($f0=[$0]) + HiveAggregate(group=[{}], agg#0=[count($0)]) + HiveProject(ss_customer_sk=[$0], $f1=[$1]) + HiveAggregate(group=[{1}], agg#0=[sum($2)]) + HiveJoin(condition=[=($0, $3)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(ss_sold_date_sk=[$0], ss_customer_sk=[CAST($3):INTEGER NOT NULL], *=[*(CAST($10):DECIMAL(10, 0), $13)]) + HiveFilter(condition=[AND(IS NOT NULL($3), IS NOT NULL($0))]) + HiveTableScan(table=[[default, store_sales]], table:alias=[store_sales]) + HiveProject(d_date_sk=[$0]) + HiveFilter(condition=[IN($6, 1999, 2000, 2001, 2002)]) + HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) + HiveProject($f0=[$0]) + HiveAggregate(group=[{}], agg#0=[max($1)]) + HiveProject(ss_customer_sk=[$0], $f1=[$1]) + HiveAggregate(group=[{1}], agg#0=[sum($2)]) + HiveJoin(condition=[=($0, $3)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(ss_sold_date_sk=[$0], ss_customer_sk=[CAST($3):INTEGER NOT NULL], *=[*(CAST($10):DECIMAL(10, 0), $13)]) + HiveFilter(condition=[AND(IS NOT NULL($3), IS NOT NULL($0))]) + HiveTableScan(table=[[default, store_sales]], table:alias=[store_sales]) + HiveProject(d_date_sk=[$0]) + HiveFilter(condition=[IN($6, 1999, 2000, 2001, 2002)]) + HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) HiveJoin(condition=[=($0, $5)], joinType=[inner], algorithm=[none], cost=[not available]) HiveProject(cs_sold_date_sk=[$0], cs_bill_customer_sk=[$3], cs_item_sk=[$15], cs_quantity=[$18], cs_list_price=[$20]) HiveFilter(condition=[AND(IS NOT NULL($3), IS NOT NULL($0))]) @@ -199,42 +198,41 @@ HiveSortLimit(fetch=[100]) HiveProject(i_item_sk=[$0], substr=[substr($4, 1, 30)]) HiveTableScan(table=[[default, item]], table:alias=[item]) HiveJoin(condition=[=($3, $0)], joinType=[inner], algorithm=[none], cost=[not available]) - HiveProject(ss_customer_sk=[$0]) - HiveAggregate(group=[{0}]) - HiveJoin(condition=[>($1, $3)], joinType=[inner], algorithm=[none], cost=[not available]) - HiveProject(ss_customer_sk=[$0], $f1=[$1]) - HiveAggregate(group=[{0}], agg#0=[sum($1)]) - HiveProject(ss_customer_sk=[CAST($3):INTEGER NOT NULL], *=[*(CAST($10):DECIMAL(10, 0), $13)]) - HiveFilter(condition=[IS NOT NULL($3)]) - HiveTableScan(table=[[default, store_sales]], table:alias=[store_sales]) - HiveJoin(condition=[true], joinType=[inner], algorithm=[none], cost=[not available]) - HiveProject(cnt=[$0]) - HiveFilter(condition=[<=(sq_count_check($0), 1)]) - HiveProject(cnt=[$0]) - HiveAggregate(group=[{}], cnt=[COUNT()]) - HiveProject - HiveProject($f0=[$0]) - HiveAggregate(group=[{}], agg#0=[count($0)]) - HiveProject(ss_customer_sk=[$0], $f1=[$1]) - HiveAggregate(group=[{1}], agg#0=[sum($2)]) - HiveJoin(condition=[=($0, $3)], joinType=[inner], algorithm=[none], cost=[not available]) - HiveProject(ss_sold_date_sk=[$0], ss_customer_sk=[CAST($3):INTEGER NOT NULL], *=[*(CAST($10):DECIMAL(10, 0), $13)]) - HiveFilter(condition=[AND(IS NOT NULL($3), IS NOT NULL($0))]) - HiveTableScan(table=[[default, store_sales]], table:alias=[store_sales]) - HiveProject(d_date_sk=[$0]) - HiveFilter(condition=[IN($6, 1999, 2000, 2001, 2002)]) - HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) - HiveProject(*=[*(0.95, $0)]) - HiveAggregate(group=[{}], agg#0=[max($1)]) - HiveProject(ss_customer_sk=[$0], $f1=[$1]) - HiveAggregate(group=[{1}], agg#0=[sum($2)]) - HiveJoin(condition=[=($0, $3)], joinType=[inner], algorithm=[none], cost=[not available]) - HiveProject(ss_sold_date_sk=[$0], ss_customer_sk=[CAST($3):INTEGER NOT NULL], *=[*(CAST($10):DECIMAL(10, 0), $13)]) - HiveFilter(condition=[AND(IS NOT NULL($3), IS NOT NULL($0))]) - HiveTableScan(table=[[default, store_sales]], table:alias=[store_sales]) - HiveProject(d_date_sk=[$0]) - HiveFilter(condition=[IN($6, 1999, 2000, 2001, 2002)]) - HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) + HiveProject($f0=[$0]) + HiveJoin(condition=[>($1, *(0.95, $3))], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(ss_customer_sk=[$0], $f1=[$1]) + HiveAggregate(group=[{0}], agg#0=[sum($1)]) + HiveProject(ss_customer_sk=[CAST($3):INTEGER NOT NULL], *=[*(CAST($10):DECIMAL(10, 0), $13)]) + HiveFilter(condition=[IS NOT NULL($3)]) + HiveTableScan(table=[[default, store_sales]], table:alias=[store_sales]) + HiveJoin(condition=[true], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(cnt=[$0]) + HiveFilter(condition=[<=(sq_count_check($0), 1)]) + HiveProject(cnt=[$0]) + HiveAggregate(group=[{}], cnt=[COUNT()]) + HiveProject + HiveProject($f0=[$0]) + HiveAggregate(group=[{}], agg#0=[count($0)]) + HiveProject(ss_customer_sk=[$0], $f1=[$1]) + HiveAggregate(group=[{1}], agg#0=[sum($2)]) + HiveJoin(condition=[=($0, $3)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(ss_sold_date_sk=[$0], ss_customer_sk=[CAST($3):INTEGER NOT NULL], *=[*(CAST($10):DECIMAL(10, 0), $13)]) + HiveFilter(condition=[AND(IS NOT NULL($3), IS NOT NULL($0))]) + HiveTableScan(table=[[default, store_sales]], table:alias=[store_sales]) + HiveProject(d_date_sk=[$0]) + HiveFilter(condition=[IN($6, 1999, 2000, 2001, 2002)]) + HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) + HiveProject($f0=[$0]) + HiveAggregate(group=[{}], agg#0=[max($1)]) + HiveProject(ss_customer_sk=[$0], $f1=[$1]) + HiveAggregate(group=[{1}], agg#0=[sum($2)]) + HiveJoin(condition=[=($0, $3)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(ss_sold_date_sk=[$0], ss_customer_sk=[CAST($3):INTEGER NOT NULL], *=[*(CAST($10):DECIMAL(10, 0), $13)]) + HiveFilter(condition=[AND(IS NOT NULL($3), IS NOT NULL($0))]) + HiveTableScan(table=[[default, store_sales]], table:alias=[store_sales]) + HiveProject(d_date_sk=[$0]) + HiveFilter(condition=[IN($6, 1999, 2000, 2001, 2002)]) + HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) HiveJoin(condition=[=($0, $5)], joinType=[inner], algorithm=[none], cost=[not available]) HiveProject(ws_sold_date_sk=[$0], ws_item_sk=[$3], ws_bill_customer_sk=[$4], ws_quantity=[$18], ws_list_price=[$20]) HiveFilter(condition=[AND(IS NOT NULL($4), IS NOT NULL($0))]) http://git-wip-us.apache.org/repos/asf/hive/blob/f4a48fdb/ql/src/test/results/clientpositive/perf/tez/constraints/cbo_query31.q.out ---------------------------------------------------------------------- diff --git a/ql/src/test/results/clientpositive/perf/tez/constraints/cbo_query31.q.out b/ql/src/test/results/clientpositive/perf/tez/constraints/cbo_query31.q.out index 41321f4..f8e31a2 100644 --- a/ql/src/test/results/clientpositive/perf/tez/constraints/cbo_query31.q.out +++ b/ql/src/test/results/clientpositive/perf/tez/constraints/cbo_query31.q.out @@ -111,10 +111,10 @@ POSTHOOK: Input: default@store_sales POSTHOOK: Input: default@web_sales POSTHOOK: Output: hdfs://### HDFS PATH ### CBO PLAN: -HiveProject(ca_county=[$0], d_year=[CAST(2000):INTEGER], web_q1_q2_increase=[/($14, $9)], store_q1_q2_increase=[/($1, $4)], web_q2_q3_increase=[/($12, $14)], store_q2_q3_increase=[/($7, $1)]) - HiveJoin(condition=[AND(AND(=($0, $8), CASE($5, CASE($10, >(/($14, $9), /($1, $4)), >(null, /($1, $4))), CASE($10, >(/($14, $9), null), null))), CASE($2, CASE($15, >(/($12, $14), /($7, $1)), >(null, /($7, $1))), CASE($15, >(/($12, $14), null), null)))], joinType=[inner], algorithm=[none], cost=[not available]) - HiveJoin(condition=[=($0, $6)], joinType=[inner], algorithm=[none], cost=[not available]) - HiveJoin(condition=[=($3, $0)], joinType=[inner], algorithm=[none], cost=[not available]) +HiveProject(ca_county=[$8], d_year=[CAST(2000):INTEGER], web_q1_q2_increase=[/($6, $1)], store_q1_q2_increase=[/($9, $11)], web_q2_q3_increase=[/($4, $6)], store_q2_q3_increase=[/($13, $9)]) + HiveJoin(condition=[AND(AND(=($8, $0), CASE(>($11, 0), CASE($2, >(/($6, $1), /($9, $11)), >(null, /($9, $11))), CASE($2, >(/($6, $1), null), null))), CASE(>($9, 0), CASE($7, >(/($4, $6), /($13, $9)), >(null, /($13, $9))), CASE($7, >(/($4, $6), null), null)))], joinType=[inner], algorithm=[none], cost=[not available]) + HiveJoin(condition=[=($0, $5)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveJoin(condition=[=($0, $3)], joinType=[inner], algorithm=[none], cost=[not available]) HiveProject($f0=[$0], $f3=[$1], >=[>($1, 0)]) HiveAggregate(group=[{1}], agg#0=[sum($4)]) HiveJoin(condition=[=($3, $0)], joinType=[inner], algorithm=[none], cost=[not available]) @@ -122,53 +122,53 @@ HiveProject(ca_county=[$0], d_year=[CAST(2000):INTEGER], web_q1_q2_increase=[/($ HiveFilter(condition=[IS NOT NULL($7)]) HiveTableScan(table=[[default, customer_address]], table:alias=[customer_address]) HiveJoin(condition=[=($0, $3)], joinType=[inner], algorithm=[none], cost=[not available]) - HiveProject(ss_sold_date_sk=[$0], ss_addr_sk=[$6], ss_ext_sales_price=[$15]) - HiveFilter(condition=[AND(IS NOT NULL($0), IS NOT NULL($6))]) - HiveTableScan(table=[[default, store_sales]], table:alias=[store_sales]) + HiveProject(ws_sold_date_sk=[$0], ws_bill_addr_sk=[$7], ws_ext_sales_price=[$23]) + HiveFilter(condition=[AND(IS NOT NULL($0), IS NOT NULL($7))]) + HiveTableScan(table=[[default, web_sales]], table:alias=[web_sales]) HiveProject(d_date_sk=[$0]) - HiveFilter(condition=[AND(=($10, 2), =($6, 2000))]) + HiveFilter(condition=[AND(=($10, 1), =($6, 2000))]) HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) - HiveProject($f0=[$0], $f3=[$1], >=[>($1, 0)]) + HiveProject(ca_county=[$0], $f1=[$1]) HiveAggregate(group=[{1}], agg#0=[sum($4)]) HiveJoin(condition=[=($3, $0)], joinType=[inner], algorithm=[none], cost=[not available]) HiveProject(ca_address_sk=[$0], ca_county=[$7]) HiveFilter(condition=[IS NOT NULL($7)]) HiveTableScan(table=[[default, customer_address]], table:alias=[customer_address]) HiveJoin(condition=[=($0, $3)], joinType=[inner], algorithm=[none], cost=[not available]) - HiveProject(ss_sold_date_sk=[$0], ss_addr_sk=[$6], ss_ext_sales_price=[$15]) - HiveFilter(condition=[AND(IS NOT NULL($0), IS NOT NULL($6))]) - HiveTableScan(table=[[default, store_sales]], table:alias=[store_sales]) + HiveProject(ws_sold_date_sk=[$0], ws_bill_addr_sk=[$7], ws_ext_sales_price=[$23]) + HiveFilter(condition=[AND(IS NOT NULL($0), IS NOT NULL($7))]) + HiveTableScan(table=[[default, web_sales]], table:alias=[web_sales]) HiveProject(d_date_sk=[$0]) - HiveFilter(condition=[AND(=($10, 1), =($6, 2000))]) + HiveFilter(condition=[AND(=($10, 3), =($6, 2000))]) HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) - HiveProject(ca_county=[$0], $f1=[$1]) + HiveProject($f0=[$0], $f3=[$1], >=[>($1, 0)]) HiveAggregate(group=[{1}], agg#0=[sum($4)]) HiveJoin(condition=[=($3, $0)], joinType=[inner], algorithm=[none], cost=[not available]) HiveProject(ca_address_sk=[$0], ca_county=[$7]) HiveFilter(condition=[IS NOT NULL($7)]) HiveTableScan(table=[[default, customer_address]], table:alias=[customer_address]) HiveJoin(condition=[=($0, $3)], joinType=[inner], algorithm=[none], cost=[not available]) - HiveProject(ss_sold_date_sk=[$0], ss_addr_sk=[$6], ss_ext_sales_price=[$15]) - HiveFilter(condition=[AND(IS NOT NULL($0), IS NOT NULL($6))]) - HiveTableScan(table=[[default, store_sales]], table:alias=[store_sales]) + HiveProject(ws_sold_date_sk=[$0], ws_bill_addr_sk=[$7], ws_ext_sales_price=[$23]) + HiveFilter(condition=[AND(IS NOT NULL($0), IS NOT NULL($7))]) + HiveTableScan(table=[[default, web_sales]], table:alias=[web_sales]) HiveProject(d_date_sk=[$0]) - HiveFilter(condition=[AND(=($10, 3), =($6, 2000))]) + HiveFilter(condition=[AND(=($10, 2), =($6, 2000))]) HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) - HiveProject($f0=[$0], $f3=[$1], >=[$2], ca_county=[$3], $f1=[$4], $f00=[$5], $f30=[$6], >0=[$7]) - HiveJoin(condition=[=($0, $5)], joinType=[inner], algorithm=[none], cost=[not available]) - HiveJoin(condition=[=($0, $3)], joinType=[inner], algorithm=[none], cost=[not available]) - HiveProject($f0=[$0], $f3=[$1], >=[>($1, 0)]) + HiveProject(ca_county=[$0], $f1=[$1], ca_county0=[$2], $f10=[$3], ca_county1=[$4], $f11=[$5]) + HiveJoin(condition=[=($0, $4)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveJoin(condition=[=($2, $0)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(ca_county=[$0], $f1=[$1]) HiveAggregate(group=[{1}], agg#0=[sum($4)]) HiveJoin(condition=[=($3, $0)], joinType=[inner], algorithm=[none], cost=[not available]) HiveProject(ca_address_sk=[$0], ca_county=[$7]) HiveFilter(condition=[IS NOT NULL($7)]) HiveTableScan(table=[[default, customer_address]], table:alias=[customer_address]) HiveJoin(condition=[=($0, $3)], joinType=[inner], algorithm=[none], cost=[not available]) - HiveProject(ws_sold_date_sk=[$0], ws_bill_addr_sk=[$7], ws_ext_sales_price=[$23]) - HiveFilter(condition=[AND(IS NOT NULL($0), IS NOT NULL($7))]) - HiveTableScan(table=[[default, web_sales]], table:alias=[web_sales]) + HiveProject(ss_sold_date_sk=[$0], ss_addr_sk=[$6], ss_ext_sales_price=[$15]) + HiveFilter(condition=[AND(IS NOT NULL($0), IS NOT NULL($6))]) + HiveTableScan(table=[[default, store_sales]], table:alias=[store_sales]) HiveProject(d_date_sk=[$0]) - HiveFilter(condition=[AND(=($10, 1), =($6, 2000))]) + HiveFilter(condition=[AND(=($10, 2), =($6, 2000))]) HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) HiveProject(ca_county=[$0], $f1=[$1]) HiveAggregate(group=[{1}], agg#0=[sum($4)]) @@ -177,23 +177,23 @@ HiveProject(ca_county=[$0], d_year=[CAST(2000):INTEGER], web_q1_q2_increase=[/($ HiveFilter(condition=[IS NOT NULL($7)]) HiveTableScan(table=[[default, customer_address]], table:alias=[customer_address]) HiveJoin(condition=[=($0, $3)], joinType=[inner], algorithm=[none], cost=[not available]) - HiveProject(ws_sold_date_sk=[$0], ws_bill_addr_sk=[$7], ws_ext_sales_price=[$23]) - HiveFilter(condition=[AND(IS NOT NULL($0), IS NOT NULL($7))]) - HiveTableScan(table=[[default, web_sales]], table:alias=[web_sales]) + HiveProject(ss_sold_date_sk=[$0], ss_addr_sk=[$6], ss_ext_sales_price=[$15]) + HiveFilter(condition=[AND(IS NOT NULL($0), IS NOT NULL($6))]) + HiveTableScan(table=[[default, store_sales]], table:alias=[store_sales]) HiveProject(d_date_sk=[$0]) - HiveFilter(condition=[AND(=($10, 3), =($6, 2000))]) + HiveFilter(condition=[AND(=($10, 1), =($6, 2000))]) HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) - HiveProject($f0=[$0], $f3=[$1], >=[>($1, 0)]) + HiveProject(ca_county=[$0], $f1=[$1]) HiveAggregate(group=[{1}], agg#0=[sum($4)]) HiveJoin(condition=[=($3, $0)], joinType=[inner], algorithm=[none], cost=[not available]) HiveProject(ca_address_sk=[$0], ca_county=[$7]) HiveFilter(condition=[IS NOT NULL($7)]) HiveTableScan(table=[[default, customer_address]], table:alias=[customer_address]) HiveJoin(condition=[=($0, $3)], joinType=[inner], algorithm=[none], cost=[not available]) - HiveProject(ws_sold_date_sk=[$0], ws_bill_addr_sk=[$7], ws_ext_sales_price=[$23]) - HiveFilter(condition=[AND(IS NOT NULL($0), IS NOT NULL($7))]) - HiveTableScan(table=[[default, web_sales]], table:alias=[web_sales]) + HiveProject(ss_sold_date_sk=[$0], ss_addr_sk=[$6], ss_ext_sales_price=[$15]) + HiveFilter(condition=[AND(IS NOT NULL($0), IS NOT NULL($6))]) + HiveTableScan(table=[[default, store_sales]], table:alias=[store_sales]) HiveProject(d_date_sk=[$0]) - HiveFilter(condition=[AND(=($10, 2), =($6, 2000))]) + HiveFilter(condition=[AND(=($10, 3), =($6, 2000))]) HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) http://git-wip-us.apache.org/repos/asf/hive/blob/f4a48fdb/ql/src/test/results/clientpositive/perf/tez/constraints/cbo_query4.q.out ---------------------------------------------------------------------- diff --git a/ql/src/test/results/clientpositive/perf/tez/constraints/cbo_query4.q.out b/ql/src/test/results/clientpositive/perf/tez/constraints/cbo_query4.q.out index ccec2f3..9fb918e 100644 --- a/ql/src/test/results/clientpositive/perf/tez/constraints/cbo_query4.q.out +++ b/ql/src/test/results/clientpositive/perf/tez/constraints/cbo_query4.q.out @@ -229,7 +229,7 @@ POSTHOOK: Output: hdfs://### HDFS PATH ### CBO PLAN: HiveSortLimit(sort0=[$0], dir0=[ASC], fetch=[100]) HiveProject(customer_preferred_cust_flag=[$1]) - HiveJoin(condition=[AND(=($0, $7), CASE($9, CASE($15, >(/($4, $14), /($2, $8)), >(null, /($2, $8))), CASE($15, >(/($4, $14), null), null)))], joinType=[inner], algorithm=[none], cost=[not available]) + HiveJoin(condition=[AND(=($0, $7), CASE(CAST(IS NOT NULL($8)):BOOLEAN, CASE($14, >(/($4, $13), /($2, $8)), >(null, /($2, $8))), CASE($14, >(/($4, $13), null), null)))], joinType=[inner], algorithm=[none], cost=[not available]) HiveProject($f0=[$0], $f3=[$3], $f8=[$7]) HiveAggregate(group=[{1, 2, 3, 4, 5, 6, 7}], agg#0=[sum($10)]) HiveJoin(condition=[=($0, $9)], joinType=[inner], algorithm=[none], cost=[not available]) @@ -242,7 +242,7 @@ HiveSortLimit(sort0=[$0], dir0=[ASC], fetch=[100]) HiveProject(d_date_sk=[$0]) HiveFilter(condition=[=($6, 2002)]) HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) - HiveJoin(condition=[AND(=($4, $0), CASE($9, CASE($12, >(/($1, $11), /($3, $8)), >(null, /($3, $8))), CASE($12, >(/($1, $11), null), null)))], joinType=[inner], algorithm=[none], cost=[not available]) + HiveJoin(condition=[AND(=($4, $0), CASE($8, CASE($11, >(/($1, $10), /($3, $7)), >(null, /($3, $7))), CASE($11, >(/($1, $10), null), null)))], joinType=[inner], algorithm=[none], cost=[not available]) HiveProject($f0=[$0], $f8=[$7]) HiveAggregate(group=[{1, 2, 3, 4, 5, 6, 7}], agg#0=[sum($10)]) HiveJoin(condition=[=($0, $9)], joinType=[inner], algorithm=[none], cost=[not available]) @@ -255,8 +255,8 @@ HiveSortLimit(sort0=[$0], dir0=[ASC], fetch=[100]) HiveProject(d_date_sk=[$0]) HiveFilter(condition=[=($6, 2002)]) HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) - HiveJoin(condition=[=($2, $8)], joinType=[inner], algorithm=[none], cost=[not available]) - HiveJoin(condition=[=($2, $5)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveJoin(condition=[=($2, $7)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveJoin(condition=[=($2, $4)], joinType=[inner], algorithm=[none], cost=[not available]) HiveJoin(condition=[=($2, $0)], joinType=[inner], algorithm=[none], cost=[not available]) HiveProject($f0=[$0], $f8=[$7]) HiveAggregate(group=[{1, 2, 3, 4, 5, 6, 7}], agg#0=[sum($10)]) @@ -270,7 +270,7 @@ HiveSortLimit(sort0=[$0], dir0=[ASC], fetch=[100]) HiveProject(d_date_sk=[$0]) HiveFilter(condition=[=($6, 2002)]) HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) - HiveProject(customer_id=[$0], year_total=[$7], CAST=[CAST(IS NOT NULL($7)):BOOLEAN]) + HiveProject($f0=[$0], $f8=[$7]) HiveFilter(condition=[>($7, 0)]) HiveAggregate(group=[{1, 2, 3, 4, 5, 6, 7}], agg#0=[sum($10)]) HiveJoin(condition=[=($0, $9)], joinType=[inner], algorithm=[none], cost=[not available]) http://git-wip-us.apache.org/repos/asf/hive/blob/f4a48fdb/ql/src/test/results/clientpositive/perf/tez/constraints/cbo_query58.q.out ---------------------------------------------------------------------- diff --git a/ql/src/test/results/clientpositive/perf/tez/constraints/cbo_query58.q.out b/ql/src/test/results/clientpositive/perf/tez/constraints/cbo_query58.q.out index df67f6f..a362c45 100644 --- a/ql/src/test/results/clientpositive/perf/tez/constraints/cbo_query58.q.out +++ b/ql/src/test/results/clientpositive/perf/tez/constraints/cbo_query58.q.out @@ -141,10 +141,10 @@ POSTHOOK: Input: default@web_sales POSTHOOK: Output: hdfs://### HDFS PATH ### CBO PLAN: HiveSortLimit(sort0=[$0], sort1=[$1], dir0=[ASC], dir1=[ASC], fetch=[100]) - HiveProject(item_id=[$0], ss_item_rev=[$5], ss_dev=[*(/(/($5, +(+($5, $1), $9)), CAST(3):DECIMAL(10, 0)), CAST(100):DECIMAL(10, 0))], cs_item_rev=[$1], cs_dev=[*(/(/($1, +(+($5, $1), $9)), CAST(3):DECIMAL(10, 0)), CAST(100):DECIMAL(10, 0))], ws_item_rev=[$9], ws_dev=[*(/(/($9, +(+($5, $1), $9)), CAST(3):DECIMAL(10, 0)), CAST(100):DECIMAL(10, 0))], average=[/(+(+($5, $1), $9), CAST(3):DECIMAL(10, 0))]) - HiveJoin(condition=[AND(AND(AND(AND(=($0, $8), BETWEEN(false, $5, $10, $11)), BETWEEN(false, $1, $10, $11)), BETWEEN(false, $9, $6, $7)), BETWEEN(false, $9, $2, $3))], joinType=[inner], algorithm=[none], cost=[not available]) - HiveJoin(condition=[AND(AND(=($4, $0), BETWEEN(false, $5, $2, $3)), BETWEEN(false, $1, $6, $7))], joinType=[inner], algorithm=[none], cost=[not available]) - HiveProject(item_id=[$0], cs_item_rev=[$1], *=[*(0.9, $1)], *3=[*(1.1, $1)]) + HiveProject(item_id=[$0], ss_item_rev=[$3], ss_dev=[*(/(/($3, +(+($3, $1), $5)), CAST(3):DECIMAL(10, 0)), CAST(100):DECIMAL(10, 0))], cs_item_rev=[$1], cs_dev=[*(/(/($1, +(+($3, $1), $5)), CAST(3):DECIMAL(10, 0)), CAST(100):DECIMAL(10, 0))], ws_item_rev=[$5], ws_dev=[*(/(/($5, +(+($3, $1), $5)), CAST(3):DECIMAL(10, 0)), CAST(100):DECIMAL(10, 0))], average=[/(+(+($3, $1), $5), CAST(3):DECIMAL(10, 0))]) + HiveJoin(condition=[AND(AND(AND(AND(=($0, $4), BETWEEN(false, $3, $6, $7)), BETWEEN(false, $1, $6, $7)), BETWEEN(false, $5, *(0.9, $3), *(1.1, $3))), BETWEEN(false, $5, *(0.9, $1), *(1.1, $1)))], joinType=[inner], algorithm=[none], cost=[not available]) + HiveJoin(condition=[AND(AND(=($2, $0), BETWEEN(false, $3, *(0.9, $1), *(1.1, $1))), BETWEEN(false, $1, *(0.9, $3), *(1.1, $3)))], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(i_item_id=[$0], $f1=[$1]) HiveAggregate(group=[{4}], agg#0=[sum($2)]) HiveJoin(condition=[=($0, $5)], joinType=[inner], algorithm=[none], cost=[not available]) HiveJoin(condition=[=($1, $3)], joinType=[inner], algorithm=[none], cost=[not available]) @@ -174,7 +174,7 @@ HiveSortLimit(sort0=[$0], sort1=[$1], dir0=[ASC], dir1=[ASC], fetch=[100]) HiveProject(d_week_seq=[$4]) HiveFilter(condition=[AND(=($2, _UTF-16LE'1998-02-19'), IS NOT NULL($4))]) HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) - HiveProject(item_id=[$0], ss_item_rev=[$1], *=[*(0.9, $1)], *3=[*(1.1, $1)]) + HiveProject(i_item_id=[$0], $f1=[$1]) HiveAggregate(group=[{4}], agg#0=[sum($2)]) HiveJoin(condition=[=($0, $5)], joinType=[inner], algorithm=[none], cost=[not available]) HiveJoin(condition=[=($1, $3)], joinType=[inner], algorithm=[none], cost=[not available]) http://git-wip-us.apache.org/repos/asf/hive/blob/f4a48fdb/ql/src/test/results/clientpositive/perf/tez/constraints/cbo_query59.q.out ---------------------------------------------------------------------- diff --git a/ql/src/test/results/clientpositive/perf/tez/constraints/cbo_query59.q.out b/ql/src/test/results/clientpositive/perf/tez/constraints/cbo_query59.q.out index 989bd78..34376d1 100644 --- a/ql/src/test/results/clientpositive/perf/tez/constraints/cbo_query59.q.out +++ b/ql/src/test/results/clientpositive/perf/tez/constraints/cbo_query59.q.out @@ -94,8 +94,8 @@ POSTHOOK: Input: default@store_sales POSTHOOK: Output: hdfs://### HDFS PATH ### CBO PLAN: HiveSortLimit(sort0=[$0], sort1=[$1], sort2=[$2], dir0=[ASC], dir1=[ASC], dir2=[ASC], fetch=[100]) - HiveProject(s_store_name1=[$2], s_store_id1=[$1], d_week_seq1=[$3], _o__c3=[/($5, $14)], _o__c4=[/($6, $15)], _o__c5=[/($7, $7)], _o__c6=[/($8, $16)], _o__c7=[/($9, $17)], _o__c8=[/($10, $18)], _o__c9=[/($11, $19)]) - HiveJoin(condition=[AND(=($1, $13), =($3, $20))], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(s_store_name1=[$2], s_store_id1=[$1], d_week_seq1=[$3], _o__c3=[/($5, $15)], _o__c4=[/($6, $16)], _o__c5=[/($7, $7)], _o__c6=[/($8, $17)], _o__c7=[/($9, $18)], _o__c8=[/($10, $19)], _o__c9=[/($11, $20)]) + HiveJoin(condition=[AND(=($1, $14), =($3, -($13, 52)))], joinType=[inner], algorithm=[none], cost=[not available]) HiveJoin(condition=[=($4, $0)], joinType=[inner], algorithm=[none], cost=[not available]) HiveProject(s_store_sk=[$0], s_store_id=[$1], s_store_name=[$5]) HiveTableScan(table=[[default, store]], table:alias=[store]) @@ -113,7 +113,7 @@ HiveSortLimit(sort0=[$0], sort1=[$1], sort2=[$2], dir0=[ASC], dir1=[ASC], dir2=[ HiveProject(d_week_seq=[$4]) HiveFilter(condition=[AND(BETWEEN(false, $3, 1185, 1196), IS NOT NULL($4))]) HiveTableScan(table=[[default, date_dim]], table:alias=[d]) - HiveProject(s_store_id2=[$1], sun_sales2=[$4], mon_sales2=[$5], wed_sales2=[$6], thu_sales2=[$7], fri_sales2=[$8], sat_sales2=[$9], -=[-($2, 52)]) + HiveProject(d_week_seq2=[$2], s_store_id2=[$1], sun_sales2=[$4], mon_sales2=[$5], wed_sales2=[$6], thu_sales2=[$7], fri_sales2=[$8], sat_sales2=[$9]) HiveJoin(condition=[=($3, $0)], joinType=[inner], algorithm=[none], cost=[not available]) HiveProject(s_store_sk=[$0], s_store_id=[$1]) HiveTableScan(table=[[default, store]], table:alias=[store]) http://git-wip-us.apache.org/repos/asf/hive/blob/f4a48fdb/ql/src/test/results/clientpositive/perf/tez/constraints/cbo_query61.q.out ---------------------------------------------------------------------- diff --git a/ql/src/test/results/clientpositive/perf/tez/constraints/cbo_query61.q.out b/ql/src/test/results/clientpositive/perf/tez/constraints/cbo_query61.q.out index 68d03cb..253190d 100644 --- a/ql/src/test/results/clientpositive/perf/tez/constraints/cbo_query61.q.out +++ b/ql/src/test/results/clientpositive/perf/tez/constraints/cbo_query61.q.out @@ -103,9 +103,9 @@ POSTHOOK: Input: default@store_sales POSTHOOK: Output: hdfs://### HDFS PATH ### CBO PLAN: HiveSortLimit(sort0=[$0], sort1=[$1], dir0=[ASC], dir1=[ASC], fetch=[100]) - HiveProject(promotions=[$0], total=[$2], _o__c2=[*(/($1, $3), CAST(100):DECIMAL(10, 0))]) + HiveProject(promotions=[$0], total=[$1], _o__c2=[*(/(CAST($0):DECIMAL(15, 4), CAST($1):DECIMAL(15, 4)), CAST(100):DECIMAL(10, 0))]) HiveJoin(condition=[true], joinType=[inner], algorithm=[none], cost=[not available]) - HiveProject(promotions=[$0], CAST=[CAST($0):DECIMAL(15, 4)]) + HiveProject($f0=[$0]) HiveAggregate(group=[{}], agg#0=[sum($8)]) HiveJoin(condition=[=($5, $0)], joinType=[inner], algorithm=[none], cost=[not available]) HiveJoin(condition=[=($2, $1)], joinType=[inner], algorithm=[none], cost=[not available]) @@ -135,7 +135,7 @@ HiveSortLimit(sort0=[$0], sort1=[$1], dir0=[ASC], dir1=[ASC], fetch=[100]) HiveProject(p_promo_sk=[$0]) HiveFilter(condition=[OR(=($8, _UTF-16LE'Y'), =($9, _UTF-16LE'Y'), =($11, _UTF-16LE'Y'))]) HiveTableScan(table=[[default, promotion]], table:alias=[promotion]) - HiveProject(total=[$0], CAST=[CAST($0):DECIMAL(15, 4)]) + HiveProject($f0=[$0]) HiveAggregate(group=[{}], agg#0=[sum($7)]) HiveJoin(condition=[=($5, $0)], joinType=[inner], algorithm=[none], cost=[not available]) HiveJoin(condition=[=($2, $1)], joinType=[inner], algorithm=[none], cost=[not available]) http://git-wip-us.apache.org/repos/asf/hive/blob/f4a48fdb/ql/src/test/results/clientpositive/perf/tez/constraints/cbo_query74.q.out ---------------------------------------------------------------------- diff --git a/ql/src/test/results/clientpositive/perf/tez/constraints/cbo_query74.q.out b/ql/src/test/results/clientpositive/perf/tez/constraints/cbo_query74.q.out index c16cab0..74a3a3f 100644 --- a/ql/src/test/results/clientpositive/perf/tez/constraints/cbo_query74.q.out +++ b/ql/src/test/results/clientpositive/perf/tez/constraints/cbo_query74.q.out @@ -131,7 +131,7 @@ POSTHOOK: Output: hdfs://### HDFS PATH ### CBO PLAN: HiveSortLimit(sort0=[$1], sort1=[$0], sort2=[$2], dir0=[ASC], dir1=[ASC], dir2=[ASC], fetch=[100]) HiveProject(customer_id=[$0], customer_first_name=[$1], customer_last_name=[$2]) - HiveJoin(condition=[AND(=($0, $6), CASE($8, CASE($11, >(/($5, $10), /($3, $7)), >(null, /($3, $7))), CASE($11, >(/($5, $10), null), null)))], joinType=[inner], algorithm=[none], cost=[not available]) + HiveJoin(condition=[AND(=($0, $6), CASE(CAST(IS NOT NULL($7)):BOOLEAN, CASE($10, >(/($5, $9), /($3, $7)), >(null, /($3, $7))), CASE($10, >(/($5, $9), null), null)))], joinType=[inner], algorithm=[none], cost=[not available]) HiveProject(c_customer_id=[$0], c_first_name=[$1], c_last_name=[$2], $f3=[$3]) HiveAggregate(group=[{1, 2, 3}], agg#0=[max($6)]) HiveJoin(condition=[=($0, $5)], joinType=[inner], algorithm=[none], cost=[not available]) @@ -144,7 +144,7 @@ HiveSortLimit(sort0=[$1], sort1=[$0], sort2=[$2], dir0=[ASC], dir1=[ASC], dir2=[ HiveProject(d_date_sk=[$0]) HiveFilter(condition=[AND(IN($6, 2001, 2002), =($6, 2002))]) HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) - HiveJoin(condition=[=($2, $5)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveJoin(condition=[=($2, $4)], joinType=[inner], algorithm=[none], cost=[not available]) HiveJoin(condition=[=($2, $0)], joinType=[inner], algorithm=[none], cost=[not available]) HiveProject($f0=[$0], $f4=[$3]) HiveAggregate(group=[{1, 2, 3}], agg#0=[max($6)]) @@ -158,7 +158,7 @@ HiveSortLimit(sort0=[$1], sort1=[$0], sort2=[$2], dir0=[ASC], dir1=[ASC], dir2=[ HiveProject(d_date_sk=[$0]) HiveFilter(condition=[AND(IN($6, 2001, 2002), =($6, 2002))]) HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) - HiveProject(customer_id=[$0], year_total=[$3], CAST=[CAST(IS NOT NULL($3)):BOOLEAN]) + HiveProject($f0=[$0], $f4=[$3]) HiveFilter(condition=[>($3, 0)]) HiveAggregate(group=[{1, 2, 3}], agg#0=[max($6)]) HiveJoin(condition=[=($0, $5)], joinType=[inner], algorithm=[none], cost=[not available]) http://git-wip-us.apache.org/repos/asf/hive/blob/f4a48fdb/ql/src/test/results/clientpositive/perf/tez/constraints/cbo_query75.q.out ---------------------------------------------------------------------- diff --git a/ql/src/test/results/clientpositive/perf/tez/constraints/cbo_query75.q.out b/ql/src/test/results/clientpositive/perf/tez/constraints/cbo_query75.q.out index 50a9dfc..a95e756 100644 --- a/ql/src/test/results/clientpositive/perf/tez/constraints/cbo_query75.q.out +++ b/ql/src/test/results/clientpositive/perf/tez/constraints/cbo_query75.q.out @@ -157,9 +157,9 @@ POSTHOOK: Output: hdfs://### HDFS PATH ### CBO PLAN: HiveProject(prev_year=[CAST(2001):INTEGER], year=[CAST(2002):INTEGER], i_brand_id=[$0], i_class_id=[$1], i_category_id=[$2], i_manufact_id=[$3], prev_yr_cnt=[$4], curr_yr_cnt=[$5], sales_cnt_diff=[$6], sales_amt_diff=[$7]) HiveSortLimit(sort0=[$6], dir0=[ASC], fetch=[100]) - HiveProject(i_brand_id=[$0], i_class_id=[$1], i_category_id=[$2], i_manufact_id=[$3], prev_yr_cnt=[$4], curr_yr_cnt=[$11], sales_cnt_diff=[-($11, $4)], sales_amt_diff=[-($12, $5)]) - HiveJoin(condition=[AND(AND(AND(AND(=($7, $0), =($8, $1)), =($9, $2)), =($10, $3)), <(/($13, $6), 0.9))], joinType=[inner], algorithm=[none], cost=[not available]) - HiveProject(i_brand_id=[$0], i_class_id=[$1], i_category_id=[$2], i_manufact_id=[$3], $f5=[$4], $f6=[$5], CAST=[CAST($4):DECIMAL(17, 2)]) + HiveProject(i_brand_id=[$0], i_class_id=[$1], i_category_id=[$2], i_manufact_id=[$3], prev_yr_cnt=[$4], curr_yr_cnt=[$10], sales_cnt_diff=[-($10, $4)], sales_amt_diff=[-($11, $5)]) + HiveJoin(condition=[AND(AND(AND(AND(=($6, $0), =($7, $1)), =($8, $2)), =($9, $3)), <(/(CAST($10):DECIMAL(17, 2), CAST($4):DECIMAL(17, 2)), 0.9))], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(i_brand_id=[$0], i_class_id=[$1], i_category_id=[$2], i_manufact_id=[$3], $f4=[$4], $f5=[$5]) HiveAggregate(group=[{0, 1, 2, 3}], agg#0=[sum($4)], agg#1=[sum($5)]) HiveProject(i_brand_id=[$0], i_class_id=[$1], i_category_id=[$2], i_manufact_id=[$3], sales_cnt=[$4], sales_amt=[$5]) HiveAggregate(group=[{0, 1, 2, 3, 4, 5}]) @@ -214,7 +214,7 @@ HiveProject(prev_year=[CAST(2001):INTEGER], year=[CAST(2002):INTEGER], i_brand_i HiveProject(i_item_sk=[$0], i_brand_id=[$7], i_class_id=[$9], i_category_id=[$11], i_manufact_id=[$13]) HiveFilter(condition=[AND(=($12, _UTF-16LE'Sports'), IS NOT NULL($7), IS NOT NULL($9), IS NOT NULL($11), IS NOT NULL($13))]) HiveTableScan(table=[[default, item]], table:alias=[item]) - HiveProject(i_brand_id=[$0], i_class_id=[$1], i_category_id=[$2], i_manufact_id=[$3], $f5=[$4], $f6=[$5], CAST=[CAST($4):DECIMAL(17, 2)]) + HiveProject(i_brand_id=[$0], i_class_id=[$1], i_category_id=[$2], i_manufact_id=[$3], $f4=[$4], $f5=[$5]) HiveAggregate(group=[{0, 1, 2, 3}], agg#0=[sum($4)], agg#1=[sum($5)]) HiveProject(i_brand_id=[$0], i_class_id=[$1], i_category_id=[$2], i_manufact_id=[$3], sales_cnt=[$4], sales_amt=[$5]) HiveAggregate(group=[{0, 1, 2, 3, 4, 5}]) http://git-wip-us.apache.org/repos/asf/hive/blob/f4a48fdb/ql/src/test/results/clientpositive/perf/tez/constraints/cbo_query83.q.out ---------------------------------------------------------------------- diff --git a/ql/src/test/results/clientpositive/perf/tez/constraints/cbo_query83.q.out b/ql/src/test/results/clientpositive/perf/tez/constraints/cbo_query83.q.out index ee94ea3..2a51015 100644 --- a/ql/src/test/results/clientpositive/perf/tez/constraints/cbo_query83.q.out +++ b/ql/src/test/results/clientpositive/perf/tez/constraints/cbo_query83.q.out @@ -144,10 +144,10 @@ POSTHOOK: Input: default@web_returns POSTHOOK: Output: hdfs://### HDFS PATH ### CBO PLAN: HiveSortLimit(sort0=[$0], sort1=[$1], dir0=[ASC], dir1=[ASC], fetch=[100]) - HiveProject(item_id=[$0], sr_item_qty=[$4], sr_dev=[*(/(/($5, CAST(+(+($4, $1), $7)):DOUBLE), CAST(3):DOUBLE), CAST(100):DOUBLE)], cr_item_qty=[$1], cr_dev=[*(/(/($2, CAST(+(+($4, $1), $7)):DOUBLE), CAST(3):DOUBLE), CAST(100):DOUBLE)], wr_item_qty=[$7], wr_dev=[*(/(/($8, CAST(+(+($4, $1), $7)):DOUBLE), CAST(3):DOUBLE), CAST(100):DOUBLE)], average=[/(CAST(+(+($4, $1), $7)):DECIMAL(19, 0), 3)]) - HiveJoin(condition=[=($0, $6)], joinType=[inner], algorithm=[none], cost=[not available]) - HiveJoin(condition=[=($3, $0)], joinType=[inner], algorithm=[none], cost=[not available]) - HiveProject(item_id=[$0], cr_item_qty=[$1], CAST=[CAST($1):DOUBLE]) + HiveProject(item_id=[$0], sr_item_qty=[$3], sr_dev=[*(/(/(CAST($3):DOUBLE, CAST(+(+($3, $1), $5)):DOUBLE), CAST(3):DOUBLE), CAST(100):DOUBLE)], cr_item_qty=[$1], cr_dev=[*(/(/(CAST($1):DOUBLE, CAST(+(+($3, $1), $5)):DOUBLE), CAST(3):DOUBLE), CAST(100):DOUBLE)], wr_item_qty=[$5], wr_dev=[*(/(/($6, CAST(+(+($3, $1), $5)):DOUBLE), CAST(3):DOUBLE), CAST(100):DOUBLE)], average=[/(CAST(+(+($3, $1), $5)):DECIMAL(19, 0), 3)]) + HiveJoin(condition=[=($0, $4)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveJoin(condition=[=($2, $0)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(i_item_id=[$0], $f1=[$1]) HiveAggregate(group=[{4}], agg#0=[sum($2)]) HiveJoin(condition=[=($0, $5)], joinType=[inner], algorithm=[none], cost=[not available]) HiveJoin(condition=[=($1, $3)], joinType=[inner], algorithm=[none], cost=[not available]) @@ -170,7 +170,7 @@ HiveSortLimit(sort0=[$0], sort1=[$1], dir0=[ASC], dir1=[ASC], fetch=[100]) HiveProject(d_date_sk=[$0], d_date_id=[$1], d_date=[$2], d_month_seq=[$3], d_week_seq=[$4], d_quarter_seq=[$5], d_year=[$6], d_dow=[$7], d_moy=[$8], d_dom=[$9], d_qoy=[$10], d_fy_year=[$11], d_fy_quarter_seq=[$12], d_fy_week_seq=[$13], d_day_name=[$14], d_quarter_name=[$15], d_holiday=[$16], d_weekend=[$17], d_following_holiday=[$18], d_first_dom=[$19], d_last_dom=[$20], d_same_day_ly=[$21], d_same_day_lq=[$22], d_current_day=[$23], d_current_week=[$24], d_current_month=[$25], d_current_quarter=[$26], d_current_year=[$27], BLOCK__OFFSET__INSIDE__FILE=[$28], INPUT__FILE__NAME=[$29], ROW__ID=[$30]) HiveFilter(condition=[AND(IN($2, _UTF-16LE'1998-01-02', _UTF-16LE'1998-10-15', _UTF-16LE'1998-11-10'), IS NOT NULL($4))]) HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) - HiveProject(item_id=[$0], sr_item_qty=[$1], CAST=[CAST($1):DOUBLE]) + HiveProject(i_item_id=[$0], $f1=[$1]) HiveAggregate(group=[{4}], agg#0=[sum($2)]) HiveJoin(condition=[=($0, $5)], joinType=[inner], algorithm=[none], cost=[not available]) HiveJoin(condition=[=($1, $3)], joinType=[inner], algorithm=[none], cost=[not available]) http://git-wip-us.apache.org/repos/asf/hive/blob/f4a48fdb/ql/src/test/results/clientpositive/perf/tez/constraints/cbo_query90.q.out ---------------------------------------------------------------------- diff --git a/ql/src/test/results/clientpositive/perf/tez/constraints/cbo_query90.q.out b/ql/src/test/results/clientpositive/perf/tez/constraints/cbo_query90.q.out index ff28da1..b5aac32 100644 --- a/ql/src/test/results/clientpositive/perf/tez/constraints/cbo_query90.q.out +++ b/ql/src/test/results/clientpositive/perf/tez/constraints/cbo_query90.q.out @@ -53,9 +53,9 @@ POSTHOOK: Input: default@web_sales POSTHOOK: Output: hdfs://### HDFS PATH ### CBO PLAN: HiveSortLimit(sort0=[$0], dir0=[ASC], fetch=[100]) - HiveProject(am_pm_ratio=[/($0, $1)]) + HiveProject(am_pm_ratio=[/(CAST($0):DECIMAL(15, 4), CAST($1):DECIMAL(15, 4))]) HiveJoin(condition=[true], joinType=[inner], algorithm=[none], cost=[not available]) - HiveProject(CAST=[CAST($0):DECIMAL(15, 4)]) + HiveProject($f0=[$0]) HiveAggregate(group=[{}], agg#0=[count()]) HiveJoin(condition=[=($1, $5)], joinType=[inner], algorithm=[none], cost=[not available]) HiveJoin(condition=[=($0, $4)], joinType=[inner], algorithm=[none], cost=[not available]) @@ -72,7 +72,7 @@ HiveSortLimit(sort0=[$0], dir0=[ASC], fetch=[100]) HiveProject(hd_demo_sk=[$0]) HiveFilter(condition=[=($3, 8)]) HiveTableScan(table=[[default, household_demographics]], table:alias=[household_demographics]) - HiveProject(CAST=[CAST($0):DECIMAL(15, 4)]) + HiveProject($f0=[$0]) HiveAggregate(group=[{}], agg#0=[count()]) HiveJoin(condition=[=($1, $5)], joinType=[inner], algorithm=[none], cost=[not available]) HiveJoin(condition=[=($0, $4)], joinType=[inner], algorithm=[none], cost=[not available]) http://git-wip-us.apache.org/repos/asf/hive/blob/f4a48fdb/ql/src/test/results/clientpositive/perf/tez/constraints/mv_query44.q.out ---------------------------------------------------------------------- diff --git a/ql/src/test/results/clientpositive/perf/tez/constraints/mv_query44.q.out b/ql/src/test/results/clientpositive/perf/tez/constraints/mv_query44.q.out new file mode 100644 index 0000000..db9acc9 --- /dev/null +++ b/ql/src/test/results/clientpositive/perf/tez/constraints/mv_query44.q.out @@ -0,0 +1,214 @@ +PREHOOK: query: CREATE MATERIALIZED VIEW mv_store_sales_item_customer PARTITIONED ON (ss_sold_date_sk) +AS + select ss_item_sk, ss_store_sk, ss_customer_sk, ss_sold_date_sk, count(*) cnt, sum(ss_quantity) as ss_quantity, sum(ss_ext_wholesale_cost) as ss_ext_wholesale_cost,sum(ss_net_paid) as ss_net_paid,sum(ss_net_profit) as ss_net_profit, sum(ss_ext_sales_price) as ss_ext_sales_price, sum(ss_coupon_amt) amt, sum(ss_sales_price) ss_sales_price, sum(ss_quantity*ss_sales_price) ssales + from store_sales + group by ss_store_sk, + ss_item_sk, ss_customer_sk, ss_sold_date_sk +PREHOOK: type: CREATE_MATERIALIZED_VIEW +PREHOOK: Input: default@store_sales +PREHOOK: Output: database:default +PREHOOK: Output: default@mv_store_sales_item_customer +PREHOOK: Output: default@mv_store_sales_item_customer +POSTHOOK: query: CREATE MATERIALIZED VIEW mv_store_sales_item_customer PARTITIONED ON (ss_sold_date_sk) +AS + select ss_item_sk, ss_store_sk, ss_customer_sk, ss_sold_date_sk, count(*) cnt, sum(ss_quantity) as ss_quantity, sum(ss_ext_wholesale_cost) as ss_ext_wholesale_cost,sum(ss_net_paid) as ss_net_paid,sum(ss_net_profit) as ss_net_profit, sum(ss_ext_sales_price) as ss_ext_sales_price, sum(ss_coupon_amt) amt, sum(ss_sales_price) ss_sales_price, sum(ss_quantity*ss_sales_price) ssales + from store_sales + group by ss_store_sk, + ss_item_sk, ss_customer_sk, ss_sold_date_sk +POSTHOOK: type: CREATE_MATERIALIZED_VIEW +POSTHOOK: Input: default@store_sales +POSTHOOK: Output: database:default +POSTHOOK: Output: default@mv_store_sales_item_customer +Warning: Shuffle Join MERGEJOIN[101][tables = [$hdt$_1, $hdt$_2]] in Stage 'Reducer 8' is a cross product +PREHOOK: query: explain +select asceding.rnk, i1.i_product_name best_performing, i2.i_product_name worst_performing +from(select * + from (select item_sk,rank() over (order by rank_col asc) rnk + from (select ss_item_sk item_sk,avg(ss_net_profit) rank_col + from store_sales ss1 + where ss_store_sk = 410 + group by ss_item_sk + having avg(ss_net_profit) > 0.9*(select avg(ss_net_profit) rank_col + from store_sales + where ss_store_sk = 410 + and ss_hdemo_sk is null + group by ss_store_sk))V1)V11 + where rnk < 11) asceding, + (select * + from (select item_sk,rank() over (order by rank_col desc) rnk + from (select ss_item_sk item_sk,avg(ss_net_profit) rank_col + from store_sales ss1 + where ss_store_sk = 410 + group by ss_item_sk + having avg(ss_net_profit) > 0.9*(select avg(ss_net_profit) rank_col + from store_sales + where ss_store_sk = 410 + and ss_hdemo_sk is null + group by ss_store_sk))V2)V21 + where rnk < 11) descending, +item i1, +item i2 +where asceding.rnk = descending.rnk + and i1.i_item_sk=asceding.item_sk + and i2.i_item_sk=descending.item_sk +order by asceding.rnk +limit 100 +PREHOOK: type: QUERY +PREHOOK: Input: default@item +PREHOOK: Input: default@store_sales +PREHOOK: Output: hdfs://### HDFS PATH ### +POSTHOOK: query: explain +select asceding.rnk, i1.i_product_name best_performing, i2.i_product_name worst_performing +from(select * + from (select item_sk,rank() over (order by rank_col asc) rnk + from (select ss_item_sk item_sk,avg(ss_net_profit) rank_col + from store_sales ss1 + where ss_store_sk = 410 + group by ss_item_sk + having avg(ss_net_profit) > 0.9*(select avg(ss_net_profit) rank_col + from store_sales + where ss_store_sk = 410 + and ss_hdemo_sk is null + group by ss_store_sk))V1)V11 + where rnk < 11) asceding, + (select * + from (select item_sk,rank() over (order by rank_col desc) rnk + from (select ss_item_sk item_sk,avg(ss_net_profit) rank_col + from store_sales ss1 + where ss_store_sk = 410 + group by ss_item_sk + having avg(ss_net_profit) > 0.9*(select avg(ss_net_profit) rank_col + from store_sales + where ss_store_sk = 410 + and ss_hdemo_sk is null + group by ss_store_sk))V2)V21 + where rnk < 11) descending, +item i1, +item i2 +where asceding.rnk = descending.rnk + and i1.i_item_sk=asceding.item_sk + and i2.i_item_sk=descending.item_sk +order by asceding.rnk +limit 100 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@item +POSTHOOK: Input: default@store_sales +POSTHOOK: Output: hdfs://### HDFS PATH ### +Plan optimized by CBO. + +Vertex dependency in root stage +Reducer 10 <- Reducer 8 (SIMPLE_EDGE) +Reducer 12 <- Map 11 (SIMPLE_EDGE) +Reducer 2 <- Map 1 (SIMPLE_EDGE), Reducer 9 (SIMPLE_EDGE) +Reducer 3 <- Reducer 2 (SIMPLE_EDGE), Reducer 5 (SIMPLE_EDGE) +Reducer 4 <- Reducer 3 (SIMPLE_EDGE) +Reducer 5 <- Map 1 (SIMPLE_EDGE), Reducer 10 (SIMPLE_EDGE) +Reducer 7 <- Map 6 (SIMPLE_EDGE) +Reducer 8 <- Reducer 12 (CUSTOM_SIMPLE_EDGE), Reducer 7 (CUSTOM_SIMPLE_EDGE) +Reducer 9 <- Reducer 8 (SIMPLE_EDGE) + +Stage-0 + Fetch Operator + limit:100 + Stage-1 + Reducer 4 vectorized + File Output Operator [FS_135] + Limit [LIM_134] (rows=100 width=218) + Number of rows:100 + Select Operator [SEL_133] (rows=6951 width=218) + Output:["_col0","_col1","_col2"] + <-Reducer 3 [SIMPLE_EDGE] + SHUFFLE [RS_67] + Select Operator [SEL_66] (rows=6951 width=218) + Output:["_col0","_col1","_col2"] + Merge Join Operator [MERGEJOIN_105] (rows=6951 width=218) + Conds:RS_63._col3=RS_64._col3(Inner),Output:["_col1","_col3","_col5"] + <-Reducer 2 [SIMPLE_EDGE] + SHUFFLE [RS_63] + PartitionCols:_col3 + Merge Join Operator [MERGEJOIN_102] (rows=6951 width=111) + Conds:RS_107._col0=RS_127._col0(Inner),Output:["_col1","_col3"] + <-Map 1 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_107] + PartitionCols:_col0 + Select Operator [SEL_106] (rows=462000 width=111) + Output:["_col0","_col1"] + TableScan [TS_0] (rows=462000 width=111) + default@item,i1,Tbl:COMPLETE,Col:COMPLETE,Output:["i_item_sk","i_product_name"] + <-Reducer 9 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_127] + PartitionCols:_col0 + Select Operator [SEL_126] (rows=6951 width=8) + Output:["_col0","_col1"] + Filter Operator [FIL_125] (rows=6951 width=116) + predicate:(rank_window_0 < 11) + PTF Operator [PTF_124] (rows=20854 width=116) + Function definitions:[{},{"name:":"windowingtablefunction","order by:":"_col1 ASC NULLS FIRST","partition by:":"0"}] + Select Operator [SEL_123] (rows=20854 width=116) + Output:["_col0","_col1"] + <-Reducer 8 [SIMPLE_EDGE] + SHUFFLE [RS_21] + PartitionCols:0 + Filter Operator [FIL_20] (rows=20854 width=228) + predicate:(_col1 > (0.9 * _col2)) + Merge Join Operator [MERGEJOIN_101] (rows=62562 width=228) + Conds:(Inner),Output:["_col0","_col1","_col2"] + <-Reducer 12 [CUSTOM_SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_122] + Select Operator [SEL_121] (rows=1 width=112) + Output:["_col0"] + Group By Operator [GBY_120] (rows=1 width=124) + Output:["_col0","_col1","_col2"],aggregations:["sum(VALUE._col0)","count(VALUE._col1)"],keys:KEY._col0 + <-Map 11 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_119] + PartitionCols:_col0 + Group By Operator [GBY_118] (rows=258 width=124) + Output:["_col0","_col1","_col2"],aggregations:["sum(_col1)","count(_col1)"],keys:true + Select Operator [SEL_117] (rows=287946 width=114) + Output:["_col1"] + Filter Operator [FIL_116] (rows=287946 width=114) + predicate:((ss_store_sk = 410) and ss_hdemo_sk is null) + TableScan [TS_9] (rows=575995635 width=114) + default@store_sales,store_sales,Tbl:COMPLETE,Col:COMPLETE,Output:["ss_hdemo_sk","ss_store_sk","ss_net_profit"] + <-Reducer 7 [CUSTOM_SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_115] + Select Operator [SEL_114] (rows=62562 width=116) + Output:["_col0","_col1"] + Group By Operator [GBY_113] (rows=62562 width=124) + Output:["_col0","_col1","_col2"],aggregations:["sum(VALUE._col0)","count(VALUE._col1)"],keys:KEY._col0 + <-Map 6 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_112] + PartitionCols:_col0 + Group By Operator [GBY_111] (rows=3199976 width=124) + Output:["_col0","_col1","_col2"],aggregations:["sum(ss_net_profit)","count(ss_net_profit)"],keys:ss_item_sk + Select Operator [SEL_110] (rows=6399952 width=114) + Output:["ss_item_sk","ss_net_profit"] + Filter Operator [FIL_109] (rows=6399952 width=114) + predicate:(ss_store_sk = 410) + TableScan [TS_2] (rows=575995635 width=114) + default@store_sales,ss1,Tbl:COMPLETE,Col:COMPLETE,Output:["ss_item_sk","ss_store_sk","ss_net_profit"] + <-Reducer 5 [SIMPLE_EDGE] + SHUFFLE [RS_64] + PartitionCols:_col3 + Merge Join Operator [MERGEJOIN_104] (rows=6951 width=111) + Conds:RS_108._col0=RS_132._col0(Inner),Output:["_col1","_col3"] + <-Map 1 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_108] + PartitionCols:_col0 + Please refer to the previous Select Operator [SEL_106] + <-Reducer 10 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_132] + PartitionCols:_col0 + Select Operator [SEL_131] (rows=6951 width=8) + Output:["_col0","_col1"] + Filter Operator [FIL_130] (rows=6951 width=116) + predicate:(rank_window_0 < 11) + PTF Operator [PTF_129] (rows=20854 width=116) + Function definitions:[{},{"name:":"windowingtablefunction","order by:":"_col1 DESC NULLS LAST","partition by:":"0"}] + Select Operator [SEL_128] (rows=20854 width=116) + Output:["_col0","_col1"] + <-Reducer 8 [SIMPLE_EDGE] + SHUFFLE [RS_49] + PartitionCols:0 + Please refer to the previous Filter Operator [FIL_20] + http://git-wip-us.apache.org/repos/asf/hive/blob/f4a48fdb/ql/src/test/results/clientpositive/perf/tez/constraints/query11.q.out ---------------------------------------------------------------------- diff --git a/ql/src/test/results/clientpositive/perf/tez/constraints/query11.q.out b/ql/src/test/results/clientpositive/perf/tez/constraints/query11.q.out index 658ae27..00b6bcb 100644 --- a/ql/src/test/results/clientpositive/perf/tez/constraints/query11.q.out +++ b/ql/src/test/results/clientpositive/perf/tez/constraints/query11.q.out @@ -198,10 +198,10 @@ Stage-0 SHUFFLE [RS_89] Select Operator [SEL_88] (rows=12248093 width=85) Output:["_col0"] - Filter Operator [FIL_87] (rows=12248093 width=541) - predicate:CASE WHEN (_col4) THEN (CASE WHEN (_col7) THEN (((_col1 / _col6) > (_col10 / _col3))) ELSE ((null > (_col10 / _col3))) END) ELSE (CASE WHEN (_col7) THEN (((_col1 / _col6) > null)) ELSE (null) END) END - Merge Join Operator [MERGEJOIN_283] (rows=24496186 width=541) - Conds:RS_84._col2=RS_346._col0(Inner),Output:["_col1","_col3","_col4","_col6","_col7","_col9","_col10"] + Filter Operator [FIL_87] (rows=12248093 width=537) + predicate:CASE WHEN (_col3 is not null) THEN (CASE WHEN (_col6) THEN (((_col1 / _col5) > (_col9 / _col3))) ELSE ((null > (_col9 / _col3))) END) ELSE (CASE WHEN (_col6) THEN (((_col1 / _col5) > null)) ELSE (null) END) END + Merge Join Operator [MERGEJOIN_283] (rows=24496186 width=537) + Conds:RS_84._col2=RS_346._col0(Inner),Output:["_col1","_col3","_col5","_col6","_col8","_col9"] <-Reducer 20 [SIMPLE_EDGE] vectorized SHUFFLE [RS_346] PartitionCols:_col0 @@ -260,8 +260,8 @@ Stage-0 <-Reducer 6 [ONE_TO_ONE_EDGE] FORWARD [RS_84] PartitionCols:_col2 - Merge Join Operator [MERGEJOIN_282] (rows=20485011 width=444) - Conds:RS_81._col2=RS_338._col0(Inner),Output:["_col1","_col2","_col3","_col4","_col6","_col7"] + Merge Join Operator [MERGEJOIN_282] (rows=20485011 width=440) + Conds:RS_81._col2=RS_338._col0(Inner),Output:["_col1","_col2","_col3","_col5","_col6"] <-Reducer 16 [SIMPLE_EDGE] vectorized SHUFFLE [RS_338] PartitionCols:_col0 @@ -320,13 +320,13 @@ Stage-0 <-Reducer 5 [ONE_TO_ONE_EDGE] FORWARD [RS_81] PartitionCols:_col2 - Merge Join Operator [MERGEJOIN_281] (rows=31888273 width=328) - Conds:RS_318._col0=RS_328._col0(Inner),Output:["_col1","_col2","_col3","_col4"] + Merge Join Operator [MERGEJOIN_281] (rows=31888273 width=324) + Conds:RS_318._col0=RS_328._col0(Inner),Output:["_col1","_col2","_col3"] <-Reducer 12 [SIMPLE_EDGE] vectorized SHUFFLE [RS_328] PartitionCols:_col0 - Select Operator [SEL_327] (rows=26666666 width=216) - Output:["_col0","_col1","_col2"] + Select Operator [SEL_327] (rows=26666666 width=212) + Output:["_col0","_col1"] Filter Operator [FIL_326] (rows=26666666 width=212) predicate:(_col7 > 0) Select Operator [SEL_325] (rows=80000000 width=212)
