HIVE-11865: Disable Hive PPD optimizer when CBO has optimized the plan (Jesus Camacho Rodriguez, reviewed by Laljo John Pullokkaran)
Project: http://git-wip-us.apache.org/repos/asf/hive/repo Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/6f44fc81 Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/6f44fc81 Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/6f44fc81 Branch: refs/heads/master Commit: 6f44fc8108d5063594fa75cf4e0cdd7e899692b7 Parents: 1628bf1 Author: Jesus Camacho Rodriguez <[email protected]> Authored: Fri Dec 18 23:17:26 2015 +0100 Committer: Jesus Camacho Rodriguez <[email protected]> Committed: Mon Dec 21 02:06:08 2015 +0100 ---------------------------------------------------------------------- .../test/results/positive/hbase_queries.q.out | 12 +- .../hadoop/hive/ql/optimizer/Optimizer.java | 16 +- .../calcite/reloperators/HiveProject.java | 4 +- .../rules/HiveFilterProjectTSTransposeRule.java | 2 +- .../rules/HiveFilterProjectTransposeRule.java | 18 +- .../rules/HiveFilterSortTransposeRule.java | 71 ++ .../calcite/rules/HiveJoinAddNotNullRule.java | 39 +- .../hadoop/hive/ql/parse/CalcitePlanner.java | 23 +- .../hadoop/hive/ql/parse/SemanticAnalyzer.java | 8 +- .../apache/hadoop/hive/ql/plan/FilterDesc.java | 9 + .../hadoop/hive/ql/ppd/OpProcFactory.java | 50 +- .../hive/ql/ppd/SimplePredicatePushDown.java | 110 ++ .../hive/ql/ppd/SyntheticJoinPredicate.java | 4 +- .../results/clientpositive/auto_join12.q.out | 4 +- .../results/clientpositive/auto_join13.q.out | 41 +- .../results/clientpositive/auto_join16.q.out | 4 +- .../results/clientpositive/auto_join2.q.out | 41 +- .../results/clientpositive/auto_join33.q.out | 12 +- .../clientpositive/auto_join_filters.q.out | 8 +- .../clientpositive/auto_join_nulls.q.out | 2 +- .../clientpositive/auto_join_stats.q.out | 127 +- .../clientpositive/auto_join_stats2.q.out | 75 +- .../auto_join_without_localtask.q.out | 6 +- .../clientpositive/auto_smb_mapjoin_14.q.out | 28 +- .../clientpositive/auto_sortmerge_join_12.q.out | 4 +- .../clientpositive/auto_sortmerge_join_6.q.out | 8 +- .../clientpositive/auto_sortmerge_join_9.q.out | 80 +- .../bucketsortoptimize_insert_7.q.out | 2 +- .../cbo_rp_cross_product_check_2.q.out | 4 +- .../results/clientpositive/cbo_rp_join0.q.out | 29 +- .../results/clientpositive/cbo_rp_join1.q.out | 96 +- .../clientpositive/cbo_rp_lineage2.q.out | 8 +- .../cbo_rp_outer_join_ppr.q.java1.7.out | 28 +- .../clientpositive/correlationoptimizer10.q.out | 24 +- .../clientpositive/correlationoptimizer13.q.out | 4 +- .../clientpositive/correlationoptimizer9.q.out | 16 +- .../results/clientpositive/cross_join.q.out | 4 +- .../clientpositive/cross_product_check_1.q.out | 6 +- .../clientpositive/cross_product_check_2.q.out | 16 +- .../clientpositive/dynamic_rdd_cache.q.out | 12 +- .../encryption_join_unencrypted_tbl.q.out | 4 +- .../clientpositive/explain_logical.q.out | 74 +- .../clientpositive/filter_cond_pushdown.q.out | 2 +- .../clientpositive/fouter_join_ppr.q.out | 4 +- .../clientpositive/groupby_grouping_sets4.q.out | 84 +- .../clientpositive/groupby_position.q.out | 4 +- .../test/results/clientpositive/having2.q.out | 58 +- .../clientpositive/index_auto_mult_tables.q.out | 12 +- .../index_auto_mult_tables_compact.q.out | 12 +- .../infer_bucket_sort_map_operators.q.out | 4 +- ql/src/test/results/clientpositive/join12.q.out | 4 +- ql/src/test/results/clientpositive/join13.q.out | 17 +- ql/src/test/results/clientpositive/join16.q.out | 4 +- ql/src/test/results/clientpositive/join2.q.out | 17 +- .../clientpositive/join_cond_pushdown_1.q.out | 2 +- .../clientpositive/join_cond_pushdown_3.q.out | 2 +- .../results/clientpositive/join_reorder.q.out | 10 +- .../llap/bucket_map_join_tez2.q.out | 12 +- .../llap/dynamic_partition_pruning.q.out | 265 ++-- .../clientpositive/llap/mapjoin_decimal.q.out | 1 - .../llap/tez_dynpart_hashjoin_2.q.out | 18 +- .../llap/tez_vector_dynpart_hashjoin_2.q.out | 18 +- .../llap/vector_join_part_col_char.q.out | 2 +- .../vectorized_dynamic_partition_pruning.q.out | 265 ++-- .../clientpositive/louter_join_ppr.q.out | 4 +- .../clientpositive/mapjoin_mapjoin.q.out | 2 +- .../results/clientpositive/multiMapJoin1.q.out | 36 +- .../nonblock_op_deduplicate.q.out | 4 +- .../test/results/clientpositive/orc_llap.q.out | 6 +- .../clientpositive/outer_join_ppr.q.java1.7.out | 2 +- .../clientpositive/partition_boolexpr.q.out | 26 +- .../results/clientpositive/perf/query13.q.out | 66 +- .../results/clientpositive/perf/query15.q.out | 50 +- .../results/clientpositive/perf/query17.q.out | 76 +- .../results/clientpositive/perf/query18.q.out | 68 +- .../results/clientpositive/perf/query19.q.out | 60 +- .../results/clientpositive/perf/query20.q.out | 44 +- .../results/clientpositive/perf/query21.q.out | 44 +- .../results/clientpositive/perf/query22.q.out | 44 +- .../results/clientpositive/perf/query25.q.out | 76 +- .../results/clientpositive/perf/query26.q.out | 50 +- .../results/clientpositive/perf/query27.q.out | 52 +- .../results/clientpositive/perf/query28.q.out | 32 +- .../results/clientpositive/perf/query29.q.out | 76 +- .../results/clientpositive/perf/query3.q.out | 34 +- .../results/clientpositive/perf/query31.q.out | 260 ++-- .../results/clientpositive/perf/query32.q.out | 64 +- .../results/clientpositive/perf/query34.q.out | 62 +- .../results/clientpositive/perf/query39.q.out | 114 +- .../results/clientpositive/perf/query40.q.out | 51 +- .../results/clientpositive/perf/query42.q.out | 34 +- .../results/clientpositive/perf/query43.q.out | 34 +- .../results/clientpositive/perf/query45.q.out | 60 +- .../results/clientpositive/perf/query46.q.out | 80 +- .../results/clientpositive/perf/query48.q.out | 50 +- .../results/clientpositive/perf/query50.q.out | 50 +- .../results/clientpositive/perf/query51.q.out | 76 +- .../results/clientpositive/perf/query52.q.out | 34 +- .../results/clientpositive/perf/query54.q.out | 108 +- .../results/clientpositive/perf/query55.q.out | 34 +- .../results/clientpositive/perf/query58.q.out | 196 +-- .../results/clientpositive/perf/query64.q.out | 468 +++---- .../results/clientpositive/perf/query65.q.out | 88 +- .../results/clientpositive/perf/query66.q.out | 130 +- .../results/clientpositive/perf/query67.q.out | 52 +- .../results/clientpositive/perf/query68.q.out | 80 +- .../results/clientpositive/perf/query7.q.out | 50 +- .../results/clientpositive/perf/query70.q.out | 88 +- .../results/clientpositive/perf/query71.q.out | 90 +- .../results/clientpositive/perf/query72.q.out | 138 +- .../results/clientpositive/perf/query73.q.out | 62 +- .../results/clientpositive/perf/query75.q.out | 282 ++--- .../results/clientpositive/perf/query76.q.out | 106 +- .../results/clientpositive/perf/query79.q.out | 58 +- .../results/clientpositive/perf/query80.q.out | 235 ++-- .../results/clientpositive/perf/query82.q.out | 36 +- .../results/clientpositive/perf/query84.q.out | 48 +- .../results/clientpositive/perf/query85.q.out | 100 +- .../results/clientpositive/perf/query87.q.out | 130 +- .../results/clientpositive/perf/query88.q.out | 426 +++---- .../results/clientpositive/perf/query89.q.out | 56 +- .../results/clientpositive/perf/query90.q.out | 98 +- .../results/clientpositive/perf/query91.q.out | 66 +- .../results/clientpositive/perf/query92.q.out | 58 +- .../results/clientpositive/perf/query93.q.out | 41 +- .../results/clientpositive/perf/query94.q.out | 84 +- .../results/clientpositive/perf/query95.q.out | 106 +- .../results/clientpositive/perf/query96.q.out | 40 +- .../results/clientpositive/perf/query97.q.out | 60 +- .../results/clientpositive/perf/query98.q.out | 42 +- .../results/clientpositive/ppd_gby_join.q.out | 30 +- .../test/results/clientpositive/ppd_join.q.out | 32 +- .../test/results/clientpositive/ppd_join2.q.out | 60 +- .../test/results/clientpositive/ppd_join3.q.out | 62 +- .../test/results/clientpositive/ppd_join5.q.out | 6 +- .../clientpositive/ppd_join_filter.q.out | 108 +- .../clientpositive/ppd_outer_join2.q.out | 34 +- .../clientpositive/ppd_outer_join3.q.out | 34 +- .../clientpositive/ppd_outer_join4.q.out | 49 +- .../results/clientpositive/ppd_random.q.out | 24 +- .../clientpositive/ppd_repeated_alias.q.out | 2 +- .../results/clientpositive/ppd_udf_case.q.out | 26 +- .../results/clientpositive/ptfgroupbyjoin.q.out | 48 +- .../clientpositive/router_join_ppr.q.out | 4 +- .../test/results/clientpositive/skewjoin.q.out | 14 +- .../results/clientpositive/skewjoinopt18.q.out | 4 +- .../clientpositive/spark/auto_join12.q.out | 4 +- .../clientpositive/spark/auto_join13.q.out | 45 +- .../clientpositive/spark/auto_join16.q.out | 4 +- .../clientpositive/spark/auto_join2.q.out | 45 +- .../spark/auto_join_filters.q.out | 8 +- .../clientpositive/spark/auto_join_nulls.q.out | 2 +- .../clientpositive/spark/auto_join_stats.q.out | 85 +- .../clientpositive/spark/auto_join_stats2.q.out | 85 +- .../spark/auto_join_without_localtask.q.out | 2 +- .../spark/auto_smb_mapjoin_14.q.out | 28 +- .../spark/auto_sortmerge_join_12.q.out | 4 +- .../spark/auto_sortmerge_join_6.q.out | 8 +- .../spark/auto_sortmerge_join_9.q.out | 66 +- .../spark/bucket_map_join_tez2.q.out | 12 +- .../spark/bucketsortoptimize_insert_7.q.out | 4 +- .../clientpositive/spark/cross_join.q.out | 4 +- .../spark/cross_product_check_1.q.out | 6 +- .../spark/cross_product_check_2.q.out | 12 +- .../spark/dynamic_rdd_cache.q.out | 12 +- .../clientpositive/spark/groupby_position.q.out | 4 +- .../spark/infer_bucket_sort_map_operators.q.out | 4 +- .../results/clientpositive/spark/join12.q.out | 4 +- .../results/clientpositive/spark/join13.q.out | 15 +- .../results/clientpositive/spark/join16.q.out | 4 +- .../results/clientpositive/spark/join2.q.out | 15 +- .../spark/join_cond_pushdown_1.q.out | 2 +- .../spark/join_cond_pushdown_3.q.out | 2 +- .../clientpositive/spark/join_reorder.q.out | 10 +- .../clientpositive/spark/louter_join_ppr.q.out | 4 +- .../clientpositive/spark/mapjoin_mapjoin.q.out | 2 +- .../spark/outer_join_ppr.q.java1.7.out | 2 +- .../clientpositive/spark/ppd_gby_join.q.out | 30 +- .../results/clientpositive/spark/ppd_join.q.out | 32 +- .../clientpositive/spark/ppd_join2.q.out | 58 +- .../clientpositive/spark/ppd_join3.q.out | 60 +- .../clientpositive/spark/ppd_join5.q.out | 6 +- .../clientpositive/spark/ppd_join_filter.q.out | 68 +- .../clientpositive/spark/ppd_outer_join2.q.out | 34 +- .../clientpositive/spark/ppd_outer_join3.q.out | 34 +- .../clientpositive/spark/ppd_outer_join4.q.out | 49 +- .../clientpositive/spark/router_join_ppr.q.out | 4 +- .../results/clientpositive/spark/skewjoin.q.out | 14 +- .../clientpositive/spark/skewjoinopt18.q.out | 4 +- .../clientpositive/spark/subquery_exists.q.out | 4 +- .../clientpositive/spark/subquery_in.q.out | 21 +- .../spark/table_access_keys_stats.q.out | 24 +- .../clientpositive/subquery_exists.q.out | 4 +- .../results/clientpositive/subquery_in.q.out | 21 +- .../clientpositive/subquery_in_having.q.out | 4 +- .../subquery_notin_having.q.java1.7.out | 2 +- .../subquery_unqualcolumnrefs.q.out | 16 +- .../results/clientpositive/subquery_views.q.out | 4 +- .../table_access_keys_stats.q.out | 24 +- .../clientpositive/tez/auto_join_filters.q.out | 10 +- .../clientpositive/tez/auto_join_nulls.q.out | 2 +- .../tez/auto_sortmerge_join_12.q.out | 4 +- .../tez/auto_sortmerge_join_6.q.out | 12 +- .../tez/auto_sortmerge_join_9.q.out | 74 +- .../tez/bucket_map_join_tez2.q.out | 12 +- .../results/clientpositive/tez/cross_join.q.out | 8 +- .../tez/cross_product_check_1.q.out | 12 +- .../tez/cross_product_check_2.q.out | 12 +- .../tez/dynamic_partition_pruning.q.out | 265 ++-- .../clientpositive/tez/explainuser_1.q.out | 1015 ++++++++------- .../clientpositive/tez/explainuser_2.q.out | 1180 +++++++++--------- .../clientpositive/tez/explainuser_3.q.out | 12 +- .../clientpositive/tez/mapjoin_decimal.q.out | 1 - .../clientpositive/tez/mapjoin_mapjoin.q.out | 2 +- .../results/clientpositive/tez/mergejoin.q.out | 2 +- .../results/clientpositive/tez/skewjoin.q.out | 14 +- .../clientpositive/tez/subquery_exists.q.out | 4 +- .../clientpositive/tez/subquery_in.q.out | 21 +- .../tez/tez_dynpart_hashjoin_2.q.out | 18 +- .../tez/tez_vector_dynpart_hashjoin_2.q.out | 18 +- .../tez/vector_auto_smb_mapjoin_14.q.out | 234 ++-- .../tez/vector_between_columns.q.out | 4 +- .../tez/vector_binary_join_groupby.q.out | 2 +- .../tez/vector_char_mapjoin1.q.out | 4 +- .../tez/vector_decimal_mapjoin.q.out | 1 - .../tez/vector_interval_mapjoin.q.out | 52 +- .../tez/vector_join_filters.q.out | 2 +- .../clientpositive/tez/vector_join_nulls.q.out | 2 +- .../tez/vector_join_part_col_char.q.out | 22 +- .../tez/vector_varchar_mapjoin1.q.out | 2 +- .../vectorized_dynamic_partition_pruning.q.out | 265 ++-- .../vector_auto_smb_mapjoin_14.q.out | 28 +- .../clientpositive/vector_between_columns.q.out | 4 +- .../vector_binary_join_groupby.q.out | 2 +- .../clientpositive/vector_char_mapjoin1.q.out | 2 +- .../vector_interval_mapjoin.q.out | 46 +- .../clientpositive/vector_join_filters.q.out | 2 +- .../clientpositive/vector_join_nulls.q.out | 2 +- .../vector_varchar_mapjoin1.q.out | 2 +- 239 files changed, 6109 insertions(+), 6252 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/hive/blob/6f44fc81/hbase-handler/src/test/results/positive/hbase_queries.q.out ---------------------------------------------------------------------- diff --git a/hbase-handler/src/test/results/positive/hbase_queries.q.out b/hbase-handler/src/test/results/positive/hbase_queries.q.out index 3907bc9..4f10564 100644 --- a/hbase-handler/src/test/results/positive/hbase_queries.q.out +++ b/hbase-handler/src/test/results/positive/hbase_queries.q.out @@ -121,7 +121,7 @@ STAGE PLANS: alias: hbase_table_1 Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE Filter Operator - predicate: (key is not null and UDFToDouble(key) is not null) (type: boolean) + predicate: key is not null (type: boolean) Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE Select Operator expressions: key (type: int) @@ -136,7 +136,7 @@ STAGE PLANS: alias: src Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: (key is not null and UDFToDouble(key) is not null) (type: boolean) + predicate: key is not null (type: boolean) Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: key (type: string), value (type: string) @@ -272,7 +272,7 @@ STAGE PLANS: alias: hbase_table_1 Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE Filter Operator - predicate: (((100 < key) and (key < 120)) and key is not null) (type: boolean) + predicate: ((100 < key) and (key < 120)) (type: boolean) Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE Select Operator expressions: key (type: int) @@ -287,7 +287,7 @@ STAGE PLANS: alias: hbase_table_2 Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE Filter Operator - predicate: (((key < 120) and (100 < key)) and key is not null) (type: boolean) + predicate: ((key < 120) and (100 < key)) (type: boolean) Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE Select Operator expressions: key (type: int), value (type: string) @@ -494,7 +494,7 @@ STAGE PLANS: alias: src Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: (key is not null and UDFToDouble(key) is not null) (type: boolean) + predicate: key is not null (type: boolean) Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: count(key) @@ -529,7 +529,7 @@ STAGE PLANS: alias: hbase_table_1 Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE Filter Operator - predicate: (key is not null and UDFToDouble(key) is not null) (type: boolean) + predicate: key is not null (type: boolean) Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE Select Operator expressions: key (type: int), value (type: string) http://git-wip-us.apache.org/repos/asf/hive/blob/6f44fc81/ql/src/java/org/apache/hadoop/hive/ql/optimizer/Optimizer.java ---------------------------------------------------------------------- diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/Optimizer.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/Optimizer.java index 8f48e7d..7ec068c 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/Optimizer.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/Optimizer.java @@ -22,8 +22,6 @@ import java.util.ArrayList; import java.util.List; import java.util.Set; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; import org.apache.hadoop.hive.conf.HiveConf; import org.apache.hadoop.hive.ql.optimizer.calcite.translator.HiveOpConverterPostProc; import org.apache.hadoop.hive.ql.optimizer.correlation.CorrelationOptimizer; @@ -40,7 +38,10 @@ import org.apache.hadoop.hive.ql.parse.ParseContext; import org.apache.hadoop.hive.ql.parse.SemanticException; import org.apache.hadoop.hive.ql.ppd.PredicatePushDown; import org.apache.hadoop.hive.ql.ppd.PredicateTransitivePropagate; +import org.apache.hadoop.hive.ql.ppd.SimplePredicatePushDown; import org.apache.hadoop.hive.ql.ppd.SyntheticJoinPredicate; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; import com.google.common.base.Splitter; import com.google.common.base.Strings; @@ -91,18 +92,25 @@ public class Optimizer { transformations.add(new PartitionColumnsSeparator()); } - if (HiveConf.getBoolVar(hiveConf, HiveConf.ConfVars.HIVEOPTPPD)) { + if (HiveConf.getBoolVar(hiveConf, HiveConf.ConfVars.HIVEOPTPPD) && + !pctx.getContext().isCboSucceeded()) { transformations.add(new PredicateTransitivePropagate()); if (HiveConf.getBoolVar(hiveConf, HiveConf.ConfVars.HIVEOPTCONSTANTPROPAGATION)) { transformations.add(new ConstantPropagate()); } transformations.add(new SyntheticJoinPredicate()); transformations.add(new PredicatePushDown()); + } else if (pctx.getContext().isCboSucceeded()) { + if (HiveConf.getBoolVar(hiveConf, HiveConf.ConfVars.HIVEOPTCONSTANTPROPAGATION)) { + transformations.add(new ConstantPropagate()); + } + transformations.add(new SyntheticJoinPredicate()); + transformations.add(new SimplePredicatePushDown()); } if (HiveConf.getBoolVar(hiveConf, HiveConf.ConfVars.HIVEOPTCONSTANTPROPAGATION)) { // We run constant propagation twice because after predicate pushdown, filter expressions // are combined and may become eligible for reduction (like is not null filter). - transformations.add(new ConstantPropagate()); + transformations.add(new ConstantPropagate()); } if (HiveConf.getBoolVar(hiveConf, HiveConf.ConfVars.HIVEOPTPPD)) { http://git-wip-us.apache.org/repos/asf/hive/blob/6f44fc81/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/reloperators/HiveProject.java ---------------------------------------------------------------------- diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/reloperators/HiveProject.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/reloperators/HiveProject.java index 142812c..c5376bf 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/reloperators/HiveProject.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/reloperators/HiveProject.java @@ -166,7 +166,7 @@ public class HiveProject extends Project implements HiveRelNode { public Project copy(RelTraitSet traitSet, RelNode input, List<RexNode> exps, RelDataType rowType) { assert traitSet.containsIfApplicable(HiveRelNode.CONVENTION); HiveProject hp = new HiveProject(getCluster(), traitSet, input, exps, rowType, getFlags()); - if (this.isSysnthetic()) { + if (this.isSynthetic()) { hp.setSynthetic(); } @@ -192,7 +192,7 @@ public class HiveProject extends Project implements HiveRelNode { this.isSysnthetic = true; } - public boolean isSysnthetic() { + public boolean isSynthetic() { return isSysnthetic; } http://git-wip-us.apache.org/repos/asf/hive/blob/6f44fc81/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveFilterProjectTSTransposeRule.java ---------------------------------------------------------------------- diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveFilterProjectTSTransposeRule.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveFilterProjectTSTransposeRule.java index 8321504..f81c21b 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveFilterProjectTSTransposeRule.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveFilterProjectTSTransposeRule.java @@ -58,7 +58,7 @@ public class HiveFilterProjectTSTransposeRule extends RelOptRule { // 2. If ProjectRel is not synthetic then PPD would have already pushed // relevant pieces down and hence no point in running PPD again. // 3. For synthetic Projects we don't care about non deterministic UDFs - if (!projRel.isSysnthetic()) { + if (!projRel.isSynthetic()) { return false; } http://git-wip-us.apache.org/repos/asf/hive/blob/6f44fc81/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveFilterProjectTransposeRule.java ---------------------------------------------------------------------- diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveFilterProjectTransposeRule.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveFilterProjectTransposeRule.java index 7e484b9..1e947c3 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveFilterProjectTransposeRule.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveFilterProjectTransposeRule.java @@ -25,23 +25,37 @@ import org.apache.calcite.rel.core.RelFactories.ProjectFactory; import org.apache.calcite.rel.rules.FilterProjectTransposeRule; import org.apache.calcite.rex.RexNode; import org.apache.hadoop.hive.ql.optimizer.calcite.HiveCalciteUtil; +import org.apache.hadoop.hive.ql.optimizer.calcite.HiveRelFactories; +import org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveProject; public class HiveFilterProjectTransposeRule extends FilterProjectTransposeRule { + public static final HiveFilterProjectTransposeRule INSTANCE_DETERMINISTIC = + new HiveFilterProjectTransposeRule(Filter.class, HiveRelFactories.HIVE_FILTER_FACTORY, + HiveProject.class, HiveRelFactories.HIVE_PROJECT_FACTORY, true); + + public static final HiveFilterProjectTransposeRule INSTANCE = + new HiveFilterProjectTransposeRule(Filter.class, HiveRelFactories.HIVE_FILTER_FACTORY, + HiveProject.class, HiveRelFactories.HIVE_PROJECT_FACTORY, false); + + private final boolean onlyDeterministic; + public HiveFilterProjectTransposeRule(Class<? extends Filter> filterClass, FilterFactory filterFactory, Class<? extends Project> projectClass, - ProjectFactory projectFactory) { + ProjectFactory projectFactory, boolean onlyDeterministic) { super(filterClass, filterFactory, projectClass, projectFactory); + this.onlyDeterministic = onlyDeterministic; } @Override public boolean matches(RelOptRuleCall call) { final Filter filterRel = call.rel(0); RexNode condition = filterRel.getCondition(); - if (!HiveCalciteUtil.isDeterministic(condition)) { + if (this.onlyDeterministic && !HiveCalciteUtil.isDeterministic(condition)) { return false; } return super.matches(call); } + } http://git-wip-us.apache.org/repos/asf/hive/blob/6f44fc81/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveFilterSortTransposeRule.java ---------------------------------------------------------------------- diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveFilterSortTransposeRule.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveFilterSortTransposeRule.java new file mode 100644 index 0000000..cfd879f --- /dev/null +++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveFilterSortTransposeRule.java @@ -0,0 +1,71 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.hadoop.hive.ql.optimizer.calcite.rules; + +import org.apache.calcite.plan.RelOptRule; +import org.apache.calcite.plan.RelOptRuleCall; +import org.apache.calcite.rel.RelNode; +import org.apache.hadoop.hive.ql.optimizer.calcite.HiveCalciteUtil; +import org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveFilter; +import org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveSortLimit; + +import com.google.common.collect.ImmutableList; + +public class HiveFilterSortTransposeRule extends RelOptRule { + + public static final HiveFilterSortTransposeRule INSTANCE = + new HiveFilterSortTransposeRule(); + + //~ Constructors ----------------------------------------------------------- + + /** + * Creates a HiveFilterSortTransposeRule. + */ + private HiveFilterSortTransposeRule() { + super( + operand( + HiveFilter.class, + operand(HiveSortLimit.class, any()))); + } + + //~ Methods ---------------------------------------------------------------- + + public boolean matches(RelOptRuleCall call) { + final HiveSortLimit sort = call.rel(1); + + // If sort contains a limit operation, we bail out + if (HiveCalciteUtil.limitRelNode(sort)) { + return false; + } + + return true; + } + + public void onMatch(RelOptRuleCall call) { + final HiveFilter filter = call.rel(0); + final HiveSortLimit sort = call.rel(1); + + final RelNode newFilter = filter.copy(sort.getInput().getTraitSet(), + ImmutableList.<RelNode>of(sort.getInput())); + final HiveSortLimit newSort = sort.copy(sort.getTraitSet(), + newFilter, sort.collation, sort.offset, sort.fetch); + + call.transformTo(newSort); + } + +} http://git-wip-us.apache.org/repos/asf/hive/blob/6f44fc81/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveJoinAddNotNullRule.java ---------------------------------------------------------------------- diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveJoinAddNotNullRule.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveJoinAddNotNullRule.java index c8de1d8..de880ce 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveJoinAddNotNullRule.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveJoinAddNotNullRule.java @@ -36,19 +36,13 @@ import org.apache.calcite.rex.RexCall; import org.apache.calcite.rex.RexNode; import org.apache.calcite.rex.RexUtil; import org.apache.calcite.sql.SqlKind; -import org.apache.calcite.sql.SqlOperator; -import org.apache.calcite.sql.type.SqlTypeName; -import org.apache.hadoop.hive.ql.exec.FunctionRegistry; +import org.apache.calcite.sql.fun.SqlStdOperatorTable; import org.apache.hadoop.hive.ql.optimizer.calcite.CalciteSemanticException; import org.apache.hadoop.hive.ql.optimizer.calcite.HiveCalciteUtil; import org.apache.hadoop.hive.ql.optimizer.calcite.HiveCalciteUtil.JoinLeafPredicateInfo; import org.apache.hadoop.hive.ql.optimizer.calcite.HiveCalciteUtil.JoinPredicateInfo; import org.apache.hadoop.hive.ql.optimizer.calcite.HiveRelFactories; import org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveFilter; -import org.apache.hadoop.hive.ql.optimizer.calcite.translator.SqlFunctionConverter; -import org.apache.hadoop.hive.ql.parse.SemanticException; - -import com.google.common.collect.ImmutableList; public final class HiveJoinAddNotNullRule extends RelOptRule { @@ -146,9 +140,6 @@ public final class HiveJoinAddNotNullRule extends RelOptRule { boolean added = false; - final RelDataType returnType = cluster.getTypeFactory(). - createSqlType(SqlTypeName.BOOLEAN); - final Map<String,RexNode> newConditions; if (input instanceof HiveFilter) { newConditions = splitCondition(((HiveFilter) input).getCondition()); @@ -157,23 +148,17 @@ public final class HiveJoinAddNotNullRule extends RelOptRule { newConditions = new HashMap<String,RexNode>(); } for (int pos : inputKeyPositions) { - try { - RelDataType keyType = input.getRowType().getFieldList().get(pos).getType(); - // Nothing to do if key cannot be null - if (!keyType.isNullable()) { - continue; - } - SqlOperator funcCall = SqlFunctionConverter.getCalciteOperator(NOT_NULL_FUNC_NAME, - FunctionRegistry.getFunctionInfo(NOT_NULL_FUNC_NAME).getGenericUDF(), - ImmutableList.of(keyType), returnType); - RexNode cond = rexBuilder.makeCall(funcCall, rexBuilder.makeInputRef(input, pos)); - String digest = cond.toString(); - if (!newConditions.containsKey(digest)) { - newConditions.put(digest,cond); - added = true; - } - } catch (SemanticException e) { - throw new AssertionError(e.getMessage()); + RelDataType keyType = input.getRowType().getFieldList().get(pos).getType(); + // Nothing to do if key cannot be null + if (!keyType.isNullable()) { + continue; + } + RexNode cond = rexBuilder.makeCall(SqlStdOperatorTable.IS_NOT_NULL, + rexBuilder.makeInputRef(input, pos)); + String digest = cond.toString(); + if (!newConditions.containsKey(digest)) { + newConditions.put(digest,cond); + added = true; } } // Nothing will be added to the expression http://git-wip-us.apache.org/repos/asf/hive/blob/6f44fc81/ql/src/java/org/apache/hadoop/hive/ql/parse/CalcitePlanner.java ---------------------------------------------------------------------- diff --git a/ql/src/java/org/apache/hadoop/hive/ql/parse/CalcitePlanner.java b/ql/src/java/org/apache/hadoop/hive/ql/parse/CalcitePlanner.java index f2da304..87b18b7 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/parse/CalcitePlanner.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/parse/CalcitePlanner.java @@ -21,12 +21,12 @@ import java.lang.reflect.Field; import java.lang.reflect.InvocationTargetException; import java.lang.reflect.UndeclaredThrowableException; import java.math.BigDecimal; +import java.util.AbstractMap.SimpleEntry; import java.util.ArrayList; import java.util.BitSet; import java.util.Collections; import java.util.EnumSet; import java.util.HashMap; -import java.util.AbstractMap.SimpleEntry; import java.util.HashSet; import java.util.Iterator; import java.util.LinkedHashMap; @@ -143,6 +143,7 @@ import org.apache.hadoop.hive.ql.optimizer.calcite.rules.HiveFilterJoinRule; import org.apache.hadoop.hive.ql.optimizer.calcite.rules.HiveFilterProjectTSTransposeRule; import org.apache.hadoop.hive.ql.optimizer.calcite.rules.HiveFilterProjectTransposeRule; import org.apache.hadoop.hive.ql.optimizer.calcite.rules.HiveFilterSetOpTransposeRule; +import org.apache.hadoop.hive.ql.optimizer.calcite.rules.HiveFilterSortTransposeRule; import org.apache.hadoop.hive.ql.optimizer.calcite.rules.HiveInsertExchange4JoinRule; import org.apache.hadoop.hive.ql.optimizer.calcite.rules.HiveJoinAddNotNullRule; import org.apache.hadoop.hive.ql.optimizer.calcite.rules.HiveJoinCommuteRule; @@ -489,6 +490,11 @@ public class CalcitePlanner extends SemanticAnalyzer { } @Override + boolean isCBOExecuted() { + return runCBO; + } + + @Override boolean continueJoinMerge() { return !(runCBO && disableSemJoinReordering); } @@ -995,6 +1001,9 @@ public class CalcitePlanner extends SemanticAnalyzer { calciteOptimizedPlan = hepPlan(calciteOptimizedPlan, false, mdProvider.getMetadataProvider(), HepMatchOrder.BOTTOM_UP, ProjectRemoveRule.INSTANCE, new ProjectMergeRule(false, HiveRelFactories.HIVE_PROJECT_FACTORY)); + calciteOptimizedPlan = hepPlan(calciteOptimizedPlan, true, mdProvider.getMetadataProvider(), + new HiveFilterProjectTSTransposeRule(Filter.class, HiveRelFactories.HIVE_FILTER_FACTORY, + HiveProject.class, HiveRelFactories.HIVE_PROJECT_FACTORY, HiveTableScan.class)); // 8.2. Introduce exchange operators below join/multijoin operators calciteOptimizedPlan = hepPlan(calciteOptimizedPlan, false, mdProvider.getMetadataProvider(), @@ -1061,10 +1070,8 @@ public class CalcitePlanner extends SemanticAnalyzer { // TODO: Add in ReduceExpressionrules (Constant folding) to below once // HIVE-11927 is fixed. perfLogger.PerfLogBegin(this.getClass().getName(), PerfLogger.OPTIMIZER); - basePlan = hepPlan(basePlan, true, mdProvider, new HiveFilterProjectTransposeRule( - Filter.class, HiveRelFactories.HIVE_FILTER_FACTORY, HiveProject.class, - HiveRelFactories.HIVE_PROJECT_FACTORY), new HiveFilterSetOpTransposeRule( - HiveRelFactories.HIVE_FILTER_FACTORY), HiveFilterJoinRule.JOIN, + basePlan = hepPlan(basePlan, true, mdProvider, HiveFilterProjectTransposeRule.INSTANCE_DETERMINISTIC, + HiveFilterSetOpTransposeRule.INSTANCE, HiveFilterSortTransposeRule.INSTANCE, HiveFilterJoinRule.JOIN, HiveFilterJoinRule.FILTER_ON_JOIN, new HiveFilterAggregateTransposeRule(Filter.class, HiveRelFactories.HIVE_FILTER_FACTORY, Aggregate.class), new FilterMergeRule( HiveRelFactories.HIVE_FILTER_FACTORY)); @@ -1115,10 +1122,8 @@ public class CalcitePlanner extends SemanticAnalyzer { // TODO: Add in ReduceExpressionrules (Constant folding) to below once // HIVE-11927 is fixed. perfLogger.PerfLogBegin(this.getClass().getName(), PerfLogger.OPTIMIZER); - basePlan = hepPlan(basePlan, true, mdProvider, new HiveFilterProjectTransposeRule( - Filter.class, HiveRelFactories.HIVE_FILTER_FACTORY, HiveProject.class, - HiveRelFactories.HIVE_PROJECT_FACTORY), new HiveFilterSetOpTransposeRule( - HiveRelFactories.HIVE_FILTER_FACTORY), HiveFilterJoinRule.JOIN, + basePlan = hepPlan(basePlan, true, mdProvider, HiveFilterProjectTransposeRule.INSTANCE_DETERMINISTIC, + HiveFilterSetOpTransposeRule.INSTANCE, HiveFilterSortTransposeRule.INSTANCE, HiveFilterJoinRule.JOIN, HiveFilterJoinRule.FILTER_ON_JOIN, new HiveFilterAggregateTransposeRule(Filter.class, HiveRelFactories.HIVE_FILTER_FACTORY, Aggregate.class), new FilterMergeRule( HiveRelFactories.HIVE_FILTER_FACTORY)); http://git-wip-us.apache.org/repos/asf/hive/blob/6f44fc81/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java ---------------------------------------------------------------------- diff --git a/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java b/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java index 27549dc..ea776ca 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java @@ -7566,7 +7566,9 @@ public class SemanticAnalyzer extends BaseSemanticAnalyzer { for (int i = 0; i < srcOps.length; i++) { // generate a ReduceSink operator for the join String[] srcs = baseSrc[i] != null ? new String[] {baseSrc[i]} : joinTree.getLeftAliases(); - srcOps[i] = genNotNullFilterForJoinSourcePlan(qb, srcOps[i], joinTree, joinKeys[i]); + if (!isCBOExecuted()) { + srcOps[i] = genNotNullFilterForJoinSourcePlan(qb, srcOps[i], joinTree, joinKeys[i]); + } srcOps[i] = genJoinReduceSinkChild(qb, joinKeys[i], srcOps[i], srcs, joinTree.getNextTag()); } @@ -8436,6 +8438,10 @@ public class SemanticAnalyzer extends BaseSemanticAnalyzer { return new ObjectPair(res, tgtToNodeExprMap); } + boolean isCBOExecuted() { + return false; + } + boolean continueJoinMerge() { return true; } http://git-wip-us.apache.org/repos/asf/hive/blob/6f44fc81/ql/src/java/org/apache/hadoop/hive/ql/plan/FilterDesc.java ---------------------------------------------------------------------- diff --git a/ql/src/java/org/apache/hadoop/hive/ql/plan/FilterDesc.java b/ql/src/java/org/apache/hadoop/hive/ql/plan/FilterDesc.java index ccc4bb4..d04cb78 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/plan/FilterDesc.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/plan/FilterDesc.java @@ -80,6 +80,7 @@ public class FilterDesc extends AbstractOperatorDesc { private static final long serialVersionUID = 1L; private org.apache.hadoop.hive.ql.plan.ExprNodeDesc predicate; private boolean isSamplingPred; + private boolean syntheticJoinPredicate; private transient SampleDesc sampleDescr; //Is this a filter that should perform a comparison for sorted searches private boolean isSortedFilter; @@ -163,6 +164,14 @@ public class FilterDesc extends AbstractOperatorDesc { this.isGenerated = isGenerated; } + public boolean isSyntheticJoinPredicate() { + return syntheticJoinPredicate; + } + + public void setSyntheticJoinPredicate(boolean syntheticJoinPredicate) { + this.syntheticJoinPredicate = syntheticJoinPredicate; + } + @Override public Object clone() { FilterDesc filterDesc = new FilterDesc(getPredicate().clone(), getIsSamplingPred()); http://git-wip-us.apache.org/repos/asf/hive/blob/6f44fc81/ql/src/java/org/apache/hadoop/hive/ql/ppd/OpProcFactory.java ---------------------------------------------------------------------- diff --git a/ql/src/java/org/apache/hadoop/hive/ql/ppd/OpProcFactory.java b/ql/src/java/org/apache/hadoop/hive/ql/ppd/OpProcFactory.java index 1702628..4702f01 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/ppd/OpProcFactory.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/ppd/OpProcFactory.java @@ -28,8 +28,6 @@ import java.util.Map.Entry; import java.util.Set; import java.util.Stack; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; import org.apache.hadoop.hive.conf.HiveConf; import org.apache.hadoop.hive.ql.exec.FilterOperator; import org.apache.hadoop.hive.ql.exec.JoinOperator; @@ -39,6 +37,7 @@ import org.apache.hadoop.hive.ql.exec.OperatorFactory; import org.apache.hadoop.hive.ql.exec.PTFOperator; import org.apache.hadoop.hive.ql.exec.ReduceSinkOperator; import org.apache.hadoop.hive.ql.exec.RowSchema; +import org.apache.hadoop.hive.ql.exec.SelectOperator; import org.apache.hadoop.hive.ql.exec.TableScanOperator; import org.apache.hadoop.hive.ql.exec.Utilities; import org.apache.hadoop.hive.ql.lib.Node; @@ -75,6 +74,8 @@ import org.apache.hadoop.hive.ql.udf.generic.GenericUDFOPLessThan; import org.apache.hadoop.hive.serde2.Deserializer; import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory; import org.apache.hadoop.mapred.JobConf; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; /** * Operator factory for predicate pushdown processing of operator graph Each @@ -400,6 +401,11 @@ public final class OpProcFactory { @Override public Object process(Node nd, Stack<Node> stack, NodeProcessorCtx procCtx, Object... nodeOutputs) throws SemanticException { + return process(nd, stack, procCtx, false, nodeOutputs); + } + + Object process(Node nd, Stack<Node> stack, NodeProcessorCtx procCtx, + boolean onlySyntheticJoinPredicate, Object... nodeOutputs) throws SemanticException { LOG.info("Processing for " + nd.getName() + "(" + ((Operator) nd).getIdentifier() + ")"); @@ -411,7 +417,9 @@ public final class OpProcFactory { // Don't push a sampling predicate since createFilter() always creates filter // with isSamplePred = false. Also, the filterop with sampling pred is always // a child of TableScan, so there is no need to push this predicate. - if (ewi == null && !((FilterOperator)op).getConf().getIsSamplingPred()) { + if (ewi == null && !((FilterOperator)op).getConf().getIsSamplingPred() + && (!onlySyntheticJoinPredicate + || ((FilterOperator)op).getConf().isSyntheticJoinPredicate())) { // get pushdown predicates for this operator's predicate ExprNodeDesc predicate = (((FilterOperator) nd).getConf()).getPredicate(); ewi = ExprWalkerProcFactory.extractPushdownPreds(owi, op, predicate); @@ -447,6 +455,38 @@ public final class OpProcFactory { } } + public static class SimpleFilterPPD extends FilterPPD implements NodeProcessor { + @Override + public Object process(Node nd, Stack<Node> stack, NodeProcessorCtx procCtx, + Object... nodeOutputs) throws SemanticException { + FilterOperator filterOp = (FilterOperator) nd; + // We try to push the full Filter predicate iff: + // - the Filter is on top of a TableScan, or + // - the Filter is on top of a PTF (between PTF and Filter, there might be Select operators) + // Otherwise, we push only the synthetic join predicates + // Note : pushing Filter on top of PTF is necessary so the LimitPushdownOptimizer for Rank + // functions gets enabled + boolean parentTableScan = filterOp.getParentOperators().get(0) instanceof TableScanOperator; + boolean ancestorPTF = false; + if (!parentTableScan) { + Operator<?> parent = filterOp; + while (true) { + assert parent.getParentOperators().size() == 1; + parent = parent.getParentOperators().get(0); + if (parent instanceof SelectOperator) { + continue; + } else if (parent instanceof PTFOperator) { + ancestorPTF = true; + break; + } else { + break; + } + } + } + return process(nd, stack, procCtx, !parentTableScan && !ancestorPTF, nodeOutputs); + } + } + /** * Determines predicates for which alias can be pushed to it's parents. See * the comments for getQualifiedAliases function. @@ -971,6 +1011,10 @@ public final class OpProcFactory { return new FilterPPD(); } + public static NodeProcessor getFilterSyntheticJoinPredicateProc() { + return new SimpleFilterPPD(); + } + public static NodeProcessor getJoinProc() { return new JoinPPD(); } http://git-wip-us.apache.org/repos/asf/hive/blob/6f44fc81/ql/src/java/org/apache/hadoop/hive/ql/ppd/SimplePredicatePushDown.java ---------------------------------------------------------------------- diff --git a/ql/src/java/org/apache/hadoop/hive/ql/ppd/SimplePredicatePushDown.java b/ql/src/java/org/apache/hadoop/hive/ql/ppd/SimplePredicatePushDown.java new file mode 100644 index 0000000..2395c7a --- /dev/null +++ b/ql/src/java/org/apache/hadoop/hive/ql/ppd/SimplePredicatePushDown.java @@ -0,0 +1,110 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.hadoop.hive.ql.ppd; + +import java.util.ArrayList; +import java.util.LinkedHashMap; +import java.util.Map; + +import org.apache.hadoop.hive.ql.exec.CommonJoinOperator; +import org.apache.hadoop.hive.ql.exec.FilterOperator; +import org.apache.hadoop.hive.ql.exec.LateralViewForwardOperator; +import org.apache.hadoop.hive.ql.exec.LateralViewJoinOperator; +import org.apache.hadoop.hive.ql.exec.LimitOperator; +import org.apache.hadoop.hive.ql.exec.Operator; +import org.apache.hadoop.hive.ql.exec.PTFOperator; +import org.apache.hadoop.hive.ql.exec.ReduceSinkOperator; +import org.apache.hadoop.hive.ql.exec.ScriptOperator; +import org.apache.hadoop.hive.ql.exec.TableScanOperator; +import org.apache.hadoop.hive.ql.exec.UDTFOperator; +import org.apache.hadoop.hive.ql.lib.DefaultGraphWalker; +import org.apache.hadoop.hive.ql.lib.DefaultRuleDispatcher; +import org.apache.hadoop.hive.ql.lib.Dispatcher; +import org.apache.hadoop.hive.ql.lib.GraphWalker; +import org.apache.hadoop.hive.ql.lib.Node; +import org.apache.hadoop.hive.ql.lib.NodeProcessor; +import org.apache.hadoop.hive.ql.lib.Rule; +import org.apache.hadoop.hive.ql.lib.RuleRegExp; +import org.apache.hadoop.hive.ql.optimizer.Transform; +import org.apache.hadoop.hive.ql.parse.ParseContext; +import org.apache.hadoop.hive.ql.parse.SemanticException; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +public class SimplePredicatePushDown extends Transform { + + private static final Logger LOG = LoggerFactory.getLogger(SimplePredicatePushDown.class); + private ParseContext pGraphContext; + + @Override + public ParseContext transform(ParseContext pctx) throws SemanticException { + pGraphContext = pctx; + + // create a the context for walking operators + OpWalkerInfo opWalkerInfo = new OpWalkerInfo(pGraphContext); + + Map<Rule, NodeProcessor> opRules = new LinkedHashMap<Rule, NodeProcessor>(); + opRules.put(new RuleRegExp("R1", + FilterOperator.getOperatorName() + "%"), + OpProcFactory.getFilterSyntheticJoinPredicateProc()); + opRules.put(new RuleRegExp("R2", + PTFOperator.getOperatorName() + "%"), + OpProcFactory.getPTFProc()); + opRules.put(new RuleRegExp("R3", + CommonJoinOperator.getOperatorName() + "%"), + OpProcFactory.getJoinProc()); + opRules.put(new RuleRegExp("R4", + TableScanOperator.getOperatorName() + "%"), + OpProcFactory.getTSProc()); + opRules.put(new RuleRegExp("R5", + ScriptOperator.getOperatorName() + "%"), + OpProcFactory.getSCRProc()); + opRules.put(new RuleRegExp("R6", + LimitOperator.getOperatorName() + "%"), + OpProcFactory.getLIMProc()); + opRules.put(new RuleRegExp("R7", + UDTFOperator.getOperatorName() + "%"), + OpProcFactory.getUDTFProc()); + opRules.put(new RuleRegExp("R8", + LateralViewForwardOperator.getOperatorName() + "%"), + OpProcFactory.getLVFProc()); + opRules.put(new RuleRegExp("R9", + LateralViewJoinOperator.getOperatorName() + "%"), + OpProcFactory.getLVJProc()); + opRules.put(new RuleRegExp("R10", + ReduceSinkOperator.getOperatorName() + "%"), + OpProcFactory.getRSProc()); + + // The dispatcher fires the processor corresponding to the closest matching + // rule and passes the context along + Dispatcher disp = new DefaultRuleDispatcher(OpProcFactory.getDefaultProc(), + opRules, opWalkerInfo); + GraphWalker ogw = new DefaultGraphWalker(disp); + + // Create a list of topop nodes + ArrayList<Node> topNodes = new ArrayList<Node>(); + topNodes.addAll(pGraphContext.getTopOps().values()); + ogw.startWalking(topNodes, null); + + if (LOG.isDebugEnabled()) { + LOG.debug("After PPD:\n" + Operator.toString(pctx.getTopOps().values())); + } + return pGraphContext; + } + +} http://git-wip-us.apache.org/repos/asf/hive/blob/6f44fc81/ql/src/java/org/apache/hadoop/hive/ql/ppd/SyntheticJoinPredicate.java ---------------------------------------------------------------------- diff --git a/ql/src/java/org/apache/hadoop/hive/ql/ppd/SyntheticJoinPredicate.java b/ql/src/java/org/apache/hadoop/hive/ql/ppd/SyntheticJoinPredicate.java index accfb3b..5d5f02d 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/ppd/SyntheticJoinPredicate.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/ppd/SyntheticJoinPredicate.java @@ -105,7 +105,9 @@ public class SyntheticJoinPredicate extends Transform { // insert filter operator between target(child) and input(parent) private static Operator<FilterDesc> createFilter(Operator<?> target, Operator<?> parent, RowSchema parentRS, ExprNodeDesc filterExpr) { - Operator<FilterDesc> filter = OperatorFactory.get(new FilterDesc(filterExpr, false), + FilterDesc filterDesc = new FilterDesc(filterExpr, false); + filterDesc.setSyntheticJoinPredicate(true); + Operator<FilterDesc> filter = OperatorFactory.get(filterDesc, new RowSchema(parentRS.getSignature())); filter.getParentOperators().add(parent); filter.getChildOperators().add(target); http://git-wip-us.apache.org/repos/asf/hive/blob/6f44fc81/ql/src/test/results/clientpositive/auto_join12.q.out ---------------------------------------------------------------------- diff --git a/ql/src/test/results/clientpositive/auto_join12.q.out b/ql/src/test/results/clientpositive/auto_join12.q.out index 6f08aa8..8ef3664 100644 --- a/ql/src/test/results/clientpositive/auto_join12.q.out +++ b/ql/src/test/results/clientpositive/auto_join12.q.out @@ -41,7 +41,7 @@ STAGE PLANS: alias: src Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: (((UDFToDouble(key) < 100.0) and (UDFToDouble(key) < 80.0)) and key is not null) (type: boolean) + predicate: ((UDFToDouble(key) < 100.0) and (UDFToDouble(key) < 80.0)) (type: boolean) Statistics: Num rows: 55 Data size: 584 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: key (type: string) @@ -57,7 +57,7 @@ STAGE PLANS: alias: src Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: (((UDFToDouble(key) < 80.0) and (UDFToDouble(key) < 100.0)) and key is not null) (type: boolean) + predicate: ((UDFToDouble(key) < 80.0) and (UDFToDouble(key) < 100.0)) (type: boolean) Statistics: Num rows: 55 Data size: 584 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: key (type: string) http://git-wip-us.apache.org/repos/asf/hive/blob/6f44fc81/ql/src/test/results/clientpositive/auto_join13.q.out ---------------------------------------------------------------------- diff --git a/ql/src/test/results/clientpositive/auto_join13.q.out b/ql/src/test/results/clientpositive/auto_join13.q.out index d29818c..fa03d2c 100644 --- a/ql/src/test/results/clientpositive/auto_join13.q.out +++ b/ql/src/test/results/clientpositive/auto_join13.q.out @@ -41,7 +41,7 @@ STAGE PLANS: alias: src Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: ((UDFToDouble(key) < 200.0) and UDFToDouble(key) is not null) (type: boolean) + predicate: (UDFToDouble(key) < 200.0) (type: boolean) Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: key (type: string) @@ -88,30 +88,27 @@ STAGE PLANS: 1 _col0 (type: string) outputColumnNames: _col0, _col1, _col2 Statistics: Num rows: 182 Data size: 1939 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: (UDFToDouble(_col2) + UDFToDouble(_col0)) is not null (type: boolean) - Statistics: Num rows: 182 Data size: 1939 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 (UDFToDouble(_col2) + UDFToDouble(_col0)) (type: double) - 1 UDFToDouble(_col0) (type: double) - outputColumnNames: _col1, _col2 + Map Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 (UDFToDouble(_col2) + UDFToDouble(_col0)) (type: double) + 1 UDFToDouble(_col0) (type: double) + outputColumnNames: _col1, _col2 + Statistics: Num rows: 200 Data size: 2132 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: hash(_col2,_col1) (type: int) + outputColumnNames: _col0 Statistics: Num rows: 200 Data size: 2132 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: hash(_col2,_col1) (type: int) + Group By Operator + aggregations: sum(_col0) + mode: hash outputColumnNames: _col0 - Statistics: Num rows: 200 Data size: 2132 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: sum(_col0) - mode: hash - outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - sort order: - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: bigint) + value expressions: _col0 (type: bigint) Local Work: Map Reduce Local Work Reduce Operator Tree: http://git-wip-us.apache.org/repos/asf/hive/blob/6f44fc81/ql/src/test/results/clientpositive/auto_join16.q.out ---------------------------------------------------------------------- diff --git a/ql/src/test/results/clientpositive/auto_join16.q.out b/ql/src/test/results/clientpositive/auto_join16.q.out index 38738ed..c1da6d2 100644 --- a/ql/src/test/results/clientpositive/auto_join16.q.out +++ b/ql/src/test/results/clientpositive/auto_join16.q.out @@ -32,7 +32,7 @@ STAGE PLANS: alias: a Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: (((((UDFToDouble(key) > 10.0) and (UDFToDouble(key) > 20.0)) and (UDFToDouble(value) < 200.0)) and key is not null) and value is not null) (type: boolean) + predicate: (((UDFToDouble(key) > 10.0) and (UDFToDouble(key) > 20.0)) and (UDFToDouble(value) < 200.0)) (type: boolean) Statistics: Num rows: 18 Data size: 191 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: key (type: string), value (type: string) @@ -50,7 +50,7 @@ STAGE PLANS: alias: a Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: (((((UDFToDouble(value) < 200.0) and (UDFToDouble(key) > 20.0)) and (UDFToDouble(key) > 10.0)) and key is not null) and value is not null) (type: boolean) + predicate: (((UDFToDouble(value) < 200.0) and (UDFToDouble(key) > 20.0)) and (UDFToDouble(key) > 10.0)) (type: boolean) Statistics: Num rows: 18 Data size: 191 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: key (type: string), value (type: string) http://git-wip-us.apache.org/repos/asf/hive/blob/6f44fc81/ql/src/test/results/clientpositive/auto_join2.q.out ---------------------------------------------------------------------- diff --git a/ql/src/test/results/clientpositive/auto_join2.q.out b/ql/src/test/results/clientpositive/auto_join2.q.out index ce40ff3..26d16ee 100644 --- a/ql/src/test/results/clientpositive/auto_join2.q.out +++ b/ql/src/test/results/clientpositive/auto_join2.q.out @@ -51,7 +51,7 @@ STAGE PLANS: alias: src1 Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: (key is not null and UDFToDouble(key) is not null) (type: boolean) + predicate: key is not null (type: boolean) Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: key (type: string), value (type: string) @@ -83,29 +83,26 @@ STAGE PLANS: 1 _col0 (type: string) outputColumnNames: _col0, _col1 Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: (UDFToDouble(_col0) + UDFToDouble(_col1)) is not null (type: boolean) - Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 (UDFToDouble(_col0) + UDFToDouble(_col1)) (type: double) - 1 UDFToDouble(_col0) (type: double) - outputColumnNames: _col0, _col3 + Map Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 (UDFToDouble(_col0) + UDFToDouble(_col1)) (type: double) + 1 UDFToDouble(_col0) (type: double) + outputColumnNames: _col0, _col3 + Statistics: Num rows: 605 Data size: 6427 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: UDFToInteger(_col0) (type: int), _col3 (type: string) + outputColumnNames: _col0, _col1 Statistics: Num rows: 605 Data size: 6427 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: UDFToInteger(_col0) (type: int), _col3 (type: string) - outputColumnNames: _col0, _col1 + File Output Operator + compressed: false Statistics: Num rows: 605 Data size: 6427 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 605 Data size: 6427 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.dest_j2 + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.dest_j2 Local Work: Map Reduce Local Work http://git-wip-us.apache.org/repos/asf/hive/blob/6f44fc81/ql/src/test/results/clientpositive/auto_join33.q.out ---------------------------------------------------------------------- diff --git a/ql/src/test/results/clientpositive/auto_join33.q.out b/ql/src/test/results/clientpositive/auto_join33.q.out index 8b13bd0..b7aed2c 100644 --- a/ql/src/test/results/clientpositive/auto_join33.q.out +++ b/ql/src/test/results/clientpositive/auto_join33.q.out @@ -34,7 +34,7 @@ STAGE PLANS: alias: src Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: ((((UDFToDouble(key) + 1.0) < 10.0) and key is not null) and (UDFToDouble(key) + 1.0) is not null) (type: boolean) + predicate: (((UDFToDouble(key) + 1.0) < 10.0) and key is not null) (type: boolean) Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: key (type: string), value (type: string) @@ -42,8 +42,8 @@ STAGE PLANS: Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE HashTable Sink Operator keys: - 0 (UDFToDouble(_col0) + 1.0) (type: double) - 1 (UDFToDouble(_col0) + 2.0) (type: double) + 0 (UDFToDouble(_col0) + UDFToDouble(1)) (type: double) + 1 (UDFToDouble(_col0) + UDFToDouble(2)) (type: double) Stage: Stage-3 Map Reduce @@ -52,7 +52,7 @@ STAGE PLANS: alias: src Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: ((((UDFToDouble(key) + 2.0) < 10.0) and key is not null) and (UDFToDouble(key) + 2.0) is not null) (type: boolean) + predicate: (((UDFToDouble(key) + 2.0) < 10.0) and key is not null) (type: boolean) Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: key (type: string), value (type: string) @@ -62,8 +62,8 @@ STAGE PLANS: condition map: Inner Join 0 to 1 keys: - 0 (UDFToDouble(_col0) + 1.0) (type: double) - 1 (UDFToDouble(_col0) + 2.0) (type: double) + 0 (UDFToDouble(_col0) + UDFToDouble(1)) (type: double) + 1 (UDFToDouble(_col0) + UDFToDouble(2)) (type: double) outputColumnNames: _col0, _col1, _col2, _col3 Statistics: Num rows: 182 Data size: 1939 Basic stats: COMPLETE Column stats: NONE File Output Operator http://git-wip-us.apache.org/repos/asf/hive/blob/6f44fc81/ql/src/test/results/clientpositive/auto_join_filters.q.out ---------------------------------------------------------------------- diff --git a/ql/src/test/results/clientpositive/auto_join_filters.q.out b/ql/src/test/results/clientpositive/auto_join_filters.q.out index e0ed373..2fdf470 100644 --- a/ql/src/test/results/clientpositive/auto_join_filters.q.out +++ b/ql/src/test/results/clientpositive/auto_join_filters.q.out @@ -14,7 +14,7 @@ POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/in3.txt' INTO TABLE my POSTHOOK: type: LOAD #### A masked pattern was here #### POSTHOOK: Output: default@myinput1 -Warning: Map Join MAPJOIN[22][bigTable=?] in task 'Stage-2:MAPRED' is a cross product +Warning: Map Join MAPJOIN[21][bigTable=?] in task 'Stage-2:MAPRED' is a cross product PREHOOK: query: SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1 a JOIN myinput1 b on a.key > 40 AND a.value > 50 AND a.key = a.value AND b.key > 40 AND b.value > 50 AND b.key = b.value PREHOOK: type: QUERY PREHOOK: Input: default@myinput1 @@ -300,7 +300,7 @@ POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/in2.txt' into table sm POSTHOOK: type: LOAD #### A masked pattern was here #### POSTHOOK: Output: default@smb_input2 -Warning: Map Join MAPJOIN[22][bigTable=?] in task 'Stage-2:MAPRED' is a cross product +Warning: Map Join MAPJOIN[21][bigTable=?] in task 'Stage-2:MAPRED' is a cross product PREHOOK: query: SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1 a JOIN myinput1 b on a.key > 40 AND a.value > 50 AND a.key = a.value AND b.key > 40 AND b.value > 50 AND b.key = b.value PREHOOK: type: QUERY PREHOOK: Input: default@myinput1 @@ -310,7 +310,7 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@myinput1 #### A masked pattern was here #### 3078400 -Warning: Map Join MAPJOIN[22][bigTable=?] in task 'Stage-2:MAPRED' is a cross product +Warning: Map Join MAPJOIN[21][bigTable=?] in task 'Stage-2:MAPRED' is a cross product PREHOOK: query: SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1 a LEFT OUTER JOIN myinput1 b on a.key > 40 AND a.value > 50 AND a.key = a.value AND b.key > 40 AND b.value > 50 AND b.key = b.value PREHOOK: type: QUERY PREHOOK: Input: default@myinput1 @@ -320,7 +320,7 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@myinput1 #### A masked pattern was here #### 3078400 -Warning: Map Join MAPJOIN[22][bigTable=?] in task 'Stage-2:MAPRED' is a cross product +Warning: Map Join MAPJOIN[21][bigTable=?] in task 'Stage-2:MAPRED' is a cross product PREHOOK: query: SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1 a RIGHT OUTER JOIN myinput1 b on a.key > 40 AND a.value > 50 AND a.key = a.value AND b.key > 40 AND b.value > 50 AND b.key = b.value PREHOOK: type: QUERY PREHOOK: Input: default@myinput1 http://git-wip-us.apache.org/repos/asf/hive/blob/6f44fc81/ql/src/test/results/clientpositive/auto_join_nulls.q.out ---------------------------------------------------------------------- diff --git a/ql/src/test/results/clientpositive/auto_join_nulls.q.out b/ql/src/test/results/clientpositive/auto_join_nulls.q.out index 954bf06..4af5535 100644 --- a/ql/src/test/results/clientpositive/auto_join_nulls.q.out +++ b/ql/src/test/results/clientpositive/auto_join_nulls.q.out @@ -14,7 +14,7 @@ POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/in1.txt' INTO TABLE my POSTHOOK: type: LOAD #### A masked pattern was here #### POSTHOOK: Output: default@myinput1 -Warning: Map Join MAPJOIN[18][bigTable=?] in task 'Stage-2:MAPRED' is a cross product +Warning: Map Join MAPJOIN[17][bigTable=?] in task 'Stage-2:MAPRED' is a cross product PREHOOK: query: SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1 a JOIN myinput1 b PREHOOK: type: QUERY PREHOOK: Input: default@myinput1 http://git-wip-us.apache.org/repos/asf/hive/blob/6f44fc81/ql/src/test/results/clientpositive/auto_join_stats.q.out ---------------------------------------------------------------------- diff --git a/ql/src/test/results/clientpositive/auto_join_stats.q.out b/ql/src/test/results/clientpositive/auto_join_stats.q.out index feb8186..9d9e111 100644 --- a/ql/src/test/results/clientpositive/auto_join_stats.q.out +++ b/ql/src/test/results/clientpositive/auto_join_stats.q.out @@ -91,15 +91,12 @@ STAGE PLANS: 1 _col0 (type: string) outputColumnNames: _col0, _col1 Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: (UDFToDouble(_col0) + UDFToDouble(_col1)) is not null (type: boolean) - Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Local Work: Map Reduce Local Work @@ -115,7 +112,7 @@ STAGE PLANS: alias: smalltable Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: (key is not null and UDFToDouble(key) is not null) (type: boolean) + predicate: key is not null (type: boolean) Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: key (type: string) @@ -192,15 +189,12 @@ STAGE PLANS: 1 _col0 (type: string) outputColumnNames: _col0, _col1 Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: (UDFToDouble(_col0) + UDFToDouble(_col1)) is not null (type: boolean) - Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Local Work: Map Reduce Local Work @@ -246,15 +240,12 @@ STAGE PLANS: 1 _col0 (type: string) outputColumnNames: _col0, _col1 Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: (UDFToDouble(_col0) + UDFToDouble(_col1)) is not null (type: boolean) - Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-0 Fetch Operator @@ -361,15 +352,12 @@ STAGE PLANS: 1 _col0 (type: string) outputColumnNames: _col0, _col1 Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: (UDFToDouble(_col0) + UDFToDouble(_col1)) is not null (type: boolean) - Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Local Work: Map Reduce Local Work @@ -388,7 +376,7 @@ STAGE PLANS: alias: smalltable Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: (key is not null and UDFToDouble(key) is not null) (type: boolean) + predicate: key is not null (type: boolean) Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: key (type: string) @@ -403,7 +391,7 @@ STAGE PLANS: alias: smalltable2 Statistics: Num rows: 1 Data size: 30 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: (key is not null and UDFToDouble(key) is not null) (type: boolean) + predicate: key is not null (type: boolean) Statistics: Num rows: 1 Data size: 30 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: key (type: string) @@ -426,24 +414,21 @@ STAGE PLANS: 1 UDFToDouble(_col0) (type: double) outputColumnNames: _col0, _col1, _col2 Statistics: Num rows: 605 Data size: 6427 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: (UDFToDouble(_col0) + UDFToDouble(_col1)) is not null (type: boolean) - Statistics: Num rows: 605 Data size: 6427 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 (UDFToDouble(_col0) + UDFToDouble(_col1)) (type: double) - 1 UDFToDouble(_col0) (type: double) - outputColumnNames: _col0, _col1, _col2 + Map Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 (UDFToDouble(_col0) + UDFToDouble(_col1)) (type: double) + 1 UDFToDouble(_col0) (type: double) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 665 Data size: 7069 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false Statistics: Num rows: 665 Data size: 7069 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 665 Data size: 7069 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Local Work: Map Reduce Local Work @@ -491,15 +476,12 @@ STAGE PLANS: 1 _col0 (type: string) outputColumnNames: _col0, _col1 Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: (UDFToDouble(_col0) + UDFToDouble(_col1)) is not null (type: boolean) - Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Local Work: Map Reduce Local Work @@ -545,15 +527,12 @@ STAGE PLANS: 1 _col0 (type: string) outputColumnNames: _col0, _col1 Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: (UDFToDouble(_col0) + UDFToDouble(_col1)) is not null (type: boolean) - Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-0 Fetch Operator http://git-wip-us.apache.org/repos/asf/hive/blob/6f44fc81/ql/src/test/results/clientpositive/auto_join_stats2.q.out ---------------------------------------------------------------------- diff --git a/ql/src/test/results/clientpositive/auto_join_stats2.q.out b/ql/src/test/results/clientpositive/auto_join_stats2.q.out index e0d0146..007ea03 100644 --- a/ql/src/test/results/clientpositive/auto_join_stats2.q.out +++ b/ql/src/test/results/clientpositive/auto_join_stats2.q.out @@ -60,7 +60,7 @@ STAGE PLANS: alias: smalltable Statistics: Num rows: 1 Data size: 30 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: (key is not null and UDFToDouble(key) is not null) (type: boolean) + predicate: key is not null (type: boolean) Statistics: Num rows: 1 Data size: 30 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: key (type: string) @@ -92,24 +92,21 @@ STAGE PLANS: 1 _col0 (type: string) outputColumnNames: _col0, _col1 Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: (UDFToDouble(_col0) + UDFToDouble(_col1)) is not null (type: boolean) - Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 (UDFToDouble(_col0) + UDFToDouble(_col1)) (type: double) - 1 UDFToDouble(_col0) (type: double) - outputColumnNames: _col0, _col1, _col2 + Map Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 (UDFToDouble(_col0) + UDFToDouble(_col1)) (type: double) + 1 UDFToDouble(_col0) (type: double) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 605 Data size: 6427 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false Statistics: Num rows: 605 Data size: 6427 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 605 Data size: 6427 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Local Work: Map Reduce Local Work @@ -198,7 +195,7 @@ STAGE PLANS: alias: smalltable Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: (key is not null and UDFToDouble(key) is not null) (type: boolean) + predicate: key is not null (type: boolean) Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: key (type: string) @@ -213,7 +210,7 @@ STAGE PLANS: alias: smalltable2 Statistics: Num rows: 1 Data size: 30 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: (key is not null and UDFToDouble(key) is not null) (type: boolean) + predicate: key is not null (type: boolean) Statistics: Num rows: 1 Data size: 30 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: key (type: string) @@ -245,9 +242,14 @@ STAGE PLANS: 1 _col0 (type: string) outputColumnNames: _col0, _col1 Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: (UDFToDouble(_col0) + UDFToDouble(_col1)) is not null (type: boolean) - Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE + Map Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 (UDFToDouble(_col0) + UDFToDouble(_col1)) (type: double) + 1 UDFToDouble(_col0) (type: double) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 605 Data size: 6427 Basic stats: COMPLETE Column stats: NONE Map Join Operator condition map: Inner Join 0 to 1 @@ -255,25 +257,14 @@ STAGE PLANS: 0 (UDFToDouble(_col0) + UDFToDouble(_col1)) (type: double) 1 UDFToDouble(_col0) (type: double) outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 605 Data size: 6427 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: (UDFToDouble(_col0) + UDFToDouble(_col1)) is not null (type: boolean) - Statistics: Num rows: 605 Data size: 6427 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 (UDFToDouble(_col0) + UDFToDouble(_col1)) (type: double) - 1 UDFToDouble(_col0) (type: double) - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 665 Data size: 7069 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 665 Data size: 7069 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Statistics: Num rows: 665 Data size: 7069 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 665 Data size: 7069 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Local Work: Map Reduce Local Work
