HIVE-20778: Join reordering may not be triggered if all joins in plan are created by decorrelation logic (Vineet Garg via Jesus Camacho Rodriguez)
Project: http://git-wip-us.apache.org/repos/asf/hive/repo Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/ae1eb15d Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/ae1eb15d Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/ae1eb15d Branch: refs/heads/master Commit: ae1eb15d4347484537b85a8074d8a8619991fc3f Parents: 6dd0136 Author: Vineet Garg <vg...@hortonworks.com> Authored: Sat Nov 3 09:34:43 2018 -0700 Committer: Jesus Camacho Rodriguez <jcama...@apache.org> Committed: Sat Nov 3 09:34:43 2018 -0700 ---------------------------------------------------------------------- .../hadoop/hive/ql/parse/CalcitePlanner.java | 13 +- .../queries/clientpositive/subquery_notin.q | 2 + .../queries/clientpositive/subquery_select.q | 2 + .../clientpositive/constant_prop_3.q.out | 226 +- .../llap/dynamic_semijoin_reduction_2.q.out | 88 +- .../clientpositive/llap/explainuser_1.q.out | 400 +-- .../results/clientpositive/llap/lineage3.q.out | 2 +- .../clientpositive/llap/mapjoin_hint.q.out | 146 +- .../clientpositive/llap/optimize_join_ptp.q.out | 88 +- .../clientpositive/llap/subquery_in.q.out | 292 +- .../llap/subquery_in_having.q.out | 246 +- .../clientpositive/llap/subquery_multi.q.out | 1005 +++--- .../clientpositive/llap/subquery_notin.q.out | 1960 +++++------ .../clientpositive/llap/subquery_null_agg.q.out | 94 +- .../clientpositive/llap/subquery_scalar.q.out | 1400 ++++---- .../clientpositive/llap/subquery_select.q.out | 1641 +++++----- .../clientpositive/llap/subquery_views.q.out | 124 +- .../llap/vector_groupby_mapjoin.q.out | 184 +- .../llap/vector_mapjoin_reduce.q.out | 371 ++- .../test/results/clientpositive/semijoin5.q.out | 26 +- .../spark_dynamic_partition_pruning_3.q.out | 52 +- .../spark/spark_explainuser_1.q.out | 366 ++- .../clientpositive/spark/subquery_in.q.out | 290 +- .../clientpositive/spark/subquery_multi.q.out | 944 +++--- .../clientpositive/spark/subquery_notin.q.out | 3053 +++++++++--------- .../spark/subquery_null_agg.q.out | 86 +- .../clientpositive/spark/subquery_scalar.q.out | 1441 +++++---- .../clientpositive/spark/subquery_select.q.out | 1688 +++++----- .../clientpositive/spark/subquery_views.q.out | 194 +- .../spark/vector_mapjoin_reduce.q.out | 491 +-- .../clientpositive/subquery_notin_having.q.out | 465 +-- .../clientpositive/vector_groupby_mapjoin.q.out | 425 ++- 32 files changed, 9207 insertions(+), 8598 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/hive/blob/ae1eb15d/ql/src/java/org/apache/hadoop/hive/ql/parse/CalcitePlanner.java ---------------------------------------------------------------------- diff --git a/ql/src/java/org/apache/hadoop/hive/ql/parse/CalcitePlanner.java b/ql/src/java/org/apache/hadoop/hive/ql/parse/CalcitePlanner.java index d92cfca..ab63ce2 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/parse/CalcitePlanner.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/parse/CalcitePlanner.java @@ -1798,16 +1798,17 @@ public class CalcitePlanner extends SemanticAnalyzer { perfLogger.PerfLogEnd(this.getClass().getName(), PerfLogger.OPTIMIZER, "Calcite: Removing sq_count_check UDF "); } - // Remove Projects between Joins so that JoinToMultiJoinRule can merge them to MultiJoin - calcitePreCboPlan = hepPlan(calcitePreCboPlan, true, mdProvider.getMetadataProvider(), executorProvider, - HepMatchOrder.BOTTOM_UP, HiveJoinProjectTransposeRule.LEFT_PROJECT_BTW_JOIN, - HiveJoinProjectTransposeRule.RIGHT_PROJECT_BTW_JOIN, HiveProjectMergeRule.INSTANCE); // 4. Apply join order optimizations: reordering MST algorithm // If join optimizations failed because of missing stats, we continue with // the rest of optimizations if (profilesCBO.contains(ExtendedCBOProfile.JOIN_REORDERING)) { perfLogger.PerfLogBegin(this.getClass().getName(), PerfLogger.OPTIMIZER); + + // Remove Projects between Joins so that JoinToMultiJoinRule can merge them to MultiJoin + calcitePreCboPlan = hepPlan(calcitePreCboPlan, true, mdProvider.getMetadataProvider(), executorProvider, + HepMatchOrder.BOTTOM_UP, HiveJoinProjectTransposeRule.LEFT_PROJECT_BTW_JOIN, + HiveJoinProjectTransposeRule.RIGHT_PROJECT_BTW_JOIN, HiveProjectMergeRule.INSTANCE); try { List<RelMetadataProvider> list = Lists.newArrayList(); list.add(mdProvider.getMetadataProvider()); @@ -3346,6 +3347,10 @@ public class CalcitePlanner extends SemanticAnalyzer { } catch (SemanticException e) { throw new CalciteSubquerySemanticException(e.getMessage()); } + if(isSubQuery) { + // since subqueries will later be rewritten into JOINs we want join reordering logic to trigger + profilesCBO.add(ExtendedCBOProfile.JOIN_REORDERING); + } return isSubQuery; } http://git-wip-us.apache.org/repos/asf/hive/blob/ae1eb15d/ql/src/test/queries/clientpositive/subquery_notin.q ---------------------------------------------------------------------- diff --git a/ql/src/test/queries/clientpositive/subquery_notin.q b/ql/src/test/queries/clientpositive/subquery_notin.q index a2d93df..f863645 100644 --- a/ql/src/test/queries/clientpositive/subquery_notin.q +++ b/ql/src/test/queries/clientpositive/subquery_notin.q @@ -3,6 +3,8 @@ --! qt:dataset:lineitem set hive.mapred.mode=nonstrict; +-- SORT_QUERY_RESULTS + -- non agg, non corr explain select * http://git-wip-us.apache.org/repos/asf/hive/blob/ae1eb15d/ql/src/test/queries/clientpositive/subquery_select.q ---------------------------------------------------------------------- diff --git a/ql/src/test/queries/clientpositive/subquery_select.q b/ql/src/test/queries/clientpositive/subquery_select.q index 80618c7..52fcdb2 100644 --- a/ql/src/test/queries/clientpositive/subquery_select.q +++ b/ql/src/test/queries/clientpositive/subquery_select.q @@ -4,6 +4,8 @@ set hive.mapred.mode=nonstrict; set hive.explain.user=false; +-- SORT_QUERY_RESULTS + -- IN, non corr explain SELECT p_size, p_size IN ( SELECT MAX(p_size) FROM part) http://git-wip-us.apache.org/repos/asf/hive/blob/ae1eb15d/ql/src/test/results/clientpositive/constant_prop_3.q.out ---------------------------------------------------------------------- diff --git a/ql/src/test/results/clientpositive/constant_prop_3.q.out b/ql/src/test/results/clientpositive/constant_prop_3.q.out index d106cd3..2b314d7 100644 --- a/ql/src/test/results/clientpositive/constant_prop_3.q.out +++ b/ql/src/test/results/clientpositive/constant_prop_3.q.out @@ -94,7 +94,7 @@ POSTHOOK: type: ANALYZE_TABLE POSTHOOK: Input: default@supplier_hive POSTHOOK: Output: default@supplier_hive #### A masked pattern was here #### -Warning: Shuffle Join JOIN[25][tables = [$hdt$_0, $hdt$_1, $hdt$_2]] in Stage 'Stage-2:MAPRED' is a cross product +Warning: Shuffle Join JOIN[26][tables = [$hdt$_0, $hdt$_1]] in Stage 'Stage-1:MAPRED' is a cross product PREHOOK: query: explain select p_brand, p_type, @@ -166,36 +166,56 @@ POSTHOOK: Input: default@partsupp_hive POSTHOOK: Input: default@supplier_hive #### A masked pattern was here #### STAGE DEPENDENCIES: - Stage-1 is a root stage - Stage-2 depends on stages: Stage-1, Stage-7 - Stage-3 depends on stages: Stage-2, Stage-8 + Stage-5 is a root stage + Stage-1 depends on stages: Stage-5 + Stage-2 depends on stages: Stage-1, Stage-6 + Stage-3 depends on stages: Stage-2 Stage-4 depends on stages: Stage-3 - Stage-5 depends on stages: Stage-4 Stage-7 is a root stage - Stage-8 is a root stage - Stage-0 depends on stages: Stage-5 + Stage-6 depends on stages: Stage-7 + Stage-0 depends on stages: Stage-4 STAGE PLANS: - Stage: Stage-1 + Stage: Stage-5 Map Reduce Map Operator Tree: TableScan - alias: partsupp_hive - filterExpr: ps_partkey is not null (type: boolean) + alias: supplier_hive + filterExpr: (s_comment like '%Customer%Complaints%') (type: boolean) Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE Filter Operator - predicate: ps_partkey is not null (type: boolean) + predicate: (s_comment like '%Customer%Complaints%') (type: boolean) Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE Select Operator - expressions: ps_partkey (type: int), ps_suppkey (type: int) - outputColumnNames: _col0, _col1 + expressions: s_suppkey (type: int) + outputColumnNames: s_suppkey Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int) - sort order: + - Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE - value expressions: _col1 (type: int) + Group By Operator + aggregations: count(), count(s_suppkey) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 16 Basic stats: PARTIAL Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 16 Basic stats: PARTIAL Column stats: NONE + value expressions: _col0 (type: bigint), _col1 (type: bigint) + Execution mode: vectorized + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0), count(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 16 Basic stats: PARTIAL Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-1 + Map Reduce + Map Operator Tree: TableScan alias: part_hive filterExpr: ((p_size) IN (22, 14, 27, 49, 21, 33, 35, 28) and (p_brand <> 'Brand#34') and p_partkey is not null and (not (p_type like 'ECONOMY BRUSHED%'))) (type: boolean) @@ -208,35 +228,9 @@ STAGE PLANS: outputColumnNames: _col0, _col1, _col2, _col3 Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE Reduce Output Operator - key expressions: _col0 (type: int) - sort order: + - Map-reduce partition columns: _col0 (type: int) + sort order: Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE - value expressions: _col1 (type: string), _col2 (type: string), _col3 (type: int) - Reduce Operator Tree: - Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 _col0 (type: int) - 1 _col0 (type: int) - outputColumnNames: _col1, _col3, _col4, _col5 - Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe - - Stage: Stage-2 - Map Reduce - Map Operator Tree: - TableScan - Reduce Output Operator - sort order: - Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE - value expressions: _col1 (type: int), _col3 (type: string), _col4 (type: string), _col5 (type: int) + value expressions: _col0 (type: int), _col1 (type: string), _col2 (type: string), _col3 (type: int) TableScan Reduce Output Operator sort order: @@ -249,7 +243,7 @@ STAGE PLANS: keys: 0 1 - outputColumnNames: _col1, _col3, _col4, _col5, _col6, _col7 + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 Statistics: Num rows: 1 Data size: 17 Basic stats: PARTIAL Column stats: NONE File Output Operator compressed: false @@ -258,53 +252,57 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe - Stage: Stage-3 + Stage: Stage-2 Map Reduce Map Operator Tree: TableScan Reduce Output Operator - key expressions: _col1 (type: int) + key expressions: _col0 (type: int) sort order: + - Map-reduce partition columns: _col1 (type: int) + Map-reduce partition columns: _col0 (type: int) Statistics: Num rows: 1 Data size: 17 Basic stats: PARTIAL Column stats: NONE - value expressions: _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: bigint), _col7 (type: bigint) + value expressions: _col1 (type: string), _col2 (type: string), _col3 (type: int), _col4 (type: bigint), _col5 (type: bigint) TableScan Reduce Output Operator key expressions: _col0 (type: int) sort order: + Map-reduce partition columns: _col0 (type: int) Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE - value expressions: _col1 (type: boolean) + value expressions: _col1 (type: int), _col3 (type: boolean) Reduce Operator Tree: Join Operator condition map: - Left Outer Join 0 to 1 + Inner Join 0 to 1 keys: - 0 _col1 (type: int) + 0 _col0 (type: int) 1 _col0 (type: int) - outputColumnNames: _col1, _col3, _col4, _col5, _col6, _col7, _col9 + outputColumnNames: _col1, _col2, _col3, _col4, _col5, _col7, _col9 Statistics: Num rows: 1 Data size: 18 Basic stats: PARTIAL Column stats: NONE - Filter Operator - predicate: ((_col6 = 0L) or (_col9 is null and _col1 is not null and (_col7 >= _col6))) (type: boolean) + Select Operator + expressions: _col7 (type: int), _col1 (type: string), _col2 (type: string), _col3 (type: int), _col4 (type: bigint), _col5 (type: bigint), _col9 (type: boolean) + outputColumnNames: _col1, _col3, _col4, _col5, _col6, _col7, _col9 Statistics: Num rows: 1 Data size: 18 Basic stats: PARTIAL Column stats: NONE - Select Operator - expressions: _col1 (type: int), _col3 (type: string), _col4 (type: string), _col5 (type: int) - outputColumnNames: _col1, _col3, _col4, _col5 + Filter Operator + predicate: ((_col6 = 0L) or (_col9 is null and _col1 is not null and (_col7 >= _col6))) (type: boolean) Statistics: Num rows: 1 Data size: 18 Basic stats: PARTIAL Column stats: NONE - Group By Operator - aggregations: count(DISTINCT _col1) - keys: _col3 (type: string), _col4 (type: string), _col5 (type: int), _col1 (type: int) - mode: hash - outputColumnNames: _col0, _col1, _col2, _col3, _col4 + Select Operator + expressions: _col1 (type: int), _col3 (type: string), _col4 (type: string), _col5 (type: int) + outputColumnNames: _col1, _col3, _col4, _col5 Statistics: Num rows: 1 Data size: 18 Basic stats: PARTIAL Column stats: NONE - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + Group By Operator + aggregations: count(DISTINCT _col1) + keys: _col3 (type: string), _col4 (type: string), _col5 (type: int), _col1 (type: int) + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3, _col4 + Statistics: Num rows: 1 Data size: 18 Basic stats: PARTIAL Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe - Stage: Stage-4 + Stage: Stage-3 Map Reduce Map Operator Tree: TableScan @@ -328,7 +326,7 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe - Stage: Stage-5 + Stage: Stage-4 Map Reduce Map Operator Tree: TableScan @@ -365,43 +363,6 @@ STAGE PLANS: outputColumnNames: s_suppkey Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE Group By Operator - aggregations: count(), count(s_suppkey) - mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 16 Basic stats: PARTIAL Column stats: NONE - Reduce Output Operator - sort order: - Statistics: Num rows: 1 Data size: 16 Basic stats: PARTIAL Column stats: NONE - value expressions: _col0 (type: bigint), _col1 (type: bigint) - Execution mode: vectorized - Reduce Operator Tree: - Group By Operator - aggregations: count(VALUE._col0), count(VALUE._col1) - mode: mergepartial - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 16 Basic stats: PARTIAL Column stats: NONE - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe - - Stage: Stage-8 - Map Reduce - Map Operator Tree: - TableScan - alias: supplier_hive - filterExpr: (s_comment like '%Customer%Complaints%') (type: boolean) - Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE - Filter Operator - predicate: (s_comment like '%Customer%Complaints%') (type: boolean) - Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE - Select Operator - expressions: s_suppkey (type: int) - outputColumnNames: s_suppkey - Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE - Group By Operator keys: s_suppkey (type: int) mode: hash outputColumnNames: _col0 @@ -429,6 +390,49 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + Stage: Stage-6 + Map Reduce + Map Operator Tree: + TableScan + alias: partsupp_hive + filterExpr: ps_partkey is not null (type: boolean) + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + Filter Operator + predicate: ps_partkey is not null (type: boolean) + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + Select Operator + expressions: ps_partkey (type: int), ps_suppkey (type: int) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + Reduce Output Operator + key expressions: _col1 (type: int) + sort order: + + Map-reduce partition columns: _col1 (type: int) + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + value expressions: _col0 (type: int) + TableScan + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + value expressions: _col1 (type: boolean) + Reduce Operator Tree: + Join Operator + condition map: + Left Outer Join 0 to 1 + keys: + 0 _col1 (type: int) + 1 _col0 (type: int) + outputColumnNames: _col0, _col1, _col3 + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + Stage: Stage-0 Fetch Operator limit: -1 http://git-wip-us.apache.org/repos/asf/hive/blob/ae1eb15d/ql/src/test/results/clientpositive/llap/dynamic_semijoin_reduction_2.q.out ---------------------------------------------------------------------- diff --git a/ql/src/test/results/clientpositive/llap/dynamic_semijoin_reduction_2.q.out b/ql/src/test/results/clientpositive/llap/dynamic_semijoin_reduction_2.q.out index 801416d..2810fdd 100644 --- a/ql/src/test/results/clientpositive/llap/dynamic_semijoin_reduction_2.q.out +++ b/ql/src/test/results/clientpositive/llap/dynamic_semijoin_reduction_2.q.out @@ -80,10 +80,10 @@ STAGE PLANS: #### A masked pattern was here #### Edges: Map 1 <- Reducer 11 (BROADCAST_EDGE), Reducer 6 (BROADCAST_EDGE), Reducer 7 (BROADCAST_EDGE), Reducer 8 (BROADCAST_EDGE) - Map 13 <- Reducer 12 (BROADCAST_EDGE) - Reducer 10 <- Map 13 (SIMPLE_EDGE), Map 9 (SIMPLE_EDGE) + Map 9 <- Reducer 13 (BROADCAST_EDGE) + Reducer 10 <- Map 12 (SIMPLE_EDGE), Map 9 (SIMPLE_EDGE) Reducer 11 <- Reducer 10 (CUSTOM_SIMPLE_EDGE) - Reducer 12 <- Map 9 (CUSTOM_SIMPLE_EDGE) + Reducer 13 <- Map 12 (CUSTOM_SIMPLE_EDGE) Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 5 (SIMPLE_EDGE) Reducer 3 <- Reducer 10 (SIMPLE_EDGE), Reducer 2 (SIMPLE_EDGE) Reducer 4 <- Reducer 3 (CUSTOM_SIMPLE_EDGE) @@ -113,25 +113,37 @@ STAGE PLANS: value expressions: _col3 (type: timestamp) Execution mode: vectorized, llap LLAP IO: no inputs - Map 13 + Map 12 Map Operator Tree: TableScan - alias: tt2 - filterExpr: (timestamp_col_18 is not null and decimal1911_col_16 is not null and (decimal1911_col_16 BETWEEN DynamicValue(RS_12_tt1_decimal2612_col_77_min) AND DynamicValue(RS_12_tt1_decimal2612_col_77_max) and in_bloom_filter(decimal1911_col_16, DynamicValue(RS_12_tt1_decimal2612_col_77_bloom_filter)))) (type: boolean) - Statistics: Num rows: 1 Data size: 152 Basic stats: COMPLETE Column stats: NONE + alias: tt1 + filterExpr: decimal2612_col_77 is not null (type: boolean) + Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: ((decimal1911_col_16 BETWEEN DynamicValue(RS_12_tt1_decimal2612_col_77_min) AND DynamicValue(RS_12_tt1_decimal2612_col_77_max) and in_bloom_filter(decimal1911_col_16, DynamicValue(RS_12_tt1_decimal2612_col_77_bloom_filter))) and decimal1911_col_16 is not null and timestamp_col_18 is not null) (type: boolean) - Statistics: Num rows: 1 Data size: 152 Basic stats: COMPLETE Column stats: NONE + predicate: decimal2612_col_77 is not null (type: boolean) + Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: decimal1911_col_16 (type: decimal(19,11)), timestamp_col_18 (type: timestamp) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 152 Basic stats: COMPLETE Column stats: NONE + expressions: decimal2612_col_77 (type: decimal(26,12)) + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: decimal(26,12)) sort order: + Map-reduce partition columns: _col0 (type: decimal(26,12)) - Statistics: Num rows: 1 Data size: 152 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: timestamp) + Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: decimal(26,12)) + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: min(_col0), max(_col0), bloom_filter(_col0, expectedEntries=1) + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 448 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 448 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: decimal(26,12)), _col1 (type: decimal(26,12)), _col2 (type: binary) Execution mode: vectorized, llap LLAP IO: no inputs Map 5 @@ -197,34 +209,22 @@ STAGE PLANS: Map 9 Map Operator Tree: TableScan - alias: tt1 - filterExpr: decimal2612_col_77 is not null (type: boolean) - Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: NONE + alias: tt2 + filterExpr: (timestamp_col_18 is not null and decimal1911_col_16 is not null and (decimal1911_col_16 BETWEEN DynamicValue(RS_13_tt1_decimal2612_col_77_min) AND DynamicValue(RS_13_tt1_decimal2612_col_77_max) and in_bloom_filter(decimal1911_col_16, DynamicValue(RS_13_tt1_decimal2612_col_77_bloom_filter)))) (type: boolean) + Statistics: Num rows: 1 Data size: 152 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: decimal2612_col_77 is not null (type: boolean) - Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: NONE + predicate: ((decimal1911_col_16 BETWEEN DynamicValue(RS_13_tt1_decimal2612_col_77_min) AND DynamicValue(RS_13_tt1_decimal2612_col_77_max) and in_bloom_filter(decimal1911_col_16, DynamicValue(RS_13_tt1_decimal2612_col_77_bloom_filter))) and decimal1911_col_16 is not null and timestamp_col_18 is not null) (type: boolean) + Statistics: Num rows: 1 Data size: 152 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: decimal2612_col_77 (type: decimal(26,12)) - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: NONE + expressions: decimal1911_col_16 (type: decimal(19,11)), timestamp_col_18 (type: timestamp) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 152 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: decimal(26,12)) sort order: + Map-reduce partition columns: _col0 (type: decimal(26,12)) - Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: decimal(26,12)) - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: min(_col0), max(_col0), bloom_filter(_col0, expectedEntries=1) - mode: hash - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 448 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - sort order: - Statistics: Num rows: 1 Data size: 448 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: decimal(26,12)), _col1 (type: decimal(26,12)), _col2 (type: binary) + Statistics: Num rows: 1 Data size: 152 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: timestamp) Execution mode: vectorized, llap LLAP IO: no inputs Reducer 10 @@ -236,26 +236,26 @@ STAGE PLANS: keys: 0 _col0 (type: decimal(26,12)) 1 _col0 (type: decimal(26,12)) - outputColumnNames: _col2 - Statistics: Num rows: 1 Data size: 123 Basic stats: COMPLETE Column stats: NONE + outputColumnNames: _col1 + Statistics: Num rows: 1 Data size: 167 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: _col2 (type: timestamp), -92 (type: int) + expressions: _col1 (type: timestamp), -92 (type: int) outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 123 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 167 Basic stats: COMPLETE Column stats: NONE Group By Operator keys: _col0 (type: timestamp), _col1 (type: int) mode: hash outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 123 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 167 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: timestamp), _col1 (type: int) sort order: ++ Map-reduce partition columns: _col0 (type: timestamp), _col1 (type: int) - Statistics: Num rows: 1 Data size: 123 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 167 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: _col0 (type: timestamp) outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 123 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 167 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: min(_col0), max(_col0), bloom_filter(_col0, expectedEntries=1) mode: hash @@ -277,7 +277,7 @@ STAGE PLANS: sort order: Statistics: Num rows: 1 Data size: 120 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: timestamp), _col1 (type: timestamp), _col2 (type: binary) - Reducer 12 + Reducer 13 Execution mode: vectorized, llap Reduce Operator Tree: Group By Operator http://git-wip-us.apache.org/repos/asf/hive/blob/ae1eb15d/ql/src/test/results/clientpositive/llap/explainuser_1.q.out ---------------------------------------------------------------------- diff --git a/ql/src/test/results/clientpositive/llap/explainuser_1.q.out b/ql/src/test/results/clientpositive/llap/explainuser_1.q.out index 71838ee..a87890f 100644 --- a/ql/src/test/results/clientpositive/llap/explainuser_1.q.out +++ b/ql/src/test/results/clientpositive/llap/explainuser_1.q.out @@ -2251,36 +2251,42 @@ POSTHOOK: Input: default@src_cbo Plan optimized by CBO. Vertex dependency in root stage -Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 3 (SIMPLE_EDGE) +Reducer 2 <- Map 1 (SIMPLE_EDGE) +Reducer 3 <- Map 4 (SIMPLE_EDGE), Reducer 2 (ONE_TO_ONE_EDGE) Stage-0 Fetch Operator limit:-1 Stage-1 - Reducer 2 llap - File Output Operator [FS_12] - Merge Join Operator [MERGEJOIN_27] (rows=83 width=178) - Conds:RS_8._col0=RS_9._col0(Left Semi),Output:["_col0","_col1"] - <-Map 1 [SIMPLE_EDGE] llap - SHUFFLE [RS_8] - PartitionCols:_col0 - Select Operator [SEL_2] (rows=166 width=178) - Output:["_col0","_col1"] - Filter Operator [FIL_15] (rows=166 width=178) - predicate:(key > '9') - TableScan [TS_0] (rows=500 width=178) - default@src_cbo,src_cbo,Tbl:COMPLETE,Col:COMPLETE,Output:["key","value"] - <-Map 3 [SIMPLE_EDGE] llap - SHUFFLE [RS_9] - PartitionCols:_col0 - Group By Operator [GBY_7] (rows=83 width=87) - Output:["_col0"],keys:_col0 - Select Operator [SEL_5] (rows=166 width=87) - Output:["_col0"] - Filter Operator [FIL_16] (rows=166 width=87) + Reducer 3 llap + File Output Operator [FS_14] + Select Operator [SEL_13] (rows=83 width=178) + Output:["_col0","_col1"] + Merge Join Operator [MERGEJOIN_29] (rows=83 width=178) + Conds:RS_10._col0=RS_11._col0(Inner),Output:["_col1","_col2"] + <-Map 4 [SIMPLE_EDGE] llap + SHUFFLE [RS_11] + PartitionCols:_col0 + Select Operator [SEL_9] (rows=166 width=178) + Output:["_col0","_col1"] + Filter Operator [FIL_18] (rows=166 width=178) predicate:(key > '9') - TableScan [TS_3] (rows=500 width=87) - default@src_cbo,s1,Tbl:COMPLETE,Col:COMPLETE,Output:["key"] + TableScan [TS_7] (rows=500 width=178) + default@src_cbo,src_cbo,Tbl:COMPLETE,Col:COMPLETE,Output:["key","value"] + <-Reducer 2 [ONE_TO_ONE_EDGE] llap + FORWARD [RS_10] + PartitionCols:_col0 + Group By Operator [GBY_5] (rows=83 width=87) + Output:["_col0"],keys:KEY._col0 + <-Map 1 [SIMPLE_EDGE] llap + SHUFFLE [RS_4] + PartitionCols:_col0 + Group By Operator [GBY_3] (rows=83 width=87) + Output:["_col0"],keys:key + Filter Operator [FIL_17] (rows=166 width=87) + predicate:(key > '9') + TableScan [TS_0] (rows=500 width=87) + default@src_cbo,s1,Tbl:COMPLETE,Col:COMPLETE,Output:["key"] PREHOOK: query: explain select p.p_partkey, li.l_suppkey from (select distinct l_partkey as p_partkey from lineitem) p join lineitem li on p.p_partkey = li.l_partkey @@ -2299,58 +2305,56 @@ POSTHOOK: Input: default@lineitem Plan optimized by CBO. Vertex dependency in root stage -Reducer 2 <- Map 1 (SIMPLE_EDGE) -Reducer 3 <- Map 5 (SIMPLE_EDGE), Reducer 2 (ONE_TO_ONE_EDGE) -Reducer 4 <- Map 6 (SIMPLE_EDGE), Reducer 3 (SIMPLE_EDGE) +Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 4 (SIMPLE_EDGE) +Reducer 3 <- Map 5 (SIMPLE_EDGE), Reducer 2 (SIMPLE_EDGE) Stage-0 Fetch Operator limit:-1 Stage-1 - Reducer 4 llap - File Output Operator [FS_22] - Select Operator [SEL_21] (rows=1 width=8) + Reducer 3 llap + File Output Operator [FS_21] + Select Operator [SEL_20] (rows=1 width=8) Output:["_col0","_col1"] - Merge Join Operator [MERGEJOIN_52] (rows=1 width=8) - Conds:RS_18._col1, _col4=RS_19._col0, _col1(Left Semi),Output:["_col0","_col3"] - <-Map 6 [SIMPLE_EDGE] llap - SHUFFLE [RS_19] + Merge Join Operator [MERGEJOIN_51] (rows=1 width=8) + Conds:RS_17._col1, _col4=RS_18._col0, _col1(Left Semi),Output:["_col0","_col3"] + <-Map 5 [SIMPLE_EDGE] llap + SHUFFLE [RS_18] PartitionCols:_col0, _col1 - Group By Operator [GBY_17] (rows=1 width=8) + Group By Operator [GBY_16] (rows=1 width=8) Output:["_col0","_col1"],keys:_col0, _col1 - Select Operator [SEL_12] (rows=2 width=8) + Select Operator [SEL_14] (rows=2 width=8) Output:["_col0","_col1"] - Filter Operator [FIL_30] (rows=2 width=96) + Filter Operator [FIL_29] (rows=2 width=96) predicate:((l_linenumber = 1) and (l_shipmode = 'AIR') and l_orderkey is not null) - TableScan [TS_10] (rows=100 width=96) + TableScan [TS_12] (rows=100 width=96) default@lineitem,lineitem,Tbl:COMPLETE,Col:COMPLETE,Output:["l_orderkey","l_linenumber","l_shipmode"] - <-Reducer 3 [SIMPLE_EDGE] llap - SHUFFLE [RS_18] + <-Reducer 2 [SIMPLE_EDGE] llap + SHUFFLE [RS_17] PartitionCols:_col1, _col4 - Merge Join Operator [MERGEJOIN_51] (rows=14 width=16) - Conds:RS_13._col0=RS_14._col1(Inner),Output:["_col0","_col1","_col3","_col4"] - <-Map 5 [SIMPLE_EDGE] llap - SHUFFLE [RS_14] - PartitionCols:_col1 - Select Operator [SEL_9] (rows=14 width=16) - Output:["_col0","_col1","_col2","_col3"] - Filter Operator [FIL_29] (rows=14 width=16) - predicate:((l_linenumber = 1) and l_orderkey is not null and l_partkey is not null) - TableScan [TS_7] (rows=100 width=16) - default@lineitem,li,Tbl:COMPLETE,Col:COMPLETE,Output:["l_orderkey","l_partkey","l_suppkey","l_linenumber"] - <-Reducer 2 [ONE_TO_ONE_EDGE] llap - FORWARD [RS_13] - PartitionCols:_col0 - Group By Operator [GBY_5] (rows=50 width=4) - Output:["_col0"],keys:KEY._col0 - <-Map 1 [SIMPLE_EDGE] llap - SHUFFLE [RS_4] - PartitionCols:_col0 - Group By Operator [GBY_3] (rows=50 width=4) - Output:["_col0"],keys:l_partkey + Select Operator [SEL_11] (rows=14 width=16) + Output:["_col0","_col1","_col3","_col4"] + Merge Join Operator [MERGEJOIN_50] (rows=14 width=16) + Conds:RS_8._col1=RS_9._col0(Left Semi),Output:["_col0","_col1","_col2","_col3"] + <-Map 1 [SIMPLE_EDGE] llap + SHUFFLE [RS_8] + PartitionCols:_col1 + Select Operator [SEL_2] (rows=14 width=16) + Output:["_col0","_col1","_col2","_col3"] + Filter Operator [FIL_27] (rows=14 width=16) + predicate:((l_linenumber = 1) and l_orderkey is not null and l_partkey is not null) + TableScan [TS_0] (rows=100 width=16) + default@lineitem,li,Tbl:COMPLETE,Col:COMPLETE,Output:["l_orderkey","l_partkey","l_suppkey","l_linenumber"] + <-Map 4 [SIMPLE_EDGE] llap + SHUFFLE [RS_9] + PartitionCols:_col0 + Group By Operator [GBY_7] (rows=50 width=4) + Output:["_col0"],keys:_col0 + Select Operator [SEL_5] (rows=100 width=4) + Output:["_col0"] Filter Operator [FIL_28] (rows=100 width=4) predicate:l_partkey is not null - TableScan [TS_0] (rows=100 width=4) + TableScan [TS_3] (rows=100 width=4) default@lineitem,lineitem,Tbl:COMPLETE,Col:COMPLETE,Output:["l_partkey"] PREHOOK: query: explain select key, value, count(*) @@ -2372,74 +2376,78 @@ POSTHOOK: Input: default@src_cbo Plan optimized by CBO. Vertex dependency in root stage -Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 5 (SIMPLE_EDGE) -Reducer 3 <- Reducer 2 (SIMPLE_EDGE) -Reducer 4 <- Reducer 3 (SIMPLE_EDGE), Reducer 6 (SIMPLE_EDGE) -Reducer 6 <- Map 5 (SIMPLE_EDGE) +Reducer 2 <- Map 1 (SIMPLE_EDGE) +Reducer 3 <- Map 7 (SIMPLE_EDGE), Reducer 2 (ONE_TO_ONE_EDGE) +Reducer 4 <- Reducer 3 (SIMPLE_EDGE) +Reducer 5 <- Reducer 4 (SIMPLE_EDGE), Reducer 6 (SIMPLE_EDGE) +Reducer 6 <- Map 1 (SIMPLE_EDGE) Stage-0 Fetch Operator limit:-1 Stage-1 - Reducer 4 llap - File Output Operator [FS_31] - Merge Join Operator [MERGEJOIN_54] (rows=41 width=186) - Conds:RS_27._col2=RS_28._col0(Left Semi),Output:["_col0","_col1","_col2"] - <-Reducer 3 [SIMPLE_EDGE] llap - SHUFFLE [RS_27] + Reducer 5 llap + File Output Operator [FS_33] + Merge Join Operator [MERGEJOIN_56] (rows=41 width=186) + Conds:RS_29._col2=RS_30._col0(Left Semi),Output:["_col0","_col1","_col2"] + <-Reducer 4 [SIMPLE_EDGE] llap + SHUFFLE [RS_29] PartitionCols:_col2 - Filter Operator [FIL_37] (rows=41 width=186) + Filter Operator [FIL_39] (rows=41 width=186) predicate:_col2 is not null - Group By Operator [GBY_14] (rows=41 width=186) + Group By Operator [GBY_16] (rows=41 width=186) Output:["_col0","_col1","_col2"],aggregations:["count(VALUE._col0)"],keys:KEY._col0, KEY._col1 - <-Reducer 2 [SIMPLE_EDGE] llap - SHUFFLE [RS_13] + <-Reducer 3 [SIMPLE_EDGE] llap + SHUFFLE [RS_15] PartitionCols:_col0, _col1 - Group By Operator [GBY_12] (rows=41 width=186) - Output:["_col0","_col1","_col2"],aggregations:["count()"],keys:_col0, _col1 - Merge Join Operator [MERGEJOIN_53] (rows=83 width=178) - Conds:RS_8._col0=RS_9._col0(Left Semi),Output:["_col0","_col1"] - <-Map 5 [SIMPLE_EDGE] llap - SHUFFLE [RS_9] + Group By Operator [GBY_14] (rows=41 width=186) + Output:["_col0","_col1","_col2"],aggregations:["count()"],keys:_col1, _col2 + Merge Join Operator [MERGEJOIN_55] (rows=83 width=178) + Conds:RS_10._col0=RS_11._col0(Inner),Output:["_col1","_col2"] + <-Map 7 [SIMPLE_EDGE] llap + SHUFFLE [RS_11] PartitionCols:_col0 - Group By Operator [GBY_7] (rows=83 width=87) - Output:["_col0"],keys:_col0 - Select Operator [SEL_5] (rows=166 width=87) - Output:["_col0"] - Filter Operator [FIL_39] (rows=166 width=87) - predicate:(key > '8') - TableScan [TS_3] (rows=500 width=87) - default@src_cbo,src_cbo,Tbl:COMPLETE,Col:COMPLETE,Output:["key"] - <-Map 1 [SIMPLE_EDGE] llap - SHUFFLE [RS_8] - PartitionCols:_col0 - Select Operator [SEL_2] (rows=166 width=178) + Select Operator [SEL_9] (rows=166 width=178) Output:["_col0","_col1"] - Filter Operator [FIL_38] (rows=166 width=178) + Filter Operator [FIL_41] (rows=166 width=178) predicate:(key > '8') - TableScan [TS_0] (rows=500 width=178) + TableScan [TS_7] (rows=500 width=178) default@src_cbo,b,Tbl:COMPLETE,Col:COMPLETE,Output:["key","value"] + <-Reducer 2 [ONE_TO_ONE_EDGE] llap + FORWARD [RS_10] + PartitionCols:_col0 + Group By Operator [GBY_5] (rows=83 width=87) + Output:["_col0"],keys:KEY._col0 + <-Map 1 [SIMPLE_EDGE] llap + SHUFFLE [RS_4] + PartitionCols:_col0 + Group By Operator [GBY_3] (rows=83 width=87) + Output:["_col0"],keys:key + Filter Operator [FIL_40] (rows=166 width=87) + predicate:(key > '8') + TableScan [TS_0] (rows=500 width=87) + default@src_cbo,src_cbo,Tbl:COMPLETE,Col:COMPLETE,Output:["key"] <-Reducer 6 [SIMPLE_EDGE] llap - SHUFFLE [RS_28] + SHUFFLE [RS_30] PartitionCols:_col0 - Group By Operator [GBY_26] (rows=41 width=8) + Group By Operator [GBY_28] (rows=41 width=8) Output:["_col0"],keys:_col0 - Select Operator [SEL_24] (rows=83 width=8) + Select Operator [SEL_26] (rows=83 width=8) Output:["_col0"] - Filter Operator [FIL_40] (rows=83 width=8) + Filter Operator [FIL_42] (rows=83 width=8) predicate:_col1 is not null - Select Operator [SEL_42] (rows=83 width=8) + Select Operator [SEL_44] (rows=83 width=8) Output:["_col1"] - Group By Operator [GBY_22] (rows=83 width=95) + Group By Operator [GBY_24] (rows=83 width=95) Output:["_col0","_col1"],aggregations:["count(VALUE._col0)"],keys:KEY._col0 - <-Map 5 [SIMPLE_EDGE] llap - SHUFFLE [RS_21] + <-Map 1 [SIMPLE_EDGE] llap + SHUFFLE [RS_23] PartitionCols:_col0 - Group By Operator [GBY_20] (rows=83 width=95) + Group By Operator [GBY_22] (rows=83 width=95) Output:["_col0","_col1"],aggregations:["count()"],keys:key - Filter Operator [FIL_41] (rows=166 width=87) + Filter Operator [FIL_43] (rows=166 width=87) predicate:(key > '9') - Please refer to the previous TableScan [TS_3] + Please refer to the previous TableScan [TS_0] PREHOOK: query: explain select p_mfgr, p_name, avg(p_size) from part @@ -2528,64 +2536,66 @@ POSTHOOK: Input: default@src_cbo Plan optimized by CBO. Vertex dependency in root stage -Reducer 2 <- Map 1 (CUSTOM_SIMPLE_EDGE), Reducer 6 (CUSTOM_SIMPLE_EDGE) -Reducer 3 <- Reducer 2 (SIMPLE_EDGE), Reducer 7 (ONE_TO_ONE_EDGE) +Reducer 2 <- Map 1 (SIMPLE_EDGE), Reducer 6 (ONE_TO_ONE_EDGE) +Reducer 3 <- Reducer 2 (CUSTOM_SIMPLE_EDGE), Reducer 7 (CUSTOM_SIMPLE_EDGE) Reducer 4 <- Reducer 3 (SIMPLE_EDGE) -Reducer 6 <- Map 5 (CUSTOM_SIMPLE_EDGE) -Reducer 7 <- Map 5 (SIMPLE_EDGE) +Reducer 6 <- Map 5 (SIMPLE_EDGE) +Reducer 7 <- Map 5 (CUSTOM_SIMPLE_EDGE) Stage-0 Fetch Operator limit:-1 Stage-1 Reducer 4 llap - File Output Operator [FS_26] - Select Operator [SEL_25] (rows=631 width=178) + File Output Operator [FS_27] + Select Operator [SEL_26] (rows=631 width=178) Output:["_col0","_col1"] <-Reducer 3 [SIMPLE_EDGE] llap - SHUFFLE [RS_24] - Select Operator [SEL_23] (rows=631 width=178) + SHUFFLE [RS_25] + Select Operator [SEL_24] (rows=631 width=178) Output:["_col0","_col1"] - Filter Operator [FIL_22] (rows=631 width=194) + Filter Operator [FIL_23] (rows=631 width=194) predicate:((_col2 = 0L) or (_col5 is null and _col0 is not null and (_col3 >= _col2))) - Merge Join Operator [MERGEJOIN_36] (rows=631 width=194) - Conds:RS_19._col0=RS_20._col0(Left Outer),Output:["_col0","_col1","_col2","_col3","_col5"] - <-Reducer 2 [SIMPLE_EDGE] llap - SHUFFLE [RS_19] - PartitionCols:_col0 - Merge Join Operator [MERGEJOIN_35] (rows=500 width=194) - Conds:(Inner),Output:["_col0","_col1","_col2","_col3"] - <-Map 1 [CUSTOM_SIMPLE_EDGE] llap - PARTITION_ONLY_SHUFFLE [RS_16] - Select Operator [SEL_1] (rows=500 width=178) - Output:["_col0","_col1"] - TableScan [TS_0] (rows=500 width=178) - default@src_cbo,src_cbo,Tbl:COMPLETE,Col:COMPLETE,Output:["key","value"] - <-Reducer 6 [CUSTOM_SIMPLE_EDGE] llap - PARTITION_ONLY_SHUFFLE [RS_17] - Group By Operator [GBY_7] (rows=1 width=16) - Output:["_col0","_col1"],aggregations:["count(VALUE._col0)","count(VALUE._col1)"] - <-Map 5 [CUSTOM_SIMPLE_EDGE] llap - SHUFFLE [RS_6] - Group By Operator [GBY_5] (rows=1 width=16) - Output:["_col0","_col1"],aggregations:["count()","count(key)"] - Filter Operator [FIL_28] (rows=166 width=87) - predicate:(key > '2') - TableScan [TS_2] (rows=500 width=87) - default@src_cbo,s1,Tbl:COMPLETE,Col:COMPLETE,Output:["key"] - <-Reducer 7 [ONE_TO_ONE_EDGE] llap - FORWARD [RS_20] - PartitionCols:_col0 - Select Operator [SEL_15] (rows=83 width=91) - Output:["_col0","_col1"] - Group By Operator [GBY_14] (rows=83 width=87) - Output:["_col0"],keys:KEY._col0 - <-Map 5 [SIMPLE_EDGE] llap - SHUFFLE [RS_13] + Select Operator [SEL_22] (rows=631 width=194) + Output:["_col0","_col1","_col2","_col3","_col5"] + Merge Join Operator [MERGEJOIN_37] (rows=631 width=194) + Conds:(Inner),Output:["_col0","_col1","_col3","_col4","_col5"] + <-Reducer 2 [CUSTOM_SIMPLE_EDGE] llap + PARTITION_ONLY_SHUFFLE [RS_19] + Merge Join Operator [MERGEJOIN_36] (rows=631 width=178) + Conds:RS_16._col0=RS_17._col0(Left Outer),Output:["_col0","_col1","_col3"] + <-Map 1 [SIMPLE_EDGE] llap + SHUFFLE [RS_16] PartitionCols:_col0 - Group By Operator [GBY_12] (rows=83 width=87) - Output:["_col0"],keys:key - Filter Operator [FIL_29] (rows=166 width=87) + Select Operator [SEL_1] (rows=500 width=178) + Output:["_col0","_col1"] + TableScan [TS_0] (rows=500 width=178) + default@src_cbo,src_cbo,Tbl:COMPLETE,Col:COMPLETE,Output:["key","value"] + <-Reducer 6 [ONE_TO_ONE_EDGE] llap + FORWARD [RS_17] + PartitionCols:_col0 + Select Operator [SEL_8] (rows=83 width=91) + Output:["_col0","_col1"] + Group By Operator [GBY_7] (rows=83 width=87) + Output:["_col0"],keys:KEY._col0 + <-Map 5 [SIMPLE_EDGE] llap + PARTITION_ONLY_SHUFFLE [RS_6] + PartitionCols:_col0 + Group By Operator [GBY_5] (rows=83 width=87) + Output:["_col0"],keys:key + Filter Operator [FIL_29] (rows=166 width=87) + predicate:(key > '2') + TableScan [TS_2] (rows=500 width=87) + default@src_cbo,s1,Tbl:COMPLETE,Col:COMPLETE,Output:["key"] + <-Reducer 7 [CUSTOM_SIMPLE_EDGE] llap + PARTITION_ONLY_SHUFFLE [RS_20] + Group By Operator [GBY_14] (rows=1 width=16) + Output:["_col0","_col1"],aggregations:["count(VALUE._col0)","count(VALUE._col1)"] + <-Map 5 [CUSTOM_SIMPLE_EDGE] llap + PARTITION_ONLY_SHUFFLE [RS_13] + Group By Operator [GBY_12] (rows=1 width=16) + Output:["_col0","_col1"],aggregations:["count()","count(key)"] + Filter Operator [FIL_30] (rows=166 width=87) predicate:(key > '2') Please refer to the previous TableScan [TS_2] @@ -2697,8 +2707,8 @@ POSTHOOK: Input: default@part Plan optimized by CBO. Vertex dependency in root stage -Reducer 2 <- Map 1 (CUSTOM_SIMPLE_EDGE), Reducer 6 (CUSTOM_SIMPLE_EDGE) -Reducer 3 <- Reducer 2 (SIMPLE_EDGE), Reducer 6 (SIMPLE_EDGE) +Reducer 2 <- Map 1 (SIMPLE_EDGE), Reducer 6 (SIMPLE_EDGE) +Reducer 3 <- Reducer 2 (CUSTOM_SIMPLE_EDGE), Reducer 6 (CUSTOM_SIMPLE_EDGE) Reducer 4 <- Reducer 3 (SIMPLE_EDGE) Reducer 6 <- Map 5 (CUSTOM_SIMPLE_EDGE) @@ -2707,50 +2717,52 @@ Stage-0 limit:-1 Stage-1 Reducer 4 llap - File Output Operator [FS_31] - Select Operator [SEL_30] (rows=27 width=125) + File Output Operator [FS_32] + Select Operator [SEL_31] (rows=27 width=125) Output:["_col0","_col1"] <-Reducer 3 [SIMPLE_EDGE] llap - SHUFFLE [RS_29] - Select Operator [SEL_28] (rows=27 width=125) + SHUFFLE [RS_30] + Select Operator [SEL_29] (rows=27 width=125) Output:["_col0","_col1"] - Filter Operator [FIL_27] (rows=27 width=141) + Filter Operator [FIL_28] (rows=27 width=141) predicate:((_col2 = 0L) or (_col5 is null and _col1 is not null and (_col3 >= _col2))) - Merge Join Operator [MERGEJOIN_37] (rows=27 width=141) - Conds:RS_24.UDFToDouble(_col1)=RS_25._col0(Left Outer),Output:["_col0","_col1","_col2","_col3","_col5"] - <-Reducer 6 [SIMPLE_EDGE] llap - SHUFFLE [RS_25] - PartitionCols:_col0 - Select Operator [SEL_20] (rows=1 width=12) - Output:["_col0","_col1"] - Group By Operator [GBY_7] (rows=1 width=16) - Output:["_col0","_col1"],aggregations:["sum(VALUE._col0)","count(VALUE._col1)"] - <-Map 5 [CUSTOM_SIMPLE_EDGE] llap - PARTITION_ONLY_SHUFFLE [RS_6] - Group By Operator [GBY_5] (rows=1 width=16) - Output:["_col0","_col1"],aggregations:["sum(p_size)","count(p_size)"] - Filter Operator [FIL_33] (rows=8 width=4) - predicate:(p_size < 10) - TableScan [TS_2] (rows=26 width=4) - default@part,part,Tbl:COMPLETE,Col:COMPLETE,Output:["p_size"] - <-Reducer 2 [SIMPLE_EDGE] llap - SHUFFLE [RS_24] - PartitionCols:UDFToDouble(_col1) - Merge Join Operator [MERGEJOIN_36] (rows=26 width=141) - Conds:(Inner),Output:["_col0","_col1","_col2","_col3"] - <-Reducer 6 [CUSTOM_SIMPLE_EDGE] llap - SHUFFLE [RS_22] - Group By Operator [GBY_12] (rows=1 width=16) - Output:["_col0","_col1"],aggregations:["count()","count(_col0)"] - Select Operator [SEL_8] (rows=1 width=16) - Output:["_col0"] + Select Operator [SEL_27] (rows=27 width=141) + Output:["_col0","_col1","_col2","_col3","_col5"] + Merge Join Operator [MERGEJOIN_38] (rows=27 width=141) + Conds:(Inner),Output:["_col0","_col1","_col3","_col4","_col5"] + <-Reducer 6 [CUSTOM_SIMPLE_EDGE] llap + PARTITION_ONLY_SHUFFLE [RS_25] + Group By Operator [GBY_19] (rows=1 width=16) + Output:["_col0","_col1"],aggregations:["count()","count(_col0)"] + Select Operator [SEL_15] (rows=1 width=16) + Output:["_col0"] + Group By Operator [GBY_7] (rows=1 width=16) + Output:["_col0","_col1"],aggregations:["sum(VALUE._col0)","count(VALUE._col1)"] + <-Map 5 [CUSTOM_SIMPLE_EDGE] llap + PARTITION_ONLY_SHUFFLE [RS_6] + Group By Operator [GBY_5] (rows=1 width=16) + Output:["_col0","_col1"],aggregations:["sum(p_size)","count(p_size)"] + Filter Operator [FIL_35] (rows=8 width=4) + predicate:(p_size < 10) + TableScan [TS_2] (rows=26 width=4) + default@part,part,Tbl:COMPLETE,Col:COMPLETE,Output:["p_size"] + <-Reducer 2 [CUSTOM_SIMPLE_EDGE] llap + PARTITION_ONLY_SHUFFLE [RS_24] + Merge Join Operator [MERGEJOIN_37] (rows=27 width=125) + Conds:RS_21.UDFToDouble(_col1)=RS_22._col0(Left Outer),Output:["_col0","_col1","_col3"] + <-Reducer 6 [SIMPLE_EDGE] llap + PARTITION_ONLY_SHUFFLE [RS_22] + PartitionCols:_col0 + Select Operator [SEL_8] (rows=1 width=12) + Output:["_col0","_col1"] Please refer to the previous Group By Operator [GBY_7] - <-Map 1 [CUSTOM_SIMPLE_EDGE] llap - PARTITION_ONLY_SHUFFLE [RS_21] - Select Operator [SEL_1] (rows=26 width=125) - Output:["_col0","_col1"] - TableScan [TS_0] (rows=26 width=125) - default@part,part,Tbl:COMPLETE,Col:COMPLETE,Output:["p_name","p_size"] + <-Map 1 [SIMPLE_EDGE] llap + SHUFFLE [RS_21] + PartitionCols:UDFToDouble(_col1) + Select Operator [SEL_1] (rows=26 width=125) + Output:["_col0","_col1"] + TableScan [TS_0] (rows=26 width=125) + default@part,part,Tbl:COMPLETE,Col:COMPLETE,Output:["p_name","p_size"] PREHOOK: query: explain select b.p_mfgr, min(p_retailprice) from part b http://git-wip-us.apache.org/repos/asf/hive/blob/ae1eb15d/ql/src/test/results/clientpositive/llap/lineage3.q.out ---------------------------------------------------------------------- diff --git a/ql/src/test/results/clientpositive/llap/lineage3.q.out b/ql/src/test/results/clientpositive/llap/lineage3.q.out index 94c6a13..22d1cdb 100644 --- a/ql/src/test/results/clientpositive/llap/lineage3.q.out +++ b/ql/src/test/results/clientpositive/llap/lineage3.q.out @@ -180,7 +180,7 @@ PREHOOK: Input: default@src1 #### A masked pattern was here #### {"version":"1.0","engine":"tez","database":"default","hash":"94e9cc0a67801fe1503a3cb0c5029d59","queryText":"select * from src1 a\nwhere exists\n (select cint from alltypesorc b\n where a.key = b.ctinyint + 300)\nand key > 300","edges":[{"sources":[2],"targets":[0],"edgeType":"PROJECTION"},{"sources":[3],"targets":[1],"edgeType":"PROJECTION"},{"sources":[2],"targets":[0,1],"expression":"(UDFToDouble(a.key) > 300.0D)","edgeType":"PREDICATE"},{"sources":[2],"targets":[0,1],"expression":"(a.key = a.key)","edgeType":"PREDICATE"},{"sources":[4],"targets":[0,1],"expression":"b.ctinyint is not null","edgeType":"PREDICATE"},{"sources":[4,2],"targets":[0,1],"expression":"(UDFToDouble((UDFToInteger(b.ctinyint) + 300)) = UDFToDouble(a.key))","edgeType":"PREDICATE"}],"vertices":[{"id":0,"vertexType":"COLUMN","vertexId":"a.key"},{"id":1,"vertexType":"COLUMN","vertexId":"a.value"},{"id":2,"vertexType":"COLUMN","vertexId":"default.src1.key"},{"id":3,"vertexType":"COLUMN","vertexId":"default.src1 .value"},{"id":4,"vertexType":"COLUMN","vertexId":"default.alltypesorc.ctinyint"}]} 311 val_311 -Warning: Shuffle Join MERGEJOIN[34][tables = [$hdt$_0, $hdt$_1]] in Stage 'Reducer 2' is a cross product +Warning: Shuffle Join MERGEJOIN[36][tables = [$hdt$_0, $hdt$_1, $hdt$_2]] in Stage 'Reducer 3' is a cross product PREHOOK: query: select key, value from src1 where key not in (select key+18 from src1) order by key PREHOOK: type: QUERY http://git-wip-us.apache.org/repos/asf/hive/blob/ae1eb15d/ql/src/test/results/clientpositive/llap/mapjoin_hint.q.out ---------------------------------------------------------------------- diff --git a/ql/src/test/results/clientpositive/llap/mapjoin_hint.q.out b/ql/src/test/results/clientpositive/llap/mapjoin_hint.q.out index ac505a5..e62eb0e 100644 --- a/ql/src/test/results/clientpositive/llap/mapjoin_hint.q.out +++ b/ql/src/test/results/clientpositive/llap/mapjoin_hint.q.out @@ -446,7 +446,7 @@ POSTHOOK: Lineage: part_null_n1.p_partkey SCRIPT [] POSTHOOK: Lineage: part_null_n1.p_retailprice SCRIPT [] POSTHOOK: Lineage: part_null_n1.p_size SCRIPT [] POSTHOOK: Lineage: part_null_n1.p_type SCRIPT [] -Warning: Shuffle Join MERGEJOIN[24][tables = [$hdt$_0, $hdt$_1, $hdt$_2]] in Stage 'Reducer 2' is a cross product +Warning: Shuffle Join MERGEJOIN[24][tables = [$hdt$_1, $hdt$_2, $hdt$_0]] in Stage 'Reducer 3' is a cross product PREHOOK: query: explain select /*+ mapjoin(None)*/ * from part where p_name = (select p_name from part_null_n1 where p_name is null) PREHOOK: type: QUERY PREHOOK: Input: default@part @@ -466,32 +466,13 @@ STAGE PLANS: Tez #### A masked pattern was here #### Edges: - Reducer 2 <- Map 1 (XPROD_EDGE), Map 3 (XPROD_EDGE), Reducer 4 (XPROD_EDGE) - Reducer 4 <- Map 3 (CUSTOM_SIMPLE_EDGE) + Reducer 2 <- Map 1 (CUSTOM_SIMPLE_EDGE) + Reducer 3 <- Map 1 (XPROD_EDGE), Map 4 (XPROD_EDGE), Reducer 2 (XPROD_EDGE) #### A masked pattern was here #### Vertices: Map 1 Map Operator Tree: TableScan - alias: part - filterExpr: (p_name = null) (type: boolean) - Statistics: Num rows: 26 Data size: 16094 Basic stats: COMPLETE Column stats: COMPLETE - Filter Operator - predicate: (p_name = null) (type: boolean) - Statistics: Num rows: 1 Data size: 619 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: p_partkey (type: int), p_mfgr (type: string), p_brand (type: string), p_type (type: string), p_size (type: int), p_container (type: string), p_retailprice (type: double), p_comment (type: string) - outputColumnNames: _col0, _col2, _col3, _col4, _col5, _col6, _col7, _col8 - Statistics: Num rows: 1 Data size: 582 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - sort order: - Statistics: Num rows: 1 Data size: 582 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: int), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string) - Execution mode: vectorized, llap - LLAP IO: no inputs - Map 3 - Map Operator Tree: - TableScan alias: part_null_n1 filterExpr: p_name is null (type: boolean) Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: NONE @@ -514,7 +495,42 @@ STAGE PLANS: Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized, llap LLAP IO: no inputs + Map 4 + Map Operator Tree: + TableScan + alias: part + filterExpr: (p_name = null) (type: boolean) + Statistics: Num rows: 26 Data size: 16094 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + predicate: (p_name = null) (type: boolean) + Statistics: Num rows: 1 Data size: 619 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: p_partkey (type: int), p_mfgr (type: string), p_brand (type: string), p_type (type: string), p_size (type: int), p_container (type: string), p_retailprice (type: double), p_comment (type: string) + outputColumnNames: _col0, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Statistics: Num rows: 1 Data size: 582 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 582 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string) + Execution mode: vectorized, llap + LLAP IO: no inputs Reducer 2 + Execution mode: vectorized, llap + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 192 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: (sq_count_check(_col0) <= 1) (type: boolean) + Statistics: Num rows: 1 Data size: 192 Basic stats: COMPLETE Column stats: NONE + Select Operator + Statistics: Num rows: 1 Data size: 192 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 192 Basic stats: COMPLETE Column stats: NONE + Reducer 3 Execution mode: llap Reduce Operator Tree: Merge Join Operator @@ -525,10 +541,10 @@ STAGE PLANS: 0 1 2 - outputColumnNames: _col0, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + outputColumnNames: _col2, _col4, _col5, _col6, _col7, _col8, _col9, _col10 Statistics: Num rows: 1 Data size: 959 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: _col0 (type: int), null (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string) + expressions: _col2 (type: int), null (type: string), _col4 (type: string), _col5 (type: string), _col6 (type: string), _col7 (type: int), _col8 (type: string), _col9 (type: double), _col10 (type: string) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 Statistics: Num rows: 1 Data size: 959 Basic stats: COMPLETE Column stats: NONE File Output Operator @@ -538,22 +554,6 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Reducer 4 - Execution mode: vectorized, llap - Reduce Operator Tree: - Group By Operator - aggregations: count(VALUE._col0) - mode: mergepartial - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 192 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: (sq_count_check(_col0) <= 1) (type: boolean) - Statistics: Num rows: 1 Data size: 192 Basic stats: COMPLETE Column stats: NONE - Select Operator - Statistics: Num rows: 1 Data size: 192 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - sort order: - Statistics: Num rows: 1 Data size: 192 Basic stats: COMPLETE Column stats: NONE Stage: Stage-0 Fetch Operator @@ -561,7 +561,7 @@ STAGE PLANS: Processor Tree: ListSink -Warning: Map Join MAPJOIN[24][bigTable=?] in task 'Map 1' is a cross product +Warning: Map Join MAPJOIN[24][bigTable=?] in task 'Map 3' is a cross product PREHOOK: query: explain select * from part where p_name = (select p_name from part_null_n1 where p_name is null) PREHOOK: type: QUERY PREHOOK: Input: default@part @@ -581,13 +581,38 @@ STAGE PLANS: Tez #### A masked pattern was here #### Edges: - Map 1 <- Map 2 (BROADCAST_EDGE), Reducer 3 (BROADCAST_EDGE) - Reducer 3 <- Map 2 (CUSTOM_SIMPLE_EDGE) + Map 3 <- Map 1 (BROADCAST_EDGE), Reducer 2 (BROADCAST_EDGE) + Reducer 2 <- Map 1 (CUSTOM_SIMPLE_EDGE) #### A masked pattern was here #### Vertices: Map 1 Map Operator Tree: TableScan + alias: part_null_n1 + filterExpr: p_name is null (type: boolean) + Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: p_name is null (type: boolean) + Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: NONE + Select Operator + Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count() + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 192 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 192 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: bigint) + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: NONE + Execution mode: vectorized, llap + LLAP IO: no inputs + Map 3 + Map Operator Tree: + TableScan alias: part filterExpr: (p_name = null) (type: boolean) Statistics: Num rows: 26 Data size: 16094 Basic stats: COMPLETE Column stats: COMPLETE @@ -606,13 +631,13 @@ STAGE PLANS: 0 1 2 - outputColumnNames: _col0, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + outputColumnNames: _col2, _col4, _col5, _col6, _col7, _col8, _col9, _col10 input vertices: - 1 Reducer 3 - 2 Map 2 + 0 Reducer 2 + 1 Map 1 Statistics: Num rows: 1 Data size: 959 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: _col0 (type: int), null (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string) + expressions: _col2 (type: int), null (type: string), _col4 (type: string), _col5 (type: string), _col6 (type: string), _col7 (type: int), _col8 (type: string), _col9 (type: double), _col10 (type: string) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 Statistics: Num rows: 1 Data size: 959 Basic stats: COMPLETE Column stats: NONE File Output Operator @@ -624,32 +649,7 @@ STAGE PLANS: serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Execution mode: vectorized, llap LLAP IO: no inputs - Map 2 - Map Operator Tree: - TableScan - alias: part_null_n1 - filterExpr: p_name is null (type: boolean) - Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: p_name is null (type: boolean) - Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: NONE - Select Operator - Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: count() - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 192 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - sort order: - Statistics: Num rows: 1 Data size: 192 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: bigint) - Reduce Output Operator - sort order: - Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: NONE - Execution mode: vectorized, llap - LLAP IO: no inputs - Reducer 3 + Reducer 2 Execution mode: vectorized, llap Reduce Operator Tree: Group By Operator http://git-wip-us.apache.org/repos/asf/hive/blob/ae1eb15d/ql/src/test/results/clientpositive/llap/optimize_join_ptp.q.out ---------------------------------------------------------------------- diff --git a/ql/src/test/results/clientpositive/llap/optimize_join_ptp.q.out b/ql/src/test/results/clientpositive/llap/optimize_join_ptp.q.out index a68275b..af55d4d 100644 --- a/ql/src/test/results/clientpositive/llap/optimize_join_ptp.q.out +++ b/ql/src/test/results/clientpositive/llap/optimize_join_ptp.q.out @@ -55,33 +55,13 @@ STAGE PLANS: Tez #### A masked pattern was here #### Edges: - Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 3 (SIMPLE_EDGE) + Reducer 2 <- Map 1 (SIMPLE_EDGE) + Reducer 3 <- Map 4 (SIMPLE_EDGE), Reducer 2 (ONE_TO_ONE_EDGE) #### A masked pattern was here #### Vertices: Map 1 Map Operator Tree: TableScan - alias: t1_n97 - filterExpr: (k < 15) (type: boolean) - Statistics: Num rows: 3 Data size: 285 Basic stats: COMPLETE Column stats: COMPLETE - Filter Operator - predicate: (k < 15) (type: boolean) - Statistics: Num rows: 1 Data size: 95 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: v (type: string), k (type: int) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 95 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: UDFToDouble(_col1) (type: double) - sort order: + - Map-reduce partition columns: UDFToDouble(_col1) (type: double) - Statistics: Num rows: 1 Data size: 95 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: string), _col1 (type: int) - Execution mode: vectorized, llap - LLAP IO: no inputs - Map 3 - Map Operator Tree: - TableScan alias: t2_n60 filterExpr: ((v = 'people') and k is not null) (type: boolean) Statistics: Num rows: 3 Data size: 297 Basic stats: COMPLETE Column stats: COMPLETE @@ -90,10 +70,10 @@ STAGE PLANS: Statistics: Num rows: 1 Data size: 99 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: k (type: double) - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: k + Statistics: Num rows: 1 Data size: 99 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - keys: _col0 (type: double) + keys: k (type: double) mode: hash outputColumnNames: _col0 Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE @@ -104,24 +84,62 @@ STAGE PLANS: Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: vectorized, llap LLAP IO: no inputs + Map 4 + Map Operator Tree: + TableScan + alias: t1_n97 + filterExpr: (k < 15) (type: boolean) + Statistics: Num rows: 3 Data size: 285 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + predicate: (k < 15) (type: boolean) + Statistics: Num rows: 1 Data size: 95 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: v (type: string), k (type: int) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 95 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: UDFToDouble(_col1) (type: double) + sort order: + + Map-reduce partition columns: UDFToDouble(_col1) (type: double) + Statistics: Num rows: 1 Data size: 95 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: string), _col1 (type: int) + Execution mode: vectorized, llap + LLAP IO: no inputs Reducer 2 + Execution mode: vectorized, llap + Reduce Operator Tree: + Group By Operator + keys: KEY._col0 (type: double) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: double) + sort order: + + Map-reduce partition columns: _col0 (type: double) + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + Reducer 3 Execution mode: llap Reduce Operator Tree: Merge Join Operator condition map: - Left Semi Join 0 to 1 + Inner Join 0 to 1 keys: - 0 UDFToDouble(_col1) (type: double) - 1 _col0 (type: double) - outputColumnNames: _col0, _col1 + 0 _col0 (type: double) + 1 UDFToDouble(_col1) (type: double) + outputColumnNames: _col1, _col2 Statistics: Num rows: 1 Data size: 95 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false + Select Operator + expressions: _col1 (type: string), _col2 (type: int) + outputColumnNames: _col0, _col1 Statistics: Num rows: 1 Data size: 95 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 95 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Fetch Operator