HIVE-20788: Extended SJ reduction may backtrack columns incorrectly when creating filters (Jesus Camacho Rodriguez, reviewed by Deepak Jaiswal)
Project: http://git-wip-us.apache.org/repos/asf/hive/repo Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/3cbc13e9 Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/3cbc13e9 Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/3cbc13e9 Branch: refs/heads/master-tez092 Commit: 3cbc13e92b9c22fabf9eac72eaec9352eb9b43d2 Parents: 94d4991 Author: Jesus Camacho Rodriguez <[email protected]> Authored: Mon Oct 22 18:30:18 2018 -0700 Committer: Jesus Camacho Rodriguez <[email protected]> Committed: Wed Oct 24 16:11:48 2018 -0700 ---------------------------------------------------------------------- .../hive/ql/ppd/SyntheticJoinPredicate.java | 17 +- .../queries/clientpositive/perf/cbo_query24.q | 3 +- .../test/queries/clientpositive/perf/query24.q | 3 +- .../clientpositive/perf/spark/query24.q.out | 400 ++++++++--------- .../clientpositive/perf/tez/cbo_query23.q.out | 8 +- .../clientpositive/perf/tez/cbo_query24.q.out | 103 ++--- .../perf/tez/constraints/cbo_query24.q.out | 101 ++--- .../perf/tez/constraints/cbo_query6.q.out | 2 +- .../perf/tez/constraints/query18.q.out | 108 ++--- .../perf/tez/constraints/query24.q.out | 436 ++++++++++--------- .../perf/tez/constraints/query33.q.out | 202 ++++----- .../perf/tez/constraints/query56.q.out | 236 +++++----- .../perf/tez/constraints/query6.q.out | 132 +++--- .../perf/tez/constraints/query60.q.out | 242 +++++----- .../perf/tez/constraints/query95.q.out | 128 +++--- .../clientpositive/perf/tez/query18.q.out | 112 ++--- .../clientpositive/perf/tez/query23.q.out | 340 +++++++-------- .../clientpositive/perf/tez/query24.q.out | 436 ++++++++++--------- .../clientpositive/perf/tez/query59.q.out | 74 ++-- .../clientpositive/perf/tez/query95.q.out | 180 ++++---- 20 files changed, 1664 insertions(+), 1599 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/hive/blob/3cbc13e9/ql/src/java/org/apache/hadoop/hive/ql/ppd/SyntheticJoinPredicate.java ---------------------------------------------------------------------- diff --git a/ql/src/java/org/apache/hadoop/hive/ql/ppd/SyntheticJoinPredicate.java b/ql/src/java/org/apache/hadoop/hive/ql/ppd/SyntheticJoinPredicate.java index 1f533bc..e97e447 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/ppd/SyntheticJoinPredicate.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/ppd/SyntheticJoinPredicate.java @@ -308,7 +308,22 @@ public class SyntheticJoinPredicate extends Transform { CommonJoinOperator<JoinDesc> joinOp = (CommonJoinOperator) currentOp; // 2. Backtrack expression to join output - final ExprNodeDesc joinExprNode = ExprNodeDescUtils.backtrack(currentNode, op, joinOp); + ExprNodeDesc expr = currentNode; + if (currentOp != op) { + if (expr instanceof ExprNodeColumnDesc) { + // Expression refers to output of current operator, but backtrack methods works + // from the input columns, hence we need to make resolution for current operator + // here. If the operator was already the join, there is nothing to do + if (op.getColumnExprMap() != null) { + expr = op.getColumnExprMap().get(((ExprNodeColumnDesc) expr).getColumn()); + } + } else { + // TODO: We can extend to other expression types + // We are done + return true; + } + } + final ExprNodeDesc joinExprNode = ExprNodeDescUtils.backtrack(expr, op, joinOp); if (joinExprNode == null || !(joinExprNode instanceof ExprNodeColumnDesc)) { // TODO: We can extend to other expression types // We are done http://git-wip-us.apache.org/repos/asf/hive/blob/3cbc13e9/ql/src/test/queries/clientpositive/perf/cbo_query24.q ---------------------------------------------------------------------- diff --git a/ql/src/test/queries/clientpositive/perf/cbo_query24.q b/ql/src/test/queries/clientpositive/perf/cbo_query24.q index 02bcbaf..8994de7 100644 --- a/ql/src/test/queries/clientpositive/perf/cbo_query24.q +++ b/ql/src/test/queries/clientpositive/perf/cbo_query24.q @@ -24,7 +24,8 @@ where ss_ticket_number = sr_ticket_number and ss_customer_sk = c_customer_sk and ss_item_sk = i_item_sk and ss_store_sk = s_store_sk - and c_birth_country = upper(ca_country) + and c_current_addr_sk = ca_address_sk + and c_birth_country <> upper(ca_country) and s_zip = ca_zip and s_market_id=7 group by c_last_name http://git-wip-us.apache.org/repos/asf/hive/blob/3cbc13e9/ql/src/test/queries/clientpositive/perf/query24.q ---------------------------------------------------------------------- diff --git a/ql/src/test/queries/clientpositive/perf/query24.q b/ql/src/test/queries/clientpositive/perf/query24.q index 007d7ee..b3cdaef 100644 --- a/ql/src/test/queries/clientpositive/perf/query24.q +++ b/ql/src/test/queries/clientpositive/perf/query24.q @@ -24,7 +24,8 @@ where ss_ticket_number = sr_ticket_number and ss_customer_sk = c_customer_sk and ss_item_sk = i_item_sk and ss_store_sk = s_store_sk - and c_birth_country = upper(ca_country) + and c_current_addr_sk = ca_address_sk + and c_birth_country <> upper(ca_country) and s_zip = ca_zip and s_market_id=7 group by c_last_name http://git-wip-us.apache.org/repos/asf/hive/blob/3cbc13e9/ql/src/test/results/clientpositive/perf/spark/query24.q.out ---------------------------------------------------------------------- diff --git a/ql/src/test/results/clientpositive/perf/spark/query24.q.out b/ql/src/test/results/clientpositive/perf/spark/query24.q.out index 4e2e8e7..91fe702 100644 --- a/ql/src/test/results/clientpositive/perf/spark/query24.q.out +++ b/ql/src/test/results/clientpositive/perf/spark/query24.q.out @@ -1,4 +1,4 @@ -Warning: Map Join MAPJOIN[104][bigTable=?] in task 'Stage-1:MAPRED' is a cross product +Warning: Map Join MAPJOIN[107][bigTable=?] in task 'Stage-1:MAPRED' is a cross product PREHOOK: query: explain with ssales as (select c_last_name @@ -23,7 +23,8 @@ where ss_ticket_number = sr_ticket_number and ss_customer_sk = c_customer_sk and ss_item_sk = i_item_sk and ss_store_sk = s_store_sk - and c_birth_country = upper(ca_country) + and c_current_addr_sk = ca_address_sk + and c_birth_country <> upper(ca_country) and s_zip = ca_zip and s_market_id=7 group by c_last_name @@ -79,7 +80,8 @@ where ss_ticket_number = sr_ticket_number and ss_customer_sk = c_customer_sk and ss_item_sk = i_item_sk and ss_store_sk = s_store_sk - and c_birth_country = upper(ca_country) + and c_current_addr_sk = ca_address_sk + and c_birth_country <> upper(ca_country) and s_zip = ca_zip and s_market_id=7 group by c_last_name @@ -138,8 +140,8 @@ STAGE PLANS: Statistics: Num rows: 852 Data size: 1628138 Basic stats: COMPLETE Column stats: NONE Spark HashTable Sink Operator keys: - 0 _col2 (type: int) - 1 _col0 (type: int) + 0 _col2 (type: string) + 1 _col4 (type: string) Execution mode: vectorized Local Work: Map Reduce Local Work @@ -147,43 +149,43 @@ STAGE PLANS: Stage: Stage-2 Spark Edges: - Reducer 13 <- Map 12 (PARTITION-LEVEL SORT, 975), Map 20 (PARTITION-LEVEL SORT, 975) - Reducer 14 <- Map 21 (PARTITION-LEVEL SORT, 486), Reducer 13 (PARTITION-LEVEL SORT, 486) - Reducer 15 <- Map 22 (PARTITION-LEVEL SORT, 564), Reducer 14 (PARTITION-LEVEL SORT, 564) - Reducer 16 <- Map 23 (PARTITION-LEVEL SORT, 899), Reducer 15 (PARTITION-LEVEL SORT, 899) - Reducer 17 <- Reducer 16 (GROUP, 640) + Reducer 13 <- Map 12 (PARTITION-LEVEL SORT, 887), Map 20 (PARTITION-LEVEL SORT, 887) + Reducer 14 <- Map 21 (PARTITION-LEVEL SORT, 989), Reducer 13 (PARTITION-LEVEL SORT, 989) + Reducer 15 <- Map 22 (PARTITION-LEVEL SORT, 442), Reducer 14 (PARTITION-LEVEL SORT, 442) + Reducer 16 <- Map 23 (PARTITION-LEVEL SORT, 516), Reducer 15 (PARTITION-LEVEL SORT, 516) + Reducer 17 <- Reducer 16 (GROUP, 529) Reducer 18 <- Reducer 17 (GROUP, 1) #### A masked pattern was here #### Vertices: Map 12 Map Operator Tree: TableScan - alias: store_sales - filterExpr: (ss_ticket_number is not null and ss_item_sk is not null and ss_store_sk is not null and ss_customer_sk is not null) (type: boolean) - Statistics: Num rows: 575995635 Data size: 50814502088 Basic stats: COMPLETE Column stats: NONE + alias: customer_address + filterExpr: (ca_address_sk is not null and ca_zip is not null) (type: boolean) + Statistics: Num rows: 40000000 Data size: 40595195284 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: (ss_customer_sk is not null and ss_item_sk is not null and ss_store_sk is not null and ss_ticket_number is not null) (type: boolean) - Statistics: Num rows: 575995635 Data size: 50814502088 Basic stats: COMPLETE Column stats: NONE + predicate: (ca_address_sk is not null and ca_zip is not null) (type: boolean) + Statistics: Num rows: 40000000 Data size: 40595195284 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: ss_item_sk (type: int), ss_customer_sk (type: int), ss_store_sk (type: int), ss_ticket_number (type: int), ss_sales_price (type: decimal(7,2)) - outputColumnNames: _col0, _col1, _col2, _col3, _col4 - Statistics: Num rows: 575995635 Data size: 50814502088 Basic stats: COMPLETE Column stats: NONE + expressions: ca_address_sk (type: int), ca_state (type: string), ca_zip (type: string), ca_country (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 40000000 Data size: 40595195284 Basic stats: COMPLETE Column stats: NONE Map Join Operator condition map: Inner Join 0 to 1 keys: - 0 _col2 (type: int) - 1 _col0 (type: int) - outputColumnNames: _col0, _col1, _col3, _col4, _col6, _col8, _col9 + 0 _col2 (type: string) + 1 _col4 (type: string) + outputColumnNames: _col0, _col1, _col3, _col4, _col5, _col7 input vertices: 1 Map 19 - Statistics: Num rows: 633595212 Data size: 55895953508 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 44000000 Data size: 44654715780 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: _col1 (type: int) + key expressions: _col0 (type: int) sort order: + - Map-reduce partition columns: _col1 (type: int) - Statistics: Num rows: 633595212 Data size: 55895953508 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: int), _col3 (type: int), _col4 (type: decimal(7,2)), _col6 (type: string), _col8 (type: string), _col9 (type: string) + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 44000000 Data size: 44654715780 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string), _col3 (type: string), _col4 (type: int), _col5 (type: string), _col7 (type: string) Execution mode: vectorized Local Work: Map Reduce Local Work @@ -191,25 +193,45 @@ STAGE PLANS: Map Operator Tree: TableScan alias: customer - filterExpr: (c_customer_sk is not null and c_birth_country is not null) (type: boolean) + filterExpr: (c_customer_sk is not null and c_current_addr_sk is not null) (type: boolean) Statistics: Num rows: 80000000 Data size: 68801615852 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: (c_birth_country is not null and c_customer_sk is not null) (type: boolean) + predicate: (c_current_addr_sk is not null and c_customer_sk is not null) (type: boolean) Statistics: Num rows: 80000000 Data size: 68801615852 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: c_customer_sk (type: int), c_first_name (type: string), c_last_name (type: string), c_birth_country (type: string) - outputColumnNames: _col0, _col1, _col2, _col3 + expressions: c_customer_sk (type: int), c_current_addr_sk (type: int), c_first_name (type: string), c_last_name (type: string), c_birth_country (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4 Statistics: Num rows: 80000000 Data size: 68801615852 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: _col0 (type: int) + key expressions: _col1 (type: int) sort order: + - Map-reduce partition columns: _col0 (type: int) + Map-reduce partition columns: _col1 (type: int) Statistics: Num rows: 80000000 Data size: 68801615852 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: string), _col2 (type: string), _col3 (type: string) + value expressions: _col0 (type: int), _col2 (type: string), _col3 (type: string), _col4 (type: string) Execution mode: vectorized Map 21 Map Operator Tree: TableScan + alias: store_sales + filterExpr: (ss_ticket_number is not null and ss_item_sk is not null and ss_store_sk is not null and ss_customer_sk is not null) (type: boolean) + Statistics: Num rows: 575995635 Data size: 50814502088 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: (ss_customer_sk is not null and ss_item_sk is not null and ss_store_sk is not null and ss_ticket_number is not null) (type: boolean) + Statistics: Num rows: 575995635 Data size: 50814502088 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: ss_item_sk (type: int), ss_customer_sk (type: int), ss_store_sk (type: int), ss_ticket_number (type: int), ss_sales_price (type: decimal(7,2)) + outputColumnNames: _col0, _col1, _col2, _col3, _col4 + Statistics: Num rows: 575995635 Data size: 50814502088 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col1 (type: int), _col2 (type: int) + sort order: ++ + Map-reduce partition columns: _col1 (type: int), _col2 (type: int) + Statistics: Num rows: 575995635 Data size: 50814502088 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: int), _col3 (type: int), _col4 (type: decimal(7,2)) + Execution mode: vectorized + Map 22 + Map Operator Tree: + TableScan alias: item filterExpr: i_item_sk is not null (type: boolean) Statistics: Num rows: 462000 Data size: 663560457 Basic stats: COMPLETE Column stats: NONE @@ -227,7 +249,7 @@ STAGE PLANS: Statistics: Num rows: 462000 Data size: 663560457 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: decimal(7,2)), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int) Execution mode: vectorized - Map 22 + Map 23 Map Operator Tree: TableScan alias: store_returns @@ -246,109 +268,92 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: int), _col1 (type: int) Statistics: Num rows: 57591150 Data size: 4462194832 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized - Map 23 - Map Operator Tree: - TableScan - alias: customer_address - filterExpr: (upper(ca_country) is not null and ca_zip is not null) (type: boolean) - Statistics: Num rows: 40000000 Data size: 40595195284 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: (ca_zip is not null and upper(ca_country) is not null) (type: boolean) - Statistics: Num rows: 40000000 Data size: 40595195284 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: ca_state (type: string), ca_zip (type: string), ca_country (type: string) - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 40000000 Data size: 40595195284 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col1 (type: string), upper(_col2) (type: string) - sort order: ++ - Map-reduce partition columns: _col1 (type: string), upper(_col2) (type: string) - Statistics: Num rows: 40000000 Data size: 40595195284 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: string) - Execution mode: vectorized Reducer 13 Reduce Operator Tree: Join Operator condition map: Inner Join 0 to 1 keys: - 0 _col1 (type: int) - 1 _col0 (type: int) - outputColumnNames: _col0, _col3, _col4, _col6, _col8, _col9, _col11, _col12, _col13 - Statistics: Num rows: 696954748 Data size: 61485550191 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int) - sort order: + - Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 696954748 Data size: 61485550191 Basic stats: COMPLETE Column stats: NONE - value expressions: _col3 (type: int), _col4 (type: decimal(7,2)), _col6 (type: string), _col8 (type: string), _col9 (type: string), _col11 (type: string), _col12 (type: string), _col13 (type: string) + 0 _col0 (type: int) + 1 _col1 (type: int) + outputColumnNames: _col1, _col3, _col4, _col5, _col7, _col9, _col11, _col12, _col13 + Statistics: Num rows: 88000001 Data size: 75681779077 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: (_col13 <> upper(_col3)) (type: boolean) + Statistics: Num rows: 88000001 Data size: 75681779077 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col9 (type: int), _col4 (type: int) + sort order: ++ + Map-reduce partition columns: _col9 (type: int), _col4 (type: int) + Statistics: Num rows: 88000001 Data size: 75681779077 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string), _col5 (type: string), _col7 (type: string), _col11 (type: string), _col12 (type: string) Reducer 14 Reduce Operator Tree: Join Operator condition map: Inner Join 0 to 1 keys: - 0 _col0 (type: int) - 1 _col0 (type: int) - outputColumnNames: _col0, _col3, _col4, _col6, _col8, _col9, _col11, _col12, _col13, _col15, _col16, _col17, _col18, _col19 - Statistics: Num rows: 766650239 Data size: 67634106676 Basic stats: COMPLETE Column stats: NONE + 0 _col9 (type: int), _col4 (type: int) + 1 _col1 (type: int), _col2 (type: int) + outputColumnNames: _col1, _col5, _col7, _col11, _col12, _col14, _col17, _col18 + Statistics: Num rows: 633595212 Data size: 55895953508 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: _col0 (type: int), _col3 (type: int) - sort order: ++ - Map-reduce partition columns: _col0 (type: int), _col3 (type: int) - Statistics: Num rows: 766650239 Data size: 67634106676 Basic stats: COMPLETE Column stats: NONE - value expressions: _col4 (type: decimal(7,2)), _col6 (type: string), _col8 (type: string), _col9 (type: string), _col11 (type: string), _col12 (type: string), _col13 (type: string), _col15 (type: decimal(7,2)), _col16 (type: string), _col17 (type: string), _col18 (type: string), _col19 (type: int) + key expressions: _col14 (type: int) + sort order: + + Map-reduce partition columns: _col14 (type: int) + Statistics: Num rows: 633595212 Data size: 55895953508 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string), _col5 (type: string), _col7 (type: string), _col11 (type: string), _col12 (type: string), _col17 (type: int), _col18 (type: decimal(7,2)) Reducer 15 Reduce Operator Tree: Join Operator condition map: Inner Join 0 to 1 keys: - 0 _col0 (type: int), _col3 (type: int) - 1 _col0 (type: int), _col1 (type: int) - outputColumnNames: _col4, _col6, _col8, _col9, _col11, _col12, _col13, _col15, _col16, _col17, _col18, _col19 - Statistics: Num rows: 843315281 Data size: 74397518956 Basic stats: COMPLETE Column stats: NONE + 0 _col14 (type: int) + 1 _col0 (type: int) + outputColumnNames: _col1, _col5, _col7, _col11, _col12, _col14, _col17, _col18, _col20, _col21, _col22, _col23, _col24 + Statistics: Num rows: 696954748 Data size: 61485550191 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: _col9 (type: string), _col13 (type: string) + key expressions: _col14 (type: int), _col17 (type: int) sort order: ++ - Map-reduce partition columns: _col9 (type: string), _col13 (type: string) - Statistics: Num rows: 843315281 Data size: 74397518956 Basic stats: COMPLETE Column stats: NONE - value expressions: _col4 (type: decimal(7,2)), _col6 (type: string), _col8 (type: string), _col11 (type: string), _col12 (type: string), _col15 (type: decimal(7,2)), _col16 (type: string), _col17 (type: string), _col18 (type: string), _col19 (type: int) + Map-reduce partition columns: _col14 (type: int), _col17 (type: int) + Statistics: Num rows: 696954748 Data size: 61485550191 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string), _col5 (type: string), _col7 (type: string), _col11 (type: string), _col12 (type: string), _col18 (type: decimal(7,2)), _col20 (type: decimal(7,2)), _col21 (type: string), _col22 (type: string), _col23 (type: string), _col24 (type: int) Reducer 16 Reduce Operator Tree: Join Operator condition map: Inner Join 0 to 1 keys: - 0 _col9 (type: string), _col13 (type: string) - 1 _col1 (type: string), upper(_col2) (type: string) - outputColumnNames: _col4, _col6, _col8, _col11, _col12, _col15, _col16, _col17, _col18, _col19, _col22 - Statistics: Num rows: 927646829 Data size: 81837272625 Basic stats: COMPLETE Column stats: NONE + 0 _col14 (type: int), _col17 (type: int) + 1 _col0 (type: int), _col1 (type: int) + outputColumnNames: _col1, _col5, _col7, _col11, _col12, _col18, _col20, _col21, _col22, _col23, _col24 + Statistics: Num rows: 766650239 Data size: 67634106676 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: sum(_col4) - keys: _col11 (type: string), _col12 (type: string), _col6 (type: string), _col8 (type: string), _col15 (type: decimal(7,2)), _col16 (type: string), _col17 (type: string), _col18 (type: string), _col19 (type: int), _col22 (type: string) + aggregations: sum(_col18) + keys: _col11 (type: string), _col12 (type: string), _col1 (type: string), _col5 (type: string), _col7 (type: string), _col20 (type: decimal(7,2)), _col21 (type: string), _col22 (type: string), _col23 (type: string), _col24 (type: int) mode: hash outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10 - Statistics: Num rows: 927646829 Data size: 81837272625 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 766650239 Data size: 67634106676 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: decimal(7,2)), _col5 (type: string), _col6 (type: string), _col7 (type: string), _col8 (type: int), _col9 (type: string) + key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: decimal(7,2)), _col6 (type: string), _col7 (type: string), _col8 (type: string), _col9 (type: int) sort order: ++++++++++ - Map-reduce partition columns: _col0 (type: string), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: decimal(7,2)), _col5 (type: string), _col6 (type: string), _col7 (type: string), _col8 (type: int), _col9 (type: string) - Statistics: Num rows: 927646829 Data size: 81837272625 Basic stats: COMPLETE Column stats: NONE + Map-reduce partition columns: _col0 (type: string), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: decimal(7,2)), _col6 (type: string), _col7 (type: string), _col8 (type: string), _col9 (type: int) + Statistics: Num rows: 766650239 Data size: 67634106676 Basic stats: COMPLETE Column stats: NONE value expressions: _col10 (type: decimal(17,2)) Reducer 17 Execution mode: vectorized Reduce Operator Tree: Group By Operator aggregations: sum(VALUE._col0) - keys: KEY._col0 (type: string), KEY._col1 (type: string), KEY._col2 (type: string), KEY._col3 (type: string), KEY._col4 (type: decimal(7,2)), KEY._col5 (type: string), KEY._col6 (type: string), KEY._col7 (type: string), KEY._col8 (type: int), KEY._col9 (type: string) + keys: KEY._col0 (type: string), KEY._col1 (type: string), KEY._col2 (type: string), KEY._col3 (type: string), KEY._col4 (type: string), KEY._col5 (type: decimal(7,2)), KEY._col6 (type: string), KEY._col7 (type: string), KEY._col8 (type: string), KEY._col9 (type: int) mode: mergepartial outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10 - Statistics: Num rows: 463823414 Data size: 40918636268 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 383325119 Data size: 33817053293 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: _col10 (type: decimal(17,2)) outputColumnNames: _col10 - Statistics: Num rows: 463823414 Data size: 40918636268 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 383325119 Data size: 33817053293 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: sum(_col10), count(_col10) mode: hash @@ -381,7 +386,7 @@ STAGE PLANS: Spark #### A masked pattern was here #### Vertices: - Map 8 + Map 9 Map Operator Tree: TableScan alias: store @@ -396,8 +401,8 @@ STAGE PLANS: Statistics: Num rows: 852 Data size: 1628138 Basic stats: COMPLETE Column stats: NONE Spark HashTable Sink Operator keys: - 0 _col2 (type: int) - 1 _col0 (type: int) + 0 _col2 (type: string) + 1 _col4 (type: string) Execution mode: vectorized Local Work: Map Reduce Local Work @@ -405,11 +410,11 @@ STAGE PLANS: Stage: Stage-1 Spark Edges: - Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 400), Map 7 (PARTITION-LEVEL SORT, 400) - Reducer 3 <- Map 9 (PARTITION-LEVEL SORT, 1009), Reducer 2 (PARTITION-LEVEL SORT, 1009) - Reducer 4 <- Map 10 (PARTITION-LEVEL SORT, 564), Reducer 3 (PARTITION-LEVEL SORT, 564) - Reducer 5 <- Map 11 (PARTITION-LEVEL SORT, 899), Reducer 4 (PARTITION-LEVEL SORT, 899) - Reducer 6 <- Reducer 5 (GROUP PARTITION-LEVEL SORT, 640) + Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 400), Map 6 (PARTITION-LEVEL SORT, 400) + Reducer 3 <- Reducer 2 (PARTITION-LEVEL SORT, 1009), Reducer 8 (PARTITION-LEVEL SORT, 1009) + Reducer 4 <- Map 11 (PARTITION-LEVEL SORT, 516), Reducer 3 (PARTITION-LEVEL SORT, 516) + Reducer 5 <- Reducer 4 (GROUP PARTITION-LEVEL SORT, 529) + Reducer 8 <- Map 10 (PARTITION-LEVEL SORT, 887), Map 7 (PARTITION-LEVEL SORT, 887) #### A masked pattern was here #### Vertices: Map 1 @@ -435,6 +440,26 @@ STAGE PLANS: Map 10 Map Operator Tree: TableScan + alias: customer + filterExpr: (c_customer_sk is not null and c_current_addr_sk is not null) (type: boolean) + Statistics: Num rows: 80000000 Data size: 68801615852 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: (c_current_addr_sk is not null and c_customer_sk is not null) (type: boolean) + Statistics: Num rows: 80000000 Data size: 68801615852 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: c_customer_sk (type: int), c_current_addr_sk (type: int), c_first_name (type: string), c_last_name (type: string), c_birth_country (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4 + Statistics: Num rows: 80000000 Data size: 68801615852 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col1 (type: int) + sort order: + + Map-reduce partition columns: _col1 (type: int) + Statistics: Num rows: 80000000 Data size: 68801615852 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: int), _col2 (type: string), _col3 (type: string), _col4 (type: string) + Execution mode: vectorized + Map 11 + Map Operator Tree: + TableScan alias: store_returns filterExpr: (sr_ticket_number is not null and sr_item_sk is not null) (type: boolean) Statistics: Num rows: 57591150 Data size: 4462194832 Basic stats: COMPLETE Column stats: NONE @@ -451,27 +476,7 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: int), _col1 (type: int) Statistics: Num rows: 57591150 Data size: 4462194832 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized - Map 11 - Map Operator Tree: - TableScan - alias: customer_address - filterExpr: (upper(ca_country) is not null and ca_zip is not null) (type: boolean) - Statistics: Num rows: 40000000 Data size: 40595195284 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: (ca_zip is not null and upper(ca_country) is not null) (type: boolean) - Statistics: Num rows: 40000000 Data size: 40595195284 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: ca_state (type: string), ca_zip (type: string), ca_country (type: string) - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 40000000 Data size: 40595195284 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col1 (type: string), upper(_col2) (type: string) - sort order: ++ - Map-reduce partition columns: _col1 (type: string), upper(_col2) (type: string) - Statistics: Num rows: 40000000 Data size: 40595195284 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: string) - Execution mode: vectorized - Map 7 + Map 6 Map Operator Tree: TableScan alias: item @@ -491,29 +496,39 @@ STAGE PLANS: Statistics: Num rows: 231000 Data size: 331780228 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: decimal(7,2)), _col2 (type: string), _col4 (type: string), _col5 (type: int) Execution mode: vectorized - Map 9 + Map 7 Map Operator Tree: TableScan - alias: customer - filterExpr: (c_customer_sk is not null and c_birth_country is not null) (type: boolean) - Statistics: Num rows: 80000000 Data size: 68801615852 Basic stats: COMPLETE Column stats: NONE + alias: customer_address + filterExpr: (ca_address_sk is not null and ca_zip is not null) (type: boolean) + Statistics: Num rows: 40000000 Data size: 40595195284 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: (c_birth_country is not null and c_customer_sk is not null) (type: boolean) - Statistics: Num rows: 80000000 Data size: 68801615852 Basic stats: COMPLETE Column stats: NONE + predicate: (ca_address_sk is not null and ca_zip is not null) (type: boolean) + Statistics: Num rows: 40000000 Data size: 40595195284 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: c_customer_sk (type: int), c_first_name (type: string), c_last_name (type: string), c_birth_country (type: string) + expressions: ca_address_sk (type: int), ca_state (type: string), ca_zip (type: string), ca_country (type: string) outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 80000000 Data size: 68801615852 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int) - sort order: + - Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 80000000 Data size: 68801615852 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: string), _col2 (type: string), _col3 (type: string) + Statistics: Num rows: 40000000 Data size: 40595195284 Basic stats: COMPLETE Column stats: NONE + Map Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col2 (type: string) + 1 _col4 (type: string) + outputColumnNames: _col0, _col1, _col3, _col4, _col5, _col7 + input vertices: + 1 Map 9 + Statistics: Num rows: 44000000 Data size: 44654715780 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 44000000 Data size: 44654715780 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string), _col3 (type: string), _col4 (type: int), _col5 (type: string), _col7 (type: string) Execution mode: vectorized - Reducer 2 Local Work: Map Reduce Local Work + Reducer 2 Reduce Operator Tree: Join Operator condition map: @@ -523,38 +538,28 @@ STAGE PLANS: 1 _col0 (type: int) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col6, _col7, _col9, _col10 Statistics: Num rows: 633595212 Data size: 55895953508 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 _col2 (type: int) - 1 _col0 (type: int) - outputColumnNames: _col0, _col1, _col3, _col4, _col6, _col7, _col9, _col10, _col12, _col14, _col15 - input vertices: - 1 Map 8 - Statistics: Num rows: 696954748 Data size: 61485550191 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col1 (type: int) - sort order: + - Map-reduce partition columns: _col1 (type: int) - Statistics: Num rows: 696954748 Data size: 61485550191 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: int), _col3 (type: int), _col4 (type: decimal(7,2)), _col6 (type: decimal(7,2)), _col7 (type: string), _col9 (type: string), _col10 (type: int), _col12 (type: string), _col14 (type: string), _col15 (type: string) + Reduce Output Operator + key expressions: _col1 (type: int), _col2 (type: int) + sort order: ++ + Map-reduce partition columns: _col1 (type: int), _col2 (type: int) + Statistics: Num rows: 633595212 Data size: 55895953508 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: int), _col3 (type: int), _col4 (type: decimal(7,2)), _col6 (type: decimal(7,2)), _col7 (type: string), _col9 (type: string), _col10 (type: int) Reducer 3 Reduce Operator Tree: Join Operator condition map: Inner Join 0 to 1 keys: - 0 _col1 (type: int) - 1 _col0 (type: int) - outputColumnNames: _col0, _col3, _col4, _col6, _col7, _col9, _col10, _col12, _col14, _col15, _col17, _col18, _col19 - Statistics: Num rows: 766650239 Data size: 67634106676 Basic stats: COMPLETE Column stats: NONE + 0 _col1 (type: int), _col2 (type: int) + 1 _col0 (type: int), _col9 (type: int) + outputColumnNames: _col0, _col3, _col4, _col6, _col7, _col9, _col10, _col13, _col14, _col17, _col21, _col23 + Statistics: Num rows: 696954748 Data size: 61485550191 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: int), _col3 (type: int) sort order: ++ Map-reduce partition columns: _col0 (type: int), _col3 (type: int) - Statistics: Num rows: 766650239 Data size: 67634106676 Basic stats: COMPLETE Column stats: NONE - value expressions: _col4 (type: decimal(7,2)), _col6 (type: decimal(7,2)), _col7 (type: string), _col9 (type: string), _col10 (type: int), _col12 (type: string), _col14 (type: string), _col15 (type: string), _col17 (type: string), _col18 (type: string), _col19 (type: string) + Statistics: Num rows: 696954748 Data size: 61485550191 Basic stats: COMPLETE Column stats: NONE + value expressions: _col4 (type: decimal(7,2)), _col6 (type: decimal(7,2)), _col7 (type: string), _col9 (type: string), _col10 (type: int), _col13 (type: string), _col14 (type: string), _col17 (type: string), _col21 (type: string), _col23 (type: string) Reducer 4 Reduce Operator Tree: Join Operator @@ -563,61 +568,45 @@ STAGE PLANS: keys: 0 _col0 (type: int), _col3 (type: int) 1 _col0 (type: int), _col1 (type: int) - outputColumnNames: _col4, _col6, _col7, _col9, _col10, _col12, _col14, _col15, _col17, _col18, _col19 - Statistics: Num rows: 843315281 Data size: 74397518956 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col15 (type: string), _col19 (type: string) - sort order: ++ - Map-reduce partition columns: _col15 (type: string), _col19 (type: string) - Statistics: Num rows: 843315281 Data size: 74397518956 Basic stats: COMPLETE Column stats: NONE - value expressions: _col4 (type: decimal(7,2)), _col6 (type: decimal(7,2)), _col7 (type: string), _col9 (type: string), _col10 (type: int), _col12 (type: string), _col14 (type: string), _col17 (type: string), _col18 (type: string) - Reducer 5 - Reduce Operator Tree: - Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 _col15 (type: string), _col19 (type: string) - 1 _col1 (type: string), upper(_col2) (type: string) - outputColumnNames: _col4, _col6, _col7, _col9, _col10, _col12, _col14, _col17, _col18, _col22 - Statistics: Num rows: 927646829 Data size: 81837272625 Basic stats: COMPLETE Column stats: NONE + outputColumnNames: _col4, _col6, _col7, _col9, _col10, _col13, _col14, _col17, _col21, _col23 + Statistics: Num rows: 766650239 Data size: 67634106676 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: sum(_col4) - keys: _col17 (type: string), _col18 (type: string), _col12 (type: string), _col22 (type: string), _col6 (type: decimal(7,2)), _col7 (type: string), _col9 (type: string), _col10 (type: int), _col14 (type: string) + keys: _col13 (type: string), _col14 (type: string), _col21 (type: string), _col6 (type: decimal(7,2)), _col7 (type: string), _col9 (type: string), _col10 (type: int), _col17 (type: string), _col23 (type: string) mode: hash outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9 - Statistics: Num rows: 927646829 Data size: 81837272625 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 766650239 Data size: 67634106676 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: decimal(7,2)), _col5 (type: string), _col6 (type: string), _col7 (type: int), _col8 (type: string) + key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string), _col3 (type: decimal(7,2)), _col4 (type: string), _col5 (type: string), _col6 (type: int), _col7 (type: string), _col8 (type: string) sort order: +++++++++ Map-reduce partition columns: _col0 (type: string), _col1 (type: string), _col2 (type: string) - Statistics: Num rows: 927646829 Data size: 81837272625 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 766650239 Data size: 67634106676 Basic stats: COMPLETE Column stats: NONE value expressions: _col9 (type: decimal(17,2)) - Reducer 6 + Reducer 5 Execution mode: vectorized Local Work: Map Reduce Local Work Reduce Operator Tree: Group By Operator aggregations: sum(VALUE._col0) - keys: KEY._col0 (type: string), KEY._col1 (type: string), KEY._col2 (type: string), KEY._col3 (type: string), KEY._col4 (type: decimal(7,2)), KEY._col5 (type: string), KEY._col6 (type: string), KEY._col7 (type: int), KEY._col8 (type: string) + keys: KEY._col0 (type: string), KEY._col1 (type: string), KEY._col2 (type: string), KEY._col3 (type: decimal(7,2)), KEY._col4 (type: string), KEY._col5 (type: string), KEY._col6 (type: int), KEY._col7 (type: string), KEY._col8 (type: string) mode: mergepartial outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9 - Statistics: Num rows: 463823414 Data size: 40918636268 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 383325119 Data size: 33817053293 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string), _col9 (type: decimal(17,2)) - outputColumnNames: _col1, _col2, _col7, _col9 - Statistics: Num rows: 463823414 Data size: 40918636268 Basic stats: COMPLETE Column stats: NONE + outputColumnNames: _col4, _col5, _col7, _col9 + Statistics: Num rows: 383325119 Data size: 33817053293 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: sum(_col9) - keys: _col1 (type: string), _col2 (type: string), _col7 (type: string) + keys: _col4 (type: string), _col5 (type: string), _col7 (type: string) mode: complete outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 231911707 Data size: 20459318134 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 191662559 Data size: 16908526602 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: _col1 (type: string), _col0 (type: string), _col2 (type: string), _col3 (type: decimal(27,2)) outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 231911707 Data size: 20459318134 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 191662559 Data size: 16908526602 Basic stats: COMPLETE Column stats: NONE Map Join Operator condition map: Inner Join 0 to 1 @@ -627,21 +616,44 @@ STAGE PLANS: outputColumnNames: _col0, _col1, _col2, _col3, _col4 input vertices: 1 Reducer 18 - Statistics: Num rows: 231911707 Data size: 74494745865 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 191662559 Data size: 61565902849 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: (_col3 > _col4) (type: boolean) - Statistics: Num rows: 77303902 Data size: 24831581847 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 63887519 Data size: 20521967402 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string), _col3 (type: decimal(27,2)) outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 77303902 Data size: 24831581847 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 63887519 Data size: 20521967402 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - Statistics: Num rows: 77303902 Data size: 24831581847 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 63887519 Data size: 20521967402 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reducer 8 + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: int) + 1 _col1 (type: int) + outputColumnNames: _col1, _col3, _col4, _col5, _col7, _col9, _col11, _col12, _col13 + Statistics: Num rows: 88000001 Data size: 75681779077 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: (_col13 <> upper(_col3)) (type: boolean) + Statistics: Num rows: 88000001 Data size: 75681779077 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col9 (type: int), _col11 (type: string), _col12 (type: string), _col1 (type: string), _col4 (type: int), _col5 (type: string), _col7 (type: string) + outputColumnNames: _col0, _col2, _col3, _col6, _col9, _col10, _col12 + Statistics: Num rows: 88000001 Data size: 75681779077 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int), _col9 (type: int) + sort order: ++ + Map-reduce partition columns: _col0 (type: int), _col9 (type: int) + Statistics: Num rows: 88000001 Data size: 75681779077 Basic stats: COMPLETE Column stats: NONE + value expressions: _col2 (type: string), _col3 (type: string), _col6 (type: string), _col10 (type: string), _col12 (type: string) Stage: Stage-0 Fetch Operator http://git-wip-us.apache.org/repos/asf/hive/blob/3cbc13e9/ql/src/test/results/clientpositive/perf/tez/cbo_query23.q.out ---------------------------------------------------------------------- diff --git a/ql/src/test/results/clientpositive/perf/tez/cbo_query23.q.out b/ql/src/test/results/clientpositive/perf/tez/cbo_query23.q.out index baf790e..ace7cf5 100644 --- a/ql/src/test/results/clientpositive/perf/tez/cbo_query23.q.out +++ b/ql/src/test/results/clientpositive/perf/tez/cbo_query23.q.out @@ -1,7 +1,7 @@ -Warning: Shuffle Join MERGEJOIN[589][tables = [$hdt$_1, $hdt$_2]] in Stage 'Reducer 29' is a cross product -Warning: Shuffle Join MERGEJOIN[590][tables = [$hdt$_1, $hdt$_2, $hdt$_0]] in Stage 'Reducer 30' is a cross product -Warning: Shuffle Join MERGEJOIN[592][tables = [$hdt$_1, $hdt$_2]] in Stage 'Reducer 33' is a cross product -Warning: Shuffle Join MERGEJOIN[593][tables = [$hdt$_1, $hdt$_2, $hdt$_0]] in Stage 'Reducer 34' is a cross product +Warning: Shuffle Join MERGEJOIN[593][tables = [$hdt$_1, $hdt$_2]] in Stage 'Reducer 29' is a cross product +Warning: Shuffle Join MERGEJOIN[594][tables = [$hdt$_1, $hdt$_2, $hdt$_0]] in Stage 'Reducer 30' is a cross product +Warning: Shuffle Join MERGEJOIN[596][tables = [$hdt$_1, $hdt$_2]] in Stage 'Reducer 33' is a cross product +Warning: Shuffle Join MERGEJOIN[597][tables = [$hdt$_1, $hdt$_2, $hdt$_0]] in Stage 'Reducer 34' is a cross product PREHOOK: query: explain cbo with frequent_ss_items as (select substr(i_item_desc,1,30) itemdesc,i_item_sk item_sk,d_date solddate,count(*) cnt http://git-wip-us.apache.org/repos/asf/hive/blob/3cbc13e9/ql/src/test/results/clientpositive/perf/tez/cbo_query24.q.out ---------------------------------------------------------------------- diff --git a/ql/src/test/results/clientpositive/perf/tez/cbo_query24.q.out b/ql/src/test/results/clientpositive/perf/tez/cbo_query24.q.out index 53220d2..1d005b8 100644 --- a/ql/src/test/results/clientpositive/perf/tez/cbo_query24.q.out +++ b/ql/src/test/results/clientpositive/perf/tez/cbo_query24.q.out @@ -1,4 +1,4 @@ -Warning: Shuffle Join MERGEJOIN[290][tables = [$hdt$_0, $hdt$_1]] in Stage 'Reducer 8' is a cross product +Warning: Shuffle Join MERGEJOIN[301][tables = [$hdt$_0, $hdt$_1]] in Stage 'Reducer 6' is a cross product PREHOOK: query: explain cbo with ssales as (select c_last_name @@ -23,7 +23,8 @@ where ss_ticket_number = sr_ticket_number and ss_customer_sk = c_customer_sk and ss_item_sk = i_item_sk and ss_store_sk = s_store_sk - and c_birth_country = upper(ca_country) + and c_current_addr_sk = ca_address_sk + and c_birth_country <> upper(ca_country) and s_zip = ca_zip and s_market_id=7 group by c_last_name @@ -79,7 +80,8 @@ where ss_ticket_number = sr_ticket_number and ss_customer_sk = c_customer_sk and ss_item_sk = i_item_sk and ss_store_sk = s_store_sk - and c_birth_country = upper(ca_country) + and c_current_addr_sk = ca_address_sk + and c_birth_country <> upper(ca_country) and s_zip = ca_zip and s_market_id=7 group by c_last_name @@ -115,57 +117,58 @@ CBO PLAN: HiveProject($f0=[$0], $f1=[$1], $f2=[$2], $f3=[$3]) HiveJoin(condition=[>($3, $4)], joinType=[inner], algorithm=[none], cost=[not available]) HiveProject(c_last_name=[$1], c_first_name=[$0], s_store_name=[$2], $f3=[$3]) - HiveAggregate(group=[{1, 2, 7}], agg#0=[sum($9)]) - HiveProject(ca_state=[$0], c_first_name=[$1], c_last_name=[$2], i_current_price=[$3], i_size=[$4], i_units=[$5], i_manager_id=[$6], s_store_name=[$7], s_state=[$8], $f9=[$9]) - HiveAggregate(group=[{0, 6, 7, 15, 16, 18, 19, 21, 23}], agg#0=[sum($13)]) - HiveJoin(condition=[AND(=($8, UPPER($2)), =($24, $1))], joinType=[inner], algorithm=[none], cost=[not available]) - HiveProject(ca_state=[$8], ca_zip=[$9], ca_country=[$10]) - HiveFilter(condition=[AND(IS NOT NULL(UPPER($10)), IS NOT NULL($9))]) - HiveTableScan(table=[[default, customer_address]], table:alias=[customer_address]) - HiveJoin(condition=[AND(=($9, $1), =($6, $0))], joinType=[inner], algorithm=[none], cost=[not available]) - HiveProject(sr_item_sk=[$2], sr_ticket_number=[$9]) - HiveFilter(condition=[AND(IS NOT NULL($9), IS NOT NULL($2))]) - HiveTableScan(table=[[default, store_returns]], table:alias=[store_returns]) - HiveJoin(condition=[=($5, $0)], joinType=[inner], algorithm=[none], cost=[not available]) - HiveProject(c_customer_sk=[$0], c_first_name=[$8], c_last_name=[$9], c_birth_country=[$14]) - HiveFilter(condition=[AND(IS NOT NULL($0), IS NOT NULL($14))]) - HiveTableScan(table=[[default, customer]], table:alias=[customer]) - HiveJoin(condition=[=($2, $11)], joinType=[inner], algorithm=[none], cost=[not available]) - HiveJoin(condition=[=($0, $5)], joinType=[inner], algorithm=[none], cost=[not available]) - HiveProject(ss_item_sk=[$2], ss_customer_sk=[$3], ss_store_sk=[$7], ss_ticket_number=[$9], ss_sales_price=[$13]) - HiveFilter(condition=[AND(IS NOT NULL($9), IS NOT NULL($2), IS NOT NULL($7), IS NOT NULL($3))]) - HiveTableScan(table=[[default, store_sales]], table:alias=[store_sales]) - HiveProject(i_item_sk=[$0], i_current_price=[$5], i_size=[$15], i_color=[CAST(_UTF-16LE'orchid'):VARCHAR(2147483647) CHARACTER SET "UTF-16LE" COLLATE "ISO-8859-1$en_US$primary"], i_units=[$18], i_manager_id=[$20]) - HiveFilter(condition=[AND(=($17, _UTF-16LE'orchid'), IS NOT NULL($0))]) - HiveTableScan(table=[[default, item]], table:alias=[item]) - HiveProject(s_store_sk=[$0], s_store_name=[$5], s_market_id=[CAST(7):INTEGER], s_state=[$24], s_zip=[$25]) - HiveFilter(condition=[AND(=($10, 7), IS NOT NULL($0), IS NOT NULL($25))]) - HiveTableScan(table=[[default, store]], table:alias=[store]) + HiveAggregate(group=[{4, 5, 7}], agg#0=[sum($9)]) + HiveProject(i_current_price=[$0], i_size=[$1], i_units=[$2], i_manager_id=[$3], c_first_name=[$4], c_last_name=[$5], ca_state=[$6], s_store_name=[$7], s_state=[$8], $f9=[$9]) + HiveAggregate(group=[{8, 9, 11, 12, 15, 16, 19, 23, 25}], agg#0=[sum($6)]) + HiveJoin(condition=[AND(=($5, $1), =($2, $0))], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(sr_item_sk=[$2], sr_ticket_number=[$9]) + HiveFilter(condition=[AND(IS NOT NULL($9), IS NOT NULL($2))]) + HiveTableScan(table=[[default, store_returns]], table:alias=[store_returns]) + HiveJoin(condition=[AND(=($1, $11), =($2, $20))], joinType=[inner], algorithm=[none], cost=[not available]) + HiveJoin(condition=[=($0, $5)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(ss_item_sk=[$2], ss_customer_sk=[$3], ss_store_sk=[$7], ss_ticket_number=[$9], ss_sales_price=[$13]) + HiveFilter(condition=[AND(IS NOT NULL($9), IS NOT NULL($2), IS NOT NULL($7), IS NOT NULL($3))]) + HiveTableScan(table=[[default, store_sales]], table:alias=[store_sales]) + HiveProject(i_item_sk=[$0], i_current_price=[$5], i_size=[$15], i_color=[CAST(_UTF-16LE'orchid'):VARCHAR(2147483647) CHARACTER SET "UTF-16LE" COLLATE "ISO-8859-1$en_US$primary"], i_units=[$18], i_manager_id=[$20]) + HiveFilter(condition=[AND(=($17, _UTF-16LE'orchid'), IS NOT NULL($0))]) + HiveTableScan(table=[[default, item]], table:alias=[item]) + HiveProject(c_customer_sk=[$0], c_current_addr_sk=[$1], c_first_name=[$2], c_last_name=[$3], c_birth_country=[$4], ca_address_sk=[$5], ca_state=[$6], ca_zip=[$7], ca_country=[$8], s_store_sk=[$9], s_store_name=[$10], s_market_id=[$11], s_state=[$12], s_zip=[$13]) + HiveJoin(condition=[AND(=($1, $5), <>($4, UPPER($8)))], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(c_customer_sk=[$0], c_current_addr_sk=[$4], c_first_name=[$8], c_last_name=[$9], c_birth_country=[$14]) + HiveFilter(condition=[AND(IS NOT NULL($0), IS NOT NULL($4))]) + HiveTableScan(table=[[default, customer]], table:alias=[customer]) + HiveJoin(condition=[=($8, $2)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(ca_address_sk=[$0], ca_state=[$8], ca_zip=[$9], ca_country=[$10]) + HiveFilter(condition=[AND(IS NOT NULL($0), IS NOT NULL($9))]) + HiveTableScan(table=[[default, customer_address]], table:alias=[customer_address]) + HiveProject(s_store_sk=[$0], s_store_name=[$5], s_market_id=[CAST(7):INTEGER], s_state=[$24], s_zip=[$25]) + HiveFilter(condition=[AND(=($10, 7), IS NOT NULL($0), IS NOT NULL($25))]) + HiveTableScan(table=[[default, store]], table:alias=[store]) HiveProject(_o__c0=[*(0.05, /($0, $1))]) HiveAggregate(group=[{}], agg#0=[sum($10)], agg#1=[count($10)]) - HiveProject(c_first_name=[$0], c_last_name=[$1], s_store_name=[$2], s_state=[$3], i_current_price=[$4], i_size=[$5], i_color=[$6], i_units=[$7], i_manager_id=[$8], ca_state=[$9], $f10=[$10]) - HiveAggregate(group=[{3, 4, 12, 14, 17, 18, 19, 20, 21, 22}], agg#0=[sum($10)]) - HiveJoin(condition=[AND(=($5, UPPER($24)), =($15, $23))], joinType=[inner], algorithm=[none], cost=[not available]) - HiveJoin(condition=[AND(=($9, $1), =($6, $0))], joinType=[inner], algorithm=[none], cost=[not available]) - HiveProject(sr_item_sk=[$2], sr_ticket_number=[$9]) - HiveFilter(condition=[AND(IS NOT NULL($9), IS NOT NULL($2))]) - HiveTableScan(table=[[default, store_returns]], table:alias=[store_returns]) - HiveJoin(condition=[=($4, $14)], joinType=[inner], algorithm=[none], cost=[not available]) - HiveJoin(condition=[=($5, $0)], joinType=[inner], algorithm=[none], cost=[not available]) - HiveProject(c_customer_sk=[$0], c_first_name=[$8], c_last_name=[$9], c_birth_country=[$14]) - HiveFilter(condition=[AND(IS NOT NULL($0), IS NOT NULL($14))]) + HiveProject(c_first_name=[$0], c_last_name=[$1], ca_state=[$2], s_store_name=[$3], s_state=[$4], i_current_price=[$5], i_size=[$6], i_color=[$7], i_units=[$8], i_manager_id=[$9], $f10=[$10]) + HiveAggregate(group=[{7, 8, 11, 15, 17, 20, 21, 22, 23, 24}], agg#0=[sum($4)]) + HiveJoin(condition=[AND(=($3, $26), =($0, $25))], joinType=[inner], algorithm=[none], cost=[not available]) + HiveJoin(condition=[=($0, $19)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveJoin(condition=[AND(=($1, $5), =($2, $14))], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(ss_item_sk=[$2], ss_customer_sk=[$3], ss_store_sk=[$7], ss_ticket_number=[$9], ss_sales_price=[$13]) + HiveFilter(condition=[AND(IS NOT NULL($9), IS NOT NULL($2), IS NOT NULL($7), IS NOT NULL($3))]) + HiveTableScan(table=[[default, store_sales]], table:alias=[store_sales]) + HiveJoin(condition=[AND(=($1, $5), <>($4, UPPER($8)))], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(c_customer_sk=[$0], c_current_addr_sk=[$4], c_first_name=[$8], c_last_name=[$9], c_birth_country=[$14]) + HiveFilter(condition=[AND(IS NOT NULL($0), IS NOT NULL($4))]) HiveTableScan(table=[[default, customer]], table:alias=[customer]) - HiveJoin(condition=[=($2, $5)], joinType=[inner], algorithm=[none], cost=[not available]) - HiveProject(ss_item_sk=[$2], ss_customer_sk=[$3], ss_store_sk=[$7], ss_ticket_number=[$9], ss_sales_price=[$13]) - HiveFilter(condition=[AND(IS NOT NULL($9), IS NOT NULL($2), IS NOT NULL($7), IS NOT NULL($3))]) - HiveTableScan(table=[[default, store_sales]], table:alias=[store_sales]) + HiveJoin(condition=[=($8, $2)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(ca_address_sk=[$0], ca_state=[$8], ca_zip=[$9], ca_country=[$10]) + HiveFilter(condition=[AND(IS NOT NULL($0), IS NOT NULL($9))]) + HiveTableScan(table=[[default, customer_address]], table:alias=[customer_address]) HiveProject(s_store_sk=[$0], s_store_name=[$5], s_market_id=[CAST(7):INTEGER], s_state=[$24], s_zip=[$25]) HiveFilter(condition=[AND(=($10, 7), IS NOT NULL($0), IS NOT NULL($25))]) HiveTableScan(table=[[default, store]], table:alias=[store]) - HiveProject(i_item_sk=[$0], i_current_price=[$5], i_size=[$15], i_color=[$17], i_units=[$18], i_manager_id=[$20]) - HiveFilter(condition=[IS NOT NULL($0)]) - HiveTableScan(table=[[default, item]], table:alias=[item]) - HiveProject(ca_state=[$8], ca_zip=[$9], ca_country=[$10]) - HiveFilter(condition=[AND(IS NOT NULL(UPPER($10)), IS NOT NULL($9))]) - HiveTableScan(table=[[default, customer_address]], table:alias=[customer_address]) + HiveProject(i_item_sk=[$0], i_current_price=[$5], i_size=[$15], i_color=[$17], i_units=[$18], i_manager_id=[$20]) + HiveFilter(condition=[IS NOT NULL($0)]) + HiveTableScan(table=[[default, item]], table:alias=[item]) + HiveProject(sr_item_sk=[$2], sr_ticket_number=[$9]) + HiveFilter(condition=[AND(IS NOT NULL($9), IS NOT NULL($2))]) + HiveTableScan(table=[[default, store_returns]], table:alias=[store_returns]) http://git-wip-us.apache.org/repos/asf/hive/blob/3cbc13e9/ql/src/test/results/clientpositive/perf/tez/constraints/cbo_query24.q.out ---------------------------------------------------------------------- diff --git a/ql/src/test/results/clientpositive/perf/tez/constraints/cbo_query24.q.out b/ql/src/test/results/clientpositive/perf/tez/constraints/cbo_query24.q.out index 34cc51b..0801f34 100644 --- a/ql/src/test/results/clientpositive/perf/tez/constraints/cbo_query24.q.out +++ b/ql/src/test/results/clientpositive/perf/tez/constraints/cbo_query24.q.out @@ -1,4 +1,4 @@ -Warning: Shuffle Join MERGEJOIN[287][tables = [$hdt$_0, $hdt$_1]] in Stage 'Reducer 8' is a cross product +Warning: Shuffle Join MERGEJOIN[298][tables = [$hdt$_0, $hdt$_1]] in Stage 'Reducer 6' is a cross product PREHOOK: query: explain cbo with ssales as (select c_last_name @@ -23,7 +23,8 @@ where ss_ticket_number = sr_ticket_number and ss_customer_sk = c_customer_sk and ss_item_sk = i_item_sk and ss_store_sk = s_store_sk - and c_birth_country = upper(ca_country) + and c_current_addr_sk = ca_address_sk + and c_birth_country <> upper(ca_country) and s_zip = ca_zip and s_market_id=7 group by c_last_name @@ -79,7 +80,8 @@ where ss_ticket_number = sr_ticket_number and ss_customer_sk = c_customer_sk and ss_item_sk = i_item_sk and ss_store_sk = s_store_sk - and c_birth_country = upper(ca_country) + and c_current_addr_sk = ca_address_sk + and c_birth_country <> upper(ca_country) and s_zip = ca_zip and s_market_id=7 group by c_last_name @@ -115,54 +117,55 @@ CBO PLAN: HiveProject($f0=[$0], $f1=[$1], $f2=[$2], $f3=[$3]) HiveJoin(condition=[>($3, $4)], joinType=[inner], algorithm=[none], cost=[not available]) HiveProject(c_last_name=[$1], c_first_name=[$0], s_store_name=[$2], $f3=[$3]) - HiveAggregate(group=[{1, 2, 7}], agg#0=[sum($9)]) - HiveProject(ca_state=[$0], c_first_name=[$1], c_last_name=[$2], i_current_price=[$3], i_size=[$4], i_units=[$5], i_manager_id=[$6], s_store_name=[$7], s_state=[$8], $f9=[$9]) - HiveAggregate(group=[{0, 6, 7, 15, 16, 17, 18, 20, 21}], agg#0=[sum($13)]) - HiveJoin(condition=[AND(=($8, $2), =($22, $1))], joinType=[inner], algorithm=[none], cost=[not available]) - HiveProject(ca_state=[$8], ca_zip=[$9], UPPER=[UPPER($10)]) - HiveFilter(condition=[AND(IS NOT NULL(UPPER($10)), IS NOT NULL($9))]) - HiveTableScan(table=[[default, customer_address]], table:alias=[customer_address]) - HiveJoin(condition=[AND(=($9, $1), =($6, $0))], joinType=[inner], algorithm=[none], cost=[not available]) - HiveProject(sr_item_sk=[$2], sr_ticket_number=[$9]) - HiveTableScan(table=[[default, store_returns]], table:alias=[store_returns]) - HiveJoin(condition=[=($5, $0)], joinType=[inner], algorithm=[none], cost=[not available]) - HiveProject(c_customer_sk=[$0], c_first_name=[$8], c_last_name=[$9], c_birth_country=[$14]) - HiveFilter(condition=[IS NOT NULL($14)]) - HiveTableScan(table=[[default, customer]], table:alias=[customer]) - HiveJoin(condition=[=($2, $10)], joinType=[inner], algorithm=[none], cost=[not available]) - HiveJoin(condition=[=($0, $5)], joinType=[inner], algorithm=[none], cost=[not available]) - HiveProject(ss_item_sk=[$2], ss_customer_sk=[$3], ss_store_sk=[$7], ss_ticket_number=[$9], ss_sales_price=[$13]) - HiveFilter(condition=[AND(IS NOT NULL($7), IS NOT NULL($3))]) - HiveTableScan(table=[[default, store_sales]], table:alias=[store_sales]) - HiveProject(i_item_sk=[$0], i_current_price=[$5], i_size=[$15], i_units=[$18], i_manager_id=[$20]) - HiveFilter(condition=[=($17, _UTF-16LE'orchid')]) - HiveTableScan(table=[[default, item]], table:alias=[item]) - HiveProject(s_store_sk=[$0], s_store_name=[$5], s_state=[$24], s_zip=[$25]) - HiveFilter(condition=[AND(=($10, 7), IS NOT NULL($25))]) - HiveTableScan(table=[[default, store]], table:alias=[store]) - HiveProject(_o__c0=[*(0.05, /($0, $1))]) - HiveAggregate(group=[{}], agg#0=[sum($10)], agg#1=[count($10)]) - HiveProject(c_first_name=[$0], c_last_name=[$1], s_store_name=[$2], s_state=[$3], i_current_price=[$4], i_size=[$5], i_color=[$6], i_units=[$7], i_manager_id=[$8], ca_state=[$9], $f10=[$10]) - HiveAggregate(group=[{3, 4, 12, 13, 16, 17, 18, 19, 20, 21}], agg#0=[sum($10)]) - HiveJoin(condition=[AND(=($5, $23), =($14, $22))], joinType=[inner], algorithm=[none], cost=[not available]) - HiveJoin(condition=[AND(=($9, $1), =($6, $0))], joinType=[inner], algorithm=[none], cost=[not available]) - HiveProject(sr_item_sk=[$2], sr_ticket_number=[$9]) - HiveTableScan(table=[[default, store_returns]], table:alias=[store_returns]) - HiveJoin(condition=[=($4, $13)], joinType=[inner], algorithm=[none], cost=[not available]) - HiveJoin(condition=[=($5, $0)], joinType=[inner], algorithm=[none], cost=[not available]) - HiveProject(c_customer_sk=[$0], c_first_name=[$8], c_last_name=[$9], c_birth_country=[$14]) - HiveFilter(condition=[IS NOT NULL($14)]) + HiveAggregate(group=[{4, 5, 7}], agg#0=[sum($9)]) + HiveProject(i_current_price=[$0], i_size=[$1], i_units=[$2], i_manager_id=[$3], c_first_name=[$4], c_last_name=[$5], ca_state=[$6], s_store_name=[$7], s_state=[$8], $f9=[$9]) + HiveAggregate(group=[{8, 9, 10, 11, 14, 15, 18, 22, 23}], agg#0=[sum($6)]) + HiveJoin(condition=[AND(=($5, $1), =($2, $0))], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(sr_item_sk=[$2], sr_ticket_number=[$9]) + HiveTableScan(table=[[default, store_returns]], table:alias=[store_returns]) + HiveJoin(condition=[AND(=($1, $10), =($2, $19))], joinType=[inner], algorithm=[none], cost=[not available]) + HiveJoin(condition=[=($0, $5)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(ss_item_sk=[$2], ss_customer_sk=[$3], ss_store_sk=[$7], ss_ticket_number=[$9], ss_sales_price=[$13]) + HiveFilter(condition=[AND(IS NOT NULL($7), IS NOT NULL($3))]) + HiveTableScan(table=[[default, store_sales]], table:alias=[store_sales]) + HiveProject(i_item_sk=[$0], i_current_price=[$5], i_size=[$15], i_units=[$18], i_manager_id=[$20]) + HiveFilter(condition=[=($17, _UTF-16LE'orchid')]) + HiveTableScan(table=[[default, item]], table:alias=[item]) + HiveProject(c_customer_sk=[$0], c_current_addr_sk=[$1], c_first_name=[$2], c_last_name=[$3], c_birth_country=[$4], ca_address_sk=[$5], ca_state=[$6], ca_zip=[$7], UPPER=[$8], s_store_sk=[$9], s_store_name=[$10], s_state=[$11], s_zip=[$12]) + HiveJoin(condition=[AND(=($1, $5), <>($4, $8))], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(c_customer_sk=[$0], c_current_addr_sk=[$4], c_first_name=[$8], c_last_name=[$9], c_birth_country=[$14]) + HiveFilter(condition=[IS NOT NULL($4)]) HiveTableScan(table=[[default, customer]], table:alias=[customer]) - HiveJoin(condition=[=($2, $5)], joinType=[inner], algorithm=[none], cost=[not available]) - HiveProject(ss_item_sk=[$2], ss_customer_sk=[$3], ss_store_sk=[$7], ss_ticket_number=[$9], ss_sales_price=[$13]) - HiveFilter(condition=[AND(IS NOT NULL($7), IS NOT NULL($3))]) - HiveTableScan(table=[[default, store_sales]], table:alias=[store_sales]) + HiveJoin(condition=[=($7, $2)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(ca_address_sk=[$0], ca_state=[$8], ca_zip=[$9], UPPER=[UPPER($10)]) + HiveFilter(condition=[IS NOT NULL($9)]) + HiveTableScan(table=[[default, customer_address]], table:alias=[customer_address]) HiveProject(s_store_sk=[$0], s_store_name=[$5], s_state=[$24], s_zip=[$25]) HiveFilter(condition=[AND(=($10, 7), IS NOT NULL($25))]) HiveTableScan(table=[[default, store]], table:alias=[store]) - HiveProject(i_item_sk=[$0], i_current_price=[$5], i_size=[$15], i_color=[$17], i_units=[$18], i_manager_id=[$20]) - HiveTableScan(table=[[default, item]], table:alias=[item]) - HiveProject(ca_state=[$8], ca_zip=[$9], UPPER=[UPPER($10)]) - HiveFilter(condition=[AND(IS NOT NULL(UPPER($10)), IS NOT NULL($9))]) - HiveTableScan(table=[[default, customer_address]], table:alias=[customer_address]) + HiveProject(_o__c0=[*(0.05, /($0, $1))]) + HiveAggregate(group=[{}], agg#0=[sum($10)], agg#1=[count($10)]) + HiveProject(c_first_name=[$0], c_last_name=[$1], ca_state=[$2], s_store_name=[$3], s_state=[$4], i_current_price=[$5], i_size=[$6], i_color=[$7], i_units=[$8], i_manager_id=[$9], $f10=[$10]) + HiveAggregate(group=[{9, 10, 13, 17, 18, 21, 22, 23, 24, 25}], agg#0=[sum($6)]) + HiveJoin(condition=[AND(=($5, $1), =($2, $0))], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(sr_item_sk=[$2], sr_ticket_number=[$9]) + HiveTableScan(table=[[default, store_returns]], table:alias=[store_returns]) + HiveJoin(condition=[=($0, $18)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveJoin(condition=[AND(=($17, $12), =($2, $14))], joinType=[inner], algorithm=[none], cost=[not available]) + HiveJoin(condition=[=($1, $5)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(ss_item_sk=[$2], ss_customer_sk=[$3], ss_store_sk=[$7], ss_ticket_number=[$9], ss_sales_price=[$13]) + HiveFilter(condition=[AND(IS NOT NULL($7), IS NOT NULL($3))]) + HiveTableScan(table=[[default, store_sales]], table:alias=[store_sales]) + HiveJoin(condition=[AND(=($1, $5), <>($4, $8))], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(c_customer_sk=[$0], c_current_addr_sk=[$4], c_first_name=[$8], c_last_name=[$9], c_birth_country=[$14]) + HiveFilter(condition=[IS NOT NULL($4)]) + HiveTableScan(table=[[default, customer]], table:alias=[customer]) + HiveProject(ca_address_sk=[$0], ca_state=[$8], ca_zip=[$9], UPPER=[UPPER($10)]) + HiveFilter(condition=[IS NOT NULL($9)]) + HiveTableScan(table=[[default, customer_address]], table:alias=[customer_address]) + HiveProject(s_store_sk=[$0], s_store_name=[$5], s_state=[$24], s_zip=[$25]) + HiveFilter(condition=[AND(=($10, 7), IS NOT NULL($25))]) + HiveTableScan(table=[[default, store]], table:alias=[store]) + HiveProject(i_item_sk=[$0], i_current_price=[$5], i_size=[$15], i_color=[$17], i_units=[$18], i_manager_id=[$20]) + HiveTableScan(table=[[default, item]], table:alias=[item]) http://git-wip-us.apache.org/repos/asf/hive/blob/3cbc13e9/ql/src/test/results/clientpositive/perf/tez/constraints/cbo_query6.q.out ---------------------------------------------------------------------- diff --git a/ql/src/test/results/clientpositive/perf/tez/constraints/cbo_query6.q.out b/ql/src/test/results/clientpositive/perf/tez/constraints/cbo_query6.q.out index ef53060..cbf372a 100644 --- a/ql/src/test/results/clientpositive/perf/tez/constraints/cbo_query6.q.out +++ b/ql/src/test/results/clientpositive/perf/tez/constraints/cbo_query6.q.out @@ -1,4 +1,4 @@ -Warning: Map Join MAPJOIN[172][bigTable=?] in task 'Reducer 15' is a cross product +Warning: Map Join MAPJOIN[170][bigTable=?] in task 'Reducer 15' is a cross product PREHOOK: query: explain cbo select a.ca_state state, count(*) cnt from customer_address a
