HIVE-20100 : OpTraits : Select OpTraits should stop when a mismatch is detected (Deepak Jaiswal, reviewed by Jason Dere)
Project: http://git-wip-us.apache.org/repos/asf/hive/repo Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/ddf7e25d Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/ddf7e25d Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/ddf7e25d Branch: refs/heads/master-txnstats Commit: ddf7e25d3ee94224c17e353e334728241515fffb Parents: 5016d6f Author: Deepak Jaiswal <djais...@apache.org> Authored: Mon Jul 9 15:31:13 2018 -0700 Committer: Deepak Jaiswal <djais...@apache.org> Committed: Mon Jul 9 15:31:13 2018 -0700 ---------------------------------------------------------------------- .../annotation/OpTraitsRulesProcFactory.java | 59 +++++----- .../clientpositive/llap/subquery_notin.q.out | 118 ++++++++++++------- .../results/clientpositive/llap/tez_join.q.out | 51 +++++--- 3 files changed, 138 insertions(+), 90 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/hive/blob/ddf7e25d/ql/src/java/org/apache/hadoop/hive/ql/optimizer/metainfo/annotation/OpTraitsRulesProcFactory.java ---------------------------------------------------------------------- diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/metainfo/annotation/OpTraitsRulesProcFactory.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/metainfo/annotation/OpTraitsRulesProcFactory.java index 263770e..89db530 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/metainfo/annotation/OpTraitsRulesProcFactory.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/metainfo/annotation/OpTraitsRulesProcFactory.java @@ -308,44 +308,40 @@ public class OpTraitsRulesProcFactory { public static class SelectRule implements NodeProcessor { - boolean processSortCols = false; - // For bucket columns // If all the columns match to the parent, put them in the bucket cols // else, add empty list. // For sort columns // Keep the subset of all the columns as long as order is maintained. 
public List<List<String>> getConvertedColNames( - List<List<String>> parentColNames, SelectOperator selOp) { + List<List<String>> parentColNames, SelectOperator selOp, boolean processSortCols) { List<List<String>> listBucketCols = new ArrayList<>(); - if (selOp.getColumnExprMap() != null) { - if (parentColNames != null) { - for (List<String> colNames : parentColNames) { - List<String> bucketColNames = new ArrayList<>(); - boolean found = false; - for (String colName : colNames) { - for (Entry<String, ExprNodeDesc> entry : selOp.getColumnExprMap().entrySet()) { - if ((entry.getValue() instanceof ExprNodeColumnDesc) && - (((ExprNodeColumnDesc) (entry.getValue())).getColumn().equals(colName))) { - bucketColNames.add(entry.getKey()); - found = true; - break; - } - } - if (!found) { - // Bail out on first missed column. - break; - } - } - if (!processSortCols && !found) { - // While processing bucket columns, atleast one bucket column - // missed. This results in a different bucketing scheme. - // Add empty list - listBucketCols.add(new ArrayList<>()); - } else { - listBucketCols.add(bucketColNames); + for (List<String> colNames : parentColNames) { + List<String> bucketColNames = new ArrayList<>(); + boolean found = false; + for (String colName : colNames) { + // Reset found + found = false; + for (Entry<String, ExprNodeDesc> entry : selOp.getColumnExprMap().entrySet()) { + if ((entry.getValue() instanceof ExprNodeColumnDesc) && + (((ExprNodeColumnDesc) (entry.getValue())).getColumn().equals(colName))) { + bucketColNames.add(entry.getKey()); + found = true; + break; } } + if (!found) { + // Bail out on first missed column. + break; + } + } + if (!processSortCols && !found) { + // While processing bucket columns, atleast one bucket column + // missed. This results in a different bucketing scheme. 
+ // Add empty list + listBucketCols.add(new ArrayList<>()); + } else { + listBucketCols.add(bucketColNames); } } @@ -363,13 +359,12 @@ public class OpTraitsRulesProcFactory { List<List<String>> listSortCols = null; if (selOp.getColumnExprMap() != null) { if (parentBucketColNames != null) { - listBucketCols = getConvertedColNames(parentBucketColNames, selOp); + listBucketCols = getConvertedColNames(parentBucketColNames, selOp, false); } List<List<String>> parentSortColNames = selOp.getParentOperators().get(0).getOpTraits().getSortCols(); if (parentSortColNames != null) { - processSortCols = true; - listSortCols = getConvertedColNames(parentSortColNames, selOp); + listSortCols = getConvertedColNames(parentSortColNames, selOp, true); } } http://git-wip-us.apache.org/repos/asf/hive/blob/ddf7e25d/ql/src/test/results/clientpositive/llap/subquery_notin.q.out ---------------------------------------------------------------------- diff --git a/ql/src/test/results/clientpositive/llap/subquery_notin.q.out b/ql/src/test/results/clientpositive/llap/subquery_notin.q.out index c16b143..469ec69 100644 --- a/ql/src/test/results/clientpositive/llap/subquery_notin.q.out +++ b/ql/src/test/results/clientpositive/llap/subquery_notin.q.out @@ -6102,9 +6102,11 @@ STAGE PLANS: Tez #### A masked pattern was here #### Edges: - Reducer 2 <- Map 1 (SIMPLE_EDGE), Reducer 4 (ONE_TO_ONE_EDGE), Reducer 7 (SIMPLE_EDGE) + Reducer 2 <- Map 1 (SIMPLE_EDGE), Reducer 4 (ONE_TO_ONE_EDGE), Reducer 6 (ONE_TO_ONE_EDGE) Reducer 4 <- Map 3 (SIMPLE_EDGE) - Reducer 7 <- Map 3 (SIMPLE_EDGE), Map 6 (SIMPLE_EDGE) + Reducer 5 <- Map 3 (SIMPLE_EDGE) + Reducer 6 <- Reducer 5 (SIMPLE_EDGE), Reducer 8 (ONE_TO_ONE_EDGE) + Reducer 8 <- Map 7 (SIMPLE_EDGE) #### A masked pattern was here #### Vertices: Map 1 @@ -6160,7 +6162,7 @@ STAGE PLANS: Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: vectorized, llap LLAP IO: no inputs - Map 6 + Map 7 Map Operator Tree: TableScan 
alias: fixob @@ -6223,7 +6225,8 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: int) Statistics: Num rows: 1 Data size: 20 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col1 (type: bigint), _col2 (type: bigint) - Reducer 7 + Reducer 5 + Execution mode: vectorized, llap Reduce Operator Tree: Group By Operator keys: KEY._col0 (type: int), KEY._col1 (type: int) @@ -6234,27 +6237,42 @@ STAGE PLANS: expressions: _col0 (type: int), true (type: boolean) outputColumnNames: _col0, _col1 Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col1 (type: boolean) + Reducer 6 Execution mode: llap Reduce Operator Tree: + Merge Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: int) + 1 _col0 (type: int) + outputColumnNames: _col1, _col2 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col2 (type: int) + sort order: + + Map-reduce partition columns: _col2 (type: int) + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col1 (type: boolean) + Reducer 8 + Execution mode: vectorized, llap + Reduce Operator Tree: Group By Operator keys: KEY._col0 (type: int) mode: mergepartial outputColumnNames: _col0 Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE - Merge Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 _col0 (type: int) - 1 _col0 (type: int) - outputColumnNames: _col1, _col2 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col2 (type: int) - sort order: + - Map-reduce partition columns: _col2 (type: int) - 
Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col1 (type: boolean) + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE Stage: Stage-0 Fetch Operator @@ -6320,9 +6338,11 @@ STAGE PLANS: Tez #### A masked pattern was here #### Edges: - Reducer 2 <- Map 1 (SIMPLE_EDGE), Reducer 3 (ONE_TO_ONE_EDGE), Reducer 4 (SIMPLE_EDGE) + Reducer 2 <- Map 1 (SIMPLE_EDGE), Reducer 3 (ONE_TO_ONE_EDGE), Reducer 5 (ONE_TO_ONE_EDGE) Reducer 3 <- Map 1 (SIMPLE_EDGE) - Reducer 4 <- Map 1 (SIMPLE_EDGE), Map 5 (SIMPLE_EDGE) + Reducer 4 <- Map 1 (SIMPLE_EDGE) + Reducer 5 <- Reducer 4 (SIMPLE_EDGE), Reducer 7 (ONE_TO_ONE_EDGE) + Reducer 7 <- Map 6 (SIMPLE_EDGE) #### A masked pattern was here #### Vertices: Map 1 @@ -6370,7 +6390,7 @@ STAGE PLANS: Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: vectorized, llap LLAP IO: no inputs - Map 5 + Map 6 Map Operator Tree: TableScan alias: t_n0 @@ -6434,13 +6454,7 @@ STAGE PLANS: Statistics: Num rows: 1 Data size: 20 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col1 (type: bigint), _col2 (type: bigint) Reducer 4 - Reduce Operator Tree: - Group By Operator - keys: KEY._col0 (type: int) - mode: mergepartial - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE - Execution mode: llap + Execution mode: vectorized, llap Reduce Operator Tree: Group By Operator keys: KEY._col0 (type: int), KEY._col1 (type: int) @@ -6451,20 +6465,42 @@ STAGE PLANS: expressions: _col0 (type: int), true (type: boolean) outputColumnNames: _col0, _col1 Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE - Merge Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 _col0 (type: int) - 1 _col0 (type: int) - 
outputColumnNames: _col1, _col2 + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col2 (type: int) - sort order: + - Map-reduce partition columns: _col2 (type: int) - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col1 (type: boolean) + value expressions: _col1 (type: boolean) + Reducer 5 + Execution mode: llap + Reduce Operator Tree: + Merge Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: int) + 1 _col0 (type: int) + outputColumnNames: _col1, _col2 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col2 (type: int) + sort order: + + Map-reduce partition columns: _col2 (type: int) + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col1 (type: boolean) + Reducer 7 + Execution mode: vectorized, llap + Reduce Operator Tree: + Group By Operator + keys: KEY._col0 (type: int) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE Stage: Stage-0 Fetch Operator http://git-wip-us.apache.org/repos/asf/hive/blob/ddf7e25d/ql/src/test/results/clientpositive/llap/tez_join.q.out ---------------------------------------------------------------------- diff --git a/ql/src/test/results/clientpositive/llap/tez_join.q.out b/ql/src/test/results/clientpositive/llap/tez_join.q.out index 53f8895..ecf9299 100644 --- a/ql/src/test/results/clientpositive/llap/tez_join.q.out +++ 
b/ql/src/test/results/clientpositive/llap/tez_join.q.out @@ -49,7 +49,9 @@ STAGE PLANS: Tez #### A masked pattern was here #### Edges: - Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 3 (SIMPLE_EDGE) + Reducer 2 <- Map 1 (SIMPLE_EDGE) + Reducer 3 <- Reducer 2 (SIMPLE_EDGE), Reducer 5 (SIMPLE_EDGE) + Reducer 5 <- Map 4 (SIMPLE_EDGE) #### A masked pattern was here #### Vertices: Map 1 @@ -71,7 +73,7 @@ STAGE PLANS: Statistics: Num rows: 1 Data size: 368 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized, llap LLAP IO: no inputs - Map 3 + Map 4 Map Operator Tree: TableScan alias: t2_n26 @@ -91,32 +93,47 @@ STAGE PLANS: Execution mode: vectorized, llap LLAP IO: no inputs Reducer 2 + Execution mode: vectorized, llap Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey0 (type: string) outputColumnNames: _col0 Statistics: Num rows: 1 Data size: 368 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 1 Data size: 368 Basic stats: COMPLETE Column stats: NONE + Reducer 3 Execution mode: llap Reduce Operator Tree: + Merge Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: string) + 1 _col0 (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 404 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 404 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reducer 5 + Execution mode: vectorized, llap + Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey0 (type: string) outputColumnNames: _col0 Statistics: Num rows: 1 Data size: 368 Basic stats: COMPLETE Column stats: NONE - Merge 
Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 _col0 (type: string) - 1 _col0 (type: string) - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 404 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 404 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 1 Data size: 368 Basic stats: COMPLETE Column stats: NONE Stage: Stage-0 Fetch Operator