HIVE-13069: Enable cartesian product merging (Jesus Camacho Rodriguez, reviewed by Ashutosh Chauhan)
Project: http://git-wip-us.apache.org/repos/asf/hive/repo Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/db239d52 Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/db239d52 Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/db239d52 Branch: refs/heads/master Commit: db239d52d61c4586d2f6e834dd5d24d827839b50 Parents: 8657e300 Author: Jesus Camacho Rodriguez <[email protected]> Authored: Tue Mar 1 20:39:23 2016 +0100 Committer: Jesus Camacho Rodriguez <[email protected]> Committed: Wed Mar 2 18:03:38 2016 +0100 ---------------------------------------------------------------------- .../hadoop/hive/ql/parse/SemanticAnalyzer.java | 8 +- .../queries/clientpositive/cross_join_merge.q | 17 + .../clientpositive/auto_join_stats.q.out | 58 +- .../clientpositive/auto_join_stats2.q.out | 38 +- .../bucketizedhiveinputformat.q.out | 3 +- .../clientpositive/cross_join_merge.q.out | 490 ++++++++++ ql/src/test/results/clientpositive/join42.q.out | 43 +- .../partition_coltype_literals.q.out | 4 +- .../results/clientpositive/perf/query28.q.out | 192 ++-- .../results/clientpositive/perf/query65.q.out | 116 ++- .../results/clientpositive/perf/query88.q.out | 912 +++++++++---------- .../clientpositive/ppd_outer_join5.q.out | 127 +-- .../clientpositive/spark/auto_join_stats.q.out | 31 +- .../clientpositive/spark/auto_join_stats2.q.out | 31 +- .../spark/bucketizedhiveinputformat.q.out | 3 +- .../clientpositive/spark/ppd_outer_join5.q.out | 97 +- .../clientpositive/spark/stats_only_null.q.out | 4 +- .../clientpositive/tez/stats_only_null.q.out | 4 +- 18 files changed, 1230 insertions(+), 948 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/hive/blob/db239d52/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java ---------------------------------------------------------------------- diff --git a/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java b/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java index f0ea4c5..3e91e10 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java @@ -8695,14 +8695,11 @@ public class SemanticAnalyzer extends BaseSemanticAnalyzer { private ObjectPair<Integer, int[]> findMergePos(QBJoinTree node, QBJoinTree target) { int res = -1; String leftAlias = node.getLeftAlias(); - if (leftAlias == null) { - return new ObjectPair(-1, null); - } ArrayList<ASTNode> nodeCondn = node.getExpressions().get(0); ArrayList<ASTNode> targetCondn = null; - if (leftAlias.equals(target.getLeftAlias())) { + if (leftAlias == null || leftAlias.equals(target.getLeftAlias())) { targetCondn = target.getExpressions().get(0); res = 0; } else { @@ -9998,8 +9995,9 @@ public class SemanticAnalyzer extends BaseSemanticAnalyzer { } } - if (!disableJoinMerge) + if (!disableJoinMerge) { mergeJoinTree(qb); + } } // if any filters are present in the join tree, push them on top of the http://git-wip-us.apache.org/repos/asf/hive/blob/db239d52/ql/src/test/queries/clientpositive/cross_join_merge.q ---------------------------------------------------------------------- diff --git a/ql/src/test/queries/clientpositive/cross_join_merge.q b/ql/src/test/queries/clientpositive/cross_join_merge.q new file mode 100644 index 0000000..3ba4727 --- /dev/null +++ b/ql/src/test/queries/clientpositive/cross_join_merge.q @@ -0,0 +1,17 @@ +explain +select src1.key from src src1 join src src2 join src src3; + +explain +select src1.key from src src1 join src src2 on src1.key=src2.key join src src3 on src1.key=src3.key; + +explain +select src1.key from src src1 join src src2 join src src3 where src1.key=src2.key and src1.key=src3.key; + +explain +select src1.key from src src1 join src src2 on 5 = src2.key join src src3 on src1.key=src3.key; + +explain +select src1.key from src src1 left outer join src src2 join src src3; + +explain +select src1.key from src src1 left outer join src src2 on src1.key=src2.key join src src3 on src1.key=src3.key; http://git-wip-us.apache.org/repos/asf/hive/blob/db239d52/ql/src/test/results/clientpositive/auto_join_stats.q.out ---------------------------------------------------------------------- diff --git a/ql/src/test/results/clientpositive/auto_join_stats.q.out b/ql/src/test/results/clientpositive/auto_join_stats.q.out index 587c39e..6a18a02 100644 --- a/ql/src/test/results/clientpositive/auto_join_stats.q.out +++ b/ql/src/test/results/clientpositive/auto_join_stats.q.out @@ -294,21 +294,21 @@ PREHOOK: type: QUERY POSTHOOK: query: explain select src1.key, src2.key, smalltable.key from src src1 JOIN src src2 ON (src1.key = src2.key) JOIN smalltable ON (src1.key + src2.key = smalltable.key) JOIN smalltable2 ON (src1.key + src2.key = smalltable2.key) POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-11 is a root stage , consists of Stage-13, Stage-14, Stage-1 - Stage-13 has a backup stage: Stage-1 - Stage-9 depends on stages: Stage-13 - Stage-12 depends on stages: Stage-1, Stage-9, Stage-10 - Stage-7 depends on stages: Stage-12 - Stage-14 has a backup stage: Stage-1 - Stage-10 depends on stages: Stage-14 + Stage-9 is a root stage , consists of Stage-11, Stage-12, Stage-1 + Stage-11 has a backup stage: Stage-1 + Stage-7 depends on stages: Stage-11 + Stage-10 depends on stages: Stage-1, Stage-7, Stage-8 + Stage-6 depends on stages: Stage-10 + Stage-12 has a backup stage: Stage-1 + Stage-8 depends on stages: Stage-12 Stage-1 - Stage-0 depends on stages: Stage-7 + Stage-0 depends on stages: Stage-6 STAGE PLANS: - Stage: Stage-11 + Stage: Stage-9 Conditional Operator - Stage: Stage-13 + Stage: Stage-11 Map Reduce Local Work Alias -> Map Local Tables: $hdt$_1:src1 @@ -331,7 +331,7 @@ STAGE PLANS: 0 _col0 (type: string) 1 _col0 (type: string) - Stage: Stage-9 + Stage: Stage-7 Map Reduce Map Operator Tree: TableScan @@ -361,7 +361,7 @@ STAGE PLANS: Local Work: Map Reduce Local Work - Stage: Stage-12 + Stage: Stage-10 Map Reduce Local Work Alias -> Map Local Tables: $hdt$_2:smalltable @@ -386,6 +386,7 @@ STAGE PLANS: keys: 0 (UDFToDouble(_col0) + UDFToDouble(_col1)) (type: double) 1 UDFToDouble(_col0) (type: double) + 2 UDFToDouble(_col0) (type: double) $hdt$_3:smalltable2 TableScan alias: smalltable2 @@ -401,38 +402,33 @@ STAGE PLANS: keys: 0 (UDFToDouble(_col0) + UDFToDouble(_col1)) (type: double) 1 UDFToDouble(_col0) (type: double) + 2 UDFToDouble(_col0) (type: double) - Stage: Stage-7 + Stage: Stage-6 Map Reduce Map Operator Tree: TableScan Map Join Operator condition map: Inner Join 0 to 1 + Inner Join 0 to 2 keys: 0 (UDFToDouble(_col0) + UDFToDouble(_col1)) (type: double) 1 UDFToDouble(_col0) (type: double) + 2 UDFToDouble(_col0) (type: double) outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 605 Data size: 6427 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 (UDFToDouble(_col0) + UDFToDouble(_col1)) (type: double) - 1 UDFToDouble(_col0) (type: double) - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 665 Data size: 7069 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 665 Data size: 7069 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Statistics: Num rows: 1210 Data size: 12854 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1210 Data size: 12854 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Local Work: Map Reduce Local Work - Stage: Stage-14 + Stage: Stage-12 Map Reduce Local Work Alias -> Map Local Tables: $hdt$_0:src1 @@ -455,7 +451,7 @@ STAGE PLANS: 0 _col0 (type: string) 1 _col0 (type: string) - Stage: Stage-10 + Stage: Stage-8 Map Reduce Map Operator Tree: TableScan http://git-wip-us.apache.org/repos/asf/hive/blob/db239d52/ql/src/test/results/clientpositive/auto_join_stats2.q.out ---------------------------------------------------------------------- diff --git a/ql/src/test/results/clientpositive/auto_join_stats2.q.out b/ql/src/test/results/clientpositive/auto_join_stats2.q.out index 35776f5..28fd569 100644 --- a/ql/src/test/results/clientpositive/auto_join_stats2.q.out +++ b/ql/src/test/results/clientpositive/auto_join_stats2.q.out @@ -157,12 +157,12 @@ PREHOOK: type: QUERY POSTHOOK: query: explain select src1.key, src2.key, smalltable.key from src src1 JOIN src src2 ON (src1.key = src2.key) JOIN smalltable ON (src1.key + src2.key = smalltable.key) JOIN smalltable2 ON (src1.key + src2.key = smalltable2.key) POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-10 is a root stage - Stage-7 depends on stages: Stage-10 - Stage-0 depends on stages: Stage-7 + Stage-8 is a root stage + Stage-6 depends on stages: Stage-8 + Stage-0 depends on stages: Stage-6 STAGE PLANS: - Stage: Stage-10 + Stage: Stage-8 Map Reduce Local Work Alias -> Map Local Tables: $hdt$_0:src1 @@ -205,6 +205,7 @@ STAGE PLANS: keys: 0 (UDFToDouble(_col0) + UDFToDouble(_col1)) (type: double) 1 UDFToDouble(_col0) (type: double) + 2 UDFToDouble(_col0) (type: double) $hdt$_3:smalltable2 TableScan alias: smalltable2 @@ -220,8 +221,9 @@ STAGE PLANS: keys: 0 (UDFToDouble(_col0) + UDFToDouble(_col1)) (type: double) 1 UDFToDouble(_col0) (type: double) + 2 UDFToDouble(_col0) (type: double) - Stage: Stage-7 + Stage: Stage-6 Map Reduce Map Operator Tree: TableScan @@ -245,26 +247,20 @@ STAGE PLANS: Map Join Operator condition map: Inner Join 0 to 1 + Inner Join 0 to 2 keys: 0 (UDFToDouble(_col0) + UDFToDouble(_col1)) (type: double) 1 UDFToDouble(_col0) (type: double) + 2 UDFToDouble(_col0) (type: double) outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 605 Data size: 6427 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 (UDFToDouble(_col0) + UDFToDouble(_col1)) (type: double) - 1 UDFToDouble(_col0) (type: double) - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 665 Data size: 7069 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 665 Data size: 7069 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Statistics: Num rows: 1210 Data size: 12854 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1210 Data size: 12854 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Local Work: Map Reduce Local Work http://git-wip-us.apache.org/repos/asf/hive/blob/db239d52/ql/src/test/results/clientpositive/bucketizedhiveinputformat.q.out ---------------------------------------------------------------------- diff --git a/ql/src/test/results/clientpositive/bucketizedhiveinputformat.q.out b/ql/src/test/results/clientpositive/bucketizedhiveinputformat.q.out index cfb95be..6b40ee8 100644 --- a/ql/src/test/results/clientpositive/bucketizedhiveinputformat.q.out +++ b/ql/src/test/results/clientpositive/bucketizedhiveinputformat.q.out @@ -22,8 +22,7 @@ POSTHOOK: query: CREATE TABLE T2(name STRING) STORED AS SEQUENCEFILE POSTHOOK: type: CREATETABLE POSTHOOK: Output: database:default POSTHOOK: Output: default@T2 -Warning: Shuffle Join JOIN[13][tables = [$hdt$_0, $hdt$_1, $hdt$_2]] in Stage 'Stage-2:MAPRED' is a cross product -Warning: Shuffle Join JOIN[10][tables = [$hdt$_0, $hdt$_1]] in Stage 'Stage-1:MAPRED' is a cross product +Warning: Shuffle Join JOIN[11][tables = [$hdt$_0, $hdt$_1, $hdt$_2]] in Stage 'Stage-1:MAPRED' is a cross product PREHOOK: query: INSERT OVERWRITE TABLE T2 SELECT * FROM ( SELECT tmp1.name as name FROM ( SELECT name, 'MMM' AS n FROM T1) tmp1 http://git-wip-us.apache.org/repos/asf/hive/blob/db239d52/ql/src/test/results/clientpositive/cross_join_merge.q.out ---------------------------------------------------------------------- diff --git a/ql/src/test/results/clientpositive/cross_join_merge.q.out b/ql/src/test/results/clientpositive/cross_join_merge.q.out new file mode 100644 index 0000000..f15dd17 --- /dev/null +++ b/ql/src/test/results/clientpositive/cross_join_merge.q.out @@ -0,0 +1,490 @@ +Warning: Shuffle Join JOIN[11][tables = [$hdt$_0, $hdt$_1, $hdt$_2]] in Stage 'Stage-1:MAPRED' is a cross product +PREHOOK: query: explain +select src1.key from src src1 join src src2 join src src3 +PREHOOK: type: QUERY +POSTHOOK: query: explain +select src1.key from src src1 join src src2 join src src3 +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: src1 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: string) + TableScan + alias: src1 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + Statistics: Num rows: 500 Data size: 2000 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + sort order: + Statistics: Num rows: 500 Data size: 2000 Basic stats: COMPLETE Column stats: COMPLETE + TableScan + alias: src1 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + Statistics: Num rows: 500 Data size: 2000 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + sort order: + Statistics: Num rows: 500 Data size: 2000 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + Inner Join 0 to 2 + keys: + 0 + 1 + 2 + outputColumnNames: _col0 + Statistics: Num rows: 1100 Data size: 11686 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1100 Data size: 11686 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: explain +select src1.key from src src1 join src src2 on src1.key=src2.key join src src3 on src1.key=src3.key +PREHOOK: type: QUERY +POSTHOOK: query: explain +select src1.key from src src1 join src src2 on src1.key=src2.key join src src3 on src1.key=src3.key +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: src1 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: key is not null (type: boolean) + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + TableScan + alias: src1 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: key is not null (type: boolean) + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + TableScan + alias: src1 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: key is not null (type: boolean) + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + Inner Join 0 to 2 + keys: + 0 _col0 (type: string) + 1 _col0 (type: string) + 2 _col0 (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 1100 Data size: 11686 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1100 Data size: 11686 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: explain +select src1.key from src src1 join src src2 join src src3 where src1.key=src2.key and src1.key=src3.key +PREHOOK: type: QUERY +POSTHOOK: query: explain +select src1.key from src src1 join src src2 join src src3 where src1.key=src2.key and src1.key=src3.key +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: src1 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: key is not null (type: boolean) + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + TableScan + alias: src1 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: key is not null (type: boolean) + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + TableScan + alias: src1 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: key is not null (type: boolean) + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + Inner Join 0 to 2 + keys: + 0 _col0 (type: string) + 1 _col0 (type: string) + 2 _col0 (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 1100 Data size: 11686 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1100 Data size: 11686 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +Warning: Shuffle Join JOIN[15][tables = [$hdt$_0, $hdt$_1, $hdt$_2]] in Stage 'Stage-2:MAPRED' is a cross product +PREHOOK: query: explain +select src1.key from src src1 join src src2 on 5 = src2.key join src src3 on src1.key=src3.key +PREHOOK: type: QUERY +POSTHOOK: query: explain +select src1.key from src src1 join src src2 on 5 = src2.key join src src3 on src1.key=src3.key +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-2 depends on stages: Stage-1 + Stage-0 depends on stages: Stage-2 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: src1 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: key is not null (type: boolean) + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + TableScan + alias: src1 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: key is not null (type: boolean) + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: string) + 1 _col0 (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-2 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + sort order: + Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: string) + TableScan + alias: src1 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: (5.0 = UDFToDouble(key)) (type: boolean) + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Select Operator + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 + 1 + outputColumnNames: _col0 + Statistics: Num rows: 605 Data size: 6427 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 605 Data size: 6427 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +Warning: Shuffle Join JOIN[11][tables = [$hdt$_0, $hdt$_1, $hdt$_2]] in Stage 'Stage-1:MAPRED' is a cross product +PREHOOK: query: explain +select src1.key from src src1 left outer join src src2 join src src3 +PREHOOK: type: QUERY +POSTHOOK: query: explain +select src1.key from src src1 left outer join src src2 join src src3 +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: src1 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: string) + TableScan + alias: src1 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + Statistics: Num rows: 500 Data size: 2000 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + sort order: + Statistics: Num rows: 500 Data size: 2000 Basic stats: COMPLETE Column stats: COMPLETE + TableScan + alias: src1 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + Statistics: Num rows: 500 Data size: 2000 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + sort order: + Statistics: Num rows: 500 Data size: 2000 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Operator Tree: + Join Operator + condition map: + Left Outer Join0 to 1 + Inner Join 0 to 2 + keys: + 0 + 1 + 2 + outputColumnNames: _col0 + Statistics: Num rows: 1100 Data size: 11686 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1100 Data size: 11686 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: explain +select src1.key from src src1 left outer join src src2 on src1.key=src2.key join src src3 on src1.key=src3.key +PREHOOK: type: QUERY +POSTHOOK: query: explain +select src1.key from src src1 left outer join src src2 on src1.key=src2.key join src src3 on src1.key=src3.key +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: src1 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: key is not null (type: boolean) + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + TableScan + alias: src1 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: key is not null (type: boolean) + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + TableScan + alias: src1 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: key is not null (type: boolean) + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Reduce Operator Tree: + Join Operator + condition map: + Left Outer Join0 to 1 + Inner Join 0 to 2 + keys: + 0 _col0 (type: string) + 1 _col0 (type: string) + 2 _col0 (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 1100 Data size: 11686 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1100 Data size: 11686 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + http://git-wip-us.apache.org/repos/asf/hive/blob/db239d52/ql/src/test/results/clientpositive/join42.q.out ---------------------------------------------------------------------- diff --git a/ql/src/test/results/clientpositive/join42.q.out b/ql/src/test/results/clientpositive/join42.q.out index 4715bb5..e52f1fc 100644 --- a/ql/src/test/results/clientpositive/join42.q.out +++ b/ql/src/test/results/clientpositive/join42.q.out @@ -80,8 +80,7 @@ POSTHOOK: Output: default@acct POSTHOOK: Lineage: acct.acc_n EXPRESSION [(values__tmp__table__2)values__tmp__table__2.FieldSchema(name:tmp_values_col2, type:string, comment:), ] POSTHOOK: Lineage: acct.aid EXPRESSION [(values__tmp__table__2)values__tmp__table__2.FieldSchema(name:tmp_values_col1, type:string, comment:), ] POSTHOOK: Lineage: acct.brn EXPRESSION [(values__tmp__table__2)values__tmp__table__2.FieldSchema(name:tmp_values_col3, type:string, comment:), ] -Warning: Shuffle Join JOIN[25][tables = [$hdt$_0, $hdt$_1, $hdt$_2]] in Stage 'Stage-2:MAPRED' is a cross product -Warning: Shuffle Join JOIN[22][tables = [$hdt$_0, $hdt$_1]] in Stage 'Stage-1:MAPRED' is a cross product +Warning: Shuffle Join JOIN[23][tables = [$hdt$_0, $hdt$_1, $hdt$_2]] in Stage 'Stage-1:MAPRED' is a cross product PREHOOK: query: --[HIVE-10841] (WHERE col is not null) does not work sometimes for queries with many JOIN statements explain select acct.ACC_N, @@ -114,8 +113,7 @@ STAGE DEPENDENCIES: Stage-1 is a root stage Stage-2 depends on stages: Stage-1 Stage-3 depends on stages: Stage-2 - Stage-4 depends on stages: Stage-3 - Stage-0 depends on stages: Stage-4 + Stage-0 depends on stages: Stage-3 STAGE PLANS: Stage: Stage-1 @@ -146,30 +144,6 @@ STAGE PLANS: sort order: Statistics: Num rows: 1 Data size: 14 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: int), _col2 (type: int) - Reduce Operator Tree: - Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 - 1 - outputColumnNames: _col2, _col3 - Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe - - Stage: Stage-2 - Map Reduce - Map Operator Tree: - TableScan - Reduce Output Operator - sort order: - Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE - value expressions: _col2 (type: int), _col3 (type: int) TableScan alias: fr Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE @@ -185,11 +159,13 @@ STAGE PLANS: Join Operator condition map: Inner Join 0 to 1 + Inner Join 0 to 2 keys: 0 1 + 2 outputColumnNames: _col2, _col3 - Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 2 Data size: 8 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false table: @@ -197,7 +173,7 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe - Stage: Stage-3 + Stage: Stage-2 Map Reduce Map Operator Tree: TableScan @@ -205,7 +181,7 @@ STAGE PLANS: key expressions: _col2 (type: int) sort order: + Map-reduce partition columns: _col2 (type: int) - Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 2 Data size: 8 Basic stats: COMPLETE Column stats: NONE value expressions: _col3 (type: int) TableScan alias: a @@ -256,7 +232,7 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe - Stage: Stage-4 + Stage: Stage-3 Map Reduce Map Operator Tree: TableScan @@ -308,8 +284,7 @@ STAGE PLANS: Processor Tree: ListSink -Warning: Shuffle Join JOIN[25][tables = [$hdt$_0, $hdt$_1, $hdt$_2]] in Stage 'Stage-2:MAPRED' is a cross product -Warning: Shuffle Join JOIN[22][tables = [$hdt$_0, $hdt$_1]] in Stage 'Stage-1:MAPRED' is a cross product +Warning: Shuffle Join JOIN[23][tables = [$hdt$_0, $hdt$_1, $hdt$_2]] in Stage 'Stage-1:MAPRED' is a cross product PREHOOK: query: select acct.ACC_N, acct.brn http://git-wip-us.apache.org/repos/asf/hive/blob/db239d52/ql/src/test/results/clientpositive/partition_coltype_literals.q.out ---------------------------------------------------------------------- diff --git a/ql/src/test/results/clientpositive/partition_coltype_literals.q.out b/ql/src/test/results/clientpositive/partition_coltype_literals.q.out index 262a081..1a93b7a 100644 --- a/ql/src/test/results/clientpositive/partition_coltype_literals.q.out +++ b/ql/src/test/results/clientpositive/partition_coltype_literals.q.out @@ -359,7 +359,7 @@ Database: default Table: partcoltypenum #### A masked pattern was here #### Partition Parameters: - COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"key\":\"true\",\"value\":\"true\"}} + COLUMN_STATS_ACCURATE {\"COLUMN_STATS\":{\"key\":\"true\",\"value\":\"true\"},\"BASIC_STATS\":\"true\"} #### A masked pattern was here #### numFiles 2 numRows 30 @@ -410,7 +410,7 @@ Database: default Table: partcoltypenum #### A masked pattern was here #### Partition Parameters: - COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"key\":\"true\",\"value\":\"true\"}} + COLUMN_STATS_ACCURATE {\"COLUMN_STATS\":{\"key\":\"true\",\"value\":\"true\"},\"BASIC_STATS\":\"true\"} #### A masked pattern was here #### numFiles 2 numRows 30 http://git-wip-us.apache.org/repos/asf/hive/blob/db239d52/ql/src/test/results/clientpositive/perf/query28.q.out ---------------------------------------------------------------------- diff --git a/ql/src/test/results/clientpositive/perf/query28.q.out b/ql/src/test/results/clientpositive/perf/query28.q.out index aed4808..8196db5 100644 --- a/ql/src/test/results/clientpositive/perf/query28.q.out +++ b/ql/src/test/results/clientpositive/perf/query28.q.out @@ -1,8 +1,4 @@ -Warning: Shuffle Join MERGEJOIN[71][tables = [$hdt$_0, $hdt$_1]] in Stage 'Reducer 3' is a cross product -Warning: Shuffle Join MERGEJOIN[72][tables = [$hdt$_0, $hdt$_1, $hdt$_2]] in Stage 'Reducer 4' is a cross product -Warning: Shuffle Join MERGEJOIN[73][tables = [$hdt$_0, $hdt$_1, $hdt$_2, $hdt$_3]] in Stage 'Reducer 5' is a cross product -Warning: Shuffle Join MERGEJOIN[74][tables = [$hdt$_0, $hdt$_1, $hdt$_2, $hdt$_3, $hdt$_4]] in Stage 'Reducer 6' is a cross product -Warning: Shuffle Join MERGEJOIN[75][tables = [$hdt$_0, $hdt$_1, $hdt$_2, $hdt$_3, $hdt$_4, $hdt$_5]] in Stage 'Reducer 7' is a cross product +Warning: Shuffle Join MERGEJOIN[63][tables = [$hdt$_0, $hdt$_1, $hdt$_2, $hdt$_3, $hdt$_4, $hdt$_5]] in Stage 'Reducer 3' is a cross product PREHOOK: query: explain select * from (select avg(ss_list_price) B1_LP ,count(ss_list_price) B1_CNT @@ -110,126 +106,106 @@ Plan optimized by CBO. Vertex dependency in root stage Reducer 11 <- Map 10 (SIMPLE_EDGE) Reducer 13 <- Map 12 (SIMPLE_EDGE) -Reducer 15 <- Map 14 (SIMPLE_EDGE) -Reducer 17 <- Map 16 (SIMPLE_EDGE) Reducer 2 <- Map 1 (SIMPLE_EDGE) -Reducer 3 <- Reducer 2 (SIMPLE_EDGE), Reducer 9 (SIMPLE_EDGE) -Reducer 4 <- Reducer 11 (SIMPLE_EDGE), Reducer 3 (SIMPLE_EDGE) -Reducer 5 <- Reducer 13 (SIMPLE_EDGE), Reducer 4 (SIMPLE_EDGE) -Reducer 6 <- Reducer 15 (SIMPLE_EDGE), Reducer 5 (SIMPLE_EDGE) -Reducer 7 <- Reducer 17 (SIMPLE_EDGE), Reducer 6 (SIMPLE_EDGE) +Reducer 3 <- Reducer 11 (SIMPLE_EDGE), Reducer 13 (SIMPLE_EDGE), Reducer 2 (SIMPLE_EDGE), Reducer 5 (SIMPLE_EDGE), Reducer 7 (SIMPLE_EDGE), Reducer 9 (SIMPLE_EDGE) +Reducer 5 <- Map 4 (SIMPLE_EDGE) +Reducer 7 <- Map 6 (SIMPLE_EDGE) Reducer 9 <- Map 8 (SIMPLE_EDGE) Stage-0 Fetch Operator limit:100 Stage-1 - Reducer 7 - File Output Operator [FS_64] - Limit [LIM_63] (rows=1 width=215) + Reducer 3 + File Output Operator [FS_56] + Limit [LIM_55] (rows=5 width=149) Number of rows:100 - Select Operator [SEL_62] (rows=1 width=215) + Select Operator [SEL_54] (rows=5 width=149) Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8","_col9","_col10","_col11","_col12","_col13","_col14","_col15","_col16","_col17"] - Merge Join Operator [MERGEJOIN_75] (rows=1 width=215) - Conds:(Inner),Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8","_col9","_col10","_col11","_col12","_col13","_col14","_col15","_col16","_col17"] - <-Reducer 17 [SIMPLE_EDGE] - SHUFFLE [RS_60] + Merge Join Operator [MERGEJOIN_63] (rows=5 width=149) + Conds:(Inner),(Inner),(Inner),(Inner),(Inner),Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8","_col9","_col10","_col11","_col12","_col13","_col14","_col15","_col16","_col17"] + <-Reducer 11 [SIMPLE_EDGE] + SHUFFLE [RS_51] + Group By Operator [GBY_33] (rows=1 width=136) + Output:["_col0","_col1","_col2"],aggregations:["avg(VALUE._col0)","count(VALUE._col1)","count(DISTINCT KEY._col0:0._col0)"] + <-Map 10 [SIMPLE_EDGE] + SHUFFLE [RS_32] + Group By Operator [GBY_31] (rows=1 width=0) + Output:["_col0","_col1","_col2","_col3"],aggregations:["avg(ss_list_price)","count(ss_list_price)","count(DISTINCT ss_list_price)"],keys:ss_list_price + Select Operator [SEL_30] (rows=1 width=0) + Output:["ss_list_price"] + Filter Operator [FIL_61] (rows=1 width=0) + predicate:(ss_quantity BETWEEN 11 AND 15 and (ss_list_price BETWEEN 66 AND 76 or ss_coupon_amt BETWEEN 920 AND 1920 or ss_wholesale_cost BETWEEN 4 AND 24)) + TableScan [TS_28] (rows=1 width=0) + default@store_sales,store_sales,Tbl:PARTIAL,Col:NONE,Output:["ss_quantity","ss_wholesale_cost","ss_list_price","ss_coupon_amt"] + <-Reducer 13 [SIMPLE_EDGE] + SHUFFLE [RS_52] Group By Operator [GBY_40] (rows=1 width=136) Output:["_col0","_col1","_col2"],aggregations:["avg(VALUE._col0)","count(VALUE._col1)","count(DISTINCT KEY._col0:0._col0)"] - <-Map 16 [SIMPLE_EDGE] + <-Map 12 [SIMPLE_EDGE] SHUFFLE [RS_39] Group By Operator [GBY_38] (rows=1 width=0) Output:["_col0","_col1","_col2","_col3"],aggregations:["avg(ss_list_price)","count(ss_list_price)","count(DISTINCT ss_list_price)"],keys:ss_list_price Select Operator [SEL_37] (rows=1 width=0) Output:["ss_list_price"] - Filter Operator [FIL_70] (rows=1 width=0) + Filter Operator [FIL_62] (rows=1 width=0) predicate:(ss_quantity BETWEEN 6 AND 10 and (ss_list_price BETWEEN 91 AND 101 or ss_coupon_amt BETWEEN 1430 AND 2430 or ss_wholesale_cost BETWEEN 32 AND 52)) TableScan [TS_35] (rows=1 width=0) default@store_sales,store_sales,Tbl:PARTIAL,Col:NONE,Output:["ss_quantity","ss_wholesale_cost","ss_list_price","ss_coupon_amt"] - <-Reducer 6 [SIMPLE_EDGE] - SHUFFLE [RS_59] - Merge Join Operator [MERGEJOIN_74] (rows=1 width=196) - Conds:(Inner),Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8","_col9","_col10","_col11","_col12","_col13","_col14"] - <-Reducer 15 [SIMPLE_EDGE] - SHUFFLE [RS_57] - Group By Operator [GBY_33] (rows=1 width=136) - Output:["_col0","_col1","_col2"],aggregations:["avg(VALUE._col0)","count(VALUE._col1)","count(DISTINCT KEY._col0:0._col0)"] - <-Map 14 [SIMPLE_EDGE] - SHUFFLE [RS_32] - Group By Operator [GBY_31] (rows=1 width=0) - Output:["_col0","_col1","_col2","_col3"],aggregations:["avg(ss_list_price)","count(ss_list_price)","count(DISTINCT ss_list_price)"],keys:ss_list_price - Select Operator [SEL_30] (rows=1 width=0) - Output:["ss_list_price"] - Filter Operator [FIL_69] (rows=1 width=0) - predicate:(ss_quantity BETWEEN 11 AND 15 and (ss_list_price BETWEEN 66 AND 76 or ss_coupon_amt BETWEEN 920 AND 1920 or ss_wholesale_cost BETWEEN 4 AND 24)) - TableScan [TS_28] (rows=1 width=0) - default@store_sales,store_sales,Tbl:PARTIAL,Col:NONE,Output:["ss_quantity","ss_wholesale_cost","ss_list_price","ss_coupon_amt"] - <-Reducer 5 [SIMPLE_EDGE] - SHUFFLE [RS_56] - Merge Join Operator [MERGEJOIN_73] (rows=1 width=179) - Conds:(Inner),Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8","_col9","_col10","_col11"] - <-Reducer 13 [SIMPLE_EDGE] - SHUFFLE [RS_54] - Group By Operator [GBY_26] (rows=1 width=136) - Output:["_col0","_col1","_col2"],aggregations:["avg(VALUE._col0)","count(VALUE._col1)","count(DISTINCT KEY._col0:0._col0)"] - <-Map 12 [SIMPLE_EDGE] - SHUFFLE [RS_25] - Group By Operator [GBY_24] (rows=1 width=0) - Output:["_col0","_col1","_col2","_col3"],aggregations:["avg(ss_list_price)","count(ss_list_price)","count(DISTINCT ss_list_price)"],keys:ss_list_price - Select Operator [SEL_23] (rows=1 width=0) - Output:["ss_list_price"] - Filter Operator [FIL_68] (rows=1 width=0) - predicate:(ss_quantity BETWEEN 16 AND 20 and (ss_list_price BETWEEN 142 AND 152 or ss_coupon_amt BETWEEN 3054 AND 4054 or ss_wholesale_cost BETWEEN 80 AND 100)) - TableScan [TS_21] (rows=1 width=0) - default@store_sales,store_sales,Tbl:PARTIAL,Col:NONE,Output:["ss_quantity","ss_wholesale_cost","ss_list_price","ss_coupon_amt"] - <-Reducer 4 [SIMPLE_EDGE] - SHUFFLE [RS_53] - Merge Join Operator [MERGEJOIN_72] (rows=1 width=163) - Conds:(Inner),Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8"] - <-Reducer 11 [SIMPLE_EDGE] - SHUFFLE [RS_51] - Group By Operator [GBY_19] (rows=1 width=136) - Output:["_col0","_col1","_col2"],aggregations:["avg(VALUE._col0)","count(VALUE._col1)","count(DISTINCT KEY._col0:0._col0)"] - <-Map 10 [SIMPLE_EDGE] - SHUFFLE [RS_18] - Group By Operator [GBY_17] (rows=1 width=0) - Output:["_col0","_col1","_col2","_col3"],aggregations:["avg(ss_list_price)","count(ss_list_price)","count(DISTINCT ss_list_price)"],keys:ss_list_price - Select Operator [SEL_16] (rows=1 width=0) - Output:["ss_list_price"] - Filter Operator [FIL_67] (rows=1 width=0) - predicate:(ss_quantity BETWEEN 21 AND 25 and (ss_list_price BETWEEN 135 AND 145 or ss_coupon_amt BETWEEN 14180 AND 15180 or ss_wholesale_cost BETWEEN 38 AND 58)) - TableScan [TS_14] (rows=1 width=0) - default@store_sales,store_sales,Tbl:PARTIAL,Col:NONE,Output:["ss_quantity","ss_wholesale_cost","ss_list_price","ss_coupon_amt"] - <-Reducer 3 [SIMPLE_EDGE] - SHUFFLE [RS_50] - Merge Join Operator [MERGEJOIN_71] (rows=1 width=149) - Conds:(Inner),Output:["_col0","_col1","_col2","_col3","_col4","_col5"] - <-Reducer 2 [SIMPLE_EDGE] - SHUFFLE [RS_47] - Group By Operator [GBY_5] (rows=1 width=136) - Output:["_col0","_col1","_col2"],aggregations:["avg(VALUE._col0)","count(VALUE._col1)","count(DISTINCT KEY._col0:0._col0)"] - <-Map 1 [SIMPLE_EDGE] - SHUFFLE [RS_4] - Group By Operator [GBY_3] (rows=1 width=0) - Output:["_col0","_col1","_col2","_col3"],aggregations:["avg(ss_list_price)","count(ss_list_price)","count(DISTINCT ss_list_price)"],keys:ss_list_price - Select Operator [SEL_2] (rows=1 width=0) - Output:["ss_list_price"] - Filter Operator [FIL_65] (rows=1 width=0) - predicate:(ss_quantity BETWEEN 0 AND 5 and (ss_list_price BETWEEN 11 AND 21 or ss_coupon_amt BETWEEN 460 AND 1460 or ss_wholesale_cost BETWEEN 14 AND 34)) - TableScan [TS_0] (rows=1 width=0) - default@store_sales,store_sales,Tbl:PARTIAL,Col:NONE,Output:["ss_quantity","ss_wholesale_cost","ss_list_price","ss_coupon_amt"] - <-Reducer 9 [SIMPLE_EDGE] - SHUFFLE [RS_48] - Group By Operator [GBY_12] (rows=1 width=136) - Output:["_col0","_col1","_col2"],aggregations:["avg(VALUE._col0)","count(VALUE._col1)","count(DISTINCT KEY._col0:0._col0)"] - <-Map 8 [SIMPLE_EDGE] - SHUFFLE [RS_11] - Group By Operator [GBY_10] (rows=1 width=0) - Output:["_col0","_col1","_col2","_col3"],aggregations:["avg(ss_list_price)","count(ss_list_price)","count(DISTINCT ss_list_price)"],keys:ss_list_price - Select Operator [SEL_9] (rows=1 width=0) - Output:["ss_list_price"] - Filter Operator [FIL_66] (rows=1 width=0) - predicate:(ss_quantity BETWEEN 26 AND 30 and (ss_list_price BETWEEN 28 AND 38 or ss_coupon_amt BETWEEN 2513 AND 3513 or ss_wholesale_cost BETWEEN 42 AND 62)) - TableScan [TS_7] (rows=1 width=0) - default@store_sales,store_sales,Tbl:PARTIAL,Col:NONE,Output:["ss_quantity","ss_wholesale_cost","ss_list_price","ss_coupon_amt"] + <-Reducer 2 [SIMPLE_EDGE] + SHUFFLE [RS_47] + Group By Operator [GBY_5] (rows=1 width=136) + Output:["_col0","_col1","_col2"],aggregations:["avg(VALUE._col0)","count(VALUE._col1)","count(DISTINCT KEY._col0:0._col0)"] + <-Map 1 [SIMPLE_EDGE] + SHUFFLE [RS_4] + Group By Operator [GBY_3] (rows=1 width=0) + Output:["_col0","_col1","_col2","_col3"],aggregations:["avg(ss_list_price)","count(ss_list_price)","count(DISTINCT ss_list_price)"],keys:ss_list_price + Select Operator [SEL_2] (rows=1 width=0) + Output:["ss_list_price"] + Filter Operator [FIL_57] (rows=1 width=0) + predicate:(ss_quantity BETWEEN 0 AND 5 and (ss_list_price BETWEEN 11 AND 21 or ss_coupon_amt BETWEEN 460 AND 1460 or ss_wholesale_cost BETWEEN 14 AND 34)) + TableScan [TS_0] (rows=1 width=0) + default@store_sales,store_sales,Tbl:PARTIAL,Col:NONE,Output:["ss_quantity","ss_wholesale_cost","ss_list_price","ss_coupon_amt"] + <-Reducer 5 [SIMPLE_EDGE] + SHUFFLE [RS_48] + Group By Operator [GBY_12] (rows=1 width=136) + Output:["_col0","_col1","_col2"],aggregations:["avg(VALUE._col0)","count(VALUE._col1)","count(DISTINCT KEY._col0:0._col0)"] + <-Map 4 [SIMPLE_EDGE] + SHUFFLE [RS_11] + Group By Operator [GBY_10] (rows=1 width=0) + Output:["_col0","_col1","_col2","_col3"],aggregations:["avg(ss_list_price)","count(ss_list_price)","count(DISTINCT ss_list_price)"],keys:ss_list_price + Select Operator [SEL_9] (rows=1 width=0) + Output:["ss_list_price"] + Filter Operator [FIL_58] (rows=1 width=0) + predicate:(ss_quantity BETWEEN 26 AND 30 and (ss_list_price BETWEEN 28 AND 38 or ss_coupon_amt BETWEEN 2513 AND 3513 or ss_wholesale_cost BETWEEN 42 AND 62)) + TableScan [TS_7] (rows=1 width=0) + default@store_sales,store_sales,Tbl:PARTIAL,Col:NONE,Output:["ss_quantity","ss_wholesale_cost","ss_list_price","ss_coupon_amt"] + <-Reducer 7 [SIMPLE_EDGE] + SHUFFLE [RS_49] + Group By Operator [GBY_19] (rows=1 width=136) + Output:["_col0","_col1","_col2"],aggregations:["avg(VALUE._col0)","count(VALUE._col1)","count(DISTINCT KEY._col0:0._col0)"] + <-Map 6 [SIMPLE_EDGE] + SHUFFLE [RS_18] + Group By Operator [GBY_17] (rows=1 width=0) + Output:["_col0","_col1","_col2","_col3"],aggregations:["avg(ss_list_price)","count(ss_list_price)","count(DISTINCT ss_list_price)"],keys:ss_list_price + Select Operator [SEL_16] (rows=1 width=0) + Output:["ss_list_price"] + Filter Operator [FIL_59] (rows=1 width=0) + predicate:(ss_quantity BETWEEN 21 AND 25 and (ss_list_price BETWEEN 135 AND 145 or ss_coupon_amt BETWEEN 14180 AND 15180 or ss_wholesale_cost BETWEEN 38 AND 58)) + TableScan [TS_14] (rows=1 width=0) + default@store_sales,store_sales,Tbl:PARTIAL,Col:NONE,Output:["ss_quantity","ss_wholesale_cost","ss_list_price","ss_coupon_amt"] + <-Reducer 9 [SIMPLE_EDGE] + SHUFFLE [RS_50] + Group By Operator [GBY_26] (rows=1 width=136) + Output:["_col0","_col1","_col2"],aggregations:["avg(VALUE._col0)","count(VALUE._col1)","count(DISTINCT KEY._col0:0._col0)"] + <-Map 8 [SIMPLE_EDGE] + SHUFFLE [RS_25] + Group By Operator [GBY_24] (rows=1 width=0) + Output:["_col0","_col1","_col2","_col3"],aggregations:["avg(ss_list_price)","count(ss_list_price)","count(DISTINCT ss_list_price)"],keys:ss_list_price + Select Operator [SEL_23] (rows=1 width=0) + Output:["ss_list_price"] + Filter Operator [FIL_60] (rows=1 width=0) + predicate:(ss_quantity BETWEEN 16 AND 20 and (ss_list_price BETWEEN 142 AND 152 or ss_coupon_amt BETWEEN 3054 AND 4054 or ss_wholesale_cost BETWEEN 80 AND 100)) + TableScan [TS_21] (rows=1 width=0) + default@store_sales,store_sales,Tbl:PARTIAL,Col:NONE,Output:["ss_quantity","ss_wholesale_cost","ss_list_price","ss_coupon_amt"] http://git-wip-us.apache.org/repos/asf/hive/blob/db239d52/ql/src/test/results/clientpositive/perf/query65.q.out ---------------------------------------------------------------------- diff --git a/ql/src/test/results/clientpositive/perf/query65.q.out b/ql/src/test/results/clientpositive/perf/query65.q.out index 9799fa9..37bb1b3 100644 --- a/ql/src/test/results/clientpositive/perf/query65.q.out +++ b/ql/src/test/results/clientpositive/perf/query65.q.out @@ -1,5 +1,4 @@ -Warning: Shuffle Join MERGEJOIN[72][tables = [$hdt$_0, $hdt$_1]] in Stage 'Reducer 2' is a cross product -Warning: Shuffle Join MERGEJOIN[75][tables = [$hdt$_0, $hdt$_1, $hdt$_2]] in Stage 'Reducer 3' is a cross product +Warning: Shuffle Join MERGEJOIN[71][tables = [$hdt$_0, $hdt$_1, $hdt$_2]] in Stage 'Reducer 2' is a cross product PREHOOK: query: explain select s_store_name, i_item_desc, @@ -79,76 +78,91 @@ POSTHOOK: type: QUERY Plan optimized by CBO. Vertex dependency in root stage -Reducer 10 <- Reducer 9 (SIMPLE_EDGE) -Reducer 13 <- Map 12 (SIMPLE_EDGE), Map 15 (SIMPLE_EDGE) -Reducer 14 <- Reducer 13 (SIMPLE_EDGE) -Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 6 (SIMPLE_EDGE) -Reducer 3 <- Reducer 10 (SIMPLE_EDGE), Reducer 2 (SIMPLE_EDGE) -Reducer 4 <- Reducer 14 (SIMPLE_EDGE), Reducer 3 (SIMPLE_EDGE) -Reducer 5 <- Reducer 4 (SIMPLE_EDGE) -Reducer 8 <- Map 11 (SIMPLE_EDGE), Map 7 (SIMPLE_EDGE) +Reducer 12 <- Map 11 (SIMPLE_EDGE), Map 14 (SIMPLE_EDGE) +Reducer 13 <- Reducer 12 (SIMPLE_EDGE) +Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 5 (SIMPLE_EDGE), Reducer 9 (SIMPLE_EDGE) +Reducer 3 <- Reducer 13 (SIMPLE_EDGE), Reducer 2 (SIMPLE_EDGE) +Reducer 4 <- Reducer 3 (SIMPLE_EDGE) +Reducer 7 <- Map 10 (SIMPLE_EDGE), Map 6 (SIMPLE_EDGE) +Reducer 8 <- Reducer 7 (SIMPLE_EDGE) Reducer 9 <- Reducer 8 (SIMPLE_EDGE) Stage-0 Fetch Operator limit:100 Stage-1 - Reducer 5 - File Output Operator [FS_55] - Limit [LIM_54] (rows=100 width=1436) + Reducer 4 + File Output Operator [FS_53] + Limit [LIM_52] (rows=100 width=1436) Number of rows:100 - Select Operator [SEL_53] (rows=204974 width=1436) + Select Operator [SEL_51] (rows=372680 width=1436) Output:["_col0","_col1","_col2","_col3","_col4","_col5"] - <-Reducer 4 [SIMPLE_EDGE] - SHUFFLE [RS_52] - Select Operator [SEL_51] (rows=204974 width=1436) + <-Reducer 3 [SIMPLE_EDGE] + SHUFFLE [RS_50] + Select Operator [SEL_49] (rows=372680 width=1436) Output:["_col0","_col1","_col2","_col3","_col4","_col5"] - Filter Operator [FIL_50] (rows=204974 width=1436) + Filter Operator [FIL_48] (rows=372680 width=1436) predicate:(_col11 <= CAST( (0.1 * UDFToDouble(_col8)) AS decimal(30,15))) - Merge Join Operator [MERGEJOIN_76] (rows=614922 width=1436) - Conds:RS_47._col7, _col0, _col2=RS_48._col0, _col0, _col1(Inner),Output:["_col1","_col3","_col4","_col5","_col6","_col8","_col11"] - <-Reducer 14 [SIMPLE_EDGE] - SHUFFLE [RS_48] + Merge Join Operator [MERGEJOIN_73] (rows=1118040 width=1436) + Conds:RS_45._col7, _col0, _col2=RS_46._col0, _col0, _col1(Inner),Output:["_col1","_col3","_col4","_col5","_col6","_col8","_col11"] + <-Reducer 13 [SIMPLE_EDGE] + SHUFFLE [RS_46] PartitionCols:_col0, _col0, _col1 Select Operator [SEL_38] (rows=20088 width=1119) Output:["_col0","_col1","_col2"] Group By Operator [GBY_37] (rows=20088 width=1119) Output:["_col0","_col1","_col2"],aggregations:["sum(VALUE._col0)"],keys:KEY._col0, KEY._col1 - <-Reducer 13 [SIMPLE_EDGE] + <-Reducer 12 [SIMPLE_EDGE] SHUFFLE [RS_36] PartitionCols:_col0, _col1 Group By Operator [GBY_35] (rows=40176 width=1119) Output:["_col0","_col1","_col2"],aggregations:["sum(_col3)"],keys:_col1, _col2 - Merge Join Operator [MERGEJOIN_74] (rows=40176 width=1119) + Merge Join Operator [MERGEJOIN_72] (rows=40176 width=1119) Conds:RS_31._col0=RS_32._col0(Inner),Output:["_col1","_col2","_col3"] - <-Map 12 [SIMPLE_EDGE] + <-Map 11 [SIMPLE_EDGE] SHUFFLE [RS_31] PartitionCols:_col0 Select Operator [SEL_27] (rows=1 width=0) Output:["_col0","_col1","_col2","_col3"] - Filter Operator [FIL_70] (rows=1 width=0) + Filter Operator [FIL_68] (rows=1 width=0) predicate:((ss_sold_date_sk is not null and ss_store_sk is not null) and ss_item_sk is not null) TableScan [TS_25] (rows=1 width=0) default@store_sales,store_sales,Tbl:PARTIAL,Col:NONE,Output:["ss_sold_date_sk","ss_item_sk","ss_store_sk","ss_sales_price"] - <-Map 15 [SIMPLE_EDGE] + <-Map 14 [SIMPLE_EDGE] SHUFFLE [RS_32] PartitionCols:_col0 Select Operator [SEL_30] (rows=36524 width=1119) Output:["_col0"] - Filter Operator [FIL_71] (rows=36524 width=1119) + Filter Operator [FIL_69] (rows=36524 width=1119) predicate:(d_month_seq BETWEEN 1212 AND 1223 and d_date_sk is not null) TableScan [TS_28] (rows=73049 width=1119) default@date_dim,date_dim,Tbl:COMPLETE,Col:NONE,Output:["d_date_sk","d_month_seq"] - <-Reducer 3 [SIMPLE_EDGE] - SHUFFLE [RS_47] + <-Reducer 2 [SIMPLE_EDGE] + SHUFFLE [RS_45] PartitionCols:_col7, _col0, _col2 - Merge Join Operator [MERGEJOIN_75] (rows=559020 width=1436) - Conds:(Inner),Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8"] - <-Reducer 10 [SIMPLE_EDGE] - SHUFFLE [RS_45] + Merge Join Operator [MERGEJOIN_71] (rows=1016400 width=1436) + Conds:(Inner),(Inner),Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8"] + <-Map 1 [SIMPLE_EDGE] + SHUFFLE [RS_41] + Select Operator [SEL_2] (rows=1704 width=1910) + Output:["_col0","_col1"] + Filter Operator [FIL_64] (rows=1704 width=1910) + predicate:s_store_sk is not null + TableScan [TS_0] (rows=1704 width=1910) + default@store,store,Tbl:COMPLETE,Col:NONE,Output:["s_store_sk","s_store_name"] + <-Map 5 [SIMPLE_EDGE] + SHUFFLE [RS_42] + Select Operator [SEL_5] (rows=462000 width=1436) + Output:["_col0","_col1","_col2","_col3","_col4"] + Filter Operator [FIL_65] (rows=462000 width=1436) + predicate:i_item_sk is not null + TableScan [TS_3] (rows=462000 width=1436) + default@item,item,Tbl:COMPLETE,Col:NONE,Output:["i_item_sk","i_item_desc","i_current_price","i_wholesale_cost","i_brand"] + <-Reducer 9 [SIMPLE_EDGE] + SHUFFLE [RS_43] Group By Operator [GBY_23] (rows=10044 width=1119) Output:["_col0","_col1"],aggregations:["avg(VALUE._col0)"],keys:KEY._col0 - <-Reducer 9 [SIMPLE_EDGE] + <-Reducer 8 [SIMPLE_EDGE] SHUFFLE [RS_22] PartitionCols:_col0 Group By Operator [GBY_21] (rows=20088 width=1119) @@ -157,49 +171,29 @@ Stage-0 Output:["_col1","_col2"] Group By Operator [GBY_18] (rows=20088 width=1119) Output:["_col0","_col1","_col2"],aggregations:["sum(VALUE._col0)"],keys:KEY._col0, KEY._col1 - <-Reducer 8 [SIMPLE_EDGE] + <-Reducer 7 [SIMPLE_EDGE] SHUFFLE [RS_17] PartitionCols:_col0, _col1 Group By Operator [GBY_16] (rows=40176 width=1119) Output:["_col0","_col1","_col2"],aggregations:["sum(_col3)"],keys:_col1, _col2 - Merge Join Operator [MERGEJOIN_73] (rows=40176 width=1119) + Merge Join Operator [MERGEJOIN_70] (rows=40176 width=1119) Conds:RS_12._col0=RS_13._col0(Inner),Output:["_col1","_col2","_col3"] - <-Map 11 [SIMPLE_EDGE] + <-Map 10 [SIMPLE_EDGE] SHUFFLE [RS_13] PartitionCols:_col0 Select Operator [SEL_11] (rows=36524 width=1119) Output:["_col0"] - Filter Operator [FIL_69] (rows=36524 width=1119) + Filter Operator [FIL_67] (rows=36524 width=1119) predicate:(d_month_seq BETWEEN 1212 AND 1223 and d_date_sk is not null) TableScan [TS_9] (rows=73049 width=1119) default@date_dim,date_dim,Tbl:COMPLETE,Col:NONE,Output:["d_date_sk","d_month_seq"] - <-Map 7 [SIMPLE_EDGE] + <-Map 6 [SIMPLE_EDGE] SHUFFLE [RS_12] PartitionCols:_col0 Select Operator [SEL_8] (rows=1 width=0) Output:["_col0","_col1","_col2","_col3"] - Filter Operator [FIL_68] (rows=1 width=0) + Filter Operator [FIL_66] (rows=1 width=0) predicate:(ss_sold_date_sk is not null and ss_store_sk is not null) TableScan [TS_6] (rows=1 width=0) default@store_sales,store_sales,Tbl:PARTIAL,Col:NONE,Output:["ss_sold_date_sk","ss_item_sk","ss_store_sk","ss_sales_price"] - <-Reducer 2 [SIMPLE_EDGE] - SHUFFLE [RS_44] - Merge Join Operator [MERGEJOIN_72] (rows=508200 width=1436) - Conds:(Inner),Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6"] - <-Map 1 [SIMPLE_EDGE] - SHUFFLE [RS_41] - Select Operator [SEL_2] (rows=1704 width=1910) - Output:["_col0","_col1"] - Filter Operator [FIL_66] (rows=1704 width=1910) - predicate:s_store_sk is not null - TableScan [TS_0] (rows=1704 width=1910) - default@store,store,Tbl:COMPLETE,Col:NONE,Output:["s_store_sk","s_store_name"] - <-Map 6 [SIMPLE_EDGE] - SHUFFLE [RS_42] - Select Operator [SEL_5] (rows=462000 width=1436) - Output:["_col0","_col1","_col2","_col3","_col4"] - Filter Operator [FIL_67] (rows=462000 width=1436) - predicate:i_item_sk is not null - TableScan [TS_3] (rows=462000 width=1436) - default@item,item,Tbl:COMPLETE,Col:NONE,Output:["i_item_sk","i_item_desc","i_current_price","i_wholesale_cost","i_brand"]
