Modified: hive/branches/spark/ql/src/test/results/clientpositive/spark/skewjoinopt2.q.out URL: http://svn.apache.org/viewvc/hive/branches/spark/ql/src/test/results/clientpositive/spark/skewjoinopt2.q.out?rev=1642997&r1=1642996&r2=1642997&view=diff ============================================================================== --- hive/branches/spark/ql/src/test/results/clientpositive/spark/skewjoinopt2.q.out (original) +++ hive/branches/spark/ql/src/test/results/clientpositive/spark/skewjoinopt2.q.out Tue Dec 2 19:57:10 2014 @@ -57,122 +57,108 @@ EXPLAIN SELECT a.*, b.* FROM T1 a JOIN T2 b ON a.key = b.key and a.val = b.val POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-2 is a root stage - Stage-1 depends on stages: Stage-2, Stage-3 - Stage-3 depends on stages: Stage-2 + Stage-1 is a root stage Stage-0 depends on stages: Stage-1 STAGE PLANS: - Stage: Stage-2 + Stage: Stage-1 Spark + Edges: + Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 1), Map 4 (PARTITION-LEVEL SORT, 1) + Reducer 6 <- Map 5 (PARTITION-LEVEL SORT, 1), Map 7 (PARTITION-LEVEL SORT, 1) + Union 3 <- Reducer 2 (NONE, 0), Reducer 6 (NONE, 0) #### A masked pattern was here #### Vertices: - Map 3 + Map 1 Map Operator Tree: TableScan - alias: b + alias: a Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE Filter Operator predicate: ((key is not null and val is not null) and ((((key = '2') or (key = '7')) or (key = '3')) or (key = '8'))) (type: boolean) Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - Spark HashTable Sink Operator - condition expressions: - 0 {key} {val} - 1 - keys: - 0 key (type: string), val (type: string) - 1 key (type: string), val (type: string) - Local Work: - Map Reduce Local Work - - Stage: Stage-1 - Spark - Edges: - Union 2 <- Map 1 (NONE, 0), Map 5 (NONE, 0) -#### A masked pattern was here #### - Vertices: - Map 1 + Reduce Output Operator + key expressions: key (type: string), val (type: string) + sort order: ++ + Map-reduce partition columns: key (type: string), val (type: string) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Map 4 Map Operator Tree: TableScan - alias: a + alias: b + Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE Filter Operator predicate: ((key is not null and val is not null) and ((((key = '2') or (key = '7')) or (key = '3')) or (key = '8'))) (type: boolean) - Map Join Operator - condition map: - Inner Join 0 to 1 - condition expressions: - 0 {key} {val} - 1 {key} {val} - keys: - 0 key (type: string), val (type: string) - 1 key (type: string), val (type: string) - outputColumnNames: _col0, _col1, _col5, _col6 - input vertices: - 1 Map 3 - Select Operator - expressions: _col0 (type: string), _col1 (type: string), _col5 (type: string), _col6 (type: string) - outputColumnNames: _col0, _col1, _col2, _col3 - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Local Work: - Map Reduce Local Work + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Reduce Output Operator + key expressions: key (type: string), val (type: string) + sort order: ++ + Map-reduce partition columns: key (type: string), val (type: string) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Map 5 Map Operator Tree: TableScan - alias: a + alias: b + Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE Filter Operator predicate: ((key is not null and val is not null) and (not ((((key = '2') or (key = '7')) or (key = '3')) or (key = '8')))) (type: boolean) - Map Join Operator - condition map: - Inner Join 0 to 1 - condition expressions: - 0 {key} {val} - 1 {key} {val} - keys: - 0 key (type: string), val (type: string) - 1 key (type: string), val (type: string) - outputColumnNames: _col0, _col1, _col5, _col6 - input vertices: - 1 Map 4 - Select Operator - expressions: _col0 (type: string), _col1 (type: string), _col5 (type: string), _col6 (type: string) - outputColumnNames: _col0, _col1, _col2, _col3 - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Local Work: - Map Reduce Local Work - Union 2 - Vertex: Union 2 - - Stage: Stage-3 - Spark -#### A masked pattern was here #### - Vertices: - Map 4 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Reduce Output Operator + key expressions: key (type: string), val (type: string) + sort order: ++ + Map-reduce partition columns: key (type: string), val (type: string) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Map 7 Map Operator Tree: TableScan - alias: b + alias: a Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE Filter Operator predicate: ((key is not null and val is not null) and (not ((((key = '2') or (key = '7')) or (key = '3')) or (key = '8')))) (type: boolean) Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - Spark HashTable Sink Operator - condition expressions: - 0 {key} {val} - 1 - keys: - 0 key (type: string), val (type: string) - 1 key (type: string), val (type: string) - Local Work: - Map Reduce Local Work + Reduce Output Operator + key expressions: key (type: string), val (type: string) + sort order: ++ + Map-reduce partition columns: key (type: string), val (type: string) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Reducer 2 + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 {KEY.reducesinkkey0} {KEY.reducesinkkey1} + 1 {KEY.reducesinkkey0} {KEY.reducesinkkey1} + outputColumnNames: _col0, _col1, _col5, _col6 + Select Operator + expressions: _col0 (type: string), _col1 (type: string), _col5 (type: string), _col6 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reducer 6 + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 {KEY.reducesinkkey0} {KEY.reducesinkkey1} + 1 {KEY.reducesinkkey0} {KEY.reducesinkkey1} + outputColumnNames: _col0, _col1, _col5, _col6 + Select Operator + expressions: _col0 (type: string), _col1 (type: string), _col5 (type: string), _col6 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Union 3 + Vertex: Union 3 Stage: Stage-0 Fetch Operator @@ -206,122 +192,108 @@ EXPLAIN SELECT a.*, b.* FROM T1 a LEFT OUTER JOIN T2 b ON a.key = b.key and a.val = b.val POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-2 is a root stage - Stage-1 depends on stages: Stage-2, Stage-3 - Stage-3 depends on stages: Stage-2 + Stage-1 is a root stage Stage-0 depends on stages: Stage-1 STAGE PLANS: - Stage: Stage-2 + Stage: Stage-1 Spark + Edges: + Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 3), Map 4 (PARTITION-LEVEL SORT, 3) + Reducer 6 <- Map 5 (PARTITION-LEVEL SORT, 3), Map 7 (PARTITION-LEVEL SORT, 3) + Union 3 <- Reducer 2 (NONE, 0), Reducer 6 (NONE, 0) #### A masked pattern was here #### Vertices: - Map 3 + Map 1 Map Operator Tree: TableScan - alias: b + alias: a Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE Filter Operator predicate: ((((key = '2') or (key = '7')) or (key = '3')) or (key = '8')) (type: boolean) Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - Spark HashTable Sink Operator - condition expressions: - 0 {key} {val} - 1 - keys: - 0 key (type: string), val (type: string) - 1 key (type: string), val (type: string) - Local Work: - Map Reduce Local Work - - Stage: Stage-1 - Spark - Edges: - Union 2 <- Map 1 (NONE, 0), Map 5 (NONE, 0) -#### A masked pattern was here #### - Vertices: - Map 1 + Reduce Output Operator + key expressions: key (type: string), val (type: string) + sort order: ++ + Map-reduce partition columns: key (type: string), val (type: string) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Map 4 Map Operator Tree: TableScan - alias: a + alias: b + Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE Filter Operator predicate: ((((key = '2') or (key = '7')) or (key = '3')) or (key = '8')) (type: boolean) - Map Join Operator - condition map: - Left Outer Join0 to 1 - condition expressions: - 0 {key} {val} - 1 {key} {val} - keys: - 0 key (type: string), val (type: string) - 1 key (type: string), val (type: string) - outputColumnNames: _col0, _col1, _col5, _col6 - input vertices: - 1 Map 3 - Select Operator - expressions: _col0 (type: string), _col1 (type: string), _col5 (type: string), _col6 (type: string) - outputColumnNames: _col0, _col1, _col2, _col3 - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Local Work: - Map Reduce Local Work + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Reduce Output Operator + key expressions: key (type: string), val (type: string) + sort order: ++ + Map-reduce partition columns: key (type: string), val (type: string) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Map 5 Map Operator Tree: TableScan - alias: a + alias: b + Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE Filter Operator predicate: (not ((((key = '2') or (key = '7')) or (key = '3')) or (key = '8'))) (type: boolean) - Map Join Operator - condition map: - Left Outer Join0 to 1 - condition expressions: - 0 {key} {val} - 1 {key} {val} - keys: - 0 key (type: string), val (type: string) - 1 key (type: string), val (type: string) - outputColumnNames: _col0, _col1, _col5, _col6 - input vertices: - 1 Map 4 - Select Operator - expressions: _col0 (type: string), _col1 (type: string), _col5 (type: string), _col6 (type: string) - outputColumnNames: _col0, _col1, _col2, _col3 - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Local Work: - Map Reduce Local Work - Union 2 - Vertex: Union 2 - - Stage: Stage-3 - Spark -#### A masked pattern was here #### - Vertices: - Map 4 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Reduce Output Operator + key expressions: key (type: string), val (type: string) + sort order: ++ + Map-reduce partition columns: key (type: string), val (type: string) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Map 7 Map Operator Tree: TableScan - alias: b + alias: a Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE Filter Operator predicate: (not ((((key = '2') or (key = '7')) or (key = '3')) or (key = '8'))) (type: boolean) Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - Spark HashTable Sink Operator - condition expressions: - 0 {key} {val} - 1 - keys: - 0 key (type: string), val (type: string) - 1 key (type: string), val (type: string) - Local Work: - Map Reduce Local Work + Reduce Output Operator + key expressions: key (type: string), val (type: string) + sort order: ++ + Map-reduce partition columns: key (type: string), val (type: string) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Reducer 2 + Reduce Operator Tree: + Join Operator + condition map: + Left Outer Join0 to 1 + condition expressions: + 0 {KEY.reducesinkkey0} {KEY.reducesinkkey1} + 1 {KEY.reducesinkkey0} {KEY.reducesinkkey1} + outputColumnNames: _col0, _col1, _col5, _col6 + Select Operator + expressions: _col0 (type: string), _col1 (type: string), _col5 (type: string), _col6 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reducer 6 + Reduce Operator Tree: + Join Operator + condition map: + Left Outer Join0 to 1 + condition expressions: + 0 {KEY.reducesinkkey0} {KEY.reducesinkkey1} + 1 {KEY.reducesinkkey0} {KEY.reducesinkkey1} + outputColumnNames: _col0, _col1, _col5, _col6 + Select Operator + expressions: _col0 (type: string), _col1 (type: string), _col5 (type: string), _col6 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Union 3 + Vertex: Union 3 Stage: Stage-0 Fetch Operator @@ -359,102 +331,91 @@ EXPLAIN SELECT a.key, count(1) FROM T1 a JOIN T2 b ON a.key = b.key and a.val = b.val group by a.key POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-2 is a root stage - Stage-1 depends on stages: Stage-2, Stage-3 - Stage-3 depends on stages: Stage-2 + Stage-1 is a root stage Stage-0 depends on stages: Stage-1 STAGE PLANS: - Stage: Stage-2 + Stage: Stage-1 Spark + Edges: + Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 3), Map 5 (PARTITION-LEVEL SORT, 3) + Reducer 7 <- Map 6 (PARTITION-LEVEL SORT, 3), Map 8 (PARTITION-LEVEL SORT, 3) + Union 3 <- Reducer 2 (NONE, 0), Reducer 7 (NONE, 0) + Reducer 4 <- Union 3 (GROUP, 3) #### A masked pattern was here #### Vertices: - Map 4 + Map 1 Map Operator Tree: TableScan - alias: b + alias: a Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE Filter Operator predicate: ((key is not null and val is not null) and ((((key = '2') or (key = '7')) or (key = '3')) or (key = '8'))) (type: boolean) Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - Spark HashTable Sink Operator - condition expressions: - 0 {key} - 1 - keys: - 0 key (type: string), val (type: string) - 1 key (type: string), val (type: string) - Local Work: - Map Reduce Local Work - - Stage: Stage-1 - Spark - Edges: - Union 2 <- Map 1 (NONE, 0), Map 6 (NONE, 0) - Reducer 3 <- Union 2 (GROUP, 3) -#### A masked pattern was here #### - Vertices: - Map 1 + Reduce Output Operator + key expressions: key (type: string), val (type: string) + sort order: ++ + Map-reduce partition columns: key (type: string), val (type: string) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Map 5 Map Operator Tree: TableScan - alias: a + alias: b + Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE Filter Operator predicate: ((key is not null and val is not null) and ((((key = '2') or (key = '7')) or (key = '3')) or (key = '8'))) (type: boolean) - Map Join Operator - condition map: - Inner Join 0 to 1 - condition expressions: - 0 {key} - 1 - keys: - 0 key (type: string), val (type: string) - 1 key (type: string), val (type: string) - outputColumnNames: _col0 - input vertices: - 1 Map 4 - Group By Operator - aggregations: count(1) - keys: _col0 (type: string) - mode: hash - outputColumnNames: _col0, _col1 - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Map-reduce partition columns: _col0 (type: string) - value expressions: _col1 (type: bigint) - Local Work: - Map Reduce Local Work + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Reduce Output Operator + key expressions: key (type: string), val (type: string) + sort order: ++ + Map-reduce partition columns: key (type: string), val (type: string) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Map 6 Map Operator Tree: TableScan + alias: b + Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE + Filter Operator + predicate: ((key is not null and val is not null) and (not ((((key = '2') or (key = '7')) or (key = '3')) or (key = '8')))) (type: boolean) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Reduce Output Operator + key expressions: key (type: string), val (type: string) + sort order: ++ + Map-reduce partition columns: key (type: string), val (type: string) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Map 8 + Map Operator Tree: + TableScan alias: a + Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE Filter Operator predicate: ((key is not null and val is not null) and (not ((((key = '2') or (key = '7')) or (key = '3')) or (key = '8')))) (type: boolean) - Map Join Operator - condition map: - Inner Join 0 to 1 - condition expressions: - 0 {key} - 1 - keys: - 0 key (type: string), val (type: string) - 1 key (type: string), val (type: string) - outputColumnNames: _col0 - input vertices: - 1 Map 5 - Group By Operator - aggregations: count(1) - keys: _col0 (type: string) - mode: hash - outputColumnNames: _col0, _col1 - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Map-reduce partition columns: _col0 (type: string) - value expressions: _col1 (type: bigint) - Local Work: - Map Reduce Local Work - Reducer 3 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Reduce Output Operator + key expressions: key (type: string), val (type: string) + sort order: ++ + Map-reduce partition columns: key (type: string), val (type: string) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Reducer 2 + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 {KEY.reducesinkkey0} + 1 + outputColumnNames: _col0 + Group By Operator + aggregations: count(1) + keys: _col0 (type: string) + mode: hash + outputColumnNames: _col0, _col1 + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + value expressions: _col1 (type: bigint) + Reducer 4 Reduce Operator Tree: Group By Operator aggregations: count(VALUE._col0) @@ -473,30 +434,27 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Union 2 - Vertex: Union 2 - - Stage: Stage-3 - Spark -#### A masked pattern was here #### - Vertices: - Map 5 - Map Operator Tree: - TableScan - alias: b - Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE - Filter Operator - predicate: ((key is not null and val is not null) and (not ((((key = '2') or (key = '7')) or (key = '3')) or (key = '8')))) (type: boolean) - Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - Spark HashTable Sink Operator - condition expressions: - 0 {key} - 1 - keys: - 0 key (type: string), val (type: string) - 1 key (type: string), val (type: string) - Local Work: - Map Reduce Local Work + Reducer 7 + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 {KEY.reducesinkkey0} + 1 + outputColumnNames: _col0 + Group By Operator + aggregations: count(1) + keys: _col0 (type: string) + mode: hash + outputColumnNames: _col0, _col1 + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + value expressions: _col1 (type: bigint) + Union 3 + Vertex: Union 3 Stage: Stage-0 Fetch Operator @@ -523,102 +481,91 @@ POSTHOOK: query: EXPLAIN SELECT a.key, count(1) FROM T1 a LEFT OUTER JOIN T2 b ON a.key = b.key and a.val = b.val group by a.key POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-2 is a root stage - Stage-1 depends on stages: Stage-2, Stage-3 - Stage-3 depends on stages: Stage-2 + Stage-1 is a root stage Stage-0 depends on stages: Stage-1 STAGE PLANS: - Stage: Stage-2 + Stage: Stage-1 Spark + Edges: + Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 3), Map 5 (PARTITION-LEVEL SORT, 3) + Reducer 7 <- Map 6 (PARTITION-LEVEL SORT, 3), Map 8 (PARTITION-LEVEL SORT, 3) + Union 3 <- Reducer 2 (NONE, 0), Reducer 7 (NONE, 0) + Reducer 4 <- Union 3 (GROUP, 3) #### A masked pattern was here #### Vertices: - Map 4 + Map 1 Map Operator Tree: TableScan - alias: b + alias: a Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE Filter Operator predicate: ((((key = '2') or (key = '7')) or (key = '3')) or (key = '8')) (type: boolean) Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - Spark HashTable Sink Operator - condition expressions: - 0 {key} - 1 - keys: - 0 key (type: string), val (type: string) - 1 key (type: string), val (type: string) - Local Work: - Map Reduce Local Work - - Stage: Stage-1 - Spark - Edges: - Union 2 <- Map 1 (NONE, 0), Map 6 (NONE, 0) - Reducer 3 <- Union 2 (GROUP, 3) -#### A masked pattern was here #### - Vertices: - Map 1 + Reduce Output Operator + key expressions: key (type: string), val (type: string) + sort order: ++ + Map-reduce partition columns: key (type: string), val (type: string) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Map 5 Map Operator Tree: TableScan - alias: a + alias: b + Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE Filter Operator predicate: ((((key = '2') or (key = '7')) or (key = '3')) or (key = '8')) (type: boolean) - Map Join Operator - condition map: - Left Outer Join0 to 1 - condition expressions: - 0 {key} - 1 - keys: - 0 key (type: string), val (type: string) - 1 key (type: string), val (type: string) - outputColumnNames: _col0 - input vertices: - 1 Map 4 - Group By Operator - aggregations: count(1) - keys: _col0 (type: string) - mode: hash - outputColumnNames: _col0, _col1 - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Map-reduce partition columns: _col0 (type: string) - value expressions: _col1 (type: bigint) - Local Work: - Map Reduce Local Work + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Reduce Output Operator + key expressions: key (type: string), val (type: string) + sort order: ++ + Map-reduce partition columns: key (type: string), val (type: string) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Map 6 Map Operator Tree: TableScan + alias: b + Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE + Filter Operator + predicate: (not ((((key = '2') or (key = '7')) or (key = '3')) or (key = '8'))) (type: boolean) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Reduce Output Operator + key expressions: key (type: string), val (type: string) + sort order: ++ + Map-reduce partition columns: key (type: string), val (type: string) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Map 8 + Map Operator Tree: + TableScan alias: a + Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE Filter Operator predicate: (not ((((key = '2') or (key = '7')) or (key = '3')) or (key = '8'))) (type: boolean) - Map Join Operator - condition map: - Left Outer Join0 to 1 - condition expressions: - 0 {key} - 1 - keys: - 0 key (type: string), val (type: string) - 1 key (type: string), val (type: string) - outputColumnNames: _col0 - input vertices: - 1 Map 5 - Group By Operator - aggregations: count(1) - keys: _col0 (type: string) - mode: hash - outputColumnNames: _col0, _col1 - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Map-reduce partition columns: _col0 (type: string) - value expressions: _col1 (type: bigint) - Local Work: - Map Reduce Local Work - Reducer 3 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Reduce Output Operator + key expressions: key (type: string), val (type: string) + sort order: ++ + Map-reduce partition columns: key (type: string), val (type: string) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Reducer 2 + Reduce Operator Tree: + Join Operator + condition map: + Left Outer Join0 to 1 + condition expressions: + 0 {KEY.reducesinkkey0} + 1 + outputColumnNames: _col0 + Group By Operator + aggregations: count(1) + keys: _col0 (type: string) + mode: hash + outputColumnNames: _col0, _col1 + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + value expressions: _col1 (type: bigint) + Reducer 4 Reduce Operator Tree: Group By Operator aggregations: count(VALUE._col0) @@ -637,30 +584,27 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Union 2 - Vertex: Union 2 - - Stage: Stage-3 - Spark -#### A masked pattern was here #### - Vertices: - Map 5 - Map Operator Tree: - TableScan - alias: b - Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE - Filter Operator - predicate: (not ((((key = '2') or (key = '7')) or (key = '3')) or (key = '8'))) (type: boolean) - Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - Spark HashTable Sink Operator - condition expressions: - 0 {key} - 1 - keys: - 0 key (type: string), val (type: string) - 1 key (type: string), val (type: string) - Local Work: - Map Reduce Local Work + Reducer 7 + Reduce Operator Tree: + Join Operator + condition map: + Left Outer Join0 to 1 + condition expressions: + 0 {KEY.reducesinkkey0} + 1 + outputColumnNames: _col0 + Group By Operator + aggregations: count(1) + keys: _col0 (type: string) + mode: hash + outputColumnNames: _col0, _col1 + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + value expressions: _col1 (type: bigint) + Union 3 + Vertex: Union 3 Stage: Stage-0 Fetch Operator
Modified: hive/branches/spark/ql/src/test/results/clientpositive/spark/skewjoinopt20.q.out URL: http://svn.apache.org/viewvc/hive/branches/spark/ql/src/test/results/clientpositive/spark/skewjoinopt20.q.out?rev=1642997&r1=1642996&r2=1642997&view=diff ============================================================================== --- hive/branches/spark/ql/src/test/results/clientpositive/spark/skewjoinopt20.q.out (original) +++ hive/branches/spark/ql/src/test/results/clientpositive/spark/skewjoinopt20.q.out Tue Dec 2 19:57:10 2014 @@ -51,122 +51,112 @@ EXPLAIN SELECT a.*, b.* FROM T1 a JOIN T2 b ON a.key = b.key POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-2 is a root stage - Stage-1 depends on stages: Stage-2, Stage-3 - Stage-3 depends on stages: Stage-2 + Stage-1 is a root stage Stage-0 depends on stages: Stage-1 STAGE PLANS: - Stage: Stage-2 + Stage: Stage-1 Spark + Edges: + Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 1), Map 4 (PARTITION-LEVEL SORT, 1) + Reducer 6 <- Map 5 (PARTITION-LEVEL SORT, 1), Map 7 (PARTITION-LEVEL SORT, 1) + Union 3 <- Reducer 2 (NONE, 0), Reducer 6 (NONE, 0) #### A masked pattern was here #### Vertices: - Map 3 + Map 1 Map Operator Tree: TableScan - alias: b + alias: a Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE Filter Operator predicate: (key is not null and (key = '2')) (type: boolean) Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - Spark HashTable Sink Operator - condition expressions: - 0 {key} {val} - 1 {val} - keys: - 0 key (type: string) - 1 key (type: string) - Local Work: - Map Reduce Local Work - - Stage: Stage-1 - Spark - Edges: - Union 2 <- Map 1 (NONE, 0), Map 5 (NONE, 0) -#### A masked pattern was here #### - Vertices: - Map 1 + Reduce Output Operator + key expressions: key (type: string) + sort order: + + Map-reduce partition columns: key (type: string) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + value expressions: val (type: string) + Map 4 Map Operator Tree: TableScan - alias: a + alias: b + Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE Filter Operator predicate: (key is not null and (key = '2')) (type: boolean) - Map Join Operator - condition map: - Inner Join 0 to 1 - condition expressions: - 0 {key} {val} - 1 {key} {val} - keys: - 0 key (type: string) - 1 key (type: string) - outputColumnNames: _col0, _col1, _col5, _col6 - input vertices: - 1 Map 3 - Select Operator - expressions: _col0 (type: string), _col1 (type: string), _col5 (type: string), _col6 (type: string) - outputColumnNames: _col0, _col1, _col2, _col3 - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Local Work: - Map Reduce Local Work + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Reduce Output Operator + key expressions: key (type: string) + sort order: + + Map-reduce partition columns: key (type: string) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + value expressions: val (type: string) Map 5 Map Operator Tree: TableScan - alias: a + alias: b + Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE Filter Operator predicate: (key is not null and (not (key = '2'))) (type: boolean) - Map Join Operator - condition map: - Inner Join 0 to 1 - condition expressions: - 0 {key} {val} - 1 {key} {val} - keys: - 0 key (type: string) - 1 key (type: string) - outputColumnNames: _col0, _col1, _col5, _col6 - input vertices: - 1 Map 4 - Select Operator - expressions: _col0 (type: string), _col1 (type: string), _col5 (type: string), _col6 (type: string) - outputColumnNames: _col0, _col1, _col2, _col3 - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Local Work: - Map Reduce Local Work - Union 2 - Vertex: Union 2 - - Stage: Stage-3 - Spark -#### A masked pattern was here #### - Vertices: - Map 4 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Reduce Output Operator + key expressions: key (type: string) + sort order: + + Map-reduce partition columns: key (type: string) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + value expressions: val (type: string) + Map 7 Map Operator Tree: TableScan - alias: b + alias: a Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE Filter Operator predicate: (key is not null and (not (key = '2'))) (type: boolean) Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - Spark HashTable Sink Operator - condition expressions: - 0 {key} {val} - 1 {val} - keys: - 0 key (type: string) - 1 key (type: string) - Local Work: - Map Reduce Local Work + Reduce Output Operator + key expressions: key (type: string) + sort order: + + Map-reduce partition columns: key (type: string) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + value expressions: val (type: string) + Reducer 2 + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 {KEY.reducesinkkey0} {VALUE._col0} + 1 {KEY.reducesinkkey0} {VALUE._col0} + outputColumnNames: _col0, _col1, _col5, _col6 + Select Operator + expressions: _col0 (type: string), _col1 (type: string), _col5 (type: string), _col6 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reducer 6 + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 {KEY.reducesinkkey0} {VALUE._col0} + 1 {KEY.reducesinkkey0} {VALUE._col0} + outputColumnNames: _col0, _col1, _col5, _col6 + Select Operator + expressions: _col0 (type: string), _col1 (type: string), _col5 (type: string), _col6 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Union 3 + Vertex: Union 3 Stage: Stage-0 Fetch Operator Modified: hive/branches/spark/ql/src/test/results/clientpositive/spark/skewjoinopt3.q.out URL: http://svn.apache.org/viewvc/hive/branches/spark/ql/src/test/results/clientpositive/spark/skewjoinopt3.q.out?rev=1642997&r1=1642996&r2=1642997&view=diff ============================================================================== --- hive/branches/spark/ql/src/test/results/clientpositive/spark/skewjoinopt3.q.out (original) +++ hive/branches/spark/ql/src/test/results/clientpositive/spark/skewjoinopt3.q.out Tue Dec 2 19:57:10 2014 @@ -51,122 +51,112 @@ EXPLAIN SELECT a.*, b.* FROM T1 a JOIN T2 b ON a.key = b.key POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-2 is a root stage - Stage-1 depends on stages: Stage-2, Stage-3 - Stage-3 depends on stages: Stage-2 + Stage-1 is a root stage Stage-0 depends on stages: Stage-1 STAGE PLANS: - Stage: Stage-2 + Stage: Stage-1 Spark + Edges: + Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 1), Map 4 (PARTITION-LEVEL SORT, 1) + Reducer 6 <- Map 5 (PARTITION-LEVEL SORT, 1), Map 7 (PARTITION-LEVEL SORT, 1) + Union 3 <- Reducer 2 (NONE, 0), Reducer 6 (NONE, 0) #### A masked pattern was here #### Vertices: - Map 3 + Map 1 Map Operator Tree: TableScan - alias: b + alias: a Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE Filter Operator predicate: (key is not null and (((key = '2') or (key = '8')) or (key = '3'))) (type: boolean) Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - Spark HashTable Sink Operator - condition expressions: - 0 {key} {val} - 1 {val} - keys: - 0 key (type: string) - 1 key (type: string) - Local Work: - Map Reduce Local Work - - Stage: Stage-1 - Spark - Edges: - Union 2 <- Map 1 (NONE, 0), Map 5 (NONE, 0) -#### A masked pattern was here #### - Vertices: - Map 1 + Reduce Output Operator + key expressions: key (type: string) + sort order: + + Map-reduce partition columns: key (type: string) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + value expressions: val (type: string) + Map 4 Map Operator Tree: TableScan - alias: a + alias: b + Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE Filter Operator predicate: (key is not null and (((key = '2') or (key = '8')) or (key = '3'))) (type: boolean) - Map Join Operator - condition map: - Inner Join 0 to 1 - condition expressions: - 0 {key} {val} - 1 {key} {val} - keys: - 0 key (type: string) - 1 key (type: string) - outputColumnNames: _col0, _col1, _col5, _col6 - input vertices: - 1 Map 3 - Select Operator - expressions: _col0 (type: string), _col1 (type: string), _col5 (type: string), _col6 (type: string) - outputColumnNames: _col0, _col1, _col2, _col3 - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Local Work: - Map Reduce Local Work + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Reduce Output Operator + key expressions: key (type: string) + sort order: + + Map-reduce partition columns: key (type: string) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + value expressions: val (type: string) Map 5 Map Operator Tree: TableScan - alias: a + alias: b + Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE Filter Operator predicate: (key is not null and (not (((key = '2') or (key = '8')) or (key = '3')))) (type: boolean) - Map Join Operator - condition map: - Inner Join 0 to 1 - condition expressions: - 0 {key} {val} - 1 {key} {val} - keys: - 0 key (type: string) - 1 key (type: string) - outputColumnNames: _col0, _col1, _col5, _col6 - input vertices: - 1 Map 4 - Select Operator - expressions: _col0 (type: string), _col1 (type: string), _col5 (type: string), _col6 (type: string) - outputColumnNames: _col0, _col1, _col2, _col3 - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Local Work: - Map Reduce Local Work - Union 2 - Vertex: Union 2 - - Stage: Stage-3 - Spark -#### A masked pattern was here #### - Vertices: - Map 4 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Reduce Output Operator + key expressions: key (type: string) + sort order: + + Map-reduce partition columns: key (type: string) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + value expressions: val (type: string) + Map 7 Map Operator Tree: TableScan - alias: b + alias: a Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE Filter Operator predicate: (key is not null and (not (((key = '2') or (key = '8')) or (key = '3')))) (type: boolean) Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - Spark HashTable Sink Operator - condition expressions: - 0 {key} {val} - 1 {val} - keys: - 0 key (type: string) - 1 key (type: string) - Local Work: - Map Reduce Local Work + Reduce Output Operator + key expressions: key (type: string) + sort order: + + Map-reduce partition columns: key (type: string) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + value expressions: val (type: string) + Reducer 2 + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 {KEY.reducesinkkey0} {VALUE._col0} + 1 {KEY.reducesinkkey0} {VALUE._col0} + outputColumnNames: _col0, _col1, _col5, _col6 + Select Operator + expressions: _col0 (type: string), _col1 (type: string), _col5 (type: string), _col6 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reducer 6 + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 {KEY.reducesinkkey0} {VALUE._col0} + 1 {KEY.reducesinkkey0} {VALUE._col0} + outputColumnNames: _col0, _col1, _col5, _col6 + Select Operator + expressions: _col0 (type: string), _col1 (type: string), _col5 (type: string), _col6 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Union 3 + Vertex: Union 3 Stage: Stage-0 Fetch Operator Modified: hive/branches/spark/ql/src/test/results/clientpositive/spark/skewjoinopt4.q.out URL: http://svn.apache.org/viewvc/hive/branches/spark/ql/src/test/results/clientpositive/spark/skewjoinopt4.q.out?rev=1642997&r1=1642996&r2=1642997&view=diff ============================================================================== --- hive/branches/spark/ql/src/test/results/clientpositive/spark/skewjoinopt4.q.out (original) +++ hive/branches/spark/ql/src/test/results/clientpositive/spark/skewjoinopt4.q.out Tue Dec 2 19:57:10 2014 @@ -47,122 +47,112 @@ EXPLAIN SELECT a.*, b.* FROM T1 a JOIN T2 b ON a.key = b.key POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-2 is a root stage - Stage-1 depends on stages: Stage-2, Stage-3 - Stage-3 depends on stages: Stage-2 + Stage-1 is a root stage Stage-0 depends on stages: Stage-1 STAGE PLANS: - Stage: Stage-2 + Stage: Stage-1 Spark + Edges: + Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 1), Map 4 (PARTITION-LEVEL SORT, 1) + Reducer 6 <- Map 5 (PARTITION-LEVEL SORT, 1), Map 7 (PARTITION-LEVEL SORT, 1) + Union 3 <- Reducer 2 (NONE, 0), Reducer 6 (NONE, 0) #### A masked pattern was here #### Vertices: - Map 3 + Map 1 Map Operator Tree: TableScan - alias: b + alias: a Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE Filter Operator predicate: (key is not null and (key = '2')) (type: boolean) Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - Spark HashTable Sink Operator - condition expressions: - 0 {key} {val} - 1 {val} - keys: - 0 key (type: string) - 1 key (type: string) - Local Work: - Map Reduce Local Work - - Stage: Stage-1 - Spark - Edges: - Union 2 <- Map 1 (NONE, 0), Map 5 (NONE, 0) -#### A masked pattern was here #### - Vertices: - Map 1 + Reduce Output Operator + key expressions: key (type: string) + sort order: + + Map-reduce partition columns: key (type: string) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + value expressions: val (type: string) + Map 4 Map Operator Tree: TableScan - alias: a + alias: b + Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE Filter Operator predicate: (key is not null and (key = '2')) (type: boolean) - Map Join Operator - condition map: - Inner Join 0 to 1 - condition expressions: - 0 {key} {val} - 1 {key} {val} - keys: - 0 key (type: string) - 1 key (type: string) - outputColumnNames: _col0, _col1, _col5, _col6 - input vertices: - 1 Map 3 - Select Operator - expressions: _col0 (type: string), _col1 (type: string), _col5 (type: string), _col6 (type: string) - outputColumnNames: _col0, _col1, _col2, _col3 - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Local Work: - Map Reduce Local Work + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Reduce Output Operator + key expressions: key (type: string) + sort order: + + Map-reduce partition columns: key (type: string) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + value expressions: val (type: string) Map 5 Map Operator Tree: TableScan - alias: a + alias: b + Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE Filter Operator predicate: (key is not null and (not (key = '2'))) (type: boolean) - Map Join Operator - condition map: - Inner Join 0 to 1 - condition expressions: - 0 {key} {val} - 1 {key} {val} - keys: - 0 key (type: string) - 1 key (type: string) - outputColumnNames: _col0, _col1, _col5, _col6 - input vertices: - 1 Map 4 - Select Operator - expressions: _col0 (type: string), _col1 (type: string), _col5 (type: string), _col6 (type: string) - outputColumnNames: _col0, _col1, _col2, _col3 - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Local Work: - Map Reduce Local Work - Union 2 - Vertex: Union 2 - - Stage: Stage-3 - Spark -#### A masked pattern was here #### - Vertices: - Map 4 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Reduce Output Operator + key expressions: key (type: string) + sort order: + + Map-reduce partition columns: key (type: string) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + value expressions: val (type: string) + Map 7 Map Operator Tree: TableScan - alias: b + alias: a Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE Filter Operator predicate: (key is not null and (not (key = '2'))) (type: boolean) Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - Spark HashTable Sink Operator - condition expressions: - 0 {key} {val} - 1 {val} - keys: - 0 key (type: string) - 1 key (type: string) - Local Work: - Map Reduce Local Work + Reduce Output Operator + key expressions: key (type: string) + sort order: + + Map-reduce partition columns: key (type: string) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + value expressions: val (type: string) + Reducer 2 + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 {KEY.reducesinkkey0} {VALUE._col0} + 1 {KEY.reducesinkkey0} {VALUE._col0} + outputColumnNames: _col0, _col1, _col5, _col6 + Select Operator + expressions: _col0 (type: string), _col1 (type: string), _col5 (type: string), _col6 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reducer 6 + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 {KEY.reducesinkkey0} {VALUE._col0} + 1 {KEY.reducesinkkey0} {VALUE._col0} + outputColumnNames: _col0, _col1, _col5, _col6 + Select Operator + expressions: _col0 (type: string), _col1 (type: string), _col5 (type: string), _col6 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Union 3 + Vertex: Union 3 Stage: Stage-0 Fetch Operator @@ -197,122 +187,112 @@ EXPLAIN SELECT a.*, b.* FROM T2 a JOIN T1 b ON a.key = b.key POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-2 is a root stage - Stage-1 depends on stages: Stage-2, Stage-3 - Stage-3 depends on stages: Stage-2 + Stage-1 is a root stage Stage-0 depends on stages: Stage-1 STAGE PLANS: - Stage: Stage-2 + Stage: Stage-1 Spark + Edges: + Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 3), Map 4 (PARTITION-LEVEL SORT, 3) + Reducer 6 <- Map 5 (PARTITION-LEVEL SORT, 3), Map 7 (PARTITION-LEVEL SORT, 3) + Union 3 <- Reducer 2 (NONE, 0), Reducer 6 (NONE, 0) #### A masked pattern was here #### Vertices: - Map 3 + Map 1 Map Operator Tree: TableScan - alias: b + alias: a Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE Filter Operator predicate: (key is not null and (key = '2')) (type: boolean) Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - Spark HashTable Sink Operator - condition expressions: - 0 {key} {val} - 1 {val} - keys: - 0 key (type: string) - 1 key (type: string) - Local Work: - Map Reduce Local Work - - Stage: Stage-1 - Spark - Edges: - Union 2 <- Map 1 (NONE, 0), Map 5 (NONE, 0) -#### A masked pattern was here #### - Vertices: - Map 1 + Reduce Output Operator + key expressions: key (type: string) + sort order: + + Map-reduce partition columns: key (type: string) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + value expressions: val (type: string) + Map 4 Map Operator Tree: TableScan - alias: a + alias: b + Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE Filter Operator predicate: (key is not null and (key = '2')) (type: boolean) - Map Join Operator - condition map: - Inner Join 0 to 1 - condition expressions: - 0 {key} {val} - 1 {key} {val} - keys: - 0 key (type: string) - 1 key (type: string) - outputColumnNames: _col0, _col1, _col5, _col6 - input vertices: - 1 Map 3 - Select Operator - expressions: _col0 (type: string), _col1 (type: string), _col5 (type: string), _col6 (type: string) - outputColumnNames: _col0, _col1, _col2, _col3 - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Local Work: - Map Reduce Local Work + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Reduce Output Operator + key expressions: key (type: string) + sort order: + + Map-reduce partition columns: key (type: string) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + value expressions: val (type: string) Map 5 Map Operator Tree: TableScan - alias: a + alias: b + Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE Filter Operator predicate: (key is not null and (not (key = '2'))) (type: boolean) - Map Join Operator - condition map: - Inner Join 0 to 1 - condition expressions: - 0 {key} {val} - 1 {key} {val} - keys: - 0 key (type: string) - 1 key (type: string) - outputColumnNames: _col0, _col1, _col5, _col6 - input vertices: - 1 Map 4 - Select Operator - expressions: _col0 (type: string), _col1 (type: string), _col5 (type: string), _col6 (type: string) - outputColumnNames: _col0, _col1, _col2, _col3 - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Local Work: - Map Reduce Local Work - Union 2 - Vertex: Union 2 - - Stage: Stage-3 - Spark -#### A masked pattern was here #### - Vertices: - Map 4 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Reduce Output Operator + key expressions: key (type: string) + sort order: + + Map-reduce partition columns: key (type: string) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + value expressions: val (type: string) + Map 7 Map Operator Tree: TableScan - alias: b + alias: a Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE Filter Operator predicate: (key is not null and (not (key = '2'))) (type: boolean) Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - Spark HashTable Sink Operator - condition expressions: - 0 {key} {val} - 1 {val} - keys: - 0 key (type: string) - 1 key (type: string) - Local Work: - Map Reduce Local Work + Reduce Output Operator + key expressions: key (type: string) + sort order: + + Map-reduce partition columns: key (type: string) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + value expressions: val (type: string) + Reducer 2 + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 {KEY.reducesinkkey0} {VALUE._col0} + 1 {KEY.reducesinkkey0} {VALUE._col0} + outputColumnNames: _col0, _col1, _col5, _col6 + Select Operator + expressions: _col0 (type: string), _col1 (type: string), _col5 (type: string), _col6 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reducer 6 + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 {KEY.reducesinkkey0} {VALUE._col0} + 1 {KEY.reducesinkkey0} {VALUE._col0} + outputColumnNames: _col0, _col1, _col5, _col6 + Select Operator + expressions: _col0 (type: string), _col1 (type: string), _col5 (type: string), _col6 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Union 3 + Vertex: Union 3 Stage: Stage-0 Fetch Operator Modified: hive/branches/spark/ql/src/test/results/clientpositive/spark/skewjoinopt5.q.out URL: http://svn.apache.org/viewvc/hive/branches/spark/ql/src/test/results/clientpositive/spark/skewjoinopt5.q.out?rev=1642997&r1=1642996&r2=1642997&view=diff ============================================================================== --- hive/branches/spark/ql/src/test/results/clientpositive/spark/skewjoinopt5.q.out (original) +++ hive/branches/spark/ql/src/test/results/clientpositive/spark/skewjoinopt5.q.out Tue Dec 2 19:57:10 2014 @@ -49,122 +49,112 @@ EXPLAIN SELECT a.*, b.* FROM T1 a JOIN T2 b ON a.key = b.key POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-2 is a root stage - Stage-1 depends on stages: Stage-2, Stage-3 - Stage-3 depends on stages: Stage-2 + Stage-1 is a root stage Stage-0 depends on stages: Stage-1 STAGE PLANS: - Stage: Stage-2 + Stage: Stage-1 Spark + Edges: + Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 1), Map 4 (PARTITION-LEVEL SORT, 1) + Reducer 6 <- Map 5 (PARTITION-LEVEL SORT, 1), Map 7 (PARTITION-LEVEL SORT, 1) + Union 3 <- Reducer 2 (NONE, 0), Reducer 6 (NONE, 0) #### A masked pattern was here #### Vertices: - Map 3 + Map 1 Map Operator Tree: TableScan - alias: b + alias: a Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE Filter Operator predicate: (key is not null and ((key = '2') or (key = '3'))) (type: boolean) Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - Spark HashTable Sink Operator - condition expressions: - 0 {key} {val} - 1 {val} - keys: - 0 key (type: string) - 1 key (type: string) - Local Work: - Map Reduce Local Work - - Stage: Stage-1 - Spark - Edges: - Union 2 <- Map 1 (NONE, 0), Map 5 (NONE, 0) -#### A masked pattern was here #### - Vertices: - Map 1 + Reduce Output Operator + key expressions: key (type: string) + sort order: + + Map-reduce partition columns: key (type: string) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + value expressions: val (type: string) + Map 4 Map Operator Tree: TableScan - alias: a + alias: b + Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE Filter Operator predicate: (key is not null and ((key = '2') or (key = '3'))) (type: boolean) - Map Join Operator - condition map: - Inner Join 0 to 1 - condition expressions: - 0 {key} {val} - 1 {key} {val} - keys: - 0 key (type: string) - 1 key (type: string) - outputColumnNames: _col0, _col1, _col5, _col6 - input vertices: - 1 Map 3 - Select Operator - expressions: _col0 (type: string), _col1 (type: string), _col5 (type: string), _col6 (type: string) - outputColumnNames: _col0, _col1, _col2, _col3 - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Local Work: - Map Reduce Local Work + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Reduce Output Operator + key expressions: key (type: string) + sort order: + + Map-reduce partition columns: key (type: string) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + value expressions: val (type: string) Map 5 Map Operator Tree: TableScan - alias: a + alias: b + Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE Filter Operator predicate: (key is not null and (not ((key = '2') or (key = '3')))) (type: boolean) - Map Join Operator - condition map: - Inner Join 0 to 1 - condition expressions: - 0 {key} {val} - 1 {key} {val} - keys: - 0 key (type: string) - 1 key (type: string) - outputColumnNames: _col0, _col1, _col5, _col6 - input vertices: - 1 Map 4 - Select Operator - expressions: _col0 (type: string), _col1 (type: string), _col5 (type: string), _col6 (type: string) - outputColumnNames: _col0, _col1, _col2, _col3 - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Local Work: - Map Reduce Local Work - Union 2 - Vertex: Union 2 - - Stage: Stage-3 - Spark -#### A masked pattern was here #### - Vertices: - Map 4 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Reduce Output Operator + key expressions: key (type: string) + sort order: + + Map-reduce partition columns: key (type: string) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + value expressions: val (type: string) + Map 7 Map Operator Tree: TableScan - alias: b + alias: a Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE Filter Operator predicate: (key is not null and (not ((key = '2') or (key = '3')))) (type: boolean) Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - Spark HashTable Sink Operator - condition expressions: - 0 {key} {val} - 1 {val} - keys: - 0 key (type: string) - 1 key (type: string) - Local Work: - Map Reduce Local Work + Reduce Output Operator + key expressions: key (type: string) + sort order: + + Map-reduce partition columns: key (type: string) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + value expressions: val (type: string) + Reducer 2 + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 {KEY.reducesinkkey0} {VALUE._col0} + 1 {KEY.reducesinkkey0} {VALUE._col0} + outputColumnNames: _col0, _col1, _col5, _col6 + Select Operator + expressions: _col0 (type: string), _col1 (type: string), _col5 (type: string), _col6 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reducer 6 + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 {KEY.reducesinkkey0} {VALUE._col0} + 1 {KEY.reducesinkkey0} {VALUE._col0} + outputColumnNames: _col0, _col1, _col5, _col6 + Select Operator + expressions: _col0 (type: string), _col1 (type: string), _col5 (type: string), _col6 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Union 3 + Vertex: Union 3 Stage: Stage-0 Fetch Operator
