Modified: hive/branches/spark/ql/src/test/results/clientpositive/spark/smb_mapjoin_1.q.out URL: http://svn.apache.org/viewvc/hive/branches/spark/ql/src/test/results/clientpositive/spark/smb_mapjoin_1.q.out?rev=1646998&r1=1646997&r2=1646998&view=diff ============================================================================== --- hive/branches/spark/ql/src/test/results/clientpositive/spark/smb_mapjoin_1.q.out (original) +++ hive/branches/spark/ql/src/test/results/clientpositive/spark/smb_mapjoin_1.q.out Sat Dec 20 14:55:13 2014 @@ -63,59 +63,38 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Spark - Edges: - Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 1), Map 3 (PARTITION-LEVEL SORT, 1) #### A masked pattern was here #### Vertices: Map 1 Map Operator Tree: TableScan - alias: a - Statistics: Num rows: 2 Data size: 208 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: key is not null (type: boolean) - Statistics: Num rows: 1 Data size: 104 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: key (type: int) - sort order: + - Map-reduce partition columns: key (type: int) - Statistics: Num rows: 1 Data size: 104 Basic stats: COMPLETE Column stats: NONE - value expressions: value (type: string) - Map 3 - Map Operator Tree: - TableScan alias: b Statistics: Num rows: 1 Data size: 206 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: key is not null (type: boolean) Statistics: Num rows: 1 Data size: 206 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: key (type: int) - sort order: + - Map-reduce partition columns: key (type: int) - Statistics: Num rows: 1 Data size: 206 Basic stats: COMPLETE Column stats: NONE - value expressions: value (type: string) - Reducer 2 - Reduce Operator Tree: - Join Operator - condition map: - Inner Join 0 to 1 - condition expressions: - 0 {KEY.reducesinkkey0} {VALUE._col0} - 1 {KEY.reducesinkkey0} {VALUE._col0} - outputColumnNames: _col0, _col1, _col5, _col6 - Statistics: Num rows: 1 Data size: 114 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: int), _col1 (type: string), _col5 (type: int), _col6 (type: string) - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 1 Data size: 114 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 114 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Sorted Merge Bucket Map Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 {key} {value} + 1 {key} {value} + keys: + 0 key (type: int) + 1 key (type: int) + outputColumnNames: _col0, _col1, _col5, _col6 + Statistics: Num rows: 1 Data size: 114 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: int), _col1 (type: string), _col5 (type: int), _col6 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1 Data size: 114 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 114 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Fetch Operator @@ -146,53 +125,35 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Spark - Edges: - Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 3), Map 3 (PARTITION-LEVEL SORT, 3) #### A masked pattern was here #### Vertices: Map 1 Map Operator Tree: TableScan - alias: a - Statistics: Num rows: 2 Data size: 208 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: key (type: int) - sort order: + - Map-reduce partition columns: key (type: int) - Statistics: Num rows: 2 Data size: 208 Basic stats: COMPLETE Column stats: NONE - value expressions: value (type: string) - Map 3 - Map Operator Tree: - TableScan alias: b Statistics: Num rows: 1 Data size: 206 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: key (type: int) - sort order: + - Map-reduce partition columns: key (type: int) - Statistics: Num rows: 1 Data size: 206 Basic stats: COMPLETE Column stats: NONE - value expressions: value (type: string) - Reducer 2 - Reduce Operator Tree: - Join Operator - condition map: - Left Outer Join0 to 1 - condition expressions: - 0 {KEY.reducesinkkey0} {VALUE._col0} - 1 {KEY.reducesinkkey0} {VALUE._col0} - outputColumnNames: _col0, _col1, _col5, _col6 - Statistics: Num rows: 2 Data size: 228 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: int), _col1 (type: string), _col5 (type: int), _col6 (type: string) - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 2 Data size: 228 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false + Sorted Merge Bucket Map Join Operator + condition map: + Left Outer Join0 to 1 + condition expressions: + 0 {key} {value} + 1 {key} {value} + keys: + 0 key (type: int) + 1 key (type: int) + outputColumnNames: _col0, _col1, _col5, _col6 Statistics: Num rows: 2 Data size: 228 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Select Operator + expressions: _col0 (type: int), _col1 (type: string), _col5 (type: int), _col6 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 2 Data size: 228 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 2 Data size: 228 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Fetch Operator @@ -228,53 +189,35 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Spark - Edges: - Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 3), Map 3 (PARTITION-LEVEL SORT, 3) #### A masked pattern was here #### Vertices: Map 1 Map Operator Tree: TableScan - alias: a - Statistics: Num rows: 2 Data size: 208 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: key (type: int) - sort order: + - Map-reduce partition columns: key (type: int) - Statistics: Num rows: 2 Data size: 208 Basic stats: COMPLETE Column stats: NONE - value expressions: value (type: string) - Map 3 - Map Operator Tree: - TableScan alias: b Statistics: Num rows: 1 Data size: 206 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: key (type: int) - sort order: + - Map-reduce partition columns: key (type: int) - Statistics: Num rows: 1 Data size: 206 Basic stats: COMPLETE Column stats: NONE - value expressions: value (type: string) - Reducer 2 - Reduce Operator Tree: - Join Operator - condition map: - Right Outer Join0 to 1 - condition expressions: - 0 {KEY.reducesinkkey0} {VALUE._col0} - 1 {KEY.reducesinkkey0} {VALUE._col0} - outputColumnNames: _col0, _col1, _col5, _col6 - Statistics: Num rows: 2 Data size: 228 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: int), _col1 (type: string), _col5 (type: int), _col6 (type: string) - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 2 Data size: 228 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false + Sorted Merge Bucket Map Join Operator + condition map: + Right Outer Join0 to 1 + condition expressions: + 0 {key} {value} + 1 {key} {value} + keys: + 0 key (type: int) + 1 key (type: int) + outputColumnNames: _col0, _col1, _col5, _col6 Statistics: Num rows: 2 Data size: 228 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Select Operator + expressions: _col0 (type: int), _col1 (type: string), _col5 (type: int), _col6 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 2 Data size: 228 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 2 Data size: 228 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Fetch Operator @@ -309,53 +252,35 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Spark - Edges: - Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 3), Map 3 (PARTITION-LEVEL SORT, 3) #### A masked pattern was here #### Vertices: Map 1 Map Operator Tree: TableScan - alias: a - Statistics: Num rows: 2 Data size: 208 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: key (type: int) - sort order: + - Map-reduce partition columns: key (type: int) - Statistics: Num rows: 2 Data size: 208 Basic stats: COMPLETE Column stats: NONE - value expressions: value (type: string) - Map 3 - Map Operator Tree: - TableScan alias: b Statistics: Num rows: 1 Data size: 206 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: key (type: int) - sort order: + - Map-reduce partition columns: key (type: int) - Statistics: Num rows: 1 Data size: 206 Basic stats: COMPLETE Column stats: NONE - value expressions: value (type: string) - Reducer 2 - Reduce Operator Tree: - Join Operator - condition map: - Outer Join 0 to 1 - condition expressions: - 0 {KEY.reducesinkkey0} {VALUE._col0} - 1 {KEY.reducesinkkey0} {VALUE._col0} - outputColumnNames: _col0, _col1, _col5, _col6 - Statistics: Num rows: 2 Data size: 228 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: int), _col1 (type: string), _col5 (type: int), _col6 (type: string) - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 2 Data size: 228 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false + Sorted Merge Bucket Map Join Operator + condition map: + Outer Join 0 to 1 + condition expressions: + 0 {key} {value} + 1 {key} {value} + keys: + 0 key (type: int) + 1 key (type: int) + outputColumnNames: _col0, _col1, _col5, _col6 Statistics: Num rows: 2 Data size: 228 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Select Operator + expressions: _col0 (type: int), _col1 (type: string), _col5 (type: int), _col6 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 2 Data size: 228 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 2 Data size: 228 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Fetch Operator @@ -395,8 +320,6 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Spark - Edges: - Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 3), Map 3 (PARTITION-LEVEL SORT, 3) #### A masked pattern was here #### Vertices: Map 1 @@ -407,47 +330,28 @@ STAGE PLANS: Filter Operator predicate: key is not null (type: boolean) Statistics: Num rows: 1 Data size: 104 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: key (type: int) - sort order: + - Map-reduce partition columns: key (type: int) - Statistics: Num rows: 1 Data size: 104 Basic stats: COMPLETE Column stats: NONE - value expressions: value (type: string) - Map 3 - Map Operator Tree: - TableScan - alias: b - Statistics: Num rows: 1 Data size: 206 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: key is not null (type: boolean) - Statistics: Num rows: 1 Data size: 206 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: key (type: int) - sort order: + - Map-reduce partition columns: key (type: int) - Statistics: Num rows: 1 Data size: 206 Basic stats: COMPLETE Column stats: NONE - value expressions: value (type: string) - Reducer 2 - Reduce Operator Tree: - Join Operator - condition map: - Inner Join 0 to 1 - condition expressions: - 0 {KEY.reducesinkkey0} {VALUE._col0} - 1 {KEY.reducesinkkey0} {VALUE._col0} - outputColumnNames: _col0, _col1, _col5, _col6 - Statistics: Num rows: 1 Data size: 114 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: int), _col1 (type: string), _col5 (type: int), _col6 (type: string) - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 1 Data size: 114 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 114 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Sorted Merge Bucket Map Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 {key} {value} + 1 {key} {value} + keys: + 0 key (type: int) + 1 key (type: int) + outputColumnNames: _col0, _col1, _col5, _col6 + Statistics: Num rows: 1 Data size: 114 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: int), _col1 (type: string), _col5 (type: int), _col6 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1 Data size: 114 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 114 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Fetch Operator @@ -478,8 +382,6 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Spark - Edges: - Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 3), Map 3 (PARTITION-LEVEL SORT, 3) #### A masked pattern was here #### Vertices: Map 1 @@ -487,44 +389,28 @@ STAGE PLANS: TableScan alias: a Statistics: Num rows: 2 Data size: 208 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: key (type: int) - sort order: + - Map-reduce partition columns: key (type: int) - Statistics: Num rows: 2 Data size: 208 Basic stats: COMPLETE Column stats: NONE - value expressions: value (type: string) - Map 3 - Map Operator Tree: - TableScan - alias: b - Statistics: Num rows: 1 Data size: 206 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: key (type: int) - sort order: + - Map-reduce partition columns: key (type: int) - Statistics: Num rows: 1 Data size: 206 Basic stats: COMPLETE Column stats: NONE - value expressions: value (type: string) - Reducer 2 - Reduce Operator Tree: - Join Operator - condition map: - Left Outer Join0 to 1 - condition expressions: - 0 {KEY.reducesinkkey0} {VALUE._col0} - 1 {KEY.reducesinkkey0} {VALUE._col0} - outputColumnNames: _col0, _col1, _col5, _col6 - Statistics: Num rows: 2 Data size: 228 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: int), _col1 (type: string), _col5 (type: int), _col6 (type: string) - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 2 Data size: 228 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false + Sorted Merge Bucket Map Join Operator + condition map: + Left Outer Join0 to 1 + condition expressions: + 0 {key} {value} + 1 {key} {value} + keys: + 0 key (type: int) + 1 key (type: int) + outputColumnNames: _col0, _col1, _col5, _col6 Statistics: Num rows: 2 Data size: 228 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Select Operator + expressions: _col0 (type: int), _col1 (type: string), _col5 (type: int), _col6 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 2 Data size: 228 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 2 Data size: 228 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Fetch Operator @@ -560,8 +446,6 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Spark - Edges: - Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 3), Map 3 (PARTITION-LEVEL SORT, 3) #### A masked pattern was here #### Vertices: Map 1 @@ -569,44 +453,28 @@ STAGE PLANS: TableScan alias: a Statistics: Num rows: 2 Data size: 208 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: key (type: int) - sort order: + - Map-reduce partition columns: key (type: int) - Statistics: Num rows: 2 Data size: 208 Basic stats: COMPLETE Column stats: NONE - value expressions: value (type: string) - Map 3 - Map Operator Tree: - TableScan - alias: b - Statistics: Num rows: 1 Data size: 206 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: key (type: int) - sort order: + - Map-reduce partition columns: key (type: int) - Statistics: Num rows: 1 Data size: 206 Basic stats: COMPLETE Column stats: NONE - value expressions: value (type: string) - Reducer 2 - Reduce Operator Tree: - Join Operator - condition map: - Right Outer Join0 to 1 - condition expressions: - 0 {KEY.reducesinkkey0} {VALUE._col0} - 1 {KEY.reducesinkkey0} {VALUE._col0} - outputColumnNames: _col0, _col1, _col5, _col6 - Statistics: Num rows: 2 Data size: 228 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: int), _col1 (type: string), _col5 (type: int), _col6 (type: string) - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 2 Data size: 228 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false + Sorted Merge Bucket Map Join Operator + condition map: + Right Outer Join0 to 1 + condition expressions: + 0 {key} {value} + 1 {key} {value} + keys: + 0 key (type: int) + 1 key (type: int) + outputColumnNames: _col0, _col1, _col5, _col6 Statistics: Num rows: 2 Data size: 228 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Select Operator + expressions: _col0 (type: int), _col1 (type: string), _col5 (type: int), _col6 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 2 Data size: 228 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 2 Data size: 228 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Fetch Operator @@ -641,8 +509,6 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Spark - Edges: - Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 3), Map 3 (PARTITION-LEVEL SORT, 3) #### A masked pattern was here #### Vertices: Map 1 @@ -650,44 +516,28 @@ STAGE PLANS: TableScan alias: a Statistics: Num rows: 2 Data size: 208 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: key (type: int) - sort order: + - Map-reduce partition columns: key (type: int) - Statistics: Num rows: 2 Data size: 208 Basic stats: COMPLETE Column stats: NONE - value expressions: value (type: string) - Map 3 - Map Operator Tree: - TableScan - alias: b - Statistics: Num rows: 1 Data size: 206 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: key (type: int) - sort order: + - Map-reduce partition columns: key (type: int) - Statistics: Num rows: 1 Data size: 206 Basic stats: COMPLETE Column stats: NONE - value expressions: value (type: string) - Reducer 2 - Reduce Operator Tree: - Join Operator - condition map: - Outer Join 0 to 1 - condition expressions: - 0 {KEY.reducesinkkey0} {VALUE._col0} - 1 {KEY.reducesinkkey0} {VALUE._col0} - outputColumnNames: _col0, _col1, _col5, _col6 - Statistics: Num rows: 2 Data size: 228 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: int), _col1 (type: string), _col5 (type: int), _col6 (type: string) - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 2 Data size: 228 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false + Sorted Merge Bucket Map Join Operator + condition map: + Outer Join 0 to 1 + condition expressions: + 0 {key} {value} + 1 {key} {value} + keys: + 0 key (type: int) + 1 key (type: int) + outputColumnNames: _col0, _col1, _col5, _col6 Statistics: Num rows: 2 Data size: 228 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Select Operator + expressions: _col0 (type: int), _col1 (type: string), _col5 (type: int), _col6 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 2 Data size: 228 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 2 Data size: 228 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Fetch Operator
Modified: hive/branches/spark/ql/src/test/results/clientpositive/spark/smb_mapjoin_10.q.out URL: http://svn.apache.org/viewvc/hive/branches/spark/ql/src/test/results/clientpositive/spark/smb_mapjoin_10.q.out?rev=1646998&r1=1646997&r2=1646998&view=diff ============================================================================== --- hive/branches/spark/ql/src/test/results/clientpositive/spark/smb_mapjoin_10.q.out (original) +++ hive/branches/spark/ql/src/test/results/clientpositive/spark/smb_mapjoin_10.q.out Sat Dec 20 14:55:13 2014 @@ -79,57 +79,38 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Spark - Edges: - Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 1), Map 3 (PARTITION-LEVEL SORT, 1) #### A masked pattern was here #### Vertices: Map 1 Map Operator Tree: TableScan - alias: a - Statistics: Num rows: 3 Data size: 414 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: (((userid is not null and pageid is not null) and postid is not null) and type is not null) (type: boolean) - Statistics: Num rows: 1 Data size: 138 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: userid (type: int), pageid (type: int), postid (type: int), type (type: string) - sort order: ++++ - Map-reduce partition columns: userid (type: int), pageid (type: int), postid (type: int), type (type: string) - Statistics: Num rows: 1 Data size: 138 Basic stats: COMPLETE Column stats: NONE - Map 3 - Map Operator Tree: - TableScan alias: b Statistics: Num rows: 3 Data size: 414 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: (((userid is not null and pageid is not null) and postid is not null) and type is not null) (type: boolean) Statistics: Num rows: 1 Data size: 138 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: userid (type: int), pageid (type: int), postid (type: int), type (type: string) - sort order: ++++ - Map-reduce partition columns: userid (type: int), pageid (type: int), postid (type: int), type (type: string) - Statistics: Num rows: 1 Data size: 138 Basic stats: COMPLETE Column stats: NONE - Reducer 2 - Reduce Operator Tree: - Join Operator - condition map: - Inner Join 0 to 1 - condition expressions: - 0 {KEY.reducesinkkey0} {KEY.reducesinkkey1} {KEY.reducesinkkey2} {KEY.reducesinkkey3} - 1 {KEY.reducesinkkey0} {KEY.reducesinkkey1} {KEY.reducesinkkey2} {KEY.reducesinkkey3} - outputColumnNames: _col0, _col1, _col2, _col3, _col8, _col9, _col10, _col11 - Statistics: Num rows: 1 Data size: 151 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: int), _col1 (type: int), _col2 (type: int), _col3 (type: string), '1' (type: string), _col8 (type: int), _col9 (type: int), _col10 (type: int), _col11 (type: string), '2' (type: string) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9 - Statistics: Num rows: 1 Data size: 151 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 151 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Sorted Merge Bucket Map Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 {userid} {pageid} {postid} {type} + 1 {userid} {pageid} {postid} {type} + keys: + 0 userid (type: int), pageid (type: int), postid (type: int), type (type: string) + 1 userid (type: int), pageid (type: int), postid (type: int), type (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col8, _col9, _col10, _col11 + Statistics: Num rows: 1 Data size: 151 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: int), _col1 (type: int), _col2 (type: int), _col3 (type: string), '1' (type: string), _col8 (type: int), _col9 (type: int), _col10 (type: int), _col11 (type: string), '2' (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9 + Statistics: Num rows: 1 Data size: 151 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 151 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Fetch Operator Added: hive/branches/spark/ql/src/test/results/clientpositive/spark/smb_mapjoin_11.q.out URL: http://svn.apache.org/viewvc/hive/branches/spark/ql/src/test/results/clientpositive/spark/smb_mapjoin_11.q.out?rev=1646998&view=auto ============================================================================== --- hive/branches/spark/ql/src/test/results/clientpositive/spark/smb_mapjoin_11.q.out (added) +++ hive/branches/spark/ql/src/test/results/clientpositive/spark/smb_mapjoin_11.q.out Sat Dec 20 14:55:13 2014 @@ -0,0 +1,305 @@ +PREHOOK: query: -- This test verifies that the output of a sort merge join on 2 partitions (one on each side of the join) is bucketed + +-- Create two bucketed and sorted tables +CREATE TABLE test_table1 (key INT, value STRING) PARTITIONED BY (ds STRING) CLUSTERED BY (key) SORTED BY (key) INTO 16 BUCKETS +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@test_table1 +POSTHOOK: query: -- This test verifies that the output of a sort merge join on 2 partitions (one on each side of the join) is bucketed + +-- Create two bucketed and sorted tables +CREATE TABLE test_table1 (key INT, value STRING) PARTITIONED BY (ds STRING) CLUSTERED BY (key) SORTED BY (key) INTO 16 BUCKETS +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@test_table1 +PREHOOK: query: CREATE TABLE test_table2 (key INT, value STRING) PARTITIONED BY (ds STRING) CLUSTERED BY (key) SORTED BY (key) INTO 16 BUCKETS +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@test_table2 +POSTHOOK: query: CREATE TABLE test_table2 (key INT, value STRING) PARTITIONED BY (ds STRING) CLUSTERED BY (key) SORTED BY (key) INTO 16 BUCKETS +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@test_table2 +PREHOOK: query: FROM src +INSERT OVERWRITE TABLE test_table1 PARTITION (ds = '1') SELECT * +INSERT OVERWRITE TABLE test_table2 PARTITION (ds = '1') SELECT * +PREHOOK: type: QUERY +PREHOOK: Input: default@src +PREHOOK: Output: default@test_table1@ds=1 +PREHOOK: Output: default@test_table2@ds=1 +POSTHOOK: query: FROM src +INSERT OVERWRITE TABLE test_table1 PARTITION (ds = '1') SELECT * +INSERT OVERWRITE TABLE test_table2 PARTITION (ds = '1') SELECT * +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +POSTHOOK: Output: default@test_table1@ds=1 +POSTHOOK: Output: default@test_table2@ds=1 +POSTHOOK: Lineage: test_table1 PARTITION(ds=1).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: test_table1 PARTITION(ds=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: test_table2 PARTITION(ds=1).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: test_table2 PARTITION(ds=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +PREHOOK: query: -- Create a bucketed table +CREATE TABLE test_table3 (key INT, value STRING) PARTITIONED BY (ds STRING) CLUSTERED BY (key) INTO 16 BUCKETS +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@test_table3 +POSTHOOK: query: -- Create a bucketed table +CREATE TABLE test_table3 (key INT, value STRING) PARTITIONED BY (ds STRING) CLUSTERED BY (key) INTO 16 BUCKETS +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@test_table3 +PREHOOK: query: -- Insert data into the bucketed table by joining the two bucketed and sorted tables, bucketing is not enforced +EXPLAIN EXTENDED +INSERT OVERWRITE TABLE test_table3 PARTITION (ds = '1') SELECT /*+ MAPJOIN(b) */ a.key, b.value FROM test_table1 a JOIN test_table2 b ON a.key = b.key AND a.ds = '1' AND b.ds = '1' +PREHOOK: type: QUERY +POSTHOOK: query: -- Insert data into the bucketed table by joining the two bucketed and sorted tables, bucketing is not enforced +EXPLAIN EXTENDED +INSERT OVERWRITE TABLE test_table3 PARTITION (ds = '1') SELECT /*+ MAPJOIN(b) */ a.key, b.value FROM test_table1 a JOIN test_table2 b ON a.key = b.key AND a.ds = '1' AND b.ds = '1' +POSTHOOK: type: QUERY +ABSTRACT SYNTAX TREE: + +TOK_QUERY + TOK_FROM + TOK_JOIN + TOK_TABREF + TOK_TABNAME + test_table1 + a + TOK_TABREF + TOK_TABNAME + test_table2 + b + AND + AND + = + . + TOK_TABLE_OR_COL + a + key + . + TOK_TABLE_OR_COL + b + key + = + . + TOK_TABLE_OR_COL + a + ds + '1' + = + . + TOK_TABLE_OR_COL + b + ds + '1' + TOK_INSERT + TOK_DESTINATION + TOK_TAB + TOK_TABNAME + test_table3 + TOK_PARTSPEC + TOK_PARTVAL + ds + '1' + TOK_SELECT + TOK_HINTLIST + TOK_HINT + TOK_MAPJOIN + TOK_HINTARGLIST + b + TOK_SELEXPR + . + TOK_TABLE_OR_COL + a + key + TOK_SELEXPR + . + TOK_TABLE_OR_COL + b + value + + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + Stage-2 depends on stages: Stage-0 + +STAGE PLANS: + Stage: Stage-1 + Spark +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: a + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + GatherStats: false + Filter Operator + isSamplingPred: false + predicate: key is not null (type: boolean) + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Sorted Merge Bucket Map Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 {key} + 1 {value} + keys: + 0 key (type: int) + 1 key (type: int) + outputColumnNames: _col0, _col7 + Position of Big Table: 0 + Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: int), _col7 (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 1 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Static Partition Specification: ds=1/ + Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + bucket_count 16 + bucket_field_name key + columns key,value + columns.comments + columns.types int:string +#### A masked pattern was here #### + name default.test_table3 + partition_columns ds + partition_columns.types string + serialization.ddl struct test_table3 { i32 key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.test_table3 + TotalFiles: 1 + GatherStats: true + MultiFileSpray: false + Path -> Alias: +#### A masked pattern was here #### + Path -> Partition: +#### A masked pattern was here #### + Partition + base file name: ds=1 + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + partition values: + ds 1 + properties: + COLUMN_STATS_ACCURATE true + bucket_count 16 + bucket_field_name key + columns key,value + columns.comments + columns.types int:string +#### A masked pattern was here #### + name default.test_table1 + numFiles 16 + numRows 500 + partition_columns ds + partition_columns.types string + rawDataSize 5312 + serialization.ddl struct test_table1 { i32 key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + totalSize 5812 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + SORTBUCKETCOLSPREFIX TRUE + bucket_count 16 + bucket_field_name key + columns key,value + columns.comments + columns.types int:string +#### A masked pattern was here #### + name default.test_table1 + partition_columns ds + partition_columns.types string + serialization.ddl struct test_table1 { i32 key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.test_table1 + name: default.test_table1 + Truncated Path -> Alias: + /test_table1/ds=1 [a] + + Stage: Stage-0 + Move Operator + tables: + partition: + ds 1 + replace: true +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + bucket_count 16 + bucket_field_name key + columns key,value + columns.comments + columns.types int:string +#### A masked pattern was here #### + name default.test_table3 + partition_columns ds + partition_columns.types string + serialization.ddl struct test_table3 { i32 key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.test_table3 + + Stage: Stage-2 + Stats-Aggr Operator +#### A masked pattern was here #### + +PREHOOK: query: INSERT OVERWRITE TABLE test_table3 PARTITION (ds = '1') SELECT /*+ MAPJOIN(b) */ a.key, b.value FROM test_table1 a JOIN test_table2 b ON a.key = b.key AND a.ds = '1' AND b.ds = '1' +PREHOOK: type: QUERY +PREHOOK: Input: default@test_table1 +PREHOOK: Input: default@test_table1@ds=1 +PREHOOK: Input: default@test_table2 +PREHOOK: Input: default@test_table2@ds=1 +PREHOOK: Output: default@test_table3@ds=1 +POSTHOOK: query: INSERT OVERWRITE TABLE test_table3 PARTITION (ds = '1') SELECT /*+ MAPJOIN(b) */ a.key, b.value FROM test_table1 a JOIN test_table2 b ON a.key = b.key AND a.ds = '1' AND b.ds = '1' +POSTHOOK: type: QUERY +POSTHOOK: Input: default@test_table1 +POSTHOOK: Input: default@test_table1@ds=1 +POSTHOOK: Input: default@test_table2 +POSTHOOK: Input: default@test_table2@ds=1 +POSTHOOK: Output: default@test_table3@ds=1 +POSTHOOK: Lineage: test_table3 PARTITION(ds=1).key SIMPLE [(test_table1)a.FieldSchema(name:key, type:int, comment:null), ] +POSTHOOK: Lineage: test_table3 PARTITION(ds=1).value SIMPLE [(test_table2)b.FieldSchema(name:value, type:string, comment:null), ] +PREHOOK: query: -- Join data from a sampled bucket to verify the data is bucketed +SELECT COUNT(*) FROM test_table3 TABLESAMPLE(BUCKET 2 OUT OF 16) a JOIN test_table1 TABLESAMPLE(BUCKET 2 OUT OF 16) b ON a.key = b.key AND a.ds = '1' AND b.ds='1' +PREHOOK: type: QUERY +PREHOOK: Input: default@test_table1 +PREHOOK: Input: default@test_table1@ds=1 +PREHOOK: Input: default@test_table3 +PREHOOK: Input: default@test_table3@ds=1 +#### A masked pattern was here #### +POSTHOOK: query: -- Join data from a sampled bucket to verify the data is bucketed +SELECT COUNT(*) FROM test_table3 TABLESAMPLE(BUCKET 2 OUT OF 16) a JOIN test_table1 TABLESAMPLE(BUCKET 2 OUT OF 16) b ON a.key = b.key AND a.ds = '1' AND b.ds='1' +POSTHOOK: type: QUERY +POSTHOOK: Input: default@test_table1 +POSTHOOK: Input: default@test_table1@ds=1 +POSTHOOK: Input: default@test_table3 +POSTHOOK: Input: default@test_table3@ds=1 +#### A masked pattern was here #### +293 Added: hive/branches/spark/ql/src/test/results/clientpositive/spark/smb_mapjoin_12.q.out URL: http://svn.apache.org/viewvc/hive/branches/spark/ql/src/test/results/clientpositive/spark/smb_mapjoin_12.q.out?rev=1646998&view=auto ============================================================================== --- hive/branches/spark/ql/src/test/results/clientpositive/spark/smb_mapjoin_12.q.out (added) +++ hive/branches/spark/ql/src/test/results/clientpositive/spark/smb_mapjoin_12.q.out Sat Dec 20 14:55:13 2014 @@ -0,0 +1,584 @@ +PREHOOK: query: -- This test verifies that the output of a sort merge join on 1 big partition with multiple small partitions is bucketed and sorted + +-- Create two bucketed and sorted tables +CREATE TABLE test_table1 (key INT, value STRING) PARTITIONED BY (ds STRING) CLUSTERED BY (key) SORTED BY (key) INTO 16 BUCKETS +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@test_table1 +POSTHOOK: query: -- This test verifies that the output of a sort merge join on 1 big partition with multiple small partitions is bucketed and sorted + +-- Create two bucketed and sorted tables +CREATE TABLE test_table1 (key INT, value STRING) PARTITIONED BY (ds STRING) CLUSTERED BY (key) SORTED BY (key) INTO 16 BUCKETS +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@test_table1 +PREHOOK: query: CREATE TABLE test_table2 (key INT, value STRING) PARTITIONED BY (ds STRING) CLUSTERED BY (key) SORTED BY (key) INTO 16 BUCKETS +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@test_table2 +POSTHOOK: query: CREATE TABLE test_table2 (key INT, value STRING) PARTITIONED BY (ds STRING) CLUSTERED BY (key) SORTED BY (key) INTO 16 BUCKETS +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@test_table2 +PREHOOK: query: FROM src +INSERT OVERWRITE TABLE test_table1 PARTITION (ds = '1') SELECT * +INSERT OVERWRITE TABLE test_table2 PARTITION (ds = '1') SELECT * +INSERT OVERWRITE TABLE test_table2 PARTITION (ds = '2') SELECT * +INSERT OVERWRITE TABLE test_table2 PARTITION (ds = '3') SELECT * +PREHOOK: type: QUERY +PREHOOK: Input: default@src +PREHOOK: Output: default@test_table1@ds=1 +PREHOOK: Output: default@test_table2@ds=1 +PREHOOK: Output: default@test_table2@ds=2 +PREHOOK: Output: default@test_table2@ds=3 +POSTHOOK: query: FROM src +INSERT OVERWRITE TABLE test_table1 PARTITION (ds = '1') SELECT * +INSERT OVERWRITE TABLE test_table2 PARTITION (ds = '1') SELECT * +INSERT OVERWRITE TABLE test_table2 PARTITION (ds = '2') SELECT * +INSERT OVERWRITE TABLE test_table2 PARTITION (ds = '3') SELECT * +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +POSTHOOK: Output: default@test_table1@ds=1 +POSTHOOK: Output: default@test_table2@ds=1 +POSTHOOK: Output: default@test_table2@ds=2 +POSTHOOK: Output: default@test_table2@ds=3 +POSTHOOK: Lineage: test_table1 PARTITION(ds=1).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: test_table1 PARTITION(ds=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: test_table2 PARTITION(ds=1).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: test_table2 PARTITION(ds=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: test_table2 PARTITION(ds=2).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: test_table2 PARTITION(ds=2).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: test_table2 PARTITION(ds=3).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: test_table2 PARTITION(ds=3).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +PREHOOK: query: -- Create a bucketed table +CREATE TABLE test_table3 (key INT, value STRING) PARTITIONED BY (ds STRING) CLUSTERED BY (key) SORTED BY (key) INTO 16 BUCKETS +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@test_table3 +POSTHOOK: query: -- Create a bucketed table +CREATE TABLE test_table3 (key INT, value STRING) PARTITIONED BY (ds STRING) CLUSTERED BY (key) SORTED BY (key) INTO 16 BUCKETS +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@test_table3 +PREHOOK: query: -- Insert data into the bucketed table by joining the two bucketed and sorted tables, bucketing is not enforced +EXPLAIN EXTENDED +INSERT OVERWRITE TABLE test_table3 PARTITION (ds = '1') SELECT /*+ MAPJOIN(b) */ a.key, b.value FROM test_table1 a JOIN test_table2 b ON a.key = b.key AND a.ds = '1' AND b.ds >= '1' +PREHOOK: type: QUERY +POSTHOOK: query: -- Insert data into the bucketed table by joining the two bucketed and sorted tables, bucketing is not enforced +EXPLAIN EXTENDED +INSERT OVERWRITE TABLE test_table3 PARTITION (ds = '1') SELECT /*+ MAPJOIN(b) */ a.key, b.value FROM test_table1 a JOIN test_table2 b ON a.key = b.key AND a.ds = '1' AND b.ds >= '1' +POSTHOOK: type: QUERY +ABSTRACT SYNTAX TREE: + +TOK_QUERY + TOK_FROM + TOK_JOIN + TOK_TABREF + TOK_TABNAME + test_table1 + a + TOK_TABREF + TOK_TABNAME + test_table2 + b + AND + AND + = + . + TOK_TABLE_OR_COL + a + key + . + TOK_TABLE_OR_COL + b + key + = + . + TOK_TABLE_OR_COL + a + ds + '1' + >= + . + TOK_TABLE_OR_COL + b + ds + '1' + TOK_INSERT + TOK_DESTINATION + TOK_TAB + TOK_TABNAME + test_table3 + TOK_PARTSPEC + TOK_PARTVAL + ds + '1' + TOK_SELECT + TOK_HINTLIST + TOK_HINT + TOK_MAPJOIN + TOK_HINTARGLIST + b + TOK_SELEXPR + . + TOK_TABLE_OR_COL + a + key + TOK_SELEXPR + . + TOK_TABLE_OR_COL + b + value + + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + Stage-2 depends on stages: Stage-0 + +STAGE PLANS: + Stage: Stage-1 + Spark +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: a + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + GatherStats: false + Filter Operator + isSamplingPred: false + predicate: key is not null (type: boolean) + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Sorted Merge Bucket Map Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 {key} + 1 {value} + keys: + 0 key (type: int) + 1 key (type: int) + outputColumnNames: _col0, _col7 + Position of Big Table: 0 + Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: int), _col7 (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 1 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Static Partition Specification: ds=1/ + Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + SORTBUCKETCOLSPREFIX TRUE + bucket_count 16 + bucket_field_name key + columns key,value + columns.comments + columns.types int:string +#### A masked pattern was here #### + name default.test_table3 + partition_columns ds + partition_columns.types string + serialization.ddl struct test_table3 { i32 key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.test_table3 + TotalFiles: 1 + GatherStats: true + MultiFileSpray: false + Path -> Alias: +#### A masked pattern was here #### + Path -> Partition: +#### A masked pattern was here #### + Partition + base file name: ds=1 + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + partition values: + ds 1 + properties: + COLUMN_STATS_ACCURATE true + bucket_count 16 + bucket_field_name key + columns key,value + columns.comments + columns.types int:string +#### A masked pattern was here #### + name default.test_table1 + numFiles 16 + numRows 500 + partition_columns ds + partition_columns.types string + rawDataSize 5312 + serialization.ddl struct test_table1 { i32 key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + totalSize 5812 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + SORTBUCKETCOLSPREFIX TRUE + bucket_count 16 + bucket_field_name key + columns key,value + columns.comments + columns.types int:string +#### A masked pattern was here #### + name default.test_table1 + partition_columns ds + partition_columns.types string + serialization.ddl struct test_table1 { i32 key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.test_table1 + name: default.test_table1 + Truncated Path -> Alias: + /test_table1/ds=1 [a] + + Stage: Stage-0 + Move Operator + tables: + partition: + ds 1 + replace: true +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + SORTBUCKETCOLSPREFIX TRUE + bucket_count 16 + bucket_field_name key + columns key,value + columns.comments + columns.types int:string +#### A masked pattern was here #### + name default.test_table3 + partition_columns ds + partition_columns.types string + serialization.ddl struct test_table3 { i32 key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.test_table3 + + Stage: Stage-2 + Stats-Aggr Operator +#### A masked pattern was here #### + +PREHOOK: query: INSERT OVERWRITE TABLE test_table3 PARTITION (ds = '1') SELECT /*+ MAPJOIN(b) */ a.key, b.value FROM test_table1 a JOIN test_table2 b ON a.key = b.key AND a.ds = '1' AND b.ds >= '1' +PREHOOK: type: QUERY +PREHOOK: Input: default@test_table1 +PREHOOK: Input: default@test_table1@ds=1 +PREHOOK: Input: default@test_table2 +PREHOOK: Input: default@test_table2@ds=1 +PREHOOK: Input: default@test_table2@ds=2 +PREHOOK: Input: default@test_table2@ds=3 +PREHOOK: Output: default@test_table3@ds=1 +POSTHOOK: query: INSERT OVERWRITE TABLE test_table3 PARTITION (ds = '1') SELECT /*+ MAPJOIN(b) */ a.key, b.value FROM test_table1 a JOIN test_table2 b ON a.key = b.key AND a.ds = '1' AND b.ds >= '1' +POSTHOOK: type: QUERY +POSTHOOK: Input: default@test_table1 +POSTHOOK: Input: default@test_table1@ds=1 +POSTHOOK: Input: default@test_table2 +POSTHOOK: Input: default@test_table2@ds=1 +POSTHOOK: Input: default@test_table2@ds=2 +POSTHOOK: Input: default@test_table2@ds=3 +POSTHOOK: Output: default@test_table3@ds=1 +POSTHOOK: Lineage: test_table3 PARTITION(ds=1).key SIMPLE [(test_table1)a.FieldSchema(name:key, type:int, comment:null), ] +POSTHOOK: Lineage: test_table3 PARTITION(ds=1).value SIMPLE [(test_table2)b.FieldSchema(name:value, type:string, comment:null), ] +PREHOOK: query: -- Join data from a sampled bucket to verify the data is bucketed +SELECT COUNT(*) FROM test_table3 TABLESAMPLE(BUCKET 2 OUT OF 16) a JOIN test_table1 TABLESAMPLE(BUCKET 2 OUT OF 16) b ON a.key = b.key AND a.ds = '1' AND b.ds='1' +PREHOOK: type: QUERY +PREHOOK: Input: default@test_table1 +PREHOOK: Input: default@test_table1@ds=1 +PREHOOK: Input: default@test_table3 +PREHOOK: Input: default@test_table3@ds=1 +#### A masked pattern was here #### +POSTHOOK: query: -- Join data from a sampled bucket to verify the data is bucketed +SELECT COUNT(*) FROM test_table3 TABLESAMPLE(BUCKET 2 OUT OF 16) a JOIN test_table1 TABLESAMPLE(BUCKET 2 OUT OF 16) b ON a.key = b.key AND a.ds = '1' AND b.ds='1' +POSTHOOK: type: QUERY +POSTHOOK: Input: default@test_table1 +POSTHOOK: Input: default@test_table1@ds=1 +POSTHOOK: Input: default@test_table3 +POSTHOOK: Input: default@test_table3@ds=1 +#### A masked pattern was here #### +879 +PREHOOK: query: -- Join data from the sampled buckets of 2 tables to verify the data is bucketed and sorted +explain extended +INSERT OVERWRITE TABLE test_table3 PARTITION (ds = '2') +SELECT /*+mapjoin(b)*/ a.key, concat(a.value, b.value) FROM test_table3 a JOIN test_table1 b ON a.key = b.key AND a.ds = '1' AND b.ds='1' +PREHOOK: type: QUERY +POSTHOOK: query: -- Join data from the sampled buckets of 2 tables to verify the data is bucketed and sorted +explain extended +INSERT OVERWRITE TABLE test_table3 PARTITION (ds = '2') +SELECT /*+mapjoin(b)*/ a.key, concat(a.value, b.value) FROM test_table3 a JOIN test_table1 b ON a.key = b.key AND a.ds = '1' AND b.ds='1' +POSTHOOK: type: QUERY +ABSTRACT SYNTAX TREE: + +TOK_QUERY + TOK_FROM + TOK_JOIN + TOK_TABREF + TOK_TABNAME + test_table3 + a + TOK_TABREF + TOK_TABNAME + test_table1 + b + AND + AND + = + . + TOK_TABLE_OR_COL + a + key + . + TOK_TABLE_OR_COL + b + key + = + . + TOK_TABLE_OR_COL + a + ds + '1' + = + . + TOK_TABLE_OR_COL + b + ds + '1' + TOK_INSERT + TOK_DESTINATION + TOK_TAB + TOK_TABNAME + test_table3 + TOK_PARTSPEC + TOK_PARTVAL + ds + '2' + TOK_SELECT + TOK_HINTLIST + TOK_HINT + TOK_MAPJOIN + TOK_HINTARGLIST + b + TOK_SELEXPR + . + TOK_TABLE_OR_COL + a + key + TOK_SELEXPR + TOK_FUNCTION + concat + . + TOK_TABLE_OR_COL + a + value + . + TOK_TABLE_OR_COL + b + value + + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + Stage-2 depends on stages: Stage-0 + +STAGE PLANS: + Stage: Stage-1 + Spark +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: a + Statistics: Num rows: 3084 Data size: 32904 Basic stats: COMPLETE Column stats: NONE + GatherStats: false + Filter Operator + isSamplingPred: false + predicate: key is not null (type: boolean) + Statistics: Num rows: 1542 Data size: 16452 Basic stats: COMPLETE Column stats: NONE + Sorted Merge Bucket Map Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 {key} {value} + 1 {value} + keys: + 0 key (type: int) + 1 key (type: int) + outputColumnNames: _col0, _col1, _col7 + Position of Big Table: 0 + Statistics: Num rows: 1696 Data size: 18097 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: int), concat(_col1, _col7) (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1696 Data size: 18097 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 1 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Static Partition Specification: ds=2/ + Statistics: Num rows: 1696 Data size: 18097 Basic stats: COMPLETE Column stats: NONE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + SORTBUCKETCOLSPREFIX TRUE + bucket_count 16 + bucket_field_name key + columns key,value + columns.comments + columns.types int:string +#### A masked pattern was here #### + name default.test_table3 + partition_columns ds + partition_columns.types string + serialization.ddl struct test_table3 { i32 key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.test_table3 + TotalFiles: 1 + GatherStats: true + MultiFileSpray: false + Path -> Alias: +#### A masked pattern was here #### + Path -> Partition: +#### A masked pattern was here #### + Partition + base file name: ds=1 + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + partition values: + ds 1 + properties: + COLUMN_STATS_ACCURATE true + bucket_count 16 + bucket_field_name key + columns key,value + columns.comments + columns.types int:string +#### A masked pattern was here #### + name default.test_table3 + numFiles 16 + numRows 3084 + partition_columns ds + partition_columns.types string + rawDataSize 32904 + serialization.ddl struct test_table3 { i32 key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + totalSize 35988 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + SORTBUCKETCOLSPREFIX TRUE + bucket_count 16 + bucket_field_name key + columns key,value + columns.comments + columns.types int:string +#### A masked pattern was here #### + name default.test_table3 + partition_columns ds + partition_columns.types string + serialization.ddl struct test_table3 { i32 key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.test_table3 + name: default.test_table3 + Truncated Path -> Alias: + /test_table3/ds=1 [a] + + Stage: Stage-0 + Move Operator + tables: + partition: + ds 2 + replace: true +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + SORTBUCKETCOLSPREFIX TRUE + bucket_count 16 + bucket_field_name key + columns key,value + columns.comments + columns.types int:string +#### A masked pattern was here #### + name default.test_table3 + partition_columns ds + partition_columns.types string + serialization.ddl struct test_table3 { i32 key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.test_table3 + + Stage: Stage-2 + Stats-Aggr Operator +#### A masked pattern was here #### + +PREHOOK: query: INSERT OVERWRITE TABLE test_table3 PARTITION (ds = '2') +SELECT /*+mapjoin(b)*/ a.key, concat(a.value, b.value) FROM test_table3 a JOIN test_table1 b ON a.key = b.key AND a.ds = '1' AND b.ds='1' +PREHOOK: type: QUERY +PREHOOK: Input: default@test_table1 +PREHOOK: Input: default@test_table1@ds=1 +PREHOOK: Input: default@test_table3 +PREHOOK: Input: default@test_table3@ds=1 +PREHOOK: Output: default@test_table3@ds=2 +POSTHOOK: query: INSERT OVERWRITE TABLE test_table3 PARTITION (ds = '2') +SELECT /*+mapjoin(b)*/ a.key, concat(a.value, b.value) FROM test_table3 a JOIN test_table1 b ON a.key = b.key AND a.ds = '1' AND b.ds='1' +POSTHOOK: type: QUERY +POSTHOOK: Input: default@test_table1 +POSTHOOK: Input: default@test_table1@ds=1 +POSTHOOK: Input: default@test_table3 +POSTHOOK: Input: default@test_table3@ds=1 +POSTHOOK: Output: default@test_table3@ds=2 +POSTHOOK: Lineage: test_table3 PARTITION(ds=2).key SIMPLE [(test_table3)a.FieldSchema(name:key, type:int, comment:null), ] +POSTHOOK: Lineage: test_table3 PARTITION(ds=2).value EXPRESSION [(test_table3)a.FieldSchema(name:value, type:string, comment:null), (test_table1)b.FieldSchema(name:value, type:string, comment:null), ] +PREHOOK: query: SELECT count(*) from test_table3 tablesample (bucket 2 out of 16) a where ds = '2' +PREHOOK: type: QUERY +PREHOOK: Input: default@test_table3 +PREHOOK: Input: default@test_table3@ds=2 +#### A masked pattern was here #### +POSTHOOK: query: SELECT count(*) from test_table3 tablesample (bucket 2 out of 16) a where ds = '2' +POSTHOOK: type: QUERY +POSTHOOK: Input: default@test_table3 +POSTHOOK: Input: default@test_table3@ds=2 +#### A masked pattern was here #### +879
