http://git-wip-us.apache.org/repos/asf/hive/blob/38405c14/ql/src/test/results/clientpositive/llap/auto_sortmerge_join_1.q.out ---------------------------------------------------------------------- diff --git a/ql/src/test/results/clientpositive/llap/auto_sortmerge_join_1.q.out b/ql/src/test/results/clientpositive/llap/auto_sortmerge_join_1.q.out index 36bfac3..8fc43e4 100644 --- a/ql/src/test/results/clientpositive/llap/auto_sortmerge_join_1.q.out +++ b/ql/src/test/results/clientpositive/llap/auto_sortmerge_join_1.q.out @@ -112,11 +112,10 @@ STAGE PLANS: Tez #### A masked pattern was here #### Edges: - Map 2 <- Map 1 (BROADCAST_EDGE) Reducer 3 <- Map 2 (CUSTOM_SIMPLE_EDGE) #### A masked pattern was here #### Vertices: - Map 1 + Map 2 Map Operator Tree: TableScan alias: a @@ -130,22 +129,11 @@ STAGE PLANS: expressions: key (type: string) outputColumnNames: _col0 Statistics: Num rows: 1 Data size: 298 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string) - null sort order: a - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 1 Data size: 298 Basic stats: COMPLETE Column stats: NONE - tag: 0 - auto parallelism: true - Execution mode: llap - LLAP IO: no inputs Path -> Alias: #### A masked pattern was here #### Path -> Partition: #### A masked pattern was here #### Partition - base file name: ds=2008-04-08 input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat partition values: @@ -194,7 +182,6 @@ STAGE PLANS: name: default.bucket_small Truncated Path -> Alias: /bucket_small/ds=2008-04-08 [a] - Map 2 Map Operator Tree: TableScan alias: b @@ -208,15 +195,12 @@ STAGE PLANS: expressions: key (type: string) outputColumnNames: _col0 Statistics: Num rows: 111 Data size: 30842 Basic stats: COMPLETE Column stats: NONE - Map Join Operator + Merge Join Operator condition map: Inner Join 0 to 1 - Estimated key counts: Map 1 => 1 keys: 0 _col0 (type: string) 1 _col0 (type: string) - input vertices: - 0 Map 1 Position of Big Table: 1 Statistics: Num rows: 122 Data size: 33926 Basic stats: COMPLETE Column stats: NONE Group By Operator @@ -232,13 +216,11 @@ STAGE PLANS: value expressions: _col0 (type: bigint) auto parallelism: false Execution mode: llap - LLAP IO: no inputs Path -> Alias: #### A masked pattern was here #### Path -> Partition: #### A masked pattern was here #### Partition - base file name: ds=2008-04-08 input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat partition values: @@ -287,7 +269,6 @@ STAGE PLANS: name: default.bucket_big #### A masked pattern was here #### Partition - base file name: ds=2008-04-09 input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat partition values: @@ -405,77 +386,50 @@ STAGE PLANS: Tez #### A masked pattern was here #### Edges: - Map 1 <- Map 3 (BROADCAST_EDGE) Reducer 2 <- Map 1 (CUSTOM_SIMPLE_EDGE) #### A masked pattern was here #### Vertices: Map 1 Map Operator Tree: TableScan - alias: a - Statistics: Num rows: 116 Data size: 32232 Basic stats: COMPLETE Column stats: NONE + alias: b + Statistics: Num rows: 1 Data size: 298 Basic stats: COMPLETE Column stats: NONE GatherStats: false Filter Operator isSamplingPred: false predicate: key is not null (type: boolean) - Statistics: Num rows: 111 Data size: 30842 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 298 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: key (type: string) outputColumnNames: _col0 - Statistics: Num rows: 111 Data size: 30842 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Inner Join 0 to 1 - Estimated key counts: Map 3 => 1 - keys: - 0 _col0 (type: string) - 1 _col0 (type: string) - input vertices: - 1 Map 3 - Position of Big Table: 0 - Statistics: Num rows: 122 Data size: 33926 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: count() - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - null sort order: - sort order: - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - tag: -1 - value expressions: _col0 (type: bigint) - auto parallelism: false - Execution mode: llap - LLAP IO: no inputs + Statistics: Num rows: 1 Data size: 298 Basic stats: COMPLETE Column stats: NONE Path -> Alias: #### A masked pattern was here #### Path -> Partition: #### A masked pattern was here #### Partition - base file name: ds=2008-04-08 input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat partition values: ds 2008-04-08 properties: - bucket_count 4 + bucket_count 2 bucket_field_name key column.name.delimiter , columns key,value columns.comments columns.types string:string #### A masked pattern was here #### - name default.bucket_big - numFiles 4 + name default.bucket_small + numFiles 2 numRows 0 partition_columns ds partition_columns.types string rawDataSize 0 - serialization.ddl struct bucket_big { string key, string value} + serialization.ddl struct bucket_small { string key, string value} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 5812 + totalSize 114 #### A masked pattern was here #### serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe @@ -483,30 +437,68 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: SORTBUCKETCOLSPREFIX TRUE - bucket_count 4 + bucket_count 2 bucket_field_name key column.name.delimiter , columns key,value columns.comments columns.types string:string #### A masked pattern was here #### - name default.bucket_big + name default.bucket_small partition_columns ds partition_columns.types string - serialization.ddl struct bucket_big { string key, string value} + serialization.ddl struct bucket_small { string key, string value} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe #### A masked pattern was here #### serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.bucket_big - name: default.bucket_big + name: default.bucket_small + name: default.bucket_small + Truncated Path -> Alias: + /bucket_small/ds=2008-04-08 [b] + Map Operator Tree: + TableScan + alias: a + Statistics: Num rows: 116 Data size: 32232 Basic stats: COMPLETE Column stats: NONE + GatherStats: false + Filter Operator + isSamplingPred: false + predicate: key is not null (type: boolean) + Statistics: Num rows: 111 Data size: 30842 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 111 Data size: 30842 Basic stats: COMPLETE Column stats: NONE + Merge Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: string) + 1 _col0 (type: string) + Position of Big Table: 0 + Statistics: Num rows: 122 Data size: 33926 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count() + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + null sort order: + sort order: + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + tag: -1 + value expressions: _col0 (type: bigint) + auto parallelism: false + Execution mode: llap + Path -> Alias: +#### A masked pattern was here #### + Path -> Partition: #### A masked pattern was here #### Partition - base file name: ds=2008-04-09 input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat partition values: - ds 2008-04-09 + ds 2008-04-08 properties: bucket_count 4 bucket_field_name key @@ -549,61 +541,30 @@ STAGE PLANS: serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.bucket_big name: default.bucket_big - Truncated Path -> Alias: - /bucket_big/ds=2008-04-08 [a] - /bucket_big/ds=2008-04-09 [a] - Map 3 - Map Operator Tree: - TableScan - alias: b - Statistics: Num rows: 1 Data size: 298 Basic stats: COMPLETE Column stats: NONE - GatherStats: false - Filter Operator - isSamplingPred: false - predicate: key is not null (type: boolean) - Statistics: Num rows: 1 Data size: 298 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: key (type: string) - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 298 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string) - null sort order: a - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 1 Data size: 298 Basic stats: COMPLETE Column stats: NONE - tag: 1 - auto parallelism: true - Execution mode: llap - LLAP IO: no inputs - Path -> Alias: -#### A masked pattern was here #### - Path -> Partition: #### A masked pattern was here #### Partition - base file name: ds=2008-04-08 input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat partition values: - ds 2008-04-08 + ds 2008-04-09 properties: - bucket_count 2 + bucket_count 4 bucket_field_name key column.name.delimiter , columns key,value columns.comments columns.types string:string #### A masked pattern was here #### - name default.bucket_small - numFiles 2 + name default.bucket_big + numFiles 4 numRows 0 partition_columns ds partition_columns.types string rawDataSize 0 - serialization.ddl struct bucket_small { string key, string value} + serialization.ddl struct bucket_big { string key, string value} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 114 + totalSize 5812 #### A masked pattern was here #### serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe @@ -611,25 +572,26 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: SORTBUCKETCOLSPREFIX TRUE - bucket_count 2 + bucket_count 4 bucket_field_name key column.name.delimiter , columns key,value columns.comments columns.types string:string #### A masked pattern was here #### - name default.bucket_small + name default.bucket_big partition_columns ds partition_columns.types string - serialization.ddl struct bucket_small { string key, string value} + serialization.ddl struct bucket_big { string key, string value} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe #### A masked pattern was here #### serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.bucket_small - name: default.bucket_small + name: default.bucket_big + name: default.bucket_big Truncated Path -> Alias: - /bucket_small/ds=2008-04-08 [b] + /bucket_big/ds=2008-04-08 [a] + /bucket_big/ds=2008-04-09 [a] Reducer 2 Execution mode: llap Needs Tagging: false @@ -698,77 +660,50 @@ STAGE PLANS: Tez #### A masked pattern was here #### Edges: - Map 1 <- Map 3 (BROADCAST_EDGE) Reducer 2 <- Map 1 (CUSTOM_SIMPLE_EDGE) #### A masked pattern was here #### Vertices: Map 1 Map Operator Tree: TableScan - alias: a - Statistics: Num rows: 116 Data size: 32232 Basic stats: COMPLETE Column stats: NONE + alias: b + Statistics: Num rows: 1 Data size: 298 Basic stats: COMPLETE Column stats: NONE GatherStats: false Filter Operator isSamplingPred: false predicate: key is not null (type: boolean) - Statistics: Num rows: 111 Data size: 30842 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 298 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: key (type: string) outputColumnNames: _col0 - Statistics: Num rows: 111 Data size: 30842 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Inner Join 0 to 1 - Estimated key counts: Map 3 => 1 - keys: - 0 _col0 (type: string) - 1 _col0 (type: string) - input vertices: - 1 Map 3 - Position of Big Table: 0 - Statistics: Num rows: 122 Data size: 33926 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: count() - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - null sort order: - sort order: - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - tag: -1 - value expressions: _col0 (type: bigint) - auto parallelism: false - Execution mode: llap - LLAP IO: no inputs + Statistics: Num rows: 1 Data size: 298 Basic stats: COMPLETE Column stats: NONE Path -> Alias: #### A masked pattern was here #### Path -> Partition: #### A masked pattern was here #### Partition - base file name: ds=2008-04-08 input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat partition values: ds 2008-04-08 properties: - bucket_count 4 + bucket_count 2 bucket_field_name key column.name.delimiter , columns key,value columns.comments columns.types string:string #### A masked pattern was here #### - name default.bucket_big - numFiles 4 + name default.bucket_small + numFiles 2 numRows 0 partition_columns ds partition_columns.types string rawDataSize 0 - serialization.ddl struct bucket_big { string key, string value} + serialization.ddl struct bucket_small { string key, string value} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 5812 + totalSize 114 #### A masked pattern was here #### serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe @@ -776,30 +711,68 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: SORTBUCKETCOLSPREFIX TRUE - bucket_count 4 + bucket_count 2 bucket_field_name key column.name.delimiter , columns key,value columns.comments columns.types string:string #### A masked pattern was here #### - name default.bucket_big + name default.bucket_small partition_columns ds partition_columns.types string - serialization.ddl struct bucket_big { string key, string value} + serialization.ddl struct bucket_small { string key, string value} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe #### A masked pattern was here #### serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.bucket_big - name: default.bucket_big + name: default.bucket_small + name: default.bucket_small + Truncated Path -> Alias: + /bucket_small/ds=2008-04-08 [b] + Map Operator Tree: + TableScan + alias: a + Statistics: Num rows: 116 Data size: 32232 Basic stats: COMPLETE Column stats: NONE + GatherStats: false + Filter Operator + isSamplingPred: false + predicate: key is not null (type: boolean) + Statistics: Num rows: 111 Data size: 30842 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 111 Data size: 30842 Basic stats: COMPLETE Column stats: NONE + Merge Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: string) + 1 _col0 (type: string) + Position of Big Table: 0 + Statistics: Num rows: 122 Data size: 33926 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count() + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + null sort order: + sort order: + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + tag: -1 + value expressions: _col0 (type: bigint) + auto parallelism: false + Execution mode: llap + Path -> Alias: +#### A masked pattern was here #### + Path -> Partition: #### A masked pattern was here #### Partition - base file name: ds=2008-04-09 input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat partition values: - ds 2008-04-09 + ds 2008-04-08 properties: bucket_count 4 bucket_field_name key @@ -842,61 +815,30 @@ STAGE PLANS: serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.bucket_big name: default.bucket_big - Truncated Path -> Alias: - /bucket_big/ds=2008-04-08 [a] - /bucket_big/ds=2008-04-09 [a] - Map 3 - Map Operator Tree: - TableScan - alias: b - Statistics: Num rows: 1 Data size: 298 Basic stats: COMPLETE Column stats: NONE - GatherStats: false - Filter Operator - isSamplingPred: false - predicate: key is not null (type: boolean) - Statistics: Num rows: 1 Data size: 298 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: key (type: string) - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 298 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string) - null sort order: a - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 1 Data size: 298 Basic stats: COMPLETE Column stats: NONE - tag: 1 - auto parallelism: true - Execution mode: llap - LLAP IO: no inputs - Path -> Alias: -#### A masked pattern was here #### - Path -> Partition: #### A masked pattern was here #### Partition - base file name: ds=2008-04-08 input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat partition values: - ds 2008-04-08 + ds 2008-04-09 properties: - bucket_count 2 + bucket_count 4 bucket_field_name key column.name.delimiter , columns key,value columns.comments columns.types string:string #### A masked pattern was here #### - name default.bucket_small - numFiles 2 + name default.bucket_big + numFiles 4 numRows 0 partition_columns ds partition_columns.types string rawDataSize 0 - serialization.ddl struct bucket_small { string key, string value} + serialization.ddl struct bucket_big { string key, string value} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 114 + totalSize 5812 #### A masked pattern was here #### serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe @@ -904,25 +846,26 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: SORTBUCKETCOLSPREFIX TRUE - bucket_count 2 + bucket_count 4 bucket_field_name key column.name.delimiter , columns key,value columns.comments columns.types string:string #### A masked pattern was here #### - name default.bucket_small + name default.bucket_big partition_columns ds partition_columns.types string - serialization.ddl struct bucket_small { string key, string value} + serialization.ddl struct bucket_big { string key, string value} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe #### A masked pattern was here #### serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.bucket_small - name: default.bucket_small + name: default.bucket_big + name: default.bucket_big Truncated Path -> Alias: - /bucket_small/ds=2008-04-08 [b] + /bucket_big/ds=2008-04-08 [a] + /bucket_big/ds=2008-04-09 [a] Reducer 2 Execution mode: llap Needs Tagging: false
http://git-wip-us.apache.org/repos/asf/hive/blob/38405c14/ql/src/test/results/clientpositive/llap/auto_sortmerge_join_10.q.out ---------------------------------------------------------------------- diff --git a/ql/src/test/results/clientpositive/llap/auto_sortmerge_join_10.q.out b/ql/src/test/results/clientpositive/llap/auto_sortmerge_join_10.q.out index c1459d5..a724617 100644 --- a/ql/src/test/results/clientpositive/llap/auto_sortmerge_join_10.q.out +++ b/ql/src/test/results/clientpositive/llap/auto_sortmerge_join_10.q.out @@ -71,9 +71,10 @@ STAGE PLANS: Tez #### A masked pattern was here #### Edges: - Map 1 <- Map 5 (BROADCAST_EDGE), Union 2 (CONTAINS) - Map 4 <- Map 5 (BROADCAST_EDGE), Union 2 (CONTAINS) - Reducer 3 <- Union 2 (CUSTOM_SIMPLE_EDGE) + Map 1 <- Union 2 (CONTAINS) + Map 5 <- Union 2 (CONTAINS) + Reducer 3 <- Map 6 (SIMPLE_EDGE), Union 2 (SIMPLE_EDGE) + Reducer 4 <- Reducer 3 (CUSTOM_SIMPLE_EDGE) #### A masked pattern was here #### Vertices: Map 1 @@ -88,27 +89,14 @@ STAGE PLANS: expressions: key (type: int) outputColumnNames: _col0 Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE - Map Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 _col0 (type: int) - 1 _col0 (type: int) - input vertices: - 1 Map 5 - Statistics: Num rows: 9 Data size: 72 Basic stats: COMPLETE Column stats: COMPLETE - Group By Operator - aggregations: count() - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - sort order: - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: bigint) + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: llap LLAP IO: no inputs - Map 4 + Map 5 Map Operator Tree: TableScan alias: a @@ -120,27 +108,14 @@ STAGE PLANS: expressions: key (type: int) outputColumnNames: _col0 Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE - Map Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 _col0 (type: int) - 1 _col0 (type: int) - input vertices: - 1 Map 5 - Statistics: Num rows: 9 Data size: 72 Basic stats: COMPLETE Column stats: COMPLETE - Group By Operator - aggregations: count() - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - sort order: - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: bigint) + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: llap LLAP IO: no inputs - Map 5 + Map 6 Map Operator Tree: TableScan alias: a @@ -157,16 +132,30 @@ STAGE PLANS: sort order: + Map-reduce partition columns: _col0 (type: int) Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: int) - sort order: + - Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: llap LLAP IO: no inputs Reducer 3 Execution mode: llap Reduce Operator Tree: + Merge Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: int) + 1 _col0 (type: int) + Statistics: Num rows: 9 Data size: 72 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: count() + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: bigint) + Reducer 4 + Execution mode: llap + Reduce Operator Tree: Group By Operator aggregations: count(VALUE._col0) mode: mergepartial @@ -240,8 +229,7 @@ STAGE PLANS: Tez #### A masked pattern was here #### Edges: - Map 2 <- Map 1 (BROADCAST_EDGE) - Reducer 3 <- Map 2 (CUSTOM_SIMPLE_EDGE) + Reducer 2 <- Map 1 (CUSTOM_SIMPLE_EDGE) #### A masked pattern was here #### Vertices: Map 1 @@ -252,19 +240,10 @@ STAGE PLANS: Filter Operator predicate: (key < 6) (type: boolean) Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE - Group By Operator - keys: key (type: int) - mode: final + Select Operator + expressions: key (type: int) outputColumnNames: _col0 - Statistics: Num rows: 2 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: int) - sort order: + - Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 2 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE - Execution mode: llap - LLAP IO: no inputs - Map 2 + Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE Map Operator Tree: TableScan alias: a @@ -272,18 +251,17 @@ STAGE PLANS: Filter Operator predicate: (key < 6) (type: boolean) Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: key (type: int) + Group By Operator + keys: key (type: int) + mode: final outputColumnNames: _col0 - Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE - Map Join Operator + Statistics: Num rows: 2 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + Merge Join Operator condition map: Inner Join 0 to 1 keys: 0 _col0 (type: int) 1 _col0 (type: int) - input vertices: - 0 Map 1 Statistics: Num rows: 3 Data size: 24 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: count() @@ -295,8 +273,7 @@ STAGE PLANS: Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: bigint) Execution mode: llap - LLAP IO: no inputs - Reducer 3 + Reducer 2 Execution mode: llap Reduce Operator Tree: Group By Operator http://git-wip-us.apache.org/repos/asf/hive/blob/38405c14/ql/src/test/results/clientpositive/llap/auto_sortmerge_join_11.q.out ---------------------------------------------------------------------- diff --git a/ql/src/test/results/clientpositive/llap/auto_sortmerge_join_11.q.out b/ql/src/test/results/clientpositive/llap/auto_sortmerge_join_11.q.out index 37d97d2..3eb8fde 100644 --- a/ql/src/test/results/clientpositive/llap/auto_sortmerge_join_11.q.out +++ b/ql/src/test/results/clientpositive/llap/auto_sortmerge_join_11.q.out @@ -1,8 +1,8 @@ -PREHOOK: query: CREATE TABLE bucket_small (key string, value string) partitioned by (ds string) CLUSTERED BY (key) INTO 2 BUCKETS STORED AS TEXTFILE +PREHOOK: query: CREATE TABLE bucket_small (key string, value string) partitioned by (ds string) CLUSTERED BY (key) SORTED BY (KEY) INTO 2 BUCKETS STORED AS TEXTFILE PREHOOK: type: CREATETABLE PREHOOK: Output: database:default PREHOOK: Output: default@bucket_small -POSTHOOK: query: CREATE TABLE bucket_small (key string, value string) partitioned by (ds string) CLUSTERED BY (key) INTO 2 BUCKETS STORED AS TEXTFILE +POSTHOOK: query: CREATE TABLE bucket_small (key string, value string) partitioned by (ds string) CLUSTERED BY (key) SORTED BY (KEY) INTO 2 BUCKETS STORED AS TEXTFILE POSTHOOK: type: CREATETABLE POSTHOOK: Output: database:default POSTHOOK: Output: default@bucket_small @@ -23,11 +23,11 @@ POSTHOOK: query: load data local inpath '../../data/files/smallsrcsortbucket2out POSTHOOK: type: LOAD #### A masked pattern was here #### POSTHOOK: Output: default@bucket_small@ds=2008-04-08 -PREHOOK: query: CREATE TABLE bucket_big (key string, value string) partitioned by (ds string) CLUSTERED BY (key) INTO 4 BUCKETS STORED AS TEXTFILE +PREHOOK: query: CREATE TABLE bucket_big (key string, value string) partitioned by (ds string) CLUSTERED BY (key) SORTED BY(KEY) INTO 4 BUCKETS STORED AS TEXTFILE PREHOOK: type: CREATETABLE PREHOOK: Output: database:default PREHOOK: Output: default@bucket_big -POSTHOOK: query: CREATE TABLE bucket_big (key string, value string) partitioned by (ds string) CLUSTERED BY (key) INTO 4 BUCKETS STORED AS TEXTFILE +POSTHOOK: query: CREATE TABLE bucket_big (key string, value string) partitioned by (ds string) CLUSTERED BY (key) SORTED BY(KEY) INTO 4 BUCKETS STORED AS TEXTFILE POSTHOOK: type: CREATETABLE POSTHOOK: Output: database:default POSTHOOK: Output: default@bucket_big @@ -110,8 +110,8 @@ STAGE PLANS: Tez #### A masked pattern was here #### Edges: - Map 2 <- Map 1 (BROADCAST_EDGE) - Reducer 3 <- Map 2 (CUSTOM_SIMPLE_EDGE) + Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 4 (SIMPLE_EDGE) + Reducer 3 <- Reducer 2 (CUSTOM_SIMPLE_EDGE) #### A masked pattern was here #### Vertices: Map 1 @@ -172,6 +172,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: + SORTBUCKETCOLSPREFIX TRUE bucket_count 2 bucket_field_name key column.name.delimiter , @@ -191,7 +192,7 @@ STAGE PLANS: name: default.bucket_small Truncated Path -> Alias: /bucket_small/ds=2008-04-08 [a] - Map 2 + Map 4 Map Operator Tree: TableScan alias: b @@ -205,29 +206,14 @@ STAGE PLANS: expressions: key (type: string) outputColumnNames: _col0 Statistics: Num rows: 111 Data size: 30842 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Inner Join 0 to 1 - Estimated key counts: Map 1 => 1 - keys: - 0 _col0 (type: string) - 1 _col0 (type: string) - input vertices: - 0 Map 1 - Position of Big Table: 1 - Statistics: Num rows: 122 Data size: 33926 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: count() - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - null sort order: - sort order: - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - tag: -1 - value expressions: _col0 (type: bigint) - auto parallelism: false + Reduce Output Operator + key expressions: _col0 (type: string) + null sort order: a + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 111 Data size: 30842 Basic stats: COMPLETE Column stats: NONE + tag: 1 + auto parallelism: true Execution mode: llap LLAP IO: no inputs Path -> Alias: @@ -264,6 +250,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: + SORTBUCKETCOLSPREFIX TRUE bucket_count 4 bucket_field_name key column.name.delimiter , @@ -312,6 +299,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: + SORTBUCKETCOLSPREFIX TRUE bucket_count 4 bucket_field_name key column.name.delimiter , @@ -332,6 +320,30 @@ STAGE PLANS: Truncated Path -> Alias: /bucket_big/ds=2008-04-08 [b] /bucket_big/ds=2008-04-09 [b] + Reducer 2 + Execution mode: llap + Needs Tagging: false + Reduce Operator Tree: + Merge Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: string) + 1 _col0 (type: string) + Position of Big Table: 1 + Statistics: Num rows: 122 Data size: 33926 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count() + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + null sort order: + sort order: + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + tag: -1 + value expressions: _col0 (type: bigint) + auto parallelism: false Reducer 3 Execution mode: llap Needs Tagging: false @@ -400,11 +412,10 @@ STAGE PLANS: Tez #### A masked pattern was here #### Edges: - Map 2 <- Map 1 (BROADCAST_EDGE) Reducer 3 <- Map 2 (CUSTOM_SIMPLE_EDGE) #### A masked pattern was here #### Vertices: - Map 1 + Map 2 Map Operator Tree: TableScan alias: a @@ -418,22 +429,11 @@ STAGE PLANS: expressions: key (type: string) outputColumnNames: _col0 Statistics: Num rows: 1 Data size: 298 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string) - null sort order: a - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 1 Data size: 298 Basic stats: COMPLETE Column stats: NONE - tag: 0 - auto parallelism: true - Execution mode: llap - LLAP IO: no inputs Path -> Alias: #### A masked pattern was here #### Path -> Partition: #### A masked pattern was here #### Partition - base file name: ds=2008-04-08 input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat partition values: @@ -462,6 +462,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: + SORTBUCKETCOLSPREFIX TRUE bucket_count 2 bucket_field_name key column.name.delimiter , @@ -481,7 +482,6 @@ STAGE PLANS: name: default.bucket_small Truncated Path -> Alias: /bucket_small/ds=2008-04-08 [a] - Map 2 Map Operator Tree: TableScan alias: b @@ -495,15 +495,12 @@ STAGE PLANS: expressions: key (type: string) outputColumnNames: _col0 Statistics: Num rows: 111 Data size: 30842 Basic stats: COMPLETE Column stats: NONE - Map Join Operator + Merge Join Operator condition map: Inner Join 0 to 1 - Estimated key counts: Map 1 => 1 keys: 0 _col0 (type: string) 1 _col0 (type: string) - input vertices: - 0 Map 1 Position of Big Table: 1 Statistics: Num rows: 122 Data size: 33926 Basic stats: COMPLETE Column stats: NONE Group By Operator @@ -519,13 +516,11 @@ STAGE PLANS: value expressions: _col0 (type: bigint) auto parallelism: false Execution mode: llap - LLAP IO: no inputs Path -> Alias: #### A masked pattern was here #### Path -> Partition: #### A masked pattern was here #### Partition - base file name: ds=2008-04-08 input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat partition values: @@ -554,6 +549,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: + SORTBUCKETCOLSPREFIX TRUE bucket_count 4 bucket_field_name key column.name.delimiter , @@ -573,7 +569,6 @@ STAGE PLANS: name: default.bucket_big #### A masked pattern was here #### Partition - base file name: ds=2008-04-09 input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat partition values: @@ -602,6 +597,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: + SORTBUCKETCOLSPREFIX TRUE bucket_count 4 bucket_field_name key column.name.delimiter , @@ -690,11 +686,10 @@ STAGE PLANS: Tez #### A masked pattern was here #### Edges: - Map 2 <- Map 1 (BROADCAST_EDGE) Reducer 3 <- Map 2 (CUSTOM_SIMPLE_EDGE) #### A masked pattern was here #### Vertices: - Map 1 + Map 2 Map Operator Tree: TableScan alias: a @@ -704,22 +699,11 @@ STAGE PLANS: isSamplingPred: false predicate: key is not null (type: boolean) Statistics: Num rows: 1 Data size: 298 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: key (type: string) - null sort order: a - sort order: + - Map-reduce partition columns: key (type: string) - Statistics: Num rows: 1 Data size: 298 Basic stats: COMPLETE Column stats: NONE - tag: 0 - auto parallelism: true - Execution mode: llap - LLAP IO: no inputs Path -> Alias: #### A masked pattern was here #### Path -> Partition: #### A masked pattern was here #### Partition - base file name: ds=2008-04-08 input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat partition values: @@ -748,6 +732,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: + SORTBUCKETCOLSPREFIX TRUE bucket_count 2 bucket_field_name key column.name.delimiter , @@ -767,7 +752,6 @@ STAGE PLANS: name: default.bucket_small Truncated Path -> Alias: /bucket_small/ds=2008-04-08 [a] - Map 2 Map Operator Tree: TableScan alias: b @@ -777,15 +761,12 @@ STAGE PLANS: isSamplingPred: false predicate: key is not null (type: boolean) Statistics: Num rows: 111 Data size: 30842 Basic stats: COMPLETE Column stats: NONE - Map Join Operator + Merge Join Operator condition map: Inner Join 0 to 1 - Estimated key counts: Map 1 => 1 keys: 0 key (type: string) 1 key (type: string) - input vertices: - 0 Map 1 Position of Big Table: 1 Statistics: Num rows: 122 Data size: 33926 Basic stats: COMPLETE Column stats: NONE Group By Operator @@ -801,13 +782,11 @@ STAGE PLANS: value expressions: _col0 (type: bigint) auto parallelism: false Execution mode: llap - LLAP IO: no inputs Path -> Alias: #### A masked pattern was here #### Path -> Partition: #### A masked pattern was here #### Partition - base file name: ds=2008-04-08 input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat partition values: @@ -836,6 +815,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: + SORTBUCKETCOLSPREFIX TRUE bucket_count 4 bucket_field_name key column.name.delimiter , @@ -855,7 +835,6 @@ STAGE PLANS: name: default.bucket_big #### A masked pattern was here #### Partition - base file name: ds=2008-04-09 input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat partition values: @@ -884,6 +863,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: + SORTBUCKETCOLSPREFIX TRUE bucket_count 4 bucket_field_name key column.name.delimiter , @@ -972,11 +952,10 @@ STAGE PLANS: Tez #### A masked pattern was here #### Edges: - Map 2 <- Map 1 (BROADCAST_EDGE), Map 4 (BROADCAST_EDGE) Reducer 3 <- Map 2 (CUSTOM_SIMPLE_EDGE) #### A masked pattern was here #### Vertices: - Map 1 + Map 2 Map Operator Tree: TableScan alias: a @@ -986,22 +965,11 @@ STAGE PLANS: isSamplingPred: false predicate: key is not null (type: boolean) Statistics: Num rows: 1 Data size: 298 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: key (type: string) - null sort order: a - sort order: + - Map-reduce partition columns: key (type: string) - Statistics: Num rows: 1 Data size: 298 Basic stats: COMPLETE Column stats: NONE - tag: 0 - auto parallelism: true - Execution mode: llap - LLAP IO: no inputs Path -> Alias: #### A masked pattern was here #### Path -> Partition: #### A masked pattern was here #### Partition - base file name: ds=2008-04-08 input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat partition values: @@ -1030,6 +998,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: + SORTBUCKETCOLSPREFIX TRUE bucket_count 2 bucket_field_name key column.name.delimiter , @@ -1049,50 +1018,20 @@ STAGE PLANS: name: default.bucket_small Truncated Path -> Alias: /bucket_small/ds=2008-04-08 [a] - Map 2 Map Operator Tree: TableScan - alias: b + alias: c Statistics: Num rows: 116 Data size: 32232 Basic stats: COMPLETE Column stats: NONE GatherStats: false Filter Operator isSamplingPred: false predicate: key is not null (type: boolean) Statistics: Num rows: 111 Data size: 30842 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Inner Join 0 to 1 - Inner Join 0 to 2 - Estimated key counts: Map 1 => 1, Map 4 => 23 - keys: - 0 key (type: string) - 1 key (type: string) - 2 key (type: string) - input vertices: - 0 Map 1 - 2 Map 4 - Position of Big Table: 1 - Statistics: Num rows: 244 Data size: 67852 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: count() - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - null sort order: - sort order: - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - tag: -1 - value expressions: _col0 (type: bigint) - auto parallelism: false - Execution mode: llap - LLAP IO: no inputs Path -> Alias: #### A masked pattern was here #### Path -> Partition: #### A masked pattern was here #### Partition - base file name: ds=2008-04-08 input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat partition values: @@ -1121,6 +1060,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: + SORTBUCKETCOLSPREFIX TRUE bucket_count 4 bucket_field_name key column.name.delimiter , @@ -1140,7 +1080,6 @@ STAGE PLANS: name: default.bucket_big #### A masked pattern was here #### Partition - base file name: ds=2008-04-09 input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat partition values: @@ -1169,6 +1108,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: + SORTBUCKETCOLSPREFIX TRUE bucket_count 4 bucket_field_name key column.name.delimiter , @@ -1187,34 +1127,45 @@ STAGE PLANS: name: default.bucket_big name: default.bucket_big Truncated Path -> Alias: - /bucket_big/ds=2008-04-08 [b] - /bucket_big/ds=2008-04-09 [b] - Map 4 + /bucket_big/ds=2008-04-08 [c] + /bucket_big/ds=2008-04-09 [c] Map Operator Tree: TableScan - alias: c + alias: b Statistics: Num rows: 116 Data size: 32232 Basic stats: COMPLETE Column stats: NONE GatherStats: false Filter Operator isSamplingPred: false predicate: key is not null (type: boolean) Statistics: Num rows: 111 Data size: 30842 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: key (type: string) - null sort order: a - sort order: + - Map-reduce partition columns: key (type: string) - Statistics: Num rows: 111 Data size: 30842 Basic stats: COMPLETE Column stats: NONE - tag: 2 - auto parallelism: true + Merge Join Operator + condition map: + Inner Join 0 to 1 + Inner Join 0 to 2 + keys: + 0 key (type: string) + 1 key (type: string) + 2 key (type: string) + Position of Big Table: 1 + Statistics: Num rows: 244 Data size: 67852 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count() + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + null sort order: + sort order: + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + tag: -1 + value expressions: _col0 (type: bigint) + auto parallelism: false Execution mode: llap - LLAP IO: no inputs Path -> Alias: #### A masked pattern was here #### Path -> Partition: #### A masked pattern was here #### Partition - base file name: ds=2008-04-08 input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat partition values: @@ -1243,6 +1194,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: + SORTBUCKETCOLSPREFIX TRUE bucket_count 4 bucket_field_name key column.name.delimiter , @@ -1262,7 +1214,6 @@ STAGE PLANS: name: default.bucket_big #### A masked pattern was here #### Partition - base file name: ds=2008-04-09 input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat partition values: @@ -1291,6 +1242,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: + SORTBUCKETCOLSPREFIX TRUE bucket_count 4 bucket_field_name key column.name.delimiter , @@ -1309,8 +1261,8 @@ STAGE PLANS: name: default.bucket_big name: default.bucket_big Truncated Path -> Alias: - /bucket_big/ds=2008-04-08 [c] - /bucket_big/ds=2008-04-09 [c] + /bucket_big/ds=2008-04-08 [b] + /bucket_big/ds=2008-04-09 [b] Reducer 3 Execution mode: llap Needs Tagging: false http://git-wip-us.apache.org/repos/asf/hive/blob/38405c14/ql/src/test/results/clientpositive/llap/auto_sortmerge_join_12.q.out ---------------------------------------------------------------------- diff --git a/ql/src/test/results/clientpositive/llap/auto_sortmerge_join_12.q.out b/ql/src/test/results/clientpositive/llap/auto_sortmerge_join_12.q.out index 6555736..c14441a 100644 --- a/ql/src/test/results/clientpositive/llap/auto_sortmerge_join_12.q.out +++ b/ql/src/test/results/clientpositive/llap/auto_sortmerge_join_12.q.out @@ -134,7 +134,7 @@ POSTHOOK: query: load data local inpath '../../data/files/smallsrcsortbucket3out POSTHOOK: type: LOAD #### A masked pattern was here #### POSTHOOK: Output: default@bucket_medium@ds=2008-04-08 -Warning: Shuffle Join MERGEJOIN[34][tables = [$hdt$_1, $hdt$_2, $hdt$_0, $hdt$_3]] in Stage 'Reducer 4' is a cross product +Warning: Shuffle Join MERGEJOIN[36][tables = [$hdt$_1, $hdt$_2, $hdt$_0, $hdt$_3]] in Stage 'Reducer 4' is a cross product PREHOOK: query: explain extended select count(*) FROM bucket_small a JOIN bucket_medium b ON a.key = b.key JOIN bucket_big c ON c.key = b.key JOIN bucket_medium d ON c.key = b.key PREHOOK: type: QUERY POSTHOOK: query: explain extended select count(*) FROM bucket_small a JOIN bucket_medium b ON a.key = b.key JOIN bucket_big c ON c.key = b.key JOIN bucket_medium d ON c.key = b.key @@ -148,12 +148,11 @@ STAGE PLANS: Tez #### A masked pattern was here #### Edges: - Map 3 <- Map 1 (BROADCAST_EDGE), Map 2 (BROADCAST_EDGE) Reducer 4 <- Map 3 (XPROD_EDGE), Map 6 (XPROD_EDGE) Reducer 5 <- Reducer 4 (CUSTOM_SIMPLE_EDGE) #### A masked pattern was here #### Vertices: - Map 1 + Map 3 Map Operator Tree: TableScan alias: a @@ -167,22 +166,11 @@ STAGE PLANS: expressions: key (type: string) outputColumnNames: _col0 Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string) - null sort order: a - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: NONE - tag: 0 - auto parallelism: true - Execution mode: llap - LLAP IO: no inputs Path -> Alias: #### A masked pattern was here #### Path -> Partition: #### A masked pattern was here #### Partition - base file name: ds=2008-04-08 input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat partition values: @@ -231,7 +219,6 @@ STAGE PLANS: name: default.bucket_small Truncated Path -> Alias: /bucket_small/ds=2008-04-08 [a] - Map 2 Map Operator Tree: TableScan alias: b @@ -245,22 +232,11 @@ STAGE PLANS: expressions: key (type: string) outputColumnNames: _col0 Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string) - null sort order: a - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: NONE - tag: 1 - auto parallelism: true - Execution mode: llap - LLAP IO: no inputs Path -> Alias: #### A masked pattern was here #### Path -> Partition: #### A masked pattern was here #### Partition - base file name: ds=2008-04-08 input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat partition values: @@ -309,7 +285,6 @@ STAGE PLANS: name: default.bucket_medium Truncated Path -> Alias: /bucket_medium/ds=2008-04-08 [b] - Map 3 Map Operator Tree: TableScan alias: c @@ -323,18 +298,14 @@ STAGE PLANS: expressions: key (type: string) outputColumnNames: _col0 Statistics: Num rows: 111 Data size: 19719 Basic stats: COMPLETE Column stats: NONE - Map Join Operator + Merge Join Operator condition map: Inner Join 0 to 1 Inner Join 1 to 2 - Estimated key counts: Map 1 => 1, Map 2 => 1 keys: 0 _col0 (type: string) 1 _col0 (type: string) 2 _col0 (type: string) - input vertices: - 0 Map 1 - 1 Map 2 Position of Big Table: 2 Statistics: Num rows: 244 Data size: 43381 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator @@ -344,13 +315,11 @@ STAGE PLANS: tag: 0 auto parallelism: false Execution mode: llap - LLAP IO: no inputs Path -> Alias: #### A masked pattern was here #### Path -> Partition: #### A masked pattern was here #### Partition - base file name: ds=2008-04-08 input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat partition values: @@ -399,7 +368,6 @@ STAGE PLANS: name: default.bucket_big #### A masked pattern was here #### Partition - base file name: ds=2008-04-09 input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat partition values: @@ -581,7 +549,7 @@ STAGE PLANS: Processor Tree: ListSink -Warning: Shuffle Join MERGEJOIN[34][tables = [$hdt$_1, $hdt$_2, $hdt$_0, $hdt$_3]] in Stage 'Reducer 4' is a cross product +Warning: Shuffle Join MERGEJOIN[36][tables = [$hdt$_1, $hdt$_2, $hdt$_0, $hdt$_3]] in Stage 'Reducer 4' is a cross product PREHOOK: query: select count(*) FROM bucket_small a JOIN bucket_medium b ON a.key = b.key JOIN bucket_big c ON c.key = b.key JOIN bucket_medium d ON c.key = b.key PREHOOK: type: QUERY PREHOOK: Input: default@bucket_big http://git-wip-us.apache.org/repos/asf/hive/blob/38405c14/ql/src/test/results/clientpositive/llap/auto_sortmerge_join_13.q.out ---------------------------------------------------------------------- diff --git a/ql/src/test/results/clientpositive/llap/auto_sortmerge_join_13.q.out b/ql/src/test/results/clientpositive/llap/auto_sortmerge_join_13.q.out index b78a517..78d02b8 100644 --- a/ql/src/test/results/clientpositive/llap/auto_sortmerge_join_13.q.out +++ b/ql/src/test/results/clientpositive/llap/auto_sortmerge_join_13.q.out @@ -79,7 +79,6 @@ STAGE PLANS: Tez #### A masked pattern was here #### Edges: - Map 1 <- Map 4 (BROADCAST_EDGE) Reducer 2 <- Map 1 (CUSTOM_SIMPLE_EDGE) Reducer 3 <- Map 1 (CUSTOM_SIMPLE_EDGE) #### A masked pattern was here #### @@ -87,6 +86,17 @@ STAGE PLANS: Map 1 Map Operator Tree: TableScan + alias: b + Statistics: Num rows: 10 Data size: 930 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + predicate: key is not null (type: boolean) + Statistics: Num rows: 10 Data size: 930 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: key (type: int), value (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 10 Data size: 930 Basic stats: COMPLETE Column stats: COMPLETE + Map Operator Tree: + TableScan alias: a Statistics: Num rows: 10 Data size: 930 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator @@ -96,15 +106,13 @@ STAGE PLANS: expressions: key (type: int), value (type: string) outputColumnNames: _col0, _col1 Statistics: Num rows: 10 Data size: 930 Basic stats: COMPLETE Column stats: COMPLETE - Map Join Operator + Merge Join Operator condition map: Inner Join 0 to 1 keys: 0 _col0 (type: int) 1 _col0 (type: int) outputColumnNames: _col0, _col1, _col2, _col3 - input vertices: - 1 Map 4 Statistics: Num rows: 16 Data size: 2976 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: _col0 (type: int), _col2 (type: int) @@ -157,27 +165,6 @@ STAGE PLANS: Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: struct<columntype:string,maxlength:bigint,sumlength:bigint,count:bigint,countnulls:bigint,bitvector:binary>), _col1 (type: struct<columntype:string,maxlength:bigint,sumlength:bigint,count:bigint,countnulls:bigint,bitvector:binary>) Execution mode: llap - LLAP IO: no inputs - Map 4 - Map Operator Tree: - TableScan - alias: b - Statistics: Num rows: 10 Data size: 930 Basic stats: COMPLETE Column stats: COMPLETE - Filter Operator - predicate: key is not null (type: boolean) - Statistics: Num rows: 10 Data size: 930 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: key (type: int), value (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 10 Data size: 930 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: int) - sort order: + - Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 10 Data size: 930 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col1 (type: string) - Execution mode: llap - LLAP IO: no inputs Reducer 2 Execution mode: llap Reduce Operator Tree: http://git-wip-us.apache.org/repos/asf/hive/blob/38405c14/ql/src/test/results/clientpositive/llap/auto_sortmerge_join_14.q.out ---------------------------------------------------------------------- diff --git a/ql/src/test/results/clientpositive/llap/auto_sortmerge_join_14.q.out b/ql/src/test/results/clientpositive/llap/auto_sortmerge_join_14.q.out index 72d2c62..235c13a 100644 --- a/ql/src/test/results/clientpositive/llap/auto_sortmerge_join_14.q.out +++ b/ql/src/test/results/clientpositive/llap/auto_sortmerge_join_14.q.out @@ -49,27 +49,32 @@ STAGE PLANS: Tez #### A masked pattern was here #### Edges: - Map 1 <- Map 3 (BROADCAST_EDGE) Reducer 2 <- Map 1 (CUSTOM_SIMPLE_EDGE) #### A masked pattern was here #### Vertices: Map 1 Map Operator Tree: TableScan + alias: b + Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: key (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE + Map Operator Tree: + TableScan alias: a Statistics: Num rows: 20 Data size: 80 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: int) outputColumnNames: _col0 Statistics: Num rows: 20 Data size: 80 Basic stats: COMPLETE Column stats: COMPLETE - Map Join Operator + Merge Join Operator condition map: Left Outer Join 0 to 1 keys: 0 _col0 (type: int) 1 _col0 (type: int) - input vertices: - 1 Map 3 Statistics: Num rows: 20 Data size: 160 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: count() @@ -81,23 +86,6 @@ STAGE PLANS: Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: bigint) Execution mode: llap - LLAP IO: no inputs - Map 3 - Map Operator Tree: - TableScan - alias: b - Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: key (type: int) - outputColumnNames: _col0 - Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: int) - sort order: + - Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE - Execution mode: llap - LLAP IO: no inputs Reducer 2 Execution mode: llap Reduce Operator Tree: @@ -156,11 +144,10 @@ STAGE PLANS: Tez #### A masked pattern was here #### Edges: - Map 2 <- Map 1 (BROADCAST_EDGE) Reducer 3 <- Map 2 (CUSTOM_SIMPLE_EDGE) #### A masked pattern was here #### Vertices: - Map 1 + Map 2 Map Operator Tree: TableScan alias: a @@ -169,14 +156,6 @@ STAGE PLANS: expressions: key (type: int) outputColumnNames: _col0 Statistics: Num rows: 20 Data size: 80 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: int) - sort order: + - Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 20 Data size: 80 Basic stats: COMPLETE Column stats: COMPLETE - Execution mode: llap - LLAP IO: no inputs - Map 2 Map Operator Tree: TableScan alias: b @@ -185,14 +164,12 @@ STAGE PLANS: expressions: key (type: int) outputColumnNames: _col0 Statistics: Num rows: 189 Data size: 756 Basic stats: COMPLETE Column stats: COMPLETE - Map Join Operator + Merge Join Operator condition map: Right Outer Join 0 to 1 keys: 0 _col0 (type: int) 1 _col0 (type: int) - input vertices: - 0 Map 1 Statistics: Num rows: 189 Data size: 1512 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: count() @@ -204,7 +181,6 @@ STAGE PLANS: Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: bigint) Execution mode: llap - LLAP IO: no inputs Reducer 3 Execution mode: llap Reduce Operator Tree:
