http://git-wip-us.apache.org/repos/asf/hive/blob/38405c14/ql/src/test/results/clientpositive/llap/auto_sortmerge_join_4.q.out ---------------------------------------------------------------------- diff --git a/ql/src/test/results/clientpositive/llap/auto_sortmerge_join_4.q.out b/ql/src/test/results/clientpositive/llap/auto_sortmerge_join_4.q.out index 8ee44b3..43500fb 100644 --- a/ql/src/test/results/clientpositive/llap/auto_sortmerge_join_4.q.out +++ b/ql/src/test/results/clientpositive/llap/auto_sortmerge_join_4.q.out @@ -110,11 +110,10 @@ STAGE PLANS: Tez #### A masked pattern was here #### Edges: - Map 2 <- Map 1 (BROADCAST_EDGE) Reducer 3 <- Map 2 (CUSTOM_SIMPLE_EDGE) #### A masked pattern was here #### Vertices: - Map 1 + Map 2 Map Operator Tree: TableScan alias: a @@ -128,22 +127,11 @@ STAGE PLANS: expressions: key (type: string) outputColumnNames: _col0 Statistics: Num rows: 4 Data size: 1188 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string) - null sort order: a - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 4 Data size: 1188 Basic stats: COMPLETE Column stats: NONE - tag: 0 - auto parallelism: true - Execution mode: llap - LLAP IO: no inputs Path -> Alias: #### A masked pattern was here #### Path -> Partition: #### A masked pattern was here #### Partition - base file name: ds=2008-04-08 input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat partition values: @@ -192,7 +180,6 @@ STAGE PLANS: name: default.bucket_small #### A masked pattern was here #### Partition - base file name: ds=2008-04-09 input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat partition values: @@ -242,7 +229,6 @@ STAGE PLANS: Truncated Path -> Alias: /bucket_small/ds=2008-04-08 [a] /bucket_small/ds=2008-04-09 [a] - Map 2 Map Operator Tree: TableScan alias: b @@ -256,15 +242,12 @@ STAGE PLANS: expressions: key (type: string) outputColumnNames: _col0 Statistics: Num rows: 26 Data size: 7432 Basic stats: COMPLETE Column stats: NONE - Map Join Operator + Merge Join Operator condition map: Inner Join 0 to 1 - Estimated key counts: Map 1 => 4 keys: 0 _col0 (type: string) 1 _col0 (type: string) - input vertices: - 0 Map 1 Position of Big Table: 1 Statistics: Num rows: 28 Data size: 8175 Basic stats: COMPLETE Column stats: NONE Group By Operator @@ -280,13 +263,11 @@ STAGE PLANS: value expressions: _col0 (type: bigint) auto parallelism: false Execution mode: llap - LLAP IO: no inputs Path -> Alias: #### A masked pattern was here #### Path -> Partition: #### A masked pattern was here #### Partition - base file name: ds=2008-04-08 input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat partition values: @@ -403,77 +384,50 @@ STAGE PLANS: Tez #### A masked pattern was here #### Edges: - Map 1 <- Map 3 (BROADCAST_EDGE) Reducer 2 <- Map 1 (CUSTOM_SIMPLE_EDGE) #### A masked pattern was here #### Vertices: Map 1 Map Operator Tree: TableScan - alias: a - Statistics: Num rows: 27 Data size: 7718 Basic stats: COMPLETE Column stats: NONE + alias: b + Statistics: Num rows: 4 Data size: 1188 Basic stats: COMPLETE Column stats: NONE GatherStats: false Filter Operator isSamplingPred: false predicate: key is not null (type: boolean) - Statistics: Num rows: 26 Data size: 7432 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 4 Data size: 1188 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: key (type: string) outputColumnNames: _col0 - Statistics: Num rows: 26 Data size: 7432 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Inner Join 0 to 1 - Estimated key counts: Map 3 => 4 - keys: - 0 _col0 (type: string) - 1 _col0 (type: string) - input vertices: - 1 Map 3 - Position of Big Table: 0 - Statistics: Num rows: 28 Data size: 8175 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: count() - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - null sort order: - sort order: - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - tag: -1 - value expressions: _col0 (type: bigint) - auto parallelism: false - Execution mode: llap - LLAP IO: no inputs + Statistics: Num rows: 4 Data size: 1188 Basic stats: COMPLETE Column stats: NONE Path -> Alias: #### A masked pattern was here #### Path -> Partition: #### A masked pattern was here #### Partition - base file name: ds=2008-04-08 input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat partition values: ds 2008-04-08 properties: - bucket_count 2 + bucket_count 4 bucket_field_name key column.name.delimiter , columns key,value columns.comments columns.types string:string #### A masked pattern was here #### - name default.bucket_big - numFiles 2 + name default.bucket_small + numFiles 4 numRows 0 partition_columns ds partition_columns.types string rawDataSize 0 - serialization.ddl struct bucket_big { string key, string value} + serialization.ddl struct bucket_small { string key, string value} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 2750 + totalSize 226 #### A masked pattern was here #### serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe @@ -481,59 +435,29 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: SORTBUCKETCOLSPREFIX TRUE - bucket_count 2 + bucket_count 4 bucket_field_name key column.name.delimiter , columns key,value columns.comments columns.types string:string #### A masked pattern was here #### - name default.bucket_big + name default.bucket_small partition_columns ds partition_columns.types string - serialization.ddl struct bucket_big { string key, string value} + serialization.ddl struct bucket_small { string key, string value} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe #### A masked pattern was here #### serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.bucket_big - name: default.bucket_big - Truncated Path -> Alias: - /bucket_big/ds=2008-04-08 [a] - Map 3 - Map Operator Tree: - TableScan - alias: b - Statistics: Num rows: 4 Data size: 1188 Basic stats: COMPLETE Column stats: NONE - GatherStats: false - Filter Operator - isSamplingPred: false - predicate: key is not null (type: boolean) - Statistics: Num rows: 4 Data size: 1188 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: key (type: string) - outputColumnNames: _col0 - Statistics: Num rows: 4 Data size: 1188 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string) - null sort order: a - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 4 Data size: 1188 Basic stats: COMPLETE Column stats: NONE - tag: 1 - auto parallelism: true - Execution mode: llap - LLAP IO: no inputs - Path -> Alias: -#### A masked pattern was here #### - Path -> Partition: + name: default.bucket_small + name: default.bucket_small #### A masked pattern was here #### Partition - base file name: ds=2008-04-08 input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat partition values: - ds 2008-04-08 + ds 2008-04-09 properties: bucket_count 4 bucket_field_name key @@ -576,31 +500,70 @@ STAGE PLANS: serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.bucket_small name: default.bucket_small + Truncated Path -> Alias: + /bucket_small/ds=2008-04-08 [b] + /bucket_small/ds=2008-04-09 [b] + Map Operator Tree: + TableScan + alias: a + Statistics: Num rows: 27 Data size: 7718 Basic stats: COMPLETE Column stats: NONE + GatherStats: false + Filter Operator + isSamplingPred: false + predicate: key is not null (type: boolean) + Statistics: Num rows: 26 Data size: 7432 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 26 Data size: 7432 Basic stats: COMPLETE Column stats: NONE + Merge Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: string) + 1 _col0 (type: string) + Position of Big Table: 0 + Statistics: Num rows: 28 Data size: 8175 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count() + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + null sort order: + sort order: + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + tag: -1 + value expressions: _col0 (type: bigint) + auto parallelism: false + Execution mode: llap + Path -> Alias: +#### A masked pattern was here #### + Path -> Partition: #### A masked pattern was here #### Partition - base file name: ds=2008-04-09 input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat partition values: - ds 2008-04-09 + ds 2008-04-08 properties: - bucket_count 4 + bucket_count 2 bucket_field_name key column.name.delimiter , columns key,value columns.comments columns.types string:string #### A masked pattern was here #### - name default.bucket_small - numFiles 4 + name default.bucket_big + numFiles 2 numRows 0 partition_columns ds partition_columns.types string rawDataSize 0 - serialization.ddl struct bucket_small { string key, string value} + serialization.ddl struct bucket_big { string key, string value} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 226 + totalSize 2750 #### A masked pattern was here #### serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe @@ -608,26 +571,25 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: SORTBUCKETCOLSPREFIX TRUE - bucket_count 4 + bucket_count 2 bucket_field_name key column.name.delimiter , columns key,value columns.comments columns.types string:string #### A masked pattern was here #### - name default.bucket_small + name default.bucket_big partition_columns ds partition_columns.types string - serialization.ddl struct bucket_small { string key, string value} + serialization.ddl struct bucket_big { string key, string value} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe #### A masked pattern was here #### serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.bucket_small - name: default.bucket_small + name: default.bucket_big + name: default.bucket_big Truncated Path -> Alias: - /bucket_small/ds=2008-04-08 [b] - /bucket_small/ds=2008-04-09 [b] + /bucket_big/ds=2008-04-08 [a] Reducer 2 Execution mode: llap Needs Tagging: false @@ -696,77 +658,50 @@ STAGE PLANS: Tez #### A masked pattern was here #### Edges: - Map 1 <- Map 3 (BROADCAST_EDGE) Reducer 2 <- Map 1 (CUSTOM_SIMPLE_EDGE) #### A masked pattern was here #### Vertices: Map 1 Map Operator Tree: TableScan - alias: a - Statistics: Num rows: 27 Data size: 7718 Basic stats: COMPLETE Column stats: NONE + alias: b + Statistics: Num rows: 4 Data size: 1188 Basic stats: COMPLETE Column stats: NONE GatherStats: false Filter Operator isSamplingPred: false predicate: key is not null (type: boolean) - Statistics: Num rows: 26 Data size: 7432 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 4 Data size: 1188 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: key (type: string) outputColumnNames: _col0 - Statistics: Num rows: 26 Data size: 7432 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Inner Join 0 to 1 - Estimated key counts: Map 3 => 4 - keys: - 0 _col0 (type: string) - 1 _col0 (type: string) - input vertices: - 1 Map 3 - Position of Big Table: 0 - Statistics: Num rows: 28 Data size: 8175 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: count() - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - null sort order: - sort order: - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - tag: -1 - value expressions: _col0 (type: bigint) - auto parallelism: false - Execution mode: llap - LLAP IO: no inputs + Statistics: Num rows: 4 Data size: 1188 Basic stats: COMPLETE Column stats: NONE Path -> Alias: #### A masked pattern was here #### Path -> Partition: #### A masked pattern was here #### Partition - base file name: ds=2008-04-08 input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat partition values: ds 2008-04-08 properties: - bucket_count 2 + bucket_count 4 bucket_field_name key column.name.delimiter , columns key,value columns.comments columns.types string:string #### A masked pattern was here #### - name default.bucket_big - numFiles 2 + name default.bucket_small + numFiles 4 numRows 0 partition_columns ds partition_columns.types string rawDataSize 0 - serialization.ddl struct bucket_big { string key, string value} + serialization.ddl struct bucket_small { string key, string value} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 2750 + totalSize 226 #### A masked pattern was here #### serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe @@ -774,59 +709,29 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: SORTBUCKETCOLSPREFIX TRUE - bucket_count 2 + bucket_count 4 bucket_field_name key column.name.delimiter , columns key,value columns.comments columns.types string:string #### A masked pattern was here #### - name default.bucket_big + name default.bucket_small partition_columns ds partition_columns.types string - serialization.ddl struct bucket_big { string key, string value} + serialization.ddl struct bucket_small { string key, string value} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe #### A masked pattern was here #### serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.bucket_big - name: default.bucket_big - Truncated Path -> Alias: - /bucket_big/ds=2008-04-08 [a] - Map 3 - Map Operator Tree: - TableScan - alias: b - Statistics: Num rows: 4 Data size: 1188 Basic stats: COMPLETE Column stats: NONE - GatherStats: false - Filter Operator - isSamplingPred: false - predicate: key is not null (type: boolean) - Statistics: Num rows: 4 Data size: 1188 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: key (type: string) - outputColumnNames: _col0 - Statistics: Num rows: 4 Data size: 1188 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string) - null sort order: a - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 4 Data size: 1188 Basic stats: COMPLETE Column stats: NONE - tag: 1 - auto parallelism: true - Execution mode: llap - LLAP IO: no inputs - Path -> Alias: -#### A masked pattern was here #### - Path -> Partition: + name: default.bucket_small + name: default.bucket_small #### A masked pattern was here #### Partition - base file name: ds=2008-04-08 input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat partition values: - ds 2008-04-08 + ds 2008-04-09 properties: bucket_count 4 bucket_field_name key @@ -869,31 +774,70 @@ STAGE PLANS: serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.bucket_small name: default.bucket_small + Truncated Path -> Alias: + /bucket_small/ds=2008-04-08 [b] + /bucket_small/ds=2008-04-09 [b] + Map Operator Tree: + TableScan + alias: a + Statistics: Num rows: 27 Data size: 7718 Basic stats: COMPLETE Column stats: NONE + GatherStats: false + Filter Operator + isSamplingPred: false + predicate: key is not null (type: boolean) + Statistics: Num rows: 26 Data size: 7432 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 26 Data size: 7432 Basic stats: COMPLETE Column stats: NONE + Merge Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: string) + 1 _col0 (type: string) + Position of Big Table: 0 + Statistics: Num rows: 28 Data size: 8175 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count() + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + null sort order: + sort order: + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + tag: -1 + value expressions: _col0 (type: bigint) + auto parallelism: false + Execution mode: llap + Path -> Alias: +#### A masked pattern was here #### + Path -> Partition: #### A masked pattern was here #### Partition - base file name: ds=2008-04-09 input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat partition values: - ds 2008-04-09 + ds 2008-04-08 properties: - bucket_count 4 + bucket_count 2 bucket_field_name key column.name.delimiter , columns key,value columns.comments columns.types string:string #### A masked pattern was here #### - name default.bucket_small - numFiles 4 + name default.bucket_big + numFiles 2 numRows 0 partition_columns ds partition_columns.types string rawDataSize 0 - serialization.ddl struct bucket_small { string key, string value} + serialization.ddl struct bucket_big { string key, string value} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 226 + totalSize 2750 #### A masked pattern was here #### serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe @@ -901,26 +845,25 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: SORTBUCKETCOLSPREFIX TRUE - bucket_count 4 + bucket_count 2 bucket_field_name key column.name.delimiter , columns key,value columns.comments columns.types string:string #### A masked pattern was here #### - name default.bucket_small + name default.bucket_big partition_columns ds partition_columns.types string - serialization.ddl struct bucket_small { string key, string value} + serialization.ddl struct bucket_big { string key, string value} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe #### A masked pattern was here #### serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.bucket_small - name: default.bucket_small + name: default.bucket_big + name: default.bucket_big Truncated Path -> Alias: - /bucket_small/ds=2008-04-08 [b] - /bucket_small/ds=2008-04-09 [b] + /bucket_big/ds=2008-04-08 [a] Reducer 2 Execution mode: llap Needs Tagging: false
http://git-wip-us.apache.org/repos/asf/hive/blob/38405c14/ql/src/test/results/clientpositive/llap/auto_sortmerge_join_7.q.out ---------------------------------------------------------------------- diff --git a/ql/src/test/results/clientpositive/llap/auto_sortmerge_join_7.q.out b/ql/src/test/results/clientpositive/llap/auto_sortmerge_join_7.q.out index 83d5a96..a88396b 100644 --- a/ql/src/test/results/clientpositive/llap/auto_sortmerge_join_7.q.out +++ b/ql/src/test/results/clientpositive/llap/auto_sortmerge_join_7.q.out @@ -127,11 +127,10 @@ STAGE PLANS: Tez #### A masked pattern was here #### Edges: - Map 2 <- Map 1 (BROADCAST_EDGE) Reducer 3 <- Map 2 (CUSTOM_SIMPLE_EDGE) #### A masked pattern was here #### Vertices: - Map 1 + Map 2 Map Operator Tree: TableScan alias: a @@ -145,22 +144,11 @@ STAGE PLANS: expressions: key (type: string) outputColumnNames: _col0 Statistics: Num rows: 4 Data size: 1188 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string) - null sort order: a - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 4 Data size: 1188 Basic stats: COMPLETE Column stats: NONE - tag: 0 - auto parallelism: true - Execution mode: llap - LLAP IO: no inputs Path -> Alias: #### A masked pattern was here #### Path -> Partition: #### A masked pattern was here #### Partition - base file name: ds=2008-04-08 input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat partition values: @@ -209,7 +197,6 @@ STAGE PLANS: name: default.bucket_small #### A masked pattern was here #### Partition - base file name: ds=2008-04-09 input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat partition values: @@ -259,7 +246,6 @@ STAGE PLANS: Truncated Path -> Alias: /bucket_small/ds=2008-04-08 [a] /bucket_small/ds=2008-04-09 [a] - Map 2 Map Operator Tree: TableScan alias: b @@ -273,15 +259,12 @@ STAGE PLANS: expressions: key (type: string) outputColumnNames: _col0 Statistics: Num rows: 52 Data size: 14687 Basic stats: COMPLETE Column stats: NONE - Map Join Operator + Merge Join Operator condition map: Inner Join 0 to 1 - Estimated key counts: Map 1 => 4 keys: 0 _col0 (type: string) 1 _col0 (type: string) - input vertices: - 0 Map 1 Position of Big Table: 1 Statistics: Num rows: 57 Data size: 16155 Basic stats: COMPLETE Column stats: NONE Group By Operator @@ -297,13 +280,11 @@ STAGE PLANS: value expressions: _col0 (type: bigint) auto parallelism: false Execution mode: llap - LLAP IO: no inputs Path -> Alias: #### A masked pattern was here #### Path -> Partition: #### A masked pattern was here #### Partition - base file name: ds=2008-04-08 input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat partition values: @@ -352,7 +333,6 @@ STAGE PLANS: name: default.bucket_big #### A masked pattern was here #### Partition - base file name: ds=2008-04-09 input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat partition values: @@ -472,77 +452,50 @@ STAGE PLANS: Tez #### A masked pattern was here #### Edges: - Map 1 <- Map 3 (BROADCAST_EDGE) Reducer 2 <- Map 1 (CUSTOM_SIMPLE_EDGE) #### A masked pattern was here #### Vertices: Map 1 Map Operator Tree: TableScan - alias: a - Statistics: Num rows: 54 Data size: 15252 Basic stats: COMPLETE Column stats: NONE + alias: b + Statistics: Num rows: 4 Data size: 1188 Basic stats: COMPLETE Column stats: NONE GatherStats: false Filter Operator isSamplingPred: false predicate: key is not null (type: boolean) - Statistics: Num rows: 52 Data size: 14687 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 4 Data size: 1188 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: key (type: string) outputColumnNames: _col0 - Statistics: Num rows: 52 Data size: 14687 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Inner Join 0 to 1 - Estimated key counts: Map 3 => 4 - keys: - 0 _col0 (type: string) - 1 _col0 (type: string) - input vertices: - 1 Map 3 - Position of Big Table: 0 - Statistics: Num rows: 57 Data size: 16155 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: count() - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - null sort order: - sort order: - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - tag: -1 - value expressions: _col0 (type: bigint) - auto parallelism: false - Execution mode: llap - LLAP IO: no inputs + Statistics: Num rows: 4 Data size: 1188 Basic stats: COMPLETE Column stats: NONE Path -> Alias: #### A masked pattern was here #### Path -> Partition: #### A masked pattern was here #### Partition - base file name: ds=2008-04-08 input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat partition values: ds 2008-04-08 properties: - bucket_count 2 + bucket_count 4 bucket_field_name key column.name.delimiter , columns key,value columns.comments columns.types string:string #### A masked pattern was here #### - name default.bucket_big - numFiles 2 + name default.bucket_small + numFiles 4 numRows 0 partition_columns ds partition_columns.types string rawDataSize 0 - serialization.ddl struct bucket_big { string key, string value} + serialization.ddl struct bucket_small { string key, string value} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 2750 + totalSize 226 #### A masked pattern was here #### serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe @@ -550,48 +503,47 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: SORTBUCKETCOLSPREFIX TRUE - bucket_count 2 + bucket_count 4 bucket_field_name key column.name.delimiter , columns key,value columns.comments columns.types string:string #### A masked pattern was here #### - name default.bucket_big + name default.bucket_small partition_columns ds partition_columns.types string - serialization.ddl struct bucket_big { string key, string value} + serialization.ddl struct bucket_small { string key, string value} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe #### A masked pattern was here #### serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.bucket_big - name: default.bucket_big + name: default.bucket_small + name: default.bucket_small #### A masked pattern was here #### Partition - base file name: ds=2008-04-09 input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat partition values: ds 2008-04-09 properties: - bucket_count 2 + bucket_count 4 bucket_field_name key column.name.delimiter , columns key,value columns.comments columns.types string:string #### A masked pattern was here #### - name default.bucket_big - numFiles 2 + name default.bucket_small + numFiles 4 numRows 0 partition_columns ds partition_columns.types string rawDataSize 0 - serialization.ddl struct bucket_big { string key, string value} + serialization.ddl struct bucket_small { string key, string value} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 2750 + totalSize 226 #### A masked pattern was here #### serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe @@ -599,78 +551,87 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: SORTBUCKETCOLSPREFIX TRUE - bucket_count 2 + bucket_count 4 bucket_field_name key column.name.delimiter , columns key,value columns.comments columns.types string:string #### A masked pattern was here #### - name default.bucket_big + name default.bucket_small partition_columns ds partition_columns.types string - serialization.ddl struct bucket_big { string key, string value} + serialization.ddl struct bucket_small { string key, string value} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe #### A masked pattern was here #### serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.bucket_big - name: default.bucket_big + name: default.bucket_small + name: default.bucket_small Truncated Path -> Alias: - /bucket_big/ds=2008-04-08 [a] - /bucket_big/ds=2008-04-09 [a] - Map 3 + /bucket_small/ds=2008-04-08 [b] + /bucket_small/ds=2008-04-09 [b] Map Operator Tree: TableScan - alias: b - Statistics: Num rows: 4 Data size: 1188 Basic stats: COMPLETE Column stats: NONE + alias: a + Statistics: Num rows: 54 Data size: 15252 Basic stats: COMPLETE Column stats: NONE GatherStats: false Filter Operator isSamplingPred: false predicate: key is not null (type: boolean) - Statistics: Num rows: 4 Data size: 1188 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 52 Data size: 14687 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: key (type: string) outputColumnNames: _col0 - Statistics: Num rows: 4 Data size: 1188 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string) - null sort order: a - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 4 Data size: 1188 Basic stats: COMPLETE Column stats: NONE - tag: 1 - auto parallelism: true + Statistics: Num rows: 52 Data size: 14687 Basic stats: COMPLETE Column stats: NONE + Merge Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: string) + 1 _col0 (type: string) + Position of Big Table: 0 + Statistics: Num rows: 57 Data size: 16155 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count() + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + null sort order: + sort order: + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + tag: -1 + value expressions: _col0 (type: bigint) + auto parallelism: false Execution mode: llap - LLAP IO: no inputs Path -> Alias: #### A masked pattern was here #### Path -> Partition: #### A masked pattern was here #### Partition - base file name: ds=2008-04-08 input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat partition values: ds 2008-04-08 properties: - bucket_count 4 + bucket_count 2 bucket_field_name key column.name.delimiter , columns key,value columns.comments columns.types string:string #### A masked pattern was here #### - name default.bucket_small - numFiles 4 + name default.bucket_big + numFiles 2 numRows 0 partition_columns ds partition_columns.types string rawDataSize 0 - serialization.ddl struct bucket_small { string key, string value} + serialization.ddl struct bucket_big { string key, string value} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 226 + totalSize 2750 #### A masked pattern was here #### serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe @@ -678,48 +639,47 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: SORTBUCKETCOLSPREFIX TRUE - bucket_count 4 + bucket_count 2 bucket_field_name key column.name.delimiter , columns key,value columns.comments columns.types string:string #### A masked pattern was here #### - name default.bucket_small + name default.bucket_big partition_columns ds partition_columns.types string - serialization.ddl struct bucket_small { string key, string value} + serialization.ddl struct bucket_big { string key, string value} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe #### A masked pattern was here #### serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.bucket_small - name: default.bucket_small + name: default.bucket_big + name: default.bucket_big #### A masked pattern was here #### Partition - base file name: ds=2008-04-09 input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat partition values: ds 2008-04-09 properties: - bucket_count 4 + bucket_count 2 bucket_field_name key column.name.delimiter , columns key,value columns.comments columns.types string:string #### A masked pattern was here #### - name default.bucket_small - numFiles 4 + name default.bucket_big + numFiles 2 numRows 0 partition_columns ds partition_columns.types string rawDataSize 0 - serialization.ddl struct bucket_small { string key, string value} + serialization.ddl struct bucket_big { string key, string value} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 226 + totalSize 2750 #### A masked pattern was here #### serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe @@ -727,26 +687,26 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: SORTBUCKETCOLSPREFIX TRUE - bucket_count 4 + bucket_count 2 bucket_field_name key column.name.delimiter , columns key,value columns.comments columns.types string:string #### A masked pattern was here #### - name default.bucket_small + name default.bucket_big partition_columns ds partition_columns.types string - serialization.ddl struct bucket_small { string key, string value} + serialization.ddl struct bucket_big { string key, string value} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe #### A masked pattern was here #### serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.bucket_small - name: default.bucket_small + name: default.bucket_big + name: default.bucket_big Truncated Path -> Alias: - /bucket_small/ds=2008-04-08 [b] - /bucket_small/ds=2008-04-09 [b] + /bucket_big/ds=2008-04-08 [a] + /bucket_big/ds=2008-04-09 [a] Reducer 2 Execution mode: llap Needs Tagging: false @@ -817,77 +777,50 @@ STAGE PLANS: Tez #### A masked pattern was here #### Edges: - Map 1 <- Map 3 (BROADCAST_EDGE) Reducer 2 <- Map 1 (CUSTOM_SIMPLE_EDGE) #### A masked pattern was here #### Vertices: Map 1 Map Operator Tree: TableScan - alias: a - Statistics: Num rows: 54 Data size: 15252 Basic stats: COMPLETE Column stats: NONE + alias: b + Statistics: Num rows: 4 Data size: 1188 Basic stats: COMPLETE Column stats: NONE GatherStats: false Filter Operator isSamplingPred: false predicate: key is not null (type: boolean) - Statistics: Num rows: 52 Data size: 14687 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 4 Data size: 1188 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: key (type: string) outputColumnNames: _col0 - Statistics: Num rows: 52 Data size: 14687 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Inner Join 0 to 1 - Estimated key counts: Map 3 => 4 - keys: - 0 _col0 (type: string) - 1 _col0 (type: string) - input vertices: - 1 Map 3 - Position of Big Table: 0 - Statistics: Num rows: 57 Data size: 16155 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: count() - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - null sort order: - sort order: - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - tag: -1 - value expressions: _col0 (type: bigint) - auto parallelism: false - Execution mode: llap - LLAP IO: no inputs + Statistics: Num rows: 4 Data size: 1188 Basic stats: COMPLETE Column stats: NONE Path -> Alias: #### A masked pattern was here #### Path -> Partition: #### A masked pattern was here #### Partition - base file name: ds=2008-04-08 input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat partition values: ds 2008-04-08 properties: - bucket_count 2 + bucket_count 4 bucket_field_name key column.name.delimiter , columns key,value columns.comments columns.types string:string #### A masked pattern was here #### - name default.bucket_big - numFiles 2 + name default.bucket_small + numFiles 4 numRows 0 partition_columns ds partition_columns.types string rawDataSize 0 - serialization.ddl struct bucket_big { string key, string value} + serialization.ddl struct bucket_small { string key, string value} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 2750 + totalSize 226 #### A masked pattern was here #### serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe @@ -895,48 +828,47 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: SORTBUCKETCOLSPREFIX TRUE - bucket_count 2 + bucket_count 4 bucket_field_name key column.name.delimiter , columns key,value columns.comments columns.types string:string #### A masked pattern was here #### - name default.bucket_big + name default.bucket_small partition_columns ds partition_columns.types string - serialization.ddl struct bucket_big { string key, string value} + serialization.ddl struct bucket_small { string key, string value} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe #### A masked pattern was here #### serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.bucket_big - name: default.bucket_big + name: default.bucket_small + name: default.bucket_small #### A masked pattern was here #### Partition - base file name: ds=2008-04-09 input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat partition values: ds 2008-04-09 properties: - bucket_count 2 + bucket_count 4 bucket_field_name key column.name.delimiter , columns key,value columns.comments columns.types string:string #### A masked pattern was here #### - name default.bucket_big - numFiles 2 + name default.bucket_small + numFiles 4 numRows 0 partition_columns ds partition_columns.types string rawDataSize 0 - serialization.ddl struct bucket_big { string key, string value} + serialization.ddl struct bucket_small { string key, string value} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 2750 + totalSize 226 #### A masked pattern was here #### serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe @@ -944,78 +876,87 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: SORTBUCKETCOLSPREFIX TRUE - bucket_count 2 + bucket_count 4 bucket_field_name key column.name.delimiter , columns key,value columns.comments columns.types string:string #### A masked pattern was here #### - name default.bucket_big + name default.bucket_small partition_columns ds partition_columns.types string - serialization.ddl struct bucket_big { string key, string value} + serialization.ddl struct bucket_small { string key, string value} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe #### A masked pattern was here #### serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.bucket_big - name: default.bucket_big + name: default.bucket_small + name: default.bucket_small Truncated Path -> Alias: - /bucket_big/ds=2008-04-08 [a] - /bucket_big/ds=2008-04-09 [a] - Map 3 + /bucket_small/ds=2008-04-08 [b] + /bucket_small/ds=2008-04-09 [b] Map Operator Tree: TableScan - alias: b - Statistics: Num rows: 4 Data size: 1188 Basic stats: COMPLETE Column stats: NONE + alias: a + Statistics: Num rows: 54 Data size: 15252 Basic stats: COMPLETE Column stats: NONE GatherStats: false Filter Operator isSamplingPred: false predicate: key is not null (type: boolean) - Statistics: Num rows: 4 Data size: 1188 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 52 Data size: 14687 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: key (type: string) outputColumnNames: _col0 - Statistics: Num rows: 4 Data size: 1188 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string) - null sort order: a - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 4 Data size: 1188 Basic stats: COMPLETE Column stats: NONE - tag: 1 - auto parallelism: true + Statistics: Num rows: 52 Data size: 14687 Basic stats: COMPLETE Column stats: NONE + Merge Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: string) + 1 _col0 (type: string) + Position of Big Table: 0 + Statistics: Num rows: 57 Data size: 16155 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count() + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + null sort order: + sort order: + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + tag: -1 + value expressions: _col0 (type: bigint) + auto parallelism: false Execution mode: llap - LLAP IO: no inputs Path -> Alias: #### A masked pattern was here #### Path -> Partition: #### A masked pattern was here #### Partition - base file name: ds=2008-04-08 input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat partition values: ds 2008-04-08 properties: - bucket_count 4 + bucket_count 2 bucket_field_name key column.name.delimiter , columns key,value columns.comments columns.types string:string #### A masked pattern was here #### - name default.bucket_small - numFiles 4 + name default.bucket_big + numFiles 2 numRows 0 partition_columns ds partition_columns.types string rawDataSize 0 - serialization.ddl struct bucket_small { string key, string value} + serialization.ddl struct bucket_big { string key, string value} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 226 + totalSize 2750 #### A masked pattern was here #### serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe @@ -1023,48 +964,47 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: SORTBUCKETCOLSPREFIX TRUE - bucket_count 4 + bucket_count 2 bucket_field_name key column.name.delimiter , columns key,value columns.comments columns.types string:string #### A masked pattern was here #### - name default.bucket_small + name default.bucket_big partition_columns ds partition_columns.types string - serialization.ddl struct bucket_small { string key, string value} + serialization.ddl struct bucket_big { string key, string value} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe #### A masked pattern was here #### serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.bucket_small - name: default.bucket_small + name: default.bucket_big + name: default.bucket_big #### A masked pattern was here #### Partition - base file name: ds=2008-04-09 input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat partition values: ds 2008-04-09 properties: - bucket_count 4 + bucket_count 2 bucket_field_name key column.name.delimiter , columns key,value columns.comments columns.types string:string #### A masked pattern was here #### - name default.bucket_small - numFiles 4 + name default.bucket_big + numFiles 2 numRows 0 partition_columns ds partition_columns.types string rawDataSize 0 - serialization.ddl struct bucket_small { string key, string value} + serialization.ddl struct bucket_big { string key, string value} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 226 + totalSize 2750 #### A masked pattern was here #### serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe @@ -1072,26 +1012,26 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: SORTBUCKETCOLSPREFIX TRUE - bucket_count 4 + bucket_count 2 bucket_field_name key column.name.delimiter , columns key,value columns.comments columns.types string:string #### A masked pattern was here #### - name default.bucket_small + name default.bucket_big partition_columns ds partition_columns.types string - serialization.ddl struct bucket_small { string key, string value} + serialization.ddl struct bucket_big { string key, string value} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe #### A masked pattern was here #### serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.bucket_small - name: default.bucket_small + name: default.bucket_big + name: default.bucket_big Truncated Path -> Alias: - /bucket_small/ds=2008-04-08 [b] - /bucket_small/ds=2008-04-09 [b] + /bucket_big/ds=2008-04-08 [a] + /bucket_big/ds=2008-04-09 [a] Reducer 2 Execution mode: llap Needs Tagging: false
