http://git-wip-us.apache.org/repos/asf/hive/blob/3464df45/ql/src/test/results/clientpositive/auto_sortmerge_join_4.q.out ---------------------------------------------------------------------- diff --git a/ql/src/test/results/clientpositive/auto_sortmerge_join_4.q.out b/ql/src/test/results/clientpositive/auto_sortmerge_join_4.q.out index b54c574..0d586fd 100644 --- a/ql/src/test/results/clientpositive/auto_sortmerge_join_4.q.out +++ b/ql/src/test/results/clientpositive/auto_sortmerge_join_4.q.out @@ -72,11 +72,11 @@ POSTHOOK: query: load data local inpath '../../data/files/auto_sortmerge_join/sm POSTHOOK: type: LOAD #### A masked pattern was here #### POSTHOOK: Output: default@bucket_small@ds=2008-04-09 -PREHOOK: query: CREATE TABLE bucket_big (key string, value string) partitioned by (ds string) CLUSTERED BY (key) SORTED BY (key) INTO 4 BUCKETS STORED AS TEXTFILE +PREHOOK: query: CREATE TABLE bucket_big (key string, value string) partitioned by (ds string) CLUSTERED BY (key) SORTED BY (key) INTO 2 BUCKETS STORED AS TEXTFILE PREHOOK: type: CREATETABLE PREHOOK: Output: database:default PREHOOK: Output: default@bucket_big -POSTHOOK: query: CREATE TABLE bucket_big (key string, value string) partitioned by (ds string) CLUSTERED BY (key) SORTED BY (key) INTO 4 BUCKETS STORED AS TEXTFILE +POSTHOOK: query: CREATE TABLE bucket_big (key string, value string) partitioned by (ds string) CLUSTERED BY (key) SORTED BY (key) INTO 2 BUCKETS STORED AS TEXTFILE POSTHOOK: type: CREATETABLE POSTHOOK: Output: database:default POSTHOOK: Output: default@bucket_big @@ -97,22 +97,6 @@ POSTHOOK: query: load data local inpath '../../data/files/auto_sortmerge_join/bi POSTHOOK: type: LOAD #### A masked pattern was here #### POSTHOOK: Output: default@bucket_big@ds=2008-04-08 -PREHOOK: query: load data local inpath '../../data/files/auto_sortmerge_join/big/000002_0' INTO TABLE bucket_big partition(ds='2008-04-08') -PREHOOK: type: LOAD -#### A masked pattern was here #### -PREHOOK: Output: default@bucket_big@ds=2008-04-08 -POSTHOOK: query: load data local inpath '../../data/files/auto_sortmerge_join/big/000002_0' INTO TABLE bucket_big partition(ds='2008-04-08') -POSTHOOK: type: LOAD -#### A masked pattern was here #### -POSTHOOK: Output: default@bucket_big@ds=2008-04-08 -PREHOOK: query: load data local inpath '../../data/files/auto_sortmerge_join/big/000003_0' INTO TABLE bucket_big partition(ds='2008-04-08') -PREHOOK: type: LOAD -#### A masked pattern was here #### -PREHOOK: Output: default@bucket_big@ds=2008-04-08 -POSTHOOK: query: load data local inpath '../../data/files/auto_sortmerge_join/big/000003_0' INTO TABLE bucket_big partition(ds='2008-04-08') -POSTHOOK: type: LOAD -#### A masked pattern was here #### -POSTHOOK: Output: default@bucket_big@ds=2008-04-08 PREHOOK: query: explain extended select count(*) FROM bucket_small a JOIN bucket_big b ON a.key = b.key PREHOOK: type: QUERY POSTHOOK: query: explain extended select count(*) FROM bucket_small a JOIN bucket_big b ON a.key = b.key @@ -127,16 +111,16 @@ STAGE PLANS: Map Operator Tree: TableScan alias: b - Statistics: Num rows: 120 Data size: 58120 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 56 Data size: 27500 Basic stats: COMPLETE Column stats: NONE GatherStats: false Filter Operator isSamplingPred: false predicate: key is not null (type: boolean) - Statistics: Num rows: 120 Data size: 58120 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 56 Data size: 27500 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: key (type: string) outputColumnNames: _col0 - Statistics: Num rows: 120 Data size: 58120 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 56 Data size: 27500 Basic stats: COMPLETE Column stats: NONE Sorted Merge Bucket Map Join Operator condition map: Inner Join 0 to 1 @@ -166,7 +150,7 @@ STAGE PLANS: partition values: ds 2008-04-08 properties: - bucket_count 4 + bucket_count 2 bucket_field_name key column.name.delimiter , columns key,value @@ -174,7 +158,7 @@ STAGE PLANS: columns.types string:string #### A masked pattern was here #### name default.bucket_big - numFiles 4 + numFiles 2 numRows 0 partition_columns ds partition_columns.types string @@ -182,7 +166,7 @@ STAGE PLANS: serialization.ddl struct bucket_big { string key, string value} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 5812 + totalSize 2750 #### A masked pattern was here #### serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe @@ -190,7 +174,7 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: SORTBUCKETCOLSPREFIX TRUE - bucket_count 4 + bucket_count 2 bucket_field_name key column.name.delimiter , columns key,value @@ -259,7 +243,7 @@ POSTHOOK: Input: default@bucket_small POSTHOOK: Input: default@bucket_small@ds=2008-04-08 POSTHOOK: Input: default@bucket_small@ds=2008-04-09 #### A masked pattern was here #### -78 +38 PREHOOK: query: explain extended select count(*) FROM bucket_big a JOIN bucket_small b ON a.key = b.key PREHOOK: type: QUERY POSTHOOK: query: explain extended select count(*) FROM bucket_big a JOIN bucket_small b ON a.key = b.key @@ -274,16 +258,16 @@ STAGE PLANS: Map Operator Tree: TableScan alias: a - Statistics: Num rows: 120 Data size: 58120 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 56 Data size: 27500 Basic stats: COMPLETE Column stats: NONE GatherStats: false Filter Operator isSamplingPred: false predicate: key is not null (type: boolean) - Statistics: Num rows: 120 Data size: 58120 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 56 Data size: 27500 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: key (type: string) outputColumnNames: _col0 - Statistics: Num rows: 120 Data size: 58120 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 56 Data size: 27500 Basic stats: COMPLETE Column stats: NONE Sorted Merge Bucket Map Join Operator condition map: Inner Join 0 to 1 @@ -313,7 +297,7 @@ STAGE PLANS: partition values: ds 2008-04-08 properties: - bucket_count 4 + bucket_count 2 bucket_field_name key column.name.delimiter , columns key,value @@ -321,7 +305,7 @@ STAGE PLANS: columns.types string:string #### A masked pattern was here #### name default.bucket_big - numFiles 4 + numFiles 2 numRows 0 partition_columns ds partition_columns.types string @@ -329,7 +313,7 @@ STAGE PLANS: serialization.ddl struct bucket_big { string key, string value} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 5812 + totalSize 2750 #### A masked pattern was here #### serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe @@ -337,7 +321,7 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: SORTBUCKETCOLSPREFIX TRUE - bucket_count 4 + bucket_count 2 bucket_field_name key column.name.delimiter , columns key,value @@ -406,7 +390,7 @@ POSTHOOK: Input: default@bucket_small POSTHOOK: Input: default@bucket_small@ds=2008-04-08 POSTHOOK: Input: default@bucket_small@ds=2008-04-09 #### A masked pattern was here #### -78 +38 PREHOOK: query: explain extended select count(*) FROM bucket_big a JOIN bucket_small b ON a.key = b.key PREHOOK: type: QUERY POSTHOOK: query: explain extended select count(*) FROM bucket_big a JOIN bucket_small b ON a.key = b.key @@ -550,16 +534,16 @@ STAGE PLANS: Map Operator Tree: TableScan alias: a - Statistics: Num rows: 120 Data size: 58120 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 56 Data size: 27500 Basic stats: COMPLETE Column stats: NONE GatherStats: false Filter Operator isSamplingPred: false predicate: key is not null (type: boolean) - Statistics: Num rows: 120 Data size: 58120 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 56 Data size: 27500 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: key (type: string) outputColumnNames: _col0 - Statistics: Num rows: 120 Data size: 58120 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 56 Data size: 27500 Basic stats: COMPLETE Column stats: NONE Map Join Operator condition map: Inner Join 0 to 1 @@ -590,7 +574,7 @@ STAGE PLANS: partition values: ds 2008-04-08 properties: - bucket_count 4 + bucket_count 2 bucket_field_name key column.name.delimiter , columns key,value @@ -598,7 +582,7 @@ STAGE PLANS: columns.types string:string #### A masked pattern was here #### name default.bucket_big - numFiles 4 + numFiles 2 numRows 0 partition_columns ds partition_columns.types string @@ -606,7 +590,7 @@ STAGE PLANS: serialization.ddl struct bucket_big { string key, string value} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 5812 + totalSize 2750 #### A masked pattern was here #### serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe @@ -614,7 +598,7 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: SORTBUCKETCOLSPREFIX TRUE - bucket_count 4 + bucket_count 2 bucket_field_name key column.name.delimiter , columns key,value @@ -771,7 +755,7 @@ STAGE PLANS: partition values: ds 2008-04-08 properties: - bucket_count 4 + bucket_count 2 bucket_field_name key column.name.delimiter , columns key,value @@ -779,7 +763,7 @@ STAGE PLANS: columns.types string:string #### A masked pattern was here #### name default.bucket_big - numFiles 4 + numFiles 2 numRows 0 partition_columns ds partition_columns.types string @@ -787,7 +771,7 @@ STAGE PLANS: serialization.ddl struct bucket_big { string key, string value} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 5812 + totalSize 2750 #### A masked pattern was here #### serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe @@ -795,7 +779,7 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: SORTBUCKETCOLSPREFIX TRUE - bucket_count 4 + bucket_count 2 bucket_field_name key column.name.delimiter , columns key,value @@ -816,16 +800,16 @@ STAGE PLANS: $hdt$_0:a TableScan alias: a - Statistics: Num rows: 120 Data size: 58120 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 56 Data size: 27500 Basic stats: COMPLETE Column stats: NONE GatherStats: false Filter Operator isSamplingPred: false predicate: key is not null (type: boolean) - Statistics: Num rows: 120 Data size: 58120 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 56 Data size: 27500 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: key (type: string) outputColumnNames: _col0 - Statistics: Num rows: 120 Data size: 58120 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 56 Data size: 27500 Basic stats: COMPLETE Column stats: NONE HashTable Sink Operator keys: 0 _col0 (type: string) @@ -877,7 +861,7 @@ STAGE PLANS: partition values: ds 2008-04-08 properties: - bucket_count 4 + bucket_count 2 bucket_field_name key column.name.delimiter , columns key,value @@ -885,7 +869,7 @@ STAGE PLANS: columns.types string:string #### A masked pattern was here #### name default.bucket_big - numFiles 4 + numFiles 2 numRows 0 partition_columns ds partition_columns.types string @@ -893,7 +877,7 @@ STAGE PLANS: serialization.ddl struct bucket_big { string key, string value} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 5812 + totalSize 2750 #### A masked pattern was here #### serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe @@ -901,7 +885,7 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: SORTBUCKETCOLSPREFIX TRUE - bucket_count 4 + bucket_count 2 bucket_field_name key column.name.delimiter , columns key,value @@ -1050,16 +1034,16 @@ STAGE PLANS: Map Operator Tree: TableScan alias: a - Statistics: Num rows: 120 Data size: 58120 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 56 Data size: 27500 Basic stats: COMPLETE Column stats: NONE GatherStats: false Filter Operator isSamplingPred: false predicate: key is not null (type: boolean) - Statistics: Num rows: 120 Data size: 58120 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 56 Data size: 27500 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: key (type: string) outputColumnNames: _col0 - Statistics: Num rows: 120 Data size: 58120 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 56 Data size: 27500 Basic stats: COMPLETE Column stats: NONE Sorted Merge Bucket Map Join Operator condition map: Inner Join 0 to 1 @@ -1089,7 +1073,7 @@ STAGE PLANS: partition values: ds 2008-04-08 properties: - bucket_count 4 + bucket_count 2 bucket_field_name key column.name.delimiter , columns key,value @@ -1097,7 +1081,7 @@ STAGE PLANS: columns.types string:string #### A masked pattern was here #### name default.bucket_big - numFiles 4 + numFiles 2 numRows 0 partition_columns ds partition_columns.types string @@ -1105,7 +1089,7 @@ STAGE PLANS: serialization.ddl struct bucket_big { string key, string value} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 5812 + totalSize 2750 #### A masked pattern was here #### serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe @@ -1113,7 +1097,7 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: SORTBUCKETCOLSPREFIX TRUE - bucket_count 4 + bucket_count 2 bucket_field_name key column.name.delimiter , columns key,value @@ -1182,4 +1166,4 @@ POSTHOOK: Input: default@bucket_small POSTHOOK: Input: default@bucket_small@ds=2008-04-08 POSTHOOK: Input: default@bucket_small@ds=2008-04-09 #### A masked pattern was here #### -78 +38
http://git-wip-us.apache.org/repos/asf/hive/blob/3464df45/ql/src/test/results/clientpositive/auto_sortmerge_join_5.q.out ---------------------------------------------------------------------- diff --git a/ql/src/test/results/clientpositive/auto_sortmerge_join_5.q.out b/ql/src/test/results/clientpositive/auto_sortmerge_join_5.q.out index 451c3b3..45704d1 100644 --- a/ql/src/test/results/clientpositive/auto_sortmerge_join_5.q.out +++ b/ql/src/test/results/clientpositive/auto_sortmerge_join_5.q.out @@ -1,8 +1,8 @@ -PREHOOK: query: CREATE TABLE bucket_small (key string, value string) CLUSTERED BY (key) SORTED BY (key) INTO 2 BUCKETS STORED AS TEXTFILE +PREHOOK: query: CREATE TABLE bucket_small (key string, value string) CLUSTERED BY (key) SORTED BY (key) INTO 4 BUCKETS STORED AS TEXTFILE PREHOOK: type: CREATETABLE PREHOOK: Output: database:default PREHOOK: Output: default@bucket_small -POSTHOOK: query: CREATE TABLE bucket_small (key string, value string) CLUSTERED BY (key) SORTED BY (key) INTO 2 BUCKETS STORED AS TEXTFILE +POSTHOOK: query: CREATE TABLE bucket_small (key string, value string) CLUSTERED BY (key) SORTED BY (key) INTO 4 BUCKETS STORED AS TEXTFILE POSTHOOK: type: CREATETABLE POSTHOOK: Output: database:default POSTHOOK: Output: default@bucket_small @@ -22,11 +22,27 @@ POSTHOOK: query: load data local inpath '../../data/files/auto_sortmerge_join/sm POSTHOOK: type: LOAD #### A masked pattern was here #### POSTHOOK: Output: default@bucket_small -PREHOOK: query: CREATE TABLE bucket_big (key string, value string) CLUSTERED BY (key) SORTED BY (key) INTO 4 BUCKETS STORED AS TEXTFILE +PREHOOK: query: load data local inpath '../../data/files/auto_sortmerge_join/small/000002_0' INTO TABLE bucket_small +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@bucket_small +POSTHOOK: query: load data local inpath '../../data/files/auto_sortmerge_join/small/000002_0' INTO TABLE bucket_small +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@bucket_small +PREHOOK: query: load data local inpath '../../data/files/auto_sortmerge_join/small/000003_0' INTO TABLE bucket_small +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@bucket_small +POSTHOOK: query: load data local inpath '../../data/files/auto_sortmerge_join/small/000003_0' INTO TABLE bucket_small +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@bucket_small +PREHOOK: query: CREATE TABLE bucket_big (key string, value string) CLUSTERED BY (key) SORTED BY (key) INTO 2 BUCKETS STORED AS TEXTFILE PREHOOK: type: CREATETABLE PREHOOK: Output: database:default PREHOOK: Output: default@bucket_big -POSTHOOK: query: CREATE TABLE bucket_big (key string, value string) CLUSTERED BY (key) SORTED BY (key) INTO 4 BUCKETS STORED AS TEXTFILE +POSTHOOK: query: CREATE TABLE bucket_big (key string, value string) CLUSTERED BY (key) SORTED BY (key) INTO 2 BUCKETS STORED AS TEXTFILE POSTHOOK: type: CREATETABLE POSTHOOK: Output: database:default POSTHOOK: Output: default@bucket_big @@ -46,22 +62,6 @@ POSTHOOK: query: load data local inpath '../../data/files/auto_sortmerge_join/bi POSTHOOK: type: LOAD #### A masked pattern was here #### POSTHOOK: Output: default@bucket_big -PREHOOK: query: load data local inpath '../../data/files/auto_sortmerge_join/big/000002_0' INTO TABLE bucket_big -PREHOOK: type: LOAD -#### A masked pattern was here #### -PREHOOK: Output: default@bucket_big -POSTHOOK: query: load data local inpath '../../data/files/auto_sortmerge_join/big/000002_0' INTO TABLE bucket_big -POSTHOOK: type: LOAD -#### A masked pattern was here #### -POSTHOOK: Output: default@bucket_big -PREHOOK: query: load data local inpath '../../data/files/auto_sortmerge_join/big/000003_0' INTO TABLE bucket_big -PREHOOK: type: LOAD -#### A masked pattern was here #### -PREHOOK: Output: default@bucket_big -POSTHOOK: query: load data local inpath '../../data/files/auto_sortmerge_join/big/000003_0' INTO TABLE bucket_big -POSTHOOK: type: LOAD -#### A masked pattern was here #### -POSTHOOK: Output: default@bucket_big PREHOOK: query: explain extended select count(*) FROM bucket_small a JOIN bucket_big b ON a.key = b.key PREHOOK: type: QUERY POSTHOOK: query: explain extended select count(*) FROM bucket_small a JOIN bucket_big b ON a.key = b.key @@ -76,16 +76,16 @@ STAGE PLANS: Map Operator Tree: TableScan alias: b - Statistics: Num rows: 1 Data size: 58120 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 27500 Basic stats: COMPLETE Column stats: NONE GatherStats: false Filter Operator isSamplingPred: false predicate: key is not null (type: boolean) - Statistics: Num rows: 1 Data size: 58120 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 27500 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: key (type: string) outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 58120 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 27500 Basic stats: COMPLETE Column stats: NONE Sorted Merge Bucket Map Join Operator condition map: Inner Join 0 to 1 @@ -114,7 +114,7 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: SORTBUCKETCOLSPREFIX TRUE - bucket_count 4 + bucket_count 2 bucket_field_name key column.name.delimiter , columns key,value @@ -122,13 +122,13 @@ STAGE PLANS: columns.types string:string #### A masked pattern was here #### name default.bucket_big - numFiles 4 + numFiles 2 numRows 0 rawDataSize 0 serialization.ddl struct bucket_big { string key, string value} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 5812 + totalSize 2750 #### A masked pattern was here #### serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe @@ -136,7 +136,7 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: SORTBUCKETCOLSPREFIX TRUE - bucket_count 4 + bucket_count 2 bucket_field_name key column.name.delimiter , columns key,value @@ -144,13 +144,13 @@ STAGE PLANS: columns.types string:string #### A masked pattern was here #### name default.bucket_big - numFiles 4 + numFiles 2 numRows 0 rawDataSize 0 serialization.ddl struct bucket_big { string key, string value} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 5812 + totalSize 2750 #### A masked pattern was here #### serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.bucket_big @@ -216,16 +216,16 @@ STAGE PLANS: Map Operator Tree: TableScan alias: a - Statistics: Num rows: 1 Data size: 58120 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 27500 Basic stats: COMPLETE Column stats: NONE GatherStats: false Filter Operator isSamplingPred: false predicate: key is not null (type: boolean) - Statistics: Num rows: 1 Data size: 58120 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 27500 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: key (type: string) outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 58120 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 27500 Basic stats: COMPLETE Column stats: NONE Sorted Merge Bucket Map Join Operator condition map: Inner Join 0 to 1 @@ -254,7 +254,7 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: SORTBUCKETCOLSPREFIX TRUE - bucket_count 4 + bucket_count 2 bucket_field_name key column.name.delimiter , columns key,value @@ -262,13 +262,13 @@ STAGE PLANS: columns.types string:string #### A masked pattern was here #### name default.bucket_big - numFiles 4 + numFiles 2 numRows 0 rawDataSize 0 serialization.ddl struct bucket_big { string key, string value} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 5812 + totalSize 2750 #### A masked pattern was here #### serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe @@ -276,7 +276,7 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: SORTBUCKETCOLSPREFIX TRUE - bucket_count 4 + bucket_count 2 bucket_field_name key column.name.delimiter , columns key,value @@ -284,13 +284,13 @@ STAGE PLANS: columns.types string:string #### A masked pattern was here #### name default.bucket_big - numFiles 4 + numFiles 2 numRows 0 rawDataSize 0 serialization.ddl struct bucket_big { string key, string value} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 5812 + totalSize 2750 #### A masked pattern was here #### serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.bucket_big @@ -369,16 +369,16 @@ STAGE PLANS: $hdt$_1:b TableScan alias: b - Statistics: Num rows: 1 Data size: 1140 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 2260 Basic stats: COMPLETE Column stats: NONE GatherStats: false Filter Operator isSamplingPred: false predicate: key is not null (type: boolean) - Statistics: Num rows: 1 Data size: 1140 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 2260 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: key (type: string) outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 1140 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 2260 Basic stats: COMPLETE Column stats: NONE HashTable Sink Operator keys: 0 _col0 (type: string) @@ -390,16 +390,16 @@ STAGE PLANS: Map Operator Tree: TableScan alias: a - Statistics: Num rows: 1 Data size: 58120 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 27500 Basic stats: COMPLETE Column stats: NONE GatherStats: false Filter Operator isSamplingPred: false predicate: key is not null (type: boolean) - Statistics: Num rows: 1 Data size: 58120 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 27500 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: key (type: string) outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 58120 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 27500 Basic stats: COMPLETE Column stats: NONE Map Join Operator condition map: Inner Join 0 to 1 @@ -429,7 +429,7 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: SORTBUCKETCOLSPREFIX TRUE - bucket_count 4 + bucket_count 2 bucket_field_name key column.name.delimiter , columns key,value @@ -437,13 +437,13 @@ STAGE PLANS: columns.types string:string #### A masked pattern was here #### name default.bucket_big - numFiles 4 + numFiles 2 numRows 0 rawDataSize 0 serialization.ddl struct bucket_big { string key, string value} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 5812 + totalSize 2750 #### A masked pattern was here #### serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe @@ -451,7 +451,7 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: SORTBUCKETCOLSPREFIX TRUE - bucket_count 4 + bucket_count 2 bucket_field_name key column.name.delimiter , columns key,value @@ -459,13 +459,13 @@ STAGE PLANS: columns.types string:string #### A masked pattern was here #### name default.bucket_big - numFiles 4 + numFiles 2 numRows 0 rawDataSize 0 serialization.ddl struct bucket_big { string key, string value} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 5812 + totalSize 2750 #### A masked pattern was here #### serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.bucket_big @@ -475,7 +475,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - bucket_count 2 + bucket_count 4 bucket_field_name key column.name.delimiter , columns key,value @@ -492,7 +492,7 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: SORTBUCKETCOLSPREFIX TRUE - bucket_count 2 + bucket_count 4 bucket_field_name key column.name.delimiter , columns key,value @@ -500,13 +500,13 @@ STAGE PLANS: columns.types string:string #### A masked pattern was here #### name default.bucket_small - numFiles 2 + numFiles 4 numRows 0 rawDataSize 0 serialization.ddl struct bucket_small { string key, string value} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 114 + totalSize 226 #### A masked pattern was here #### serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.bucket_small @@ -551,16 +551,16 @@ STAGE PLANS: $hdt$_0:a TableScan alias: a - Statistics: Num rows: 1 Data size: 58120 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 27500 Basic stats: COMPLETE Column stats: NONE GatherStats: false Filter Operator isSamplingPred: false predicate: key is not null (type: boolean) - Statistics: Num rows: 1 Data size: 58120 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 27500 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: key (type: string) outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 58120 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 27500 Basic stats: COMPLETE Column stats: NONE HashTable Sink Operator keys: 0 _col0 (type: string) @@ -572,16 +572,16 @@ STAGE PLANS: Map Operator Tree: TableScan alias: b - Statistics: Num rows: 1 Data size: 1140 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 2260 Basic stats: COMPLETE Column stats: NONE GatherStats: false Filter Operator isSamplingPred: false predicate: key is not null (type: boolean) - Statistics: Num rows: 1 Data size: 1140 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 2260 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: key (type: string) outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 1140 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 2260 Basic stats: COMPLETE Column stats: NONE Map Join Operator condition map: Inner Join 0 to 1 @@ -611,7 +611,7 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: SORTBUCKETCOLSPREFIX TRUE - bucket_count 4 + bucket_count 2 bucket_field_name key column.name.delimiter , columns key,value @@ -619,13 +619,13 @@ STAGE PLANS: columns.types string:string #### A masked pattern was here #### name default.bucket_big - numFiles 4 + numFiles 2 numRows 0 rawDataSize 0 serialization.ddl struct bucket_big { string key, string value} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 5812 + totalSize 2750 #### A masked pattern was here #### serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe @@ -633,7 +633,7 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: SORTBUCKETCOLSPREFIX TRUE - bucket_count 4 + bucket_count 2 bucket_field_name key column.name.delimiter , columns key,value @@ -641,13 +641,13 @@ STAGE PLANS: columns.types string:string #### A masked pattern was here #### name default.bucket_big - numFiles 4 + numFiles 2 numRows 0 rawDataSize 0 serialization.ddl struct bucket_big { string key, string value} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 5812 + totalSize 2750 #### A masked pattern was here #### serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.bucket_big @@ -657,7 +657,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - bucket_count 2 + bucket_count 4 bucket_field_name key column.name.delimiter , columns key,value @@ -674,7 +674,7 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: SORTBUCKETCOLSPREFIX TRUE - bucket_count 2 + bucket_count 4 bucket_field_name key column.name.delimiter , columns key,value @@ -682,13 +682,13 @@ STAGE PLANS: columns.types string:string #### A masked pattern was here #### name default.bucket_small - numFiles 2 + numFiles 4 numRows 0 rawDataSize 0 serialization.ddl struct bucket_small { string key, string value} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 114 + totalSize 226 #### A masked pattern was here #### serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.bucket_small @@ -728,16 +728,16 @@ STAGE PLANS: Map Operator Tree: TableScan alias: a - Statistics: Num rows: 1 Data size: 58120 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 27500 Basic stats: COMPLETE Column stats: NONE GatherStats: false Filter Operator isSamplingPred: false predicate: key is not null (type: boolean) - Statistics: Num rows: 1 Data size: 58120 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 27500 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: key (type: string) outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 58120 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 27500 Basic stats: COMPLETE Column stats: NONE Sorted Merge Bucket Map Join Operator condition map: Inner Join 0 to 1 @@ -766,7 +766,7 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: SORTBUCKETCOLSPREFIX TRUE - bucket_count 4 + bucket_count 2 bucket_field_name key column.name.delimiter , columns key,value @@ -774,13 +774,13 @@ STAGE PLANS: columns.types string:string #### A masked pattern was here #### name default.bucket_big - numFiles 4 + numFiles 2 numRows 0 rawDataSize 0 serialization.ddl struct bucket_big { string key, string value} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 5812 + totalSize 2750 #### A masked pattern was here #### serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe @@ -788,7 +788,7 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: SORTBUCKETCOLSPREFIX TRUE - bucket_count 4 + bucket_count 2 bucket_field_name key column.name.delimiter , columns key,value @@ -796,13 +796,13 @@ STAGE PLANS: columns.types string:string #### A masked pattern was here #### name default.bucket_big - numFiles 4 + numFiles 2 numRows 0 rawDataSize 0 serialization.ddl struct bucket_big { string key, string value} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 5812 + totalSize 2750 #### A masked pattern was here #### serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.bucket_big http://git-wip-us.apache.org/repos/asf/hive/blob/3464df45/ql/src/test/results/clientpositive/auto_sortmerge_join_7.q.out ---------------------------------------------------------------------- diff --git a/ql/src/test/results/clientpositive/auto_sortmerge_join_7.q.out b/ql/src/test/results/clientpositive/auto_sortmerge_join_7.q.out index f335142..1959075 100644 --- a/ql/src/test/results/clientpositive/auto_sortmerge_join_7.q.out +++ b/ql/src/test/results/clientpositive/auto_sortmerge_join_7.q.out @@ -72,11 +72,11 @@ POSTHOOK: query: load data local inpath '../../data/files/auto_sortmerge_join/sm POSTHOOK: type: LOAD #### A masked pattern was here #### POSTHOOK: Output: default@bucket_small@ds=2008-04-09 -PREHOOK: query: CREATE TABLE bucket_big (key string, value string) partitioned by (ds string) CLUSTERED BY (key) SORTED BY (key) INTO 4 BUCKETS STORED AS TEXTFILE +PREHOOK: query: CREATE TABLE bucket_big (key string, value string) partitioned by (ds string) CLUSTERED BY (key) SORTED BY (key) INTO 2 BUCKETS STORED AS TEXTFILE PREHOOK: type: CREATETABLE PREHOOK: Output: database:default PREHOOK: Output: default@bucket_big -POSTHOOK: query: CREATE TABLE bucket_big (key string, value string) partitioned by (ds string) CLUSTERED BY (key) SORTED BY (key) INTO 4 BUCKETS STORED AS TEXTFILE +POSTHOOK: query: CREATE TABLE bucket_big (key string, value string) partitioned by (ds string) CLUSTERED BY (key) SORTED BY (key) INTO 2 BUCKETS STORED AS TEXTFILE POSTHOOK: type: CREATETABLE POSTHOOK: Output: database:default POSTHOOK: Output: default@bucket_big @@ -97,22 +97,6 @@ POSTHOOK: query: load data local inpath '../../data/files/auto_sortmerge_join/bi POSTHOOK: type: LOAD #### A masked pattern was here #### POSTHOOK: Output: default@bucket_big@ds=2008-04-08 -PREHOOK: query: load data local inpath '../../data/files/auto_sortmerge_join/big/000002_0' INTO TABLE bucket_big partition(ds='2008-04-08') -PREHOOK: type: LOAD -#### A masked pattern was here #### -PREHOOK: Output: default@bucket_big@ds=2008-04-08 -POSTHOOK: query: load data local inpath '../../data/files/auto_sortmerge_join/big/000002_0' INTO TABLE bucket_big partition(ds='2008-04-08') -POSTHOOK: type: LOAD -#### A masked pattern was here #### -POSTHOOK: Output: default@bucket_big@ds=2008-04-08 -PREHOOK: query: load data local inpath '../../data/files/auto_sortmerge_join/big/000003_0' INTO TABLE bucket_big partition(ds='2008-04-08') -PREHOOK: type: LOAD -#### A masked pattern was here #### -PREHOOK: Output: default@bucket_big@ds=2008-04-08 -POSTHOOK: query: load data local inpath '../../data/files/auto_sortmerge_join/big/000003_0' INTO TABLE bucket_big partition(ds='2008-04-08') -POSTHOOK: type: LOAD -#### A masked pattern was here #### -POSTHOOK: Output: default@bucket_big@ds=2008-04-08 PREHOOK: query: load data local inpath '../../data/files/auto_sortmerge_join/big/000000_0' INTO TABLE bucket_big partition(ds='2008-04-09') PREHOOK: type: LOAD #### A masked pattern was here #### @@ -130,22 +114,6 @@ POSTHOOK: query: load data local inpath '../../data/files/auto_sortmerge_join/bi POSTHOOK: type: LOAD #### A masked pattern was here #### POSTHOOK: Output: default@bucket_big@ds=2008-04-09 -PREHOOK: query: load data local inpath '../../data/files/auto_sortmerge_join/big/000002_0' INTO TABLE bucket_big partition(ds='2008-04-09') -PREHOOK: type: LOAD -#### A masked pattern was here #### -PREHOOK: Output: default@bucket_big@ds=2008-04-09 -POSTHOOK: query: load data local inpath '../../data/files/auto_sortmerge_join/big/000002_0' INTO TABLE bucket_big partition(ds='2008-04-09') -POSTHOOK: type: LOAD -#### A masked pattern was here #### -POSTHOOK: Output: default@bucket_big@ds=2008-04-09 -PREHOOK: query: load data local inpath '../../data/files/auto_sortmerge_join/big/000003_0' INTO TABLE bucket_big partition(ds='2008-04-09') -PREHOOK: type: LOAD -#### A masked pattern was here #### -PREHOOK: Output: default@bucket_big@ds=2008-04-09 -POSTHOOK: query: load data local inpath '../../data/files/auto_sortmerge_join/big/000003_0' INTO TABLE bucket_big partition(ds='2008-04-09') -POSTHOOK: type: LOAD -#### A masked pattern was here #### -POSTHOOK: Output: default@bucket_big@ds=2008-04-09 PREHOOK: query: explain extended select count(*) FROM bucket_small a JOIN bucket_big b ON a.key = b.key PREHOOK: type: QUERY POSTHOOK: query: explain extended select count(*) FROM bucket_small a JOIN bucket_big b ON a.key = b.key @@ -160,16 +128,16 @@ STAGE PLANS: Map Operator Tree: TableScan alias: b - Statistics: Num rows: 240 Data size: 116240 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 112 Data size: 55000 Basic stats: COMPLETE Column stats: NONE GatherStats: false Filter Operator isSamplingPred: false predicate: key is not null (type: boolean) - Statistics: Num rows: 240 Data size: 116240 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 112 Data size: 55000 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: key (type: string) outputColumnNames: _col0 - Statistics: Num rows: 240 Data size: 116240 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 112 Data size: 55000 Basic stats: COMPLETE Column stats: NONE Sorted Merge Bucket Map Join Operator condition map: Inner Join 0 to 1 @@ -199,7 +167,7 @@ STAGE PLANS: partition values: ds 2008-04-08 properties: - bucket_count 4 + bucket_count 2 bucket_field_name key column.name.delimiter , columns key,value @@ -207,7 +175,7 @@ STAGE PLANS: columns.types string:string #### A masked pattern was here #### name default.bucket_big - numFiles 4 + numFiles 2 numRows 0 partition_columns ds partition_columns.types string @@ -215,7 +183,7 @@ STAGE PLANS: serialization.ddl struct bucket_big { string key, string value} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 5812 + totalSize 2750 #### A masked pattern was here #### serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe @@ -223,7 +191,7 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: SORTBUCKETCOLSPREFIX TRUE - bucket_count 4 + bucket_count 2 bucket_field_name key column.name.delimiter , columns key,value @@ -248,7 +216,7 @@ STAGE PLANS: partition values: ds 2008-04-09 properties: - bucket_count 4 + bucket_count 2 bucket_field_name key column.name.delimiter , columns key,value @@ -256,7 +224,7 @@ STAGE PLANS: columns.types string:string #### A masked pattern was here #### name default.bucket_big - numFiles 4 + numFiles 2 numRows 0 partition_columns ds partition_columns.types string @@ -264,7 +232,7 @@ STAGE PLANS: serialization.ddl struct bucket_big { string key, string value} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 5812 + totalSize 2750 #### A masked pattern was here #### serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe @@ -272,7 +240,7 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: SORTBUCKETCOLSPREFIX TRUE - bucket_count 4 + bucket_count 2 bucket_field_name key column.name.delimiter , columns key,value @@ -344,7 +312,7 @@ POSTHOOK: Input: default@bucket_small POSTHOOK: Input: default@bucket_small@ds=2008-04-08 POSTHOOK: Input: default@bucket_small@ds=2008-04-09 #### A masked pattern was here #### -156 +76 PREHOOK: query: explain extended select count(*) FROM bucket_big a JOIN bucket_small b ON a.key = b.key PREHOOK: type: QUERY POSTHOOK: query: explain extended select count(*) FROM bucket_big a JOIN bucket_small b ON a.key = b.key @@ -359,16 +327,16 @@ STAGE PLANS: Map Operator Tree: TableScan alias: a - Statistics: Num rows: 240 Data size: 116240 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 112 Data size: 55000 Basic stats: COMPLETE Column stats: NONE GatherStats: false Filter Operator isSamplingPred: false predicate: key is not null (type: boolean) - Statistics: Num rows: 240 Data size: 116240 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 112 Data size: 55000 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: key (type: string) outputColumnNames: _col0 - Statistics: Num rows: 240 Data size: 116240 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 112 Data size: 55000 Basic stats: COMPLETE Column stats: NONE Sorted Merge Bucket Map Join Operator condition map: Inner Join 0 to 1 @@ -398,7 +366,7 @@ STAGE PLANS: partition values: ds 2008-04-08 properties: - bucket_count 4 + bucket_count 2 bucket_field_name key column.name.delimiter , columns key,value @@ -406,7 +374,7 @@ STAGE PLANS: columns.types string:string #### A masked pattern was here #### name default.bucket_big - numFiles 4 + numFiles 2 numRows 0 partition_columns ds partition_columns.types string @@ -414,7 +382,7 @@ STAGE PLANS: serialization.ddl struct bucket_big { string key, string value} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 5812 + totalSize 2750 #### A masked pattern was here #### serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe @@ -422,7 +390,7 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: SORTBUCKETCOLSPREFIX TRUE - bucket_count 4 + bucket_count 2 bucket_field_name key column.name.delimiter , columns key,value @@ -447,7 +415,7 @@ STAGE PLANS: partition values: ds 2008-04-09 properties: - bucket_count 4 + bucket_count 2 bucket_field_name key column.name.delimiter , columns key,value @@ -455,7 +423,7 @@ STAGE PLANS: columns.types string:string #### A masked pattern was here #### name default.bucket_big - numFiles 4 + numFiles 2 numRows 0 partition_columns ds partition_columns.types string @@ -463,7 +431,7 @@ STAGE PLANS: serialization.ddl struct bucket_big { string key, string value} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 5812 + totalSize 2750 #### A masked pattern was here #### serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe @@ -471,7 +439,7 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: SORTBUCKETCOLSPREFIX TRUE - bucket_count 4 + bucket_count 2 bucket_field_name key column.name.delimiter , columns key,value @@ -543,7 +511,7 @@ POSTHOOK: Input: default@bucket_small POSTHOOK: Input: default@bucket_small@ds=2008-04-08 POSTHOOK: Input: default@bucket_small@ds=2008-04-09 #### A masked pattern was here #### -156 +76 PREHOOK: query: explain extended select count(*) FROM bucket_big a JOIN bucket_small b ON a.key = b.key PREHOOK: type: QUERY POSTHOOK: query: explain extended select count(*) FROM bucket_big a JOIN bucket_small b ON a.key = b.key @@ -687,16 +655,16 @@ STAGE PLANS: Map Operator Tree: TableScan alias: a - Statistics: Num rows: 240 Data size: 116240 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 112 Data size: 55000 Basic stats: COMPLETE Column stats: NONE GatherStats: false Filter Operator isSamplingPred: false predicate: key is not null (type: boolean) - Statistics: Num rows: 240 Data size: 116240 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 112 Data size: 55000 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: key (type: string) outputColumnNames: _col0 - Statistics: Num rows: 240 Data size: 116240 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 112 Data size: 55000 Basic stats: COMPLETE Column stats: NONE Map Join Operator condition map: Inner Join 0 to 1 @@ -727,7 +695,7 @@ STAGE PLANS: partition values: ds 2008-04-08 properties: - bucket_count 4 + bucket_count 2 bucket_field_name key column.name.delimiter , columns key,value @@ -735,7 +703,7 @@ STAGE PLANS: columns.types string:string #### A masked pattern was here #### name default.bucket_big - numFiles 4 + numFiles 2 numRows 0 partition_columns ds partition_columns.types string @@ -743,7 +711,7 @@ STAGE PLANS: serialization.ddl struct bucket_big { string key, string value} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 5812 + totalSize 2750 #### A masked pattern was here #### serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe @@ -751,7 +719,7 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: SORTBUCKETCOLSPREFIX TRUE - bucket_count 4 + bucket_count 2 bucket_field_name key column.name.delimiter , columns key,value @@ -776,7 +744,7 @@ STAGE PLANS: partition values: ds 2008-04-09 properties: - bucket_count 4 + bucket_count 2 bucket_field_name key column.name.delimiter , columns key,value @@ -784,7 +752,7 @@ STAGE PLANS: columns.types string:string #### A masked pattern was here #### name default.bucket_big - numFiles 4 + numFiles 2 numRows 0 partition_columns ds partition_columns.types string @@ -792,7 +760,7 @@ STAGE PLANS: serialization.ddl struct bucket_big { string key, string value} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 5812 + totalSize 2750 #### A masked pattern was here #### serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe @@ -800,7 +768,7 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: SORTBUCKETCOLSPREFIX TRUE - bucket_count 4 + bucket_count 2 bucket_field_name key column.name.delimiter , columns key,value @@ -958,7 +926,7 @@ STAGE PLANS: partition values: ds 2008-04-08 properties: - bucket_count 4 + bucket_count 2 bucket_field_name key column.name.delimiter , columns key,value @@ -966,7 +934,7 @@ STAGE PLANS: columns.types string:string #### A masked pattern was here #### name default.bucket_big - numFiles 4 + numFiles 2 numRows 0 partition_columns ds partition_columns.types string @@ -974,7 +942,7 @@ STAGE PLANS: serialization.ddl struct bucket_big { string key, string value} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 5812 + totalSize 2750 #### A masked pattern was here #### serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe @@ -982,7 +950,7 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: SORTBUCKETCOLSPREFIX TRUE - bucket_count 4 + bucket_count 2 bucket_field_name key column.name.delimiter , columns key,value @@ -1006,7 +974,7 @@ STAGE PLANS: partition values: ds 2008-04-09 properties: - bucket_count 4 + bucket_count 2 bucket_field_name key column.name.delimiter , columns key,value @@ -1014,7 +982,7 @@ STAGE PLANS: columns.types string:string #### A masked pattern was here #### name default.bucket_big - numFiles 4 + numFiles 2 numRows 0 partition_columns ds partition_columns.types string @@ -1022,7 +990,7 @@ STAGE PLANS: serialization.ddl struct bucket_big { string key, string value} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 5812 + totalSize 2750 #### A masked pattern was here #### serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe @@ -1030,7 +998,7 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: SORTBUCKETCOLSPREFIX TRUE - bucket_count 4 + bucket_count 2 bucket_field_name key column.name.delimiter , columns key,value @@ -1051,16 +1019,16 @@ STAGE PLANS: $hdt$_0:a TableScan alias: a - Statistics: Num rows: 240 Data size: 116240 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 112 Data size: 55000 Basic stats: COMPLETE Column stats: NONE GatherStats: false Filter Operator isSamplingPred: false predicate: key is not null (type: boolean) - Statistics: Num rows: 240 Data size: 116240 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 112 Data size: 55000 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: key (type: string) outputColumnNames: _col0 - Statistics: Num rows: 240 Data size: 116240 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 112 Data size: 55000 Basic stats: COMPLETE Column stats: NONE HashTable Sink Operator keys: 0 _col0 (type: string) @@ -1112,7 +1080,7 @@ STAGE PLANS: partition values: ds 2008-04-08 properties: - bucket_count 4 + bucket_count 2 bucket_field_name key column.name.delimiter , columns key,value @@ -1120,7 +1088,7 @@ STAGE PLANS: columns.types string:string #### A masked pattern was here #### name default.bucket_big - numFiles 4 + numFiles 2 numRows 0 partition_columns ds partition_columns.types string @@ -1128,7 +1096,7 @@ STAGE PLANS: serialization.ddl struct bucket_big { string key, string value} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 5812 + totalSize 2750 #### A masked pattern was here #### serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe @@ -1136,7 +1104,7 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: SORTBUCKETCOLSPREFIX TRUE - bucket_count 4 + bucket_count 2 bucket_field_name key column.name.delimiter , columns key,value @@ -1161,7 +1129,7 @@ STAGE PLANS: partition values: ds 2008-04-09 properties: - bucket_count 4 + bucket_count 2 bucket_field_name key column.name.delimiter , columns key,value @@ -1169,7 +1137,7 @@ STAGE PLANS: columns.types string:string #### A masked pattern was here #### name default.bucket_big - numFiles 4 + numFiles 2 numRows 0 partition_columns ds partition_columns.types string @@ -1177,7 +1145,7 @@ STAGE PLANS: serialization.ddl struct bucket_big { string key, string value} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 5812 + totalSize 2750 #### A masked pattern was here #### serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe @@ -1185,7 +1153,7 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: SORTBUCKETCOLSPREFIX TRUE - bucket_count 4 + bucket_count 2 bucket_field_name key column.name.delimiter , columns key,value @@ -1334,16 +1302,16 @@ STAGE PLANS: Map Operator Tree: TableScan alias: a - Statistics: Num rows: 240 Data size: 116240 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 112 Data size: 55000 Basic stats: COMPLETE Column stats: NONE GatherStats: false Filter Operator isSamplingPred: false predicate: key is not null (type: boolean) - Statistics: Num rows: 240 Data size: 116240 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 112 Data size: 55000 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: key (type: string) outputColumnNames: _col0 - Statistics: Num rows: 240 Data size: 116240 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 112 Data size: 55000 Basic stats: COMPLETE Column stats: NONE Sorted Merge Bucket Map Join Operator condition map: Inner Join 0 to 1 @@ -1373,7 +1341,7 @@ STAGE PLANS: partition values: ds 2008-04-08 properties: - bucket_count 4 + bucket_count 2 bucket_field_name key column.name.delimiter , columns key,value @@ -1381,7 +1349,7 @@ STAGE PLANS: columns.types string:string #### A masked pattern was here #### name default.bucket_big - numFiles 4 + numFiles 2 numRows 0 partition_columns ds partition_columns.types string @@ -1389,7 +1357,7 @@ STAGE PLANS: serialization.ddl struct bucket_big { string key, string value} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 5812 + totalSize 2750 #### A masked pattern was here #### serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe @@ -1397,7 +1365,7 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: SORTBUCKETCOLSPREFIX TRUE - bucket_count 4 + bucket_count 2 bucket_field_name key column.name.delimiter , columns key,value @@ -1422,7 +1390,7 @@ STAGE PLANS: partition values: ds 2008-04-09 properties: - bucket_count 4 + bucket_count 2 bucket_field_name key column.name.delimiter , columns key,value @@ -1430,7 +1398,7 @@ STAGE PLANS: columns.types string:string #### A masked pattern was here #### name default.bucket_big - numFiles 4 + numFiles 2 numRows 0 partition_columns ds partition_columns.types string @@ -1438,7 +1406,7 @@ STAGE PLANS: serialization.ddl struct bucket_big { string key, string value} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 5812 + totalSize 2750 #### A masked pattern was here #### serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe @@ -1446,7 +1414,7 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: SORTBUCKETCOLSPREFIX TRUE - bucket_count 4 + bucket_count 2 bucket_field_name key column.name.delimiter , columns key,value @@ -1518,4 +1486,4 @@ POSTHOOK: Input: default@bucket_small POSTHOOK: Input: default@bucket_small@ds=2008-04-08 POSTHOOK: Input: default@bucket_small@ds=2008-04-09 #### A masked pattern was here #### -156 +76
