http://git-wip-us.apache.org/repos/asf/hive/blob/6f5c1135/ql/src/test/results/clientpositive/bucketmapjoin4.q.out ---------------------------------------------------------------------- diff --git a/ql/src/test/results/clientpositive/bucketmapjoin4.q.out b/ql/src/test/results/clientpositive/bucketmapjoin4.q.out deleted file mode 100644 index 9353318..0000000 --- a/ql/src/test/results/clientpositive/bucketmapjoin4.q.out +++ /dev/null @@ -1,1050 +0,0 @@ -PREHOOK: query: CREATE TABLE srcbucket_mapjoin(key int, value string) CLUSTERED BY (key) INTO 2 BUCKETS STORED AS TEXTFILE -PREHOOK: type: CREATETABLE -PREHOOK: Output: database:default -PREHOOK: Output: default@srcbucket_mapjoin -POSTHOOK: query: CREATE TABLE srcbucket_mapjoin(key int, value string) CLUSTERED BY (key) INTO 2 BUCKETS STORED AS TEXTFILE -POSTHOOK: type: CREATETABLE -POSTHOOK: Output: database:default -POSTHOOK: Output: default@srcbucket_mapjoin -PREHOOK: query: load data local inpath '../../data/files/srcbucket20.txt' INTO TABLE srcbucket_mapjoin -PREHOOK: type: LOAD -#### A masked pattern was here #### -PREHOOK: Output: default@srcbucket_mapjoin -POSTHOOK: query: load data local inpath '../../data/files/srcbucket20.txt' INTO TABLE srcbucket_mapjoin -POSTHOOK: type: LOAD -#### A masked pattern was here #### -POSTHOOK: Output: default@srcbucket_mapjoin -PREHOOK: query: load data local inpath '../../data/files/srcbucket21.txt' INTO TABLE srcbucket_mapjoin -PREHOOK: type: LOAD -#### A masked pattern was here #### -PREHOOK: Output: default@srcbucket_mapjoin -POSTHOOK: query: load data local inpath '../../data/files/srcbucket21.txt' INTO TABLE srcbucket_mapjoin -POSTHOOK: type: LOAD -#### A masked pattern was here #### -POSTHOOK: Output: default@srcbucket_mapjoin -PREHOOK: query: CREATE TABLE srcbucket_mapjoin_part (key int, value string) partitioned by (ds string) CLUSTERED BY (key) INTO 4 BUCKETS STORED AS TEXTFILE -PREHOOK: type: CREATETABLE -PREHOOK: Output: database:default -PREHOOK: Output: default@srcbucket_mapjoin_part -POSTHOOK: query: CREATE TABLE srcbucket_mapjoin_part (key int, value string) partitioned by (ds string) CLUSTERED BY (key) INTO 4 BUCKETS STORED AS TEXTFILE -POSTHOOK: type: CREATETABLE -POSTHOOK: Output: database:default -POSTHOOK: Output: default@srcbucket_mapjoin_part -PREHOOK: query: load data local inpath '../../data/files/srcbucket20.txt' INTO TABLE srcbucket_mapjoin_part partition(ds='2008-04-08') -PREHOOK: type: LOAD -#### A masked pattern was here #### -PREHOOK: Output: default@srcbucket_mapjoin_part -POSTHOOK: query: load data local inpath '../../data/files/srcbucket20.txt' INTO TABLE srcbucket_mapjoin_part partition(ds='2008-04-08') -POSTHOOK: type: LOAD -#### A masked pattern was here #### -POSTHOOK: Output: default@srcbucket_mapjoin_part -POSTHOOK: Output: default@srcbucket_mapjoin_part@ds=2008-04-08 -PREHOOK: query: load data local inpath '../../data/files/srcbucket21.txt' INTO TABLE srcbucket_mapjoin_part partition(ds='2008-04-08') -PREHOOK: type: LOAD -#### A masked pattern was here #### -PREHOOK: Output: default@srcbucket_mapjoin_part@ds=2008-04-08 -POSTHOOK: query: load data local inpath '../../data/files/srcbucket21.txt' INTO TABLE srcbucket_mapjoin_part partition(ds='2008-04-08') -POSTHOOK: type: LOAD -#### A masked pattern was here #### -POSTHOOK: Output: default@srcbucket_mapjoin_part@ds=2008-04-08 -PREHOOK: query: load data local inpath '../../data/files/srcbucket22.txt' INTO TABLE srcbucket_mapjoin_part partition(ds='2008-04-08') -PREHOOK: type: LOAD -#### A masked pattern was here #### -PREHOOK: Output: default@srcbucket_mapjoin_part@ds=2008-04-08 -POSTHOOK: query: load data local inpath '../../data/files/srcbucket22.txt' INTO TABLE srcbucket_mapjoin_part partition(ds='2008-04-08') -POSTHOOK: type: LOAD -#### A masked pattern was here #### -POSTHOOK: Output: default@srcbucket_mapjoin_part@ds=2008-04-08 -PREHOOK: query: load data local inpath '../../data/files/srcbucket23.txt' INTO TABLE srcbucket_mapjoin_part partition(ds='2008-04-08') -PREHOOK: type: LOAD -#### A masked pattern was here #### -PREHOOK: Output: default@srcbucket_mapjoin_part@ds=2008-04-08 -POSTHOOK: query: load data local inpath '../../data/files/srcbucket23.txt' INTO TABLE srcbucket_mapjoin_part partition(ds='2008-04-08') -POSTHOOK: type: LOAD -#### A masked pattern was here #### -POSTHOOK: Output: default@srcbucket_mapjoin_part@ds=2008-04-08 -PREHOOK: query: CREATE TABLE srcbucket_mapjoin_part_2 (key int, value string) partitioned by (ds string) CLUSTERED BY (key) INTO 2 BUCKETS STORED AS TEXTFILE -PREHOOK: type: CREATETABLE -PREHOOK: Output: database:default -PREHOOK: Output: default@srcbucket_mapjoin_part_2 -POSTHOOK: query: CREATE TABLE srcbucket_mapjoin_part_2 (key int, value string) partitioned by (ds string) CLUSTERED BY (key) INTO 2 BUCKETS STORED AS TEXTFILE -POSTHOOK: type: CREATETABLE -POSTHOOK: Output: database:default -POSTHOOK: Output: default@srcbucket_mapjoin_part_2 -PREHOOK: query: load data local inpath '../../data/files/srcbucket22.txt' INTO TABLE srcbucket_mapjoin_part_2 partition(ds='2008-04-08') -PREHOOK: type: LOAD -#### A masked pattern was here #### -PREHOOK: Output: default@srcbucket_mapjoin_part_2 -POSTHOOK: query: load data local inpath '../../data/files/srcbucket22.txt' INTO TABLE srcbucket_mapjoin_part_2 partition(ds='2008-04-08') -POSTHOOK: type: LOAD -#### A masked pattern was here #### -POSTHOOK: Output: default@srcbucket_mapjoin_part_2 -POSTHOOK: Output: default@srcbucket_mapjoin_part_2@ds=2008-04-08 -PREHOOK: query: load data local inpath '../../data/files/srcbucket23.txt' INTO TABLE srcbucket_mapjoin_part_2 partition(ds='2008-04-08') -PREHOOK: type: LOAD -#### A masked pattern was here #### -PREHOOK: Output: default@srcbucket_mapjoin_part_2@ds=2008-04-08 -POSTHOOK: query: load data local inpath '../../data/files/srcbucket23.txt' INTO TABLE srcbucket_mapjoin_part_2 partition(ds='2008-04-08') -POSTHOOK: type: LOAD -#### A masked pattern was here #### -POSTHOOK: Output: default@srcbucket_mapjoin_part_2@ds=2008-04-08 -PREHOOK: query: create table bucketmapjoin_hash_result_1 (key bigint , value1 bigint, value2 bigint) -PREHOOK: type: CREATETABLE -PREHOOK: Output: database:default -PREHOOK: Output: default@bucketmapjoin_hash_result_1 -POSTHOOK: query: create table bucketmapjoin_hash_result_1 (key bigint , value1 bigint, value2 bigint) -POSTHOOK: type: CREATETABLE -POSTHOOK: Output: database:default -POSTHOOK: Output: default@bucketmapjoin_hash_result_1 -PREHOOK: query: create table bucketmapjoin_hash_result_2 (key bigint , value1 bigint, value2 bigint) -PREHOOK: type: CREATETABLE -PREHOOK: Output: database:default -PREHOOK: Output: default@bucketmapjoin_hash_result_2 -POSTHOOK: query: create table bucketmapjoin_hash_result_2 (key bigint , value1 bigint, value2 bigint) -POSTHOOK: type: CREATETABLE -POSTHOOK: Output: database:default -POSTHOOK: Output: default@bucketmapjoin_hash_result_2 -PREHOOK: query: create table bucketmapjoin_tmp_result (key string , value1 string, value2 string) -PREHOOK: type: CREATETABLE -PREHOOK: Output: database:default -PREHOOK: Output: default@bucketmapjoin_tmp_result -POSTHOOK: query: create table bucketmapjoin_tmp_result (key string , value1 string, value2 string) -POSTHOOK: type: CREATETABLE -POSTHOOK: Output: database:default -POSTHOOK: Output: default@bucketmapjoin_tmp_result -PREHOOK: query: explain extended -insert overwrite table bucketmapjoin_tmp_result -select /*+mapjoin(b)*/ a.key, a.value, b.value -from srcbucket_mapjoin a join srcbucket_mapjoin b -on a.key=b.key -PREHOOK: type: QUERY -POSTHOOK: query: explain extended -insert overwrite table bucketmapjoin_tmp_result -select /*+mapjoin(b)*/ a.key, a.value, b.value -from srcbucket_mapjoin a join srcbucket_mapjoin b -on a.key=b.key -POSTHOOK: type: QUERY -STAGE DEPENDENCIES: - Stage-9 is a root stage - Stage-1 depends on stages: Stage-9 - Stage-7 depends on stages: Stage-1 , consists of Stage-4, Stage-3, Stage-5 - Stage-4 - Stage-0 depends on stages: Stage-4, Stage-3, Stage-6 - Stage-2 depends on stages: Stage-0 - Stage-3 - Stage-5 - Stage-6 depends on stages: Stage-5 - -STAGE PLANS: - Stage: Stage-9 - Map Reduce Local Work - Alias -> Map Local Tables: - b - Fetch Operator - limit: -1 - Alias -> Map Local Operator Tree: - b - TableScan - alias: b - Statistics: Num rows: 26 Data size: 2750 Basic stats: COMPLETE Column stats: NONE - GatherStats: false - Filter Operator - isSamplingPred: false - predicate: key is not null (type: boolean) - Statistics: Num rows: 26 Data size: 2750 Basic stats: COMPLETE Column stats: NONE - HashTable Sink Operator - keys: - 0 key (type: int) - 1 key (type: int) - Position of Big Table: 0 - Bucket Mapjoin Context: - Alias Bucket Base File Name Mapping: - b {srcbucket20.txt=[srcbucket20.txt], srcbucket21.txt=[srcbucket21.txt]} - Alias Bucket File Name Mapping: -#### A masked pattern was here #### - Alias Bucket Output File Name Mapping: -#### A masked pattern was here #### - - Stage: Stage-1 - Map Reduce - Map Operator Tree: - TableScan - alias: a - Statistics: Num rows: 26 Data size: 2750 Basic stats: COMPLETE Column stats: NONE - GatherStats: false - Filter Operator - isSamplingPred: false - predicate: key is not null (type: boolean) - Statistics: Num rows: 26 Data size: 2750 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 key (type: int) - 1 key (type: int) - outputColumnNames: _col0, _col1, _col6 - Position of Big Table: 0 - Statistics: Num rows: 28 Data size: 3025 Basic stats: COMPLETE Column stats: NONE - BucketMapJoin: true - Select Operator - expressions: _col0 (type: int), _col1 (type: string), _col6 (type: string) - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 28 Data size: 3025 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - GlobalTableId: 1 -#### A masked pattern was here #### - NumFilesPerFileSink: 1 - Statistics: Num rows: 28 Data size: 3025 Basic stats: COMPLETE Column stats: NONE -#### A masked pattern was here #### - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} - bucket_count -1 - columns key,value1,value2 - columns.comments - columns.types string:string:string -#### A masked pattern was here #### - name default.bucketmapjoin_tmp_result - numFiles 0 - numRows 0 - rawDataSize 0 - serialization.ddl struct bucketmapjoin_tmp_result { string key, string value1, string value2} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 0 -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.bucketmapjoin_tmp_result - TotalFiles: 1 - GatherStats: true - MultiFileSpray: false - Local Work: - Map Reduce Local Work - Path -> Alias: -#### A masked pattern was here #### - Path -> Partition: -#### A masked pattern was here #### - Partition - base file name: srcbucket_mapjoin - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - properties: - bucket_count 2 - bucket_field_name key - columns key,value - columns.comments - columns.types int:string -#### A masked pattern was here #### - name default.srcbucket_mapjoin - numFiles 2 - numRows 0 - rawDataSize 0 - serialization.ddl struct srcbucket_mapjoin { i32 key, string value} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 2750 -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - properties: - bucket_count 2 - bucket_field_name key - columns key,value - columns.comments - columns.types int:string -#### A masked pattern was here #### - name default.srcbucket_mapjoin - numFiles 2 - numRows 0 - rawDataSize 0 - serialization.ddl struct srcbucket_mapjoin { i32 key, string value} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 2750 -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.srcbucket_mapjoin - name: default.srcbucket_mapjoin - Truncated Path -> Alias: - /srcbucket_mapjoin [a] - - Stage: Stage-7 - Conditional Operator - - Stage: Stage-4 - Move Operator - files: - hdfs directory: true -#### A masked pattern was here #### - - Stage: Stage-0 - Move Operator - tables: - replace: true -#### A masked pattern was here #### - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} - bucket_count -1 - columns key,value1,value2 - columns.comments - columns.types string:string:string -#### A masked pattern was here #### - name default.bucketmapjoin_tmp_result - numFiles 0 - numRows 0 - rawDataSize 0 - serialization.ddl struct bucketmapjoin_tmp_result { string key, string value1, string value2} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 0 -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.bucketmapjoin_tmp_result - - Stage: Stage-2 - Stats-Aggr Operator -#### A masked pattern was here #### - - Stage: Stage-3 - Map Reduce - Map Operator Tree: - TableScan - GatherStats: false - File Output Operator - compressed: false - GlobalTableId: 0 -#### A masked pattern was here #### - NumFilesPerFileSink: 1 - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} - bucket_count -1 - columns key,value1,value2 - columns.comments - columns.types string:string:string -#### A masked pattern was here #### - name default.bucketmapjoin_tmp_result - numFiles 0 - numRows 0 - rawDataSize 0 - serialization.ddl struct bucketmapjoin_tmp_result { string key, string value1, string value2} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 0 -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.bucketmapjoin_tmp_result - TotalFiles: 1 - GatherStats: false - MultiFileSpray: false - Path -> Alias: -#### A masked pattern was here #### - Path -> Partition: -#### A masked pattern was here #### - Partition - base file name: -ext-10002 - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} - bucket_count -1 - columns key,value1,value2 - columns.comments - columns.types string:string:string -#### A masked pattern was here #### - name default.bucketmapjoin_tmp_result - numFiles 0 - numRows 0 - rawDataSize 0 - serialization.ddl struct bucketmapjoin_tmp_result { string key, string value1, string value2} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 0 -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} - bucket_count -1 - columns key,value1,value2 - columns.comments - columns.types string:string:string -#### A masked pattern was here #### - name default.bucketmapjoin_tmp_result - numFiles 0 - numRows 0 - rawDataSize 0 - serialization.ddl struct bucketmapjoin_tmp_result { string key, string value1, string value2} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 0 -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.bucketmapjoin_tmp_result - name: default.bucketmapjoin_tmp_result - Truncated Path -> Alias: -#### A masked pattern was here #### - - Stage: Stage-5 - Map Reduce - Map Operator Tree: - TableScan - GatherStats: false - File Output Operator - compressed: false - GlobalTableId: 0 -#### A masked pattern was here #### - NumFilesPerFileSink: 1 - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} - bucket_count -1 - columns key,value1,value2 - columns.comments - columns.types string:string:string -#### A masked pattern was here #### - name default.bucketmapjoin_tmp_result - numFiles 0 - numRows 0 - rawDataSize 0 - serialization.ddl struct bucketmapjoin_tmp_result { string key, string value1, string value2} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 0 -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.bucketmapjoin_tmp_result - TotalFiles: 1 - GatherStats: false - MultiFileSpray: false - Path -> Alias: -#### A masked pattern was here #### - Path -> Partition: -#### A masked pattern was here #### - Partition - base file name: -ext-10002 - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} - bucket_count -1 - columns key,value1,value2 - columns.comments - columns.types string:string:string -#### A masked pattern was here #### - name default.bucketmapjoin_tmp_result - numFiles 0 - numRows 0 - rawDataSize 0 - serialization.ddl struct bucketmapjoin_tmp_result { string key, string value1, string value2} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 0 -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} - bucket_count -1 - columns key,value1,value2 - columns.comments - columns.types string:string:string -#### A masked pattern was here #### - name default.bucketmapjoin_tmp_result - numFiles 0 - numRows 0 - rawDataSize 0 - serialization.ddl struct bucketmapjoin_tmp_result { string key, string value1, string value2} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 0 -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.bucketmapjoin_tmp_result - name: default.bucketmapjoin_tmp_result - Truncated Path -> Alias: -#### A masked pattern was here #### - - Stage: Stage-6 - Move Operator - files: - hdfs directory: true -#### A masked pattern was here #### - -PREHOOK: query: insert overwrite table bucketmapjoin_tmp_result -select /*+mapjoin(b)*/ a.key, a.value, b.value -from srcbucket_mapjoin a join srcbucket_mapjoin b -on a.key=b.key -PREHOOK: type: QUERY -PREHOOK: Input: default@srcbucket_mapjoin -PREHOOK: Output: default@bucketmapjoin_tmp_result -POSTHOOK: query: insert overwrite table bucketmapjoin_tmp_result -select /*+mapjoin(b)*/ a.key, a.value, b.value -from srcbucket_mapjoin a join srcbucket_mapjoin b -on a.key=b.key -POSTHOOK: type: QUERY -POSTHOOK: Input: default@srcbucket_mapjoin -POSTHOOK: Output: default@bucketmapjoin_tmp_result -POSTHOOK: Lineage: bucketmapjoin_tmp_result.key SIMPLE [(srcbucket_mapjoin)a.FieldSchema(name:key, type:int, comment:null), ] -POSTHOOK: Lineage: bucketmapjoin_tmp_result.value1 SIMPLE [(srcbucket_mapjoin)a.FieldSchema(name:value, type:string, comment:null), ] -POSTHOOK: Lineage: bucketmapjoin_tmp_result.value2 SIMPLE [(srcbucket_mapjoin)b.FieldSchema(name:value, type:string, comment:null), ] -PREHOOK: query: select count(1) from bucketmapjoin_tmp_result -PREHOOK: type: QUERY -PREHOOK: Input: default@bucketmapjoin_tmp_result -#### A masked pattern was here #### -POSTHOOK: query: select count(1) from bucketmapjoin_tmp_result -POSTHOOK: type: QUERY -POSTHOOK: Input: default@bucketmapjoin_tmp_result -#### A masked pattern was here #### -464 -PREHOOK: query: insert overwrite table bucketmapjoin_hash_result_1 -select sum(hash(key)), sum(hash(value1)), sum(hash(value2)) from bucketmapjoin_tmp_result -PREHOOK: type: QUERY -PREHOOK: Input: default@bucketmapjoin_tmp_result -PREHOOK: Output: default@bucketmapjoin_hash_result_1 -POSTHOOK: query: insert overwrite table bucketmapjoin_hash_result_1 -select sum(hash(key)), sum(hash(value1)), sum(hash(value2)) from bucketmapjoin_tmp_result -POSTHOOK: type: QUERY -POSTHOOK: Input: default@bucketmapjoin_tmp_result -POSTHOOK: Output: default@bucketmapjoin_hash_result_1 -POSTHOOK: Lineage: bucketmapjoin_hash_result_1.key EXPRESSION [(bucketmapjoin_tmp_result)bucketmapjoin_tmp_result.FieldSchema(name:key, type:string, comment:null), ] -POSTHOOK: Lineage: bucketmapjoin_hash_result_1.value1 EXPRESSION [(bucketmapjoin_tmp_result)bucketmapjoin_tmp_result.FieldSchema(name:value1, type:string, comment:null), ] -POSTHOOK: Lineage: bucketmapjoin_hash_result_1.value2 EXPRESSION [(bucketmapjoin_tmp_result)bucketmapjoin_tmp_result.FieldSchema(name:value2, type:string, comment:null), ] -PREHOOK: query: insert overwrite table bucketmapjoin_tmp_result -select /*+mapjoin(b)*/ a.key, a.value, b.value -from srcbucket_mapjoin a join srcbucket_mapjoin b -on a.key=b.key -PREHOOK: type: QUERY -PREHOOK: Input: default@srcbucket_mapjoin -PREHOOK: Output: default@bucketmapjoin_tmp_result -POSTHOOK: query: insert overwrite table bucketmapjoin_tmp_result -select /*+mapjoin(b)*/ a.key, a.value, b.value -from srcbucket_mapjoin a join srcbucket_mapjoin b -on a.key=b.key -POSTHOOK: type: QUERY -POSTHOOK: Input: default@srcbucket_mapjoin -POSTHOOK: Output: default@bucketmapjoin_tmp_result -POSTHOOK: Lineage: bucketmapjoin_tmp_result.key SIMPLE [(srcbucket_mapjoin)a.FieldSchema(name:key, type:int, comment:null), ] -POSTHOOK: Lineage: bucketmapjoin_tmp_result.value1 SIMPLE [(srcbucket_mapjoin)a.FieldSchema(name:value, type:string, comment:null), ] -POSTHOOK: Lineage: bucketmapjoin_tmp_result.value2 SIMPLE [(srcbucket_mapjoin)b.FieldSchema(name:value, type:string, comment:null), ] -PREHOOK: query: select count(1) from bucketmapjoin_tmp_result -PREHOOK: type: QUERY -PREHOOK: Input: default@bucketmapjoin_tmp_result -#### A masked pattern was here #### -POSTHOOK: query: select count(1) from bucketmapjoin_tmp_result -POSTHOOK: type: QUERY -POSTHOOK: Input: default@bucketmapjoin_tmp_result -#### A masked pattern was here #### -464 -PREHOOK: query: insert overwrite table bucketmapjoin_hash_result_2 -select sum(hash(key)), sum(hash(value1)), sum(hash(value2)) from bucketmapjoin_tmp_result -PREHOOK: type: QUERY -PREHOOK: Input: default@bucketmapjoin_tmp_result -PREHOOK: Output: default@bucketmapjoin_hash_result_2 -POSTHOOK: query: insert overwrite table bucketmapjoin_hash_result_2 -select sum(hash(key)), sum(hash(value1)), sum(hash(value2)) from bucketmapjoin_tmp_result -POSTHOOK: type: QUERY -POSTHOOK: Input: default@bucketmapjoin_tmp_result -POSTHOOK: Output: default@bucketmapjoin_hash_result_2 -POSTHOOK: Lineage: bucketmapjoin_hash_result_2.key EXPRESSION [(bucketmapjoin_tmp_result)bucketmapjoin_tmp_result.FieldSchema(name:key, type:string, comment:null), ] -POSTHOOK: Lineage: bucketmapjoin_hash_result_2.value1 EXPRESSION [(bucketmapjoin_tmp_result)bucketmapjoin_tmp_result.FieldSchema(name:value1, type:string, comment:null), ] -POSTHOOK: Lineage: bucketmapjoin_hash_result_2.value2 EXPRESSION [(bucketmapjoin_tmp_result)bucketmapjoin_tmp_result.FieldSchema(name:value2, type:string, comment:null), ] -PREHOOK: query: select a.key-b.key, a.value1-b.value1, a.value2-b.value2 -from bucketmapjoin_hash_result_1 a left outer join bucketmapjoin_hash_result_2 b -on a.key = b.key -PREHOOK: type: QUERY -PREHOOK: Input: default@bucketmapjoin_hash_result_1 -PREHOOK: Input: default@bucketmapjoin_hash_result_2 -#### A masked pattern was here #### -POSTHOOK: query: select a.key-b.key, a.value1-b.value1, a.value2-b.value2 -from bucketmapjoin_hash_result_1 a left outer join bucketmapjoin_hash_result_2 b -on a.key = b.key -POSTHOOK: type: QUERY -POSTHOOK: Input: default@bucketmapjoin_hash_result_1 -POSTHOOK: Input: default@bucketmapjoin_hash_result_2 -#### A masked pattern was here #### -0 0 0 -PREHOOK: query: explain extended -insert overwrite table bucketmapjoin_tmp_result -select /*+mapjoin(a)*/ a.key, a.value, b.value -from srcbucket_mapjoin a join srcbucket_mapjoin b -on a.key=b.key -PREHOOK: type: QUERY -POSTHOOK: query: explain extended -insert overwrite table bucketmapjoin_tmp_result -select /*+mapjoin(a)*/ a.key, a.value, b.value -from srcbucket_mapjoin a join srcbucket_mapjoin b -on a.key=b.key -POSTHOOK: type: QUERY -STAGE DEPENDENCIES: - Stage-9 is a root stage - Stage-1 depends on stages: Stage-9 - Stage-8 depends on stages: Stage-1 , consists of Stage-5, Stage-4, Stage-6 - Stage-5 - Stage-0 depends on stages: Stage-5, Stage-4, Stage-7 - Stage-3 depends on stages: Stage-0 - Stage-4 - Stage-6 - Stage-7 depends on stages: Stage-6 - -STAGE PLANS: - Stage: Stage-9 - Map Reduce Local Work - Alias -> Map Local Tables: - a - Fetch Operator - limit: -1 - Alias -> Map Local Operator Tree: - a - TableScan - alias: a - Statistics: Num rows: 26 Data size: 2750 Basic stats: COMPLETE Column stats: NONE - GatherStats: false - Filter Operator - isSamplingPred: false - predicate: key is not null (type: boolean) - Statistics: Num rows: 26 Data size: 2750 Basic stats: COMPLETE Column stats: NONE - HashTable Sink Operator - keys: - 0 key (type: int) - 1 key (type: int) - Position of Big Table: 1 - Bucket Mapjoin Context: - Alias Bucket Base File Name Mapping: - a {srcbucket20.txt=[srcbucket20.txt], srcbucket21.txt=[srcbucket21.txt]} - Alias Bucket File Name Mapping: -#### A masked pattern was here #### - Alias Bucket Output File Name Mapping: -#### A masked pattern was here #### - - Stage: Stage-1 - Map Reduce - Map Operator Tree: - TableScan - alias: b - Statistics: Num rows: 26 Data size: 2750 Basic stats: COMPLETE Column stats: NONE - GatherStats: false - Filter Operator - isSamplingPred: false - predicate: key is not null (type: boolean) - Statistics: Num rows: 26 Data size: 2750 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 key (type: int) - 1 key (type: int) - outputColumnNames: _col0, _col1, _col6 - Position of Big Table: 1 - Statistics: Num rows: 28 Data size: 3025 Basic stats: COMPLETE Column stats: NONE - BucketMapJoin: true - Select Operator - expressions: _col0 (type: int), _col1 (type: string), _col6 (type: string) - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 28 Data size: 3025 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - GlobalTableId: 1 -#### A masked pattern was here #### - NumFilesPerFileSink: 1 - Statistics: Num rows: 28 Data size: 3025 Basic stats: COMPLETE Column stats: NONE -#### A masked pattern was here #### - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} - bucket_count -1 - columns key,value1,value2 - columns.comments - columns.types string:string:string -#### A masked pattern was here #### - name default.bucketmapjoin_tmp_result - numFiles 1 - numRows 464 - rawDataSize 8519 - serialization.ddl struct bucketmapjoin_tmp_result { string key, string value1, string value2} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 8983 -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.bucketmapjoin_tmp_result - TotalFiles: 1 - GatherStats: true - MultiFileSpray: false - Local Work: - Map Reduce Local Work - Path -> Alias: -#### A masked pattern was here #### - Path -> Partition: -#### A masked pattern was here #### - Partition - base file name: srcbucket_mapjoin - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - properties: - bucket_count 2 - bucket_field_name key - columns key,value - columns.comments - columns.types int:string -#### A masked pattern was here #### - name default.srcbucket_mapjoin - numFiles 2 - numRows 0 - rawDataSize 0 - serialization.ddl struct srcbucket_mapjoin { i32 key, string value} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 2750 -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - properties: - bucket_count 2 - bucket_field_name key - columns key,value - columns.comments - columns.types int:string -#### A masked pattern was here #### - name default.srcbucket_mapjoin - numFiles 2 - numRows 0 - rawDataSize 0 - serialization.ddl struct srcbucket_mapjoin { i32 key, string value} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 2750 -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.srcbucket_mapjoin - name: default.srcbucket_mapjoin - Truncated Path -> Alias: - /srcbucket_mapjoin [b] - - Stage: Stage-8 - Conditional Operator - - Stage: Stage-5 - Move Operator - files: - hdfs directory: true -#### A masked pattern was here #### - - Stage: Stage-0 - Move Operator - tables: - replace: true -#### A masked pattern was here #### - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} - bucket_count -1 - columns key,value1,value2 - columns.comments - columns.types string:string:string -#### A masked pattern was here #### - name default.bucketmapjoin_tmp_result - numFiles 1 - numRows 464 - rawDataSize 8519 - serialization.ddl struct bucketmapjoin_tmp_result { string key, string value1, string value2} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 8983 -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.bucketmapjoin_tmp_result - - Stage: Stage-3 - Stats-Aggr Operator -#### A masked pattern was here #### - - Stage: Stage-4 - Map Reduce - Map Operator Tree: - TableScan - GatherStats: false - File Output Operator - compressed: false - GlobalTableId: 0 -#### A masked pattern was here #### - NumFilesPerFileSink: 1 - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} - bucket_count -1 - columns key,value1,value2 - columns.comments - columns.types string:string:string -#### A masked pattern was here #### - name default.bucketmapjoin_tmp_result - numFiles 1 - numRows 464 - rawDataSize 8519 - serialization.ddl struct bucketmapjoin_tmp_result { string key, string value1, string value2} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 8983 -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.bucketmapjoin_tmp_result - TotalFiles: 1 - GatherStats: false - MultiFileSpray: false - Path -> Alias: -#### A masked pattern was here #### - Path -> Partition: -#### A masked pattern was here #### - Partition - base file name: -ext-10002 - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} - bucket_count -1 - columns key,value1,value2 - columns.comments - columns.types string:string:string -#### A masked pattern was here #### - name default.bucketmapjoin_tmp_result - numFiles 1 - numRows 464 - rawDataSize 8519 - serialization.ddl struct bucketmapjoin_tmp_result { string key, string value1, string value2} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 8983 -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} - bucket_count -1 - columns key,value1,value2 - columns.comments - columns.types string:string:string -#### A masked pattern was here #### - name default.bucketmapjoin_tmp_result - numFiles 1 - numRows 464 - rawDataSize 8519 - serialization.ddl struct bucketmapjoin_tmp_result { string key, string value1, string value2} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 8983 -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.bucketmapjoin_tmp_result - name: default.bucketmapjoin_tmp_result - Truncated Path -> Alias: -#### A masked pattern was here #### - - Stage: Stage-6 - Map Reduce - Map Operator Tree: - TableScan - GatherStats: false - File Output Operator - compressed: false - GlobalTableId: 0 -#### A masked pattern was here #### - NumFilesPerFileSink: 1 - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} - bucket_count -1 - columns key,value1,value2 - columns.comments - columns.types string:string:string -#### A masked pattern was here #### - name default.bucketmapjoin_tmp_result - numFiles 1 - numRows 464 - rawDataSize 8519 - serialization.ddl struct bucketmapjoin_tmp_result { string key, string value1, string value2} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 8983 -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.bucketmapjoin_tmp_result - TotalFiles: 1 - GatherStats: false - MultiFileSpray: false - Path -> Alias: -#### A masked pattern was here #### - Path -> Partition: -#### A masked pattern was here #### - Partition - base file name: -ext-10002 - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} - bucket_count -1 - columns key,value1,value2 - columns.comments - columns.types string:string:string -#### A masked pattern was here #### - name default.bucketmapjoin_tmp_result - numFiles 1 - numRows 464 - rawDataSize 8519 - serialization.ddl struct bucketmapjoin_tmp_result { string key, string value1, string value2} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 8983 -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} - bucket_count -1 - columns key,value1,value2 - columns.comments - columns.types string:string:string -#### A masked pattern was here #### - name default.bucketmapjoin_tmp_result - numFiles 1 - numRows 464 - rawDataSize 8519 - serialization.ddl struct bucketmapjoin_tmp_result { string key, string value1, string value2} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 8983 -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.bucketmapjoin_tmp_result - name: default.bucketmapjoin_tmp_result - Truncated Path -> Alias: -#### A masked pattern was here #### - - Stage: Stage-7 - Move Operator - files: - hdfs directory: true -#### A masked pattern was here #### - -PREHOOK: query: insert overwrite table bucketmapjoin_tmp_result -select /*+mapjoin(a)*/ a.key, a.value, b.value -from srcbucket_mapjoin a join srcbucket_mapjoin b -on a.key=b.key -PREHOOK: type: QUERY -PREHOOK: Input: default@srcbucket_mapjoin -PREHOOK: Output: default@bucketmapjoin_tmp_result -POSTHOOK: query: insert overwrite table bucketmapjoin_tmp_result -select /*+mapjoin(a)*/ a.key, a.value, b.value -from srcbucket_mapjoin a join srcbucket_mapjoin b -on a.key=b.key -POSTHOOK: type: QUERY -POSTHOOK: Input: default@srcbucket_mapjoin -POSTHOOK: Output: default@bucketmapjoin_tmp_result -POSTHOOK: Lineage: bucketmapjoin_tmp_result.key SIMPLE [(srcbucket_mapjoin)a.FieldSchema(name:key, type:int, comment:null), ] -POSTHOOK: Lineage: bucketmapjoin_tmp_result.value1 SIMPLE [(srcbucket_mapjoin)a.FieldSchema(name:value, type:string, comment:null), ] -POSTHOOK: Lineage: bucketmapjoin_tmp_result.value2 SIMPLE [(srcbucket_mapjoin)b.FieldSchema(name:value, type:string, comment:null), ] -PREHOOK: query: select count(1) from bucketmapjoin_tmp_result -PREHOOK: type: QUERY -PREHOOK: Input: default@bucketmapjoin_tmp_result -#### A masked pattern was here #### -POSTHOOK: query: select count(1) from bucketmapjoin_tmp_result -POSTHOOK: type: QUERY -POSTHOOK: Input: default@bucketmapjoin_tmp_result -#### A masked pattern was here #### -464 -PREHOOK: query: insert overwrite table bucketmapjoin_hash_result_1 -select sum(hash(key)), sum(hash(value1)), sum(hash(value2)) from bucketmapjoin_tmp_result -PREHOOK: type: QUERY -PREHOOK: Input: default@bucketmapjoin_tmp_result -PREHOOK: Output: default@bucketmapjoin_hash_result_1 -POSTHOOK: query: insert overwrite table bucketmapjoin_hash_result_1 -select sum(hash(key)), sum(hash(value1)), sum(hash(value2)) from bucketmapjoin_tmp_result -POSTHOOK: type: QUERY -POSTHOOK: Input: default@bucketmapjoin_tmp_result -POSTHOOK: Output: default@bucketmapjoin_hash_result_1 -POSTHOOK: Lineage: bucketmapjoin_hash_result_1.key EXPRESSION [(bucketmapjoin_tmp_result)bucketmapjoin_tmp_result.FieldSchema(name:key, type:string, comment:null), ] -POSTHOOK: Lineage: bucketmapjoin_hash_result_1.value1 EXPRESSION [(bucketmapjoin_tmp_result)bucketmapjoin_tmp_result.FieldSchema(name:value1, type:string, comment:null), ] -POSTHOOK: Lineage: bucketmapjoin_hash_result_1.value2 EXPRESSION [(bucketmapjoin_tmp_result)bucketmapjoin_tmp_result.FieldSchema(name:value2, type:string, comment:null), ] -PREHOOK: query: insert overwrite table bucketmapjoin_tmp_result -select /*+mapjoin(a)*/ a.key, a.value, b.value -from srcbucket_mapjoin a join srcbucket_mapjoin b -on a.key=b.key -PREHOOK: type: QUERY -PREHOOK: Input: default@srcbucket_mapjoin -PREHOOK: Output: default@bucketmapjoin_tmp_result -POSTHOOK: query: insert overwrite table bucketmapjoin_tmp_result -select /*+mapjoin(a)*/ a.key, a.value, b.value -from srcbucket_mapjoin a join srcbucket_mapjoin b -on a.key=b.key -POSTHOOK: type: QUERY -POSTHOOK: Input: default@srcbucket_mapjoin -POSTHOOK: Output: default@bucketmapjoin_tmp_result -POSTHOOK: Lineage: bucketmapjoin_tmp_result.key SIMPLE [(srcbucket_mapjoin)a.FieldSchema(name:key, type:int, comment:null), ] -POSTHOOK: Lineage: bucketmapjoin_tmp_result.value1 SIMPLE [(srcbucket_mapjoin)a.FieldSchema(name:value, type:string, comment:null), ] -POSTHOOK: Lineage: bucketmapjoin_tmp_result.value2 SIMPLE [(srcbucket_mapjoin)b.FieldSchema(name:value, type:string, comment:null), ] -PREHOOK: query: select count(1) from bucketmapjoin_tmp_result -PREHOOK: type: QUERY -PREHOOK: Input: default@bucketmapjoin_tmp_result -#### A masked pattern was here #### -POSTHOOK: query: select count(1) from bucketmapjoin_tmp_result -POSTHOOK: type: QUERY -POSTHOOK: Input: default@bucketmapjoin_tmp_result -#### A masked pattern was here #### -464 -PREHOOK: query: insert overwrite table bucketmapjoin_hash_result_2 -select sum(hash(key)), sum(hash(value1)), sum(hash(value2)) from bucketmapjoin_tmp_result -PREHOOK: type: QUERY -PREHOOK: Input: default@bucketmapjoin_tmp_result -PREHOOK: Output: default@bucketmapjoin_hash_result_2 -POSTHOOK: query: insert overwrite table bucketmapjoin_hash_result_2 -select sum(hash(key)), sum(hash(value1)), sum(hash(value2)) from bucketmapjoin_tmp_result -POSTHOOK: type: QUERY -POSTHOOK: Input: default@bucketmapjoin_tmp_result -POSTHOOK: Output: default@bucketmapjoin_hash_result_2 -POSTHOOK: Lineage: bucketmapjoin_hash_result_2.key EXPRESSION [(bucketmapjoin_tmp_result)bucketmapjoin_tmp_result.FieldSchema(name:key, type:string, comment:null), ] -POSTHOOK: Lineage: bucketmapjoin_hash_result_2.value1 EXPRESSION [(bucketmapjoin_tmp_result)bucketmapjoin_tmp_result.FieldSchema(name:value1, type:string, comment:null), ] -POSTHOOK: Lineage: bucketmapjoin_hash_result_2.value2 EXPRESSION [(bucketmapjoin_tmp_result)bucketmapjoin_tmp_result.FieldSchema(name:value2, type:string, comment:null), ] -PREHOOK: query: select a.key-b.key, a.value1-b.value1, a.value2-b.value2 -from bucketmapjoin_hash_result_1 a left outer join bucketmapjoin_hash_result_2 b -on a.key = b.key -PREHOOK: type: QUERY -PREHOOK: Input: default@bucketmapjoin_hash_result_1 -PREHOOK: Input: default@bucketmapjoin_hash_result_2 -#### A masked pattern was here #### -POSTHOOK: query: select a.key-b.key, a.value1-b.value1, a.value2-b.value2 -from bucketmapjoin_hash_result_1 a left outer join bucketmapjoin_hash_result_2 b -on a.key = b.key -POSTHOOK: type: QUERY -POSTHOOK: Input: default@bucketmapjoin_hash_result_1 -POSTHOOK: Input: default@bucketmapjoin_hash_result_2 -#### A masked pattern was here #### -0 0 0
http://git-wip-us.apache.org/repos/asf/hive/blob/6f5c1135/ql/src/test/results/clientpositive/bucketmapjoin6.q.out ---------------------------------------------------------------------- diff --git a/ql/src/test/results/clientpositive/bucketmapjoin6.q.out b/ql/src/test/results/clientpositive/bucketmapjoin6.q.out deleted file mode 100644 index 198404b..0000000 --- a/ql/src/test/results/clientpositive/bucketmapjoin6.q.out +++ /dev/null @@ -1,146 +0,0 @@ -PREHOOK: query: -- SORT_QUERY_RESULTS - -create table tmp1 (a string, b string) clustered by (a) sorted by (a) into 10 buckets -PREHOOK: type: CREATETABLE -PREHOOK: Output: database:default -PREHOOK: Output: default@tmp1 -POSTHOOK: query: -- SORT_QUERY_RESULTS - -create table tmp1 (a string, b string) clustered by (a) sorted by (a) into 10 buckets -POSTHOOK: type: CREATETABLE -POSTHOOK: Output: database:default -POSTHOOK: Output: default@tmp1 -PREHOOK: query: create table tmp2 (a string, b string) clustered by (a) sorted by (a) into 10 buckets -PREHOOK: type: CREATETABLE -PREHOOK: Output: database:default -PREHOOK: Output: default@tmp2 -POSTHOOK: query: create table tmp2 (a string, b string) clustered by (a) sorted by (a) into 10 buckets -POSTHOOK: type: CREATETABLE -POSTHOOK: Output: database:default -POSTHOOK: Output: default@tmp2 -PREHOOK: query: insert overwrite table tmp1 select * from src where key < 50 -PREHOOK: type: QUERY -PREHOOK: Input: default@src -PREHOOK: Output: default@tmp1 -POSTHOOK: query: insert overwrite table tmp1 select * from src where key < 50 -POSTHOOK: type: QUERY -POSTHOOK: Input: default@src -POSTHOOK: Output: default@tmp1 -POSTHOOK: Lineage: tmp1.a SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: tmp1.b SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] -PREHOOK: query: insert overwrite table tmp2 select * from src where key < 50 -PREHOOK: type: QUERY -PREHOOK: Input: default@src -PREHOOK: Output: default@tmp2 -POSTHOOK: query: insert overwrite table tmp2 select * from src where key < 50 -POSTHOOK: type: QUERY -POSTHOOK: Input: default@src -POSTHOOK: Output: default@tmp2 -POSTHOOK: Lineage: tmp2.a SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: tmp2.b SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] -PREHOOK: query: create table tmp3 (a string, b string, c string) clustered by (a) sorted by (a) into 10 buckets -PREHOOK: type: CREATETABLE -PREHOOK: Output: database:default -PREHOOK: Output: default@tmp3 -POSTHOOK: query: create table tmp3 (a string, b string, c string) clustered by (a) sorted by (a) into 10 buckets -POSTHOOK: type: CREATETABLE -POSTHOOK: Output: database:default -POSTHOOK: Output: default@tmp3 -PREHOOK: query: insert overwrite table tmp3 - select /*+ MAPJOIN(l) */ i.a, i.b, l.b - from tmp1 i join tmp2 l ON i.a = l.a -PREHOOK: type: QUERY -PREHOOK: Input: default@tmp1 -PREHOOK: Input: default@tmp2 -PREHOOK: Output: default@tmp3 -POSTHOOK: query: insert overwrite table tmp3 - select /*+ MAPJOIN(l) */ i.a, i.b, l.b - from tmp1 i join tmp2 l ON i.a = l.a -POSTHOOK: type: QUERY -POSTHOOK: Input: default@tmp1 -POSTHOOK: Input: default@tmp2 -POSTHOOK: Output: default@tmp3 -POSTHOOK: Lineage: tmp3.a SIMPLE [(tmp1)i.FieldSchema(name:a, type:string, comment:null), ] -POSTHOOK: Lineage: tmp3.b SIMPLE [(tmp1)i.FieldSchema(name:b, type:string, comment:null), ] -POSTHOOK: Lineage: tmp3.c SIMPLE [(tmp2)l.FieldSchema(name:b, type:string, comment:null), ] -PREHOOK: query: select * from tmp3 -PREHOOK: type: QUERY -PREHOOK: Input: default@tmp3 -#### A masked pattern was here #### -POSTHOOK: query: select * from tmp3 -POSTHOOK: type: QUERY -POSTHOOK: Input: default@tmp3 -#### A masked pattern was here #### -0 val_0 val_0 -0 val_0 val_0 -0 val_0 val_0 -0 val_0 val_0 -0 val_0 val_0 -0 val_0 val_0 -0 val_0 val_0 -0 val_0 val_0 -0 val_0 val_0 -10 val_10 val_10 -11 val_11 val_11 -12 val_12 val_12 -12 val_12 val_12 -12 val_12 val_12 -12 val_12 val_12 -15 val_15 val_15 -15 val_15 val_15 -15 val_15 val_15 -15 val_15 val_15 -17 val_17 val_17 -18 val_18 val_18 -18 val_18 val_18 -18 val_18 val_18 -18 val_18 val_18 -19 val_19 val_19 -2 val_2 val_2 -20 val_20 val_20 -24 val_24 val_24 -24 val_24 val_24 -24 val_24 val_24 -24 val_24 val_24 -26 val_26 val_26 -26 val_26 val_26 -26 val_26 val_26 -26 val_26 val_26 -27 val_27 val_27 -28 val_28 val_28 -30 val_30 val_30 -33 val_33 val_33 -34 val_34 val_34 -35 val_35 val_35 -35 val_35 val_35 -35 val_35 val_35 -35 val_35 val_35 -35 val_35 val_35 -35 val_35 val_35 -35 val_35 val_35 -35 val_35 val_35 -35 val_35 val_35 -37 val_37 val_37 -37 val_37 val_37 -37 val_37 val_37 -37 val_37 val_37 -4 val_4 val_4 -41 val_41 val_41 -42 val_42 val_42 -42 val_42 val_42 -42 val_42 val_42 -42 val_42 val_42 -43 val_43 val_43 -44 val_44 val_44 -47 val_47 val_47 -5 val_5 val_5 -5 val_5 val_5 -5 val_5 val_5 -5 val_5 val_5 -5 val_5 val_5 -5 val_5 val_5 -5 val_5 val_5 -5 val_5 val_5 -5 val_5 val_5 -8 val_8 val_8 -9 val_9 val_9 http://git-wip-us.apache.org/repos/asf/hive/blob/6f5c1135/ql/src/test/results/clientpositive/bucketmapjoin7.q.out ---------------------------------------------------------------------- diff --git a/ql/src/test/results/clientpositive/bucketmapjoin7.q.out b/ql/src/test/results/clientpositive/bucketmapjoin7.q.out deleted file mode 100644 index 6597fb5..0000000 --- a/ql/src/test/results/clientpositive/bucketmapjoin7.q.out +++ /dev/null @@ -1,298 +0,0 @@ -PREHOOK: query: CREATE TABLE srcbucket_mapjoin_part_1 (key INT, value STRING) PARTITIONED BY (ds STRING, hr STRING) -CLUSTERED BY (key) INTO 2 BUCKETS STORED AS TEXTFILE -PREHOOK: type: CREATETABLE -PREHOOK: Output: database:default -PREHOOK: Output: default@srcbucket_mapjoin_part_1 -POSTHOOK: query: CREATE TABLE srcbucket_mapjoin_part_1 (key INT, value STRING) PARTITIONED BY (ds STRING, hr STRING) -CLUSTERED BY (key) INTO 2 BUCKETS STORED AS TEXTFILE -POSTHOOK: type: CREATETABLE -POSTHOOK: Output: database:default -POSTHOOK: Output: default@srcbucket_mapjoin_part_1 -PREHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/srcbucket20.txt' INTO TABLE srcbucket_mapjoin_part_1 PARTITION (ds='2008-04-08', hr='0') -PREHOOK: type: LOAD -#### A masked pattern was here #### -PREHOOK: Output: default@srcbucket_mapjoin_part_1 -POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/srcbucket20.txt' INTO TABLE srcbucket_mapjoin_part_1 PARTITION (ds='2008-04-08', hr='0') -POSTHOOK: type: LOAD -#### A masked pattern was here #### -POSTHOOK: Output: default@srcbucket_mapjoin_part_1 -POSTHOOK: Output: default@srcbucket_mapjoin_part_1@ds=2008-04-08/hr=0 -PREHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/srcbucket21.txt' INTO TABLE srcbucket_mapjoin_part_1 PARTITION (ds='2008-04-08', hr='0') -PREHOOK: type: LOAD -#### A masked pattern was here #### -PREHOOK: Output: default@srcbucket_mapjoin_part_1@ds=2008-04-08/hr=0 -POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/srcbucket21.txt' INTO TABLE srcbucket_mapjoin_part_1 PARTITION (ds='2008-04-08', hr='0') -POSTHOOK: type: LOAD -#### A masked pattern was here #### -POSTHOOK: Output: default@srcbucket_mapjoin_part_1@ds=2008-04-08/hr=0 -PREHOOK: query: CREATE TABLE srcbucket_mapjoin_part_2 (key INT, value STRING) PARTITIONED BY (ds STRING, hr STRING) -CLUSTERED BY (key) INTO 2 BUCKETS STORED AS TEXTFILE -PREHOOK: type: CREATETABLE -PREHOOK: Output: database:default -PREHOOK: Output: default@srcbucket_mapjoin_part_2 -POSTHOOK: query: CREATE TABLE srcbucket_mapjoin_part_2 (key INT, value STRING) PARTITIONED BY (ds STRING, hr STRING) -CLUSTERED BY (key) INTO 2 BUCKETS STORED AS TEXTFILE -POSTHOOK: type: CREATETABLE -POSTHOOK: Output: database:default -POSTHOOK: Output: default@srcbucket_mapjoin_part_2 -PREHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/srcbucket20.txt' INTO TABLE srcbucket_mapjoin_part_2 PARTITION (ds='2008-04-08', hr='0') -PREHOOK: type: LOAD -#### A masked pattern was here #### -PREHOOK: Output: default@srcbucket_mapjoin_part_2 -POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/srcbucket20.txt' INTO TABLE srcbucket_mapjoin_part_2 PARTITION (ds='2008-04-08', hr='0') -POSTHOOK: type: LOAD -#### A masked pattern was here #### -POSTHOOK: Output: default@srcbucket_mapjoin_part_2 -POSTHOOK: Output: default@srcbucket_mapjoin_part_2@ds=2008-04-08/hr=0 -PREHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/srcbucket21.txt' INTO TABLE srcbucket_mapjoin_part_2 PARTITION (ds='2008-04-08', hr='0') -PREHOOK: type: LOAD -#### A masked pattern was here #### -PREHOOK: Output: default@srcbucket_mapjoin_part_2@ds=2008-04-08/hr=0 -POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/srcbucket21.txt' INTO TABLE srcbucket_mapjoin_part_2 PARTITION (ds='2008-04-08', hr='0') -POSTHOOK: type: LOAD -#### A masked pattern was here #### -POSTHOOK: Output: default@srcbucket_mapjoin_part_2@ds=2008-04-08/hr=0 -PREHOOK: query: -- Tests that bucket map join works with a table with more than one level of partitioning - -EXPLAIN EXTENDED -SELECT /*+ MAPJOIN(b) */ a.key, b.value -FROM srcbucket_mapjoin_part_1 a JOIN srcbucket_mapjoin_part_2 b -ON a.key = b.key AND a.ds = '2008-04-08' AND b.ds = '2008-04-08' -ORDER BY a.key, b.value LIMIT 1 -PREHOOK: type: QUERY -POSTHOOK: query: -- Tests that bucket map join works with a table with more than one level of partitioning - -EXPLAIN EXTENDED -SELECT /*+ MAPJOIN(b) */ a.key, b.value -FROM srcbucket_mapjoin_part_1 a JOIN srcbucket_mapjoin_part_2 b -ON a.key = b.key AND a.ds = '2008-04-08' AND b.ds = '2008-04-08' -ORDER BY a.key, b.value LIMIT 1 -POSTHOOK: type: QUERY -STAGE DEPENDENCIES: - Stage-3 is a root stage - Stage-1 depends on stages: Stage-3 - Stage-0 depends on stages: Stage-1 - -STAGE PLANS: - Stage: Stage-3 - Map Reduce Local Work - Alias -> Map Local Tables: - b - Fetch Operator - limit: -1 - Partition Description: - Partition - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - partition values: - ds 2008-04-08 - hr 0 - properties: - bucket_count 2 - bucket_field_name key - columns key,value - columns.comments - columns.types int:string -#### A masked pattern was here #### - name default.srcbucket_mapjoin_part_2 - numFiles 2 - numRows 0 - partition_columns ds/hr - partition_columns.types string:string - rawDataSize 0 - serialization.ddl struct srcbucket_mapjoin_part_2 { i32 key, string value} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 2750 -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - properties: - bucket_count 2 - bucket_field_name key - columns key,value - columns.comments - columns.types int:string -#### A masked pattern was here #### - name default.srcbucket_mapjoin_part_2 - partition_columns ds/hr - partition_columns.types string:string - serialization.ddl struct srcbucket_mapjoin_part_2 { i32 key, string value} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.srcbucket_mapjoin_part_2 - name: default.srcbucket_mapjoin_part_2 - Alias -> Map Local Operator Tree: - b - TableScan - alias: b - Statistics: Num rows: 26 Data size: 2750 Basic stats: COMPLETE Column stats: NONE - GatherStats: false - Filter Operator - isSamplingPred: false - predicate: key is not null (type: boolean) - Statistics: Num rows: 26 Data size: 2750 Basic stats: COMPLETE Column stats: NONE - HashTable Sink Operator - keys: - 0 key (type: int) - 1 key (type: int) - Position of Big Table: 0 - Bucket Mapjoin Context: - Alias Bucket Base File Name Mapping: - b {ds=2008-04-08/hr=0/srcbucket20.txt=[ds=2008-04-08/hr=0/srcbucket20.txt], ds=2008-04-08/hr=0/srcbucket21.txt=[ds=2008-04-08/hr=0/srcbucket21.txt]} - Alias Bucket File Name Mapping: -#### A masked pattern was here #### - Alias Bucket Output File Name Mapping: -#### A masked pattern was here #### - - Stage: Stage-1 - Map Reduce - Map Operator Tree: - TableScan - alias: a - Statistics: Num rows: 687 Data size: 2750 Basic stats: COMPLETE Column stats: NONE - GatherStats: false - Filter Operator - isSamplingPred: false - predicate: key is not null (type: boolean) - Statistics: Num rows: 687 Data size: 2750 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 key (type: int) - 1 key (type: int) - outputColumnNames: _col0, _col8 - Position of Big Table: 0 - Statistics: Num rows: 755 Data size: 3025 Basic stats: COMPLETE Column stats: NONE - BucketMapJoin: true - Select Operator - expressions: _col0 (type: int), _col8 (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 755 Data size: 3025 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int), _col1 (type: string) - null sort order: aa - sort order: ++ - Statistics: Num rows: 755 Data size: 3025 Basic stats: COMPLETE Column stats: NONE - tag: -1 - TopN: 1 - TopN Hash Memory Usage: 0.1 - auto parallelism: false - Local Work: - Map Reduce Local Work - Path -> Alias: -#### A masked pattern was here #### - Path -> Partition: -#### A masked pattern was here #### - Partition - base file name: hr=0 - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - partition values: - ds 2008-04-08 - hr 0 - properties: - bucket_count 2 - bucket_field_name key - columns key,value - columns.comments - columns.types int:string -#### A masked pattern was here #### - name default.srcbucket_mapjoin_part_1 - numFiles 2 - numRows 0 - partition_columns ds/hr - partition_columns.types string:string - rawDataSize 0 - serialization.ddl struct srcbucket_mapjoin_part_1 { i32 key, string value} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 2750 -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - properties: - bucket_count 2 - bucket_field_name key - columns key,value - columns.comments - columns.types int:string -#### A masked pattern was here #### - name default.srcbucket_mapjoin_part_1 - partition_columns ds/hr - partition_columns.types string:string - serialization.ddl struct srcbucket_mapjoin_part_1 { i32 key, string value} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.srcbucket_mapjoin_part_1 - name: default.srcbucket_mapjoin_part_1 - Truncated Path -> Alias: - /srcbucket_mapjoin_part_1/ds=2008-04-08/hr=0 [a] - Needs Tagging: false - Reduce Operator Tree: - Select Operator - expressions: KEY.reducesinkkey0 (type: int), KEY.reducesinkkey1 (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 755 Data size: 3025 Basic stats: COMPLETE Column stats: NONE - Limit - Number of rows: 1 - Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - GlobalTableId: 0 -#### A masked pattern was here #### - NumFilesPerFileSink: 1 - Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE -#### A masked pattern was here #### - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - properties: - columns _col0,_col1 - columns.types int:string - escape.delim \ - hive.serialization.extend.additional.nesting.levels true - serialization.escape.crlf true - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - TotalFiles: 1 - GatherStats: false - MultiFileSpray: false - - Stage: Stage-0 - Fetch Operator - limit: 1 - Processor Tree: - ListSink - -PREHOOK: query: SELECT /*+ MAPJOIN(b) */ a.key, b.value -FROM srcbucket_mapjoin_part_1 a JOIN srcbucket_mapjoin_part_2 b -ON a.key = b.key AND a.ds = '2008-04-08' AND b.ds = '2008-04-08' -ORDER BY a.key, b.value LIMIT 1 -PREHOOK: type: QUERY -PREHOOK: Input: default@srcbucket_mapjoin_part_1 -PREHOOK: Input: default@srcbucket_mapjoin_part_1@ds=2008-04-08/hr=0 -PREHOOK: Input: default@srcbucket_mapjoin_part_2 -PREHOOK: Input: default@srcbucket_mapjoin_part_2@ds=2008-04-08/hr=0 -#### A masked pattern was here #### -POSTHOOK: query: SELECT /*+ MAPJOIN(b) */ a.key, b.value -FROM srcbucket_mapjoin_part_1 a JOIN srcbucket_mapjoin_part_2 b -ON a.key = b.key AND a.ds = '2008-04-08' AND b.ds = '2008-04-08' -ORDER BY a.key, b.value LIMIT 1 -POSTHOOK: type: QUERY -POSTHOOK: Input: default@srcbucket_mapjoin_part_1 -POSTHOOK: Input: default@srcbucket_mapjoin_part_1@ds=2008-04-08/hr=0 -POSTHOOK: Input: default@srcbucket_mapjoin_part_2 -POSTHOOK: Input: default@srcbucket_mapjoin_part_2@ds=2008-04-08/hr=0 -#### A masked pattern was here #### -0 val_0