http://git-wip-us.apache.org/repos/asf/hive/blob/3890ed65/ql/src/test/results/beelinepositive/bucketmapjoin1.q.out ---------------------------------------------------------------------- diff --git a/ql/src/test/results/beelinepositive/bucketmapjoin1.q.out b/ql/src/test/results/beelinepositive/bucketmapjoin1.q.out deleted file mode 100644 index e7a798b..0000000 --- a/ql/src/test/results/beelinepositive/bucketmapjoin1.q.out +++ /dev/null @@ -1,1131 +0,0 @@ -Saving all output to "!!{outputDirectory}!!/bucketmapjoin1.q.raw". Enter "record" with no arguments to stop it. ->>> !run !!{qFileDirectory}!!/bucketmapjoin1.q ->>> CREATE TABLE srcbucket_mapjoin(key int, value string) CLUSTERED BY (key) INTO 2 BUCKETS STORED AS TEXTFILE; -No rows affected ->>> ->>> CREATE TABLE srcbucket_mapjoin_part (key int, value string) partitioned by (ds string) CLUSTERED BY (key) INTO 4 BUCKETS STORED AS TEXTFILE; -No rows affected ->>> ->>> CREATE TABLE srcbucket_mapjoin_part_2 (key int, value string) partitioned by (ds string) CLUSTERED BY (key) INTO 2 BUCKETS STORED AS TEXTFILE; -No rows affected ->>> ->>> set hive.optimize.bucketmapjoin = true; -No rows affected ->>> ->>> -- empty partitions (HIVE-3205) ->>> explain extended -select /*+mapjoin(b)*/ a.key, a.value, b.value -from srcbucket_mapjoin_part a join srcbucket_mapjoin_part_2 b -on a.key=b.key where b.ds="2008-04-08"; -'Explain' -'ABSTRACT SYNTAX TREE:' -' (TOK_QUERY (TOK_FROM (TOK_JOIN (TOK_TABREF (TOK_TABNAME srcbucket_mapjoin_part) a) (TOK_TABREF (TOK_TABNAME srcbucket_mapjoin_part_2) b) (= (. (TOK_TABLE_OR_COL a) key) (. (TOK_TABLE_OR_COL b) key)))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_HINTLIST (TOK_HINT TOK_MAPJOIN (TOK_HINTARGLIST b))) (TOK_SELEXPR (. (TOK_TABLE_OR_COL a) key)) (TOK_SELEXPR (. (TOK_TABLE_OR_COL a) value)) (TOK_SELEXPR (. (TOK_TABLE_OR_COL b) value))) (TOK_WHERE (= (. (TOK_TABLE_OR_COL b) ds) "2008-04-08"))))' -'' -'STAGE DEPENDENCIES:' -' Stage-3 is a root stage' -' Stage-1 depends on stages: Stage-3' -' Stage-0 is a root stage' -'' -'STAGE PLANS:' -' Stage: Stage-3' -' Map Reduce Local Work' -' Alias -> Map Local Tables:' -' b ' -' Fetch Operator' -' limit: -1' -' Alias -> Map Local Operator Tree:' -' b ' -' TableScan' -' alias: b' -' GatherStats: false' -' Filter Operator' -' isSamplingPred: false' -' predicate:' -' expr: (ds = '2008-04-08')' -' type: boolean' -' HashTable Sink Operator' -' condition expressions:' -' 0 {key} {value}' -' 1 {value} {ds}' -' handleSkewJoin: false' -' keys:' -' 0 [Column[key]]' -' 1 [Column[key]]' -' Position of Big Table: 0' -' Bucket Mapjoin Context:' -' Alias Bucket Base File Name Mapping:' -' b {}' -' Alias Bucket File Name Mapping:' -' b {}' -'' -' Stage: Stage-1' -' Map Reduce' -' Alias -> Map Operator Tree:' -' a ' -' TableScan' -' alias: a' -' GatherStats: false' -' Map Join Operator' -' condition map:' -' Inner Join 0 to 1' -' condition expressions:' -' 0 {key} {value}' -' 1 {value} {ds}' -' handleSkewJoin: false' -' keys:' -' 0 [Column[key]]' -' 1 [Column[key]]' -' outputColumnNames: _col0, _col1, _col6, _col7' -' Position of Big Table: 0' -' Select Operator' -' expressions:' -' expr: _col0' -' type: int' -' expr: _col1' -' type: string' -' expr: _col6' -' type: string' -' expr: _col7' -' type: string' -' outputColumnNames: _col0, _col1, _col6, _col7' -' Select Operator' -' expressions:' -' expr: _col0' -' type: int' -' expr: _col1' -' type: string' -' expr: _col6' -' type: string' -' outputColumnNames: _col0, _col1, _col2' -' File Output Operator' -' compressed: false' -' GlobalTableId: 0' -' directory: file:!!{hive.exec.scratchdir}!!' -' NumFilesPerFileSink: 1' -' Stats Publishing Key Prefix: file:!!{hive.exec.scratchdir}!!' -' table:' -' input format: org.apache.hadoop.mapred.TextInputFormat' -' output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat' -' properties:' -' columns _col0,_col1,_col2' -' columns.types int:string:string' -' escape.delim \' -' serialization.format 1' -' TotalFiles: 1' -' GatherStats: false' -' MultiFileSpray: false' -' Local Work:' -' Map Reduce Local Work' -' Needs Tagging: false' -'' -' Stage: Stage-0' -' Fetch Operator' -' limit: -1' -'' -'' -105 rows selected ->>> ->>> select /*+mapjoin(b)*/ a.key, a.value, b.value -from srcbucket_mapjoin_part a join srcbucket_mapjoin_part_2 b -on a.key=b.key where b.ds="2008-04-08"; -'key','value','value' -No rows selected ->>> ->>> explain extended -select /*+mapjoin(a)*/ a.key, a.value, b.value -from srcbucket_mapjoin_part a join srcbucket_mapjoin_part_2 b -on a.key=b.key where b.ds="2008-04-08"; -'Explain' -'ABSTRACT SYNTAX TREE:' -' (TOK_QUERY (TOK_FROM (TOK_JOIN (TOK_TABREF (TOK_TABNAME srcbucket_mapjoin_part) a) (TOK_TABREF (TOK_TABNAME srcbucket_mapjoin_part_2) b) (= (. (TOK_TABLE_OR_COL a) key) (. (TOK_TABLE_OR_COL b) key)))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_HINTLIST (TOK_HINT TOK_MAPJOIN (TOK_HINTARGLIST a))) (TOK_SELEXPR (. (TOK_TABLE_OR_COL a) key)) (TOK_SELEXPR (. (TOK_TABLE_OR_COL a) value)) (TOK_SELEXPR (. (TOK_TABLE_OR_COL b) value))) (TOK_WHERE (= (. (TOK_TABLE_OR_COL b) ds) "2008-04-08"))))' -'' -'STAGE DEPENDENCIES:' -' Stage-3 is a root stage' -' Stage-1 depends on stages: Stage-3' -' Stage-0 is a root stage' -'' -'STAGE PLANS:' -' Stage: Stage-3' -' Map Reduce Local Work' -' Alias -> Map Local Tables:' -' a ' -' Fetch Operator' -' limit: -1' -' Alias -> Map Local Operator Tree:' -' a ' -' TableScan' -' alias: a' -' GatherStats: false' -' HashTable Sink Operator' -' condition expressions:' -' 0 {key} {value}' -' 1 {value} {ds}' -' handleSkewJoin: false' -' keys:' -' 0 [Column[key]]' -' 1 [Column[key]]' -' Position of Big Table: 1' -' Bucket Mapjoin Context:' -' Alias Bucket Base File Name Mapping:' -' a {}' -' Alias Bucket File Name Mapping:' -' a {}' -'' -' Stage: Stage-1' -' Map Reduce' -' Alias -> Map Operator Tree:' -' b ' -' TableScan' -' alias: b' -' GatherStats: false' -' Filter Operator' -' isSamplingPred: false' -' predicate:' -' expr: (ds = '2008-04-08')' -' type: boolean' -' Map Join Operator' -' condition map:' -' Inner Join 0 to 1' -' condition expressions:' -' 0 {key} {value}' -' 1 {value} {ds}' -' handleSkewJoin: false' -' keys:' -' 0 [Column[key]]' -' 1 [Column[key]]' -' outputColumnNames: _col0, _col1, _col6, _col7' -' Position of Big Table: 1' -' Select Operator' -' expressions:' -' expr: _col0' -' type: int' -' expr: _col1' -' type: string' -' expr: _col6' -' type: string' -' expr: _col7' -' type: string' -' outputColumnNames: _col0, _col1, _col6, _col7' -' Select Operator' -' expressions:' -' expr: _col0' -' type: int' -' expr: _col1' -' type: string' -' expr: _col6' -' type: string' -' outputColumnNames: _col0, _col1, _col2' -' File Output Operator' -' compressed: false' -' GlobalTableId: 0' -' directory: file:!!{hive.exec.scratchdir}!!' -' NumFilesPerFileSink: 1' -' Stats Publishing Key Prefix: file:!!{hive.exec.scratchdir}!!' -' table:' -' input format: org.apache.hadoop.mapred.TextInputFormat' -' output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat' -' properties:' -' columns _col0,_col1,_col2' -' columns.types int:string:string' -' escape.delim \' -' serialization.format 1' -' TotalFiles: 1' -' GatherStats: false' -' MultiFileSpray: false' -' Local Work:' -' Map Reduce Local Work' -' Needs Tagging: false' -'' -' Stage: Stage-0' -' Fetch Operator' -' limit: -1' -'' -'' -105 rows selected ->>> ->>> select /*+mapjoin(a)*/ a.key, a.value, b.value -from srcbucket_mapjoin_part a join srcbucket_mapjoin_part_2 b -on a.key=b.key where b.ds="2008-04-08"; -'key','value','value' -No rows selected ->>> ->>> load data local inpath '../data/files/srcbucket20.txt' INTO TABLE srcbucket_mapjoin; -No rows affected ->>> load data local inpath '../data/files/srcbucket21.txt' INTO TABLE srcbucket_mapjoin; -No rows affected ->>> ->>> load data local inpath '../data/files/srcbucket20.txt' INTO TABLE srcbucket_mapjoin_part partition(ds='2008-04-08'); -No rows affected ->>> load data local inpath '../data/files/srcbucket21.txt' INTO TABLE srcbucket_mapjoin_part partition(ds='2008-04-08'); -No rows affected ->>> load data local inpath '../data/files/srcbucket22.txt' INTO TABLE srcbucket_mapjoin_part partition(ds='2008-04-08'); -No rows affected ->>> load data local inpath '../data/files/srcbucket23.txt' INTO TABLE srcbucket_mapjoin_part partition(ds='2008-04-08'); -No rows affected ->>> ->>> load data local inpath '../data/files/srcbucket22.txt' INTO TABLE srcbucket_mapjoin_part_2 partition(ds='2008-04-08'); -No rows affected ->>> load data local inpath '../data/files/srcbucket23.txt' INTO TABLE srcbucket_mapjoin_part_2 partition(ds='2008-04-08'); -No rows affected ->>> ->>> create table bucketmapjoin_hash_result_1 (key bigint , value1 bigint, value2 bigint); -No rows affected ->>> create table bucketmapjoin_hash_result_2 (key bigint , value1 bigint, value2 bigint); -No rows affected ->>> ->>> set hive.optimize.bucketmapjoin = true; -No rows affected ->>> create table bucketmapjoin_tmp_result (key string , value1 string, value2 string); -No rows affected ->>> ->>> explain extended -insert overwrite table bucketmapjoin_tmp_result -select /*+mapjoin(b)*/ a.key, a.value, b.value -from srcbucket_mapjoin a join srcbucket_mapjoin_part b -on a.key=b.key where b.ds="2008-04-08"; -'Explain' -'ABSTRACT SYNTAX TREE:' -' (TOK_QUERY (TOK_FROM (TOK_JOIN (TOK_TABREF (TOK_TABNAME srcbucket_mapjoin) a) (TOK_TABREF (TOK_TABNAME srcbucket_mapjoin_part) b) (= (. (TOK_TABLE_OR_COL a) key) (. (TOK_TABLE_OR_COL b) key)))) (TOK_INSERT (TOK_DESTINATION (TOK_TAB (TOK_TABNAME bucketmapjoin_tmp_result))) (TOK_SELECT (TOK_HINTLIST (TOK_HINT TOK_MAPJOIN (TOK_HINTARGLIST b))) (TOK_SELEXPR (. (TOK_TABLE_OR_COL a) key)) (TOK_SELEXPR (. (TOK_TABLE_OR_COL a) value)) (TOK_SELEXPR (. (TOK_TABLE_OR_COL b) value))) (TOK_WHERE (= (. (TOK_TABLE_OR_COL b) ds) "2008-04-08"))))' -'' -'STAGE DEPENDENCIES:' -' Stage-9 is a root stage' -' Stage-1 depends on stages: Stage-9' -' Stage-7 depends on stages: Stage-1 , consists of Stage-4, Stage-3, Stage-5' -' Stage-4' -' Stage-0 depends on stages: Stage-4, Stage-3, Stage-6' -' Stage-2 depends on stages: Stage-0' -' Stage-3' -' Stage-5' -' Stage-6 depends on stages: Stage-5' -'' -'STAGE PLANS:' -' Stage: Stage-9' -' Map Reduce Local Work' -' Alias -> Map Local Tables:' -' b ' -' Fetch Operator' -' limit: -1' -' Alias -> Map Local Operator Tree:' -' b ' -' TableScan' -' alias: b' -' GatherStats: false' -' HashTable Sink Operator' -' condition expressions:' -' 0 {key} {value}' -' 1 {value} {ds}' -' handleSkewJoin: false' -' keys:' -' 0 [Column[key]]' -' 1 [Column[key]]' -' Position of Big Table: 0' -' Bucket Mapjoin Context:' -' Alias Bucket Base File Name Mapping:' -' b {srcbucket20.txt=[ds=2008-04-08/srcbucket20.txt, ds=2008-04-08/srcbucket22.txt], srcbucket21.txt=[ds=2008-04-08/srcbucket21.txt, ds=2008-04-08/srcbucket23.txt]}' -' Alias Bucket File Name Mapping:' -' b {!!{hive.metastore.warehouse.dir}!!/bucketmapjoin1.db/srcbucket_mapjoin/srcbucket20.txt=[!!{hive.metastore.warehouse.dir}!!/bucketmapjoin1.db/srcbucket_mapjoin_part/ds=2008-04-08/srcbucket20.txt, !!{hive.metastore.warehouse.dir}!!/bucketmapjoin1.db/srcbucket_mapjoin_part/ds=2008-04-08/srcbucket22.txt], !!{hive.metastore.warehouse.dir}!!/bucketmapjoin1.db/srcbucket_mapjoin/srcbucket21.txt=[!!{hive.metastore.warehouse.dir}!!/bucketmapjoin1.db/srcbucket_mapjoin_part/ds=2008-04-08/srcbucket21.txt, !!{hive.metastore.warehouse.dir}!!/bucketmapjoin1.db/srcbucket_mapjoin_part/ds=2008-04-08/srcbucket23.txt]}' -' Alias Bucket Output File Name Mapping:' -' !!{hive.metastore.warehouse.dir}!!/bucketmapjoin1.db/srcbucket_mapjoin/srcbucket20.txt 0' -' !!{hive.metastore.warehouse.dir}!!/bucketmapjoin1.db/srcbucket_mapjoin/srcbucket21.txt 1' -'' -' Stage: Stage-1' -' Map Reduce' -' Alias -> Map Operator Tree:' -' a ' -' TableScan' -' alias: a' -' GatherStats: false' -' Map Join Operator' -' condition map:' -' Inner Join 0 to 1' -' condition expressions:' -' 0 {key} {value}' -' 1 {value} {ds}' -' handleSkewJoin: false' -' keys:' -' 0 [Column[key]]' -' 1 [Column[key]]' -' outputColumnNames: _col0, _col1, _col5, _col6' -' Position of Big Table: 0' -' Select Operator' -' expressions:' -' expr: _col0' -' type: int' -' expr: _col1' -' type: string' -' expr: _col5' -' type: string' -' expr: _col6' -' type: string' -' outputColumnNames: _col0, _col1, _col5, _col6' -' Select Operator' -' expressions:' -' expr: _col0' -' type: int' -' expr: _col1' -' type: string' -' expr: _col5' -' type: string' -' outputColumnNames: _col0, _col1, _col2' -' File Output Operator' -' compressed: false' -' GlobalTableId: 1' -' directory: pfile:!!{hive.exec.scratchdir}!!' -' NumFilesPerFileSink: 1' -' Stats Publishing Key Prefix: pfile:!!{hive.exec.scratchdir}!!' -' table:' -' input format: org.apache.hadoop.mapred.TextInputFormat' -' output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat' -' properties:' -' bucket_count -1' -' columns key,value1,value2' -' columns.types string:string:string' -' file.inputformat org.apache.hadoop.mapred.TextInputFormat' -' file.outputformat org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat' -' location !!{hive.metastore.warehouse.dir}!!/bucketmapjoin1.db/bucketmapjoin_tmp_result' -' name bucketmapjoin1.bucketmapjoin_tmp_result' -' serialization.ddl struct bucketmapjoin_tmp_result { string key, string value1, string value2}' -' serialization.format 1' -' serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe' -' transient_lastDdlTime !!UNIXTIME!!' -' serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe' -' name: bucketmapjoin1.bucketmapjoin_tmp_result' -' TotalFiles: 1' -' GatherStats: true' -' MultiFileSpray: false' -' Local Work:' -' Map Reduce Local Work' -' Needs Tagging: false' -' Path -> Alias:' -' !!{hive.metastore.warehouse.dir}!!/bucketmapjoin1.db/srcbucket_mapjoin [a]' -' Path -> Partition:' -' !!{hive.metastore.warehouse.dir}!!/bucketmapjoin1.db/srcbucket_mapjoin ' -' Partition' -' base file name: srcbucket_mapjoin' -' input format: org.apache.hadoop.mapred.TextInputFormat' -' output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat' -' properties:' -' bucket_count 2' -' bucket_field_name key' -' columns key,value' -' columns.types int:string' -' file.inputformat org.apache.hadoop.mapred.TextInputFormat' -' file.outputformat org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat' -' location !!{hive.metastore.warehouse.dir}!!/bucketmapjoin1.db/srcbucket_mapjoin' -' name bucketmapjoin1.srcbucket_mapjoin' -' numFiles 2' -' numPartitions 0' -' numRows 0' -' rawDataSize 0' -' serialization.ddl struct srcbucket_mapjoin { i32 key, string value}' -' serialization.format 1' -' serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe' -' totalSize 2750' -' transient_lastDdlTime !!UNIXTIME!!' -' serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe' -' ' -' input format: org.apache.hadoop.mapred.TextInputFormat' -' output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat' -' properties:' -' bucket_count 2' -' bucket_field_name key' -' columns key,value' -' columns.types int:string' -' file.inputformat org.apache.hadoop.mapred.TextInputFormat' -' file.outputformat org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat' -' location !!{hive.metastore.warehouse.dir}!!/bucketmapjoin1.db/srcbucket_mapjoin' -' name bucketmapjoin1.srcbucket_mapjoin' -' numFiles 2' -' numPartitions 0' -' numRows 0' -' rawDataSize 0' -' serialization.ddl struct srcbucket_mapjoin { i32 key, string value}' -' serialization.format 1' -' serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe' -' totalSize 2750' -' transient_lastDdlTime !!UNIXTIME!!' -' serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe' -' name: bucketmapjoin1.srcbucket_mapjoin' -' name: bucketmapjoin1.srcbucket_mapjoin' -'' -' Stage: Stage-7' -' Conditional Operator' -'' -' Stage: Stage-4' -' Move Operator' -' files:' -' hdfs directory: true' -' source: pfile:!!{hive.exec.scratchdir}!!' -' destination: pfile:!!{hive.exec.scratchdir}!!' -'' -' Stage: Stage-0' -' Move Operator' -' tables:' -' replace: true' -' source: pfile:!!{hive.exec.scratchdir}!!' -' table:' -' input format: org.apache.hadoop.mapred.TextInputFormat' -' output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat' -' properties:' -' bucket_count -1' -' columns key,value1,value2' -' columns.types string:string:string' -' file.inputformat org.apache.hadoop.mapred.TextInputFormat' -' file.outputformat org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat' -' location !!{hive.metastore.warehouse.dir}!!/bucketmapjoin1.db/bucketmapjoin_tmp_result' -' name bucketmapjoin1.bucketmapjoin_tmp_result' -' serialization.ddl struct bucketmapjoin_tmp_result { string key, string value1, string value2}' -' serialization.format 1' -' serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe' -' transient_lastDdlTime !!UNIXTIME!!' -' serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe' -' name: bucketmapjoin1.bucketmapjoin_tmp_result' -' tmp directory: pfile:!!{hive.exec.scratchdir}!!' -'' -' Stage: Stage-2' -' Stats-Aggr Operator' -' Stats Aggregation Key Prefix: pfile:!!{hive.exec.scratchdir}!!' -'' -' Stage: Stage-3' -' Map Reduce' -' Alias -> Map Operator Tree:' -' pfile:!!{hive.exec.scratchdir}!! ' -' File Output Operator' -' compressed: false' -' GlobalTableId: 0' -' directory: pfile:!!{hive.exec.scratchdir}!!' -' NumFilesPerFileSink: 1' -' table:' -' input format: org.apache.hadoop.mapred.TextInputFormat' -' output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat' -' properties:' -' bucket_count -1' -' columns key,value1,value2' -' columns.types string:string:string' -' file.inputformat org.apache.hadoop.mapred.TextInputFormat' -' file.outputformat org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat' -' location !!{hive.metastore.warehouse.dir}!!/bucketmapjoin1.db/bucketmapjoin_tmp_result' -' name bucketmapjoin1.bucketmapjoin_tmp_result' -' serialization.ddl struct bucketmapjoin_tmp_result { string key, string value1, string value2}' -' serialization.format 1' -' serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe' -' transient_lastDdlTime !!UNIXTIME!!' -' serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe' -' name: bucketmapjoin1.bucketmapjoin_tmp_result' -' TotalFiles: 1' -' GatherStats: false' -' MultiFileSpray: false' -' Needs Tagging: false' -' Path -> Alias:' -' pfile:!!{hive.exec.scratchdir}!! [pfile:!!{hive.exec.scratchdir}!!]' -' Path -> Partition:' -' pfile:!!{hive.exec.scratchdir}!! ' -' Partition' -' base file name: -ext-10002' -' input format: org.apache.hadoop.mapred.TextInputFormat' -' output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat' -' properties:' -' bucket_count -1' -' columns key,value1,value2' -' columns.types string:string:string' -' file.inputformat org.apache.hadoop.mapred.TextInputFormat' -' file.outputformat org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat' -' location !!{hive.metastore.warehouse.dir}!!/bucketmapjoin1.db/bucketmapjoin_tmp_result' -' name bucketmapjoin1.bucketmapjoin_tmp_result' -' serialization.ddl struct bucketmapjoin_tmp_result { string key, string value1, string value2}' -' serialization.format 1' -' serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe' -' transient_lastDdlTime !!UNIXTIME!!' -' serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe' -' ' -' input format: org.apache.hadoop.mapred.TextInputFormat' -' output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat' -' properties:' -' bucket_count -1' -' columns key,value1,value2' -' columns.types string:string:string' -' file.inputformat org.apache.hadoop.mapred.TextInputFormat' -' file.outputformat org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat' -' location !!{hive.metastore.warehouse.dir}!!/bucketmapjoin1.db/bucketmapjoin_tmp_result' -' name bucketmapjoin1.bucketmapjoin_tmp_result' -' serialization.ddl struct bucketmapjoin_tmp_result { string key, string value1, string value2}' -' serialization.format 1' -' serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe' -' transient_lastDdlTime !!UNIXTIME!!' -' serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe' -' name: bucketmapjoin1.bucketmapjoin_tmp_result' -' name: bucketmapjoin1.bucketmapjoin_tmp_result' -'' -' Stage: Stage-5' -' Map Reduce' -' Alias -> Map Operator Tree:' -' pfile:!!{hive.exec.scratchdir}!! ' -' File Output Operator' -' compressed: false' -' GlobalTableId: 0' -' directory: pfile:!!{hive.exec.scratchdir}!!' -' NumFilesPerFileSink: 1' -' table:' -' input format: org.apache.hadoop.mapred.TextInputFormat' -' output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat' -' properties:' -' bucket_count -1' -' columns key,value1,value2' -' columns.types string:string:string' -' file.inputformat org.apache.hadoop.mapred.TextInputFormat' -' file.outputformat org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat' -' location !!{hive.metastore.warehouse.dir}!!/bucketmapjoin1.db/bucketmapjoin_tmp_result' -' name bucketmapjoin1.bucketmapjoin_tmp_result' -' serialization.ddl struct bucketmapjoin_tmp_result { string key, string value1, string value2}' -' serialization.format 1' -' serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe' -' transient_lastDdlTime !!UNIXTIME!!' -' serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe' -' name: bucketmapjoin1.bucketmapjoin_tmp_result' -' TotalFiles: 1' -' GatherStats: false' -' MultiFileSpray: false' -' Needs Tagging: false' -' Path -> Alias:' -' pfile:!!{hive.exec.scratchdir}!! [pfile:!!{hive.exec.scratchdir}!!]' -' Path -> Partition:' -' pfile:!!{hive.exec.scratchdir}!! ' -' Partition' -' base file name: -ext-10002' -' input format: org.apache.hadoop.mapred.TextInputFormat' -' output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat' -' properties:' -' bucket_count -1' -' columns key,value1,value2' -' columns.types string:string:string' -' file.inputformat org.apache.hadoop.mapred.TextInputFormat' -' file.outputformat org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat' -' location !!{hive.metastore.warehouse.dir}!!/bucketmapjoin1.db/bucketmapjoin_tmp_result' -' name bucketmapjoin1.bucketmapjoin_tmp_result' -' serialization.ddl struct bucketmapjoin_tmp_result { string key, string value1, string value2}' -' serialization.format 1' -' serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe' -' transient_lastDdlTime !!UNIXTIME!!' -' serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe' -' ' -' input format: org.apache.hadoop.mapred.TextInputFormat' -' output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat' -' properties:' -' bucket_count -1' -' columns key,value1,value2' -' columns.types string:string:string' -' file.inputformat org.apache.hadoop.mapred.TextInputFormat' -' file.outputformat org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat' -' location !!{hive.metastore.warehouse.dir}!!/bucketmapjoin1.db/bucketmapjoin_tmp_result' -' name bucketmapjoin1.bucketmapjoin_tmp_result' -' serialization.ddl struct bucketmapjoin_tmp_result { string key, string value1, string value2}' -' serialization.format 1' -' serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe' -' transient_lastDdlTime !!UNIXTIME!!' -' serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe' -' name: bucketmapjoin1.bucketmapjoin_tmp_result' -' name: bucketmapjoin1.bucketmapjoin_tmp_result' -'' -' Stage: Stage-6' -' Move Operator' -' files:' -' hdfs directory: true' -' source: pfile:!!{hive.exec.scratchdir}!!' -' destination: pfile:!!{hive.exec.scratchdir}!!' -'' -'' -350 rows selected ->>> ->>> insert overwrite table bucketmapjoin_tmp_result -select /*+mapjoin(b)*/ a.key, a.value, b.value -from srcbucket_mapjoin a join srcbucket_mapjoin_part b -on a.key=b.key where b.ds="2008-04-08"; -'key','value','value' -No rows selected ->>> ->>> select count(1) from bucketmapjoin_tmp_result; -'_c0' -'464' -1 row selected ->>> ->>> insert overwrite table bucketmapjoin_hash_result_1 -select sum(hash(key)), sum(hash(value1)), sum(hash(value2)) from bucketmapjoin_tmp_result; -'_c0','_c1','_c2' -No rows selected ->>> ->>> set hive.optimize.bucketmapjoin = false; -No rows affected ->>> insert overwrite table bucketmapjoin_tmp_result -select /*+mapjoin(b)*/ a.key, a.value, b.value -from srcbucket_mapjoin a join srcbucket_mapjoin_part b -on a.key=b.key where b.ds="2008-04-08"; -'key','value','value' -No rows selected ->>> ->>> select count(1) from bucketmapjoin_tmp_result; -'_c0' -'464' -1 row selected ->>> insert overwrite table bucketmapjoin_hash_result_2 -select sum(hash(key)), sum(hash(value1)), sum(hash(value2)) from bucketmapjoin_tmp_result; -'_c0','_c1','_c2' -No rows selected ->>> ->>> ->>> select a.key-b.key, a.value1-b.value1, a.value2-b.value2 -from bucketmapjoin_hash_result_1 a left outer join bucketmapjoin_hash_result_2 b -on a.key = b.key; -'_c0','_c1','_c2' -'0','0','0' -1 row selected ->>> ->>> ->>> set hive.optimize.bucketmapjoin = true; -No rows affected ->>> explain extended -insert overwrite table bucketmapjoin_tmp_result -select /*+mapjoin(a)*/ a.key, a.value, b.value -from srcbucket_mapjoin a join srcbucket_mapjoin_part b -on a.key=b.key where b.ds="2008-04-08"; -'Explain' -'ABSTRACT SYNTAX TREE:' -' (TOK_QUERY (TOK_FROM (TOK_JOIN (TOK_TABREF (TOK_TABNAME srcbucket_mapjoin) a) (TOK_TABREF (TOK_TABNAME srcbucket_mapjoin_part) b) (= (. (TOK_TABLE_OR_COL a) key) (. (TOK_TABLE_OR_COL b) key)))) (TOK_INSERT (TOK_DESTINATION (TOK_TAB (TOK_TABNAME bucketmapjoin_tmp_result))) (TOK_SELECT (TOK_HINTLIST (TOK_HINT TOK_MAPJOIN (TOK_HINTARGLIST a))) (TOK_SELEXPR (. (TOK_TABLE_OR_COL a) key)) (TOK_SELEXPR (. (TOK_TABLE_OR_COL a) value)) (TOK_SELEXPR (. (TOK_TABLE_OR_COL b) value))) (TOK_WHERE (= (. (TOK_TABLE_OR_COL b) ds) "2008-04-08"))))' -'' -'STAGE DEPENDENCIES:' -' Stage-9 is a root stage' -' Stage-1 depends on stages: Stage-9' -' Stage-7 depends on stages: Stage-1 , consists of Stage-4, Stage-3, Stage-5' -' Stage-4' -' Stage-0 depends on stages: Stage-4, Stage-3, Stage-6' -' Stage-2 depends on stages: Stage-0' -' Stage-3' -' Stage-5' -' Stage-6 depends on stages: Stage-5' -'' -'STAGE PLANS:' -' Stage: Stage-9' -' Map Reduce Local Work' -' Alias -> Map Local Tables:' -' a ' -' Fetch Operator' -' limit: -1' -' Alias -> Map Local Operator Tree:' -' a ' -' TableScan' -' alias: a' -' GatherStats: false' -' HashTable Sink Operator' -' condition expressions:' -' 0 {key} {value}' -' 1 {value} {ds}' -' handleSkewJoin: false' -' keys:' -' 0 [Column[key]]' -' 1 [Column[key]]' -' Position of Big Table: 1' -' Bucket Mapjoin Context:' -' Alias Bucket Base File Name Mapping:' -' a {ds=2008-04-08/srcbucket20.txt=[srcbucket20.txt], ds=2008-04-08/srcbucket21.txt=[srcbucket21.txt], ds=2008-04-08/srcbucket22.txt=[srcbucket20.txt], ds=2008-04-08/srcbucket23.txt=[srcbucket21.txt]}' -' Alias Bucket File Name Mapping:' -' a {!!{hive.metastore.warehouse.dir}!!/bucketmapjoin1.db/srcbucket_mapjoin_part/ds=2008-04-08/srcbucket20.txt=[!!{hive.metastore.warehouse.dir}!!/bucketmapjoin1.db/srcbucket_mapjoin/srcbucket20.txt], !!{hive.metastore.warehouse.dir}!!/bucketmapjoin1.db/srcbucket_mapjoin_part/ds=2008-04-08/srcbucket21.txt=[!!{hive.metastore.warehouse.dir}!!/bucketmapjoin1.db/srcbucket_mapjoin/srcbucket21.txt], !!{hive.metastore.warehouse.dir}!!/bucketmapjoin1.db/srcbucket_mapjoin_part/ds=2008-04-08/srcbucket22.txt=[!!{hive.metastore.warehouse.dir}!!/bucketmapjoin1.db/srcbucket_mapjoin/srcbucket20.txt], !!{hive.metastore.warehouse.dir}!!/bucketmapjoin1.db/srcbucket_mapjoin_part/ds=2008-04-08/srcbucket23.txt=[!!{hive.metastore.warehouse.dir}!!/bucketmapjoin1.db/srcbucket_mapjoin/srcbucket21.txt]}' -' Alias Bucket Output File Name Mapping:' -' !!{hive.metastore.warehouse.dir}!!/bucketmapjoin1.db/srcbucket_mapjoin_part/ds=2008-04-08/srcbucket20.txt 0' -' !!{hive.metastore.warehouse.dir}!!/bucketmapjoin1.db/srcbucket_mapjoin_part/ds=2008-04-08/srcbucket21.txt 1' -' !!{hive.metastore.warehouse.dir}!!/bucketmapjoin1.db/srcbucket_mapjoin_part/ds=2008-04-08/srcbucket22.txt 2' -' !!{hive.metastore.warehouse.dir}!!/bucketmapjoin1.db/srcbucket_mapjoin_part/ds=2008-04-08/srcbucket23.txt 3' -'' -' Stage: Stage-1' -' Map Reduce' -' Alias -> Map Operator Tree:' -' b ' -' TableScan' -' alias: b' -' GatherStats: false' -' Map Join Operator' -' condition map:' -' Inner Join 0 to 1' -' condition expressions:' -' 0 {key} {value}' -' 1 {value} {ds}' -' handleSkewJoin: false' -' keys:' -' 0 [Column[key]]' -' 1 [Column[key]]' -' outputColumnNames: _col0, _col1, _col5, _col6' -' Position of Big Table: 1' -' Select Operator' -' expressions:' -' expr: _col0' -' type: int' -' expr: _col1' -' type: string' -' expr: _col5' -' type: string' -' expr: _col6' -' type: string' -' outputColumnNames: _col0, _col1, _col5, _col6' -' Select Operator' -' expressions:' -' expr: _col0' -' type: int' -' expr: _col1' -' type: string' -' expr: _col5' -' type: string' -' outputColumnNames: _col0, _col1, _col2' -' File Output Operator' -' compressed: false' -' GlobalTableId: 1' -' directory: pfile:!!{hive.exec.scratchdir}!!' -' NumFilesPerFileSink: 1' -' Stats Publishing Key Prefix: pfile:!!{hive.exec.scratchdir}!!' -' table:' -' input format: org.apache.hadoop.mapred.TextInputFormat' -' output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat' -' properties:' -' bucket_count -1' -' columns key,value1,value2' -' columns.types string:string:string' -' file.inputformat org.apache.hadoop.mapred.TextInputFormat' -' file.outputformat org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat' -' location !!{hive.metastore.warehouse.dir}!!/bucketmapjoin1.db/bucketmapjoin_tmp_result' -' name bucketmapjoin1.bucketmapjoin_tmp_result' -' numFiles 1' -' numPartitions 0' -' numRows 464' -' rawDataSize 8519' -' serialization.ddl struct bucketmapjoin_tmp_result { string key, string value1, string value2}' -' serialization.format 1' -' serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe' -' totalSize 8983' -' transient_lastDdlTime !!UNIXTIME!!' -' serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe' -' name: bucketmapjoin1.bucketmapjoin_tmp_result' -' TotalFiles: 1' -' GatherStats: true' -' MultiFileSpray: false' -' Local Work:' -' Map Reduce Local Work' -' Needs Tagging: false' -' Path -> Alias:' -' !!{hive.metastore.warehouse.dir}!!/bucketmapjoin1.db/srcbucket_mapjoin_part/ds=2008-04-08 [b]' -' Path -> Partition:' -' !!{hive.metastore.warehouse.dir}!!/bucketmapjoin1.db/srcbucket_mapjoin_part/ds=2008-04-08 ' -' Partition' -' base file name: ds=2008-04-08' -' input format: org.apache.hadoop.mapred.TextInputFormat' -' output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat' -' partition values:' -' ds 2008-04-08' -' properties:' -' bucket_count 4' -' bucket_field_name key' -' columns key,value' -' columns.types int:string' -' file.inputformat org.apache.hadoop.mapred.TextInputFormat' -' file.outputformat org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat' -' location !!{hive.metastore.warehouse.dir}!!/bucketmapjoin1.db/srcbucket_mapjoin_part/ds=2008-04-08' -' name bucketmapjoin1.srcbucket_mapjoin_part' -' numFiles 4' -' numPartitions 1' -' numRows 0' -' partition_columns ds' -' rawDataSize 0' -' serialization.ddl struct srcbucket_mapjoin_part { i32 key, string value}' -' serialization.format 1' -' serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe' -' totalSize 5812' -' transient_lastDdlTime !!UNIXTIME!!' -' serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe' -' ' -' input format: org.apache.hadoop.mapred.TextInputFormat' -' output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat' -' properties:' -' bucket_count 4' -' bucket_field_name key' -' columns key,value' -' columns.types int:string' -' file.inputformat org.apache.hadoop.mapred.TextInputFormat' -' file.outputformat org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat' -' location !!{hive.metastore.warehouse.dir}!!/bucketmapjoin1.db/srcbucket_mapjoin_part' -' name bucketmapjoin1.srcbucket_mapjoin_part' -' numFiles 4' -' numPartitions 1' -' numRows 0' -' partition_columns ds' -' rawDataSize 0' -' serialization.ddl struct srcbucket_mapjoin_part { i32 key, string value}' -' serialization.format 1' -' serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe' -' totalSize 5812' -' transient_lastDdlTime !!UNIXTIME!!' -' serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe' -' name: bucketmapjoin1.srcbucket_mapjoin_part' -' name: bucketmapjoin1.srcbucket_mapjoin_part' -'' -' Stage: Stage-7' -' Conditional Operator' -'' -' Stage: Stage-4' -' Move Operator' -' files:' -' hdfs directory: true' -' source: pfile:!!{hive.exec.scratchdir}!!' -' destination: pfile:!!{hive.exec.scratchdir}!!' -'' -' Stage: Stage-0' -' Move Operator' -' tables:' -' replace: true' -' source: pfile:!!{hive.exec.scratchdir}!!' -' table:' -' input format: org.apache.hadoop.mapred.TextInputFormat' -' output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat' -' properties:' -' bucket_count -1' -' columns key,value1,value2' -' columns.types string:string:string' -' file.inputformat org.apache.hadoop.mapred.TextInputFormat' -' file.outputformat org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat' -' location !!{hive.metastore.warehouse.dir}!!/bucketmapjoin1.db/bucketmapjoin_tmp_result' -' name bucketmapjoin1.bucketmapjoin_tmp_result' -' numFiles 1' -' numPartitions 0' -' numRows 464' -' rawDataSize 8519' -' serialization.ddl struct bucketmapjoin_tmp_result { string key, string value1, string value2}' -' serialization.format 1' -' serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe' -' totalSize 8983' -' transient_lastDdlTime !!UNIXTIME!!' -' serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe' -' name: bucketmapjoin1.bucketmapjoin_tmp_result' -' tmp directory: pfile:!!{hive.exec.scratchdir}!!' -'' -' Stage: Stage-2' -' Stats-Aggr Operator' -' Stats Aggregation Key Prefix: pfile:!!{hive.exec.scratchdir}!!' -'' -' Stage: Stage-3' -' Map Reduce' -' Alias -> Map Operator Tree:' -' pfile:!!{hive.exec.scratchdir}!! ' -' File Output Operator' -' compressed: false' -' GlobalTableId: 0' -' directory: pfile:!!{hive.exec.scratchdir}!!' -' NumFilesPerFileSink: 1' -' table:' -' input format: org.apache.hadoop.mapred.TextInputFormat' -' output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat' -' properties:' -' bucket_count -1' -' columns key,value1,value2' -' columns.types string:string:string' -' file.inputformat org.apache.hadoop.mapred.TextInputFormat' -' file.outputformat org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat' -' location !!{hive.metastore.warehouse.dir}!!/bucketmapjoin1.db/bucketmapjoin_tmp_result' -' name bucketmapjoin1.bucketmapjoin_tmp_result' -' numFiles 1' -' numPartitions 0' -' numRows 464' -' rawDataSize 8519' -' serialization.ddl struct bucketmapjoin_tmp_result { string key, string value1, string value2}' -' serialization.format 1' -' serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe' -' totalSize 8983' -' transient_lastDdlTime !!UNIXTIME!!' -' serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe' -' name: bucketmapjoin1.bucketmapjoin_tmp_result' -' TotalFiles: 1' -' GatherStats: false' -' MultiFileSpray: false' -' Needs Tagging: false' -' Path -> Alias:' -' pfile:!!{hive.exec.scratchdir}!! [pfile:!!{hive.exec.scratchdir}!!]' -' Path -> Partition:' -' pfile:!!{hive.exec.scratchdir}!! ' -' Partition' -' base file name: -ext-10002' -' input format: org.apache.hadoop.mapred.TextInputFormat' -' output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat' -' properties:' -' bucket_count -1' -' columns key,value1,value2' -' columns.types string:string:string' -' file.inputformat org.apache.hadoop.mapred.TextInputFormat' -' file.outputformat org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat' -' location !!{hive.metastore.warehouse.dir}!!/bucketmapjoin1.db/bucketmapjoin_tmp_result' -' name bucketmapjoin1.bucketmapjoin_tmp_result' -' numFiles 1' -' numPartitions 0' -' numRows 464' -' rawDataSize 8519' -' serialization.ddl struct bucketmapjoin_tmp_result { string key, string value1, string value2}' -' serialization.format 1' -' serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe' -' totalSize 8983' -' transient_lastDdlTime !!UNIXTIME!!' -' serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe' -' ' -' input format: org.apache.hadoop.mapred.TextInputFormat' -' output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat' -' properties:' -' bucket_count -1' -' columns key,value1,value2' -' columns.types string:string:string' -' file.inputformat org.apache.hadoop.mapred.TextInputFormat' -' file.outputformat org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat' -' location !!{hive.metastore.warehouse.dir}!!/bucketmapjoin1.db/bucketmapjoin_tmp_result' -' name bucketmapjoin1.bucketmapjoin_tmp_result' -' numFiles 1' -' numPartitions 0' -' numRows 464' -' rawDataSize 8519' -' serialization.ddl struct bucketmapjoin_tmp_result { string key, string value1, string value2}' -' serialization.format 1' -' serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe' -' totalSize 8983' -' transient_lastDdlTime !!UNIXTIME!!' -' serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe' -' name: bucketmapjoin1.bucketmapjoin_tmp_result' -' name: bucketmapjoin1.bucketmapjoin_tmp_result' -'' -' Stage: Stage-5' -' Map Reduce' -' Alias -> Map Operator Tree:' -' pfile:!!{hive.exec.scratchdir}!! ' -' File Output Operator' -' compressed: false' -' GlobalTableId: 0' -' directory: pfile:!!{hive.exec.scratchdir}!!' -' NumFilesPerFileSink: 1' -' table:' -' input format: org.apache.hadoop.mapred.TextInputFormat' -' output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat' -' properties:' -' bucket_count -1' -' columns key,value1,value2' -' columns.types string:string:string' -' file.inputformat org.apache.hadoop.mapred.TextInputFormat' -' file.outputformat org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat' -' location !!{hive.metastore.warehouse.dir}!!/bucketmapjoin1.db/bucketmapjoin_tmp_result' -' name bucketmapjoin1.bucketmapjoin_tmp_result' -' numFiles 1' -' numPartitions 0' -' numRows 464' -' rawDataSize 8519' -' serialization.ddl struct bucketmapjoin_tmp_result { string key, string value1, string value2}' -' serialization.format 1' -' serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe' -' totalSize 8983' -' transient_lastDdlTime !!UNIXTIME!!' -' serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe' -' name: bucketmapjoin1.bucketmapjoin_tmp_result' -' TotalFiles: 1' -' GatherStats: false' -' MultiFileSpray: false' -' Needs Tagging: false' -' Path -> Alias:' -' pfile:!!{hive.exec.scratchdir}!! [pfile:!!{hive.exec.scratchdir}!!]' -' Path -> Partition:' -' pfile:!!{hive.exec.scratchdir}!! ' -' Partition' -' base file name: -ext-10002' -' input format: org.apache.hadoop.mapred.TextInputFormat' -' output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat' -' properties:' -' bucket_count -1' -' columns key,value1,value2' -' columns.types string:string:string' -' file.inputformat org.apache.hadoop.mapred.TextInputFormat' -' file.outputformat org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat' -' location !!{hive.metastore.warehouse.dir}!!/bucketmapjoin1.db/bucketmapjoin_tmp_result' -' name bucketmapjoin1.bucketmapjoin_tmp_result' -' numFiles 1' -' numPartitions 0' -' numRows 464' -' rawDataSize 8519' -' serialization.ddl struct bucketmapjoin_tmp_result { string key, string value1, string value2}' -' serialization.format 1' -' serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe' -' totalSize 8983' -' transient_lastDdlTime !!UNIXTIME!!' -' serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe' -' ' -' input format: org.apache.hadoop.mapred.TextInputFormat' -' output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat' -' properties:' -' bucket_count -1' -' columns key,value1,value2' -' columns.types string:string:string' -' file.inputformat org.apache.hadoop.mapred.TextInputFormat' -' file.outputformat org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat' -' location !!{hive.metastore.warehouse.dir}!!/bucketmapjoin1.db/bucketmapjoin_tmp_result' -' name bucketmapjoin1.bucketmapjoin_tmp_result' -' numFiles 1' -' numPartitions 0' -' numRows 464' -' rawDataSize 8519' -' serialization.ddl struct bucketmapjoin_tmp_result { string key, string value1, string value2}' -' serialization.format 1' -' serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe' -' totalSize 8983' -' transient_lastDdlTime !!UNIXTIME!!' -' serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe' -' name: bucketmapjoin1.bucketmapjoin_tmp_result' -' name: bucketmapjoin1.bucketmapjoin_tmp_result' -'' -' Stage: Stage-6' -' Move Operator' -' files:' -' hdfs directory: true' -' source: pfile:!!{hive.exec.scratchdir}!!' -' destination: pfile:!!{hive.exec.scratchdir}!!' -'' -'' -396 rows selected ->>> ->>> insert overwrite table bucketmapjoin_tmp_result -select /*+mapjoin(a)*/ a.key, a.value, b.value -from srcbucket_mapjoin a join srcbucket_mapjoin_part b -on a.key=b.key where b.ds="2008-04-08"; -'key','value','value' -No rows selected ->>> ->>> select count(1) from bucketmapjoin_tmp_result; -'_c0' -'464' -1 row selected ->>> ->>> ->>> insert overwrite table bucketmapjoin_hash_result_1 -select sum(hash(key)), sum(hash(value1)), sum(hash(value2)) from bucketmapjoin_tmp_result; -'_c0','_c1','_c2' -No rows selected ->>> ->>> set hive.optimize.bucketmapjoin = false; -No rows affected ->>> insert overwrite table bucketmapjoin_tmp_result -select /*+mapjoin(a)*/ a.key, a.value, b.value -from srcbucket_mapjoin a join srcbucket_mapjoin_part b -on a.key=b.key where b.ds="2008-04-08"; -'key','value','value' -No rows selected ->>> ->>> select count(1) from bucketmapjoin_tmp_result; -'_c0' -'464' -1 row selected ->>> insert overwrite table bucketmapjoin_hash_result_2 -select sum(hash(key)), sum(hash(value1)), sum(hash(value2)) from bucketmapjoin_tmp_result; -'_c0','_c1','_c2' -No rows selected ->>> ->>> select a.key-b.key, a.value1-b.value1, a.value2-b.value2 -from bucketmapjoin_hash_result_1 a left outer join bucketmapjoin_hash_result_2 b -on a.key = b.key; -'_c0','_c1','_c2' -'0','0','0' -1 row selected ->>> !record
http://git-wip-us.apache.org/repos/asf/hive/blob/3890ed65/ql/src/test/results/beelinepositive/bucketmapjoin10.q.out ---------------------------------------------------------------------- diff --git a/ql/src/test/results/beelinepositive/bucketmapjoin10.q.out b/ql/src/test/results/beelinepositive/bucketmapjoin10.q.out deleted file mode 100644 index cc6dc9e..0000000 --- a/ql/src/test/results/beelinepositive/bucketmapjoin10.q.out +++ /dev/null @@ -1,318 +0,0 @@ -Saving all output to "!!{outputDirectory}!!/bucketmapjoin10.q.raw". Enter "record" with no arguments to stop it. ->>> !run !!{qFileDirectory}!!/bucketmapjoin10.q ->>> set hive.input.format=org.apache.hadoop.hive.ql.io.HiveInputFormat; -No rows affected ->>> ->>> CREATE TABLE srcbucket_mapjoin_part_1 (key INT, value STRING) PARTITIONED BY (part STRING) -CLUSTERED BY (key) INTO 2 BUCKETS STORED AS TEXTFILE; -No rows affected ->>> LOAD DATA LOCAL INPATH '../data/files/srcbucket20.txt' INTO TABLE srcbucket_mapjoin_part_1 PARTITION (part='1'); -No rows affected ->>> LOAD DATA LOCAL INPATH '../data/files/srcbucket21.txt' INTO TABLE srcbucket_mapjoin_part_1 PARTITION (part='1'); -No rows affected ->>> ->>> ALTER TABLE srcbucket_mapjoin_part_1 CLUSTERED BY (key) INTO 3 BUCKETS; -No rows affected ->>> LOAD DATA LOCAL INPATH '../data/files/srcbucket20.txt' INTO TABLE srcbucket_mapjoin_part_1 PARTITION (part='2'); -No rows affected ->>> LOAD DATA LOCAL INPATH '../data/files/srcbucket21.txt' INTO TABLE srcbucket_mapjoin_part_1 PARTITION (part='2'); -No rows affected ->>> LOAD DATA LOCAL INPATH '../data/files/srcbucket22.txt' INTO TABLE srcbucket_mapjoin_part_1 PARTITION (part='2'); -No rows affected ->>> ->>> CREATE TABLE srcbucket_mapjoin_part_2 (key INT, value STRING) PARTITIONED BY (part STRING) -CLUSTERED BY (key) INTO 3 BUCKETS STORED AS TEXTFILE; -No rows affected ->>> LOAD DATA LOCAL INPATH '../data/files/srcbucket20.txt' INTO TABLE srcbucket_mapjoin_part_2 PARTITION (part='1'); -No rows affected ->>> LOAD DATA LOCAL INPATH '../data/files/srcbucket21.txt' INTO TABLE srcbucket_mapjoin_part_2 PARTITION (part='1'); -No rows affected ->>> LOAD DATA LOCAL INPATH '../data/files/srcbucket22.txt' INTO TABLE srcbucket_mapjoin_part_2 PARTITION (part='1'); -No rows affected ->>> ->>> ALTER TABLE srcbucket_mapjoin_part_2 CLUSTERED BY (key) INTO 2 BUCKETS; -No rows affected ->>> LOAD DATA LOCAL INPATH '../data/files/srcbucket20.txt' INTO TABLE srcbucket_mapjoin_part_2 PARTITION (part='2'); -No rows affected ->>> LOAD DATA LOCAL INPATH '../data/files/srcbucket21.txt' INTO TABLE srcbucket_mapjoin_part_2 PARTITION (part='2'); -No rows affected ->>> ->>> ALTER TABLE srcbucket_mapjoin_part_2 CLUSTERED BY (key) INTO 3 BUCKETS; -No rows affected ->>> ->>> set hive.optimize.bucketmapjoin=true; -No rows affected ->>> ->>> -- The table bucketing metadata matches but the partition metadata does not, bucket map join should not be used ->>> ->>> EXPLAIN EXTENDED -SELECT /*+ MAPJOIN(b) */ count(*) -FROM srcbucket_mapjoin_part_1 a JOIN srcbucket_mapjoin_part_2 b -ON a.key = b.key AND a.part IS NOT NULL AND b.part IS NOT NULL; -'Explain' -'ABSTRACT SYNTAX TREE:' -' (TOK_QUERY (TOK_FROM (TOK_JOIN (TOK_TABREF (TOK_TABNAME srcbucket_mapjoin_part_1) a) (TOK_TABREF (TOK_TABNAME srcbucket_mapjoin_part_2) b) (AND (AND (= (. (TOK_TABLE_OR_COL a) key) (. (TOK_TABLE_OR_COL b) key)) (TOK_FUNCTION TOK_ISNOTNULL (. (TOK_TABLE_OR_COL a) part))) (TOK_FUNCTION TOK_ISNOTNULL (. (TOK_TABLE_OR_COL b) part))))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_HINTLIST (TOK_HINT TOK_MAPJOIN (TOK_HINTARGLIST b))) (TOK_SELEXPR (TOK_FUNCTIONSTAR count)))))' -'' -'STAGE DEPENDENCIES:' -' Stage-4 is a root stage' -' Stage-1 depends on stages: Stage-4' -' Stage-2 depends on stages: Stage-1' -' Stage-0 is a root stage' -'' -'STAGE PLANS:' -' Stage: Stage-4' -' Map Reduce Local Work' -' Alias -> Map Local Tables:' -' b ' -' Fetch Operator' -' limit: -1' -' Alias -> Map Local Operator Tree:' -' b ' -' TableScan' -' alias: b' -' GatherStats: false' -' HashTable Sink Operator' -' condition expressions:' -' 0 ' -' 1 ' -' handleSkewJoin: false' -' keys:' -' 0 [Column[key]]' -' 1 [Column[key]]' -' Position of Big Table: 0' -'' -' Stage: Stage-1' -' Map Reduce' -' Alias -> Map Operator Tree:' -' a ' -' TableScan' -' alias: a' -' GatherStats: false' -' Map Join Operator' -' condition map:' -' Inner Join 0 to 1' -' condition expressions:' -' 0 ' -' 1 ' -' handleSkewJoin: false' -' keys:' -' 0 [Column[key]]' -' 1 [Column[key]]' -' Position of Big Table: 0' -' File Output Operator' -' compressed: false' -' GlobalTableId: 0' -' directory: file:!!{hive.exec.scratchdir}!!' -' NumFilesPerFileSink: 1' -' table:' -' input format: org.apache.hadoop.mapred.SequenceFileInputFormat' -' output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat' -' properties:' -' columns ' -' columns.types ' -' escape.delim \' -' TotalFiles: 1' -' GatherStats: false' -' MultiFileSpray: false' -' Local Work:' -' Map Reduce Local Work' -' Needs Tagging: false' -' Path -> Alias:' -' !!{hive.metastore.warehouse.dir}!!/bucketmapjoin10.db/srcbucket_mapjoin_part_1/part=1 [a]' -' !!{hive.metastore.warehouse.dir}!!/bucketmapjoin10.db/srcbucket_mapjoin_part_1/part=2 [a]' -' Path -> Partition:' -' !!{hive.metastore.warehouse.dir}!!/bucketmapjoin10.db/srcbucket_mapjoin_part_1/part=1 ' -' Partition' -' base file name: part=1' -' input format: org.apache.hadoop.mapred.TextInputFormat' -' output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat' -' partition values:' -' part 1' -' properties:' -' bucket_count 2' -' bucket_field_name key' -' columns key,value' -' columns.types int:string' -' file.inputformat org.apache.hadoop.mapred.TextInputFormat' -' file.outputformat org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat' -' last_modified_by !!{user.name}!!' -' last_modified_time !!UNIXTIME!!' -' location !!{hive.metastore.warehouse.dir}!!/bucketmapjoin10.db/srcbucket_mapjoin_part_1/part=1' -' name bucketmapjoin10.srcbucket_mapjoin_part_1' -' numFiles 2' -' numPartitions 2' -' numRows 0' -' partition_columns part' -' rawDataSize 0' -' serialization.ddl struct srcbucket_mapjoin_part_1 { i32 key, string value}' -' serialization.format 1' -' serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe' -' totalSize 2750' -' transient_lastDdlTime !!UNIXTIME!!' -' serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe' -' ' -' input format: org.apache.hadoop.mapred.TextInputFormat' -' output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat' -' properties:' -' bucket_count 3' -' bucket_field_name key' -' columns key,value' -' columns.types int:string' -' file.inputformat org.apache.hadoop.mapred.TextInputFormat' -' file.outputformat org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat' -' last_modified_by !!{user.name}!!' -' last_modified_time !!UNIXTIME!!' -' location !!{hive.metastore.warehouse.dir}!!/bucketmapjoin10.db/srcbucket_mapjoin_part_1' -' name bucketmapjoin10.srcbucket_mapjoin_part_1' -' numFiles 5' -' numPartitions 2' -' numRows 0' -' partition_columns part' -' rawDataSize 0' -' serialization.ddl struct srcbucket_mapjoin_part_1 { i32 key, string value}' -' serialization.format 1' -' serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe' -' totalSize 6950' -' transient_lastDdlTime !!UNIXTIME!!' -' serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe' -' name: bucketmapjoin10.srcbucket_mapjoin_part_1' -' name: bucketmapjoin10.srcbucket_mapjoin_part_1' -' !!{hive.metastore.warehouse.dir}!!/bucketmapjoin10.db/srcbucket_mapjoin_part_1/part=2 ' -' Partition' -' base file name: part=2' -' input format: org.apache.hadoop.mapred.TextInputFormat' -' output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat' -' partition values:' -' part 2' -' properties:' -' bucket_count 3' -' bucket_field_name key' -' columns key,value' -' columns.types int:string' -' file.inputformat org.apache.hadoop.mapred.TextInputFormat' -' file.outputformat org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat' -' last_modified_by !!{user.name}!!' -' last_modified_time !!UNIXTIME!!' -' location !!{hive.metastore.warehouse.dir}!!/bucketmapjoin10.db/srcbucket_mapjoin_part_1/part=2' -' name bucketmapjoin10.srcbucket_mapjoin_part_1' -' numFiles 3' -' numPartitions 2' -' numRows 0' -' partition_columns part' -' rawDataSize 0' -' serialization.ddl struct srcbucket_mapjoin_part_1 { i32 key, string value}' -' serialization.format 1' -' serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe' -' totalSize 4200' -' transient_lastDdlTime !!UNIXTIME!!' -' serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe' -' ' -' input format: org.apache.hadoop.mapred.TextInputFormat' -' output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat' -' properties:' -' bucket_count 3' -' bucket_field_name key' -' columns key,value' -' columns.types int:string' -' file.inputformat org.apache.hadoop.mapred.TextInputFormat' -' file.outputformat org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat' -' last_modified_by !!{user.name}!!' -' last_modified_time !!UNIXTIME!!' -' location !!{hive.metastore.warehouse.dir}!!/bucketmapjoin10.db/srcbucket_mapjoin_part_1' -' name bucketmapjoin10.srcbucket_mapjoin_part_1' -' numFiles 5' -' numPartitions 2' -' numRows 0' -' partition_columns part' -' rawDataSize 0' -' serialization.ddl struct srcbucket_mapjoin_part_1 { i32 key, string value}' -' serialization.format 1' -' serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe' -' totalSize 6950' -' transient_lastDdlTime !!UNIXTIME!!' -' serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe' -' name: bucketmapjoin10.srcbucket_mapjoin_part_1' -' name: bucketmapjoin10.srcbucket_mapjoin_part_1' -'' -' Stage: Stage-2' -' Map Reduce' -' Alias -> Map Operator Tree:' -' file:!!{hive.exec.scratchdir}!! ' -' Select Operator' -' Select Operator' -' Group By Operator' -' aggregations:' -' expr: count()' -' bucketGroup: false' -' mode: hash' -' outputColumnNames: _col0' -' Reduce Output Operator' -' sort order: ' -' tag: -1' -' value expressions:' -' expr: _col0' -' type: bigint' -' Needs Tagging: false' -' Path -> Alias:' -' file:!!{hive.exec.scratchdir}!! [file:!!{hive.exec.scratchdir}!!]' -' Path -> Partition:' -' file:!!{hive.exec.scratchdir}!! ' -' Partition' -' base file name: -mr-10002' -' input format: org.apache.hadoop.mapred.SequenceFileInputFormat' -' output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat' -' properties:' -' columns ' -' columns.types ' -' escape.delim \' -' ' -' input format: org.apache.hadoop.mapred.SequenceFileInputFormat' -' output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat' -' properties:' -' columns ' -' columns.types ' -' escape.delim \' -' Reduce Operator Tree:' -' Group By Operator' -' aggregations:' -' expr: count(VALUE._col0)' -' bucketGroup: false' -' mode: mergepartial' -' outputColumnNames: _col0' -' Select Operator' -' expressions:' -' expr: _col0' -' type: bigint' -' outputColumnNames: _col0' -' File Output Operator' -' compressed: false' -' GlobalTableId: 0' -' directory: file:!!{hive.exec.scratchdir}!!' -' NumFilesPerFileSink: 1' -' Stats Publishing Key Prefix: file:!!{hive.exec.scratchdir}!!' -' table:' -' input format: org.apache.hadoop.mapred.TextInputFormat' -' output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat' -' properties:' -' columns _col0' -' columns.types bigint' -' escape.delim \' -' serialization.format 1' -' TotalFiles: 1' -' GatherStats: false' -' MultiFileSpray: false' -'' -' Stage: Stage-0' -' Fetch Operator' -' limit: -1' -'' -'' -257 rows selected ->>> ->>> SELECT /*+ MAPJOIN(b) */ count(*) -FROM srcbucket_mapjoin_part_1 a JOIN srcbucket_mapjoin_part_2 b -ON a.key = b.key AND a.part IS NOT NULL AND b.part IS NOT NULL; -'_c1' -'2116' -1 row selected ->>> !record