http://git-wip-us.apache.org/repos/asf/bigtop/blob/e209fdbb/bigtop-tests/test-artifacts/hive/src/main/resources/scripts/integration/hbase_stats/in ---------------------------------------------------------------------- diff --git a/bigtop-tests/test-artifacts/hive/src/main/resources/scripts/integration/hbase_stats/in b/bigtop-tests/test-artifacts/hive/src/main/resources/scripts/integration/hbase_stats/in deleted file mode 100644 index 9a3d71f..0000000 --- a/bigtop-tests/test-artifacts/hive/src/main/resources/scripts/integration/hbase_stats/in +++ /dev/null @@ -1,35 +0,0 @@ --- Licensed to the Apache Software Foundation (ASF) under one or more --- contributor license agreements. See the NOTICE file distributed with --- this work for additional information regarding copyright ownership. --- The ASF licenses this file to You under the Apache License, Version 2.0 --- (the "License") you may not use this file except in compliance with --- the License. You may obtain a copy of the License at --- --- http://www.apache.org/licenses/LICENSE-2.0 --- --- Unless required by applicable law or agreed to in writing, software --- distributed under the License is distributed on an "AS IS" BASIS, --- WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. --- See the License for the specific language governing permissions and --- limitations under the License. -set datanucleus.cache.collections=false; - -set hive.stats.dbclass=hbase; - -create table stats_src like src; -insert overwrite table stats_src select * from src; -analyze table stats_src compute statistics; -desc formatted stats_src; - -create table hbase_part like srcpart; - -insert overwrite table hbase_part partition (ds='2010-04-08', hr = '11') select key, value from src; -insert overwrite table hbase_part partition (ds='2010-04-08', hr = '12') select key, value from src; - -analyze table hbase_part partition(ds='2008-04-08', hr=11) compute statistics; -analyze table hbase_part partition(ds='2008-04-08', hr=12) compute statistics; - -desc formatted hbase_part; -desc formatted hbase_part partition (ds='2010-04-08', hr = '11'); -desc formatted hbase_part partition (ds='2010-04-08', hr = '12'); -
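Each deleted "in" file above is a plain HiveQL script that the Bigtop Hive smoke tests feed to the Hive CLI; the "out" file that follows holds the expected output. As a hedged, minimal sketch of exercising this one by hand (the checkout-relative path is taken from the diff header; the real harness drives Hive from test code rather than a shell one-liner, so treat this as an illustration):

SCRIPTS=bigtop-tests/test-artifacts/hive/src/main/resources/scripts
# Run the deleted hbase_stats script and capture everything it prints.
hive -f "$SCRIPTS/integration/hbase_stats/in" > hbase_stats.actual 2>&1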
http://git-wip-us.apache.org/repos/asf/bigtop/blob/e209fdbb/bigtop-tests/test-artifacts/hive/src/main/resources/scripts/integration/hbase_stats/out ---------------------------------------------------------------------- diff --git a/bigtop-tests/test-artifacts/hive/src/main/resources/scripts/integration/hbase_stats/out b/bigtop-tests/test-artifacts/hive/src/main/resources/scripts/integration/hbase_stats/out deleted file mode 100644 index aef56e8..0000000 --- a/bigtop-tests/test-artifacts/hive/src/main/resources/scripts/integration/hbase_stats/out +++ /dev/null @@ -1,170 +0,0 @@ --- Licensed to the Apache Software Foundation (ASF) under one or more --- contributor license agreements. See the NOTICE file distributed with --- this work for additional information regarding copyright ownership. --- The ASF licenses this file to You under the Apache License, Version 2.0 --- (the "License") you may not use this file except in compliance with --- the License. You may obtain a copy of the License at --- --- http://www.apache.org/licenses/LICENSE-2.0 --- --- Unless required by applicable law or agreed to in writing, software --- distributed under the License is distributed on an "AS IS" BASIS, --- WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. --- See the License for the specific language governing permissions and --- limitations under the License. -set datanucleus.cache.collections=false -set hive.stats.dbclass=hbase - - -create table stats_src like src - -insert overwrite table stats_src select * from src -Deleted HDFS_URL/user/hive/warehouse/stats_src - -analyze table stats_src compute statistics - -desc formatted stats_src -# col_name data_type comment - -key string None -value string None - -# Detailed Table Information -Database: default -Owner: [email protected] -CreateTime: Fri Mar 25 11:29:26 PDT 2011 -LastAccessTime: UNKNOWN -Protect Mode: None -Retention: 0 -Location: hdfs://minotaur01.sf.cloudera.com:17020/user/hive/warehouse/stats_src -Table Type: MANAGED_TABLE -Table Parameters: - transient_lastDdlTime 1301077789 - -# Storage Information -SerDe Library: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe -InputFormat: org.apache.hadoop.mapred.TextInputFormat -OutputFormat: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat -Compressed: No -Num Buckets: -1 -Bucket Columns: [] -Sort Columns: [] -Storage Desc Params: - serialization.format 1 - - -create table hbase_part like srcpart - - -insert overwrite table hbase_part partition (ds='2010-04-08', hr = '11') select key, value from src - -insert overwrite table hbase_part partition (ds='2010-04-08', hr = '12') select key, value from src - - -analyze table hbase_part partition(ds='2008-04-08', hr=11) compute statistics - -analyze table hbase_part partition(ds='2008-04-08', hr=12) compute statistics - - -desc formatted hbase_part -# col_name data_type comment - -key string None -value string None - -# Partition Information -# col_name data_type comment - -ds string None -hr string None - -# Detailed Table Information -Database: default -Owner: [email protected] -CreateTime: Fri Mar 25 11:30:31 PDT 2011 -LastAccessTime: UNKNOWN -Protect Mode: None -Retention: 0 -Location: hdfs://minotaur01.sf.cloudera.com:17020/user/hive/warehouse/hbase_part -Table Type: MANAGED_TABLE -Table Parameters: - transient_lastDdlTime 1301077831 - -# Storage Information -SerDe Library: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe -InputFormat: org.apache.hadoop.mapred.TextInputFormat -OutputFormat: 
org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat -Compressed: No -Num Buckets: -1 -Bucket Columns: [] -Sort Columns: [] -Storage Desc Params: - serialization.format 1 - -desc formatted hbase_part partition (ds='2010-04-08', hr = '11') -# col_name data_type comment - -key string None -value string None - -# Partition Information -# col_name data_type comment - -ds string None -hr string None - -# Detailed Partition Information -Partition Value: [2010-04-08, 11] -Database: default -Table: hbase_part -CreateTime: Fri Mar 25 11:30:54 PDT 2011 -LastAccessTime: UNKNOWN -Protect Mode: None -Location: hdfs://minotaur01.sf.cloudera.com:17020/user/hive/warehouse/hbase_part/ds=2010-04-08/hr=11 -Partition Parameters: - transient_lastDdlTime 1301077854 - -# Storage Information -SerDe Library: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe -InputFormat: org.apache.hadoop.mapred.TextInputFormat -OutputFormat: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat -Compressed: No -Num Buckets: -1 -Bucket Columns: [] -Sort Columns: [] -Storage Desc Params: - serialization.format 1 - -desc formatted hbase_part partition (ds='2010-04-08', hr = '12') -# col_name data_type comment - -key string None -value string None - -# Partition Information -# col_name data_type comment - -ds string None -hr string None - -# Detailed Partition Information -Partition Value: [2010-04-08, 12] -Database: default -Table: hbase_part -CreateTime: Fri Mar 25 11:31:29 PDT 2011 -LastAccessTime: UNKNOWN -Protect Mode: None -Location: hdfs://minotaur01.sf.cloudera.com:17020/user/hive/warehouse/hbase_part/ds=2010-04-08/hr=12 -Partition Parameters: - transient_lastDdlTime 1301077889 - -# Storage Information -SerDe Library: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe -InputFormat: org.apache.hadoop.mapred.TextInputFormat -OutputFormat: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat -Compressed: No -Num Buckets: -1 -Bucket Columns: [] -Sort Columns: [] -Storage Desc Params: - serialization.format 1 http://git-wip-us.apache.org/repos/asf/bigtop/blob/e209fdbb/bigtop-tests/test-artifacts/hive/src/main/resources/scripts/ql/auto_join20/filter ---------------------------------------------------------------------- diff --git a/bigtop-tests/test-artifacts/hive/src/main/resources/scripts/ql/auto_join20/filter b/bigtop-tests/test-artifacts/hive/src/main/resources/scripts/ql/auto_join20/filter deleted file mode 100644 index 9cdeb19..0000000 --- a/bigtop-tests/test-artifacts/hive/src/main/resources/scripts/ql/auto_join20/filter +++ /dev/null @@ -1,3 +0,0 @@ -sed -re 's#hdfs://.*/-(ext|mr)-1000#hdfs://HADOOP/-\1-1000#' \ - -e 's#file:/.*/-(ext|mr)-1000#file:/HADOOP/-\1-1000#' \ - -e '/.*jobconf.xml:an attempt to override final parameter: mapreduce.job.end-notification.*; Ignoring\./ d' http://git-wip-us.apache.org/repos/asf/bigtop/blob/e209fdbb/bigtop-tests/test-artifacts/hive/src/main/resources/scripts/ql/auto_join20/in ---------------------------------------------------------------------- diff --git a/bigtop-tests/test-artifacts/hive/src/main/resources/scripts/ql/auto_join20/in b/bigtop-tests/test-artifacts/hive/src/main/resources/scripts/ql/auto_join20/in deleted file mode 100644 index 2e11f5a..0000000 --- a/bigtop-tests/test-artifacts/hive/src/main/resources/scripts/ql/auto_join20/in +++ /dev/null @@ -1,45 +0,0 @@ --- Licensed to the Apache Software Foundation (ASF) under one or more --- contributor license agreements. 
See the NOTICE file distributed with --- this work for additional information regarding copyright ownership. --- The ASF licenses this file to You under the Apache License, Version 2.0 --- (the "License") you may not use this file except in compliance with --- the License. You may obtain a copy of the License at --- --- http://www.apache.org/licenses/LICENSE-2.0 --- --- Unless required by applicable law or agreed to in writing, software --- distributed under the License is distributed on an "AS IS" BASIS, --- WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. --- See the License for the specific language governing permissions and --- limitations under the License. -set hive.auto.convert.join = true; - -explain -select sum(hash(a.k1,a.v1,a.k2,a.v2,a.k3,a.v3)) -from ( -SELECT src1.key as k1, src1.value as v1, src2.key as k2, src2.value as v2 , src3.key as k3, src3.value as v3 -FROM src src1 JOIN src src2 ON (src1.key = src2.key AND src1.key < 10) RIGHT OUTER JOIN src src3 ON (src1.key = src3.key AND src3.key < 20) -SORT BY k1,v1,k2,v2,k3,v3 -)a; - -select sum(hash(a.k1,a.v1,a.k2,a.v2,a.k3,a.v3)) -from ( -SELECT src1.key as k1, src1.value as v1, src2.key as k2, src2.value as v2 , src3.key as k3, src3.value as v3 -FROM src src1 JOIN src src2 ON (src1.key = src2.key AND src1.key < 10) RIGHT OUTER JOIN src src3 ON (src1.key = src3.key AND src3.key < 20) -SORT BY k1,v1,k2,v2,k3,v3 -)a; - -explain -select sum(hash(a.k1,a.v1,a.k2,a.v2,a.k3,a.v3)) -from ( -SELECT src1.key as k1, src1.value as v1, src2.key as k2, src2.value as v2 , src3.key as k3, src3.value as v3 -FROM src src1 JOIN src src2 ON (src1.key = src2.key AND src1.key < 10 AND src2.key < 15) RIGHT OUTER JOIN src src3 ON (src1.key = src3.key AND src3.key < 20) -SORT BY k1,v1,k2,v2,k3,v3 -)a; - -select sum(hash(a.k1,a.v1,a.k2,a.v2,a.k3,a.v3)) -from ( -SELECT src1.key as k1, src1.value as v1, src2.key as k2, src2.value as v2 , src3.key as k3, src3.value as v3 -FROM src src1 JOIN src src2 ON (src1.key = src2.key AND src1.key < 10 AND src2.key < 15) RIGHT OUTER JOIN src src3 ON (src1.key = src3.key AND src3.key < 20) -SORT BY k1,v1,k2,v2,k3,v3 -)a; http://git-wip-us.apache.org/repos/asf/bigtop/blob/e209fdbb/bigtop-tests/test-artifacts/hive/src/main/resources/scripts/ql/auto_join20/out ---------------------------------------------------------------------- diff --git a/bigtop-tests/test-artifacts/hive/src/main/resources/scripts/ql/auto_join20/out b/bigtop-tests/test-artifacts/hive/src/main/resources/scripts/ql/auto_join20/out deleted file mode 100644 index 092f64d..0000000 --- a/bigtop-tests/test-artifacts/hive/src/main/resources/scripts/ql/auto_join20/out +++ /dev/null @@ -1,669 +0,0 @@ -set hive.auto.convert.join = true - - -explain -select sum(hash(a.k1,a.v1,a.k2,a.v2,a.k3,a.v3)) -from ( -SELECT src1.key as k1, src1.value as v1, src2.key as k2, src2.value as v2 , src3.key as k3, src3.value as v3 -FROM src src1 JOIN src src2 ON (src1.key = src2.key AND src1.key < 10) RIGHT OUTER JOIN src src3 ON (src1.key = src3.key AND src3.key < 20) -SORT BY k1,v1,k2,v2,k3,v3 -)a -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_SUBQUERY (TOK_QUERY (TOK_FROM (TOK_RIGHTOUTERJOIN (TOK_JOIN (TOK_TABREF (TOK_TABNAME src) src1) (TOK_TABREF (TOK_TABNAME src) src2) (AND (= (. (TOK_TABLE_OR_COL src1) key) (. (TOK_TABLE_OR_COL src2) key)) (< (. (TOK_TABLE_OR_COL src1) key) 10))) (TOK_TABREF (TOK_TABNAME src) src3) (AND (= (. (TOK_TABLE_OR_COL src1) key) (. (TOK_TABLE_OR_COL src3) key)) (< (. 
(TOK_TABLE_OR_COL src3) key) 20)))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (. (TOK_TABLE_OR_COL src1) key) k1) (TOK_SELEXPR (. (TOK_TABLE_OR_COL src1) value) v1) (TOK_SELEXPR (. (TOK_TABLE_OR_COL src2) key) k2) (TOK_SELEXPR (. (TOK_TABLE_OR_COL src2) value) v2) (TOK_SELEXPR (. (TOK_TABLE_OR_COL src3) key) k3) (TOK_SELEXPR (. (TOK_TABLE_OR_COL src3) value) v3)) (TOK_SORTBY (TOK_TABSORTCOLNAMEASC (TOK_TABLE_OR_COL k1)) (TOK_TABSORTCOLNAMEASC (TOK_TABLE_OR_COL v1)) (TOK_TABSORTCOLNAMEASC (TOK_TABLE_OR_COL k2)) (TOK_TABSORTCOLNAMEASC (TOK_TABLE_OR_COL v2)) (TOK_TABSORTCOLNAMEASC (TOK_TABLE_OR_COL k3)) (TOK_TABSORTCOLNAMEASC (TOK_TABLE_OR_COL v3))))) a)) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_FUNCTION sum (TOK_FUNCTION hash (. (TOK_TABLE_OR_COL a) k1) (. (TOK_TABLE_OR_COL a) v1) (. (TOK_TABLE_OR_COL a) k2) (. (TOK_TABLE_OR_COL a) v2) (. (TOK_TABLE_OR_COL a) k3) (. (TOK_TABLE_OR_COL a) v3))))))) - -STAGE DEPENDENCIES: - Stage-7 is a root stage , consists of Stage-8, Stage-1 - Stage-8 has a backup stage: Stage-1 - Stage-6 depends on stages: Stage-8 - Stage-2 depends on stages: Stage-1, Stage-6 - Stage-3 depends on stages: Stage-2 - Stage-1 - Stage-0 is a root stage - -STAGE PLANS: - Stage: Stage-7 - Conditional Operator - - Stage: Stage-8 - Map Reduce Local Work - Alias -> Map Local Tables: - a:src1 - Fetch Operator - limit: -1 - a:src2 - Fetch Operator - limit: -1 - Alias -> Map Local Operator Tree: - a:src1 - TableScan - alias: src1 - Filter Operator - predicate: - expr: (key < 10.0) - type: boolean - HashTable Sink Operator - condition expressions: - 0 {key} {value} - 1 {key} {value} - 2 {key} {value} - filter predicates: - 0 - 1 - 2 {(key < 20.0)} - handleSkewJoin: false - keys: - 0 [Column[key]] - 1 [Column[key]] - 2 [Column[key]] - Position of Big Table: 2 - a:src2 - TableScan - alias: src2 - Filter Operator - predicate: - expr: (key < 10.0) - type: boolean - HashTable Sink Operator - condition expressions: - 0 {key} {value} - 1 {key} {value} - 2 {key} {value} - filter predicates: - 0 - 1 - 2 {(key < 20.0)} - handleSkewJoin: false - keys: - 0 [Column[key]] - 1 [Column[key]] - 2 [Column[key]] - Position of Big Table: 2 - - Stage: Stage-6 - Map Reduce - Alias -> Map Operator Tree: - a:src3 - TableScan - alias: src3 - Map Join Operator - condition map: - Inner Join 0 to 1 - Right Outer Join0 to 2 - condition expressions: - 0 {key} {value} - 1 {key} {value} - 2 {key} {value} - filter predicates: - 0 - 1 - 2 {(key < 20.0)} - handleSkewJoin: false - keys: - 0 [Column[key]] - 1 [Column[key]] - 2 [Column[key]] - outputColumnNames: _col0, _col1, _col4, _col5, _col8, _col9 - Position of Big Table: 2 - Select Operator - expressions: - expr: _col0 - type: string - expr: _col1 - type: string - expr: _col4 - type: string - expr: _col5 - type: string - expr: _col8 - type: string - expr: _col9 - type: string - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 - File Output Operator - compressed: false - GlobalTableId: 0 - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - Local Work: - Map Reduce Local Work - - Stage: Stage-2 - Map Reduce - Alias -> Map Operator Tree: - hdfs://mgrover-bigtop-centos-1.ent.cloudera.com:17020/tmp/hive-root/hive_2013-04-23_11-35-58_309_5301659382942807999/-mr-10002 - Reduce Output Operator - key expressions: - expr: _col0 - type: string - expr: _col1 - type: string - expr: _col2 - type: 
string - expr: _col3 - type: string - expr: _col4 - type: string - expr: _col5 - type: string - sort order: ++++++ - tag: -1 - value expressions: - expr: _col0 - type: string - expr: _col1 - type: string - expr: _col2 - type: string - expr: _col3 - type: string - expr: _col4 - type: string - expr: _col5 - type: string - Reduce Operator Tree: - Extract - Select Operator - expressions: - expr: _col0 - type: string - expr: _col1 - type: string - expr: _col2 - type: string - expr: _col3 - type: string - expr: _col4 - type: string - expr: _col5 - type: string - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 - Group By Operator - aggregations: - expr: sum(hash(_col0,_col1,_col2,_col3,_col4,_col5)) - bucketGroup: false - mode: hash - outputColumnNames: _col0 - File Output Operator - compressed: false - GlobalTableId: 0 - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - - Stage: Stage-3 - Map Reduce - Alias -> Map Operator Tree: - hdfs://mgrover-bigtop-centos-1.ent.cloudera.com:17020/tmp/hive-root/hive_2013-04-23_11-35-58_309_5301659382942807999/-mr-10003 - Reduce Output Operator - sort order: - tag: -1 - value expressions: - expr: _col0 - type: bigint - Reduce Operator Tree: - Group By Operator - aggregations: - expr: sum(VALUE._col0) - bucketGroup: false - mode: mergepartial - outputColumnNames: _col0 - Select Operator - expressions: - expr: _col0 - type: bigint - outputColumnNames: _col0 - File Output Operator - compressed: false - GlobalTableId: 0 - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - - Stage: Stage-1 - Map Reduce - Alias -> Map Operator Tree: - a:src1 - TableScan - alias: src1 - Filter Operator - predicate: - expr: (key < 10.0) - type: boolean - Reduce Output Operator - key expressions: - expr: key - type: string - sort order: + - Map-reduce partition columns: - expr: key - type: string - tag: 0 - value expressions: - expr: key - type: string - expr: value - type: string - a:src2 - TableScan - alias: src2 - Filter Operator - predicate: - expr: (key < 10.0) - type: boolean - Reduce Output Operator - key expressions: - expr: key - type: string - sort order: + - Map-reduce partition columns: - expr: key - type: string - tag: 1 - value expressions: - expr: key - type: string - expr: value - type: string - a:src3 - TableScan - alias: src3 - Reduce Output Operator - key expressions: - expr: key - type: string - sort order: + - Map-reduce partition columns: - expr: key - type: string - tag: 2 - value expressions: - expr: key - type: string - expr: value - type: string - Reduce Operator Tree: - Join Operator - condition map: - Inner Join 0 to 1 - Right Outer Join0 to 2 - condition expressions: - 0 {VALUE._col0} {VALUE._col1} - 1 {VALUE._col0} {VALUE._col1} - 2 {VALUE._col0} {VALUE._col1} - filter predicates: - 0 - 1 - 2 {(VALUE._col0 < 20.0)} - handleSkewJoin: false - outputColumnNames: _col0, _col1, _col4, _col5, _col8, _col9 - Select Operator - expressions: - expr: _col0 - type: string - expr: _col1 - type: string - expr: _col4 - type: string - expr: _col5 - type: string - expr: _col8 - type: string - expr: _col9 - type: string - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 - File Output Operator - compressed: false - GlobalTableId: 0 - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: 
org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - - Stage: Stage-0 - Fetch Operator - limit: -1 - - - - -select sum(hash(a.k1,a.v1,a.k2,a.v2,a.k3,a.v3)) -from ( -SELECT src1.key as k1, src1.value as v1, src2.key as k2, src2.value as v2 , src3.key as k3, src3.value as v3 -FROM src src1 JOIN src src2 ON (src1.key = src2.key AND src1.key < 10) RIGHT OUTER JOIN src src3 ON (src1.key = src3.key AND src3.key < 20) -SORT BY k1,v1,k2,v2,k3,v3 -)a -56157587016 - - -explain -select sum(hash(a.k1,a.v1,a.k2,a.v2,a.k3,a.v3)) -from ( -SELECT src1.key as k1, src1.value as v1, src2.key as k2, src2.value as v2 , src3.key as k3, src3.value as v3 -FROM src src1 JOIN src src2 ON (src1.key = src2.key AND src1.key < 10 AND src2.key < 15) RIGHT OUTER JOIN src src3 ON (src1.key = src3.key AND src3.key < 20) -SORT BY k1,v1,k2,v2,k3,v3 -)a -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_SUBQUERY (TOK_QUERY (TOK_FROM (TOK_RIGHTOUTERJOIN (TOK_JOIN (TOK_TABREF (TOK_TABNAME src) src1) (TOK_TABREF (TOK_TABNAME src) src2) (AND (AND (= (. (TOK_TABLE_OR_COL src1) key) (. (TOK_TABLE_OR_COL src2) key)) (< (. (TOK_TABLE_OR_COL src1) key) 10)) (< (. (TOK_TABLE_OR_COL src2) key) 15))) (TOK_TABREF (TOK_TABNAME src) src3) (AND (= (. (TOK_TABLE_OR_COL src1) key) (. (TOK_TABLE_OR_COL src3) key)) (< (. (TOK_TABLE_OR_COL src3) key) 20)))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (. (TOK_TABLE_OR_COL src1) key) k1) (TOK_SELEXPR (. (TOK_TABLE_OR_COL src1) value) v1) (TOK_SELEXPR (. (TOK_TABLE_OR_COL src2) key) k2) (TOK_SELEXPR (. (TOK_TABLE_OR_COL src2) value) v2) (TOK_SELEXPR (. (TOK_TABLE_OR_COL src3) key) k3) (TOK_SELEXPR (. (TOK_TABLE_OR_COL src3) value) v3)) (TOK_SORTBY (TOK_TABSORTCOLNAMEASC (TOK_TABLE_OR_COL k1)) (TOK_TABSORTCOLNAMEASC (TOK_TABLE_OR_COL v1)) (TOK_TABSORTCOLNAMEASC (TOK_TABLE_OR_COL k2)) (TOK_TABSORTCOLNAMEASC (TOK_TABLE_OR_COL v2)) (TOK_TABSORTCOLNAMEASC (TOK_TABLE_OR_COL k3)) (TOK_TABSORTCOLNAMEASC (TOK_TABLE_OR_COL v3))))) a)) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_FUNCTION sum (TOK_FUNCTION hash (. (TOK_TABLE_OR_COL a) k1) (. (TOK_TABLE_OR_COL a) v1) (. (TOK_TABLE_OR_COL a) k2) (. (TOK_TABLE_OR_COL a) v2) (. (TOK_TABLE_OR_COL a) k3) (. 
(TOK_TABLE_OR_COL a) v3))))))) - -STAGE DEPENDENCIES: - Stage-7 is a root stage , consists of Stage-8, Stage-1 - Stage-8 has a backup stage: Stage-1 - Stage-6 depends on stages: Stage-8 - Stage-2 depends on stages: Stage-1, Stage-6 - Stage-3 depends on stages: Stage-2 - Stage-1 - Stage-0 is a root stage - -STAGE PLANS: - Stage: Stage-7 - Conditional Operator - - Stage: Stage-8 - Map Reduce Local Work - Alias -> Map Local Tables: - a:src1 - Fetch Operator - limit: -1 - a:src2 - Fetch Operator - limit: -1 - Alias -> Map Local Operator Tree: - a:src1 - TableScan - alias: src1 - Filter Operator - predicate: - expr: ((key < 10.0) and (key < 15.0)) - type: boolean - HashTable Sink Operator - condition expressions: - 0 {key} {value} - 1 {key} {value} - 2 {key} {value} - filter predicates: - 0 - 1 - 2 {(key < 20.0)} - handleSkewJoin: false - keys: - 0 [Column[key]] - 1 [Column[key]] - 2 [Column[key]] - Position of Big Table: 2 - a:src2 - TableScan - alias: src2 - Filter Operator - predicate: - expr: ((key < 15.0) and (key < 10.0)) - type: boolean - HashTable Sink Operator - condition expressions: - 0 {key} {value} - 1 {key} {value} - 2 {key} {value} - filter predicates: - 0 - 1 - 2 {(key < 20.0)} - handleSkewJoin: false - keys: - 0 [Column[key]] - 1 [Column[key]] - 2 [Column[key]] - Position of Big Table: 2 - - Stage: Stage-6 - Map Reduce - Alias -> Map Operator Tree: - a:src3 - TableScan - alias: src3 - Map Join Operator - condition map: - Inner Join 0 to 1 - Right Outer Join0 to 2 - condition expressions: - 0 {key} {value} - 1 {key} {value} - 2 {key} {value} - filter predicates: - 0 - 1 - 2 {(key < 20.0)} - handleSkewJoin: false - keys: - 0 [Column[key]] - 1 [Column[key]] - 2 [Column[key]] - outputColumnNames: _col0, _col1, _col4, _col5, _col8, _col9 - Position of Big Table: 2 - Select Operator - expressions: - expr: _col0 - type: string - expr: _col1 - type: string - expr: _col4 - type: string - expr: _col5 - type: string - expr: _col8 - type: string - expr: _col9 - type: string - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 - File Output Operator - compressed: false - GlobalTableId: 0 - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - Local Work: - Map Reduce Local Work - - Stage: Stage-2 - Map Reduce - Alias -> Map Operator Tree: - hdfs://mgrover-bigtop-centos-1.ent.cloudera.com:17020/tmp/hive-root/hive_2013-04-23_11-37-20_822_7360885266220174309/-mr-10002 - Reduce Output Operator - key expressions: - expr: _col0 - type: string - expr: _col1 - type: string - expr: _col2 - type: string - expr: _col3 - type: string - expr: _col4 - type: string - expr: _col5 - type: string - sort order: ++++++ - tag: -1 - value expressions: - expr: _col0 - type: string - expr: _col1 - type: string - expr: _col2 - type: string - expr: _col3 - type: string - expr: _col4 - type: string - expr: _col5 - type: string - Reduce Operator Tree: - Extract - Select Operator - expressions: - expr: _col0 - type: string - expr: _col1 - type: string - expr: _col2 - type: string - expr: _col3 - type: string - expr: _col4 - type: string - expr: _col5 - type: string - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 - Group By Operator - aggregations: - expr: sum(hash(_col0,_col1,_col2,_col3,_col4,_col5)) - bucketGroup: false - mode: hash - outputColumnNames: _col0 - File Output Operator - compressed: false - GlobalTableId: 0 - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output 
format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - - Stage: Stage-3 - Map Reduce - Alias -> Map Operator Tree: - hdfs://mgrover-bigtop-centos-1.ent.cloudera.com:17020/tmp/hive-root/hive_2013-04-23_11-37-20_822_7360885266220174309/-mr-10003 - Reduce Output Operator - sort order: - tag: -1 - value expressions: - expr: _col0 - type: bigint - Reduce Operator Tree: - Group By Operator - aggregations: - expr: sum(VALUE._col0) - bucketGroup: false - mode: mergepartial - outputColumnNames: _col0 - Select Operator - expressions: - expr: _col0 - type: bigint - outputColumnNames: _col0 - File Output Operator - compressed: false - GlobalTableId: 0 - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - - Stage: Stage-1 - Map Reduce - Alias -> Map Operator Tree: - a:src1 - TableScan - alias: src1 - Filter Operator - predicate: - expr: ((key < 10.0) and (key < 15.0)) - type: boolean - Reduce Output Operator - key expressions: - expr: key - type: string - sort order: + - Map-reduce partition columns: - expr: key - type: string - tag: 0 - value expressions: - expr: key - type: string - expr: value - type: string - a:src2 - TableScan - alias: src2 - Filter Operator - predicate: - expr: ((key < 15.0) and (key < 10.0)) - type: boolean - Reduce Output Operator - key expressions: - expr: key - type: string - sort order: + - Map-reduce partition columns: - expr: key - type: string - tag: 1 - value expressions: - expr: key - type: string - expr: value - type: string - a:src3 - TableScan - alias: src3 - Reduce Output Operator - key expressions: - expr: key - type: string - sort order: + - Map-reduce partition columns: - expr: key - type: string - tag: 2 - value expressions: - expr: key - type: string - expr: value - type: string - Reduce Operator Tree: - Join Operator - condition map: - Inner Join 0 to 1 - Right Outer Join0 to 2 - condition expressions: - 0 {VALUE._col0} {VALUE._col1} - 1 {VALUE._col0} {VALUE._col1} - 2 {VALUE._col0} {VALUE._col1} - filter predicates: - 0 - 1 - 2 {(VALUE._col0 < 20.0)} - handleSkewJoin: false - outputColumnNames: _col0, _col1, _col4, _col5, _col8, _col9 - Select Operator - expressions: - expr: _col0 - type: string - expr: _col1 - type: string - expr: _col4 - type: string - expr: _col5 - type: string - expr: _col8 - type: string - expr: _col9 - type: string - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 - File Output Operator - compressed: false - GlobalTableId: 0 - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - - Stage: Stage-0 - Fetch Operator - limit: -1 - - - - -select sum(hash(a.k1,a.v1,a.k2,a.v2,a.k3,a.v3)) -from ( -SELECT src1.key as k1, src1.value as v1, src2.key as k2, src2.value as v2 , src3.key as k3, src3.value as v3 -FROM src src1 JOIN src src2 ON (src1.key = src2.key AND src1.key < 10 AND src2.key < 15) RIGHT OUTER JOIN src src3 ON (src1.key = src3.key AND src3.key < 20) -SORT BY k1,v1,k2,v2,k3,v3 -)a -56157587016 http://git-wip-us.apache.org/repos/asf/bigtop/blob/e209fdbb/bigtop-tests/test-artifacts/hive/src/main/resources/scripts/ql/basic/filter ---------------------------------------------------------------------- diff --git a/bigtop-tests/test-artifacts/hive/src/main/resources/scripts/ql/basic/filter b/bigtop-tests/test-artifacts/hive/src/main/resources/scripts/ql/basic/filter deleted file mode 100644 index fa2e13d..0000000 --- 
a/bigtop-tests/test-artifacts/hive/src/main/resources/scripts/ql/basic/filter +++ /dev/null @@ -1,4 +0,0 @@ -sed -e 's#hdfs://[^/]*/#hdfs://HADOOP/#' \ - -e '/.*Copying .*kv1.txt$/d' \ - -e '/^Deleted.*text_kv1$/d' \ - -e '/.*jobconf.xml:an attempt to override final parameter: mapreduce.job.end-notification.*; Ignoring\./ d' http://git-wip-us.apache.org/repos/asf/bigtop/blob/e209fdbb/bigtop-tests/test-artifacts/hive/src/main/resources/scripts/ql/basic/in ---------------------------------------------------------------------- diff --git a/bigtop-tests/test-artifacts/hive/src/main/resources/scripts/ql/basic/in b/bigtop-tests/test-artifacts/hive/src/main/resources/scripts/ql/basic/in deleted file mode 100644 index 599af59..0000000 --- a/bigtop-tests/test-artifacts/hive/src/main/resources/scripts/ql/basic/in +++ /dev/null @@ -1,29 +0,0 @@ --- Licensed to the Apache Software Foundation (ASF) under one or more --- contributor license agreements. See the NOTICE file distributed with --- this work for additional information regarding copyright ownership. --- The ASF licenses this file to You under the Apache License, Version 2.0 --- (the "License") you may not use this file except in compliance with --- the License. You may obtain a copy of the License at --- --- http://www.apache.org/licenses/LICENSE-2.0 --- --- Unless required by applicable law or agreed to in writing, software --- distributed under the License is distributed on an "AS IS" BASIS, --- WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. --- See the License for the specific language governing permissions and --- limitations under the License. -DROP TABLE text_kv1; - -CREATE TABLE text_kv1 ( - value INT, - valueString STRING) -ROW FORMAT DELIMITED -FIELDS TERMINATED BY '\001' -STORED AS TEXTFILE; - -LOAD DATA LOCAL INPATH 'seed_data_files/kv1.txt' -OVERWRITE INTO TABLE text_kv1; - -INSERT OVERWRITE DIRECTORY '/tmp/count' -SELECT COUNT(1) FROM text_kv1; -dfs -cat /tmp/count/* ; http://git-wip-us.apache.org/repos/asf/bigtop/blob/e209fdbb/bigtop-tests/test-artifacts/hive/src/main/resources/scripts/ql/basic/out ---------------------------------------------------------------------- diff --git a/bigtop-tests/test-artifacts/hive/src/main/resources/scripts/ql/basic/out b/bigtop-tests/test-artifacts/hive/src/main/resources/scripts/ql/basic/out deleted file mode 100644 index 0fd11cc..0000000 --- a/bigtop-tests/test-artifacts/hive/src/main/resources/scripts/ql/basic/out +++ /dev/null @@ -1,19 +0,0 @@ -DROP TABLE text_kv1 - - -CREATE TABLE text_kv1 ( - value INT, - valueString STRING) -ROW FORMAT DELIMITED -FIELDS TERMINATED BY '\001' -STORED AS TEXTFILE - - -LOAD DATA LOCAL INPATH 'seed_data_files/kv1.txt' -OVERWRITE INTO TABLE text_kv1 - - -INSERT OVERWRITE DIRECTORY '/tmp/count' -SELECT COUNT(1) FROM text_kv1 -dfs -cat /tmp/count/* -500 http://git-wip-us.apache.org/repos/asf/bigtop/blob/e209fdbb/bigtop-tests/test-artifacts/hive/src/main/resources/scripts/ql/bucketizedhiveinputformat/filter ---------------------------------------------------------------------- diff --git a/bigtop-tests/test-artifacts/hive/src/main/resources/scripts/ql/bucketizedhiveinputformat/filter b/bigtop-tests/test-artifacts/hive/src/main/resources/scripts/ql/bucketizedhiveinputformat/filter deleted file mode 100644 index b65fd71..0000000 --- a/bigtop-tests/test-artifacts/hive/src/main/resources/scripts/ql/bucketizedhiveinputformat/filter +++ /dev/null @@ -1,5 +0,0 @@ -sed -re 's#hdfs://.*/-(ext|mr)-1000#hdfs://HADOOP/-\1-1000#' \ - -e 's#Copying 
file:.*/kv..txt#Copying file:kvX.txt#' \ - -e '/^Deleted hdfs:/d' \ - -e 's#file:/.*/-(ext|mr)-1000#file:/HADOOP/-\1-1000#' \ - -e '/.*jobconf.xml:an attempt to override final parameter: mapreduce.job.end-notification.*; Ignoring\./ d' http://git-wip-us.apache.org/repos/asf/bigtop/blob/e209fdbb/bigtop-tests/test-artifacts/hive/src/main/resources/scripts/ql/bucketizedhiveinputformat/in ---------------------------------------------------------------------- diff --git a/bigtop-tests/test-artifacts/hive/src/main/resources/scripts/ql/bucketizedhiveinputformat/in b/bigtop-tests/test-artifacts/hive/src/main/resources/scripts/ql/bucketizedhiveinputformat/in deleted file mode 100644 index e84e0d9..0000000 --- a/bigtop-tests/test-artifacts/hive/src/main/resources/scripts/ql/bucketizedhiveinputformat/in +++ /dev/null @@ -1,47 +0,0 @@ --- Licensed to the Apache Software Foundation (ASF) under one or more --- contributor license agreements. See the NOTICE file distributed with --- this work for additional information regarding copyright ownership. --- The ASF licenses this file to You under the Apache License, Version 2.0 --- (the "License") you may not use this file except in compliance with --- the License. You may obtain a copy of the License at --- --- http://www.apache.org/licenses/LICENSE-2.0 --- --- Unless required by applicable law or agreed to in writing, software --- distributed under the License is distributed on an "AS IS" BASIS, --- WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. --- See the License for the specific language governing permissions and --- limitations under the License. -set hive.input.format=org.apache.hadoop.hive.ql.io.BucketizedHiveInputFormat; -set mapred.min.split.size = 64; - -CREATE TABLE T1(name STRING) STORED AS TEXTFILE; - -LOAD DATA LOCAL INPATH 'seed_data_files/kv1.txt' INTO TABLE T1; - -CREATE TABLE T2(name STRING) STORED AS SEQUENCEFILE; - -EXPLAIN INSERT OVERWRITE TABLE T2 SELECT * FROM ( -SELECT tmp1.name as name FROM ( - SELECT name, 'MMM' AS n FROM T1) tmp1 - JOIN (SELECT 'MMM' AS n FROM T1) tmp2 - JOIN (SELECT 'MMM' AS n FROM T1) tmp3 - ON tmp1.n = tmp2.n AND tmp1.n = tmp3.n) ttt LIMIT 5000000; - - -INSERT OVERWRITE TABLE T2 SELECT * FROM ( -SELECT tmp1.name as name FROM ( - SELECT name, 'MMM' AS n FROM T1) tmp1 - JOIN (SELECT 'MMM' AS n FROM T1) tmp2 - JOIN (SELECT 'MMM' AS n FROM T1) tmp3 - ON tmp1.n = tmp2.n AND tmp1.n = tmp3.n) ttt LIMIT 5000000; - -EXPLAIN SELECT COUNT(1) FROM T2; -SELECT COUNT(1) FROM T2; - -CREATE TABLE T3(name STRING) STORED AS TEXTFILE; -LOAD DATA LOCAL INPATH 'seed_data_files/kv1.txt' INTO TABLE T3; -LOAD DATA LOCAL INPATH 'seed_data_files/kv2.txt' INTO TABLE T3; - -EXPLAIN SELECT COUNT(1) FROM T3; -SELECT COUNT(1) FROM T3; http://git-wip-us.apache.org/repos/asf/bigtop/blob/e209fdbb/bigtop-tests/test-artifacts/hive/src/main/resources/scripts/ql/bucketizedhiveinputformat/out ---------------------------------------------------------------------- diff --git a/bigtop-tests/test-artifacts/hive/src/main/resources/scripts/ql/bucketizedhiveinputformat/out b/bigtop-tests/test-artifacts/hive/src/main/resources/scripts/ql/bucketizedhiveinputformat/out deleted file mode 100644 index 00ab302..0000000 --- a/bigtop-tests/test-artifacts/hive/src/main/resources/scripts/ql/bucketizedhiveinputformat/out +++ /dev/null @@ -1,315 +0,0 @@ -set hive.input.format=org.apache.hadoop.hive.ql.io.BucketizedHiveInputFormat -set mapred.min.split.size = 64 - - -CREATE TABLE T1(name STRING) STORED AS TEXTFILE - - -LOAD DATA LOCAL INPATH 
'seed_data_files/kv1.txt' INTO TABLE T1 -Copying file: file:/root/bigtop/bigtop-tests/test-execution/smokes/hive/target/seed_data_files/kv1.txt - - -CREATE TABLE T2(name STRING) STORED AS SEQUENCEFILE - - -EXPLAIN INSERT OVERWRITE TABLE T2 SELECT * FROM ( -SELECT tmp1.name as name FROM ( - SELECT name, 'MMM' AS n FROM T1) tmp1 - JOIN (SELECT 'MMM' AS n FROM T1) tmp2 - JOIN (SELECT 'MMM' AS n FROM T1) tmp3 - ON tmp1.n = tmp2.n AND tmp1.n = tmp3.n) ttt LIMIT 5000000 -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_SUBQUERY (TOK_QUERY (TOK_FROM (TOK_JOIN (TOK_JOIN (TOK_SUBQUERY (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME T1))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_TABLE_OR_COL name)) (TOK_SELEXPR 'MMM' n)))) tmp1) (TOK_SUBQUERY (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME T1))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR 'MMM' n)))) tmp2)) (TOK_SUBQUERY (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME T1))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR 'MMM' n)))) tmp3) (AND (= (. (TOK_TABLE_OR_COL tmp1) n) (. (TOK_TABLE_OR_COL tmp2) n)) (= (. (TOK_TABLE_OR_COL tmp1) n) (. (TOK_TABLE_OR_COL tmp3) n))))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (. (TOK_TABLE_OR_COL tmp1) name) name)))) ttt)) (TOK_INSERT (TOK_DESTINATION (TOK_TAB (TOK_TABNAME T2))) (TOK_SELECT (TOK_SELEXPR TOK_ALLCOLREF)) (TOK_LIMIT 5000000))) - -STAGE DEPENDENCIES: - Stage-1 is a root stage - Stage-2 depends on stages: Stage-1 - Stage-3 depends on stages: Stage-2 - Stage-0 depends on stages: Stage-3 - Stage-4 depends on stages: Stage-0 - -STAGE PLANS: - Stage: Stage-1 - Map Reduce - Alias -> Map Operator Tree: - ttt:tmp1:t1 - TableScan - alias: t1 - Select Operator - expressions: - expr: name - type: string - expr: 'MMM' - type: string - outputColumnNames: _col0, _col1 - Reduce Output Operator - sort order: - tag: 0 - value expressions: - expr: _col0 - type: string - expr: _col1 - type: string - ttt:tmp2:t1 - TableScan - alias: t1 - Select Operator - expressions: - expr: 'MMM' - type: string - outputColumnNames: _col0 - Reduce Output Operator - sort order: - tag: 1 - value expressions: - expr: _col0 - type: string - Reduce Operator Tree: - Join Operator - condition map: - Inner Join 0 to 1 - condition expressions: - 0 {VALUE._col0} {VALUE._col1} - 1 {VALUE._col0} - handleSkewJoin: false - outputColumnNames: _col0, _col1, _col2 - Filter Operator - predicate: - expr: (_col1 = _col2) - type: boolean - File Output Operator - compressed: false - GlobalTableId: 0 - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - - Stage: Stage-2 - Map Reduce - Alias -> Map Operator Tree: - $INTNAME - Reduce Output Operator - key expressions: - expr: _col1 - type: string - sort order: + - Map-reduce partition columns: - expr: _col1 - type: string - tag: 0 - value expressions: - expr: _col0 - type: string - ttt:tmp3:t1 - TableScan - alias: t1 - Select Operator - expressions: - expr: 'MMM' - type: string - outputColumnNames: _col0 - Reduce Output Operator - key expressions: - expr: _col0 - type: string - sort order: + - Map-reduce partition columns: - expr: _col0 - type: string - tag: 1 - Reduce Operator Tree: - Join Operator - condition map: - Inner Join 0 to 1 - condition expressions: - 0 {VALUE._col1} - 1 - handleSkewJoin: false - outputColumnNames: _col1 - Select Operator - expressions: - expr: _col1 - 
type: string - outputColumnNames: _col0 - Select Operator - expressions: - expr: _col0 - type: string - outputColumnNames: _col0 - Limit - File Output Operator - compressed: false - GlobalTableId: 0 - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - - Stage: Stage-3 - Map Reduce - Alias -> Map Operator Tree: - hdfs://mgrover-bigtop-centos-1.ent.cloudera.com:17020/tmp/hive-root/hive_2013-04-23_11-41-40_405_3858259332039900449/-mr-10003 - Reduce Output Operator - sort order: - tag: -1 - value expressions: - expr: _col0 - type: string - Reduce Operator Tree: - Extract - Limit - File Output Operator - compressed: false - GlobalTableId: 1 - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.t2 - - Stage: Stage-0 - Move Operator - tables: - replace: true - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.t2 - - Stage: Stage-4 - Stats-Aggr Operator - - - - - -INSERT OVERWRITE TABLE T2 SELECT * FROM ( -SELECT tmp1.name as name FROM ( - SELECT name, 'MMM' AS n FROM T1) tmp1 - JOIN (SELECT 'MMM' AS n FROM T1) tmp2 - JOIN (SELECT 'MMM' AS n FROM T1) tmp3 - ON tmp1.n = tmp2.n AND tmp1.n = tmp3.n) ttt LIMIT 5000000 -Deleted /user/hive/warehouse/t2 - - -EXPLAIN SELECT COUNT(1) FROM T2 -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME T2))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_FUNCTION COUNT 1))))) - -STAGE DEPENDENCIES: - Stage-1 is a root stage - Stage-0 is a root stage - -STAGE PLANS: - Stage: Stage-1 - Map Reduce - Alias -> Map Operator Tree: - t2 - TableScan - alias: t2 - Select Operator - Group By Operator - aggregations: - expr: count(1) - bucketGroup: false - mode: hash - outputColumnNames: _col0 - Reduce Output Operator - sort order: - tag: -1 - value expressions: - expr: _col0 - type: bigint - Reduce Operator Tree: - Group By Operator - aggregations: - expr: count(VALUE._col0) - bucketGroup: false - mode: mergepartial - outputColumnNames: _col0 - Select Operator - expressions: - expr: _col0 - type: bigint - outputColumnNames: _col0 - File Output Operator - compressed: false - GlobalTableId: 0 - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - - Stage: Stage-0 - Fetch Operator - limit: -1 - - - -SELECT COUNT(1) FROM T2 -5000000 - - -CREATE TABLE T3(name STRING) STORED AS TEXTFILE - -LOAD DATA LOCAL INPATH 'seed_data_files/kv1.txt' INTO TABLE T3 -Copying file: file:/root/bigtop/bigtop-tests/test-execution/smokes/hive/target/seed_data_files/kv1.txt - -LOAD DATA LOCAL INPATH 'seed_data_files/kv2.txt' INTO TABLE T3 -Copying file: file:/root/bigtop/bigtop-tests/test-execution/smokes/hive/target/seed_data_files/kv2.txt - - -EXPLAIN SELECT COUNT(1) FROM T3 -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME T3))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_FUNCTION COUNT 1))))) - -STAGE DEPENDENCIES: - Stage-1 is a root stage - Stage-0 is a root stage - -STAGE PLANS: - Stage: Stage-1 - Map Reduce - Alias -> Map Operator Tree: - t3 - TableScan - alias: 
t3 - Select Operator - Group By Operator - aggregations: - expr: count(1) - bucketGroup: false - mode: hash - outputColumnNames: _col0 - Reduce Output Operator - sort order: - tag: -1 - value expressions: - expr: _col0 - type: bigint - Reduce Operator Tree: - Group By Operator - aggregations: - expr: count(VALUE._col0) - bucketGroup: false - mode: mergepartial - outputColumnNames: _col0 - Select Operator - expressions: - expr: _col0 - type: bigint - outputColumnNames: _col0 - File Output Operator - compressed: false - GlobalTableId: 0 - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - - Stage: Stage-0 - Fetch Operator - limit: -1 - - - -SELECT COUNT(1) FROM T3 -1000 http://git-wip-us.apache.org/repos/asf/bigtop/blob/e209fdbb/bigtop-tests/test-artifacts/hive/src/main/resources/scripts/ql/bucketmapjoin5/filter ---------------------------------------------------------------------- diff --git a/bigtop-tests/test-artifacts/hive/src/main/resources/scripts/ql/bucketmapjoin5/filter b/bigtop-tests/test-artifacts/hive/src/main/resources/scripts/ql/bucketmapjoin5/filter deleted file mode 100644 index 2384b8a..0000000 --- a/bigtop-tests/test-artifacts/hive/src/main/resources/scripts/ql/bucketmapjoin5/filter +++ /dev/null @@ -1,6 +0,0 @@ -sed -re 's#hdfs://[^/]*/#hdfs://HADOOP/#g' \ - -e 's#Copying file:.*/srcbucket2#Copying file:srcbucket2#' \ - -e 's#hdfs://.*/-(ext|mr)-1000#hdfs://HADOOP/-\1-1000#' \ - -e 's#transient_lastDdlTime [0-9]*#transient_lastDdlTime JUSTNOW#' \ - -e 's#file:/.*/-(ext|mr)-1000#file:/HADOOP/-\1-1000#' \ - -e '/.*jobconf.xml:an attempt to override final parameter: mapreduce.job.end-notification.*; Ignoring\./ d' http://git-wip-us.apache.org/repos/asf/bigtop/blob/e209fdbb/bigtop-tests/test-artifacts/hive/src/main/resources/scripts/ql/bucketmapjoin5/in ---------------------------------------------------------------------- diff --git a/bigtop-tests/test-artifacts/hive/src/main/resources/scripts/ql/bucketmapjoin5/in b/bigtop-tests/test-artifacts/hive/src/main/resources/scripts/ql/bucketmapjoin5/in deleted file mode 100644 index 609df9b..0000000 --- a/bigtop-tests/test-artifacts/hive/src/main/resources/scripts/ql/bucketmapjoin5/in +++ /dev/null @@ -1,99 +0,0 @@ --- Licensed to the Apache Software Foundation (ASF) under one or more --- contributor license agreements. See the NOTICE file distributed with --- this work for additional information regarding copyright ownership. --- The ASF licenses this file to You under the Apache License, Version 2.0 --- (the "License") you may not use this file except in compliance with --- the License. You may obtain a copy of the License at --- --- http://www.apache.org/licenses/LICENSE-2.0 --- --- Unless required by applicable law or agreed to in writing, software --- distributed under the License is distributed on an "AS IS" BASIS, --- WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. --- See the License for the specific language governing permissions and --- limitations under the License. 
-CREATE TABLE srcbucket_mapjoin(key int, value string) CLUSTERED BY (key) INTO 2 BUCKETS STORED AS TEXTFILE; -load data local inpath 'seed_data_files/srcbucket20.txt' INTO TABLE srcbucket_mapjoin; -load data local inpath 'seed_data_files/srcbucket21.txt' INTO TABLE srcbucket_mapjoin; - -CREATE TABLE srcbucket_mapjoin_part (key int, value string) partitioned by (ds string) CLUSTERED BY (key) INTO 4 BUCKETS STORED AS TEXTFILE; -load data local inpath 'seed_data_files/srcbucket20.txt' INTO TABLE srcbucket_mapjoin_part partition(ds='2008-04-08'); -load data local inpath 'seed_data_files/srcbucket21.txt' INTO TABLE srcbucket_mapjoin_part partition(ds='2008-04-08'); -load data local inpath 'seed_data_files/srcbucket22.txt' INTO TABLE srcbucket_mapjoin_part partition(ds='2008-04-08'); -load data local inpath 'seed_data_files/srcbucket23.txt' INTO TABLE srcbucket_mapjoin_part partition(ds='2008-04-08'); -load data local inpath 'seed_data_files/srcbucket20.txt' INTO TABLE srcbucket_mapjoin_part partition(ds='2008-04-09'); -load data local inpath 'seed_data_files/srcbucket21.txt' INTO TABLE srcbucket_mapjoin_part partition(ds='2008-04-09'); -load data local inpath 'seed_data_files/srcbucket22.txt' INTO TABLE srcbucket_mapjoin_part partition(ds='2008-04-09'); -load data local inpath 'seed_data_files/srcbucket23.txt' INTO TABLE srcbucket_mapjoin_part partition(ds='2008-04-09'); - -CREATE TABLE srcbucket_mapjoin_part_2 (key int, value string) partitioned by (ds string) CLUSTERED BY (key) INTO 2 BUCKETS STORED AS TEXTFILE; -load data local inpath 'seed_data_files/srcbucket22.txt' INTO TABLE srcbucket_mapjoin_part_2 partition(ds='2008-04-08'); -load data local inpath 'seed_data_files/srcbucket23.txt' INTO TABLE srcbucket_mapjoin_part_2 partition(ds='2008-04-08'); -load data local inpath 'seed_data_files/srcbucket22.txt' INTO TABLE srcbucket_mapjoin_part_2 partition(ds='2008-04-09'); -load data local inpath 'seed_data_files/srcbucket23.txt' INTO TABLE srcbucket_mapjoin_part_2 partition(ds='2008-04-09'); - -create table bucketmapjoin_hash_result_1 (key bigint , value1 bigint, value2 bigint); -create table bucketmapjoin_hash_result_2 (key bigint , value1 bigint, value2 bigint); - -set hive.optimize.bucketmapjoin = true; -create table bucketmapjoin_tmp_result (key string , value1 string, value2 string); - -explain extended -insert overwrite table bucketmapjoin_tmp_result -select /*+mapjoin(a)*/ a.key, a.value, b.value -from srcbucket_mapjoin a join srcbucket_mapjoin_part b -on a.key=b.key; - -insert overwrite table bucketmapjoin_tmp_result -select /*+mapjoin(a)*/ a.key, a.value, b.value -from srcbucket_mapjoin a join srcbucket_mapjoin_part b -on a.key=b.key; - -select count(1) from bucketmapjoin_tmp_result; -insert overwrite table bucketmapjoin_hash_result_1 -select sum(hash(key)), sum(hash(value1)), sum(hash(value2)) from bucketmapjoin_tmp_result; - -set hive.optimize.bucketmapjoin = false; -insert overwrite table bucketmapjoin_tmp_result -select /*+mapjoin(a)*/ a.key, a.value, b.value -from srcbucket_mapjoin a join srcbucket_mapjoin_part b -on a.key=b.key; - -select count(1) from bucketmapjoin_tmp_result; -insert overwrite table bucketmapjoin_hash_result_2 -select sum(hash(key)), sum(hash(value1)), sum(hash(value2)) from bucketmapjoin_tmp_result; - -select a.key-b.key, a.value1-b.value1, a.value2-b.value2 -from bucketmapjoin_hash_result_1 a left outer join bucketmapjoin_hash_result_2 b -on a.key = b.key; - - -set hive.optimize.bucketmapjoin = true; -explain extended -insert overwrite table 
bucketmapjoin_tmp_result -select /*+mapjoin(a)*/ a.key, a.value, b.value -from srcbucket_mapjoin a join srcbucket_mapjoin_part_2 b -on a.key=b.key; - -insert overwrite table bucketmapjoin_tmp_result -select /*+mapjoin(a)*/ a.key, a.value, b.value -from srcbucket_mapjoin a join srcbucket_mapjoin_part_2 b -on a.key=b.key; - -select count(1) from bucketmapjoin_tmp_result; -insert overwrite table bucketmapjoin_hash_result_1 -select sum(hash(key)), sum(hash(value1)), sum(hash(value2)) from bucketmapjoin_tmp_result; - -set hive.optimize.bucketmapjoin = false; -insert overwrite table bucketmapjoin_tmp_result -select /*+mapjoin(a)*/ a.key, a.value, b.value -from srcbucket_mapjoin a join srcbucket_mapjoin_part_2 b -on a.key=b.key; - -select count(1) from bucketmapjoin_tmp_result; -insert overwrite table bucketmapjoin_hash_result_2 -select sum(hash(key)), sum(hash(value1)), sum(hash(value2)) from bucketmapjoin_tmp_result; - -select a.key-b.key, a.value1-b.value1, a.value2-b.value2 -from bucketmapjoin_hash_result_1 a left outer join bucketmapjoin_hash_result_2 b -on a.key = b.key;
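Taken together, each deleted test directory pairs an "in" HiveQL script with a sed "filter" that scrubs nondeterministic output (host names, HDFS paths, transient_lastDdlTime values) and an "out" golden file holding the expected, already-normalized output. A minimal sketch of that compare flow, assuming the files are consumed as shown above (the actual runner lives in the Bigtop Hive test code, so this shell version is illustrative only, and the temp-file names are assumptions):

test_dir=$1                                       # e.g. .../scripts/ql/basic
hive -f "$test_dir/in" > raw.out 2>&1             # run the HiveQL script
sh "$test_dir/filter" < raw.out > normalized.out  # apply the sed normalizer
diff "$test_dir/out" normalized.out               # any difference fails the test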
