Added: hive/trunk/ql/src/test/results/clientpositive/list_bucket_dml_12.q.java1.8.out URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/test/results/clientpositive/list_bucket_dml_12.q.java1.8.out?rev=1647912&view=auto ============================================================================== --- hive/trunk/ql/src/test/results/clientpositive/list_bucket_dml_12.q.java1.8.out (added) +++ hive/trunk/ql/src/test/results/clientpositive/list_bucket_dml_12.q.java1.8.out Thu Dec 25 18:06:30 2014 @@ -0,0 +1,598 @@ +PREHOOK: query: -- Ensure it works if skewed column is not the first column in the table columns + +-- INCLUDE_HADOOP_MAJOR_VERSIONS(0.23) +-- SORT_QUERY_RESULTS +-- JAVA_VERSION_SPECIFIC_OUTPUT + +-- test where the skewed values are more than 1 say columns no. 2 and 4 in a table with 5 columns +create table list_bucketing_mul_col (col1 String, col2 String, col3 String, col4 String, col5 string) + partitioned by (ds String, hr String) + skewed by (col2, col4) on (('466','val_466'),('287','val_287'),('82','val_82')) + stored as DIRECTORIES + STORED AS RCFILE +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@list_bucketing_mul_col +POSTHOOK: query: -- Ensure it works if skewed column is not the first column in the table columns + +-- INCLUDE_HADOOP_MAJOR_VERSIONS(0.23) +-- SORT_QUERY_RESULTS +-- JAVA_VERSION_SPECIFIC_OUTPUT + +-- test where the skewed values are more than 1 say columns no. 2 and 4 in a table with 5 columns +create table list_bucketing_mul_col (col1 String, col2 String, col3 String, col4 String, col5 string) + partitioned by (ds String, hr String) + skewed by (col2, col4) on (('466','val_466'),('287','val_287'),('82','val_82')) + stored as DIRECTORIES + STORED AS RCFILE +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@list_bucketing_mul_col +PREHOOK: query: -- list bucketing DML +explain extended +insert overwrite table list_bucketing_mul_col partition (ds = '2008-04-08', hr = '11') +select 1, key, 1, value, 1 from src +PREHOOK: type: QUERY +POSTHOOK: query: -- list bucketing DML +explain extended +insert overwrite table list_bucketing_mul_col partition (ds = '2008-04-08', hr = '11') +select 1, key, 1, value, 1 from src +POSTHOOK: type: QUERY +ABSTRACT SYNTAX TREE: + +TOK_QUERY + TOK_FROM + TOK_TABREF + TOK_TABNAME + src + TOK_INSERT + TOK_DESTINATION + TOK_TAB + TOK_TABNAME + list_bucketing_mul_col + TOK_PARTSPEC + TOK_PARTVAL + ds + '2008-04-08' + TOK_PARTVAL + hr + '11' + TOK_SELECT + TOK_SELEXPR + 1 + TOK_SELEXPR + TOK_TABLE_OR_COL + key + TOK_SELEXPR + 1 + TOK_SELEXPR + TOK_TABLE_OR_COL + value + TOK_SELEXPR + 1 + + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + Stage-2 depends on stages: Stage-0 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: src + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + GatherStats: false + Select Operator + expressions: UDFToString(1) (type: string), key (type: string), UDFToString(1) (type: string), value (type: string), UDFToString(1) (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 1 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Static Partition Specification: ds=2008-04-08/hr=11/ + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.hive.ql.io.RCFileInputFormat + output format: org.apache.hadoop.hive.ql.io.RCFileOutputFormat + properties: + bucket_count -1 + columns col1,col2,col3,col4,col5 + columns.comments + columns.types string:string:string:string:string +#### A masked pattern was here #### + name default.list_bucketing_mul_col + partition_columns ds/hr + partition_columns.types string:string + serialization.ddl struct list_bucketing_mul_col { string col1, string col2, string col3, string col4, string col5} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe + name: default.list_bucketing_mul_col + TotalFiles: 1 + GatherStats: true + MultiFileSpray: false + Path -> Alias: +#### A masked pattern was here #### + Path -> Partition: +#### A masked pattern was here #### + Partition + base file name: src + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + COLUMN_STATS_ACCURATE true + bucket_count -1 + columns key,value + columns.comments 'default','default' + columns.types string:string +#### A masked pattern was here #### + name default.src + numFiles 1 + numRows 500 + rawDataSize 5312 + serialization.ddl struct src { string key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + totalSize 5812 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + COLUMN_STATS_ACCURATE true + bucket_count -1 + columns key,value + columns.comments 'default','default' + columns.types string:string +#### A masked pattern was here #### + name default.src + numFiles 1 + numRows 500 + rawDataSize 5312 + serialization.ddl struct src { string key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + totalSize 5812 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.src + name: default.src + Truncated Path -> Alias: + /src [$hdt$_0:src] + + Stage: Stage-0 + Move Operator + tables: + partition: + ds 2008-04-08 + hr 11 + replace: true +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.hive.ql.io.RCFileInputFormat + output format: org.apache.hadoop.hive.ql.io.RCFileOutputFormat + properties: + bucket_count -1 + columns col1,col2,col3,col4,col5 + columns.comments + columns.types string:string:string:string:string +#### A masked pattern was here #### + name default.list_bucketing_mul_col + partition_columns ds/hr + partition_columns.types string:string + serialization.ddl struct list_bucketing_mul_col { string col1, string col2, string col3, string col4, string col5} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe + name: default.list_bucketing_mul_col + + Stage: Stage-2 + Stats-Aggr Operator +#### A masked pattern was here #### + +PREHOOK: query: insert overwrite table list_bucketing_mul_col partition (ds = '2008-04-08', hr = '11') +select 1, key, 1, value, 1 from src +PREHOOK: type: QUERY +PREHOOK: Input: default@src +PREHOOK: Output: default@list_bucketing_mul_col@ds=2008-04-08/hr=11 +POSTHOOK: query: insert overwrite table list_bucketing_mul_col partition (ds = '2008-04-08', hr = '11') +select 1, key, 1, value, 1 from src +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +POSTHOOK: Output: default@list_bucketing_mul_col@ds=2008-04-08/hr=11 +POSTHOOK: Lineage: list_bucketing_mul_col PARTITION(ds=2008-04-08,hr=11).col1 EXPRESSION [] +POSTHOOK: Lineage: list_bucketing_mul_col PARTITION(ds=2008-04-08,hr=11).col2 SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: list_bucketing_mul_col PARTITION(ds=2008-04-08,hr=11).col3 EXPRESSION [] +POSTHOOK: Lineage: list_bucketing_mul_col PARTITION(ds=2008-04-08,hr=11).col4 SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: list_bucketing_mul_col PARTITION(ds=2008-04-08,hr=11).col5 EXPRESSION [] +PREHOOK: query: -- check DML result +show partitions list_bucketing_mul_col +PREHOOK: type: SHOWPARTITIONS +PREHOOK: Input: default@list_bucketing_mul_col +POSTHOOK: query: -- check DML result +show partitions list_bucketing_mul_col +POSTHOOK: type: SHOWPARTITIONS +POSTHOOK: Input: default@list_bucketing_mul_col +ds=2008-04-08/hr=11 +PREHOOK: query: desc formatted list_bucketing_mul_col partition (ds='2008-04-08', hr='11') +PREHOOK: type: DESCTABLE +PREHOOK: Input: default@list_bucketing_mul_col +POSTHOOK: query: desc formatted list_bucketing_mul_col partition (ds='2008-04-08', hr='11') +POSTHOOK: type: DESCTABLE +POSTHOOK: Input: default@list_bucketing_mul_col +# col_name data_type comment + +col1 string +col2 string +col3 string +col4 string +col5 string + +# Partition Information +# col_name data_type comment + +ds string +hr string + +# Detailed Partition Information +Partition Value: [2008-04-08, 11] +Database: default +Table: list_bucketing_mul_col +#### A masked pattern was here #### +Protect Mode: None +#### A masked pattern was here #### +Partition Parameters: + COLUMN_STATS_ACCURATE true + numFiles 4 + numRows 500 + rawDataSize 6312 + totalSize 7094 +#### A masked pattern was here #### + +# Storage Information +SerDe Library: org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe +InputFormat: org.apache.hadoop.hive.ql.io.RCFileInputFormat +OutputFormat: org.apache.hadoop.hive.ql.io.RCFileOutputFormat +Compressed: No +Num Buckets: -1 +Bucket Columns: [] +Sort Columns: [] +Stored As SubDirectories: Yes +Skewed Columns: [col2, col4] +Skewed Values: [[466, val_466], [287, val_287], [82, val_82]] +#### A masked pattern was here #### +Skewed Value to Truncated Path: {[466, val_466]=/list_bucketing_mul_col/ds=2008-04-08/hr=11/col2=466/col4=val_466, [82, val_82]=/list_bucketing_mul_col/ds=2008-04-08/hr=11/col2=82/col4=val_82, [287, val_287]=/list_bucketing_mul_col/ds=2008-04-08/hr=11/col2=287/col4=val_287} +Storage Desc Params: + serialization.format 1 +PREHOOK: query: explain extended +select * from list_bucketing_mul_col +where ds='2008-04-08' and hr='11' and col2 = "466" and col4 = "val_466" +PREHOOK: type: QUERY +POSTHOOK: query: explain extended +select * from list_bucketing_mul_col +where ds='2008-04-08' and hr='11' and col2 = "466" and col4 = "val_466" +POSTHOOK: type: QUERY +ABSTRACT SYNTAX TREE: + +TOK_QUERY + TOK_FROM + TOK_TABREF + TOK_TABNAME + list_bucketing_mul_col + TOK_INSERT + TOK_DESTINATION + TOK_DIR + TOK_TMP_FILE + TOK_SELECT + TOK_SELEXPR + TOK_ALLCOLREF + TOK_WHERE + and + and + and + = + TOK_TABLE_OR_COL + ds + '2008-04-08' + = + TOK_TABLE_OR_COL + hr + '11' + = + TOK_TABLE_OR_COL + col2 + "466" + = + TOK_TABLE_OR_COL + col4 + "val_466" + + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: list_bucketing_mul_col + Statistics: Num rows: 500 Data size: 6312 Basic stats: COMPLETE Column stats: NONE + GatherStats: false + Filter Operator + isSamplingPred: false + predicate: ((col2 = '466') and (col4 = 'val_466')) (type: boolean) + Statistics: Num rows: 125 Data size: 1578 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: col1 (type: string), '466' (type: string), col3 (type: string), 'val_466' (type: string), col5 (type: string), '2008-04-08' (type: string), '11' (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6 + Statistics: Num rows: 125 Data size: 1578 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: Num rows: 125 Data size: 1578 Basic stats: COMPLETE Column stats: NONE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + columns _col0,_col1,_col2,_col3,_col4,_col5,_col6 + columns.types string:string:string:string:string:string:string + escape.delim \ + hive.serialization.extend.nesting.levels true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false + Path -> Alias: +#### A masked pattern was here #### + Path -> Partition: +#### A masked pattern was here #### + Partition + base file name: col4=val_466 + input format: org.apache.hadoop.hive.ql.io.RCFileInputFormat + output format: org.apache.hadoop.hive.ql.io.RCFileOutputFormat + partition values: + ds 2008-04-08 + hr 11 + properties: + COLUMN_STATS_ACCURATE true + bucket_count -1 + columns col1,col2,col3,col4,col5 + columns.comments + columns.types string:string:string:string:string +#### A masked pattern was here #### + name default.list_bucketing_mul_col + numFiles 4 + numRows 500 + partition_columns ds/hr + partition_columns.types string:string + rawDataSize 6312 + serialization.ddl struct list_bucketing_mul_col { string col1, string col2, string col3, string col4, string col5} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe + totalSize 7094 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe + + input format: org.apache.hadoop.hive.ql.io.RCFileInputFormat + output format: org.apache.hadoop.hive.ql.io.RCFileOutputFormat + properties: + bucket_count -1 + columns col1,col2,col3,col4,col5 + columns.comments + columns.types string:string:string:string:string +#### A masked pattern was here #### + name default.list_bucketing_mul_col + partition_columns ds/hr + partition_columns.types string:string + serialization.ddl struct list_bucketing_mul_col { string col1, string col2, string col3, string col4, string col5} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe + name: default.list_bucketing_mul_col + name: default.list_bucketing_mul_col + Truncated Path -> Alias: + /list_bucketing_mul_col/ds=2008-04-08/hr=11/col2=466/col4=val_466 [list_bucketing_mul_col] + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select * from list_bucketing_mul_col +where ds='2008-04-08' and hr='11' and col2 = "466" and col4 = "val_466" +PREHOOK: type: QUERY +PREHOOK: Input: default@list_bucketing_mul_col +PREHOOK: Input: default@list_bucketing_mul_col@ds=2008-04-08/hr=11 +#### A masked pattern was here #### +POSTHOOK: query: select * from list_bucketing_mul_col +where ds='2008-04-08' and hr='11' and col2 = "466" and col4 = "val_466" +POSTHOOK: type: QUERY +POSTHOOK: Input: default@list_bucketing_mul_col +POSTHOOK: Input: default@list_bucketing_mul_col@ds=2008-04-08/hr=11 +#### A masked pattern was here #### +1 466 1 val_466 1 2008-04-08 11 +1 466 1 val_466 1 2008-04-08 11 +1 466 1 val_466 1 2008-04-08 11 +PREHOOK: query: explain extended +select * from list_bucketing_mul_col +where ds='2008-04-08' and hr='11' and col2 = "382" and col4 = "val_382" +PREHOOK: type: QUERY +POSTHOOK: query: explain extended +select * from list_bucketing_mul_col +where ds='2008-04-08' and hr='11' and col2 = "382" and col4 = "val_382" +POSTHOOK: type: QUERY +ABSTRACT SYNTAX TREE: + +TOK_QUERY + TOK_FROM + TOK_TABREF + TOK_TABNAME + list_bucketing_mul_col + TOK_INSERT + TOK_DESTINATION + TOK_DIR + TOK_TMP_FILE + TOK_SELECT + TOK_SELEXPR + TOK_ALLCOLREF + TOK_WHERE + and + and + and + = + TOK_TABLE_OR_COL + ds + '2008-04-08' + = + TOK_TABLE_OR_COL + hr + '11' + = + TOK_TABLE_OR_COL + col2 + "382" + = + TOK_TABLE_OR_COL + col4 + "val_382" + + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: list_bucketing_mul_col + Statistics: Num rows: 500 Data size: 6312 Basic stats: COMPLETE Column stats: NONE + GatherStats: false + Filter Operator + isSamplingPred: false + predicate: ((col2 = '382') and (col4 = 'val_382')) (type: boolean) + Statistics: Num rows: 125 Data size: 1578 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: col1 (type: string), '382' (type: string), col3 (type: string), 'val_382' (type: string), col5 (type: string), '2008-04-08' (type: string), '11' (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6 + Statistics: Num rows: 125 Data size: 1578 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: Num rows: 125 Data size: 1578 Basic stats: COMPLETE Column stats: NONE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + columns _col0,_col1,_col2,_col3,_col4,_col5,_col6 + columns.types string:string:string:string:string:string:string + escape.delim \ + hive.serialization.extend.nesting.levels true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false + Path -> Alias: +#### A masked pattern was here #### + Path -> Partition: +#### A masked pattern was here #### + Partition + base file name: HIVE_DEFAULT_LIST_BUCKETING_DIR_NAME + input format: org.apache.hadoop.hive.ql.io.RCFileInputFormat + output format: org.apache.hadoop.hive.ql.io.RCFileOutputFormat + partition values: + ds 2008-04-08 + hr 11 + properties: + COLUMN_STATS_ACCURATE true + bucket_count -1 + columns col1,col2,col3,col4,col5 + columns.comments + columns.types string:string:string:string:string +#### A masked pattern was here #### + name default.list_bucketing_mul_col + numFiles 4 + numRows 500 + partition_columns ds/hr + partition_columns.types string:string + rawDataSize 6312 + serialization.ddl struct list_bucketing_mul_col { string col1, string col2, string col3, string col4, string col5} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe + totalSize 7094 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe + + input format: org.apache.hadoop.hive.ql.io.RCFileInputFormat + output format: org.apache.hadoop.hive.ql.io.RCFileOutputFormat + properties: + bucket_count -1 + columns col1,col2,col3,col4,col5 + columns.comments + columns.types string:string:string:string:string +#### A masked pattern was here #### + name default.list_bucketing_mul_col + partition_columns ds/hr + partition_columns.types string:string + serialization.ddl struct list_bucketing_mul_col { string col1, string col2, string col3, string col4, string col5} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe + name: default.list_bucketing_mul_col + name: default.list_bucketing_mul_col + Truncated Path -> Alias: + /list_bucketing_mul_col/ds=2008-04-08/hr=11/HIVE_DEFAULT_LIST_BUCKETING_DIR_NAME/HIVE_DEFAULT_LIST_BUCKETING_DIR_NAME [list_bucketing_mul_col] + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select * from list_bucketing_mul_col +where ds='2008-04-08' and hr='11' and col2 = "382" and col4 = "val_382" +PREHOOK: type: QUERY +PREHOOK: Input: default@list_bucketing_mul_col +PREHOOK: Input: default@list_bucketing_mul_col@ds=2008-04-08/hr=11 +#### A masked pattern was here #### +POSTHOOK: query: select * from list_bucketing_mul_col +where ds='2008-04-08' and hr='11' and col2 = "382" and col4 = "val_382" +POSTHOOK: type: QUERY +POSTHOOK: Input: default@list_bucketing_mul_col +POSTHOOK: Input: default@list_bucketing_mul_col@ds=2008-04-08/hr=11 +#### A masked pattern was here #### +1 382 1 val_382 1 2008-04-08 11 +1 382 1 val_382 1 2008-04-08 11 +PREHOOK: query: drop table list_bucketing_mul_col +PREHOOK: type: DROPTABLE +PREHOOK: Input: default@list_bucketing_mul_col +PREHOOK: Output: default@list_bucketing_mul_col +POSTHOOK: query: drop table list_bucketing_mul_col +POSTHOOK: type: DROPTABLE +POSTHOOK: Input: default@list_bucketing_mul_col +POSTHOOK: Output: default@list_bucketing_mul_col
Added: hive/trunk/ql/src/test/results/clientpositive/list_bucket_dml_13.q.java1.7.out URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/test/results/clientpositive/list_bucket_dml_13.q.java1.7.out?rev=1647912&view=auto ============================================================================== --- hive/trunk/ql/src/test/results/clientpositive/list_bucket_dml_13.q.java1.7.out (added) +++ hive/trunk/ql/src/test/results/clientpositive/list_bucket_dml_13.q.java1.7.out Thu Dec 25 18:06:30 2014 @@ -0,0 +1,441 @@ +PREHOOK: query: -- Ensure skewed value map has escaped directory name + +-- INCLUDE_HADOOP_MAJOR_VERSIONS(0.23) +-- SORT_QUERY_RESULTS +-- JAVA_VERSION_SPECIFIC_OUTPUT + +-- test where the skewed values are more than 1 say columns no. 2 and 4 in a table with 5 columns +create table list_bucketing_mul_col (col1 String, col2 String, col3 String, col4 String, col5 string) + partitioned by (ds String, hr String) + skewed by (col2, col4) on (('466','val_466'),('287','val_287'),('82','val_82')) + stored as DIRECTORIES + STORED AS RCFILE +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@list_bucketing_mul_col +POSTHOOK: query: -- Ensure skewed value map has escaped directory name + +-- INCLUDE_HADOOP_MAJOR_VERSIONS(0.23) +-- SORT_QUERY_RESULTS +-- JAVA_VERSION_SPECIFIC_OUTPUT + +-- test where the skewed values are more than 1 say columns no. 2 and 4 in a table with 5 columns +create table list_bucketing_mul_col (col1 String, col2 String, col3 String, col4 String, col5 string) + partitioned by (ds String, hr String) + skewed by (col2, col4) on (('466','val_466'),('287','val_287'),('82','val_82')) + stored as DIRECTORIES + STORED AS RCFILE +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@list_bucketing_mul_col +PREHOOK: query: -- list bucketing DML +explain extended +insert overwrite table list_bucketing_mul_col partition (ds = '2008-04-08', hr = '2013-01-23+18:00:99') +select 1, key, 1, value, 1 from src +PREHOOK: type: QUERY +POSTHOOK: query: -- list bucketing DML +explain extended +insert overwrite table list_bucketing_mul_col partition (ds = '2008-04-08', hr = '2013-01-23+18:00:99') +select 1, key, 1, value, 1 from src +POSTHOOK: type: QUERY +ABSTRACT SYNTAX TREE: + +TOK_QUERY + TOK_FROM + TOK_TABREF + TOK_TABNAME + src + TOK_INSERT + TOK_DESTINATION + TOK_TAB + TOK_TABNAME + list_bucketing_mul_col + TOK_PARTSPEC + TOK_PARTVAL + ds + '2008-04-08' + TOK_PARTVAL + hr + '2013-01-23+18:00:99' + TOK_SELECT + TOK_SELEXPR + 1 + TOK_SELEXPR + TOK_TABLE_OR_COL + key + TOK_SELEXPR + 1 + TOK_SELEXPR + TOK_TABLE_OR_COL + value + TOK_SELEXPR + 1 + + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + Stage-2 depends on stages: Stage-0 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: src + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + GatherStats: false + Select Operator + expressions: UDFToString(1) (type: string), key (type: string), UDFToString(1) (type: string), value (type: string), UDFToString(1) (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 1 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Static Partition Specification: ds=2008-04-08/hr=2013-01-23+18%3A00%3A99/ + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.hive.ql.io.RCFileInputFormat + output format: org.apache.hadoop.hive.ql.io.RCFileOutputFormat + properties: + bucket_count -1 + columns col1,col2,col3,col4,col5 + columns.comments + columns.types string:string:string:string:string +#### A masked pattern was here #### + name default.list_bucketing_mul_col + partition_columns ds/hr + partition_columns.types string:string + serialization.ddl struct list_bucketing_mul_col { string col1, string col2, string col3, string col4, string col5} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe + name: default.list_bucketing_mul_col + TotalFiles: 1 + GatherStats: true + MultiFileSpray: false + Path -> Alias: +#### A masked pattern was here #### + Path -> Partition: +#### A masked pattern was here #### + Partition + base file name: src + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + COLUMN_STATS_ACCURATE true + bucket_count -1 + columns key,value + columns.comments 'default','default' + columns.types string:string +#### A masked pattern was here #### + name default.src + numFiles 1 + numRows 500 + rawDataSize 5312 + serialization.ddl struct src { string key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + totalSize 5812 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + COLUMN_STATS_ACCURATE true + bucket_count -1 + columns key,value + columns.comments 'default','default' + columns.types string:string +#### A masked pattern was here #### + name default.src + numFiles 1 + numRows 500 + rawDataSize 5312 + serialization.ddl struct src { string key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + totalSize 5812 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.src + name: default.src + Truncated Path -> Alias: + /src [$hdt$_0:src] + + Stage: Stage-0 + Move Operator + tables: + partition: + ds 2008-04-08 + hr 2013-01-23+18:00:99 + replace: true +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.hive.ql.io.RCFileInputFormat + output format: org.apache.hadoop.hive.ql.io.RCFileOutputFormat + properties: + bucket_count -1 + columns col1,col2,col3,col4,col5 + columns.comments + columns.types string:string:string:string:string +#### A masked pattern was here #### + name default.list_bucketing_mul_col + partition_columns ds/hr + partition_columns.types string:string + serialization.ddl struct list_bucketing_mul_col { string col1, string col2, string col3, string col4, string col5} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe + name: default.list_bucketing_mul_col + + Stage: Stage-2 + Stats-Aggr Operator +#### A masked pattern was here #### + +PREHOOK: query: insert overwrite table list_bucketing_mul_col partition (ds = '2008-04-08', hr = '2013-01-23+18:00:99') +select 1, key, 1, value, 1 from src +PREHOOK: type: QUERY +PREHOOK: Input: default@src +PREHOOK: Output: default@list_bucketing_mul_col@ds=2008-04-08/hr=2013-01-23+18%3A00%3A99 +POSTHOOK: query: insert overwrite table list_bucketing_mul_col partition (ds = '2008-04-08', hr = '2013-01-23+18:00:99') +select 1, key, 1, value, 1 from src +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +POSTHOOK: Output: default@list_bucketing_mul_col@ds=2008-04-08/hr=2013-01-23+18%3A00%3A99 +POSTHOOK: Lineage: list_bucketing_mul_col PARTITION(ds=2008-04-08,hr=2013-01-23+18:00:99).col1 EXPRESSION [] +POSTHOOK: Lineage: list_bucketing_mul_col PARTITION(ds=2008-04-08,hr=2013-01-23+18:00:99).col2 SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: list_bucketing_mul_col PARTITION(ds=2008-04-08,hr=2013-01-23+18:00:99).col3 EXPRESSION [] +POSTHOOK: Lineage: list_bucketing_mul_col PARTITION(ds=2008-04-08,hr=2013-01-23+18:00:99).col4 SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: list_bucketing_mul_col PARTITION(ds=2008-04-08,hr=2013-01-23+18:00:99).col5 EXPRESSION [] +PREHOOK: query: -- check DML result +show partitions list_bucketing_mul_col +PREHOOK: type: SHOWPARTITIONS +PREHOOK: Input: default@list_bucketing_mul_col +POSTHOOK: query: -- check DML result +show partitions list_bucketing_mul_col +POSTHOOK: type: SHOWPARTITIONS +POSTHOOK: Input: default@list_bucketing_mul_col +ds=2008-04-08/hr=2013-01-23+18%3A00%3A99 +PREHOOK: query: desc formatted list_bucketing_mul_col partition (ds='2008-04-08', hr='2013-01-23+18:00:99') +PREHOOK: type: DESCTABLE +PREHOOK: Input: default@list_bucketing_mul_col +POSTHOOK: query: desc formatted list_bucketing_mul_col partition (ds='2008-04-08', hr='2013-01-23+18:00:99') +POSTHOOK: type: DESCTABLE +POSTHOOK: Input: default@list_bucketing_mul_col +# col_name data_type comment + +col1 string +col2 string +col3 string +col4 string +col5 string + +# Partition Information +# col_name data_type comment + +ds string +hr string + +# Detailed Partition Information +Partition Value: [2008-04-08, 2013-01-23+18:00:99] +Database: default +Table: list_bucketing_mul_col +#### A masked pattern was here #### +Protect Mode: None +#### A masked pattern was here #### +Partition Parameters: + COLUMN_STATS_ACCURATE true + numFiles 4 + numRows 500 + rawDataSize 6312 + totalSize 7094 +#### A masked pattern was here #### + +# Storage Information +SerDe Library: org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe +InputFormat: org.apache.hadoop.hive.ql.io.RCFileInputFormat +OutputFormat: org.apache.hadoop.hive.ql.io.RCFileOutputFormat +Compressed: No +Num Buckets: -1 +Bucket Columns: [] +Sort Columns: [] +Stored As SubDirectories: Yes +Skewed Columns: [col2, col4] +Skewed Values: [[466, val_466], [287, val_287], [82, val_82]] +#### A masked pattern was here #### +Skewed Value to Truncated Path: {[82, val_82]=/list_bucketing_mul_col/ds=2008-04-08/hr=2013-01-23+18%3A00%3A99/col2=82/col4=val_82, [466, val_466]=/list_bucketing_mul_col/ds=2008-04-08/hr=2013-01-23+18%3A00%3A99/col2=466/col4=val_466, [287, val_287]=/list_bucketing_mul_col/ds=2008-04-08/hr=2013-01-23+18%3A00%3A99/col2=287/col4=val_287} +Storage Desc Params: + serialization.format 1 +PREHOOK: query: explain extended +select * from list_bucketing_mul_col +where ds='2008-04-08' and hr='2013-01-23+18:00:99' and col2 = "466" and col4 = "val_466" +PREHOOK: type: QUERY +POSTHOOK: query: explain extended +select * from list_bucketing_mul_col +where ds='2008-04-08' and hr='2013-01-23+18:00:99' and col2 = "466" and col4 = "val_466" +POSTHOOK: type: QUERY +ABSTRACT SYNTAX TREE: + +TOK_QUERY + TOK_FROM + TOK_TABREF + TOK_TABNAME + list_bucketing_mul_col + TOK_INSERT + TOK_DESTINATION + TOK_DIR + TOK_TMP_FILE + TOK_SELECT + TOK_SELEXPR + TOK_ALLCOLREF + TOK_WHERE + and + and + and + = + TOK_TABLE_OR_COL + ds + '2008-04-08' + = + TOK_TABLE_OR_COL + hr + '2013-01-23+18:00:99' + = + TOK_TABLE_OR_COL + col2 + "466" + = + TOK_TABLE_OR_COL + col4 + "val_466" + + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: list_bucketing_mul_col + Statistics: Num rows: 500 Data size: 6312 Basic stats: COMPLETE Column stats: NONE + GatherStats: false + Filter Operator + isSamplingPred: false + predicate: ((col2 = '466') and (col4 = 'val_466')) (type: boolean) + Statistics: Num rows: 125 Data size: 1578 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: col1 (type: string), '466' (type: string), col3 (type: string), 'val_466' (type: string), col5 (type: string), '2008-04-08' (type: string), '2013-01-23+18:00:99' (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6 + Statistics: Num rows: 125 Data size: 1578 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: Num rows: 125 Data size: 1578 Basic stats: COMPLETE Column stats: NONE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + columns _col0,_col1,_col2,_col3,_col4,_col5,_col6 + columns.types string:string:string:string:string:string:string + escape.delim \ + hive.serialization.extend.nesting.levels true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false + Path -> Alias: +#### A masked pattern was here #### + Path -> Partition: +#### A masked pattern was here #### + Partition + base file name: col4=val_466 + input format: org.apache.hadoop.hive.ql.io.RCFileInputFormat + output format: org.apache.hadoop.hive.ql.io.RCFileOutputFormat + partition values: + ds 2008-04-08 + hr 2013-01-23+18:00:99 + properties: + COLUMN_STATS_ACCURATE true + bucket_count -1 + columns col1,col2,col3,col4,col5 + columns.comments + columns.types string:string:string:string:string +#### A masked pattern was here #### + name default.list_bucketing_mul_col + numFiles 4 + numRows 500 + partition_columns ds/hr + partition_columns.types string:string + rawDataSize 6312 + serialization.ddl struct list_bucketing_mul_col { string col1, string col2, string col3, string col4, string col5} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe + totalSize 7094 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe + + input format: org.apache.hadoop.hive.ql.io.RCFileInputFormat + output format: org.apache.hadoop.hive.ql.io.RCFileOutputFormat + properties: + bucket_count -1 + columns col1,col2,col3,col4,col5 + columns.comments + columns.types string:string:string:string:string +#### A masked pattern was here #### + name default.list_bucketing_mul_col + partition_columns ds/hr + partition_columns.types string:string + serialization.ddl struct list_bucketing_mul_col { string col1, string col2, string col3, string col4, string col5} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe + name: default.list_bucketing_mul_col + name: default.list_bucketing_mul_col + Truncated Path -> Alias: + /list_bucketing_mul_col/ds=2008-04-08/hr=2013-01-23+18%3A00%3A99/col2=466/col4=val_466 [list_bucketing_mul_col] + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select * from list_bucketing_mul_col +where ds='2008-04-08' and hr='2013-01-23+18:00:99' and col2 = "466" and col4 = "val_466" +PREHOOK: type: QUERY +PREHOOK: Input: default@list_bucketing_mul_col +PREHOOK: Input: default@list_bucketing_mul_col@ds=2008-04-08/hr=2013-01-23+18%3A00%3A99 +#### A masked pattern was here #### +POSTHOOK: query: select * from list_bucketing_mul_col +where ds='2008-04-08' and hr='2013-01-23+18:00:99' and col2 = "466" and col4 = "val_466" +POSTHOOK: type: QUERY +POSTHOOK: Input: default@list_bucketing_mul_col +POSTHOOK: Input: default@list_bucketing_mul_col@ds=2008-04-08/hr=2013-01-23+18%3A00%3A99 +#### A masked pattern was here #### +1 466 1 val_466 1 2008-04-08 2013-01-23+18:00:99 +1 466 1 val_466 1 2008-04-08 2013-01-23+18:00:99 +1 466 1 val_466 1 2008-04-08 2013-01-23+18:00:99 +PREHOOK: query: drop table list_bucketing_mul_col +PREHOOK: type: DROPTABLE +PREHOOK: Input: default@list_bucketing_mul_col +PREHOOK: Output: default@list_bucketing_mul_col +POSTHOOK: query: drop table list_bucketing_mul_col +POSTHOOK: type: DROPTABLE +POSTHOOK: Input: default@list_bucketing_mul_col +POSTHOOK: Output: default@list_bucketing_mul_col Added: hive/trunk/ql/src/test/results/clientpositive/list_bucket_dml_13.q.java1.8.out URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/test/results/clientpositive/list_bucket_dml_13.q.java1.8.out?rev=1647912&view=auto ============================================================================== --- hive/trunk/ql/src/test/results/clientpositive/list_bucket_dml_13.q.java1.8.out (added) +++ hive/trunk/ql/src/test/results/clientpositive/list_bucket_dml_13.q.java1.8.out Thu Dec 25 18:06:30 2014 @@ -0,0 +1,441 @@ +PREHOOK: query: -- Ensure skewed value map has escaped directory name + +-- INCLUDE_HADOOP_MAJOR_VERSIONS(0.23) +-- SORT_QUERY_RESULTS +-- JAVA_VERSION_SPECIFIC_OUTPUT + +-- test where the skewed values are more than 1 say columns no. 2 and 4 in a table with 5 columns +create table list_bucketing_mul_col (col1 String, col2 String, col3 String, col4 String, col5 string) + partitioned by (ds String, hr String) + skewed by (col2, col4) on (('466','val_466'),('287','val_287'),('82','val_82')) + stored as DIRECTORIES + STORED AS RCFILE +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@list_bucketing_mul_col +POSTHOOK: query: -- Ensure skewed value map has escaped directory name + +-- INCLUDE_HADOOP_MAJOR_VERSIONS(0.23) +-- SORT_QUERY_RESULTS +-- JAVA_VERSION_SPECIFIC_OUTPUT + +-- test where the skewed values are more than 1 say columns no. 2 and 4 in a table with 5 columns +create table list_bucketing_mul_col (col1 String, col2 String, col3 String, col4 String, col5 string) + partitioned by (ds String, hr String) + skewed by (col2, col4) on (('466','val_466'),('287','val_287'),('82','val_82')) + stored as DIRECTORIES + STORED AS RCFILE +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@list_bucketing_mul_col +PREHOOK: query: -- list bucketing DML +explain extended +insert overwrite table list_bucketing_mul_col partition (ds = '2008-04-08', hr = '2013-01-23+18:00:99') +select 1, key, 1, value, 1 from src +PREHOOK: type: QUERY +POSTHOOK: query: -- list bucketing DML +explain extended +insert overwrite table list_bucketing_mul_col partition (ds = '2008-04-08', hr = '2013-01-23+18:00:99') +select 1, key, 1, value, 1 from src +POSTHOOK: type: QUERY +ABSTRACT SYNTAX TREE: + +TOK_QUERY + TOK_FROM + TOK_TABREF + TOK_TABNAME + src + TOK_INSERT + TOK_DESTINATION + TOK_TAB + TOK_TABNAME + list_bucketing_mul_col + TOK_PARTSPEC + TOK_PARTVAL + ds + '2008-04-08' + TOK_PARTVAL + hr + '2013-01-23+18:00:99' + TOK_SELECT + TOK_SELEXPR + 1 + TOK_SELEXPR + TOK_TABLE_OR_COL + key + TOK_SELEXPR + 1 + TOK_SELEXPR + TOK_TABLE_OR_COL + value + TOK_SELEXPR + 1 + + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + Stage-2 depends on stages: Stage-0 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: src + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + GatherStats: false + Select Operator + expressions: UDFToString(1) (type: string), key (type: string), UDFToString(1) (type: string), value (type: string), UDFToString(1) (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 1 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Static Partition Specification: ds=2008-04-08/hr=2013-01-23+18%3A00%3A99/ + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.hive.ql.io.RCFileInputFormat + output format: org.apache.hadoop.hive.ql.io.RCFileOutputFormat + properties: + bucket_count -1 + columns col1,col2,col3,col4,col5 + columns.comments + columns.types string:string:string:string:string +#### A masked pattern was here #### + name default.list_bucketing_mul_col + partition_columns ds/hr + partition_columns.types string:string + serialization.ddl struct list_bucketing_mul_col { string col1, string col2, string col3, string col4, string col5} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe + name: default.list_bucketing_mul_col + TotalFiles: 1 + GatherStats: true + MultiFileSpray: false + Path -> Alias: +#### A masked pattern was here #### + Path -> Partition: +#### A masked pattern was here #### + Partition + base file name: src + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + COLUMN_STATS_ACCURATE true + bucket_count -1 + columns key,value + columns.comments 'default','default' + columns.types string:string +#### A masked pattern was here #### + name default.src + numFiles 1 + numRows 500 + rawDataSize 5312 + serialization.ddl struct src { string key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + totalSize 5812 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + COLUMN_STATS_ACCURATE true + bucket_count -1 + columns key,value + columns.comments 'default','default' + columns.types string:string +#### A masked pattern was here #### + name default.src + numFiles 1 + numRows 500 + rawDataSize 5312 + serialization.ddl struct src { string key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + totalSize 5812 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.src + name: default.src + Truncated Path -> Alias: + /src [$hdt$_0:src] + + Stage: Stage-0 + Move Operator + tables: + partition: + ds 2008-04-08 + hr 2013-01-23+18:00:99 + replace: true +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.hive.ql.io.RCFileInputFormat + output format: org.apache.hadoop.hive.ql.io.RCFileOutputFormat + properties: + bucket_count -1 + columns col1,col2,col3,col4,col5 + columns.comments + columns.types string:string:string:string:string +#### A masked pattern was here #### + name default.list_bucketing_mul_col + partition_columns ds/hr + partition_columns.types string:string + serialization.ddl struct list_bucketing_mul_col { string col1, string col2, string col3, string col4, string col5} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe + name: default.list_bucketing_mul_col + + Stage: Stage-2 + Stats-Aggr Operator +#### A masked pattern was here #### + +PREHOOK: query: insert overwrite table list_bucketing_mul_col partition (ds = '2008-04-08', hr = '2013-01-23+18:00:99') +select 1, key, 1, value, 1 from src +PREHOOK: type: QUERY +PREHOOK: Input: default@src +PREHOOK: Output: default@list_bucketing_mul_col@ds=2008-04-08/hr=2013-01-23+18%3A00%3A99 +POSTHOOK: query: insert overwrite table list_bucketing_mul_col partition (ds = '2008-04-08', hr = '2013-01-23+18:00:99') +select 1, key, 1, value, 1 from src +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +POSTHOOK: Output: default@list_bucketing_mul_col@ds=2008-04-08/hr=2013-01-23+18%3A00%3A99 +POSTHOOK: Lineage: list_bucketing_mul_col PARTITION(ds=2008-04-08,hr=2013-01-23+18:00:99).col1 EXPRESSION [] +POSTHOOK: Lineage: list_bucketing_mul_col PARTITION(ds=2008-04-08,hr=2013-01-23+18:00:99).col2 SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: list_bucketing_mul_col PARTITION(ds=2008-04-08,hr=2013-01-23+18:00:99).col3 EXPRESSION [] +POSTHOOK: Lineage: list_bucketing_mul_col PARTITION(ds=2008-04-08,hr=2013-01-23+18:00:99).col4 SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: list_bucketing_mul_col PARTITION(ds=2008-04-08,hr=2013-01-23+18:00:99).col5 EXPRESSION [] +PREHOOK: query: -- check DML result +show partitions list_bucketing_mul_col +PREHOOK: type: SHOWPARTITIONS +PREHOOK: Input: default@list_bucketing_mul_col +POSTHOOK: query: -- check DML result +show partitions list_bucketing_mul_col +POSTHOOK: type: SHOWPARTITIONS +POSTHOOK: Input: default@list_bucketing_mul_col +ds=2008-04-08/hr=2013-01-23+18%3A00%3A99 +PREHOOK: query: desc formatted list_bucketing_mul_col partition (ds='2008-04-08', hr='2013-01-23+18:00:99') +PREHOOK: type: DESCTABLE +PREHOOK: Input: default@list_bucketing_mul_col +POSTHOOK: query: desc formatted list_bucketing_mul_col partition (ds='2008-04-08', hr='2013-01-23+18:00:99') +POSTHOOK: type: DESCTABLE +POSTHOOK: Input: default@list_bucketing_mul_col +# col_name data_type comment + +col1 string +col2 string +col3 string +col4 string +col5 string + +# Partition Information +# col_name data_type comment + +ds string +hr string + +# Detailed Partition Information +Partition Value: [2008-04-08, 2013-01-23+18:00:99] +Database: default +Table: list_bucketing_mul_col +#### A masked pattern was here #### +Protect Mode: None +#### A masked pattern was here #### +Partition Parameters: + COLUMN_STATS_ACCURATE true + numFiles 4 + numRows 500 + rawDataSize 6312 + totalSize 7094 +#### A masked pattern was here #### + +# Storage Information +SerDe Library: org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe +InputFormat: org.apache.hadoop.hive.ql.io.RCFileInputFormat +OutputFormat: org.apache.hadoop.hive.ql.io.RCFileOutputFormat +Compressed: No +Num Buckets: -1 +Bucket Columns: [] +Sort Columns: [] +Stored As SubDirectories: Yes +Skewed Columns: [col2, col4] +Skewed Values: [[466, val_466], [287, val_287], [82, val_82]] +#### A masked pattern was here #### +Skewed Value to Truncated Path: {[466, val_466]=/list_bucketing_mul_col/ds=2008-04-08/hr=2013-01-23+18%3A00%3A99/col2=466/col4=val_466, [287, val_287]=/list_bucketing_mul_col/ds=2008-04-08/hr=2013-01-23+18%3A00%3A99/col2=287/col4=val_287, [82, val_82]=/list_bucketing_mul_col/ds=2008-04-08/hr=2013-01-23+18%3A00%3A99/col2=82/col4=val_82} +Storage Desc Params: + serialization.format 1 +PREHOOK: query: explain extended +select * from list_bucketing_mul_col +where ds='2008-04-08' and hr='2013-01-23+18:00:99' and col2 = "466" and col4 = "val_466" +PREHOOK: type: QUERY +POSTHOOK: query: explain extended +select * from list_bucketing_mul_col +where ds='2008-04-08' and hr='2013-01-23+18:00:99' and col2 = "466" and col4 = "val_466" +POSTHOOK: type: QUERY +ABSTRACT SYNTAX TREE: + +TOK_QUERY + TOK_FROM + TOK_TABREF + TOK_TABNAME + list_bucketing_mul_col + TOK_INSERT + TOK_DESTINATION + TOK_DIR + TOK_TMP_FILE + TOK_SELECT + TOK_SELEXPR + TOK_ALLCOLREF + TOK_WHERE + and + and + and + = + TOK_TABLE_OR_COL + ds + '2008-04-08' + = + TOK_TABLE_OR_COL + hr + '2013-01-23+18:00:99' + = + TOK_TABLE_OR_COL + col2 + "466" + = + TOK_TABLE_OR_COL + col4 + "val_466" + + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: list_bucketing_mul_col + Statistics: Num rows: 500 Data size: 6312 Basic stats: COMPLETE Column stats: NONE + GatherStats: false + Filter Operator + isSamplingPred: false + predicate: ((col2 = '466') and (col4 = 'val_466')) (type: boolean) + Statistics: Num rows: 125 Data size: 1578 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: col1 (type: string), '466' (type: string), col3 (type: string), 'val_466' (type: string), col5 (type: string), '2008-04-08' (type: string), '2013-01-23+18:00:99' (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6 + Statistics: Num rows: 125 Data size: 1578 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: Num rows: 125 Data size: 1578 Basic stats: COMPLETE Column stats: NONE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + columns _col0,_col1,_col2,_col3,_col4,_col5,_col6 + columns.types string:string:string:string:string:string:string + escape.delim \ + hive.serialization.extend.nesting.levels true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false + Path -> Alias: +#### A masked pattern was here #### + Path -> Partition: +#### A masked pattern was here #### + Partition + base file name: col4=val_466 + input format: org.apache.hadoop.hive.ql.io.RCFileInputFormat + output format: org.apache.hadoop.hive.ql.io.RCFileOutputFormat + partition values: + ds 2008-04-08 + hr 2013-01-23+18:00:99 + properties: + COLUMN_STATS_ACCURATE true + bucket_count -1 + columns col1,col2,col3,col4,col5 + columns.comments + columns.types string:string:string:string:string +#### A masked pattern was here #### + name default.list_bucketing_mul_col + numFiles 4 + numRows 500 + partition_columns ds/hr + partition_columns.types string:string + rawDataSize 6312 + serialization.ddl struct list_bucketing_mul_col { string col1, string col2, string col3, string col4, string col5} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe + totalSize 7094 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe + + input format: org.apache.hadoop.hive.ql.io.RCFileInputFormat + output format: org.apache.hadoop.hive.ql.io.RCFileOutputFormat + properties: + bucket_count -1 + columns col1,col2,col3,col4,col5 + columns.comments + columns.types string:string:string:string:string +#### A masked pattern was here #### + name default.list_bucketing_mul_col + partition_columns ds/hr + partition_columns.types string:string + serialization.ddl struct list_bucketing_mul_col { string col1, string col2, string col3, string col4, string col5} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe + name: default.list_bucketing_mul_col + name: default.list_bucketing_mul_col + Truncated Path -> Alias: + /list_bucketing_mul_col/ds=2008-04-08/hr=2013-01-23+18%3A00%3A99/col2=466/col4=val_466 [list_bucketing_mul_col] + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select * from list_bucketing_mul_col +where ds='2008-04-08' and hr='2013-01-23+18:00:99' and col2 = "466" and col4 = "val_466" +PREHOOK: type: QUERY +PREHOOK: Input: default@list_bucketing_mul_col +PREHOOK: Input: default@list_bucketing_mul_col@ds=2008-04-08/hr=2013-01-23+18%3A00%3A99 +#### A masked pattern was here #### +POSTHOOK: query: select * from list_bucketing_mul_col +where ds='2008-04-08' and hr='2013-01-23+18:00:99' and col2 = "466" and col4 = "val_466" +POSTHOOK: type: QUERY +POSTHOOK: Input: default@list_bucketing_mul_col +POSTHOOK: Input: default@list_bucketing_mul_col@ds=2008-04-08/hr=2013-01-23+18%3A00%3A99 +#### A masked pattern was here #### +1 466 1 val_466 1 2008-04-08 2013-01-23+18:00:99 +1 466 1 val_466 1 2008-04-08 2013-01-23+18:00:99 +1 466 1 val_466 1 2008-04-08 2013-01-23+18:00:99 +PREHOOK: query: drop table list_bucketing_mul_col +PREHOOK: type: DROPTABLE +PREHOOK: Input: default@list_bucketing_mul_col +PREHOOK: Output: default@list_bucketing_mul_col +POSTHOOK: query: drop table list_bucketing_mul_col +POSTHOOK: type: DROPTABLE +POSTHOOK: Input: default@list_bucketing_mul_col +POSTHOOK: Output: default@list_bucketing_mul_col
