http://git-wip-us.apache.org/repos/asf/hive/blob/ec4b936e/ql/src/test/results/clientpositive/autoColumnStats_3.q.out ---------------------------------------------------------------------- diff --git a/ql/src/test/results/clientpositive/autoColumnStats_3.q.out b/ql/src/test/results/clientpositive/autoColumnStats_3.q.out new file mode 100644 index 0000000..ee41910 --- /dev/null +++ b/ql/src/test/results/clientpositive/autoColumnStats_3.q.out @@ -0,0 +1,420 @@ +PREHOOK: query: drop table src_multi1 +PREHOOK: type: DROPTABLE +POSTHOOK: query: drop table src_multi1 +POSTHOOK: type: DROPTABLE +PREHOOK: query: create table src_multi1 like src +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@src_multi1 +POSTHOOK: query: create table src_multi1 like src +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@src_multi1 +PREHOOK: query: analyze table src_multi1 compute statistics for columns key +PREHOOK: type: QUERY +PREHOOK: Input: default@src_multi1 +#### A masked pattern was here #### +POSTHOOK: query: analyze table src_multi1 compute statistics for columns key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src_multi1 +#### A masked pattern was here #### +PREHOOK: query: describe formatted src_multi1 +PREHOOK: type: DESCTABLE +PREHOOK: Input: default@src_multi1 +POSTHOOK: query: describe formatted src_multi1 +POSTHOOK: type: DESCTABLE +POSTHOOK: Input: default@src_multi1 +# col_name data_type comment + +key string default +value string default + +# Detailed Table Information +Database: default +#### A masked pattern was here #### +Retention: 0 +#### A masked pattern was here #### +Table Type: MANAGED_TABLE +Table Parameters: + COLUMN_STATS_ACCURATE {\"COLUMN_STATS\":{\"key\":\"true\"},\"BASIC_STATS\":\"true\"} + numFiles 0 + numRows 0 + rawDataSize 0 + totalSize 0 +#### A masked pattern was here #### + +# Storage Information +SerDe Library: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe +InputFormat: org.apache.hadoop.mapred.TextInputFormat +OutputFormat: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat +Compressed: No +Num Buckets: -1 +Bucket Columns: [] +Sort Columns: [] +Storage Desc Params: + serialization.format 1 +PREHOOK: query: insert into table src_multi1 select * from src +PREHOOK: type: QUERY +PREHOOK: Input: default@src +PREHOOK: Output: default@src_multi1 +POSTHOOK: query: insert into table src_multi1 select * from src +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +POSTHOOK: Output: default@src_multi1 +POSTHOOK: Lineage: src_multi1.key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: src_multi1.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +PREHOOK: query: describe formatted src_multi1 +PREHOOK: type: DESCTABLE +PREHOOK: Input: default@src_multi1 +POSTHOOK: query: describe formatted src_multi1 +POSTHOOK: type: DESCTABLE +POSTHOOK: Input: default@src_multi1 +# col_name data_type comment + +key string default +value string default + +# Detailed Table Information +Database: default +#### A masked pattern was here #### +Retention: 0 +#### A masked pattern was here #### +Table Type: MANAGED_TABLE +Table Parameters: + COLUMN_STATS_ACCURATE {\"COLUMN_STATS\":{\"key\":\"true\",\"value\":\"true\"},\"BASIC_STATS\":\"true\"} + numFiles 1 + numRows 500 + rawDataSize 5312 + totalSize 5812 +#### A masked pattern was here #### + +# Storage Information +SerDe Library: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe 
+InputFormat: org.apache.hadoop.mapred.TextInputFormat +OutputFormat: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat +Compressed: No +Num Buckets: -1 +Bucket Columns: [] +Sort Columns: [] +Storage Desc Params: + serialization.format 1 +PREHOOK: query: drop table nzhang_part14 +PREHOOK: type: DROPTABLE +POSTHOOK: query: drop table nzhang_part14 +POSTHOOK: type: DROPTABLE +PREHOOK: query: create table if not exists nzhang_part14 (key string, value string) + partitioned by (ds string, hr string) +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@nzhang_part14 +POSTHOOK: query: create table if not exists nzhang_part14 (key string, value string) + partitioned by (ds string, hr string) +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@nzhang_part14 +PREHOOK: query: describe formatted nzhang_part14 +PREHOOK: type: DESCTABLE +PREHOOK: Input: default@nzhang_part14 +POSTHOOK: query: describe formatted nzhang_part14 +POSTHOOK: type: DESCTABLE +POSTHOOK: Input: default@nzhang_part14 +# col_name data_type comment + +key string +value string + +# Partition Information +# col_name data_type comment + +ds string +hr string + +# Detailed Table Information +Database: default +#### A masked pattern was here #### +Retention: 0 +#### A masked pattern was here #### +Table Type: MANAGED_TABLE +Table Parameters: +#### A masked pattern was here #### + +# Storage Information +SerDe Library: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe +InputFormat: org.apache.hadoop.mapred.TextInputFormat +OutputFormat: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat +Compressed: No +Num Buckets: -1 +Bucket Columns: [] +Sort Columns: [] +Storage Desc Params: + serialization.format 1 +PREHOOK: query: insert into table nzhang_part14 partition(ds, hr) +select key, value, ds, hr from ( + select * from (select 'k1' as key, cast(null as string) as value, '1' as ds, '2' as hr from src limit 2)a + union all + select * from (select 'k2' as key, '' as value, '1' as ds, '3' as hr from src limit 2)b + union all + select * from (select 'k3' as key, ' ' as value, '2' as ds, '1' as hr from src limit 2)c +) T +PREHOOK: type: QUERY +PREHOOK: Input: default@src +PREHOOK: Output: default@nzhang_part14 +POSTHOOK: query: insert into table nzhang_part14 partition(ds, hr) +select key, value, ds, hr from ( + select * from (select 'k1' as key, cast(null as string) as value, '1' as ds, '2' as hr from src limit 2)a + union all + select * from (select 'k2' as key, '' as value, '1' as ds, '3' as hr from src limit 2)b + union all + select * from (select 'k3' as key, ' ' as value, '2' as ds, '1' as hr from src limit 2)c +) T +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +POSTHOOK: Output: default@nzhang_part14@ds=1/hr=2 +POSTHOOK: Output: default@nzhang_part14@ds=1/hr=3 +POSTHOOK: Output: default@nzhang_part14@ds=2/hr=1 +POSTHOOK: Lineage: nzhang_part14 PARTITION(ds=1,hr=2).key EXPRESSION [] +POSTHOOK: Lineage: nzhang_part14 PARTITION(ds=1,hr=2).value EXPRESSION [] +POSTHOOK: Lineage: nzhang_part14 PARTITION(ds=1,hr=3).key EXPRESSION [] +POSTHOOK: Lineage: nzhang_part14 PARTITION(ds=1,hr=3).value EXPRESSION [] +POSTHOOK: Lineage: nzhang_part14 PARTITION(ds=2,hr=1).key EXPRESSION [] +POSTHOOK: Lineage: nzhang_part14 PARTITION(ds=2,hr=1).value EXPRESSION [] +PREHOOK: query: desc formatted nzhang_part14 partition(ds='1', hr='3') +PREHOOK: type: DESCTABLE +PREHOOK: Input: default@nzhang_part14 +POSTHOOK: query: desc formatted nzhang_part14 
partition(ds='1', hr='3') +POSTHOOK: type: DESCTABLE +POSTHOOK: Input: default@nzhang_part14 +# col_name data_type comment + +key string +value string + +# Partition Information +# col_name data_type comment + +ds string +hr string + +# Detailed Partition Information +Partition Value: [1, 3] +Database: default +Table: nzhang_part14 +#### A masked pattern was here #### +Partition Parameters: + COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\"} + numFiles 1 + numRows 2 + rawDataSize 6 + totalSize 8 +#### A masked pattern was here #### + +# Storage Information +SerDe Library: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe +InputFormat: org.apache.hadoop.mapred.TextInputFormat +OutputFormat: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat +Compressed: No +Num Buckets: -1 +Bucket Columns: [] +Sort Columns: [] +Storage Desc Params: + serialization.format 1 +PREHOOK: query: analyze table nzhang_part14 partition(ds='1', hr='3') compute statistics for columns value +PREHOOK: type: QUERY +PREHOOK: Input: default@nzhang_part14 +PREHOOK: Input: default@nzhang_part14@ds=1/hr=3 +#### A masked pattern was here #### +POSTHOOK: query: analyze table nzhang_part14 partition(ds='1', hr='3') compute statistics for columns value +POSTHOOK: type: QUERY +POSTHOOK: Input: default@nzhang_part14 +POSTHOOK: Input: default@nzhang_part14@ds=1/hr=3 +#### A masked pattern was here #### +PREHOOK: query: desc formatted nzhang_part14 partition(ds='1', hr='3') +PREHOOK: type: DESCTABLE +PREHOOK: Input: default@nzhang_part14 +POSTHOOK: query: desc formatted nzhang_part14 partition(ds='1', hr='3') +POSTHOOK: type: DESCTABLE +POSTHOOK: Input: default@nzhang_part14 +# col_name data_type comment + +key string +value string + +# Partition Information +# col_name data_type comment + +ds string +hr string + +# Detailed Partition Information +Partition Value: [1, 3] +Database: default +Table: nzhang_part14 +#### A masked pattern was here #### +Partition Parameters: + COLUMN_STATS_ACCURATE {\"COLUMN_STATS\":{\"value\":\"true\"},\"BASIC_STATS\":\"true\"} + numFiles 1 + numRows 2 + rawDataSize 6 + totalSize 8 +#### A masked pattern was here #### + +# Storage Information +SerDe Library: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe +InputFormat: org.apache.hadoop.mapred.TextInputFormat +OutputFormat: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat +Compressed: No +Num Buckets: -1 +Bucket Columns: [] +Sort Columns: [] +Storage Desc Params: + serialization.format 1 +PREHOOK: query: desc formatted nzhang_part14 partition(ds='2', hr='1') +PREHOOK: type: DESCTABLE +PREHOOK: Input: default@nzhang_part14 +POSTHOOK: query: desc formatted nzhang_part14 partition(ds='2', hr='1') +POSTHOOK: type: DESCTABLE +POSTHOOK: Input: default@nzhang_part14 +# col_name data_type comment + +key string +value string + +# Partition Information +# col_name data_type comment + +ds string +hr string + +# Detailed Partition Information +Partition Value: [2, 1] +Database: default +Table: nzhang_part14 +#### A masked pattern was here #### +Partition Parameters: + COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\"} + numFiles 1 + numRows 2 + rawDataSize 8 + totalSize 10 +#### A masked pattern was here #### + +# Storage Information +SerDe Library: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe +InputFormat: org.apache.hadoop.mapred.TextInputFormat +OutputFormat: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat +Compressed: No +Num Buckets: -1 +Bucket Columns: [] +Sort Columns: [] +Storage Desc Params: + serialization.format 1 
+PREHOOK: query: insert into table nzhang_part14 partition(ds, hr) +select key, value, ds, hr from ( + select * from (select 'k1' as key, cast(null as string) as value, '1' as ds, '2' as hr from src limit 2)a + union all + select * from (select 'k2' as key, '' as value, '1' as ds, '3' as hr from src limit 2)b + union all + select * from (select 'k3' as key, ' ' as value, '2' as ds, '1' as hr from src limit 2)c +) T +PREHOOK: type: QUERY +PREHOOK: Input: default@src +PREHOOK: Output: default@nzhang_part14 +POSTHOOK: query: insert into table nzhang_part14 partition(ds, hr) +select key, value, ds, hr from ( + select * from (select 'k1' as key, cast(null as string) as value, '1' as ds, '2' as hr from src limit 2)a + union all + select * from (select 'k2' as key, '' as value, '1' as ds, '3' as hr from src limit 2)b + union all + select * from (select 'k3' as key, ' ' as value, '2' as ds, '1' as hr from src limit 2)c +) T +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +POSTHOOK: Output: default@nzhang_part14@ds=1/hr=2 +POSTHOOK: Output: default@nzhang_part14@ds=1/hr=3 +POSTHOOK: Output: default@nzhang_part14@ds=2/hr=1 +POSTHOOK: Lineage: nzhang_part14 PARTITION(ds=1,hr=2).key EXPRESSION [] +POSTHOOK: Lineage: nzhang_part14 PARTITION(ds=1,hr=2).value EXPRESSION [] +POSTHOOK: Lineage: nzhang_part14 PARTITION(ds=1,hr=3).key EXPRESSION [] +POSTHOOK: Lineage: nzhang_part14 PARTITION(ds=1,hr=3).value EXPRESSION [] +POSTHOOK: Lineage: nzhang_part14 PARTITION(ds=2,hr=1).key EXPRESSION [] +POSTHOOK: Lineage: nzhang_part14 PARTITION(ds=2,hr=1).value EXPRESSION [] +PREHOOK: query: desc formatted nzhang_part14 partition(ds='1', hr='3') +PREHOOK: type: DESCTABLE +PREHOOK: Input: default@nzhang_part14 +POSTHOOK: query: desc formatted nzhang_part14 partition(ds='1', hr='3') +POSTHOOK: type: DESCTABLE +POSTHOOK: Input: default@nzhang_part14 +# col_name data_type comment + +key string +value string + +# Partition Information +# col_name data_type comment + +ds string +hr string + +# Detailed Partition Information +Partition Value: [1, 3] +Database: default +Table: nzhang_part14 +#### A masked pattern was here #### +Partition Parameters: + COLUMN_STATS_ACCURATE {\"COLUMN_STATS\":{\"key\":\"true\",\"value\":\"true\"},\"BASIC_STATS\":\"true\"} + numFiles 2 + numRows 4 + rawDataSize 12 + totalSize 16 +#### A masked pattern was here #### + +# Storage Information +SerDe Library: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe +InputFormat: org.apache.hadoop.mapred.TextInputFormat +OutputFormat: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat +Compressed: No +Num Buckets: -1 +Bucket Columns: [] +Sort Columns: [] +Storage Desc Params: + serialization.format 1 +PREHOOK: query: desc formatted nzhang_part14 partition(ds='2', hr='1') +PREHOOK: type: DESCTABLE +PREHOOK: Input: default@nzhang_part14 +POSTHOOK: query: desc formatted nzhang_part14 partition(ds='2', hr='1') +POSTHOOK: type: DESCTABLE +POSTHOOK: Input: default@nzhang_part14 +# col_name data_type comment + +key string +value string + +# Partition Information +# col_name data_type comment + +ds string +hr string + +# Detailed Partition Information +Partition Value: [2, 1] +Database: default +Table: nzhang_part14 +#### A masked pattern was here #### +Partition Parameters: + COLUMN_STATS_ACCURATE {\"COLUMN_STATS\":{\"key\":\"true\",\"value\":\"true\"},\"BASIC_STATS\":\"true\"} + numFiles 2 + numRows 4 + rawDataSize 16 + totalSize 20 +#### A masked pattern was here #### + +# Storage Information +SerDe Library: 
org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe +InputFormat: org.apache.hadoop.mapred.TextInputFormat +OutputFormat: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat +Compressed: No +Num Buckets: -1 +Bucket Columns: [] +Sort Columns: [] +Storage Desc Params: + serialization.format 1
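The autoColumnStats_3.q.out output above records column statistics being gathered as a side effect of INSERT, not only via ANALYZE: note how COLUMN_STATS_ACCURATE gains "value":"true" after the insert even though only "key" was analyzed explicitly. A minimal sketch of the kind of session that produces such output, assuming the feature is toggled by hive.stats.column.autogather (the property name is an assumption here, not confirmed by this diff):

    -- hypothetical session; the set property is assumed, the DDL/DML below
    -- mirrors the statements exercised by autoColumnStats_3.q
    set hive.stats.column.autogather=true;

    create table src_multi1 like src;

    -- explicit, per-column collection still works as before
    analyze table src_multi1 compute statistics for columns key;

    -- with autogather on, the insert itself is expected to update
    -- COLUMN_STATS_ACCURATE to cover both key and value
    insert into table src_multi1 select * from src;
    describe formatted src_multi1;

The same pattern repeats in the partitioned (nzhang_part14), ACID (acid_dtt), and ORC-merge (orcfile_merge2a) files that follow: the extra compute_stats Group By stage and the Column Stats Work stage in their EXPLAIN plans are the plan-level footprint of this autogathering.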
http://git-wip-us.apache.org/repos/asf/hive/blob/ec4b936e/ql/src/test/results/clientpositive/autoColumnStats_4.q.out ---------------------------------------------------------------------- diff --git a/ql/src/test/results/clientpositive/autoColumnStats_4.q.out b/ql/src/test/results/clientpositive/autoColumnStats_4.q.out new file mode 100644 index 0000000..676a27a --- /dev/null +++ b/ql/src/test/results/clientpositive/autoColumnStats_4.q.out @@ -0,0 +1,260 @@ +PREHOOK: query: create table acid_dtt(a int, b varchar(128)) clustered by (a) into 2 buckets stored as orc TBLPROPERTIES ('transactional'='true') +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@acid_dtt +POSTHOOK: query: create table acid_dtt(a int, b varchar(128)) clustered by (a) into 2 buckets stored as orc TBLPROPERTIES ('transactional'='true') +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@acid_dtt +PREHOOK: query: desc formatted acid_dtt +PREHOOK: type: DESCTABLE +PREHOOK: Input: default@acid_dtt +POSTHOOK: query: desc formatted acid_dtt +POSTHOOK: type: DESCTABLE +POSTHOOK: Input: default@acid_dtt +# col_name data_type comment + +a int +b varchar(128) + +# Detailed Table Information +Database: default +#### A masked pattern was here #### +Retention: 0 +#### A masked pattern was here #### +Table Type: MANAGED_TABLE +Table Parameters: + COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\"} + numFiles 0 + numRows 0 + rawDataSize 0 + totalSize 0 + transactional true +#### A masked pattern was here #### + +# Storage Information +SerDe Library: org.apache.hadoop.hive.ql.io.orc.OrcSerde +InputFormat: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat +OutputFormat: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat +Compressed: No +Num Buckets: 2 +Bucket Columns: [a] +Sort Columns: [] +Storage Desc Params: + serialization.format 1 +PREHOOK: query: explain insert into table acid_dtt select cint, cast(cstring1 as varchar(128)) from alltypesorc where cint is not null order by cint limit 10 +PREHOOK: type: QUERY +POSTHOOK: query: explain insert into table acid_dtt select cint, cast(cstring1 as varchar(128)) from alltypesorc where cint is not null order by cint limit 10 +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-2 depends on stages: Stage-1 + Stage-0 depends on stages: Stage-2 + Stage-3 depends on stages: Stage-0 + Stage-5 depends on stages: Stage-3, Stage-4 + Stage-4 depends on stages: Stage-2 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: alltypesorc + Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: cint is not null (type: boolean) + Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: cint (type: int), CAST( cstring1 AS varchar(128)) (type: varchar(128)) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: varchar(128)) + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: int), VALUE._col0 (type: varchar(128)) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE + Limit + Number of 
rows: 10 + Statistics: Num rows: 10 Data size: 2150 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-2 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + sort order: + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 10 Data size: 2150 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: int), _col1 (type: varchar(128)) + Reduce Operator Tree: + Select Operator + expressions: VALUE._col0 (type: int), VALUE._col1 (type: varchar(128)) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 10 Data size: 2150 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 10 Data size: 2150 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat + serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde + name: default.acid_dtt + Select Operator + expressions: _col0 (type: int), _col1 (type: varchar(128)) + outputColumnNames: a, b + Statistics: Num rows: 10 Data size: 2150 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(a, 16), compute_stats(b, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-0 + Move Operator + tables: + replace: false + table: + input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat + serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde + name: default.acid_dtt + + Stage: Stage-3 + Stats-Aggr Operator + + Stage: Stage-5 + Column Stats Work + Column Stats Desc: + Columns: a, b + Column Types: int, varchar(128) + Table: default.acid_dtt + + Stage: Stage-4 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + value expressions: _col0 (type: struct<columntype:string,min:bigint,max:bigint,countnulls:bigint,bitvector:string,numbitvectors:int>), _col1 (type: struct<columntype:string,maxlength:bigint,sumlength:bigint,count:bigint,countnulls:bigint,bitvector:string,numbitvectors:int>) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + +PREHOOK: query: insert into table acid_dtt select cint, cast(cstring1 as varchar(128)) from alltypesorc where cint is not null order by cint limit 10 +PREHOOK: type: QUERY 
+PREHOOK: Input: default@alltypesorc +PREHOOK: Output: default@acid_dtt +POSTHOOK: query: insert into table acid_dtt select cint, cast(cstring1 as varchar(128)) from alltypesorc where cint is not null order by cint limit 10 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@alltypesorc +POSTHOOK: Output: default@acid_dtt +POSTHOOK: Lineage: acid_dtt.a SIMPLE [(alltypesorc)alltypesorc.FieldSchema(name:cint, type:int, comment:null), ] +POSTHOOK: Lineage: acid_dtt.b EXPRESSION [(alltypesorc)alltypesorc.FieldSchema(name:cstring1, type:string, comment:null), ] +PREHOOK: query: desc formatted acid_dtt +PREHOOK: type: DESCTABLE +PREHOOK: Input: default@acid_dtt +POSTHOOK: query: desc formatted acid_dtt +POSTHOOK: type: DESCTABLE +POSTHOOK: Input: default@acid_dtt +# col_name data_type comment + +a int +b varchar(128) + +# Detailed Table Information +Database: default +#### A masked pattern was here #### +Retention: 0 +#### A masked pattern was here #### +Table Type: MANAGED_TABLE +Table Parameters: + COLUMN_STATS_ACCURATE {\"COLUMN_STATS\":{\"a\":\"true\",\"b\":\"true\"},\"BASIC_STATS\":\"true\"} + numFiles 2 + numRows 10 + rawDataSize 0 + totalSize 1714 + transactional true +#### A masked pattern was here #### + +# Storage Information +SerDe Library: org.apache.hadoop.hive.ql.io.orc.OrcSerde +InputFormat: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat +OutputFormat: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat +Compressed: No +Num Buckets: 2 +Bucket Columns: [a] +Sort Columns: [] +Storage Desc Params: + serialization.format 1 +PREHOOK: query: delete from acid_dtt where b = '0ruyd6Y50JpdGRf6HqD' or b = '2uLyD28144vklju213J1mr' +PREHOOK: type: QUERY +PREHOOK: Input: default@acid_dtt +PREHOOK: Output: default@acid_dtt +POSTHOOK: query: delete from acid_dtt where b = '0ruyd6Y50JpdGRf6HqD' or b = '2uLyD28144vklju213J1mr' +POSTHOOK: type: QUERY +POSTHOOK: Input: default@acid_dtt +POSTHOOK: Output: default@acid_dtt +PREHOOK: query: desc formatted acid_dtt +PREHOOK: type: DESCTABLE +PREHOOK: Input: default@acid_dtt +POSTHOOK: query: desc formatted acid_dtt +POSTHOOK: type: DESCTABLE +POSTHOOK: Input: default@acid_dtt +# col_name data_type comment + +a int +b varchar(128) + +# Detailed Table Information +Database: default +#### A masked pattern was here #### +Retention: 0 +#### A masked pattern was here #### +Table Type: MANAGED_TABLE +Table Parameters: + COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\"} + numFiles 4 + numRows 8 + rawDataSize 0 + totalSize 2719 + transactional true +#### A masked pattern was here #### + +# Storage Information +SerDe Library: org.apache.hadoop.hive.ql.io.orc.OrcSerde +InputFormat: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat +OutputFormat: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat +Compressed: No +Num Buckets: 2 +Bucket Columns: [a] +Sort Columns: [] +Storage Desc Params: + serialization.format 1 http://git-wip-us.apache.org/repos/asf/hive/blob/ec4b936e/ql/src/test/results/clientpositive/autoColumnStats_5.q.out ---------------------------------------------------------------------- diff --git a/ql/src/test/results/clientpositive/autoColumnStats_5.q.out b/ql/src/test/results/clientpositive/autoColumnStats_5.q.out new file mode 100644 index 0000000..e905748 --- /dev/null +++ b/ql/src/test/results/clientpositive/autoColumnStats_5.q.out @@ -0,0 +1,664 @@ +PREHOOK: query: -- SORT_QUERY_RESULTS +-- +-- FILE VARIATION: TEXT, Non-Vectorized, MapWork, Partitioned +-- +-- +-- SECTION VARIATION: ALTER TABLE ADD COLUMNS ... 
STATIC INSERT +--- +CREATE TABLE partitioned1(a INT, b STRING) PARTITIONED BY(part INT) STORED AS TEXTFILE +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@partitioned1 +POSTHOOK: query: -- SORT_QUERY_RESULTS +-- +-- FILE VARIATION: TEXT, Non-Vectorized, MapWork, Partitioned +-- +-- +-- SECTION VARIATION: ALTER TABLE ADD COLUMNS ... STATIC INSERT +--- +CREATE TABLE partitioned1(a INT, b STRING) PARTITIONED BY(part INT) STORED AS TEXTFILE +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@partitioned1 +PREHOOK: query: explain insert into table partitioned1 partition(part=1) values(1, 'original'),(2, 'original'), (3, 'original'),(4, 'original') +PREHOOK: type: QUERY +POSTHOOK: query: explain insert into table partitioned1 partition(part=1) values(1, 'original'),(2, 'original'), (3, 'original'),(4, 'original') +POSTHOOK: type: QUERY +Explain +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-7 depends on stages: Stage-1 , consists of Stage-4, Stage-3, Stage-5 + Stage-4 + Stage-0 depends on stages: Stage-4, Stage-3, Stage-6 + Stage-2 depends on stages: Stage-0 + Stage-8 depends on stages: Stage-2 + Stage-3 + Stage-5 + Stage-6 depends on stages: Stage-5 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: values__tmp__table__1 + Statistics: Num rows: 1 Data size: 44 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: UDFToInteger(tmp_values_col1) (type: int), tmp_values_col2 (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 44 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 44 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.partitioned1 + Select Operator + expressions: _col0 (type: int), _col1 (type: string), 1 (type: int) + outputColumnNames: a, b, part + Statistics: Num rows: 1 Data size: 44 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(a, 16), compute_stats(b, 16) + keys: 1 (type: int) + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 44 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: 1 (type: int) + sort order: + + Map-reduce partition columns: 1 (type: int) + Statistics: Num rows: 1 Data size: 44 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: struct<columntype:string,min:bigint,max:bigint,countnulls:bigint,bitvector:string,numbitvectors:int>), _col2 (type: struct<columntype:string,maxlength:bigint,sumlength:bigint,count:bigint,countnulls:bigint,bitvector:string,numbitvectors:int>) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + keys: 1 (type: int) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 44 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col1 (type: struct<columntype:string,min:bigint,max:bigint,countnulls:bigint,numdistinctvalues:bigint,ndvbitvector:string>), _col2 (type: struct<columntype:string,maxlength:bigint,avglength:double,countnulls:bigint,numdistinctvalues:bigint,ndvbitvector:string>), 1 (type: int) + outputColumnNames: _col0, 
_col1, _col2 + Statistics: Num rows: 1 Data size: 44 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 44 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-7 + Conditional Operator + + Stage: Stage-4 + Move Operator + files: + hdfs directory: true +#### A masked pattern was here #### + + Stage: Stage-0 + Move Operator + tables: + partition: + part 1 + replace: false + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.partitioned1 + + Stage: Stage-2 + Stats-Aggr Operator + + Stage: Stage-8 + Column Stats Work + Column Stats Desc: + Columns: a, b + Column Types: int, string + Table: default.partitioned1 + + Stage: Stage-3 + Map Reduce + Map Operator Tree: + TableScan + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.partitioned1 + + Stage: Stage-5 + Map Reduce + Map Operator Tree: + TableScan + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.partitioned1 + + Stage: Stage-6 + Move Operator + files: + hdfs directory: true +#### A masked pattern was here #### + +PREHOOK: query: insert into table partitioned1 partition(part=1) values(1, 'original'),(2, 'original'), (3, 'original'),(4, 'original') +PREHOOK: type: QUERY +PREHOOK: Input: default@values__tmp__table__2 +PREHOOK: Output: default@partitioned1@part=1 +POSTHOOK: query: insert into table partitioned1 partition(part=1) values(1, 'original'),(2, 'original'), (3, 'original'),(4, 'original') +POSTHOOK: type: QUERY +POSTHOOK: Input: default@values__tmp__table__2 +POSTHOOK: Output: default@partitioned1@part=1 +POSTHOOK: Lineage: partitioned1 PARTITION(part=1).a EXPRESSION [(values__tmp__table__2)values__tmp__table__2.FieldSchema(name:tmp_values_col1, type:string, comment:), ] +POSTHOOK: Lineage: partitioned1 PARTITION(part=1).b SIMPLE [(values__tmp__table__2)values__tmp__table__2.FieldSchema(name:tmp_values_col2, type:string, comment:), ] +_col0 _col1 +PREHOOK: query: desc formatted partitioned1 partition(part=1) +PREHOOK: type: DESCTABLE +PREHOOK: Input: default@partitioned1 +POSTHOOK: query: desc formatted partitioned1 partition(part=1) +POSTHOOK: type: DESCTABLE +POSTHOOK: Input: default@partitioned1 +col_name data_type comment +# col_name data_type comment + +a int +b string + +# Partition Information +# col_name data_type comment + +part int + +# Detailed Partition Information +Partition Value: [1] +Database: default +Table: partitioned1 +#### A masked pattern was here #### +Partition Parameters: + COLUMN_STATS_ACCURATE {\"COLUMN_STATS\":{\"a\":\"true\",\"b\":\"true\"},\"BASIC_STATS\":\"true\"} + numFiles 1 + numRows 4 + rawDataSize 40 + totalSize 44 +#### A masked pattern was here #### + +# Storage Information +SerDe Library: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe 
+InputFormat: org.apache.hadoop.mapred.TextInputFormat +OutputFormat: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat +Compressed: No +Num Buckets: -1 +Bucket Columns: [] +Sort Columns: [] +Storage Desc Params: + serialization.format 1 +PREHOOK: query: desc formatted partitioned1 partition(part=1) a +PREHOOK: type: DESCTABLE +PREHOOK: Input: default@partitioned1 +POSTHOOK: query: desc formatted partitioned1 partition(part=1) a +POSTHOOK: type: DESCTABLE +POSTHOOK: Input: default@partitioned1 +col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment +# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment + +a int 1 4 0 5 from deserializer +PREHOOK: query: -- Table-Non-Cascade ADD COLUMNS ... +alter table partitioned1 add columns(c int, d string) +PREHOOK: type: ALTERTABLE_ADDCOLS +PREHOOK: Input: default@partitioned1 +PREHOOK: Output: default@partitioned1 +POSTHOOK: query: -- Table-Non-Cascade ADD COLUMNS ... +alter table partitioned1 add columns(c int, d string) +POSTHOOK: type: ALTERTABLE_ADDCOLS +POSTHOOK: Input: default@partitioned1 +POSTHOOK: Output: default@partitioned1 +PREHOOK: query: desc formatted partitioned1 partition(part=1) +PREHOOK: type: DESCTABLE +PREHOOK: Input: default@partitioned1 +POSTHOOK: query: desc formatted partitioned1 partition(part=1) +POSTHOOK: type: DESCTABLE +POSTHOOK: Input: default@partitioned1 +col_name data_type comment +# col_name data_type comment + +a int +b string + +# Partition Information +# col_name data_type comment + +part int + +# Detailed Partition Information +Partition Value: [1] +Database: default +Table: partitioned1 +#### A masked pattern was here #### +Partition Parameters: + COLUMN_STATS_ACCURATE {\"COLUMN_STATS\":{\"a\":\"true\",\"b\":\"true\"},\"BASIC_STATS\":\"true\"} + numFiles 1 + numRows 4 + rawDataSize 40 + totalSize 44 +#### A masked pattern was here #### + +# Storage Information +SerDe Library: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe +InputFormat: org.apache.hadoop.mapred.TextInputFormat +OutputFormat: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat +Compressed: No +Num Buckets: -1 +Bucket Columns: [] +Sort Columns: [] +Storage Desc Params: + serialization.format 1 +PREHOOK: query: explain insert into table partitioned1 partition(part=2) values(1, 'new', 10, 'ten'),(2, 'new', 20, 'twenty'), (3, 'new', 30, 'thirty'),(4, 'new', 40, 'forty') +PREHOOK: type: QUERY +POSTHOOK: query: explain insert into table partitioned1 partition(part=2) values(1, 'new', 10, 'ten'),(2, 'new', 20, 'twenty'), (3, 'new', 30, 'thirty'),(4, 'new', 40, 'forty') +POSTHOOK: type: QUERY +Explain +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-7 depends on stages: Stage-1 , consists of Stage-4, Stage-3, Stage-5 + Stage-4 + Stage-0 depends on stages: Stage-4, Stage-3, Stage-6 + Stage-2 depends on stages: Stage-0 + Stage-8 depends on stages: Stage-2 + Stage-3 + Stage-5 + Stage-6 depends on stages: Stage-5 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: values__tmp__table__3 + Statistics: Num rows: 1 Data size: 60 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: UDFToInteger(tmp_values_col1) (type: int), tmp_values_col2 (type: string), UDFToInteger(tmp_values_col3) (type: int), tmp_values_col4 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1 Data size: 60 Basic stats: COMPLETE Column stats: NONE + File Output Operator + 
compressed: false + Statistics: Num rows: 1 Data size: 60 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.partitioned1 + Select Operator + expressions: _col0 (type: int), _col1 (type: string), _col2 (type: int), _col3 (type: string), 2 (type: int) + outputColumnNames: a, b, c, d, part + Statistics: Num rows: 1 Data size: 60 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(a, 16), compute_stats(b, 16), compute_stats(c, 16), compute_stats(d, 16) + keys: 2 (type: int) + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3, _col4 + Statistics: Num rows: 1 Data size: 60 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: 2 (type: int) + sort order: + + Map-reduce partition columns: 2 (type: int) + Statistics: Num rows: 1 Data size: 60 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: struct<columntype:string,min:bigint,max:bigint,countnulls:bigint,bitvector:string,numbitvectors:int>), _col2 (type: struct<columntype:string,maxlength:bigint,sumlength:bigint,count:bigint,countnulls:bigint,bitvector:string,numbitvectors:int>), _col3 (type: struct<columntype:string,min:bigint,max:bigint,countnulls:bigint,bitvector:string,numbitvectors:int>), _col4 (type: struct<columntype:string,maxlength:bigint,sumlength:bigint,count:bigint,countnulls:bigint,bitvector:string,numbitvectors:int>) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2), compute_stats(VALUE._col3) + keys: 2 (type: int) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2, _col3, _col4 + Statistics: Num rows: 1 Data size: 60 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col1 (type: struct<columntype:string,min:bigint,max:bigint,countnulls:bigint,numdistinctvalues:bigint,ndvbitvector:string>), _col2 (type: struct<columntype:string,maxlength:bigint,avglength:double,countnulls:bigint,numdistinctvalues:bigint,ndvbitvector:string>), _col3 (type: struct<columntype:string,min:bigint,max:bigint,countnulls:bigint,numdistinctvalues:bigint,ndvbitvector:string>), _col4 (type: struct<columntype:string,maxlength:bigint,avglength:double,countnulls:bigint,numdistinctvalues:bigint,ndvbitvector:string>), 2 (type: int) + outputColumnNames: _col0, _col1, _col2, _col3, _col4 + Statistics: Num rows: 1 Data size: 60 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 60 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-7 + Conditional Operator + + Stage: Stage-4 + Move Operator + files: + hdfs directory: true +#### A masked pattern was here #### + + Stage: Stage-0 + Move Operator + tables: + partition: + part 2 + replace: false + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.partitioned1 + + Stage: Stage-2 + Stats-Aggr Operator + + Stage: Stage-8 + Column Stats Work + Column 
Stats Desc: + Columns: a, b, c, d + Column Types: int, string, int, string + Table: default.partitioned1 + + Stage: Stage-3 + Map Reduce + Map Operator Tree: + TableScan + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.partitioned1 + + Stage: Stage-5 + Map Reduce + Map Operator Tree: + TableScan + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.partitioned1 + + Stage: Stage-6 + Move Operator + files: + hdfs directory: true +#### A masked pattern was here #### + +PREHOOK: query: insert into table partitioned1 partition(part=2) values(1, 'new', 10, 'ten'),(2, 'new', 20, 'twenty'), (3, 'new', 30, 'thirty'),(4, 'new', 40, 'forty') +PREHOOK: type: QUERY +PREHOOK: Input: default@values__tmp__table__4 +PREHOOK: Output: default@partitioned1@part=2 +POSTHOOK: query: insert into table partitioned1 partition(part=2) values(1, 'new', 10, 'ten'),(2, 'new', 20, 'twenty'), (3, 'new', 30, 'thirty'),(4, 'new', 40, 'forty') +POSTHOOK: type: QUERY +POSTHOOK: Input: default@values__tmp__table__4 +POSTHOOK: Output: default@partitioned1@part=2 +POSTHOOK: Lineage: partitioned1 PARTITION(part=2).a EXPRESSION [(values__tmp__table__4)values__tmp__table__4.FieldSchema(name:tmp_values_col1, type:string, comment:), ] +POSTHOOK: Lineage: partitioned1 PARTITION(part=2).b SIMPLE [(values__tmp__table__4)values__tmp__table__4.FieldSchema(name:tmp_values_col2, type:string, comment:), ] +POSTHOOK: Lineage: partitioned1 PARTITION(part=2).c EXPRESSION [(values__tmp__table__4)values__tmp__table__4.FieldSchema(name:tmp_values_col3, type:string, comment:), ] +POSTHOOK: Lineage: partitioned1 PARTITION(part=2).d SIMPLE [(values__tmp__table__4)values__tmp__table__4.FieldSchema(name:tmp_values_col4, type:string, comment:), ] +_col0 _col1 _col2 _col3 +PREHOOK: query: desc formatted partitioned1 partition(part=2) +PREHOOK: type: DESCTABLE +PREHOOK: Input: default@partitioned1 +POSTHOOK: query: desc formatted partitioned1 partition(part=2) +POSTHOOK: type: DESCTABLE +POSTHOOK: Input: default@partitioned1 +col_name data_type comment +# col_name data_type comment + +a int +b string +c int +d string + +# Partition Information +# col_name data_type comment + +part int + +# Detailed Partition Information +Partition Value: [2] +Database: default +Table: partitioned1 +#### A masked pattern was here #### +Partition Parameters: + COLUMN_STATS_ACCURATE {\"COLUMN_STATS\":{\"a\":\"true\",\"b\":\"true\",\"c\":\"true\",\"d\":\"true\"},\"BASIC_STATS\":\"true\"} + numFiles 1 + numRows 4 + rawDataSize 56 + totalSize 60 +#### A masked pattern was here #### + +# Storage Information +SerDe Library: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe +InputFormat: org.apache.hadoop.mapred.TextInputFormat +OutputFormat: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat +Compressed: No +Num Buckets: -1 +Bucket Columns: [] +Sort Columns: [] +Storage Desc Params: + serialization.format 1 +PREHOOK: query: desc formatted partitioned1 partition(part=2) c +PREHOOK: type: DESCTABLE +PREHOOK: Input: default@partitioned1 +POSTHOOK: query: desc formatted partitioned1 partition(part=2) c +POSTHOOK: type: DESCTABLE +POSTHOOK: Input: 
default@partitioned1 +col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment +# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment + +c int 10 40 0 4 from deserializer +PREHOOK: query: explain insert into table partitioned1 partition(part=1) values(5, 'new', 100, 'hundred'),(6, 'new', 200, 'two hundred') +PREHOOK: type: QUERY +POSTHOOK: query: explain insert into table partitioned1 partition(part=1) values(5, 'new', 100, 'hundred'),(6, 'new', 200, 'two hundred') +POSTHOOK: type: QUERY +Explain +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-7 depends on stages: Stage-1 , consists of Stage-4, Stage-3, Stage-5 + Stage-4 + Stage-0 depends on stages: Stage-4, Stage-3, Stage-6 + Stage-2 depends on stages: Stage-0 + Stage-8 depends on stages: Stage-2 + Stage-3 + Stage-5 + Stage-6 depends on stages: Stage-5 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: values__tmp__table__5 + Statistics: Num rows: 1 Data size: 40 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: UDFToInteger(tmp_values_col1) (type: int), tmp_values_col2 (type: string), UDFToInteger(tmp_values_col3) (type: int), tmp_values_col4 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1 Data size: 40 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 40 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.partitioned1 + Select Operator + expressions: _col0 (type: int), _col1 (type: string), _col2 (type: int), _col3 (type: string), 1 (type: int) + outputColumnNames: a, b, c, d, part + Statistics: Num rows: 1 Data size: 40 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(a, 16), compute_stats(b, 16), compute_stats(c, 16), compute_stats(d, 16) + keys: 1 (type: int) + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3, _col4 + Statistics: Num rows: 1 Data size: 40 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: 1 (type: int) + sort order: + + Map-reduce partition columns: 1 (type: int) + Statistics: Num rows: 1 Data size: 40 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: struct<columntype:string,min:bigint,max:bigint,countnulls:bigint,bitvector:string,numbitvectors:int>), _col2 (type: struct<columntype:string,maxlength:bigint,sumlength:bigint,count:bigint,countnulls:bigint,bitvector:string,numbitvectors:int>), _col3 (type: struct<columntype:string,min:bigint,max:bigint,countnulls:bigint,bitvector:string,numbitvectors:int>), _col4 (type: struct<columntype:string,maxlength:bigint,sumlength:bigint,count:bigint,countnulls:bigint,bitvector:string,numbitvectors:int>) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2), compute_stats(VALUE._col3) + keys: 1 (type: int) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2, _col3, _col4 + Statistics: Num rows: 1 Data size: 40 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col1 (type: 
struct<columntype:string,min:bigint,max:bigint,countnulls:bigint,numdistinctvalues:bigint,ndvbitvector:string>), _col2 (type: struct<columntype:string,maxlength:bigint,avglength:double,countnulls:bigint,numdistinctvalues:bigint,ndvbitvector:string>), _col3 (type: struct<columntype:string,min:bigint,max:bigint,countnulls:bigint,numdistinctvalues:bigint,ndvbitvector:string>), _col4 (type: struct<columntype:string,maxlength:bigint,avglength:double,countnulls:bigint,numdistinctvalues:bigint,ndvbitvector:string>), 1 (type: int) + outputColumnNames: _col0, _col1, _col2, _col3, _col4 + Statistics: Num rows: 1 Data size: 40 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 40 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-7 + Conditional Operator + + Stage: Stage-4 + Move Operator + files: + hdfs directory: true +#### A masked pattern was here #### + + Stage: Stage-0 + Move Operator + tables: + partition: + part 1 + replace: false + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.partitioned1 + + Stage: Stage-2 + Stats-Aggr Operator + + Stage: Stage-8 + Column Stats Work + Column Stats Desc: + Columns: a, b, c, d + Column Types: int, string, int, string + Table: default.partitioned1 + + Stage: Stage-3 + Map Reduce + Map Operator Tree: + TableScan + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.partitioned1 + + Stage: Stage-5 + Map Reduce + Map Operator Tree: + TableScan + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.partitioned1 + + Stage: Stage-6 + Move Operator + files: + hdfs directory: true +#### A masked pattern was here #### + +PREHOOK: query: insert into table partitioned1 partition(part=1) values(5, 'new', 100, 'hundred'),(6, 'new', 200, 'two hundred') +PREHOOK: type: QUERY +PREHOOK: Input: default@values__tmp__table__6 +PREHOOK: Output: default@partitioned1@part=1 +POSTHOOK: query: insert into table partitioned1 partition(part=1) values(5, 'new', 100, 'hundred'),(6, 'new', 200, 'two hundred') +POSTHOOK: type: QUERY +POSTHOOK: Input: default@values__tmp__table__6 +POSTHOOK: Output: default@partitioned1@part=1 +POSTHOOK: Lineage: partitioned1 PARTITION(part=1).a EXPRESSION [(values__tmp__table__6)values__tmp__table__6.FieldSchema(name:tmp_values_col1, type:string, comment:), ] +POSTHOOK: Lineage: partitioned1 PARTITION(part=1).b SIMPLE [(values__tmp__table__6)values__tmp__table__6.FieldSchema(name:tmp_values_col2, type:string, comment:), ] +POSTHOOK: Lineage: partitioned1 PARTITION(part=1).c EXPRESSION [(values__tmp__table__6)values__tmp__table__6.FieldSchema(name:tmp_values_col3, type:string, comment:), ] +POSTHOOK: Lineage: partitioned1 PARTITION(part=1).d SIMPLE 
[(values__tmp__table__6)values__tmp__table__6.FieldSchema(name:tmp_values_col4, type:string, comment:), ] +_col0 _col1 _col2 _col3 +PREHOOK: query: desc formatted partitioned1 partition(part=1) +PREHOOK: type: DESCTABLE +PREHOOK: Input: default@partitioned1 +POSTHOOK: query: desc formatted partitioned1 partition(part=1) +POSTHOOK: type: DESCTABLE +POSTHOOK: Input: default@partitioned1 +col_name data_type comment +# col_name data_type comment + +a int +b string + +# Partition Information +# col_name data_type comment + +part int + +# Detailed Partition Information +Partition Value: [1] +Database: default +Table: partitioned1 +#### A masked pattern was here #### +Partition Parameters: + COLUMN_STATS_ACCURATE {\"COLUMN_STATS\":{\"a\":\"true\",\"b\":\"true\",\"c\":\"true\",\"d\":\"true\"},\"BASIC_STATS\":\"true\"} + numFiles 2 + numRows 6 + rawDataSize 78 + totalSize 84 +#### A masked pattern was here #### + +# Storage Information +SerDe Library: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe +InputFormat: org.apache.hadoop.mapred.TextInputFormat +OutputFormat: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat +Compressed: No +Num Buckets: -1 +Bucket Columns: [] +Sort Columns: [] +Storage Desc Params: + serialization.format 1 +PREHOOK: query: desc formatted partitioned1 partition(part=1) a +PREHOOK: type: DESCTABLE +PREHOOK: Input: default@partitioned1 +POSTHOOK: query: desc formatted partitioned1 partition(part=1) a +POSTHOOK: type: DESCTABLE +POSTHOOK: Input: default@partitioned1 +col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment +# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment + +a int 1 6 0 5 from deserializer +PREHOOK: query: desc formatted partitioned1 partition(part=1) c +PREHOOK: type: DESCTABLE +PREHOOK: Input: default@partitioned1 +POSTHOOK: query: desc formatted partitioned1 partition(part=1) c +POSTHOOK: type: DESCTABLE +POSTHOOK: Input: default@partitioned1 +col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment +# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment + +c int 100 200 0 3 from deserializer http://git-wip-us.apache.org/repos/asf/hive/blob/ec4b936e/ql/src/test/results/clientpositive/autoColumnStats_6.q.out ---------------------------------------------------------------------- diff --git a/ql/src/test/results/clientpositive/autoColumnStats_6.q.out b/ql/src/test/results/clientpositive/autoColumnStats_6.q.out new file mode 100644 index 0000000..de75d8a --- /dev/null +++ b/ql/src/test/results/clientpositive/autoColumnStats_6.q.out @@ -0,0 +1,299 @@ +PREHOOK: query: DROP TABLE orcfile_merge2a +PREHOOK: type: DROPTABLE +POSTHOOK: query: DROP TABLE orcfile_merge2a +POSTHOOK: type: DROPTABLE +PREHOOK: query: CREATE TABLE orcfile_merge2a (key INT, value STRING) + PARTITIONED BY (one string, two string, three string) + STORED AS ORC +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@orcfile_merge2a +POSTHOOK: query: CREATE TABLE orcfile_merge2a (key INT, value STRING) + PARTITIONED BY (one string, two string, three string) + STORED AS ORC +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@orcfile_merge2a +PREHOOK: query: EXPLAIN INSERT OVERWRITE TABLE orcfile_merge2a PARTITION (one='1', two, three) + SELECT key, value, PMOD(HASH(key), 10) as two, + PMOD(HASH(value), 10) as 
three + FROM src +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN INSERT OVERWRITE TABLE orcfile_merge2a PARTITION (one='1', two, three) + SELECT key, value, PMOD(HASH(key), 10) as two, + PMOD(HASH(value), 10) as three + FROM src +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-7 depends on stages: Stage-1 , consists of Stage-4, Stage-3, Stage-5 + Stage-4 + Stage-0 depends on stages: Stage-4, Stage-3, Stage-6 + Stage-2 depends on stages: Stage-0 + Stage-8 depends on stages: Stage-2 + Stage-3 + Stage-5 + Stage-6 depends on stages: Stage-5 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: src + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: UDFToInteger(key) (type: int), value (type: string), (hash(key) pmod 10) (type: int), (hash(value) pmod 10) (type: int) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat + serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde + name: default.orcfile_merge2a + Select Operator + expressions: _col0 (type: int), _col1 (type: string), '1' (type: string), UDFToString(_col2) (type: string), UDFToString(_col3) (type: string) + outputColumnNames: key, value, one, two, three + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + keys: '1' (type: string), two (type: string), three (type: string) + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3, _col4 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: '1' (type: string), _col1 (type: string), _col2 (type: string) + sort order: +++ + Map-reduce partition columns: '1' (type: string), _col1 (type: string), _col2 (type: string) + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + value expressions: _col3 (type: struct<columntype:string,min:bigint,max:bigint,countnulls:bigint,bitvector:string,numbitvectors:int>), _col4 (type: struct<columntype:string,maxlength:bigint,sumlength:bigint,count:bigint,countnulls:bigint,bitvector:string,numbitvectors:int>) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + keys: '1' (type: string), KEY._col1 (type: string), KEY._col2 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2, _col3, _col4 + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col3 (type: struct<columntype:string,min:bigint,max:bigint,countnulls:bigint,numdistinctvalues:bigint,ndvbitvector:string>), _col4 (type: struct<columntype:string,maxlength:bigint,avglength:double,countnulls:bigint,numdistinctvalues:bigint,ndvbitvector:string>), '1' (type: string), _col1 (type: string), _col2 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4 + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE 
+ table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-7 + Conditional Operator + + Stage: Stage-4 + Move Operator + files: + hdfs directory: true +#### A masked pattern was here #### + + Stage: Stage-0 + Move Operator + tables: + partition: + one 1 + three + two + replace: true + table: + input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat + serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde + name: default.orcfile_merge2a + + Stage: Stage-2 + Stats-Aggr Operator + + Stage: Stage-8 + Column Stats Work + Column Stats Desc: + Columns: key, value + Column Types: int, string + Table: default.orcfile_merge2a + + Stage: Stage-3 + Merge File Operator + Map Operator Tree: + ORC File Merge Operator + merge level: stripe + input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + + Stage: Stage-5 + Merge File Operator + Map Operator Tree: + ORC File Merge Operator + merge level: stripe + input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + + Stage: Stage-6 + Move Operator + files: + hdfs directory: true +#### A masked pattern was here #### + +PREHOOK: query: INSERT OVERWRITE TABLE orcfile_merge2a PARTITION (one='1', two, three) + SELECT key, value, PMOD(HASH(key), 10) as two, + PMOD(HASH(value), 10) as three + FROM src +PREHOOK: type: QUERY +PREHOOK: Input: default@src +PREHOOK: Output: default@orcfile_merge2a@one=1 +POSTHOOK: query: INSERT OVERWRITE TABLE orcfile_merge2a PARTITION (one='1', two, three) + SELECT key, value, PMOD(HASH(key), 10) as two, + PMOD(HASH(value), 10) as three + FROM src +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +POSTHOOK: Output: default@orcfile_merge2a@one=1/two=0/three=2 +POSTHOOK: Output: default@orcfile_merge2a@one=1/two=0/three=8 +POSTHOOK: Output: default@orcfile_merge2a@one=1/two=1/three=3 +POSTHOOK: Output: default@orcfile_merge2a@one=1/two=1/three=9 +POSTHOOK: Output: default@orcfile_merge2a@one=1/two=2/three=0 +POSTHOOK: Output: default@orcfile_merge2a@one=1/two=2/three=4 +POSTHOOK: Output: default@orcfile_merge2a@one=1/two=3/three=1 +POSTHOOK: Output: default@orcfile_merge2a@one=1/two=3/three=5 +POSTHOOK: Output: default@orcfile_merge2a@one=1/two=4/three=2 +POSTHOOK: Output: default@orcfile_merge2a@one=1/two=4/three=6 +POSTHOOK: Output: default@orcfile_merge2a@one=1/two=5/three=3 +POSTHOOK: Output: default@orcfile_merge2a@one=1/two=5/three=7 +POSTHOOK: Output: default@orcfile_merge2a@one=1/two=6/three=4 +POSTHOOK: Output: default@orcfile_merge2a@one=1/two=6/three=8 +POSTHOOK: Output: default@orcfile_merge2a@one=1/two=7/three=5 +POSTHOOK: Output: default@orcfile_merge2a@one=1/two=7/three=9 +POSTHOOK: Output: default@orcfile_merge2a@one=1/two=8/three=0 +POSTHOOK: Output: default@orcfile_merge2a@one=1/two=8/three=6 +POSTHOOK: Output: default@orcfile_merge2a@one=1/two=9/three=1 +POSTHOOK: Output: default@orcfile_merge2a@one=1/two=9/three=7 +POSTHOOK: Lineage: orcfile_merge2a PARTITION(one=1,two=0,three=2).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: orcfile_merge2a PARTITION(one=1,two=0,three=2).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: orcfile_merge2a PARTITION(one=1,two=0,three=8).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: 
+PREHOOK: query: INSERT OVERWRITE TABLE orcfile_merge2a PARTITION (one='1', two, three)
+    SELECT key, value, PMOD(HASH(key), 10) as two,
+        PMOD(HASH(value), 10) as three
+    FROM src
+PREHOOK: type: QUERY
+PREHOOK: Input: default@src
+PREHOOK: Output: default@orcfile_merge2a@one=1
+POSTHOOK: query: INSERT OVERWRITE TABLE orcfile_merge2a PARTITION (one='1', two, three)
+    SELECT key, value, PMOD(HASH(key), 10) as two,
+        PMOD(HASH(value), 10) as three
+    FROM src
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@src
+POSTHOOK: Output: default@orcfile_merge2a@one=1/two=0/three=2
+POSTHOOK: Output: default@orcfile_merge2a@one=1/two=0/three=8
+POSTHOOK: Output: default@orcfile_merge2a@one=1/two=1/three=3
+POSTHOOK: Output: default@orcfile_merge2a@one=1/two=1/three=9
+POSTHOOK: Output: default@orcfile_merge2a@one=1/two=2/three=0
+POSTHOOK: Output: default@orcfile_merge2a@one=1/two=2/three=4
+POSTHOOK: Output: default@orcfile_merge2a@one=1/two=3/three=1
+POSTHOOK: Output: default@orcfile_merge2a@one=1/two=3/three=5
+POSTHOOK: Output: default@orcfile_merge2a@one=1/two=4/three=2
+POSTHOOK: Output: default@orcfile_merge2a@one=1/two=4/three=6
+POSTHOOK: Output: default@orcfile_merge2a@one=1/two=5/three=3
+POSTHOOK: Output: default@orcfile_merge2a@one=1/two=5/three=7
+POSTHOOK: Output: default@orcfile_merge2a@one=1/two=6/three=4
+POSTHOOK: Output: default@orcfile_merge2a@one=1/two=6/three=8
+POSTHOOK: Output: default@orcfile_merge2a@one=1/two=7/three=5
+POSTHOOK: Output: default@orcfile_merge2a@one=1/two=7/three=9
+POSTHOOK: Output: default@orcfile_merge2a@one=1/two=8/three=0
+POSTHOOK: Output: default@orcfile_merge2a@one=1/two=8/three=6
+POSTHOOK: Output: default@orcfile_merge2a@one=1/two=9/three=1
+POSTHOOK: Output: default@orcfile_merge2a@one=1/two=9/three=7
+POSTHOOK: Lineage: orcfile_merge2a PARTITION(one=1,two=0,three=2).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: orcfile_merge2a PARTITION(one=1,two=0,three=2).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: orcfile_merge2a PARTITION(one=1,two=0,three=8).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: orcfile_merge2a PARTITION(one=1,two=0,three=8).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: orcfile_merge2a PARTITION(one=1,two=1,three=3).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: orcfile_merge2a PARTITION(one=1,two=1,three=3).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: orcfile_merge2a PARTITION(one=1,two=1,three=9).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: orcfile_merge2a PARTITION(one=1,two=1,three=9).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: orcfile_merge2a PARTITION(one=1,two=2,three=0).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: orcfile_merge2a PARTITION(one=1,two=2,three=0).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: orcfile_merge2a PARTITION(one=1,two=2,three=4).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: orcfile_merge2a PARTITION(one=1,two=2,three=4).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: orcfile_merge2a PARTITION(one=1,two=3,three=1).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: orcfile_merge2a PARTITION(one=1,two=3,three=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: orcfile_merge2a PARTITION(one=1,two=3,three=5).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: orcfile_merge2a PARTITION(one=1,two=3,three=5).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: orcfile_merge2a PARTITION(one=1,two=4,three=2).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: orcfile_merge2a PARTITION(one=1,two=4,three=2).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: orcfile_merge2a PARTITION(one=1,two=4,three=6).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: orcfile_merge2a PARTITION(one=1,two=4,three=6).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: orcfile_merge2a PARTITION(one=1,two=5,three=3).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: orcfile_merge2a PARTITION(one=1,two=5,three=3).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: orcfile_merge2a PARTITION(one=1,two=5,three=7).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: orcfile_merge2a PARTITION(one=1,two=5,three=7).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: orcfile_merge2a PARTITION(one=1,two=6,three=4).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: orcfile_merge2a PARTITION(one=1,two=6,three=4).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: orcfile_merge2a PARTITION(one=1,two=6,three=8).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: orcfile_merge2a PARTITION(one=1,two=6,three=8).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: orcfile_merge2a PARTITION(one=1,two=7,three=5).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: orcfile_merge2a PARTITION(one=1,two=7,three=5).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: orcfile_merge2a PARTITION(one=1,two=7,three=9).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: orcfile_merge2a PARTITION(one=1,two=7,three=9).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: orcfile_merge2a PARTITION(one=1,two=8,three=0).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: orcfile_merge2a PARTITION(one=1,two=8,three=0).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: orcfile_merge2a PARTITION(one=1,two=8,three=6).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: orcfile_merge2a PARTITION(one=1,two=8,three=6).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: orcfile_merge2a PARTITION(one=1,two=9,three=1).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: orcfile_merge2a PARTITION(one=1,two=9,three=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: orcfile_merge2a PARTITION(one=1,two=9,three=7).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: orcfile_merge2a PARTITION(one=1,two=9,three=7).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+Found 1 items
+#### A masked pattern was here ####
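The masked line above is the output of a dfs -ls on one of the partition directories; "Found 1 items" confirms that, after the conditional merge, the partition holds a single ORC file. The query that follows verifies the data itself with an order-insensitive checksum: TRANSFORM(*) pipes each row through tr '\t' '_', collapsing it to one string, and SUM(HASH(c)) folds all rows into a single value that does not depend on row order or file layout. The same idiom, shown generically (some_table is a hypothetical placeholder):

  -- Generic form of the checksum idiom used below;
  -- `some_table` is a hypothetical placeholder.
  SELECT SUM(HASH(c)) FROM (
    SELECT TRANSFORM(*) USING 'tr \t _' AS (c)
    FROM some_table
  ) t;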
+PREHOOK: query: SELECT SUM(HASH(c)) FROM (
+    SELECT TRANSFORM(*) USING 'tr \t _' AS (c)
+    FROM orcfile_merge2a
+) t
+PREHOOK: type: QUERY
+PREHOOK: Input: default@orcfile_merge2a
+PREHOOK: Input: default@orcfile_merge2a@one=1/two=0/three=2
+PREHOOK: Input: default@orcfile_merge2a@one=1/two=0/three=8
+PREHOOK: Input: default@orcfile_merge2a@one=1/two=1/three=3
+PREHOOK: Input: default@orcfile_merge2a@one=1/two=1/three=9
+PREHOOK: Input: default@orcfile_merge2a@one=1/two=2/three=0
+PREHOOK: Input: default@orcfile_merge2a@one=1/two=2/three=4
+PREHOOK: Input: default@orcfile_merge2a@one=1/two=3/three=1
+PREHOOK: Input: default@orcfile_merge2a@one=1/two=3/three=5
+PREHOOK: Input: default@orcfile_merge2a@one=1/two=4/three=2
+PREHOOK: Input: default@orcfile_merge2a@one=1/two=4/three=6
+PREHOOK: Input: default@orcfile_merge2a@one=1/two=5/three=3
+PREHOOK: Input: default@orcfile_merge2a@one=1/two=5/three=7
+PREHOOK: Input: default@orcfile_merge2a@one=1/two=6/three=4
+PREHOOK: Input: default@orcfile_merge2a@one=1/two=6/three=8
+PREHOOK: Input: default@orcfile_merge2a@one=1/two=7/three=5
+PREHOOK: Input: default@orcfile_merge2a@one=1/two=7/three=9
+PREHOOK: Input: default@orcfile_merge2a@one=1/two=8/three=0
+PREHOOK: Input: default@orcfile_merge2a@one=1/two=8/three=6
+PREHOOK: Input: default@orcfile_merge2a@one=1/two=9/three=1
+PREHOOK: Input: default@orcfile_merge2a@one=1/two=9/three=7
+#### A masked pattern was here ####
+POSTHOOK: query: SELECT SUM(HASH(c)) FROM (
+    SELECT TRANSFORM(*) USING 'tr \t _' AS (c)
+    FROM orcfile_merge2a
+) t
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@orcfile_merge2a
+POSTHOOK: Input: default@orcfile_merge2a@one=1/two=0/three=2
+POSTHOOK: Input: default@orcfile_merge2a@one=1/two=0/three=8
+POSTHOOK: Input: default@orcfile_merge2a@one=1/two=1/three=3
+POSTHOOK: Input: default@orcfile_merge2a@one=1/two=1/three=9
+POSTHOOK: Input: default@orcfile_merge2a@one=1/two=2/three=0
+POSTHOOK: Input: default@orcfile_merge2a@one=1/two=2/three=4
+POSTHOOK: Input: default@orcfile_merge2a@one=1/two=3/three=1
+POSTHOOK: Input: default@orcfile_merge2a@one=1/two=3/three=5
+POSTHOOK: Input: default@orcfile_merge2a@one=1/two=4/three=2
+POSTHOOK: Input: default@orcfile_merge2a@one=1/two=4/three=6
+POSTHOOK: Input: default@orcfile_merge2a@one=1/two=5/three=3
+POSTHOOK: Input: default@orcfile_merge2a@one=1/two=5/three=7
+POSTHOOK: Input: default@orcfile_merge2a@one=1/two=6/three=4
+POSTHOOK: Input: default@orcfile_merge2a@one=1/two=6/three=8
+POSTHOOK: Input: default@orcfile_merge2a@one=1/two=7/three=5
+POSTHOOK: Input: default@orcfile_merge2a@one=1/two=7/three=9
+POSTHOOK: Input: default@orcfile_merge2a@one=1/two=8/three=0
+POSTHOOK: Input: default@orcfile_merge2a@one=1/two=8/three=6
+POSTHOOK: Input: default@orcfile_merge2a@one=1/two=9/three=1
+POSTHOOK: Input: default@orcfile_merge2a@one=1/two=9/three=7
+#### A masked pattern was here ####
+-4209012844
+PREHOOK: query: SELECT SUM(HASH(c)) FROM (
+    SELECT TRANSFORM(key, value, '1', PMOD(HASH(key), 10),
+        PMOD(HASH(value), 10)) USING 'tr \t _' AS (c)
+    FROM src
+) t
+PREHOOK: type: QUERY
+PREHOOK: Input: default@src
+#### A masked pattern was here ####
+POSTHOOK: query: SELECT SUM(HASH(c)) FROM (
+    SELECT TRANSFORM(key, value, '1', PMOD(HASH(key), 10),
+        PMOD(HASH(value), 10)) USING 'tr \t _' AS (c)
+    FROM src
+) t
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@src
+#### A masked pattern was here ####
+-4209012844
+PREHOOK: query: DROP TABLE orcfile_merge2a
+PREHOOK: type: DROPTABLE
+PREHOOK: Input: default@orcfile_merge2a
+PREHOOK: Output: default@orcfile_merge2a
+POSTHOOK: query: DROP TABLE orcfile_merge2a
+POSTHOOK: type: DROPTABLE
+POSTHOOK: Input: default@orcfile_merge2a
+POSTHOOK: Output: default@orcfile_merge2a
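Both checksums come out to -4209012844: reading the merged, dynamically partitioned ORC table yields exactly the same multiset of rows as recomputing the five columns directly from src, which is the correctness property this test establishes before dropping the table. If one wanted that comparison as a single statement, a hedged sketch (not part of the test, and it must run while orcfile_merge2a still exists) could look like this:

  -- Sketch: compare the two checksums in one query.
  SELECT a.chk = b.chk AS merged_matches_source
  FROM (SELECT SUM(HASH(c)) AS chk
        FROM (SELECT TRANSFORM(*) USING 'tr \t _' AS (c)
              FROM orcfile_merge2a) x) a
  CROSS JOIN
       (SELECT SUM(HASH(c)) AS chk
        FROM (SELECT TRANSFORM(key, value, '1', PMOD(HASH(key), 10),
                  PMOD(HASH(value), 10)) USING 'tr \t _' AS (c)
              FROM src) y) b;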
