http://git-wip-us.apache.org/repos/asf/hive/blob/3bbc24d2/ql/src/test/results/clientpositive/groupby_sort_skew_1_23.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/groupby_sort_skew_1_23.q.out b/ql/src/test/results/clientpositive/groupby_sort_skew_1_23.q.out
index 3b3e227..f15c0f9 100644
--- a/ql/src/test/results/clientpositive/groupby_sort_skew_1_23.q.out
+++ b/ql/src/test/results/clientpositive/groupby_sort_skew_1_23.q.out
@@ -106,6 +106,22 @@ STAGE PLANS:
                 TotalFiles: 1
                 GatherStats: true
                 MultiFileSpray: false
+          Select Operator
+            expressions: _col0 (type: int), _col1 (type: int)
+            outputColumnNames: key, cnt
+            Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE
+            Group By Operator
+              aggregations: compute_stats(key, 'hll'), compute_stats(cnt, 'hll')
+              mode: hash
+              outputColumnNames: _col0, _col1
+              Statistics: Num rows: 1 Data size: 848 Basic stats: COMPLETE Column stats: NONE
+              Reduce Output Operator
+                null sort order:
+                sort order:
+                Statistics: Num rows: 1 Data size: 848 Basic stats: COMPLETE Column stats: NONE
+                tag: -1
+                value expressions: _col0 (type: struct<columntype:string,min:bigint,max:bigint,countnulls:bigint,bitvector:binary>), _col1 (type: struct<columntype:string,min:bigint,max:bigint,countnulls:bigint,bitvector:binary>)
+                auto parallelism: false
       Path -> Alias:
 #### A masked pattern was here ####
       Path -> Partition:
@@ -161,6 +177,35 @@ STAGE PLANS:
                     name: default.t1
       Truncated Path -> Alias:
         /t1 [t1]
+      Needs Tagging: false
+      Reduce Operator Tree:
+        Group By Operator
+          aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1)
+          mode: final
+          outputColumnNames: _col0, _col1
+          Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE
+          File Output Operator
+            compressed: false
+            GlobalTableId: 0
+#### A masked pattern was here ####
+            NumFilesPerFileSink: 1
+            Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE
+#### A masked pattern was here ####
+            table:
+                input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                properties:
+                  columns _col0,_col1
+                  columns.types struct<columntype:string,min:bigint,max:bigint,countnulls:bigint,numdistinctvalues:bigint,ndvbitvector:binary>:struct<columntype:string,min:bigint,max:bigint,countnulls:bigint,numdistinctvalues:bigint,ndvbitvector:binary>
+                  escape.delim \
+                  hive.serialization.extend.additional.nesting.levels true
+                  serialization.escape.crlf true
+                  serialization.format 1
+                  serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+                serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+            TotalFiles: 1
+            GatherStats: false
+            MultiFileSpray: false

   Stage: Stage-7
     Conditional Operator
@@ -203,6 +248,11 @@
     Stats Work
       Basic Stats Work:
 #### A masked pattern was here ####
+      Column Stats Desc:
+          Columns: key, cnt
+          Column Types: int, int
+          Table: default.outputtbl1
+          Is Table Level Stats: true

   Stage: Stage-3
     Map Reduce
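The hunks above show the two pieces that column-statistics autogather adds to a plain group-by insert: a map-side Select/Group By branch that runs compute_stats(..., 'hll') over each target column, and a Column Stats Desc entry in the Stats Work stage. As a minimal sketch, assuming the test's T1(key string, val string) fixture and the standard autogather and skew settings (this query text is illustrative and not part of the patch):

    -- Hypothetical repro; hive.stats.column.autogather and
    -- hive.groupby.skewindata are standard Hive settings.
    SET hive.stats.column.autogather=true;
    SET hive.groupby.skewindata=true;
    EXPLAIN EXTENDED
    INSERT OVERWRITE TABLE outputTbl1
    SELECT key, count(1) FROM T1 GROUP BY key;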
@@ -429,7 +479,8 @@ STAGE DEPENDENCIES:
   Stage-1 is a root stage
   Stage-2 depends on stages: Stage-1
   Stage-0 depends on stages: Stage-2
-  Stage-3 depends on stages: Stage-0
+  Stage-3 depends on stages: Stage-0, Stage-4
+  Stage-4 depends on stages: Stage-2

 STAGE PLANS:
   Stage: Stage-1
@@ -625,6 +676,33 @@ STAGE PLANS:
                 TotalFiles: 1
                 GatherStats: true
                 MultiFileSpray: false
+          Select Operator
+            expressions: _col0 (type: int), _col1 (type: string), _col2 (type: int)
+            outputColumnNames: key1, key2, cnt
+            Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE
+            Group By Operator
+              aggregations: compute_stats(key1, 'hll'), compute_stats(key2, 'hll'), compute_stats(cnt, 'hll')
+              mode: hash
+              outputColumnNames: _col0, _col1, _col2
+              Statistics: Num rows: 1 Data size: 1288 Basic stats: COMPLETE Column stats: NONE
+              File Output Operator
+                compressed: false
+                GlobalTableId: 0
+#### A masked pattern was here ####
+                NumFilesPerFileSink: 1
+                table:
+                    input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                    output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                    properties:
+                      column.name.delimiter ,
+                      columns _col0,_col1,_col2
+                      columns.types struct<columntype:string,min:bigint,max:bigint,countnulls:bigint,bitvector:binary>,struct<columntype:string,maxlength:bigint,sumlength:bigint,count:bigint,countnulls:bigint,bitvector:binary>,struct<columntype:string,min:bigint,max:bigint,countnulls:bigint,bitvector:binary>
+                      escape.delim \
+                      serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+                    serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+                TotalFiles: 1
+                GatherStats: false
+                MultiFileSpray: false

   Stage: Stage-0
     Move Operator
@@ -658,6 +736,80 @@ STAGE PLANS:
     Stats Work
       Basic Stats Work:
 #### A masked pattern was here ####
+      Column Stats Desc:
+          Columns: key1, key2, cnt
+          Column Types: int, string, int
+          Table: default.outputtbl2
+          Is Table Level Stats: true
+
+  Stage: Stage-4
+    Map Reduce
+      Map Operator Tree:
+          TableScan
+            GatherStats: false
+            Reduce Output Operator
+              null sort order:
+              sort order:
+              Statistics: Num rows: 1 Data size: 1288 Basic stats: COMPLETE Column stats: NONE
+              tag: -1
+              value expressions: _col0 (type: struct<columntype:string,min:bigint,max:bigint,countnulls:bigint,bitvector:binary>), _col1 (type: struct<columntype:string,maxlength:bigint,sumlength:bigint,count:bigint,countnulls:bigint,bitvector:binary>), _col2 (type: struct<columntype:string,min:bigint,max:bigint,countnulls:bigint,bitvector:binary>)
+              auto parallelism: false
+      Path -> Alias:
+#### A masked pattern was here ####
+      Path -> Partition:
+#### A masked pattern was here ####
+          Partition
+            base file name: -mr-10003
+            input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+            output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+            properties:
+              column.name.delimiter ,
+              columns _col0,_col1,_col2
+              columns.types struct<columntype:string,min:bigint,max:bigint,countnulls:bigint,bitvector:binary>,struct<columntype:string,maxlength:bigint,sumlength:bigint,count:bigint,countnulls:bigint,bitvector:binary>,struct<columntype:string,min:bigint,max:bigint,countnulls:bigint,bitvector:binary>
+              escape.delim \
+              serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+            serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+
+              input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+              output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+              properties:
+                column.name.delimiter ,
+                columns _col0,_col1,_col2
+                columns.types struct<columntype:string,min:bigint,max:bigint,countnulls:bigint,bitvector:binary>,struct<columntype:string,maxlength:bigint,sumlength:bigint,count:bigint,countnulls:bigint,bitvector:binary>,struct<columntype:string,min:bigint,max:bigint,countnulls:bigint,bitvector:binary>
+                escape.delim \
+                serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+              serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+      Truncated Path -> Alias:
+#### A masked pattern was here ####
+      Needs Tagging: false
+      Reduce Operator Tree:
+        Group By Operator
+          aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2)
+          mode: final
+          outputColumnNames: _col0, _col1, _col2
+          Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: NONE
+          File Output Operator
+            compressed: false
+            GlobalTableId: 0
+#### A masked pattern was here ####
+            NumFilesPerFileSink: 1
+            Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: NONE
+#### A masked pattern was here ####
+            table:
+                input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                properties:
+                  columns _col0,_col1,_col2
+                  columns.types struct<columntype:string,min:bigint,max:bigint,countnulls:bigint,numdistinctvalues:bigint,ndvbitvector:binary>:struct<columntype:string,maxlength:bigint,avglength:double,countnulls:bigint,numdistinctvalues:bigint,ndvbitvector:binary>:struct<columntype:string,min:bigint,max:bigint,countnulls:bigint,numdistinctvalues:bigint,ndvbitvector:binary>
+                  escape.delim \
+                  hive.serialization.extend.additional.nesting.levels true
+                  serialization.escape.crlf true
+                  serialization.format 1
+                  serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+                serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+            TotalFiles: 1
+            GatherStats: false
+            MultiFileSpray: false

 PREHOOK: query: INSERT OVERWRITE TABLE outputTbl2
 SELECT key, val, count(1) FROM T1 GROUP BY key, val
@@ -737,7 +889,7 @@ STAGE PLANS:
                     input format: org.apache.hadoop.mapred.TextInputFormat
                     output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
                     properties:
-                      COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"}
+                      COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"cnt":"true","key":"true"}}
                       bucket_count -1
                       column.name.delimiter ,
                       columns key,cnt
@@ -758,6 +910,22 @@ STAGE PLANS:
                 TotalFiles: 1
                 GatherStats: true
                 MultiFileSpray: false
+          Select Operator
+            expressions: _col0 (type: int), _col1 (type: int)
+            outputColumnNames: key, cnt
+            Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE
+            Group By Operator
+              aggregations: compute_stats(key, 'hll'), compute_stats(cnt, 'hll')
+              mode: hash
+              outputColumnNames: _col0, _col1
+              Statistics: Num rows: 1 Data size: 848 Basic stats: COMPLETE Column stats: NONE
+              Reduce Output Operator
+                null sort order:
+                sort order:
+                Statistics: Num rows: 1 Data size: 848 Basic stats: COMPLETE Column stats: NONE
+                tag: -1
+                value expressions: _col0 (type: struct<columntype:string,min:bigint,max:bigint,countnulls:bigint,bitvector:binary>), _col1 (type: struct<columntype:string,min:bigint,max:bigint,countnulls:bigint,bitvector:binary>)
+                auto parallelism: false
       Path -> Alias:
 #### A masked pattern was here ####
       Path -> Partition:
@@ -813,6 +981,35 @@ STAGE PLANS:
                     name: default.t1
       Truncated Path -> Alias:
         /t1 [t1]
+      Needs Tagging: false
+      Reduce Operator Tree:
+        Group By Operator
+          aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1)
+          mode: final
+          outputColumnNames: _col0, _col1
+          Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE
+          File Output Operator
+            compressed: false
+            GlobalTableId: 0
+#### A masked pattern was here ####
+            NumFilesPerFileSink: 1
+            Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE
+#### A masked pattern was here ####
+            table:
+                input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                properties:
+                  columns _col0,_col1
+                  columns.types struct<columntype:string,min:bigint,max:bigint,countnulls:bigint,numdistinctvalues:bigint,ndvbitvector:binary>:struct<columntype:string,min:bigint,max:bigint,countnulls:bigint,numdistinctvalues:bigint,ndvbitvector:binary>
+                  escape.delim \
+                  hive.serialization.extend.additional.nesting.levels true
+                  serialization.escape.crlf true
+                  serialization.format 1
+                  serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+                serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+            TotalFiles: 1
+            GatherStats: false
+            MultiFileSpray: false

   Stage: Stage-7
     Conditional Operator
@@ -832,7 +1029,7 @@ STAGE PLANS:
                     input format: org.apache.hadoop.mapred.TextInputFormat
                     output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
                     properties:
-                      COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"}
+                      COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"cnt":"true","key":"true"}}
                       bucket_count -1
                       column.name.delimiter ,
                       columns key,cnt
@@ -855,6 +1052,11 @@ STAGE PLANS:
     Stats Work
       Basic Stats Work:
 #### A masked pattern was here ####
+      Column Stats Desc:
+          Columns: key, cnt
+          Column Types: int, int
+          Table: default.outputtbl1
+          Is Table Level Stats: true

   Stage: Stage-3
     Map Reduce
@@ -870,7 +1072,7 @@ STAGE PLANS:
                     input format: org.apache.hadoop.mapred.TextInputFormat
                     output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
                     properties:
-                      COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"}
+                      COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"cnt":"true","key":"true"}}
                       bucket_count -1
                       column.name.delimiter ,
                       columns key,cnt
@@ -900,7 +1102,7 @@ STAGE PLANS:
                     input format: org.apache.hadoop.mapred.TextInputFormat
                     output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
                     properties:
-                      COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"}
+                      COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"cnt":"true","key":"true"}}
                       bucket_count -1
                       column.name.delimiter ,
                       columns key,cnt
@@ -921,7 +1123,7 @@ STAGE PLANS:
                     input format: org.apache.hadoop.mapred.TextInputFormat
                     output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
                     properties:
-                      COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"}
+                      COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"cnt":"true","key":"true"}}
                       bucket_count -1
                       column.name.delimiter ,
                       columns key,cnt
@@ -957,7 +1159,7 @@ STAGE PLANS:
                     input format: org.apache.hadoop.mapred.TextInputFormat
                     output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
                     properties:
-                      COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"}
+                      COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"cnt":"true","key":"true"}}
                       bucket_count -1
                       column.name.delimiter ,
                       columns key,cnt
@@ -987,7 +1189,7 @@ STAGE PLANS:
                     input format: org.apache.hadoop.mapred.TextInputFormat
                     output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
                     properties:
-                      COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"}
+                      COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"cnt":"true","key":"true"}}
                       bucket_count -1
                       column.name.delimiter ,
                       columns key,cnt
@@ -1008,7 +1210,7 @@ STAGE PLANS:
                     input format: org.apache.hadoop.mapred.TextInputFormat
                     output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
                     properties:
-                      COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"}
+                      COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"cnt":"true","key":"true"}}
{"BASIC_STATS":"true","COLUMN_STATS":{"cnt":"true","key":"true"}} bucket_count -1 column.name.delimiter , columns key,cnt @@ -1112,7 +1314,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"cnt":"true","key":"true"}} bucket_count -1 column.name.delimiter , columns key,cnt @@ -1133,6 +1335,22 @@ STAGE PLANS: TotalFiles: 1 GatherStats: true MultiFileSpray: false + Select Operator + expressions: _col0 (type: int), _col1 (type: int) + outputColumnNames: key, cnt + Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 'hll'), compute_stats(cnt, 'hll') + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 848 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + null sort order: + sort order: + Statistics: Num rows: 1 Data size: 848 Basic stats: COMPLETE Column stats: NONE + tag: -1 + value expressions: _col0 (type: struct<columntype:string,min:bigint,max:bigint,countnulls:bigint,bitvector:binary>), _col1 (type: struct<columntype:string,min:bigint,max:bigint,countnulls:bigint,bitvector:binary>) + auto parallelism: false Path -> Alias: #### A masked pattern was here #### Path -> Partition: @@ -1188,6 +1406,35 @@ STAGE PLANS: name: default.t1 Truncated Path -> Alias: /t1 [t1] + Needs Tagging: false + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: final + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + columns _col0,_col1 + columns.types struct<columntype:string,min:bigint,max:bigint,countnulls:bigint,numdistinctvalues:bigint,ndvbitvector:binary>:struct<columntype:string,min:bigint,max:bigint,countnulls:bigint,numdistinctvalues:bigint,ndvbitvector:binary> + escape.delim \ + hive.serialization.extend.additional.nesting.levels true + serialization.escape.crlf true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false Stage: Stage-7 Conditional Operator @@ -1207,7 +1454,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"cnt":"true","key":"true"}} bucket_count -1 column.name.delimiter , columns key,cnt @@ -1230,6 +1477,11 @@ STAGE PLANS: Stats Work Basic Stats Work: #### A masked pattern was here #### + Column Stats Desc: + Columns: key, cnt + Column Types: int, int + Table: default.outputtbl1 + Is Table Level Stats: true Stage: Stage-3 Map Reduce @@ -1245,7 +1497,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: 
                     output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
                     properties:
-                      COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"}
+                      COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"cnt":"true","key":"true"}}
                       bucket_count -1
                       column.name.delimiter ,
                       columns key,cnt
@@ -1275,7 +1527,7 @@ STAGE PLANS:
                     input format: org.apache.hadoop.mapred.TextInputFormat
                     output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
                     properties:
-                      COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"}
+                      COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"cnt":"true","key":"true"}}
                       bucket_count -1
                       column.name.delimiter ,
                       columns key,cnt
@@ -1296,7 +1548,7 @@ STAGE PLANS:
                     input format: org.apache.hadoop.mapred.TextInputFormat
                     output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
                     properties:
-                      COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"}
+                      COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"cnt":"true","key":"true"}}
                       bucket_count -1
                       column.name.delimiter ,
                       columns key,cnt
@@ -1332,7 +1584,7 @@ STAGE PLANS:
                     input format: org.apache.hadoop.mapred.TextInputFormat
                     output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
                     properties:
-                      COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"}
+                      COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"cnt":"true","key":"true"}}
                       bucket_count -1
                       column.name.delimiter ,
                       columns key,cnt
@@ -1362,7 +1614,7 @@ STAGE PLANS:
                     input format: org.apache.hadoop.mapred.TextInputFormat
                     output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
                     properties:
-                      COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"}
+                      COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"cnt":"true","key":"true"}}
                       bucket_count -1
                       column.name.delimiter ,
                       columns key,cnt
@@ -1383,7 +1635,7 @@ STAGE PLANS:
                     input format: org.apache.hadoop.mapred.TextInputFormat
                     output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
                     properties:
-                      COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"}
+                      COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"cnt":"true","key":"true"}}
                       bucket_count -1
                       column.name.delimiter ,
                       columns key,cnt
@@ -1516,6 +1768,22 @@ STAGE PLANS:
                 TotalFiles: 1
                 GatherStats: true
                 MultiFileSpray: false
+          Select Operator
+            expressions: _col0 (type: int), _col1 (type: int), _col2 (type: int)
+            outputColumnNames: key1, key2, cnt
+            Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE
+            Group By Operator
+              aggregations: compute_stats(key1, 'hll'), compute_stats(key2, 'hll'), compute_stats(cnt, 'hll')
+              mode: hash
+              outputColumnNames: _col0, _col1, _col2
+              Statistics: Num rows: 1 Data size: 1272 Basic stats: COMPLETE Column stats: NONE
+              Reduce Output Operator
+                null sort order:
+                sort order:
+                Statistics: Num rows: 1 Data size: 1272 Basic stats: COMPLETE Column stats: NONE
+                tag: -1
+                value expressions: _col0 (type: struct<columntype:string,min:bigint,max:bigint,countnulls:bigint,bitvector:binary>), _col1 (type: struct<columntype:string,min:bigint,max:bigint,countnulls:bigint,bitvector:binary>), _col2 (type: struct<columntype:string,min:bigint,max:bigint,countnulls:bigint,bitvector:binary>)
+                auto parallelism: false
       Path -> Alias:
 #### A masked pattern was here ####
       Path -> Partition:
@@ -1571,6 +1839,35 @@ STAGE PLANS:
                     name: default.t1
       Truncated Path -> Alias:
         /t1 [t1]
+      Needs Tagging: false
+      Reduce Operator Tree:
+        Group By Operator
+          aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2)
+          mode: final
+          outputColumnNames: _col0, _col1, _col2
+          Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: NONE
+          File Output Operator
+            compressed: false
+            GlobalTableId: 0
+#### A masked pattern was here ####
+            NumFilesPerFileSink: 1
+            Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: NONE
+#### A masked pattern was here ####
+            table:
+                input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                properties:
+                  columns _col0,_col1,_col2
+                  columns.types struct<columntype:string,min:bigint,max:bigint,countnulls:bigint,numdistinctvalues:bigint,ndvbitvector:binary>:struct<columntype:string,min:bigint,max:bigint,countnulls:bigint,numdistinctvalues:bigint,ndvbitvector:binary>:struct<columntype:string,min:bigint,max:bigint,countnulls:bigint,numdistinctvalues:bigint,ndvbitvector:binary>
+                  escape.delim \
+                  hive.serialization.extend.additional.nesting.levels true
+                  serialization.escape.crlf true
+                  serialization.format 1
+                  serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+                serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+            TotalFiles: 1
+            GatherStats: false
+            MultiFileSpray: false

   Stage: Stage-7
     Conditional Operator
@@ -1613,6 +1910,11 @@ STAGE PLANS:
     Stats Work
       Basic Stats Work:
 #### A masked pattern was here ####
+      Column Stats Desc:
+          Columns: key1, key2, cnt
+          Column Types: int, int, int
+          Table: default.outputtbl3
+          Is Table Level Stats: true

   Stage: Stage-3
     Map Reduce
@@ -1840,7 +2142,8 @@ STAGE DEPENDENCIES:
   Stage-1 is a root stage
   Stage-2 depends on stages: Stage-1
   Stage-0 depends on stages: Stage-2
-  Stage-3 depends on stages: Stage-0
+  Stage-3 depends on stages: Stage-0, Stage-4
+  Stage-4 depends on stages: Stage-2

 STAGE PLANS:
   Stage: Stage-1
@@ -2036,6 +2339,33 @@ STAGE PLANS:
                 TotalFiles: 1
                 GatherStats: true
                 MultiFileSpray: false
+          Select Operator
+            expressions: _col0 (type: int), _col1 (type: int), _col2 (type: string), _col3 (type: int)
+            outputColumnNames: key1, key2, key3, cnt
+            Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE
+            Group By Operator
+              aggregations: compute_stats(key1, 'hll'), compute_stats(key2, 'hll'), compute_stats(key3, 'hll'), compute_stats(cnt, 'hll')
+              mode: hash
+              outputColumnNames: _col0, _col1, _col2, _col3
+              Statistics: Num rows: 1 Data size: 1712 Basic stats: COMPLETE Column stats: NONE
+              File Output Operator
+                compressed: false
+                GlobalTableId: 0
+#### A masked pattern was here ####
+                NumFilesPerFileSink: 1
+                table:
+                    input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                    output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                    properties:
+                      column.name.delimiter ,
+                      columns _col0,_col1,_col2,_col3
+                      columns.types struct<columntype:string,min:bigint,max:bigint,countnulls:bigint,bitvector:binary>,struct<columntype:string,min:bigint,max:bigint,countnulls:bigint,bitvector:binary>,struct<columntype:string,maxlength:bigint,sumlength:bigint,count:bigint,countnulls:bigint,bitvector:binary>,struct<columntype:string,min:bigint,max:bigint,countnulls:bigint,bitvector:binary>
+                      escape.delim \
+                      serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+                    serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+                TotalFiles: 1
+                GatherStats: false
+                MultiFileSpray: false

   Stage: Stage-0
     Move Operator
@@ -2069,33 +2399,107 @@ STAGE PLANS:
     Stats Work
       Basic Stats Work:
 #### A masked pattern was here ####
+      Column Stats Desc:
+          Columns: key1, key2, key3, cnt
+          Column Types: int, int, string, int
+          Table: default.outputtbl4
+          Is Table Level Stats: true

-PREHOOK: query: INSERT OVERWRITE TABLE outputTbl4
-SELECT key, 1, val, count(1) FROM T1 GROUP BY key, 1, val
-PREHOOK: type: QUERY
-PREHOOK: Input: default@t1
-PREHOOK: Output: default@outputtbl4
-POSTHOOK: query: INSERT OVERWRITE TABLE outputTbl4
-SELECT key, 1, val, count(1) FROM T1 GROUP BY key, 1, val
-POSTHOOK: type: QUERY
-POSTHOOK: Input: default@t1
-POSTHOOK: Output: default@outputtbl4
-POSTHOOK: Lineage: outputtbl4.cnt EXPRESSION [(t1)t1.null, ]
-POSTHOOK: Lineage: outputtbl4.key1 EXPRESSION [(t1)t1.FieldSchema(name:key, type:string, comment:null), ]
-POSTHOOK: Lineage: outputtbl4.key2 SIMPLE []
-POSTHOOK: Lineage: outputtbl4.key3 SIMPLE [(t1)t1.FieldSchema(name:val, type:string, comment:null), ]
-PREHOOK: query: SELECT * FROM outputTbl4
-PREHOOK: type: QUERY
-PREHOOK: Input: default@outputtbl4
+  Stage: Stage-4
+    Map Reduce
+      Map Operator Tree:
+          TableScan
+            GatherStats: false
+            Reduce Output Operator
+              null sort order:
+              sort order:
+              Statistics: Num rows: 1 Data size: 1712 Basic stats: COMPLETE Column stats: NONE
+              tag: -1
+              value expressions: _col0 (type: struct<columntype:string,min:bigint,max:bigint,countnulls:bigint,bitvector:binary>), _col1 (type: struct<columntype:string,min:bigint,max:bigint,countnulls:bigint,bitvector:binary>), _col2 (type: struct<columntype:string,maxlength:bigint,sumlength:bigint,count:bigint,countnulls:bigint,bitvector:binary>), _col3 (type: struct<columntype:string,min:bigint,max:bigint,countnulls:bigint,bitvector:binary>)
+              auto parallelism: false
+      Path -> Alias:
 #### A masked pattern was here ####
-POSTHOOK: query: SELECT * FROM outputTbl4
-POSTHOOK: type: QUERY
-POSTHOOK: Input: default@outputtbl4
+      Path -> Partition:
 #### A masked pattern was here ####
-1 1 11 1
-2 1 12 1
-3 1 13 1
-7 1 17 1
+          Partition
+            base file name: -mr-10003
+            input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+            output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+            properties:
+              column.name.delimiter ,
+              columns _col0,_col1,_col2,_col3
+              columns.types struct<columntype:string,min:bigint,max:bigint,countnulls:bigint,bitvector:binary>,struct<columntype:string,min:bigint,max:bigint,countnulls:bigint,bitvector:binary>,struct<columntype:string,maxlength:bigint,sumlength:bigint,count:bigint,countnulls:bigint,bitvector:binary>,struct<columntype:string,min:bigint,max:bigint,countnulls:bigint,bitvector:binary>
+              escape.delim \
+              serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+            serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+
+              input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+              output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+              properties:
+                column.name.delimiter ,
+                columns _col0,_col1,_col2,_col3
+                columns.types struct<columntype:string,min:bigint,max:bigint,countnulls:bigint,bitvector:binary>,struct<columntype:string,min:bigint,max:bigint,countnulls:bigint,bitvector:binary>,struct<columntype:string,maxlength:bigint,sumlength:bigint,count:bigint,countnulls:bigint,bitvector:binary>,struct<columntype:string,min:bigint,max:bigint,countnulls:bigint,bitvector:binary>
+                escape.delim \
+                serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+              serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+      Truncated Path -> Alias:
+#### A masked pattern was here ####
+      Needs Tagging: false
+      Reduce Operator Tree:
+        Group By Operator
+          aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2), compute_stats(VALUE._col3)
+          mode: final
+          outputColumnNames: _col0, _col1, _col2, _col3
+          Statistics: Num rows: 1 Data size: 1760 Basic stats: COMPLETE Column stats: NONE
+          File Output Operator
+            compressed: false
+            GlobalTableId: 0
+#### A masked pattern was here ####
+            NumFilesPerFileSink: 1
+            Statistics: Num rows: 1 Data size: 1760 Basic stats: COMPLETE Column stats: NONE
+#### A masked pattern was here ####
+            table:
+                input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                properties:
+                  columns _col0,_col1,_col2,_col3
+                  columns.types struct<columntype:string,min:bigint,max:bigint,countnulls:bigint,numdistinctvalues:bigint,ndvbitvector:binary>:struct<columntype:string,min:bigint,max:bigint,countnulls:bigint,numdistinctvalues:bigint,ndvbitvector:binary>:struct<columntype:string,maxlength:bigint,avglength:double,countnulls:bigint,numdistinctvalues:bigint,ndvbitvector:binary>:struct<columntype:string,min:bigint,max:bigint,countnulls:bigint,numdistinctvalues:bigint,ndvbitvector:binary>
+                  escape.delim \
+                  hive.serialization.extend.additional.nesting.levels true
+                  serialization.escape.crlf true
+                  serialization.format 1
+                  serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+                serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+            TotalFiles: 1
+            GatherStats: false
+            MultiFileSpray: false
+
+PREHOOK: query: INSERT OVERWRITE TABLE outputTbl4
+SELECT key, 1, val, count(1) FROM T1 GROUP BY key, 1, val
+PREHOOK: type: QUERY
+PREHOOK: Input: default@t1
+PREHOOK: Output: default@outputtbl4
+POSTHOOK: query: INSERT OVERWRITE TABLE outputTbl4
+SELECT key, 1, val, count(1) FROM T1 GROUP BY key, 1, val
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@t1
+POSTHOOK: Output: default@outputtbl4
+POSTHOOK: Lineage: outputtbl4.cnt EXPRESSION [(t1)t1.null, ]
+POSTHOOK: Lineage: outputtbl4.key1 EXPRESSION [(t1)t1.FieldSchema(name:key, type:string, comment:null), ]
+POSTHOOK: Lineage: outputtbl4.key2 SIMPLE []
+POSTHOOK: Lineage: outputtbl4.key3 SIMPLE [(t1)t1.FieldSchema(name:val, type:string, comment:null), ]
+PREHOOK: query: SELECT * FROM outputTbl4
+PREHOOK: type: QUERY
+PREHOOK: Input: default@outputtbl4
+#### A masked pattern was here ####
+POSTHOOK: query: SELECT * FROM outputTbl4
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@outputtbl4
+#### A masked pattern was here ####
+1 1 11 1
+2 1 12 1
+3 1 13 1
+7 1 17 1
 8 1 18 1
 8 1 28 1
 PREHOOK: query: EXPLAIN EXTENDED
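Once the rewritten plan runs, the outputTbl4 columns carry column statistics in the metastore, which is what the new COLUMN_STATS entries inside COLUMN_STATS_ACCURATE record. A hedged way to spot-check the gathered values (plain HiveQL, not part of this diff):

    -- Column-level DESCRIBE prints min/max/num_nulls/distinct_count for one column.
    DESCRIBE FORMATTED outputTbl4 key1;
    DESCRIBE FORMATTED outputTbl4 cnt;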
@@ -2110,7 +2514,8 @@ STAGE DEPENDENCIES:
   Stage-1 is a root stage
   Stage-2 depends on stages: Stage-1
   Stage-0 depends on stages: Stage-2
-  Stage-3 depends on stages: Stage-0
+  Stage-3 depends on stages: Stage-0, Stage-4
+  Stage-4 depends on stages: Stage-2

 STAGE PLANS:
   Stage: Stage-1
@@ -2285,7 +2690,7 @@ STAGE PLANS:
                     input format: org.apache.hadoop.mapred.TextInputFormat
                     output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
                     properties:
-                      COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"}
+                      COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"cnt":"true","key1":"true","key2":"true"}}
                       bucket_count -1
                       column.name.delimiter ,
                       columns key1,key2,cnt
@@ -2306,6 +2711,33 @@ STAGE PLANS:
                 TotalFiles: 1
                 GatherStats: true
                 MultiFileSpray: false
+          Select Operator
+            expressions: _col0 (type: int), _col1 (type: int), _col2 (type: int)
+            outputColumnNames: key1, key2, cnt
+            Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE
+            Group By Operator
+              aggregations: compute_stats(key1, 'hll'), compute_stats(key2, 'hll'), compute_stats(cnt, 'hll')
+              mode: hash
+              outputColumnNames: _col0, _col1, _col2
+              Statistics: Num rows: 1 Data size: 1272 Basic stats: COMPLETE Column stats: NONE
+              File Output Operator
+                compressed: false
+                GlobalTableId: 0
+#### A masked pattern was here ####
+                NumFilesPerFileSink: 1
+                table:
+                    input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                    output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                    properties:
+                      column.name.delimiter ,
+                      columns _col0,_col1,_col2
+                      columns.types struct<columntype:string,min:bigint,max:bigint,countnulls:bigint,bitvector:binary>,struct<columntype:string,min:bigint,max:bigint,countnulls:bigint,bitvector:binary>,struct<columntype:string,min:bigint,max:bigint,countnulls:bigint,bitvector:binary>
+                      escape.delim \
+                      serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+                    serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+                TotalFiles: 1
+                GatherStats: false
+                MultiFileSpray: false

   Stage: Stage-0
     Move Operator
@@ -2316,7 +2748,7 @@ STAGE PLANS:
                     input format: org.apache.hadoop.mapred.TextInputFormat
                     output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
                     properties:
-                      COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"}
+                      COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"cnt":"true","key1":"true","key2":"true"}}
                       bucket_count -1
                       column.name.delimiter ,
                       columns key1,key2,cnt
@@ -2339,6 +2771,80 @@ STAGE PLANS:
     Stats Work
       Basic Stats Work:
 #### A masked pattern was here ####
+      Column Stats Desc:
+          Columns: key1, key2, cnt
+          Column Types: int, int, int
+          Table: default.outputtbl3
+          Is Table Level Stats: true
+
+  Stage: Stage-4
+    Map Reduce
+      Map Operator Tree:
+          TableScan
+            GatherStats: false
+            Reduce Output Operator
+              null sort order:
+              sort order:
+              Statistics: Num rows: 1 Data size: 1272 Basic stats: COMPLETE Column stats: NONE
+              tag: -1
+              value expressions: _col0 (type: struct<columntype:string,min:bigint,max:bigint,countnulls:bigint,bitvector:binary>), _col1 (type: struct<columntype:string,min:bigint,max:bigint,countnulls:bigint,bitvector:binary>), _col2 (type: struct<columntype:string,min:bigint,max:bigint,countnulls:bigint,bitvector:binary>)
+              auto parallelism: false
+      Path -> Alias:
+#### A masked pattern was here ####
+      Path -> Partition:
+#### A masked pattern was here ####
+          Partition
+            base file name: -mr-10003
+            input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+            output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+            properties:
+              column.name.delimiter ,
+              columns _col0,_col1,_col2
+              columns.types struct<columntype:string,min:bigint,max:bigint,countnulls:bigint,bitvector:binary>,struct<columntype:string,min:bigint,max:bigint,countnulls:bigint,bitvector:binary>,struct<columntype:string,min:bigint,max:bigint,countnulls:bigint,bitvector:binary>
+              escape.delim \
+              serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+            serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+
+              input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+              output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+              properties:
+                column.name.delimiter ,
+                columns _col0,_col1,_col2
+                columns.types struct<columntype:string,min:bigint,max:bigint,countnulls:bigint,bitvector:binary>,struct<columntype:string,min:bigint,max:bigint,countnulls:bigint,bitvector:binary>,struct<columntype:string,min:bigint,max:bigint,countnulls:bigint,bitvector:binary>
+                escape.delim \
+                serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+              serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+      Truncated Path -> Alias:
+#### A masked pattern was here ####
+      Needs Tagging: false
+      Reduce Operator Tree:
+        Group By Operator
+          aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2)
+          mode: final
+          outputColumnNames: _col0, _col1, _col2
+          Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: NONE
+          File Output Operator
+            compressed: false
+            GlobalTableId: 0
+#### A masked pattern was here ####
+            NumFilesPerFileSink: 1
+            Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: NONE
+#### A masked pattern was here ####
+            table:
+                input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                properties:
+                  columns _col0,_col1,_col2
+                  columns.types struct<columntype:string,min:bigint,max:bigint,countnulls:bigint,numdistinctvalues:bigint,ndvbitvector:binary>:struct<columntype:string,min:bigint,max:bigint,countnulls:bigint,numdistinctvalues:bigint,ndvbitvector:binary>:struct<columntype:string,min:bigint,max:bigint,countnulls:bigint,numdistinctvalues:bigint,ndvbitvector:binary>
+                  escape.delim \
+                  hive.serialization.extend.additional.nesting.levels true
+                  serialization.escape.crlf true
+                  serialization.format 1
+                  serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+                serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+            TotalFiles: 1
+            GatherStats: false
+            MultiFileSpray: false

 PREHOOK: query: INSERT OVERWRITE TABLE outputTbl3
 SELECT key, key + 1, count(1) FROM T1 GROUP BY key, key + 1
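The aggregation these stats stages run is the compute_stats UDAF with the 'hll' estimator argument visible throughout the new plans; each call folds one column into one of the struct<columntype,...> values listed in the columns.types properties. A standalone sketch of the same primitive (illustrative only; the patch itself only changes generated plans):

    -- One row out, one stats struct per aggregated column.
    SELECT compute_stats(key, 'hll'), compute_stats(val, 'hll') FROM T1;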
@@ -2382,7 +2888,8 @@ STAGE DEPENDENCIES:
   Stage-1 is a root stage
   Stage-2 depends on stages: Stage-1
   Stage-0 depends on stages: Stage-2
-  Stage-3 depends on stages: Stage-0
+  Stage-3 depends on stages: Stage-0, Stage-4
+  Stage-4 depends on stages: Stage-2

 STAGE PLANS:
   Stage: Stage-1
@@ -2567,7 +3074,7 @@ STAGE PLANS:
                     input format: org.apache.hadoop.mapred.TextInputFormat
                     output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
                     properties:
-                      COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"}
+                      COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"cnt":"true","key":"true"}}
                       bucket_count -1
                       column.name.delimiter ,
                       columns key,cnt
@@ -2588,6 +3095,33 @@ STAGE PLANS:
                 TotalFiles: 1
                 GatherStats: true
                 MultiFileSpray: false
+          Select Operator
+            expressions: _col0 (type: int), _col1 (type: int)
+            outputColumnNames: key, cnt
+            Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE
+            Group By Operator
+              aggregations: compute_stats(key, 'hll'), compute_stats(cnt, 'hll')
+              mode: hash
+              outputColumnNames: _col0, _col1
+              Statistics: Num rows: 1 Data size: 848 Basic stats: COMPLETE Column stats: NONE
+              File Output Operator
+                compressed: false
+                GlobalTableId: 0
+#### A masked pattern was here ####
+                NumFilesPerFileSink: 1
+                table:
+                    input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                    output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                    properties:
+                      column.name.delimiter ,
+                      columns _col0,_col1
+                      columns.types struct<columntype:string,min:bigint,max:bigint,countnulls:bigint,bitvector:binary>,struct<columntype:string,min:bigint,max:bigint,countnulls:bigint,bitvector:binary>
+                      escape.delim \
+                      serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+                    serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+                TotalFiles: 1
+                GatherStats: false
+                MultiFileSpray: false

   Stage: Stage-0
     Move Operator
@@ -2598,7 +3132,7 @@ STAGE PLANS:
                     input format: org.apache.hadoop.mapred.TextInputFormat
                     output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
                     properties:
-                      COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"}
+                      COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"cnt":"true","key":"true"}}
                       bucket_count -1
                       column.name.delimiter ,
                       columns key,cnt
@@ -2621,6 +3155,80 @@ STAGE PLANS:
     Stats Work
       Basic Stats Work:
 #### A masked pattern was here ####
+      Column Stats Desc:
+          Columns: key, cnt
+          Column Types: int, int
+          Table: default.outputtbl1
+          Is Table Level Stats: true
+
+  Stage: Stage-4
+    Map Reduce
+      Map Operator Tree:
+          TableScan
+            GatherStats: false
+            Reduce Output Operator
+              null sort order:
+              sort order:
+              Statistics: Num rows: 1 Data size: 848 Basic stats: COMPLETE Column stats: NONE
+              tag: -1
+              value expressions: _col0 (type: struct<columntype:string,min:bigint,max:bigint,countnulls:bigint,bitvector:binary>), _col1 (type: struct<columntype:string,min:bigint,max:bigint,countnulls:bigint,bitvector:binary>)
+              auto parallelism: false
+      Path -> Alias:
+#### A masked pattern was here ####
+      Path -> Partition:
+#### A masked pattern was here ####
+          Partition
+            base file name: -mr-10003
+            input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+            output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+            properties:
+              column.name.delimiter ,
+              columns _col0,_col1
+              columns.types struct<columntype:string,min:bigint,max:bigint,countnulls:bigint,bitvector:binary>,struct<columntype:string,min:bigint,max:bigint,countnulls:bigint,bitvector:binary>
+              escape.delim \
+              serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+            serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+
+              input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+              output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+              properties:
+                column.name.delimiter ,
+                columns _col0,_col1
+                columns.types struct<columntype:string,min:bigint,max:bigint,countnulls:bigint,bitvector:binary>,struct<columntype:string,min:bigint,max:bigint,countnulls:bigint,bitvector:binary>
+                escape.delim \
+                serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+              serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+      Truncated Path -> Alias:
+#### A masked pattern was here ####
+      Needs Tagging: false
+      Reduce Operator Tree:
+        Group By Operator
+          aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1)
+          mode: final
+          outputColumnNames: _col0, _col1
+          Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE
+          File Output Operator
+            compressed: false
+            GlobalTableId: 0
+#### A masked pattern was here ####
+            NumFilesPerFileSink: 1
+            Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE
+#### A masked pattern was here ####
+            table:
+                input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                properties:
+                  columns _col0,_col1
+                  columns.types struct<columntype:string,min:bigint,max:bigint,countnulls:bigint,numdistinctvalues:bigint,ndvbitvector:binary>:struct<columntype:string,min:bigint,max:bigint,countnulls:bigint,numdistinctvalues:bigint,ndvbitvector:binary>
+                  escape.delim \
+                  hive.serialization.extend.additional.nesting.levels true
+                  serialization.escape.crlf true
+                  serialization.format 1
+                  serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+                serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+            TotalFiles: 1
+            GatherStats: false
+            MultiFileSpray: false

 PREHOOK: query: INSERT OVERWRITE TABLE outputTbl1
 SELECT cast(key + key as string), sum(cnt) from
@@ -2712,7 +3320,7 @@ STAGE PLANS:
                     input format: org.apache.hadoop.mapred.TextInputFormat
                     output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
                     properties:
-                      COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"}
+                      COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"cnt":"true","key":"true"}}
                       bucket_count -1
                       column.name.delimiter ,
                       columns key,cnt
@@ -2733,6 +3341,22 @@ STAGE PLANS:
                 TotalFiles: 1
                 GatherStats: true
                 MultiFileSpray: false
+          Select Operator
+            expressions: _col0 (type: int), _col1 (type: int)
+            outputColumnNames: key, cnt
+            Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE
+            Group By Operator
+              aggregations: compute_stats(key, 'hll'), compute_stats(cnt, 'hll')
+              mode: hash
+              outputColumnNames: _col0, _col1
+              Statistics: Num rows: 1 Data size: 848 Basic stats: COMPLETE Column stats: NONE
+              Reduce Output Operator
+                null sort order:
+                sort order:
+                Statistics: Num rows: 1 Data size: 848 Basic stats: COMPLETE Column stats: NONE
+                tag: -1
+                value expressions: _col0 (type: struct<columntype:string,min:bigint,max:bigint,countnulls:bigint,bitvector:binary>), _col1 (type: struct<columntype:string,min:bigint,max:bigint,countnulls:bigint,bitvector:binary>)
+                auto parallelism: false
           TableScan
             alias: t1
             Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE
@@ -2764,7 +3388,7 @@ STAGE PLANS:
                     input format: org.apache.hadoop.mapred.TextInputFormat
                     output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
                     properties:
-                      COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"}
+                      COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"cnt":"true","key":"true"}}
                       bucket_count -1
                       column.name.delimiter ,
                       columns key,cnt
@@ -2785,6 +3409,22 @@ STAGE PLANS:
                 TotalFiles: 1
                 GatherStats: true
                 MultiFileSpray: false
+          Select Operator
+            expressions: _col0 (type: int), _col1 (type: int)
+            outputColumnNames: key, cnt
+            Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE
+            Group By Operator
+              aggregations: compute_stats(key, 'hll'), compute_stats(cnt, 'hll')
+              mode: hash
+              outputColumnNames: _col0, _col1
+              Statistics: Num rows: 1 Data size: 848 Basic stats: COMPLETE Column stats: NONE
+              Reduce Output Operator
+                null sort order:
+                sort order:
+                Statistics: Num rows: 1 Data size: 848 Basic stats: COMPLETE Column stats: NONE
+                tag: -1
+                value expressions: _col0 (type: struct<columntype:string,min:bigint,max:bigint,countnulls:bigint,bitvector:binary>), _col1 (type: struct<columntype:string,min:bigint,max:bigint,countnulls:bigint,bitvector:binary>)
+                auto parallelism: false
       Path -> Alias:
 #### A masked pattern was here ####
       Path -> Partition:
@@ -2840,6 +3480,35 @@ STAGE PLANS:
                     name: default.t1
       Truncated Path -> Alias:
         /t1 [null-subquery1:$hdt$_0-subquery1:t1, null-subquery2:$hdt$_0-subquery2:t1]
+      Needs Tagging: false
+      Reduce Operator Tree:
+        Group By Operator
+          aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1)
+          mode: final
+          outputColumnNames: _col0, _col1
+          Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE
+          File Output Operator
+            compressed: false
+            GlobalTableId: 0
+#### A masked pattern was here ####
+            NumFilesPerFileSink: 1
+            Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE
+#### A masked pattern was here ####
+            table:
+                input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                properties:
+                  columns _col0,_col1
+                  columns.types struct<columntype:string,min:bigint,max:bigint,countnulls:bigint,numdistinctvalues:bigint,ndvbitvector:binary>:struct<columntype:string,min:bigint,max:bigint,countnulls:bigint,numdistinctvalues:bigint,ndvbitvector:binary>
+                  escape.delim \
+                  hive.serialization.extend.additional.nesting.levels true
+                  serialization.escape.crlf true
+                  serialization.format 1
+                  serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+                serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+            TotalFiles: 1
+            GatherStats: false
+            MultiFileSpray: false

   Stage: Stage-7
     Conditional Operator
@@ -2859,7 +3528,7 @@ STAGE PLANS:
                     input format: org.apache.hadoop.mapred.TextInputFormat
                     output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
                     properties:
-                      COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"}
+                      COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"cnt":"true","key":"true"}}
                       bucket_count -1
                       column.name.delimiter ,
                       columns key,cnt
@@ -2882,6 +3551,11 @@ STAGE PLANS:
     Stats Work
       Basic Stats Work:
 #### A masked pattern was here ####
+      Column Stats Desc:
+          Columns: key, cnt
+          Column Types: int, int
+          Table: default.outputtbl1
+          Is Table Level Stats: true

   Stage: Stage-3
     Map Reduce
@@ -2897,7 +3571,7 @@ STAGE PLANS:
                     input format: org.apache.hadoop.mapred.TextInputFormat
                     output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
                     properties:
-                      COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"}
+                      COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"cnt":"true","key":"true"}}
                       bucket_count -1
                       column.name.delimiter ,
                       columns key,cnt
@@ -2927,7 +3601,7 @@ STAGE PLANS:
                     input format: org.apache.hadoop.mapred.TextInputFormat
                     output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
                     properties:
-                      COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"}
+                      COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"cnt":"true","key":"true"}}
                       bucket_count -1
                       column.name.delimiter ,
                       columns key,cnt
@@ -2948,7 +3622,7 @@ STAGE PLANS:
                     input format: org.apache.hadoop.mapred.TextInputFormat
                     output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
                     properties:
-                      COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"}
+                      COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"cnt":"true","key":"true"}}
                       bucket_count -1
                       column.name.delimiter ,
                       columns key,cnt
@@ -2984,7 +3658,7 @@ STAGE PLANS:
                     input format: org.apache.hadoop.mapred.TextInputFormat
                     output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
                     properties:
-                      COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"}
+                      COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"cnt":"true","key":"true"}}
                       bucket_count -1
                       column.name.delimiter ,
                       columns key,cnt
@@ -3014,7 +3688,7 @@ STAGE PLANS:
                     input format: org.apache.hadoop.mapred.TextInputFormat
                     output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
                     properties:
-                      COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"}
+                      COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"cnt":"true","key":"true"}}
                       bucket_count -1
                       column.name.delimiter ,
                       columns key,cnt
@@ -3035,7 +3709,7 @@ STAGE PLANS:
                     input format: org.apache.hadoop.mapred.TextInputFormat
                     output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
                     properties:
-                      COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"}
+                      COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"cnt":"true","key":"true"}}
{"BASIC_STATS":"true","COLUMN_STATS":{"cnt":"true","key":"true"}} bucket_count -1 column.name.delimiter , columns key,cnt @@ -3344,7 +4018,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"cnt":"true","key":"true"}} bucket_count -1 column.name.delimiter , columns key,cnt @@ -3365,6 +4039,22 @@ STAGE PLANS: TotalFiles: 1 GatherStats: true MultiFileSpray: false + Select Operator + expressions: _col0 (type: int), _col1 (type: int) + outputColumnNames: key, cnt + Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 'hll'), compute_stats(cnt, 'hll') + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 848 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + null sort order: + sort order: + Statistics: Num rows: 1 Data size: 848 Basic stats: COMPLETE Column stats: NONE + tag: -1 + value expressions: _col0 (type: struct<columntype:string,min:bigint,max:bigint,countnulls:bigint,bitvector:binary>), _col1 (type: struct<columntype:string,min:bigint,max:bigint,countnulls:bigint,bitvector:binary>) + auto parallelism: false TableScan GatherStats: false Union @@ -3384,7 +4074,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"cnt":"true","key":"true"}} bucket_count -1 column.name.delimiter , columns key,cnt @@ -3405,6 +4095,22 @@ STAGE PLANS: TotalFiles: 1 GatherStats: true MultiFileSpray: false + Select Operator + expressions: _col0 (type: int), _col1 (type: int) + outputColumnNames: key, cnt + Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 'hll'), compute_stats(cnt, 'hll') + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 848 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + null sort order: + sort order: + Statistics: Num rows: 1 Data size: 848 Basic stats: COMPLETE Column stats: NONE + tag: -1 + value expressions: _col0 (type: struct<columntype:string,min:bigint,max:bigint,countnulls:bigint,bitvector:binary>), _col1 (type: struct<columntype:string,min:bigint,max:bigint,countnulls:bigint,bitvector:binary>) + auto parallelism: false Path -> Alias: #### A masked pattern was here #### Path -> Partition: @@ -3483,6 +4189,35 @@ STAGE PLANS: Truncated Path -> Alias: /t1 [null-subquery1:$hdt$_0-subquery1:t1] #### A masked pattern was here #### + Needs Tagging: false + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: final + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + columns 
+                  columns.types struct<columntype:string,min:bigint,max:bigint,countnulls:bigint,numdistinctvalues:bigint,ndvbitvector:binary>:struct<columntype:string,min:bigint,max:bigint,countnulls:bigint,numdistinctvalues:bigint,ndvbitvector:binary>
+                  escape.delim \
+                  hive.serialization.extend.additional.nesting.levels true
+                  serialization.escape.crlf true
+                  serialization.format 1
+                  serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+                serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+            TotalFiles: 1
+            GatherStats: false
+            MultiFileSpray: false

   Stage: Stage-8
     Conditional Operator
@@ -3502,7 +4237,7 @@ STAGE PLANS:
                     input format: org.apache.hadoop.mapred.TextInputFormat
                     output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
                     properties:
-                      COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"}
+                      COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"cnt":"true","key":"true"}}
                       bucket_count -1
                       column.name.delimiter ,
                       columns key,cnt
@@ -3525,6 +4260,11 @@ STAGE PLANS:
     Stats Work
       Basic Stats Work:
 #### A masked pattern was here ####
+      Column Stats Desc:
+          Columns: key, cnt
+          Column Types: int, int
+          Table: default.outputtbl1
+          Is Table Level Stats: true

   Stage: Stage-4
     Map Reduce
@@ -3540,7 +4280,7 @@ STAGE PLANS:
                     input format: org.apache.hadoop.mapred.TextInputFormat
                     output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
                     properties:
-                      COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"}
+                      COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"cnt":"true","key":"true"}}
                       bucket_count -1
                       column.name.delimiter ,
                       columns key,cnt
@@ -3570,7 +4310,7 @@ STAGE PLANS:
                     input format: org.apache.hadoop.mapred.TextInputFormat
                     output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
                     properties:
-                      COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"}
+                      COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"cnt":"true","key":"true"}}
                       bucket_count -1
                       column.name.delimiter ,
                       columns key,cnt
@@ -3591,7 +4331,7 @@ STAGE PLANS:
                     input format: org.apache.hadoop.mapred.TextInputFormat
                     output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
                     properties:
-                      COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"}
+                      COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"cnt":"true","key":"true"}}
                       bucket_count -1
                       column.name.delimiter ,
                       columns key,cnt
@@ -3627,7 +4367,7 @@ STAGE PLANS:
                     input format: org.apache.hadoop.mapred.TextInputFormat
                     output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
                     properties:
-                      COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"}
+                      COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"cnt":"true","key":"true"}}
                       bucket_count -1
                       column.name.delimiter ,
                       columns key,cnt
@@ -3657,7 +4397,7 @@ STAGE PLANS:
                     input format: org.apache.hadoop.mapred.TextInputFormat
                     output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
                     properties:
-                      COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"}
+                      COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"cnt":"true","key":"true"}}
                       bucket_count -1
                       column.name.delimiter ,
                       columns key,cnt
@@ -3678,7 +4418,7 @@ STAGE PLANS:
                     input format: org.apache.hadoop.mapred.TextInputFormat
                     output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
                     properties:
-                      COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"}
+                      COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"cnt":"true","key":"true"}}
                       bucket_count -1
                       column.name.delimiter ,
                       columns key,cnt
@@ -3763,7 +4503,8 @@ POSTHOOK: type: QUERY
 STAGE DEPENDENCIES:
   Stage-1 is a root stage
   Stage-0 depends on stages: Stage-1
-  Stage-2 depends on stages: Stage-0
+  Stage-2 depends on stages: Stage-0, Stage-3
+  Stage-3 depends on stages: Stage-1

 STAGE PLANS:
   Stage: Stage-1
@@ -3895,7 +4636,7 @@ STAGE PLANS:
                     input format: org.apache.hadoop.mapred.TextInputFormat
                     output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
                     properties:
-                      COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"}
+                      COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"cnt":"true","key":"true"}}
                       bucket_count -1
                       column.name.delimiter ,
                       columns key,cnt
@@ -3916,6 +4657,33 @@ STAGE PLANS:
                 TotalFiles: 1
                 GatherStats: true
                 MultiFileSpray: false
+          Select Operator
+            expressions: _col0 (type: int), _col1 (type: int)
+            outputColumnNames: key, cnt
+            Statistics: Num rows: 3 Data size: 13 Basic stats: COMPLETE Column stats: NONE
+            Group By Operator
+              aggregations: compute_stats(key, 'hll'), compute_stats(cnt, 'hll')
+              mode: hash
+              outputColumnNames: _col0, _col1
+              Statistics: Num rows: 1 Data size: 848 Basic stats: COMPLETE Column stats: NONE
+              File Output Operator
+                compressed: false
+                GlobalTableId: 0
+#### A masked pattern was here ####
+                NumFilesPerFileSink: 1
+                table:
+                    input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                    output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                    properties:
+                      column.name.delimiter ,
+                      columns _col0,_col1
+                      columns.types struct<columntype:string,min:bigint,max:bigint,countnulls:bigint,bitvector:binary>,struct<columntype:string,min:bigint,max:bigint,countnulls:bigint,bitvector:binary>
+                      escape.delim \
+                      serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+                    serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+                TotalFiles: 1
+                GatherStats: false
+                MultiFileSpray: false

   Stage: Stage-0
     Move Operator
@@ -3926,7 +4694,7 @@ STAGE PLANS:
                     input format: org.apache.hadoop.mapred.TextInputFormat
                     output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
                     properties:
-                      COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"}
+                      COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"cnt":"true","key":"true"}}
                       bucket_count -1
                       column.name.delimiter ,
                       columns key,cnt
@@ -3949,6 +4717,80 @@ STAGE PLANS:
     Stats Work
       Basic Stats Work:
 #### A masked pattern was here ####
+      Column Stats Desc:
+          Columns: key, cnt
+          Column Types: int, int
+          Table: default.outputtbl1
+          Is Table Level Stats: true
+
+  Stage: Stage-3
+    Map Reduce
+      Map Operator Tree:
+          TableScan
+            GatherStats: false
+            Reduce Output Operator
+              null sort order:
+              sort order:
+              Statistics: Num rows: 1 Data size: 848 Basic stats: COMPLETE Column stats: NONE
+              tag: -1
+              value expressions: _col0 (type: struct<columntype:string,min:bigint,max:bigint,countnulls:bigint,bitvector:binary>), _col1 (type: struct<columntype:string,min:bigint,max:bigint,countnulls:bigint,bitvector:binary>)
+              auto parallelism: false
+      Path -> Alias:
+#### A masked pattern was here ####
+      Path -> Partition:
+#### A masked pattern was here ####
+          Partition
+            base file name: -mr-10002
+            input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+            output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+            properties:
+              column.name.delimiter ,
+              columns _col0,_col1
+              columns.types struct<columntype:string,min:bigint,max:bigint,countnulls:bigint,bitvector:binary>,struct<columntype:string,min:bigint,max:bigint,countnulls:bigint,bitvector:binary>
+              escape.delim \
+              serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+            serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
@@ -3949,6 +4717,80 @@ STAGE PLANS:
    Stats Work
      Basic Stats Work:
#### A masked pattern was here ####
+      Column Stats Desc:
+          Columns: key, cnt
+          Column Types: int, int
+          Table: default.outputtbl1
+          Is Table Level Stats: true
+
+  Stage: Stage-3
+    Map Reduce
+      Map Operator Tree:
+          TableScan
+            GatherStats: false
+            Reduce Output Operator
+              null sort order:
+              sort order:
+              Statistics: Num rows: 1 Data size: 848 Basic stats: COMPLETE Column stats: NONE
+              tag: -1
+              value expressions: _col0 (type: struct<columntype:string,min:bigint,max:bigint,countnulls:bigint,bitvector:binary>), _col1 (type: struct<columntype:string,min:bigint,max:bigint,countnulls:bigint,bitvector:binary>)
+              auto parallelism: false
+      Path -> Alias:
+#### A masked pattern was here ####
+      Path -> Partition:
+#### A masked pattern was here ####
+          Partition
+            base file name: -mr-10002
+            input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+            output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+            properties:
+              column.name.delimiter ,
+              columns _col0,_col1
+              columns.types struct<columntype:string,min:bigint,max:bigint,countnulls:bigint,bitvector:binary>,struct<columntype:string,min:bigint,max:bigint,countnulls:bigint,bitvector:binary>
+              escape.delim \
+              serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+            serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+
+              input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+              output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+              properties:
+                column.name.delimiter ,
+                columns _col0,_col1
+                columns.types struct<columntype:string,min:bigint,max:bigint,countnulls:bigint,bitvector:binary>,struct<columntype:string,min:bigint,max:bigint,countnulls:bigint,bitvector:binary>
+                escape.delim \
+                serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+                serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+      Truncated Path -> Alias:
+#### A masked pattern was here ####
+      Needs Tagging: false
+      Reduce Operator Tree:
+        Group By Operator
+          aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1)
+          mode: final
+          outputColumnNames: _col0, _col1
+          Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE
+          File Output Operator
+            compressed: false
+            GlobalTableId: 0
+#### A masked pattern was here ####
+            NumFilesPerFileSink: 1
+            Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE
+#### A masked pattern was here ####
+            table:
+                input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                properties:
+                  columns _col0,_col1
+                  columns.types struct<columntype:string,min:bigint,max:bigint,countnulls:bigint,numdistinctvalues:bigint,ndvbitvector:binary>:struct<columntype:string,min:bigint,max:bigint,countnulls:bigint,numdistinctvalues:bigint,ndvbitvector:binary>
+                  escape.delim \
+                  hive.serialization.extend.additional.nesting.levels true
+                  serialization.escape.crlf true
+                  serialization.format 1
+                  serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+                serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+            TotalFiles: 1
+            GatherStats: false
+            MultiFileSpray: false
PREHOOK: query: INSERT OVERWRITE TABLE outputTbl1
SELECT subq1.key, subq1.cnt+subq2.cnt FROM
@@ -4366,7 +5208,8 @@ STAGE DEPENDENCIES:
  Stage-1 is a root stage
  Stage-2 depends on stages: Stage-1
  Stage-0 depends on stages: Stage-2
-  Stage-3 depends on stages: Stage-0
+  Stage-3 depends on stages: Stage-0, Stage-4
+  Stage-4 depends on stages: Stage-2

STAGE PLANS:
  Stage: Stage-1
@@ -4405,7 +5248,7 @@ STAGE PLANS:
            input format: org.apache.hadoop.mapred.TextInputFormat
            output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
            properties:
-              COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"}
+              COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","val":"true"}}
               SORTBUCKETCOLSPREFIX TRUE
               bucket_count 2
               bucket_field_name key
@@ -4428,7 +5271,7 @@ STAGE PLANS:
            input format: org.apache.hadoop.mapred.TextInputFormat
            output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
            properties:
-              COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"}
+              COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","val":"true"}}
               SORTBUCKETCOLSPREFIX TRUE
               bucket_count 2
               bucket_field_name key
@@ -4542,7 +5385,7 @@ STAGE PLANS:
            input format: org.apache.hadoop.mapred.TextInputFormat
            output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
            properties:
-              COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"}
+              COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"cnt":"true","key":"true"}}
               bucket_count -1
               column.name.delimiter ,
               columns key,cnt
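The Stage-3 hunk above is classic two-phase aggregation: the map-side hash-mode Group By emits one partial struct per column (columntype, min, max, countnulls, bitvector), a single reducer merges them with mode: final, and the merged struct gains numdistinctvalues and ndvbitvector. The 'hll' argument selects HyperLogLog for the distinct-value sketch, which is why the partial state is a fixed-size binary bitvector rather than a set of values; for example, an HLL sketch with 2^14 registers carries a relative standard error of roughly 1.04/sqrt(2^14), about 0.8%. If the underlying UDAF is callable directly — an assumption; it is normally driven by the planner rather than user queries — the partial-to-final shape can be observed with:

    -- assumed direct invocation of the compute_stats UDAF seen in the plans
    SELECT compute_stats(key, 'hll'), compute_stats(cnt, 'hll')
    FROM default.outputtbl1;
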
@@ -4563,6 +5406,33 @@ STAGE PLANS:
            TotalFiles: 1
            GatherStats: true
            MultiFileSpray: false
+          Select Operator
+            expressions: _col0 (type: int), _col1 (type: int)
+            outputColumnNames: key, cnt
+            Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE
+            Group By Operator
+              aggregations: compute_stats(key, 'hll'), compute_stats(cnt, 'hll')
+              mode: hash
+              outputColumnNames: _col0, _col1
+              Statistics: Num rows: 1 Data size: 848 Basic stats: COMPLETE Column stats: NONE
+              File Output Operator
+                compressed: false
+                GlobalTableId: 0
+#### A masked pattern was here ####
+                NumFilesPerFileSink: 1
+                table:
+                    input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                    output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                    properties:
+                      column.name.delimiter ,
+                      columns _col0,_col1
+                      columns.types struct<columntype:string,min:bigint,max:bigint,countnulls:bigint,bitvector:binary>,struct<columntype:string,min:bigint,max:bigint,countnulls:bigint,bitvector:binary>
+                      escape.delim \
+                      serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+                    serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+                TotalFiles: 1
+                GatherStats: false
+                MultiFileSpray: false

  Stage: Stage-0
    Move Operator
@@ -4573,7 +5443,7 @@ STAGE PLANS:
            input format: org.apache.hadoop.mapred.TextInputFormat
            output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
            properties:
-              COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"}
+              COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"cnt":"true","key":"true"}}
               bucket_count -1
               column.name.delimiter ,
               columns key,cnt
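The hunk that follows rewrites the Stage-3 Stats Work block (the removed and re-added lines appear to differ only in indentation) and appends Stage-4, the same final compute_stats merge job shown earlier. The manual counterpart of this autogathering is standard HiveQL and records equivalent column statistics in the metastore:

    -- one-off alternative: compute column statistics for every column
    ANALYZE TABLE default.outputtbl1 COMPUTE STATISTICS FOR COLUMNS;
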
@@ -4589,13 +5459,87 @@ STAGE PLANS:
              serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
              totalSize 20
#### A masked pattern was here ####
-          serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
-          name: default.outputtbl1
-
-  Stage: Stage-3
-    Stats Work
-      Basic Stats Work:
+          serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+          name: default.outputtbl1
+
+  Stage: Stage-3
+    Stats Work
+      Basic Stats Work:
+#### A masked pattern was here ####
+      Column Stats Desc:
+          Columns: key, cnt
+          Column Types: int, int
+          Table: default.outputtbl1
+          Is Table Level Stats: true
+
+  Stage: Stage-4
+    Map Reduce
+      Map Operator Tree:
+          TableScan
+            GatherStats: false
+            Reduce Output Operator
+              null sort order:
+              sort order:
+              Statistics: Num rows: 1 Data size: 848 Basic stats: COMPLETE Column stats: NONE
+              tag: -1
+              value expressions: _col0 (type: struct<columntype:string,min:bigint,max:bigint,countnulls:bigint,bitvector:binary>), _col1 (type: struct<columntype:string,min:bigint,max:bigint,countnulls:bigint,bitvector:binary>)
+              auto parallelism: false
+      Path -> Alias:
+#### A masked pattern was here ####
+      Path -> Partition:
+#### A masked pattern was here ####
+          Partition
+            base file name: -mr-10003
+            input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+            output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+            properties:
+              column.name.delimiter ,
+              columns _col0,_col1
+              columns.types struct<columntype:string,min:bigint,max:bigint,countnulls:bigint,bitvector:binary>,struct<columntype:string,min:bigint,max:bigint,countnulls:bigint,bitvector:binary>
+              escape.delim \
+              serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+            serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+
+              input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+              output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+              properties:
+                column.name.delimiter ,
+                columns _col0,_col1
+                columns.types struct<columntype:string,min:bigint,max:bigint,countnulls:bigint,bitvector:binary>,struct<columntype:string,min:bigint,max:bigint,countnulls:bigint,bitvector:binary>
+                escape.delim \
+                serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+                serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+      Truncated Path -> Alias:
+#### A masked pattern was here ####
+      Needs Tagging: false
+      Reduce Operator Tree:
+        Group By Operator
+          aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1)
+          mode: final
+          outputColumnNames: _col0, _col1
+          Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE
+          File Output Operator
+            compressed: false
+            GlobalTableId: 0
+#### A masked pattern was here ####
+            NumFilesPerFileSink: 1
+            Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE
#### A masked pattern was here ####
+            table:
+                input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                properties:
+                  columns _col0,_col1
+                  columns.types struct<columntype:string,min:bigint,max:bigint,countnulls:bigint,numdistinctvalues:bigint,ndvbitvector:binary>:struct<columntype:string,min:bigint,max:bigint,countnulls:bigint,numdistinctvalues:bigint,ndvbitvector:binary>
+                  escape.delim \
+                  hive.serialization.extend.additional.nesting.levels true
+                  serialization.escape.crlf true
+                  serialization.format 1
+                  serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+                serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+            TotalFiles: 1
+            GatherStats: false
+            MultiFileSpray: false
PREHOOK: query: INSERT OVERWRITE TABLE outputTbl1
SELECT key, count(1) FROM T2 GROUP BY key
@@ -4673,7 +5617,7 @@ STAGE PLANS:
            input format: org.apache.hadoop.mapred.TextInputFormat
            output format: org.apache.hadoop.hive.ql.io.HiveIgnore
<TRUNCATED>
