http://git-wip-us.apache.org/repos/asf/hive/blob/5eebbdf7/ql/src/test/results/clientpositive/llap/load_dyn_part5.q.out ---------------------------------------------------------------------- diff --git a/ql/src/test/results/clientpositive/llap/load_dyn_part5.q.out b/ql/src/test/results/clientpositive/llap/load_dyn_part5.q.out index c024062..9255be7 100644 --- a/ql/src/test/results/clientpositive/llap/load_dyn_part5.q.out +++ b/ql/src/test/results/clientpositive/llap/load_dyn_part5.q.out @@ -52,52 +52,28 @@ STAGE PLANS: expressions: key (type: string), value (type: string) outputColumnNames: _col0, _col1 Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.nzhang_part5 - Select Operator - expressions: _col0 (type: string), _col1 (type: string) - outputColumnNames: key, value - Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE - Group By Operator - aggregations: compute_stats(key, 'hll') - keys: value (type: string) - mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 250 Data size: 132750 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 250 Data size: 132750 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col1 (type: struct<columntype:string,maxlength:bigint,sumlength:bigint,count:bigint,countnulls:bigint,bitvector:binary>) - Execution mode: llap + Reduce Output Operator + key expressions: _col1 (type: string) + sort order: + + Map-reduce partition columns: _col1 (type: string) + value expressions: _col0 (type: string) + Execution mode: vectorized, llap LLAP IO: no inputs Reducer 2 - Execution mode: llap + Execution mode: vectorized, llap Reduce Operator Tree: - Group By Operator - aggregations: compute_stats(VALUE._col0) - keys: KEY._col0 (type: string) - mode: mergepartial + Select Operator + expressions: VALUE._col0 (type: string), KEY._col1 (type: string) outputColumnNames: _col0, _col1 - Statistics: Num rows: 250 Data size: 132750 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: _col1 (type: struct<columntype:string,maxlength:bigint,avglength:double,countnulls:bigint,numdistinctvalues:bigint,ndvbitvector:binary>), _col0 (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 250 Data size: 132750 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 250 Data size: 132750 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + File Output Operator + compressed: false + Dp Sort State: PARTITION_SORTED + Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.nzhang_part5 Stage: Stage-2 Dependency Collection
http://git-wip-us.apache.org/repos/asf/hive/blob/5eebbdf7/ql/src/test/results/clientpositive/llap/materialized_view_partitioned_3.q.out ---------------------------------------------------------------------- diff --git a/ql/src/test/results/clientpositive/llap/materialized_view_partitioned_3.q.out b/ql/src/test/results/clientpositive/llap/materialized_view_partitioned_3.q.out index 2c838b6..4ed30ee 100644 --- a/ql/src/test/results/clientpositive/llap/materialized_view_partitioned_3.q.out +++ b/ql/src/test/results/clientpositive/llap/materialized_view_partitioned_3.q.out @@ -60,7 +60,6 @@ STAGE PLANS: key expressions: _col1 (type: string) sort order: + Map-reduce partition columns: _col1 (type: string) - Statistics: Num rows: 55 Data size: 19268 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: string) Execution mode: vectorized, llap LLAP IO: may be used (ACID table) @@ -70,7 +69,6 @@ STAGE PLANS: Select Operator expressions: VALUE._col0 (type: string), KEY._col1 (type: string) outputColumnNames: _col0, _col1 - Statistics: Num rows: 55 Data size: 19268 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false Dp Sort State: PARTITION_SORTED http://git-wip-us.apache.org/repos/asf/hive/blob/5eebbdf7/ql/src/test/results/clientpositive/llap/mm_dp.q.out ---------------------------------------------------------------------- diff --git a/ql/src/test/results/clientpositive/llap/mm_dp.q.out b/ql/src/test/results/clientpositive/llap/mm_dp.q.out index b23a3d7..6ad05e9 100644 --- a/ql/src/test/results/clientpositive/llap/mm_dp.q.out +++ b/ql/src/test/results/clientpositive/llap/mm_dp.q.out @@ -149,7 +149,6 @@ STAGE PLANS: key expressions: _col3 (type: string), _bucket_number (type: string), _col2 (type: int) sort order: +++ Map-reduce partition columns: _col3 (type: string) - Statistics: Num rows: 2605 Data size: 268315 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: int), _col1 (type: int) Execution mode: vectorized, llap LLAP IO: all inputs @@ -159,11 +158,10 @@ STAGE PLANS: Select Operator expressions: VALUE._col0 (type: int), VALUE._col1 (type: int), KEY._col2 (type: int), KEY._col3 (type: string), KEY._bucket_number (type: string) outputColumnNames: _col0, _col1, _col2, _col3, _bucket_number - Statistics: Num rows: 2605 Data size: 747635 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false Dp Sort State: PARTITION_BUCKET_SORTED - Statistics: Num rows: 2605 Data size: 747635 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 2605 Data size: 268315 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat http://git-wip-us.apache.org/repos/asf/hive/blob/5eebbdf7/ql/src/test/results/clientpositive/llap/orc_analyze.q.out ---------------------------------------------------------------------- diff --git a/ql/src/test/results/clientpositive/llap/orc_analyze.q.out b/ql/src/test/results/clientpositive/llap/orc_analyze.q.out index 25426bf..1389de0 100644 --- a/ql/src/test/results/clientpositive/llap/orc_analyze.q.out +++ b/ql/src/test/results/clientpositive/llap/orc_analyze.q.out @@ -772,7 +772,7 @@ Partition Parameters: numFiles 4 numRows 50 rawDataSize 22043 - totalSize 5339 + totalSize 5336 #### A masked pattern was here #### # Storage Information @@ -866,7 +866,7 @@ Partition Parameters: numFiles 4 numRows 50 rawDataSize 22043 - totalSize 5339 + totalSize 5336 #### A masked pattern was here #### # Storage Information @@ -1009,7 +1009,7 @@ Partition Parameters: numFiles 4 numRows 50 rawDataSize 22043 - totalSize 5339 + totalSize 5336 #### A masked pattern was here #### # Storage Information http://git-wip-us.apache.org/repos/asf/hive/blob/5eebbdf7/ql/src/test/results/clientpositive/llap/orc_merge1.q.out ---------------------------------------------------------------------- diff --git a/ql/src/test/results/clientpositive/llap/orc_merge1.q.out b/ql/src/test/results/clientpositive/llap/orc_merge1.q.out index d4f9517..5bb6432 100644 --- a/ql/src/test/results/clientpositive/llap/orc_merge1.q.out +++ b/ql/src/test/results/clientpositive/llap/orc_merge1.q.out @@ -76,52 +76,28 @@ STAGE PLANS: expressions: UDFToInteger(key) (type: int), value (type: string), (hash(key) pmod 2) (type: int) outputColumnNames: _col0, _col1, _col2 Statistics: Num rows: 500 Data size: 49500 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 500 Data size: 49500 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat - serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde - name: default.orcfile_merge1_n1 - Select Operator - expressions: _col0 (type: int), _col1 (type: string), '1' (type: string), CAST( _col2 AS STRING) (type: string) - outputColumnNames: key, value, ds, part - Statistics: Num rows: 500 Data size: 182000 Basic stats: COMPLETE Column stats: COMPLETE - Group By Operator - aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') - keys: ds (type: string), part (type: string) - mode: hash - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 250 Data size: 283250 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: string), _col1 (type: string) - sort order: ++ - Map-reduce partition columns: _col0 (type: string), _col1 (type: string) - Statistics: Num rows: 250 Data size: 283250 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col2 (type: struct<columntype:string,min:bigint,max:bigint,countnulls:bigint,bitvector:binary>), _col3 (type: struct<columntype:string,maxlength:bigint,sumlength:bigint,count:bigint,countnulls:bigint,bitvector:binary>) + Reduce Output Operator + key expressions: _col2 (type: int) + sort order: + + Map-reduce partition columns: _col2 (type: int) + value expressions: _col0 (type: int), _col1 (type: string) Execution mode: llap LLAP IO: no inputs Reducer 2 Execution mode: llap Reduce Operator Tree: - Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) - keys: KEY._col0 (type: string), KEY._col1 (type: string) - mode: mergepartial - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 250 Data size: 287250 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: _col2 (type: struct<columntype:string,min:bigint,max:bigint,countnulls:bigint,numdistinctvalues:bigint,ndvbitvector:binary>), _col3 (type: struct<columntype:string,maxlength:bigint,avglength:double,countnulls:bigint,numdistinctvalues:bigint,ndvbitvector:binary>), _col0 (type: string), _col1 (type: string) - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 250 Data size: 287250 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 250 Data size: 287250 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Select Operator + expressions: VALUE._col0 (type: int), VALUE._col1 (type: string), KEY._col2 (type: int) + outputColumnNames: _col0, _col1, _col2 + File Output Operator + compressed: false + Dp Sort State: PARTITION_SORTED + Statistics: Num rows: 500 Data size: 49500 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat + serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde + name: default.orcfile_merge1_n1 Stage: Stage-2 Dependency Collection @@ -164,13 +140,8 @@ POSTHOOK: Lineage: orcfile_merge1_n1 PARTITION(ds=1,part=0).key EXPRESSION [(src POSTHOOK: Lineage: orcfile_merge1_n1 PARTITION(ds=1,part=0).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] POSTHOOK: Lineage: orcfile_merge1_n1 PARTITION(ds=1,part=1).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] POSTHOOK: Lineage: orcfile_merge1_n1 PARTITION(ds=1,part=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] -Found 6 items --rw-r--r-- 3 ### USER ### ### GROUP ### 555 ### HDFS DATE ### hdfs://### HDFS PATH ### --rw-r--r-- 3 ### USER ### ### GROUP ### 562 ### HDFS DATE ### hdfs://### HDFS PATH ### --rw-r--r-- 3 ### USER ### ### GROUP ### 561 ### HDFS DATE ### hdfs://### HDFS PATH ### --rw-r--r-- 3 ### USER ### ### GROUP ### 496 ### HDFS DATE ### hdfs://### HDFS PATH ### --rw-r--r-- 3 ### USER ### ### GROUP ### 554 ### HDFS DATE ### hdfs://### HDFS PATH ### --rw-r--r-- 3 ### USER ### ### GROUP ### 478 ### HDFS DATE ### hdfs://### HDFS PATH ### +Found 1 items +-rw-rw-rw- 3 ### USER ### ### GROUP ### 1359 ### HDFS DATE ### hdfs://### HDFS PATH ### PREHOOK: query: EXPLAIN INSERT OVERWRITE TABLE orcfile_merge1b_n1 PARTITION (ds='1', part) SELECT key, value, PMOD(HASH(key), 2) as part @@ -212,52 +183,28 @@ STAGE PLANS: expressions: UDFToInteger(key) (type: int), value (type: string), (hash(key) pmod 2) (type: int) outputColumnNames: _col0, _col1, _col2 Statistics: Num rows: 500 Data size: 49500 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 500 Data size: 49500 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat - serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde - name: default.orcfile_merge1b_n1 - Select Operator - expressions: _col0 (type: int), _col1 (type: string), '1' (type: string), CAST( _col2 AS STRING) (type: string) - outputColumnNames: key, value, ds, part - Statistics: Num rows: 500 Data size: 182000 Basic stats: COMPLETE Column stats: COMPLETE - Group By Operator - aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') - keys: ds (type: string), part (type: string) - mode: hash - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 250 Data size: 283250 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: string), _col1 (type: string) - sort order: ++ - Map-reduce partition columns: _col0 (type: string), _col1 (type: string) - Statistics: Num rows: 250 Data size: 283250 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col2 (type: struct<columntype:string,min:bigint,max:bigint,countnulls:bigint,bitvector:binary>), _col3 (type: struct<columntype:string,maxlength:bigint,sumlength:bigint,count:bigint,countnulls:bigint,bitvector:binary>) + Reduce Output Operator + key expressions: _col2 (type: int) + sort order: + + Map-reduce partition columns: _col2 (type: int) + value expressions: _col0 (type: int), _col1 (type: string) Execution mode: llap LLAP IO: no inputs Reducer 2 Execution mode: llap Reduce Operator Tree: - Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) - keys: KEY._col0 (type: string), KEY._col1 (type: string) - mode: mergepartial - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 250 Data size: 287250 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: _col2 (type: struct<columntype:string,min:bigint,max:bigint,countnulls:bigint,numdistinctvalues:bigint,ndvbitvector:binary>), _col3 (type: struct<columntype:string,maxlength:bigint,avglength:double,countnulls:bigint,numdistinctvalues:bigint,ndvbitvector:binary>), _col0 (type: string), _col1 (type: string) - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 250 Data size: 287250 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 250 Data size: 287250 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Select Operator + expressions: VALUE._col0 (type: int), VALUE._col1 (type: string), KEY._col2 (type: int) + outputColumnNames: _col0, _col1, _col2 + File Output Operator + compressed: false + Dp Sort State: PARTITION_SORTED + Statistics: Num rows: 500 Data size: 49500 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat + serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde + name: default.orcfile_merge1b_n1 Stage: Stage-8 Conditional Operator @@ -346,7 +293,7 @@ POSTHOOK: Lineage: orcfile_merge1b_n1 PARTITION(ds=1,part=0).value SIMPLE [(src) POSTHOOK: Lineage: orcfile_merge1b_n1 PARTITION(ds=1,part=1).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] POSTHOOK: Lineage: orcfile_merge1b_n1 PARTITION(ds=1,part=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] Found 1 items --rw-rw-rw- 3 ### USER ### ### GROUP ### 1360 ### HDFS DATE ### hdfs://### HDFS PATH ### +-rw-rw-rw- 3 ### USER ### ### GROUP ### 1359 ### HDFS DATE ### hdfs://### HDFS PATH ### PREHOOK: query: EXPLAIN INSERT OVERWRITE TABLE orcfile_merge1c_n1 PARTITION (ds='1', part) SELECT key, value, PMOD(HASH(key), 2) as part @@ -388,52 +335,28 @@ STAGE PLANS: expressions: UDFToInteger(key) (type: int), value (type: string), (hash(key) pmod 2) (type: int) outputColumnNames: _col0, _col1, _col2 Statistics: Num rows: 500 Data size: 49500 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 500 Data size: 49500 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat - serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde - name: default.orcfile_merge1c_n1 - Select Operator - expressions: _col0 (type: int), _col1 (type: string), '1' (type: string), CAST( _col2 AS STRING) (type: string) - outputColumnNames: key, value, ds, part - Statistics: Num rows: 500 Data size: 182000 Basic stats: COMPLETE Column stats: COMPLETE - Group By Operator - aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') - keys: ds (type: string), part (type: string) - mode: hash - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 250 Data size: 283250 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: string), _col1 (type: string) - sort order: ++ - Map-reduce partition columns: _col0 (type: string), _col1 (type: string) - Statistics: Num rows: 250 Data size: 283250 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col2 (type: struct<columntype:string,min:bigint,max:bigint,countnulls:bigint,bitvector:binary>), _col3 (type: struct<columntype:string,maxlength:bigint,sumlength:bigint,count:bigint,countnulls:bigint,bitvector:binary>) + Reduce Output Operator + key expressions: _col2 (type: int) + sort order: + + Map-reduce partition columns: _col2 (type: int) + value expressions: _col0 (type: int), _col1 (type: string) Execution mode: llap LLAP IO: no inputs Reducer 2 Execution mode: llap Reduce Operator Tree: - Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) - keys: KEY._col0 (type: string), KEY._col1 (type: string) - mode: mergepartial - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 250 Data size: 287250 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: _col2 (type: struct<columntype:string,min:bigint,max:bigint,countnulls:bigint,numdistinctvalues:bigint,ndvbitvector:binary>), _col3 (type: struct<columntype:string,maxlength:bigint,avglength:double,countnulls:bigint,numdistinctvalues:bigint,ndvbitvector:binary>), _col0 (type: string), _col1 (type: string) - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 250 Data size: 287250 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 250 Data size: 287250 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Select Operator + expressions: VALUE._col0 (type: int), VALUE._col1 (type: string), KEY._col2 (type: int) + outputColumnNames: _col0, _col1, _col2 + File Output Operator + compressed: false + Dp Sort State: PARTITION_SORTED + Statistics: Num rows: 500 Data size: 49500 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat + serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde + name: default.orcfile_merge1c_n1 Stage: Stage-8 Conditional Operator @@ -514,7 +437,7 @@ POSTHOOK: Lineage: orcfile_merge1c_n1 PARTITION(ds=1,part=0).value SIMPLE [(src) POSTHOOK: Lineage: orcfile_merge1c_n1 PARTITION(ds=1,part=1).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] POSTHOOK: Lineage: orcfile_merge1c_n1 PARTITION(ds=1,part=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] Found 1 items --rw-rw-rw- 3 ### USER ### ### GROUP ### 2461 ### HDFS DATE ### hdfs://### HDFS PATH ### +-rw-rw-rw- 3 ### USER ### ### GROUP ### 1359 ### HDFS DATE ### hdfs://### HDFS PATH ### PREHOOK: query: SELECT SUM(HASH(c)) FROM ( SELECT TRANSFORM(*) USING 'tr \t _' AS (c) FROM orcfile_merge1_n1 WHERE ds='1' http://git-wip-us.apache.org/repos/asf/hive/blob/5eebbdf7/ql/src/test/results/clientpositive/llap/orc_merge10.q.out ---------------------------------------------------------------------- diff --git a/ql/src/test/results/clientpositive/llap/orc_merge10.q.out b/ql/src/test/results/clientpositive/llap/orc_merge10.q.out index 95b352a..d943b40 100644 --- a/ql/src/test/results/clientpositive/llap/orc_merge10.q.out +++ b/ql/src/test/results/clientpositive/llap/orc_merge10.q.out @@ -76,52 +76,28 @@ STAGE PLANS: expressions: UDFToInteger(key) (type: int), value (type: string), (hash(key) pmod 2) (type: int) outputColumnNames: _col0, _col1, _col2 Statistics: Num rows: 500 Data size: 49500 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 500 Data size: 49500 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat - serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde - name: default.orcfile_merge1 - Select Operator - expressions: _col0 (type: int), _col1 (type: string), '1' (type: string), CAST( _col2 AS STRING) (type: string) - outputColumnNames: key, value, ds, part - Statistics: Num rows: 500 Data size: 182000 Basic stats: COMPLETE Column stats: COMPLETE - Group By Operator - aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') - keys: ds (type: string), part (type: string) - mode: hash - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 250 Data size: 283250 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: string), _col1 (type: string) - sort order: ++ - Map-reduce partition columns: _col0 (type: string), _col1 (type: string) - Statistics: Num rows: 250 Data size: 283250 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col2 (type: struct<columntype:string,min:bigint,max:bigint,countnulls:bigint,bitvector:binary>), _col3 (type: struct<columntype:string,maxlength:bigint,sumlength:bigint,count:bigint,countnulls:bigint,bitvector:binary>) + Reduce Output Operator + key expressions: _col2 (type: int) + sort order: + + Map-reduce partition columns: _col2 (type: int) + value expressions: _col0 (type: int), _col1 (type: string) Execution mode: llap LLAP IO: no inputs Reducer 2 Execution mode: llap Reduce Operator Tree: - Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) - keys: KEY._col0 (type: string), KEY._col1 (type: string) - mode: mergepartial - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 250 Data size: 287250 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: _col2 (type: struct<columntype:string,min:bigint,max:bigint,countnulls:bigint,numdistinctvalues:bigint,ndvbitvector:binary>), _col3 (type: struct<columntype:string,maxlength:bigint,avglength:double,countnulls:bigint,numdistinctvalues:bigint,ndvbitvector:binary>), _col0 (type: string), _col1 (type: string) - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 250 Data size: 287250 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 250 Data size: 287250 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Select Operator + expressions: VALUE._col0 (type: int), VALUE._col1 (type: string), KEY._col2 (type: int) + outputColumnNames: _col0, _col1, _col2 + File Output Operator + compressed: false + Dp Sort State: PARTITION_SORTED + Statistics: Num rows: 500 Data size: 49500 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat + serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde + name: default.orcfile_merge1 Stage: Stage-2 Dependency Collection @@ -164,10 +140,8 @@ POSTHOOK: Lineage: orcfile_merge1 PARTITION(ds=1,part=0).key EXPRESSION [(src)sr POSTHOOK: Lineage: orcfile_merge1 PARTITION(ds=1,part=0).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] POSTHOOK: Lineage: orcfile_merge1 PARTITION(ds=1,part=1).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] POSTHOOK: Lineage: orcfile_merge1 PARTITION(ds=1,part=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] -Found 3 items --rw-r--r-- 3 ### USER ### ### GROUP ### 947 ### HDFS DATE ### hdfs://### HDFS PATH ### --rw-r--r-- 3 ### USER ### ### GROUP ### 875 ### HDFS DATE ### hdfs://### HDFS PATH ### --rw-r--r-- 3 ### USER ### ### GROUP ### 856 ### HDFS DATE ### hdfs://### HDFS PATH ### +Found 1 items +-rw-rw-rw- 3 ### USER ### ### GROUP ### 1754 ### HDFS DATE ### hdfs://### HDFS PATH ### PREHOOK: query: EXPLAIN INSERT OVERWRITE TABLE orcfile_merge1b PARTITION (ds='1', part) SELECT key, value, PMOD(HASH(key), 2) as part @@ -209,52 +183,28 @@ STAGE PLANS: expressions: UDFToInteger(key) (type: int), value (type: string), (hash(key) pmod 2) (type: int) outputColumnNames: _col0, _col1, _col2 Statistics: Num rows: 500 Data size: 49500 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 500 Data size: 49500 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat - serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde - name: default.orcfile_merge1b - Select Operator - expressions: _col0 (type: int), _col1 (type: string), '1' (type: string), CAST( _col2 AS STRING) (type: string) - outputColumnNames: key, value, ds, part - Statistics: Num rows: 500 Data size: 182000 Basic stats: COMPLETE Column stats: COMPLETE - Group By Operator - aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') - keys: ds (type: string), part (type: string) - mode: hash - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 250 Data size: 283250 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: string), _col1 (type: string) - sort order: ++ - Map-reduce partition columns: _col0 (type: string), _col1 (type: string) - Statistics: Num rows: 250 Data size: 283250 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col2 (type: struct<columntype:string,min:bigint,max:bigint,countnulls:bigint,bitvector:binary>), _col3 (type: struct<columntype:string,maxlength:bigint,sumlength:bigint,count:bigint,countnulls:bigint,bitvector:binary>) + Reduce Output Operator + key expressions: _col2 (type: int) + sort order: + + Map-reduce partition columns: _col2 (type: int) + value expressions: _col0 (type: int), _col1 (type: string) Execution mode: llap LLAP IO: no inputs Reducer 2 Execution mode: llap Reduce Operator Tree: - Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) - keys: KEY._col0 (type: string), KEY._col1 (type: string) - mode: mergepartial - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 250 Data size: 287250 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: _col2 (type: struct<columntype:string,min:bigint,max:bigint,countnulls:bigint,numdistinctvalues:bigint,ndvbitvector:binary>), _col3 (type: struct<columntype:string,maxlength:bigint,avglength:double,countnulls:bigint,numdistinctvalues:bigint,ndvbitvector:binary>), _col0 (type: string), _col1 (type: string) - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 250 Data size: 287250 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 250 Data size: 287250 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Select Operator + expressions: VALUE._col0 (type: int), VALUE._col1 (type: string), KEY._col2 (type: int) + outputColumnNames: _col0, _col1, _col2 + File Output Operator + compressed: false + Dp Sort State: PARTITION_SORTED + Statistics: Num rows: 500 Data size: 49500 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat + serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde + name: default.orcfile_merge1b Stage: Stage-8 Conditional Operator @@ -385,52 +335,28 @@ STAGE PLANS: expressions: UDFToInteger(key) (type: int), value (type: string), (hash(key) pmod 2) (type: int) outputColumnNames: _col0, _col1, _col2 Statistics: Num rows: 500 Data size: 49500 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 500 Data size: 49500 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat - serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde - name: default.orcfile_merge1c - Select Operator - expressions: _col0 (type: int), _col1 (type: string), '1' (type: string), CAST( _col2 AS STRING) (type: string) - outputColumnNames: key, value, ds, part - Statistics: Num rows: 500 Data size: 182000 Basic stats: COMPLETE Column stats: COMPLETE - Group By Operator - aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') - keys: ds (type: string), part (type: string) - mode: hash - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 250 Data size: 283250 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: string), _col1 (type: string) - sort order: ++ - Map-reduce partition columns: _col0 (type: string), _col1 (type: string) - Statistics: Num rows: 250 Data size: 283250 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col2 (type: struct<columntype:string,min:bigint,max:bigint,countnulls:bigint,bitvector:binary>), _col3 (type: struct<columntype:string,maxlength:bigint,sumlength:bigint,count:bigint,countnulls:bigint,bitvector:binary>) + Reduce Output Operator + key expressions: _col2 (type: int) + sort order: + + Map-reduce partition columns: _col2 (type: int) + value expressions: _col0 (type: int), _col1 (type: string) Execution mode: llap LLAP IO: no inputs Reducer 2 Execution mode: llap Reduce Operator Tree: - Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) - keys: KEY._col0 (type: string), KEY._col1 (type: string) - mode: mergepartial - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 250 Data size: 287250 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: _col2 (type: struct<columntype:string,min:bigint,max:bigint,countnulls:bigint,numdistinctvalues:bigint,ndvbitvector:binary>), _col3 (type: struct<columntype:string,maxlength:bigint,avglength:double,countnulls:bigint,numdistinctvalues:bigint,ndvbitvector:binary>), _col0 (type: string), _col1 (type: string) - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 250 Data size: 287250 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 250 Data size: 287250 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Select Operator + expressions: VALUE._col0 (type: int), VALUE._col1 (type: string), KEY._col2 (type: int) + outputColumnNames: _col0, _col1, _col2 + File Output Operator + compressed: false + Dp Sort State: PARTITION_SORTED + Statistics: Num rows: 500 Data size: 49500 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat + serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde + name: default.orcfile_merge1c Stage: Stage-8 Conditional Operator @@ -511,7 +437,7 @@ POSTHOOK: Lineage: orcfile_merge1c PARTITION(ds=1,part=0).value SIMPLE [(src)src POSTHOOK: Lineage: orcfile_merge1c PARTITION(ds=1,part=1).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] POSTHOOK: Lineage: orcfile_merge1c PARTITION(ds=1,part=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] Found 1 items --rw-rw-rw- 3 ### USER ### ### GROUP ### 2409 ### HDFS DATE ### hdfs://### HDFS PATH ### +-rw-rw-rw- 3 ### USER ### ### GROUP ### 1754 ### HDFS DATE ### hdfs://### HDFS PATH ### PREHOOK: query: SELECT SUM(HASH(c)) FROM ( SELECT TRANSFORM(*) USING 'tr \t _' AS (c) FROM orcfile_merge1 WHERE ds='1' @@ -618,7 +544,7 @@ POSTHOOK: type: ALTER_PARTITION_MERGE POSTHOOK: Input: default@orcfile_merge1 POSTHOOK: Output: default@orcfile_merge1@ds=1/part=0 Found 1 items --rw-rw-rw- 3 ### USER ### ### GROUP ### 2409 ### HDFS DATE ### hdfs://### HDFS PATH ### +-rw-rw-rw- 3 ### USER ### ### GROUP ### 1754 ### HDFS DATE ### hdfs://### HDFS PATH ### PREHOOK: query: SELECT SUM(HASH(c)) FROM ( SELECT TRANSFORM(*) USING 'tr \t _' AS (c) FROM orcfile_merge1c WHERE ds='1' @@ -698,80 +624,41 @@ Type: struct<key:int,value:string> Stripe Statistics: Stripe 1: - Column 0: count: 90 hasNull: false - Column 1: count: 90 hasNull: false bytesOnDisk: 185 min: 0 max: 495 sum: 22736 - Column 2: count: 90 hasNull: false bytesOnDisk: 428 min: val_0 max: val_86 sum: 612 - Stripe 2: - Column 0: count: 78 hasNull: false - Column 1: count: 78 hasNull: false bytesOnDisk: 161 min: 0 max: 497 sum: 18371 - Column 2: count: 78 hasNull: false bytesOnDisk: 380 min: val_0 max: val_95 sum: 529 - Stripe 3: - Column 0: count: 74 hasNull: false - Column 1: count: 74 hasNull: false bytesOnDisk: 153 min: 2 max: 493 sum: 19663 - Column 2: count: 74 hasNull: false bytesOnDisk: 363 min: val_105 max: val_97 sum: 505 + Column 0: count: 242 hasNull: false + Column 1: count: 242 hasNull: false bytesOnDisk: 489 min: 0 max: 497 sum: 60770 + Column 2: count: 242 hasNull: false bytesOnDisk: 910 min: val_0 max: val_97 sum: 1646 File Statistics: Column 0: count: 242 hasNull: false - Column 1: count: 242 hasNull: false bytesOnDisk: 499 min: 0 max: 497 sum: 60770 - Column 2: count: 242 hasNull: false bytesOnDisk: 1171 min: val_0 max: val_97 sum: 1646 + Column 1: count: 242 hasNull: false bytesOnDisk: 489 min: 0 max: 497 sum: 60770 + Column 2: count: 242 hasNull: false bytesOnDisk: 910 min: val_0 max: val_97 sum: 1646 Stripes: - Stripe: offset: 3 data: 613 rows: 90 tail: 61 index: 76 - Stream: column 0 section ROW_INDEX start: 3 length 11 - Stream: column 1 section ROW_INDEX start: 14 length 27 - Stream: column 2 section ROW_INDEX start: 41 length 38 - Stream: column 1 section DATA start: 79 length 185 - Stream: column 2 section DATA start: 264 length 377 - Stream: column 2 section LENGTH start: 641 length 51 + Stripe: offset: 3 data: 1399 rows: 242 tail: 73 index: 77 + Stream: column 0 section ROW_INDEX start: 3 length 12 + Stream: column 1 section ROW_INDEX start: 15 length 28 + Stream: column 2 section ROW_INDEX start: 43 length 37 + Stream: column 1 section DATA start: 80 length 489 + Stream: column 2 section DATA start: 569 length 247 + Stream: column 2 section LENGTH start: 816 length 71 + Stream: column 2 section DICTIONARY_DATA start: 887 length 592 Encoding column 0: DIRECT Encoding column 1: DIRECT_V2 - Encoding column 2: DIRECT_V2 + Encoding column 2: DICTIONARY_V2[153] Row group indices for column 0: - Entry 0: count: 90 hasNull: false positions: + Entry 0: count: 242 hasNull: false positions: Row group indices for column 1: - Entry 0: count: 90 hasNull: false min: 0 max: 495 sum: 22736 positions: 0,0,0 + Entry 0: count: 242 hasNull: false min: 0 max: 497 sum: 60770 positions: 0,0,0 Row group indices for column 2: - Entry 0: count: 90 hasNull: false min: val_0 max: val_86 sum: 612 positions: 0,0,0,0,0 - Stripe: offset: 753 data: 541 rows: 78 tail: 61 index: 76 - Stream: column 0 section ROW_INDEX start: 753 length 11 - Stream: column 1 section ROW_INDEX start: 764 length 27 - Stream: column 2 section ROW_INDEX start: 791 length 38 - Stream: column 1 section DATA start: 829 length 161 - Stream: column 2 section DATA start: 990 length 332 - Stream: column 2 section LENGTH start: 1322 length 48 - Encoding column 0: DIRECT - Encoding column 1: DIRECT_V2 - Encoding column 2: DIRECT_V2 - Row group indices for column 0: - Entry 0: count: 78 hasNull: false positions: - Row group indices for column 1: - Entry 0: count: 78 hasNull: false min: 0 max: 497 sum: 18371 positions: 0,0,0 - Row group indices for column 2: - Entry 0: count: 78 hasNull: false min: val_0 max: val_95 sum: 529 positions: 0,0,0,0,0 - Stripe: offset: 1431 data: 516 rows: 74 tail: 61 index: 78 - Stream: column 0 section ROW_INDEX start: 1431 length 11 - Stream: column 1 section ROW_INDEX start: 1442 length 27 - Stream: column 2 section ROW_INDEX start: 1469 length 40 - Stream: column 1 section DATA start: 1509 length 153 - Stream: column 2 section DATA start: 1662 length 331 - Stream: column 2 section LENGTH start: 1993 length 32 - Encoding column 0: DIRECT - Encoding column 1: DIRECT_V2 - Encoding column 2: DIRECT_V2 - Row group indices for column 0: - Entry 0: count: 74 hasNull: false positions: - Row group indices for column 1: - Entry 0: count: 74 hasNull: false min: 2 max: 493 sum: 19663 positions: 0,0,0 - Row group indices for column 2: - Entry 0: count: 74 hasNull: false min: val_105 max: val_97 sum: 505 positions: 0,0,0,0,0 + Entry 0: count: 242 hasNull: false min: val_0 max: val_97 sum: 1646 positions: 0,0,0 -File length: 2409 bytes +File length: 1754 bytes Padding length: 0 bytes Padding ratio: 0% ________________________________________________________________________________________________________________________ -- END ORC FILE DUMP -- -86 val_86 1 0 +2 val_2 1 0 PREHOOK: query: select * from orcfile_merge1c where ds='1' and part='0' limit 1 PREHOOK: type: QUERY PREHOOK: Input: default@orcfile_merge1c @@ -787,80 +674,41 @@ Type: struct<key:int,value:string> Stripe Statistics: Stripe 1: - Column 0: count: 90 hasNull: false - Column 1: count: 90 hasNull: false bytesOnDisk: 185 min: 0 max: 495 sum: 22736 - Column 2: count: 90 hasNull: false bytesOnDisk: 428 min: val_0 max: val_86 sum: 612 - Stripe 2: - Column 0: count: 78 hasNull: false - Column 1: count: 78 hasNull: false bytesOnDisk: 161 min: 0 max: 497 sum: 18371 - Column 2: count: 78 hasNull: false bytesOnDisk: 380 min: val_0 max: val_95 sum: 529 - Stripe 3: - Column 0: count: 74 hasNull: false - Column 1: count: 74 hasNull: false bytesOnDisk: 153 min: 2 max: 493 sum: 19663 - Column 2: count: 74 hasNull: false bytesOnDisk: 363 min: val_105 max: val_97 sum: 505 + Column 0: count: 242 hasNull: false + Column 1: count: 242 hasNull: false bytesOnDisk: 489 min: 0 max: 497 sum: 60770 + Column 2: count: 242 hasNull: false bytesOnDisk: 910 min: val_0 max: val_97 sum: 1646 File Statistics: Column 0: count: 242 hasNull: false - Column 1: count: 242 hasNull: false bytesOnDisk: 499 min: 0 max: 497 sum: 60770 - Column 2: count: 242 hasNull: false bytesOnDisk: 1171 min: val_0 max: val_97 sum: 1646 + Column 1: count: 242 hasNull: false bytesOnDisk: 489 min: 0 max: 497 sum: 60770 + Column 2: count: 242 hasNull: false bytesOnDisk: 910 min: val_0 max: val_97 sum: 1646 Stripes: - Stripe: offset: 3 data: 613 rows: 90 tail: 61 index: 76 - Stream: column 0 section ROW_INDEX start: 3 length 11 - Stream: column 1 section ROW_INDEX start: 14 length 27 - Stream: column 2 section ROW_INDEX start: 41 length 38 - Stream: column 1 section DATA start: 79 length 185 - Stream: column 2 section DATA start: 264 length 377 - Stream: column 2 section LENGTH start: 641 length 51 - Encoding column 0: DIRECT - Encoding column 1: DIRECT_V2 - Encoding column 2: DIRECT_V2 - Row group indices for column 0: - Entry 0: count: 90 hasNull: false positions: - Row group indices for column 1: - Entry 0: count: 90 hasNull: false min: 0 max: 495 sum: 22736 positions: 0,0,0 - Row group indices for column 2: - Entry 0: count: 90 hasNull: false min: val_0 max: val_86 sum: 612 positions: 0,0,0,0,0 - Stripe: offset: 753 data: 541 rows: 78 tail: 61 index: 76 - Stream: column 0 section ROW_INDEX start: 753 length 11 - Stream: column 1 section ROW_INDEX start: 764 length 27 - Stream: column 2 section ROW_INDEX start: 791 length 38 - Stream: column 1 section DATA start: 829 length 161 - Stream: column 2 section DATA start: 990 length 332 - Stream: column 2 section LENGTH start: 1322 length 48 - Encoding column 0: DIRECT - Encoding column 1: DIRECT_V2 - Encoding column 2: DIRECT_V2 - Row group indices for column 0: - Entry 0: count: 78 hasNull: false positions: - Row group indices for column 1: - Entry 0: count: 78 hasNull: false min: 0 max: 497 sum: 18371 positions: 0,0,0 - Row group indices for column 2: - Entry 0: count: 78 hasNull: false min: val_0 max: val_95 sum: 529 positions: 0,0,0,0,0 - Stripe: offset: 1431 data: 516 rows: 74 tail: 61 index: 78 - Stream: column 0 section ROW_INDEX start: 1431 length 11 - Stream: column 1 section ROW_INDEX start: 1442 length 27 - Stream: column 2 section ROW_INDEX start: 1469 length 40 - Stream: column 1 section DATA start: 1509 length 153 - Stream: column 2 section DATA start: 1662 length 331 - Stream: column 2 section LENGTH start: 1993 length 32 + Stripe: offset: 3 data: 1399 rows: 242 tail: 73 index: 77 + Stream: column 0 section ROW_INDEX start: 3 length 12 + Stream: column 1 section ROW_INDEX start: 15 length 28 + Stream: column 2 section ROW_INDEX start: 43 length 37 + Stream: column 1 section DATA start: 80 length 489 + Stream: column 2 section DATA start: 569 length 247 + Stream: column 2 section LENGTH start: 816 length 71 + Stream: column 2 section DICTIONARY_DATA start: 887 length 592 Encoding column 0: DIRECT Encoding column 1: DIRECT_V2 - Encoding column 2: DIRECT_V2 + Encoding column 2: DICTIONARY_V2[153] Row group indices for column 0: - Entry 0: count: 74 hasNull: false positions: + Entry 0: count: 242 hasNull: false positions: Row group indices for column 1: - Entry 0: count: 74 hasNull: false min: 2 max: 493 sum: 19663 positions: 0,0,0 + Entry 0: count: 242 hasNull: false min: 0 max: 497 sum: 60770 positions: 0,0,0 Row group indices for column 2: - Entry 0: count: 74 hasNull: false min: val_105 max: val_97 sum: 505 positions: 0,0,0,0,0 + Entry 0: count: 242 hasNull: false min: val_0 max: val_97 sum: 1646 positions: 0,0,0 -File length: 2409 bytes +File length: 1754 bytes Padding length: 0 bytes Padding ratio: 0% ________________________________________________________________________________________________________________________ -- END ORC FILE DUMP -- -86 val_86 1 0 +2 val_2 1 0 PREHOOK: query: DROP TABLE orcfile_merge1 PREHOOK: type: DROPTABLE PREHOOK: Input: default@orcfile_merge1 http://git-wip-us.apache.org/repos/asf/hive/blob/5eebbdf7/ql/src/test/results/clientpositive/llap/orc_merge2.q.out ---------------------------------------------------------------------- diff --git a/ql/src/test/results/clientpositive/llap/orc_merge2.q.out b/ql/src/test/results/clientpositive/llap/orc_merge2.q.out index 430ce16..7265292 100644 --- a/ql/src/test/results/clientpositive/llap/orc_merge2.q.out +++ b/ql/src/test/results/clientpositive/llap/orc_merge2.q.out @@ -50,52 +50,28 @@ STAGE PLANS: expressions: UDFToInteger(key) (type: int), value (type: string), (hash(key) pmod 10) (type: int), (hash(value) pmod 10) (type: int) outputColumnNames: _col0, _col1, _col2, _col3 Statistics: Num rows: 500 Data size: 51500 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 500 Data size: 51500 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat - serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde - name: default.orcfile_merge2a_n0 - Select Operator - expressions: _col0 (type: int), _col1 (type: string), '1' (type: string), CAST( _col2 AS STRING) (type: string), CAST( _col3 AS STRING) (type: string) - outputColumnNames: key, value, one, two, three - Statistics: Num rows: 500 Data size: 274000 Basic stats: COMPLETE Column stats: COMPLETE - Group By Operator - aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') - keys: one (type: string), two (type: string), three (type: string) - mode: hash - outputColumnNames: _col0, _col1, _col2, _col3, _col4 - Statistics: Num rows: 500 Data size: 658500 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string) - sort order: +++ - Map-reduce partition columns: _col0 (type: string), _col1 (type: string), _col2 (type: string) - Statistics: Num rows: 500 Data size: 658500 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col3 (type: struct<columntype:string,min:bigint,max:bigint,countnulls:bigint,bitvector:binary>), _col4 (type: struct<columntype:string,maxlength:bigint,sumlength:bigint,count:bigint,countnulls:bigint,bitvector:binary>) + Reduce Output Operator + key expressions: _col2 (type: int), _col3 (type: int) + sort order: ++ + Map-reduce partition columns: _col2 (type: int), _col3 (type: int) + value expressions: _col0 (type: int), _col1 (type: string) Execution mode: llap LLAP IO: no inputs Reducer 2 Execution mode: llap Reduce Operator Tree: - Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) - keys: KEY._col0 (type: string), KEY._col1 (type: string), KEY._col2 (type: string) - mode: mergepartial - outputColumnNames: _col0, _col1, _col2, _col3, _col4 - Statistics: Num rows: 500 Data size: 666500 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: _col3 (type: struct<columntype:string,min:bigint,max:bigint,countnulls:bigint,numdistinctvalues:bigint,ndvbitvector:binary>), _col4 (type: struct<columntype:string,maxlength:bigint,avglength:double,countnulls:bigint,numdistinctvalues:bigint,ndvbitvector:binary>), _col0 (type: string), _col1 (type: string), _col2 (type: string) - outputColumnNames: _col0, _col1, _col2, _col3, _col4 - Statistics: Num rows: 500 Data size: 666500 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 500 Data size: 666500 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Select Operator + expressions: VALUE._col0 (type: int), VALUE._col1 (type: string), KEY._col2 (type: int), KEY._col3 (type: int) + outputColumnNames: _col0, _col1, _col2, _col3 + File Output Operator + compressed: false + Dp Sort State: PARTITION_SORTED + Statistics: Num rows: 500 Data size: 51500 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat + serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde + name: default.orcfile_merge2a_n0 Stage: Stage-2 Dependency Collection @@ -196,7 +172,7 @@ POSTHOOK: Lineage: orcfile_merge2a_n0 PARTITION(one=1,two=9,three=1).value SIMPL POSTHOOK: Lineage: orcfile_merge2a_n0 PARTITION(one=1,two=9,three=7).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] POSTHOOK: Lineage: orcfile_merge2a_n0 PARTITION(one=1,two=9,three=7).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] Found 1 items --rw-r--r-- 3 ### USER ### ### GROUP ### 349 ### HDFS DATE ### hdfs://### HDFS PATH ### +-rw-rw-rw- 3 ### USER ### ### GROUP ### 349 ### HDFS DATE ### hdfs://### HDFS PATH ### PREHOOK: query: SELECT SUM(HASH(c)) FROM ( SELECT TRANSFORM(*) USING 'tr \t _' AS (c) FROM orcfile_merge2a_n0 http://git-wip-us.apache.org/repos/asf/hive/blob/5eebbdf7/ql/src/test/results/clientpositive/llap/orc_merge7.q.out ---------------------------------------------------------------------- diff --git a/ql/src/test/results/clientpositive/llap/orc_merge7.q.out b/ql/src/test/results/clientpositive/llap/orc_merge7.q.out index 1f67f7b..634f139 100644 --- a/ql/src/test/results/clientpositive/llap/orc_merge7.q.out +++ b/ql/src/test/results/clientpositive/llap/orc_merge7.q.out @@ -52,52 +52,28 @@ STAGE PLANS: expressions: userid (type: bigint), string1 (type: string), subtype (type: double), decimal1 (type: decimal(38,0)), ts (type: timestamp), subtype (type: double) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 Statistics: Num rows: 1 Data size: 352 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 352 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat - serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde - name: default.orc_merge5a_n0 - Select Operator - expressions: _col0 (type: bigint), _col1 (type: string), _col2 (type: double), _col3 (type: decimal(38,0)), _col4 (type: timestamp), _col5 (type: double) - outputColumnNames: userid, string1, subtype, decimal1, ts, st - Statistics: Num rows: 1 Data size: 352 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: compute_stats(userid, 'hll'), compute_stats(string1, 'hll'), compute_stats(subtype, 'hll'), compute_stats(decimal1, 'hll'), compute_stats(ts, 'hll') - keys: st (type: double) - mode: hash - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 - Statistics: Num rows: 1 Data size: 352 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: double) - sort order: + - Map-reduce partition columns: _col0 (type: double) - Statistics: Num rows: 1 Data size: 352 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: struct<columntype:string,min:bigint,max:bigint,countnulls:bigint,bitvector:binary>), _col2 (type: struct<columntype:string,maxlength:bigint,sumlength:bigint,count:bigint,countnulls:bigint,bitvector:binary>), _col3 (type: struct<columntype:string,min:double,max:double,countnulls:bigint,bitvector:binary>), _col4 (type: struct<columntype:string,min:decimal(38,0),max:decimal(38,0),countnulls:bigint,bitvector:binary>), _col5 (type: struct<columntype:string,min:bigint,max:bigint,countnulls:bigint,bitvector:binary>) + Reduce Output Operator + key expressions: _col5 (type: double) + sort order: + + Map-reduce partition columns: _col5 (type: double) + value expressions: _col0 (type: bigint), _col1 (type: string), _col2 (type: double), _col3 (type: decimal(38,0)), _col4 (type: timestamp) Execution mode: llap LLAP IO: all inputs Reducer 2 Execution mode: llap Reduce Operator Tree: - Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2), compute_stats(VALUE._col3), compute_stats(VALUE._col4) - keys: KEY._col0 (type: double) - mode: mergepartial + Select Operator + expressions: VALUE._col0 (type: bigint), VALUE._col1 (type: string), VALUE._col2 (type: double), VALUE._col3 (type: decimal(38,0)), VALUE._col4 (type: timestamp), KEY._col5 (type: double) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 - Statistics: Num rows: 1 Data size: 352 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col1 (type: struct<columntype:string,min:bigint,max:bigint,countnulls:bigint,numdistinctvalues:bigint,ndvbitvector:binary>), _col2 (type: struct<columntype:string,maxlength:bigint,avglength:double,countnulls:bigint,numdistinctvalues:bigint,ndvbitvector:binary>), _col3 (type: struct<columntype:string,min:double,max:double,countnulls:bigint,numdistinctvalues:bigint,ndvbitvector:binary>), _col4 (type: struct<columntype:string,min:decimal(38,0),max:decimal(38,0),countnulls:bigint,numdistinctvalues:bigint,ndvbitvector:binary>), _col5 (type: struct<columntype:string,min:bigint,max:bigint,countnulls:bigint,numdistinctvalues:bigint,ndvbitvector:binary>), _col0 (type: double) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + File Output Operator + compressed: false + Dp Sort State: PARTITION_SORTED Statistics: Num rows: 1 Data size: 352 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 352 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + table: + input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat + serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde + name: default.orc_merge5a_n0 Stage: Stage-2 Dependency Collection @@ -272,52 +248,28 @@ STAGE PLANS: expressions: userid (type: bigint), string1 (type: string), subtype (type: double), decimal1 (type: decimal(38,0)), ts (type: timestamp), subtype (type: double) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 Statistics: Num rows: 1 Data size: 352 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 352 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat - serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde - name: default.orc_merge5a_n0 - Select Operator - expressions: _col0 (type: bigint), _col1 (type: string), _col2 (type: double), _col3 (type: decimal(38,0)), _col4 (type: timestamp), _col5 (type: double) - outputColumnNames: userid, string1, subtype, decimal1, ts, st - Statistics: Num rows: 1 Data size: 352 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: compute_stats(userid, 'hll'), compute_stats(string1, 'hll'), compute_stats(subtype, 'hll'), compute_stats(decimal1, 'hll'), compute_stats(ts, 'hll') - keys: st (type: double) - mode: hash - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 - Statistics: Num rows: 1 Data size: 352 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: double) - sort order: + - Map-reduce partition columns: _col0 (type: double) - Statistics: Num rows: 1 Data size: 352 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: struct<columntype:string,min:bigint,max:bigint,countnulls:bigint,bitvector:binary>), _col2 (type: struct<columntype:string,maxlength:bigint,sumlength:bigint,count:bigint,countnulls:bigint,bitvector:binary>), _col3 (type: struct<columntype:string,min:double,max:double,countnulls:bigint,bitvector:binary>), _col4 (type: struct<columntype:string,min:decimal(38,0),max:decimal(38,0),countnulls:bigint,bitvector:binary>), _col5 (type: struct<columntype:string,min:bigint,max:bigint,countnulls:bigint,bitvector:binary>) + Reduce Output Operator + key expressions: _col5 (type: double) + sort order: + + Map-reduce partition columns: _col5 (type: double) + value expressions: _col0 (type: bigint), _col1 (type: string), _col2 (type: double), _col3 (type: decimal(38,0)), _col4 (type: timestamp) Execution mode: llap LLAP IO: all inputs Reducer 2 Execution mode: llap Reduce Operator Tree: - Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2), compute_stats(VALUE._col3), compute_stats(VALUE._col4) - keys: KEY._col0 (type: double) - mode: mergepartial + Select Operator + expressions: VALUE._col0 (type: bigint), VALUE._col1 (type: string), VALUE._col2 (type: double), VALUE._col3 (type: decimal(38,0)), VALUE._col4 (type: timestamp), KEY._col5 (type: double) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 - Statistics: Num rows: 1 Data size: 352 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col1 (type: struct<columntype:string,min:bigint,max:bigint,countnulls:bigint,numdistinctvalues:bigint,ndvbitvector:binary>), _col2 (type: struct<columntype:string,maxlength:bigint,avglength:double,countnulls:bigint,numdistinctvalues:bigint,ndvbitvector:binary>), _col3 (type: struct<columntype:string,min:double,max:double,countnulls:bigint,numdistinctvalues:bigint,ndvbitvector:binary>), _col4 (type: struct<columntype:string,min:decimal(38,0),max:decimal(38,0),countnulls:bigint,numdistinctvalues:bigint,ndvbitvector:binary>), _col5 (type: struct<columntype:string,min:bigint,max:bigint,countnulls:bigint,numdistinctvalues:bigint,ndvbitvector:binary>), _col0 (type: double) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + File Output Operator + compressed: false + Dp Sort State: PARTITION_SORTED Statistics: Num rows: 1 Data size: 352 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 352 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + table: + input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat + serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde + name: default.orc_merge5a_n0 Stage: Stage-8 Conditional Operator
