HIVE-19995 : Aggregate row traffic for acid tables (Zoltan Haindrich via Ashutosh Chauhan)
Signed-off-by: Ashutosh Chauhan <[email protected]> Project: http://git-wip-us.apache.org/repos/asf/hive/repo Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/08eba3e1 Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/08eba3e1 Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/08eba3e1 Branch: refs/heads/master-txnstats Commit: 08eba3e1e81e6afb3d866da5ecef69b1a7f91c6c Parents: 1c33fea Author: Zoltan Haindrich <[email protected]> Authored: Sun Jul 1 09:58:59 2018 -0700 Committer: Ashutosh Chauhan <[email protected]> Committed: Sun Jul 1 09:58:59 2018 -0700 ---------------------------------------------------------------------- .../test/resources/testconfiguration.properties | 1 + .../hadoop/hive/ql/stats/BasicStatsTask.java | 10 +- .../queries/clientpositive/sqlmerge_stats.q | 38 ++ .../results/clientpositive/acid_nullscan.q.out | 8 +- .../clientpositive/acid_table_stats.q.out | 16 +- .../clientpositive/autoColumnStats_4.q.out | 4 + .../clientpositive/druid/druidmini_mv.q.out | 50 +- .../llap/acid_bucket_pruning.q.out | 6 +- .../llap/acid_vectorization_original.q.out | 14 +- .../llap/dynpart_sort_optimization_acid.q.out | 144 +++--- .../llap/enforce_constraint_notnull.q.out | 24 +- .../llap/insert_into_default_keyword.q.out | 100 ++-- .../insert_values_orig_table_use_metadata.q.out | 24 +- .../materialized_view_create_rewrite_3.q.out | 40 +- .../materialized_view_create_rewrite_4.q.out | 32 +- .../materialized_view_create_rewrite_5.q.out | 50 +- ...ized_view_create_rewrite_rebuild_dummy.q.out | 40 +- .../llap/results_cache_invalidation.q.out | 74 +-- .../llap/results_cache_transactional.q.out | 38 +- .../clientpositive/llap/sqlmerge_stats.q.out | 511 +++++++++++++++++++ .../test/results/clientpositive/row__id.q.out | 18 +- .../tez/acid_vectorization_original_tez.q.out | 14 +- .../clientpositive/tez/explainanalyze_5.q.out | 8 +- 23 files changed, 918 insertions(+), 346 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/hive/blob/08eba3e1/itests/src/test/resources/testconfiguration.properties ---------------------------------------------------------------------- diff --git a/itests/src/test/resources/testconfiguration.properties b/itests/src/test/resources/testconfiguration.properties index 3dd6580..35fad2c 100644 --- a/itests/src/test/resources/testconfiguration.properties +++ b/itests/src/test/resources/testconfiguration.properties @@ -683,6 +683,7 @@ minillaplocal.query.files=\ smb_cache.q,\ special_character_in_tabnames_1.q,\ sqlmerge.q,\ + sqlmerge_stats.q,\ stats_based_fetch_decision.q,\ stats_only_external.q,\ strict_managed_tables_sysdb.q,\ http://git-wip-us.apache.org/repos/asf/hive/blob/08eba3e1/ql/src/java/org/apache/hadoop/hive/ql/stats/BasicStatsTask.java ---------------------------------------------------------------------- diff --git a/ql/src/java/org/apache/hadoop/hive/ql/stats/BasicStatsTask.java b/ql/src/java/org/apache/hadoop/hive/ql/stats/BasicStatsTask.java index 8c23887..f31c170 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/stats/BasicStatsTask.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/stats/BasicStatsTask.java @@ -169,7 +169,7 @@ public class BasicStatsTask implements Serializable, IStatsProcessor { // though we are marking stats as not being accurate. if (StatsSetupConst.areBasicStatsUptoDate(parameters) || p.isTransactionalTable()) { String prefix = getAggregationPrefix(p.getTable(), p.getPartition()); - updateStats(statsAggregator, parameters, prefix, p.isAcid()); + updateStats(statsAggregator, parameters, prefix); } } @@ -206,14 +206,8 @@ public class BasicStatsTask implements Serializable, IStatsProcessor { } private void updateStats(StatsAggregator statsAggregator, Map<String, String> parameters, - String aggKey, boolean isFullAcid) throws HiveException { + String aggKey) throws HiveException { for (String statType : StatsSetupConst.statsRequireCompute) { - if (isFullAcid && !work.isTargetRewritten()) { - // Don't bother with aggregation in this case, it will probably be invalid. - parameters.remove(statType); - continue; - } - String value = statsAggregator.aggregateStats(aggKey, statType); if (value != null && !value.isEmpty()) { long longValue = Long.parseLong(value); http://git-wip-us.apache.org/repos/asf/hive/blob/08eba3e1/ql/src/test/queries/clientpositive/sqlmerge_stats.q ---------------------------------------------------------------------- diff --git a/ql/src/test/queries/clientpositive/sqlmerge_stats.q b/ql/src/test/queries/clientpositive/sqlmerge_stats.q new file mode 100644 index 0000000..c480eb6 --- /dev/null +++ b/ql/src/test/queries/clientpositive/sqlmerge_stats.q @@ -0,0 +1,38 @@ +set hive.mapred.mode=nonstrict; +set hive.support.concurrency=true; +set hive.txn.manager=org.apache.hadoop.hive.ql.lockmgr.DbTxnManager; +set hive.explain.user=false; +set hive.merge.cardinality.check=true; + +create table t(a int, b int) clustered by (a) into 2 buckets stored as orc TBLPROPERTIES ('transactional'='true'); +create table upd_t(a int, b int) clustered by (a) into 2 buckets stored as orc TBLPROPERTIES ('transactional'='false'); + +desc formatted t; + +insert into t values (1,1); +insert into upd_t values (1,1),(2,2); + +desc formatted t; + +explain merge into t as t using upd_t as u ON t.a = u.a +WHEN MATCHED THEN UPDATE SET b = 99 +WHEN NOT MATCHED THEN INSERT VALUES(u.a, u.b); + +merge into t as t using upd_t as u ON t.a = u.a +WHEN MATCHED THEN UPDATE SET b = 99 +WHEN NOT MATCHED THEN INSERT VALUES(u.a, u.b); + +-- merge could keep track of inserts +select assert_true(count(1) = 2) from t group by a>-1; +-- rownum is 2 +desc formatted t; + +merge into t as t using upd_t as u ON t.a = u.a +WHEN MATCHED THEN DELETE +WHEN NOT MATCHED THEN INSERT VALUES(u.a, u.b); + + +select assert_true(count(1) = 0) from t group by a>-1; +-- rownum is 0; because the orc writer can keep track of delta +desc formatted t; + http://git-wip-us.apache.org/repos/asf/hive/blob/08eba3e1/ql/src/test/results/clientpositive/acid_nullscan.q.out ---------------------------------------------------------------------- diff --git a/ql/src/test/results/clientpositive/acid_nullscan.q.out b/ql/src/test/results/clientpositive/acid_nullscan.q.out index c9684dd..19fcc8c 100644 --- a/ql/src/test/results/clientpositive/acid_nullscan.q.out +++ b/ql/src/test/results/clientpositive/acid_nullscan.q.out @@ -42,12 +42,12 @@ STAGE PLANS: Map Operator Tree: TableScan alias: acid_vectorized_n1 - Statistics: Num rows: 90 Data size: 25960 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 11 Data size: 25960 Basic stats: COMPLETE Column stats: NONE GatherStats: false Filter Operator isSamplingPred: false predicate: false (type: boolean) - Statistics: Num rows: 1 Data size: 288 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 2360 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: sum(a) mode: hash @@ -79,6 +79,8 @@ STAGE PLANS: #### A masked pattern was here #### name default.acid_vectorized_n1 numFiles 3 + numRows 11 + rawDataSize 0 serialization.ddl struct acid_vectorized_n1 { i32 a, string b} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.NullStructSerDe @@ -101,6 +103,8 @@ STAGE PLANS: #### A masked pattern was here #### name default.acid_vectorized_n1 numFiles 3 + numRows 11 + rawDataSize 0 serialization.ddl struct acid_vectorized_n1 { i32 a, string b} serialization.format 1 serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde http://git-wip-us.apache.org/repos/asf/hive/blob/08eba3e1/ql/src/test/results/clientpositive/acid_table_stats.q.out ---------------------------------------------------------------------- diff --git a/ql/src/test/results/clientpositive/acid_table_stats.q.out b/ql/src/test/results/clientpositive/acid_table_stats.q.out index 084d232..841a5a4 100644 --- a/ql/src/test/results/clientpositive/acid_table_stats.q.out +++ b/ql/src/test/results/clientpositive/acid_table_stats.q.out @@ -94,6 +94,8 @@ Table: acid #### A masked pattern was here #### Partition Parameters: numFiles 2 + numRows 2000 + rawDataSize 0 totalSize 4063 #### A masked pattern was here #### @@ -133,17 +135,17 @@ STAGE PLANS: TableScan alias: acid filterExpr: (ds = '2008-04-08') (type: boolean) - Statistics: Num rows: 83 Data size: 40630 Basic stats: PARTIAL Column stats: NONE + Statistics: Num rows: 2000 Data size: 40630 Basic stats: COMPLETE Column stats: NONE Select Operator - Statistics: Num rows: 83 Data size: 40630 Basic stats: PARTIAL Column stats: NONE + Statistics: Num rows: 2000 Data size: 40630 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: count() mode: hash outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: PARTIAL Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator sort order: - Statistics: Num rows: 1 Data size: 8 Basic stats: PARTIAL Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: bigint) Execution mode: vectorized Reduce Operator Tree: @@ -151,10 +153,10 @@ STAGE PLANS: aggregations: count(VALUE._col0) mode: mergepartial outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: PARTIAL Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - Statistics: Num rows: 1 Data size: 8 Basic stats: PARTIAL Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -387,6 +389,8 @@ Table: acid #### A masked pattern was here #### Partition Parameters: numFiles 4 + numRows 3000 + rawDataSize 208000 totalSize 8118 #### A masked pattern was here #### http://git-wip-us.apache.org/repos/asf/hive/blob/08eba3e1/ql/src/test/results/clientpositive/autoColumnStats_4.q.out ---------------------------------------------------------------------- diff --git a/ql/src/test/results/clientpositive/autoColumnStats_4.q.out b/ql/src/test/results/clientpositive/autoColumnStats_4.q.out index a16ec07..42c7b43 100644 --- a/ql/src/test/results/clientpositive/autoColumnStats_4.q.out +++ b/ql/src/test/results/clientpositive/autoColumnStats_4.q.out @@ -201,6 +201,8 @@ Table Type: MANAGED_TABLE Table Parameters: bucketing_version 2 numFiles 2 + numRows 10 + rawDataSize 0 totalSize 1899 transactional true transactional_properties default @@ -244,6 +246,8 @@ Table Parameters: COLUMN_STATS_ACCURATE {} bucketing_version 2 numFiles 4 + numRows 8 + rawDataSize 0 totalSize 3275 transactional true transactional_properties default http://git-wip-us.apache.org/repos/asf/hive/blob/08eba3e1/ql/src/test/results/clientpositive/druid/druidmini_mv.q.out ---------------------------------------------------------------------- diff --git a/ql/src/test/results/clientpositive/druid/druidmini_mv.q.out b/ql/src/test/results/clientpositive/druid/druidmini_mv.q.out index e5e1ea9..2e44e14 100644 --- a/ql/src/test/results/clientpositive/druid/druidmini_mv.q.out +++ b/ql/src/test/results/clientpositive/druid/druidmini_mv.q.out @@ -341,34 +341,34 @@ STAGE PLANS: TableScan alias: cmv_basetable_n2 filterExpr: (a = 3) (type: boolean) - Statistics: Num rows: 31 Data size: 372 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 7 Data size: 84 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: (a = 3) (type: boolean) - Statistics: Num rows: 5 Data size: 60 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 7 Data size: 84 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: c (type: double) outputColumnNames: _col0 - Statistics: Num rows: 5 Data size: 60 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 7 Data size: 84 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator sort order: - Statistics: Num rows: 5 Data size: 60 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 7 Data size: 84 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: double) Map 3 Map Operator Tree: TableScan alias: cmv_basetable_n2 filterExpr: ((d = 3) and (a = 3)) (type: boolean) - Statistics: Num rows: 31 Data size: 496 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 7 Data size: 112 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: ((a = 3) and (d = 3)) (type: boolean) - Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 7 Data size: 112 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: c (type: double) outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 7 Data size: 112 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator sort order: - Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 7 Data size: 112 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: double) Reducer 2 Reduce Operator Tree: @@ -379,14 +379,14 @@ STAGE PLANS: 0 1 outputColumnNames: _col0, _col1 - Statistics: Num rows: 5 Data size: 145 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 49 Data size: 1421 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: 3 (type: int), _col0 (type: double), 3 (type: int), _col1 (type: double) outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 5 Data size: 145 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 49 Data size: 1421 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - Statistics: Num rows: 5 Data size: 145 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 49 Data size: 1421 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -460,34 +460,34 @@ STAGE PLANS: TableScan alias: cmv_basetable_n2 filterExpr: (a = 3) (type: boolean) - Statistics: Num rows: 31 Data size: 22692 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 7 Data size: 5124 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: (a = 3) (type: boolean) - Statistics: Num rows: 5 Data size: 3660 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 7 Data size: 5124 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: CAST( t AS timestamp with local time zone) (type: timestamp with local time zone), 3 (type: int), b (type: varchar(256)), c (type: double), userid (type: varchar(256)) outputColumnNames: _col0, _col1, _col2, _col3, _col4 - Statistics: Num rows: 5 Data size: 3660 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 7 Data size: 5124 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: _col0 (type: timestamp with local time zone), _col1 (type: int), _col2 (type: varchar(256)), _col3 (type: double), _col4 (type: varchar(256)), floor_hour(CAST( GenericUDFEpochMilli(_col0) AS TIMESTAMP)) (type: timestamp) outputColumnNames: _col0, _col1, _col2, _col3, _col4, __time_granularity - Statistics: Num rows: 5 Data size: 3660 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 7 Data size: 5124 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: __time_granularity (type: timestamp) sort order: + Map-reduce partition columns: __time_granularity (type: timestamp) - Statistics: Num rows: 5 Data size: 3660 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 7 Data size: 5124 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: timestamp with local time zone), _col1 (type: int), _col2 (type: varchar(256)), _col3 (type: double), _col4 (type: varchar(256)) Reducer 2 Reduce Operator Tree: Select Operator expressions: VALUE._col0 (type: timestamp with local time zone), VALUE._col1 (type: int), VALUE._col2 (type: varchar(256)), VALUE._col3 (type: double), VALUE._col4 (type: varchar(256)), KEY.__time_granularity (type: timestamp) outputColumnNames: _col0, _col1, _col2, _col3, _col4, __time_granularity - Statistics: Num rows: 5 Data size: 3660 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 7 Data size: 5124 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false Dp Sort State: PARTITION_SORTED - Statistics: Num rows: 5 Data size: 3660 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 7 Data size: 5124 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.hive.druid.io.DruidQueryBasedInputFormat output format: org.apache.hadoop.hive.druid.io.DruidOutputFormat @@ -552,17 +552,17 @@ STAGE PLANS: TableScan alias: cmv_basetable_n2 filterExpr: ((a = 3) and (d = 3)) (type: boolean) - Statistics: Num rows: 31 Data size: 496 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 7 Data size: 112 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: ((a = 3) and (d = 3)) (type: boolean) - Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 7 Data size: 112 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: c (type: double) outputColumnNames: _col1 - Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 7 Data size: 112 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator sort order: - Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 7 Data size: 112 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: double) Map 3 Map Operator Tree: @@ -587,14 +587,14 @@ STAGE PLANS: 0 1 outputColumnNames: _col0, _col1, _col6 - Statistics: Num rows: 3 Data size: 87 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 21 Data size: 609 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: _col0 (type: int), _col1 (type: double), _col0 (type: int), _col6 (type: double) outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 3 Data size: 87 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 21 Data size: 609 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - Statistics: Num rows: 3 Data size: 87 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 21 Data size: 609 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat http://git-wip-us.apache.org/repos/asf/hive/blob/08eba3e1/ql/src/test/results/clientpositive/llap/acid_bucket_pruning.q.out ---------------------------------------------------------------------- diff --git a/ql/src/test/results/clientpositive/llap/acid_bucket_pruning.q.out b/ql/src/test/results/clientpositive/llap/acid_bucket_pruning.q.out index b856b99..29a05ae 100644 --- a/ql/src/test/results/clientpositive/llap/acid_bucket_pruning.q.out +++ b/ql/src/test/results/clientpositive/llap/acid_bucket_pruning.q.out @@ -45,7 +45,7 @@ STAGE PLANS: alias: acidtbldefault filterExpr: (a = 1) (type: boolean) buckets included: [13,] of 16 - Statistics: Num rows: 1850 Data size: 7036 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 9174 Data size: 34868 Basic stats: COMPLETE Column stats: NONE GatherStats: false Filter Operator isSamplingPred: false @@ -99,6 +99,8 @@ STAGE PLANS: location hdfs://### HDFS PATH ### name default.acidtbldefault numFiles 17 + numRows 9174 + rawDataSize 0 serialization.ddl struct acidtbldefault { i32 a} serialization.format 1 serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde @@ -122,6 +124,8 @@ STAGE PLANS: location hdfs://### HDFS PATH ### name default.acidtbldefault numFiles 17 + numRows 9174 + rawDataSize 0 serialization.ddl struct acidtbldefault { i32 a} serialization.format 1 serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde http://git-wip-us.apache.org/repos/asf/hive/blob/08eba3e1/ql/src/test/results/clientpositive/llap/acid_vectorization_original.q.out ---------------------------------------------------------------------- diff --git a/ql/src/test/results/clientpositive/llap/acid_vectorization_original.q.out b/ql/src/test/results/clientpositive/llap/acid_vectorization_original.q.out index 028c3ca..57ff575 100644 --- a/ql/src/test/results/clientpositive/llap/acid_vectorization_original.q.out +++ b/ql/src/test/results/clientpositive/llap/acid_vectorization_original.q.out @@ -665,22 +665,22 @@ STAGE PLANS: Map Operator Tree: TableScan alias: over10k_orc_bucketed - Statistics: Num rows: 1234 Data size: 706090 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 2098 Data size: 622340 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: ROW__ID (type: struct<writeid:bigint,bucketid:int,rowid:bigint>) outputColumnNames: ROW__ID - Statistics: Num rows: 1234 Data size: 706090 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 2098 Data size: 622340 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: count() keys: ROW__ID (type: struct<writeid:bigint,bucketid:int,rowid:bigint>) mode: hash outputColumnNames: _col0, _col1 - Statistics: Num rows: 617 Data size: 51828 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 1049 Data size: 88116 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: struct<writeid:bigint,bucketid:int,rowid:bigint>) sort order: + Map-reduce partition columns: _col0 (type: struct<writeid:bigint,bucketid:int,rowid:bigint>) - Statistics: Num rows: 617 Data size: 51828 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 1049 Data size: 88116 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col1 (type: bigint) Execution mode: llap LLAP IO: may be used (ACID table) @@ -692,13 +692,13 @@ STAGE PLANS: keys: KEY._col0 (type: struct<writeid:bigint,bucketid:int,rowid:bigint>) mode: mergepartial outputColumnNames: _col0, _col1 - Statistics: Num rows: 617 Data size: 51828 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 1049 Data size: 88116 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: (_col1 > 1L) (type: boolean) - Statistics: Num rows: 205 Data size: 17220 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 349 Data size: 29316 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 205 Data size: 17220 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 349 Data size: 29316 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat http://git-wip-us.apache.org/repos/asf/hive/blob/08eba3e1/ql/src/test/results/clientpositive/llap/dynpart_sort_optimization_acid.q.out ---------------------------------------------------------------------- diff --git a/ql/src/test/results/clientpositive/llap/dynpart_sort_optimization_acid.q.out b/ql/src/test/results/clientpositive/llap/dynpart_sort_optimization_acid.q.out index 33bceed..7905441 100644 --- a/ql/src/test/results/clientpositive/llap/dynpart_sort_optimization_acid.q.out +++ b/ql/src/test/results/clientpositive/llap/dynpart_sort_optimization_acid.q.out @@ -95,19 +95,19 @@ STAGE PLANS: TableScan alias: acid_part filterExpr: ((key = 'foo') and (ds = '2008-04-08')) (type: boolean) - Statistics: Num rows: 160 Data size: 61001 Basic stats: PARTIAL Column stats: NONE + Statistics: Num rows: 1601 Data size: 150414 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: (key = 'foo') (type: boolean) - Statistics: Num rows: 5 Data size: 1906 Basic stats: PARTIAL Column stats: NONE + Statistics: Num rows: 5 Data size: 469 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: ROW__ID (type: struct<writeid:bigint,bucketid:int,rowid:bigint>) outputColumnNames: _col0 - Statistics: Num rows: 5 Data size: 1906 Basic stats: PARTIAL Column stats: NONE + Statistics: Num rows: 5 Data size: 469 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: struct<writeid:bigint,bucketid:int,rowid:bigint>) sort order: + Map-reduce partition columns: UDFToInteger(_col0) (type: int) - Statistics: Num rows: 5 Data size: 1906 Basic stats: PARTIAL Column stats: NONE + Statistics: Num rows: 5 Data size: 469 Basic stats: COMPLETE Column stats: NONE Execution mode: llap LLAP IO: may be used (ACID table) Reducer 2 @@ -116,10 +116,10 @@ STAGE PLANS: Select Operator expressions: KEY.reducesinkkey0 (type: struct<writeid:bigint,bucketid:int,rowid:bigint>), 'foo' (type: string), 'bar' (type: string), '2008-04-08' (type: string) outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 5 Data size: 1906 Basic stats: PARTIAL Column stats: NONE + Statistics: Num rows: 5 Data size: 469 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - Statistics: Num rows: 5 Data size: 1906 Basic stats: PARTIAL Column stats: NONE + Statistics: Num rows: 5 Data size: 469 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat @@ -191,19 +191,19 @@ STAGE PLANS: TableScan alias: acid_part filterExpr: ((key = 'foo') and (ds) IN ('2008-04-08')) (type: boolean) - Statistics: Num rows: 159 Data size: 104317 Basic stats: PARTIAL Column stats: PARTIAL + Statistics: Num rows: 1601 Data size: 444998 Basic stats: COMPLETE Column stats: PARTIAL Filter Operator predicate: (key = 'foo') (type: boolean) - Statistics: Num rows: 5 Data size: 3280 Basic stats: PARTIAL Column stats: PARTIAL + Statistics: Num rows: 5 Data size: 1355 Basic stats: COMPLETE Column stats: PARTIAL Select Operator expressions: ROW__ID (type: struct<writeid:bigint,bucketid:int,rowid:bigint>), ds (type: string) outputColumnNames: _col0, _col3 - Statistics: Num rows: 5 Data size: 3280 Basic stats: PARTIAL Column stats: PARTIAL + Statistics: Num rows: 5 Data size: 2170 Basic stats: COMPLETE Column stats: PARTIAL Reduce Output Operator key expressions: _col0 (type: struct<writeid:bigint,bucketid:int,rowid:bigint>) sort order: + Map-reduce partition columns: UDFToInteger(_col0) (type: int) - Statistics: Num rows: 5 Data size: 3280 Basic stats: PARTIAL Column stats: PARTIAL + Statistics: Num rows: 5 Data size: 2170 Basic stats: COMPLETE Column stats: PARTIAL value expressions: _col3 (type: string) Execution mode: llap LLAP IO: may be used (ACID table) @@ -213,10 +213,10 @@ STAGE PLANS: Select Operator expressions: KEY.reducesinkkey0 (type: struct<writeid:bigint,bucketid:int,rowid:bigint>), 'foo' (type: string), 'bar' (type: string), VALUE._col1 (type: string) outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 5 Data size: 3280 Basic stats: PARTIAL Column stats: PARTIAL + Statistics: Num rows: 5 Data size: 2170 Basic stats: COMPLETE Column stats: PARTIAL File Output Operator compressed: false - Statistics: Num rows: 5 Data size: 3280 Basic stats: PARTIAL Column stats: PARTIAL + Statistics: Num rows: 5 Data size: 2170 Basic stats: COMPLETE Column stats: PARTIAL table: input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat @@ -383,19 +383,19 @@ STAGE PLANS: TableScan alias: acid_part_sdpo filterExpr: ((key = 'foo') and (ds = '2008-04-08')) (type: boolean) - Statistics: Num rows: 176 Data size: 67063 Basic stats: PARTIAL Column stats: NONE + Statistics: Num rows: 1601 Data size: 150414 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: (key = 'foo') (type: boolean) - Statistics: Num rows: 5 Data size: 1905 Basic stats: PARTIAL Column stats: NONE + Statistics: Num rows: 5 Data size: 469 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: ROW__ID (type: struct<writeid:bigint,bucketid:int,rowid:bigint>) outputColumnNames: _col0 - Statistics: Num rows: 5 Data size: 1905 Basic stats: PARTIAL Column stats: NONE + Statistics: Num rows: 5 Data size: 469 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: struct<writeid:bigint,bucketid:int,rowid:bigint>) sort order: + Map-reduce partition columns: UDFToInteger(_col0) (type: int) - Statistics: Num rows: 5 Data size: 1905 Basic stats: PARTIAL Column stats: NONE + Statistics: Num rows: 5 Data size: 469 Basic stats: COMPLETE Column stats: NONE Execution mode: llap LLAP IO: may be used (ACID table) Reducer 2 @@ -404,10 +404,10 @@ STAGE PLANS: Select Operator expressions: KEY.reducesinkkey0 (type: struct<writeid:bigint,bucketid:int,rowid:bigint>), 'foo' (type: string), 'bar' (type: string), '2008-04-08' (type: string) outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 5 Data size: 1905 Basic stats: PARTIAL Column stats: NONE + Statistics: Num rows: 5 Data size: 469 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - Statistics: Num rows: 5 Data size: 1905 Basic stats: PARTIAL Column stats: NONE + Statistics: Num rows: 5 Data size: 469 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat @@ -479,19 +479,19 @@ STAGE PLANS: TableScan alias: acid_part_sdpo filterExpr: ((key = 'foo') and (ds) IN ('2008-04-08')) (type: boolean) - Statistics: Num rows: 171 Data size: 112152 Basic stats: PARTIAL Column stats: PARTIAL + Statistics: Num rows: 1601 Data size: 444998 Basic stats: COMPLETE Column stats: PARTIAL Filter Operator predicate: (key = 'foo') (type: boolean) - Statistics: Num rows: 5 Data size: 3279 Basic stats: PARTIAL Column stats: PARTIAL + Statistics: Num rows: 5 Data size: 1355 Basic stats: COMPLETE Column stats: PARTIAL Select Operator expressions: ROW__ID (type: struct<writeid:bigint,bucketid:int,rowid:bigint>), ds (type: string) outputColumnNames: _col0, _col3 - Statistics: Num rows: 5 Data size: 3279 Basic stats: PARTIAL Column stats: PARTIAL + Statistics: Num rows: 5 Data size: 2170 Basic stats: COMPLETE Column stats: PARTIAL Reduce Output Operator key expressions: _col3 (type: string), '_bucket_number' (type: string), _col0 (type: struct<writeid:bigint,bucketid:int,rowid:bigint>) sort order: +++ Map-reduce partition columns: _col3 (type: string) - Statistics: Num rows: 5 Data size: 3279 Basic stats: PARTIAL Column stats: PARTIAL + Statistics: Num rows: 5 Data size: 2170 Basic stats: COMPLETE Column stats: PARTIAL Execution mode: llap LLAP IO: may be used (ACID table) Reducer 2 @@ -500,11 +500,11 @@ STAGE PLANS: Select Operator expressions: KEY._col0 (type: struct<writeid:bigint,bucketid:int,rowid:bigint>), 'foo' (type: string), 'bar' (type: string), KEY._col3 (type: string), KEY.'_bucket_number' (type: string) outputColumnNames: _col0, _col1, _col2, _col3, '_bucket_number' - Statistics: Num rows: 5 Data size: 3279 Basic stats: PARTIAL Column stats: PARTIAL + Statistics: Num rows: 5 Data size: 1790 Basic stats: COMPLETE Column stats: PARTIAL File Output Operator compressed: false Dp Sort State: PARTITION_BUCKET_SORTED - Statistics: Num rows: 5 Data size: 3279 Basic stats: PARTIAL Column stats: PARTIAL + Statistics: Num rows: 5 Data size: 1790 Basic stats: COMPLETE Column stats: PARTIAL table: input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat @@ -680,19 +680,19 @@ STAGE PLANS: TableScan alias: acid_2l_part filterExpr: ((key = 'foo') and (ds = '2008-04-08') and (hr = 11)) (type: boolean) - Statistics: Num rows: 157 Data size: 60527 Basic stats: PARTIAL Column stats: NONE + Statistics: Num rows: 1601 Data size: 150414 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: (key = 'foo') (type: boolean) - Statistics: Num rows: 5 Data size: 1927 Basic stats: PARTIAL Column stats: NONE + Statistics: Num rows: 5 Data size: 469 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: ROW__ID (type: struct<writeid:bigint,bucketid:int,rowid:bigint>) outputColumnNames: _col0 - Statistics: Num rows: 5 Data size: 1927 Basic stats: PARTIAL Column stats: NONE + Statistics: Num rows: 5 Data size: 469 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: struct<writeid:bigint,bucketid:int,rowid:bigint>) sort order: + Map-reduce partition columns: UDFToInteger(_col0) (type: int) - Statistics: Num rows: 5 Data size: 1927 Basic stats: PARTIAL Column stats: NONE + Statistics: Num rows: 5 Data size: 469 Basic stats: COMPLETE Column stats: NONE Execution mode: llap LLAP IO: may be used (ACID table) Reducer 2 @@ -701,10 +701,10 @@ STAGE PLANS: Select Operator expressions: KEY.reducesinkkey0 (type: struct<writeid:bigint,bucketid:int,rowid:bigint>), 'foo' (type: string), 'bar' (type: string), '2008-04-08' (type: string), 11 (type: int) outputColumnNames: _col0, _col1, _col2, _col3, _col4 - Statistics: Num rows: 5 Data size: 1927 Basic stats: PARTIAL Column stats: NONE + Statistics: Num rows: 5 Data size: 469 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - Statistics: Num rows: 5 Data size: 1927 Basic stats: PARTIAL Column stats: NONE + Statistics: Num rows: 5 Data size: 469 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat @@ -777,19 +777,19 @@ STAGE PLANS: TableScan alias: acid_2l_part filterExpr: ((key = 'foo') and (ds = '2008-04-08') and (hr >= 11)) (type: boolean) - Statistics: Num rows: 1804 Data size: 235871 Basic stats: PARTIAL Column stats: PARTIAL + Statistics: Num rows: 3201 Data size: 313458 Basic stats: COMPLETE Column stats: PARTIAL Filter Operator predicate: (key = 'foo') (type: boolean) - Statistics: Num rows: 5 Data size: 653 Basic stats: PARTIAL Column stats: PARTIAL + Statistics: Num rows: 5 Data size: 455 Basic stats: COMPLETE Column stats: PARTIAL Select Operator expressions: ROW__ID (type: struct<writeid:bigint,bucketid:int,rowid:bigint>), hr (type: int) outputColumnNames: _col0, _col4 - Statistics: Num rows: 5 Data size: 653 Basic stats: PARTIAL Column stats: PARTIAL + Statistics: Num rows: 5 Data size: 1740 Basic stats: COMPLETE Column stats: PARTIAL Reduce Output Operator key expressions: _col0 (type: struct<writeid:bigint,bucketid:int,rowid:bigint>) sort order: + Map-reduce partition columns: UDFToInteger(_col0) (type: int) - Statistics: Num rows: 5 Data size: 653 Basic stats: PARTIAL Column stats: PARTIAL + Statistics: Num rows: 5 Data size: 1740 Basic stats: COMPLETE Column stats: PARTIAL value expressions: _col4 (type: int) Execution mode: llap LLAP IO: may be used (ACID table) @@ -799,10 +799,10 @@ STAGE PLANS: Select Operator expressions: KEY.reducesinkkey0 (type: struct<writeid:bigint,bucketid:int,rowid:bigint>), 'foo' (type: string), 'bar' (type: string), '2008-04-08' (type: string), VALUE._col2 (type: int) outputColumnNames: _col0, _col1, _col2, _col3, _col4 - Statistics: Num rows: 5 Data size: 653 Basic stats: PARTIAL Column stats: PARTIAL + Statistics: Num rows: 5 Data size: 1740 Basic stats: COMPLETE Column stats: PARTIAL File Output Operator compressed: false - Statistics: Num rows: 5 Data size: 653 Basic stats: PARTIAL Column stats: PARTIAL + Statistics: Num rows: 5 Data size: 1740 Basic stats: COMPLETE Column stats: PARTIAL table: input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat @@ -904,19 +904,19 @@ STAGE PLANS: TableScan alias: acid_2l_part filterExpr: (value = 'bar') (type: boolean) - Statistics: Num rows: 2015 Data size: 726272 Basic stats: PARTIAL Column stats: PARTIAL + Statistics: Num rows: 4200 Data size: 1253037 Basic stats: COMPLETE Column stats: PARTIAL Filter Operator predicate: (value = 'bar') (type: boolean) - Statistics: Num rows: 5 Data size: 1802 Basic stats: PARTIAL Column stats: PARTIAL + Statistics: Num rows: 5 Data size: 1375 Basic stats: COMPLETE Column stats: PARTIAL Select Operator expressions: ROW__ID (type: struct<writeid:bigint,bucketid:int,rowid:bigint>), ds (type: string), hr (type: int) outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 5 Data size: 1802 Basic stats: PARTIAL Column stats: PARTIAL + Statistics: Num rows: 5 Data size: 1320 Basic stats: COMPLETE Column stats: PARTIAL Reduce Output Operator key expressions: _col0 (type: struct<writeid:bigint,bucketid:int,rowid:bigint>) sort order: + Map-reduce partition columns: UDFToInteger(_col0) (type: int) - Statistics: Num rows: 5 Data size: 1802 Basic stats: PARTIAL Column stats: PARTIAL + Statistics: Num rows: 5 Data size: 1320 Basic stats: COMPLETE Column stats: PARTIAL value expressions: _col1 (type: string), _col2 (type: int) Execution mode: llap LLAP IO: may be used (ACID table) @@ -926,10 +926,10 @@ STAGE PLANS: Select Operator expressions: KEY.reducesinkkey0 (type: struct<writeid:bigint,bucketid:int,rowid:bigint>), VALUE._col0 (type: string), VALUE._col1 (type: int) outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 5 Data size: 1802 Basic stats: PARTIAL Column stats: PARTIAL + Statistics: Num rows: 5 Data size: 1320 Basic stats: COMPLETE Column stats: PARTIAL File Output Operator compressed: false - Statistics: Num rows: 5 Data size: 1802 Basic stats: PARTIAL Column stats: PARTIAL + Statistics: Num rows: 5 Data size: 1320 Basic stats: COMPLETE Column stats: PARTIAL table: input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat @@ -1103,19 +1103,19 @@ STAGE PLANS: TableScan alias: acid_2l_part_sdpo filterExpr: ((key = 'foo') and (ds = '2008-04-08') and (hr = 11)) (type: boolean) - Statistics: Num rows: 157 Data size: 60527 Basic stats: PARTIAL Column stats: NONE + Statistics: Num rows: 1601 Data size: 150414 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: (key = 'foo') (type: boolean) - Statistics: Num rows: 5 Data size: 1927 Basic stats: PARTIAL Column stats: NONE + Statistics: Num rows: 5 Data size: 469 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: ROW__ID (type: struct<writeid:bigint,bucketid:int,rowid:bigint>) outputColumnNames: _col0 - Statistics: Num rows: 5 Data size: 1927 Basic stats: PARTIAL Column stats: NONE + Statistics: Num rows: 5 Data size: 469 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: struct<writeid:bigint,bucketid:int,rowid:bigint>) sort order: + Map-reduce partition columns: UDFToInteger(_col0) (type: int) - Statistics: Num rows: 5 Data size: 1927 Basic stats: PARTIAL Column stats: NONE + Statistics: Num rows: 5 Data size: 469 Basic stats: COMPLETE Column stats: NONE Execution mode: llap LLAP IO: may be used (ACID table) Reducer 2 @@ -1124,10 +1124,10 @@ STAGE PLANS: Select Operator expressions: KEY.reducesinkkey0 (type: struct<writeid:bigint,bucketid:int,rowid:bigint>), 'foo' (type: string), 'bar' (type: string), '2008-04-08' (type: string), 11 (type: int) outputColumnNames: _col0, _col1, _col2, _col3, _col4 - Statistics: Num rows: 5 Data size: 1927 Basic stats: PARTIAL Column stats: NONE + Statistics: Num rows: 5 Data size: 469 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - Statistics: Num rows: 5 Data size: 1927 Basic stats: PARTIAL Column stats: NONE + Statistics: Num rows: 5 Data size: 469 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat @@ -1200,19 +1200,19 @@ STAGE PLANS: TableScan alias: acid_2l_part_sdpo filterExpr: ((key = 'foo') and (ds = '2008-04-08') and (hr >= 11)) (type: boolean) - Statistics: Num rows: 1804 Data size: 235871 Basic stats: PARTIAL Column stats: PARTIAL + Statistics: Num rows: 3201 Data size: 313458 Basic stats: COMPLETE Column stats: PARTIAL Filter Operator predicate: (key = 'foo') (type: boolean) - Statistics: Num rows: 5 Data size: 653 Basic stats: PARTIAL Column stats: PARTIAL + Statistics: Num rows: 5 Data size: 455 Basic stats: COMPLETE Column stats: PARTIAL Select Operator expressions: ROW__ID (type: struct<writeid:bigint,bucketid:int,rowid:bigint>), hr (type: int) outputColumnNames: _col0, _col4 - Statistics: Num rows: 5 Data size: 653 Basic stats: PARTIAL Column stats: PARTIAL + Statistics: Num rows: 5 Data size: 1740 Basic stats: COMPLETE Column stats: PARTIAL Reduce Output Operator key expressions: '2008-04-08' (type: string), _col4 (type: int), '_bucket_number' (type: string), _col0 (type: struct<writeid:bigint,bucketid:int,rowid:bigint>) sort order: ++++ Map-reduce partition columns: '2008-04-08' (type: string), _col4 (type: int) - Statistics: Num rows: 5 Data size: 653 Basic stats: PARTIAL Column stats: PARTIAL + Statistics: Num rows: 5 Data size: 1740 Basic stats: COMPLETE Column stats: PARTIAL Execution mode: llap LLAP IO: may be used (ACID table) Reducer 2 @@ -1221,11 +1221,11 @@ STAGE PLANS: Select Operator expressions: KEY._col0 (type: struct<writeid:bigint,bucketid:int,rowid:bigint>), 'foo' (type: string), 'bar' (type: string), '2008-04-08' (type: string), KEY._col4 (type: int), KEY.'_bucket_number' (type: string) outputColumnNames: _col0, _col1, _col2, _col3, _col4, '_bucket_number' - Statistics: Num rows: 5 Data size: 653 Basic stats: PARTIAL Column stats: PARTIAL + Statistics: Num rows: 5 Data size: 1360 Basic stats: COMPLETE Column stats: PARTIAL File Output Operator compressed: false Dp Sort State: PARTITION_BUCKET_SORTED - Statistics: Num rows: 5 Data size: 653 Basic stats: PARTIAL Column stats: PARTIAL + Statistics: Num rows: 5 Data size: 1360 Basic stats: COMPLETE Column stats: PARTIAL table: input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat @@ -1327,19 +1327,19 @@ STAGE PLANS: TableScan alias: acid_2l_part_sdpo filterExpr: (value = 'bar') (type: boolean) - Statistics: Num rows: 2015 Data size: 726272 Basic stats: PARTIAL Column stats: PARTIAL + Statistics: Num rows: 4952 Data size: 1456618 Basic stats: COMPLETE Column stats: PARTIAL Filter Operator predicate: (value = 'bar') (type: boolean) - Statistics: Num rows: 5 Data size: 1802 Basic stats: PARTIAL Column stats: PARTIAL + Statistics: Num rows: 5 Data size: 1375 Basic stats: COMPLETE Column stats: PARTIAL Select Operator expressions: ROW__ID (type: struct<writeid:bigint,bucketid:int,rowid:bigint>), ds (type: string), hr (type: int) outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 5 Data size: 1802 Basic stats: PARTIAL Column stats: PARTIAL + Statistics: Num rows: 5 Data size: 1320 Basic stats: COMPLETE Column stats: PARTIAL Reduce Output Operator key expressions: _col1 (type: string), _col2 (type: int), '_bucket_number' (type: string), _col0 (type: struct<writeid:bigint,bucketid:int,rowid:bigint>) sort order: ++++ Map-reduce partition columns: _col1 (type: string), _col2 (type: int) - Statistics: Num rows: 5 Data size: 1802 Basic stats: PARTIAL Column stats: PARTIAL + Statistics: Num rows: 5 Data size: 1320 Basic stats: COMPLETE Column stats: PARTIAL Execution mode: llap LLAP IO: may be used (ACID table) Reducer 2 @@ -1348,11 +1348,11 @@ STAGE PLANS: Select Operator expressions: KEY._col0 (type: struct<writeid:bigint,bucketid:int,rowid:bigint>), KEY._col1 (type: string), KEY._col2 (type: int), KEY.'_bucket_number' (type: string) outputColumnNames: _col0, _col1, _col2, '_bucket_number' - Statistics: Num rows: 5 Data size: 1802 Basic stats: PARTIAL Column stats: PARTIAL + Statistics: Num rows: 5 Data size: 1810 Basic stats: COMPLETE Column stats: PARTIAL File Output Operator compressed: false Dp Sort State: PARTITION_BUCKET_SORTED - Statistics: Num rows: 5 Data size: 1802 Basic stats: PARTIAL Column stats: PARTIAL + Statistics: Num rows: 5 Data size: 1810 Basic stats: COMPLETE Column stats: PARTIAL table: input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat @@ -1526,19 +1526,19 @@ STAGE PLANS: TableScan alias: acid_2l_part_sdpo_no_cp filterExpr: ((key = 'foo') and (ds = '2008-04-08') and (hr = 11)) (type: boolean) - Statistics: Num rows: 97 Data size: 82922 Basic stats: PARTIAL Column stats: PARTIAL + Statistics: Num rows: 1601 Data size: 599036 Basic stats: COMPLETE Column stats: PARTIAL Filter Operator predicate: (key = 'foo') (type: boolean) - Statistics: Num rows: 5 Data size: 4274 Basic stats: PARTIAL Column stats: PARTIAL + Statistics: Num rows: 5 Data size: 1860 Basic stats: COMPLETE Column stats: PARTIAL Select Operator expressions: ROW__ID (type: struct<writeid:bigint,bucketid:int,rowid:bigint>), key (type: string), ds (type: string), hr (type: int) outputColumnNames: _col0, _col1, _col3, _col4 - Statistics: Num rows: 5 Data size: 4274 Basic stats: PARTIAL Column stats: PARTIAL + Statistics: Num rows: 5 Data size: 2675 Basic stats: COMPLETE Column stats: PARTIAL Reduce Output Operator key expressions: _col3 (type: string), _col4 (type: int), '_bucket_number' (type: string), _col0 (type: struct<writeid:bigint,bucketid:int,rowid:bigint>) sort order: ++++ Map-reduce partition columns: _col3 (type: string), _col4 (type: int) - Statistics: Num rows: 5 Data size: 4274 Basic stats: PARTIAL Column stats: PARTIAL + Statistics: Num rows: 5 Data size: 2675 Basic stats: COMPLETE Column stats: PARTIAL value expressions: _col1 (type: string), 'bar' (type: string) Execution mode: llap LLAP IO: may be used (ACID table) @@ -1548,11 +1548,11 @@ STAGE PLANS: Select Operator expressions: KEY._col0 (type: struct<writeid:bigint,bucketid:int,rowid:bigint>), VALUE._col1 (type: string), VALUE._col2 (type: string), KEY._col3 (type: string), KEY._col4 (type: int), KEY.'_bucket_number' (type: string) outputColumnNames: _col0, _col1, _col2, _col3, _col4, '_bucket_number' - Statistics: Num rows: 5 Data size: 4274 Basic stats: PARTIAL Column stats: PARTIAL + Statistics: Num rows: 5 Data size: 3165 Basic stats: COMPLETE Column stats: PARTIAL File Output Operator compressed: false Dp Sort State: PARTITION_BUCKET_SORTED - Statistics: Num rows: 5 Data size: 4274 Basic stats: PARTIAL Column stats: PARTIAL + Statistics: Num rows: 5 Data size: 3165 Basic stats: COMPLETE Column stats: PARTIAL table: input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat @@ -1625,19 +1625,19 @@ STAGE PLANS: TableScan alias: acid_2l_part_sdpo_no_cp filterExpr: ((key = 'foo') and (ds = '2008-04-08') and (hr >= 11)) (type: boolean) - Statistics: Num rows: 1725 Data size: 705510 Basic stats: PARTIAL Column stats: PARTIAL + Statistics: Num rows: 3201 Data size: 1197516 Basic stats: COMPLETE Column stats: PARTIAL Filter Operator predicate: (key = 'foo') (type: boolean) - Statistics: Num rows: 5 Data size: 2044 Basic stats: PARTIAL Column stats: PARTIAL + Statistics: Num rows: 5 Data size: 1860 Basic stats: COMPLETE Column stats: PARTIAL Select Operator expressions: ROW__ID (type: struct<writeid:bigint,bucketid:int,rowid:bigint>), key (type: string), ds (type: string), hr (type: int) outputColumnNames: _col0, _col1, _col3, _col4 - Statistics: Num rows: 5 Data size: 2044 Basic stats: PARTIAL Column stats: PARTIAL + Statistics: Num rows: 5 Data size: 2675 Basic stats: COMPLETE Column stats: PARTIAL Reduce Output Operator key expressions: _col3 (type: string), _col4 (type: int), '_bucket_number' (type: string), _col0 (type: struct<writeid:bigint,bucketid:int,rowid:bigint>) sort order: ++++ Map-reduce partition columns: _col3 (type: string), _col4 (type: int) - Statistics: Num rows: 5 Data size: 2044 Basic stats: PARTIAL Column stats: PARTIAL + Statistics: Num rows: 5 Data size: 2675 Basic stats: COMPLETE Column stats: PARTIAL value expressions: _col1 (type: string), 'bar' (type: string) Execution mode: llap LLAP IO: may be used (ACID table) @@ -1647,11 +1647,11 @@ STAGE PLANS: Select Operator expressions: KEY._col0 (type: struct<writeid:bigint,bucketid:int,rowid:bigint>), VALUE._col1 (type: string), VALUE._col2 (type: string), KEY._col3 (type: string), KEY._col4 (type: int), KEY.'_bucket_number' (type: string) outputColumnNames: _col0, _col1, _col2, _col3, _col4, '_bucket_number' - Statistics: Num rows: 5 Data size: 2044 Basic stats: PARTIAL Column stats: PARTIAL + Statistics: Num rows: 5 Data size: 3165 Basic stats: COMPLETE Column stats: PARTIAL File Output Operator compressed: false Dp Sort State: PARTITION_BUCKET_SORTED - Statistics: Num rows: 5 Data size: 2044 Basic stats: PARTIAL Column stats: PARTIAL + Statistics: Num rows: 5 Data size: 3165 Basic stats: COMPLETE Column stats: PARTIAL table: input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat http://git-wip-us.apache.org/repos/asf/hive/blob/08eba3e1/ql/src/test/results/clientpositive/llap/enforce_constraint_notnull.q.out ---------------------------------------------------------------------- diff --git a/ql/src/test/results/clientpositive/llap/enforce_constraint_notnull.q.out b/ql/src/test/results/clientpositive/llap/enforce_constraint_notnull.q.out index 8a5a326..5a3f519 100644 --- a/ql/src/test/results/clientpositive/llap/enforce_constraint_notnull.q.out +++ b/ql/src/test/results/clientpositive/llap/enforce_constraint_notnull.q.out @@ -3237,19 +3237,19 @@ STAGE PLANS: TableScan alias: acid_uami_n1 filterExpr: (((de = 109.23) or (de = 119.23)) and enforce_constraint(vc is not null)) (type: boolean) - Statistics: Num rows: 281 Data size: 87904 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1002 Data size: 312584 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: (((de = 109.23) or (de = 119.23)) and enforce_constraint(vc is not null)) (type: boolean) - Statistics: Num rows: 5 Data size: 1564 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 5 Data size: 1559 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: ROW__ID (type: struct<writeid:bigint,bucketid:int,rowid:bigint>), i (type: int), vc (type: varchar(128)) outputColumnNames: _col0, _col1, _col3 - Statistics: Num rows: 5 Data size: 1564 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 5 Data size: 1559 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: struct<writeid:bigint,bucketid:int,rowid:bigint>) sort order: + Map-reduce partition columns: UDFToInteger(_col0) (type: int) - Statistics: Num rows: 5 Data size: 1564 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 5 Data size: 1559 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: int), _col3 (type: varchar(128)) Execution mode: vectorized, llap LLAP IO: may be used (ACID table) @@ -3259,10 +3259,10 @@ STAGE PLANS: Select Operator expressions: KEY.reducesinkkey0 (type: struct<writeid:bigint,bucketid:int,rowid:bigint>), VALUE._col0 (type: int), 3.14 (type: decimal(5,2)), VALUE._col1 (type: varchar(128)) outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 5 Data size: 1564 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 5 Data size: 1559 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - Statistics: Num rows: 5 Data size: 1564 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 5 Data size: 1559 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat @@ -3331,19 +3331,19 @@ STAGE PLANS: TableScan alias: acid_uami_n1 filterExpr: ((de = 3.14) and enforce_constraint((i is not null and vc is not null))) (type: boolean) - Statistics: Num rows: 320 Data size: 100040 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1002 Data size: 312584 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: ((de = 3.14) and enforce_constraint((i is not null and vc is not null))) (type: boolean) - Statistics: Num rows: 2 Data size: 625 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 2 Data size: 623 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: ROW__ID (type: struct<writeid:bigint,bucketid:int,rowid:bigint>), i (type: int), vc (type: varchar(128)) outputColumnNames: _col0, _col1, _col3 - Statistics: Num rows: 2 Data size: 625 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 2 Data size: 623 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: struct<writeid:bigint,bucketid:int,rowid:bigint>) sort order: + Map-reduce partition columns: UDFToInteger(_col0) (type: int) - Statistics: Num rows: 2 Data size: 625 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 2 Data size: 623 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: int), _col3 (type: varchar(128)) Execution mode: vectorized, llap LLAP IO: may be used (ACID table) @@ -3353,10 +3353,10 @@ STAGE PLANS: Select Operator expressions: KEY.reducesinkkey0 (type: struct<writeid:bigint,bucketid:int,rowid:bigint>), VALUE._col0 (type: int), 3.14 (type: decimal(5,2)), VALUE._col1 (type: varchar(128)) outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 2 Data size: 625 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 2 Data size: 623 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - Statistics: Num rows: 2 Data size: 625 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 2 Data size: 623 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat
