Repository: hive
Updated Branches:
  refs/heads/branch-2.0 6fed7783d -> 7ca1c4d58
HIVE-12684: NPE in stats annotation when all values in decimal column are NULLs (Prasanth Jayachandran reviewed by Pengcheng Xiong) Project: http://git-wip-us.apache.org/repos/asf/hive/repo Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/7ca1c4d5 Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/7ca1c4d5 Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/7ca1c4d5 Branch: refs/heads/branch-2.0 Commit: 7ca1c4d5849da3d83a84ab51c189f5912948f338 Parents: 6fed778 Author: Prasanth Jayachandran <[email protected]> Authored: Thu Dec 17 13:38:57 2015 -0600 Committer: Prasanth Jayachandran <[email protected]> Committed: Thu Dec 17 13:41:15 2015 -0600 ---------------------------------------------------------------------- .../apache/hadoop/hive/ql/stats/StatsUtils.java | 20 ++-- .../test/queries/clientpositive/decimal_stats.q | 16 +++ .../results/clientpositive/decimal_stats.q.out | 106 +++++++++++++++++++ 3 files changed, 135 insertions(+), 7 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/hive/blob/7ca1c4d5/ql/src/java/org/apache/hadoop/hive/ql/stats/StatsUtils.java ---------------------------------------------------------------------- diff --git a/ql/src/java/org/apache/hadoop/hive/ql/stats/StatsUtils.java b/ql/src/java/org/apache/hadoop/hive/ql/stats/StatsUtils.java index 149cbc1..2f78fe8 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/stats/StatsUtils.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/stats/StatsUtils.java @@ -709,13 +709,19 @@ public class StatsUtils { cs.setAvgColLen(JavaDataModel.get().lengthOfDecimal()); cs.setCountDistint(csd.getDecimalStats().getNumDVs()); cs.setNumNulls(csd.getDecimalStats().getNumNulls()); - Decimal val = csd.getDecimalStats().getHighValue(); - BigDecimal maxVal = HiveDecimal. - create(new BigInteger(val.getUnscaled()), val.getScale()).bigDecimalValue(); - val = csd.getDecimalStats().getLowValue(); - BigDecimal minVal = HiveDecimal. 
- create(new BigInteger(val.getUnscaled()), val.getScale()).bigDecimalValue(); - cs.setRange(minVal, maxVal); + Decimal highValue = csd.getDecimalStats().getHighValue(); + Decimal lowValue = csd.getDecimalStats().getLowValue(); + if (highValue != null && highValue.getUnscaled() != null + && lowValue != null && lowValue.getUnscaled() != null) { + HiveDecimal maxHiveDec = HiveDecimal.create(new BigInteger(highValue.getUnscaled()), highValue.getScale()); + BigDecimal maxVal = maxHiveDec == null ? null : maxHiveDec.bigDecimalValue(); + HiveDecimal minHiveDec = HiveDecimal.create(new BigInteger(lowValue.getUnscaled()), lowValue.getScale()); + BigDecimal minVal = minHiveDec == null ? null : minHiveDec.bigDecimalValue(); + + if (minVal != null && maxVal != null) { + cs.setRange(minVal, maxVal); + } + } } else if (colTypeLowerCase.equals(serdeConstants.DATE_TYPE_NAME)) { cs.setAvgColLen(JavaDataModel.get().lengthOfDate()); } else { http://git-wip-us.apache.org/repos/asf/hive/blob/7ca1c4d5/ql/src/test/queries/clientpositive/decimal_stats.q ---------------------------------------------------------------------- diff --git a/ql/src/test/queries/clientpositive/decimal_stats.q b/ql/src/test/queries/clientpositive/decimal_stats.q new file mode 100644 index 0000000..2370e7d --- /dev/null +++ b/ql/src/test/queries/clientpositive/decimal_stats.q @@ -0,0 +1,16 @@ +set hive.stats.fetch.column.stats=true; +drop table if exists decimal_1; + +create table decimal_1 (t decimal(4,2), u decimal(5), v decimal); + +desc decimal_1; + +insert overwrite table decimal_1 + select cast('17.29' as decimal(4,2)), 3.1415926BD, null from src; + +analyze table decimal_1 compute statistics for columns; + +desc formatted decimal_1 v; + +explain select * from decimal_1 order by 1 limit 100; +drop table decimal_1; http://git-wip-us.apache.org/repos/asf/hive/blob/7ca1c4d5/ql/src/test/results/clientpositive/decimal_stats.q.out ---------------------------------------------------------------------- diff --git 
a/ql/src/test/results/clientpositive/decimal_stats.q.out b/ql/src/test/results/clientpositive/decimal_stats.q.out new file mode 100644 index 0000000..dabf7f8 --- /dev/null +++ b/ql/src/test/results/clientpositive/decimal_stats.q.out @@ -0,0 +1,106 @@ +PREHOOK: query: drop table if exists decimal_1 +PREHOOK: type: DROPTABLE +POSTHOOK: query: drop table if exists decimal_1 +POSTHOOK: type: DROPTABLE +PREHOOK: query: create table decimal_1 (t decimal(4,2), u decimal(5), v decimal) +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@decimal_1 +POSTHOOK: query: create table decimal_1 (t decimal(4,2), u decimal(5), v decimal) +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@decimal_1 +PREHOOK: query: desc decimal_1 +PREHOOK: type: DESCTABLE +PREHOOK: Input: default@decimal_1 +POSTHOOK: query: desc decimal_1 +POSTHOOK: type: DESCTABLE +POSTHOOK: Input: default@decimal_1 +t decimal(4,2) +u decimal(5,0) +v decimal(10,0) +PREHOOK: query: insert overwrite table decimal_1 + select cast('17.29' as decimal(4,2)), 3.1415926BD, null from src +PREHOOK: type: QUERY +PREHOOK: Input: default@src +PREHOOK: Output: default@decimal_1 +POSTHOOK: query: insert overwrite table decimal_1 + select cast('17.29' as decimal(4,2)), 3.1415926BD, null from src +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +POSTHOOK: Output: default@decimal_1 +POSTHOOK: Lineage: decimal_1.t EXPRESSION [] +POSTHOOK: Lineage: decimal_1.u EXPRESSION [] +POSTHOOK: Lineage: decimal_1.v EXPRESSION [] +PREHOOK: query: analyze table decimal_1 compute statistics for columns +PREHOOK: type: QUERY +PREHOOK: Input: default@decimal_1 +#### A masked pattern was here #### +POSTHOOK: query: analyze table decimal_1 compute statistics for columns +POSTHOOK: type: QUERY +POSTHOOK: Input: default@decimal_1 +#### A masked pattern was here #### +PREHOOK: query: desc formatted decimal_1 v +PREHOOK: type: DESCTABLE +PREHOOK: Input: default@decimal_1 
+POSTHOOK: query: desc formatted decimal_1 v +POSTHOOK: type: DESCTABLE +POSTHOOK: Input: default@decimal_1 +# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment + +v decimal(10,0) 500 1 from deserializer +PREHOOK: query: explain select * from decimal_1 order by 1 limit 100 +PREHOOK: type: QUERY +POSTHOOK: query: explain select * from decimal_1 order by 1 limit 100 +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: decimal_1 + Statistics: Num rows: 500 Data size: 112000 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: t (type: decimal(4,2)), u (type: decimal(5,0)), v (type: decimal(10,0)) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 500 Data size: 112000 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: 1 (type: int) + sort order: + + Statistics: Num rows: 500 Data size: 112000 Basic stats: COMPLETE Column stats: COMPLETE + TopN Hash Memory Usage: 0.1 + value expressions: _col0 (type: decimal(4,2)), _col1 (type: decimal(5,0)), _col2 (type: decimal(10,0)) + Reduce Operator Tree: + Select Operator + expressions: VALUE._col0 (type: decimal(4,2)), VALUE._col1 (type: decimal(5,0)), VALUE._col2 (type: decimal(10,0)) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 500 Data size: 112000 Basic stats: COMPLETE Column stats: COMPLETE + Limit + Number of rows: 100 + Statistics: Num rows: 100 Data size: 22400 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 100 Data size: 22400 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: 
org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: 100 + Processor Tree: + ListSink + +PREHOOK: query: drop table decimal_1 +PREHOOK: type: DROPTABLE +PREHOOK: Input: default@decimal_1 +PREHOOK: Output: default@decimal_1 +POSTHOOK: query: drop table decimal_1 +POSTHOOK: type: DROPTABLE +POSTHOOK: Input: default@decimal_1 +POSTHOOK: Output: default@decimal_1
