Repository: hive
Updated Branches:
  refs/heads/master 0f1c112fc -> cac5804de


HIVE-12684: NPE in stats annotation when all values in decimal column are NULLs 
(Prasanth Jayachandran reviewed by Pengcheng Xiong)


Project: http://git-wip-us.apache.org/repos/asf/hive/repo
Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/cac5804d
Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/cac5804d
Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/cac5804d

Branch: refs/heads/master
Commit: cac5804de034ad54821e0524091cff0f4a97476b
Parents: 0f1c112
Author: Prasanth Jayachandran <[email protected]>
Authored: Thu Dec 17 13:38:57 2015 -0600
Committer: Prasanth Jayachandran <[email protected]>
Committed: Thu Dec 17 13:38:57 2015 -0600

----------------------------------------------------------------------
 .../apache/hadoop/hive/ql/stats/StatsUtils.java |  20 ++--
 .../test/queries/clientpositive/decimal_stats.q |  16 +++
 .../results/clientpositive/decimal_stats.q.out  | 106 +++++++++++++++++++
 3 files changed, 135 insertions(+), 7 deletions(-)
----------------------------------------------------------------------
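
For context, the failure this commit fixes: a decimal column whose every row is NULL has no high/low endpoint in its column statistics, so building a BigDecimal from the returned value unconditionally throws a NullPointerException during stats annotation. Below is a minimal sketch of that failure mode, assuming the metastore effectively hands back nothing for the missing endpoint; FakeDecimal and toBigDecimal are hypothetical stand-ins for illustration, not Hive classes.

import java.math.BigDecimal;
import java.math.BigInteger;

// Hypothetical stand-in for the metastore's Decimal struct: unscaled bytes + scale.
class FakeDecimal {
    byte[] unscaled;
    int scale;
    byte[] getUnscaled() { return unscaled; }
    int getScale() { return scale; }
}

public class AllNullDecimalNpe {
    // Pre-fix shape of the conversion: assumes the endpoint is always present.
    static BigDecimal toBigDecimal(FakeDecimal d) {
        return new BigDecimal(new BigInteger(d.getUnscaled()), d.getScale());
    }

    public static void main(String[] args) {
        FakeDecimal highValue = null;   // what an all-NULL column effectively yields
        toBigDecimal(highValue);        // throws NullPointerException, as in HIVE-12684
    }
}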


http://git-wip-us.apache.org/repos/asf/hive/blob/cac5804d/ql/src/java/org/apache/hadoop/hive/ql/stats/StatsUtils.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/stats/StatsUtils.java b/ql/src/java/org/apache/hadoop/hive/ql/stats/StatsUtils.java
index 149cbc1..2f78fe8 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/stats/StatsUtils.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/stats/StatsUtils.java
@@ -709,13 +709,19 @@ public class StatsUtils {
       cs.setAvgColLen(JavaDataModel.get().lengthOfDecimal());
       cs.setCountDistint(csd.getDecimalStats().getNumDVs());
       cs.setNumNulls(csd.getDecimalStats().getNumNulls());
-      Decimal val = csd.getDecimalStats().getHighValue();
-      BigDecimal maxVal = HiveDecimal.
-          create(new BigInteger(val.getUnscaled()), val.getScale()).bigDecimalValue();
-      val = csd.getDecimalStats().getLowValue();
-      BigDecimal minVal = HiveDecimal.
-          create(new BigInteger(val.getUnscaled()), val.getScale()).bigDecimalValue();
-      cs.setRange(minVal, maxVal);
+      Decimal highValue = csd.getDecimalStats().getHighValue();
+      Decimal lowValue = csd.getDecimalStats().getLowValue();
+      if (highValue != null && highValue.getUnscaled() != null
+          && lowValue != null && lowValue.getUnscaled() != null) {
+        HiveDecimal maxHiveDec = HiveDecimal.create(new BigInteger(highValue.getUnscaled()), highValue.getScale());
+        BigDecimal maxVal = maxHiveDec == null ? null : maxHiveDec.bigDecimalValue();
+        HiveDecimal minHiveDec = HiveDecimal.create(new BigInteger(lowValue.getUnscaled()), lowValue.getScale());
+        BigDecimal minVal = minHiveDec == null ? null : minHiveDec.bigDecimalValue();
+
+        if (minVal != null && maxVal != null) {
+          cs.setRange(minVal, maxVal);
+        }
+      }
     } else if (colTypeLowerCase.equals(serdeConstants.DATE_TYPE_NAME)) {
       cs.setAvgColLen(JavaDataModel.get().lengthOfDate());
     } else {

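The shape of the patched branch, restated as a self-contained sketch: the range is set only when both endpoints are present; otherwise it is simply skipped and the planner still gets the null count and NDV. StubDecimal, StubColStatistics and fillDecimalStats are hypothetical stand-ins so the snippet compiles on its own, and java.math.BigDecimal stands in for HiveDecimal; this is not Hive's actual code.

import java.math.BigDecimal;
import java.math.BigInteger;

public class GuardedDecimalRangeSketch {

    // Stands in for the metastore Decimal struct: unscaled bytes + scale.
    static final class StubDecimal {
        byte[] unscaled;
        int scale;
        StubDecimal(byte[] u, int s) { unscaled = u; scale = s; }
        byte[] getUnscaled() { return unscaled; }
        int getScale() { return scale; }
    }

    // Stands in for ColStatistics: only the fields needed for the sketch.
    static final class StubColStatistics {
        BigDecimal min, max;
        long numNulls;
        void setNumNulls(long n) { numNulls = n; }
        void setRange(BigDecimal lo, BigDecimal hi) { min = lo; max = hi; }
    }

    // Mirrors the guarded flow: set the range only when both endpoints exist.
    static void fillDecimalStats(StubColStatistics cs, StubDecimal highValue,
                                 StubDecimal lowValue, long numNulls) {
        cs.setNumNulls(numNulls);
        if (highValue != null && highValue.getUnscaled() != null
            && lowValue != null && lowValue.getUnscaled() != null) {
            BigDecimal maxVal = new BigDecimal(new BigInteger(highValue.getUnscaled()), highValue.getScale());
            BigDecimal minVal = new BigDecimal(new BigInteger(lowValue.getUnscaled()), lowValue.getScale());
            cs.setRange(minVal, maxVal);
        }
        // Otherwise: leave the range unset; null count and NDV are still recorded.
    }

    public static void main(String[] args) {
        StubColStatistics allNulls = new StubColStatistics();
        fillDecimalStats(allNulls, null, null, 500);            // all-NULL column: no NPE
        System.out.println("range set? " + (allNulls.min != null));

        StubColStatistics normal = new StubColStatistics();
        StubDecimal low  = new StubDecimal(BigInteger.valueOf(314).toByteArray(), 2);   // 3.14
        StubDecimal high = new StubDecimal(BigInteger.valueOf(1729).toByteArray(), 2);  // 17.29
        fillDecimalStats(normal, high, low, 0);
        System.out.println(normal.min + " .. " + normal.max);   // 3.14 .. 17.29
    }
}
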
http://git-wip-us.apache.org/repos/asf/hive/blob/cac5804d/ql/src/test/queries/clientpositive/decimal_stats.q
----------------------------------------------------------------------
diff --git a/ql/src/test/queries/clientpositive/decimal_stats.q b/ql/src/test/queries/clientpositive/decimal_stats.q
new file mode 100644
index 0000000..2370e7d
--- /dev/null
+++ b/ql/src/test/queries/clientpositive/decimal_stats.q
@@ -0,0 +1,16 @@
+set hive.stats.fetch.column.stats=true;
+drop table if exists decimal_1;
+
+create table decimal_1 (t decimal(4,2), u decimal(5), v decimal);
+
+desc decimal_1;
+
+insert overwrite table decimal_1
+  select cast('17.29' as decimal(4,2)), 3.1415926BD, null from src;
+
+analyze table decimal_1 compute statistics for columns;
+
+desc formatted decimal_1 v;
+
+explain select * from decimal_1 order by 1 limit 100;
+drop table decimal_1;

http://git-wip-us.apache.org/repos/asf/hive/blob/cac5804d/ql/src/test/results/clientpositive/decimal_stats.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/decimal_stats.q.out b/ql/src/test/results/clientpositive/decimal_stats.q.out
new file mode 100644
index 0000000..dabf7f8
--- /dev/null
+++ b/ql/src/test/results/clientpositive/decimal_stats.q.out
@@ -0,0 +1,106 @@
+PREHOOK: query: drop table if exists decimal_1
+PREHOOK: type: DROPTABLE
+POSTHOOK: query: drop table if exists decimal_1
+POSTHOOK: type: DROPTABLE
+PREHOOK: query: create table decimal_1 (t decimal(4,2), u decimal(5), v decimal)
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@decimal_1
+POSTHOOK: query: create table decimal_1 (t decimal(4,2), u decimal(5), v decimal)
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@decimal_1
+PREHOOK: query: desc decimal_1
+PREHOOK: type: DESCTABLE
+PREHOOK: Input: default@decimal_1
+POSTHOOK: query: desc decimal_1
+POSTHOOK: type: DESCTABLE
+POSTHOOK: Input: default@decimal_1
+t                      decimal(4,2)                                
+u                      decimal(5,0)                                
+v                      decimal(10,0)                               
+PREHOOK: query: insert overwrite table decimal_1
+  select cast('17.29' as decimal(4,2)), 3.1415926BD, null from src
+PREHOOK: type: QUERY
+PREHOOK: Input: default@src
+PREHOOK: Output: default@decimal_1
+POSTHOOK: query: insert overwrite table decimal_1
+  select cast('17.29' as decimal(4,2)), 3.1415926BD, null from src
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@src
+POSTHOOK: Output: default@decimal_1
+POSTHOOK: Lineage: decimal_1.t EXPRESSION []
+POSTHOOK: Lineage: decimal_1.u EXPRESSION []
+POSTHOOK: Lineage: decimal_1.v EXPRESSION []
+PREHOOK: query: analyze table decimal_1 compute statistics for columns
+PREHOOK: type: QUERY
+PREHOOK: Input: default@decimal_1
+#### A masked pattern was here ####
+POSTHOOK: query: analyze table decimal_1 compute statistics for columns
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@decimal_1
+#### A masked pattern was here ####
+PREHOOK: query: desc formatted decimal_1 v
+PREHOOK: type: DESCTABLE
+PREHOOK: Input: default@decimal_1
+POSTHOOK: query: desc formatted decimal_1 v
+POSTHOOK: type: DESCTABLE
+POSTHOOK: Input: default@decimal_1
+# col_name             data_type               min                     max                     num_nulls               distinct_count          avg_col_len             max_col_len             num_trues               num_falses              comment
+
+v                      decimal(10,0)                                                           500                     1                                                                                                                               from deserializer
+PREHOOK: query: explain select * from decimal_1 order by 1 limit 100
+PREHOOK: type: QUERY
+POSTHOOK: query: explain select * from decimal_1 order by 1 limit 100
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+  Stage-1 is a root stage
+  Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+  Stage: Stage-1
+    Map Reduce
+      Map Operator Tree:
+          TableScan
+            alias: decimal_1
+            Statistics: Num rows: 500 Data size: 112000 Basic stats: COMPLETE Column stats: COMPLETE
+            Select Operator
+              expressions: t (type: decimal(4,2)), u (type: decimal(5,0)), v (type: decimal(10,0))
+              outputColumnNames: _col0, _col1, _col2
+              Statistics: Num rows: 500 Data size: 112000 Basic stats: COMPLETE Column stats: COMPLETE
+              Reduce Output Operator
+                key expressions: 1 (type: int)
+                sort order: +
+                Statistics: Num rows: 500 Data size: 112000 Basic stats: COMPLETE Column stats: COMPLETE
+                TopN Hash Memory Usage: 0.1
+                value expressions: _col0 (type: decimal(4,2)), _col1 (type: decimal(5,0)), _col2 (type: decimal(10,0))
+      Reduce Operator Tree:
+        Select Operator
+          expressions: VALUE._col0 (type: decimal(4,2)), VALUE._col1 (type: decimal(5,0)), VALUE._col2 (type: decimal(10,0))
+          outputColumnNames: _col0, _col1, _col2
+          Statistics: Num rows: 500 Data size: 112000 Basic stats: COMPLETE Column stats: COMPLETE
+          Limit
+            Number of rows: 100
+            Statistics: Num rows: 100 Data size: 22400 Basic stats: COMPLETE Column stats: COMPLETE
+            File Output Operator
+              compressed: false
+              Statistics: Num rows: 100 Data size: 22400 Basic stats: COMPLETE Column stats: COMPLETE
+              table:
+                  input format: org.apache.hadoop.mapred.TextInputFormat
+                  output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+                  serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+  Stage: Stage-0
+    Fetch Operator
+      limit: 100
+      Processor Tree:
+        ListSink
+
+PREHOOK: query: drop table decimal_1
+PREHOOK: type: DROPTABLE
+PREHOOK: Input: default@decimal_1
+PREHOOK: Output: default@decimal_1
+POSTHOOK: query: drop table decimal_1
+POSTHOOK: type: DROPTABLE
+POSTHOOK: Input: default@decimal_1
+POSTHOOK: Output: default@decimal_1
