Author: hashutosh
Date: Fri Sep 12 17:30:46 2014
New Revision: 1624596

URL: http://svn.apache.org/r1624596
Log:
HIVE-8062 : Stats collection for columns fails on a partitioned table with null values in partitioning column (Ashutosh Chauhan via Gunther Hagleitner)

Modified:
    hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/ColumnStatsTask.java
    hive/trunk/ql/src/test/queries/clientpositive/stats_only_null.q
    hive/trunk/ql/src/test/results/clientpositive/stats_only_null.q.out

Modified: hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/ColumnStatsTask.java
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/ColumnStatsTask.java?rev=1624596&r1=1624595&r2=1624596&view=diff
==============================================================================
--- hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/ColumnStatsTask.java (original)
+++ hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/ColumnStatsTask.java Fri Sep 12 17:30:46 2014
@@ -28,6 +28,7 @@ import org.apache.commons.logging.Log;
 import org.apache.commons.logging.LogFactory;
 import org.apache.hadoop.hive.common.type.HiveDecimal;
 import org.apache.hadoop.hive.conf.HiveConf;
+import org.apache.hadoop.hive.conf.HiveConf.ConfVars;
 import org.apache.hadoop.hive.metastore.Warehouse;
 import org.apache.hadoop.hive.metastore.api.BinaryColumnStatsData;
 import org.apache.hadoop.hive.metastore.api.BooleanColumnStatsData;
@@ -305,8 +306,10 @@ public class ColumnStatsTask extends Tas
         List<String> partVals = new ArrayList<String>();
         // Iterate over partition columns to figure out partition name
         for (int i = fields.size() - partColSchema.size(); i < fields.size(); 
i++) {
-          
partVals.add(((PrimitiveObjectInspector)fields.get(i).getFieldObjectInspector()).
-            getPrimitiveJavaObject(list.get(i)).toString());
+          Object partVal = 
((PrimitiveObjectInspector)fields.get(i).getFieldObjectInspector()).
+              getPrimitiveJavaObject(list.get(i));
+          partVals.add(partVal == null ? // could be null for default partition
+            this.conf.getVar(ConfVars.DEFAULTPARTITIONNAME) : 
partVal.toString());
         }
         partName = Warehouse.makePartName(partColSchema, partVals);
       }

Modified: hive/trunk/ql/src/test/queries/clientpositive/stats_only_null.q
URL: 
http://svn.apache.org/viewvc/hive/trunk/ql/src/test/queries/clientpositive/stats_only_null.q?rev=1624596&r1=1624595&r2=1624596&view=diff
==============================================================================
--- hive/trunk/ql/src/test/queries/clientpositive/stats_only_null.q (original)
+++ hive/trunk/ql/src/test/queries/clientpositive/stats_only_null.q Fri Sep 12 
17:30:46 2014
@@ -34,6 +34,17 @@ select count(*), count(a), count(b), cou
 
 select count(*), count(a), count(b), count(c), count(d) from stats_null;
 select count(*), count(a), count(b), count(c), count(d) from stats_null_part;
+
+drop table stats_null_part;
+set hive.exec.dynamic.partition.mode=nonstrict;
+CREATE TABLE stats_null_part(a double, b int, c STRING, d smallint) 
partitioned by (dt int) STORED AS TEXTFILE; 
+
+insert into table stats_null_part partition(dt) select a,b,c,d,b from 
temps_null ;
+analyze table stats_null_part compute statistics for columns;
+
+describe formatted stats_null_part.a partition(dt = 1);
+
+reset hive.exec.dynamic.partition.mode;
 drop table stats_null;
 drop table stats_null_part;
 drop table temps_null;

Modified: hive/trunk/ql/src/test/results/clientpositive/stats_only_null.q.out
URL: 
http://svn.apache.org/viewvc/hive/trunk/ql/src/test/results/clientpositive/stats_only_null.q.out?rev=1624596&r1=1624595&r2=1624596&view=diff
==============================================================================
--- hive/trunk/ql/src/test/results/clientpositive/stats_only_null.q.out 
(original)
+++ hive/trunk/ql/src/test/results/clientpositive/stats_only_null.q.out Fri Sep 
12 17:30:46 2014
@@ -334,6 +334,60 @@ POSTHOOK: query: select count(*), count(
 POSTHOOK: type: QUERY
 #### A masked pattern was here ####
 10     8       8       10      10
+PREHOOK: query: drop table stats_null_part
+PREHOOK: type: DROPTABLE
+PREHOOK: Input: default@stats_null_part
+PREHOOK: Output: default@stats_null_part
+POSTHOOK: query: drop table stats_null_part
+POSTHOOK: type: DROPTABLE
+POSTHOOK: Input: default@stats_null_part
+POSTHOOK: Output: default@stats_null_part
+PREHOOK: query: CREATE TABLE stats_null_part(a double, b int, c STRING, d 
smallint) partitioned by (dt int) STORED AS TEXTFILE
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@stats_null_part
+POSTHOOK: query: CREATE TABLE stats_null_part(a double, b int, c STRING, d 
smallint) partitioned by (dt int) STORED AS TEXTFILE
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@stats_null_part
+PREHOOK: query: insert into table stats_null_part partition(dt) select 
a,b,c,d,b from temps_null
+PREHOOK: type: QUERY
+PREHOOK: Input: default@temps_null
+PREHOOK: Output: default@stats_null_part
+POSTHOOK: query: insert into table stats_null_part partition(dt) select 
a,b,c,d,b from temps_null
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@temps_null
+POSTHOOK: Output: default@stats_null_part@dt=1
+POSTHOOK: Output: default@stats_null_part@dt=__HIVE_DEFAULT_PARTITION__
+POSTHOOK: Lineage: stats_null_part PARTITION(dt=1).a SIMPLE 
[(temps_null)temps_null.FieldSchema(name:a, type:double, comment:null), ]
+POSTHOOK: Lineage: stats_null_part PARTITION(dt=1).b SIMPLE 
[(temps_null)temps_null.FieldSchema(name:b, type:int, comment:null), ]
+POSTHOOK: Lineage: stats_null_part PARTITION(dt=1).c SIMPLE 
[(temps_null)temps_null.FieldSchema(name:c, type:string, comment:null), ]
+POSTHOOK: Lineage: stats_null_part PARTITION(dt=1).d SIMPLE 
[(temps_null)temps_null.FieldSchema(name:d, type:smallint, comment:null), ]
+POSTHOOK: Lineage: stats_null_part PARTITION(dt=__HIVE_DEFAULT_PARTITION__).a 
SIMPLE [(temps_null)temps_null.FieldSchema(name:a, type:double, comment:null), ]
+POSTHOOK: Lineage: stats_null_part PARTITION(dt=__HIVE_DEFAULT_PARTITION__).b 
SIMPLE [(temps_null)temps_null.FieldSchema(name:b, type:int, comment:null), ]
+POSTHOOK: Lineage: stats_null_part PARTITION(dt=__HIVE_DEFAULT_PARTITION__).c 
SIMPLE [(temps_null)temps_null.FieldSchema(name:c, type:string, comment:null), ]
+POSTHOOK: Lineage: stats_null_part PARTITION(dt=__HIVE_DEFAULT_PARTITION__).d 
SIMPLE [(temps_null)temps_null.FieldSchema(name:d, type:smallint, 
comment:null), ]
+PREHOOK: query: analyze table stats_null_part compute statistics for columns
+PREHOOK: type: QUERY
+PREHOOK: Input: default@stats_null_part
+PREHOOK: Input: default@stats_null_part@dt=1
+PREHOOK: Input: default@stats_null_part@dt=__HIVE_DEFAULT_PARTITION__
+#### A masked pattern was here ####
+POSTHOOK: query: analyze table stats_null_part compute statistics for columns
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@stats_null_part
+POSTHOOK: Input: default@stats_null_part@dt=1
+POSTHOOK: Input: default@stats_null_part@dt=__HIVE_DEFAULT_PARTITION__
+#### A masked pattern was here ####
+PREHOOK: query: describe formatted stats_null_part.a partition(dt = 1)
+PREHOOK: type: DESCTABLE
+PREHOOK: Input: default@stats_null_part
+POSTHOOK: query: describe formatted stats_null_part.a partition(dt = 1)
+POSTHOOK: type: DESCTABLE
+POSTHOOK: Input: default@stats_null_part
+# col_name             data_type               min                     max     
                num_nulls               distinct_count          avg_col_len     
        max_col_len             num_trues               num_falses              
comment             
+                                                                               
 
+a                      double                  1.0                     1.0     
                1                       1                                       
                                                                                
from deserializer   
 PREHOOK: query: drop table stats_null
 PREHOOK: type: DROPTABLE
 PREHOOK: Input: default@stats_null


Reply via email to