Repository: hive
Updated Branches:
  refs/heads/master 0eea6871d -> 4157792a9


HIVE-16068: BloomFilter expectedEntries not always using NDV when it's 
available during runtime filtering (Jason Dere, reviewed by Gunther Hagleitner)


Project: http://git-wip-us.apache.org/repos/asf/hive/repo
Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/4157792a
Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/4157792a
Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/4157792a

Branch: refs/heads/master
Commit: 4157792a98380276a23d080619866fd74a6903ce
Parents: 0eea687
Author: Jason Dere <[email protected]>
Authored: Wed Mar 1 10:41:43 2017 -0800
Committer: Jason Dere <[email protected]>
Committed: Wed Mar 1 10:41:43 2017 -0800

----------------------------------------------------------------------
 .../hive/ql/udf/generic/GenericUDAFBloomFilter.java | 16 +++++++++++-----
 .../results/clientpositive/llap/mergejoin.q.out     |  4 ++--
 2 files changed, 13 insertions(+), 7 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/hive/blob/4157792a/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDAFBloomFilter.java
----------------------------------------------------------------------
diff --git 
a/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDAFBloomFilter.java 
b/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDAFBloomFilter.java
index b32e04a..788aace 100644
--- 
a/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDAFBloomFilter.java
+++ 
b/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDAFBloomFilter.java
@@ -20,10 +20,13 @@ package org.apache.hadoop.hive.ql.udf.generic;
 
 import org.apache.hadoop.hive.common.type.HiveDecimal;
 import org.apache.hadoop.hive.ql.exec.Operator;
+import org.apache.hadoop.hive.ql.exec.SelectOperator;
 import org.apache.hadoop.hive.ql.exec.UDFArgumentTypeException;
 import org.apache.hadoop.hive.ql.metadata.HiveException;
 import org.apache.hadoop.hive.ql.parse.SemanticException;
 import org.apache.hadoop.hive.ql.plan.ColStatistics;
+import org.apache.hadoop.hive.ql.plan.ExprNodeColumnDesc;
+import org.apache.hadoop.hive.ql.plan.ExprNodeDescUtils;
 import org.apache.hadoop.hive.ql.plan.Statistics;
 import org.apache.hadoop.hive.ql.plan.Statistics.State;
 import org.apache.hadoop.hive.serde2.io.DateWritable;
@@ -68,7 +71,7 @@ public class GenericUDAFBloomFilter implements 
GenericUDAFResolver2 {
    */
   public static class GenericUDAFBloomFilterEvaluator extends 
GenericUDAFEvaluator {
     // Source operator to get the number of entries
-    private Operator<?> sourceOperator;
+    private SelectOperator sourceOperator;
     private long maxEntries = 0;
 
     // ObjectInspector for input data.
@@ -258,10 +261,13 @@ public class GenericUDAFBloomFilter implements 
GenericUDAFResolver2 {
         switch (stats.getColumnStatsState()) {
           case COMPLETE:
           case PARTIAL:
-            // There should only be column stats for one column, use if that 
is the case.
+            // There should only be one column in sourceOperator
             List<ColStatistics> colStats = stats.getColumnStats();
-            if (colStats.size() == 1) {
-              long ndv = colStats.get(0).getCountDistint();
+            ExprNodeColumnDesc colExpr = ExprNodeDescUtils.getColumnExpr(
+                sourceOperator.getConf().getColList().get(0));
+            if (colExpr != null
+                && stats.getColumnStatisticsFromColName(colExpr.getColumn()) 
!= null) {
+              long ndv = 
stats.getColumnStatisticsFromColName(colExpr.getColumn()).getCountDistint();
               if (ndv > 0) {
                 expectedEntries = ndv;
               }
@@ -279,7 +285,7 @@ public class GenericUDAFBloomFilter implements 
GenericUDAFResolver2 {
       return sourceOperator;
     }
 
-    public void setSourceOperator(Operator<?> sourceOperator) {
+    public void setSourceOperator(SelectOperator sourceOperator) {
       this.sourceOperator = sourceOperator;
     }
 

http://git-wip-us.apache.org/repos/asf/hive/blob/4157792a/ql/src/test/results/clientpositive/llap/mergejoin.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/llap/mergejoin.q.out 
b/ql/src/test/results/clientpositive/llap/mergejoin.q.out
index cc6cf47..2dcfd6b 100644
--- a/ql/src/test/results/clientpositive/llap/mergejoin.q.out
+++ b/ql/src/test/results/clientpositive/llap/mergejoin.q.out
@@ -63,7 +63,7 @@ STAGE PLANS:
                         outputColumnNames: _col0
                         Statistics: Num rows: 25 Data size: 4375 Basic stats: 
COMPLETE Column stats: COMPLETE
                         Group By Operator
-                          aggregations: min(_col0), max(_col0), 
bloom_filter(_col0, expectedEntries=25)
+                          aggregations: min(_col0), max(_col0), 
bloom_filter(_col0, expectedEntries=14)
                           mode: hash
                           outputColumnNames: _col0, _col1, _col2
                           Statistics: Num rows: 1 Data size: 552 Basic stats: 
COMPLETE Column stats: COMPLETE
@@ -95,7 +95,7 @@ STAGE PLANS:
             Execution mode: vectorized, llap
             Reduce Operator Tree:
               Group By Operator
-                aggregations: min(VALUE._col0), max(VALUE._col1), 
bloom_filter(VALUE._col2, expectedEntries=25)
+                aggregations: min(VALUE._col0), max(VALUE._col1), 
bloom_filter(VALUE._col2, expectedEntries=14)
                 mode: final
                 outputColumnNames: _col0, _col1, _col2
                 Statistics: Num rows: 1 Data size: 552 Basic stats: COMPLETE 
Column stats: COMPLETE

Reply via email to