Repository: hive
Updated Branches:
  refs/heads/master 0ad71121d -> 804b125e7


HIVE-20153: Count and Sum UDF consume more memory in Hive 2+ (Aihua Xu, reviewed by Gopal V)


Project: http://git-wip-us.apache.org/repos/asf/hive/repo
Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/804b125e
Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/804b125e
Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/804b125e

Branch: refs/heads/master
Commit: 804b125e743516ca0139a22c42244b980ffaf47f
Parents: 0ad7112
Author: Aihua Xu <aihu...@apache.org>
Authored: Thu Jul 26 13:05:31 2018 -0700
Committer: Aihua Xu <aihu...@apache.org>
Committed: Fri Jul 27 13:41:16 2018 -0700

----------------------------------------------------------------------
 .../apache/hadoop/hive/ql/udf/generic/GenericUDAFCount.java | 6 +++++-
 .../apache/hadoop/hive/ql/udf/generic/GenericUDAFSum.java   | 9 ++++++---
 2 files changed, 11 insertions(+), 4 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/hive/blob/804b125e/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDAFCount.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDAFCount.java b/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDAFCount.java
index 2d7cc8d..c2414d2 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDAFCount.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDAFCount.java
@@ -153,7 +153,7 @@ public class GenericUDAFCount implements GenericUDAFResolver2 {
     @Override
     public void reset(AggregationBuffer agg) throws HiveException {
       ((CountAgg) agg).value = 0;
-      ((CountAgg) agg).uniqueObjects = new HashSet<ObjectInspectorObject>();
+      ((CountAgg) agg).uniqueObjects = null;
     }
 
     @Override
@@ -177,7 +177,11 @@ public class GenericUDAFCount implements GenericUDAFResolver2 {
 
         // Skip the counting if the values are the same for windowing COUNT(DISTINCT) case
         if (countThisRow && isWindowingDistinct()) {
+          if (((CountAgg) agg).uniqueObjects == null) {
+            ((CountAgg) agg).uniqueObjects = new HashSet<ObjectInspectorObject>();
+          }
           HashSet<ObjectInspectorObject> uniqueObjs = ((CountAgg) agg).uniqueObjects;
+
           ObjectInspectorObject obj = new ObjectInspectorObject(
               ObjectInspectorUtils.copyToStandardObject(parameters, inputOI, ObjectInspectorCopyOption.JAVA),
               outputOI);

http://git-wip-us.apache.org/repos/asf/hive/blob/804b125e/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDAFSum.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDAFSum.java b/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDAFSum.java
index 1439b64..e30b903 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDAFSum.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDAFSum.java
@@ -184,6 +184,9 @@ public class GenericUDAFSum extends AbstractGenericUDAFResolver {
       }
 
       if (isWindowingDistinct()) {
+        if (agg.uniqueObjects == null) {
+          agg.uniqueObjects = new HashSet<ObjectInspectorObject>();
+        }
         HashSet<ObjectInspectorObject> uniqueObjs = agg.uniqueObjects;
         ObjectInspectorObject obj = input instanceof ObjectInspectorObject ?
             (ObjectInspectorObject)input :
@@ -266,7 +269,7 @@ public class GenericUDAFSum extends AbstractGenericUDAFResolver {
       SumAgg<HiveDecimalWritable> bdAgg = (SumAgg<HiveDecimalWritable>) agg;
       bdAgg.empty = true;
       bdAgg.sum = new HiveDecimalWritable(0);
-      bdAgg.uniqueObjects = new HashSet<ObjectInspectorObject>();
+      bdAgg.uniqueObjects = null;
     }
 
     boolean warned = false;
@@ -410,7 +413,7 @@ public class GenericUDAFSum extends AbstractGenericUDAFResolver {
       SumDoubleAgg myagg = (SumDoubleAgg) agg;
       myagg.empty = true;
       myagg.sum = 0.0;
-      myagg.uniqueObjects = new HashSet<ObjectInspectorObject>();
+      myagg.uniqueObjects = null;
     }
 
     boolean warned = false;
@@ -540,7 +543,7 @@ public class GenericUDAFSum extends AbstractGenericUDAFResolver {
       SumLongAgg myagg = (SumLongAgg) agg;
       myagg.empty = true;
       myagg.sum = 0L;
-      myagg.uniqueObjects = new HashSet<ObjectInspectorObject>();
+      myagg.uniqueObjects = null;
     }
 
     private boolean warned = false;

Reply via email to