Repository: hive
Updated Branches:
  refs/heads/master 0ad71121d -> 804b125e7


HIVE-20153: Count and Sum UDF consume more memory in Hive 2+ (Aihua Xu, reviewed by Gopal V)


Project: http://git-wip-us.apache.org/repos/asf/hive/repo
Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/804b125e
Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/804b125e
Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/804b125e

Branch: refs/heads/master
Commit: 804b125e743516ca0139a22c42244b980ffaf47f
Parents: 0ad7112
Author: Aihua Xu <aihu...@apache.org>
Authored: Thu Jul 26 13:05:31 2018 -0700
Committer: Aihua Xu <aihu...@apache.org>
Committed: Fri Jul 27 13:41:16 2018 -0700

----------------------------------------------------------------------
 .../apache/hadoop/hive/ql/udf/generic/GenericUDAFCount.java | 6 +++++-
 .../apache/hadoop/hive/ql/udf/generic/GenericUDAFSum.java | 9 ++++++---
 2 files changed, 11 insertions(+), 4 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/hive/blob/804b125e/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDAFCount.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDAFCount.java b/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDAFCount.java
index 2d7cc8d..c2414d2 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDAFCount.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDAFCount.java
@@ -153,7 +153,7 @@ public class GenericUDAFCount implements GenericUDAFResolver2 {
     @Override
     public void reset(AggregationBuffer agg) throws HiveException {
       ((CountAgg) agg).value = 0;
-      ((CountAgg) agg).uniqueObjects = new HashSet<ObjectInspectorObject>();
+      ((CountAgg) agg).uniqueObjects = null;
     }
 
     @Override
@@ -177,7 +177,11 @@ public class GenericUDAFCount implements GenericUDAFResolver2 {
 
       // Skip the counting if the values are the same for windowing COUNT(DISTINCT) case
       if (countThisRow && isWindowingDistinct()) {
+        if (((CountAgg) agg).uniqueObjects == null) {
+          ((CountAgg) agg).uniqueObjects = new HashSet<ObjectInspectorObject>();
+        }
         HashSet<ObjectInspectorObject> uniqueObjs = ((CountAgg) agg).uniqueObjects;
+
         ObjectInspectorObject obj = new ObjectInspectorObject(
             ObjectInspectorUtils.copyToStandardObject(parameters, inputOI, ObjectInspectorCopyOption.JAVA),
             outputOI);

http://git-wip-us.apache.org/repos/asf/hive/blob/804b125e/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDAFSum.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDAFSum.java b/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDAFSum.java
index 1439b64..e30b903 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDAFSum.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDAFSum.java
@@ -184,6 +184,9 @@ public class GenericUDAFSum extends AbstractGenericUDAFResolver {
       }
 
       if (isWindowingDistinct()) {
+        if (agg.uniqueObjects == null) {
+          agg.uniqueObjects = new HashSet<ObjectInspectorObject>();
+        }
         HashSet<ObjectInspectorObject> uniqueObjs = agg.uniqueObjects;
         ObjectInspectorObject obj = input instanceof ObjectInspectorObject ?
             (ObjectInspectorObject)input :
@@ -266,7 +269,7 @@ public class GenericUDAFSum extends AbstractGenericUDAFResolver {
       SumAgg<HiveDecimalWritable> bdAgg = (SumAgg<HiveDecimalWritable>) agg;
       bdAgg.empty = true;
       bdAgg.sum = new HiveDecimalWritable(0);
-      bdAgg.uniqueObjects = new HashSet<ObjectInspectorObject>();
+      bdAgg.uniqueObjects = null;
     }
 
     boolean warned = false;
@@ -410,7 +413,7 @@ public class GenericUDAFSum extends AbstractGenericUDAFResolver {
       SumDoubleAgg myagg = (SumDoubleAgg) agg;
       myagg.empty = true;
       myagg.sum = 0.0;
-      myagg.uniqueObjects = new HashSet<ObjectInspectorObject>();
+      myagg.uniqueObjects = null;
     }
 
     boolean warned = false;
@@ -540,7 +543,7 @@ public class GenericUDAFSum extends AbstractGenericUDAFResolver {
       SumLongAgg myagg = (SumLongAgg) agg;
       myagg.empty = true;
       myagg.sum = 0L;
-      myagg.uniqueObjects = new HashSet<ObjectInspectorObject>();
+      myagg.uniqueObjects = null;
     }
 
     private boolean warned = false;