Repository: hive
Updated Branches:
  refs/heads/master 860ba06fb -> 4f90a7156


HIVE-9534: incorrect result set for query that projects a windowed aggregate 
(Aihua Xu, reviewed by Yongzhi Chen)


Project: http://git-wip-us.apache.org/repos/asf/hive/repo
Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/4f90a715
Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/4f90a715
Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/4f90a715

Branch: refs/heads/master
Commit: 4f90a71565d47f4b9d8ce267ace206d6a1f199fa
Parents: 860ba06
Author: Aihua Xu <aihu...@apache.org>
Authored: Thu Feb 4 10:57:16 2016 -0500
Committer: Aihua Xu <aihu...@apache.org>
Committed: Fri Feb 12 15:21:31 2016 -0500

----------------------------------------------------------------------
 .../calcite/functions/CanAggregateDistinct.java | 27 +++++++++++++++++
 .../functions/HiveSqlCountAggFunction.java      |  3 +-
 .../functions/HiveSqlSumAggFunction.java        |  4 +--
 .../translator/SqlFunctionConverter.java        | 29 ++++++++++--------
 .../hive/ql/udf/generic/GenericUDAFAverage.java | 31 ++++++++++++++++++--
 .../queries/clientpositive/windowing_distinct.q |  8 +++++
 .../clientpositive/windowing_distinct.q.out     | 26 ++++++++++++++++
 7 files changed, 111 insertions(+), 17 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/hive/blob/4f90a715/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/functions/CanAggregateDistinct.java
----------------------------------------------------------------------
diff --git 
a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/functions/CanAggregateDistinct.java
 
b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/functions/CanAggregateDistinct.java
new file mode 100644
index 0000000..c24f3c0
--- /dev/null
+++ 
b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/functions/CanAggregateDistinct.java
@@ -0,0 +1,27 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.hive.ql.optimizer.calcite.functions;
+
+/**
+ * This is the UDAF interface to support DISTINCT function.
+ *
+ */
+public interface CanAggregateDistinct {
+  boolean isDistinct();
+}

http://git-wip-us.apache.org/repos/asf/hive/blob/4f90a715/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/functions/HiveSqlCountAggFunction.java
----------------------------------------------------------------------
diff --git 
a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/functions/HiveSqlCountAggFunction.java
 
b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/functions/HiveSqlCountAggFunction.java
index 58191e5..bc48707 100644
--- 
a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/functions/HiveSqlCountAggFunction.java
+++ 
b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/functions/HiveSqlCountAggFunction.java
@@ -30,7 +30,7 @@ import org.apache.calcite.sql.type.SqlReturnTypeInference;
 import org.apache.calcite.sql.type.SqlTypeName;
 import org.apache.calcite.util.ImmutableIntList;
 
-public class HiveSqlCountAggFunction extends SqlAggFunction {
+public class HiveSqlCountAggFunction extends SqlAggFunction implements 
CanAggregateDistinct {
 
   final boolean                isDistinct;
   final SqlReturnTypeInference returnTypeInference;
@@ -52,6 +52,7 @@ public class HiveSqlCountAggFunction extends SqlAggFunction {
     this.operandTypeInference = operandTypeInference;
   }
 
+  @Override
   public boolean isDistinct() {
     return isDistinct;
   }

http://git-wip-us.apache.org/repos/asf/hive/blob/4f90a715/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/functions/HiveSqlSumAggFunction.java
----------------------------------------------------------------------
diff --git 
a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/functions/HiveSqlSumAggFunction.java
 
b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/functions/HiveSqlSumAggFunction.java
index 498cd0e..dc286a2 100644
--- 
a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/functions/HiveSqlSumAggFunction.java
+++ 
b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/functions/HiveSqlSumAggFunction.java
@@ -46,7 +46,7 @@ import com.google.common.collect.ImmutableList;
  * <code>long</code>, <code>float</code>, <code>double</code>), and the result
  * is the same type.
  */
-public class HiveSqlSumAggFunction extends SqlAggFunction {
+public class HiveSqlSumAggFunction extends SqlAggFunction implements 
CanAggregateDistinct{
   final boolean isDistinct;
   final SqlReturnTypeInference returnTypeInference;
   final SqlOperandTypeInference operandTypeInference;
@@ -70,7 +70,7 @@ public class HiveSqlSumAggFunction extends SqlAggFunction {
   }
 
   //~ Methods ----------------------------------------------------------------
-
+  @Override
   public boolean isDistinct() {
     return isDistinct;
   }

http://git-wip-us.apache.org/repos/asf/hive/blob/4f90a715/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/translator/SqlFunctionConverter.java
----------------------------------------------------------------------
diff --git 
a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/translator/SqlFunctionConverter.java
 
b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/translator/SqlFunctionConverter.java
index 19aa414..b4c6e05 100644
--- 
a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/translator/SqlFunctionConverter.java
+++ 
b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/translator/SqlFunctionConverter.java
@@ -45,6 +45,7 @@ import org.apache.hadoop.hive.ql.exec.FunctionRegistry;
 import org.apache.hadoop.hive.ql.exec.UDFArgumentException;
 import org.apache.hadoop.hive.ql.optimizer.calcite.CalciteSemanticException;
 import 
org.apache.hadoop.hive.ql.optimizer.calcite.CalciteSemanticException.UnsupportedFeature;
+import 
org.apache.hadoop.hive.ql.optimizer.calcite.functions.CanAggregateDistinct;
 import 
org.apache.hadoop.hive.ql.optimizer.calcite.functions.HiveSqlCountAggFunction;
 import 
org.apache.hadoop.hive.ql.optimizer.calcite.functions.HiveSqlMinMaxAggFunction;
 import 
org.apache.hadoop.hive.ql.optimizer.calcite.functions.HiveSqlSumAggFunction;
@@ -217,24 +218,20 @@ public class SqlFunctionConverter {
         } else if (op.kind == SqlKind.PLUS_PREFIX) {
           node = (ASTNode) ParseDriver.adaptor.create(HiveParser.PLUS, "PLUS");
         } else {
-          // Handle 'COUNT' function for the case of COUNT(*) and 
COUNT(DISTINCT)
-          if (op instanceof HiveSqlCountAggFunction) {
+          // Handle COUNT/SUM/AVG function for the case of COUNT(*) and 
COUNT(DISTINCT)
+          if (op instanceof HiveSqlCountAggFunction ||
+              op instanceof HiveSqlSumAggFunction ||
+              (op instanceof CalciteUDAF && 
op.getName().equalsIgnoreCase(SqlStdOperatorTable.AVG.getName()))) {
             if (children.size() == 0) {
               node = (ASTNode) 
ParseDriver.adaptor.create(HiveParser.TOK_FUNCTIONSTAR,
                 "TOK_FUNCTIONSTAR");
             } else {
-              HiveSqlCountAggFunction countFunction = 
(HiveSqlCountAggFunction)op;
-              if (countFunction.isDistinct()) {
+              CanAggregateDistinct distinctFunction = (CanAggregateDistinct) 
op;
+              if (distinctFunction.isDistinct()) {
                 node = (ASTNode) 
ParseDriver.adaptor.create(HiveParser.TOK_FUNCTIONDI,
                     "TOK_FUNCTIONDI");
               }
             }
-          } else if (op instanceof HiveSqlSumAggFunction) { // case 
SUM(DISTINCT)
-            HiveSqlSumAggFunction sumFunction = (HiveSqlSumAggFunction) op;
-            if (sumFunction.isDistinct()) {
-              node = (ASTNode) 
ParseDriver.adaptor.create(HiveParser.TOK_FUNCTIONDI,
-                  "TOK_FUNCTIONDI");
-            }
           }
           node.addChild((ASTNode) 
ParseDriver.adaptor.create(HiveParser.Identifier, op.getName()));
         }
@@ -364,11 +361,18 @@ public class SqlFunctionConverter {
   }
 
   // UDAF is assumed to be deterministic
-  public static class CalciteUDAF extends SqlAggFunction {
-    public CalciteUDAF(String opName, SqlReturnTypeInference 
returnTypeInference,
+  public static class CalciteUDAF extends SqlAggFunction implements 
CanAggregateDistinct {
+    private boolean isDistinct;
+    public CalciteUDAF(boolean isDistinct, String opName, 
SqlReturnTypeInference returnTypeInference,
         SqlOperandTypeInference operandTypeInference, SqlOperandTypeChecker 
operandTypeChecker) {
       super(opName, SqlKind.OTHER_FUNCTION, returnTypeInference, 
operandTypeInference,
           operandTypeChecker, SqlFunctionCategory.USER_DEFINED_FUNCTION);
+      this.isDistinct = isDistinct;
+    }
+
+    @Override
+    public boolean isDistinct() {
+      return isDistinct;
     }
   }
 
@@ -466,6 +470,7 @@ public class SqlFunctionConverter {
           break;
         default:
           calciteAggFn = new CalciteUDAF(
+              isDistinct,
               udfInfo.udfName,
               udfInfo.returnTypeInference,
               udfInfo.operandTypeInference,

http://git-wip-us.apache.org/repos/asf/hive/blob/4f90a715/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDAFAverage.java
----------------------------------------------------------------------
diff --git 
a/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDAFAverage.java 
b/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDAFAverage.java
index cd2449f..3c1ce26 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDAFAverage.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDAFAverage.java
@@ -26,8 +26,6 @@ import org.apache.hadoop.hive.ql.exec.Description;
 import org.apache.hadoop.hive.ql.exec.UDFArgumentTypeException;
 import org.apache.hadoop.hive.ql.metadata.HiveException;
 import org.apache.hadoop.hive.ql.parse.SemanticException;
-import org.apache.hadoop.hive.ql.parse.WindowingSpec.BoundarySpec;
-import org.apache.hadoop.hive.ql.plan.ptf.BoundaryDef;
 import org.apache.hadoop.hive.ql.plan.ptf.WindowFrameDef;
 import 
org.apache.hadoop.hive.ql.udf.generic.GenericUDAFEvaluator.AggregationBuffer;
 import org.apache.hadoop.hive.ql.util.JavaDataModel;
@@ -35,9 +33,11 @@ import org.apache.hadoop.hive.serde2.io.DoubleWritable;
 import org.apache.hadoop.hive.serde2.io.HiveDecimalWritable;
 import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
 import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorFactory;
+import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorUtils;
 import org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector;
 import org.apache.hadoop.hive.serde2.objectinspector.StructField;
 import org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector;
+import 
org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorUtils.ObjectInspectorCopyOption;
 import 
org.apache.hadoop.hive.serde2.objectinspector.primitive.DoubleObjectInspector;
 import 
org.apache.hadoop.hive.serde2.objectinspector.primitive.HiveDecimalObjectInspector;
 import 
org.apache.hadoop.hive.serde2.objectinspector.primitive.LongObjectInspector;
@@ -95,6 +95,19 @@ public class GenericUDAFAverage extends 
AbstractGenericUDAFResolver {
     }
   }
 
+  @Override
+  public GenericUDAFEvaluator getEvaluator(GenericUDAFParameterInfo paramInfo)
+  throws SemanticException {
+    if (paramInfo.isAllColumns()) {
+      throw new SemanticException(
+          "The specified syntax for UDAF invocation is invalid.");
+    }
+
+    AbstractGenericUDAFAverageEvaluator eval =
+        (AbstractGenericUDAFAverageEvaluator) 
getEvaluator(paramInfo.getParameters());
+    eval.avgDistinct = paramInfo.isDistinct();
+    return eval;
+  }
 
   public static class GenericUDAFAverageEvaluatorDouble extends 
AbstractGenericUDAFAverageEvaluator<Double> {
 
@@ -102,6 +115,7 @@ public class GenericUDAFAverage extends 
AbstractGenericUDAFResolver {
     public void doReset(AverageAggregationBuffer<Double> aggregation) throws 
HiveException {
       aggregation.count = 0;
       aggregation.sum = new Double(0);
+      aggregation.previousValue = null;
     }
 
     @Override
@@ -319,15 +333,18 @@ public class GenericUDAFAverage extends 
AbstractGenericUDAFResolver {
   }
 
   private static class AverageAggregationBuffer<TYPE> implements 
AggregationBuffer {
+    private Object previousValue;
     private long count;
     private TYPE sum;
   };
 
   @SuppressWarnings("unchecked")
   public static abstract class AbstractGenericUDAFAverageEvaluator<TYPE> 
extends GenericUDAFEvaluator {
+    protected boolean avgDistinct;
 
     // For PARTIAL1 and COMPLETE
     protected transient PrimitiveObjectInspector inputOI;
+    protected transient ObjectInspector copiedOI;
     // For PARTIAL2 and FINAL
     private transient StructObjectInspector soi;
     private transient StructField countField;
@@ -359,6 +376,8 @@ public class GenericUDAFAverage extends 
AbstractGenericUDAFResolver {
       // init input
       if (mode == Mode.PARTIAL1 || mode == Mode.COMPLETE) {
         inputOI = (PrimitiveObjectInspector) parameters[0];
+        copiedOI = ObjectInspectorUtils.getStandardObjectInspector(inputOI,
+            ObjectInspectorCopyOption.JAVA);
       } else {
         soi = (StructObjectInspector) parameters[0];
         countField = soi.getStructFieldRef("count");
@@ -412,6 +431,14 @@ public class GenericUDAFAverage extends 
AbstractGenericUDAFResolver {
       if (parameter != null) {
         AverageAggregationBuffer<TYPE> averageAggregation = 
(AverageAggregationBuffer<TYPE>) aggregation;
         try {
+          // Skip the same value if avgDistinct is true
+          if (this.avgDistinct &&
+              ObjectInspectorUtils.compare(parameter, inputOI, 
averageAggregation.previousValue, copiedOI) == 0) {
+            return;
+          }
+          averageAggregation.previousValue = 
ObjectInspectorUtils.copyToStandardObject(
+              parameter, inputOI, ObjectInspectorCopyOption.JAVA);
+
           doIterate(averageAggregation, inputOI, parameter);
         } catch (NumberFormatException e) {
           if (!warned) {

http://git-wip-us.apache.org/repos/asf/hive/blob/4f90a715/ql/src/test/queries/clientpositive/windowing_distinct.q
----------------------------------------------------------------------
diff --git a/ql/src/test/queries/clientpositive/windowing_distinct.q 
b/ql/src/test/queries/clientpositive/windowing_distinct.q
index 9f6ddfd..bb192a7 100644
--- a/ql/src/test/queries/clientpositive/windowing_distinct.q
+++ b/ql/src/test/queries/clientpositive/windowing_distinct.q
@@ -36,3 +36,11 @@ SELECT SUM(DISTINCT t) OVER (PARTITION BY index),
        SUM(DISTINCT ts) OVER (PARTITION BY index),
        SUM(DISTINCT dec) OVER (PARTITION BY index)
 FROM windowing_distinct;
+
+SELECT AVG(DISTINCT t) OVER (PARTITION BY index),
+       AVG(DISTINCT d) OVER (PARTITION BY index),
+       AVG(DISTINCT s) OVER (PARTITION BY index),
+       AVG(DISTINCT concat('Mr.', s)) OVER (PARTITION BY index),
+       AVG(DISTINCT ts) OVER (PARTITION BY index),
+       AVG(DISTINCT dec) OVER (PARTITION BY index)
+FROM windowing_distinct;

http://git-wip-us.apache.org/repos/asf/hive/blob/4f90a715/ql/src/test/results/clientpositive/windowing_distinct.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/windowing_distinct.q.out 
b/ql/src/test/results/clientpositive/windowing_distinct.q.out
index 0858f0f..074a594 100644
--- a/ql/src/test/results/clientpositive/windowing_distinct.q.out
+++ b/ql/src/test/results/clientpositive/windowing_distinct.q.out
@@ -102,3 +102,29 @@ POSTHOOK: Input: default@windowing_distinct
 235    77.42   0.0     0.0     2.724315837406612E9     69
 235    77.42   0.0     0.0     2.724315837406612E9     69
 235    77.42   0.0     0.0     2.724315837406612E9     69
+PREHOOK: query: SELECT AVG(DISTINCT t) OVER (PARTITION BY index),
+       AVG(DISTINCT d) OVER (PARTITION BY index),
+       AVG(DISTINCT s) OVER (PARTITION BY index),
+       AVG(DISTINCT concat('Mr.', s)) OVER (PARTITION BY index),
+       AVG(DISTINCT ts) OVER (PARTITION BY index),
+       AVG(DISTINCT dec) OVER (PARTITION BY index)
+FROM windowing_distinct
+PREHOOK: type: QUERY
+PREHOOK: Input: default@windowing_distinct
+#### A masked pattern was here ####
+POSTHOOK: query: SELECT AVG(DISTINCT t) OVER (PARTITION BY index),
+       AVG(DISTINCT d) OVER (PARTITION BY index),
+       AVG(DISTINCT s) OVER (PARTITION BY index),
+       AVG(DISTINCT concat('Mr.', s)) OVER (PARTITION BY index),
+       AVG(DISTINCT ts) OVER (PARTITION BY index),
+       AVG(DISTINCT dec) OVER (PARTITION BY index)
+FROM windowing_distinct
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@windowing_distinct
+#### A masked pattern was here ####
+27.0   28.315  NULL    NULL    1.362157918703148E9     28.5000
+27.0   28.315  NULL    NULL    1.362157918703148E9     28.5000
+27.0   28.315  NULL    NULL    1.362157918703148E9     28.5000
+117.5  38.71   NULL    NULL    1.362157918703306E9     34.5000
+117.5  38.71   NULL    NULL    1.362157918703306E9     34.5000
+117.5  38.71   NULL    NULL    1.362157918703306E9     34.5000

Reply via email to