Author: navis
Date: Fri Dec  6 00:55:08 2013
New Revision: 1548343

URL: http://svn.apache.org/r1548343
Log:
HIVE-3455 : ANSI CORR(X,Y) is incorrect (Maxim Bolotin via Navis)

Added:
    
hive/trunk/ql/src/test/org/apache/hadoop/hive/ql/udf/generic/TestGenericUDAFCorrelation.java
Modified:
    
hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDAFCorrelation.java

Modified: 
hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDAFCorrelation.java
URL: 
http://svn.apache.org/viewvc/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDAFCorrelation.java?rev=1548343&r1=1548342&r2=1548343&view=diff
==============================================================================
--- 
hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDAFCorrelation.java
 (original)
+++ 
hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDAFCorrelation.java
 Fri Dec  6 00:55:08 2013
@@ -19,8 +19,6 @@ package org.apache.hadoop.hive.ql.udf.ge
 
 import java.util.ArrayList;
 
-import org.apache.commons.logging.Log;
-import org.apache.commons.logging.LogFactory;
 import org.apache.hadoop.hive.ql.exec.Description;
 import org.apache.hadoop.hive.ql.exec.UDFArgumentTypeException;
 import org.apache.hadoop.hive.ql.metadata.HiveException;
@@ -74,8 +72,6 @@ import org.apache.hadoop.io.LongWritable
         + "and STDDEV_POP is the population standard deviation.")
 public class GenericUDAFCorrelation extends AbstractGenericUDAFResolver {
 
-  static final Log LOG = 
LogFactory.getLog(GenericUDAFCorrelation.class.getName());
-
   @Override
   public GenericUDAFEvaluator getEvaluator(TypeInfo[] parameters) throws 
SemanticException {
     if (parameters.length != 2) {
@@ -289,15 +285,15 @@ public class GenericUDAFCorrelation exte
         StdAgg myagg = (StdAgg) agg;
         double vx = PrimitiveObjectInspectorUtils.getDouble(px, xInputOI);
         double vy = PrimitiveObjectInspectorUtils.getDouble(py, yInputOI);
-        double xavgOld = myagg.xavg;
-        double yavgOld = myagg.yavg;
+        double deltaX = vx - myagg.xavg;
+        double deltaY = vy - myagg.yavg;
         myagg.count++;
-        myagg.xavg += (vx - xavgOld) / myagg.count;
-        myagg.yavg += (vy - yavgOld) / myagg.count;
+        myagg.xavg += deltaX / myagg.count;
+        myagg.yavg += deltaY / myagg.count;
         if (myagg.count > 1) {
-            myagg.covar += (vx - xavgOld) * (vy - myagg.yavg);
-            myagg.xvar += (vx - xavgOld) * (vx - myagg.xavg);
-            myagg.yvar += (vy - yavgOld) * (vy - myagg.yavg);
+          myagg.covar += deltaX * (vy - myagg.yavg);
+          myagg.xvar += deltaX * (vx - myagg.xavg);
+          myagg.yvar += deltaY * (vy - myagg.yavg);
         }
       }
     }
@@ -352,8 +348,8 @@ public class GenericUDAFCorrelation exte
           myagg.count += nB;
           myagg.xavg = (xavgA * nA + xavgB * nB) / myagg.count;
           myagg.yavg = (yavgA * nA + yavgB * nB) / myagg.count;
-          myagg.xvar += xvarB + (xavgA - xavgB) * (xavgA - xavgB) * 
myagg.count;
-          myagg.yvar += yvarB + (yavgA - yavgB) * (yavgA - yavgB) * 
myagg.count;
+          myagg.xvar += xvarB + (xavgA - xavgB) * (xavgA - xavgB) * nA * nB / 
myagg.count;
+          myagg.yvar += yvarB + (yavgA - yavgB) * (yavgA - yavgB) * nA * nB / 
myagg.count;
           myagg.covar +=
               covarB + (xavgA - xavgB) * (yavgA - yavgB) * ((double) (nA * nB) 
/ myagg.count);
         }

Added: 
hive/trunk/ql/src/test/org/apache/hadoop/hive/ql/udf/generic/TestGenericUDAFCorrelation.java
URL: 
http://svn.apache.org/viewvc/hive/trunk/ql/src/test/org/apache/hadoop/hive/ql/udf/generic/TestGenericUDAFCorrelation.java?rev=1548343&view=auto
==============================================================================
--- 
hive/trunk/ql/src/test/org/apache/hadoop/hive/ql/udf/generic/TestGenericUDAFCorrelation.java
 (added)
+++ 
hive/trunk/ql/src/test/org/apache/hadoop/hive/ql/udf/generic/TestGenericUDAFCorrelation.java
 Fri Dec  6 00:55:08 2013
@@ -0,0 +1,67 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.hive.ql.udf.generic;
+
+import junit.framework.TestCase;
+import org.apache.hadoop.hive.ql.metadata.HiveException;
+import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
+import 
org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory;
+import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo;
+import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory;
+
+public class TestGenericUDAFCorrelation extends TestCase {
+
+  public void testCorr() throws HiveException {
+    GenericUDAFCorrelation corr = new GenericUDAFCorrelation();
+    GenericUDAFEvaluator eval1 = corr.getEvaluator(
+        new 
TypeInfo[]{TypeInfoFactory.doubleTypeInfo,TypeInfoFactory.doubleTypeInfo });
+    GenericUDAFEvaluator eval2 = corr.getEvaluator(
+        new 
TypeInfo[]{TypeInfoFactory.doubleTypeInfo,TypeInfoFactory.doubleTypeInfo });
+
+    ObjectInspector poi1 = eval1.init(GenericUDAFEvaluator.Mode.PARTIAL1,
+        new ObjectInspector[] 
{PrimitiveObjectInspectorFactory.javaDoubleObjectInspector,
+            PrimitiveObjectInspectorFactory.javaDoubleObjectInspector});
+    ObjectInspector poi2 = eval2.init(GenericUDAFEvaluator.Mode.PARTIAL1,
+        new ObjectInspector[] 
{PrimitiveObjectInspectorFactory.javaDoubleObjectInspector,
+            PrimitiveObjectInspectorFactory.javaDoubleObjectInspector});
+
+    GenericUDAFEvaluator.AggregationBuffer buffer1 = 
eval1.getNewAggregationBuffer();
+    eval1.iterate(buffer1, new Object[]{100d, 200d});
+    eval1.iterate(buffer1, new Object[]{150d, 210d});
+    eval1.iterate(buffer1, new Object[]{200d, 220d});
+    Object object1 = eval1.terminatePartial(buffer1);
+
+    GenericUDAFEvaluator.AggregationBuffer buffer2 = 
eval2.getNewAggregationBuffer();
+    eval2.iterate(buffer2, new Object[]{250d, 230d});
+    eval2.iterate(buffer2, new Object[]{250d, 240d});
+    eval2.iterate(buffer2, new Object[]{300d, 250d});
+    eval2.iterate(buffer2, new Object[]{350d, 260d});
+    Object object2 = eval2.terminatePartial(buffer2);
+
+    ObjectInspector coi = eval2.init(GenericUDAFEvaluator.Mode.FINAL,
+        new ObjectInspector[]{poi1});
+
+    GenericUDAFEvaluator.AggregationBuffer buffer3 = 
eval2.getNewAggregationBuffer();
+    eval2.merge(buffer3, object1);
+    eval2.merge(buffer3, object2);
+
+    Object result = eval2.terminate(buffer3);
+    assertEquals("0.987829161147262", String.valueOf(result));
+  }
+}


Reply via email to