Github user mengxr commented on a diff in the pull request:
https://github.com/apache/spark/pull/10960#discussion_r51533775
--- Diff:
sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/aggregate/Corr.scala
---
@@ -29,165 +28,95 @@ import org.apache.spark.sql.types._
* Definition of Pearson correlation can be found at
*
http://en.wikipedia.org/wiki/Pearson_product-moment_correlation_coefficient
*/
-case class Corr(
- left: Expression,
- right: Expression,
- mutableAggBufferOffset: Int = 0,
- inputAggBufferOffset: Int = 0)
- extends ImperativeAggregate {
-
- def this(left: Expression, right: Expression) =
- this(left, right, mutableAggBufferOffset = 0, inputAggBufferOffset = 0)
-
- override def children: Seq[Expression] = Seq(left, right)
+case class Corr(x: Expression, y: Expression) extends DeclarativeAggregate
{
+ override def children: Seq[Expression] = Seq(x, y)
override def nullable: Boolean = true
-
override def dataType: DataType = DoubleType
-
override def inputTypes: Seq[AbstractDataType] = Seq(DoubleType,
DoubleType)
- override def checkInputDataTypes(): TypeCheckResult = {
- if (left.dataType.isInstanceOf[DoubleType] &&
right.dataType.isInstanceOf[DoubleType]) {
- TypeCheckResult.TypeCheckSuccess
+ protected val count = AttributeReference("count", DoubleType, nullable =
false)()
+ protected val xAvg = AttributeReference("xAvg", DoubleType, nullable =
false)()
+ protected val yAvg = AttributeReference("yAvg", DoubleType, nullable =
false)()
+ protected val ck = AttributeReference("ck", DoubleType, nullable =
false)()
+ protected val xMk = AttributeReference("xMk", DoubleType, nullable =
false)()
+ protected val yMk = AttributeReference("yMk", DoubleType, nullable =
false)()
+
+ override val aggBufferAttributes: Seq[AttributeReference] = Seq(count,
xAvg, yAvg, ck, xMk, yMk)
+
+ override val initialValues: Seq[Expression] = Seq(
+ /* count = */ Literal(0.0),
+ /* xAvg = */ Literal(0.0),
+ /* yAvg = */ Literal(0.0),
+ /* ck = */ Literal(0.0),
+ /* xMk = */ Literal(0.0),
+ /* yMk = */ Literal(0.0)
+ )
+
+ override lazy val updateExpressions: Seq[Expression] = {
+ val n = count + Literal(1.0)
--- End diff --
Same argument as before regarding the naming of `count` and `n`: calling
them `n` and `newN` might be better.
---
If your project is set up for it, you can reply to this email and have your
reply appear on GitHub as well. If your project does not have this feature
enabled and would like it, or if the feature is enabled but not working,
please contact infrastructure at [email protected] or file a JIRA ticket
with INFRA.
---
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]