Github user chenghao-intel commented on a diff in the pull request:
https://github.com/apache/spark/pull/1935#discussion_r16221450
--- Diff:
sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/aggregates.scala
---
@@ -65,6 +65,80 @@ abstract class PartialAggregate extends
AggregateExpression {
def asPartial: SplitEvaluation
}
+abstract class MergeableAggregate extends PartialAggregate {
+ self: Product =>
+
+ def asPartial: SplitEvaluation = {
+ val partialValues = Alias(ReturnAggregate(this), "partialValue")()
+ SplitEvaluation(
+ MergeAggregates(partialValues.toAttribute),
+ partialValues :: Nil
+ )
+ }
+
+ def newInstance(): MergableAggregateFunction
+}
+
+case class ReturnAggregate(child: AggregateExpression)
+ extends AggregateExpression with Serializable with
trees.UnaryNode[Expression] {
+
+ def dataType = child.dataType
+
+ def nullable = child.nullable
+
+ def references = child.references
+
+ def newInstance() = new ReturnAggregateFunction(child, this)
+}
+
+case class ReturnAggregateFunction(agg: AggregateExpression, base:
AggregateExpression)
+ extends AggregateFunction {
+
+ def this() = this(null, null) // Required for serialization.
+
+ var currentValue: AggregateFunction = agg.newInstance()
+
+ override def eval(input: Row): Any = currentValue
+
+ override def update(input: Row): Unit = {
+ currentValue.update(input)
+ }
+}
+
+
+case class MergeAggregates(child: Expression)
+ extends AggregateExpression with Serializable with
trees.UnaryNode[Expression] {
+
+ def dataType = child.dataType
+
+ def nullable = child.nullable
+
+ def references = child.references
+
+ def newInstance() = new MergeAggregateFunctions(child, this)
+}
+
+case class MergeAggregateFunctions(expr: Expression, base:
AggregateExpression)
+ extends AggregateFunction {
+
+ def this() = this(null, null) // Required for serialization.
+
+ var currentValue: MergableAggregateFunction = null
--- End diff --
Should we give `currentValue` a default value? Then we could drop the
null checks in the `eval` and `update` functions.
---
If your project is set up for it, you can reply to this email and have your
reply appear on GitHub as well. If your project does not have this feature
enabled and wishes so, or if the feature is enabled but not working, please
contact infrastructure at [email protected] or file a JIRA ticket
with INFRA.
---
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]