maropu commented on a change in pull request #26420: [SPARK-27986][SQL] Support
ANSI SQL filter predicate for aggregate expression.
URL: https://github.com/apache/spark/pull/26420#discussion_r349866189
##########
File path:
sql/core/src/main/scala/org/apache/spark/sql/execution/aggregate/AggregationIterator.scala
##########
@@ -116,12 +116,35 @@ abstract class AggregationIterator(
protected val aggregateFunctions: Array[AggregateFunction] =
initializeAggregateFunctions(aggregateExpressions,
initialInputBufferOffset)
+ protected def initializeFilterPredicates(
+ expressions: Seq[AggregateExpression]): mutable.Map[Int, BasePredicate]
= {
+ val filterPredicates = new mutable.HashMap[Int, BasePredicate]
+ expressions.zipWithIndex.foreach {
+ case (ae: AggregateExpression, i) =>
+ ae.mode match {
+ case Partial | Complete =>
+ ae.filter.foreach { filterExpr =>
+ val filterAttrs = filterExpr.references.toSeq
+ val predicate = Predicate.create(filterExpr, inputAttributes ++
filterAttrs)
+ predicate.initialize(partIndex)
+ filterPredicates(i) = predicate
+ }
+ case _ =>
+ }
+ case _ =>
+ }
+ filterPredicates
+ }
+
+ protected val predicates: mutable.Map[Int, BasePredicate] =
Review comment:
I don't think this variable is needed outside `generateProcessRow`, so could you
move it inside that method, like this?
```
// Initializing functions used to process a row.
protected def generateProcessRow(
expressions: Seq[AggregateExpression],
functions: Seq[AggregateFunction],
inputAttributes: Seq[Attribute]): (InternalRow, InternalRow) => Unit =
{
val joinedRow = new JoinedRow
if (expressions.nonEmpty) {
// Initialize predicates for aggregate functions if necessary
val predicateOptions = expressions.map {
case AggregateExpression(_, mode, _, Some(filter), _) =>
mode match {
case Partial | Complete =>
val filterAttrs = filter.references.toSeq
val predicate = Predicate.create(filter, inputAttributes ++
filterAttrs)
predicate.initialize(partIndex)
Some(predicate)
case _ =>
None
}
case _ =>
None
}
....
```
----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
For queries about this service, please contact Infrastructure at:
[email protected]
With regards,
Apache Git Services
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]