maropu commented on a change in pull request #26420: [SPARK-27986][SQL] Support 
ANSI SQL filter predicate for aggregate expression.
URL: https://github.com/apache/spark/pull/26420#discussion_r349866351
 
 

 ##########
 File path: 
sql/core/src/main/scala/org/apache/spark/sql/execution/aggregate/AggregationIterator.scala
 ##########
 @@ -157,38 +180,87 @@ abstract class AggregationIterator(
       inputAttributes: Seq[Attribute]): (InternalRow, InternalRow) => Unit = {
     val joinedRow = new JoinedRow
     if (expressions.nonEmpty) {
-      val mergeExpressions = functions.zip(expressions).flatMap {
-        case (ae: DeclarativeAggregate, expression) =>
-          expression.mode match {
+      var isFinalOrMerge = false
+      val mergeExpressions = functions.zipWithIndex.collect {
+        case (ae: DeclarativeAggregate, i) =>
+          expressions(i).mode match {
             case Partial | Complete => ae.updateExpressions
-            case PartialMerge | Final => ae.mergeExpressions
+            case PartialMerge | Final =>
+              isFinalOrMerge = true
+              ae.mergeExpressions
           }
         case (agg: AggregateFunction, _) => 
Seq.fill(agg.aggBufferAttributes.length)(NoOp)
       }
       val updateFunctions = functions.zipWithIndex.collect {
         case (ae: ImperativeAggregate, i) =>
           expressions(i).mode match {
             case Partial | Complete =>
-              (buffer: InternalRow, row: InternalRow) => ae.update(buffer, row)
+              Option(predicates(i)) match {
+                case Some(predicate) =>
+                  (buffer: InternalRow, row: InternalRow) =>
+                    if (predicate.eval(row)) { ae.update(buffer, row) }
+                case _ => (buffer: InternalRow, row: InternalRow) => 
ae.update(buffer, row)
+              }
             case PartialMerge | Final =>
               (buffer: InternalRow, row: InternalRow) => ae.merge(buffer, row)
           }
       }.toArray
       // This projection is used to merge buffer values for all 
expression-based aggregates.
       val aggregationBufferSchema = functions.flatMap(_.aggBufferAttributes)
-      val updateProjection =
-        newMutableProjection(mergeExpressions, aggregationBufferSchema ++ 
inputAttributes)
+      val updateProjection = newMutableProjection(
+        mergeExpressions.flatMap(_.seq), aggregationBufferSchema ++ 
inputAttributes)
 
-      (currentBuffer: InternalRow, row: InternalRow) => {
-        // Process all expression-based aggregate functions.
-        updateProjection.target(currentBuffer)(joinedRow(currentBuffer, row))
+      val processImperative = (currentBuffer: InternalRow, row: InternalRow) 
=> {
         // Process all imperative aggregate functions.
         var i = 0
         while (i < updateFunctions.length) {
           updateFunctions(i)(currentBuffer, row)
           i += 1
         }
       }
+
+      // The following two situations will adopt a common implementation:
+      // First, no filter predicate is specified for any aggregate expression.
+      // Second, aggregate expressions are in merge or final mode.
+      if (predicates.isEmpty || isFinalOrMerge) {
+        (currentBuffer: InternalRow, row: InternalRow) => {
+          updateProjection.target(currentBuffer)(joinedRow(currentBuffer, row))
+          processImperative(currentBuffer, row)
+        }
+      } else {
+        // In the list of aggregate expressions, if a filter predicate is 
specified for at least one
+        // aggregate expression and aggregate expressions are in partial or 
complete mode,
+        // then the filter will be used.
+        // Suppose there is a list of aggregate expressions, such as exprA 
with filterA, exprB,
+        // exprC with filterC, then the specific implementation process is as 
follows:
+        // 1. Accept data row.
+        // 2. Execute multiple aggregate expressions in sequence.
+        // 2-1. Filter the data row using filter predicate filterA. If the 
filter predicate
+        //      filterA is met, then calculate using aggregate expression 
exprA.
+        // 2-2. Calculate using aggregate expression exprB.
+        // 2-3. Filter the data row using filter predicate filterC. If the 
filter predicate
+        //      filterC is met, then calculate using aggregate expression 
exprC.
+        (currentBuffer: InternalRow, row: InternalRow) => {
+          val dynamicMergeExpressions = new mutable.ArrayBuffer[Expression]
 
 Review comment:
   Can you move the `predicate` processing for expression-based agg functions 
outside this row-by-row loop? The current code can cause excessive overhead 
when processing rows.

----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
 
For queries about this service, please contact Infrastructure at:
[email protected]


With regards,
Apache Git Services

---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to