Github user nongli commented on a diff in the pull request:
https://github.com/apache/spark/pull/10228#discussion_r47277687
--- Diff:
sql/core/src/main/scala/org/apache/spark/sql/execution/aggregate/AggregationIterator.scala
---
@@ -326,73 +192,61 @@ abstract class AggregationIterator(
} else {
safeOutputRow
}
-
- aggregationMode match {
- // Partial-only or PartialMerge-only: every output row is basically
the values of
- // the grouping expressions and the corresponding aggregation buffer.
- case (Some(Partial), None) | (Some(PartialMerge), None) =>
- // Because we cannot copy a joinedRow containing a UnsafeRow
(UnsafeRow does not
- // support generic getter), we create a mutable projection to
output the
- // JoinedRow(currentGroupingKey, currentBuffer)
- val bufferSchema =
nonCompleteAggregateFunctions.flatMap(_.aggBufferAttributes)
- val resultProjection =
- newMutableProjection(
- groupingKeyAttributes ++ bufferSchema,
- groupingKeyAttributes ++ bufferSchema)()
- resultProjection.target(mutableOutput)
-
- (currentGroupingKey: InternalRow, currentBuffer: MutableRow) => {
- resultProjection(rowToBeEvaluated(currentGroupingKey,
currentBuffer))
- // rowToBeEvaluated(currentGroupingKey, currentBuffer)
- }
-
+ val modes = aggregateExpressions.map(_.mode).distinct
+ if (!modes.contains(Final) && !modes.contains(Complete)) {
+ // Because we cannot copy a joinedRow containing a UnsafeRow
(UnsafeRow does not
+ // support generic getter), we create a mutable projection to output
the
+ // JoinedRow(currentGroupingKey, currentBuffer)
+ val bufferSchema = aggregateFunctions.flatMap(_.aggBufferAttributes)
+ val resultProjection =
+ newMutableProjection(
+ groupingKeyAttributes ++ bufferSchema,
+ groupingKeyAttributes ++ bufferSchema)()
+ resultProjection.target(mutableOutput)
+ (currentGroupingKey: InternalRow, currentBuffer: MutableRow) => {
+ resultProjection(rowToBeEvaluated(currentGroupingKey,
currentBuffer))
+ }
+ } else if (aggregateExpressions.nonEmpty) {
// Final-only, Complete-only and Final-Complete: every output row
contains values representing
// resultExpressions.
- case (Some(Final), None) | (Some(Final) | None, Some(Complete)) =>
- val bufferSchemata =
- allAggregateFunctions.flatMap(_.aggBufferAttributes)
- val evalExpressions = allAggregateFunctions.map {
- case ae: DeclarativeAggregate => ae.evaluateExpression
- case agg: AggregateFunction => NoOp
- }
- val expressionAggEvalProjection =
newMutableProjection(evalExpressions, bufferSchemata)()
- val aggregateResultSchema = nonCompleteAggregateAttributes ++
completeAggregateAttributes
- // TODO: Use unsafe row.
- val aggregateResult = new
SpecificMutableRow(aggregateResultSchema.map(_.dataType))
- expressionAggEvalProjection.target(aggregateResult)
- val resultProjection =
- newMutableProjection(
- resultExpressions, groupingKeyAttributes ++
aggregateResultSchema)()
- resultProjection.target(mutableOutput)
-
- (currentGroupingKey: InternalRow, currentBuffer: MutableRow) => {
- // Generate results for all expression-based aggregate functions.
- expressionAggEvalProjection(currentBuffer)
- // Generate results for all imperative aggregate functions.
- var i = 0
- while (i < allImperativeAggregateFunctions.length) {
- aggregateResult.update(
- allImperativeAggregateFunctionPositions(i),
- allImperativeAggregateFunctions(i).eval(currentBuffer))
- i += 1
- }
- resultProjection(rowToBeEvaluated(currentGroupingKey,
aggregateResult))
+ val bufferSchemata =
aggregateFunctions.flatMap(_.aggBufferAttributes)
+ val evalExpressions = aggregateFunctions.map {
+ case ae: DeclarativeAggregate => ae.evaluateExpression
+ case agg: AggregateFunction => NoOp
+ }
+ val expressionAggEvalProjection =
newMutableProjection(evalExpressions, bufferSchemata)()
+ val aggregateResultSchema = aggregateAttributes
+ // TODO: Use unsafe row.
+ val aggregateResult = new
SpecificMutableRow(aggregateResultSchema.map(_.dataType))
+ expressionAggEvalProjection.target(aggregateResult)
+
+ val resultProjection =
+ newMutableProjection(
+ resultExpressions, groupingKeyAttributes ++
aggregateResultSchema)()
+ resultProjection.target(mutableOutput)
+
+ (currentGroupingKey: InternalRow, currentBuffer: MutableRow) => {
+ // Generate results for all expression-based aggregate functions.
+ expressionAggEvalProjection(currentBuffer)
+ // Generate results for all imperative aggregate functions.
+ var i = 0
+ while (i < allImperativeAggregateFunctions.length) {
+ aggregateResult.update(
+ allImperativeAggregateFunctionPositions(i),
+ allImperativeAggregateFunctions(i).eval(currentBuffer))
+ i += 1
}
-
+ resultProjection(rowToBeEvaluated(currentGroupingKey,
aggregateResult))
+ }
+ } else {
// Grouping-only: we only output values of grouping expressions.
--- End diff --
Is this projection necessary? Isn't the result just currentGroupingKey?
---
If your project is set up for it, you can reply to this email and have your
reply appear on GitHub as well. If your project does not have this feature
enabled and would like it, or if the feature is enabled but not working, please
contact infrastructure at [email protected] or file a JIRA ticket
with INFRA.
---
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]