Github user nongli commented on a diff in the pull request:
https://github.com/apache/spark/pull/10998#discussion_r51633120
--- Diff:
sql/core/src/main/scala/org/apache/spark/sql/execution/aggregate/TungstenAggregate.scala
---
@@ -287,39 +288,98 @@ case class TungstenAggregate(
GenerateUnsafeRowJoiner.create(groupingKeySchema, bufferSchema)
}
-
/**
- * Update peak execution memory, called in generated Java class.
+ * Called by generated Java class to finish the aggregate and return a
KVIterator.
*/
- def updatePeakMemory(hashMap: UnsafeFixedWidthAggregationMap): Unit = {
+ def finishAggregate(
+ hashMap: UnsafeFixedWidthAggregationMap,
+ sorter: UnsafeKVExternalSorter): KVIterator[UnsafeRow, UnsafeRow] = {
+
+ // update peak execution memory
val mapMemory = hashMap.getPeakMemoryUsedBytes
+ val sorterMemory =
Option(sorter).map(_.getPeakMemoryUsedBytes).getOrElse(0L)
+ val peakMemory = Math.max(mapMemory, sorterMemory)
val metrics = TaskContext.get().taskMetrics()
- metrics.incPeakExecutionMemory(mapMemory)
- }
+ metrics.incPeakExecutionMemory(peakMemory)
- private def doProduceWithKeys(ctx: CodegenContext): String = {
- val initAgg = ctx.freshName("initAgg")
- ctx.addMutableState("boolean", initAgg, s"$initAgg = false;")
+ if (sorter == null) {
+ // not spilled
+ return hashMap.iterator()
+ }
- // create hashMap
- val thisPlan = ctx.addReferenceObj("plan", this)
- hashMapTerm = ctx.freshName("hashMap")
- val hashMapClassName = classOf[UnsafeFixedWidthAggregationMap].getName
- ctx.addMutableState(hashMapClassName, hashMapTerm, s"$hashMapTerm =
$thisPlan.createHashMap();")
+ // merge the final hashMap into sorter
+ sorter.merge(hashMap.destructAndCreateExternalSorter())
--- End diff --
Here you call free after destructAndCreateExternalSorter(), but in the other
places you don't. Do you need to?
---
If your project is set up for it, you can reply to this email and have your
reply appear on GitHub as well. If your project does not have this feature
enabled and would like it enabled, or if the feature is enabled but not
working, please contact infrastructure at [email protected] or file a JIRA
ticket with INFRA.
---
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]