This is an automated email from the ASF dual-hosted git repository.
viirya pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/arrow-datafusion-comet.git
The following commit(s) were added to refs/heads/main by this push:
new 8a512ba fix: Deallocate row addresses and size arrays after exporting
(#246)
8a512ba is described below
commit 8a512ba69009ae96bad35544456dcf9f51c69abe
Author: Liang-Chi Hsieh <[email protected]>
AuthorDate: Sun Apr 7 13:23:37 2024 -0700
fix: Deallocate row addresses and size arrays after exporting (#246)
When we export row addresses and sizes to native, they are copied to
separate arrays. We should release the original array buffers after exporting
them, so the same data is not held in memory twice.
---
.../scala/org/apache/spark/shuffle/sort/RowPartition.scala | 13 +++++++++++--
1 file changed, 11 insertions(+), 2 deletions(-)
diff --git
a/spark/src/main/scala/org/apache/spark/shuffle/sort/RowPartition.scala
b/spark/src/main/scala/org/apache/spark/shuffle/sort/RowPartition.scala
index bce24be..32d64fa 100644
--- a/spark/src/main/scala/org/apache/spark/shuffle/sort/RowPartition.scala
+++ b/spark/src/main/scala/org/apache/spark/shuffle/sort/RowPartition.scala
@@ -32,8 +32,17 @@ class RowPartition(initialSize: Int) {
def getNumRows: Int = rowAddresses.size
- def getRowAddresses: Array[Long] = rowAddresses.toArray
- def getRowSizes: Array[Int] = rowSizes.toArray
+ def getRowAddresses: Array[Long] = {
+ val array = rowAddresses.toArray
+ rowAddresses = null
+ array
+ }
+
+ def getRowSizes: Array[Int] = {
+ val array = rowSizes.toArray
+ rowSizes = null
+ array
+ }
def reset(): Unit = {
rowAddresses = new ArrayBuffer[Long](initialSize)