This is an automated email from the ASF dual-hosted git repository.

viirya pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/arrow-datafusion-comet.git


The following commit(s) were added to refs/heads/main by this push:
     new 8a512ba  fix: Deallocate row addresses and size arrays after exporting 
(#246)
8a512ba is described below

commit 8a512ba69009ae96bad35544456dcf9f51c69abe
Author: Liang-Chi Hsieh <[email protected]>
AuthorDate: Sun Apr 7 13:23:37 2024 -0700

    fix: Deallocate row addresses and size arrays after exporting (#246)
    
    When we export row addresses and sizes to native code, they are copied to 
separate arrays. We should release the array buffers after exporting them to 
reduce memory usage.
---
 .../scala/org/apache/spark/shuffle/sort/RowPartition.scala  | 13 +++++++++++--
 1 file changed, 11 insertions(+), 2 deletions(-)

diff --git 
a/spark/src/main/scala/org/apache/spark/shuffle/sort/RowPartition.scala 
b/spark/src/main/scala/org/apache/spark/shuffle/sort/RowPartition.scala
index bce24be..32d64fa 100644
--- a/spark/src/main/scala/org/apache/spark/shuffle/sort/RowPartition.scala
+++ b/spark/src/main/scala/org/apache/spark/shuffle/sort/RowPartition.scala
@@ -32,8 +32,17 @@ class RowPartition(initialSize: Int) {
 
   def getNumRows: Int = rowAddresses.size
 
-  def getRowAddresses: Array[Long] = rowAddresses.toArray
-  def getRowSizes: Array[Int] = rowSizes.toArray
+  def getRowAddresses: Array[Long] = {
+    val array = rowAddresses.toArray
+    rowAddresses = null
+    array
+  }
+
+  def getRowSizes: Array[Int] = {
+    val array = rowSizes.toArray
+    rowSizes = null
+    array
+  }
 
   def reset(): Unit = {
     rowAddresses = new ArrayBuffer[Long](initialSize)

Reply via email to