This is an automated email from the ASF dual-hosted git repository.

ethanfeng pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/incubator-celeborn.git


The following commit(s) were added to refs/heads/main by this push:
     new 90959cbfd [CELEBORN-845][BUG] Sort memory counter won't decrease after sort failed
90959cbfd is described below

commit 90959cbfd7097d83a8d4e076ab889d5b406300b9
Author: mingji <[email protected]>
AuthorDate: Thu Jul 27 15:16:04 2023 +0800

    [CELEBORN-845][BUG] Sort memory counter won't decrease after sort failed
    
    ### What changes were proposed in this pull request?
    Decrease the sort memory counter in a `finally` block once the sorting procedure ends, so the reserved memory is released whether the sort succeeds or fails.
    
    ### Why are the changes needed?
    The sort memory counter was not decreased when sorting failed, which left the counter incorrect.
    
    ### Does this PR introduce _any_ user-facing change?
    NO.
    
    ### How was this patch tested?
    Unit tests.
    
    Closes #1766 from FMX/CELEBORN-845.
    
    Authored-by: mingji <[email protected]>
    Signed-off-by: mingji <[email protected]>
---
 .../src/main/scala/org/apache/celeborn/common/CelebornConf.scala   | 1 +
 .../service/deploy/worker/storage/PartitionFilesSorter.java        | 7 +++----
 2 files changed, 4 insertions(+), 4 deletions(-)

diff --git 
a/common/src/main/scala/org/apache/celeborn/common/CelebornConf.scala 
b/common/src/main/scala/org/apache/celeborn/common/CelebornConf.scala
index 196a3cfcf..cc71954f6 100644
--- a/common/src/main/scala/org/apache/celeborn/common/CelebornConf.scala
+++ b/common/src/main/scala/org/apache/celeborn/common/CelebornConf.scala
@@ -2173,6 +2173,7 @@ object CelebornConf extends Logging {
       .doc("Reserved memory when sorting a shuffle file off-heap.")
       .version("0.3.0")
       .bytesConf(ByteUnit.BYTE)
+      .checkValue(v => v < Int.MaxValue, "Reserved memory per partition must 
be less than 2GB.")
       .createWithDefaultString("1mb")
 
   val WORKER_FLUSHER_BUFFER_SIZE: ConfigEntry[Long] =
diff --git 
a/worker/src/main/java/org/apache/celeborn/service/deploy/worker/storage/PartitionFilesSorter.java
 
b/worker/src/main/java/org/apache/celeborn/service/deploy/worker/storage/PartitionFilesSorter.java
index 51f7af3f0..e9bdfbf34 100644
--- 
a/worker/src/main/java/org/apache/celeborn/service/deploy/worker/storage/PartitionFilesSorter.java
+++ 
b/worker/src/main/java/org/apache/celeborn/service/deploy/worker/storage/PartitionFilesSorter.java
@@ -134,8 +134,9 @@ public class PartitionFilesSorter extends 
ShuffleRecoverHelper {
                         try {
                           task.sort();
                         } catch (InterruptedException e) {
-                          logger.warn(
-                              "File sorter thread was interrupted when 
expanding padding buffer.");
+                          logger.warn("File sorter thread was interrupted.");
+                        } finally {
+                          
memoryManager.releaseSortMemory(reservedMemoryPerPartition);
                         }
                       });
                 }
@@ -586,8 +587,6 @@ public class PartitionFilesSorter extends 
ShuffleRecoverHelper {
           sortedBlockInfoMap.put(mapId, sortedShuffleBlocks);
         }
 
-        memoryManager.releaseSortMemory(reserveMemory);
-
         writeIndex(sortedBlockInfoMap, indexFilePath, isHdfs);
         updateSortedShuffleFiles(shuffleKey, fileId, originFileLen);
         deleteOriginFiles();

Reply via email to