This is an automated email from the ASF dual-hosted git repository.
ethanfeng pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/incubator-celeborn.git
The following commit(s) were added to refs/heads/main by this push:
new 90959cbfd [CELEBORN-845][BUG] Sort memory counter won't decrease after sort failed
90959cbfd is described below
commit 90959cbfd7097d83a8d4e076ab889d5b406300b9
Author: mingji <[email protected]>
AuthorDate: Thu Jul 27 15:16:04 2023 +0800
[CELEBORN-845][BUG] Sort memory counter won't decrease after sort failed
### What changes were proposed in this pull request?
Decrease the sort memory counter whenever a sort task finishes, whether it succeeds or fails, by releasing the reserved memory in a `finally` block.
### Why are the changes needed?
Fix the sort memory counter, which was not decreased when a sort failed.
### Does this PR introduce _any_ user-facing change?
NO.
### How was this patch tested?
UT.
Closes #1766 from FMX/CELEBORN-845.
Authored-by: mingji <[email protected]>
Signed-off-by: mingji <[email protected]>
---
.../src/main/scala/org/apache/celeborn/common/CelebornConf.scala | 1 +
.../service/deploy/worker/storage/PartitionFilesSorter.java | 7 +++----
2 files changed, 4 insertions(+), 4 deletions(-)
diff --git a/common/src/main/scala/org/apache/celeborn/common/CelebornConf.scala b/common/src/main/scala/org/apache/celeborn/common/CelebornConf.scala
index 196a3cfcf..cc71954f6 100644
--- a/common/src/main/scala/org/apache/celeborn/common/CelebornConf.scala
+++ b/common/src/main/scala/org/apache/celeborn/common/CelebornConf.scala
@@ -2173,6 +2173,7 @@ object CelebornConf extends Logging {
.doc("Reserved memory when sorting a shuffle file off-heap.")
.version("0.3.0")
.bytesConf(ByteUnit.BYTE)
+ .checkValue(v => v < Int.MaxValue, "Reserved memory per partition must be less than 2GB.")
.createWithDefaultString("1mb")
val WORKER_FLUSHER_BUFFER_SIZE: ConfigEntry[Long] =
diff --git a/worker/src/main/java/org/apache/celeborn/service/deploy/worker/storage/PartitionFilesSorter.java b/worker/src/main/java/org/apache/celeborn/service/deploy/worker/storage/PartitionFilesSorter.java
index 51f7af3f0..e9bdfbf34 100644
--- a/worker/src/main/java/org/apache/celeborn/service/deploy/worker/storage/PartitionFilesSorter.java
+++ b/worker/src/main/java/org/apache/celeborn/service/deploy/worker/storage/PartitionFilesSorter.java
@@ -134,8 +134,9 @@ public class PartitionFilesSorter extends ShuffleRecoverHelper {
try {
task.sort();
} catch (InterruptedException e) {
- logger.warn(
- "File sorter thread was interrupted when expanding padding buffer.");
+ logger.warn("File sorter thread was interrupted.");
+ } finally {
+ memoryManager.releaseSortMemory(reservedMemoryPerPartition);
}
});
}
@@ -586,8 +587,6 @@ public class PartitionFilesSorter extends ShuffleRecoverHelper {
sortedBlockInfoMap.put(mapId, sortedShuffleBlocks);
}
- memoryManager.releaseSortMemory(reserveMemory);
-
writeIndex(sortedBlockInfoMap, indexFilePath, isHdfs);
updateSortedShuffleFiles(shuffleKey, fileId, originFileLen);
deleteOriginFiles();