waitinfuture commented on code in PR #2358:
URL:
https://github.com/apache/incubator-celeborn/pull/2358#discussion_r1522389972
##########
client-spark/common/src/main/java/org/apache/spark/shuffle/celeborn/SortBasedPusher.java:
##########
@@ -44,6 +44,64 @@
public class SortBasedPusher extends MemoryConsumer {
+ class MemoryThresholdManager {
+
+ private final long maxMemoryThresholdInBytes;
+ private final double smallPushTolerateFactor;
+
+ private final long sendBufferSizeInBytes;
+
+ MemoryThresholdManager(
+ int numPartitions, long sendBufferSizeInBytes, double
smallPushTolerateFactor) {
+ this.maxMemoryThresholdInBytes = numPartitions * sendBufferSizeInBytes;
+ this.smallPushTolerateFactor = smallPushTolerateFactor;
+ this.sendBufferSizeInBytes = sendBufferSizeInBytes;
+ }
+
+ private boolean shouldGrow() {
+ boolean enoughSpace = pushSortMemoryThreshold * 2 <=
maxMemoryThresholdInBytes;
Review Comment:
Maybe `pushSortMemoryThreshold <= maxMemoryThresholdInBytes`
##########
client-spark/common/src/main/java/org/apache/spark/shuffle/celeborn/SortBasedPusher.java:
##########
@@ -169,6 +238,7 @@ public long pushData() throws IOException {
numPartitions);
mapStatusLengths[currentPartition].add(bytesWritten);
afterPush.accept(bytesWritten);
+ memoryThresholdManager.updateStats(offSet, offSet ==
pushBufferMaxSize);
Review Comment:
In most cases `offSet == pushBufferMaxSize` will be false, although `offSet` can be
close to it. So maybe compare `offSet` with `pushBufferMaxSize / (1 + factor)` instead.
##########
client-spark/common/src/main/java/org/apache/spark/shuffle/celeborn/SortBasedPusher.java:
##########
@@ -72,6 +130,10 @@ public class SortBasedPusher extends MemoryConsumer {
private int[] inversedShuffledPartitions = null;
private final SendBufferPool sendBufferPool;
+ final MemoryThresholdManager memoryThresholdManager;
+
+ private final boolean useAdaptiveThreshold;
Review Comment:
This field (`useAdaptiveThreshold`) is unused.
##########
client-spark/common/src/main/java/org/apache/spark/shuffle/celeborn/SortBasedPusher.java:
##########
@@ -195,11 +266,15 @@ public long pushData() throws IOException {
if (offSet > 0) {
try {
dataPusher.addTask(currentPartition, dataBuf, offSet);
+ memoryThresholdManager.updateStats(offSet, offSet ==
pushBufferMaxSize);
Review Comment:
Ditto: `offSet == pushBufferMaxSize` will rarely be true here either.
##########
client-spark/spark-2/src/main/java/org/apache/spark/shuffle/celeborn/SortBasedShuffleWriter.java:
##########
@@ -211,7 +211,7 @@ private void fastWrite0(scala.collection.Iterator iterator)
throws IOException {
private void doPush() throws IOException {
long start = System.nanoTime();
- pusher.pushData();
+ pusher.pushData(false);
Review Comment:
why false here?
##########
client-spark/common/src/main/java/org/apache/spark/shuffle/celeborn/SortBasedPusher.java:
##########
@@ -44,6 +44,64 @@
public class SortBasedPusher extends MemoryConsumer {
+ class MemoryThresholdManager {
+
+ private final long maxMemoryThresholdInBytes;
+ private final double smallPushTolerateFactor;
+
+ private final long sendBufferSizeInBytes;
Review Comment:
This field (`sendBufferSizeInBytes`) is unused.
##########
client-spark/common/src/main/java/org/apache/spark/shuffle/celeborn/SortBasedPusher.java:
##########
@@ -44,6 +44,64 @@
public class SortBasedPusher extends MemoryConsumer {
+ class MemoryThresholdManager {
+
+ private final long maxMemoryThresholdInBytes;
+ private final double smallPushTolerateFactor;
+
+ private final long sendBufferSizeInBytes;
+
+ MemoryThresholdManager(
+ int numPartitions, long sendBufferSizeInBytes, double
smallPushTolerateFactor) {
+ this.maxMemoryThresholdInBytes = numPartitions * sendBufferSizeInBytes;
+ this.smallPushTolerateFactor = smallPushTolerateFactor;
+ this.sendBufferSizeInBytes = sendBufferSizeInBytes;
+ }
+
+ private boolean shouldGrow() {
+ boolean enoughSpace = pushSortMemoryThreshold * 2 <=
maxMemoryThresholdInBytes;
+ double expectedPushSize = Long.MAX_VALUE;
+ if (this.expectedPushedCount != 0) {
+ expectedPushSize = this.expectedPushedBytes * 1.0 /
this.expectedPushedCount;
+ }
+ boolean tooManyPushed =
+ pushedMemorySizeInBytes * 1.0 / pushedCount * (1 +
this.smallPushTolerateFactor)
+ < expectedPushSize;
+ return enoughSpace && tooManyPushed;
+ }
+
+ public void growThresholdIfNeeded() {
+ if (shouldGrow()) {
+ long oldThreshold = pushSortMemoryThreshold;
+ pushSortMemoryThreshold = pushSortMemoryThreshold * 2;
Review Comment:
Clamp to the cap: `pushSortMemoryThreshold = min(pushSortMemoryThreshold * 2,
maxMemoryThresholdInBytes)` (`min`, not `max`, so the threshold never exceeds the limit).
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]