This is an automated email from the ASF dual-hosted git repository.
nicholasjiang pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/celeborn.git
The following commit(s) were added to refs/heads/main by this push:
new deb7538f2 [CELEBORN-2248] Implement lazy loading for columnar shuffle
classes and skew shuffle method using static holder pattern
deb7538f2 is described below
commit deb7538f23c739292030748abd99001f4aede225
Author: zhenghuan <[email protected]>
AuthorDate: Sat Feb 28 13:16:58 2026 +0800
[CELEBORN-2248] Implement lazy loading for columnar shuffle classes and
skew shuffle method using static holder pattern
### What changes were proposed in this pull request?
This PR converts the static initialization of columnar shuffle class
constructors
and skew shuffle method to lazy initialization using the
initialization-on-demand
holder idiom (static inner class pattern) in SparkUtils.java.
Specifically, the following changes were made:
1. Introduced `ColumnarHashBasedShuffleWriterConstructorHolder` static
inner class
to lazily initialize the constructor for ColumnarHashBasedShuffleWriter
2. Introduced `ColumnarShuffleReaderConstructorHolder` static inner class
to lazily
initialize the constructor for CelebornColumnarShuffleReader
3. Introduced `CelebornSkewShuffleMethodHolder` static inner class to lazily
initialize the `isCelebornSkewedShuffle` method reference
4. Modified `createColumnarHashBasedShuffleWriter()`,
`createColumnarShuffleReader()`,
and `isCelebornSkewShuffleOrChildShuffle()` methods to use the holder
pattern for
lazy initialization
5. Added JavaDoc comments explaining the lazy loading mechanism
### Why are the changes needed?
The current implementation statically initializes columnar shuffle class
constructors
and the skew shuffle method at SparkUtils class loading time, which means
these
classes/methods are loaded regardless of whether they are actually used.
This lazy loading approach ensures that:
- Columnar shuffle classes are only loaded when actually needed (when
`celeborn.columnarShuffle.enabled` is true and the create methods are
called)
- CelebornShuffleState class is only loaded when skew shuffle detection is
needed
- Reduces unnecessary class loading overhead for users not using these
features
- Improves startup performance and memory footprint
- Aligns with the conditional usage pattern already present in
SparkShuffleManager
The static holder pattern (initialization-on-demand holder idiom) provides
several
advantages:
- Thread-safe without explicit synchronization (guaranteed by JVM class
loading mechanism)
- No synchronization overhead at runtime (no volatile reads or lock
acquisition)
- Simpler and more concise code compared to double-checked locking
- Recommended by Effective Java (Item 83) for lazy initialization
### Does this PR resolve a correctness bug?
No, this is a performance optimization.
### Does this PR introduce any user-facing change?
No. This change only affects when certain classes are loaded internally.
The functionality and API remain unchanged.
### How was this patch tested?
- Code review to verify correct implementation of the
initialization-on-demand holder pattern
- Verified that JVM class loading guarantees thread safety
- The changes are backward compatible and don't alter functionality, only
initialization timing
Closes #3581 from ever4Kenny/CELEBORN-2248.
Authored-by: zhenghuan <[email protected]>
Signed-off-by: SteNicholas <[email protected]>
---
.../apache/spark/shuffle/celeborn/SparkUtils.java | 113 ++++++++++++---------
1 file changed, 66 insertions(+), 47 deletions(-)
diff --git
a/client-spark/spark-3/src/main/java/org/apache/spark/shuffle/celeborn/SparkUtils.java
b/client-spark/spark-3/src/main/java/org/apache/spark/shuffle/celeborn/SparkUtils.java
index b50d3546e..609f56c04 100644
---
a/client-spark/spark-3/src/main/java/org/apache/spark/shuffle/celeborn/SparkUtils.java
+++
b/client-spark/spark-3/src/main/java/org/apache/spark/shuffle/celeborn/SparkUtils.java
@@ -228,17 +228,25 @@ public class SparkUtils {
public static final String COLUMNAR_HASH_BASED_SHUFFLE_WRITER_CLASS =
"org.apache.spark.shuffle.celeborn.ColumnarHashBasedShuffleWriter";
- static final DynConstructors.Builder
COLUMNAR_HASH_BASED_SHUFFLE_WRITER_CONSTRUCTOR_BUILDER =
- DynConstructors.builder()
- .impl(
- COLUMNAR_HASH_BASED_SHUFFLE_WRITER_CLASS,
- int.class,
- CelebornShuffleHandle.class,
- TaskContext.class,
- CelebornConf.class,
- ShuffleClient.class,
- ShuffleWriteMetricsReporter.class,
- SendBufferPool.class);
+
+ /**
+ * Lazy holder for ColumnarHashBasedShuffleWriter constructor. The
constructor is initialized only
+ * when this class is first accessed, ensuring lazy loading without explicit
synchronization.
+ */
+ private static class ColumnarHashBasedShuffleWriterConstructorHolder {
+ private static final DynConstructors.Ctor<?> INSTANCE =
+ DynConstructors.builder()
+ .impl(
+ COLUMNAR_HASH_BASED_SHUFFLE_WRITER_CLASS,
+ int.class,
+ CelebornShuffleHandle.class,
+ TaskContext.class,
+ CelebornConf.class,
+ ShuffleClient.class,
+ ShuffleWriteMetricsReporter.class,
+ SendBufferPool.class)
+ .build();
+ }
public static <K, V, C> HashBasedShuffleWriter<K, V, C>
createColumnarHashBasedShuffleWriter(
int shuffleId,
@@ -248,26 +256,33 @@ public class SparkUtils {
ShuffleClient client,
ShuffleWriteMetricsReporter metrics,
SendBufferPool sendBufferPool) {
- return COLUMNAR_HASH_BASED_SHUFFLE_WRITER_CONSTRUCTOR_BUILDER
- .build()
- .invoke(null, shuffleId, handle, taskContext, conf, client, metrics,
sendBufferPool);
+ return ColumnarHashBasedShuffleWriterConstructorHolder.INSTANCE.invoke(
+ null, shuffleId, handle, taskContext, conf, client, metrics,
sendBufferPool);
}
public static final String COLUMNAR_SHUFFLE_READER_CLASS =
"org.apache.spark.shuffle.celeborn.CelebornColumnarShuffleReader";
- static final DynConstructors.Builder
COLUMNAR_SHUFFLE_READER_CONSTRUCTOR_BUILDER =
- DynConstructors.builder()
- .impl(
- COLUMNAR_SHUFFLE_READER_CLASS,
- CelebornShuffleHandle.class,
- int.class,
- int.class,
- int.class,
- int.class,
- TaskContext.class,
- CelebornConf.class,
- ShuffleReadMetricsReporter.class,
- ExecutorShuffleIdTracker.class);
+
+ /**
+ * Lazy holder for CelebornColumnarShuffleReader constructor. The
constructor is initialized only
+ * when this class is first accessed, ensuring lazy loading without explicit
synchronization.
+ */
+ private static class ColumnarShuffleReaderConstructorHolder {
+ private static final DynConstructors.Ctor<?> INSTANCE =
+ DynConstructors.builder()
+ .impl(
+ COLUMNAR_SHUFFLE_READER_CLASS,
+ CelebornShuffleHandle.class,
+ int.class,
+ int.class,
+ int.class,
+ int.class,
+ TaskContext.class,
+ CelebornConf.class,
+ ShuffleReadMetricsReporter.class,
+ ExecutorShuffleIdTracker.class)
+ .build();
+ }
public static <K, C> CelebornShuffleReader<K, C> createColumnarShuffleReader(
CelebornShuffleHandle<K, ?, C> handle,
@@ -279,19 +294,17 @@ public class SparkUtils {
CelebornConf conf,
ShuffleReadMetricsReporter metrics,
ExecutorShuffleIdTracker shuffleIdTracker) {
- return COLUMNAR_SHUFFLE_READER_CONSTRUCTOR_BUILDER
- .build()
- .invoke(
- null,
- handle,
- startPartition,
- endPartition,
- startMapIndex,
- endMapIndex,
- context,
- conf,
- metrics,
- shuffleIdTracker);
+ return ColumnarShuffleReaderConstructorHolder.INSTANCE.invoke(
+ null,
+ handle,
+ startPartition,
+ endPartition,
+ startMapIndex,
+ endMapIndex,
+ context,
+ conf,
+ metrics,
+ shuffleIdTracker);
}
// Added in SPARK-32920, for Spark 3.2 and above
@@ -541,15 +554,21 @@ public class SparkUtils {
}
}
- private static final DynMethods.UnboundMethod isCelebornSkewShuffle_METHOD =
- DynMethods.builder("isCelebornSkewedShuffle")
- .hiddenImpl("org.apache.spark.celeborn.CelebornShuffleState",
Integer.TYPE)
- .orNoop()
- .build();
+ /**
+ * Lazy holder for isCelebornSkewedShuffle method. The method is initialized
only when this class
+ * is first accessed, ensuring lazy loading without explicit synchronization.
+ */
+ private static class CelebornSkewShuffleMethodHolder {
+ private static final DynMethods.UnboundMethod INSTANCE =
+ DynMethods.builder("isCelebornSkewedShuffle")
+ .hiddenImpl("org.apache.spark.celeborn.CelebornShuffleState",
Integer.TYPE)
+ .orNoop()
+ .build();
+ }
public static boolean isCelebornSkewShuffleOrChildShuffle(int appShuffleId) {
- if (!isCelebornSkewShuffle_METHOD.isNoop()) {
- return isCelebornSkewShuffle_METHOD.asStatic().invoke(appShuffleId);
+ if (!CelebornSkewShuffleMethodHolder.INSTANCE.isNoop()) {
+ return
CelebornSkewShuffleMethodHolder.INSTANCE.asStatic().invoke(appShuffleId);
} else {
return false;
}