This is an automated email from the ASF dual-hosted git repository.
philo pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/incubator-gluten.git
The following commit(s) were added to refs/heads/main by this push:
new 8ac0c18e89 [GLUTEN-8398] Bump Celeborn to 0.4.3 and 0.5.2 (#8399)
8ac0c18e89 is described below
commit 8ac0c18e89a7fa8b6b39ad136d4d9657fa04c84a
Author: Nicholas Jiang <[email protected]>
AuthorDate: Sat Jan 4 08:28:27 2025 +0800
[GLUTEN-8398] Bump Celeborn to 0.4.3 and 0.5.2 (#8399)
---
.github/workflows/velox_backend.yml | 8 ++++++--
dev/docker/Dockerfile.centos8-dynamic-build | 2 +-
.../gluten/celeborn/CelebornShuffleManager.java | 22 +++++++++++++++++++---
tools/gluten-it/pom.xml | 2 +-
4 files changed, 27 insertions(+), 7 deletions(-)
diff --git a/.github/workflows/velox_backend.yml b/.github/workflows/velox_backend.yml
index ada2ea3f23..ec79bc8b1b 100644
--- a/.github/workflows/velox_backend.yml
+++ b/.github/workflows/velox_backend.yml
@@ -544,7 +544,7 @@ jobs:
fail-fast: false
matrix:
spark: [ "spark-3.2" ]
- celeborn: [ "celeborn-0.5.2", "celeborn-0.4.2",
"celeborn-0.3.2-incubating" ]
+ celeborn: [ "celeborn-0.5.2", "celeborn-0.4.3",
"celeborn-0.3.2-incubating" ]
runs-on: ubuntu-20.04
container: apache/gluten:centos-8
steps:
@@ -566,12 +566,16 @@ jobs:
- name: TPC-H SF1.0 && TPC-DS SF1.0 Parquet local spark3.2 with ${{
matrix.celeborn }}
run: |
EXTRA_PROFILE=""
- if [ "${{ matrix.celeborn }}" = "celeborn-0.4.2" ]; then
+ if [ "${{ matrix.celeborn }}" = "celeborn-0.4.3" ]; then
EXTRA_PROFILE="-Pceleborn-0.4"
elif [ "${{ matrix.celeborn }}" = "celeborn-0.5.2" ]; then
EXTRA_PROFILE="-Pceleborn-0.5"
fi
echo "EXTRA_PROFILE: ${EXTRA_PROFILE}"
+ if [ ! -e "/opt/apache-${{ matrix.celeborn }}-bin.tgz" ]; then
+ echo "WARNING: please pre-install your required package in docker
image since the downloading is throttled by this site."
+ wget -nv https://archive.apache.org/dist/celeborn/${{
matrix.celeborn }}/apache-${{ matrix.celeborn }}-bin.tgz -P /opt/
+ fi
cd /opt && mkdir -p celeborn && \
tar xzf apache-${{ matrix.celeborn }}-bin.tgz -C /opt/celeborn
--strip-components=1 && cd celeborn && \
mv ./conf/celeborn-env.sh.template ./conf/celeborn-env.sh && \
diff --git a/dev/docker/Dockerfile.centos8-dynamic-build b/dev/docker/Dockerfile.centos8-dynamic-build
index e0229697f6..daeff3b729 100644
--- a/dev/docker/Dockerfile.centos8-dynamic-build
+++ b/dev/docker/Dockerfile.centos8-dynamic-build
@@ -16,7 +16,7 @@ RUN wget --no-check-certificate
https://downloads.apache.org/maven/maven-3/3.8.8
ENV PATH=${PATH}:/usr/lib/maven/bin
RUN wget -nv
https://archive.apache.org/dist/celeborn/celeborn-0.3.2-incubating/apache-celeborn-0.3.2-incubating-bin.tgz
-P /opt/
-RUN wget -nv
https://archive.apache.org/dist/celeborn/celeborn-0.4.2/apache-celeborn-0.4.2-bin.tgz
-P /opt/
+RUN wget -nv
https://archive.apache.org/dist/celeborn/celeborn-0.4.3/apache-celeborn-0.4.3-bin.tgz
-P /opt/
RUN wget -nv
https://archive.apache.org/dist/celeborn/celeborn-0.5.2/apache-celeborn-0.5.2-bin.tgz
-P /opt/
RUN git clone --depth=1 https://github.com/apache/incubator-gluten /opt/gluten
diff --git a/gluten-celeborn/src-celeborn/main/java/org/apache/spark/shuffle/gluten/celeborn/CelebornShuffleManager.java b/gluten-celeborn/src-celeborn/main/java/org/apache/spark/shuffle/gluten/celeborn/CelebornShuffleManager.java
index a4d4c4f5c5..00c87b391f 100644
--- a/gluten-celeborn/src-celeborn/main/java/org/apache/spark/shuffle/gluten/celeborn/CelebornShuffleManager.java
+++ b/gluten-celeborn/src-celeborn/main/java/org/apache/spark/shuffle/gluten/celeborn/CelebornShuffleManager.java
@@ -190,6 +190,8 @@ public class CelebornShuffleManager implements ShuffleManager {
private <K, V, C> ShuffleHandle registerCelebornShuffleHandle(
int shuffleId, ShuffleDependency<K, V, C> dependency) {
+ // for Celeborn 0.4.0
+ CelebornUtils.registerAppShuffleDeterminate(lifecycleManager, shuffleId,
dependency);
return CelebornUtils.getCelebornShuffleHandle(
appUniqueId,
lifecycleManager.getHost(),
@@ -207,9 +209,6 @@ public class CelebornShuffleManager implements ShuffleManager {
appUniqueId = SparkUtils.appUniqueId(dependency.rdd().context());
initializeLifecycleManager();
- // for Celeborn 0.4.0
- CelebornUtils.registerAppShuffleDeterminate(lifecycleManager, shuffleId,
dependency);
-
// Note: generate app unique id at driver side, make sure
dependency.rdd.context
// is the same SparkContext among different shuffleIds.
// This method may be called many times.
@@ -307,6 +306,23 @@ public class CelebornShuffleManager implements ShuffleManager {
false,
extension);
+ // for Celeborn 0.5.2
+ try {
+ Field field =
CelebornShuffleHandle.class.getDeclaredField("throwsFetchFailure");
+ field.setAccessible(true);
+ boolean throwsFetchFailure = (boolean) field.get(handle);
+ if (throwsFetchFailure) {
+ Method addFailureListenerMethod =
+ SparkUtils.class.getMethod(
+ "addFailureListenerIfBarrierTask",
+ ShuffleClient.class,
+ TaskContext.class,
+ CelebornShuffleHandle.class);
+ addFailureListenerMethod.invoke(null, shuffleClient, context, h);
+ }
+ } catch (NoSuchFieldException | NoSuchMethodException ignored) {
+ }
+
int shuffleId;
// for Celeborn 0.4.0
diff --git a/tools/gluten-it/pom.xml b/tools/gluten-it/pom.xml
index 8120bc5ea1..9b1cf10df8 100644
--- a/tools/gluten-it/pom.xml
+++ b/tools/gluten-it/pom.xml
@@ -177,7 +177,7 @@
<profile>
<id>celeborn-0.4</id>
<properties>
- <celeborn.version>0.4.2</celeborn.version>
+ <celeborn.version>0.4.3</celeborn.version>
</properties>
</profile>
<profile>
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]