This is an automated email from the ASF dual-hosted git repository.

philo pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/incubator-gluten.git


The following commit(s) were added to refs/heads/main by this push:
     new 8ac0c18e89 [GLUTEN-8398] Bump Celeborn to 0.4.3 and 0.5.2 (#8399)
8ac0c18e89 is described below

commit 8ac0c18e89a7fa8b6b39ad136d4d9657fa04c84a
Author: Nicholas Jiang <[email protected]>
AuthorDate: Sat Jan 4 08:28:27 2025 +0800

    [GLUTEN-8398] Bump Celeborn to 0.4.3 and 0.5.2 (#8399)
---
 .github/workflows/velox_backend.yml                |  8 ++++++--
 dev/docker/Dockerfile.centos8-dynamic-build        |  2 +-
 .../gluten/celeborn/CelebornShuffleManager.java    | 22 +++++++++++++++++++---
 tools/gluten-it/pom.xml                            |  2 +-
 4 files changed, 27 insertions(+), 7 deletions(-)

diff --git a/.github/workflows/velox_backend.yml b/.github/workflows/velox_backend.yml
index ada2ea3f23..ec79bc8b1b 100644
--- a/.github/workflows/velox_backend.yml
+++ b/.github/workflows/velox_backend.yml
@@ -544,7 +544,7 @@ jobs:
       fail-fast: false
       matrix:
         spark: [ "spark-3.2" ]
-        celeborn: [ "celeborn-0.5.2", "celeborn-0.4.2", 
"celeborn-0.3.2-incubating" ]
+        celeborn: [ "celeborn-0.5.2", "celeborn-0.4.3", 
"celeborn-0.3.2-incubating" ]
     runs-on: ubuntu-20.04
     container: apache/gluten:centos-8
     steps:
@@ -566,12 +566,16 @@ jobs:
       - name: TPC-H SF1.0 && TPC-DS SF1.0 Parquet local spark3.2 with ${{ 
matrix.celeborn }}
         run: |
           EXTRA_PROFILE=""
-          if [ "${{ matrix.celeborn }}" = "celeborn-0.4.2" ]; then
+          if [ "${{ matrix.celeborn }}" = "celeborn-0.4.3" ]; then
             EXTRA_PROFILE="-Pceleborn-0.4"
           elif [ "${{ matrix.celeborn }}" = "celeborn-0.5.2" ]; then
             EXTRA_PROFILE="-Pceleborn-0.5"
           fi
           echo "EXTRA_PROFILE: ${EXTRA_PROFILE}"
+          if [ ! -e "/opt/apache-${{ matrix.celeborn }}-bin.tgz" ]; then
+            echo "WARNING: please pre-install your required package in docker 
image since the downloading is throttled by this site."
+            wget -nv https://archive.apache.org/dist/celeborn/${{ 
matrix.celeborn }}/apache-${{ matrix.celeborn }}-bin.tgz -P /opt/
+          fi
           cd /opt && mkdir -p celeborn && \
           tar xzf apache-${{ matrix.celeborn }}-bin.tgz -C /opt/celeborn 
--strip-components=1 && cd celeborn && \
           mv ./conf/celeborn-env.sh.template ./conf/celeborn-env.sh && \
diff --git a/dev/docker/Dockerfile.centos8-dynamic-build b/dev/docker/Dockerfile.centos8-dynamic-build
index e0229697f6..daeff3b729 100644
--- a/dev/docker/Dockerfile.centos8-dynamic-build
+++ b/dev/docker/Dockerfile.centos8-dynamic-build
@@ -16,7 +16,7 @@ RUN wget --no-check-certificate https://downloads.apache.org/maven/maven-3/3.8.8
 ENV PATH=${PATH}:/usr/lib/maven/bin
 
 RUN wget -nv 
https://archive.apache.org/dist/celeborn/celeborn-0.3.2-incubating/apache-celeborn-0.3.2-incubating-bin.tgz
 -P /opt/
-RUN wget -nv 
https://archive.apache.org/dist/celeborn/celeborn-0.4.2/apache-celeborn-0.4.2-bin.tgz
 -P /opt/
+RUN wget -nv 
https://archive.apache.org/dist/celeborn/celeborn-0.4.3/apache-celeborn-0.4.3-bin.tgz
 -P /opt/
 RUN wget -nv 
https://archive.apache.org/dist/celeborn/celeborn-0.5.2/apache-celeborn-0.5.2-bin.tgz
 -P /opt/
 
 RUN git clone --depth=1 https://github.com/apache/incubator-gluten /opt/gluten
diff --git a/gluten-celeborn/src-celeborn/main/java/org/apache/spark/shuffle/gluten/celeborn/CelebornShuffleManager.java b/gluten-celeborn/src-celeborn/main/java/org/apache/spark/shuffle/gluten/celeborn/CelebornShuffleManager.java
index a4d4c4f5c5..00c87b391f 100644
--- a/gluten-celeborn/src-celeborn/main/java/org/apache/spark/shuffle/gluten/celeborn/CelebornShuffleManager.java
+++ b/gluten-celeborn/src-celeborn/main/java/org/apache/spark/shuffle/gluten/celeborn/CelebornShuffleManager.java
@@ -190,6 +190,8 @@ public class CelebornShuffleManager implements ShuffleManager {
 
   private <K, V, C> ShuffleHandle registerCelebornShuffleHandle(
       int shuffleId, ShuffleDependency<K, V, C> dependency) {
+    // for Celeborn 0.4.0
+    CelebornUtils.registerAppShuffleDeterminate(lifecycleManager, shuffleId, 
dependency);
     return CelebornUtils.getCelebornShuffleHandle(
         appUniqueId,
         lifecycleManager.getHost(),
@@ -207,9 +209,6 @@ public class CelebornShuffleManager implements ShuffleManager {
     appUniqueId = SparkUtils.appUniqueId(dependency.rdd().context());
     initializeLifecycleManager();
 
-    // for Celeborn 0.4.0
-    CelebornUtils.registerAppShuffleDeterminate(lifecycleManager, shuffleId, 
dependency);
-
     // Note: generate app unique id at driver side, make sure 
dependency.rdd.context
     // is the same SparkContext among different shuffleIds.
     // This method may be called many times.
@@ -307,6 +306,23 @@ public class CelebornShuffleManager implements ShuffleManager {
                 false,
                 extension);
 
+        // for Celeborn 0.5.2
+        try {
+          Field field = 
CelebornShuffleHandle.class.getDeclaredField("throwsFetchFailure");
+          field.setAccessible(true);
+          boolean throwsFetchFailure = (boolean) field.get(handle);
+          if (throwsFetchFailure) {
+            Method addFailureListenerMethod =
+                    SparkUtils.class.getMethod(
+                            "addFailureListenerIfBarrierTask",
+                            ShuffleClient.class,
+                            TaskContext.class,
+                            CelebornShuffleHandle.class);
+            addFailureListenerMethod.invoke(null, shuffleClient, context, h);
+          }
+        } catch (NoSuchFieldException | NoSuchMethodException ignored) {
+        }
+
         int shuffleId;
 
         // for Celeborn 0.4.0
diff --git a/tools/gluten-it/pom.xml b/tools/gluten-it/pom.xml
index 8120bc5ea1..9b1cf10df8 100644
--- a/tools/gluten-it/pom.xml
+++ b/tools/gluten-it/pom.xml
@@ -177,7 +177,7 @@
     <profile>
       <id>celeborn-0.4</id>
       <properties>
-        <celeborn.version>0.4.2</celeborn.version>
+        <celeborn.version>0.4.3</celeborn.version>
       </properties>
     </profile>
     <profile>


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to