This is an automated email from the ASF dual-hosted git repository.

xianjin pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/incubator-uniffle.git


The following commit(s) were added to refs/heads/master by this push:
     new c1f627b6 [#552] docs: add more doc about spark.serializer requirement 
(#556)
c1f627b6 is described below

commit c1f627b6a58bc5befa40238c1d19a802609c03c4
Author: advancedxy <[email protected]>
AuthorDate: Wed Feb 8 13:07:58 2023 +0800

    [#552] docs: add more doc about spark.serializer requirement (#556)
    
    ### What changes were proposed in this pull request?
    add more doc and warning
    
    ### Why are the changes needed?
    Fixes #552
    
    ### Does this PR introduce _any_ user-facing change?
    No
    
    ### How was this patch tested?
    No need.
---
 README.md                                                         | 3 +++
 .../src/main/java/org/apache/spark/shuffle/RssShuffleManager.java | 8 ++++++++
 docs/client_guide.md                                              | 3 +++
 3 files changed, 14 insertions(+)

diff --git a/README.md b/README.md
index 2454e0a4..f419f8d5 100644
--- a/README.md
+++ b/README.md
@@ -196,6 +196,9 @@ rss-xxx.tgz will be generated for deployment
 2. Update Spark conf to enable Uniffle, e.g.,
 
    ```
+   # Uniffle transmits serialized shuffle data over the network, therefore a 
serializer that supports relocation of
+   # serialized objects should be used. 
+   spark.serializer org.apache.spark.serializer.KryoSerializer # this could 
also be in the spark-defaults.conf
    spark.shuffle.manager org.apache.spark.shuffle.RssShuffleManager
    spark.rss.coordinator.quorum <coordinatorIp1>:19999,<coordinatorIp2>:19999
    # Note: For Spark2, spark.sql.adaptive.enabled should be false because 
Spark2 doesn't support AQE.
diff --git 
a/client-spark/spark3/src/main/java/org/apache/spark/shuffle/RssShuffleManager.java
 
b/client-spark/spark3/src/main/java/org/apache/spark/shuffle/RssShuffleManager.java
index e095637b..a768a449 100644
--- 
a/client-spark/spark3/src/main/java/org/apache/spark/shuffle/RssShuffleManager.java
+++ 
b/client-spark/spark3/src/main/java/org/apache/spark/shuffle/RssShuffleManager.java
@@ -20,6 +20,7 @@ package org.apache.spark.shuffle;
 import java.util.Collections;
 import java.util.List;
 import java.util.Map;
+import java.util.Optional;
 import java.util.Set;
 import java.util.concurrent.Executors;
 import java.util.concurrent.ScheduledExecutorService;
@@ -153,6 +154,13 @@ public class RssShuffleManager implements ShuffleManager {
 
   public RssShuffleManager(SparkConf conf, boolean isDriver) {
     this.sparkConf = conf;
+    boolean supportsRelocation = Optional.ofNullable(SparkEnv.get())
+        .map(env -> env.serializer().supportsRelocationOfSerializedObjects())
+        .orElse(true);
+    if (!supportsRelocation) {
+      LOG.warn("RssShuffleManager requires a serializer which supports 
relocation of serialized objects. Please set "
+          + "spark.serializer to org.apache.spark.serializer.KryoSerializer 
instead");
+    }
     this.user = sparkConf.get("spark.rss.quota.user", "user");
     this.uuid = sparkConf.get("spark.rss.quota.uuid",  
Long.toString(System.currentTimeMillis()));
     // set & check replica config
diff --git a/docs/client_guide.md b/docs/client_guide.md
index c2884fa0..12c99c91 100644
--- a/docs/client_guide.md
+++ b/docs/client_guide.md
@@ -38,6 +38,9 @@ This document will introduce how to deploy Uniffle client 
plugins with Spark and
 2. Update Spark conf to enable Uniffle, eg,
 
    ```
+   # Uniffle transmits serialized shuffle data over the network, therefore a 
serializer that supports relocation of
+   # serialized objects should be used. 
+   spark.serializer org.apache.spark.serializer.KryoSerializer # this could 
also be in the spark-defaults.conf
    spark.shuffle.manager org.apache.spark.shuffle.RssShuffleManager
    spark.rss.coordinator.quorum <coordinatorIp1>:19999,<coordinatorIp2>:19999
    # Note: For Spark2, spark.sql.adaptive.enabled should be false because 
Spark2 doesn't support AQE.

Reply via email to