This is an automated email from the ASF dual-hosted git repository.

wenchen pushed a commit to branch branch-4.0
in repository https://gitbox.apache.org/repos/asf/spark.git


The following commit(s) were added to refs/heads/branch-4.0 by this push:
     new 0eb114e0d6d2 [SPARK-51789][CORE][FOLLOW-UP] Set the initial Spark Connect mode properly
0eb114e0d6d2 is described below

commit 0eb114e0d6d25cb3446ce2751ecaeee2d1212b0a
Author: Hyukjin Kwon <gurwls...@apache.org>
AuthorDate: Fri May 9 21:29:48 2025 +0800

    [SPARK-51789][CORE][FOLLOW-UP] Set the initial Spark Connect mode properly
    
    ### What changes were proposed in this pull request?
    
    This PR is a follow-up of https://github.com/apache/spark/pull/50575 that sets the initial Spark Connect mode by reading `SPARK_CONNECT_MODE`.
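    For illustration only, the initial flag can be derived from the environment before any command-line arguments are parsed. A minimal, self-contained sketch (the class and method names here are illustrative, not the launcher's actual API):

    ```java
    // Hypothetical sketch, not the launcher's actual code: seed the initial
    // "remote" flag from SPARK_CONNECT_MODE before --conf/--remote parsing.
    public final class ConnectModeSketch {
        static boolean initialIsRemote() {
            // "1" selects Spark Connect; anything else falls back to Spark Classic.
            return "1".equals(System.getenv("SPARK_CONNECT_MODE"));
        }

        public static void main(String[] args) {
            System.out.println("initial isRemote = " + initialIsRemote());
        }
    }
    ```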
    
    ### Why are the changes needed?
    
    In the Spark 4.0 release (Spark Connect distribution),
    
    ```bash
    SPARK_CONNECT_MODE=1 ./bin/spark-shell
    ```
    
    incorrectly starts a Spark Classic shell instead of a Spark Connect one.
    
    ### Does this PR introduce _any_ user-facing change?
    
    No to end users, because the main change has not been released yet.
    
    ### How was this patch tested?
    
    Manually tested with the combinations below:
    
    ```bash
    SPARK_CONNECT_MODE=1 ./bin/spark-shell
    SPARK_CONNECT_MODE=1 ./bin/spark-shell --master local
    SPARK_CONNECT_MODE=1 ./bin/pyspark --conf spark.api.mode=classic --master local
    SPARK_CONNECT_MODE=1 ./bin/pyspark --master local --conf spark.api.mode=connect
    SPARK_CONNECT_MODE=1 ./bin/pyspark --conf spark.api.mode=classic
    SPARK_CONNECT_MODE=1 ./bin/pyspark --conf spark.api.mode=connect
    SPARK_CONNECT_MODE=1 ./bin/pyspark
    SPARK_CONNECT_MODE=0 ./bin/spark-shell
    SPARK_CONNECT_MODE=0 ./bin/spark-shell --master local
    SPARK_CONNECT_MODE=0 ./bin/pyspark --conf spark.api.mode=classic --master local
    SPARK_CONNECT_MODE=0 ./bin/pyspark --master local --conf spark.api.mode=connect
    SPARK_CONNECT_MODE=0 ./bin/pyspark --conf spark.api.mode=classic
    SPARK_CONNECT_MODE=0 ./bin/pyspark --conf spark.api.mode=connect
    SPARK_CONNECT_MODE=0 ./bin/pyspark
    SPARK_CONNECT_MODE=1 ./bin/spark-shell --master "local[*]" --remote "local[*]"
    SPARK_CONNECT_MODE=1 ./bin/pyspark --conf spark.remote="local[*]" --conf spark.api.mode=connect --conf spark.master="local[*]"
    SPARK_CONNECT_MODE=1 ./bin/pyspark --master "local[*]" --remote "local[*]"
    SPARK_CONNECT_MODE=1 ./bin/pyspark --conf spark.remote="local[*]" --conf spark.api.mode=connect --master "local[*]"
    ```
    
    ### Was this patch authored or co-authored using generative AI tooling?
    
    No.
    
    Closes #50846 from HyukjinKwon/SPARK-51789-followup.
    
    Authored-by: Hyukjin Kwon <gurwls...@apache.org>
    Signed-off-by: Wenchen Fan <wenc...@databricks.com>
    (cherry picked from commit 207e296870a1ecf633a38d308def413e7f21c413)
    Signed-off-by: Wenchen Fan <wenc...@databricks.com>
---
 .../org/apache/spark/launcher/SparkSubmitCommandBuilder.java  | 11 ++++++++---
 python/pyspark/java_gateway.py                                |  1 +
 python/pyspark/sql/connect/session.py                         |  4 ----
 3 files changed, 9 insertions(+), 7 deletions(-)

diff --git a/launcher/src/main/java/org/apache/spark/launcher/SparkSubmitCommandBuilder.java b/launcher/src/main/java/org/apache/spark/launcher/SparkSubmitCommandBuilder.java
index 8eaefc6364a9..5efa3bef78bc 100644
--- a/launcher/src/main/java/org/apache/spark/launcher/SparkSubmitCommandBuilder.java
+++ b/launcher/src/main/java/org/apache/spark/launcher/SparkSubmitCommandBuilder.java
@@ -126,6 +126,7 @@ class SparkSubmitCommandBuilder extends AbstractCommandBuilder {
     boolean isExample = false;
     List<String> submitArgs = args;
     this.userArgs = Collections.emptyList();
+    isRemote |= "connect".equalsIgnoreCase(getApiMode(conf));
 
     if (args.size() > 0) {
       switch (args.get(0)) {
@@ -549,11 +550,15 @@ class SparkSubmitCommandBuilder extends AbstractCommandBuilder {
           checkArgument(value != null, "Missing argument to %s", CONF);
           String[] setConf = value.split("=", 2);
          checkArgument(setConf.length == 2, "Invalid argument to %s: %s", CONF, value);
-          conf.put(setConf[0], setConf[1]);
           // If both spark.remote and spark.master are set, the error will be thrown later when
           // the application is started.
-          isRemote |= conf.containsKey("spark.remote");
-          isRemote |= "connect".equalsIgnoreCase(getApiMode(conf));
+          if (setConf[0].equals("spark.remote")) {
+            isRemote = true;
+          } else if (setConf[0].equals(SparkLauncher.SPARK_API_MODE)) {
+            // Respect the API mode if it is explicitly set.
+            isRemote = setConf[1].equalsIgnoreCase("connect");
+          }
+          conf.put(setConf[0], setConf[1]);
         }
         case CLASS -> {
          // The special classes require some special command line handling, since they allow
diff --git a/python/pyspark/java_gateway.py b/python/pyspark/java_gateway.py
index 3dd8123d581c..6303a4361857 100644
--- a/python/pyspark/java_gateway.py
+++ b/python/pyspark/java_gateway.py
@@ -83,6 +83,7 @@ def launch_gateway(conf=None, popen_kwargs=None):
             os.unlink(conn_info_file)
 
             env = dict(os.environ)
+            env["SPARK_CONNECT_MODE"] = "0"
             env["_PYSPARK_DRIVER_CONN_INFO_PATH"] = conn_info_file
 
             # Launch the Java gateway.
diff --git a/python/pyspark/sql/connect/session.py b/python/pyspark/sql/connect/session.py
index efa9ce7c2c43..303b9c9aac12 100644
--- a/python/pyspark/sql/connect/session.py
+++ b/python/pyspark/sql/connect/session.py
@@ -1074,13 +1074,11 @@ class SparkSession:
                 overwrite_conf["spark.connect.grpc.binding.port"] = "0"
 
             origin_remote = os.environ.get("SPARK_REMOTE", None)
-            origin_connect_mode = os.environ.get("SPARK_CONNECT_MODE", None)
             try:
                 # So SparkSubmit thinks no remote is set in order to
                 # start the regular PySpark session.
                 if origin_remote is not None:
                     del os.environ["SPARK_REMOTE"]
-                os.environ["SPARK_CONNECT_MODE"] = "0"
 
                 # The regular PySpark session is registered as an active session
                 # so would not be garbage-collected.
@@ -1098,8 +1096,6 @@ class SparkSession:
             finally:
                 if origin_remote is not None:
                     os.environ["SPARK_REMOTE"] = origin_remote
-                if origin_connect_mode is not None:
-                    os.environ["SPARK_CONNECT_MODE"] = origin_connect_mode
         else:
             raise PySparkRuntimeError(
                 errorClass="SESSION_OR_CONTEXT_EXISTS",

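The launcher hunk above establishes a precedence order: `SPARK_CONNECT_MODE` seeds the initial `isRemote` flag, `--conf spark.remote=...` always forces remote mode, and an explicitly set `--conf spark.api.mode=...` overrides the environment-derived default. The `java_gateway.py` change pins `SPARK_CONNECT_MODE=0` so the JVM gateway backing a regular PySpark session starts in classic mode. Below is a self-contained sketch of that resolution order, using illustrative names rather than the launcher's real API:

```java
import java.util.LinkedHashMap;
import java.util.Map;

// Illustrative sketch of the precedence implemented in SparkSubmitCommandBuilder:
// seed isRemote from SPARK_CONNECT_MODE, then let explicit --conf values override.
public final class ApiModeResolution {
    static boolean resolveIsRemote(String connectModeEnv, Map<String, String> confArgs) {
        // Initial value comes from the environment (the fix in this follow-up).
        boolean isRemote = "1".equals(connectModeEnv);
        for (Map.Entry<String, String> e : confArgs.entrySet()) {
            if (e.getKey().equals("spark.remote")) {
                isRemote = true;  // spark.remote always implies Spark Connect
            } else if (e.getKey().equals("spark.api.mode")) {
                // An explicitly set API mode wins over the environment default.
                isRemote = "connect".equalsIgnoreCase(e.getValue());
            }
        }
        return isRemote;
    }

    public static void main(String[] args) {
        Map<String, String> conf = new LinkedHashMap<>();
        conf.put("spark.api.mode", "classic");
        // SPARK_CONNECT_MODE=1 plus an explicit classic API mode -> classic mode.
        System.out.println(resolveIsRemote("1", conf));  // prints: false
    }
}
```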
