This is an automated email from the ASF dual-hosted git repository.

wenchen pushed a commit to branch branch-4.0
in repository https://gitbox.apache.org/repos/asf/spark.git
The following commit(s) were added to refs/heads/branch-4.0 by this push:
     new 0eb114e0d6d2 [SPARK-51789][CORE][FOLLOW-UP] Set the initial Spark Connect mode properly
0eb114e0d6d2 is described below

commit 0eb114e0d6d25cb3446ce2751ecaeee2d1212b0a
Author: Hyukjin Kwon <gurwls...@apache.org>
AuthorDate: Fri May 9 21:29:48 2025 +0800

    [SPARK-51789][CORE][FOLLOW-UP] Set the initial Spark Connect mode properly

    ### What changes were proposed in this pull request?

    This PR is a follow-up of https://github.com/apache/spark/pull/50575 that sets the initial Spark Connect mode by reading `SPARK_CONNECT_MODE`.

    ### Why are the changes needed?

    In the Spark 4.0 release (Spark Connect distribution),

    ```bash
    SPARK_CONNECT_MODE=1 ./bin/spark-shell
    ```

    incorrectly starts a Spark Classic shell.

    ### Does this PR introduce _any_ user-facing change?

    No for end users, because the main change has not been released yet.

    ### How was this patch tested?

    Manually tested with the combinations below:

    ```
    SPARK_CONNECT_MODE=1 ./bin/spark-shell
    SPARK_CONNECT_MODE=1 ./bin/spark-shell --master local
    SPARK_CONNECT_MODE=1 ./bin/pyspark --conf spark.api.mode=classic --master local
    SPARK_CONNECT_MODE=1 ./bin/pyspark --master local --conf spark.api.mode=connect
    SPARK_CONNECT_MODE=1 ./bin/pyspark --conf spark.api.mode=classic
    SPARK_CONNECT_MODE=1 ./bin/pyspark --conf spark.api.mode=connect
    SPARK_CONNECT_MODE=1 ./bin/pyspark
    SPARK_CONNECT_MODE=0 ./bin/spark-shell
    SPARK_CONNECT_MODE=0 ./bin/spark-shell --master local
    SPARK_CONNECT_MODE=0 ./bin/pyspark --conf spark.api.mode=classic --master local
    SPARK_CONNECT_MODE=0 ./bin/pyspark --master local --conf spark.api.mode=connect
    SPARK_CONNECT_MODE=0 ./bin/pyspark --conf spark.api.mode=classic
    SPARK_CONNECT_MODE=0 ./bin/pyspark --conf spark.api.mode=connect
    SPARK_CONNECT_MODE=0 ./bin/pyspark
    SPARK_CONNECT_MODE=1 ./bin/spark-shell --master "local[*]" --remote "local[*]"
    SPARK_CONNECT_MODE=1 ./bin/pyspark --conf spark.remote="local[*]" --conf spark.api.mode=connect --conf spark.master="local[*]"
    SPARK_CONNECT_MODE=1 ./bin/pyspark --master "local[*]" --remote "local[*]"
    SPARK_CONNECT_MODE=1 ./bin/pyspark --conf spark.remote="local[*]" --conf spark.api.mode=connect --master "local[*]"
    ```

    ### Was this patch authored or co-authored using generative AI tooling?

    No.

    Closes #50846 from HyukjinKwon/SPARK-51789-followup.
Authored-by: Hyukjin Kwon <gurwls...@apache.org>
Signed-off-by: Wenchen Fan <wenc...@databricks.com>
(cherry picked from commit 207e296870a1ecf633a38d308def413e7f21c413)
Signed-off-by: Wenchen Fan <wenc...@databricks.com>
---
 .../org/apache/spark/launcher/SparkSubmitCommandBuilder.java | 11 ++++++++---
 python/pyspark/java_gateway.py                               |  1 +
 python/pyspark/sql/connect/session.py                        |  4 ----
 3 files changed, 9 insertions(+), 7 deletions(-)

diff --git a/launcher/src/main/java/org/apache/spark/launcher/SparkSubmitCommandBuilder.java b/launcher/src/main/java/org/apache/spark/launcher/SparkSubmitCommandBuilder.java
index 8eaefc6364a9..5efa3bef78bc 100644
--- a/launcher/src/main/java/org/apache/spark/launcher/SparkSubmitCommandBuilder.java
+++ b/launcher/src/main/java/org/apache/spark/launcher/SparkSubmitCommandBuilder.java
@@ -126,6 +126,7 @@ class SparkSubmitCommandBuilder extends AbstractCommandBuilder {
     boolean isExample = false;
     List<String> submitArgs = args;
     this.userArgs = Collections.emptyList();
+    isRemote |= "connect".equalsIgnoreCase(getApiMode(conf));
 
     if (args.size() > 0) {
       switch (args.get(0)) {
@@ -549,11 +550,15 @@ class SparkSubmitCommandBuilder extends AbstractCommandBuilder {
           checkArgument(value != null, "Missing argument to %s", CONF);
           String[] setConf = value.split("=", 2);
           checkArgument(setConf.length == 2, "Invalid argument to %s: %s", CONF, value);
-          conf.put(setConf[0], setConf[1]);
           // If both spark.remote and spark.mater are set, the error will be thrown later when
           // the application is started.
-          isRemote |= conf.containsKey("spark.remote");
-          isRemote |= "connect".equalsIgnoreCase(getApiMode(conf));
+          if (setConf[0].equals("spark.remote")) {
+            isRemote = true;
+          } else if (setConf[0].equals(SparkLauncher.SPARK_API_MODE)) {
+            // Respects if the API mode is explicitly set.
+            isRemote = setConf[1].equalsIgnoreCase("connect");
+          }
+          conf.put(setConf[0], setConf[1]);
         }
         case CLASS -> {
           // The special classes require some special command line handling, since they allow
diff --git a/python/pyspark/java_gateway.py b/python/pyspark/java_gateway.py
index 3dd8123d581c..6303a4361857 100644
--- a/python/pyspark/java_gateway.py
+++ b/python/pyspark/java_gateway.py
@@ -83,6 +83,7 @@ def launch_gateway(conf=None, popen_kwargs=None):
             os.unlink(conn_info_file)
 
         env = dict(os.environ)
+        env["SPARK_CONNECT_MODE"] = "0"
         env["_PYSPARK_DRIVER_CONN_INFO_PATH"] = conn_info_file
 
         # Launch the Java gateway.
diff --git a/python/pyspark/sql/connect/session.py b/python/pyspark/sql/connect/session.py
index efa9ce7c2c43..303b9c9aac12 100644
--- a/python/pyspark/sql/connect/session.py
+++ b/python/pyspark/sql/connect/session.py
@@ -1074,13 +1074,11 @@ class SparkSession:
                 overwrite_conf["spark.connect.grpc.binding.port"] = "0"
 
                 origin_remote = os.environ.get("SPARK_REMOTE", None)
-                origin_connect_mode = os.environ.get("SPARK_CONNECT_MODE", None)
                 try:
                     # So SparkSubmit thinks no remote is set in order to
                     # start the regular PySpark session.
                     if origin_remote is not None:
                         del os.environ["SPARK_REMOTE"]
-                    os.environ["SPARK_CONNECT_MODE"] = "0"
 
                     # The regular PySpark session is registered as an active session
                     # so would not be garbage-collected.
@@ -1098,8 +1096,6 @@ class SparkSession:
                 finally:
                     if origin_remote is not None:
                         os.environ["SPARK_REMOTE"] = origin_remote
-                    if origin_connect_mode is not None:
-                        os.environ["SPARK_CONNECT_MODE"] = origin_connect_mode
         else:
             raise PySparkRuntimeError(
                 errorClass="SESSION_OR_CONTEXT_EXISTS",
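[Editor's note] For illustration only, here is a minimal, self-contained Java sketch of the precedence the launcher change establishes: `SPARK_CONNECT_MODE` seeds the initial value, `--conf spark.remote=...` forces Connect mode on, and an explicit `--conf spark.api.mode=...` overrides the environment default. This is a simplified model of the behavior, not the patched `SparkSubmitCommandBuilder`; the class, record, and method names are hypothetical.

```java
import java.util.List;

public class ApiModeSketch {
  // Hypothetical stand-in for one --conf key=value pair, in command-line order.
  record ConfArg(String key, String value) {}

  // Mirrors the precedence in the patch: the environment seeds the value,
  // spark.remote forces Connect on, and an explicit spark.api.mode wins.
  static boolean resolveIsRemote(String envConnectMode, List<ConfArg> confArgs) {
    boolean isRemote = "1".equals(envConnectMode); // initial read of SPARK_CONNECT_MODE
    for (ConfArg arg : confArgs) {
      if (arg.key().equals("spark.remote")) {
        isRemote = true; // spark.remote always implies Spark Connect
      } else if (arg.key().equals("spark.api.mode")) {
        // An explicitly set API mode overrides the environment default.
        isRemote = arg.value().equalsIgnoreCase("connect");
      }
    }
    return isRemote;
  }

  public static void main(String[] args) {
    // SPARK_CONNECT_MODE=1 ./bin/pyspark --conf spark.api.mode=classic
    System.out.println(resolveIsRemote("1",
        List.of(new ConfArg("spark.api.mode", "classic")))); // false: classic wins
    // SPARK_CONNECT_MODE=0 ./bin/pyspark --conf spark.remote=local[*]
    System.out.println(resolveIsRemote("0",
        List.of(new ConfArg("spark.remote", "local[*]"))));  // true
  }
}
```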
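[Editor's note] The `java_gateway.py` hunk pins `SPARK_CONNECT_MODE=0` in the child process environment so the JVM gateway backing a Connect-local session always starts in classic mode. A rough Java analogue of that pattern using `ProcessBuilder` is sketched below; the command line is a placeholder, not the real gateway launch command.

```java
import java.io.IOException;

public class GatewayEnvSketch {
  public static void main(String[] args) throws IOException {
    // Placeholder command: stands in for however the gateway JVM is launched.
    ProcessBuilder pb = new ProcessBuilder("./bin/spark-submit", "pyspark-shell");
    // The child inherits a copy of the parent environment; pin the flag for the
    // child only, mirroring env["SPARK_CONNECT_MODE"] = "0" in launch_gateway().
    pb.environment().put("SPARK_CONNECT_MODE", "0");
    pb.inheritIO();
    pb.start();
  }
}
```

Setting the flag in the child's environment, rather than mutating and later restoring `os.environ` in the parent as `session.py` previously did, keeps the override from leaking into the parent process; that is why the save/restore code in `session.py` could be deleted.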