This is an automated email from the ASF dual-hosted git repository.

gurwls223 pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/spark.git


The following commit(s) were added to refs/heads/master by this push:
     new 03b62c27862a [SPARK-51146][INFRA][FOLLOW-UP] Explicitly disable Spark 
Connect in server side scripts
03b62c27862a is described below

commit 03b62c27862a5a592cec3ae46c0231a503a1d83c
Author: Hyukjin Kwon <[email protected]>
AuthorDate: Mon Mar 24 20:47:16 2025 +0900

    [SPARK-51146][INFRA][FOLLOW-UP] Explicitly disable Spark Connect in server 
side scripts
    
    ### What changes were proposed in this pull request?
    
    This PR is technically a followup of 
https://github.com/apache/spark/pull/49865 that explicitly sets 
`SPARK_CONNECT_MODE` to `0`.
    
    ### Why are the changes needed?
    
    Otherwise, operations such as starting a cluster or launching Spark Connect
against a locally running Spark Connect server fail.
    
    ### Does this PR introduce _any_ user-facing change?
    
    Not yet. The main change has not been released yet.
    
    ### How was this patch tested?
    
    I tested it by manually adding:
    
    ```
    export SPARK_CONNECT_MODE=${SPARK_CONNECT_MODE:-1}
    ```
    
    into `bin/pyspark`, `bin/spark-shell`, and `bin/spark-sql`.
    
    ### Was this patch authored or co-authored using generative AI tooling?
    
    No
    
    Closes #50363 from HyukjinKwon/SPARK-51146-followup.
    
    Authored-by: Hyukjin Kwon <[email protected]>
    Signed-off-by: Hyukjin Kwon <[email protected]>
---
 bin/spark-sql                                                |  1 +
 bin/spark-sql2.cmd                                           |  2 ++
 bin/sparkR                                                   |  1 +
 bin/sparkR2.cmd                                              |  2 ++
 dev/make-distribution.sh                                     | 12 ++++++------
 python/pyspark/sql/connect/session.py                        |  3 +--
 sbin/spark-daemon.sh                                         |  2 +-
 sbin/start-connect-server.sh                                 |  1 +
 sbin/start-thriftserver.sh                                   |  1 +
 .../scala/org/apache/spark/sql/connect/SparkSession.scala    |  2 +-
 10 files changed, 17 insertions(+), 10 deletions(-)

diff --git a/bin/spark-sql b/bin/spark-sql
index b08b944ebd31..6b898f291389 100755
--- a/bin/spark-sql
+++ b/bin/spark-sql
@@ -16,6 +16,7 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 #
+export SPARK_CONNECT_MODE=0
 
 if [ -z "${SPARK_HOME}" ]; then
   source "$(dirname "$0")"/find-spark-home
diff --git a/bin/spark-sql2.cmd b/bin/spark-sql2.cmd
index c34a3c5aa073..0dc6edb1a1c4 100644
--- a/bin/spark-sql2.cmd
+++ b/bin/spark-sql2.cmd
@@ -18,6 +18,8 @@ rem limitations under the License.
 rem
 
 rem Figure out where the Spark framework is installed
+set SPARK_CONNECT_MODE=0
+
 call "%~dp0find-spark-home.cmd"
 
 set _SPARK_CMD_USAGE=Usage: .\bin\spark-sql [options] [cli option]
diff --git a/bin/sparkR b/bin/sparkR
index 8ecc755839fe..a99b1dd287a1 100755
--- a/bin/sparkR
+++ b/bin/sparkR
@@ -16,6 +16,7 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 #
+export SPARK_CONNECT_MODE=0
 
 if [ -z "${SPARK_HOME}" ]; then
   source "$(dirname "$0")"/find-spark-home
diff --git a/bin/sparkR2.cmd b/bin/sparkR2.cmd
index 446f0c30bfe8..a047f756a0bf 100644
--- a/bin/sparkR2.cmd
+++ b/bin/sparkR2.cmd
@@ -18,6 +18,8 @@ rem limitations under the License.
 rem
 
 rem Figure out where the Spark framework is installed
+set SPARK_CONNECT_MODE=0
+
 call "%~dp0find-spark-home.cmd"
 
 call "%SPARK_HOME%\bin\load-spark-env.cmd"
diff --git a/dev/make-distribution.sh b/dev/make-distribution.sh
index 2a9fa4d4d0f2..711f76d2671e 100755
--- a/dev/make-distribution.sh
+++ b/dev/make-distribution.sh
@@ -320,12 +320,12 @@ if [ "$MAKE_TGZ" == "true" ]; then
     rm -rf "$TARDIR"
     cp -r "$DISTDIR" "$TARDIR"
     # Set the Spark Connect system variable in these scripts to enable it by 
default.
-    awk 'NR==1{print; print "export SPARK_CONNECT_MODE=1"; next} {print}' 
"$TARDIR/bin/pyspark" > tmp && cat tmp > "$TARDIR/bin/pyspark"
-    awk 'NR==1{print; print "export SPARK_CONNECT_MODE=1"; next} {print}' 
"$TARDIR/bin/spark-shell" > tmp && cat tmp > "$TARDIR/bin/spark-shell"
-    awk 'NR==1{print; print "export SPARK_CONNECT_MODE=1"; next} {print}' 
"$TARDIR/bin/spark-submit" > tmp && cat tmp > "$TARDIR/bin/spark-submit"
-    awk 'NR==1{print; print "set SPARK_CONNECT_MODE=1"; next} {print}' 
"$TARDIR/bin/pyspark2.cmd" > tmp && cat tmp > "$TARDIR/bin/pyspark2.cmd"
-    awk 'NR==1{print; print "set SPARK_CONNECT_MODE=1"; next} {print}' 
"$TARDIR/bin/spark-shell2.cmd" > tmp && cat tmp > "$TARDIR/bin/spark-shell2.cmd"
-    awk 'NR==1{print; print "set SPARK_CONNECT_MODE=1"; next} {print}' 
"$TARDIR/bin/spark-submit2.cmd" > tmp && cat tmp > 
"$TARDIR/bin/spark-submit2.cmd"
+    awk 'NR==1{print; print "export 
SPARK_CONNECT_MODE=${SPARK_CONNECT_MODE:-1}"; next} {print}' 
"$TARDIR/bin/pyspark" > tmp && cat tmp > "$TARDIR/bin/pyspark"
+    awk 'NR==1{print; print "export 
SPARK_CONNECT_MODE=${SPARK_CONNECT_MODE:-1}"; next} {print}' 
"$TARDIR/bin/spark-shell" > tmp && cat tmp > "$TARDIR/bin/spark-shell"
+    awk 'NR==1{print; print "export 
SPARK_CONNECT_MODE=${SPARK_CONNECT_MODE:-1}"; next} {print}' 
"$TARDIR/bin/spark-submit" > tmp && cat tmp > "$TARDIR/bin/spark-submit"
+    awk 'NR==1{print; print "if [%SPARK_CONNECT_MODE%] == [] set 
SPARK_CONNECT_MODE=1"; next} {print}' "$TARDIR/bin/pyspark2.cmd" > tmp && cat 
tmp > "$TARDIR/bin/pyspark2.cmd"
+    awk 'NR==1{print; print "if [%SPARK_CONNECT_MODE%] == [] set 
SPARK_CONNECT_MODE=1"; next} {print}' "$TARDIR/bin/spark-shell2.cmd" > tmp && 
cat tmp > "$TARDIR/bin/spark-shell2.cmd"
+    awk 'NR==1{print; print "if [%SPARK_CONNECT_MODE%] == [] set 
SPARK_CONNECT_MODE=1"; next} {print}' "$TARDIR/bin/spark-submit2.cmd" > tmp && 
cat tmp > "$TARDIR/bin/spark-submit2.cmd"
     rm tmp
     $TAR -czf "$TARDIR_NAME.tgz" -C "$SPARK_HOME" "$TARDIR_NAME"
     rm -rf "$TARDIR"
diff --git a/python/pyspark/sql/connect/session.py 
b/python/pyspark/sql/connect/session.py
index 76c6050e7615..efa9ce7c2c43 100644
--- a/python/pyspark/sql/connect/session.py
+++ b/python/pyspark/sql/connect/session.py
@@ -1080,8 +1080,7 @@ class SparkSession:
                 # start the regular PySpark session.
                 if origin_remote is not None:
                     del os.environ["SPARK_REMOTE"]
-                if origin_connect_mode is not None:
-                    del os.environ["SPARK_CONNECT_MODE"]
+                os.environ["SPARK_CONNECT_MODE"] = "0"
 
                 # The regular PySpark session is registered as an active 
session
                 # so would not be garbage-collected.
diff --git a/sbin/spark-daemon.sh b/sbin/spark-daemon.sh
index b7233e6e9bf3..7dc241f09722 100755
--- a/sbin/spark-daemon.sh
+++ b/sbin/spark-daemon.sh
@@ -30,7 +30,7 @@
 #   SPARK_NICENESS The scheduling priority for daemons. Defaults to 0.
 #   SPARK_NO_DAEMONIZE   If set, will run the proposed command in the 
foreground. It will not output a PID file.
 ##
-
+export SPARK_CONNECT_MODE=0
 usage="Usage: spark-daemon.sh [--config <conf-dir>] 
(start|stop|submit|decommission|status) <spark-command> <spark-instance-number> 
<args...>"
 
 # if no args specified, show usage
diff --git a/sbin/start-connect-server.sh b/sbin/start-connect-server.sh
index 7f0c430a468a..03e7a118f459 100755
--- a/sbin/start-connect-server.sh
+++ b/sbin/start-connect-server.sh
@@ -16,6 +16,7 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 #
+export SPARK_CONNECT_MODE=0
 
 # Enter posix mode for bash 
 set -o posix 
diff --git a/sbin/start-thriftserver.sh b/sbin/start-thriftserver.sh
index a45752697934..b77459f0e57a 100755
--- a/sbin/start-thriftserver.sh
+++ b/sbin/start-thriftserver.sh
@@ -19,6 +19,7 @@
 
 #
 # Shell script for starting the Spark SQL Thrift server
+export SPARK_CONNECT_MODE=0
 
 # Enter posix mode for bash
 set -o posix
diff --git 
a/sql/connect/common/src/main/scala/org/apache/spark/sql/connect/SparkSession.scala
 
b/sql/connect/common/src/main/scala/org/apache/spark/sql/connect/SparkSession.scala
index 6eeeca9d764f..4d93f797e1ec 100644
--- 
a/sql/connect/common/src/main/scala/org/apache/spark/sql/connect/SparkSession.scala
+++ 
b/sql/connect/common/src/main/scala/org/apache/spark/sql/connect/SparkSession.scala
@@ -778,7 +778,7 @@ object SparkSession extends SparkSessionCompanion with 
Logging {
           val pb = new ProcessBuilder(args: _*)
           // So don't exclude spark-sql jar in classpath
           pb.environment().remove(SparkConnectClient.SPARK_REMOTE)
-          pb.environment().remove("SPARK_CONNECT_MODE")
+          pb.environment().put("SPARK_CONNECT_MODE", "0")
           pb.environment().put("SPARK_IDENT_STRING", serverId)
           pb.environment().put("HOSTNAME", "local")
           pb.environment().put("SPARK_CONNECT_AUTHENTICATE_TOKEN", token)


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to