This is an automated email from the ASF dual-hosted git repository.
gurwls223 pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/spark.git
The following commit(s) were added to refs/heads/master by this push:
new 03b62c27862a [SPARK-51146][INFRA][FOLLOW-UP] Explicitly disable Spark
Connect in server side scripts
03b62c27862a is described below
commit 03b62c27862a5a592cec3ae46c0231a503a1d83c
Author: Hyukjin Kwon <[email protected]>
AuthorDate: Mon Mar 24 20:47:16 2025 +0900
[SPARK-51146][INFRA][FOLLOW-UP] Explicitly disable Spark Connect in server
side scripts
### What changes were proposed in this pull request?
This PR is technically a followup of
https://github.com/apache/spark/pull/49865 that explicitly sets
`SPARK_CONNECT_MODE` to `0`.
### Why are the changes needed?
Otherwise, it fails to start a cluster, launch Spark Connect against a
locally running Spark Connect server, etc.
### Does this PR introduce _any_ user-facing change?
Not yet. The main change has not been released yet.
### How was this patch tested?
I tested by manually adding:
```
export SPARK_CONNECT_MODE=${SPARK_CONNECT_MODE:-1}
```
into `bin/pyspark`, `bin/spark-shell`, and `bin/spark-sql`.
### Was this patch authored or co-authored using generative AI tooling?
No
Closes #50363 from HyukjinKwon/SPARK-51146-followup.
Authored-by: Hyukjin Kwon <[email protected]>
Signed-off-by: Hyukjin Kwon <[email protected]>
---
bin/spark-sql | 1 +
bin/spark-sql2.cmd | 2 ++
bin/sparkR | 1 +
bin/sparkR2.cmd | 2 ++
dev/make-distribution.sh | 12 ++++++------
python/pyspark/sql/connect/session.py | 3 +--
sbin/spark-daemon.sh | 2 +-
sbin/start-connect-server.sh | 1 +
sbin/start-thriftserver.sh | 1 +
.../scala/org/apache/spark/sql/connect/SparkSession.scala | 2 +-
10 files changed, 17 insertions(+), 10 deletions(-)
diff --git a/bin/spark-sql b/bin/spark-sql
index b08b944ebd31..6b898f291389 100755
--- a/bin/spark-sql
+++ b/bin/spark-sql
@@ -16,6 +16,7 @@
# See the License for the specific language governing permissions and
# limitations under the License.
#
+export SPARK_CONNECT_MODE=0
if [ -z "${SPARK_HOME}" ]; then
source "$(dirname "$0")"/find-spark-home
diff --git a/bin/spark-sql2.cmd b/bin/spark-sql2.cmd
index c34a3c5aa073..0dc6edb1a1c4 100644
--- a/bin/spark-sql2.cmd
+++ b/bin/spark-sql2.cmd
@@ -18,6 +18,8 @@ rem limitations under the License.
rem
rem Figure out where the Spark framework is installed
+set SPARK_CONNECT_MODE=0
+
call "%~dp0find-spark-home.cmd"
set _SPARK_CMD_USAGE=Usage: .\bin\spark-sql [options] [cli option]
diff --git a/bin/sparkR b/bin/sparkR
index 8ecc755839fe..a99b1dd287a1 100755
--- a/bin/sparkR
+++ b/bin/sparkR
@@ -16,6 +16,7 @@
# See the License for the specific language governing permissions and
# limitations under the License.
#
+export SPARK_CONNECT_MODE=0
if [ -z "${SPARK_HOME}" ]; then
source "$(dirname "$0")"/find-spark-home
diff --git a/bin/sparkR2.cmd b/bin/sparkR2.cmd
index 446f0c30bfe8..a047f756a0bf 100644
--- a/bin/sparkR2.cmd
+++ b/bin/sparkR2.cmd
@@ -18,6 +18,8 @@ rem limitations under the License.
rem
rem Figure out where the Spark framework is installed
+set SPARK_CONNECT_MODE=0
+
call "%~dp0find-spark-home.cmd"
call "%SPARK_HOME%\bin\load-spark-env.cmd"
diff --git a/dev/make-distribution.sh b/dev/make-distribution.sh
index 2a9fa4d4d0f2..711f76d2671e 100755
--- a/dev/make-distribution.sh
+++ b/dev/make-distribution.sh
@@ -320,12 +320,12 @@ if [ "$MAKE_TGZ" == "true" ]; then
rm -rf "$TARDIR"
cp -r "$DISTDIR" "$TARDIR"
# Set the Spark Connect system variable in these scripts to enable it by
default.
- awk 'NR==1{print; print "export SPARK_CONNECT_MODE=1"; next} {print}'
"$TARDIR/bin/pyspark" > tmp && cat tmp > "$TARDIR/bin/pyspark"
- awk 'NR==1{print; print "export SPARK_CONNECT_MODE=1"; next} {print}'
"$TARDIR/bin/spark-shell" > tmp && cat tmp > "$TARDIR/bin/spark-shell"
- awk 'NR==1{print; print "export SPARK_CONNECT_MODE=1"; next} {print}'
"$TARDIR/bin/spark-submit" > tmp && cat tmp > "$TARDIR/bin/spark-submit"
- awk 'NR==1{print; print "set SPARK_CONNECT_MODE=1"; next} {print}'
"$TARDIR/bin/pyspark2.cmd" > tmp && cat tmp > "$TARDIR/bin/pyspark2.cmd"
- awk 'NR==1{print; print "set SPARK_CONNECT_MODE=1"; next} {print}'
"$TARDIR/bin/spark-shell2.cmd" > tmp && cat tmp > "$TARDIR/bin/spark-shell2.cmd"
- awk 'NR==1{print; print "set SPARK_CONNECT_MODE=1"; next} {print}'
"$TARDIR/bin/spark-submit2.cmd" > tmp && cat tmp >
"$TARDIR/bin/spark-submit2.cmd"
+ awk 'NR==1{print; print "export
SPARK_CONNECT_MODE=${SPARK_CONNECT_MODE:-1}"; next} {print}'
"$TARDIR/bin/pyspark" > tmp && cat tmp > "$TARDIR/bin/pyspark"
+ awk 'NR==1{print; print "export
SPARK_CONNECT_MODE=${SPARK_CONNECT_MODE:-1}"; next} {print}'
"$TARDIR/bin/spark-shell" > tmp && cat tmp > "$TARDIR/bin/spark-shell"
+ awk 'NR==1{print; print "export
SPARK_CONNECT_MODE=${SPARK_CONNECT_MODE:-1}"; next} {print}'
"$TARDIR/bin/spark-submit" > tmp && cat tmp > "$TARDIR/bin/spark-submit"
+ awk 'NR==1{print; print "if [%SPARK_CONNECT_MODE%] == [] set
SPARK_CONNECT_MODE=1"; next} {print}' "$TARDIR/bin/pyspark2.cmd" > tmp && cat
tmp > "$TARDIR/bin/pyspark2.cmd"
+ awk 'NR==1{print; print "if [%SPARK_CONNECT_MODE%] == [] set
SPARK_CONNECT_MODE=1"; next} {print}' "$TARDIR/bin/spark-shell2.cmd" > tmp &&
cat tmp > "$TARDIR/bin/spark-shell2.cmd"
+ awk 'NR==1{print; print "if [%SPARK_CONNECT_MODE%] == [] set
SPARK_CONNECT_MODE=1"; next} {print}' "$TARDIR/bin/spark-submit2.cmd" > tmp &&
cat tmp > "$TARDIR/bin/spark-submit2.cmd"
rm tmp
$TAR -czf "$TARDIR_NAME.tgz" -C "$SPARK_HOME" "$TARDIR_NAME"
rm -rf "$TARDIR"
diff --git a/python/pyspark/sql/connect/session.py
b/python/pyspark/sql/connect/session.py
index 76c6050e7615..efa9ce7c2c43 100644
--- a/python/pyspark/sql/connect/session.py
+++ b/python/pyspark/sql/connect/session.py
@@ -1080,8 +1080,7 @@ class SparkSession:
# start the regular PySpark session.
if origin_remote is not None:
del os.environ["SPARK_REMOTE"]
- if origin_connect_mode is not None:
- del os.environ["SPARK_CONNECT_MODE"]
+ os.environ["SPARK_CONNECT_MODE"] = "0"
# The regular PySpark session is registered as an active
session
# so would not be garbage-collected.
diff --git a/sbin/spark-daemon.sh b/sbin/spark-daemon.sh
index b7233e6e9bf3..7dc241f09722 100755
--- a/sbin/spark-daemon.sh
+++ b/sbin/spark-daemon.sh
@@ -30,7 +30,7 @@
# SPARK_NICENESS The scheduling priority for daemons. Defaults to 0.
# SPARK_NO_DAEMONIZE If set, will run the proposed command in the
foreground. It will not output a PID file.
##
-
+export SPARK_CONNECT_MODE=0
usage="Usage: spark-daemon.sh [--config <conf-dir>]
(start|stop|submit|decommission|status) <spark-command> <spark-instance-number>
<args...>"
# if no args specified, show usage
diff --git a/sbin/start-connect-server.sh b/sbin/start-connect-server.sh
index 7f0c430a468a..03e7a118f459 100755
--- a/sbin/start-connect-server.sh
+++ b/sbin/start-connect-server.sh
@@ -16,6 +16,7 @@
# See the License for the specific language governing permissions and
# limitations under the License.
#
+export SPARK_CONNECT_MODE=0
# Enter posix mode for bash
set -o posix
diff --git a/sbin/start-thriftserver.sh b/sbin/start-thriftserver.sh
index a45752697934..b77459f0e57a 100755
--- a/sbin/start-thriftserver.sh
+++ b/sbin/start-thriftserver.sh
@@ -19,6 +19,7 @@
#
# Shell script for starting the Spark SQL Thrift server
+export SPARK_CONNECT_MODE=0
# Enter posix mode for bash
set -o posix
diff --git
a/sql/connect/common/src/main/scala/org/apache/spark/sql/connect/SparkSession.scala
b/sql/connect/common/src/main/scala/org/apache/spark/sql/connect/SparkSession.scala
index 6eeeca9d764f..4d93f797e1ec 100644
---
a/sql/connect/common/src/main/scala/org/apache/spark/sql/connect/SparkSession.scala
+++
b/sql/connect/common/src/main/scala/org/apache/spark/sql/connect/SparkSession.scala
@@ -778,7 +778,7 @@ object SparkSession extends SparkSessionCompanion with
Logging {
val pb = new ProcessBuilder(args: _*)
// So don't exclude spark-sql jar in classpath
pb.environment().remove(SparkConnectClient.SPARK_REMOTE)
- pb.environment().remove("SPARK_CONNECT_MODE")
+ pb.environment().put("SPARK_CONNECT_MODE", "0")
pb.environment().put("SPARK_IDENT_STRING", serverId)
pb.environment().put("HOSTNAME", "local")
pb.environment().put("SPARK_CONNECT_AUTHENTICATE_TOKEN", token)
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]