This is an automated email from the ASF dual-hosted git repository.
gurwls223 pushed a commit to branch branch-3.4
in repository https://gitbox.apache.org/repos/asf/spark.git
The following commit(s) were added to refs/heads/branch-3.4 by this push:
new 2e0abca726a [SPARK-42182][CONNECT][TESTS] Make `ReusedConnectTestCase`
to take Spark configurations
2e0abca726a is described below
commit 2e0abca726ae10b022695fcb79743af1acfac532
Author: Hyukjin Kwon <[email protected]>
AuthorDate: Thu Jan 26 13:20:30 2023 +0900
[SPARK-42182][CONNECT][TESTS] Make `ReusedConnectTestCase` to take Spark
configurations
### What changes were proposed in this pull request?
This PR proposes to give `pyspark.testing.connectutils.ReusedConnectTestCase`
the ability to set configurations that are propagated to the server, like
`pyspark.testing.sqlutils.ReusedSQLTestCase`.
### Why are the changes needed?
`pyspark.testing.connectutils.ReusedConnectTestCase` is designed to cover
all test cases for `pyspark.testing.sqlutils.ReusedSQLTestCase`. It should be
able to set the configurations identically.
### Does this PR introduce _any_ user-facing change?
No, test-only.
### How was this patch tested?
Manually tested.
Closes #39738 from HyukjinKwon/SPARK-42182.
Authored-by: Hyukjin Kwon <[email protected]>
Signed-off-by: Hyukjin Kwon <[email protected]>
(cherry picked from commit 31be205bd6ac8e6eb77fc9b876298e0848dbf929)
Signed-off-by: Hyukjin Kwon <[email protected]>
---
python/pyspark/sql/connect/session.py | 5 ++++-
python/pyspark/sql/session.py | 2 +-
python/pyspark/testing/connectutils.py | 16 ++++++++++++++--
3 files changed, 19 insertions(+), 4 deletions(-)
diff --git a/python/pyspark/sql/connect/session.py
b/python/pyspark/sql/connect/session.py
index 33f596dc897..1f11a30ade2 100644
--- a/python/pyspark/sql/connect/session.py
+++ b/python/pyspark/sql/connect/session.py
@@ -454,7 +454,7 @@ class SparkSession:
return self._client.register_udf(function, return_type)
@staticmethod
- def _start_connect_server(master: str) -> None:
+ def _start_connect_server(master: str, opts: Dict[str, Any]) -> None:
"""
Starts the Spark Connect server given the master.
@@ -493,6 +493,9 @@ class SparkSession:
session = PySparkSession._instantiatedSession
if session is None or session._sc._jsc is None:
conf = SparkConf()
+ for k, v in opts.items():
+ conf.set(k, v)
+
# Do not need to worry about the existing configurations because
# Py4J gateway is not created yet, and `conf` instance is empty
here.
# The configurations belows are manually manipulated later to
respect
diff --git a/python/pyspark/sql/session.py b/python/pyspark/sql/session.py
index 3f466a0e6ec..36ad1500687 100644
--- a/python/pyspark/sql/session.py
+++ b/python/pyspark/sql/session.py
@@ -436,7 +436,7 @@ class SparkSession(SparkConversionMixin):
if url.startswith("local"):
os.environ["SPARK_LOCAL_REMOTE"] = "1"
- RemoteSparkSession._start_connect_server(url)
+ RemoteSparkSession._start_connect_server(url,
opts)
url = "sc://localhost"
os.environ["SPARK_REMOTE"] = url
diff --git a/python/pyspark/testing/connectutils.py
b/python/pyspark/testing/connectutils.py
index 4079e9790a0..64934c763c3 100644
--- a/python/pyspark/testing/connectutils.py
+++ b/python/pyspark/testing/connectutils.py
@@ -21,7 +21,7 @@ import os
import functools
import unittest
-from pyspark import Row
+from pyspark import Row, SparkConf
from pyspark.testing.utils import PySparkErrorTestUtils
from pyspark.testing.sqlutils import (
have_pandas,
@@ -168,9 +168,21 @@ class ReusedConnectTestCase(unittest.TestCase,
SQLTestUtils, PySparkErrorTestUti
Spark Connect version of
:class:`pyspark.testing.sqlutils.ReusedSQLTestCase`.
"""
+ @classmethod
+ def conf(cls):
+ """
+ Override this in subclasses to supply a more specific conf
+ """
+ return SparkConf(loadDefaults=False)
+
@classmethod
def setUpClass(cls):
- cls.spark =
PySparkSession.builder.appName(cls.__name__).remote("local[4]").getOrCreate()
+ cls.spark = (
+ PySparkSession.builder.config(conf=cls.conf())
+ .appName(cls.__name__)
+ .remote("local[4]")
+ .getOrCreate()
+ )
cls.tempdir = tempfile.NamedTemporaryFile(delete=False)
os.unlink(cls.tempdir.name)
cls.testData = [Row(key=i, value=str(i)) for i in range(100)]
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]