This is an automated email from the ASF dual-hosted git repository.

xinrong pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/spark.git


The following commit(s) were added to refs/heads/master by this push:
     new 027aeb1764a8 [SPARK-46277][PYTHON] Validate startup urls with the config being set
027aeb1764a8 is described below

commit 027aeb1764a816858b7ea071cd2b620f02a6a525
Author: Xinrong Meng <xinr...@apache.org>
AuthorDate: Thu Dec 7 13:45:31 2023 -0800

    [SPARK-46277][PYTHON] Validate startup urls with the config being set
    
    ### What changes were proposed in this pull request?
    Validate startup URLs as each config is set; see the example under "Does this PR introduce _any_ user-facing change?" below.
    
    ### Why are the changes needed?
    To produce clear and user-friendly error messages when incompatible startup URLs are configured.
    
    ### Does this PR introduce _any_ user-facing change?
    Yes.
    
    FROM
    ```py
    >>> SparkSession.builder.config(map={"spark.master": "x", "spark.remote": 
"y"})
    <pyspark.sql.session.SparkSession.Builder object at 0x7fa310115b20
    
    >>> SparkSession.builder.config(map={"spark.master": "x", "spark.remote": 
"y"}).config("x", "z")  # Only raises the error when adding new configs
    Traceback (most recent call last):
    ...
    RuntimeError: Spark master cannot be configured with Spark Connect server; however, found URL for Spark Connect [y]
    ```
    
    TO
    ```py
    >>> SparkSession.builder.config(map={"spark.master": "x", "spark.remote": 
"y"})
    Traceback (most recent call last):
    ...
    RuntimeError: Spark master cannot be configured with Spark Connect server; however, found URL for Spark Connect [y]
    ```
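    
    For comparison, a minimal sketch of calls that remain valid after this change (`local[*]` and `sc://localhost` are illustrative placeholder URLs; each access to `SparkSession.builder` yields a fresh `Builder`):
    ```py
    >>> SparkSession.builder.config("spark.master", "local[*]")        # master alone: OK
    <pyspark.sql.session.SparkSession.Builder object at 0x...>
    >>> SparkSession.builder.config("spark.remote", "sc://localhost")  # remote alone: OK
    <pyspark.sql.session.SparkSession.Builder object at 0x...>
    ```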
    
    ### How was this patch tested?
    Unit tests; a new test_create_spark_context_with_invalid_configs case covers both error paths (see test_session.py below).
    
    ### Was this patch authored or co-authored using generative AI tooling?
    No.
    
    Closes #44194 from xinrong-meng/fix_session.
    
    Authored-by: Xinrong Meng <xinr...@apache.org>
    Signed-off-by: Xinrong Meng <xinr...@apache.org>
---
 python/pyspark/errors/error_classes.py   |  6 +++---
 python/pyspark/sql/session.py            | 28 +++++++++++-----------------
 python/pyspark/sql/tests/test_session.py | 30 ++++++++++++++++++++++++++++--
 3 files changed, 42 insertions(+), 22 deletions(-)

diff --git a/python/pyspark/errors/error_classes.py b/python/pyspark/errors/error_classes.py
index 965fd04a9135..cc8400270967 100644
--- a/python/pyspark/errors/error_classes.py
+++ b/python/pyspark/errors/error_classes.py
@@ -86,12 +86,12 @@ ERROR_CLASSES_JSON = """
   },
   "CANNOT_CONFIGURE_SPARK_CONNECT": {
     "message": [
-      "Spark Connect server cannot be configured with Spark master; however, 
found URL for Spark master [<url>]."
+      "Spark Connect server cannot be configured: Existing [<existing_url>], 
New [<new_url>]."
     ]
   },
-  "CANNOT_CONFIGURE_SPARK_MASTER": {
+  "CANNOT_CONFIGURE_SPARK_CONNECT_MASTER": {
     "message": [
-      "Spark master cannot be configured with Spark Connect server; however, 
found URL for Spark Connect [<url>]."
+      "Spark Connect server and Spark master cannot be configured together: 
Spark master [<master_url>], Spark Connect [<connect_url>]."
     ]
   },
   "CANNOT_CONVERT_COLUMN_INTO_BOOL": {
diff --git a/python/pyspark/sql/session.py b/python/pyspark/sql/session.py
index 7f4589557cd2..86aacfa54c6e 100644
--- a/python/pyspark/sql/session.py
+++ b/python/pyspark/sql/session.py
@@ -286,17 +286,17 @@ class SparkSession(SparkConversionMixin):
             with self._lock:
                 if conf is not None:
                     for k, v in conf.getAll():
-                        self._validate_startup_urls()
                         self._options[k] = v
+                        self._validate_startup_urls()
                 elif map is not None:
                     for k, v in map.items():  # type: ignore[assignment]
                         v = to_str(v)  # type: ignore[assignment]
-                        self._validate_startup_urls()
                         self._options[k] = v
+                        self._validate_startup_urls()
                 else:
                     value = to_str(value)
-                    self._validate_startup_urls()
                     self._options[cast(str, key)] = value
+                    self._validate_startup_urls()
                 return self
 
         def _validate_startup_urls(
@@ -306,22 +306,16 @@ class SparkSession(SparkConversionMixin):
             Helper function that validates the combination of startup URLs and raises an exception
             if incompatible options are selected.
             """
-            if "spark.master" in self._options and (
+            if ("spark.master" in self._options or "MASTER" in os.environ) and 
(
                 "spark.remote" in self._options or "SPARK_REMOTE" in os.environ
             ):
                 raise PySparkRuntimeError(
-                    error_class="CANNOT_CONFIGURE_SPARK_MASTER",
+                    error_class="CANNOT_CONFIGURE_SPARK_CONNECT_MASTER",
                     message_parameters={
-                        "url": self._options.get("spark.remote", 
os.environ.get("SPARK_REMOTE"))
-                    },
-                )
-            if "spark.remote" in self._options and (
-                "spark.master" in self._options or "MASTER" in os.environ
-            ):
-                raise PySparkRuntimeError(
-                    error_class="CANNOT_CONFIGURE_SPARK_CONNECT",
-                    message_parameters={
-                        "url": self._options.get("spark.master", 
os.environ.get("MASTER"))
+                        "master_url": self._options.get("spark.master", 
os.environ.get("MASTER")),
+                        "connect_url": self._options.get(
+                            "spark.remote", os.environ.get("SPARK_REMOTE")
+                        ),
                     },
                 )
 
@@ -333,8 +327,8 @@ class SparkSession(SparkConversionMixin):
                     raise PySparkRuntimeError(
                         error_class="CANNOT_CONFIGURE_SPARK_CONNECT",
                         message_parameters={
-                            "new_url": os.environ["SPARK_REMOTE"],
-                            "existing_url": remote,
+                            "existing_url": os.environ["SPARK_REMOTE"],
+                            "new_url": remote,
                         },
                     )
 
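
A note on the reordering above: each option is now stored in `self._options` before `_validate_startup_urls()` runs, so the check sees the option that was just added and the error surfaces on the offending `config()` call itself rather than on a later one. A minimal sketch (URLs are illustrative):

```py
from pyspark.sql import SparkSession

builder = SparkSession.builder
builder.config("spark.master", "local[*]")        # stored, then validated: fine
builder.config("spark.remote", "sc://localhost")  # stored, then validated: raises
# PySparkRuntimeError: [CANNOT_CONFIGURE_SPARK_CONNECT_MASTER] Spark Connect
# server and Spark master cannot be configured together: Spark master
# [local[*]], Spark Connect [sc://localhost].
```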
diff --git a/python/pyspark/sql/tests/test_session.py b/python/pyspark/sql/tests/test_session.py
index da27bf925749..ba1d999ff7ba 100644
--- a/python/pyspark/sql/tests/test_session.py
+++ b/python/pyspark/sql/tests/test_session.py
@@ -20,10 +20,11 @@ import unittest
 import unittest.mock
 
 from pyspark import SparkConf, SparkContext
+from pyspark.errors import PySparkRuntimeError
 from pyspark.sql import SparkSession, SQLContext, Row
 from pyspark.sql.functions import col
 from pyspark.testing.sqlutils import ReusedSQLTestCase
-from pyspark.testing.utils import PySparkTestCase
+from pyspark.testing.utils import PySparkTestCase, PySparkErrorTestUtils
 
 
 class SparkSessionTests(ReusedSQLTestCase):
@@ -259,7 +260,7 @@ class SparkSessionTests5(unittest.TestCase):
         self.assertIs(SQLContext.getOrCreate(self.sc)._sc, self.sc)
 
 
-class SparkSessionBuilderTests(unittest.TestCase):
+class SparkSessionBuilderTests(unittest.TestCase, PySparkErrorTestUtils):
     def test_create_spark_context_first_then_spark_session(self):
         sc = None
         session = None
@@ -352,6 +353,31 @@ class SparkSessionBuilderTests(unittest.TestCase):
             if session is not None:
                 session.stop()
 
+    def test_create_spark_context_with_invalid_configs(self):
+        with self.assertRaises(PySparkRuntimeError) as pe1:
+            SparkSession.builder.config(map={"spark.master": "x", 
"spark.remote": "y"})
+
+        self.check_error(
+            exception=pe1.exception,
+            error_class="CANNOT_CONFIGURE_SPARK_CONNECT_MASTER",
+            message_parameters={"master_url": "x", "connect_url": "y"},
+        )
+
+        with unittest.mock.patch.dict(
+            "os.environ", {"SPARK_REMOTE": "remote_url", "SPARK_LOCAL_REMOTE": 
"true"}
+        ):
+            with self.assertRaises(PySparkRuntimeError) as pe2:
+                SparkSession.builder.config("spark.remote", 
"different_remote_url")
+
+            self.check_error(
+                exception=pe2.exception,
+                error_class="CANNOT_CONFIGURE_SPARK_CONNECT",
+                message_parameters={
+                    "existing_url": "remote_url",
+                    "new_url": "different_remote_url",
+                },
+            )
+
 
 class SparkExtensionsTest(unittest.TestCase):
     # These tests are separate because it uses 'spark.sql.extensions' which is

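
For completeness, a sketch of running the new test case on its own with the standard `unittest` loader (assumes a local PySpark build is importable):

```py
import unittest

# Load only the invalid-configs test added in the diff above.
suite = unittest.defaultTestLoader.loadTestsFromName(
    "pyspark.sql.tests.test_session.SparkSessionBuilderTests"
    ".test_create_spark_context_with_invalid_configs"
)
unittest.TextTestRunner(verbosity=2).run(suite)
```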
