This is an automated email from the ASF dual-hosted git repository.
maxgekk pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/spark.git
The following commit(s) were added to refs/heads/master by this push:
new 94abea8572fe [SPARK-50167][PYTHON][CONNECT] Improve PySpark plotting
error messages and imports
94abea8572fe is described below
commit 94abea8572fe789a1ab3fc65693f624a84425c6d
Author: Xinrong Meng <[email protected]>
AuthorDate: Wed Oct 30 17:25:20 2024 +0100
[SPARK-50167][PYTHON][CONNECT] Improve PySpark plotting error messages and
imports
### What changes were proposed in this pull request?
Improve PySpark plotting error messages and imports:
- Minor improvements to error messages for better clarity.
- Deferred imports of PySparkPlotAccessor to call time (inside the `plot`
property), as a best practice to avoid potential circular imports in the future.
### Why are the changes needed?
Improving error messages enhances usability, while delaying
PySparkPlotAccessor imports prevents potential circular dependencies.
### Does this PR introduce _any_ user-facing change?
No.
### How was this patch tested?
Unit tests.
### Was this patch authored or co-authored using generative AI tooling?
No.
Closes #48701 from xinrong-meng/impr_err_plot.
Authored-by: Xinrong Meng <[email protected]>
Signed-off-by: Max Gekk <[email protected]>
---
python/pyspark/errors/error-conditions.json | 4 ++--
python/pyspark/sql/classic/dataframe.py | 8 +++-----
python/pyspark/sql/connect/dataframe.py | 8 +++-----
python/pyspark/sql/plot/core.py | 4 ++--
python/pyspark/sql/tests/plot/test_frame_plot_plotly.py | 4 ++--
5 files changed, 12 insertions(+), 16 deletions(-)
diff --git a/python/pyspark/errors/error-conditions.json
b/python/pyspark/errors/error-conditions.json
index 5aa0313631c0..b2a68a83bfa7 100644
--- a/python/pyspark/errors/error-conditions.json
+++ b/python/pyspark/errors/error-conditions.json
@@ -817,12 +817,12 @@
"Pipe function `<func_name>` exited with error code <error_code>."
]
},
- "PLOT_INVALID_TYPE_COLUMN": {
+ "PLOT_INVALID_TYPE_COLUMN": {
"message": [
"Column <col_name> must be one of <valid_types> for plotting, got
<col_type>."
]
},
- "PLOT_NOT_NUMERIC_COLUMN": {
+ "PLOT_NOT_NUMERIC_COLUMN_ARGUMENT": {
"message": [
"Argument <arg_name> must be a numerical column for plotting, got
<arg_type>."
]
diff --git a/python/pyspark/sql/classic/dataframe.py
b/python/pyspark/sql/classic/dataframe.py
index 91dec609e522..fad3fac9890b 100644
--- a/python/pyspark/sql/classic/dataframe.py
+++ b/python/pyspark/sql/classic/dataframe.py
@@ -74,10 +74,6 @@ from pyspark.sql.utils import get_active_spark_context,
to_java_array, to_scala_
from pyspark.sql.pandas.conversion import PandasConversionMixin
from pyspark.sql.pandas.map_ops import PandasMapOpsMixin
-try:
- from pyspark.sql.plot import PySparkPlotAccessor
-except ImportError:
- PySparkPlotAccessor = None # type: ignore
if TYPE_CHECKING:
from py4j.java_gateway import JavaObject
@@ -1795,7 +1791,9 @@ class DataFrame(ParentDataFrame, PandasMapOpsMixin,
PandasConversionMixin):
)
@property
- def plot(self) -> PySparkPlotAccessor:
+ def plot(self) -> "PySparkPlotAccessor": # type: ignore[name-defined] #
noqa: F821
+ from pyspark.sql.plot import PySparkPlotAccessor
+
return PySparkPlotAccessor(self)
diff --git a/python/pyspark/sql/connect/dataframe.py
b/python/pyspark/sql/connect/dataframe.py
index 3d5b845fcd24..1a9894b6fac5 100644
--- a/python/pyspark/sql/connect/dataframe.py
+++ b/python/pyspark/sql/connect/dataframe.py
@@ -86,10 +86,6 @@ from pyspark.sql.connect.functions import builtin as F
from pyspark.sql.pandas.types import from_arrow_schema, to_arrow_schema
from pyspark.sql.pandas.functions import _validate_pandas_udf # type:
ignore[attr-defined]
-try:
- from pyspark.sql.plot import PySparkPlotAccessor
-except ImportError:
- PySparkPlotAccessor = None # type: ignore
if TYPE_CHECKING:
from pyspark.sql.connect._typing import (
@@ -2166,7 +2162,9 @@ class DataFrame(ParentDataFrame):
return self._execution_info
@property
- def plot(self) -> PySparkPlotAccessor:
+ def plot(self) -> "PySparkPlotAccessor": # type: ignore[name-defined] #
noqa: F821
+ from pyspark.sql.plot import PySparkPlotAccessor
+
return PySparkPlotAccessor(self)
diff --git a/python/pyspark/sql/plot/core.py b/python/pyspark/sql/plot/core.py
index d63837ced8c8..c20912dda90a 100644
--- a/python/pyspark/sql/plot/core.py
+++ b/python/pyspark/sql/plot/core.py
@@ -352,10 +352,10 @@ class PySparkPlotAccessor:
y_field = schema[y] if y in schema.names else None
if y_field is None or not isinstance(y_field.dataType, NumericType):
raise PySparkTypeError(
- errorClass="PLOT_NOT_NUMERIC_COLUMN",
+ errorClass="PLOT_NOT_NUMERIC_COLUMN_ARGUMENT",
messageParameters={
"arg_name": "y",
- "arg_type": str(y_field.dataType) if y_field else "None",
+ "arg_type": str(y_field.dataType.__class__.__name__) if
y_field else "None",
},
)
return self(kind="pie", x=x, y=y, **kwargs)
diff --git a/python/pyspark/sql/tests/plot/test_frame_plot_plotly.py
b/python/pyspark/sql/tests/plot/test_frame_plot_plotly.py
index 95a706c9d997..362d1225416a 100644
--- a/python/pyspark/sql/tests/plot/test_frame_plot_plotly.py
+++ b/python/pyspark/sql/tests/plot/test_frame_plot_plotly.py
@@ -324,8 +324,8 @@ class DataFramePlotPlotlyTestsMixin:
self.check_error(
exception=pe.exception,
- errorClass="PLOT_NOT_NUMERIC_COLUMN",
- messageParameters={"arg_name": "y", "arg_type": "StringType()"},
+ errorClass="PLOT_NOT_NUMERIC_COLUMN_ARGUMENT",
+ messageParameters={"arg_name": "y", "arg_type": "StringType"},
)
def test_box_plot(self):
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]