This is an automated email from the ASF dual-hosted git repository.
maxgekk pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/spark.git
The following commit(s) were added to refs/heads/master by this push:
new 94abea8572fe [SPARK-50167][PYTHON][CONNECT] Improve PySpark plotting
error messages and imports
94abea8572fe is described below
commit 94abea8572fe789a1ab3fc65693f624a84425c6d
Author: Xinrong Meng <[email protected]>
AuthorDate: Wed Oct 30 17:25:20 2024 +0100
[SPARK-50167][PYTHON][CONNECT] Improve PySpark plotting error messages and
imports
### What changes were proposed in this pull request?
Improve PySpark plotting error messages and imports:
- Minor improvements to error messages for better clarity.
- Deferred imports of PySparkPlotAccessor to call time (inside the `plot`
property), as a best practice to avoid potential circular imports in the future.
### Why are the changes needed?
Improving error messages enhances usability, while delaying
PySparkPlotAccessor imports prevents potential circular dependencies.
### Does this PR introduce _any_ user-facing change?
No.
### How was this patch tested?
Unit tests.
### Was this patch authored or co-authored using generative AI tooling?
No.
Closes #48701 from xinrong-meng/impr_err_plot.
Authored-by: Xinrong Meng <[email protected]>
Signed-off-by: Max Gekk <[email protected]>
---
python/pyspark/errors/error-conditions.json | 4 ++--
python/pyspark/sql/classic/dataframe.py | 8 +++-----
python/pyspark/sql/connect/dataframe.py | 8 +++-----
python/pyspark/sql/plot/core.py | 4 ++--
python/pyspark/sql/tests/plot/test_frame_plot_plotly.py | 4 ++--
5 files changed, 12 insertions(+), 16 deletions(-)
diff --git a/python/pyspark/errors/error-conditions.json
b/python/pyspark/errors/error-conditions.json
index 5aa0313631c0..b2a68a83bfa7 100644
--- a/python/pyspark/errors/error-conditions.json
+++ b/python/pyspark/errors/error-conditions.json
@@ -817,12 +817,12 @@
"Pipe function `<func_name>` exited with error code <error_code>."
]
},
- "PLOT_INVALID_TYPE_COLUMN": {
+ "PLOT_INVALID_TYPE_COLUMN": {
"message": [
"Column <col_name> must be one of <valid_types> for plotting, got
<col_type>."
]
},
- "PLOT_NOT_NUMERIC_COLUMN": {
+ "PLOT_NOT_NUMERIC_COLUMN_ARGUMENT": {
"message": [
"Argument <arg_name> must be a numerical column for plotting, got
<arg_type>."
]
diff --git a/python/pyspark/sql/classic/dataframe.py
b/python/pyspark/sql/classic/dataframe.py
index 91dec609e522..fad3fac9890b 100644
--- a/python/pyspark/sql/classic/dataframe.py
+++ b/python/pyspark/sql/classic/dataframe.py
@@ -74,10 +74,6 @@ from pyspark.sql.utils import get_active_spark_context,
to_java_array, to_scala_
from pyspark.sql.pandas.conversion import PandasConversionMixin
from pyspark.sql.pandas.map_ops import PandasMapOpsMixin
-try:
- from pyspark.sql.plot import PySparkPlotAccessor
-except ImportError:
- PySparkPlotAccessor = None # type: ignore
if TYPE_CHECKING:
from py4j.java_gateway import JavaObject
@@ -1795,7 +1791,9 @@ class DataFrame(ParentDataFrame, PandasMapOpsMixin,
PandasConversionMixin):
)
@property
- def plot(self) -> PySparkPlotAccessor:
+ def plot(self) -> "PySparkPlotAccessor": # type: ignore[name-defined] #
noqa: F821
+ from pyspark.sql.plot import PySparkPlotAccessor
+
return PySparkPlotAccessor(self)
diff --git a/python/pyspark/sql/connect/dataframe.py
b/python/pyspark/sql/connect/dataframe.py
index 3d5b845fcd24..1a9894b6fac5 100644
--- a/python/pyspark/sql/connect/dataframe.py
+++ b/python/pyspark/sql/connect/dataframe.py
@@ -86,10 +86,6 @@ from pyspark.sql.connect.functions import builtin as F
from pyspark.sql.pandas.types import from_arrow_schema, to_arrow_schema
from pyspark.sql.pandas.functions import _validate_pandas_udf # type:
ignore[attr-defined]
-try:
- from pyspark.sql.plot import PySparkPlotAccessor
-except ImportError:
- PySparkPlotAccessor = None # type: ignore
if TYPE_CHECKING:
from pyspark.sql.connect._typing import (
@@ -2166,7 +2162,9 @@ class DataFrame(ParentDataFrame):
return self._execution_info
@property
- def plot(self) -> PySparkPlotAccessor:
+ def plot(self) -> "PySparkPlotAccessor": # type: ignore[name-defined] #
noqa: F821
+ from pyspark.sql.plot import PySparkPlotAccessor
+
return PySparkPlotAccessor(self)
diff --git a/python/pyspark/sql/plot/core.py b/python/pyspark/sql/plot/core.py
index d63837ced8c8..c20912dda90a 100644
--- a/python/pyspark/sql/plot/core.py
+++ b/python/pyspark/sql/plot/core.py
@@ -352,10 +352,10 @@ class PySparkPlotAccessor:
y_field = schema[y] if y in schema.names else None
if y_field is None or not isinstance(y_field.dataType, NumericType):
raise PySparkTypeError(
- errorClass="PLOT_NOT_NUMERIC_COLUMN",
+ errorClass="PLOT_NOT_NUMERIC_COLUMN_ARGUMENT",
messageParameters={
"arg_name": "y",
- "arg_type": str(y_field.dataType) if y_field else "None",
+ "arg_type": str(y_field.dataType.__class__.__name__) if
y_field else "None",
},
)
return self(kind="pie", x=x, y=y, **kwargs)
diff --git a/python/pyspark/sql/tests/plot/test_frame_plot_plotly.py
b/python/pyspark/sql/tests/plot/test_frame_plot_plotly.py
index 95a706c9d997..362d1225416a 100644
--- a/python/pyspark/sql/tests/plot/test_frame_plot_plotly.py
+++ b/python/pyspark/sql/tests/plot/test_frame_plot_plotly.py
@@ -324,8 +324,8 @@ class DataFramePlotPlotlyTestsMixin:
self.check_error(
exception=pe.exception,
- errorClass="PLOT_NOT_NUMERIC_COLUMN",
- messageParameters={"arg_name": "y", "arg_type": "StringType()"},
+ errorClass="PLOT_NOT_NUMERIC_COLUMN_ARGUMENT",
+ messageParameters={"arg_name": "y", "arg_type": "StringType"},
)
def test_box_plot(self):
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]