This is an automated email from the ASF dual-hosted git repository.
dongjoon pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/spark.git
The following commit(s) were added to refs/heads/master by this push:
new be49ca6dd71b [SPARK-46233][PYTHON] Migrate all remaining
`AttributeError` into PySpark error framework
be49ca6dd71b is described below
commit be49ca6dd71b87172df9d88f305f06a7b87c9ecf
Author: Haejoon Lee <[email protected]>
AuthorDate: Mon Dec 4 16:18:27 2023 -0800
[SPARK-46233][PYTHON] Migrate all remaining `AttributeError` into PySpark
error framework
### What changes were proposed in this pull request?
This PR proposes to migrate all remaining `AttributeError` usages in
`pyspark/sql/*` into the PySpark error framework by raising
`PySparkAttributeError` with dedicated error classes assigned.
### Why are the changes needed?
To improve the error handling in PySpark.
### Does this PR introduce _any_ user-facing change?
No API changes, but the user-facing error messages will be improved.
### How was this patch tested?
The existing CI should pass.
### Was this patch authored or co-authored using generative AI tooling?
No.
Closes #44150 from itholic/migrate_attribute_error.
Authored-by: Haejoon Lee <[email protected]>
Signed-off-by: Dongjoon Hyun <[email protected]>
---
python/pyspark/sql/connect/dataframe.py | 10 +++++++---
python/pyspark/sql/dataframe.py | 11 ++++++++---
python/pyspark/sql/types.py | 13 ++++++++++---
3 files changed, 25 insertions(+), 9 deletions(-)
diff --git a/python/pyspark/sql/connect/dataframe.py
b/python/pyspark/sql/connect/dataframe.py
index a73a24818c0c..6a1d45712163 100644
--- a/python/pyspark/sql/connect/dataframe.py
+++ b/python/pyspark/sql/connect/dataframe.py
@@ -14,7 +14,11 @@
# See the License for the specific language governing permissions and
# limitations under the License.
#
-from pyspark.errors.exceptions.base import SessionNotSameException,
PySparkIndexError
+from pyspark.errors.exceptions.base import (
+ SessionNotSameException,
+ PySparkIndexError,
+ PySparkAttributeError,
+)
from pyspark.sql.connect.utils import check_dependencies
check_dependencies(__name__)
@@ -1694,8 +1698,8 @@ class DataFrame:
)
if name not in self.columns:
- raise AttributeError(
- "'%s' object has no attribute '%s'" %
(self.__class__.__name__, name)
+ raise PySparkAttributeError(
+ error_class="ATTRIBUTE_NOT_SUPPORTED",
message_parameters={"attr_name": name}
)
return _to_col_with_plan_id(
diff --git a/python/pyspark/sql/dataframe.py b/python/pyspark/sql/dataframe.py
index 8b40b222a289..5211d874ba33 100644
--- a/python/pyspark/sql/dataframe.py
+++ b/python/pyspark/sql/dataframe.py
@@ -43,7 +43,12 @@ from py4j.java_gateway import JavaObject, JVMView
from pyspark import copy_func, _NoValue
from pyspark._globals import _NoValueType
from pyspark.context import SparkContext
-from pyspark.errors import PySparkTypeError, PySparkValueError,
PySparkIndexError
+from pyspark.errors import (
+ PySparkTypeError,
+ PySparkValueError,
+ PySparkIndexError,
+ PySparkAttributeError,
+)
from pyspark.rdd import (
RDD,
_load_from_socket,
@@ -3613,8 +3618,8 @@ class DataFrame(PandasMapOpsMixin, PandasConversionMixin):
+---+
"""
if name not in self.columns:
- raise AttributeError(
- "'%s' object has no attribute '%s'" %
(self.__class__.__name__, name)
+ raise PySparkAttributeError(
+ error_class="ATTRIBUTE_NOT_SUPPORTED",
message_parameters={"attr_name": name}
)
jc = self._jdf.apply(name)
return Column(jc)
diff --git a/python/pyspark/sql/types.py b/python/pyspark/sql/types.py
index cbfc4ab5df02..d3eed77b3838 100644
--- a/python/pyspark/sql/types.py
+++ b/python/pyspark/sql/types.py
@@ -55,6 +55,7 @@ from pyspark.errors import (
PySparkTypeError,
PySparkValueError,
PySparkIndexError,
+ PySparkAttributeError,
PySparkKeyError,
)
@@ -2574,16 +2575,22 @@ class Row(tuple):
def __getattr__(self, item: str) -> Any:
if item.startswith("__"):
- raise AttributeError(item)
+ raise PySparkAttributeError(
+ error_class="ATTRIBUTE_NOT_SUPPORTED",
message_parameters={"attr_name": item}
+ )
try:
# it will be slow when it has many fields,
# but this will not be used in normal cases
idx = self.__fields__.index(item)
return self[idx]
except IndexError:
- raise AttributeError(item)
+ raise PySparkAttributeError(
+ error_class="ATTRIBUTE_NOT_SUPPORTED",
message_parameters={"attr_name": item}
+ )
except ValueError:
- raise AttributeError(item)
+ raise PySparkAttributeError(
+ error_class="ATTRIBUTE_NOT_SUPPORTED",
message_parameters={"attr_name": item}
+ )
def __setattr__(self, key: Any, value: Any) -> None:
if key != "__fields__":
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]