(spark) branch master updated: [SPARK-46233][PYTHON] Migrate all remaining `AttributeError` into PySpark error framework

dongjoon Mon, 04 Dec 2023 16:20:06 -0800

This is an automated email from the ASF dual-hosted git repository.

dongjoon pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/spark.git



The following commit(s) were added to refs/heads/master by this push:
     new be49ca6dd71b [SPARK-46233][PYTHON] Migrate all remaining `AttributeError` into PySpark error framework
be49ca6dd71b is described below

commit be49ca6dd71b87172df9d88f305f06a7b87c9ecf
Author: Haejoon Lee <[email protected]>
AuthorDate: Mon Dec 4 16:18:27 2023 -0800

    [SPARK-46233][PYTHON] Migrate all remaining `AttributeError` into PySpark error framework
    
    ### What changes were proposed in this pull request?
    
    This PR proposes to migrate all remaining `AttributeError` usages in
    `pyspark/sql/*` into the PySpark error framework by raising
    `PySparkAttributeError` with dedicated error classes assigned.
    
    ### Why are the changes needed?
    
    To improve the error handling in PySpark.
    
    ### Does this PR introduce _any_ user-facing change?
    
    No API changes, but the user-facing error messages will be improved.
    
    ### How was this patch tested?
    
    The existing CI should pass.
    
    ### Was this patch authored or co-authored using generative AI tooling?
    
    No.
    
    Closes #44150 from itholic/migrate_attribute_error.
    
    Authored-by: Haejoon Lee <[email protected]>
    Signed-off-by: Dongjoon Hyun <[email protected]>
---
 python/pyspark/sql/connect/dataframe.py | 10 +++++++---
 python/pyspark/sql/dataframe.py         | 11 ++++++++---
 python/pyspark/sql/types.py             | 13 ++++++++++---
 3 files changed, 25 insertions(+), 9 deletions(-)

diff --git a/python/pyspark/sql/connect/dataframe.py 
b/python/pyspark/sql/connect/dataframe.py
index a73a24818c0c..6a1d45712163 100644
--- a/python/pyspark/sql/connect/dataframe.py
+++ b/python/pyspark/sql/connect/dataframe.py
@@ -14,7 +14,11 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 #
-from pyspark.errors.exceptions.base import SessionNotSameException, 
PySparkIndexError
+from pyspark.errors.exceptions.base import (
+    SessionNotSameException,
+    PySparkIndexError,
+    PySparkAttributeError,
+)
 from pyspark.sql.connect.utils import check_dependencies
 
 check_dependencies(__name__)
@@ -1694,8 +1698,8 @@ class DataFrame:
             )
 
         if name not in self.columns:
-            raise AttributeError(
-                "'%s' object has no attribute '%s'" % 
(self.__class__.__name__, name)
+            raise PySparkAttributeError(
+                error_class="ATTRIBUTE_NOT_SUPPORTED", 
message_parameters={"attr_name": name}
             )
 
         return _to_col_with_plan_id(
diff --git a/python/pyspark/sql/dataframe.py b/python/pyspark/sql/dataframe.py
index 8b40b222a289..5211d874ba33 100644
--- a/python/pyspark/sql/dataframe.py
+++ b/python/pyspark/sql/dataframe.py
@@ -43,7 +43,12 @@ from py4j.java_gateway import JavaObject, JVMView
 from pyspark import copy_func, _NoValue
 from pyspark._globals import _NoValueType
 from pyspark.context import SparkContext
-from pyspark.errors import PySparkTypeError, PySparkValueError, 
PySparkIndexError
+from pyspark.errors import (
+    PySparkTypeError,
+    PySparkValueError,
+    PySparkIndexError,
+    PySparkAttributeError,
+)
 from pyspark.rdd import (
     RDD,
     _load_from_socket,
@@ -3613,8 +3618,8 @@ class DataFrame(PandasMapOpsMixin, PandasConversionMixin):
         +---+
         """
         if name not in self.columns:
-            raise AttributeError(
-                "'%s' object has no attribute '%s'" % 
(self.__class__.__name__, name)
+            raise PySparkAttributeError(
+                error_class="ATTRIBUTE_NOT_SUPPORTED", 
message_parameters={"attr_name": name}
             )
         jc = self._jdf.apply(name)
         return Column(jc)
diff --git a/python/pyspark/sql/types.py b/python/pyspark/sql/types.py
index cbfc4ab5df02..d3eed77b3838 100644
--- a/python/pyspark/sql/types.py
+++ b/python/pyspark/sql/types.py
@@ -55,6 +55,7 @@ from pyspark.errors import (
     PySparkTypeError,
     PySparkValueError,
     PySparkIndexError,
+    PySparkAttributeError,
     PySparkKeyError,
 )
 
@@ -2574,16 +2575,22 @@ class Row(tuple):
 
     def __getattr__(self, item: str) -> Any:
         if item.startswith("__"):
-            raise AttributeError(item)
+            raise PySparkAttributeError(
+                error_class="ATTRIBUTE_NOT_SUPPORTED", 
message_parameters={"attr_name": item}
+            )
         try:
             # it will be slow when it has many fields,
             # but this will not be used in normal cases
             idx = self.__fields__.index(item)
             return self[idx]
         except IndexError:
-            raise AttributeError(item)
+            raise PySparkAttributeError(
+                error_class="ATTRIBUTE_NOT_SUPPORTED", 
message_parameters={"attr_name": item}
+            )
         except ValueError:
-            raise AttributeError(item)
+            raise PySparkAttributeError(
+                error_class="ATTRIBUTE_NOT_SUPPORTED", 
message_parameters={"attr_name": item}
+            )
 
     def __setattr__(self, key: Any, value: Any) -> None:
         if key != "__fields__":


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

(spark) branch master updated: [SPARK-46233][PYTHON] Migrate all remaining `AttributeError` into PySpark error framework

Reply via email to