This is an automated email from the ASF dual-hosted git repository.

gurwls223 pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/spark.git


The following commit(s) were added to refs/heads/master by this push:
     new 8d64cb4144d1 [SPARK-46360][PYTHON] Enhance error message debugging with new `getMessage` API
8d64cb4144d1 is described below

commit 8d64cb4144d17107fd3758d2a46430439203c7ad
Author: Haejoon Lee <haejoon....@databricks.com>
AuthorDate: Mon Dec 11 21:03:11 2023 -0800

    [SPARK-46360][PYTHON] Enhance error message debugging with new `getMessage` API
    
    ### What changes were proposed in this pull request?
    
    This PR proposes to introduce `getMessage` to provide a standardized way for users to obtain a concise and clear error message.
    
    ### Why are the changes needed?
    
    Previously, extracting a simple and informative error message in PySpark was not straightforward. The internal `ErrorClassesReader.get_error_message` method was often used, but for JVM-originated errors not defined in `error_classes.py`, obtaining a succinct error message was challenging.
    
    The new `getMessage` API harmonizes error message retrieval across PySpark, leveraging existing JVM implementations to ensure consistency and clarity in the messages presented to the users.
    
    ### Does this PR introduce _any_ user-facing change?
    
    Yes, this PR introduces a `getMessage` for directly accessing simplified error messages in PySpark.
    
    - **Before**: No official API for simplified error messages; excessive details in the error output:
        ```python
        from pyspark.sql.utils import AnalysisException
    
        try:
            spark.sql("""SELECT a""")
        except AnalysisException as e:
            str(e)
        # "[UNRESOLVED_COLUMN.WITHOUT_SUGGESTION] A column, variable, or function parameter with name `a` cannot be resolved.  SQLSTATE: 42703; line 1 pos 7;\n'Project ['a]\n+- OneRowRelation\n"
        ```
    
    - **After**: The `getMessage` API provides streamlined, user-friendly error messages:
        ```python
        from pyspark.sql.utils import AnalysisException
    
        try:
            spark.sql("""SELECT a""")
        except AnalysisException as e:
            e.getMessage()
        # '[UNRESOLVED_COLUMN.WITHOUT_SUGGESTION] A column, variable, or function parameter with name `a` cannot be resolved.  SQLSTATE: 42703'
        ```
    
    ### How was this patch tested?
    
    Added UTs.
    
    ### Was this patch authored or co-authored using generative AI tooling?
    
    No.
    
    Closes #44292 from itholic/getMessage.
    
    Authored-by: Haejoon Lee <haejoon....@databricks.com>
    Signed-off-by: Hyukjin Kwon <gurwls...@apache.org>
---
 python/pyspark/errors/exceptions/base.py     | 19 ++++++++++++++++++-
 python/pyspark/errors/exceptions/captured.py | 18 ++++++++++++++++++
 python/pyspark/sql/tests/test_utils.py       |  8 ++++++++
 3 files changed, 44 insertions(+), 1 deletion(-)

diff --git a/python/pyspark/errors/exceptions/base.py b/python/pyspark/errors/exceptions/base.py
index b60800da3ff8..e40e1b2e93cb 100644
--- a/python/pyspark/errors/exceptions/base.py
+++ b/python/pyspark/errors/exceptions/base.py
@@ -60,6 +60,7 @@ class PySparkException(Exception):
 
         See Also
         --------
+        :meth:`PySparkException.getMessage`
         :meth:`PySparkException.getMessageParameters`
         :meth:`PySparkException.getSqlState`
         """
@@ -74,6 +75,7 @@ class PySparkException(Exception):
         See Also
         --------
         :meth:`PySparkException.getErrorClass`
+        :meth:`PySparkException.getMessage`
         :meth:`PySparkException.getSqlState`
         """
         return self._message_parameters
@@ -89,13 +91,28 @@ class PySparkException(Exception):
         See Also
         --------
         :meth:`PySparkException.getErrorClass`
+        :meth:`PySparkException.getMessage`
         :meth:`PySparkException.getMessageParameters`
         """
         return None
 
+    def getMessage(self) -> str:
+        """
+        Returns full error message.
+
+        .. versionadded:: 4.0.0
+
+        See Also
+        --------
+        :meth:`PySparkException.getErrorClass`
+        :meth:`PySparkException.getMessageParameters`
+        :meth:`PySparkException.getSqlState`
+        """
+        return f"[{self.getErrorClass()}] {self._message}"
+
     def __str__(self) -> str:
         if self.getErrorClass() is not None:
-            return f"[{self.getErrorClass()}] {self._message}"
+            return self.getMessage()
         else:
             return self._message
 
diff --git a/python/pyspark/errors/exceptions/captured.py b/python/pyspark/errors/exceptions/captured.py
index ec987e0854ea..4164bb7b428d 100644
--- a/python/pyspark/errors/exceptions/captured.py
+++ b/python/pyspark/errors/exceptions/captured.py
@@ -118,6 +118,24 @@ class CapturedException(PySparkException):
         else:
             return None
 
+    def getMessage(self) -> str:
+        assert SparkContext._gateway is not None
+        gw = SparkContext._gateway
+
+        if self._origin is not None and is_instance_of(
+            gw, self._origin, "org.apache.spark.SparkThrowable"
+        ):
+            error_class = self._origin.getErrorClass()
+            message_parameters = self._origin.getMessageParameters()
+
+            error_message = gw.jvm.org.apache.spark.SparkThrowableHelper.getMessage(
+                error_class, message_parameters
+            )
+
+            return error_message
+        else:
+            return ""
+
 
 def convert_exception(e: Py4JJavaError) -> CapturedException:
     assert e is not None
diff --git a/python/pyspark/sql/tests/test_utils.py b/python/pyspark/sql/tests/test_utils.py
index f633837002e2..e13b933c46ba 100644
--- a/python/pyspark/sql/tests/test_utils.py
+++ b/python/pyspark/sql/tests/test_utils.py
@@ -1750,6 +1750,13 @@ class UtilsTests(ReusedSQLTestCase, UtilsTestsMixin):
             self.assertEqual(e.getErrorClass(), "UNRESOLVED_COLUMN.WITHOUT_SUGGESTION")
             self.assertEqual(e.getSqlState(), "42703")
             self.assertEqual(e.getMessageParameters(), {"objectName": "`a`"})
+            self.assertEqual(
+                e.getMessage(),
+                (
+                    "[UNRESOLVED_COLUMN.WITHOUT_SUGGESTION] A column, variable, or function "
+                    "parameter with name `a` cannot be resolved.  SQLSTATE: 42703"
+                ),
+            )
 
         try:
             self.spark.sql("""SELECT assert_true(FALSE)""")
@@ -1757,6 +1764,7 @@ class UtilsTests(ReusedSQLTestCase, UtilsTestsMixin):
             self.assertIsNone(e.getErrorClass())
             self.assertIsNone(e.getSqlState())
             self.assertEqual(e.getMessageParameters(), {})
+            self.assertEqual(e.getMessage(), "")
 
 
 if __name__ == "__main__":


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org
For additional commands, e-mail: commits-h...@spark.apache.org

Reply via email to