This is an automated email from the ASF dual-hosted git repository.
gurwls223 pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/spark.git
The following commit(s) were added to refs/heads/master by this push:
new 8d64cb4144d1 [SPARK-46360][PYTHON] Enhance error message debugging with new `getMessage` API
8d64cb4144d1 is described below
commit 8d64cb4144d17107fd3758d2a46430439203c7ad
Author: Haejoon Lee <[email protected]>
AuthorDate: Mon Dec 11 21:03:11 2023 -0800
[SPARK-46360][PYTHON] Enhance error message debugging with new `getMessage` API
### What changes were proposed in this pull request?
This PR proposes to introduce `getMessage` to provide a standardized way
for users to obtain a concise and clear error message.
### Why are the changes needed?
Previously, extracting a simple and informative error message in PySpark
was not straightforward. The internal `ErrorClassesReader.get_error_message`
method was often used, but for JVM-originated errors not defined in
`error_classes.py`, obtaining a succinct error message was challenging.
The new `getMessage` API harmonizes error message retrieval across PySpark,
leveraging existing JVM implementations to ensure consistency and clarity in
the messages presented to the users.
### Does this PR introduce _any_ user-facing change?
Yes, this PR introduces a `getMessage` API for directly accessing simplified
error messages in PySpark.
- **Before**: No official API for simplified error messages; excessive
details in the error output:
```python
from pyspark.sql.utils import AnalysisException
try:
spark.sql("""SELECT a""")
except AnalysisException as e:
str(e)
# "[UNRESOLVED_COLUMN.WITHOUT_SUGGESTION] A column, variable, or function parameter with name `a` cannot be resolved. SQLSTATE: 42703; line 1 pos 7;\n'Project ['a]\n+- OneRowRelation\n"
```
- **After**: The `getMessage` API provides streamlined, user-friendly error
messages:
```python
from pyspark.sql.utils import AnalysisException
try:
spark.sql("""SELECT a""")
except AnalysisException as e:
e.getMessage()
# '[UNRESOLVED_COLUMN.WITHOUT_SUGGESTION] A column, variable, or function parameter with name `a` cannot be resolved. SQLSTATE: 42703'
```
### How was this patch tested?
Added UTs.
### Was this patch authored or co-authored using generative AI tooling?
No.
Closes #44292 from itholic/getMessage.
Authored-by: Haejoon Lee <[email protected]>
Signed-off-by: Hyukjin Kwon <[email protected]>
---
python/pyspark/errors/exceptions/base.py | 19 ++++++++++++++++++-
python/pyspark/errors/exceptions/captured.py | 18 ++++++++++++++++++
python/pyspark/sql/tests/test_utils.py | 8 ++++++++
3 files changed, 44 insertions(+), 1 deletion(-)
diff --git a/python/pyspark/errors/exceptions/base.py b/python/pyspark/errors/exceptions/base.py
index b60800da3ff8..e40e1b2e93cb 100644
--- a/python/pyspark/errors/exceptions/base.py
+++ b/python/pyspark/errors/exceptions/base.py
@@ -60,6 +60,7 @@ class PySparkException(Exception):
See Also
--------
+ :meth:`PySparkException.getMessage`
:meth:`PySparkException.getMessageParameters`
:meth:`PySparkException.getSqlState`
"""
@@ -74,6 +75,7 @@ class PySparkException(Exception):
See Also
--------
:meth:`PySparkException.getErrorClass`
+ :meth:`PySparkException.getMessage`
:meth:`PySparkException.getSqlState`
"""
return self._message_parameters
@@ -89,13 +91,28 @@ class PySparkException(Exception):
See Also
--------
:meth:`PySparkException.getErrorClass`
+ :meth:`PySparkException.getMessage`
:meth:`PySparkException.getMessageParameters`
"""
return None
+ def getMessage(self) -> str:
+ """
+ Returns full error message.
+
+ .. versionadded:: 4.0.0
+
+ See Also
+ --------
+ :meth:`PySparkException.getErrorClass`
+ :meth:`PySparkException.getMessageParameters`
+ :meth:`PySparkException.getSqlState`
+ """
+ return f"[{self.getErrorClass()}] {self._message}"
+
def __str__(self) -> str:
if self.getErrorClass() is not None:
- return f"[{self.getErrorClass()}] {self._message}"
+ return self.getMessage()
else:
return self._message
diff --git a/python/pyspark/errors/exceptions/captured.py b/python/pyspark/errors/exceptions/captured.py
index ec987e0854ea..4164bb7b428d 100644
--- a/python/pyspark/errors/exceptions/captured.py
+++ b/python/pyspark/errors/exceptions/captured.py
@@ -118,6 +118,24 @@ class CapturedException(PySparkException):
else:
return None
+ def getMessage(self) -> str:
+ assert SparkContext._gateway is not None
+ gw = SparkContext._gateway
+
+ if self._origin is not None and is_instance_of(
+ gw, self._origin, "org.apache.spark.SparkThrowable"
+ ):
+ error_class = self._origin.getErrorClass()
+ message_parameters = self._origin.getMessageParameters()
+
+ error_message = gw.jvm.org.apache.spark.SparkThrowableHelper.getMessage(
+ error_class, message_parameters
+ )
+
+ return error_message
+ else:
+ return ""
+
def convert_exception(e: Py4JJavaError) -> CapturedException:
assert e is not None
diff --git a/python/pyspark/sql/tests/test_utils.py b/python/pyspark/sql/tests/test_utils.py
index f633837002e2..e13b933c46ba 100644
--- a/python/pyspark/sql/tests/test_utils.py
+++ b/python/pyspark/sql/tests/test_utils.py
@@ -1750,6 +1750,13 @@ class UtilsTests(ReusedSQLTestCase, UtilsTestsMixin):
self.assertEqual(e.getErrorClass(), "UNRESOLVED_COLUMN.WITHOUT_SUGGESTION")
self.assertEqual(e.getSqlState(), "42703")
self.assertEqual(e.getMessageParameters(), {"objectName": "`a`"})
+ self.assertEqual(
+ e.getMessage(),
+ (
+ "[UNRESOLVED_COLUMN.WITHOUT_SUGGESTION] A column, variable, or function "
+ "parameter with name `a` cannot be resolved. SQLSTATE: 42703"
+ ),
+ )
try:
self.spark.sql("""SELECT assert_true(FALSE)""")
@@ -1757,6 +1764,7 @@ class UtilsTests(ReusedSQLTestCase, UtilsTestsMixin):
self.assertIsNone(e.getErrorClass())
self.assertIsNone(e.getSqlState())
self.assertEqual(e.getMessageParameters(), {})
+ self.assertEqual(e.getMessage(), "")
if __name__ == "__main__":
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]