This is an automated email from the ASF dual-hosted git repository.
gurwls223 pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/spark.git
The following commit(s) were added to refs/heads/master by this push:
new 9bac48d4bd6 [SPARK-45852][CONNECT][PYTHON] Gracefully deal with
recursion error during logging
9bac48d4bd6 is described below
commit 9bac48d4bd68d4f0d54c53c29a27b1f6e02c5f61
Author: Martin Grund <[email protected]>
AuthorDate: Fri Nov 10 17:12:25 2023 +0900
[SPARK-45852][CONNECT][PYTHON] Gracefully deal with recursion error during
logging
### What changes were proposed in this pull request?
The Python client for Spark Connect logs the text representation of the
proto message. However, for deeply nested objects this can lead to a Python
`RecursionError` even before the maximum nested recursion limit of the gRPC
message is reached.
This patch fixes this issue by explicitly catching the recursion error
during text conversion and returning a placeholder string instead.
### Why are the changes needed?
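Stability: a recursion error raised while formatting a message for logging
should not cause the client call itself to fail.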
### Does this PR introduce _any_ user-facing change?
No
### How was this patch tested?
Added a unit test (`test_recursion_handling_for_plan_logging` in
`test_connect_basic.py`).
### Was this patch authored or co-authored using generative AI tooling?
No
Closes #43732 from grundprinzip/SPARK-45852.
Authored-by: Martin Grund <[email protected]>
Signed-off-by: Hyukjin Kwon <[email protected]>
---
python/pyspark/sql/connect/client/core.py | 5 ++++-
python/pyspark/sql/tests/connect/test_connect_basic.py | 13 +++++++++++++
2 files changed, 17 insertions(+), 1 deletion(-)
diff --git a/python/pyspark/sql/connect/client/core.py
b/python/pyspark/sql/connect/client/core.py
index 965c4107cac..7eafcc501f5 100644
--- a/python/pyspark/sql/connect/client/core.py
+++ b/python/pyspark/sql/connect/client/core.py
@@ -935,7 +935,10 @@ class SparkConnectClient(object):
-------
Single line string of the serialized proto message.
"""
- return text_format.MessageToString(p, as_one_line=True)
+ try:
+ return text_format.MessageToString(p, as_one_line=True)
+ except RecursionError:
+ return "<Truncated message due to recursion error>"
def schema(self, plan: pb2.Plan) -> StructType:
"""
diff --git a/python/pyspark/sql/tests/connect/test_connect_basic.py
b/python/pyspark/sql/tests/connect/test_connect_basic.py
index daf6772e52b..7a224d68219 100755
--- a/python/pyspark/sql/tests/connect/test_connect_basic.py
+++ b/python/pyspark/sql/tests/connect/test_connect_basic.py
@@ -159,6 +159,19 @@ class SparkConnectSQLTestCase(ReusedConnectTestCase,
SQLTestUtils, PandasOnSpark
class SparkConnectBasicTests(SparkConnectSQLTestCase):
+ def test_recursion_handling_for_plan_logging(self):
+ """SPARK-45852 - Test that we can handle recursion in plan logging."""
+ cdf = self.connect.range(1)
+ for x in range(400):
+ cdf = cdf.withColumn(f"col_{x}", CF.lit(x))
+
+ # Calling schema will trigger logging the message that will in turn
trigger the message
+ # conversion into protobuf that will then trigger the recursion error.
+ self.assertIsNotNone(cdf.schema)
+
+ result =
self.connect._client._proto_to_string(cdf._plan.to_proto(self.connect._client))
+ self.assertIn("recursion", result)
+
def test_df_getattr_behavior(self):
cdf = self.connect.range(10)
sdf = self.spark.range(10)
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]