This is an automated email from the ASF dual-hosted git repository. gurwls223 pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/spark.git
The following commit(s) were added to refs/heads/master by this push: new 9bac48d4bd6 [SPARK-45852][CONNECT][PYTHON] Gracefully deal with recursion error during logging 9bac48d4bd6 is described below commit 9bac48d4bd68d4f0d54c53c29a27b1f6e02c5f61 Author: Martin Grund <martin.gr...@databricks.com> AuthorDate: Fri Nov 10 17:12:25 2023 +0900 [SPARK-45852][CONNECT][PYTHON] Gracefully deal with recursion error during logging ### What changes were proposed in this pull request? The Python client for Spark Connect logs the text representation of the proto message. However, for deeply nested objects this can lead to a Python recursion error even before the maximum nested recursion limit of the gRPC message is reached. This patch fixes this issue by explicitly catching the recursion error during text conversion. ### Why are the changes needed? Stability ### Does this PR introduce _any_ user-facing change? No ### How was this patch tested? Unit test (UT) ### Was this patch authored or co-authored using generative AI tooling? No Closes #43732 from grundprinzip/SPARK-45852. Authored-by: Martin Grund <martin.gr...@databricks.com> Signed-off-by: Hyukjin Kwon <gurwls...@apache.org> --- python/pyspark/sql/connect/client/core.py | 5 ++++- python/pyspark/sql/tests/connect/test_connect_basic.py | 13 +++++++++++++ 2 files changed, 17 insertions(+), 1 deletion(-) diff --git a/python/pyspark/sql/connect/client/core.py b/python/pyspark/sql/connect/client/core.py index 965c4107cac..7eafcc501f5 100644 --- a/python/pyspark/sql/connect/client/core.py +++ b/python/pyspark/sql/connect/client/core.py @@ -935,7 +935,10 @@ class SparkConnectClient(object): ------- Single line string of the serialized proto message. 
""" - return text_format.MessageToString(p, as_one_line=True) + try: + return text_format.MessageToString(p, as_one_line=True) + except RecursionError: + return "<Truncated message due to recursion error>" def schema(self, plan: pb2.Plan) -> StructType: """ diff --git a/python/pyspark/sql/tests/connect/test_connect_basic.py b/python/pyspark/sql/tests/connect/test_connect_basic.py index daf6772e52b..7a224d68219 100755 --- a/python/pyspark/sql/tests/connect/test_connect_basic.py +++ b/python/pyspark/sql/tests/connect/test_connect_basic.py @@ -159,6 +159,19 @@ class SparkConnectSQLTestCase(ReusedConnectTestCase, SQLTestUtils, PandasOnSpark class SparkConnectBasicTests(SparkConnectSQLTestCase): + def test_recursion_handling_for_plan_logging(self): + """SPARK-45852 - Test that we can handle recursion in plan logging.""" + cdf = self.connect.range(1) + for x in range(400): + cdf = cdf.withColumn(f"col_{x}", CF.lit(x)) + + # Calling schema will trigger logging the message that will in turn trigger the message + # conversion into protobuf that will then trigger the recursion error. + self.assertIsNotNone(cdf.schema) + + result = self.connect._client._proto_to_string(cdf._plan.to_proto(self.connect._client)) + self.assertIn("recursion", result) + def test_df_getattr_behavior(self): cdf = self.connect.range(10) sdf = self.spark.range(10) --------------------------------------------------------------------- To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org