(spark) branch branch-4.1 updated: Revert "[SPARK-55525][PYTHON] Fix UDTF_ARROW_TYPE_CONVERSION_ERROR with undefined error message parameter"

dongjoon Wed, 25 Feb 2026 10:27:55 -0800

This is an automated email from the ASF dual-hosted git repository.

dongjoon pushed a commit to branch branch-4.1
in repository https://gitbox.apache.org/repos/asf/spark.git



The following commit(s) were added to refs/heads/branch-4.1 by this push:
     new 4b1e6b2f95ef Revert "[SPARK-55525][PYTHON] Fix UDTF_ARROW_TYPE_CONVERSION_ERROR with undefined error message parameter"
4b1e6b2f95ef is described below

commit 4b1e6b2f95ef8436b3e075f297c53be7505ceaad
Author: Dongjoon Hyun <[email protected]>
AuthorDate: Wed Feb 25 10:27:18 2026 -0800

    Revert "[SPARK-55525][PYTHON] Fix UDTF_ARROW_TYPE_CONVERSION_ERROR with undefined error message parameter"
    
    This reverts commit bc1bbe6eb06eaf2faab730743b61b3a848a49c3f.
---
 python/pyspark/errors/error-conditions.json       |  5 -----
 python/pyspark/sql/tests/arrow/test_arrow_udtf.py | 17 -----------------
 python/pyspark/sql/tests/test_udtf.py             | 16 ++++++++--------
 python/pyspark/worker.py                          |  2 +-
 4 files changed, 9 insertions(+), 31 deletions(-)

diff --git a/python/pyspark/errors/error-conditions.json b/python/pyspark/errors/error-conditions.json
index da8a9f1fea34..326671c0d5ad 100644
--- a/python/pyspark/errors/error-conditions.json
+++ b/python/pyspark/errors/error-conditions.json
@@ -1187,11 +1187,6 @@
       "Return type of the user-defined function should be <expected>, but is 
<actual>."
     ]
   },
-  "UDTF_ARROW_DATA_CONVERSION_ERROR": {
-    "message": [
-      "Cannot convert UDTF output to Arrow. Data: <data>. Schema: <schema>. 
Arrow Schema: <arrow_schema>."
-    ]
-  },
   "UDTF_ARROW_TYPE_CAST_ERROR": {
     "message": [
       "Cannot convert the output value of the column '<col_name>' with type 
'<col_type>' to the specified return type of the column: '<arrow_type>'. Please 
check if the data types match and try again."
diff --git a/python/pyspark/sql/tests/arrow/test_arrow_udtf.py b/python/pyspark/sql/tests/arrow/test_arrow_udtf.py
index 2be399171abb..cc0edda378ab 100644
--- a/python/pyspark/sql/tests/arrow/test_arrow_udtf.py
+++ b/python/pyspark/sql/tests/arrow/test_arrow_udtf.py
@@ -33,23 +33,6 @@ if have_pyarrow:
 
 @unittest.skipIf(not have_pyarrow, pyarrow_requirement_message)
 class ArrowUDTFTestsMixin:
-    def test_arrow_udtf_data_conversion_error(self):
-        from pyspark.sql.functions import udtf
-
-        @udtf(returnType="x int, y int")
-        class DataConversionErrorUDTF:
-            def eval(self):
-                # Return a non-tuple value when multiple return values are 
expected.
-                # This will cause LocalDataToArrowConversion.convert to fail 
with TypeError (len() on int),
-                # which should be wrapped in UDTF_ARROW_DATA_CONVERSION_ERROR.
-                yield 1
-
-        # Enable Arrow optimization for regular UDTFs
-        with self.sql_conf({"spark.sql.execution.pythonUDTF.arrow.enabled": 
"true"}):
-            with self.assertRaisesRegex(PythonException, 
"UDTF_ARROW_DATA_CONVERSION_ERROR"):
-                result_df = DataConversionErrorUDTF()
-                result_df.collect()
-
     def test_arrow_udtf_zero_args(self):
         @arrow_udtf(returnType="id int, value string")
         class TestUDTF:
diff --git a/python/pyspark/sql/tests/test_udtf.py b/python/pyspark/sql/tests/test_udtf.py
index 54bd2a34ff1d..5ded5aa67b4e 100644
--- a/python/pyspark/sql/tests/test_udtf.py
+++ b/python/pyspark/sql/tests/test_udtf.py
@@ -3548,7 +3548,7 @@ class UDTFArrowTestsMixin(LegacyUDTFArrowTestsMixin):
             def eval(self):
                 yield 1,
 
-        err = "UDTF_ARROW_DATA_CONVERSION_ERROR"
+        err = "UDTF_ARROW_TYPE_CONVERSION_ERROR"
 
         for ret_type, expected in [
             ("x: boolean", err),
@@ -3575,7 +3575,7 @@ class UDTFArrowTestsMixin(LegacyUDTFArrowTestsMixin):
             def eval(self):
                 yield "1",
 
-        err = "UDTF_ARROW_DATA_CONVERSION_ERROR"
+        err = "UDTF_ARROW_TYPE_CONVERSION_ERROR"
 
         for ret_type, expected in [
             ("x: boolean", err),
@@ -3604,7 +3604,7 @@ class UDTFArrowTestsMixin(LegacyUDTFArrowTestsMixin):
             def eval(self):
                 yield "hello",
 
-        err = "UDTF_ARROW_DATA_CONVERSION_ERROR"
+        err = "UDTF_ARROW_TYPE_CONVERSION_ERROR"
         for ret_type, expected in [
             ("x: boolean", err),
             ("x: tinyint", err),
@@ -3632,7 +3632,7 @@ class UDTFArrowTestsMixin(LegacyUDTFArrowTestsMixin):
             def eval(self):
                 yield [0, 1.1, 2],
 
-        err = "UDTF_ARROW_DATA_CONVERSION_ERROR"
+        err = "UDTF_ARROW_TYPE_CONVERSION_ERROR"
         for ret_type, expected in [
             ("x: boolean", err),
             ("x: tinyint", err),
@@ -3664,7 +3664,7 @@ class UDTFArrowTestsMixin(LegacyUDTFArrowTestsMixin):
             def eval(self):
                 yield {"a": 0, "b": 1.1, "c": 2},
 
-        err = "UDTF_ARROW_DATA_CONVERSION_ERROR"
+        err = "UDTF_ARROW_TYPE_CONVERSION_ERROR"
         for ret_type, expected in [
             ("x: boolean", err),
             ("x: tinyint", err),
@@ -3695,7 +3695,7 @@ class UDTFArrowTestsMixin(LegacyUDTFArrowTestsMixin):
             def eval(self):
                 yield {"a": 0, "b": 1.1, "c": 2},
 
-        err = "UDTF_ARROW_DATA_CONVERSION_ERROR"
+        err = "UDTF_ARROW_TYPE_CONVERSION_ERROR"
         for ret_type, expected in [
             ("x: boolean", err),
             ("x: tinyint", err),
@@ -3725,7 +3725,7 @@ class UDTFArrowTestsMixin(LegacyUDTFArrowTestsMixin):
             def eval(self):
                 yield Row(a=0, b=1.1, c=2),
 
-        err = "UDTF_ARROW_DATA_CONVERSION_ERROR"
+        err = "UDTF_ARROW_TYPE_CONVERSION_ERROR"
         for ret_type, expected in [
             ("x: boolean", err),
             ("x: tinyint", err),
@@ -3761,7 +3761,7 @@ class UDTFArrowTestsMixin(LegacyUDTFArrowTestsMixin):
             "x: array<int>",
         ]:
             with self.subTest(ret_type=ret_type):
-                with self.assertRaisesRegex(PythonException, 
"UDTF_ARROW_DATA_CONVERSION_ERROR"):
+                with self.assertRaisesRegex(PythonException, 
"UDTF_ARROW_TYPE_CONVERSION_ERROR"):
                     udtf(TestUDTF, returnType=ret_type)().collect()
 
     def test_decimal_round(self):
diff --git a/python/pyspark/worker.py b/python/pyspark/worker.py
index fe68b8baa6c9..4bae9f6dc48f 100644
--- a/python/pyspark/worker.py
+++ b/python/pyspark/worker.py
@@ -2342,7 +2342,7 @@ def read_udtf(pickleSer, infile, eval_type):
 
                 def raise_conversion_error(original_exception):
                     raise PySparkRuntimeError(
-                        errorClass="UDTF_ARROW_DATA_CONVERSION_ERROR",
+                        errorClass="UDTF_ARROW_TYPE_CONVERSION_ERROR",
                         messageParameters={
                             "data": str(data),
                             "schema": return_type.simpleString(),


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

(spark) branch branch-4.1 updated: Revert "[SPARK-55525][PYTHON] Fix UDTF_ARROW_TYPE_CONVERSION_ERROR with undefined error message parameter"

Reply via email to