This is an automated email from the ASF dual-hosted git repository.
dongjoon pushed a commit to branch branch-4.1
in repository https://gitbox.apache.org/repos/asf/spark.git
The following commit(s) were added to refs/heads/branch-4.1 by this push:
new 4b1e6b2f95ef Revert "[SPARK-55525][PYTHON] Fix UDTF_ARROW_TYPE_CONVERSION_ERROR with undefined error message parameter"
4b1e6b2f95ef is described below
commit 4b1e6b2f95ef8436b3e075f297c53be7505ceaad
Author: Dongjoon Hyun <[email protected]>
AuthorDate: Wed Feb 25 10:27:18 2026 -0800
Revert "[SPARK-55525][PYTHON] Fix UDTF_ARROW_TYPE_CONVERSION_ERROR with undefined error message parameter"
This reverts commit bc1bbe6eb06eaf2faab730743b61b3a848a49c3f.
---
python/pyspark/errors/error-conditions.json | 5 -----
python/pyspark/sql/tests/arrow/test_arrow_udtf.py | 17 -----------------
python/pyspark/sql/tests/test_udtf.py | 16 ++++++++--------
python/pyspark/worker.py | 2 +-
4 files changed, 9 insertions(+), 31 deletions(-)
diff --git a/python/pyspark/errors/error-conditions.json b/python/pyspark/errors/error-conditions.json
index da8a9f1fea34..326671c0d5ad 100644
--- a/python/pyspark/errors/error-conditions.json
+++ b/python/pyspark/errors/error-conditions.json
@@ -1187,11 +1187,6 @@
"Return type of the user-defined function should be <expected>, but is <actual>."
]
},
- "UDTF_ARROW_DATA_CONVERSION_ERROR": {
- "message": [
- "Cannot convert UDTF output to Arrow. Data: <data>. Schema: <schema>. Arrow Schema: <arrow_schema>."
- ]
- },
"UDTF_ARROW_TYPE_CAST_ERROR": {
"message": [
"Cannot convert the output value of the column '<col_name>' with type '<col_type>' to the specified return type of the column: '<arrow_type>'. Please check if the data types match and try again."
diff --git a/python/pyspark/sql/tests/arrow/test_arrow_udtf.py b/python/pyspark/sql/tests/arrow/test_arrow_udtf.py
index 2be399171abb..cc0edda378ab 100644
--- a/python/pyspark/sql/tests/arrow/test_arrow_udtf.py
+++ b/python/pyspark/sql/tests/arrow/test_arrow_udtf.py
@@ -33,23 +33,6 @@ if have_pyarrow:
@unittest.skipIf(not have_pyarrow, pyarrow_requirement_message)
class ArrowUDTFTestsMixin:
- def test_arrow_udtf_data_conversion_error(self):
- from pyspark.sql.functions import udtf
-
- @udtf(returnType="x int, y int")
- class DataConversionErrorUDTF:
- def eval(self):
- # Return a non-tuple value when multiple return values are expected.
- # This will cause LocalDataToArrowConversion.convert to fail with TypeError (len() on int),
- # which should be wrapped in UDTF_ARROW_DATA_CONVERSION_ERROR.
- yield 1
-
- # Enable Arrow optimization for regular UDTFs
- with self.sql_conf({"spark.sql.execution.pythonUDTF.arrow.enabled": "true"}):
- with self.assertRaisesRegex(PythonException, "UDTF_ARROW_DATA_CONVERSION_ERROR"):
- result_df = DataConversionErrorUDTF()
- result_df.collect()
-
def test_arrow_udtf_zero_args(self):
@arrow_udtf(returnType="id int, value string")
class TestUDTF:
diff --git a/python/pyspark/sql/tests/test_udtf.py b/python/pyspark/sql/tests/test_udtf.py
index 54bd2a34ff1d..5ded5aa67b4e 100644
--- a/python/pyspark/sql/tests/test_udtf.py
+++ b/python/pyspark/sql/tests/test_udtf.py
@@ -3548,7 +3548,7 @@ class UDTFArrowTestsMixin(LegacyUDTFArrowTestsMixin):
def eval(self):
yield 1,
- err = "UDTF_ARROW_DATA_CONVERSION_ERROR"
+ err = "UDTF_ARROW_TYPE_CONVERSION_ERROR"
for ret_type, expected in [
("x: boolean", err),
@@ -3575,7 +3575,7 @@ class UDTFArrowTestsMixin(LegacyUDTFArrowTestsMixin):
def eval(self):
yield "1",
- err = "UDTF_ARROW_DATA_CONVERSION_ERROR"
+ err = "UDTF_ARROW_TYPE_CONVERSION_ERROR"
for ret_type, expected in [
("x: boolean", err),
@@ -3604,7 +3604,7 @@ class UDTFArrowTestsMixin(LegacyUDTFArrowTestsMixin):
def eval(self):
yield "hello",
- err = "UDTF_ARROW_DATA_CONVERSION_ERROR"
+ err = "UDTF_ARROW_TYPE_CONVERSION_ERROR"
for ret_type, expected in [
("x: boolean", err),
("x: tinyint", err),
@@ -3632,7 +3632,7 @@ class UDTFArrowTestsMixin(LegacyUDTFArrowTestsMixin):
def eval(self):
yield [0, 1.1, 2],
- err = "UDTF_ARROW_DATA_CONVERSION_ERROR"
+ err = "UDTF_ARROW_TYPE_CONVERSION_ERROR"
for ret_type, expected in [
("x: boolean", err),
("x: tinyint", err),
@@ -3664,7 +3664,7 @@ class UDTFArrowTestsMixin(LegacyUDTFArrowTestsMixin):
def eval(self):
yield {"a": 0, "b": 1.1, "c": 2},
- err = "UDTF_ARROW_DATA_CONVERSION_ERROR"
+ err = "UDTF_ARROW_TYPE_CONVERSION_ERROR"
for ret_type, expected in [
("x: boolean", err),
("x: tinyint", err),
@@ -3695,7 +3695,7 @@ class UDTFArrowTestsMixin(LegacyUDTFArrowTestsMixin):
def eval(self):
yield {"a": 0, "b": 1.1, "c": 2},
- err = "UDTF_ARROW_DATA_CONVERSION_ERROR"
+ err = "UDTF_ARROW_TYPE_CONVERSION_ERROR"
for ret_type, expected in [
("x: boolean", err),
("x: tinyint", err),
@@ -3725,7 +3725,7 @@ class UDTFArrowTestsMixin(LegacyUDTFArrowTestsMixin):
def eval(self):
yield Row(a=0, b=1.1, c=2),
- err = "UDTF_ARROW_DATA_CONVERSION_ERROR"
+ err = "UDTF_ARROW_TYPE_CONVERSION_ERROR"
for ret_type, expected in [
("x: boolean", err),
("x: tinyint", err),
@@ -3761,7 +3761,7 @@ class UDTFArrowTestsMixin(LegacyUDTFArrowTestsMixin):
"x: array<int>",
]:
with self.subTest(ret_type=ret_type):
- with self.assertRaisesRegex(PythonException, "UDTF_ARROW_DATA_CONVERSION_ERROR"):
+ with self.assertRaisesRegex(PythonException, "UDTF_ARROW_TYPE_CONVERSION_ERROR"):
udtf(TestUDTF, returnType=ret_type)().collect()
def test_decimal_round(self):
diff --git a/python/pyspark/worker.py b/python/pyspark/worker.py
index fe68b8baa6c9..4bae9f6dc48f 100644
--- a/python/pyspark/worker.py
+++ b/python/pyspark/worker.py
@@ -2342,7 +2342,7 @@ def read_udtf(pickleSer, infile, eval_type):
def raise_conversion_error(original_exception):
raise PySparkRuntimeError(
- errorClass="UDTF_ARROW_DATA_CONVERSION_ERROR",
+ errorClass="UDTF_ARROW_TYPE_CONVERSION_ERROR",
messageParameters={
"data": str(data),
"schema": return_type.simpleString(),
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]