This is an automated email from the ASF dual-hosted git repository.
ruifengz pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/spark.git
The following commit(s) were added to refs/heads/master by this push:
new 60c8c3f30b4d [SPARK-55176][PYTHON][FOLLOW-UP] Fix `_input_type` and
`_arrow_cast` not defined in `ArrowStreamPandasSerializer`
60c8c3f30b4d is described below
commit 60c8c3f30b4d5fd2efc3931954e95c27213ef5f7
Author: Yicong-Huang <[email protected]>
AuthorDate: Tue Feb 3 11:45:56 2026 +0800
[SPARK-55176][PYTHON][FOLLOW-UP] Fix `_input_type` and `_arrow_cast` not
defined in `ArrowStreamPandasSerializer`
### What changes were proposed in this pull request?
This PR moves the definition of `_input_type` and `_arrow_cast` from
`ArrowStreamPandasUDFSerializer` to its parent class
`ArrowStreamPandasSerializer`.
### Why are the changes needed?
This is a fix: `ArrowStreamPandasSerializer` needs `_input_type` and `_arrow_cast`, but they were previously defined only in its subclass `ArrowStreamPandasUDFSerializer`.
### Does this PR introduce _any_ user-facing change?
No.
### How was this patch tested?
Manual verification that serializers work correctly with the refactored
code.
### Was this patch authored or co-authored using generative AI tooling?
No.
Closes #54102 from
Yicong-Huang/SPARK-55176/fix/add-input-type-to-arrow-pandas-serializer.
Authored-by: Yicong-Huang <[email protected]>
Signed-off-by: Ruifeng Zheng <[email protected]>
---
python/pyspark/sql/pandas/serializers.py | 12 ++++++++----
1 file changed, 8 insertions(+), 4 deletions(-)
diff --git a/python/pyspark/sql/pandas/serializers.py b/python/pyspark/sql/pandas/serializers.py
index 99679d2e519e..9be7240ff8d3 100644
--- a/python/pyspark/sql/pandas/serializers.py
+++ b/python/pyspark/sql/pandas/serializers.py
@@ -426,6 +426,8 @@ class ArrowStreamPandasSerializer(ArrowStreamSerializer):
struct_in_pandas: str = "dict",
ndarray_as_list: bool = False,
df_for_struct: bool = False,
+ input_type: Optional["StructType"] = None,
+ arrow_cast: bool = False,
):
super().__init__()
self._timezone = timezone
@@ -435,6 +437,10 @@ class ArrowStreamPandasSerializer(ArrowStreamSerializer):
self._struct_in_pandas = struct_in_pandas
self._ndarray_as_list = ndarray_as_list
self._df_for_struct = df_for_struct
+ if input_type is not None:
+ assert isinstance(input_type, StructType)
+ self._input_type = input_type
+ self._arrow_cast = arrow_cast
def arrow_to_pandas(
self, arrow_column, idx, struct_in_pandas="dict",
ndarray_as_list=False, spark_type=None
@@ -621,12 +627,10 @@ class ArrowStreamPandasUDFSerializer(ArrowStreamPandasSerializer):
struct_in_pandas,
ndarray_as_list,
df_for_struct,
+ input_type,
+ arrow_cast,
)
self._assign_cols_by_name = assign_cols_by_name
- self._arrow_cast = arrow_cast
- if input_type is not None:
- assert isinstance(input_type, StructType)
- self._input_type = input_type
def _create_struct_array(
self,
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]