This is an automated email from the ASF dual-hosted git repository. ruifengz pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/spark.git
The following commit(s) were added to refs/heads/master by this push: new acfe6d461400 [SPARK-52980][FOLLOWUP] Fix Python Arrow UDTF test cases acfe6d461400 is described below commit acfe6d4614007baf65c344b63de5df903887f372 Author: Allison Wang <allison.w...@databricks.com> AuthorDate: Thu Aug 7 09:41:48 2025 +0800 [SPARK-52980][FOLLOWUP] Fix Python Arrow UDTF test cases ### What changes were proposed in this pull request? This change fixes the Arrow UDTF test failure seen in https://github.com/apache/spark/actions/runs/16772683954/job/47491522789 by building the record batch from a list of arrays plus `names` instead of from a dict, so that the test is compatible with the minimum supported versions (pyarrow=15.0, pandas=2.2.0). ### Why are the changes needed? To fix the failing tests. ### Does this PR introduce _any_ user-facing change? No. ### How was this patch tested? CI. ### Was this patch authored or co-authored using generative AI tooling? No. Closes #51880 from allisonwang-db/fix-failed-tests. Authored-by: Allison Wang <allison.w...@databricks.com> Signed-off-by: Ruifeng Zheng <ruife...@apache.org> --- python/pyspark/sql/tests/arrow/test_arrow_udtf.py | 13 ++++++++----- 1 file changed, 8 insertions(+), 5 deletions(-) diff --git a/python/pyspark/sql/tests/arrow/test_arrow_udtf.py b/python/pyspark/sql/tests/arrow/test_arrow_udtf.py index a3efa9c2bc5a..da15d48fceda 100644 --- a/python/pyspark/sql/tests/arrow/test_arrow_udtf.py +++ b/python/pyspark/sql/tests/arrow/test_arrow_udtf.py @@ -98,12 +98,15 @@ class ArrowUDTFTests(ReusedSQLTestCase): size = batch_size[0].as_py() for batch_id in range(3): + # Create arrays for each column + batch_id_array = pa.array([batch_id] * size, type=pa.int32()) + name_array = pa.array([f"batch_{batch_id}"] * size, type=pa.string()) + count_array = pa.array(list(range(size)), type=pa.int32()) + + # Create record batch from arrays and names batch = pa.record_batch( - { - "batch_id": pa.array([batch_id] * size, type=pa.int32()), - "name": pa.array([f"batch_{batch_id}"] * size, type=pa.string()), - "count": pa.array(list(range(size)), type=pa.int32()), - } + [batch_id_array, name_array, 
count_array], + names=["batch_id", "name", "count"], ) yield batch --------------------------------------------------------------------- To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org