This is an automated email from the ASF dual-hosted git repository.
ruifengz pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/spark.git
The following commit(s) were added to refs/heads/master by this push:
new 50077cd9943b [SPARK-54942][PYTHON][TESTS][FOLLOWUP] Fix struct cast tests for PyArrow < 21 compatibility
50077cd9943b is described below
commit 50077cd9943b42a658be06872abba33dda0fbeaa
Author: Yicong-Huang <[email protected]>
AuthorDate: Tue Jan 27 13:02:01 2026 +0800
[SPARK-54942][PYTHON][TESTS][FOLLOWUP] Fix struct cast tests for PyArrow < 21 compatibility
### What changes were proposed in this pull request?
Fix PyArrow version compatibility for struct cast tests in
`test_pyarrow_array_cast.py`.
Some struct cast behaviors differ across PyArrow versions:
| Feature | PyArrow < 19 | PyArrow 19-20 | PyArrow >= 21 |
|---------|--------------|---------------|---------------|
| struct cast: field name mismatch (missing fields → null) | ArrowTypeError | supported | supported |
| struct cast: target has more fields (extra fields → null) | ArrowTypeError | supported | supported |
| struct cast: field reorder (same fields, different order) | ArrowTypeError | ArrowTypeError | supported |
This PR:
1. Adds version compatibility documentation to the test file header
2. Introduces `pyarrow_19_or_greater` and `pyarrow_21_or_greater` version
flags
3. Updates `test_struct_casts` to expect `ArrowTypeError` for unsupported
operations on older PyArrow versions
### Why are the changes needed?
The existing tests assumed all struct cast operations were supported in all
PyArrow versions, causing test failures on PyArrow 18-20.
### Does this PR introduce _any_ user-facing change?
No
### How was this patch tested?
Tested with PyArrow 18, 19, 20, 21, and 22 - all versions pass.
### Was this patch authored or co-authored using generative AI tooling?
Yes
Closes #53994 from Yicong-Huang/SPARK-54942/fix/struct-cast-pyarrow-compat.
Authored-by: Yicong-Huang <[email protected]>
Signed-off-by: Ruifeng Zheng <[email protected]>
---
.../upstream/pyarrow/test_pyarrow_array_cast.py | 118 ++++++++++++++-------
1 file changed, 79 insertions(+), 39 deletions(-)
diff --git a/python/pyspark/tests/upstream/pyarrow/test_pyarrow_array_cast.py
b/python/pyspark/tests/upstream/pyarrow/test_pyarrow_array_cast.py
index 2157ede66020..ccc32b4afa32 100644
--- a/python/pyspark/tests/upstream/pyarrow/test_pyarrow_array_cast.py
+++ b/python/pyspark/tests/upstream/pyarrow/test_pyarrow_array_cast.py
@@ -38,13 +38,31 @@ Tests container-to-container type conversions:
- **List variants**: list, large_list, fixed_size_list
- **Map**: map<key, value>
- **Struct**: struct<fields...>
+
+## PyArrow Version Compatibility
+
+Some behaviors differ across PyArrow versions:
+
+| Feature | PyArrow < 19 |
PyArrow 19-20 | PyArrow >= 21 |
+|----------------------------------------------------------|----------------|----------------|----------------|
+| struct cast: field name mismatch (missing fields → null) | ArrowTypeError |
supported | supported |
+| struct cast: target has more fields (extra fields → null)| ArrowTypeError |
supported | supported |
+| struct cast: field reorder (same fields, different order)| ArrowTypeError |
ArrowTypeError | supported |
+| pa.array(floats, pa.float16()) without numpy | requires
np.float16 | requires np.float16 | native support |
"""
import platform
import unittest
+from pyspark.loose_version import LooseVersion
from pyspark.testing.utils import have_pyarrow, pyarrow_requirement_message
+if have_pyarrow:
+ import pyarrow as pa
+
+ pyarrow_19_or_greater = LooseVersion(pa.__version__) >=
LooseVersion("19.0.0")
+ pyarrow_21_or_greater = LooseVersion(pa.__version__) >=
LooseVersion("21.0.0")
+
def _get_float_to_int_boundary_expected(int_type):
"""
@@ -7826,32 +7844,6 @@ class PyArrowNestedTypeCastTests(unittest.TestCase):
pa.array([], type=pa.struct([("x", pa.int32())])),
),
],
- # struct -> struct (field name mismatch - missing become null)
- "struct_name_mismatch": [
- (
- pa.array(
- [{"x": 1, "y": 2}],
- type=pa.struct([("x", pa.int32()), ("y", pa.int32())]),
- ),
- pa.array(
- [{"a": None, "b": None}],
- type=pa.struct([("a", pa.int32()), ("b", pa.int32())]),
- ),
- ),
- ],
- # struct -> struct (more fields - extra become null)
- "struct_more_fields": [
- (
- pa.array(
- [{"x": 1, "y": 2}],
- type=pa.struct([("x", pa.int32()), ("y", pa.int32())]),
- ),
- pa.array(
- [{"x": 1, "y": 2, "z": None}],
- type=pa.struct([("x", pa.int32()), ("y", pa.int32()),
("z", pa.int32())]),
- ),
- ),
- ],
# struct -> struct (fewer fields - drops extra)
"struct_fewer_fields": [
(
@@ -7862,19 +7854,6 @@ class PyArrowNestedTypeCastTests(unittest.TestCase):
pa.array([{"x": 1}], type=pa.struct([("x", pa.int32())])),
),
],
- # struct -> struct (field reorder)
- "struct_reorder": [
- (
- pa.array(
- [{"x": 1, "y": 2}],
- type=pa.struct([("x", pa.int32()), ("y", pa.int32())]),
- ),
- pa.array(
- [{"y": 2, "x": 1}],
- type=pa.struct([("y", pa.int32()), ("x", pa.int32())]),
- ),
- ),
- ],
# struct -> list (not supported)
"list": [
(
@@ -7898,6 +7877,67 @@ class PyArrowNestedTypeCastTests(unittest.TestCase):
),
],
}
+
+ # struct -> struct (field name mismatch - missing become null)
+ # PyArrow >= 19 supports this; earlier versions raise ArrowTypeError
+ casts["struct_name_mismatch"] = [
+ (
+ pa.array(
+ [{"x": 1, "y": 2}],
+ type=pa.struct([("x", pa.int32()), ("y", pa.int32())]),
+ ),
+ pa.array(
+ [{"a": None, "b": None}],
+ type=pa.struct([("a", pa.int32()), ("b", pa.int32())]),
+ )
+ if pyarrow_19_or_greater
+ else pa.lib.ArrowTypeError,
+ None
+ if pyarrow_19_or_greater
+ else pa.struct([("a", pa.int32()), ("b", pa.int32())]),
+ ),
+ ]
+
+ # struct -> struct (more fields - extra become null)
+ # PyArrow >= 19 supports this; earlier versions raise ArrowTypeError
+ casts["struct_more_fields"] = [
+ (
+ pa.array(
+ [{"x": 1, "y": 2}],
+ type=pa.struct([("x", pa.int32()), ("y", pa.int32())]),
+ ),
+ pa.array(
+ [{"x": 1, "y": 2, "z": None}],
+ type=pa.struct([("x", pa.int32()), ("y", pa.int32()),
("z", pa.int32())]),
+ )
+ if pyarrow_19_or_greater
+ else pa.lib.ArrowTypeError,
+ None
+ if pyarrow_19_or_greater
+ else pa.struct([("x", pa.int32()), ("y", pa.int32()), ("z",
pa.int32())]),
+ ),
+ ]
+
+ # struct -> struct (field reorder)
+ # PyArrow >= 21 supports field reordering; earlier versions raise
ArrowTypeError
+ casts["struct_reorder"] = [
+ (
+ pa.array(
+ [{"x": 1, "y": 2}],
+ type=pa.struct([("x", pa.int32()), ("y", pa.int32())]),
+ ),
+ pa.array(
+ [{"y": 2, "x": 1}],
+ type=pa.struct([("y", pa.int32()), ("x", pa.int32())]),
+ )
+ if pyarrow_21_or_greater
+ else pa.lib.ArrowTypeError,
+ None
+ if pyarrow_21_or_greater
+ else pa.struct([("y", pa.int32()), ("x", pa.int32())]),
+ ),
+ ]
+
self._run_nested_cast_tests(casts, "struct")
def test_nested_struct_casts(self):
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]