This is an automated email from the ASF dual-hosted git repository.

ruifengz pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/spark.git


The following commit(s) were added to refs/heads/master by this push:
     new 50077cd9943b [SPARK-54942][PYTHON][TESTS][FOLLOWUP] Fix struct cast tests for PyArrow < 21 compatibility
50077cd9943b is described below

commit 50077cd9943b42a658be06872abba33dda0fbeaa
Author: Yicong-Huang <[email protected]>
AuthorDate: Tue Jan 27 13:02:01 2026 +0800

    [SPARK-54942][PYTHON][TESTS][FOLLOWUP] Fix struct cast tests for PyArrow < 21 compatibility
    
    ### What changes were proposed in this pull request?
    
    Fix PyArrow version compatibility for struct cast tests in `test_pyarrow_array_cast.py`.
    
    Some struct cast behaviors differ across PyArrow versions:
    
    | Feature | PyArrow < 19 | PyArrow 19-20 | PyArrow >= 21 |
    |---------|--------------|---------------|---------------|
    | struct cast: field name mismatch (missing fields → null) | ArrowTypeError | supported | supported |
    | struct cast: target has more fields (extra fields → null) | ArrowTypeError | supported | supported |
    | struct cast: field reorder (same fields, different order) | ArrowTypeError | ArrowTypeError | supported |
    
    This PR:
    1. Adds version compatibility documentation to the test file header
    2. Introduces `pyarrow_19_or_greater` and `pyarrow_21_or_greater` version flags
    3. Updates `test_struct_casts` to expect `ArrowTypeError` for unsupported operations on older PyArrow versions
    
    ### Why are the changes needed?
    
    The existing tests assumed all struct cast operations were supported in all PyArrow versions, causing test failures on PyArrow 18-20.
    
    ### Does this PR introduce _any_ user-facing change?
    
    No
    
    ### How was this patch tested?
    
    Tested with PyArrow 18, 19, 20, 21, and 22 - all versions pass.
    
    ### Was this patch authored or co-authored using generative AI tooling?
    
    Yes
    
    Closes #53994 from Yicong-Huang/SPARK-54942/fix/struct-cast-pyarrow-compat.
    
    Authored-by: Yicong-Huang <[email protected]>
    Signed-off-by: Ruifeng Zheng <[email protected]>
---
 .../upstream/pyarrow/test_pyarrow_array_cast.py    | 118 ++++++++++++++-------
 1 file changed, 79 insertions(+), 39 deletions(-)

diff --git a/python/pyspark/tests/upstream/pyarrow/test_pyarrow_array_cast.py 
b/python/pyspark/tests/upstream/pyarrow/test_pyarrow_array_cast.py
index 2157ede66020..ccc32b4afa32 100644
--- a/python/pyspark/tests/upstream/pyarrow/test_pyarrow_array_cast.py
+++ b/python/pyspark/tests/upstream/pyarrow/test_pyarrow_array_cast.py
@@ -38,13 +38,31 @@ Tests container-to-container type conversions:
 - **List variants**: list, large_list, fixed_size_list
 - **Map**: map<key, value>
 - **Struct**: struct<fields...>
+
+## PyArrow Version Compatibility
+
+Some behaviors differ across PyArrow versions:
+
+| Feature                                                  | PyArrow < 19   | PyArrow 19-20  | PyArrow >= 21  |
+|----------------------------------------------------------|----------------|----------------|----------------|
+| struct cast: field name mismatch (missing fields → null) | ArrowTypeError | supported      | supported      |
+| struct cast: target has more fields (extra fields → null)| ArrowTypeError | supported      | supported      |
+| struct cast: field reorder (same fields, different order)| ArrowTypeError | ArrowTypeError | supported      |
+| pa.array(floats, pa.float16()) without numpy             | requires np.float16 | requires np.float16 | native support |
 """
 
 import platform
 import unittest
 
+from pyspark.loose_version import LooseVersion
 from pyspark.testing.utils import have_pyarrow, pyarrow_requirement_message
 
+if have_pyarrow:
+    import pyarrow as pa
+
+    pyarrow_19_or_greater = LooseVersion(pa.__version__) >= 
LooseVersion("19.0.0")
+    pyarrow_21_or_greater = LooseVersion(pa.__version__) >= 
LooseVersion("21.0.0")
+
 
 def _get_float_to_int_boundary_expected(int_type):
     """
@@ -7826,32 +7844,6 @@ class PyArrowNestedTypeCastTests(unittest.TestCase):
                     pa.array([], type=pa.struct([("x", pa.int32())])),
                 ),
             ],
-            # struct -> struct (field name mismatch - missing become null)
-            "struct_name_mismatch": [
-                (
-                    pa.array(
-                        [{"x": 1, "y": 2}],
-                        type=pa.struct([("x", pa.int32()), ("y", pa.int32())]),
-                    ),
-                    pa.array(
-                        [{"a": None, "b": None}],
-                        type=pa.struct([("a", pa.int32()), ("b", pa.int32())]),
-                    ),
-                ),
-            ],
-            # struct -> struct (more fields - extra become null)
-            "struct_more_fields": [
-                (
-                    pa.array(
-                        [{"x": 1, "y": 2}],
-                        type=pa.struct([("x", pa.int32()), ("y", pa.int32())]),
-                    ),
-                    pa.array(
-                        [{"x": 1, "y": 2, "z": None}],
-                        type=pa.struct([("x", pa.int32()), ("y", pa.int32()), 
("z", pa.int32())]),
-                    ),
-                ),
-            ],
             # struct -> struct (fewer fields - drops extra)
             "struct_fewer_fields": [
                 (
@@ -7862,19 +7854,6 @@ class PyArrowNestedTypeCastTests(unittest.TestCase):
                     pa.array([{"x": 1}], type=pa.struct([("x", pa.int32())])),
                 ),
             ],
-            # struct -> struct (field reorder)
-            "struct_reorder": [
-                (
-                    pa.array(
-                        [{"x": 1, "y": 2}],
-                        type=pa.struct([("x", pa.int32()), ("y", pa.int32())]),
-                    ),
-                    pa.array(
-                        [{"y": 2, "x": 1}],
-                        type=pa.struct([("y", pa.int32()), ("x", pa.int32())]),
-                    ),
-                ),
-            ],
             # struct -> list (not supported)
             "list": [
                 (
@@ -7898,6 +7877,67 @@ class PyArrowNestedTypeCastTests(unittest.TestCase):
                 ),
             ],
         }
+
+        # struct -> struct (field name mismatch - missing become null)
+        # PyArrow >= 19 supports this; earlier versions raise ArrowTypeError
+        casts["struct_name_mismatch"] = [
+            (
+                pa.array(
+                    [{"x": 1, "y": 2}],
+                    type=pa.struct([("x", pa.int32()), ("y", pa.int32())]),
+                ),
+                pa.array(
+                    [{"a": None, "b": None}],
+                    type=pa.struct([("a", pa.int32()), ("b", pa.int32())]),
+                )
+                if pyarrow_19_or_greater
+                else pa.lib.ArrowTypeError,
+                None
+                if pyarrow_19_or_greater
+                else pa.struct([("a", pa.int32()), ("b", pa.int32())]),
+            ),
+        ]
+
+        # struct -> struct (more fields - extra become null)
+        # PyArrow >= 19 supports this; earlier versions raise ArrowTypeError
+        casts["struct_more_fields"] = [
+            (
+                pa.array(
+                    [{"x": 1, "y": 2}],
+                    type=pa.struct([("x", pa.int32()), ("y", pa.int32())]),
+                ),
+                pa.array(
+                    [{"x": 1, "y": 2, "z": None}],
+                    type=pa.struct([("x", pa.int32()), ("y", pa.int32()), 
("z", pa.int32())]),
+                )
+                if pyarrow_19_or_greater
+                else pa.lib.ArrowTypeError,
+                None
+                if pyarrow_19_or_greater
+                else pa.struct([("x", pa.int32()), ("y", pa.int32()), ("z", 
pa.int32())]),
+            ),
+        ]
+
+        # struct -> struct (field reorder)
+        # PyArrow >= 21 supports field reordering; earlier versions raise 
ArrowTypeError
+        casts["struct_reorder"] = [
+            (
+                pa.array(
+                    [{"x": 1, "y": 2}],
+                    type=pa.struct([("x", pa.int32()), ("y", pa.int32())]),
+                ),
+                pa.array(
+                    [{"y": 2, "x": 1}],
+                    type=pa.struct([("y", pa.int32()), ("x", pa.int32())]),
+                )
+                if pyarrow_21_or_greater
+                else pa.lib.ArrowTypeError,
+                None
+                if pyarrow_21_or_greater
+                else pa.struct([("y", pa.int32()), ("x", pa.int32())]),
+            ),
+        ]
+
         self._run_nested_cast_tests(casts, "struct")
 
     def test_nested_struct_casts(self):


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to