This is an automated email from the ASF dual-hosted git repository.
ruifengz pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/spark.git
The following commit(s) were added to refs/heads/master by this push:
new 1fafe9ba3e94 [SPARK-55405][PYTHON][TESTS][FOLLOW-UP] Add ARM overrides
for float-to-int unsafe cast golden file mismatches
1fafe9ba3e94 is described below
commit 1fafe9ba3e9456279f4fc7412339a6594d251633
Author: Yicong Huang <[email protected]>
AuthorDate: Wed Feb 18 09:08:44 2026 +0800
[SPARK-55405][PYTHON][TESTS][FOLLOW-UP] Add ARM overrides for float-to-int
unsafe cast golden file mismatches
### What changes were proposed in this pull request?
Add hardcoded ARM (aarch64/arm64) overrides in `_overrides_unsafe()` for the 34
unsafe float-to-integer casts whose results on ARM do not match the golden
files. Also rename `_version_overrides_safe/unsafe` to `_overrides_safe/unsafe`,
since the overrides now cover both version and platform differences.
### Why are the changes needed?
The PyArrow array cast golden files are generated on x86. On ARM, unsafe
float-to-integer casts produce different results due to behavior that IEEE 754
leaves implementation-defined (the integer result of converting NaN, infinity,
or an out-of-range value):
- **ARM FCVT instructions** saturate on overflow: `inf` → type max, `-inf`
→ type min, `nan` → 0
- **x86 SSE/AVX** returns "integer indefinite" values (typically `0x80...0`)
- **Negative float → unsigned int**: ARM saturates to 0, x86 may wrap
This caused `test_scalar_cast_matrix_unsafe` to fail on Ubuntu 24.04 ARM CI
runners with 34 mismatches across three categories:
- `float*:standard` → `uint*` (negative float -1.5 → unsigned int)
- `float*:special` → `int*` (inf/nan → signed int)
- `float*:special` → `uint*` (inf/nan → unsigned int)
### Does this PR introduce _any_ user-facing change?
No
### How was this patch tested?
Verified with an [ARM CI run](https://github.com/Yicong-Huang/spark/actions/runs/21966674224) on my fork.
### Was this patch authored or co-authored using generative AI tooling?
No
Closes #54288 from Yicong-Huang/SPARK-55405/fix/arm-float-cast-overrides.
Authored-by: Yicong Huang <[email protected]>
Signed-off-by: Ruifeng Zheng <[email protected]>
---
.../upstream/pyarrow/test_pyarrow_array_cast.py | 89 ++++++++++++++++++----
1 file changed, 74 insertions(+), 15 deletions(-)
diff --git a/python/pyspark/tests/upstream/pyarrow/test_pyarrow_array_cast.py
b/python/pyspark/tests/upstream/pyarrow/test_pyarrow_array_cast.py
index 1395cbb69131..b182e254b90f 100644
--- a/python/pyspark/tests/upstream/pyarrow/test_pyarrow_array_cast.py
+++ b/python/pyspark/tests/upstream/pyarrow/test_pyarrow_array_cast.py
@@ -57,6 +57,7 @@ Some known version-dependent behaviors:
import inspect
import os
+import platform
import unittest
from decimal import Decimal
from typing import Callable, List, Optional
@@ -469,12 +470,12 @@ class PyArrowScalarTypeCastTests(_PyArrowCastTestBase):
source_arrays = dict(cases)
return source_names, source_arrays
- # ----- version overrides -----
+ # ----- overrides -----
@classmethod
- def _version_overrides_safe(cls):
+ def _overrides_safe(cls):
"""
- Build overrides for known PyArrow version-dependent behaviors
(safe=True mode).
+ Build overrides for known version/platform-dependent behaviors
(safe=True mode).
PyArrow < 21: str(scalar) for float16 uses numpy's formatting
(via np.float16), which may vary across numpy versions.
@@ -501,14 +502,19 @@ class PyArrowScalarTypeCastTests(_PyArrowCastTestBase):
return overrides
@classmethod
- def _version_overrides_unsafe(cls):
+ def _overrides_unsafe(cls):
"""
- Build overrides for known PyArrow version-dependent behaviors
(safe=False mode).
+ Build overrides for known PyArrow version/platform-dependent behaviors
(safe=False).
PyArrow < 21: str(scalar) for float16 uses numpy's formatting
(via np.float16). Same dynamic computation approach as safe=True mode.
- Additional overrides may be needed for different PyArrow versions
- as safe=False behavior varies across versions.
+
+ ARM (aarch64/arm64): Unsafe float-to-integer casts produce different
results
+ than x86 due to IEEE 754 implementation-defined behavior:
+ - ARM FCVT instructions saturate on overflow (inf→MAX, -inf→MIN, nan→0)
+ - x86 SSE/AVX returns "integer indefinite" values
+ - Negative float → unsigned int: ARM saturates to 0, x86 may wrap
+ The golden files are generated on x86; ARM values are hardcoded below.
"""
overrides = {}
if LooseVersion(pa.__version__) < LooseVersion("21.0.0"):
@@ -527,7 +533,60 @@ class PyArrowScalarTypeCastTests(_PyArrowCastTestBase):
("float64:fractional", F16): frac,
}
)
- # Additional overrides will be discovered during cross-version testing
+
+ if platform.machine() in ("aarch64", "arm64"):
+ overrides.update(
+ {
+ # float16:standard [0.0, 1.5, -1.5, None] → unsigned int
types
+ # -1.5 saturates to 0 on ARM, wraps on x86
+ ("float16:standard", "uint8"): "[0, 1, 0, None]@uint8",
+ ("float16:standard", "uint16"): "[0, 1, 0, None]@uint16",
+ ("float16:standard", "uint32"): "[0, 1, 0, None]@uint32",
+ ("float16:standard", "uint64"): "[0, 1, 0, None]@uint64",
+ # float16:special [inf, nan, None] → integer types
+ ("float16:special", "int8"): "[-1, 0, None]@int8",
+ ("float16:special", "int16"): "[-1, 0, None]@int16",
+ ("float16:special", "int32"): "[2147483647, 0,
None]@int32",
+ ("float16:special", "int64"): "[9223372036854775807, 0,
None]@int64",
+ ("float16:special", "uint8"): "[255, 0, None]@uint8",
+ ("float16:special", "uint16"): "[65535, 0, None]@uint16",
+ ("float16:special", "uint32"): "[4294967295, 0,
None]@uint32",
+ ("float16:special", "uint64"): "[18446744073709551615, 0,
None]@uint64",
+ # float32:standard [0.0, 1.5, -1.5, None] → unsigned int
types
+ ("float32:standard", "uint8"): "[0, 1, 0, None]@uint8",
+ ("float32:standard", "uint32"): "[0, 1, 0, None]@uint32",
+ ("float32:standard", "uint64"): "[0, 1, 0, None]@uint64",
+ # float32:special [inf, -inf, nan, None] → integer types
+ ("float32:special", "int8"): "[-1, 0, 0, None]@int8",
+ ("float32:special", "int16"): "[-1, 0, 0, None]@int16",
+ ("float32:special", "int32"): "[2147483647, -2147483648,
0, None]@int32",
+ (
+ "float32:special",
+ "int64",
+ ): "[9223372036854775807, -9223372036854775808, 0,
None]@int64",
+ ("float32:special", "uint8"): "[255, 0, 0, None]@uint8",
+ ("float32:special", "uint16"): "[65535, 0, 0,
None]@uint16",
+ ("float32:special", "uint32"): "[4294967295, 0, 0,
None]@uint32",
+ ("float32:special", "uint64"): "[18446744073709551615, 0,
0, None]@uint64",
+ # float64:standard [0.0, 1.5, -1.5, None] → unsigned int
types
+ ("float64:standard", "uint8"): "[0, 1, 0, None]@uint8",
+ ("float64:standard", "uint16"): "[0, 1, 0, None]@uint16",
+ ("float64:standard", "uint64"): "[0, 1, 0, None]@uint64",
+ # float64:special [inf, -inf, nan, None] → integer types
+ ("float64:special", "int8"): "[-1, 0, 0, None]@int8",
+ ("float64:special", "int16"): "[-1, 0, 0, None]@int16",
+ ("float64:special", "int32"): "[-1, 0, 0, None]@int32",
+ (
+ "float64:special",
+ "int64",
+ ): "[9223372036854775807, -9223372036854775808, 0,
None]@int64",
+ ("float64:special", "uint8"): "[255, 0, 0, None]@uint8",
+ ("float64:special", "uint16"): "[65535, 0, 0,
None]@uint16",
+ ("float64:special", "uint32"): "[4294967295, 0, 0,
None]@uint32",
+ ("float64:special", "uint64"): "[18446744073709551615, 0,
0, None]@uint64",
+ }
+ )
+
return overrides
# ----- test methods -----
@@ -546,7 +605,7 @@ class PyArrowScalarTypeCastTests(_PyArrowCastTestBase):
source_arrays[src], target_lookup[tgt], safe=True
),
golden_file_prefix="golden_pyarrow_scalar_cast_safe",
- overrides=self._version_overrides_safe(),
+ overrides=self._overrides_safe(),
)
def test_scalar_cast_matrix_unsafe(self):
@@ -563,7 +622,7 @@ class PyArrowScalarTypeCastTests(_PyArrowCastTestBase):
source_arrays[src], target_lookup[tgt], safe=False
),
golden_file_prefix="golden_pyarrow_scalar_cast_unsafe",
- overrides=self._version_overrides_unsafe(),
+ overrides=self._overrides_unsafe(),
)
@@ -670,10 +729,10 @@ class PyArrowNestedTypeCastTests(_PyArrowCastTestBase):
source_arrays = dict(cases)
return source_names, source_arrays
- # ----- version overrides -----
+ # ----- overrides -----
@staticmethod
- def _version_overrides_safe():
+ def _overrides_safe():
"""
Build overrides for known PyArrow version-dependent behaviors
(safe=True mode).
@@ -698,7 +757,7 @@ class PyArrowNestedTypeCastTests(_PyArrowCastTestBase):
return overrides
@staticmethod
- def _version_overrides_unsafe():
+ def _overrides_unsafe():
"""
Build overrides for known PyArrow version-dependent behaviors
(safe=False mode).
@@ -738,7 +797,7 @@ class PyArrowNestedTypeCastTests(_PyArrowCastTestBase):
source_arrays[src], target_lookup[tgt], safe=True
),
golden_file_prefix="golden_pyarrow_nested_cast_safe",
- overrides=self._version_overrides_safe(),
+ overrides=self._overrides_safe(),
)
def test_nested_cast_matrix_unsafe(self):
@@ -755,7 +814,7 @@ class PyArrowNestedTypeCastTests(_PyArrowCastTestBase):
source_arrays[src], target_lookup[tgt], safe=False
),
golden_file_prefix="golden_pyarrow_nested_cast_unsafe",
- overrides=self._version_overrides_unsafe(),
+ overrides=self._overrides_unsafe(),
)
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]