This is an automated email from the ASF dual-hosted git repository.
potiuk pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/airflow.git
The following commit(s) were added to refs/heads/main by this push:
new 5babf15bb42 Resolve serialization for numpy bool 1.x and 2.x
compatibility (#53690)
5babf15bb42 is described below
commit 5babf15bb4289419e3326aded0670883113d01c0
Author: Kevin Yang <[email protected]>
AuthorDate: Sun Aug 24 03:33:41 2025 -0400
Resolve serialization for numpy bool 1.x and 2.x compatibility (#53690)
* fix serialization for np.bool in numpy 2.x
get update from main
* update serializers importable test case to handle numpy bool
compatibility between v1 and v2
* use pytest xfail to capture the np.bool import error in NumPy versions
earlier than 2.0
* parameterize the serializers importable test, so xfail can be applied
individually to numpy bool
* add textwrap dedent for message
* add np.float32 to serializer and improve test description
---
.../src/airflow/serialization/serializers/numpy.py | 13 ++-
.../serialization/serializers/test_serializers.py | 1 -
.../tests/unit/serialization/test_serde.py | 95 ++++++++++++++++------
3 files changed, 77 insertions(+), 32 deletions(-)
diff --git a/airflow-core/src/airflow/serialization/serializers/numpy.py
b/airflow-core/src/airflow/serialization/serializers/numpy.py
index 40d891113f8..ad5c3caa7d0 100644
--- a/airflow-core/src/airflow/serialization/serializers/numpy.py
+++ b/airflow-core/src/airflow/serialization/serializers/numpy.py
@@ -19,7 +19,7 @@ from __future__ import annotations
from typing import TYPE_CHECKING, Any
-from airflow.utils.module_loading import import_string, qualname
+from airflow.utils.module_loading import qualname
# lazy loading for performance reasons
serializers = [
@@ -31,11 +31,13 @@ serializers = [
"numpy.uint16",
"numpy.uint32",
"numpy.uint64",
- "numpy.bool_",
"numpy.float64",
+ "numpy.float32",
"numpy.float16",
"numpy.complex128",
"numpy.complex64",
+ "numpy.bool",
+ "numpy.bool_",
]
if TYPE_CHECKING:
@@ -70,7 +72,7 @@ def serialize(o: object) -> tuple[U, str, int, bool]:
):
return int(o), *metadata
- if isinstance(o, np.bool_):
+ if hasattr(np, "bool") and isinstance(o, np.bool) or isinstance(o,
np.bool_):
return bool(o), *metadata
if isinstance(o, (np.float16, np.float32, np.float64, np.complex64,
np.complex128)):
@@ -83,9 +85,4 @@ def deserialize(cls: type, version: int, data: str) -> Any:
if version > __version__:
raise TypeError("serialized version is newer than class version")
- allowed_deserialize_classes = [import_string(classname) for classname in
deserializers]
-
- if cls not in allowed_deserialize_classes:
- raise TypeError(f"unsupported {qualname(cls)} found for numpy
deserialization")
-
return cls(data)
diff --git
a/airflow-core/tests/unit/serialization/serializers/test_serializers.py
b/airflow-core/tests/unit/serialization/serializers/test_serializers.py
index 4b21fc63dee..3f753150b38 100644
--- a/airflow-core/tests/unit/serialization/serializers/test_serializers.py
+++ b/airflow-core/tests/unit/serialization/serializers/test_serializers.py
@@ -261,7 +261,6 @@ class TestSerializers:
("klass", "ver", "value", "msg"),
[
(np.int32, 999, 123, r"serialized version is newer"),
- (np.float32, 1, 123, r"unsupported numpy\.float32"),
],
)
def test_numpy_deserialize_errors(self, klass, ver, value, msg):
diff --git a/airflow-core/tests/unit/serialization/test_serde.py
b/airflow-core/tests/unit/serialization/test_serde.py
index 560569689cf..0dadfc7e9ee 100644
--- a/airflow-core/tests/unit/serialization/test_serde.py
+++ b/airflow-core/tests/unit/serialization/test_serde.py
@@ -18,13 +18,15 @@ from __future__ import annotations
import datetime
import enum
+import textwrap
from collections import namedtuple
from dataclasses import dataclass
-from importlib import import_module
+from importlib import import_module, metadata
from typing import ClassVar
import attr
import pytest
+from packaging import version
from pydantic import BaseModel
from airflow.sdk.definitions.asset import Asset
@@ -61,6 +63,67 @@ def recalculate_patterns():
_match_regexp.cache_clear()
+def generate_serializers_importable_tests():
+ """
+ Generate test cases for `test_serializers_importable_and_str`.
+
+ The function iterates through all the modules defined under
`airflow.serialization.serializers`. It loads
+ the import strings defined in the `serializers` from each module, and
create a test case to verify that the
+ serializer is importable.
+ """
+ import airflow.serialization.serializers
+
+ NUMPY_VERSION = version.parse(metadata.version("numpy"))
+
+ serializer_tests = []
+
+ for _, name, _ in iter_namespace(airflow.serialization.serializers):
+ ############################################################
+ # Handle compatibility / optional dependency at module level
+ ############################################################
+ # https://github.com/apache/airflow/pull/37320
+ if name == "airflow.serialization.serializers.iceberg":
+ try:
+ import pyiceberg # noqa: F401
+ except ImportError:
+ continue
+ # https://github.com/apache/airflow/pull/38074
+ if name == "airflow.serialization.serializers.deltalake":
+ try:
+ import deltalake # noqa: F401
+ except ImportError:
+ continue
+ mod = import_module(name)
+ for s in getattr(mod, "serializers", list()):
+ ############################################################
+ # Handle compatibility issue at serializer level
+ ############################################################
+ if s == "numpy.bool" and NUMPY_VERSION.major < 2:
+ reason = textwrap.dedent(f"""\
+ Current NumPy version: {NUMPY_VERSION}
+
+ In NumPy 1.20, `numpy.bool` was deprecated as an alias for
the built-in `bool`.
+ For NumPy versions <= 1.26, attempting to import
`numpy.bool` raises an ImportError.
+ Starting with NumPy 2.0, `numpy.bool` is reintroduced as
the NumPy scalar type,
+ and `numpy.bool_` becomes an alias for `numpy.bool`.
+
+ The serializers are loaded lazily at runtime. As a result:
+ - With NumPy <= 1.26, only `numpy.bool_` is loaded.
+ - With NumPy >= 2.0, only `numpy.bool` is loaded.
+
+ This test case deliberately attempts to import both
`numpy.bool` and `numpy.bool_`,
+ regardless of the installed NumPy version. Therefore, when
NumPy <= 1.26 is installed,
+ importing `numpy.bool` will raise an ImportError.
+ """)
+ serializer_tests.append(pytest.param(name, s,
marks=pytest.mark.skip(reason=reason)))
+ else:
+ serializer_tests.append(pytest.param(name, s))
+ return serializer_tests
+
+
+SERIALIZER_TESTS = generate_serializers_importable_tests()
+
+
class Z:
__version__: ClassVar[int] = 1
@@ -386,29 +449,15 @@ class TestSerDe:
obj = deserialize(serialize(asset))
assert asset.uri == obj.uri
- def test_serializers_importable_and_str(self):
+ @pytest.mark.parametrize("name, s", SERIALIZER_TESTS)
+ def test_serializers_importable_and_str(self, name, s):
"""Test if all distributed serializers are lazy loading and can be
imported"""
- import airflow.serialization.serializers
-
- for _, name, _ in iter_namespace(airflow.serialization.serializers):
- if name == "airflow.serialization.serializers.iceberg":
- try:
- import pyiceberg # noqa: F401
- except ImportError:
- continue
- if name == "airflow.serialization.serializers.deltalake":
- try:
- import deltalake # noqa: F401
- except ImportError:
- continue
- mod = import_module(name)
- for s in getattr(mod, "serializers", list()):
- if not isinstance(s, str):
- raise TypeError(f"{s} is not of type str. This is required
for lazy loading")
- try:
- import_string(s)
- except ImportError:
- raise AttributeError(f"{s} cannot be imported (located in
{name})")
+ if not isinstance(s, str):
+ raise TypeError(f"{s} is not of type str. This is required for
lazy loading")
+ try:
+ import_string(s)
+ except ImportError:
+ raise AttributeError(f"{s} cannot be imported (located in {name})")
def test_stringify(self):
i = V(W(10), ["l1", "l2"], (1, 2), 10)