This is an automated email from the ASF dual-hosted git repository.

potiuk pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/airflow.git


The following commit(s) were added to refs/heads/main by this push:
     new 5babf15bb42 Resolve serialization for numpy bool 1.x and 2.x compatibility (#53690)
5babf15bb42 is described below

commit 5babf15bb4289419e3326aded0670883113d01c0
Author: Kevin Yang <[email protected]>
AuthorDate: Sun Aug 24 03:33:41 2025 -0400

    Resolve serialization for numpy bool 1.x and 2.x compatibility (#53690)
    
    * fix serialization for np.bool in numpy 2.x
    
    get update from main
    
    * update serializers importable test case to handle numpy bool compatibility between v1 and v2
    
    * use pytest xfail to capture np.bool import error in numpy versions less than 2
    
    * parameterize the serializers importable test, so xfail can be applied individually to numpy bool
    
    * add textwrap dedent for message
    
    * add np.float32 to serializer and improve test description
---
 .../src/airflow/serialization/serializers/numpy.py | 13 ++-
 .../serialization/serializers/test_serializers.py  |  1 -
 .../tests/unit/serialization/test_serde.py         | 95 ++++++++++++++++------
 3 files changed, 77 insertions(+), 32 deletions(-)

diff --git a/airflow-core/src/airflow/serialization/serializers/numpy.py 
b/airflow-core/src/airflow/serialization/serializers/numpy.py
index 40d891113f8..ad5c3caa7d0 100644
--- a/airflow-core/src/airflow/serialization/serializers/numpy.py
+++ b/airflow-core/src/airflow/serialization/serializers/numpy.py
@@ -19,7 +19,7 @@ from __future__ import annotations
 
 from typing import TYPE_CHECKING, Any
 
-from airflow.utils.module_loading import import_string, qualname
+from airflow.utils.module_loading import qualname
 
 # lazy loading for performance reasons
 serializers = [
@@ -31,11 +31,13 @@ serializers = [
     "numpy.uint16",
     "numpy.uint32",
     "numpy.uint64",
-    "numpy.bool_",
     "numpy.float64",
+    "numpy.float32",
     "numpy.float16",
     "numpy.complex128",
     "numpy.complex64",
+    "numpy.bool",
+    "numpy.bool_",
 ]
 
 if TYPE_CHECKING:
@@ -70,7 +72,7 @@ def serialize(o: object) -> tuple[U, str, int, bool]:
     ):
         return int(o), *metadata
 
-    if isinstance(o, np.bool_):
+    if hasattr(np, "bool") and isinstance(o, np.bool) or isinstance(o, 
np.bool_):
         return bool(o), *metadata
 
     if isinstance(o, (np.float16, np.float32, np.float64, np.complex64, 
np.complex128)):
@@ -83,9 +85,4 @@ def deserialize(cls: type, version: int, data: str) -> Any:
     if version > __version__:
         raise TypeError("serialized version is newer than class version")
 
-    allowed_deserialize_classes = [import_string(classname) for classname in 
deserializers]
-
-    if cls not in allowed_deserialize_classes:
-        raise TypeError(f"unsupported {qualname(cls)} found for numpy 
deserialization")
-
     return cls(data)
diff --git 
a/airflow-core/tests/unit/serialization/serializers/test_serializers.py 
b/airflow-core/tests/unit/serialization/serializers/test_serializers.py
index 4b21fc63dee..3f753150b38 100644
--- a/airflow-core/tests/unit/serialization/serializers/test_serializers.py
+++ b/airflow-core/tests/unit/serialization/serializers/test_serializers.py
@@ -261,7 +261,6 @@ class TestSerializers:
         ("klass", "ver", "value", "msg"),
         [
             (np.int32, 999, 123, r"serialized version is newer"),
-            (np.float32, 1, 123, r"unsupported numpy\.float32"),
         ],
     )
     def test_numpy_deserialize_errors(self, klass, ver, value, msg):
diff --git a/airflow-core/tests/unit/serialization/test_serde.py 
b/airflow-core/tests/unit/serialization/test_serde.py
index 560569689cf..0dadfc7e9ee 100644
--- a/airflow-core/tests/unit/serialization/test_serde.py
+++ b/airflow-core/tests/unit/serialization/test_serde.py
@@ -18,13 +18,15 @@ from __future__ import annotations
 
 import datetime
 import enum
+import textwrap
 from collections import namedtuple
 from dataclasses import dataclass
-from importlib import import_module
+from importlib import import_module, metadata
 from typing import ClassVar
 
 import attr
 import pytest
+from packaging import version
 from pydantic import BaseModel
 
 from airflow.sdk.definitions.asset import Asset
@@ -61,6 +63,67 @@ def recalculate_patterns():
         _match_regexp.cache_clear()
 
 
+def generate_serializers_importable_tests():
+    """
+    Generate test cases for `test_serializers_importable_and_str`.
+
+    The function iterates through all the modules defined under `airflow.serialization.serializers`. It loads
+    the import strings defined in the `serializers` from each module, and creates a test case to verify that the
+    serializer is importable.
+    """
+    import airflow.serialization.serializers
+
+    NUMPY_VERSION = version.parse(metadata.version("numpy"))
+
+    serializer_tests = []
+
+    for _, name, _ in iter_namespace(airflow.serialization.serializers):
+        ############################################################
+        # Handle compatibility / optional dependency at module level
+        ############################################################
+        # https://github.com/apache/airflow/pull/37320
+        if name == "airflow.serialization.serializers.iceberg":
+            try:
+                import pyiceberg  # noqa: F401
+            except ImportError:
+                continue
+        # https://github.com/apache/airflow/pull/38074
+        if name == "airflow.serialization.serializers.deltalake":
+            try:
+                import deltalake  # noqa: F401
+            except ImportError:
+                continue
+        mod = import_module(name)
+        for s in getattr(mod, "serializers", list()):
+            ############################################################
+            # Handle compatibility issue at serializer level
+            ############################################################
+            if s == "numpy.bool" and NUMPY_VERSION.major < 2:
+                reason = textwrap.dedent(f"""\
+                    Current NumPy version: {NUMPY_VERSION}
+
+                    In NumPy 1.20, `numpy.bool` was deprecated as an alias for 
the built-in `bool`.
+                    For NumPy versions <= 1.26, attempting to import 
`numpy.bool` raises an ImportError.
+                    Starting with NumPy 2.0, `numpy.bool` is reintroduced as 
the NumPy scalar type,
+                    and `numpy.bool_` becomes an alias for `numpy.bool`.
+
+                    The serializers are loaded lazily at runtime. As a result:
+                    - With NumPy <= 1.26, only `numpy.bool_` is loaded.
+                    - With NumPy >= 2.0, only `numpy.bool` is loaded.
+
+                    This test case deliberately attempts to import both 
`numpy.bool` and `numpy.bool_`,
+                    regardless of the installed NumPy version. Therefore, when 
NumPy <= 1.26 is installed,
+                    importing `numpy.bool` will raise an ImportError.
+                """)
+                serializer_tests.append(pytest.param(name, s, 
marks=pytest.mark.skip(reason=reason)))
+            else:
+                serializer_tests.append(pytest.param(name, s))
+    return serializer_tests
+
+
+SERIALIZER_TESTS = generate_serializers_importable_tests()
+
+
 class Z:
     __version__: ClassVar[int] = 1
 
@@ -386,29 +449,15 @@ class TestSerDe:
         obj = deserialize(serialize(asset))
         assert asset.uri == obj.uri
 
-    def test_serializers_importable_and_str(self):
+    @pytest.mark.parametrize("name, s", SERIALIZER_TESTS)
+    def test_serializers_importable_and_str(self, name, s):
         """Test if all distributed serializers are lazy loading and can be 
imported"""
-        import airflow.serialization.serializers
-
-        for _, name, _ in iter_namespace(airflow.serialization.serializers):
-            if name == "airflow.serialization.serializers.iceberg":
-                try:
-                    import pyiceberg  # noqa: F401
-                except ImportError:
-                    continue
-            if name == "airflow.serialization.serializers.deltalake":
-                try:
-                    import deltalake  # noqa: F401
-                except ImportError:
-                    continue
-            mod = import_module(name)
-            for s in getattr(mod, "serializers", list()):
-                if not isinstance(s, str):
-                    raise TypeError(f"{s} is not of type str. This is required 
for lazy loading")
-                try:
-                    import_string(s)
-                except ImportError:
-                    raise AttributeError(f"{s} cannot be imported (located in 
{name})")
+        if not isinstance(s, str):
+            raise TypeError(f"{s} is not of type str. This is required for 
lazy loading")
+        try:
+            import_string(s)
+        except ImportError:
+            raise AttributeError(f"{s} cannot be imported (located in {name})")
 
     def test_stringify(self):
         i = V(W(10), ["l1", "l2"], (1, 2), 10)

Reply via email to