This is an automated email from the ASF dual-hosted git repository.

jscheffl pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/airflow.git


The following commit(s) were added to refs/heads/main by this push:
     new 487388906db Revert "Prioritize custom serializers over built-in ones. 
(#56736) (#56881)" (#57711)
487388906db is described below

commit 487388906db13f47582c6c799cc91b33a5130873
Author: Jens Scheffler <[email protected]>
AuthorDate: Sun Nov 2 13:17:50 2025 +0100

    Revert "Prioritize custom serializers over built-in ones. (#56736) 
(#56881)" (#57711)
    
    This reverts commit 1632789b4d2c425bc3534bd0067ed3d615fcfce9.
---
 .../docs/authoring-and-scheduling/serializers.rst  |  6 +--
 airflow-core/newsfragments/56881.significant.rst   | 16 --------
 airflow-core/src/airflow/serialization/serde.py    | 43 +++++++++++-----------
 .../tests/unit/cli/commands/test_dag_command.py    |  4 +-
 .../tests/unit/serialization/test_serde.py         | 34 -----------------
 5 files changed, 25 insertions(+), 78 deletions(-)

diff --git a/airflow-core/docs/authoring-and-scheduling/serializers.rst 
b/airflow-core/docs/authoring-and-scheduling/serializers.rst
index 3e087dcf74d..fb19a111d91 100644
--- a/airflow-core/docs/authoring-and-scheduling/serializers.rst
+++ b/airflow-core/docs/authoring-and-scheduling/serializers.rst
@@ -28,9 +28,9 @@ like ``str`` and ``int`` and it loops over iterables. When 
things become more co
 
 Airflow out of the box supports three ways of custom serialization. Primitives 
are returned as is, without
 additional encoding, e.g. a ``str`` remains a ``str``. When it is not a 
primitive (or iterable thereof) Airflow
-first looks in the class for a ``serialize()`` method or in case of 
deserialization a ``deserialize(data, version: int)`` method.
-Next airflow checks for a registered serializer and deserializer in the 
namespace of ``airflow.serialization.serializers``.
-Finally, if the class is either decorated with ``@dataclass``
+looks for a registered serializer and deserializer in the namespace of 
``airflow.serialization.serializers``.
+If not found it will look in the class for a ``serialize()`` method or in case 
of deserialization a
+``deserialize(data, version: int)`` method. Finally, if the class is either 
decorated with ``@dataclass``
 or ``@attr.define`` it will use the public methods for those decorators.
 
 If you are looking to extend Airflow with a new serializer, it is good to know 
when to choose what way of serialization.
diff --git a/airflow-core/newsfragments/56881.significant.rst 
b/airflow-core/newsfragments/56881.significant.rst
deleted file mode 100644
index 071d827356b..00000000000
--- a/airflow-core/newsfragments/56881.significant.rst
+++ /dev/null
@@ -1,16 +0,0 @@
-Prioritize custom SerDe methods defined in classes over the built-in SerDe 
methods.
-
-Previously, the SerDe logic prioritized built-in serializers in airflow-core 
over those that users defined
-in their classes with the ``serialize(self)`` and ``deserialize(data: dict, 
version: int)`` methods.
-This behavior has been changed to always use custom SerDe methods before 
considering built-in SerDe methods.
-
-* Types of change
-
-  * [ ] Dag changes
-  * [ ] Config changes
-  * [ ] API changes
-  * [ ] CLI changes
-  * [x] Behaviour changes
-  * [ ] Plugin changes
-  * [ ] Dependency changes
-  * [ ] Code interface changes
diff --git a/airflow-core/src/airflow/serialization/serde.py 
b/airflow-core/src/airflow/serialization/serde.py
index e3e8e2047df..6faedd88417 100644
--- a/airflow-core/src/airflow/serialization/serde.py
+++ b/airflow-core/src/airflow/serialization/serde.py
@@ -133,23 +133,6 @@ def serialize(o: object, depth: int = 0) -> U | None:
     qn = qualname(o)
     classname = None
 
-    # custom serializers
-    dct = {
-        CLASSNAME: qn,
-        VERSION: getattr(cls, "__version__", DEFAULT_VERSION),
-    }
-
-    # object / class brings their own. Prioritize over built-in serializers
-    if hasattr(o, "serialize"):
-        data = getattr(o, "serialize")()
-
-        # if we end up with a structure, ensure its values are serialized
-        if isinstance(data, dict):
-            data = serialize(data, depth + 1)
-
-        dct[DATA] = data
-        return dct
-
     # Serialize namedtuple like tuples
     # We also override the classname returned by the builtin.py serializer. 
The classname
     # has to be "builtins.tuple", so that the deserializer can deserialize the 
object into tuple.
@@ -170,13 +153,29 @@ def serialize(o: object, depth: int = 0) -> U | None:
             return encode(classname or serialized_classname, version, 
serialize(data, depth + 1))
 
     # primitive types are returned as is
-    # Need to come after registered built-ins - else numpy float64 won't be 
serialized as numpy class, because isinstance(np.float64(0.234), float) == True
     if isinstance(o, _primitives):
         if isinstance(o, enum.Enum):
             return o.value
 
         return o
 
+    # custom serializers
+    dct = {
+        CLASSNAME: qn,
+        VERSION: getattr(cls, "__version__", DEFAULT_VERSION),
+    }
+
+    # object / class brings their own
+    if hasattr(o, "serialize"):
+        data = getattr(o, "serialize")()
+
+        # if we end up with a structure, ensure its values are serialized
+        if isinstance(data, dict):
+            data = serialize(data, depth + 1)
+
+        dct[DATA] = data
+        return dct
+
     # dataclasses
     if dataclasses.is_dataclass(cls):
         # fixme: unfortunately using asdict with nested dataclasses it looses 
information
@@ -263,10 +262,6 @@ def deserialize(o: T | None, full=True, type_hint: Any = 
None) -> object:
 
     cls = import_string(classname)
 
-    # class has deserialization function
-    if hasattr(cls, "deserialize"):
-        return getattr(cls, "deserialize")(deserialize(value), version)
-
     # registered deserializer
     if classname in _deserializers:
         return _deserializers[classname].deserialize(cls, version, 
deserialize(value))
@@ -274,6 +269,10 @@ def deserialize(o: T | None, full=True, type_hint: Any = 
None) -> object:
         if PYDANTIC_MODEL_QUALNAME in _deserializers:
             return _deserializers[PYDANTIC_MODEL_QUALNAME].deserialize(cls, 
version, deserialize(value))
 
+    # class has deserialization function
+    if hasattr(cls, "deserialize"):
+        return getattr(cls, "deserialize")(deserialize(value), version)
+
     # attr or dataclass
     if attr.has(cls) or dataclasses.is_dataclass(cls):
         class_version = getattr(cls, "__version__", 0)
diff --git a/airflow-core/tests/unit/cli/commands/test_dag_command.py 
b/airflow-core/tests/unit/cli/commands/test_dag_command.py
index e54a5fd09f9..20572af7153 100644
--- a/airflow-core/tests/unit/cli/commands/test_dag_command.py
+++ b/airflow-core/tests/unit/cli/commands/test_dag_command.py
@@ -931,9 +931,7 @@ class TestCliDags:
             tis = dr.get_task_instances()
             assert next(x for x in tis if x.task_id == "abc").state == 
"success"
 
-    @mock.patch(
-        "airflow.sdk.execution_time.task_runner._execute_task", 
return_value="dummy task return value"
-    )
+    @mock.patch("airflow.sdk.execution_time.task_runner._execute_task")
     def test_dag_test_with_mark_success(self, mock__execute_task):
         """
         option `--mark-success-pattern` should mark matching tasks as success 
without executing them.
diff --git a/airflow-core/tests/unit/serialization/test_serde.py 
b/airflow-core/tests/unit/serialization/test_serde.py
index e89fbf53ce0..6130437f1b9 100644
--- a/airflow-core/tests/unit/serialization/test_serde.py
+++ b/airflow-core/tests/unit/serialization/test_serde.py
@@ -33,20 +33,16 @@ from airflow.sdk.definitions.asset import Asset
 from airflow.serialization.serde import (
     CLASSNAME,
     DATA,
-    PYDANTIC_MODEL_QUALNAME,
     SCHEMA_ID,
     VERSION,
-    _deserializers,
     _get_patterns,
     _get_regexp_patterns,
     _match,
     _match_glob,
     _match_regexp,
-    _serializers,
     deserialize,
     serialize,
 )
-from airflow.serialization.typing import is_pydantic_model
 from airflow.utils.module_loading import import_string, iter_namespace, 
qualname
 
 from tests_common.test_utils.config import conf_vars
@@ -200,18 +196,6 @@ class C:
         return None
 
 
-class PydanticModelWithCustomSerDe(BaseModel):
-    ignored_field_in_serialization: int = 0
-    x: str
-
-    @staticmethod
-    def deserialize(data: dict, version: int):
-        return PydanticModelWithCustomSerDe(ignored_field_in_serialization=-1, 
x=data["x"])
-
-    def serialize(self) -> dict:
-        return {"x": self.x}
-
-
 @pytest.mark.usefixtures("recalculate_patterns")
 class TestSerDe:
     def test_ser_primitives(self):
@@ -530,21 +514,3 @@ class TestSerDe:
             TypeError, match="cannot serialize object of type <class 
'unit.serialization.test_serde.C'>"
         ):
             serialize(i)
-
-    def test_custom_serde_methods_are_prioritized_over_builtins(self):
-        """
-        There is a built-in SerDe for pydantic classes.
-        Test that the custom defined SerDe methods take precedence over the 
built-in ones.
-        """
-        orig = 
PydanticModelWithCustomSerDe(ignored_field_in_serialization=200, x="SerDe Test")
-
-        assert is_pydantic_model(orig)
-        assert PYDANTIC_MODEL_QUALNAME in _serializers
-        assert PYDANTIC_MODEL_QUALNAME in _deserializers
-
-        serialized = serialize(orig)
-        assert "ignored_field_in_serialization" not in serialized
-        deserialized: PydanticModelWithCustomSerDe = deserialize(serialized)
-        assert deserialized.ignored_field_in_serialization == -1
-        assert deserialized.x == orig.x
-        assert type(orig) is type(deserialized)

Reply via email to