This is an automated email from the ASF dual-hosted git repository.

bolke pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/airflow.git


The following commit(s) were added to refs/heads/main by this push:
     new a3c06c02e3 Fix non deterministic datetime deserialization (#34492)
a3c06c02e3 is described below

commit a3c06c02e31cc77b2c19554892b72ed91b8387de
Author: Hussein Awala <[email protected]>
AuthorDate: Thu Sep 28 09:31:06 2023 +0200

    Fix non deterministic datetime deserialization (#34492)
    
    tzname() does not return full timezone names, and the shorthand
    notations it returns are not deterministic. This changes the
    serialization to be deterministic and adds logic to handle
    previously serialized shorthand US timezones and CEST.
    
    ---------
    
    Co-authored-by: bolkedebruin <[email protected]>
---
 airflow/serialization/serializers/datetime.py      | 32 +++++++++--
 .../serialization/serializers/test_serializers.py  | 62 +++++++++++++++++++++-
 2 files changed, 88 insertions(+), 6 deletions(-)

diff --git a/airflow/serialization/serializers/datetime.py 
b/airflow/serialization/serializers/datetime.py
index bdb9a6cb6c..49f0899a59 100644
--- a/airflow/serialization/serializers/datetime.py
+++ b/airflow/serialization/serializers/datetime.py
@@ -19,6 +19,10 @@ from __future__ import annotations
 
 from typing import TYPE_CHECKING
 
+from airflow.serialization.serializers.timezone import (
+    deserialize as deserialize_timezone,
+    serialize as serialize_timezone,
+)
 from airflow.utils.module_loading import qualname
 from airflow.utils.timezone import convert_to_utc, is_naive
 
@@ -27,7 +31,7 @@ if TYPE_CHECKING:
 
     from airflow.serialization.serde import U
 
-__version__ = 1
+__version__ = 2
 
 serializers = ["datetime.date", "datetime.datetime", "datetime.timedelta", 
"pendulum.datetime.DateTime"]
 deserializers = serializers
@@ -44,7 +48,7 @@ def serialize(o: object) -> tuple[U, str, int, bool]:
         if is_naive(o):
             o = convert_to_utc(o)
 
-        tz = o.tzname()
+        tz = serialize_timezone(o.tzinfo)
 
         return {TIMESTAMP: o.timestamp(), TIMEZONE: tz}, qn, __version__, True
 
@@ -61,13 +65,31 @@ def deserialize(classname: str, version: int, data: dict | 
str) -> datetime.date
     import datetime
 
     from pendulum import DateTime
-    from pendulum.tz import timezone
+    from pendulum.tz import fixed_timezone, timezone
+
+    tz: datetime.tzinfo | None = None
+    if isinstance(data, dict) and TIMEZONE in data:
+        if version == 1:
+            # try to deserialize unsupported timezones
+            timezone_mapping = {
+                "EDT": fixed_timezone(-4 * 3600),
+                "CDT": fixed_timezone(-5 * 3600),
+                "MDT": fixed_timezone(-6 * 3600),
+                "PDT": fixed_timezone(-7 * 3600),
+                "CEST": timezone("CET"),
+            }
+            if data[TIMEZONE] in timezone_mapping:
+                tz = timezone_mapping[data[TIMEZONE]]
+            else:
+                tz = timezone(data[TIMEZONE])
+        else:
+            tz = deserialize_timezone(data[TIMEZONE][1], data[TIMEZONE][2], 
data[TIMEZONE][0])
 
     if classname == qualname(datetime.datetime) and isinstance(data, dict):
-        return datetime.datetime.fromtimestamp(float(data[TIMESTAMP]), 
tz=timezone(data[TIMEZONE]))
+        return datetime.datetime.fromtimestamp(float(data[TIMESTAMP]), tz=tz)
 
     if classname == qualname(DateTime) and isinstance(data, dict):
-        return DateTime.fromtimestamp(float(data[TIMESTAMP]), 
tz=timezone(data[TIMEZONE]))
+        return DateTime.fromtimestamp(float(data[TIMESTAMP]), tz=tz)
 
     if classname == qualname(datetime.timedelta) and isinstance(data, (str, 
float)):
         return datetime.timedelta(seconds=float(data))
diff --git a/tests/serialization/serializers/test_serializers.py 
b/tests/serialization/serializers/test_serializers.py
index 62ec8f3d9f..26e4ecea0e 100644
--- a/tests/serialization/serializers/test_serializers.py
+++ b/tests/serialization/serializers/test_serializers.py
@@ -31,7 +31,6 @@ from airflow.serialization.serde import DATA, deserialize, 
serialize
 class TestSerializers:
     def test_datetime(self):
         i = datetime.datetime(2022, 7, 10, 22, 10, 43, microsecond=0, 
tzinfo=pendulum.tz.UTC)
-
         s = serialize(i)
         d = deserialize(s)
         assert i.timestamp() == d.timestamp()
@@ -51,6 +50,67 @@ class TestSerializers:
         d = deserialize(s)
         assert i == d
 
+        i = datetime.datetime(
+            2022, 7, 10, 22, 10, 43, microsecond=0, 
tzinfo=pendulum.timezone("America/New_York")
+        )
+        s = serialize(i)
+        d = deserialize(s)
+        assert i.timestamp() == d.timestamp()
+
+        i = DateTime(2022, 7, 10, tzinfo=pendulum.timezone("America/New_York"))
+        s = serialize(i)
+        d = deserialize(s)
+        assert i.timestamp() == d.timestamp()
+
+    def test_deserialize_datetime_v1(self):
+
+        s = {
+            "__classname__": "pendulum.datetime.DateTime",
+            "__version__": 1,
+            "__data__": {"timestamp": 1657505443.0, "tz": "UTC"},
+        }
+        d = deserialize(s)
+        assert d.timestamp() == 1657505443.0
+        assert d.tzinfo.name == "UTC"
+
+        s["__data__"]["tz"] = "Europe/Paris"
+        d = deserialize(s)
+        assert d.timestamp() == 1657505443.0
+        assert d.tzinfo.name == "Europe/Paris"
+
+        s["__data__"]["tz"] = "America/New_York"
+        d = deserialize(s)
+        assert d.timestamp() == 1657505443.0
+        assert d.tzinfo.name == "America/New_York"
+
+        s["__data__"]["tz"] = "EDT"
+        d = deserialize(s)
+        assert d.timestamp() == 1657505443.0
+        assert d.tzinfo.name == "-04:00"
+        # assert that it's serializable with the new format
+        assert deserialize(serialize(d)) == d
+
+        s["__data__"]["tz"] = "CDT"
+        d = deserialize(s)
+        assert d.timestamp() == 1657505443.0
+        assert d.tzinfo.name == "-05:00"
+        # assert that it's serializable with the new format
+        assert deserialize(serialize(d)) == d
+
+        s["__data__"]["tz"] = "MDT"
+        d = deserialize(s)
+        assert d.timestamp() == 1657505443.0
+        assert d.tzinfo.name == "-06:00"
+        # assert that it's serializable with the new format
+        assert deserialize(serialize(d)) == d
+
+        s["__data__"]["tz"] = "PDT"
+        d = deserialize(s)
+        assert d.timestamp() == 1657505443.0
+        assert d.tzinfo.name == "-07:00"
+        # assert that it's serializable with the new format
+        assert deserialize(serialize(d)) == d
+
     @pytest.mark.parametrize(
         "expr, expected",
         [("1", "1"), ("52e4", "520000"), ("2e0", "2"), ("12e-2", "0.12"), 
("12.34", "12.34")],

Reply via email to