This is an automated email from the ASF dual-hosted git repository.
bolke pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/airflow.git
The following commit(s) were added to refs/heads/main by this push:
new a3c06c02e3 Fix non deterministic datetime deserialization (#34492)
a3c06c02e3 is described below
commit a3c06c02e31cc77b2c19554892b72ed91b8387de
Author: Hussein Awala <[email protected]>
AuthorDate: Thu Sep 28 09:31:06 2023 +0200
Fix non deterministic datetime deserialization (#34492)
tzname() does not return full timezone names, and the shorthand notations it
returns are not deterministic. This changes the serialization to be deterministic
and adds logic to handle previously serialized shorthand US timezones and CEST.
---------
Co-authored-by: bolkedebruin <[email protected]>
---
airflow/serialization/serializers/datetime.py | 32 +++++++++--
.../serialization/serializers/test_serializers.py | 62 +++++++++++++++++++++-
2 files changed, 88 insertions(+), 6 deletions(-)
diff --git a/airflow/serialization/serializers/datetime.py
b/airflow/serialization/serializers/datetime.py
index bdb9a6cb6c..49f0899a59 100644
--- a/airflow/serialization/serializers/datetime.py
+++ b/airflow/serialization/serializers/datetime.py
@@ -19,6 +19,10 @@ from __future__ import annotations
from typing import TYPE_CHECKING
+from airflow.serialization.serializers.timezone import (
+ deserialize as deserialize_timezone,
+ serialize as serialize_timezone,
+)
from airflow.utils.module_loading import qualname
from airflow.utils.timezone import convert_to_utc, is_naive
@@ -27,7 +31,7 @@ if TYPE_CHECKING:
from airflow.serialization.serde import U
-__version__ = 1
+__version__ = 2
serializers = ["datetime.date", "datetime.datetime", "datetime.timedelta",
"pendulum.datetime.DateTime"]
deserializers = serializers
@@ -44,7 +48,7 @@ def serialize(o: object) -> tuple[U, str, int, bool]:
if is_naive(o):
o = convert_to_utc(o)
- tz = o.tzname()
+ tz = serialize_timezone(o.tzinfo)
return {TIMESTAMP: o.timestamp(), TIMEZONE: tz}, qn, __version__, True
@@ -61,13 +65,31 @@ def deserialize(classname: str, version: int, data: dict |
str) -> datetime.date
import datetime
from pendulum import DateTime
- from pendulum.tz import timezone
+ from pendulum.tz import fixed_timezone, timezone
+
+ tz: datetime.tzinfo | None = None
+ if isinstance(data, dict) and TIMEZONE in data:
+ if version == 1:
+ # try to deserialize unsupported timezones
+ timezone_mapping = {
+ "EDT": fixed_timezone(-4 * 3600),
+ "CDT": fixed_timezone(-5 * 3600),
+ "MDT": fixed_timezone(-6 * 3600),
+ "PDT": fixed_timezone(-7 * 3600),
+ "CEST": timezone("CET"),
+ }
+ if data[TIMEZONE] in timezone_mapping:
+ tz = timezone_mapping[data[TIMEZONE]]
+ else:
+ tz = timezone(data[TIMEZONE])
+ else:
+ tz = deserialize_timezone(data[TIMEZONE][1], data[TIMEZONE][2],
data[TIMEZONE][0])
if classname == qualname(datetime.datetime) and isinstance(data, dict):
- return datetime.datetime.fromtimestamp(float(data[TIMESTAMP]),
tz=timezone(data[TIMEZONE]))
+ return datetime.datetime.fromtimestamp(float(data[TIMESTAMP]), tz=tz)
if classname == qualname(DateTime) and isinstance(data, dict):
- return DateTime.fromtimestamp(float(data[TIMESTAMP]),
tz=timezone(data[TIMEZONE]))
+ return DateTime.fromtimestamp(float(data[TIMESTAMP]), tz=tz)
if classname == qualname(datetime.timedelta) and isinstance(data, (str,
float)):
return datetime.timedelta(seconds=float(data))
diff --git a/tests/serialization/serializers/test_serializers.py
b/tests/serialization/serializers/test_serializers.py
index 62ec8f3d9f..26e4ecea0e 100644
--- a/tests/serialization/serializers/test_serializers.py
+++ b/tests/serialization/serializers/test_serializers.py
@@ -31,7 +31,6 @@ from airflow.serialization.serde import DATA, deserialize,
serialize
class TestSerializers:
def test_datetime(self):
i = datetime.datetime(2022, 7, 10, 22, 10, 43, microsecond=0,
tzinfo=pendulum.tz.UTC)
-
s = serialize(i)
d = deserialize(s)
assert i.timestamp() == d.timestamp()
@@ -51,6 +50,67 @@ class TestSerializers:
d = deserialize(s)
assert i == d
+ i = datetime.datetime(
+ 2022, 7, 10, 22, 10, 43, microsecond=0,
tzinfo=pendulum.timezone("America/New_York")
+ )
+ s = serialize(i)
+ d = deserialize(s)
+ assert i.timestamp() == d.timestamp()
+
+ i = DateTime(2022, 7, 10, tzinfo=pendulum.timezone("America/New_York"))
+ s = serialize(i)
+ d = deserialize(s)
+ assert i.timestamp() == d.timestamp()
+
+ def test_deserialize_datetime_v1(self):
+
+ s = {
+ "__classname__": "pendulum.datetime.DateTime",
+ "__version__": 1,
+ "__data__": {"timestamp": 1657505443.0, "tz": "UTC"},
+ }
+ d = deserialize(s)
+ assert d.timestamp() == 1657505443.0
+ assert d.tzinfo.name == "UTC"
+
+ s["__data__"]["tz"] = "Europe/Paris"
+ d = deserialize(s)
+ assert d.timestamp() == 1657505443.0
+ assert d.tzinfo.name == "Europe/Paris"
+
+ s["__data__"]["tz"] = "America/New_York"
+ d = deserialize(s)
+ assert d.timestamp() == 1657505443.0
+ assert d.tzinfo.name == "America/New_York"
+
+ s["__data__"]["tz"] = "EDT"
+ d = deserialize(s)
+ assert d.timestamp() == 1657505443.0
+ assert d.tzinfo.name == "-04:00"
+ # assert that it's serializable with the new format
+ assert deserialize(serialize(d)) == d
+
+ s["__data__"]["tz"] = "CDT"
+ d = deserialize(s)
+ assert d.timestamp() == 1657505443.0
+ assert d.tzinfo.name == "-05:00"
+ # assert that it's serializable with the new format
+ assert deserialize(serialize(d)) == d
+
+ s["__data__"]["tz"] = "MDT"
+ d = deserialize(s)
+ assert d.timestamp() == 1657505443.0
+ assert d.tzinfo.name == "-06:00"
+ # assert that it's serializable with the new format
+ assert deserialize(serialize(d)) == d
+
+ s["__data__"]["tz"] = "PDT"
+ d = deserialize(s)
+ assert d.timestamp() == 1657505443.0
+ assert d.tzinfo.name == "-07:00"
+ # assert that it's serializable with the new format
+ assert deserialize(serialize(d)) == d
+
@pytest.mark.parametrize(
"expr, expected",
[("1", "1"), ("52e4", "520000"), ("2e0", "2"), ("12e-2", "0.12"),
("12.34", "12.34")],