Copilot commented on code in PR #63871:
URL: https://github.com/apache/airflow/pull/63871#discussion_r3025333515
##########
airflow-core/src/airflow/serialization/helpers.py:
##########
@@ -61,25 +65,28 @@ def translate_tuples_to_lists(obj: Any):
return {key: translate_tuples_to_lists(value) for key, value in
obj.items()}
return obj
- def sort_dict_recursively(obj: Any) -> Any:
- """Recursively sort dictionaries to ensure consistent ordering."""
+ def sort_and_make_static_dict_recursively(obj: Any) -> Any:
+ """Recursively sort dictionaries and make callable value static to
ensure consistent ordering."""
+ if callable(obj):
+ return make_callable_static(obj)
if isinstance(obj, dict):
- return {k: sort_dict_recursively(v) for k, v in
sorted(obj.items())}
+ return {k: sort_and_make_static_dict_recursively(v) for k, v in
sorted(obj.items())}
if isinstance(obj, list):
- return [sort_dict_recursively(item) for item in obj]
+ return [sort_and_make_static_dict_recursively(item) for item in
obj]
if isinstance(obj, tuple):
- return tuple(sort_dict_recursively(item) for item in obj)
- return obj
+ return tuple(sort_and_make_static_dict_recursively(item) for item
in obj)
+ return str(obj)
max_length = conf.getint("core", "max_templated_field_length")
- if not is_jsonable(template_field):
+ # If the callable objects is in dict value, it makes is_jsonable False.
+ # So, filter the case in here and handle it downside
+ if not is_jsonable(template_field) and not isinstance(template_field,
dict):
Review Comment:
This special-casing only skips the fallback serialization path for `dict`,
but the same instability can occur for other containers (e.g., a top-level
`list`/`tuple` containing a callable) because `is_jsonable` will be false and
the code will fall back to `str(template_field)`, which includes the function
object's memory address. Suggestion: extend the container handling to include
`list`/`tuple` (and potentially nested containers generally), so callables
anywhere in the templated structure get stabilized.
##########
airflow-core/src/airflow/serialization/helpers.py:
##########
@@ -61,25 +65,28 @@ def translate_tuples_to_lists(obj: Any):
return {key: translate_tuples_to_lists(value) for key, value in
obj.items()}
return obj
- def sort_dict_recursively(obj: Any) -> Any:
- """Recursively sort dictionaries to ensure consistent ordering."""
+ def sort_and_make_static_dict_recursively(obj: Any) -> Any:
+ """Recursively sort dictionaries and make callable value static to
ensure consistent ordering."""
+ if callable(obj):
+ return make_callable_static(obj)
if isinstance(obj, dict):
- return {k: sort_dict_recursively(v) for k, v in
sorted(obj.items())}
+ return {k: sort_and_make_static_dict_recursively(v) for k, v in
sorted(obj.items())}
if isinstance(obj, list):
- return [sort_dict_recursively(item) for item in obj]
+ return [sort_and_make_static_dict_recursively(item) for item in
obj]
if isinstance(obj, tuple):
- return tuple(sort_dict_recursively(item) for item in obj)
- return obj
+ return tuple(sort_and_make_static_dict_recursively(item) for item
in obj)
+ return str(obj)
Review Comment:
Returning `str(obj)` for all leaf values changes the data types of *every*
non-container value inside dicts/lists/tuples (e.g., ints/bools/None become
strings). This is a behavior change unrelated to callable stabilization and can
alter downstream consumers that expect preserved types. Suggestion: only
convert callables to the static string, and otherwise return the original `obj`
(or only stringify non-JSON-serializable leaf objects), so numbers/bools/None
remain unchanged.
```suggestion
return obj
```
##########
airflow-core/tests/unit/serialization/test_helpers.py:
##########
@@ -16,16 +16,38 @@
# under the License.
from __future__ import annotations
+from airflow.serialization.helpers import serialize_template_field
+
def test_serialize_template_field_with_very_small_max_length(monkeypatch):
"""Test that truncation message is prioritized even for very small
max_length."""
monkeypatch.setenv("AIRFLOW__CORE__MAX_TEMPLATED_FIELD_LENGTH", "1")
- from airflow.serialization.helpers import serialize_template_field
-
result = serialize_template_field("This is a long string", "test")
# The truncation message should be shown even if it exceeds max_length
# This ensures users always see why content is truncated
assert result
assert "Truncated. You can change this behaviour" in result
+
+
+def test_serialize_template_field_with_dict_value_callable():
+ def fn_returns_callable():
+ def get_arg():
+ pass
+
+ return get_arg
+
+ template_name = "op_kwargs"
+
+ value = {"values": [3, 1, 2], "sort_key": lambda x: x}
+ result = serialize_template_field(value, template_name)
+
+ assert result == serialize_template_field(value, template_name)
+
+ value_nested = {
+ "values": [3, 1, 2],
+ "sort_key_nested": {"b": lambda x: fn_returns_callable(), "a": "test"},
+ }
+ result_nested = serialize_template_field(value_nested, template_name)
+ assert result_nested == serialize_template_field(value_nested,
template_name)
Review Comment:
These assertions don't reliably catch the reported non-determinism. In the
buggy case, serializing the *same* lambda object twice in the same process will
typically produce the same memory address, so the test can still pass even if
addresses are being embedded. Suggestion: build two equivalent dicts with
*different* callable instances (e.g., create the lambda inside a factory called
twice) and assert their serialized outputs match; additionally, assert the
callable is rendered as a stable placeholder (e.g., value for `sort_key`
matches the `<callable ...>` format) rather than containing a `0x...` address.
```suggestion
def make_value():
# Return a new dict each time, with a fresh callable instance
return {"values": [3, 1, 2], "sort_key": (lambda x: x)}
value1 = make_value()
value2 = make_value()
result1 = serialize_template_field(value1, template_name)
result2 = serialize_template_field(value2, template_name)
# Different but equivalent callables should serialize identically and
without memory addresses
assert result1 == result2
assert "<callable" in result1
assert "0x" not in result1
def make_value_nested():
# Return a new nested dict each time, with fresh callable instances
return {
"values": [3, 1, 2],
"sort_key_nested": {"b": (lambda x: fn_returns_callable()), "a":
"test"},
}
value_nested1 = make_value_nested()
value_nested2 = make_value_nested()
result_nested1 = serialize_template_field(value_nested1, template_name)
result_nested2 = serialize_template_field(value_nested2, template_name)
assert result_nested1 == result_nested2
assert "<callable" in result_nested1
assert "0x" not in result_nested1
```
##########
airflow-core/src/airflow/serialization/helpers.py:
##########
@@ -61,25 +65,28 @@ def translate_tuples_to_lists(obj: Any):
return {key: translate_tuples_to_lists(value) for key, value in
obj.items()}
return obj
- def sort_dict_recursively(obj: Any) -> Any:
- """Recursively sort dictionaries to ensure consistent ordering."""
+ def sort_and_make_static_dict_recursively(obj: Any) -> Any:
+ """Recursively sort dictionaries and make callable value static to
ensure consistent ordering."""
+ if callable(obj):
+ return make_callable_static(obj)
if isinstance(obj, dict):
- return {k: sort_dict_recursively(v) for k, v in
sorted(obj.items())}
+ return {k: sort_and_make_static_dict_recursively(v) for k, v in
sorted(obj.items())}
if isinstance(obj, list):
- return [sort_dict_recursively(item) for item in obj]
+ return [sort_and_make_static_dict_recursively(item) for item in
obj]
if isinstance(obj, tuple):
- return tuple(sort_dict_recursively(item) for item in obj)
- return obj
+ return tuple(sort_and_make_static_dict_recursively(item) for item
in obj)
+ return str(obj)
max_length = conf.getint("core", "max_templated_field_length")
- if not is_jsonable(template_field):
+ # If the callable objects is in dict value, it makes is_jsonable False.
+ # So, filter the case in here and handle it downside
Review Comment:
Grammar/wording: 'callable objects is' should be 'callable objects are', and
'handle it downside' is unclear (likely 'handle it below'). Clearer comments
are helpful here since this logic is subtle.
```suggestion
# If callable objects are present in dict values, is_jsonable returns
False for the whole dict.
# So, exclude dicts here; they are handled separately in the logic below.
```
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]