ephraimbuddy commented on code in PR #63871:
URL: https://github.com/apache/airflow/pull/63871#discussion_r3200705762


##########
airflow-core/src/airflow/serialization/helpers.py:
##########
@@ -51,55 +52,43 @@ def is_jsonable(x):
         else:
             return True
 
-    def translate_tuples_to_lists(obj: Any):
-        """Recursively convert tuples to lists."""
-        if isinstance(obj, tuple):
-            return [translate_tuples_to_lists(item) for item in obj]
-        if isinstance(obj, list):
-            return [translate_tuples_to_lists(item) for item in obj]
-        if isinstance(obj, dict):
-            return {key: translate_tuples_to_lists(value) for key, value in 
obj.items()}
-        return obj
+    def serialize_object(obj):
+        if obj is None or isinstance(obj, (str, int, float, bool)):
+            return obj
 
-    def sort_dict_recursively(obj: Any) -> Any:
-        """Recursively sort dictionaries to ensure consistent ordering."""
         if isinstance(obj, dict):
-            return {k: sort_dict_recursively(v) for k, v in 
sorted(obj.items())}
-        if isinstance(obj, list):
-            return [sort_dict_recursively(item) for item in obj]
-        if isinstance(obj, tuple):
-            return tuple(sort_dict_recursively(item) for item in obj)
-        return obj
+            # Sort dictionaries recursively to ensure consistent string 
representation
+            # This prevents hash inconsistencies when dict ordering varies
+            return {
+                k: serialize_object(v)
+                for k, v in sorted(obj.items(), key=lambda kv: 
(type(kv[0]).__name__, repr(kv[0])))
+            }
+
+        if isinstance(obj, (list, tuple)):
+            return [serialize_object(item) for item in obj]
+
+        # Use inspect.getattr_static to bypass any custom __getattr__ / 
metaclass magic
+        if callable(inspect.getattr_static(obj, "serialize", None)):
+            return serialize_object(obj.serialize())
+
+        if callable(obj):
+            # Use qualified name; default repr embeds memory addresses, which 
would change the DAG hash on every parse
+            full_qualified_name = qualname(obj, True)
+            return f"<callable {full_qualified_name}>"
+
+        # Non-primitive objects without a serialize attribute are converted to 
str
+        # So they don't break json.dumps downstream
+        return str(obj)
 
     max_length = conf.getint("core", "max_templated_field_length")
 
-    if not is_jsonable(template_field):
-        try:
-            serialized = template_field.serialize()
-        except AttributeError:
-            if callable(template_field):
-                full_qualified_name = qualname(template_field, True)
-                serialized = f"<callable {full_qualified_name}>"
-            else:
-                serialized = str(template_field)
-        if len(serialized) > max_length:
-            rendered = redact(serialized, name)
-            return truncate_rendered_value(str(rendered), max_length)
-        return serialized
-    if not template_field and not isinstance(template_field, tuple):
-        # Avoid unnecessary serialization steps for empty fields unless they 
are tuples
-        # and need to be converted to lists
-        return template_field
-    template_field = translate_tuples_to_lists(template_field)
-    # Sort dictionaries recursively to ensure consistent string representation
-    # This prevents hash inconsistencies when dict ordering varies
-    if isinstance(template_field, dict):
-        template_field = sort_dict_recursively(template_field)
-    serialized = str(template_field)
-    if len(serialized) > max_length:
+    serialized = serialize_object(template_field)
+
+    if len(str(serialized)) > max_length:
         rendered = redact(serialized, name)
         return truncate_rendered_value(str(rendered), max_length)
-    return template_field
+
+    return serialized if is_jsonable(serialized) else str(serialized)

Review Comment:
   After `serialize_object`, every leaf is already a JSON primitive — so this 
`is_jsonable` check is dead except for one case: dicts with non-JSON-encodable 
keys (tuple/frozenset keys), which `json.dumps` rejects. In that case the 
entire dict collapses to `str()`, defeating the recursive walk and 
re-introducing the repr-with-memory-addresses problem. Either drop the check 
entirely (recommended — `serialize_object `should guarantee `jsonable output)` 
or handle exotic keys explicitly inside `serialize_object`.



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]

Reply via email to