ephraimbuddy commented on code in PR #63871:
URL: https://github.com/apache/airflow/pull/63871#discussion_r3200697520


##########
airflow-core/src/airflow/serialization/helpers.py:
##########
@@ -51,55 +52,43 @@ def is_jsonable(x):
         else:
             return True
 
-    def translate_tuples_to_lists(obj: Any):
-        """Recursively convert tuples to lists."""
-        if isinstance(obj, tuple):
-            return [translate_tuples_to_lists(item) for item in obj]
-        if isinstance(obj, list):
-            return [translate_tuples_to_lists(item) for item in obj]
-        if isinstance(obj, dict):
-            return {key: translate_tuples_to_lists(value) for key, value in 
obj.items()}
-        return obj
+    def serialize_object(obj):
+        if obj is None or isinstance(obj, (str, int, float, bool)):
+            return obj
 
-    def sort_dict_recursively(obj: Any) -> Any:
-        """Recursively sort dictionaries to ensure consistent ordering."""
         if isinstance(obj, dict):
-            return {k: sort_dict_recursively(v) for k, v in 
sorted(obj.items())}
-        if isinstance(obj, list):
-            return [sort_dict_recursively(item) for item in obj]
-        if isinstance(obj, tuple):
-            return tuple(sort_dict_recursively(item) for item in obj)
-        return obj
+            # Sort dictionaries recursively to ensure consistent string 
representation
+            # This prevents hash inconsistencies when dict ordering varies
+            return {
+                k: serialize_object(v)
+                for k, v in sorted(obj.items(), key=lambda kv: 
(type(kv[0]).__name__, repr(kv[0])))
+            }
+
+        if isinstance(obj, (list, tuple)):
+            return [serialize_object(item) for item in obj]
+
+        # Use inspect.getattr_static to bypass any custom __getattr__ / 
metaclass magic
+        if callable(inspect.getattr_static(obj, "serialize", None)):
+            return serialize_object(obj.serialize())
+
+        if callable(obj):
+            # Use qualified name; default repr embeds memory addresses, which 
would change the DAG hash on every parse
+            full_qualified_name = qualname(obj, True)
+            return f"<callable {full_qualified_name}>"
+
+        # Non-primitive objects without a serialize attribute are converted to 
str
+        # So they don't break json.dumps downstream
+        return str(obj)
 
     max_length = conf.getint("core", "max_templated_field_length")
 
-    if not is_jsonable(template_field):
-        try:
-            serialized = template_field.serialize()
-        except AttributeError:
-            if callable(template_field):
-                full_qualified_name = qualname(template_field, True)
-                serialized = f"<callable {full_qualified_name}>"
-            else:
-                serialized = str(template_field)
-        if len(serialized) > max_length:
-            rendered = redact(serialized, name)
-            return truncate_rendered_value(str(rendered), max_length)
-        return serialized
-    if not template_field and not isinstance(template_field, tuple):
-        # Avoid unnecessary serialization steps for empty fields unless they 
are tuples
-        # and need to be converted to lists
-        return template_field
-    template_field = translate_tuples_to_lists(template_field)
-    # Sort dictionaries recursively to ensure consistent string representation
-    # This prevents hash inconsistencies when dict ordering varies
-    if isinstance(template_field, dict):
-        template_field = sort_dict_recursively(template_field)
-    serialized = str(template_field)
-    if len(serialized) > max_length:
+    serialized = serialize_object(template_field)
+
+    if len(str(serialized)) > max_length:
         rendered = redact(serialized, name)

Review Comment:
   `redact()` is now called on a structured `dict/list/bool` instead of the 
`str` it previously always received. The next line then runs 
`truncate_rendered_value(str(rendered), max_length)`, which stringifies the 
redacted container — producing a `repr`-shape truncated value rather than the 
previous string-truncation. Repro: `serialize_template_field({"password": "x" * 
5000}, "op_kwargs")`.
   
   ```python
       if len(str(serialized)) > max_length:
           rendered = redact(str(serialized), name)
           return truncate_rendered_value(rendered, max_length)
   ```



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]

Reply via email to