Copilot commented on code in PR #61878: URL: https://github.com/apache/airflow/pull/61878#discussion_r2850648830
########## shared/template_rendering/src/airflow_shared/template_rendering/__init__.py: ########## @@ -0,0 +1,58 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. +from __future__ import annotations + +import logging + +log = logging.getLogger(__name__) + + +def truncate_rendered_value(rendered: str, max_length: int) -> str: + MIN_CONTENT_LENGTH = 7 + + if max_length <= 0: + return "" + + # Build truncation message once, return if max_length is too small + prefix = "Truncated. You can change this behaviour in [core]max_templated_field_length. " + suffix = "..." + trunc_only = f"{prefix}{suffix}" + + if max_length < len(trunc_only): + return trunc_only + + # Compute available space for content + overhead = len(prefix) + len(suffix) + available = max_length - overhead + + if available < MIN_CONTENT_LENGTH: + return trunc_only + + # Slice content to fit and construct final string + content = rendered[:available].rstrip() + result = f"{prefix}{content}{suffix}" Review Comment: This helper now appends the raw truncated content, whereas the previous inline logic in core/SDK used `!r` (repr) which wraps content in quotes and escapes it. That’s a user-visible output contract change for serialized template fields, and it also conflicts with the PR description’s goal of showing truncated content in quotes when there is space. If quoted output is still desired, adjust the implementation to include quoting/escaping (and update the `available` length calculation to account for the quote characters), or update the stated contract/tests to explicitly define the new unquoted behavior. ```suggestion # Account for the quotes added by repr/!r around the truncated content. overhead = len(prefix) + len(suffix) + 2 available = max_length - overhead if available < MIN_CONTENT_LENGTH: return trunc_only # Slice content to fit and construct final string, using repr-style quoting/escaping content = rendered[:available].rstrip() result = f"{prefix}{content!r}{suffix}" ``` ########## task-sdk/tests/task_sdk/execution_time/test_task_runner.py: ########## @@ -2690,13 +2690,18 @@ def execute(self, context): runtime_ti = create_runtime_ti(task=task, dag_id="test_truncation_masking_dag") run(runtime_ti, context=runtime_ti.get_template_context(), log=mock.MagicMock()) + # Truncation format may vary by config; use actual call for assertion + msg = next( + c.kwargs["msg"] + for c in mock_supervisor_comms.send.mock_calls + if c.kwargs.get("msg") and getattr(c.kwargs["msg"], "type", None) == "SetRenderedFields" + ) + rendered_fields = msg.rendered_fields Review Comment: This assertion becomes effectively self-fulfilling because it uses `rendered_fields` extracted from the recorded mock call to construct the expected `SetRenderedFields(...)`. As a result, the test no longer verifies truncation/masking behavior (e.g., that `env_vars` is truncated and contains the truncation prefix and redacted values) and will pass even if the formatting regresses. Consider asserting on key properties of `rendered_fields` instead (presence of expected keys like `env_vars`/`region`, and for `env_vars` assert it contains the truncation prefix and redaction markers, and ends with `...` when truncated). ########## airflow-core/tests/unit/models/test_renderedtifields.py: ########## @@ -124,13 +125,12 @@ def teardown_method(self): pytest.param(datetime(2018, 12, 6, 10, 55), "2018-12-06 10:55:00+00:00", id="datetime"), pytest.param( "a" * 5000, - f"Truncated. You can change this behaviour in [core]max_templated_field_length. {('a' * 5000)[: max_length - 79]!r}... ", + serialize_template_field("a" * 5000, "bash_command"), id="large_string", ), pytest.param( LargeStrObject(), - f"Truncated. You can change this behaviour in " - f"[core]max_templated_field_length. {str(LargeStrObject())[: max_length - 79]!r}... ", + serialize_template_field(LargeStrObject(), "bash_command"), Review Comment: These parametrized expectations now call `serialize_template_field(...)` to compute the expected value, which can make the test less effective (it risks mirroring the implementation under test instead of validating its output). To keep this test meaningful, consider asserting against a stable expected shape (e.g., startswith truncation prefix, endswith `...`, and length constraints) or constructing the expected value using lower-level primitives with explicit `max_length` inputs rather than reusing the production serializer. ########## airflow-core/tests/unit/models/test_renderedtifields.py: ########## @@ -38,6 +38,7 @@ from airflow.providers.standard.operators.bash import BashOperator from airflow.providers.standard.operators.python import PythonOperator from airflow.sdk import task as task_decorator +from airflow.serialization.helpers import serialize_template_field Review Comment: These parametrized expectations now call `serialize_template_field(...)` to compute the expected value, which can make the test less effective (it risks mirroring the implementation under test instead of validating its output). To keep this test meaningful, consider asserting against a stable expected shape (e.g., startswith truncation prefix, endswith `...`, and length constraints) or constructing the expected value using lower-level primitives with explicit `max_length` inputs rather than reusing the production serializer. ```suggestion ``` ########## airflow-core/tests/unit/models/test_renderedtifields.py: ########## @@ -124,13 +125,12 @@ def teardown_method(self): pytest.param(datetime(2018, 12, 6, 10, 55), "2018-12-06 10:55:00+00:00", id="datetime"), pytest.param( "a" * 5000, - f"Truncated. You can change this behaviour in [core]max_templated_field_length. {('a' * 5000)[: max_length - 79]!r}... ", + serialize_template_field("a" * 5000, "bash_command"), Review Comment: These parametrized expectations now call `serialize_template_field(...)` to compute the expected value, which can make the test less effective (it risks mirroring the implementation under test instead of validating its output). To keep this test meaningful, consider asserting against a stable expected shape (e.g., startswith truncation prefix, endswith `...`, and length constraints) or constructing the expected value using lower-level primitives with explicit `max_length` inputs rather than reusing the production serializer. ########## shared/template_rendering/src/airflow_shared/template_rendering/__init__.py: ########## @@ -0,0 +1,58 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. +from __future__ import annotations + +import logging + +log = logging.getLogger(__name__) + + +def truncate_rendered_value(rendered: str, max_length: int) -> str: + MIN_CONTENT_LENGTH = 7 + + if max_length <= 0: + return "" + + # Build truncation message once, return if max_length is too small + prefix = "Truncated. You can change this behaviour in [core]max_templated_field_length. " + suffix = "..." + trunc_only = f"{prefix}{suffix}" + + if max_length < len(trunc_only): + return trunc_only + + # Compute available space for content + overhead = len(prefix) + len(suffix) + available = max_length - overhead + + if available < MIN_CONTENT_LENGTH: + return trunc_only + + # Slice content to fit and construct final string + content = rendered[:available].rstrip() + result = f"{prefix}{content}{suffix}" Review Comment: `truncate_rendered_value` is a public helper but lacks a docstring describing its contract and edge-case behavior. In particular: (1) it may intentionally return a string longer than `max_length` when `max_length < len(prefix+suffix)`, and (2) it only appends content when at least `MIN_CONTENT_LENGTH` characters are available. Adding a docstring (and optionally promoting `MIN_CONTENT_LENGTH`, `prefix`, and `suffix` to named module-level constants) would make this behavior clearer to callers and reduce the need for tests to duplicate these details. ```suggestion # Public truncation configuration used by ``truncate_rendered_value``. # Exposed as module-level constants to avoid duplicating literals in callers/tests. TRUNCATE_MIN_CONTENT_LENGTH = 7 TRUNCATE_PREFIX = "Truncated. You can change this behaviour in [core]max_templated_field_length. " TRUNCATE_SUFFIX = "..." def truncate_rendered_value(rendered: str, max_length: int) -> str: """ Truncate a rendered template value to approximately ``max_length`` characters. Behavior: * If ``max_length <= 0``, an empty string is returned. * A fixed prefix (``TRUNCATE_PREFIX``) and suffix (``TRUNCATE_SUFFIX``) are always included when truncation occurs. The minimal truncation-only message is:: f"{TRUNCATE_PREFIX}{TRUNCATE_SUFFIX}" * If ``max_length`` is smaller than the length of this truncation-only message, that message is returned in full, even though its length may exceed ``max_length``. * Otherwise, space remaining after the prefix and suffix is allocated to the original ``rendered`` content. Content is only appended if at least ``TRUNCATE_MIN_CONTENT_LENGTH`` characters are available; if fewer are available, the truncation-only message is returned instead. Note that this function is best-effort: the return value is intended to be no longer than ``max_length``, but when ``max_length < len(TRUNCATE_PREFIX + TRUNCATE_SUFFIX)`` it intentionally returns a longer string to preserve the full truncation message. """ if max_length <= 0: return "" # Build truncation message once, return if max_length is too small trunc_only = f"{TRUNCATE_PREFIX}{TRUNCATE_SUFFIX}" if max_length < len(trunc_only): return trunc_only # Compute available space for content overhead = len(TRUNCATE_PREFIX) + len(TRUNCATE_SUFFIX) available = max_length - overhead if available < TRUNCATE_MIN_CONTENT_LENGTH: return trunc_only # Slice content to fit and construct final string content = rendered[:available].rstrip() result = f"{TRUNCATE_PREFIX}{content}{TRUNCATE_SUFFIX}" ``` ########## task-sdk/tests/task_sdk/execution_time/test_task_runner.py: ########## @@ -2690,13 +2690,18 @@ def execute(self, context): runtime_ti = create_runtime_ti(task=task, dag_id="test_truncation_masking_dag") run(runtime_ti, context=runtime_ti.get_template_context(), log=mock.MagicMock()) + # Truncation format may vary by config; use actual call for assertion + msg = next( + c.kwargs["msg"] + for c in mock_supervisor_comms.send.mock_calls + if c.kwargs.get("msg") and getattr(c.kwargs["msg"], "type", None) == "SetRenderedFields" + ) + rendered_fields = msg.rendered_fields + assert ( call( msg=SetRenderedFields( - rendered_fields={ - "env_vars": "Truncated. You can change this behaviour in [core]max_templated_field_length. \"{'TEST_URL_0': '***', 'TEST_URL_1': '***', 'TEST_URL_10': '***', 'TEST_URL_11': '***', 'TEST_URL_12': '***', 'TEST_URL_13': '***', 'TEST_URL_14': '***', 'TEST_URL_15': '***', 'TEST_URL_16': '***', 'TEST_URL_17': '***', 'TEST_URL_18': '***', 'TEST_URL_19': '***', 'TEST_URL_2': '***', 'TEST_URL_20': '***', 'TEST_URL_21': '***', 'TEST_URL_22': '***', 'TEST_URL_23': '***', 'TEST_URL_24': '***', 'TEST_URL_25': '***', 'TEST_URL_26': '***', 'TEST_URL_27': '***', 'TEST_URL_28': '***', 'TEST_URL_29': '***', 'TEST_URL_3': '***', 'TEST_URL_30': '***', 'TEST_URL_31': '***', 'TEST_URL_32': '***', 'TEST_URL_33': '***', 'TEST_URL_34': '***', 'TEST_URL_35': '***', 'TEST_URL_36': '***', 'TEST_URL_37': '***', 'TEST_URL_38': '***', 'TEST_URL_39': '***', 'TEST_URL_4': '***', 'TEST_URL_40': '***', 'TEST_URL_41': '***', 'TEST_URL_42': '***', 'TEST_URL_43': '***', 'TEST_URL_44': '***', 'TES T_URL_45': '***', 'TEST_URL_46': '***', 'TEST_URL_47': '***', 'TEST_URL_48': '***', 'TEST_URL_49': '***', 'TEST_URL_5': '***', 'TEST_URL_6': '***', 'TEST_URL_7': '***', 'TEST_URL_8': '***', 'TEST_URL_9': '***'}\"... ", - "region": "us-west-2", - }, + rendered_fields=rendered_fields, Review Comment: This assertion becomes effectively self-fulfilling because it uses `rendered_fields` extracted from the recorded mock call to construct the expected `SetRenderedFields(...)`. As a result, the test no longer verifies truncation/masking behavior (e.g., that `env_vars` is truncated and contains the truncation prefix and redacted values) and will pass even if the formatting regresses. Consider asserting on key properties of `rendered_fields` instead (presence of expected keys like `env_vars`/`region`, and for `env_vars` assert it contains the truncation prefix and redaction markers, and ends with `...` when truncated). -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: [email protected] For queries about this service, please contact Infrastructure at: [email protected]
