This is an automated email from the ASF dual-hosted git repository.
ashb pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/airflow.git
The following commit(s) were added to refs/heads/main by this push:
new 89e9f41dd83 Fix sensitive data leak in SparkSubmitOperator truncated templates (#68624)
89e9f41dd83 is described below
commit 89e9f41dd830a01c0b330b44268a2c4022152d8f
Author: Anshu Singh <[email protected]>
AuthorDate: Tue Jun 16 22:30:20 2026 +0530
Fix sensitive data leak in SparkSubmitOperator truncated templates (#68624)
Spark, Hadoop and Kubernetes config keys use dot or dash separators
(e.g. `spark.hadoop.fs.s3a.bucket.spark.access.key`) which did not match
underscore-style sensitive fields (access_key, secret_key, token) in
SecretsMasker, causing sensitive values to leak in the Rendered Template
view when the conf field was truncated.
Normalize non-word separators to underscores via `re.sub` so dotted/dashed
keys correctly match the sensitive field patterns.
closes: #67459
Co-authored-by: Ash Berlin-Taylor <[email protected]>
---
.../tests/unit/serialization/test_helpers.py | 23 ++++++++++++++++++++++
.../secrets_masker/secrets_masker.py | 3 ++-
.../tests/secrets_masker/test_secrets_masker.py | 7 +++++++
3 files changed, 32 insertions(+), 1 deletion(-)
diff --git a/airflow-core/tests/unit/serialization/test_helpers.py b/airflow-core/tests/unit/serialization/test_helpers.py
index 1e453a5e3d8..388891f0642 100644
--- a/airflow-core/tests/unit/serialization/test_helpers.py
+++ b/airflow-core/tests/unit/serialization/test_helpers.py
@@ -47,6 +47,29 @@ def test_serialize_template_field_truncation_kicks_in(monkeypatch):
assert "Truncated. You can change this behaviour" in result
+@pytest.mark.enable_redact
+def test_serialize_template_field_masks_dotted_sensitive_keys_on_truncation(monkeypatch):
+ """Dotted config keys like spark.hadoop.*.access.key must be masked even when field is truncated."""
+ monkeypatch.setenv("AIRFLOW__CORE__MAX_TEMPLATED_FIELD_LENGTH", "1500")
+
+ access_key_value = "AKIA-REGRESSION-FIXTURE-ACCESS-KEY"
+ token_value = "REGRESSION-FIXTURE-JWT-TOKEN-VALUE"
+
+ payload = {
+ "spark.hadoop.fs.s3a.bucket.spark.access.key": access_key_value,
+ "spark.sql.catalog.kometa.token": token_value,
+ "zpadding": "z" * 2000, # forces truncation
+ }
+
+ result = serialize_template_field(payload, "conf")
+
+ assert isinstance(result, str)
+ assert "Truncated. You can change this behaviour" in result
+ assert access_key_value not in result, "S3 access key must not appear in truncated output"
+ assert token_value not in result, "JWT token must not appear in truncated output"
+ assert "***" in result
+
+
def test_serialize_template_field_with_notset():
"""NOTSET must serialize deterministically via serialize(), not str() fallback."""
result = serialize_template_field(NOTSET, "logical_date")
diff --git a/shared/secrets_masker/src/airflow_shared/secrets_masker/secrets_masker.py b/shared/secrets_masker/src/airflow_shared/secrets_masker/secrets_masker.py
index 03198bf48a8..3b625b7af77 100644
--- a/shared/secrets_masker/src/airflow_shared/secrets_masker/secrets_masker.py
+++ b/shared/secrets_masker/src/airflow_shared/secrets_masker/secrets_masker.py
@@ -577,7 +577,8 @@ class SecretsMasker(logging.Filter):
"""
if isinstance(name, str) and self.hide_sensitive_var_conn_fields:
name = name.strip().lower()
- return any(s in name for s in self.sensitive_variables_fields)
+ normalized = re.sub(r"\W+", "_", name)
+ return any(s in normalized for s in self.sensitive_variables_fields)
return False
def add_mask(self, secret: JsonValue, name: str | None = None):
diff --git a/shared/secrets_masker/tests/secrets_masker/test_secrets_masker.py b/shared/secrets_masker/tests/secrets_masker/test_secrets_masker.py
index 1f107fee07e..c6990602284 100644
--- a/shared/secrets_masker/tests/secrets_masker/test_secrets_masker.py
+++ b/shared/secrets_masker/tests/secrets_masker/test_secrets_masker.py
@@ -834,6 +834,13 @@ class TestShouldHideValueForKey:
("custom_auth_header", True),
("service_key", True),
("my_service_key", True),
+ ("spark.hadoop.fs.s3a.bucket.spark.access.key", True),
+ ("spark.hadoop.fs.s3a.bucket.spark.secret.key", True),
+ ("spark.sql.catalog.kometa.token", True),
+ ("my-access-key", True),
+ ("auth.example.com/token", True),
+ ("spark.executor.memory", False),
+ ("spark.driver.cores", False),
],
)
def test_hiding_defaults(self, key, expected_result):