This is an automated email from the ASF dual-hosted git repository.
jason810496 pushed a commit to branch v3-2-test
in repository https://gitbox.apache.org/repos/asf/airflow.git
The following commit(s) were added to refs/heads/v3-2-test by this push:
new 43acb478210 [v3-2-test] Redact secret-looking query parameters in HTTP
access log (#67498) (#67631)
43acb478210 is described below
commit 43acb478210cfafc06ca669e19a282c071bcf6d3
Author: Jason(Zhe-You) Liu <[email protected]>
AuthorDate: Fri May 29 17:48:39 2026 +0800
[v3-2-test] Redact secret-looking query parameters in HTTP access log
(#67498) (#67631)
The HTTP access log middleware logged the raw query string without
passing it through ``secrets_masker.redact()``. The decorator-layer
audit log already masks request data; the access-log layer did not.
A secret inadvertently passed as a query parameter (e.g.
``?password=foo`` or ``?token=bar``) was therefore written to the
access log in plaintext.
Parse the query string into ``(key, value)`` pairs and call
``secrets_masker.redact(value, key)`` per pair before logging. This
matches the pattern already used in ``logging/decorators.py``: keys
whose names are flagged sensitive by ``secrets_masker`` (``password``,
``token``, ``api_key``, …) have their values replaced with ``***``;
values previously registered via ``mask_secret()`` are caught too.
Non-sensitive keys are unchanged, blank values are preserved so log
readers still see the parameter was present, and malformed query
strings fall back to raw logging rather than silently dropping
diagnostic information.
(cherry picked from commit aa3b7d4)
Co-authored-by: Jarek Potiuk <[email protected]>
---
.../airflow/api_fastapi/common/http_access_log.py | 28 ++++++++++-
.../api_fastapi/common/test_http_access_log.py | 55 +++++++++++++++++++++-
2 files changed, 81 insertions(+), 2 deletions(-)
diff --git a/airflow-core/src/airflow/api_fastapi/common/http_access_log.py
b/airflow-core/src/airflow/api_fastapi/common/http_access_log.py
index 3a298c9c21f..ad37b094a9f 100644
--- a/airflow-core/src/airflow/api_fastapi/common/http_access_log.py
+++ b/airflow-core/src/airflow/api_fastapi/common/http_access_log.py
@@ -21,9 +21,12 @@ from __future__ import annotations
import contextlib
import time
from typing import TYPE_CHECKING
+from urllib.parse import parse_qsl, urlencode
import structlog
+from airflow._shared.secrets_masker import secrets_masker
+
if TYPE_CHECKING:
from starlette.types import ASGIApp, Message, Receive, Scope, Send
@@ -32,6 +35,29 @@ logger = structlog.get_logger(logger_name="http.access")
_HEALTH_PATHS = frozenset(["/api/v2/monitor/health"])
+def _redact_query_string(query: str) -> str:
+ """
+ Redact secret-looking query parameters before they reach the access log.
+
+ Treat each ``key=value`` pair independently so a key whose name signals a
secret
+ (``password``, ``token``, ``api_key`` — anything ``secrets_masker`` flags
as sensitive)
+ gets its value replaced with ``***``. Also catches values that were
previously registered
+ via ``mask_secret()``.
+ """
+ if not query:
+ return query
+ try:
+ pairs = parse_qsl(query, keep_blank_values=True)
+ except ValueError:
+ # Malformed query string — leave it alone; we'd rather log the raw
bytes than
+ # silently drop diagnostic information.
+ return query
+ if not pairs:
+ return query
+ redacted_pairs = [(k, secrets_masker.redact(v, k)) for k, v in pairs]
+ return urlencode(redacted_pairs)
+
+
class HttpAccessLogMiddleware:
"""
Log completed HTTP requests as structured log events.
@@ -91,7 +117,7 @@ class HttpAccessLogMiddleware:
duration_us = (time.monotonic_ns() - start) // 1000
status = response["status"] if response is not None else 0
method = scope.get("method", "")
- query = scope["query_string"].decode("ascii",
errors="replace")
+ query =
_redact_query_string(scope["query_string"].decode("ascii", errors="replace"))
client = scope.get("client")
client_addr = f"{client[0]}:{client[1]}" if client else
None
diff --git a/airflow-core/tests/unit/api_fastapi/common/test_http_access_log.py
b/airflow-core/tests/unit/api_fastapi/common/test_http_access_log.py
index 5542510aa4a..882f93a3151 100644
--- a/airflow-core/tests/unit/api_fastapi/common/test_http_access_log.py
+++ b/airflow-core/tests/unit/api_fastapi/common/test_http_access_log.py
@@ -24,7 +24,29 @@ from starlette.responses import PlainTextResponse
from starlette.routing import Route
from starlette.testclient import TestClient
-from airflow.api_fastapi.common.http_access_log import _HEALTH_PATHS,
HttpAccessLogMiddleware
+from airflow._shared.secrets_masker import _secrets_masker
+from airflow.api_fastapi.common.http_access_log import (
+ _HEALTH_PATHS,
+ HttpAccessLogMiddleware,
+ _redact_query_string,
+)
+
+
[email protected]
+def _password_sensitive_field():
+ """Register ``password`` as a sensitive field name on the module-level
masker.
+
+ Production initialises this list from ``DEFAULT_SENSITIVE_FIELDS`` via
+ ``settings.mask_secret``; unit tests run without that initialisation, so we
+ populate the field explicitly for the redaction tests.
+ """
+ masker = _secrets_masker()
+ original = masker.sensitive_variables_fields
+ masker.sensitive_variables_fields = list(set(original) | {"password"})
+ try:
+ yield
+ finally:
+ masker.sensitive_variables_fields = original
def _make_app(raise_exc: bool = False) -> Starlette:
@@ -133,6 +155,37 @@ def test_health_paths_constant():
assert "/api/v2/monitor/health" in _HEALTH_PATHS
[email protected]_redact
+def
test_redact_query_string_masks_value_by_sensitive_key_name(_password_sensitive_field):
+ """A key flagged sensitive by ``secrets_masker`` has its value replaced
with ``***``."""
+ redacted = _redact_query_string("password=topsecret&safe=value")
+ assert "topsecret" not in redacted
+ assert "safe=value" in redacted
+
+
+def test_redact_query_string_leaves_safe_pairs_untouched():
+ assert _redact_query_string("page=2&limit=50") == "page=2&limit=50"
+
+
+def test_redact_query_string_handles_empty_and_blank_values():
+ assert _redact_query_string("") == ""
+ # Blank values should be preserved so log readers still see the key was
present.
+ assert _redact_query_string("flag=&other=x") == "flag=&other=x"
+
+
[email protected]_redact
+def test_logs_redact_sensitive_query_param(_password_sensitive_field):
+ """Integration: a request with `?password=secret` is logged with the value
masked."""
+ with structlog.testing.capture_logs() as logs:
+ client = TestClient(_make_app(), raise_server_exceptions=False)
+ client.get("/?password=topsecret&keep=ok")
+
+ assert len(logs) == 1
+ query = logs[0]["query"]
+ assert "topsecret" not in query
+ assert "keep=ok" in query
+
+
def test_logger_failure_does_not_mask_app_exception(monkeypatch):
"""
If ``logger.info`` raises while the app already raised, the original app
exception must