This is an automated email from the ASF dual-hosted git repository.

jason810496 pushed a commit to branch v3-2-test
in repository https://gitbox.apache.org/repos/asf/airflow.git


The following commit(s) were added to refs/heads/v3-2-test by this push:
     new 43acb478210 [v3-2-test] Redact secret-looking query parameters in HTTP access log (#67498) (#67631)
43acb478210 is described below

commit 43acb478210cfafc06ca669e19a282c071bcf6d3
Author: Jason(Zhe-You) Liu <[email protected]>
AuthorDate: Fri May 29 17:48:39 2026 +0800

    [v3-2-test] Redact secret-looking query parameters in HTTP access log (#67498) (#67631)
    
    The HTTP access log middleware logged the raw query string without
    passing it through ``secrets_masker.redact()``. The decorator-layer
    audit log already masks request data; the access-log layer did not.
    A secret inadvertently passed as a query parameter (e.g.
    ``?password=foo`` or ``?token=bar``) was therefore written to the
    access log in plaintext.
    
    Parse the query string into ``(key, value)`` pairs and call
    ``secrets_masker.redact(value, key)`` per pair before logging. This
    matches the pattern already used in ``logging/decorators.py``: keys
    whose names are flagged sensitive by ``secrets_masker`` (``password``,
    ``token``, ``api_key``, …) have their values replaced with ``***``;
    values previously registered via ``mask_secret()`` are caught too.
    
    Non-sensitive keys are unchanged, blank values are preserved so log
    readers still see the parameter was present, and malformed query
    strings fall back to raw logging rather than silently dropping
    diagnostic information.
    (cherry picked from commit aa3b7d4)
    
    Co-authored-by: Jarek Potiuk <[email protected]>
---
 .../airflow/api_fastapi/common/http_access_log.py  | 28 ++++++++++-
 .../api_fastapi/common/test_http_access_log.py     | 55 +++++++++++++++++++++-
 2 files changed, 81 insertions(+), 2 deletions(-)

diff --git a/airflow-core/src/airflow/api_fastapi/common/http_access_log.py b/airflow-core/src/airflow/api_fastapi/common/http_access_log.py
index 3a298c9c21f..ad37b094a9f 100644
--- a/airflow-core/src/airflow/api_fastapi/common/http_access_log.py
+++ b/airflow-core/src/airflow/api_fastapi/common/http_access_log.py
@@ -21,9 +21,12 @@ from __future__ import annotations
 import contextlib
 import time
 from typing import TYPE_CHECKING
+from urllib.parse import parse_qsl, urlencode
 
 import structlog
 
+from airflow._shared.secrets_masker import secrets_masker
+
 if TYPE_CHECKING:
     from starlette.types import ASGIApp, Message, Receive, Scope, Send
 
@@ -32,6 +35,29 @@ logger = structlog.get_logger(logger_name="http.access")
 _HEALTH_PATHS = frozenset(["/api/v2/monitor/health"])
 
 
+def _redact_query_string(query: str) -> str:
+    """
+    Redact secret-looking query parameters before they reach the access log.
+
+    Treat each ``key=value`` pair independently so a key whose name signals a secret
+    (``password``, ``token``, ``api_key`` — anything ``secrets_masker`` flags as sensitive)
+    gets its value replaced with ``***``. Also catches values that were previously registered
+    via ``mask_secret()``.
+    """
+    if not query:
+        return query
+    try:
+        pairs = parse_qsl(query, keep_blank_values=True)
+    except ValueError:
+        # Malformed query string — leave it alone; we'd rather log the raw bytes than
+        # silently drop diagnostic information.
+        return query
+    if not pairs:
+        return query
+    redacted_pairs = [(k, secrets_masker.redact(v, k)) for k, v in pairs]
+    return urlencode(redacted_pairs)
+
+
 class HttpAccessLogMiddleware:
     """
     Log completed HTTP requests as structured log events.
@@ -91,7 +117,7 @@ class HttpAccessLogMiddleware:
                     duration_us = (time.monotonic_ns() - start) // 1000
                     status = response["status"] if response is not None else 0
                     method = scope.get("method", "")
-                    query = scope["query_string"].decode("ascii", errors="replace")
+                    query = _redact_query_string(scope["query_string"].decode("ascii", errors="replace"))
                     client = scope.get("client")
                     client_addr = f"{client[0]}:{client[1]}" if client else None
 
diff --git a/airflow-core/tests/unit/api_fastapi/common/test_http_access_log.py b/airflow-core/tests/unit/api_fastapi/common/test_http_access_log.py
index 5542510aa4a..882f93a3151 100644
--- a/airflow-core/tests/unit/api_fastapi/common/test_http_access_log.py
+++ b/airflow-core/tests/unit/api_fastapi/common/test_http_access_log.py
@@ -24,7 +24,29 @@ from starlette.responses import PlainTextResponse
 from starlette.routing import Route
 from starlette.testclient import TestClient
 
-from airflow.api_fastapi.common.http_access_log import _HEALTH_PATHS, HttpAccessLogMiddleware
+from airflow._shared.secrets_masker import _secrets_masker
+from airflow.api_fastapi.common.http_access_log import (
+    _HEALTH_PATHS,
+    HttpAccessLogMiddleware,
+    _redact_query_string,
+)
+
+
[email protected]
+def _password_sensitive_field():
+    """Register ``password`` as a sensitive field name on the module-level masker.
+
+    Production initialises this list from ``DEFAULT_SENSITIVE_FIELDS`` via
+    ``settings.mask_secret``; unit tests run without that initialisation, so we
+    populate the field explicitly for the redaction tests.
+    """
+    masker = _secrets_masker()
+    original = masker.sensitive_variables_fields
+    masker.sensitive_variables_fields = list(set(original) | {"password"})
+    try:
+        yield
+    finally:
+        masker.sensitive_variables_fields = original
 
 
 def _make_app(raise_exc: bool = False) -> Starlette:
@@ -133,6 +155,37 @@ def test_health_paths_constant():
     assert "/api/v2/monitor/health" in _HEALTH_PATHS
 
 
[email protected]_redact
+def test_redact_query_string_masks_value_by_sensitive_key_name(_password_sensitive_field):
+    """A key flagged sensitive by ``secrets_masker`` has its value replaced with ``***``."""
+    redacted = _redact_query_string("password=topsecret&safe=value")
+    assert "topsecret" not in redacted
+    assert "safe=value" in redacted
+
+
+def test_redact_query_string_leaves_safe_pairs_untouched():
+    assert _redact_query_string("page=2&limit=50") == "page=2&limit=50"
+
+
+def test_redact_query_string_handles_empty_and_blank_values():
+    assert _redact_query_string("") == ""
+    # Blank values should be preserved so log readers still see the key was present.
+    assert _redact_query_string("flag=&other=x") == "flag=&other=x"
+
+
[email protected]_redact
+def test_logs_redact_sensitive_query_param(_password_sensitive_field):
+    """Integration: a request with `?password=secret` is logged with the value masked."""
+    with structlog.testing.capture_logs() as logs:
+        client = TestClient(_make_app(), raise_server_exceptions=False)
+        client.get("/?password=topsecret&keep=ok")
+
+    assert len(logs) == 1
+    query = logs[0]["query"]
+    assert "topsecret" not in query
+    assert "keep=ok" in query
+
+
 def test_logger_failure_does_not_mask_app_exception(monkeypatch):
     """
     If ``logger.info`` raises while the app already raised, the original app exception must

Reply via email to