This is an automated email from the ASF dual-hosted git repository.

potiuk pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/airflow.git


The following commit(s) were added to refs/heads/main by this push:
     new aa3b7d4f59a Redact secret-looking query parameters in HTTP access log 
(#67498)
aa3b7d4f59a is described below

commit aa3b7d4f59ac362d446d716b171dae84392ce8ad
Author: Jarek Potiuk <[email protected]>
AuthorDate: Tue May 26 00:09:12 2026 +0200

    Redact secret-looking query parameters in HTTP access log (#67498)
    
    The HTTP access log middleware logged the raw query string without
    passing it through ``secrets_masker.redact()``. The decorator-layer
    audit log already masks request data; the access-log layer did not.
    A secret inadvertently passed as a query parameter (e.g.
    ``?password=foo`` or ``?token=bar``) was therefore written to the
    access log in plaintext.
    
    Parse the query string into ``(key, value)`` pairs and call
    ``secrets_masker.redact(value, key)`` per pair before logging. This
    matches the pattern already used in ``logging/decorators.py``: keys
    whose names are flagged sensitive by ``secrets_masker`` (``password``,
    ``token``, ``api_key``, …) have their values replaced with ``***``;
    values previously registered via ``mask_secret()`` are caught too.
    
    Non-sensitive keys are unchanged, blank values are preserved so log
    readers still see the parameter was present, and malformed query
    strings fall back to raw logging rather than silently dropping
    diagnostic information.
---
 .../airflow/api_fastapi/common/http_access_log.py  | 28 ++++++++++-
 .../api_fastapi/common/test_http_access_log.py     | 56 +++++++++++++++++++++-
 2 files changed, 82 insertions(+), 2 deletions(-)

diff --git a/airflow-core/src/airflow/api_fastapi/common/http_access_log.py 
b/airflow-core/src/airflow/api_fastapi/common/http_access_log.py
index 581c3593790..efaf570c580 100644
--- a/airflow-core/src/airflow/api_fastapi/common/http_access_log.py
+++ b/airflow-core/src/airflow/api_fastapi/common/http_access_log.py
@@ -21,9 +21,12 @@ from __future__ import annotations
 import contextlib
 import time
 from typing import TYPE_CHECKING
+from urllib.parse import parse_qsl, urlencode
 
 import structlog
 
+from airflow._shared.secrets_masker import secrets_masker
+
 if TYPE_CHECKING:
     from starlette.types import ASGIApp, Message, Receive, Scope, Send
 
@@ -32,6 +35,29 @@ logger = structlog.get_logger(logger_name="http.access")
 _HEALTH_PATHS = frozenset(["/api/v2/monitor/health"])
 
 
+def _redact_query_string(query: str) -> str:
+    """
+    Redact secret-looking query parameters before they reach the access log.
+
+    Treat each ``key=value`` pair independently so a key whose name signals a 
secret
+    (``password``, ``token``, ``api_key`` — anything ``secrets_masker`` flags 
as sensitive)
+    gets its value replaced with ``***``. Also catches values that were 
previously registered
+    via ``mask_secret()``.
+    """
+    if not query:
+        return query
+    try:
+        pairs = parse_qsl(query, keep_blank_values=True)
+    except ValueError:
+        # Malformed query string — leave it alone; we'd rather log the raw 
bytes than
+        # silently drop diagnostic information.
+        return query
+    if not pairs:
+        return query
+    redacted_pairs = [(k, secrets_masker.redact(v, k)) for k, v in pairs]
+    return urlencode(redacted_pairs)
+
+
 class HttpAccessLogMiddleware:
     """
     Log completed HTTP requests as structured log events.
@@ -91,7 +117,7 @@ class HttpAccessLogMiddleware:
                     duration_us = (time.monotonic_ns() - start) // 1000
                     status = response["status"] if response is not None else 0
                     method = scope.get("method", "")
-                    query = scope["query_string"].decode("ascii", 
errors="replace")
+                    query = 
_redact_query_string(scope["query_string"].decode("ascii", errors="replace"))
                     client = scope.get("client")
                     client_addr = f"{client[0]}:{client[1]}" if client else 
None
 
diff --git a/airflow-core/tests/unit/api_fastapi/common/test_http_access_log.py 
b/airflow-core/tests/unit/api_fastapi/common/test_http_access_log.py
index 5e7e9e6e760..243049aedbe 100644
--- a/airflow-core/tests/unit/api_fastapi/common/test_http_access_log.py
+++ b/airflow-core/tests/unit/api_fastapi/common/test_http_access_log.py
@@ -16,6 +16,7 @@
 # under the License.
 from __future__ import annotations
 
+import pytest
 import structlog
 import structlog.testing
 from starlette.applications import Starlette
@@ -23,7 +24,29 @@ from starlette.responses import PlainTextResponse
 from starlette.routing import Route
 from starlette.testclient import TestClient
 
-from airflow.api_fastapi.common.http_access_log import _HEALTH_PATHS, 
HttpAccessLogMiddleware
+from airflow._shared.secrets_masker import _secrets_masker
+from airflow.api_fastapi.common.http_access_log import (
+    _HEALTH_PATHS,
+    HttpAccessLogMiddleware,
+    _redact_query_string,
+)
+
+
[email protected]
+def _password_sensitive_field():
+    """Register ``password`` as a sensitive field name on the module-level 
masker.
+
+    Production initialises this list from ``DEFAULT_SENSITIVE_FIELDS`` via
+    ``settings.mask_secret``; unit tests run without that initialisation, so we
+    populate the field explicitly for the redaction tests.
+    """
+    masker = _secrets_masker()
+    original = masker.sensitive_variables_fields
+    masker.sensitive_variables_fields = list(set(original) | {"password"})
+    try:
+        yield
+    finally:
+        masker.sensitive_variables_fields = original
 
 
 def _make_app(raise_exc: bool = False) -> Starlette:
@@ -130,3 +153,34 @@ def test_non_http_scope_not_logged():
 
 def test_health_paths_constant():
     assert "/api/v2/monitor/health" in _HEALTH_PATHS
+
+
[email protected]_redact
+def 
test_redact_query_string_masks_value_by_sensitive_key_name(_password_sensitive_field):
+    """A key flagged sensitive by ``secrets_masker`` has its value replaced 
with ``***``."""
+    redacted = _redact_query_string("password=topsecret&safe=value")
+    assert "topsecret" not in redacted
+    assert "safe=value" in redacted
+
+
+def test_redact_query_string_leaves_safe_pairs_untouched():
+    assert _redact_query_string("page=2&limit=50") == "page=2&limit=50"
+
+
+def test_redact_query_string_handles_empty_and_blank_values():
+    assert _redact_query_string("") == ""
+    # Blank values should be preserved so log readers still see the key was 
present.
+    assert _redact_query_string("flag=&other=x") == "flag=&other=x"
+
+
[email protected]_redact
+def test_logs_redact_sensitive_query_param(_password_sensitive_field):
+    """Integration: a request with `?password=secret` is logged with the value 
masked."""
+    with structlog.testing.capture_logs() as logs:
+        client = TestClient(_make_app(), raise_server_exceptions=False)
+        client.get("/?password=topsecret&keep=ok")
+
+    assert len(logs) == 1
+    query = logs[0]["query"]
+    assert "topsecret" not in query
+    assert "keep=ok" in query

Reply via email to