(airflow) branch main updated: Better handling masking of values of set variable (#43123)

potiuk Tue, 22 Oct 2024 14:02:16 -0700

This is an automated email from the ASF dual-hosted git repository.

potiuk pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/airflow.git



The following commit(s) were added to refs/heads/main by this push:
     new 1260d9c042 Better handling masking of values of set variable  (#43123)
1260d9c042 is described below

commit 1260d9c042f7a6351a4b67611ce39f91764dcbdf
Author: Shubham Raj <[email protected]>
AuthorDate: Wed Oct 23 02:32:03 2024 +0530

    Better handling masking of values of set variable  (#43123)
---
 airflow/utils/cli.py         | 12 ++++++++++--
 tests/utils/test_cli_util.py | 41 +++++++++++++++++++++++++++++++++++++++++
 2 files changed, 51 insertions(+), 2 deletions(-)

diff --git a/airflow/utils/cli.py b/airflow/utils/cli.py
index e8b9e6f7d2..1142c5ba0b 100644
--- a/airflow/utils/cli.py
+++ b/airflow/utils/cli.py
@@ -39,6 +39,7 @@ from airflow.api_internal.internal_api_call import 
InternalApiConfig
 from airflow.exceptions import AirflowException
 from airflow.utils import cli_action_loggers, timezone
 from airflow.utils.log.non_caching_file_handler import NonCachingFileHandler
+from airflow.utils.log.secrets_masker import should_hide_value_for_key
 from airflow.utils.platform import getuser, is_terminal_support_colors
 from airflow.utils.session import NEW_SESSION, provide_session
 
@@ -139,11 +140,18 @@ def _build_metrics(func_name, namespace):
     :param namespace: Namespace instance from argparse
     :return: dict with metrics
     """
-    sub_commands_to_check = {"users", "connections"}
+    sub_commands_to_check_for_sensitive_fields = {"users", "connections"}
+    sub_commands_to_check_for_sensitive_key = {"variables"}
     sensitive_fields = {"-p", "--password", "--conn-password"}
     full_command = list(sys.argv)
     sub_command = full_command[1] if len(full_command) > 1 else None
-    if sub_command in sub_commands_to_check:
+    # For cases when value under sub_commands_to_check_for_sensitive_key have 
sensitive info
+    if sub_command in sub_commands_to_check_for_sensitive_key:
+        key = full_command[-2] if len(full_command) > 3 else None
+        if key and should_hide_value_for_key(key):
+            # Mask the sensitive value since key contain sensitive keyword
+            full_command[-1] = "*" * 8
+    elif sub_command in sub_commands_to_check_for_sensitive_fields:
         for idx, command in enumerate(full_command):
             if command in sensitive_fields:
                 # For cases when password is passed as "--password xyz" (with 
space between key and value)
diff --git a/tests/utils/test_cli_util.py b/tests/utils/test_cli_util.py
index 25003eec6d..ba018cdad3 100644
--- a/tests/utils/test_cli_util.py
+++ b/tests/utils/test_cli_util.py
@@ -185,6 +185,47 @@ class TestCliUtil:
         with pytest.raises(AirflowException, match="pickle_id could not be 
found .* -42"):
             get_dag_by_pickle(pickle_id=-42, session=session)
 
+    @pytest.mark.parametrize(
+        ["given_command", "expected_masked_command"],
+        [
+            (
+                "airflow variables set --description 'needed for dag 4' 
client_secret_234 7fh4375f5gy353wdf",
+                "airflow variables set --description 'needed for dag 4' 
client_secret_234 ********",
+            ),
+            (
+                "airflow variables set cust_secret_234 7fh4375f5gy353wdf",
+                "airflow variables set cust_secret_234 ********",
+            ),
+        ],
+    )
+    def test_cli_set_variable_supplied_sensitive_value_is_masked(
+        self, given_command, expected_masked_command, session
+    ):
+        args = given_command.split()
+
+        expected_command = expected_masked_command.split()
+
+        exec_date = timezone.utcnow()
+        namespace = Namespace(dag_id="foo", task_id="bar", subcommand="test", 
execution_date=exec_date)
+        with mock.patch.object(sys, "argv", args), mock.patch(
+            "airflow.utils.session.create_session"
+        ) as mock_create_session:
+            metrics = cli._build_metrics(args[1], namespace)
+            # Make it so the default_action_log doesn't actually commit the 
txn, by giving it a next txn
+            # instead
+            mock_create_session.return_value = session.begin_nested()
+            mock_create_session.return_value.bulk_insert_mappings = 
session.bulk_insert_mappings
+            cli_action_loggers.default_action_log(**metrics)
+
+            log = session.query(Log).order_by(Log.dttm.desc()).first()
+
+        assert metrics.get("start_datetime") <= timezone.utcnow()
+
+        command: str = json.loads(log.extra).get("full_command")
+        # Replace single quotes to double quotes to avoid json decode error
+        command = ast.literal_eval(command)
+        assert command == expected_command
+
 
 @contextmanager
 def fail_action_logger_callback():

(airflow) branch main updated: Better handling masking of values of set variable (#43123)

Reply via email to