This is an automated email from the ASF dual-hosted git repository.
potiuk pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/airflow.git
The following commit(s) were added to refs/heads/main by this push:
new 1260d9c042 Better handling masking of values of set variable (#43123)
1260d9c042 is described below
commit 1260d9c042f7a6351a4b67611ce39f91764dcbdf
Author: Shubham Raj <[email protected]>
AuthorDate: Wed Oct 23 02:32:03 2024 +0530
Better handling masking of values of set variable (#43123)
---
airflow/utils/cli.py | 12 ++++++++++--
tests/utils/test_cli_util.py | 41 +++++++++++++++++++++++++++++++++++++++++
2 files changed, 51 insertions(+), 2 deletions(-)
diff --git a/airflow/utils/cli.py b/airflow/utils/cli.py
index e8b9e6f7d2..1142c5ba0b 100644
--- a/airflow/utils/cli.py
+++ b/airflow/utils/cli.py
@@ -39,6 +39,7 @@ from airflow.api_internal.internal_api_call import
InternalApiConfig
from airflow.exceptions import AirflowException
from airflow.utils import cli_action_loggers, timezone
from airflow.utils.log.non_caching_file_handler import NonCachingFileHandler
+from airflow.utils.log.secrets_masker import should_hide_value_for_key
from airflow.utils.platform import getuser, is_terminal_support_colors
from airflow.utils.session import NEW_SESSION, provide_session
@@ -139,11 +140,18 @@ def _build_metrics(func_name, namespace):
:param namespace: Namespace instance from argparse
:return: dict with metrics
"""
- sub_commands_to_check = {"users", "connections"}
+ sub_commands_to_check_for_sensitive_fields = {"users", "connections"}
+ sub_commands_to_check_for_sensitive_key = {"variables"}
sensitive_fields = {"-p", "--password", "--conn-password"}
full_command = list(sys.argv)
sub_command = full_command[1] if len(full_command) > 1 else None
- if sub_command in sub_commands_to_check:
+ # For cases when value under sub_commands_to_check_for_sensitive_key have
sensitive info
+ if sub_command in sub_commands_to_check_for_sensitive_key:
+ key = full_command[-2] if len(full_command) > 3 else None
+ if key and should_hide_value_for_key(key):
+ # Mask the sensitive value since key contain sensitive keyword
+ full_command[-1] = "*" * 8
+ elif sub_command in sub_commands_to_check_for_sensitive_fields:
for idx, command in enumerate(full_command):
if command in sensitive_fields:
# For cases when password is passed as "--password xyz" (with
space between key and value)
diff --git a/tests/utils/test_cli_util.py b/tests/utils/test_cli_util.py
index 25003eec6d..ba018cdad3 100644
--- a/tests/utils/test_cli_util.py
+++ b/tests/utils/test_cli_util.py
@@ -185,6 +185,47 @@ class TestCliUtil:
with pytest.raises(AirflowException, match="pickle_id could not be
found .* -42"):
get_dag_by_pickle(pickle_id=-42, session=session)
+ @pytest.mark.parametrize(
+ ["given_command", "expected_masked_command"],
+ [
+ (
+ "airflow variables set --description 'needed for dag 4'
client_secret_234 7fh4375f5gy353wdf",
+ "airflow variables set --description 'needed for dag 4'
client_secret_234 ********",
+ ),
+ (
+ "airflow variables set cust_secret_234 7fh4375f5gy353wdf",
+ "airflow variables set cust_secret_234 ********",
+ ),
+ ],
+ )
+ def test_cli_set_variable_supplied_sensitive_value_is_masked(
+ self, given_command, expected_masked_command, session
+ ):
+ args = given_command.split()
+
+ expected_command = expected_masked_command.split()
+
+ exec_date = timezone.utcnow()
+ namespace = Namespace(dag_id="foo", task_id="bar", subcommand="test",
execution_date=exec_date)
+ with mock.patch.object(sys, "argv", args), mock.patch(
+ "airflow.utils.session.create_session"
+ ) as mock_create_session:
+ metrics = cli._build_metrics(args[1], namespace)
+ # Make it so the default_action_log doesn't actually commit the
txn, by giving it a next txn
+ # instead
+ mock_create_session.return_value = session.begin_nested()
+ mock_create_session.return_value.bulk_insert_mappings =
session.bulk_insert_mappings
+ cli_action_loggers.default_action_log(**metrics)
+
+ log = session.query(Log).order_by(Log.dttm.desc()).first()
+
+ assert metrics.get("start_datetime") <= timezone.utcnow()
+
+ command: str = json.loads(log.extra).get("full_command")
+ # Replace single quotes to double quotes to avoid json decode error
+ command = ast.literal_eval(command)
+ assert command == expected_command
+
@contextmanager
def fail_action_logger_callback():