pierrejeambrun commented on code in PR #53943:
URL: https://github.com/apache/airflow/pull/53943#discussion_r2243620283
##########
task-sdk/tests/task_sdk/definitions/test_secrets_masker.py:
##########
@@ -729,3 +731,356 @@ def test_mixed_structured_unstructured_data(self):
assert "***" in redacted_data["description"]
assert redacted_data["nested"]["token"] == "***"
assert redacted_data["nested"]["info"] == "No secrets here"
+
+
+class TestSecretsMaskerMerge:
+ """Test the merge functionality for restoring original values from
redacted data."""
+
+ @pytest.mark.parametrize(
+ ("new_value", "old_value", "name", "expected"),
+ [
+ ("***", "original_secret", "password", "original_secret"),
+ ("new_secret", "original_secret", "password", "new_secret"),
+ ("***", "original_value", "normal_field", "***"),
+ ("new_value", "original_value", "normal_field", "new_value"),
+ ("***", "original_value", None, "***"),
+ ("new_value", "original_value", None, "new_value"),
+ ],
+ )
+ def test_merge_simple_strings(self, new_value, old_value, name, expected):
+ secrets_masker = SecretsMasker()
+ with
patch("airflow.sdk.execution_time.secrets_masker._secrets_masker",
return_value=secrets_masker):
+ result = merge(new_value, old_value, name)
+ assert result == expected
+
+ def test_merge_dictionaries(self):
+ secrets_masker = SecretsMasker()
+
+ old_data = {
+ "password": "original_password",
+ "api_key": "original_api_key",
+ "normal_field": "original_normal",
+ "token": "original_token",
+ }
+
+ new_data = {
+ "password": "***",
+ "api_key": "new_api_key",
+ "normal_field": "new_normal",
+ "token": "***",
+ }
+
+ expected = {
+ "password": "original_password",
+ "api_key": "new_api_key",
+ "normal_field": "new_normal",
+ "token": "original_token",
+ }
+
+ with
patch("airflow.sdk.execution_time.secrets_masker._secrets_masker",
return_value=secrets_masker):
+ result = merge(new_data, old_data)
+ assert result == expected
+
+ def test_merge_nested_dictionaries(self):
+ secrets_masker = SecretsMasker()
+
+ old_data = {
+ "config": {"password": "original_password", "host":
"original_host"},
+ "credentials": {"api_key": "original_api_key", "username":
"original_user"},
+ }
+
+ new_data = {
+ "config": {
+ "password": "***",
+ "host": "new_host",
+ },
+ "credentials": {
+ "api_key": "new_api_key",
+ "username": "new_user",
+ },
+ }
+
+ expected = {
+ "config": {
+ "password": "original_password",
+ "host": "new_host",
+ },
+ "credentials": {
+ "api_key": "new_api_key",
+ "username": "new_user",
+ },
+ }
+
+ with
patch("airflow.sdk.execution_time.secrets_masker._secrets_masker",
return_value=secrets_masker):
+ result = merge(new_data, old_data)
+ assert result == expected
+
+ @pytest.mark.parametrize(
+ ("old_data", "new_data", "name", "expected"),
+ [
+ # Lists
+ (
+ ["original_item1", "original_item2", "original_item3"],
+ ["new_item1", "new_item2"],
+ None,
+ ["new_item1", "new_item2"],
+ ),
+ (
+ ["original_item1", "original_item2"],
+ ["new_item1", "new_item2", "new_item3", "new_item4"],
+ None,
+ ["new_item1", "new_item2", "new_item3", "new_item4"],
+ ),
+ (
+ ["secret1", "secret2", "secret3"],
+ ["***", "new_secret2", "***"],
+ "password",
+ ["secret1", "new_secret2", "secret3"],
+ ),
+ (
+ ["value1", "value2", "value3"],
+ ["***", "new_value2", "***"],
+ "normal_list",
+ ["***", "new_value2", "***"],
+ ),
+ # Tuples
+ (
+ ("original_item1", "original_item2", "original_item3"),
+ ("new_item1", "new_item2"),
+ None,
+ ("new_item1", "new_item2"),
+ ),
+ (
+ ("original_item1", "original_item2"),
+ ("new_item1", "new_item2", "new_item3", "new_item4"),
+ None,
+ ("new_item1", "new_item2", "new_item3", "new_item4"),
+ ),
+ (
+ ("secret1", "secret2", "secret3"),
+ ("***", "new_secret2", "***"),
+ "password",
+ ("secret1", "new_secret2", "secret3"),
+ ),
+ (
+ ("value1", "value2", "value3"),
+ ("***", "new_value2", "***"),
+ "normal_tuple",
+ ("***", "new_value2", "***"),
+ ),
+ # Sets
+ (
+ {"original_item1", "original_item2", "original_item3"},
+ {"new_item1", "new_item2"},
+ None,
+ {"new_item1", "new_item2"},
+ ),
+ (
+ {"original_item1", "original_item2"},
+ {"new_item1", "new_item2", "new_item3", "new_item4"},
+ None,
+ {"new_item1", "new_item2", "new_item3", "new_item4"},
+ ),
+ (
+ {"secret1", "secret2", "secret3"},
+ {"***", "new_secret2", "***"},
+ "password",
+ {"***", "new_secret2", "***"},
+ ),
+ (
+ {"value1", "value2", "value3"},
+ {"***", "new_value2", "***"},
+ "normal_tuple",
+ {"***", "new_value2", "***"},
+ ),
+ # Mixed collections
+ (
+ ["original_item1", "original_item2", "original_item3"],
+ ("new_item1", "new_item2"),
+ None,
+ ("new_item1", "new_item2"),
+ ),
+ (
+ ("original_item1", "original_item2"),
+ ["new_item1", "new_item2", "new_item3", "new_item4"],
+ None,
+ ["new_item1", "new_item2", "new_item3", "new_item4"],
+ ),
+ (
+ ["secret1", "secret2", "secret3"],
+ ("***", "new_secret2", "***"),
+ "password",
+ ("secret1", "new_secret2", "secret3"),
+ ),
+ (
+ ("value1", "value2", "value3"),
+ ["***", "new_value2", "***"],
+ "normal_tuple",
+ ["***", "new_value2", "***"],
+ ),
+ ],
+ )
+ def test_merge_collections(self, old_data, new_data, name, expected):
+ secrets_masker = SecretsMasker()
+
+ with
patch("airflow.sdk.execution_time.secrets_masker._secrets_masker",
return_value=secrets_masker):
+ result = secrets_masker.merge(new_data, old_data, name)
+ assert result == expected
+
+ def test_merge_mismatched_types(self):
+ secrets_masker = SecretsMasker()
+
+ old_data = {"key": "value"}
+ new_data = "some_string" # Different type
+
+ # When types don't match, prefer the new item
+ expected = "some_string"
+
+ with
patch("airflow.sdk.execution_time.secrets_masker._secrets_masker",
return_value=secrets_masker):
+ result = merge(new_data, old_data)
+ assert result == expected
+
+ def test_merge_with_missing_keys(self):
+ secrets_masker = SecretsMasker()
+
+ old_data = {"password": "original_password", "old_only_key":
"old_value", "common_key": "old_common"}
+
+ new_data = {
+ "password": "***",
+ "new_only_key": "new_value",
+ "common_key": "new_common",
+ }
+
+ expected = {
+ "password": "original_password",
+ "new_only_key": "new_value",
+ "common_key": "new_common",
+ }
+
+ with
patch("airflow.sdk.execution_time.secrets_masker._secrets_masker",
return_value=secrets_masker):
+ result = merge(new_data, old_data)
+ assert result == expected
+
+ def test_merge_complex_redacted_structures(self):
+ secrets_masker = SecretsMasker()
+
+ old_data = {
+ "some_config": {
+ "nested_password": "original_nested_password",
+ "nested_list": ["item1", "item2"],
+ },
+ "normal_field": "normal_value",
+ }
+
+ new_data = {
+ "some_config": {"nested_password": "***", "nested_list": ["***",
"***"]},
+ "normal_field": "new_normal_value",
+ }
+
+ with
patch("airflow.sdk.execution_time.secrets_masker._secrets_masker",
return_value=secrets_masker):
+ result = merge(new_data, old_data)
+ expected = {
+ "some_config": {
+ "nested_password": "original_nested_password",
+ "nested_list": ["***", "***"],
+ },
+ "normal_field": "new_normal_value",
+ }
+ assert result == expected
+
+ def test_merge_partially_redacted_structures(self):
+ secrets_masker = SecretsMasker()
+
+ old_data = {
+ "config": {
+ "password": "original_password",
+ "host": "original_host",
+ "nested": {"api_key": "original_api_key", "timeout": 30},
+ }
+ }
+
+ new_data = {
+ "config": {
+ "password": "***",
+ "host": "new_host",
+ "nested": {
+ "api_key": "***",
+ "timeout": 60,
+ },
+ }
+ }
+
+ expected = {
+ "config": {
+ "password": "original_password",
+ "host": "new_host",
+ "nested": {
+ "api_key": "original_api_key",
+ "timeout": 60,
+ },
+ }
+ }
+
+ with
patch("airflow.sdk.execution_time.secrets_masker._secrets_masker",
return_value=secrets_masker):
+ result = merge(new_data, old_data)
+ assert result == expected
+
+ def test_merge_max_depth(self):
+ secrets_masker = SecretsMasker()
+
+ old_data = {"level1": {"level2": {"level3": {"password":
"original_password"}}}}
+ new_data = {"level1": {"level2": {"level3": {"password": "***"}}}}
+
+ with
patch("airflow.sdk.execution_time.secrets_masker._secrets_masker",
return_value=secrets_masker):
+ result = merge(new_data, old_data, max_depth=1)
+ assert result == new_data
+
+ result = merge(new_data, old_data, max_depth=10)
+ assert result["level1"]["level2"]["level3"]["password"] ==
"original_password"
+
+ def test_merge_enum_values(self):
+ secrets_masker = SecretsMasker()
+
+ old_enum = MyEnum.testname
+ new_enum = MyEnum.testname2
+
+ with
patch("airflow.sdk.execution_time.secrets_masker._secrets_masker",
return_value=secrets_masker):
+ result = merge(new_enum, old_enum)
+ assert result == new_enum
+ assert isinstance(result, MyEnum)
Review Comment:
This just verifies that Enums are not merged and that 'new_value' is always taken,
without trying to merge anything, because an Enum value can't be a redacted '***'.
##########
task-sdk/tests/task_sdk/definitions/test_secrets_masker.py:
##########
@@ -729,3 +731,356 @@ def test_mixed_structured_unstructured_data(self):
assert "***" in redacted_data["description"]
assert redacted_data["nested"]["token"] == "***"
assert redacted_data["nested"]["info"] == "No secrets here"
+
+
+class TestSecretsMaskerMerge:
+ """Test the merge functionality for restoring original values from
redacted data."""
+
+ @pytest.mark.parametrize(
+ ("new_value", "old_value", "name", "expected"),
+ [
+ ("***", "original_secret", "password", "original_secret"),
+ ("new_secret", "original_secret", "password", "new_secret"),
+ ("***", "original_value", "normal_field", "***"),
+ ("new_value", "original_value", "normal_field", "new_value"),
+ ("***", "original_value", None, "***"),
+ ("new_value", "original_value", None, "new_value"),
+ ],
+ )
+ def test_merge_simple_strings(self, new_value, old_value, name, expected):
+ secrets_masker = SecretsMasker()
+ with
patch("airflow.sdk.execution_time.secrets_masker._secrets_masker",
return_value=secrets_masker):
+ result = merge(new_value, old_value, name)
+ assert result == expected
+
+ def test_merge_dictionaries(self):
+ secrets_masker = SecretsMasker()
+
+ old_data = {
+ "password": "original_password",
+ "api_key": "original_api_key",
+ "normal_field": "original_normal",
+ "token": "original_token",
+ }
+
+ new_data = {
+ "password": "***",
+ "api_key": "new_api_key",
+ "normal_field": "new_normal",
+ "token": "***",
+ }
+
+ expected = {
+ "password": "original_password",
+ "api_key": "new_api_key",
+ "normal_field": "new_normal",
+ "token": "original_token",
+ }
+
+ with
patch("airflow.sdk.execution_time.secrets_masker._secrets_masker",
return_value=secrets_masker):
+ result = merge(new_data, old_data)
+ assert result == expected
+
+ def test_merge_nested_dictionaries(self):
+ secrets_masker = SecretsMasker()
+
+ old_data = {
+ "config": {"password": "original_password", "host":
"original_host"},
+ "credentials": {"api_key": "original_api_key", "username":
"original_user"},
+ }
+
+ new_data = {
+ "config": {
+ "password": "***",
+ "host": "new_host",
+ },
+ "credentials": {
+ "api_key": "new_api_key",
+ "username": "new_user",
+ },
+ }
+
+ expected = {
+ "config": {
+ "password": "original_password",
+ "host": "new_host",
+ },
+ "credentials": {
+ "api_key": "new_api_key",
+ "username": "new_user",
+ },
+ }
+
+ with
patch("airflow.sdk.execution_time.secrets_masker._secrets_masker",
return_value=secrets_masker):
+ result = merge(new_data, old_data)
+ assert result == expected
+
+ @pytest.mark.parametrize(
+ ("old_data", "new_data", "name", "expected"),
+ [
+ # Lists
+ (
+ ["original_item1", "original_item2", "original_item3"],
+ ["new_item1", "new_item2"],
+ None,
+ ["new_item1", "new_item2"],
+ ),
+ (
+ ["original_item1", "original_item2"],
+ ["new_item1", "new_item2", "new_item3", "new_item4"],
+ None,
+ ["new_item1", "new_item2", "new_item3", "new_item4"],
+ ),
+ (
+ ["secret1", "secret2", "secret3"],
+ ["***", "new_secret2", "***"],
+ "password",
+ ["secret1", "new_secret2", "secret3"],
+ ),
+ (
+ ["value1", "value2", "value3"],
+ ["***", "new_value2", "***"],
+ "normal_list",
+ ["***", "new_value2", "***"],
+ ),
+ # Tuples
+ (
+ ("original_item1", "original_item2", "original_item3"),
+ ("new_item1", "new_item2"),
+ None,
+ ("new_item1", "new_item2"),
+ ),
+ (
+ ("original_item1", "original_item2"),
+ ("new_item1", "new_item2", "new_item3", "new_item4"),
+ None,
+ ("new_item1", "new_item2", "new_item3", "new_item4"),
+ ),
+ (
+ ("secret1", "secret2", "secret3"),
+ ("***", "new_secret2", "***"),
+ "password",
+ ("secret1", "new_secret2", "secret3"),
+ ),
+ (
+ ("value1", "value2", "value3"),
+ ("***", "new_value2", "***"),
+ "normal_tuple",
+ ("***", "new_value2", "***"),
+ ),
+ # Sets
+ (
+ {"original_item1", "original_item2", "original_item3"},
+ {"new_item1", "new_item2"},
+ None,
+ {"new_item1", "new_item2"},
+ ),
+ (
+ {"original_item1", "original_item2"},
+ {"new_item1", "new_item2", "new_item3", "new_item4"},
+ None,
+ {"new_item1", "new_item2", "new_item3", "new_item4"},
+ ),
+ (
+ {"secret1", "secret2", "secret3"},
+ {"***", "new_secret2", "***"},
+ "password",
+ {"***", "new_secret2", "***"},
Review Comment:
Yes, this is what is happening there. 'merge' does nothing for sets: because
there is no order enforced, we can't merge anything, so we simply take whatever
the user specified there.
Maybe we should raise an explicit error?
##########
task-sdk/tests/task_sdk/definitions/test_secrets_masker.py:
##########
@@ -729,3 +731,356 @@ def test_mixed_structured_unstructured_data(self):
assert "***" in redacted_data["description"]
assert redacted_data["nested"]["token"] == "***"
assert redacted_data["nested"]["info"] == "No secrets here"
+
+
+class TestSecretsMaskerMerge:
+ """Test the merge functionality for restoring original values from
redacted data."""
+
+ @pytest.mark.parametrize(
+ ("new_value", "old_value", "name", "expected"),
+ [
+ ("***", "original_secret", "password", "original_secret"),
+ ("new_secret", "original_secret", "password", "new_secret"),
+ ("***", "original_value", "normal_field", "***"),
+ ("new_value", "original_value", "normal_field", "new_value"),
+ ("***", "original_value", None, "***"),
+ ("new_value", "original_value", None, "new_value"),
+ ],
+ )
+ def test_merge_simple_strings(self, new_value, old_value, name, expected):
+ secrets_masker = SecretsMasker()
+ with
patch("airflow.sdk.execution_time.secrets_masker._secrets_masker",
return_value=secrets_masker):
+ result = merge(new_value, old_value, name)
+ assert result == expected
+
+ def test_merge_dictionaries(self):
+ secrets_masker = SecretsMasker()
+
+ old_data = {
+ "password": "original_password",
+ "api_key": "original_api_key",
+ "normal_field": "original_normal",
+ "token": "original_token",
+ }
+
+ new_data = {
+ "password": "***",
+ "api_key": "new_api_key",
+ "normal_field": "new_normal",
+ "token": "***",
+ }
+
+ expected = {
+ "password": "original_password",
+ "api_key": "new_api_key",
+ "normal_field": "new_normal",
+ "token": "original_token",
+ }
+
+ with
patch("airflow.sdk.execution_time.secrets_masker._secrets_masker",
return_value=secrets_masker):
+ result = merge(new_data, old_data)
+ assert result == expected
+
+ def test_merge_nested_dictionaries(self):
+ secrets_masker = SecretsMasker()
+
+ old_data = {
+ "config": {"password": "original_password", "host":
"original_host"},
+ "credentials": {"api_key": "original_api_key", "username":
"original_user"},
+ }
+
+ new_data = {
+ "config": {
+ "password": "***",
+ "host": "new_host",
+ },
+ "credentials": {
+ "api_key": "new_api_key",
+ "username": "new_user",
+ },
+ }
+
+ expected = {
+ "config": {
+ "password": "original_password",
+ "host": "new_host",
+ },
+ "credentials": {
+ "api_key": "new_api_key",
+ "username": "new_user",
+ },
+ }
+
+ with
patch("airflow.sdk.execution_time.secrets_masker._secrets_masker",
return_value=secrets_masker):
+ result = merge(new_data, old_data)
+ assert result == expected
+
+ @pytest.mark.parametrize(
+ ("old_data", "new_data", "name", "expected"),
+ [
+ # Lists
+ (
+ ["original_item1", "original_item2", "original_item3"],
+ ["new_item1", "new_item2"],
+ None,
+ ["new_item1", "new_item2"],
+ ),
+ (
+ ["original_item1", "original_item2"],
+ ["new_item1", "new_item2", "new_item3", "new_item4"],
+ None,
+ ["new_item1", "new_item2", "new_item3", "new_item4"],
+ ),
+ (
+ ["secret1", "secret2", "secret3"],
+ ["***", "new_secret2", "***"],
+ "password",
+ ["secret1", "new_secret2", "secret3"],
+ ),
+ (
+ ["value1", "value2", "value3"],
+ ["***", "new_value2", "***"],
+ "normal_list",
+ ["***", "new_value2", "***"],
+ ),
+ # Tuples
+ (
+ ("original_item1", "original_item2", "original_item3"),
+ ("new_item1", "new_item2"),
+ None,
+ ("new_item1", "new_item2"),
+ ),
+ (
+ ("original_item1", "original_item2"),
+ ("new_item1", "new_item2", "new_item3", "new_item4"),
+ None,
+ ("new_item1", "new_item2", "new_item3", "new_item4"),
+ ),
+ (
+ ("secret1", "secret2", "secret3"),
+ ("***", "new_secret2", "***"),
+ "password",
+ ("secret1", "new_secret2", "secret3"),
+ ),
+ (
+ ("value1", "value2", "value3"),
+ ("***", "new_value2", "***"),
+ "normal_tuple",
+ ("***", "new_value2", "***"),
+ ),
+ # Sets
+ (
+ {"original_item1", "original_item2", "original_item3"},
+ {"new_item1", "new_item2"},
+ None,
+ {"new_item1", "new_item2"},
+ ),
+ (
+ {"original_item1", "original_item2"},
+ {"new_item1", "new_item2", "new_item3", "new_item4"},
+ None,
+ {"new_item1", "new_item2", "new_item3", "new_item4"},
+ ),
+ (
+ {"secret1", "secret2", "secret3"},
+ {"***", "new_secret2", "***"},
+ "password",
+ {"***", "new_secret2", "***"},
+ ),
+ (
+ {"value1", "value2", "value3"},
+ {"***", "new_value2", "***"},
+ "normal_tuple",
+ {"***", "new_value2", "***"},
+ ),
Review Comment:
The name of the field is `normal_tuple`, so this is not a sensitive field. But I
could remove this case: since we do not do anything for sets, it should behave
the same as the 'sensitive field' case, which is to do nothing.
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]