Unit03 commented on a change in pull request #9615:
URL: https://github.com/apache/airflow/pull/9615#discussion_r449802853
##########
File path: airflow/providers/apache/spark/hooks/spark_submit.py
##########
@@ -237,8 +237,8 @@ def _mask_cmd(self, connection_cmd):
# Mask any password related fields in application args with key value
pair
# where key contains password (case insensitive), e.g.
HivePassword='abc'
connection_cmd_masked = re.sub(
- r"(\S*?(?:secret|password)\S*?\s*=\s*')[^']*(?=')",
- r'\1******', ' '.join(connection_cmd), flags=re.I)
+ r"(\S*?(?:secret|password)\S*?\s*(?:=|\s+)(['\"]?))[^'^\"]+(\2)",
Review comment:
Done. Let me know which parts aren't clear enough. :)
##########
File path: tests/providers/apache/spark/hooks/test_spark_submit.py
##########
@@ -748,3 +750,64 @@ def test_k8s_process_on_kill(self, mock_popen,
mock_client_method):
client.delete_namespaced_pod.assert_called_once_with(
'spark-pi-edf2ace37be7353a958b38733a12f8e6-driver',
'mynamespace', **kwargs)
+
+
[email protected](
+ ("command", "expected"),
+ (
+ (
+ ("spark-submit", "foo", "--bar", "baz", "--password='secret'"),
+ "spark-submit foo --bar baz --password='******'",
+ ),
+ (
+ ("spark-submit", "foo", "--bar", "baz", '--password="secret"'),
+ 'spark-submit foo --bar baz --password="******"',
+ ),
+ (
+ ("spark-submit", "foo", "--bar", "baz", "--password=secret"),
+ "spark-submit foo --bar baz --password=******",
+ ),
+ (
+ ("spark-submit", "foo", "--bar", "baz", "--password 'secret'"),
+ "spark-submit foo --bar baz --password '******'",
+ ),
+ (
+ ("spark-submit", "foo", "--bar", "baz", "--password secret"),
+ "spark-submit foo --bar baz --password ******",
+ ),
+ (
+ ("spark-submit", "foo", "--bar", "baz", '--password "secret"'),
+ 'spark-submit foo --bar baz --password "******"',
+ ),
+ (
+ ("spark-submit", "foo", "--bar", "baz", "--secret='secret'"),
+ "spark-submit foo --bar baz --secret='******'",
+ ),
+ (
+ ("spark-submit", "foo", "--bar", "baz", "--foo.password='secret'"),
+ "spark-submit foo --bar baz --foo.password='******'",
+ ),
+ (
+ ("spark-submit",),
+ "spark-submit",
+ ),
+
+ (
+ ("spark-submit", "foo", "--bar", "baz", "--password \"secret'"),
+ "spark-submit foo --bar baz --password \"secret'",
+ ),
+ (
+ ("spark-submit", "foo", "--bar", "baz", "--password 'secret\""),
+ "spark-submit foo --bar baz --password 'secret\"",
+ ),
+ ),
+)
+def test_masks_passwords(command: str, expected: str) -> None:
Review comment:
Done. :)
Btw when changing the regular expression, I considered these things
happening in the string under masking to possibly be impacted by bugs in said
expression:
* with and without the `=`
* with single quoted/double quoted/not quoted secret value
* with and without additional non-matching quotes in the middle of the value
* with and without flags/arguments after the secret value
I have added all of the combinations for now but that's quite a few - it may
be better to get rid of some of them. What do you think?
----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
For queries about this service, please contact Infrastructure at:
[email protected]