Taragolis commented on code in PR #28005:
URL: https://github.com/apache/airflow/pull/28005#discussion_r1037260249
##########
airflow/providers/amazon/aws/hooks/glue_crawler.py:
##########
@@ -78,16 +83,60 @@ def update_crawler(self, **crawler_kwargs) -> bool:
crawler_name = crawler_kwargs["Name"]
current_crawler = self.get_crawler(crawler_name)
+ self.update_tags(crawler_name, crawler_kwargs.pop("Tags", {}))
+
update_config = {
- key: value for key, value in crawler_kwargs.items() if
current_crawler[key] != crawler_kwargs[key]
+ key: value
+ for key, value in crawler_kwargs.items()
+ if current_crawler.get(key, None) != crawler_kwargs.get(key)
}
- if update_config != {}:
+ if len(update_config) > 0:
Review Comment:
It might be even simpler 😸
```suggestion
if update_config:
```
##########
airflow/providers/amazon/aws/hooks/glue_crawler.py:
##########
@@ -78,16 +83,60 @@ def update_crawler(self, **crawler_kwargs) -> bool:
crawler_name = crawler_kwargs["Name"]
current_crawler = self.get_crawler(crawler_name)
+ self.update_tags(crawler_name, crawler_kwargs.pop("Tags", {}))
+
update_config = {
- key: value for key, value in crawler_kwargs.items() if
current_crawler[key] != crawler_kwargs[key]
+ key: value
+ for key, value in crawler_kwargs.items()
+ if current_crawler.get(key, None) != crawler_kwargs.get(key)
}
- if update_config != {}:
+ if len(update_config) > 0:
self.log.info("Updating crawler: %s", crawler_name)
self.glue_client.update_crawler(**crawler_kwargs)
self.log.info("Updated configurations: %s", update_config)
return True
- else:
- return False
+ return False
Review Comment:
Hm... it seems that if we only update tags but keep the other configuration
unchanged, then this method returns `False`.
What if we add something like this?
```python
tags_updated = self.update_tags(crawler_name, crawler_kwargs.pop("Tags", {}))
...
if update_config:
...
return True
return tags_updated
```
##########
tests/providers/amazon/aws/hooks/test_glue_crawler.py:
##########
@@ -109,16 +113,87 @@ class MockException(Exception):
mock_get_conn.return_value.get_crawler.assert_called_once_with(Name=mock_crawler_name)
@mock.patch.object(GlueCrawlerHook, "get_conn")
- def test_update_crawler_needed(self, mock_get_conn):
+ @mock.patch.object(StsHook, "get_conn")
+ @mock.patch.object(StsHook, "get_account_number")
+ def test_update_crawler_needed(self, mock_get_account_number,
mock_sts_conn, mock_get_conn):
mock_get_conn.return_value.get_crawler.return_value = {"Crawler":
mock_config}
+ mock_get_account_number.return_value = AWS_ACCOUNT_ID
mock_config_two = deepcopy(mock_config)
mock_config_two["Role"] = "test-2-role"
+ mock_config_two.pop("Tags")
+ response = self.hook.update_crawler(**mock_config_two)
+ self.assertEqual(response, True)
+
mock_get_conn.return_value.get_crawler.assert_called_once_with(Name=mock_crawler_name)
+
mock_get_conn.return_value.update_crawler.assert_called_once_with(**mock_config_two)
+
+ @mock.patch.object(GlueCrawlerHook, "get_conn")
+ @mock.patch.object(StsHook, "get_conn")
+ @mock.patch.object(StsHook, "get_account_number")
+ def test_update_crawler_missing_keys(self, mock_get_account_number,
mock_sts_conn, mock_get_conn):
+ mock_config_missing_configuration = deepcopy(mock_config)
+ mock_config_missing_configuration.pop("Configuration")
+ mock_get_conn.return_value.get_crawler.return_value = {"Crawler":
mock_config_missing_configuration}
+ mock_get_account_number.return_value = AWS_ACCOUNT_ID
+
+ mock_config_two = deepcopy(mock_config)
+ mock_config_two.pop("Tags")
response = self.hook.update_crawler(**mock_config_two)
self.assertEqual(response, True)
mock_get_conn.return_value.get_crawler.assert_called_once_with(Name=mock_crawler_name)
mock_get_conn.return_value.update_crawler.assert_called_once_with(**mock_config_two)
+ @mock.patch.object(GlueCrawlerHook, "get_conn")
+ @mock.patch.object(StsHook, "get_conn")
+ @mock.patch.object(StsHook, "get_account_number")
+ def test_update_tags_not_needed(self, mock_get_account_number,
mock_sts_conn, mock_get_conn):
+ mock_get_conn.return_value.get_crawler.return_value = {"Crawler":
mock_config}
+ mock_get_conn.return_value.get_tags.return_value = {"Tags":
mock_config["Tags"]}
+ mock_get_account_number.return_value = AWS_ACCOUNT_ID
+ crawler_arn =
f"arn:aws:glue:{AWS_REGION}:{AWS_ACCOUNT_ID}:crawler/{mock_crawler_name}"
+
+ response = self.hook.update_tags(mock_crawler_name,
mock_config["Tags"])
+ self.assertEqual(response, False)
+
mock_get_conn.return_value.get_tags.assert_called_once_with(ResourceArn=crawler_arn)
+ mock_get_conn.return_value.tag_resource.assert_not_called()
+ mock_get_conn.return_value.untag_resource.assert_not_called()
+
+ @mock.patch.object(GlueCrawlerHook, "get_conn")
+ @mock.patch.object(StsHook, "get_conn")
+ @mock.patch.object(StsHook, "get_account_number")
+ def test_remove_all_tags(self, mock_get_account_number, mock_sts_conn,
mock_get_conn):
+ mock_get_conn.return_value.get_crawler.return_value = {"Crawler":
mock_config}
+ mock_get_conn.return_value.get_tags.return_value = {"Tags":
mock_config["Tags"]}
+ mock_get_account_number.return_value = AWS_ACCOUNT_ID
+ crawler_arn =
f"arn:aws:glue:{AWS_REGION}:{AWS_ACCOUNT_ID}:crawler/{mock_crawler_name}"
+
+ response = self.hook.update_tags(mock_crawler_name, {})
+ self.assertEqual(response, True)
+
mock_get_conn.return_value.get_tags.assert_called_once_with(ResourceArn=crawler_arn)
+ mock_get_conn.return_value.tag_resource.assert_not_called()
+ mock_get_conn.return_value.untag_resource.assert_called_once_with(
+ ResourceArn=crawler_arn, TagsToRemove=["test", "bar"]
+ )
+
+ @mock.patch.object(GlueCrawlerHook, "get_conn")
+ @mock.patch.object(StsHook, "get_conn")
+ @mock.patch.object(StsHook, "get_account_number")
+ def test_replace_tag(self, mock_get_account_number, mock_sts_conn,
mock_get_conn):
+ mock_get_conn.return_value.get_crawler.return_value = {"Crawler":
mock_config}
+ mock_get_conn.return_value.get_tags.return_value = {"Tags":
mock_config["Tags"]}
+ mock_get_account_number.return_value = AWS_ACCOUNT_ID
+ crawler_arn =
f"arn:aws:glue:{AWS_REGION}:{AWS_ACCOUNT_ID}:crawler/{mock_crawler_name}"
+
+ mock_config_two = deepcopy(mock_config)
+ mock_config_two.pop("Tags")
+ response = self.hook.update_tags(mock_crawler_name, {"test": "bla",
"bar": "test"})
+ self.assertEqual(response, True)
Review Comment:
Instead of `TestCase.assert*` methods, use a regular `assert`.
```suggestion
assert response
```
##########
tests/providers/amazon/aws/hooks/test_glue_crawler.py:
##########
@@ -109,16 +113,87 @@ class MockException(Exception):
mock_get_conn.return_value.get_crawler.assert_called_once_with(Name=mock_crawler_name)
@mock.patch.object(GlueCrawlerHook, "get_conn")
- def test_update_crawler_needed(self, mock_get_conn):
+ @mock.patch.object(StsHook, "get_conn")
+ @mock.patch.object(StsHook, "get_account_number")
+ def test_update_crawler_needed(self, mock_get_account_number,
mock_sts_conn, mock_get_conn):
mock_get_conn.return_value.get_crawler.return_value = {"Crawler":
mock_config}
+ mock_get_account_number.return_value = AWS_ACCOUNT_ID
mock_config_two = deepcopy(mock_config)
mock_config_two["Role"] = "test-2-role"
+ mock_config_two.pop("Tags")
+ response = self.hook.update_crawler(**mock_config_two)
+ self.assertEqual(response, True)
+
mock_get_conn.return_value.get_crawler.assert_called_once_with(Name=mock_crawler_name)
+
mock_get_conn.return_value.update_crawler.assert_called_once_with(**mock_config_two)
+
+ @mock.patch.object(GlueCrawlerHook, "get_conn")
+ @mock.patch.object(StsHook, "get_conn")
+ @mock.patch.object(StsHook, "get_account_number")
+ def test_update_crawler_missing_keys(self, mock_get_account_number,
mock_sts_conn, mock_get_conn):
+ mock_config_missing_configuration = deepcopy(mock_config)
+ mock_config_missing_configuration.pop("Configuration")
+ mock_get_conn.return_value.get_crawler.return_value = {"Crawler":
mock_config_missing_configuration}
+ mock_get_account_number.return_value = AWS_ACCOUNT_ID
Review Comment:
You could use the `moto.mock_sts` decorator to mock the STS client
```python
from moto import mock_sts
from moto.core import DEFAULT_ACCOUNT_ID
...
@mock_sts
@mock.patch.object(GlueCrawlerHook, "get_conn")
def test_update_crawler_missing_keys(self, mock_get_conn):
...
```
##########
airflow/providers/amazon/aws/hooks/glue_crawler.py:
##########
@@ -44,6 +45,10 @@ def glue_client(self):
""":return: AWS Glue client"""
return self.get_conn()
+ @cached_property
+ def sts_hook(self):
+ return StsHook(aws_conn_id=self.aws_conn_id)
+
Review Comment:
IMHO, it is better not to add one hook as a property of another if possible, or
at least to make it "private".
It looks like we only need to call it once in GlueCrawlerHook, so we could
call it directly in the `update_tags` method
```python
account_id = StsHook(aws_conn_id=self.aws_conn_id).get_account_number()
```
In the long term (separate PR), it would be a good idea to create an
`account_id` property in `AwsBaseHook`.
##########
tests/providers/amazon/aws/hooks/test_glue_crawler.py:
##########
@@ -109,16 +113,87 @@ class MockException(Exception):
mock_get_conn.return_value.get_crawler.assert_called_once_with(Name=mock_crawler_name)
@mock.patch.object(GlueCrawlerHook, "get_conn")
- def test_update_crawler_needed(self, mock_get_conn):
+ @mock.patch.object(StsHook, "get_conn")
+ @mock.patch.object(StsHook, "get_account_number")
+ def test_update_crawler_needed(self, mock_get_account_number,
mock_sts_conn, mock_get_conn):
mock_get_conn.return_value.get_crawler.return_value = {"Crawler":
mock_config}
+ mock_get_account_number.return_value = AWS_ACCOUNT_ID
mock_config_two = deepcopy(mock_config)
mock_config_two["Role"] = "test-2-role"
+ mock_config_two.pop("Tags")
+ response = self.hook.update_crawler(**mock_config_two)
+ self.assertEqual(response, True)
Review Comment:
```suggestion
assert response
```
##########
airflow/providers/amazon/aws/hooks/glue_crawler.py:
##########
@@ -78,16 +83,60 @@ def update_crawler(self, **crawler_kwargs) -> bool:
crawler_name = crawler_kwargs["Name"]
current_crawler = self.get_crawler(crawler_name)
+ self.update_tags(crawler_name, crawler_kwargs.pop("Tags", {}))
+
update_config = {
- key: value for key, value in crawler_kwargs.items() if
current_crawler[key] != crawler_kwargs[key]
+ key: value
+ for key, value in crawler_kwargs.items()
+ if current_crawler.get(key, None) != crawler_kwargs.get(key)
}
- if update_config != {}:
+ if len(update_config) > 0:
self.log.info("Updating crawler: %s", crawler_name)
self.glue_client.update_crawler(**crawler_kwargs)
self.log.info("Updated configurations: %s", update_config)
return True
- else:
- return False
+ return False
+
+ def update_tags(self, crawler_name: str, crawler_tags: dict) -> bool:
+ """
+ Updates crawler tags
+
+ :param crawler_name: Name of the crawler for which to update tags
+ :param crawler_tags: Dictionary of new tags. If empty, all tags will
be deleted
+ :return True if tags were updated and false otherwise
+ """
+ account_number = self.sts_hook.get_account_number()
+ crawler_arn =
f"arn:aws:glue:{self.region_name}:{account_number}:crawler/{crawler_name}"
Review Comment:
I thought the actual ARN pattern for a crawler is:
```python
"arn:{partition}:glue:{region}:{account_id}:crawler/{crawler_name}"
```
`aws` is the common partition, but there are also `aws-cn` (China), `aws-us-gov`
(GovCloud) and some (Top) Secret partitions.
In addition, `self.region_name` might return None if the region is not defined
in the connection/hook but is defined in the AWS config file / AWS shared
credentials file or an environment variable. In this case you should get the
region name from the boto3 client metadata.
Fortunately, `AwsBaseHook` has properties for the partition/region name:
https://github.com/apache/airflow/blob/e9a9ae6540339bad8b228c81d0a9ea37ce3b469e/airflow/providers/amazon/aws/hooks/base_aws.py#L514-L522
##########
tests/providers/amazon/aws/hooks/test_glue_crawler.py:
##########
@@ -109,16 +113,87 @@ class MockException(Exception):
mock_get_conn.return_value.get_crawler.assert_called_once_with(Name=mock_crawler_name)
@mock.patch.object(GlueCrawlerHook, "get_conn")
- def test_update_crawler_needed(self, mock_get_conn):
+ @mock.patch.object(StsHook, "get_conn")
+ @mock.patch.object(StsHook, "get_account_number")
+ def test_update_crawler_needed(self, mock_get_account_number,
mock_sts_conn, mock_get_conn):
mock_get_conn.return_value.get_crawler.return_value = {"Crawler":
mock_config}
+ mock_get_account_number.return_value = AWS_ACCOUNT_ID
mock_config_two = deepcopy(mock_config)
mock_config_two["Role"] = "test-2-role"
+ mock_config_two.pop("Tags")
+ response = self.hook.update_crawler(**mock_config_two)
+ self.assertEqual(response, True)
+
mock_get_conn.return_value.get_crawler.assert_called_once_with(Name=mock_crawler_name)
+
mock_get_conn.return_value.update_crawler.assert_called_once_with(**mock_config_two)
+
+ @mock.patch.object(GlueCrawlerHook, "get_conn")
+ @mock.patch.object(StsHook, "get_conn")
+ @mock.patch.object(StsHook, "get_account_number")
+ def test_update_crawler_missing_keys(self, mock_get_account_number,
mock_sts_conn, mock_get_conn):
+ mock_config_missing_configuration = deepcopy(mock_config)
+ mock_config_missing_configuration.pop("Configuration")
+ mock_get_conn.return_value.get_crawler.return_value = {"Crawler":
mock_config_missing_configuration}
+ mock_get_account_number.return_value = AWS_ACCOUNT_ID
+
+ mock_config_two = deepcopy(mock_config)
+ mock_config_two.pop("Tags")
response = self.hook.update_crawler(**mock_config_two)
self.assertEqual(response, True)
mock_get_conn.return_value.get_crawler.assert_called_once_with(Name=mock_crawler_name)
mock_get_conn.return_value.update_crawler.assert_called_once_with(**mock_config_two)
+ @mock.patch.object(GlueCrawlerHook, "get_conn")
+ @mock.patch.object(StsHook, "get_conn")
+ @mock.patch.object(StsHook, "get_account_number")
+ def test_update_tags_not_needed(self, mock_get_account_number,
mock_sts_conn, mock_get_conn):
+ mock_get_conn.return_value.get_crawler.return_value = {"Crawler":
mock_config}
+ mock_get_conn.return_value.get_tags.return_value = {"Tags":
mock_config["Tags"]}
+ mock_get_account_number.return_value = AWS_ACCOUNT_ID
+ crawler_arn =
f"arn:aws:glue:{AWS_REGION}:{AWS_ACCOUNT_ID}:crawler/{mock_crawler_name}"
+
+ response = self.hook.update_tags(mock_crawler_name,
mock_config["Tags"])
+ self.assertEqual(response, False)
+
mock_get_conn.return_value.get_tags.assert_called_once_with(ResourceArn=crawler_arn)
+ mock_get_conn.return_value.tag_resource.assert_not_called()
+ mock_get_conn.return_value.untag_resource.assert_not_called()
+
+ @mock.patch.object(GlueCrawlerHook, "get_conn")
+ @mock.patch.object(StsHook, "get_conn")
+ @mock.patch.object(StsHook, "get_account_number")
+ def test_remove_all_tags(self, mock_get_account_number, mock_sts_conn,
mock_get_conn):
+ mock_get_conn.return_value.get_crawler.return_value = {"Crawler":
mock_config}
+ mock_get_conn.return_value.get_tags.return_value = {"Tags":
mock_config["Tags"]}
+ mock_get_account_number.return_value = AWS_ACCOUNT_ID
+ crawler_arn =
f"arn:aws:glue:{AWS_REGION}:{AWS_ACCOUNT_ID}:crawler/{mock_crawler_name}"
+
+ response = self.hook.update_tags(mock_crawler_name, {})
+ self.assertEqual(response, True)
Review Comment:
```suggestion
assert response
```
##########
tests/providers/amazon/aws/hooks/test_glue_crawler.py:
##########
@@ -109,16 +113,87 @@ class MockException(Exception):
mock_get_conn.return_value.get_crawler.assert_called_once_with(Name=mock_crawler_name)
@mock.patch.object(GlueCrawlerHook, "get_conn")
- def test_update_crawler_needed(self, mock_get_conn):
+ @mock.patch.object(StsHook, "get_conn")
+ @mock.patch.object(StsHook, "get_account_number")
+ def test_update_crawler_needed(self, mock_get_account_number,
mock_sts_conn, mock_get_conn):
mock_get_conn.return_value.get_crawler.return_value = {"Crawler":
mock_config}
+ mock_get_account_number.return_value = AWS_ACCOUNT_ID
mock_config_two = deepcopy(mock_config)
mock_config_two["Role"] = "test-2-role"
+ mock_config_two.pop("Tags")
+ response = self.hook.update_crawler(**mock_config_two)
+ self.assertEqual(response, True)
+
mock_get_conn.return_value.get_crawler.assert_called_once_with(Name=mock_crawler_name)
+
mock_get_conn.return_value.update_crawler.assert_called_once_with(**mock_config_two)
+
+ @mock.patch.object(GlueCrawlerHook, "get_conn")
+ @mock.patch.object(StsHook, "get_conn")
+ @mock.patch.object(StsHook, "get_account_number")
+ def test_update_crawler_missing_keys(self, mock_get_account_number,
mock_sts_conn, mock_get_conn):
+ mock_config_missing_configuration = deepcopy(mock_config)
+ mock_config_missing_configuration.pop("Configuration")
+ mock_get_conn.return_value.get_crawler.return_value = {"Crawler":
mock_config_missing_configuration}
+ mock_get_account_number.return_value = AWS_ACCOUNT_ID
+
+ mock_config_two = deepcopy(mock_config)
+ mock_config_two.pop("Tags")
response = self.hook.update_crawler(**mock_config_two)
self.assertEqual(response, True)
mock_get_conn.return_value.get_crawler.assert_called_once_with(Name=mock_crawler_name)
mock_get_conn.return_value.update_crawler.assert_called_once_with(**mock_config_two)
+ @mock.patch.object(GlueCrawlerHook, "get_conn")
+ @mock.patch.object(StsHook, "get_conn")
+ @mock.patch.object(StsHook, "get_account_number")
+ def test_update_tags_not_needed(self, mock_get_account_number,
mock_sts_conn, mock_get_conn):
+ mock_get_conn.return_value.get_crawler.return_value = {"Crawler":
mock_config}
+ mock_get_conn.return_value.get_tags.return_value = {"Tags":
mock_config["Tags"]}
+ mock_get_account_number.return_value = AWS_ACCOUNT_ID
+ crawler_arn =
f"arn:aws:glue:{AWS_REGION}:{AWS_ACCOUNT_ID}:crawler/{mock_crawler_name}"
+
+ response = self.hook.update_tags(mock_crawler_name,
mock_config["Tags"])
+ self.assertEqual(response, False)
Review Comment:
```suggestion
assert not response
```
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]