Taragolis commented on code in PR #28005:
URL: https://github.com/apache/airflow/pull/28005#discussion_r1037260249


##########
airflow/providers/amazon/aws/hooks/glue_crawler.py:
##########
@@ -78,16 +83,60 @@ def update_crawler(self, **crawler_kwargs) -> bool:
         crawler_name = crawler_kwargs["Name"]
         current_crawler = self.get_crawler(crawler_name)
 
+        self.update_tags(crawler_name, crawler_kwargs.pop("Tags", {}))
+
         update_config = {
-            key: value for key, value in crawler_kwargs.items() if 
current_crawler[key] != crawler_kwargs[key]
+            key: value
+            for key, value in crawler_kwargs.items()
+            if current_crawler.get(key, None) != crawler_kwargs.get(key)
         }
-        if update_config != {}:
+        if len(update_config) > 0:

Review Comment:
   It might be even simpler 😸 
   
   ```suggestion
           if update_config:
   ```
   



##########
airflow/providers/amazon/aws/hooks/glue_crawler.py:
##########
@@ -78,16 +83,60 @@ def update_crawler(self, **crawler_kwargs) -> bool:
         crawler_name = crawler_kwargs["Name"]
         current_crawler = self.get_crawler(crawler_name)
 
+        self.update_tags(crawler_name, crawler_kwargs.pop("Tags", {}))
+
         update_config = {
-            key: value for key, value in crawler_kwargs.items() if 
current_crawler[key] != crawler_kwargs[key]
+            key: value
+            for key, value in crawler_kwargs.items()
+            if current_crawler.get(key, None) != crawler_kwargs.get(key)
         }
-        if update_config != {}:
+        if len(update_config) > 0:
             self.log.info("Updating crawler: %s", crawler_name)
             self.glue_client.update_crawler(**crawler_kwargs)
             self.log.info("Updated configurations: %s", update_config)
             return True
-        else:
-            return False
+        return False

Review Comment:
   Hm... it seems that if we only update tags but keep the other configuration 
unchanged, then this method returns `False`.
   
   What if we add something like this?
   
   ```python
   tags_updated = self.update_tags(crawler_name, crawler_kwargs.pop("Tags", {}))
   ...
   if update_config:
       ...
       return True
   return tags_updated
   ```
   



##########
tests/providers/amazon/aws/hooks/test_glue_crawler.py:
##########
@@ -109,16 +113,87 @@ class MockException(Exception):
         
mock_get_conn.return_value.get_crawler.assert_called_once_with(Name=mock_crawler_name)
 
     @mock.patch.object(GlueCrawlerHook, "get_conn")
-    def test_update_crawler_needed(self, mock_get_conn):
+    @mock.patch.object(StsHook, "get_conn")
+    @mock.patch.object(StsHook, "get_account_number")
+    def test_update_crawler_needed(self, mock_get_account_number, 
mock_sts_conn, mock_get_conn):
         mock_get_conn.return_value.get_crawler.return_value = {"Crawler": 
mock_config}
+        mock_get_account_number.return_value = AWS_ACCOUNT_ID
 
         mock_config_two = deepcopy(mock_config)
         mock_config_two["Role"] = "test-2-role"
+        mock_config_two.pop("Tags")
+        response = self.hook.update_crawler(**mock_config_two)
+        self.assertEqual(response, True)
+        
mock_get_conn.return_value.get_crawler.assert_called_once_with(Name=mock_crawler_name)
+        
mock_get_conn.return_value.update_crawler.assert_called_once_with(**mock_config_two)
+
+    @mock.patch.object(GlueCrawlerHook, "get_conn")
+    @mock.patch.object(StsHook, "get_conn")
+    @mock.patch.object(StsHook, "get_account_number")
+    def test_update_crawler_missing_keys(self, mock_get_account_number, 
mock_sts_conn, mock_get_conn):
+        mock_config_missing_configuration = deepcopy(mock_config)
+        mock_config_missing_configuration.pop("Configuration")
+        mock_get_conn.return_value.get_crawler.return_value = {"Crawler": 
mock_config_missing_configuration}
+        mock_get_account_number.return_value = AWS_ACCOUNT_ID
+
+        mock_config_two = deepcopy(mock_config)
+        mock_config_two.pop("Tags")
         response = self.hook.update_crawler(**mock_config_two)
         self.assertEqual(response, True)
         
mock_get_conn.return_value.get_crawler.assert_called_once_with(Name=mock_crawler_name)
         
mock_get_conn.return_value.update_crawler.assert_called_once_with(**mock_config_two)
 
+    @mock.patch.object(GlueCrawlerHook, "get_conn")
+    @mock.patch.object(StsHook, "get_conn")
+    @mock.patch.object(StsHook, "get_account_number")
+    def test_update_tags_not_needed(self, mock_get_account_number, 
mock_sts_conn, mock_get_conn):
+        mock_get_conn.return_value.get_crawler.return_value = {"Crawler": 
mock_config}
+        mock_get_conn.return_value.get_tags.return_value = {"Tags": 
mock_config["Tags"]}
+        mock_get_account_number.return_value = AWS_ACCOUNT_ID
+        crawler_arn = 
f"arn:aws:glue:{AWS_REGION}:{AWS_ACCOUNT_ID}:crawler/{mock_crawler_name}"
+
+        response = self.hook.update_tags(mock_crawler_name, 
mock_config["Tags"])
+        self.assertEqual(response, False)
+        
mock_get_conn.return_value.get_tags.assert_called_once_with(ResourceArn=crawler_arn)
+        mock_get_conn.return_value.tag_resource.assert_not_called()
+        mock_get_conn.return_value.untag_resource.assert_not_called()
+
+    @mock.patch.object(GlueCrawlerHook, "get_conn")
+    @mock.patch.object(StsHook, "get_conn")
+    @mock.patch.object(StsHook, "get_account_number")
+    def test_remove_all_tags(self, mock_get_account_number, mock_sts_conn, 
mock_get_conn):
+        mock_get_conn.return_value.get_crawler.return_value = {"Crawler": 
mock_config}
+        mock_get_conn.return_value.get_tags.return_value = {"Tags": 
mock_config["Tags"]}
+        mock_get_account_number.return_value = AWS_ACCOUNT_ID
+        crawler_arn = 
f"arn:aws:glue:{AWS_REGION}:{AWS_ACCOUNT_ID}:crawler/{mock_crawler_name}"
+
+        response = self.hook.update_tags(mock_crawler_name, {})
+        self.assertEqual(response, True)
+        
mock_get_conn.return_value.get_tags.assert_called_once_with(ResourceArn=crawler_arn)
+        mock_get_conn.return_value.tag_resource.assert_not_called()
+        mock_get_conn.return_value.untag_resource.assert_called_once_with(
+            ResourceArn=crawler_arn, TagsToRemove=["test", "bar"]
+        )
+
+    @mock.patch.object(GlueCrawlerHook, "get_conn")
+    @mock.patch.object(StsHook, "get_conn")
+    @mock.patch.object(StsHook, "get_account_number")
+    def test_replace_tag(self, mock_get_account_number, mock_sts_conn, 
mock_get_conn):
+        mock_get_conn.return_value.get_crawler.return_value = {"Crawler": 
mock_config}
+        mock_get_conn.return_value.get_tags.return_value = {"Tags": 
mock_config["Tags"]}
+        mock_get_account_number.return_value = AWS_ACCOUNT_ID
+        crawler_arn = 
f"arn:aws:glue:{AWS_REGION}:{AWS_ACCOUNT_ID}:crawler/{mock_crawler_name}"
+
+        mock_config_two = deepcopy(mock_config)
+        mock_config_two.pop("Tags")
+        response = self.hook.update_tags(mock_crawler_name, {"test": "bla", 
"bar": "test"})
+        self.assertEqual(response, True)

Review Comment:
   Instead of `TestCase.assert*` methods, use a regular `assert`:
   
   ```suggestion
           assert response
   ```



##########
tests/providers/amazon/aws/hooks/test_glue_crawler.py:
##########
@@ -109,16 +113,87 @@ class MockException(Exception):
         
mock_get_conn.return_value.get_crawler.assert_called_once_with(Name=mock_crawler_name)
 
     @mock.patch.object(GlueCrawlerHook, "get_conn")
-    def test_update_crawler_needed(self, mock_get_conn):
+    @mock.patch.object(StsHook, "get_conn")
+    @mock.patch.object(StsHook, "get_account_number")
+    def test_update_crawler_needed(self, mock_get_account_number, 
mock_sts_conn, mock_get_conn):
         mock_get_conn.return_value.get_crawler.return_value = {"Crawler": 
mock_config}
+        mock_get_account_number.return_value = AWS_ACCOUNT_ID
 
         mock_config_two = deepcopy(mock_config)
         mock_config_two["Role"] = "test-2-role"
+        mock_config_two.pop("Tags")
+        response = self.hook.update_crawler(**mock_config_two)
+        self.assertEqual(response, True)
+        
mock_get_conn.return_value.get_crawler.assert_called_once_with(Name=mock_crawler_name)
+        
mock_get_conn.return_value.update_crawler.assert_called_once_with(**mock_config_two)
+
+    @mock.patch.object(GlueCrawlerHook, "get_conn")
+    @mock.patch.object(StsHook, "get_conn")
+    @mock.patch.object(StsHook, "get_account_number")
+    def test_update_crawler_missing_keys(self, mock_get_account_number, 
mock_sts_conn, mock_get_conn):
+        mock_config_missing_configuration = deepcopy(mock_config)
+        mock_config_missing_configuration.pop("Configuration")
+        mock_get_conn.return_value.get_crawler.return_value = {"Crawler": 
mock_config_missing_configuration}
+        mock_get_account_number.return_value = AWS_ACCOUNT_ID

Review Comment:
   You could use the `moto.mock_sts` decorator to mock the STS client:
   
   ```python
   from moto import mock_sts
   from moto.core import DEFAULT_ACCOUNT_ID
   
   ...
   
       @mock_sts
       @mock.patch.object(GlueCrawlerHook, "get_conn")
       def test_update_crawler_missing_keys(self, mock_get_conn):
           ...
   ```



##########
airflow/providers/amazon/aws/hooks/glue_crawler.py:
##########
@@ -44,6 +45,10 @@ def glue_client(self):
         """:return: AWS Glue client"""
         return self.get_conn()
 
+    @cached_property
+    def sts_hook(self):
+        return StsHook(aws_conn_id=self.aws_conn_id)
+

Review Comment:
   IMHO, it is better not to add one hook as a property of another if possible, or at 
least to make it "private".
   It looks like we only need to call it once in GlueCrawlerHook, so we could 
call it directly in the `update_tags` method:
   
   ```python
   account_id = StsHook(aws_conn_id=self.aws_conn_id).get_account_number()
   ```
   
   In the long term (in a separate PR), it would be a good idea to create an `account_id` property 
in `AwsBaseHook`.



##########
tests/providers/amazon/aws/hooks/test_glue_crawler.py:
##########
@@ -109,16 +113,87 @@ class MockException(Exception):
         
mock_get_conn.return_value.get_crawler.assert_called_once_with(Name=mock_crawler_name)
 
     @mock.patch.object(GlueCrawlerHook, "get_conn")
-    def test_update_crawler_needed(self, mock_get_conn):
+    @mock.patch.object(StsHook, "get_conn")
+    @mock.patch.object(StsHook, "get_account_number")
+    def test_update_crawler_needed(self, mock_get_account_number, 
mock_sts_conn, mock_get_conn):
         mock_get_conn.return_value.get_crawler.return_value = {"Crawler": 
mock_config}
+        mock_get_account_number.return_value = AWS_ACCOUNT_ID
 
         mock_config_two = deepcopy(mock_config)
         mock_config_two["Role"] = "test-2-role"
+        mock_config_two.pop("Tags")
+        response = self.hook.update_crawler(**mock_config_two)
+        self.assertEqual(response, True)

Review Comment:
   ```suggestion
           assert response
   ```



##########
airflow/providers/amazon/aws/hooks/glue_crawler.py:
##########
@@ -78,16 +83,60 @@ def update_crawler(self, **crawler_kwargs) -> bool:
         crawler_name = crawler_kwargs["Name"]
         current_crawler = self.get_crawler(crawler_name)
 
+        self.update_tags(crawler_name, crawler_kwargs.pop("Tags", {}))
+
         update_config = {
-            key: value for key, value in crawler_kwargs.items() if 
current_crawler[key] != crawler_kwargs[key]
+            key: value
+            for key, value in crawler_kwargs.items()
+            if current_crawler.get(key, None) != crawler_kwargs.get(key)
         }
-        if update_config != {}:
+        if len(update_config) > 0:
             self.log.info("Updating crawler: %s", crawler_name)
             self.glue_client.update_crawler(**crawler_kwargs)
             self.log.info("Updated configurations: %s", update_config)
             return True
-        else:
-            return False
+        return False
+
+    def update_tags(self, crawler_name: str, crawler_tags: dict) -> bool:
+        """
+        Updates crawler tags
+
+        :param crawler_name: Name of the crawler for which to update tags
+        :param crawler_tags: Dictionary of new tags. If empty, all tags will 
be deleted
+        :return True if tags were updated and false otherwise
+        """
+        account_number = self.sts_hook.get_account_number()
+        crawler_arn = 
f"arn:aws:glue:{self.region_name}:{account_number}:crawler/{crawler_name}"

Review Comment:
   I think the actual ARN pattern for a crawler is:
   
   ```python
   "arn:{partition}:glue:{region}:{account_id}:crawler/{crawler_name}"
   ```
   
   `aws` is the most common partition, but there are also `aws-cn` (China), `aws-us-gov` 
(GovCloud) and some (Top) Secret partitions.
   
   In addition, `self.region_name` might return None if the region is not defined in the 
connection/hook but is defined in the AWS config file, the AWS shared credentials file, or 
an environment variable. In this case you should get the region name from the boto3 client 
metadata.
   
   Fortunately, `AwsBaseHook` has properties for the partition and region name:
   
https://github.com/apache/airflow/blob/e9a9ae6540339bad8b228c81d0a9ea37ce3b469e/airflow/providers/amazon/aws/hooks/base_aws.py#L514-L522



##########
tests/providers/amazon/aws/hooks/test_glue_crawler.py:
##########
@@ -109,16 +113,87 @@ class MockException(Exception):
         
mock_get_conn.return_value.get_crawler.assert_called_once_with(Name=mock_crawler_name)
 
     @mock.patch.object(GlueCrawlerHook, "get_conn")
-    def test_update_crawler_needed(self, mock_get_conn):
+    @mock.patch.object(StsHook, "get_conn")
+    @mock.patch.object(StsHook, "get_account_number")
+    def test_update_crawler_needed(self, mock_get_account_number, 
mock_sts_conn, mock_get_conn):
         mock_get_conn.return_value.get_crawler.return_value = {"Crawler": 
mock_config}
+        mock_get_account_number.return_value = AWS_ACCOUNT_ID
 
         mock_config_two = deepcopy(mock_config)
         mock_config_two["Role"] = "test-2-role"
+        mock_config_two.pop("Tags")
+        response = self.hook.update_crawler(**mock_config_two)
+        self.assertEqual(response, True)
+        
mock_get_conn.return_value.get_crawler.assert_called_once_with(Name=mock_crawler_name)
+        
mock_get_conn.return_value.update_crawler.assert_called_once_with(**mock_config_two)
+
+    @mock.patch.object(GlueCrawlerHook, "get_conn")
+    @mock.patch.object(StsHook, "get_conn")
+    @mock.patch.object(StsHook, "get_account_number")
+    def test_update_crawler_missing_keys(self, mock_get_account_number, 
mock_sts_conn, mock_get_conn):
+        mock_config_missing_configuration = deepcopy(mock_config)
+        mock_config_missing_configuration.pop("Configuration")
+        mock_get_conn.return_value.get_crawler.return_value = {"Crawler": 
mock_config_missing_configuration}
+        mock_get_account_number.return_value = AWS_ACCOUNT_ID
+
+        mock_config_two = deepcopy(mock_config)
+        mock_config_two.pop("Tags")
         response = self.hook.update_crawler(**mock_config_two)
         self.assertEqual(response, True)
         
mock_get_conn.return_value.get_crawler.assert_called_once_with(Name=mock_crawler_name)
         
mock_get_conn.return_value.update_crawler.assert_called_once_with(**mock_config_two)
 
+    @mock.patch.object(GlueCrawlerHook, "get_conn")
+    @mock.patch.object(StsHook, "get_conn")
+    @mock.patch.object(StsHook, "get_account_number")
+    def test_update_tags_not_needed(self, mock_get_account_number, 
mock_sts_conn, mock_get_conn):
+        mock_get_conn.return_value.get_crawler.return_value = {"Crawler": 
mock_config}
+        mock_get_conn.return_value.get_tags.return_value = {"Tags": 
mock_config["Tags"]}
+        mock_get_account_number.return_value = AWS_ACCOUNT_ID
+        crawler_arn = 
f"arn:aws:glue:{AWS_REGION}:{AWS_ACCOUNT_ID}:crawler/{mock_crawler_name}"
+
+        response = self.hook.update_tags(mock_crawler_name, 
mock_config["Tags"])
+        self.assertEqual(response, False)
+        
mock_get_conn.return_value.get_tags.assert_called_once_with(ResourceArn=crawler_arn)
+        mock_get_conn.return_value.tag_resource.assert_not_called()
+        mock_get_conn.return_value.untag_resource.assert_not_called()
+
+    @mock.patch.object(GlueCrawlerHook, "get_conn")
+    @mock.patch.object(StsHook, "get_conn")
+    @mock.patch.object(StsHook, "get_account_number")
+    def test_remove_all_tags(self, mock_get_account_number, mock_sts_conn, 
mock_get_conn):
+        mock_get_conn.return_value.get_crawler.return_value = {"Crawler": 
mock_config}
+        mock_get_conn.return_value.get_tags.return_value = {"Tags": 
mock_config["Tags"]}
+        mock_get_account_number.return_value = AWS_ACCOUNT_ID
+        crawler_arn = 
f"arn:aws:glue:{AWS_REGION}:{AWS_ACCOUNT_ID}:crawler/{mock_crawler_name}"
+
+        response = self.hook.update_tags(mock_crawler_name, {})
+        self.assertEqual(response, True)

Review Comment:
   ```suggestion
           assert response
   ```



##########
tests/providers/amazon/aws/hooks/test_glue_crawler.py:
##########
@@ -109,16 +113,87 @@ class MockException(Exception):
         
mock_get_conn.return_value.get_crawler.assert_called_once_with(Name=mock_crawler_name)
 
     @mock.patch.object(GlueCrawlerHook, "get_conn")
-    def test_update_crawler_needed(self, mock_get_conn):
+    @mock.patch.object(StsHook, "get_conn")
+    @mock.patch.object(StsHook, "get_account_number")
+    def test_update_crawler_needed(self, mock_get_account_number, 
mock_sts_conn, mock_get_conn):
         mock_get_conn.return_value.get_crawler.return_value = {"Crawler": 
mock_config}
+        mock_get_account_number.return_value = AWS_ACCOUNT_ID
 
         mock_config_two = deepcopy(mock_config)
         mock_config_two["Role"] = "test-2-role"
+        mock_config_two.pop("Tags")
+        response = self.hook.update_crawler(**mock_config_two)
+        self.assertEqual(response, True)
+        
mock_get_conn.return_value.get_crawler.assert_called_once_with(Name=mock_crawler_name)
+        
mock_get_conn.return_value.update_crawler.assert_called_once_with(**mock_config_two)
+
+    @mock.patch.object(GlueCrawlerHook, "get_conn")
+    @mock.patch.object(StsHook, "get_conn")
+    @mock.patch.object(StsHook, "get_account_number")
+    def test_update_crawler_missing_keys(self, mock_get_account_number, 
mock_sts_conn, mock_get_conn):
+        mock_config_missing_configuration = deepcopy(mock_config)
+        mock_config_missing_configuration.pop("Configuration")
+        mock_get_conn.return_value.get_crawler.return_value = {"Crawler": 
mock_config_missing_configuration}
+        mock_get_account_number.return_value = AWS_ACCOUNT_ID
+
+        mock_config_two = deepcopy(mock_config)
+        mock_config_two.pop("Tags")
         response = self.hook.update_crawler(**mock_config_two)
         self.assertEqual(response, True)
         
mock_get_conn.return_value.get_crawler.assert_called_once_with(Name=mock_crawler_name)
         
mock_get_conn.return_value.update_crawler.assert_called_once_with(**mock_config_two)
 
+    @mock.patch.object(GlueCrawlerHook, "get_conn")
+    @mock.patch.object(StsHook, "get_conn")
+    @mock.patch.object(StsHook, "get_account_number")
+    def test_update_tags_not_needed(self, mock_get_account_number, 
mock_sts_conn, mock_get_conn):
+        mock_get_conn.return_value.get_crawler.return_value = {"Crawler": 
mock_config}
+        mock_get_conn.return_value.get_tags.return_value = {"Tags": 
mock_config["Tags"]}
+        mock_get_account_number.return_value = AWS_ACCOUNT_ID
+        crawler_arn = 
f"arn:aws:glue:{AWS_REGION}:{AWS_ACCOUNT_ID}:crawler/{mock_crawler_name}"
+
+        response = self.hook.update_tags(mock_crawler_name, 
mock_config["Tags"])
+        self.assertEqual(response, False)

Review Comment:
   ```suggestion
           assert not response
   ```



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]

Reply via email to