SameerMesiah97 commented on code in PR #62424:
URL: https://github.com/apache/airflow/pull/62424#discussion_r2849384889


##########
providers/google/tests/unit/google/cloud/hooks/test_gcs.py:
##########
@@ -528,21 +528,26 @@ def test_rewrite_exposes_lineage(self, mock_service, 
hook_lineage_collector):
 
     @mock.patch("google.cloud.storage.Bucket")
     @mock.patch(GCS_STRING.format("GCSHook.get_conn"))
-    def test_delete(self, mock_service, mock_bucket):
+    def test_delete(self, mock_service, mock_bucket, caplog):
         test_bucket = "test_bucket"
         test_object = "test_object"
         blob_to_be_deleted = storage.Blob(name=test_object, bucket=mock_bucket)
 
-        get_bucket_method = mock_service.return_value.get_bucket
-        get_blob_method = get_bucket_method.return_value.get_blob
-        delete_method = get_blob_method.return_value.delete
+        bucket_method = mock_service.return_value.bucket
+        blob = bucket_method.return_value.blob
+        delete_method = blob.return_value.delete
         delete_method.return_value = blob_to_be_deleted
 
-        response = self.gcs_hook.delete(bucket_name=test_bucket, 
object_name=test_object)
+        with caplog.at_level(logging.INFO):
+            response = self.gcs_hook.delete(bucket_name=test_bucket, 
object_name=test_object)
         assert response is None
+        bucket_method.assert_called_once_with(test_bucket)
+        blob.assert_called_once_with(blob_name=test_object)
+        delete_method.assert_called_once()
+        assert "Blob test_object has been deleted" in caplog.text
 
     @mock.patch(GCS_STRING.format("GCSHook.get_conn"))
-    def test_delete_nonexisting_object(self, mock_service):
+    def test_delete_nonexisting_object(self, mock_service, caplog):

Review Comment:
   If you implement my suggestion above, you can add the assert for the 
exception back in this test. This will ensure that the original exception 
propagates even after logging.



##########
providers/google/src/airflow/providers/google/cloud/hooks/gcs.py:
##########
@@ -715,12 +715,16 @@ def delete(self, bucket_name: str, object_name: str) -> 
None:
         client = self.get_conn()
         bucket = client.bucket(bucket_name)
         blob = bucket.blob(blob_name=object_name)
-        blob.delete()
-        get_hook_lineage_collector().add_input_asset(
-            context=self, scheme="gs", asset_kwargs={"bucket": bucket.name, 
"key": blob.name}
-        )
 
-        self.log.info("Blob %s deleted.", object_name)
+        self.log.info("Deleting blob %s", object_name)
+        try:
+            blob.delete()
+            get_hook_lineage_collector().add_input_asset(
+                context=self, scheme="gs", asset_kwargs={"bucket": 
bucket.name, "key": blob.name}
+            )
+            self.log.info("Blob %s has been deleted", object_name)
+        except NotFound:
+            self.log.info("Blob %s does not exist", object_name)

Review Comment:
   I am not sure if `def delete` is meant to be best-effort. My understanding 
is that if deletion fails, any overlying task that builds on this function 
should fail. Since this is a hook, the GCS operators might rely on this 
function raising an error instead of silently logging the failure. I think this 
might be better:
   
   ```
   except NotFound:
       self.log.warning(
           "Blob %s does not exist in bucket %s.",
           object_name,
           bucket_name,
       )
       raise
   ```
   This should improve observability without introducing a subtle breaking 
change. Keep in mind that is also a public API so we should be careful about 
changing behavior (even slight;y in unhappy paths).



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]

Reply via email to