SameerMesiah97 commented on code in PR #62424:
URL: https://github.com/apache/airflow/pull/62424#discussion_r2849384889
##########
providers/google/tests/unit/google/cloud/hooks/test_gcs.py:
##########
@@ -528,21 +528,26 @@ def test_rewrite_exposes_lineage(self, mock_service,
hook_lineage_collector):
@mock.patch("google.cloud.storage.Bucket")
@mock.patch(GCS_STRING.format("GCSHook.get_conn"))
- def test_delete(self, mock_service, mock_bucket):
+ def test_delete(self, mock_service, mock_bucket, caplog):
test_bucket = "test_bucket"
test_object = "test_object"
blob_to_be_deleted = storage.Blob(name=test_object, bucket=mock_bucket)
- get_bucket_method = mock_service.return_value.get_bucket
- get_blob_method = get_bucket_method.return_value.get_blob
- delete_method = get_blob_method.return_value.delete
+ bucket_method = mock_service.return_value.bucket
+ blob = bucket_method.return_value.blob
+ delete_method = blob.return_value.delete
delete_method.return_value = blob_to_be_deleted
- response = self.gcs_hook.delete(bucket_name=test_bucket,
object_name=test_object)
+ with caplog.at_level(logging.INFO):
+ response = self.gcs_hook.delete(bucket_name=test_bucket,
object_name=test_object)
assert response is None
+ bucket_method.assert_called_once_with(test_bucket)
+ blob.assert_called_once_with(blob_name=test_object)
+ delete_method.assert_called_once()
+ assert "Blob test_object has been deleted" in caplog.text
@mock.patch(GCS_STRING.format("GCSHook.get_conn"))
- def test_delete_nonexisting_object(self, mock_service):
+ def test_delete_nonexisting_object(self, mock_service, caplog):
Review Comment:
If you implement my suggestion above, you can add the assert for the
exception back in this test. This will ensure that the original exception
propagates even after logging.
##########
providers/google/src/airflow/providers/google/cloud/hooks/gcs.py:
##########
@@ -715,12 +715,16 @@ def delete(self, bucket_name: str, object_name: str) ->
None:
client = self.get_conn()
bucket = client.bucket(bucket_name)
blob = bucket.blob(blob_name=object_name)
- blob.delete()
- get_hook_lineage_collector().add_input_asset(
- context=self, scheme="gs", asset_kwargs={"bucket": bucket.name,
"key": blob.name}
- )
- self.log.info("Blob %s deleted.", object_name)
+ self.log.info("Deleting blob %s", object_name)
+ try:
+ blob.delete()
+ get_hook_lineage_collector().add_input_asset(
+ context=self, scheme="gs", asset_kwargs={"bucket":
bucket.name, "key": blob.name}
+ )
+ self.log.info("Blob %s has been deleted", object_name)
+ except NotFound:
+ self.log.info("Blob %s does not exist", object_name)
Review Comment:
I am not sure if `def delete` is meant to be best-effort. My understanding
is that if deletion fails, any overlying task that builds on this function
should fail. Since this is a hook, the GCS operators might rely on this
function raising an error instead of silently logging the failure. I think this
might be better:
```
except NotFound:
self.log.warning(
"Blob %s does not exist in bucket %s.",
object_name,
bucket_name,
)
raise
```
This should improve observability without introducing a subtle breaking
change. Keep in mind that is also a public API so we should be careful about
changing behavior (even slight;y in unhappy paths).
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]