eladkal commented on code in PR #38398:
URL: https://github.com/apache/airflow/pull/38398#discussion_r1549553297
##########
airflow/providers/google/cloud/hooks/gcs.py:
##########
@@ -1006,6 +1006,27 @@ def get_md5hash(self, bucket_name: str, object_name:
str) -> str:
self.log.info("The md5Hash of %s is %s", object_name, blob_md5hash)
return blob_md5hash
+ def get_metadata(self, bucket_name: str, object_name: str) -> dict | None:
+ """
+ Get the metadata of an object in Google Cloud Storage.
+
+ :param bucket_name: Name of the Google Cloud Storage bucket where the
object is.
+ :param object_name: The name of the object containing the desired
metadata
+ :return: The metadata associated with the object
+ """
+ self.log.info("Retrieving the metadata dict of object (%s) in bucket
(%s)", object_name, bucket_name)
+ client = self.get_conn()
+ bucket = client.bucket(bucket_name)
+ blob = bucket.get_blob(blob_name=object_name)
+ if blob is None:
+ raise ValueError("Object (%s) not found in bucket (%s)",
object_name, bucket_name)
+ blob_metadata = blob.metadata
+ if blob_metadata:
+ self.log.info("Retrieved metadata of object (%s) with %s fields",
object_name, len(blob_metadata))
+ else:
+ self.log.info("Metadata of object (%s) is empty or it does not
exist", object_name)
Review Comment:
It must be empty, mustn't it? If it doesn't exist, a ValueError is thrown.
##########
airflow/providers/google/cloud/hooks/gcs.py:
##########
@@ -1006,6 +1006,27 @@ def get_md5hash(self, bucket_name: str, object_name:
str) -> str:
self.log.info("The md5Hash of %s is %s", object_name, blob_md5hash)
return blob_md5hash
+ def get_metadata(self, bucket_name: str, object_name: str) -> dict | None:
+ """
+ Get the metadata of an object in Google Cloud Storage.
+
+ :param bucket_name: Name of the Google Cloud Storage bucket where the
object is.
+ :param object_name: The name of the object containing the desired
metadata
+ :return: The metadata associated with the object
+ """
+ self.log.info("Retrieving the metadata dict of object (%s) in bucket
(%s)", object_name, bucket_name)
+ client = self.get_conn()
+ bucket = client.bucket(bucket_name)
+ blob = bucket.get_blob(blob_name=object_name)
+ if blob is None:
+ raise ValueError("Object (%s) not found in bucket (%s)",
object_name, bucket_name)
+ blob_metadata = blob.metadata
+ if blob_metadata:
+ self.log.info("Retrieved metadata of object (%s) with %s fields",
object_name, len(blob_metadata))
+ else:
+ self.log.info("Metadata of object (%s) is empty or it does not
exist", object_name)
+ return blob_metadata
Review Comment:
What is the value of returning an empty blob?
In any case, there is a missing test case for an empty blob.
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]