Abacn commented on code in PR #17380:
URL: https://github.com/apache/beam/pull/17380#discussion_r863214669
##########
sdks/python/apache_beam/io/azure/blobstorageio.py:
##########
@@ -372,29 +355,49 @@ def last_updated(self, path):
Returns: last updated time of the Azure Blob Storage blob
in seconds.
"""
- container, blob = parse_azfs_path(path)
- blob_to_check = self.client.get_blob_client(container, blob)
- try:
- properties = blob_to_check.get_blob_properties()
- except ResourceNotFoundError as e:
- message = e.reason
- code = e.status_code
- raise BlobStorageError(message, code)
+ return self._updated_to_seconds(self._blob_properties(path).last_modified)
- datatime = properties.last_modified
- return (
- time.mktime(datatime.timetuple()) - time.timezone +
- datatime.microsecond / 1000000.0)
-
- @retry.with_exponential_backoff(
- retry_filter=retry.retry_on_beam_io_error_filter)
def checksum(self, path):
"""Looks up the checksum of an Azure Blob Storage blob.
Args:
path: Azure Blob Storage file path pattern in the form
azfs://<storage-account>/<container>/[name].
"""
+ return self._blob_properties(path).properties.etag
+
+ def _status(self, path):
+ """For internal use only; no backwards-compatibility guarantees.
+
+ Returns supported fields (checksum, last_updated, size) of a single object
+ as a dict at once.
+
+ This method does not perform glob expansion. Hence the given path must be
+ for a single blob property.
+
+ Returns: dict of fields of the blob property.
+ """
+ properties = self._blob_properties(path)
+ file_status = {}
+ if hasattr(properties, 'etag'):
+ file_status['checksum'] = properties.etag
+ if hasattr(properties, 'last_modified'):
+ file_status['last_updated'] = self._updated_to_seconds(
+ properties.last_modified)
Review Comment:
Yes, BlobProperties contains a last_modified field, which is a datetime
object
(https://docs.microsoft.com/en-us/python/api/azure-storage-blob/azure.storage.blob.models.blobproperties?view=azure-python-previous).
In general, this piece of code reuses the existing logic from the IO.last_updated method.
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]