Abacn commented on code in PR #17380:
URL: https://github.com/apache/beam/pull/17380#discussion_r863227025
##########
sdks/python/apache_beam/io/azure/blobstorageio.py:
##########
@@ -559,40 +569,54 @@ def _delete_batch(self, container, blobs):
@retry.with_exponential_backoff(
retry_filter=retry.retry_on_beam_io_error_filter)
- def list_prefix(self, path):
+ def list_prefix(self, path, with_metadata=False):
"""Lists files matching the prefix.
Args:
path: Azure Blob Storage file path pattern in the form
azfs://<storage-account>/<container>/[name].
+ with_metadata: Experimental. Specify whether returns file metadata.
Returns:
- Dictionary of file name -> size.
+ If ``with_metadata`` is False: dict of file name -> size; if
+ ``with_metadata`` is True: dict of file name -> tuple(size, timestamp).
"""
storage_account, container, blob = parse_azfs_path(
path, blob_optional=True, get_account=True)
- file_sizes = {}
+ file_info = {}
counter = 0
start_time = time.time()
- logging.info("Starting the size estimation of the input")
+ if with_metadata:
+ logging.info("Starting the file information of the input")
+ else:
+ logging.info("Starting the size estimation of the input")
container_client = self.client.get_container_client(container)
while True:
response = container_client.list_blobs(name_starts_with=blob)
for item in response:
file_name = "azfs://%s/%s/%s" % (storage_account, container, item.name)
- file_sizes[file_name] = item.size
+ if with_metadata:
+ file_info[file_name] = (
+ item.size, self._updated_to_seconds(item.last_modified))
Review Comment:
Filed [BEAM-14393](https://issues.apache.org/jira/browse/BEAM-14393) to
track the public API improvement.
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]