pabloem commented on code in PR #17380:
URL: https://github.com/apache/beam/pull/17380#discussion_r863189335


##########
sdks/python/apache_beam/io/azure/blobstorageio.py:
##########
@@ -559,40 +569,54 @@ def _delete_batch(self, container, blobs):
 
   @retry.with_exponential_backoff(
       retry_filter=retry.retry_on_beam_io_error_filter)
-  def list_prefix(self, path):
+  def list_prefix(self, path, with_metadata=False):
     """Lists files matching the prefix.
 
     Args:
       path: Azure Blob Storage file path pattern in the form
             azfs://<storage-account>/<container>/[name].
+      with_metadata: Experimental. Specify whether returns file metadata.
 
     Returns:
-      Dictionary of file name -> size.
+      If ``with_metadata`` is False: dict of file name -> size; if
+        ``with_metadata`` is True: dict of file name -> tuple(size, timestamp).
     """
     storage_account, container, blob = parse_azfs_path(
         path, blob_optional=True, get_account=True)
-    file_sizes = {}
+    file_info = {}
     counter = 0
     start_time = time.time()
 
-    logging.info("Starting the size estimation of the input")
+    if with_metadata:
+      logging.info("Starting the file information of the input")
+    else:
+      logging.info("Starting the size estimation of the input")
     container_client = self.client.get_container_client(container)
 
     while True:
       response = container_client.list_blobs(name_starts_with=blob)
       for item in response:
         file_name = "azfs://%s/%s/%s" % (storage_account, container, item.name)
-        file_sizes[file_name] = item.size
+        if with_metadata:
+          file_info[file_name] = (
+              item.size, self._updated_to_seconds(item.last_modified))

Review Comment:
   To be honest, I might prefer that we change this to always return a 
namedtuple, instead of a tuple or a bare value depending on the arguments.
   
   We don't need to change this now, as it's not officially a public API, but 
it may be worth doing in the future as a cleanup. Maybe add a JIRA issue with 
a target version of Beam 3.0.0?



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]

Reply via email to