tvalentyn commented on code in PR #30202:
URL: https://github.com/apache/beam/pull/30202#discussion_r1483537668
##########
sdks/python/apache_beam/ml/transforms/utils.py:
##########
@@ -18,18 +18,47 @@
__all__ = ['ArtifactsFetcher']
import os
+import tempfile
import typing
import tensorflow_transform as tft
from apache_beam.ml.transforms import base
+from google.cloud.storage import Client
+from google.cloud.storage import transfer_manager
-class ArtifactsFetcher():
+
+def download_artifacts_from_gcs(bucket_name, prefix, local_path):
+ """Downloads artifacts from GCS to the local file system.
+ Args:
+ bucket_name: The name of the GCS bucket to download from.
+ folder_name: The name of the folder to download.
Review Comment:
```suggestion
prefix: Prefix of GCS objects to download.
```
##########
sdks/python/apache_beam/ml/transforms/utils.py:
##########
@@ -18,18 +18,47 @@
__all__ = ['ArtifactsFetcher']
import os
+import tempfile
import typing
import tensorflow_transform as tft
from apache_beam.ml.transforms import base
+from google.cloud.storage import Client
+from google.cloud.storage import transfer_manager
-class ArtifactsFetcher():
+
+def download_artifacts_from_gcs(bucket_name, prefix, local_path):
+ """Downloads artifacts from GCS to the local file system.
+ Args:
+ bucket_name: The name of the GCS bucket to download from.
+ folder_name: The name of the folder to download.
+ local_path: The local path to download the folder to.
+ """
Review Comment:
```suggestion
"""
```
##########
sdks/python/apache_beam/ml/transforms/utils.py:
##########
@@ -18,18 +18,47 @@
__all__ = ['ArtifactsFetcher']
import os
+import tempfile
import typing
import tensorflow_transform as tft
from apache_beam.ml.transforms import base
+from google.cloud.storage import Client
+from google.cloud.storage import transfer_manager
-class ArtifactsFetcher():
+
+def download_artifacts_from_gcs(bucket_name, prefix, local_path):
+ """Downloads artifacts from GCS to the local file system.
+ Args:
+ bucket_name: The name of the GCS bucket to download from.
+ folder_name: The name of the folder to download.
+ local_path: The local path to download the folder to.
+ """
+ client = Client()
+ bucket = client.get_bucket(bucket_name)
+ blobs = [blob.name for blob in bucket.list_blobs(prefix=prefix)]
+ _ = transfer_manager.download_many_to_path(
+ bucket, blobs, destination_directory=local_path, max_workers=6)
Review Comment:
The default for max_workers is 8; is there any particular reason to specify it
explicitly and reduce it to 6?
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]