ajamato commented on a change in pull request #14770:
URL: https://github.com/apache/beam/pull/14770#discussion_r640808162
##########
File path: sdks/python/apache_beam/io/gcp/gcsio.py
##########
@@ -586,13 +610,26 @@ def __init__(self, client, path, buffer_size):
auto_transfer=False,
chunksize=self._buffer_size,
num_retries=20)
- self._client.objects.Get(self._get_request, download=self._downloader)
+
+ try:
+ self._client.objects.Get(self._get_request, download=self._downloader)
+ service_call_metric.call('ok')
+ except HttpError as e:
+ service_call_metric.call(e)
@retry.with_exponential_backoff(
retry_filter=retry.retry_on_server_errors_and_timeout_filter)
def _get_object_metadata(self, get_request):
return self._client.objects.Get(get_request)
+ def _get_bucket(self, bucket_name):
+ """Returns a bucket from its name, or None if it does not exist."""
+ try:
Review comment:
Please have this code store the projectIds in a self.bucketToProjectId
dict(),
and only issue the StorageBucketsGetRequest if the projectId
for the bucket has not been previously retrieved;
otherwise return the contents of self.bucketToProjectId[bucket_name].
This prevents a StorageBucketsGetRequest from being issued every time
we call objects.Get (which would slow down GCSIO a lot).
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
For queries about this service, please contact Infrastructure at:
[email protected]