ajamato commented on a change in pull request #14770:
URL: https://github.com/apache/beam/pull/14770#discussion_r640808162



##########
File path: sdks/python/apache_beam/io/gcp/gcsio.py
##########
@@ -586,13 +610,26 @@ def __init__(self, client, path, buffer_size):
         auto_transfer=False,
         chunksize=self._buffer_size,
         num_retries=20)
-    self._client.objects.Get(self._get_request, download=self._downloader)
+
+    try:
+      self._client.objects.Get(self._get_request, download=self._downloader)
+      service_call_metric.call('ok')
+    except HttpError as e:
+      service_call_metric.call(e)
 
   @retry.with_exponential_backoff(
       retry_filter=retry.retry_on_server_errors_and_timeout_filter)
   def _get_object_metadata(self, get_request):
     return self._client.objects.Get(get_request)
 
+  def _get_bucket(self, bucket_name):
+    """Returns a bucket from its name, or None if it does not exist."""
+    try:

Review comment:
       Please have this code store the projectIds in a self.bucketToProjectId 
dict()
   and only issue the StorageBucketsGetRequest if the projectId 
for the bucket has not been previously retrieved;
   otherwise return the contents of self.bucketToProjectId[bucket_name]
   
   This prevents StorageBucketsGetRequest from being issued every time 
we call objects.Get (which would slow down GCSIO a lot)




-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

For queries about this service, please contact Infrastructure at:
[email protected]


Reply via email to