sadovnychyi commented on code in PR #33539:
URL: https://github.com/apache/beam/pull/33539#discussion_r1909046517
##########
sdks/python/apache_beam/io/gcp/gcsio.py:
##########
@@ -264,9 +266,45 @@ def delete(self, path):
     except NotFound:
       return
 
+  def _batch_with_retry(self, requests, fn):
+    current_requests = [*enumerate(requests)]
+    responses = [None for _ in current_requests]
+
+    @self._storage_client_retry
+    def run_with_retry():
+      current_batch = self.client.batch(raise_exception=False)
+      with current_batch:
+        for _, request in current_requests:
+          fn(request)
+      last_retryable_exception = None
+      for (i, current_pair), response in zip(
+          [*current_requests], current_batch._responses
+      ):
+        responses[i] = response
+        should_retry = (
+            response.status_code >= 400 and
+            self._storage_client_retry._predicate(from_http_response(response)))
+        if should_retry:
+          last_retryable_exception = from_http_response(response)
+        else:
+          current_requests.remove((i, current_pair))
+      if last_retryable_exception:
+        raise last_retryable_exception
+
+    try:
+      run_with_retry()
+    except GoogleCloudError:
Review Comment:
This keeps the behaviour the same as before this change: `copy_batch` by itself
doesn't throw any exceptions (at least in 429 cases). Without this line, the
function would throw an exception once we ran out of retries.
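
For context, here is a minimal, standalone sketch of the pattern discussed above (not Beam's actual code): only the requests whose responses had a retryable status are resent, and the final failure is swallowed so results are reported through the returned per-request responses rather than by raising, which is what keeps `copy_batch`'s behaviour unchanged. The names `batch_with_retry`, `send_one`, `is_retryable`, and `_FakeResponse` are hypothetical, and the `@self._storage_client_retry` decorator is replaced by a plain attempt loop for brevity.

```python
class _FakeResponse:
  def __init__(self, status_code):
    self.status_code = status_code


def batch_with_retry(requests, send_one, is_retryable, max_attempts=3):
  # One slot per original request; a slot keeps the last response it got.
  pending = list(enumerate(requests))
  responses = [None] * len(requests)
  for _ in range(max_attempts):
    still_pending = []
    for i, request in pending:
      response = send_one(request)
      responses[i] = response
      # Only requests that failed with a retryable status are resent.
      if response.status_code >= 400 and is_retryable(response.status_code):
        still_pending.append((i, request))
    pending = still_pending
    if not pending:
      break
  # Mirrors the `except GoogleCloudError: pass` above: never raise here,
  # the caller inspects the per-request responses instead.
  return responses


if __name__ == "__main__":
  attempts = {"b": 0}

  def send_one(name):
    # "b" fails once with 429 (retryable), then succeeds.
    if name == "b" and attempts["b"] < 1:
      attempts["b"] += 1
      return _FakeResponse(429)
    return _FakeResponse(200)

  results = batch_with_retry(["a", "b", "c"], send_one, lambda code: code == 429)
  print([r.status_code for r in results])  # [200, 200, 200]
```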