This is an automated email from the ASF dual-hosted git repository.

pabloem pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/beam.git


The following commit(s) were added to refs/heads/master by this push:
     new 885ecbf  [BEAM-8803] BigQuery Streaming Inserts are always retried by default. (#10195)
885ecbf is described below

commit 885ecbf3f49257e8b6b4ac376cb5a79ed6282580
Author: Pablo <[email protected]>
AuthorDate: Sun Nov 24 13:53:24 2019 -0800

    [BEAM-8803] BigQuery Streaming Inserts are always retried by default. (#10195)
    
    * BigQuery Streaming Inserts are always retried by default.
    
    * Fix lint.
    
    * Adding runtime error on errors for always retry.
    
    * Default behaviour is to always retry.
    
    * fix lint
---
 sdks/python/apache_beam/io/gcp/bigquery.py | 10 ++++++++--
 1 file changed, 8 insertions(+), 2 deletions(-)

diff --git a/sdks/python/apache_beam/io/gcp/bigquery.py b/sdks/python/apache_beam/io/gcp/bigquery.py
index 0280c61..61c93ff 100644
--- a/sdks/python/apache_beam/io/gcp/bigquery.py
+++ b/sdks/python/apache_beam/io/gcp/bigquery.py
@@ -728,7 +728,7 @@ class BigQueryWriteFn(DoFn):
     self._max_buffered_rows = (max_buffered_rows
                                or BigQueryWriteFn.DEFAULT_MAX_BUFFERED_ROWS)
     self._retry_strategy = (
-        retry_strategy or bigquery_tools.RetryStrategy.RETRY_ON_TRANSIENT_ERROR)
+        retry_strategy or bigquery_tools.RetryStrategy.RETRY_ALWAYS)
 
     self.additional_bq_parameters = additional_bq_parameters or {}
 
@@ -868,7 +868,9 @@ class BigQueryWriteFn(DoFn):
           insert_ids=insert_ids,
           skip_invalid_rows=True)
 
-      _LOGGER.debug("Passed: %s. Errors are %s", passed, errors)
+      if not passed:
+        _LOGGER.info("There were errors inserting to BigQuery: %s",
+                     errors)
       failed_rows = [rows[entry.index] for entry in errors]
       should_retry = any(
           bigquery_tools.RetryStrategy.should_retry(
@@ -1066,6 +1068,10 @@ bigquery_v2_messages.TableSchema`. or a `ValueProvider` that has a JSON string,
         FILE_LOADS on Batch pipelines.
       insert_retry_strategy: The strategy to use when retrying streaming inserts
         into BigQuery. Options are shown in bigquery_tools.RetryStrategy attrs.
+        Default is to retry always. This means that whenever there are rows
+        that fail to be inserted to BigQuery, they will be retried indefinitely.
+        Other retry strategy settings will produce a deadletter PCollection
+        as output.
       additional_bq_parameters (callable): A function that returns a dictionary
         with additional parameters to pass to BQ when creating / loading data
         into a table. These can be 'timePartitioning', 'clustering', etc. They

Reply via email to