Repository: beam
Updated Branches:
  refs/heads/master eb347bf0d -> 7bc2938a2
Updates BigQuery read transform to correctly process empty repeated fields.

Project: http://git-wip-us.apache.org/repos/asf/beam/repo
Commit: http://git-wip-us.apache.org/repos/asf/beam/commit/08de9e98
Tree: http://git-wip-us.apache.org/repos/asf/beam/tree/08de9e98
Diff: http://git-wip-us.apache.org/repos/asf/beam/diff/08de9e98

Branch: refs/heads/master
Commit: 08de9e98d16b762f650631d8553276c28d044f1b
Parents: eb347bf
Author: [email protected] <[email protected]>
Authored: Wed Mar 22 13:17:26 2017 -0700
Committer: Chamikara Jayalath <[email protected]>
Committed: Wed Mar 22 17:11:44 2017 -0700

----------------------------------------------------------------------
 sdks/python/apache_beam/io/gcp/bigquery.py      | 13 +++++++++++--
 sdks/python/apache_beam/io/gcp/bigquery_test.py |  3 ++-
 2 files changed, 13 insertions(+), 3 deletions(-)
----------------------------------------------------------------------

http://git-wip-us.apache.org/repos/asf/beam/blob/08de9e98/sdks/python/apache_beam/io/gcp/bigquery.py
----------------------------------------------------------------------
diff --git a/sdks/python/apache_beam/io/gcp/bigquery.py b/sdks/python/apache_beam/io/gcp/bigquery.py
index 3186a55..a917d51 100644
--- a/sdks/python/apache_beam/io/gcp/bigquery.py
+++ b/sdks/python/apache_beam/io/gcp/bigquery.py
@@ -1069,8 +1069,17 @@ class BigQueryWrapper(object):
         cell = row['f'][index]
         value = cell['v'] if 'v' in cell else None
       if field.mode == 'REPEATED':
-        result[field.name] = [self._convert_cell_value_to_dict(x['v'], field)
-                              for x in value]
+        if value is None:
+          # We receive 'None' for repeated fields without any values when
+          # 'flatten_results' is 'False'.
+          # When 'flatten_results' is 'True', we receive individual values
+          # instead of a list of values hence we do not hit this condition.
+          # We return an empty list here instead of 'None' to be consistent with
+          # other runners and to be backwards compatible to users.
+          result[field.name] = []
+        else:
+          result[field.name] = [self._convert_cell_value_to_dict(x['v'], field)
+                                for x in value]
       elif value is None:
         if not field.mode == 'NULLABLE':
           raise ValueError('Received \'None\' as the value for the field %s '

http://git-wip-us.apache.org/repos/asf/beam/blob/08de9e98/sdks/python/apache_beam/io/gcp/bigquery_test.py
----------------------------------------------------------------------
diff --git a/sdks/python/apache_beam/io/gcp/bigquery_test.py b/sdks/python/apache_beam/io/gcp/bigquery_test.py
index fbf073c..2b83079 100644
--- a/sdks/python/apache_beam/io/gcp/bigquery_test.py
+++ b/sdks/python/apache_beam/io/gcp/bigquery_test.py
@@ -402,7 +402,8 @@ class TestBigQueryReader(unittest.TestCase):
             bigquery.TableCell(v=None),
             bigquery.TableCell(v=None),
             bigquery.TableCell(v=None),
-            bigquery.TableCell(v=to_json_value([]))])]
+            # REPEATED field without any values.
+            bigquery.TableCell(v=None)])]
     return table_rows, schema, expected_rows
 
   def test_read_from_table(self):
