[GitHub] [beam] tysonjh commented on a change in pull request #13170: [BEAM-9650] Adding support for ReadAll from BigQuery transform

GitBox Wed, 18 Nov 2020 15:42:49 -0800


tysonjh commented on a change in pull request #13170:
URL: https://github.com/apache/beam/pull/13170#discussion_r526495133




##########
File path: sdks/python/apache_beam/io/gcp/bigquery_read_it_test.py
##########
@@ -298,6 +298,108 @@ def test_iobase_source(self):
       assert_that(result, equal_to(self.get_expected_data(native=False)))
 
 
+class ReadAllBQTests(BigQueryReadIntegrationTests):
+  TABLE_DATA_1 = [{
+      'number': 1, 'str': 'abc'
+  }, {
+      'number': 2, 'str': 'def'
+  }, {
+      'number': 3, 'str': u'你好'
+  }, {
+      'number': 4, 'str': u'привет'
+  }]
+
+  TABLE_DATA_2 = [{
+      'number': 10, 'str': 'abcd'
+  }, {
+      'number': 20, 'str': 'defg'
+  }, {
+      'number': 30, 'str': u'你好'
+  }, {
+      'number': 40, 'str': u'привет'
+  }]
+
+  TABLE_DATA_3 = [{'number': 10, 'str': 'abcde', 'extra': 3}]
+
+  @classmethod
+  def setUpClass(cls):
+    super(ReadAllBQTests, cls).setUpClass()
+    cls.SCHEMA_BQ = cls.create_bq_schema()
+    cls.SCHEMA_BQ_WITH_EXTRA = cls.create_bq_schema(True)
+
+    cls.table_name1 = 'python_rd_table_1'
+    cls.table_schema1 = cls.create_table(
+        cls.table_name1, cls.TABLE_DATA_1, cls.SCHEMA_BQ)
+    table_id1 = '{}.{}'.format(cls.dataset_id, cls.table_name1)
+    cls.query1 = 'SELECT number, str FROM `%s`' % table_id1
+
+    cls.table_name2 = 'python_rd_table_2'
+    cls.table_schema2 = cls.create_table(
+        cls.table_name2, cls.TABLE_DATA_2, cls.SCHEMA_BQ)
+    table_id2 = '{}.{}'.format(cls.dataset_id, cls.table_name2)
+    cls.query2 = 'SELECT number, str FROM %s' % table_id2
+
+    cls.table_name3 = 'python_rd_table_3'
+    cls.table_schema3 = cls.create_table(
+        cls.table_name3, cls.TABLE_DATA_3, cls.SCHEMA_BQ_WITH_EXTRA)
+    table_id3 = '{}.{}'.format(cls.dataset_id, cls.table_name3)
+    cls.query3 = 'SELECT number, str, extra FROM `%s`' % table_id3
+
+  @classmethod
+  def create_table(cls, table_name, data, table_schema):
+    table = bigquery.Table(
+        tableReference=bigquery.TableReference(
+            projectId=cls.project, datasetId=cls.dataset_id,
+            tableId=table_name),
+        schema=table_schema)
+    request = bigquery.BigqueryTablesInsertRequest(
+        projectId=cls.project, datasetId=cls.dataset_id, table=table)
+    cls.bigquery_client.client.tables.Insert(request)
+    cls.bigquery_client.insert_rows(
+        cls.project, cls.dataset_id, table_name, data)
+    return table_schema
+
+  @classmethod
+  def create_bq_schema(cls, with_extra=False):
+    table_schema = bigquery.TableSchema()
+    table_field = bigquery.TableFieldSchema()
+    table_field.name = 'number'
+    table_field.type = 'INTEGER'
+    table_field.mode = 'NULLABLE'
+    table_schema.fields.append(table_field)
+    table_field = bigquery.TableFieldSchema()
+    table_field.name = 'str'
+    table_field.type = 'STRING'
+    table_field.mode = 'NULLABLE'
+    table_schema.fields.append(table_field)
+    if with_extra:
+      table_field = bigquery.TableFieldSchema()
+      table_field.name = 'extra'
+      table_field.type = 'INTEGER'
+      table_field.mode = 'NULLABLE'
+      table_schema.fields.append(table_field)
+    return table_schema
+
+  @skip(['PortableRunner', 'FlinkRunner'])
+  @attr('IT')
+  def test_read_queries(self):
+    args = self.args + ["--experiments=use_runner_v2"]

Review comment:
       Does this need to be here, or can it be set in the Groovy config that runs these tests instead?

##########
File path: sdks/python/apache_beam/io/gcp/bigquery_read_internal.py
##########
@@ -100,3 +123,309 @@ def process(self, unused_element, unused_signal, gcs_locations):
     )
 
     return main_output
+
+
+class ReadFromBigQueryRequest:

Review comment:
       I still find it strange that this is a user-facing API but lives inside `bigquery_read_internal.py`. Is this normal for Python? I would expect that users should not be using classes from `internal` files.




----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

For queries about this service, please contact Infrastructure at:
users@infra.apache.org

[GitHub] [beam] tysonjh commented on a change in pull request #13170: [BEAM-9650] Adding support for ReadAll from BigQuery transform

Reply via email to