emkornfield commented on a change in pull request #15185:
URL: https://github.com/apache/beam/pull/15185#discussion_r679444764
##########
File path: sdks/python/apache_beam/io/gcp/bigquery.py
##########
@@ -883,6 +890,206 @@ def _export_files(self, bq):
return table.schema, metadata_list
+class _CustomBigQueryStorageSourceBase(BoundedSource):
+ """A base class for BoundedSource implementations which read from BigQuery
+ using the BigQuery Storage API."""
+ def __init__(
+ self,
+ project=None,
+ dataset=None,
+ table=None,
+ selected_fields=None,
+ row_restriction=None,
+ pipeline_options=None):
+
+ if dataset is None:
+ raise ValueError('A BigQuery dataset must be specified.')
+ elif table is None:
+ raise ValueError('A BigQuery table must be specified.')
+ else:
+ self.table_reference = bigquery_tools.parse_table_reference(
+ table, dataset, project)
+
+ self.project = self.table_reference.projectId
+ self.dataset = self.table_reference.datasetId
+ self.table = self.table_reference.tableId
+ self.selected_fields = selected_fields
+ self.row_restriction = row_restriction
+ self.pipeline_options = pipeline_options
+ self.split_result = None
+ # The maximum number of streams which will be requested when creating a read
+ # session, regardless of the desired bundle size.
+ self.MAX_SPLIT_COUNT = 10000
Review comment:
Is this meant to be public? Should it instead be a constant that is private to
the package (defined outside of this class)?
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]