gemini-code-assist[bot] commented on code in PR #37416:
URL: https://github.com/apache/beam/pull/37416#discussion_r2732862088
##########
sdks/python/apache_beam/io/gcp/bigquery.py:
##########
@@ -932,6 +948,31 @@ def _export_files(self, bq):
return table.schema, metadata_list
+def _create_bq_storage_client(quota_project_id=None):
+ """Create a BigQueryReadClient with optional quota project.
+
+ Args:
+ quota_project_id: Optional GCP project ID to use for quota and billing.
+
+ Returns:
+ A BigQueryReadClient instance.
+ """
+ if quota_project_id:
+ try:
+ import google.auth
+ credentials, _ = google.auth.default()
+ if hasattr(credentials, 'with_quota_project'):
+ credentials = credentials.with_quota_project(quota_project_id)
+ return bq_storage.BigQueryReadClient(credentials=credentials)
+ except Exception as e: # pylint: disable=broad-except
+ _LOGGER.warning(
Review Comment:

Catching a broad `Exception` can hide unexpected errors and make debugging
more difficult. It's generally better to catch more specific exceptions that
you anticipate, such as `google.auth.exceptions.DefaultCredentialsError` or
`AttributeError` if `with_quota_project` is not guaranteed to exist on
`credentials`.
```suggestion
except (google.auth.exceptions.DefaultCredentialsError, AttributeError)
as e: # pylint: disable=broad-except
```
##########
sdks/python/apache_beam/io/gcp/bigquery_tools.py:
##########
@@ -1399,19 +1406,65 @@ def convert_row_to_dict(self, row, schema):
@staticmethod
def from_pipeline_options(pipeline_options: PipelineOptions):
+ """Create a BigQueryWrapper from pipeline options.
+
+ Args:
+ pipeline_options: Pipeline options containing GCP configuration.
+ The quota_project_id is read from GoogleCloudOptions if set.
+ """
+ quota_project_id = None
+ if pipeline_options is not None:
+ quota_project_id = pipeline_options.view_as(
+ GoogleCloudOptions).quota_project_id
return BigQueryWrapper(
- client=BigQueryWrapper._bigquery_client(pipeline_options))
+ client=BigQueryWrapper._bigquery_client(pipeline_options),
+ quota_project_id=quota_project_id)
@staticmethod
- def _bigquery_client(pipeline_options: PipelineOptions):
+ def _bigquery_client(
+ pipeline_options: PipelineOptions, quota_project_id: str = None):
+ """Create a BigQuery API client from pipeline options.
+
+ Args:
+ pipeline_options: Pipeline options for credentials.
+ quota_project_id: Optional quota project ID. If not provided, will be
+ extracted from pipeline_options.
+ """
+ credentials = auth.get_service_credentials(pipeline_options)
+ # Use explicit quota_project_id if provided, otherwise get from options
+ if quota_project_id is None and pipeline_options is not None:
+ quota_project_id = pipeline_options.view_as(
+ GoogleCloudOptions).quota_project_id
+ if quota_project_id:
+ credentials = auth.with_quota_project(credentials, quota_project_id)
return bigquery.BigqueryV2(
http=get_new_http(),
- credentials=auth.get_service_credentials(pipeline_options),
+ credentials=credentials,
response_encoding='utf8',
additional_http_headers={
"user-agent": "apache-beam-%s" % apache_beam.__version__
})
+ @staticmethod
+ def _gcp_bigquery_client(quota_project_id: str = None):
+ """Create a google-cloud-bigquery Client with optional quota project."""
+ credentials = None
+
+ if quota_project_id:
+ # Get default credentials and apply quota project
+ try:
+ import google.auth
+ credentials, _ = google.auth.default()
+ if hasattr(credentials, 'with_quota_project'):
+ credentials = credentials.with_quota_project(quota_project_id)
+ except Exception: # pylint: disable=broad-except
+ pass # Fall back to default credentials
Review Comment:

Similar to the previous comment, catching a broad `Exception` can obscure
specific issues. It's advisable to catch more targeted exceptions, such as
`google.auth.exceptions.DefaultCredentialsError` or `AttributeError` if
`with_quota_project` is not guaranteed to exist on `credentials`.
```suggestion
except (google.auth.exceptions.DefaultCredentialsError,
AttributeError): # pylint: disable=broad-except
```
##########
sdks/python/apache_beam/io/gcp/bigquery.py:
##########
@@ -715,6 +717,7 @@ def display_data(self):
'use_legacy_sql': self.use_legacy_sql,
'bigquery_job_labels': json.dumps(self.bigquery_job_labels),
'export_file_format': export_format,
+ 'quota_project_id': str(self._get_quota_project_id()),
Review Comment:

When `_get_quota_project_id()` returns `None`, `str(None)` will result in
the string "None" being displayed. It might be more user-friendly to display an
empty string or omit the entry entirely if no quota project is set, rather than
explicitly showing "None".
```suggestion
'quota_project_id': self._get_quota_project_id() or '',
```
##########
sdks/python/apache_beam/io/gcp/bigquery_tools.py:
##########
@@ -359,11 +360,17 @@ class BigQueryWrapper(object):
HISTOGRAM_METRIC_LOGGER = MetricLogger()
- def __init__(self, client=None, temp_dataset_id=None, temp_table_ref=None):
- self.client = client or BigQueryWrapper._bigquery_client(PipelineOptions())
- self.gcp_bq_client = client or gcp_bigquery.Client(
- client_info=ClientInfo(
- user_agent="apache-beam-%s" % apache_beam.__version__))
+ def __init__(
+ self,
+ client=None,
+ temp_dataset_id=None,
+ temp_table_ref=None,
+ quota_project_id=None):
+ self.quota_project_id = quota_project_id
+ self.client = client or BigQueryWrapper._bigquery_client(
+ PipelineOptions(), quota_project_id=quota_project_id)
+ self.gcp_bq_client = client or BigQueryWrapper._gcp_bigquery_client(
+ quota_project_id=quota_project_id)
Review Comment:

If a `client` is explicitly passed to the `BigQueryWrapper` constructor, the
`_gcp_bigquery_client` method is still called, and its result is then discarded
because `self.gcp_bq_client` is set to the provided `client`. This results in
an unnecessary function call. Consider calling `_gcp_bigquery_client` only if
`client` is `None`.
```suggestion
self.gcp_bq_client = client if client else
BigQueryWrapper._gcp_bigquery_client(
quota_project_id=quota_project_id)
```
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]