mik-laj commented on a change in pull request #8631:
URL: https://github.com/apache/airflow/pull/8631#discussion_r422786834
##########
File path: airflow/providers/google/cloud/hooks/bigquery.py
##########
@@ -1100,42 +1157,28 @@ def run_table_upsert(self, dataset_id: str,
table_resource: Dict,
project will be self.project_id.
:return:
"""
- service = self.get_service()
- # check to see if the table exists
+ project_id = project_id or self.project_id
table_id = table_resource['tableReference']['tableId']
- project_id = project_id if project_id is not None else self.project_id
- tables_list_resp = service.tables().list( # pylint: disable=no-member
- projectId=project_id,
datasetId=dataset_id).execute(num_retries=self.num_retries)
- while True:
- for table in tables_list_resp.get('tables', []):
- if table['tableReference']['tableId'] == table_id:
- # found the table, do update
- self.log.info('Table %s:%s.%s exists, updating.',
- project_id, dataset_id, table_id)
- return service.tables().update( # pylint:
disable=no-member
- projectId=project_id,
- datasetId=dataset_id,
- tableId=table_id,
-
body=table_resource).execute(num_retries=self.num_retries)
- # If there is a next page, we need to check the next page.
- if 'nextPageToken' in tables_list_resp:
- tables_list_resp = service.tables()\
- .list(projectId=project_id, # pylint: disable=no-member
- datasetId=dataset_id,
- pageToken=tables_list_resp['nextPageToken'])\
- .execute(num_retries=self.num_retries)
- # If there is no next page, then the table doesn't exist.
- else:
- # do insert
- self.log.info('Table %s:%s.%s does not exist. creating.',
- project_id, dataset_id, table_id)
- return service.tables().insert( # pylint: disable=no-member
- projectId=project_id,
- datasetId=dataset_id,
- body=table_resource).execute(num_retries=self.num_retries)
+ table_resource = self._resolve_table_reference(
+ table_resource=table_resource,
+ project_id=project_id,
+ dataset_id=dataset_id,
+ table_id=table_id
+ )
- def run_table_delete(self, deletion_dataset_table: str,
- ignore_if_missing: bool = False) -> None:
+ tables_list_resp = self.get_dataset_tables(dataset_id=dataset_id,
project_id=project_id)
+ for tab in tables_list_resp:
+ if tab['tableId'] == table_id:
+ self.log.info('Table %s:%s.%s exists, updating.', project_id,
dataset_id, table_id)
+ return self.update_table(table_resource=table_resource)
Review comment:
From a technical point of view, it is correct to exit the loop using
return, but I think it would be better to split it: search for the item first,
and if it exists, perform the given operation and then exit.
----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
For queries about this service, please contact Infrastructure at:
[email protected]