damccorm opened a new issue, #20824:
URL: https://github.com/apache/beam/issues/20824
We are using FILE_LOADS to write to BigQuery in streaming mode using Python.
After running for about an hour, the Beam job throws a
`RuntimeError: apitools.base.py.exceptions.HttpBadRequestError` with the
error message "Load configuration must specify at least one source URI".
```
//
Traceback (most recent call last):
File
"/usr/local/lib/python3.8/site-packages/apache_beam/runners/worker/sdk_worker.py",
line 289, in _execute
response = task()
File
"/usr/local/lib/python3.8/site-packages/apache_beam/runners/worker/sdk_worker.py",
line 362, in <lambda>
lambda: self.create_worker().do_instruction(request), request)
File
"/usr/local/lib/python3.8/site-packages/apache_beam/runners/worker/sdk_worker.py",
line 606, in do_instruction
return getattr(self, request_type)(
File
"/usr/local/lib/python3.8/site-packages/apache_beam/runners/worker/sdk_worker.py",
line 644, in process_bundle
bundle_processor.process_bundle(instruction_id))
File
"/usr/local/lib/python3.8/site-packages/apache_beam/runners/worker/bundle_processor.py",
line 999, in process_bundle
input_op_by_transform_id[element.transform_id].process_encoded(
File
"/usr/local/lib/python3.8/site-packages/apache_beam/runners/worker/bundle_processor.py",
line 228,
in process_encoded
self.output(decoded_value)
File "apache_beam/runners/worker/operations.py",
line 357, in apache_beam.runners.worker.operations.Operation.output
File "apache_beam/runners/worker/operations.py",
line 359, in apache_beam.runners.worker.operations.Operation.output
File "apache_beam/runners/worker/operations.py",
line 221, in
apache_beam.runners.worker.operations.SingletonConsumerSet.receive
File "apache_beam/runners/worker/operations.py",
line 718, in apache_beam.runners.worker.operations.DoOperation.process
File "apache_beam/runners/worker/operations.py",
line 719, in apache_beam.runners.worker.operations.DoOperation.process
File "apache_beam/runners/common.py",
line 1241, in apache_beam.runners.common.DoFnRunner.process
File "apache_beam/runners/common.py",
line 1321, in apache_beam.runners.common.DoFnRunner._reraise_augmented
File "/usr/local/lib/python3.8/site-packages/future/utils/__init__.py",
line 446, in raise_with_traceback
raise exc.with_traceback(traceback)
File "apache_beam/runners/common.py",
line 1239, in apache_beam.runners.common.DoFnRunner.process
File "apache_beam/runners/common.py",
line 768, in apache_beam.runners.common.PerWindowInvoker.invoke_process
File "apache_beam/runners/common.py",
line 891, in
apache_beam.runners.common.PerWindowInvoker._invoke_process_per_window
File "apache_beam/runners/common.py",
line 1374, in apache_beam.runners.common._OutputProcessor.process_outputs
File
"/usr/local/lib/python3.8/site-packages/apache_beam/io/gcp/bigquery_file_loads.py",
line 520, in process
job_reference = self.bq_wrapper.perform_load_job(
File
"/usr/local/lib/python3.8/site-packages/apache_beam/io/gcp/bigquery_tools.py",
line 825, in perform_load_job
return self._insert_load_job(
File "/usr/local/lib/python3.8/site-packages/apache_beam/utils/retry.py",
line 260, in wrapper
return fun(*args, **kwargs)
File
"/usr/local/lib/python3.8/site-packages/apache_beam/io/gcp/bigquery_tools.py",
line 438, in _insert_load_job
return self._start_job(request).jobReference
File
"/usr/local/lib/python3.8/site-packages/apache_beam/io/gcp/bigquery_tools.py",
line 449, in _start_job
response = self.client.jobs.Insert(request)
File
"/usr/local/lib/python3.8/site-packages/apache_beam/io/gcp/internal/clients/bigquery/bigquery_v2_client.py",
line 345, in Insert
return self._RunMethod(
File "/usr/local/lib/python3.8/site-packages/apitools/base/py/base_api.py",
line 731, in _RunMethod
return self.ProcessHttpResponse(method_config, http_response, request)
File "/usr/local/lib/python3.8/site-packages/apitools/base/py/base_api.py",
line 737, in ProcessHttpResponse
self.__ProcessHttpResponse(method_config, http_response, request))
File "/usr/local/lib/python3.8/site-packages/apitools/base/py/base_api.py",
line 603, in __ProcessHttpResponse
raise exceptions.HttpError.FromResponse(
RuntimeError: apitools.base.py.exceptions.HttpBadRequestError:
HttpError accessing
<https://bigquery.googleapis.com/bigquery/v2/projects/my-project/jobs?alt=json>:
response: <{'vary': 'Origin, X-Origin, Referer', 'content-type':
'application/json; charset=UTF-8',
'date': 'Tue, 09 Mar 2021 09:31:01 GMT', 'server': 'ESF', 'cache-control':
'private', 'x-xss-protection':
'0', 'x-frame-options': 'SAMEORIGIN', 'x-content-type-options': 'nosniff',
'transfer-encoding': 'chunked',
'status': '400', 'content-length': '318', '-content-encoding': 'gzip'}>,
content <{
"error": {
"code": 400,
"message": "Load configuration must specify at least one source URI",
"errors":
[
{
"message": "Load configuration must specify at least one source URI",
"domain":
"global",
"reason": "invalid"
}
],
"status": "INVALID_ARGUMENT"
}
}
```
Perhaps this can be fixed by validating that the input value
[`files` (= `element[1]`)](https://github.com/apache/beam/blob/v2.28.0/sdks/python/apache_beam/io/gcp/bigquery_file_loads.py#L469)
is not empty.
Imported from Jira
[BEAM-11939](https://issues.apache.org/jira/browse/BEAM-11939). Original Jira
may contain additional context.
Reported by: yshimizu.
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]