[
https://issues.apache.org/jira/browse/BEAM-11939?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel&focusedCommentId=17549217#comment-17549217
]
Danny McCormick commented on BEAM-11939:
----------------------------------------
This issue has been migrated to https://github.com/apache/beam/issues/20824
> BigQuery FILE_LOADS failed with 400 error in streaming mode in Python
> ---------------------------------------------------------------------
>
> Key: BEAM-11939
> URL: https://issues.apache.org/jira/browse/BEAM-11939
> Project: Beam
> Issue Type: Bug
> Components: io-py-gcp, sdk-py-core
> Affects Versions: 2.28.0
> Reporter: Yusuke Shimizu
> Priority: P3
>
>
> We are using FILE_LOADS to write to BigQuery in streaming mode using Python.
> After running for about 1 hour, the Beam job throws a
> `RuntimeError: apitools.base.py.exceptions.HttpBadRequestError` exception with
> the error message "Load configuration must specify at least one source URI".
> {code:java}
> //
> Traceback (most recent call last):
> File
> "/usr/local/lib/python3.8/site-packages/apache_beam/runners/worker/sdk_worker.py",
> line 289, in _execute
> response = task()
> File
> "/usr/local/lib/python3.8/site-packages/apache_beam/runners/worker/sdk_worker.py",
> line 362, in <lambda>
> lambda: self.create_worker().do_instruction(request), request)
> File
> "/usr/local/lib/python3.8/site-packages/apache_beam/runners/worker/sdk_worker.py",
> line 606, in do_instruction
> return getattr(self, request_type)(
> File
> "/usr/local/lib/python3.8/site-packages/apache_beam/runners/worker/sdk_worker.py",
> line 644, in process_bundle
> bundle_processor.process_bundle(instruction_id))
> File
> "/usr/local/lib/python3.8/site-packages/apache_beam/runners/worker/bundle_processor.py",
> line 999, in process_bundle
> input_op_by_transform_id[element.transform_id].process_encoded(
> File
> "/usr/local/lib/python3.8/site-packages/apache_beam/runners/worker/bundle_processor.py",
> line 228, in process_encoded
> self.output(decoded_value)
> File "apache_beam/runners/worker/operations.py", line 357, in
> apache_beam.runners.worker.operations.Operation.output
> File "apache_beam/runners/worker/operations.py", line 359, in
> apache_beam.runners.worker.operations.Operation.output
> File "apache_beam/runners/worker/operations.py", line 221, in
> apache_beam.runners.worker.operations.SingletonConsumerSet.receive
> File "apache_beam/runners/worker/operations.py", line 718, in
> apache_beam.runners.worker.operations.DoOperation.process
> File "apache_beam/runners/worker/operations.py", line 719, in
> apache_beam.runners.worker.operations.DoOperation.process
> File "apache_beam/runners/common.py", line 1241, in
> apache_beam.runners.common.DoFnRunner.process
> File "apache_beam/runners/common.py", line 1321, in
> apache_beam.runners.common.DoFnRunner._reraise_augmented
> File "/usr/local/lib/python3.8/site-packages/future/utils/__init__.py",
> line 446, in raise_with_traceback
> raise exc.with_traceback(traceback)
> File "apache_beam/runners/common.py", line 1239, in
> apache_beam.runners.common.DoFnRunner.process
> File "apache_beam/runners/common.py", line 768, in
> apache_beam.runners.common.PerWindowInvoker.invoke_process
> File "apache_beam/runners/common.py", line 891, in
> apache_beam.runners.common.PerWindowInvoker._invoke_process_per_window
> File "apache_beam/runners/common.py", line 1374, in
> apache_beam.runners.common._OutputProcessor.process_outputs
> File
> "/usr/local/lib/python3.8/site-packages/apache_beam/io/gcp/bigquery_file_loads.py",
> line 520, in process
> job_reference = self.bq_wrapper.perform_load_job(
> File
> "/usr/local/lib/python3.8/site-packages/apache_beam/io/gcp/bigquery_tools.py",
> line 825, in perform_load_job
> return self._insert_load_job(
> File "/usr/local/lib/python3.8/site-packages/apache_beam/utils/retry.py",
> line 260, in wrapper
> return fun(*args, **kwargs)
> File
> "/usr/local/lib/python3.8/site-packages/apache_beam/io/gcp/bigquery_tools.py",
> line 438, in _insert_load_job
> return self._start_job(request).jobReference
> File
> "/usr/local/lib/python3.8/site-packages/apache_beam/io/gcp/bigquery_tools.py",
> line 449, in _start_job
> response = self.client.jobs.Insert(request)
> File
> "/usr/local/lib/python3.8/site-packages/apache_beam/io/gcp/internal/clients/bigquery/bigquery_v2_client.py",
> line 345, in Insert
> return self._RunMethod(
> File "/usr/local/lib/python3.8/site-packages/apitools/base/py/base_api.py",
> line 731, in _RunMethod
> return self.ProcessHttpResponse(method_config, http_response, request)
> File "/usr/local/lib/python3.8/site-packages/apitools/base/py/base_api.py",
> line 737, in ProcessHttpResponse
> self.__ProcessHttpResponse(method_config, http_response, request))
> File "/usr/local/lib/python3.8/site-packages/apitools/base/py/base_api.py",
> line 603, in __ProcessHttpResponse
> raise exceptions.HttpError.FromResponse(
> RuntimeError: apitools.base.py.exceptions.HttpBadRequestError: HttpError
> accessing
> <https://bigquery.googleapis.com/bigquery/v2/projects/my-project/jobs?alt=json>:
> response: <{'vary': 'Origin, X-Origin, Referer', 'content-type':
> 'application/json; charset=UTF-8', 'date': 'Tue, 09 Mar 2021 09:31:01 GMT',
> 'server': 'ESF', 'cache-control': 'private', 'x-xss-protection': '0',
> 'x-frame-options': 'SAMEORIGIN', 'x-content-type-options': 'nosniff',
> 'transfer-encoding': 'chunked', 'status': '400', 'content-length': '318',
> '-content-encoding': 'gzip'}>, content <{
> "error": {
> "code": 400,
> "message": "Load configuration must specify at least one source URI",
> "errors": [
> {
> "message": "Load configuration must specify at least one source URI",
> "domain": "global",
> "reason": "invalid"
> }
> ],
> "status": "INVALID_ARGUMENT"
> }
> }
> {code}
>
> Perhaps this can be fixed by validating that the input value `[files(=
> element[1])|https://github.com/apache/beam/blob/v2.28.0/sdks/python/apache_beam/io/gcp/bigquery_file_loads.py#L469]`
> is not empty.
--
This message was sent by Atlassian Jira
(v8.20.7#820007)