[ 
https://issues.apache.org/jira/browse/BEAM-11939?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel&focusedCommentId=17549217#comment-17549217
 ] 

Danny McCormick commented on BEAM-11939:
----------------------------------------

This issue has been migrated to https://github.com/apache/beam/issues/20824

> BigQuery FILE_LOADS failed with 400 error in streaming mode in Python
> ---------------------------------------------------------------------
>
>                 Key: BEAM-11939
>                 URL: https://issues.apache.org/jira/browse/BEAM-11939
>             Project: Beam
>          Issue Type: Bug
>          Components: io-py-gcp, sdk-py-core
>    Affects Versions: 2.28.0
>            Reporter: Yusuke Shimizu
>            Priority: P3
>
>  
> We are using FILE_LOADS to write to BigQuery in streaming mode using Python. 
> After running for about 1 hour, the Beam job throws a 
> `RuntimeError: apitools.base.py.exceptions.HttpBadRequestError` exception with 
> the error message "Load configuration must specify at least one source URI".
> {code:java}
> //
> Traceback (most recent call last):
>   File 
> "/usr/local/lib/python3.8/site-packages/apache_beam/runners/worker/sdk_worker.py",
>  line 289, in _execute
>     response = task()
>   File 
> "/usr/local/lib/python3.8/site-packages/apache_beam/runners/worker/sdk_worker.py",
>  line 362, in <lambda>
>     lambda: self.create_worker().do_instruction(request), request)
>   File 
> "/usr/local/lib/python3.8/site-packages/apache_beam/runners/worker/sdk_worker.py",
>  line 606, in do_instruction
>     return getattr(self, request_type)(
>   File 
> "/usr/local/lib/python3.8/site-packages/apache_beam/runners/worker/sdk_worker.py",
>  line 644, in process_bundle
>     bundle_processor.process_bundle(instruction_id))
>   File 
> "/usr/local/lib/python3.8/site-packages/apache_beam/runners/worker/bundle_processor.py",
>  line 999, in process_bundle
>     input_op_by_transform_id[element.transform_id].process_encoded(
>   File 
> "/usr/local/lib/python3.8/site-packages/apache_beam/runners/worker/bundle_processor.py",
>  line 228, in process_encoded
>     self.output(decoded_value)
>   File "apache_beam/runners/worker/operations.py", line 357, in 
> apache_beam.runners.worker.operations.Operation.output
>   File "apache_beam/runners/worker/operations.py", line 359, in 
> apache_beam.runners.worker.operations.Operation.output
>   File "apache_beam/runners/worker/operations.py", line 221, in 
> apache_beam.runners.worker.operations.SingletonConsumerSet.receive
>   File "apache_beam/runners/worker/operations.py", line 718, in 
> apache_beam.runners.worker.operations.DoOperation.process
>   File "apache_beam/runners/worker/operations.py", line 719, in 
> apache_beam.runners.worker.operations.DoOperation.process
>   File "apache_beam/runners/common.py", line 1241, in 
> apache_beam.runners.common.DoFnRunner.process
>   File "apache_beam/runners/common.py", line 1321, in 
> apache_beam.runners.common.DoFnRunner._reraise_augmented
>   File "/usr/local/lib/python3.8/site-packages/future/utils/__init__.py", 
> line 446, in raise_with_traceback
>     raise exc.with_traceback(traceback)
>   File "apache_beam/runners/common.py", line 1239, in 
> apache_beam.runners.common.DoFnRunner.process
>   File "apache_beam/runners/common.py", line 768, in 
> apache_beam.runners.common.PerWindowInvoker.invoke_process
>   File "apache_beam/runners/common.py", line 891, in 
> apache_beam.runners.common.PerWindowInvoker._invoke_process_per_window
>   File "apache_beam/runners/common.py", line 1374, in 
> apache_beam.runners.common._OutputProcessor.process_outputs
>   File 
> "/usr/local/lib/python3.8/site-packages/apache_beam/io/gcp/bigquery_file_loads.py",
>  line 520, in process
>     job_reference = self.bq_wrapper.perform_load_job(
>   File 
> "/usr/local/lib/python3.8/site-packages/apache_beam/io/gcp/bigquery_tools.py",
>  line 825, in perform_load_job
>     return self._insert_load_job(
>   File "/usr/local/lib/python3.8/site-packages/apache_beam/utils/retry.py", 
> line 260, in wrapper
>     return fun(*args, **kwargs)
>   File 
> "/usr/local/lib/python3.8/site-packages/apache_beam/io/gcp/bigquery_tools.py",
>  line 438, in _insert_load_job
>     return self._start_job(request).jobReference
>   File 
> "/usr/local/lib/python3.8/site-packages/apache_beam/io/gcp/bigquery_tools.py",
>  line 449, in _start_job
>     response = self.client.jobs.Insert(request)
>   File 
> "/usr/local/lib/python3.8/site-packages/apache_beam/io/gcp/internal/clients/bigquery/bigquery_v2_client.py",
>  line 345, in Insert
>     return self._RunMethod(
>   File "/usr/local/lib/python3.8/site-packages/apitools/base/py/base_api.py", 
> line 731, in _RunMethod
>     return self.ProcessHttpResponse(method_config, http_response, request)
>   File "/usr/local/lib/python3.8/site-packages/apitools/base/py/base_api.py", 
> line 737, in ProcessHttpResponse
>     self.__ProcessHttpResponse(method_config, http_response, request))
>   File "/usr/local/lib/python3.8/site-packages/apitools/base/py/base_api.py", 
> line 603, in __ProcessHttpResponse
>     raise exceptions.HttpError.FromResponse(
> RuntimeError: apitools.base.py.exceptions.HttpBadRequestError: HttpError 
> accessing 
> <https://bigquery.googleapis.com/bigquery/v2/projects/my-project/jobs?alt=json>:
>  response: <{'vary': 'Origin, X-Origin, Referer', 'content-type': 
> 'application/json; charset=UTF-8', 'date': 'Tue, 09 Mar 2021 09:31:01 GMT', 
> 'server': 'ESF', 'cache-control': 'private', 'x-xss-protection': '0', 
> 'x-frame-options': 'SAMEORIGIN', 'x-content-type-options': 'nosniff', 
> 'transfer-encoding': 'chunked', 'status': '400', 'content-length': '318', 
> '-content-encoding': 'gzip'}>, content <{
>   "error": {
>     "code": 400,
>     "message": "Load configuration must specify at least one source URI",
>     "errors": [
>       {
>         "message": "Load configuration must specify at least one source URI",
>         "domain": "global",
>         "reason": "invalid"
>       }
>     ],
>     "status": "INVALID_ARGUMENT"
>   }
> }
> {code}
>  
> Perhaps this can be fixed by validating that the input value `[files(= 
> element[1])|https://github.com/apache/beam/blob/v2.28.0/sdks/python/apache_beam/io/gcp/bigquery_file_loads.py#L469]`
>  is not empty.



--
This message was sent by Atlassian Jira
(v8.20.7#820007)

Reply via email to