damccorm opened a new issue, #20824:
URL: https://github.com/apache/beam/issues/20824

    
   
   We are using FILE_LOADS to write to BigQuery in streaming mode using Python.
   After running for about 1 hour, the Beam job throws a
   `RuntimeError: apitools.base.py.exceptions.HttpBadRequestError` exception with the
   error message "Load configuration must specify at least one source URI".
   ```
   
   //
   Traceback (most recent call last):
     File 
"/usr/local/lib/python3.8/site-packages/apache_beam/runners/worker/sdk_worker.py",
   line 289, in _execute
       response = task()
     File 
"/usr/local/lib/python3.8/site-packages/apache_beam/runners/worker/sdk_worker.py",
   line 362, in <lambda>
       lambda: self.create_worker().do_instruction(request), request)
     File 
"/usr/local/lib/python3.8/site-packages/apache_beam/runners/worker/sdk_worker.py",
   line 606, in do_instruction
       return getattr(self, request_type)(
     File 
"/usr/local/lib/python3.8/site-packages/apache_beam/runners/worker/sdk_worker.py",
   line 644, in process_bundle
       bundle_processor.process_bundle(instruction_id))
     File 
"/usr/local/lib/python3.8/site-packages/apache_beam/runners/worker/bundle_processor.py",
   line 999, in process_bundle
       input_op_by_transform_id[element.transform_id].process_encoded(
    
   File 
"/usr/local/lib/python3.8/site-packages/apache_beam/runners/worker/bundle_processor.py",
 line 228,
   in process_encoded
       self.output(decoded_value)
     File "apache_beam/runners/worker/operations.py",
   line 357, in apache_beam.runners.worker.operations.Operation.output
     File "apache_beam/runners/worker/operations.py",
   line 359, in apache_beam.runners.worker.operations.Operation.output
     File "apache_beam/runners/worker/operations.py",
   line 221, in 
apache_beam.runners.worker.operations.SingletonConsumerSet.receive
     File "apache_beam/runners/worker/operations.py",
   line 718, in apache_beam.runners.worker.operations.DoOperation.process
     File "apache_beam/runners/worker/operations.py",
   line 719, in apache_beam.runners.worker.operations.DoOperation.process
     File "apache_beam/runners/common.py",
   line 1241, in apache_beam.runners.common.DoFnRunner.process
     File "apache_beam/runners/common.py",
   line 1321, in apache_beam.runners.common.DoFnRunner._reraise_augmented
     File "/usr/local/lib/python3.8/site-packages/future/utils/__init__.py",
   line 446, in raise_with_traceback
       raise exc.with_traceback(traceback)
     File "apache_beam/runners/common.py",
   line 1239, in apache_beam.runners.common.DoFnRunner.process
     File "apache_beam/runners/common.py",
   line 768, in apache_beam.runners.common.PerWindowInvoker.invoke_process
     File "apache_beam/runners/common.py",
   line 891, in 
apache_beam.runners.common.PerWindowInvoker._invoke_process_per_window
     File "apache_beam/runners/common.py",
   line 1374, in apache_beam.runners.common._OutputProcessor.process_outputs
     File 
"/usr/local/lib/python3.8/site-packages/apache_beam/io/gcp/bigquery_file_loads.py",
   line 520, in process
       job_reference = self.bq_wrapper.perform_load_job(
     File 
"/usr/local/lib/python3.8/site-packages/apache_beam/io/gcp/bigquery_tools.py",
   line 825, in perform_load_job
       return self._insert_load_job(
     File "/usr/local/lib/python3.8/site-packages/apache_beam/utils/retry.py",
   line 260, in wrapper
       return fun(*args, **kwargs)
     File 
"/usr/local/lib/python3.8/site-packages/apache_beam/io/gcp/bigquery_tools.py",
   line 438, in _insert_load_job
       return self._start_job(request).jobReference
     File 
"/usr/local/lib/python3.8/site-packages/apache_beam/io/gcp/bigquery_tools.py",
   line 449, in _start_job
       response = self.client.jobs.Insert(request)
     File 
"/usr/local/lib/python3.8/site-packages/apache_beam/io/gcp/internal/clients/bigquery/bigquery_v2_client.py",
   line 345, in Insert
       return self._RunMethod(
     File "/usr/local/lib/python3.8/site-packages/apitools/base/py/base_api.py",
   line 731, in _RunMethod
       return self.ProcessHttpResponse(method_config, http_response, request)
   
    File "/usr/local/lib/python3.8/site-packages/apitools/base/py/base_api.py", 
line 737, in ProcessHttpResponse
   
      self.__ProcessHttpResponse(method_config, http_response, request))
     File "/usr/local/lib/python3.8/site-packages/apitools/base/py/base_api.py",
   line 603, in __ProcessHttpResponse
       raise exceptions.HttpError.FromResponse(
   RuntimeError: apitools.base.py.exceptions.HttpBadRequestError:
   HttpError accessing 
<https://bigquery.googleapis.com/bigquery/v2/projects/my-project/jobs?alt=json>:
   response: <{'vary': 'Origin, X-Origin, Referer', 'content-type': 
'application/json; charset=UTF-8',
   'date': 'Tue, 09 Mar 2021 09:31:01 GMT', 'server': 'ESF', 'cache-control': 
'private', 'x-xss-protection':
   '0', 'x-frame-options': 'SAMEORIGIN', 'x-content-type-options': 'nosniff', 
'transfer-encoding': 'chunked',
   'status': '400', 'content-length': '318', '-content-encoding': 'gzip'}>, 
content <{
     "error": {
   
      "code": 400,
       "message": "Load configuration must specify at least one source URI",
       "errors":
   [
         {
           "message": "Load configuration must specify at least one source URI",
           "domain":
   "global",
           "reason": "invalid"
         }
       ],
       "status": "INVALID_ARGUMENT"
     }
   }
   
   ```
   
    
   
   
   Perhaps this can be fixed by validating that the input value
   [`files` (= `element[1]`)](https://github.com/apache/beam/blob/v2.28.0/sdks/python/apache_beam/io/gcp/bigquery_file_loads.py#L469)
   is not empty.
   
   Imported from Jira 
[BEAM-11939](https://issues.apache.org/jira/browse/BEAM-11939). Original Jira 
may contain additional context.
   Reported by: yshimizu.


-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]

Reply via email to