Thomas Pilewicz created BEAM-6198:
-------------------------------------
Summary: Python SDK generates requests to BigQuery API's method
Tabledata:list with invalid argument alt
Key: BEAM-6198
URL: https://issues.apache.org/jira/browse/BEAM-6198
Project: Beam
Issue Type: Bug
Components: sdk-py-harness
Affects Versions: 2.8.0
Environment: Python SDK 2.8, Dataflow runner
Reporter: Thomas Pilewicz
Assignee: Robert Bradshaw
When streaming inserts to BigQuery are involved, in io.gcp.bigquery.start_bundle,
checking whether the table is empty generates requests to the [Tabledata:list
method|https://cloud.google.com/bigquery/docs/reference/rest/v2/tabledata/list]
of the BigQuery API. The problem is that it includes a parameter alt=json in the
request, which does not seem to be accepted by the API.
Original log:
{{ java.util.concurrent.ExecutionException: java.lang.RuntimeException: Error
received from SDK harness for instruction -4955710: Traceback (most recent call
last):}}
{{ File
"/usr/local/lib/python2.7/dist-packages/apache_beam/runners/worker/sdk_worker.py",
line 131, in _execute}}
{{ response = task()}}
{{ File
"/usr/local/lib/python2.7/dist-packages/apache_beam/runners/worker/sdk_worker.py",
line 166, in <lambda>}}
{{ self._execute(lambda: worker.do_instruction(work), work)}}
{{ File
"/usr/local/lib/python2.7/dist-packages/apache_beam/runners/worker/sdk_worker.py",
line 212, in do_instruction}}
{{ request.instruction_id)}}
{{ File
"/usr/local/lib/python2.7/dist-packages/apache_beam/runners/worker/sdk_worker.py",
line 234, in process_bundle}}
{{ processor.process_bundle(instruction_id)}}
{{ File
"/usr/local/lib/python2.7/dist-packages/apache_beam/runners/worker/bundle_processor.py",
line 404, in process_bundle}}
{{ op.start()}}
{{ File "apache_beam/runners/worker/operations.py", line 368, in
apache_beam.runners.worker.operations.DoOperation.start}}
{{ def start(self):}}
{{ File "apache_beam/runners/worker/operations.py", line 369, in
apache_beam.runners.worker.operations.DoOperation.start}}
{{ with self.scoped_start_state:}}
{{ File "apache_beam/runners/worker/operations.py", line 420, in
apache_beam.runners.worker.operations.DoOperation.start}}
{{ self.dofn_runner.start()}}
{{ File "apache_beam/runners/common.py", line 695, in
apache_beam.runners.common.DoFnRunner.start}}
{{ self._invoke_bundle_method(self.do_fn_invoker.invoke_start_bundle)}}
{{ File "apache_beam/runners/common.py", line 692, in
apache_beam.runners.common.DoFnRunner._invoke_bundle_method}}
{{ self._reraise_augmented(exn)}}
{{ File "apache_beam/runners/common.py", line 702, in
apache_beam.runners.common.DoFnRunner._reraise_augmented}}
{{ raise}}
{{ File "apache_beam/runners/common.py", line 690, in
apache_beam.runners.common.DoFnRunner._invoke_bundle_method}}
{{ bundle_method()}}
{{ File "apache_beam/runners/common.py", line 348, in
apache_beam.runners.common.DoFnInvoker.invoke_start_bundle}}
{{ def invoke_start_bundle(self):}}
{{ File "apache_beam/runners/common.py", line 352, in
apache_beam.runners.common.DoFnInvoker.invoke_start_bundle}}
{{ self.signature.start_bundle_method.method_value())}}
{{ File
"/usr/local/lib/python2.7/dist-packages/apache_beam/io/gcp/bigquery.py", line
1359, in start_bundle}}
{{ self.create_disposition, self.write_disposition)}}
{{ File "/usr/local/lib/python2.7/dist-packages/apache_beam/utils/retry.py",
line 184, in wrapper}}
{{ return fun(*args, **kwargs)}}
{{ File
"/usr/local/lib/python2.7/dist-packages/apache_beam/io/gcp/bigquery.py", line
1100, in get_or_create_table}}
{{ table_empty = self._is_table_empty(project_id, dataset_id, table_id)}}
{{ File "/usr/local/lib/python2.7/dist-packages/apache_beam/utils/retry.py",
line 184, in wrapper}}
{{ return fun(*args, **kwargs)}}
{{ File
"/usr/local/lib/python2.7/dist-packages/apache_beam/io/gcp/bigquery.py", line
972, in _is_table_empty}}
{{ response = self.client.tabledata.List(request)}}
{{ File
"/usr/local/lib/python2.7/dist-packages/apache_beam/io/gcp/internal/clients/bigquery/bigquery_v2_client.py",
line 500, in List}}
{{ config, request, global_params=global_params)}}
{{ File "/usr/local/lib/python2.7/dist-packages/apitools/base/py/base_api.py",
line 722, in _RunMethod}}
{{ return self.ProcessHttpResponse(method_config, http_response, request)}}
{{ File "/usr/local/lib/python2.7/dist-packages/apitools/base/py/base_api.py",
line 728, in ProcessHttpResponse}}
{{ self.__ProcessHttpResponse(method_config, http_response, request))}}
{{ File "/usr/local/lib/python2.7/dist-packages/apitools/base/py/base_api.py",
line 599, in __ProcessHttpResponse}}
{{ http_response, method_config=method_config, request=request)}}
{{HttpBadRequestError: HttpError accessing
<https://www.googleapis.com/bigquery/v2/projects/xxxx/datasets/xxxx/tables/xxxx/data?alt=json&maxResults=1>:
response: <\{'status': '400', 'content-length': '245', 'x-xss-protection': '1;
mode=block', 'transfer-encoding': 'chunked', 'vary': 'Origin, X-Origin,
Referer', 'server': 'ESF', '-content-encoding': 'gzip', 'cache-control':
'private', 'date': 'Sun, 09 Dec 2018 06:16:01 GMT', 'x-frame-options':
'SAMEORIGIN', 'content-type': 'application/json; charset=UTF-8'}>, content <{}}
{{ "error": {}}
{{ "code": 400,}}
{{ "message": "Request contains an invalid argument.",}}
{{ "errors": [}}
{{ {}}
{{ "message": "",}}
{{ "domain": "global",}}
{{ "reason": "invalid"}}
{{ }}}
{{ ],}}
{{ "status": "INVALID_ARGUMENT"}}
{{ }}}
{{}}}
{{>}}
--
This message was sent by Atlassian JIRA
(v7.6.3#76005)