Andrew Pilloud created BEAM-14336:
-------------------------------------
Summary: Access Denied: Table
bigquery-samples:airline_ontime_data.flights
Key: BEAM-14336
URL: https://issues.apache.org/jira/browse/BEAM-14336
Project: Beam
Issue Type: Bug
Components: sdk-py-core
Reporter: Andrew Pilloud
Assignee: Brian Hulette
The test apache_beam.examples.dataframe.flight_delays_it_test.FlightDelaysTest.test_flight_delays has been consistently failing (perma-red) in the Python post-commit suite for the last week.
https://ci-beam.apache.org/job/beam_PostCommit_Python36/5189/testReport/junit/apache_beam.examples.dataframe.flight_delays_it_test/FlightDelaysTest/test_flight_delays/
https://ci-beam.apache.org/job/beam_PostCommit_Python36/5215/testReport/apache_beam.examples.dataframe.flight_delays_it_test/FlightDelaysTest/test_flight_delays/
{code}
self = <apache_beam.examples.dataframe.flight_delays_it_test.FlightDelaysTest
testMethod=test_flight_delays>
@pytest.mark.examples_postcommit
@pytest.mark.it_postcommit
def test_flight_delays(self):
flight_delays.run_flight_delay_pipeline(
self.test_pipeline,
start_date='2012-12-23',
end_date='2012-12-25',
> output=self.output_path)
apache_beam/examples/dataframe/flight_delays_it_test.py:110:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
apache_beam/examples/dataframe/flight_delays.py:105: in
run_flight_delay_pipeline
result.to_csv(output)
apache_beam/pipeline.py:596: in __exit__
self.result = self.run()
apache_beam/testing/test_pipeline.py:114: in run
False if self.not_use_test_runner_api else test_runner_api))
apache_beam/pipeline.py:549: in run
self._options).run(False)
apache_beam/pipeline.py:573: in run
return self.runner.run_pipeline(self, self._options)
apache_beam/runners/dataflow/test_dataflow_runner.py:64: in run_pipeline
self.result.wait_until_finish(duration=wait_duration)
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <DataflowPipelineResult <Job
clientRequestId: '20220420090044988852-7153'
createTime: '2022-04-20T09:00:55.136265Z'
...022-04-20T09:00:55.136265Z'
steps: []
tempFiles: []
type: TypeValueValuesEnum(JOB_TYPE_BATCH, 1)> at 0x7fcf0b6f0e80>
duration = None
def wait_until_finish(self, duration=None):
if not self.is_in_terminal_state():
if not self.has_job:
raise IOError('Failed to get the Dataflow job id.')
thread = threading.Thread(
target=DataflowRunner.poll_for_job_completion,
args=(self._runner, self, duration))
# Mark the thread as a daemon thread so a keyboard interrupt on the main
# thread will terminate everything. This is also the reason we will not
# use thread.join() to wait for the polling thread.
thread.daemon = True
thread.start()
while thread.is_alive():
time.sleep(5.0)
# TODO: Merge the termination code in poll_for_job_completion and
# is_in_terminal_state.
terminated = self.is_in_terminal_state()
assert duration or terminated, (
'Job did not reach to a terminal state after waiting indefinitely.')
# TODO(BEAM-14291): Also run this check if wait_until_finish was called
# after the pipeline completed.
if terminated and self.state != PipelineState.DONE:
# TODO(BEAM-1290): Consider converting this to an error log based on
# theresolution of the issue.
raise DataflowRuntimeException(
'Dataflow pipeline failed. State: %s, Error:\n%s' %
(self.state, getattr(self._runner, 'last_error_msg', None)),
> self)
E
apache_beam.runners.dataflow.dataflow_runner.DataflowRuntimeException: Dataflow
pipeline failed. State: FAILED, Error:
E Traceback (most recent call last):
E File
"/usr/local/lib/python3.6/site-packages/dataflow_worker/batchworker.py", line
646, in do_work
E work_executor.execute()
E File
"/usr/local/lib/python3.6/site-packages/dataflow_worker/executor.py", line 255,
in execute
E self._split_task)
E File
"/usr/local/lib/python3.6/site-packages/dataflow_worker/executor.py", line 263,
in _perform_source_split_considering_api_limits
E desired_bundle_size)
E File
"/usr/local/lib/python3.6/site-packages/dataflow_worker/executor.py", line 300,
in _perform_source_split
E for split in source.split(desired_bundle_size):
E File
"/usr/local/lib/python3.6/site-packages/apache_beam/io/gcp/bigquery.py", line
810, in split
E self._setup_temporary_dataset(bq)
E File
"/usr/local/lib/python3.6/site-packages/apache_beam/options/value_provider.py",
line 193, in _f
E return fnc(self, *args, **kwargs)
E File
"/usr/local/lib/python3.6/site-packages/apache_beam/io/gcp/bigquery.py", line
849, in _setup_temporary_dataset
E self._get_project(), self.query.get(), self.use_legacy_sql)
E File
"/usr/local/lib/python3.6/site-packages/apache_beam/utils/retry.py", line 253,
in wrapper
E return fun(*args, **kwargs)
E File
"/usr/local/lib/python3.6/site-packages/apache_beam/io/gcp/bigquery_tools.py",
line 416, in get_query_location
E response = self.client.jobs.Insert(request)
E File
"/usr/local/lib/python3.6/site-packages/apache_beam/io/gcp/internal/clients/bigquery/bigquery_v2_client.py",
line 345, in Insert
E upload=upload, upload_config=upload_config)
E File
"/usr/local/lib/python3.6/site-packages/apitools/base/py/base_api.py", line
731, in _RunMethod
E return self.ProcessHttpResponse(method_config, http_response,
request)
E File
"/usr/local/lib/python3.6/site-packages/apitools/base/py/base_api.py", line
737, in ProcessHttpResponse
E self.__ProcessHttpResponse(method_config, http_response, request))
E File
"/usr/local/lib/python3.6/site-packages/apitools/base/py/base_api.py", line
604, in __ProcessHttpResponse
E http_response, method_config=method_config, request=request)
E apitools.base.py.exceptions.HttpForbiddenError: HttpError accessing
<https://bigquery.googleapis.com/bigquery/v2/projects/apache-beam-testing/jobs?alt=json>:
response: <{'vary': 'Origin, X-Origin, Referer', 'content-type':
'application/json; charset=UTF-8', 'date': 'Wed, 20 Apr 2022 09:06:44 GMT',
'server': 'ESF', 'cache-control': 'private', 'x-xss-protection': '0',
'x-frame-options': 'SAMEORIGIN', 'x-content-type-options': 'nosniff',
'transfer-encoding': 'chunked', 'status': '403', 'content-length': '528',
'-content-encoding': 'gzip'}>, content <{
E "error": {
E "code": 403,
E "message": "Access Denied: Table
bigquery-samples:airline_ontime_data.flights: User does not have permission to
query table bigquery-samples:airline_ontime_data.flights.",
E "errors": [
E {
E "message": "Access Denied: Table
bigquery-samples:airline_ontime_data.flights: User does not have permission to
query table bigquery-samples:airline_ontime_data.flights.",
E "domain": "global",
E "reason": "accessDenied"
E }
E ],
E "status": "PERMISSION_DENIED"
E }
E }
E >
apache_beam/runners/dataflow/dataflow_runner.py:1661: DataflowRuntimeException
{code}
--
This message was sent by Atlassian Jira
(v8.20.7#820007)