[
https://issues.apache.org/jira/browse/BEAM-14336?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel&focusedCommentId=17540463#comment-17540463
]
Beam JIRA Bot commented on BEAM-14336:
--------------------------------------
This issue is assigned but has not received an update in 30 days so it has been
labeled "stale-assigned". If you are still working on the issue, please give an
update and remove the label. If you are no longer working on the issue, please
unassign so someone else may work on it. In 7 days the issue will be
automatically unassigned.
> Access Denied: Table bigquery-samples:airline_ontime_data.flights
> -----------------------------------------------------------------
>
> Key: BEAM-14336
> URL: https://issues.apache.org/jira/browse/BEAM-14336
> Project: Beam
> Issue Type: Bug
> Components: dsl-dataframe, examples-python
> Reporter: Andrew Pilloud
> Assignee: Brian Hulette
> Priority: P2
> Labels: stale-assigned
> Time Spent: 50m
> Remaining Estimate: 0h
>
> This has been perma-red for the last week.
> https://ci-beam.apache.org/job/beam_PostCommit_Python36/5189/testReport/junit/apache_beam.examples.dataframe.flight_delays_it_test/FlightDelaysTest/test_flight_delays/
> https://ci-beam.apache.org/job/beam_PostCommit_Python36/5215/testReport/apache_beam.examples.dataframe.flight_delays_it_test/FlightDelaysTest/test_flight_delays/
> {code}
> self = <apache_beam.examples.dataframe.flight_delays_it_test.FlightDelaysTest
> testMethod=test_flight_delays>
> @pytest.mark.examples_postcommit
> @pytest.mark.it_postcommit
> def test_flight_delays(self):
> flight_delays.run_flight_delay_pipeline(
> self.test_pipeline,
> start_date='2012-12-23',
> end_date='2012-12-25',
> > output=self.output_path)
> apache_beam/examples/dataframe/flight_delays_it_test.py:110:
> _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
> _
> apache_beam/examples/dataframe/flight_delays.py:105: in
> run_flight_delay_pipeline
> result.to_csv(output)
> apache_beam/pipeline.py:596: in __exit__
> self.result = self.run()
> apache_beam/testing/test_pipeline.py:114: in run
> False if self.not_use_test_runner_api else test_runner_api))
> apache_beam/pipeline.py:549: in run
> self._options).run(False)
> apache_beam/pipeline.py:573: in run
> return self.runner.run_pipeline(self, self._options)
> apache_beam/runners/dataflow/test_dataflow_runner.py:64: in run_pipeline
> self.result.wait_until_finish(duration=wait_duration)
> _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
> _
> self = <DataflowPipelineResult <Job
> clientRequestId: '20220420090044988852-7153'
> createTime: '2022-04-20T09:00:55.136265Z'
> ...022-04-20T09:00:55.136265Z'
> steps: []
> tempFiles: []
> type: TypeValueValuesEnum(JOB_TYPE_BATCH, 1)> at 0x7fcf0b6f0e80>
> duration = None
> def wait_until_finish(self, duration=None):
> if not self.is_in_terminal_state():
> if not self.has_job:
> raise IOError('Failed to get the Dataflow job id.')
>
> thread = threading.Thread(
> target=DataflowRunner.poll_for_job_completion,
> args=(self._runner, self, duration))
>
> # Mark the thread as a daemon thread so a keyboard interrupt on the main
> # thread will terminate everything. This is also the reason we will not
> # use thread.join() to wait for the polling thread.
> thread.daemon = True
> thread.start()
> while thread.is_alive():
> time.sleep(5.0)
>
> # TODO: Merge the termination code in poll_for_job_completion and
> # is_in_terminal_state.
> terminated = self.is_in_terminal_state()
> assert duration or terminated, (
> 'Job did not reach to a terminal state after waiting
> indefinitely.')
>
> # TODO(BEAM-14291): Also run this check if wait_until_finish was called
> # after the pipeline completed.
> if terminated and self.state != PipelineState.DONE:
> # TODO(BEAM-1290): Consider converting this to an error log based on
> # the resolution of the issue.
> raise DataflowRuntimeException(
> 'Dataflow pipeline failed. State: %s, Error:\n%s' %
> (self.state, getattr(self._runner, 'last_error_msg', None)),
> > self)
> E
> apache_beam.runners.dataflow.dataflow_runner.DataflowRuntimeException:
> Dataflow pipeline failed. State: FAILED, Error:
> E Traceback (most recent call last):
> E File
> "/usr/local/lib/python3.6/site-packages/dataflow_worker/batchworker.py", line
> 646, in do_work
> E work_executor.execute()
> E File
> "/usr/local/lib/python3.6/site-packages/dataflow_worker/executor.py", line
> 255, in execute
> E self._split_task)
> E File
> "/usr/local/lib/python3.6/site-packages/dataflow_worker/executor.py", line
> 263, in _perform_source_split_considering_api_limits
> E desired_bundle_size)
> E File
> "/usr/local/lib/python3.6/site-packages/dataflow_worker/executor.py", line
> 300, in _perform_source_split
> E for split in source.split(desired_bundle_size):
> E File
> "/usr/local/lib/python3.6/site-packages/apache_beam/io/gcp/bigquery.py", line
> 810, in split
> E self._setup_temporary_dataset(bq)
> E File
> "/usr/local/lib/python3.6/site-packages/apache_beam/options/value_provider.py",
> line 193, in _f
> E return fnc(self, *args, **kwargs)
> E File
> "/usr/local/lib/python3.6/site-packages/apache_beam/io/gcp/bigquery.py", line
> 849, in _setup_temporary_dataset
> E self._get_project(), self.query.get(), self.use_legacy_sql)
> E File
> "/usr/local/lib/python3.6/site-packages/apache_beam/utils/retry.py", line
> 253, in wrapper
> E return fun(*args, **kwargs)
> E File
> "/usr/local/lib/python3.6/site-packages/apache_beam/io/gcp/bigquery_tools.py",
> line 416, in get_query_location
> E response = self.client.jobs.Insert(request)
> E File
> "/usr/local/lib/python3.6/site-packages/apache_beam/io/gcp/internal/clients/bigquery/bigquery_v2_client.py",
> line 345, in Insert
> E upload=upload, upload_config=upload_config)
> E File
> "/usr/local/lib/python3.6/site-packages/apitools/base/py/base_api.py", line
> 731, in _RunMethod
> E return self.ProcessHttpResponse(method_config, http_response,
> request)
> E File
> "/usr/local/lib/python3.6/site-packages/apitools/base/py/base_api.py", line
> 737, in ProcessHttpResponse
> E self.__ProcessHttpResponse(method_config, http_response,
> request))
> E File
> "/usr/local/lib/python3.6/site-packages/apitools/base/py/base_api.py", line
> 604, in __ProcessHttpResponse
> E http_response, method_config=method_config, request=request)
> E apitools.base.py.exceptions.HttpForbiddenError: HttpError accessing
> <https://bigquery.googleapis.com/bigquery/v2/projects/apache-beam-testing/jobs?alt=json>:
> response: <{'vary': 'Origin, X-Origin, Referer', 'content-type':
> 'application/json; charset=UTF-8', 'date': 'Wed, 20 Apr 2022 09:06:44 GMT',
> 'server': 'ESF', 'cache-control': 'private', 'x-xss-protection': '0',
> 'x-frame-options': 'SAMEORIGIN', 'x-content-type-options': 'nosniff',
> 'transfer-encoding': 'chunked', 'status': '403', 'content-length': '528',
> '-content-encoding': 'gzip'}>, content <{
> E "error": {
> E "code": 403,
> E "message": "Access Denied: Table
> bigquery-samples:airline_ontime_data.flights: User does not have permission
> to query table bigquery-samples:airline_ontime_data.flights.",
> E "errors": [
> E {
> E "message": "Access Denied: Table
> bigquery-samples:airline_ontime_data.flights: User does not have permission
> to query table bigquery-samples:airline_ontime_data.flights.",
> E "domain": "global",
> E "reason": "accessDenied"
> E }
> E ],
> E "status": "PERMISSION_DENIED"
> E }
> E }
> E >
> apache_beam/runners/dataflow/dataflow_runner.py:1661: DataflowRuntimeException
> {code}
--
This message was sent by Atlassian Jira
(v8.20.7#820007)