Andrew Pilloud created BEAM-14336:
-------------------------------------

             Summary: Access Denied: Table bigquery-samples:airline_ontime_data.flights
                 Key: BEAM-14336
                 URL: https://issues.apache.org/jira/browse/BEAM-14336
             Project: Beam
          Issue Type: Bug
          Components: sdk-py-core
            Reporter: Andrew Pilloud
            Assignee: Brian Hulette


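The test_flight_delays integration test (apache_beam.examples.dataframe.flight_delays_it_test) in the beam_PostCommit_Python36 suite has been perma-red (failing on every run) for the last week. Example failing builds: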
https://ci-beam.apache.org/job/beam_PostCommit_Python36/5189/testReport/junit/apache_beam.examples.dataframe.flight_delays_it_test/FlightDelaysTest/test_flight_delays/
https://ci-beam.apache.org/job/beam_PostCommit_Python36/5215/testReport/apache_beam.examples.dataframe.flight_delays_it_test/FlightDelaysTest/test_flight_delays/

{code}
self = <apache_beam.examples.dataframe.flight_delays_it_test.FlightDelaysTest 
testMethod=test_flight_delays>

    @pytest.mark.examples_postcommit
    @pytest.mark.it_postcommit
    def test_flight_delays(self):
      flight_delays.run_flight_delay_pipeline(
          self.test_pipeline,
          start_date='2012-12-23',
          end_date='2012-12-25',
>         output=self.output_path)

apache_beam/examples/dataframe/flight_delays_it_test.py:110: 
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 
apache_beam/examples/dataframe/flight_delays.py:105: in 
run_flight_delay_pipeline
    result.to_csv(output)
apache_beam/pipeline.py:596: in __exit__
    self.result = self.run()
apache_beam/testing/test_pipeline.py:114: in run
    False if self.not_use_test_runner_api else test_runner_api))
apache_beam/pipeline.py:549: in run
    self._options).run(False)
apache_beam/pipeline.py:573: in run
    return self.runner.run_pipeline(self, self._options)
apache_beam/runners/dataflow/test_dataflow_runner.py:64: in run_pipeline
    self.result.wait_until_finish(duration=wait_duration)
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 

self = <DataflowPipelineResult <Job
 clientRequestId: '20220420090044988852-7153'
 createTime: '2022-04-20T09:00:55.136265Z'
...022-04-20T09:00:55.136265Z'
 steps: []
 tempFiles: []
 type: TypeValueValuesEnum(JOB_TYPE_BATCH, 1)> at 0x7fcf0b6f0e80>
duration = None

    def wait_until_finish(self, duration=None):
      if not self.is_in_terminal_state():
        if not self.has_job:
          raise IOError('Failed to get the Dataflow job id.')
    
        thread = threading.Thread(
            target=DataflowRunner.poll_for_job_completion,
            args=(self._runner, self, duration))
    
        # Mark the thread as a daemon thread so a keyboard interrupt on the main
        # thread will terminate everything. This is also the reason we will not
        # use thread.join() to wait for the polling thread.
        thread.daemon = True
        thread.start()
        while thread.is_alive():
          time.sleep(5.0)
    
        # TODO: Merge the termination code in poll_for_job_completion and
        # is_in_terminal_state.
        terminated = self.is_in_terminal_state()
        assert duration or terminated, (
            'Job did not reach to a terminal state after waiting indefinitely.')
    
        # TODO(BEAM-14291): Also run this check if wait_until_finish was called
        # after the pipeline completed.
        if terminated and self.state != PipelineState.DONE:
          # TODO(BEAM-1290): Consider converting this to an error log based on
          # theresolution of the issue.
          raise DataflowRuntimeException(
              'Dataflow pipeline failed. State: %s, Error:\n%s' %
              (self.state, getattr(self._runner, 'last_error_msg', None)),
>             self)
E         
apache_beam.runners.dataflow.dataflow_runner.DataflowRuntimeException: Dataflow 
pipeline failed. State: FAILED, Error:
E         Traceback (most recent call last):
E           File 
"/usr/local/lib/python3.6/site-packages/dataflow_worker/batchworker.py", line 
646, in do_work
E             work_executor.execute()
E           File 
"/usr/local/lib/python3.6/site-packages/dataflow_worker/executor.py", line 255, 
in execute
E             self._split_task)
E           File 
"/usr/local/lib/python3.6/site-packages/dataflow_worker/executor.py", line 263, 
in _perform_source_split_considering_api_limits
E             desired_bundle_size)
E           File 
"/usr/local/lib/python3.6/site-packages/dataflow_worker/executor.py", line 300, 
in _perform_source_split
E             for split in source.split(desired_bundle_size):
E           File 
"/usr/local/lib/python3.6/site-packages/apache_beam/io/gcp/bigquery.py", line 
810, in split
E             self._setup_temporary_dataset(bq)
E           File 
"/usr/local/lib/python3.6/site-packages/apache_beam/options/value_provider.py", 
line 193, in _f
E             return fnc(self, *args, **kwargs)
E           File 
"/usr/local/lib/python3.6/site-packages/apache_beam/io/gcp/bigquery.py", line 
849, in _setup_temporary_dataset
E             self._get_project(), self.query.get(), self.use_legacy_sql)
E           File 
"/usr/local/lib/python3.6/site-packages/apache_beam/utils/retry.py", line 253, 
in wrapper
E             return fun(*args, **kwargs)
E           File 
"/usr/local/lib/python3.6/site-packages/apache_beam/io/gcp/bigquery_tools.py", 
line 416, in get_query_location
E             response = self.client.jobs.Insert(request)
E           File 
"/usr/local/lib/python3.6/site-packages/apache_beam/io/gcp/internal/clients/bigquery/bigquery_v2_client.py",
 line 345, in Insert
E             upload=upload, upload_config=upload_config)
E           File 
"/usr/local/lib/python3.6/site-packages/apitools/base/py/base_api.py", line 
731, in _RunMethod
E             return self.ProcessHttpResponse(method_config, http_response, 
request)
E           File 
"/usr/local/lib/python3.6/site-packages/apitools/base/py/base_api.py", line 
737, in ProcessHttpResponse
E             self.__ProcessHttpResponse(method_config, http_response, request))
E           File 
"/usr/local/lib/python3.6/site-packages/apitools/base/py/base_api.py", line 
604, in __ProcessHttpResponse
E             http_response, method_config=method_config, request=request)
E         apitools.base.py.exceptions.HttpForbiddenError: HttpError accessing 
<https://bigquery.googleapis.com/bigquery/v2/projects/apache-beam-testing/jobs?alt=json>:
 response: <{'vary': 'Origin, X-Origin, Referer', 'content-type': 
'application/json; charset=UTF-8', 'date': 'Wed, 20 Apr 2022 09:06:44 GMT', 
'server': 'ESF', 'cache-control': 'private', 'x-xss-protection': '0', 
'x-frame-options': 'SAMEORIGIN', 'x-content-type-options': 'nosniff', 
'transfer-encoding': 'chunked', 'status': '403', 'content-length': '528', 
'-content-encoding': 'gzip'}>, content <{
E           "error": {
E             "code": 403,
E             "message": "Access Denied: Table 
bigquery-samples:airline_ontime_data.flights: User does not have permission to 
query table bigquery-samples:airline_ontime_data.flights.",
E             "errors": [
E               {
E                 "message": "Access Denied: Table 
bigquery-samples:airline_ontime_data.flights: User does not have permission to 
query table bigquery-samples:airline_ontime_data.flights.",
E                 "domain": "global",
E                 "reason": "accessDenied"
E               }
E             ],
E             "status": "PERMISSION_DENIED"
E           }
E         }
E         >

apache_beam/runners/dataflow/dataflow_runner.py:1661: DataflowRuntimeException
{code}



--
This message was sent by Atlassian Jira
(v8.20.7#820007)

Reply via email to