[
https://issues.apache.org/jira/browse/BEAM-14336?focusedWorklogId=774778&page=com.atlassian.jira.plugin.system.issuetabpanels:worklog-tabpanel#worklog-774778
]
ASF GitHub Bot logged work on BEAM-14336:
-----------------------------------------
Author: ASF GitHub Bot
Created on: 25/May/22 19:03
Start Date: 25/May/22 19:03
Worklog Time Spent: 10m
Work Description: yeandy commented on code in PR #17758:
URL: https://github.com/apache/beam/pull/17758#discussion_r882020082
##########
sdks/python/apache_beam/examples/dataframe/flight_delays_it_test.py:
##########
@@ -35,59 +35,58 @@
from apache_beam.testing.test_pipeline import TestPipeline
-@unittest.skip('BEAM-14336')
class FlightDelaysTest(unittest.TestCase):
EXPECTED = {
'2012-12-23': [
('AA', 20.082559339525282, 12.825593395252838),
- ('EV', 10.01901901901902, 4.431431431431432),
+ ('AS', 5.0456273764258555, 1.0722433460076046),
+ ('B6', 20.646569646569645, 16.405405405405407),
+ ('DL', 5.241148325358852, -3.2401913875598085),
+ ('EV', 9.982053838484546, 4.40777666999003),
+ ('F9', 23.67883211678832, 25.27007299270073),
+ ('FL', 4.4602272727272725, -0.8352272727272727),
('HA', -1.0829015544041452, 0.010362694300518135),
+ ('MQ', 8.912912912912914, 3.6936936936936937),
+ ('OO', 30.526699029126213, 31.17961165048544),
('UA', 19.142555438225976, 11.07180570221753),
- ('MQ', 8.902255639097744, 3.6676691729323307),
- ('OO', 31.148883374689827, 31.90818858560794),
('US', 3.092541436464088, -2.350828729281768),
- ('WN', 12.074298711144806, 6.717968157695224),
- ('AS', 5.0456273764258555, 1.0722433460076046),
- ('B6', 20.646569646569645, 16.405405405405407),
- ('DL', 5.2559923298178335, -3.214765100671141),
- ('F9', 23.823529411764707, 25.455882352941178),
- ('FL', 4.492877492877493, -0.8005698005698005),
('VX', 62.755102040816325, 62.61224489795919),
+ ('WN', 12.05824508320726, 6.713313161875946),
('YV', 16.155844155844157, 13.376623376623376),
],
'2012-12-24': [
+ ('AA', 7.049086757990867, -1.5970319634703196),
('AS', 0.5917602996254682, -2.2659176029962547),
('B6', 8.070993914807302, 2.73630831643002),
- ('DL', 3.7171824973319105, -2.2358591248665953),
- ('F9', 14.111940298507463, 15.888059701492537),
- ('FL', 2.4210526315789473, 2.242690058479532),
- ('VX', 3.841666666666667, -2.4166666666666665),
- ('YV', 0.32, 0.78),
- ('MQ', 15.869642857142857, 9.992857142857142),
- ('OO', 11.048517520215633, 10.138814016172507),
- ('US', 1.369281045751634, -1.4101307189542485),
- ('WN', 7.515952597994531, 0.7028258887876025),
- ('AA', 7.049086757990867, -1.5970319634703196),
- ('EV', 7.297101449275362, 2.2693236714975846),
+ ('DL', 3.700745473908413, -2.2396166134185305),
+ ('EV', 7.322115384615385, 2.3653846153846154),
+ ('F9', 13.786764705882351, 15.5),
+ ('FL', 2.416909620991253, 2.224489795918368),
('HA', -2.6785714285714284, -2.4744897959183674),
+ ('MQ', 15.818181818181818, 9.935828877005347),
+ ('OO', 10.902374670184695, 10.08575197889182),
('UA', 10.935406698564593, -1.3337320574162679),
+ ('US', 1.369281045751634, -1.4101307189542485),
+ ('VX', 3.841666666666667, -2.4166666666666665),
+ ('WN', 7.3715753424657535, 0.348458904109589),
+ ('YV', 0.32, 0.78),
],
'2012-12-25': [
+ ('AA', 23.551581843191197, 35.62585969738652),
('AS', 3.4816326530612245, 0.27346938775510204),
('B6', 9.10590631364562, 3.989816700610998),
- ('DL', 2.3022170361726952, -3.6709451575262544),
- ('F9', 19.38255033557047, 21.845637583892618),
- ('FL', 1.3982300884955752, 0.9380530973451328),
- ('VX', 23.62878787878788, 23.636363636363637),
- ('YV', 11.256302521008404, 11.659663865546218),
- ('MQ', 32.6, 44.28666666666667),
- ('OO', 16.2275960170697, 17.11948790896159),
- ('US', 2.7953216374269005, 0.2236842105263158),
- ('WN', 14.405783582089553, 10.111940298507463),
- ('AA', 23.551581843191197, 35.62585969738652),
- ('EV', 17.368638239339752, 16.43191196698762),
+ ('DL', 2.2863795110593714, -3.668218859138533),
+ ('EV', 17.35576923076923, 16.414835164835164,),
Review Comment:
nit: extra comma after last tuple element
Issue Time Tracking
-------------------
Worklog Id: (was: 774778)
Time Spent: 1h 40m (was: 1.5h)
> Access Denied: Table bigquery-samples:airline_ontime_data.flights
> -----------------------------------------------------------------
>
> Key: BEAM-14336
> URL: https://issues.apache.org/jira/browse/BEAM-14336
> Project: Beam
> Issue Type: Bug
> Components: dsl-dataframe, examples-python
> Reporter: Andrew Pilloud
> Assignee: Brian Hulette
> Priority: P2
> Labels: stale-assigned
> Time Spent: 1h 40m
> Remaining Estimate: 0h
>
> This has been perma-red for the last week.
> https://ci-beam.apache.org/job/beam_PostCommit_Python36/5189/testReport/junit/apache_beam.examples.dataframe.flight_delays_it_test/FlightDelaysTest/test_flight_delays/
> https://ci-beam.apache.org/job/beam_PostCommit_Python36/5215/testReport/apache_beam.examples.dataframe.flight_delays_it_test/FlightDelaysTest/test_flight_delays/
> {code}
> self = <apache_beam.examples.dataframe.flight_delays_it_test.FlightDelaysTest
> testMethod=test_flight_delays>
> @pytest.mark.examples_postcommit
> @pytest.mark.it_postcommit
> def test_flight_delays(self):
> flight_delays.run_flight_delay_pipeline(
> self.test_pipeline,
> start_date='2012-12-23',
> end_date='2012-12-25',
> > output=self.output_path)
> apache_beam/examples/dataframe/flight_delays_it_test.py:110:
> _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
> _
> apache_beam/examples/dataframe/flight_delays.py:105: in
> run_flight_delay_pipeline
> result.to_csv(output)
> apache_beam/pipeline.py:596: in __exit__
> self.result = self.run()
> apache_beam/testing/test_pipeline.py:114: in run
> False if self.not_use_test_runner_api else test_runner_api))
> apache_beam/pipeline.py:549: in run
> self._options).run(False)
> apache_beam/pipeline.py:573: in run
> return self.runner.run_pipeline(self, self._options)
> apache_beam/runners/dataflow/test_dataflow_runner.py:64: in run_pipeline
> self.result.wait_until_finish(duration=wait_duration)
> _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
> _
> self = <DataflowPipelineResult <Job
> clientRequestId: '20220420090044988852-7153'
> createTime: '2022-04-20T09:00:55.136265Z'
> ...022-04-20T09:00:55.136265Z'
> steps: []
> tempFiles: []
> type: TypeValueValuesEnum(JOB_TYPE_BATCH, 1)> at 0x7fcf0b6f0e80>
> duration = None
> def wait_until_finish(self, duration=None):
> if not self.is_in_terminal_state():
> if not self.has_job:
> raise IOError('Failed to get the Dataflow job id.')
>
> thread = threading.Thread(
> target=DataflowRunner.poll_for_job_completion,
> args=(self._runner, self, duration))
>
> # Mark the thread as a daemon thread so a keyboard interrupt on the
> main
> # thread will terminate everything. This is also the reason we will
> not
> # use thread.join() to wait for the polling thread.
> thread.daemon = True
> thread.start()
> while thread.is_alive():
> time.sleep(5.0)
>
> # TODO: Merge the termination code in poll_for_job_completion and
> # is_in_terminal_state.
> terminated = self.is_in_terminal_state()
> assert duration or terminated, (
> 'Job did not reach to a terminal state after waiting
> indefinitely.')
>
> # TODO(BEAM-14291): Also run this check if wait_until_finish was
> called
> # after the pipeline completed.
> if terminated and self.state != PipelineState.DONE:
> # TODO(BEAM-1290): Consider converting this to an error log based on
> # the resolution of the issue.
> raise DataflowRuntimeException(
> 'Dataflow pipeline failed. State: %s, Error:\n%s' %
> (self.state, getattr(self._runner, 'last_error_msg', None)),
> > self)
> E
> apache_beam.runners.dataflow.dataflow_runner.DataflowRuntimeException:
> Dataflow pipeline failed. State: FAILED, Error:
> E Traceback (most recent call last):
> E File
> "/usr/local/lib/python3.6/site-packages/dataflow_worker/batchworker.py", line
> 646, in do_work
> E work_executor.execute()
> E File
> "/usr/local/lib/python3.6/site-packages/dataflow_worker/executor.py", line
> 255, in execute
> E self._split_task)
> E File
> "/usr/local/lib/python3.6/site-packages/dataflow_worker/executor.py", line
> 263, in _perform_source_split_considering_api_limits
> E desired_bundle_size)
> E File
> "/usr/local/lib/python3.6/site-packages/dataflow_worker/executor.py", line
> 300, in _perform_source_split
> E for split in source.split(desired_bundle_size):
> E File
> "/usr/local/lib/python3.6/site-packages/apache_beam/io/gcp/bigquery.py", line
> 810, in split
> E self._setup_temporary_dataset(bq)
> E File
> "/usr/local/lib/python3.6/site-packages/apache_beam/options/value_provider.py",
> line 193, in _f
> E return fnc(self, *args, **kwargs)
> E File
> "/usr/local/lib/python3.6/site-packages/apache_beam/io/gcp/bigquery.py", line
> 849, in _setup_temporary_dataset
> E self._get_project(), self.query.get(), self.use_legacy_sql)
> E File
> "/usr/local/lib/python3.6/site-packages/apache_beam/utils/retry.py", line
> 253, in wrapper
> E return fun(*args, **kwargs)
> E File
> "/usr/local/lib/python3.6/site-packages/apache_beam/io/gcp/bigquery_tools.py",
> line 416, in get_query_location
> E response = self.client.jobs.Insert(request)
> E File
> "/usr/local/lib/python3.6/site-packages/apache_beam/io/gcp/internal/clients/bigquery/bigquery_v2_client.py",
> line 345, in Insert
> E upload=upload, upload_config=upload_config)
> E File
> "/usr/local/lib/python3.6/site-packages/apitools/base/py/base_api.py", line
> 731, in _RunMethod
> E return self.ProcessHttpResponse(method_config, http_response,
> request)
> E File
> "/usr/local/lib/python3.6/site-packages/apitools/base/py/base_api.py", line
> 737, in ProcessHttpResponse
> E self.__ProcessHttpResponse(method_config, http_response,
> request))
> E File
> "/usr/local/lib/python3.6/site-packages/apitools/base/py/base_api.py", line
> 604, in __ProcessHttpResponse
> E http_response, method_config=method_config, request=request)
> E apitools.base.py.exceptions.HttpForbiddenError: HttpError accessing
> <https://bigquery.googleapis.com/bigquery/v2/projects/apache-beam-testing/jobs?alt=json>:
> response: <{'vary': 'Origin, X-Origin, Referer', 'content-type':
> 'application/json; charset=UTF-8', 'date': 'Wed, 20 Apr 2022 09:06:44 GMT',
> 'server': 'ESF', 'cache-control': 'private', 'x-xss-protection': '0',
> 'x-frame-options': 'SAMEORIGIN', 'x-content-type-options': 'nosniff',
> 'transfer-encoding': 'chunked', 'status': '403', 'content-length': '528',
> '-content-encoding': 'gzip'}>, content <{
> E "error": {
> E "code": 403,
> E "message": "Access Denied: Table
> bigquery-samples:airline_ontime_data.flights: User does not have permission
> to query table bigquery-samples:airline_ontime_data.flights.",
> E "errors": [
> E {
> E "message": "Access Denied: Table
> bigquery-samples:airline_ontime_data.flights: User does not have permission
> to query table bigquery-samples:airline_ontime_data.flights.",
> E "domain": "global",
> E "reason": "accessDenied"
> E }
> E ],
> E "status": "PERMISSION_DENIED"
> E }
> E }
> E >
> apache_beam/runners/dataflow/dataflow_runner.py:1661: DataflowRuntimeException
> {code}
--
This message was sent by Atlassian Jira
(v8.20.7#820007)