tvalentyn commented on a change in pull request #11661:
URL: https://github.com/apache/beam/pull/11661#discussion_r430592661
##########
File path: sdks/python/apache_beam/examples/wordcount_it_test.py
##########
@@ -104,18 +107,33 @@ def _run_wordcount_it(self, run_wordcount, **opts):
    run_time = end_time - start_time
    if publish_to_bq:
-      bq_publisher = BigQueryMetricsPublisher(
-          project_name=test_pipeline.get_option('project'),
-          table=test_pipeline.get_option('metrics_table'),
-          dataset=test_pipeline.get_option('metrics_dataset'),
-      )
-      result = Metric(
-          submit_timestamp=time.time(),
-          metric_id=uuid.uuid4().hex,
-          value=run_time,
-          label='Python performance test',
-      )
-      bq_publisher.publish([result.as_dict()])
+      self._publish_metrics(test_pipeline, run_time)
+
+  def _publish_metrics(self, pipeline, metric_value):
Review comment:
Sounds like this code could be reused across other performance tests. Is
there a module shared across perf tests that we can move it to? cc: @kamilwu
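For reference, the hunk above stops at the new method's signature; below is a
minimal sketch of what its body presumably looks like, reconstructed from the
deleted lines. The import location is an assumption, based on the comment
further down that these classes live in
apache_beam.testing.load_tests.load_test_metrics_utils.

# Sketch reconstructed from the deleted lines above; not necessarily the
# exact body in this PR.
import time
import unittest
import uuid

from apache_beam.testing.load_tests.load_test_metrics_utils import (
    BigQueryMetricsPublisher, Metric)


class WordCountIT(unittest.TestCase):
  def _publish_metrics(self, pipeline, metric_value):
    """Publishes one wall-clock measurement to BigQuery."""
    bq_publisher = BigQueryMetricsPublisher(
        project_name=pipeline.get_option('project'),
        table=pipeline.get_option('metrics_table'),
        dataset=pipeline.get_option('metrics_dataset'),
    )
    result = Metric(
        submit_timestamp=time.time(),
        metric_id=uuid.uuid4().hex,
        value=metric_value,
        label='Python performance test',
    )
    bq_publisher.publish([result.as_dict()])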
##########
File path: sdks/python/apache_beam/examples/wordcount_it_test.py
##########
@@ -104,18 +107,33 @@ def _run_wordcount_it(self, run_wordcount, **opts):
    run_time = end_time - start_time
    if publish_to_bq:
-      bq_publisher = BigQueryMetricsPublisher(
-          project_name=test_pipeline.get_option('project'),
-          table=test_pipeline.get_option('metrics_table'),
-          dataset=test_pipeline.get_option('metrics_dataset'),
-      )
-      result = Metric(
-          submit_timestamp=time.time(),
-          metric_id=uuid.uuid4().hex,
-          value=run_time,
-          label='Python performance test',
-      )
-      bq_publisher.publish([result.as_dict()])
+      self._publish_metrics(test_pipeline, run_time)
+
+  def _publish_metrics(self, pipeline, metric_value):
Review comment:
Looks like we have apache_beam.testing.load_tests.load_test_metrics_utils.
Should we move this code there?
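If the helper does move there, the shared version might look roughly like the
sketch below. The function name publish_run_time and its signature are
hypothetical, not an existing API in that module (and inside the module
itself the imports would of course be local names).

# Hypothetical module-level helper; name and signature are illustrative only.
import time
import uuid

from apache_beam.testing.load_tests.load_test_metrics_utils import (
    BigQueryMetricsPublisher, Metric)


def publish_run_time(pipeline, run_time, label='Python performance test'):
  """Publishes a single run-time measurement to the configured BQ table."""
  publisher = BigQueryMetricsPublisher(
      project_name=pipeline.get_option('project'),
      table=pipeline.get_option('metrics_table'),
      dataset=pipeline.get_option('metrics_dataset'),
  )
  metric = Metric(
      submit_timestamp=time.time(),
      metric_id=uuid.uuid4().hex,
      value=run_time,
      label=label,
  )
  publisher.publish([metric.as_dict()])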
##########
File path: .test-infra/metrics/grafana/dashboards/perftests_metrics/Python_Performance_Tests.json
##########
@@ -0,0 +1,297 @@
+{
Review comment:
@kamilwu, could you please review the dashboard config? Some questions:
- Is there a way to visualize the dashboard on an in-progress PR?
- Is it possible to reduce duplication in the configs? We can do it in a
  separate change. For example, most settings for Python 2.7 and 3.7 are
  similar. Have we considered auto-generating the final config from a
  smaller set of settings? See the sketch below.
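On that last point, auto-generation could be as simple as expanding one panel
template per Python version. A rough sketch follows; the template fields and
table naming are invented for illustration and do not reflect the actual
Grafana schema used by the dashboard in this PR.

# Illustrative generator for per-version dashboard panels; field names and
# table names below are invented for this sketch.
import copy
import json

PANEL_TEMPLATE = {
    'title': 'WordCountIT run time, Python {version}',
    'type': 'graph',
    'targets': [{
        'rawSql': 'SELECT timestamp, value FROM wordcount_py{nodot} '
                  'ORDER BY timestamp',
    }],
}


def make_panels(versions):
  """Expands the single template into one panel per Python version."""
  panels = []
  for i, version in enumerate(versions):
    panel = copy.deepcopy(PANEL_TEMPLATE)
    panel['id'] = i + 1
    panel['title'] = panel['title'].format(version=version)
    panel['targets'][0]['rawSql'] = (
        panel['targets'][0]['rawSql'].format(nodot=version.replace('.', '')))
    panels.append(panel)
  return panels


if __name__ == '__main__':
  print(json.dumps({'panels': make_panels(['2.7', '3.5', '3.6', '3.7'])},
                   indent=2))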
##########
File path: .test-infra/jenkins/job_PerformanceTests_Python.groovy
##########
@@ -58,117 +26,59 @@ def dataflowPipelineArgs = [
    temp_location : 'gs://temp-storage-for-end-to-end-tests/temp-it',
]
-
-// Configurations of each Jenkins job.
-def testConfigurations = [
-    new PerformanceTestConfigurations(
-        jobName : 'beam_PerformanceTests_WordCountIT_Py27',
-        jobDescription : 'Python SDK Performance Test - Run WordCountIT in Py27 with 1Gb files',
-        jobTriggerPhrase : 'Run Python27 WordCountIT Performance Test',
-        resultTable : 'beam_performance.wordcount_py27_pkb_results',
-        test : 'apache_beam.examples.wordcount_it_test:WordCountIT.test_wordcount_it',
-        itModule : ':sdks:python:test-suites:dataflow:py2',
-        extraPipelineArgs : dataflowPipelineArgs + [
-            input: 'gs://apache-beam-samples/input_small_files/ascii_sort_1MB_input.0000*', // 1Gb
-            output: 'gs://temp-storage-for-end-to-end-tests/py-it-cloud/output',
-            expect_checksum: 'ea0ca2e5ee4ea5f218790f28d0b9fe7d09d8d710',
-            num_workers: '10',
-            autoscaling_algorithm: 'NONE', // Disable autoscale the worker pool.
-        ],
-    ),
-    new PerformanceTestConfigurations(
-        jobName : 'beam_PerformanceTests_WordCountIT_Py35',
-        jobDescription : 'Python SDK Performance Test - Run WordCountIT in Py35 with 1Gb files',
-        jobTriggerPhrase : 'Run Python35 WordCountIT Performance Test',
-        resultTable : 'beam_performance.wordcount_py35_pkb_results',
-        test : 'apache_beam.examples.wordcount_it_test:WordCountIT.test_wordcount_it',
-        itModule : ':sdks:python:test-suites:dataflow:py35',
-        extraPipelineArgs : dataflowPipelineArgs + [
-            input: 'gs://apache-beam-samples/input_small_files/ascii_sort_1MB_input.0000*', // 1Gb
-            output: 'gs://temp-storage-for-end-to-end-tests/py-it-cloud/output',
-            expect_checksum: 'ea0ca2e5ee4ea5f218790f28d0b9fe7d09d8d710',
-            num_workers: '10',
-            autoscaling_algorithm: 'NONE', // Disable autoscale the worker pool.
-        ],
-    ),
-    new PerformanceTestConfigurations(
-        jobName : 'beam_PerformanceTests_WordCountIT_Py36',
-        jobDescription : 'Python SDK Performance Test - Run WordCountIT in Py36 with 1Gb files',
-        jobTriggerPhrase : 'Run Python36 WordCountIT Performance Test',
-        resultTable : 'beam_performance.wordcount_py36_pkb_results',
-        test : 'apache_beam.examples.wordcount_it_test:WordCountIT.test_wordcount_it',
-        itModule : ':sdks:python:test-suites:dataflow:py36',
-        extraPipelineArgs : dataflowPipelineArgs + [
-            input: 'gs://apache-beam-samples/input_small_files/ascii_sort_1MB_input.0000*', // 1Gb
-            output: 'gs://temp-storage-for-end-to-end-tests/py-it-cloud/output',
-            expect_checksum: 'ea0ca2e5ee4ea5f218790f28d0b9fe7d09d8d710',
-            num_workers: '10',
-            autoscaling_algorithm: 'NONE', // Disable autoscale the worker pool.
-        ],
-    ),
-    new PerformanceTestConfigurations(
-        jobName : 'beam_PerformanceTests_WordCountIT_Py37',
-        jobDescription : 'Python SDK Performance Test - Run WordCountIT in Py37 with 1Gb files',
-        jobTriggerPhrase : 'Run Python37 WordCountIT Performance Test',
-        resultTable : 'beam_performance.wordcount_py37_pkb_results',
-        test : 'apache_beam.examples.wordcount_it_test:WordCountIT.test_wordcount_it',
-        itModule : ':sdks:python:test-suites:dataflow:py37',
-        extraPipelineArgs : dataflowPipelineArgs + [
-            input: 'gs://apache-beam-samples/input_small_files/ascii_sort_1MB_input.0000*', // 1Gb
-            output: 'gs://temp-storage-for-end-to-end-tests/py-it-cloud/output',
-            expect_checksum: 'ea0ca2e5ee4ea5f218790f28d0b9fe7d09d8d710',
-            num_workers: '10',
-            autoscaling_algorithm: 'NONE', // Disable autoscale the worker pool.
-        ],
-    ),
-]
-
+testConfigurations = []
+pythonVersions = ['27', '35', '36', '37']
+
+for (pythonVersion in pythonVersions) {
Review comment:
Thanks @piotr-szuberski, I think you understood the meaning correctly. I left
a few minor comments.
----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
For queries about this service, please contact Infrastructure at:
[email protected]