kamilwu commented on a change in pull request #12612:
URL: https://github.com/apache/beam/pull/12612#discussion_r475707404
##########
File path: .test-infra/jenkins/job_LoadTests_GBK_Python.groovy
##########
@@ -22,119 +22,138 @@ import InfluxDBCredentialsHelper
def now = new Date().format("MMddHHmmss", TimeZone.getTimeZone('UTC'))
-def loadTestConfigurations = { datasetName ->
+// TODO(BEAM-10774): Skipping some cases because they are too slow.
+def STREAMING_TESTS_TO_SKIP = [1, 2, 4, 5]
+
+def loadTestConfigurations = { mode, datasetName ->
[
[
title : 'GroupByKey Python Load test: 2GB of 10B records',
test : 'apache_beam.testing.load_tests.group_by_key_test',
runner : CommonTestProperties.Runner.DATAFLOW,
pipelineOptions: [
- job_name : 'load-tests-python-dataflow-batch-gbk-1-' + now,
+ job_name :
"load-tests-python-dataflow-${mode}-gbk-1-${now}",
project : 'apache-beam-testing',
region : 'us-central1',
temp_location : 'gs://temp-storage-for-perf-tests/loadtests',
publish_to_big_query : true,
metrics_dataset : datasetName,
- metrics_table : 'python_dataflow_batch_gbk_1',
- influx_measurement : 'python_batch_gbk_1',
+ metrics_table : "python_dataflow_${mode}_gbk_1",
+ influx_measurement : "python_${mode}_gbk_1",
input_options : '\'{"num_records": 200000000,' +
'"key_size": 1,' +
'"value_size": 9}\'',
iterations : 1,
fanout : 1,
num_workers : 5,
- autoscaling_algorithm: "NONE"
+ autoscaling_algorithm: 'NONE',
]
],
[
title : 'GroupByKey Python Load test: 2GB of 100B records',
test : 'apache_beam.testing.load_tests.group_by_key_test',
runner : CommonTestProperties.Runner.DATAFLOW,
pipelineOptions: [
- job_name : 'load-tests-python-dataflow-batch-gbk-2-' + now,
+ job_name :
"load-tests-python-dataflow-${mode}-gbk-2-${now}",
project : 'apache-beam-testing',
region : 'us-central1',
temp_location : 'gs://temp-storage-for-perf-tests/loadtests',
publish_to_big_query : true,
metrics_dataset : datasetName,
- metrics_table : 'python_dataflow_batch_gbk_2',
- influx_measurement : 'python_batch_gbk_2',
+ metrics_table : "python_dataflow_${mode}_gbk_2",
+ influx_measurement : "python_${mode}_gbk_2",
input_options : '\'{"num_records": 20000000,' +
'"key_size": 10,' +
'"value_size": 90}\'',
iterations : 1,
fanout : 1,
num_workers : 5,
- autoscaling_algorithm: "NONE"
+ autoscaling_algorithm: 'NONE',
]
],
[
title : 'GroupByKey Python Load test: 2GB of 100kB records',
test : 'apache_beam.testing.load_tests.group_by_key_test',
runner : CommonTestProperties.Runner.DATAFLOW,
pipelineOptions: [
- job_name : 'load-tests-python-dataflow-batch-gbk-3-' + now,
+ job_name :
"load-tests-python-dataflow-${mode}-gbk-3-${now}",
project : 'apache-beam-testing',
region : 'us-central1',
temp_location : 'gs://temp-storage-for-perf-tests/loadtests',
publish_to_big_query : true,
metrics_dataset : datasetName,
- metrics_table : 'python_dataflow_batch_gbk_3',
- influx_measurement : 'python_batch_gbk_3',
+ metrics_table : "python_dataflow_${mode}_gbk_3",
+ influx_measurement : "python_${mode}_gbk_3",
input_options : '\'{"num_records": 20000,' +
'"key_size": 10000,' +
'"value_size": 90000}\'',
iterations : 1,
fanout : 1,
num_workers : 5,
- autoscaling_algorithm: "NONE"
+ autoscaling_algorithm: 'NONE',
]
],
[
title : 'GroupByKey Python Load test: fanout 4 times with 2GB 10-byte records total',
test : 'apache_beam.testing.load_tests.group_by_key_test',
runner : CommonTestProperties.Runner.DATAFLOW,
pipelineOptions: [
- job_name : 'load-tests-python-dataflow-batch-gbk-4-' + now,
+ job_name :
"load-tests-python-dataflow-${mode}-gbk-4-${now}",
project : 'apache-beam-testing',
region : 'us-central1',
temp_location : 'gs://temp-storage-for-perf-tests/loadtests',
publish_to_big_query : true,
metrics_dataset : datasetName,
- metrics_table : 'python_dataflow_batch_gbk_4',
- influx_measurement : 'python_batch_gbk_4',
+ metrics_table : "python_dataflow_${mode}_gbk_4",
+ influx_measurement : "python_${mode}_gbk_4",
input_options : '\'{"num_records": 5000000,' +
'"key_size": 10,' +
'"value_size": 90}\'',
iterations : 1,
fanout : 4,
- num_workers : 5,
- autoscaling_algorithm: "NONE"
+ num_workers : 16,
+ autoscaling_algorithm: 'NONE',
]
],
[
title : 'GroupByKey Python Load test: fanout 8 times with 2GB 10-byte records total',
test : 'apache_beam.testing.load_tests.group_by_key_test',
runner : CommonTestProperties.Runner.DATAFLOW,
pipelineOptions: [
- job_name : 'load-tests-python-dataflow-batch-gbk-5-' + now,
+ job_name :
"load-tests-python-dataflow-${mode}-gbk-5-${now}",
project : 'apache-beam-testing',
region : 'us-central1',
temp_location : 'gs://temp-storage-for-perf-tests/loadtests',
publish_to_big_query : true,
metrics_dataset : datasetName,
- metrics_table : 'python_dataflow_batch_gbk_5',
- influx_measurement : 'python_batch_gbk_5',
+ metrics_table : "python_dataflow_${mode}_gbk_5",
+ influx_measurement : "python_${mode}_gbk_5",
input_options : '\'{"num_records": 2500000,' +
'"key_size": 10,' +
'"value_size": 90}\'',
iterations : 1,
fanout : 8,
- num_workers : 5,
- autoscaling_algorithm: "NONE"
+ num_workers : 16,
+ autoscaling_algorithm: 'NONE',
]
],
- ].each { test -> test.pipelineOptions.putAll(additionalPipelineArgs) }
+ ]
+ .each { test -> test.pipelineOptions.putAll(additionalPipelineArgs) }
+ .each { test -> (mode != 'streaming') ?: addStreamingOptions(test) }
+ .withIndex().collectMany { test, i ->
+ mode == 'streaming' && STREAMING_TESTS_TO_SKIP.contains(i + 1) ? []: [test]
+ }
+}
+
+def addStreamingOptions(test) {
+ test.pipelineOptions << [streaming: null, experiments: 'use_runner_v2',
+ enable_streaming_engine: null ]
+}
Review comment:
I removed `--enable_streaming_engine`, since it is now being added
automatically when using `use_runner_v2`:
https://github.com/apache/beam/pull/12585
----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
For queries about this service, please contact Infrastructure at:
[email protected]