kamilwu commented on a change in pull request #12612:
URL: https://github.com/apache/beam/pull/12612#discussion_r475520818
##########
File path: .test-infra/jenkins/job_LoadTests_GBK_Python_reiterate.groovy
##########
@@ -58,43 +58,47 @@ def loadTestConfigurations = { datasetName ->
pipelineOptions: [
project : 'apache-beam-testing',
region : 'us-central1',
- job_name : 'load-tests-python-dataflow-batch-gbk-7-' + now,
+ job_name :
"load-tests-python-dataflow-${mode}-gbk-7-${now}",
temp_location : 'gs://temp-storage-for-perf-tests/loadtests',
publish_to_big_query : true,
metrics_dataset : datasetName,
- metrics_table : 'python_dataflow_batch_gbk_7',
- influx_measurement : 'python_batch_gbk_7',
+ metrics_table : "python_dataflow_${mode}_gbk_7",
+ influx_measurement : "python_${mode}_gbk_7",
input_options : '\'{"num_records": 20000000,' +
'"key_size": 10,' +
'"value_size": 90,' +
'"num_hot_keys": 10,' +
'"hot_key_fraction": 1}\'',
- fanout : 1,
iterations : 4,
+ fanout : 1,
num_workers : 5,
- autoscaling_algorithm: 'NONE'
+ autoscaling_algorithm: 'NONE',
]
]
- ].each { test -> test.pipelineOptions.putAll(additionalPipelineArgs) }
+ ]
+ .each { test -> test.pipelineOptions.putAll(additionalPipelineArgs) }
+ .each { test -> (mode != 'streaming') ?: addStreamingOptions(test) }
}
-def batchLoadTestJob = { scope, triggeringContext ->
- scope.description('Runs Python GBK reiterate load tests on Dataflow runner
in batch mode')
- commonJobProperties.setTopLevelMainJobProperties(scope, 'master', 240)
+def addStreamingOptions(test) {
+ test.pipelineOptions << [streaming: null, experiments: 'use_runner_v2',
+ enable_streaming_engine: null ]
+}
+def loadTestJob = { scope, triggeringContext, mode ->
def datasetName = loadTestsBuilder.getBigQueryDataset('load_test',
triggeringContext)
- for (testConfiguration in loadTestConfigurations(datasetName)) {
- loadTestsBuilder.loadTest(scope, testConfiguration.title,
testConfiguration.runner, CommonTestProperties.SDK.PYTHON_37,
testConfiguration.pipelineOptions, testConfiguration.test)
- }
+ loadTestsBuilder.loadTests(scope, CommonTestProperties.SDK.PYTHON_37,
+ loadTestConfigurations(mode, datasetName), 'GBK reiterate', mode)
}
-CronJobBuilder.cronJob('beam_LoadTests_Python_GBK_reiterate_Dataflow_Batch',
'H 14 * * *', this) {
- additionalPipelineArgs = [
- influx_db_name: InfluxDBCredentialsHelper.InfluxDBDatabaseName,
- influx_hostname: InfluxDBCredentialsHelper.InfluxDBHostname,
- ]
- batchLoadTestJob(delegate,
CommonTestProperties.TriggeringContext.POST_COMMIT)
-}
+CronJobBuilder.cronJob('beam_LoadTests_Python_GBK_reiterate_Dataflow_Batch',
+ 'H 14 * * *', this) {
Review comment:
Ideally, each test suite (GBK, ParDo, IO tests, etc.) should have its
own, unique time in order not to flood Jenkins with many tests that are
triggered at the same time. When adding a new test suite, a contributor has to
take a look at what time slots are already occupied and avoid using them.
I think this is not documented. I'll add some information here:
https://cwiki.apache.org/confluence/display/BEAM/Contribution+Testing+Guide#ContributionTestingGuide
----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
For queries about this service, please contact Infrastructure at:
[email protected]