[
https://issues.apache.org/jira/browse/BEAM-5985?focusedWorklogId=211620&page=com.atlassian.jira.plugin.system.issuetabpanels:worklog-tabpanel#worklog-211620
]
ASF GitHub Bot logged work on BEAM-5985:
----------------------------------------
Author: ASF GitHub Bot
Created on: 12/Mar/19 10:04
Start Date: 12/Mar/19 10:04
Worklog Time Spent: 10m
Work Description: lgajowy commented on pull request #7903: [BEAM-5985]
Dataflow batch load test jobs
URL: https://github.com/apache/beam/pull/7903#discussion_r264600855
##########
File path: .test-infra/jenkins/job_LoadTests_Java.groovy
##########
@@ -17,123 +17,215 @@
*/
import CommonJobProperties as commonJobProperties
+import CommonTestProperties
import LoadTestsBuilder as loadTestsBuilder
import PhraseTriggeringPostCommitBuilder
+import CronJobBuilder
def loadTestConfigurations = [
[
- jobName :
'beam_Java_LoadTests_GroupByKey_Dataflow_Small',
- jobDescription : 'Runs GroupByKey load tests on Dataflow
runner small records 10b',
- itClass :
'org.apache.beam.sdk.loadtests.GroupByKeyLoadTest',
- prCommitStatusName: 'Java GroupByKey Small Load Test Dataflow',
- prTriggerPhrase : 'Run GroupByKey Small Java Load Test
Dataflow',
- runner : CommonTestProperties.Runner.DATAFLOW,
- sdk : CommonTestProperties.SDK.JAVA,
- jobProperties : [
+ title : 'Load test: 2GB of 10B records',
+ itClass :
'org.apache.beam.sdk.loadtests.GroupByKeyLoadTest',
+ runner : CommonTestProperties.Runner.DATAFLOW,
+ jobProperties: [
project : 'apache-beam-testing',
+ appName :
'load_tests_Java_Dataflow_Batch_GBK_1',
tempLocation :
'gs://temp-storage-for-perf-tests/loadtests',
publishToBigQuery : true,
- bigQueryDataset : 'load_test_PRs',
- bigQueryTable : 'dataflow_gbk_small',
- sourceOptions :
'{"numRecords":1000000000,"splitPointFrequencyRecords":1,"keySizeBytes":1,"valueSizeBytes":9,"numHotKeys":0,"hotKeyFraction":0,"seed":123456,"bundleSizeDistribution":{"type":"const","const":42},"forceNumInitialBundles":100,"progressShape":"LINEAR","initializeDelayDistribution":{"type":"const","const":42}}',
- stepOptions :
'{"outputRecordsPerInputRecord":1,"preservesInputKeyDistribution":true,"perBundleDelay":10000,"perBundleDelayType":"MIXED","cpuUtilizationInMixedDelay":0.5}',
- fanout : 10,
+ bigQueryDataset : 'load_test',
+ bigQueryTable : 'java_dataflow_batch_GBK_1',
+ sourceOptions : """
+ {
+ "numRecords": 200000000,
+ "keySizeBytes": 1,
+ "valueSizeBytes": 9
+ }
+ """.trim().replaceAll("\\s", ""),
+ fanout : 1,
iterations : 1,
- maxNumWorkers : 32,
+ maxNumWorkers : 5,
+ numWorkers : 5,
+ autoscalingAlgorithm: "NONE"
]
-
],
-]
+ [
+ title : 'Load test: 2GB of 100B records',
+ itClass :
'org.apache.beam.sdk.loadtests.GroupByKeyLoadTest',
+ runner : CommonTestProperties.Runner.DATAFLOW,
+ jobProperties: [
+ project : 'apache-beam-testing',
+ appName :
'load_tests_Java_Dataflow_Batch_GBK_2',
+ tempLocation :
'gs://temp-storage-for-perf-tests/loadtests',
+ publishToBigQuery : true,
+ bigQueryDataset : 'load_test',
+ bigQueryTable : 'java_dataflow_batch_GBK_2',
+ sourceOptions : """
+ {
+ "numRecords": 20000000,
+ "keySizeBytes": 10,
+ "valueSizeBytes": 90
+ }
+ """.trim().replaceAll("\\s", ""),
+ fanout : 1,
+ iterations : 1,
+ maxNumWorkers : 5,
+ numWorkers : 5,
+ autoscalingAlgorithm: "NONE"
+ ]
+ ],
+ [
-for (testConfiguration in loadTestConfigurations) {
- PhraseTriggeringPostCommitBuilder.postCommitJob(
- testConfiguration.jobName,
- testConfiguration.prTriggerPhrase,
- testConfiguration.prCommitStatusName,
- this
- ) {
- description(testConfiguration.jobDescription)
- commonJobProperties.setTopLevelMainJobProperties(delegate, 'master',
240)
- loadTestsBuilder.loadTest(delegate, testConfiguration.jobDescription,
testConfiguration.runner, testConfiguration.sdk,
testConfiguration.jobProperties, testConfiguration.itClass)
- }
-}
+ title : 'Load test: 2GB of 100kB records',
+ itClass :
'org.apache.beam.sdk.loadtests.GroupByKeyLoadTest',
+ runner : CommonTestProperties.Runner.DATAFLOW,
+ jobProperties: [
+ project : 'apache-beam-testing',
+ appName :
'load_tests_Java_Dataflow_Batch_GBK_3',
+ tempLocation :
'gs://temp-storage-for-perf-tests/loadtests',
+ publishToBigQuery : true,
+ bigQueryDataset : 'load_test',
+ bigQueryTable : 'java_dataflow_batch_GBK_3',
+ sourceOptions : """
+ {
+ "numRecords": 2000,
+ "keySizeBytes": 100000,
+ "valueSizeBytes": 900000
+ }
+ """.trim().replaceAll("\\s", ""),
+ fanout : 1,
+ iterations : 1,
+ maxNumWorkers : 5,
+ numWorkers : 5,
+ autoscalingAlgorithm: "NONE"
+ ]
-def smokeTestConfigurations = [
+ ],
[
- title : 'GroupByKey load test Direct',
+ title : 'Load test: fanout 4 times with 2GB 10-byte
records total',
itClass :
'org.apache.beam.sdk.loadtests.GroupByKeyLoadTest',
- runner : CommonTestProperties.Runner.DIRECT,
- sdk : CommonTestProperties.SDK.JAVA,
+ runner : CommonTestProperties.Runner.DATAFLOW,
jobProperties: [
- publishToBigQuery: true,
- bigQueryDataset : 'load_test_SMOKE',
- bigQueryTable : 'direct_gbk',
- sourceOptions :
'{"numRecords":100000,"splitPointFrequencyRecords":1}',
- stepOptions :
'{"outputRecordsPerInputRecord":1,"preservesInputKeyDistribution":true}',
- fanout : 10,
- iterations : 1,
+ project : 'apache-beam-testing',
+ appName :
'load_tests_Java_Dataflow_Batch_GBK_4',
+ tempLocation :
'gs://temp-storage-for-perf-tests/loadtests',
+ publishToBigQuery : true,
+ bigQueryDataset : 'load_test',
+ bigQueryTable : 'java_dataflow_batch_GBK_4',
+ sourceOptions : """
+ {
+ "numRecords": 5000000,
+ "keySizeBytes": 10,
+ "valueSizeBytes": 90
+ }
+ """.trim().replaceAll("\\s", ""),
+ fanout : 4,
+ iterations : 1,
+ maxNumWorkers : 16,
+ numWorkers : 16,
+ autoscalingAlgorithm: "NONE"
]
],
[
- title : 'GroupByKey load test Dataflow',
+ title : 'Load test: fanout 8 times with 2GB 10-byte
records total',
itClass :
'org.apache.beam.sdk.loadtests.GroupByKeyLoadTest',
runner : CommonTestProperties.Runner.DATAFLOW,
- sdk : CommonTestProperties.SDK.JAVA,
jobProperties: [
- project : 'apache-beam-testing',
- tempLocation :
'gs://temp-storage-for-perf-tests/smoketests',
- publishToBigQuery: true,
- bigQueryDataset : 'load_test_SMOKE',
- bigQueryTable : 'dataflow_gbk',
- sourceOptions :
'{"numRecords":100000,"splitPointFrequencyRecords":1}',
- stepOptions :
'{"outputRecordsPerInputRecord":1,"preservesInputKeyDistribution":true}',
- fanout : 10,
- iterations : 1,
+ project : 'apache-beam-testing',
+ appName :
'load_tests_Java_Dataflow_Batch_GBK_5',
+ tempLocation :
'gs://temp-storage-for-perf-tests/loadtests',
+ publishToBigQuery : true,
+ bigQueryDataset : 'load_test',
+ bigQueryTable : 'java_dataflow_batch_GBK_5',
+ sourceOptions : """
+ {
+ "numRecords": 2500000,
+ "keySizeBytes": 10,
+ "valueSizeBytes": 90
+ }
+ """.trim().replaceAll("\\s", ""),
+ fanout : 8,
+ iterations : 1,
+ maxNumWorkers : 16,
+ numWorkers : 16,
+ autoscalingAlgorithm: "NONE"
]
],
[
- title : 'GroupByKey load test Flink',
+ title : 'Load test: reiterate 4 times 10kB values',
itClass :
'org.apache.beam.sdk.loadtests.GroupByKeyLoadTest',
- runner : CommonTestProperties.Runner.FLINK,
- sdk : CommonTestProperties.SDK.JAVA,
+ runner : CommonTestProperties.Runner.DATAFLOW,
jobProperties: [
- publishToBigQuery: true,
- bigQueryDataset : 'load_test_SMOKE',
- bigQueryTable : 'flink_gbk',
- sourceOptions :
'{"numRecords":100000,"splitPointFrequencyRecords":1}',
- stepOptions :
'{"outputRecordsPerInputRecord":1,"preservesInputKeyDistribution":true}',
- fanout : 10,
- iterations : 1,
+ project : 'apache-beam-testing',
+ appName :
'load_tests_Java_Dataflow_Batch_GBK_6',
+ tempLocation :
'gs://temp-storage-for-perf-tests/loadtests',
+ publishToBigQuery : true,
+ bigQueryDataset : 'load_test',
+ bigQueryTable : 'java_dataflow_batch_GBK_6',
+ sourceOptions : """
+ {
+ "numRecords": 20000000,
+ "keySizeBytes": 10,
+ "valueSizeBytes": 90,
+ "numHotKeys": 200,
+ "hotKeyFraction": 1
+ }
+ """.trim().replaceAll("\\s", ""),
+ fanout : 1,
+ iterations : 4,
+ maxNumWorkers : 5,
+ numWorkers : 5,
+ autoscalingAlgorithm: "NONE"
]
],
[
- title : 'GroupByKey load test Spark',
+ title : 'Load test: reiterate 4 times 2MB values',
itClass :
'org.apache.beam.sdk.loadtests.GroupByKeyLoadTest',
- runner : CommonTestProperties.Runner.SPARK,
- sdk : CommonTestProperties.SDK.JAVA,
+ runner : CommonTestProperties.Runner.DATAFLOW,
jobProperties: [
- sparkMaster : 'local[4]',
- publishToBigQuery: true,
- bigQueryDataset : 'load_test_SMOKE',
- bigQueryTable : 'spark_gbk',
- sourceOptions :
'{"numRecords":100000,"splitPointFrequencyRecords":1}',
- stepOptions :
'{"outputRecordsPerInputRecord":1,"preservesInputKeyDistribution":true}',
- fanout : 10,
- iterations : 1,
+ project : 'apache-beam-testing',
+ appName :
'load_tests_Java_Dataflow_Batch_GBK_7',
+ tempLocation :
'gs://temp-storage-for-perf-tests/loadtests',
+ publishToBigQuery : true,
+ bigQueryDataset : 'load_test',
+ bigQueryTable : 'java_dataflow_batch_GBK_7',
+ sourceOptions : """
+ {
+ "numRecords": 20000000,
+ "keySizeBytes": 10,
+ "valueSizeBytes": 90,
+ "numHotKeys": 10,
+ "hotKeyFraction": 1
+ }
+ """.trim().replaceAll("\\s", ""),
+ fanout : 1,
+ iterations : 4,
+ maxNumWorkers : 5,
+ numWorkers : 5,
+ autoscalingAlgorithm: "NONE"
]
]
]
+def loadTestJob = { scope ->
+ scope.description('Runs Java GBK load tests on Dataflow runner in batch
mode')
+ commonJobProperties.setTopLevelMainJobProperties(scope, 'master', 240)
+
+ for (testConfiguration in loadTestConfigurations) {
+ loadTestsBuilder.loadTest(scope, testConfiguration.title,
testConfiguration.runner, CommonTestProperties.SDK.JAVA,
testConfiguration.jobProperties, testConfiguration.itClass,
CommonTestProperties.TriggeringContext.PR)
+ }
+}
+
+CronJobBuilder.cronJob('beam_LoadTests_Java_GBK_Dataflow_Batch', 'H 12 * * *',
this) {
+ loadTestJob(delegate)
+}
Review comment:
So far we have used it as you described (debugging, validating PRs on demand).
IMO let's keep it this way - it's not a critical or easy-to-break piece of
code. If we have problems with that in the future, it's always easy to
change the triggers for the smoke tests to be pre/post commit (1 line change?)
----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
For queries about this service, please contact Infrastructure at:
users@infra.apache.org
Issue Time Tracking
-------------------
Worklog Id: (was: 211620)
Time Spent: 20h (was: 19h 50m)
> Create jenkins jobs to run the load tests for Java SDK
> ------------------------------------------------------
>
> Key: BEAM-5985
> URL: https://issues.apache.org/jira/browse/BEAM-5985
> Project: Beam
> Issue Type: Sub-task
> Components: testing
> Reporter: Lukasz Gajowy
> Assignee: Kasia Kucharczyk
> Priority: Major
> Time Spent: 20h
> Remaining Estimate: 0h
>
> How/how often/in what cases we run those tests is yet to be decided (this is
> part of the task)
--
This message was sent by Atlassian JIRA
(v7.6.3#76005)