[GitHub] [beam] kamilwu commented on a change in pull request #12612: [BEAM-10675] Add Python GBK Load Tests for streaming on Dataflow

GitBox Mon, 24 Aug 2020 08:39:43 -0700


kamilwu commented on a change in pull request #12612:
URL: https://github.com/apache/beam/pull/12612#discussion_r475707404




##########
File path: .test-infra/jenkins/job_LoadTests_GBK_Python.groovy
##########
@@ -22,119 +22,138 @@ import InfluxDBCredentialsHelper
 
 def now = new Date().format("MMddHHmmss", TimeZone.getTimeZone('UTC'))
 
-def loadTestConfigurations = { datasetName ->
+// TODO(BEAM-10774): Skipping some cases because they are too slow.
+def STREAMING_TESTS_TO_SKIP = [1, 2, 4, 5]
+
+def loadTestConfigurations = { mode, datasetName ->
   [
     [
       title          : 'GroupByKey Python Load test: 2GB of 10B records',
       test           : 'apache_beam.testing.load_tests.group_by_key_test',
       runner         : CommonTestProperties.Runner.DATAFLOW,
       pipelineOptions: [
-        job_name             : 'load-tests-python-dataflow-batch-gbk-1-' + now,
+        job_name             : 
"load-tests-python-dataflow-${mode}-gbk-1-${now}",
         project              : 'apache-beam-testing',
         region               : 'us-central1',
         temp_location        : 'gs://temp-storage-for-perf-tests/loadtests',
         publish_to_big_query : true,
         metrics_dataset      : datasetName,
-        metrics_table        : 'python_dataflow_batch_gbk_1',
-        influx_measurement   : 'python_batch_gbk_1',
+        metrics_table        : "python_dataflow_${mode}_gbk_1",
+        influx_measurement   : "python_${mode}_gbk_1",
         input_options        : '\'{"num_records": 200000000,' +
         '"key_size": 1,' +
         '"value_size": 9}\'',
         iterations           : 1,
         fanout               : 1,
         num_workers          : 5,
-        autoscaling_algorithm: "NONE"
+        autoscaling_algorithm: 'NONE',
       ]
     ],
     [
       title          : 'GroupByKey Python Load test: 2GB of 100B records',
       test           : 'apache_beam.testing.load_tests.group_by_key_test',
       runner         : CommonTestProperties.Runner.DATAFLOW,
       pipelineOptions: [
-        job_name             : 'load-tests-python-dataflow-batch-gbk-2-' + now,
+        job_name             : 'load-tests-python-dataflow-${mode}-gbk-2-' + 
now,
         project              : 'apache-beam-testing',
         region               : 'us-central1',
         temp_location        : 'gs://temp-storage-for-perf-tests/loadtests',
         publish_to_big_query : true,
         metrics_dataset      : datasetName,
-        metrics_table        : 'python_dataflow_batch_gbk_2',
-        influx_measurement   : 'python_batch_gbk_2',
+        metrics_table        : 'python_dataflow_${mode}_gbk_2',
+        influx_measurement   : 'python_${mode}_gbk_2',
         input_options        : '\'{"num_records": 20000000,' +
         '"key_size": 10,' +
         '"value_size": 90}\'',
         iterations           : 1,
         fanout               : 1,
         num_workers          : 5,
-        autoscaling_algorithm: "NONE"
+        autoscaling_algorithm: 'NONE',
       ]
     ],
     [
       title          : 'GroupByKey Python Load test: 2GB of 100kB records',
       test           : 'apache_beam.testing.load_tests.group_by_key_test',
       runner         : CommonTestProperties.Runner.DATAFLOW,
       pipelineOptions: [
-        job_name             : 'load-tests-python-dataflow-batch-gbk-3-' + now,
+        job_name             : 
"load-tests-python-dataflow-${mode}-gbk-3-${now}",
         project              : 'apache-beam-testing',
         region               : 'us-central1',
         temp_location        : 'gs://temp-storage-for-perf-tests/loadtests',
         publish_to_big_query : true,
         metrics_dataset      : datasetName,
-        metrics_table        : 'python_dataflow_batch_gbk_3',
-        influx_measurement   : 'python_batch_gbk_3',
+        metrics_table        : "python_dataflow_${mode}_gbk_3",
+        influx_measurement   : "python_${mode}_gbk_3",
         input_options        : '\'{"num_records": 20000,' +
         '"key_size": 10000,' +
         '"value_size": 90000}\'',
         iterations           : 1,
         fanout               : 1,
         num_workers          : 5,
-        autoscaling_algorithm: "NONE"
+        autoscaling_algorithm: 'NONE',
       ]
     ],
     [
       title          : 'GroupByKey Python Load test: fanout 4 times with 2GB 
10-byte records total',
       test           : 'apache_beam.testing.load_tests.group_by_key_test',
       runner         : CommonTestProperties.Runner.DATAFLOW,
       pipelineOptions: [
-        job_name             : 'load-tests-python-dataflow-batch-gbk-4-' + now,
+        job_name             : 
"load-tests-python-dataflow-${mode}-gbk-4-${now}",
         project              : 'apache-beam-testing',
         region               : 'us-central1',
         temp_location        : 'gs://temp-storage-for-perf-tests/loadtests',
         publish_to_big_query : true,
         metrics_dataset      : datasetName,
-        metrics_table        : 'python_dataflow_batch_gbk_4',
-        influx_measurement   : 'python_batch_gbk_4',
+        metrics_table        : "python_dataflow_${mode}_gbk_4",
+        influx_measurement   : "python_${mode}_gbk_4",
         input_options        : '\'{"num_records": 5000000,' +
         '"key_size": 10,' +
         '"value_size": 90}\'',
         iterations           : 1,
         fanout               : 4,
-        num_workers          : 5,
-        autoscaling_algorithm: "NONE"
+        num_workers          : 16,
+        autoscaling_algorithm: 'NONE',
       ]
     ],
     [
       title          : 'GroupByKey Python Load test: fanout 8 times with 2GB 
10-byte records total',
       test           : 'apache_beam.testing.load_tests.group_by_key_test',
       runner         : CommonTestProperties.Runner.DATAFLOW,
       pipelineOptions: [
-        job_name             : 'load-tests-python-dataflow-batch-gbk-5-' + now,
+        job_name             : 
"load-tests-python-dataflow-${mode}-gbk-5-${now}",
         project              : 'apache-beam-testing',
         region               : 'us-central1',
         temp_location        : 'gs://temp-storage-for-perf-tests/loadtests',
         publish_to_big_query : true,
         metrics_dataset      : datasetName,
-        metrics_table        : 'python_dataflow_batch_gbk_5',
-        influx_measurement   : 'python_batch_gbk_5',
+        metrics_table        : "python_dataflow_${mode}_gbk_5",
+        influx_measurement   : "python_${mode}_gbk_5",
         input_options        : '\'{"num_records": 2500000,' +
         '"key_size": 10,' +
         '"value_size": 90}\'',
         iterations           : 1,
         fanout               : 8,
-        num_workers          : 5,
-        autoscaling_algorithm: "NONE"
+        num_workers          : 16,
+        autoscaling_algorithm: 'NONE',
       ]
     ],
-  ].each { test -> test.pipelineOptions.putAll(additionalPipelineArgs) }
+  ]
+  .each { test -> test.pipelineOptions.putAll(additionalPipelineArgs) }
+  .each { test -> (mode != 'streaming') ?: addStreamingOptions(test) }
+  .withIndex().collectMany { test, i ->
+    mode == 'streaming' && STREAMING_TESTS_TO_SKIP.contains(i + 1) ? []: [test]
+  }
+}
+
+def addStreamingOptions(test) {
+  test.pipelineOptions << [streaming: null, experiments: 'use_runner_v2',
+    enable_streaming_engine: null ]
+}

Review comment:
       I removed `--enable_streaming_engine`, since it is now added automatically when `use_runner_v2` is used: https://github.com/apache/beam/pull/12585




----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

For queries about this service, please contact Infrastructure at:
users@infra.apache.org

[GitHub] [beam] kamilwu commented on a change in pull request #12612: [BEAM-10675] Add Python GBK Load Tests for streaming on Dataflow

Reply via email to