This is an automated email from the ASF dual-hosted git repository.
yhu pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/beam.git
The following commit(s) were added to refs/heads/master by this push:
new 218bda98624 added jobs to GitHub Actions (#28679)
218bda98624 is described below
commit 218bda9862407c20adfeffb6723d0de07f2e4db3
Author: Andrey Devyatkin <[email protected]>
AuthorDate: Thu Sep 28 05:49:24 2023 +0200
added jobs to GitHub Actions (#28679)
---
.github/workflows/beam_PerformanceTests_Cdap.yml | 108 ++++++++++++++++++++
.../beam_PerformanceTests_HadoopFormat.yml | 108 ++++++++++++++++++++
.../beam_PerformanceTests_MongoDBIO_IT.yml | 108 ++++++++++++++++++++
...erformanceTests_PubsubIOIT_Python_Streaming.yml | 94 +++++++++++++++++
.../beam_PerformanceTests_SparkReceiver_IO.yml | 108 ++++++++++++++++++++
.../beam_PerformanceTests_TFRecordIOIT_HDFS.yml | 112 +++++++++++++++++++++
...PerformanceTests_WordCountIT_PythonVersions.yml | 105 +++++++++++++++++++
.../TFRecordIOIT_HDFS.txt | 23 +++++
.../performance-tests-job-configs/cdap.txt | 29 ++++++
.../performance-tests-job-configs/hadoopFormat.txt | 29 ++++++
.../performance-tests-job-configs/mongoDBIO_IT.txt | 28 ++++++
.../pubsubIOIT_Python_Streaming.txt | 28 ++++++
.../sparkReceiver_IO.txt | 26 +++++
.../wordCountIT_Python.txt | 27 +++++
14 files changed, 933 insertions(+)
diff --git a/.github/workflows/beam_PerformanceTests_Cdap.yml b/.github/workflows/beam_PerformanceTests_Cdap.yml
new file mode 100644
index 00000000000..0dfd095ecf5
--- /dev/null
+++ b/.github/workflows/beam_PerformanceTests_Cdap.yml
@@ -0,0 +1,108 @@
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements. See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+name: PerformanceTests Cdap
+
+on:
+  issue_comment:  # any new comment starts the workflow; the job-level `if` filters on the trigger phrase
+    types: [created]
+  schedule:
+    - cron: '13 4/16 * * *'
+  workflow_dispatch:
+
+# Setting explicit permissions for the action to avoid the default permissions which are `write-all` in case of pull_request_target event
+permissions:
+  actions: write
+  pull-requests: write
+  checks: write
+  contents: read
+  deployments: read
+  id-token: none
+  issues: write
+  discussions: read
+  packages: read
+  pages: read
+  repository-projects: read
+  security-events: read
+  statuses: read
+
+# This allows a subsequently queued workflow run to interrupt previous runs
+concurrency:
+  group: '${{ github.workflow }} @ ${{ github.event.issue.number || github.sha || github.head_ref || github.ref }}-${{ github.event.schedule || github.event.comment.body || github.event.sender.login }}'
+  cancel-in-progress: true
+
+env:
+  GRADLE_ENTERPRISE_ACCESS_KEY: ${{ secrets.GE_ACCESS_TOKEN }}
+  GRADLE_ENTERPRISE_CACHE_USERNAME: ${{ secrets.GE_CACHE_USERNAME }}
+  GRADLE_ENTERPRISE_CACHE_PASSWORD: ${{ secrets.GE_CACHE_PASSWORD }}
+  beam_PerformanceTests_Cdap_test_arguments_1: ''  # read by the final Gradle step; presumably filled in by test-arguments-action (TODO confirm)
+
+jobs:
+  beam_PerformanceTests_Cdap:  # runs CdapIOIT through Gradle with -DintegrationTestRunner=dataflow
+    if: |
+      github.event_name == 'workflow_dispatch' ||
+      github.event_name == 'schedule' ||
+      github.event.comment.body == 'Run Java CdapIO Performance Test'
+    runs-on: [self-hosted, ubuntu-20.04, main]
+    timeout-minutes: 100
+    name: ${{ matrix.job_name }} (${{ matrix.job_phrase }})
+    strategy:
+      matrix:
+        job_name: ["beam_PerformanceTests_Cdap"]
+        job_phrase: ["Run Java CdapIO Performance Test"]
+    steps:
+      - uses: actions/checkout@v4
+      - name: Setup repository
+        uses: ./.github/actions/setup-action
+        with:
+          comment_phrase: ${{ matrix.job_phrase }}
+          github_token: ${{ secrets.GITHUB_TOKEN }}
+          github_job: ${{ matrix.job_name }} (${{ matrix.job_phrase }})
+      - name: Authenticate on GCP
+        id: auth
+        uses: google-github-actions/auth@v1
+        with:
+          credentials_json: ${{ secrets.GCP_SA_KEY }}
+          project_id: ${{ secrets.GCP_PROJECT_ID }}
+      - name: Set k8s access
+        uses: ./.github/actions/setup-k8s-access
+        with:
+          cluster_name: io-datastores
+          k8s_namespace: ${{ matrix.job_name }}-${{ github.run_id }}
+      - name: Install postgres
+        id: install_postgres  # step output postgres_IP is read by "Prepare test arguments"
+        run: |
+          kubectl apply -f ${{ github.workspace }}/.test-infra/kubernetes/postgres/postgres-service-for-local-dev.yml
+          kubectl wait svc/postgres-for-dev --for=jsonpath='{.status.loadBalancer.ingress[0].ip}' --timeout=60s
+          loadbalancer_IP=$(kubectl get svc postgres-for-dev -o jsonpath='{.status.loadBalancer.ingress[0].ip}')
+          echo "postgres_IP=$loadbalancer_IP" >> "$GITHUB_OUTPUT"
+      - name: Prepare test arguments
+        uses: ./.github/actions/test-arguments-action
+        with:
+          test-type: performance
+          test-language: java
+          argument-file-paths: |
+            ${{ github.workspace }}/.github/workflows/performance-tests-job-configs/cdap.txt
+          arguments: |
+            --postgresServerName=${{ steps.install_postgres.outputs.postgres_IP }}
+      - name: run integrationTest
+        uses: ./.github/actions/gradle-command-self-hosted-action
+        with:
+          gradle-command: :sdks:java:io:cdap:integrationTest
+          arguments: |
+            --info \
+            --tests org.apache.beam.sdk.io.cdap.CdapIOIT \
+            -DintegrationTestRunner=dataflow \
+            -DintegrationTestPipelineOptions='[${{ env.beam_PerformanceTests_Cdap_test_arguments_1 }}]' \
diff --git a/.github/workflows/beam_PerformanceTests_HadoopFormat.yml b/.github/workflows/beam_PerformanceTests_HadoopFormat.yml
new file mode 100644
index 00000000000..c80db8a86a1
--- /dev/null
+++ b/.github/workflows/beam_PerformanceTests_HadoopFormat.yml
@@ -0,0 +1,108 @@
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements. See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+name: PerformanceTests HadoopFormat
+
+on:
+  issue_comment:  # any new comment starts the workflow; the job-level `if` filters on the trigger phrase
+    types: [created]
+  schedule:
+    - cron: '16 7/19 * * *'
+  workflow_dispatch:
+
+# Setting explicit permissions for the action to avoid the default permissions which are `write-all` in case of pull_request_target event
+permissions:
+  actions: write
+  pull-requests: write
+  checks: write
+  contents: read
+  deployments: read
+  id-token: none
+  issues: write
+  discussions: read
+  packages: read
+  pages: read
+  repository-projects: read
+  security-events: read
+  statuses: read
+
+# This allows a subsequently queued workflow run to interrupt previous runs
+concurrency:
+  group: '${{ github.workflow }} @ ${{ github.event.issue.number || github.sha || github.head_ref || github.ref }}-${{ github.event.schedule || github.event.comment.body || github.event.sender.login }}'
+  cancel-in-progress: true
+
+env:
+  GRADLE_ENTERPRISE_ACCESS_KEY: ${{ secrets.GE_ACCESS_TOKEN }}
+  GRADLE_ENTERPRISE_CACHE_USERNAME: ${{ secrets.GE_CACHE_USERNAME }}
+  GRADLE_ENTERPRISE_CACHE_PASSWORD: ${{ secrets.GE_CACHE_PASSWORD }}
+  beam_PerformanceTests_HadoopFormat_test_arguments_1: ''  # read by the final Gradle step; presumably filled in by test-arguments-action (TODO confirm)
+
+jobs:
+  beam_PerformanceTests_HadoopFormat:  # runs HadoopFormatIOIT through Gradle with -DintegrationTestRunner=dataflow
+    if: |
+      github.event_name == 'workflow_dispatch' ||
+      github.event_name == 'schedule' ||
+      github.event.comment.body == 'Run Java HadoopFormatIO Performance Test'
+    runs-on: [self-hosted, ubuntu-20.04, main]
+    timeout-minutes: 100
+    name: ${{ matrix.job_name }} (${{ matrix.job_phrase }})
+    strategy:
+      matrix:
+        job_name: ["beam_PerformanceTests_HadoopFormat"]
+        job_phrase: ["Run Java HadoopFormatIO Performance Test"]
+    steps:
+      - uses: actions/checkout@v4
+      - name: Setup repository
+        uses: ./.github/actions/setup-action
+        with:
+          comment_phrase: ${{ matrix.job_phrase }}
+          github_token: ${{ secrets.GITHUB_TOKEN }}
+          github_job: ${{ matrix.job_name }} (${{ matrix.job_phrase }})
+      - name: Authenticate on GCP
+        id: auth
+        uses: google-github-actions/auth@v1
+        with:
+          credentials_json: ${{ secrets.GCP_SA_KEY }}
+          project_id: ${{ secrets.GCP_PROJECT_ID }}
+      - name: Set k8s access
+        uses: ./.github/actions/setup-k8s-access
+        with:
+          cluster_name: io-datastores
+          k8s_namespace: ${{ matrix.job_name }}-${{ github.run_id }}
+      - name: Install postgres
+        id: install_postgres  # step output postgres_IP is read by "Prepare test arguments"
+        run: |
+          kubectl apply -f ${{ github.workspace }}/.test-infra/kubernetes/postgres/postgres-service-for-local-dev.yml
+          kubectl wait svc/postgres-for-dev --for=jsonpath='{.status.loadBalancer.ingress[0].ip}' --timeout=60s
+          loadbalancer_IP=$(kubectl get svc postgres-for-dev -o jsonpath='{.status.loadBalancer.ingress[0].ip}')
+          echo "postgres_IP=$loadbalancer_IP" >> "$GITHUB_OUTPUT"
+      - name: Prepare test arguments
+        uses: ./.github/actions/test-arguments-action
+        with:
+          test-type: performance
+          test-language: java
+          argument-file-paths: |
+            ${{ github.workspace }}/.github/workflows/performance-tests-job-configs/hadoopFormat.txt
+          arguments: |
+            --postgresServerName=${{ steps.install_postgres.outputs.postgres_IP }}
+      - name: run integrationTest
+        uses: ./.github/actions/gradle-command-self-hosted-action
+        with:
+          gradle-command: :sdks:java:io:hadoop-format:integrationTest
+          arguments: |
+            --info \
+            --tests org.apache.beam.sdk.io.hadoop.format.HadoopFormatIOIT \
+            -DintegrationTestRunner=dataflow \
+            -DintegrationTestPipelineOptions='[${{ env.beam_PerformanceTests_HadoopFormat_test_arguments_1 }}]' \
diff --git a/.github/workflows/beam_PerformanceTests_MongoDBIO_IT.yml b/.github/workflows/beam_PerformanceTests_MongoDBIO_IT.yml
new file mode 100644
index 00000000000..299d9ac0b77
--- /dev/null
+++ b/.github/workflows/beam_PerformanceTests_MongoDBIO_IT.yml
@@ -0,0 +1,108 @@
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements. See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+name: PerformanceTests MongoDBIO IT
+
+on:
+  issue_comment:  # any new comment starts the workflow; the job-level `if` filters on the trigger phrase
+    types: [created]
+  schedule:
+    - cron: '14 5/17 * * *'
+  workflow_dispatch:
+
+# Setting explicit permissions for the action to avoid the default permissions which are `write-all` in case of pull_request_target event
+permissions:
+  actions: write
+  pull-requests: write
+  checks: write
+  contents: read
+  deployments: read
+  id-token: none
+  issues: write
+  discussions: read
+  packages: read
+  pages: read
+  repository-projects: read
+  security-events: read
+  statuses: read
+
+# This allows a subsequently queued workflow run to interrupt previous runs
+concurrency:
+  group: '${{ github.workflow }} @ ${{ github.event.issue.number || github.sha || github.head_ref || github.ref }}-${{ github.event.schedule || github.event.comment.body || github.event.sender.login }}'
+  cancel-in-progress: true
+
+env:
+  GRADLE_ENTERPRISE_ACCESS_KEY: ${{ secrets.GE_ACCESS_TOKEN }}
+  GRADLE_ENTERPRISE_CACHE_USERNAME: ${{ secrets.GE_CACHE_USERNAME }}
+  GRADLE_ENTERPRISE_CACHE_PASSWORD: ${{ secrets.GE_CACHE_PASSWORD }}
+  beam_PerformanceTests_MongoDBIO_IT_test_arguments_1: ''  # read by the final Gradle step; presumably filled in by test-arguments-action (TODO confirm)
+
+jobs:
+  beam_PerformanceTests_MongoDBIO_IT:  # runs MongoDBIOIT through Gradle with -DintegrationTestRunner=dataflow
+    if: |
+      github.event_name == 'workflow_dispatch' ||
+      github.event_name == 'schedule' ||
+      github.event.comment.body == 'Run Java MongoDBIO Performance Test'
+    runs-on: [self-hosted, ubuntu-20.04, main]
+    timeout-minutes: 100
+    name: ${{ matrix.job_name }} (${{ matrix.job_phrase }})
+    strategy:
+      matrix:
+        job_name: ["beam_PerformanceTests_MongoDBIO_IT"]
+        job_phrase: ["Run Java MongoDBIO Performance Test"]
+    steps:
+      - uses: actions/checkout@v4
+      - name: Setup repository
+        uses: ./.github/actions/setup-action
+        with:
+          comment_phrase: ${{ matrix.job_phrase }}
+          github_token: ${{ secrets.GITHUB_TOKEN }}
+          github_job: ${{ matrix.job_name }} (${{ matrix.job_phrase }})
+      - name: Authenticate on GCP
+        id: auth
+        uses: google-github-actions/auth@v1
+        with:
+          credentials_json: ${{ secrets.GCP_SA_KEY }}
+          project_id: ${{ secrets.GCP_PROJECT_ID }}
+      - name: Set k8s access
+        uses: ./.github/actions/setup-k8s-access
+        with:
+          cluster_name: io-datastores
+          k8s_namespace: ${{ matrix.job_name }}-${{ github.run_id }}
+      - name: Install mongo
+        id: install_mongo  # step output mongo_IP is read by "Prepare test arguments"
+        run: |
+          kubectl apply -f ${{ github.workspace }}/.test-infra/kubernetes/mongodb/load-balancer/mongo.yml
+          kubectl wait svc/mongo-load-balancer-service --for=jsonpath='{.status.loadBalancer.ingress[0].ip}' --timeout=60s
+          loadbalancer_IP=$(kubectl get svc mongo-load-balancer-service -o jsonpath='{.status.loadBalancer.ingress[0].ip}')
+          echo "mongo_IP=$loadbalancer_IP" >> "$GITHUB_OUTPUT"
+      - name: Prepare test arguments
+        uses: ./.github/actions/test-arguments-action
+        with:
+          test-type: performance
+          test-language: java
+          argument-file-paths: |
+            ${{ github.workspace }}/.github/workflows/performance-tests-job-configs/mongoDBIO_IT.txt
+          arguments: |
+            --mongoDBHostName=${{ steps.install_mongo.outputs.mongo_IP }}
+      - name: run integrationTest
+        uses: ./.github/actions/gradle-command-self-hosted-action
+        with:
+          gradle-command: :sdks:java:io:mongodb:integrationTest
+          arguments: |
+            --info \
+            --tests org.apache.beam.sdk.io.mongodb.MongoDBIOIT \
+            -DintegrationTestRunner=dataflow \
+            -DintegrationTestPipelineOptions='[${{ env.beam_PerformanceTests_MongoDBIO_IT_test_arguments_1 }}]' \
diff --git a/.github/workflows/beam_PerformanceTests_PubsubIOIT_Python_Streaming.yml b/.github/workflows/beam_PerformanceTests_PubsubIOIT_Python_Streaming.yml
new file mode 100644
index 00000000000..3a16e482979
--- /dev/null
+++ b/.github/workflows/beam_PerformanceTests_PubsubIOIT_Python_Streaming.yml
@@ -0,0 +1,94 @@
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements. See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+name: PerformanceTests PubsubIOIT Python Streaming
+
+on:
+  issue_comment:  # any new comment starts the workflow; the job-level `if` filters on the trigger phrase
+    types: [created]
+  schedule:
+    - cron: '11 2 * * *'
+  workflow_dispatch:
+
+# Setting explicit permissions for the action to avoid the default permissions which are `write-all` in case of pull_request_target event
+permissions:
+  actions: write
+  pull-requests: write
+  checks: write
+  contents: read
+  deployments: read
+  id-token: none
+  issues: write
+  discussions: read
+  packages: read
+  pages: read
+  repository-projects: read
+  security-events: read
+  statuses: read
+
+# This allows a subsequently queued workflow run to interrupt previous runs
+concurrency:
+  group: '${{ github.workflow }} @ ${{ github.event.issue.number || github.sha || github.head_ref || github.ref }}-${{ github.event.schedule || github.event.comment.body || github.event.sender.login }}'
+  cancel-in-progress: true
+
+env:
+  GRADLE_ENTERPRISE_ACCESS_KEY: ${{ secrets.GE_ACCESS_TOKEN }}
+  GRADLE_ENTERPRISE_CACHE_USERNAME: ${{ secrets.GE_CACHE_USERNAME }}
+  GRADLE_ENTERPRISE_CACHE_PASSWORD: ${{ secrets.GE_CACHE_PASSWORD }}
+  beam_PerformanceTests_PubsubIOIT_Python_Streaming_test_arguments_1: ''  # read by the "Run test" step; presumably filled in by test-arguments-action (TODO confirm)
+
+jobs:
+  beam_PerformanceTests_PubsubIOIT_Python_Streaming:  # runs apache_beam.io.gcp.pubsub_io_perf_test through Gradle with TestDataflowRunner
+    if: |
+      github.event_name == 'workflow_dispatch' ||
+      github.event_name == 'schedule' ||
+      github.event.comment.body == 'Run PubsubIO Performance Test Python'
+    runs-on: [self-hosted, ubuntu-20.04, main]
+    timeout-minutes: 240
+    name: ${{ matrix.job_name }} (${{ matrix.job_phrase }})
+    strategy:
+      matrix:
+        job_name: ["beam_PerformanceTests_PubsubIOIT_Python_Streaming"]
+        job_phrase: ["Run PubsubIO Performance Test Python"]
+    steps:
+      - uses: actions/checkout@v4
+      - name: Setup repository
+        uses: ./.github/actions/setup-action
+        with:
+          comment_phrase: ${{ matrix.job_phrase }}
+          github_token: ${{ secrets.GITHUB_TOKEN }}
+          github_job: ${{ matrix.job_name }} (${{ matrix.job_phrase }})
+      - name: Setup environment
+        uses: ./.github/actions/setup-environment-action
+        with:
+          python-version: '3.8'  # quoted so YAML keeps it a string rather than a float
+      - name: Prepare test arguments
+        uses: ./.github/actions/test-arguments-action
+        with:
+          test-type: load
+          test-language: python
+          argument-file-paths: |
+            ${{ github.workspace }}/.github/workflows/performance-tests-job-configs/pubsubIOIT_Python_Streaming.txt
+          arguments: |
+            --job_name=performance-tests-psio-python-2gb$(date '+%m%d%H%M%S' --utc)
+      - name: Run test
+        uses: ./.github/actions/gradle-command-self-hosted-action
+        with:
+          gradle-command: :sdks:python:apache_beam:testing:load_tests:run
+          arguments: |
+            -PloadTest.mainClass=apache_beam.io.gcp.pubsub_io_perf_test \
+            -Prunner=TestDataflowRunner \
+            -PpythonVersion=3.8 \
+            '-PloadTest.args=${{ env.beam_PerformanceTests_PubsubIOIT_Python_Streaming_test_arguments_1 }}'
\ No newline at end of file
diff --git a/.github/workflows/beam_PerformanceTests_SparkReceiver_IO.yml b/.github/workflows/beam_PerformanceTests_SparkReceiver_IO.yml
new file mode 100644
index 00000000000..f2f4dc85ec5
--- /dev/null
+++ b/.github/workflows/beam_PerformanceTests_SparkReceiver_IO.yml
@@ -0,0 +1,108 @@
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements. See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+name: PerformanceTests SparkReceiver IO
+
+on:
+  issue_comment:  # any new comment starts the workflow; the job-level `if` filters on the trigger phrase
+    types: [created]
+  schedule:
+    - cron: '15 6/18 * * *'
+  workflow_dispatch:
+
+# Setting explicit permissions for the action to avoid the default permissions which are `write-all` in case of pull_request_target event
+permissions:
+  actions: write
+  pull-requests: write
+  checks: write
+  contents: read
+  deployments: read
+  id-token: none
+  issues: write
+  discussions: read
+  packages: read
+  pages: read
+  repository-projects: read
+  security-events: read
+  statuses: read
+
+# This allows a subsequently queued workflow run to interrupt previous runs
+concurrency:
+  group: '${{ github.workflow }} @ ${{ github.event.issue.number || github.sha || github.head_ref || github.ref }}-${{ github.event.schedule || github.event.comment.body || github.event.sender.login }}'
+  cancel-in-progress: true
+
+env:
+  GRADLE_ENTERPRISE_ACCESS_KEY: ${{ secrets.GE_ACCESS_TOKEN }}
+  GRADLE_ENTERPRISE_CACHE_USERNAME: ${{ secrets.GE_CACHE_USERNAME }}
+  GRADLE_ENTERPRISE_CACHE_PASSWORD: ${{ secrets.GE_CACHE_PASSWORD }}
+  beam_PerformanceTests_SparkReceiver_IO_test_arguments_1: ''  # read by the final Gradle step; presumably filled in by test-arguments-action (TODO confirm)
+
+jobs:
+  beam_PerformanceTests_SparkReceiver_IO:  # runs SparkReceiverIOIT through Gradle with -DintegrationTestRunner=dataflow
+    if: |
+      github.event_name == 'workflow_dispatch' ||
+      github.event_name == 'schedule' ||
+      github.event.comment.body == 'Run Java SparkReceiverIO Performance Test'
+    runs-on: [self-hosted, ubuntu-20.04, main]
+    timeout-minutes: 120
+    name: ${{ matrix.job_name }} (${{ matrix.job_phrase }})
+    strategy:
+      matrix:
+        job_name: ["beam_PerformanceTests_SparkReceiver_IO"]
+        job_phrase: ["Run Java SparkReceiverIO Performance Test"]
+    steps:
+      - uses: actions/checkout@v4
+      - name: Setup repository
+        uses: ./.github/actions/setup-action
+        with:
+          comment_phrase: ${{ matrix.job_phrase }}
+          github_token: ${{ secrets.GITHUB_TOKEN }}
+          github_job: ${{ matrix.job_name }} (${{ matrix.job_phrase }})
+      - name: Authenticate on GCP
+        id: auth
+        uses: google-github-actions/auth@v1
+        with:
+          credentials_json: ${{ secrets.GCP_SA_KEY }}
+          project_id: ${{ secrets.GCP_PROJECT_ID }}
+      - name: Set k8s access
+        uses: ./.github/actions/setup-k8s-access
+        with:
+          cluster_name: io-datastores
+          k8s_namespace: ${{ matrix.job_name }}-${{ github.run_id }}
+      - name: Install rabbitmq
+        id: install_rabbitmq  # step output rabbitmq_IP is read by "Prepare test arguments"
+        run: |
+          kubectl apply -f ${{ github.workspace }}/.test-infra/kubernetes/rabbit/rabbitmq.yaml
+          kubectl wait svc/rabbitmq --for=jsonpath='{.status.loadBalancer.ingress[0].ip}' --timeout=60s
+          loadbalancer_IP=$(kubectl get svc rabbitmq -o jsonpath='{.status.loadBalancer.ingress[0].ip}')
+          echo "rabbitmq_IP=$loadbalancer_IP" >> "$GITHUB_OUTPUT"
+      - name: Prepare test arguments
+        uses: ./.github/actions/test-arguments-action
+        with:
+          test-type: performance
+          test-language: java
+          argument-file-paths: |
+            ${{ github.workspace }}/.github/workflows/performance-tests-job-configs/sparkReceiver_IO.txt
+          arguments: |
+            --rabbitMqBootstrapServerAddress=amqp://guest:guest@${{ steps.install_rabbitmq.outputs.rabbitmq_IP }}:5672
+      - name: run integrationTest
+        uses: ./.github/actions/gradle-command-self-hosted-action
+        with:
+          gradle-command: :sdks:java:io:sparkreceiver:2:integrationTest
+          arguments: |
+            --info \
+            --tests org.apache.beam.sdk.io.sparkreceiver.SparkReceiverIOIT \
+            -DintegrationTestRunner=dataflow \
+            -DintegrationTestPipelineOptions='[${{ env.beam_PerformanceTests_SparkReceiver_IO_test_arguments_1 }}]' \
diff --git a/.github/workflows/beam_PerformanceTests_TFRecordIOIT_HDFS.yml b/.github/workflows/beam_PerformanceTests_TFRecordIOIT_HDFS.yml
new file mode 100644
index 00000000000..bbc4a79aa0f
--- /dev/null
+++ b/.github/workflows/beam_PerformanceTests_TFRecordIOIT_HDFS.yml
@@ -0,0 +1,112 @@
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements. See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+name: PerformanceTests TFRecordIOIT HDFS
+
+on:
+  # TODO(https://github.com/apache/beam/issues/18796) TFRecord performance test is failing only when running on hdfs.
+  # We need to fix this before enabling the comment and schedule triggers that are commented out below.
+  # issue_comment:
+  #   types: [created]
+  # schedule:
+  #   - cron: '17 8/20 * * *'
+  workflow_dispatch:
+
+# Setting explicit permissions for the action to avoid the default permissions which are `write-all` in case of pull_request_target event
+permissions:
+  actions: write
+  pull-requests: write
+  checks: write
+  contents: read
+  deployments: read
+  id-token: none
+  issues: write
+  discussions: read
+  packages: read
+  pages: read
+  repository-projects: read
+  security-events: read
+  statuses: read
+
+# This allows a subsequently queued workflow run to interrupt previous runs
+concurrency:
+  group: '${{ github.workflow }} @ ${{ github.event.issue.number || github.sha || github.head_ref || github.ref }}-${{ github.event.schedule || github.event.comment.body || github.event.sender.login }}'
+  cancel-in-progress: true
+
+env:
+  GRADLE_ENTERPRISE_ACCESS_KEY: ${{ secrets.GE_ACCESS_TOKEN }}
+  GRADLE_ENTERPRISE_CACHE_USERNAME: ${{ secrets.GE_CACHE_USERNAME }}
+  GRADLE_ENTERPRISE_CACHE_PASSWORD: ${{ secrets.GE_CACHE_PASSWORD }}
+  beam_PerformanceTests_TFRecordIOIT_HDFS_test_arguments_1: ''  # read by the final Gradle step; presumably filled in by test-arguments-action (TODO confirm)
+
+jobs:
+  beam_PerformanceTests_TFRecordIOIT_HDFS:  # runs TFRecordIOIT through Gradle with -Dfilesystem=hdfs and -DintegrationTestRunner=dataflow
+    if: |
+      github.event_name == 'workflow_dispatch' ||
+      github.event_name == 'schedule' ||
+      github.event.comment.body == 'Run Java TFRecordIO Performance Test HDFS'
+    runs-on: [self-hosted, ubuntu-20.04, main]
+    timeout-minutes: 100
+    name: ${{ matrix.job_name }} (${{ matrix.job_phrase }})
+    strategy:
+      matrix:
+        job_name: ["beam_PerformanceTests_TFRecordIOIT_HDFS"]
+        job_phrase: ["Run Java TFRecordIO Performance Test HDFS"]
+    steps:
+      - uses: actions/checkout@v4
+      - name: Setup repository
+        uses: ./.github/actions/setup-action
+        with:
+          comment_phrase: ${{ matrix.job_phrase }}
+          github_token: ${{ secrets.GITHUB_TOKEN }}
+          github_job: ${{ matrix.job_name }} (${{ matrix.job_phrase }})
+      - name: Authenticate on GCP
+        id: auth
+        uses: google-github-actions/auth@v1
+        with:
+          credentials_json: ${{ secrets.GCP_SA_KEY }}
+          project_id: ${{ secrets.GCP_PROJECT_ID }}
+      - name: Set k8s access
+        uses: ./.github/actions/setup-k8s-access
+        with:
+          cluster_name: io-datastores
+          k8s_namespace: ${{ matrix.job_name }}-${{ github.run_id }}
+      - name: Install Hadoop
+        id: install_hadoop  # step output hadoop_IP is read by "Prepare test arguments"
+        run: |
+          kubectl apply -f ${{ github.workspace }}/.test-infra/kubernetes/hadoop/LargeITCluster/hdfs-multi-datanode-cluster.yml
+          kubectl wait svc/hadoop --for=jsonpath='{.status.loadBalancer.ingress[0].ip}' --timeout=60s
+          loadbalancer_IP=$(kubectl get svc hadoop -o jsonpath='{.status.loadBalancer.ingress[0].ip}')
+          echo "hadoop_IP=$loadbalancer_IP" >> "$GITHUB_OUTPUT"
+      - name: Prepare test arguments
+        uses: ./.github/actions/test-arguments-action
+        with:
+          test-type: performance
+          test-language: java
+          argument-file-paths: |
+            ${{ github.workspace }}/.github/workflows/performance-tests-job-configs/TFRecordIOIT_HDFS.txt
+          arguments: |  # fs.defaultFS now uses the same hdfs://host:port form as filenamePrefix
+            --filenamePrefix=hdfs://${{ steps.install_hadoop.outputs.hadoop_IP }}:9000/TEXTIO_IT_
+            --hdfsConfiguration=[{\\\"fs.defaultFS\\\":\\\"hdfs://${{ steps.install_hadoop.outputs.hadoop_IP }}:9000\\\",\\\"dfs.replication\\\":1}]
+      - name: run integrationTest
+        uses: ./.github/actions/gradle-command-self-hosted-action
+        with:
+          gradle-command: :sdks:java:io:file-based-io-tests:integrationTest
+          arguments: |
+            --info \
+            --tests org.apache.beam.sdk.io.tfrecord.TFRecordIOIT \
+            -Dfilesystem=hdfs \
+            -DintegrationTestRunner=dataflow \
+            -DintegrationTestPipelineOptions='[${{ env.beam_PerformanceTests_TFRecordIOIT_HDFS_test_arguments_1 }}]' \
\ No newline at end of file
diff --git a/.github/workflows/beam_PerformanceTests_WordCountIT_PythonVersions.yml b/.github/workflows/beam_PerformanceTests_WordCountIT_PythonVersions.yml
new file mode 100644
index 00000000000..53b157d691c
--- /dev/null
+++ b/.github/workflows/beam_PerformanceTests_WordCountIT_PythonVersions.yml
@@ -0,0 +1,105 @@
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements. See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+name: PerformanceTests WordCountIT PythonVersions
+
+on:
+  issue_comment:  # any new comment starts the workflow; the job-level `if` filters on the trigger phrase
+    types: [created]
+  schedule:
+    - cron: '12 3 * * *'
+  workflow_dispatch:
+
+# Setting explicit permissions for the action to avoid the default permissions which are `write-all` in case of pull_request_target event
+permissions:
+  actions: write
+  pull-requests: write
+  checks: write
+  contents: read
+  deployments: read
+  id-token: none
+  issues: write
+  discussions: read
+  packages: read
+  pages: read
+  repository-projects: read
+  security-events: read
+  statuses: read
+
+# This allows a subsequently queued workflow run to interrupt previous runs
+concurrency:
+  group: '${{ github.workflow }} @ ${{ github.event.issue.number || github.sha || github.head_ref || github.ref }}-${{ github.event.schedule || github.event.comment.body || github.event.sender.login }}'
+  cancel-in-progress: true
+
+env:
+  GRADLE_ENTERPRISE_ACCESS_KEY: ${{ secrets.GE_ACCESS_TOKEN }}
+  GRADLE_ENTERPRISE_CACHE_USERNAME: ${{ secrets.GE_CACHE_USERNAME }}
+  GRADLE_ENTERPRISE_CACHE_PASSWORD: ${{ secrets.GE_CACHE_PASSWORD }}
+  beam_PerformanceTests_WordCountIT_PythonVersions_test_arguments_1: ''  # read by the "Run test" step; presumably filled in by test-arguments-action (TODO confirm)
+
+jobs:
+  beam_PerformanceTests_WordCountIT_PythonVersions:  # runs wordcount_it_test.py::WordCountIT::test_wordcount_it once per matrix python_version
+    if: |
+      github.event_name == 'workflow_dispatch' ||
+      github.event_name == 'schedule' ||
+      (startswith(github.event.comment.body, 'Run Python') &&
+      endswith(github.event.comment.body, 'WordCountIT Performance Test'))
+    runs-on: [self-hosted, ubuntu-20.04, main]
+    timeout-minutes: 100
+    name: ${{ matrix.job_name }} (${{ matrix.job_phrase_1 }}${{matrix.python_version}} ${{ matrix.job_phrase_2 }})
+    strategy:
+      fail-fast: false
+      matrix:
+        job_name: ["beam_PerformanceTests_WordCountIT_PythonVersions"]
+        job_phrase_1: [Run Python]
+        job_phrase_2: [WordCountIT Performance Test]
+        python_version: ['3.8']
+    steps:
+      - uses: actions/checkout@v4
+      - name: Setup repository
+        uses: ./.github/actions/setup-action
+        with:
+          comment_phrase: ${{ matrix.job_phrase_1 }}${{matrix.python_version}} ${{ matrix.job_phrase_2 }}
+          github_token: ${{ secrets.GITHUB_TOKEN }}
+          github_job: ${{ matrix.job_name }} (${{ matrix.job_phrase_1 }}${{matrix.python_version}} ${{ matrix.job_phrase_2 }})
+      - name: Setup environment
+        uses: ./.github/actions/setup-environment-action
+        with:
+          python-version: ${{matrix.python_version}}
+      - name: Set PY_VER_CLEAN
+        id: set_py_ver_clean  # output py_ver_clean is the matrix version with the dots removed
+        run: |
+          PY_VER=${{ matrix.python_version }}
+          PY_VER_CLEAN=${PY_VER//.}
+          echo "py_ver_clean=$PY_VER_CLEAN" >> "$GITHUB_OUTPUT"
+      - name: Prepare test arguments
+        uses: ./.github/actions/test-arguments-action
+        with:
+          test-type: load
+          test-language: python
+          argument-file-paths: |
+            ${{ github.workspace }}/.github/workflows/performance-tests-job-configs/wordCountIT_Python.txt
+          arguments: |
+            --job_name=performance-tests-wordcount-python${{steps.set_py_ver_clean.outputs.py_ver_clean}}-batch-1gb$(date '+%m%d%H%M%S' --utc)
+            --metrics_table=wordcount_py${{steps.set_py_ver_clean.outputs.py_ver_clean}}_pkb_results
+            --influx_measurement=wordcount_py${{steps.set_py_ver_clean.outputs.py_ver_clean}}_results
+      - name: Run test
+        uses: ./.github/actions/gradle-command-self-hosted-action
+        with:
+          gradle-command: :sdks:python:test-suites:dataflow:py${{steps.set_py_ver_clean.outputs.py_ver_clean}}:runPerformanceTest
+          arguments: |
+            --info \
+            -Ptest=apache_beam/examples/wordcount_it_test.py::WordCountIT::test_wordcount_it \
+            "-Ptest-pipeline-options=${{ env.beam_PerformanceTests_WordCountIT_PythonVersions_test_arguments_1 }}"
\ No newline at end of file
diff --git a/.github/workflows/performance-tests-job-configs/TFRecordIOIT_HDFS.txt b/.github/workflows/performance-tests-job-configs/TFRecordIOIT_HDFS.txt
new file mode 100644
index 00000000000..608e11c7847
--- /dev/null
+++ b/.github/workflows/performance-tests-job-configs/TFRecordIOIT_HDFS.txt
@@ -0,0 +1,23 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+--runner=DataflowRunner
+--tempRoot=gs://temp-storage-for-perf-tests
+--numberOfRecords=18000000
+--expectedHash=543104423f8b6eb097acb9f111c19fe4
+--datasetSize=1019380000
+--numWorkers=5
+--autoscalingAlgorithm=NONE
\ No newline at end of file
diff --git a/.github/workflows/performance-tests-job-configs/cdap.txt b/.github/workflows/performance-tests-job-configs/cdap.txt
new file mode 100644
index 00000000000..60c98a1065d
--- /dev/null
+++ b/.github/workflows/performance-tests-job-configs/cdap.txt
@@ -0,0 +1,29 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+--tempRoot=gs://temp-storage-for-perf-tests
+--runner=DataflowRunner
+--numberOfRecords=5000000
+--bigQueryDataset=beam_performance
+--bigQueryTable=cdapioit_results
+--influxMeasurement=cdapioit_results
+--postgresUsername=postgres
+--postgresPassword=uuinkks
+--postgresDatabaseName=postgres
+--postgresSsl=false
+--postgresPort=5432
+--numWorkers=5
+--autoscalingAlgorithm=NONE
\ No newline at end of file
diff --git a/.github/workflows/performance-tests-job-configs/hadoopFormat.txt b/.github/workflows/performance-tests-job-configs/hadoopFormat.txt
new file mode 100644
index 00000000000..712f29e3d52
--- /dev/null
+++ b/.github/workflows/performance-tests-job-configs/hadoopFormat.txt
@@ -0,0 +1,29 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+--tempRoot=gs://temp-storage-for-perf-tests
+--runner=DataflowRunner
+--numberOfRecords=5000000
+--bigQueryDataset=beam_performance
+--bigQueryTable=hadoopformatioit_results
+--influxMeasurement=hadoopformatioit_results
+--postgresUsername=postgres
+--postgresPassword=uuinkks
+--postgresDatabaseName=postgres
+--postgresSsl=false
+--postgresPort=5432
+--numWorkers=5
+--autoscalingAlgorithm=NONE
\ No newline at end of file
diff --git a/.github/workflows/performance-tests-job-configs/mongoDBIO_IT.txt b/.github/workflows/performance-tests-job-configs/mongoDBIO_IT.txt
new file mode 100644
index 00000000000..67a60f6407f
--- /dev/null
+++ b/.github/workflows/performance-tests-job-configs/mongoDBIO_IT.txt
@@ -0,0 +1,28 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+--tempRoot=gs://temp-storage-for-perf-tests
+--runner=DataflowRunner
+--numberOfRecords=10000000
+--bigQueryDataset=beam_performance
+--bigQueryTable=mongodbioit_results
+--influxMeasurement=mongodbioit_results
+--mongoDBDatabaseName=beam
+--mongoDBPort=27017
+--mongoDBUsername=root
+--mongoDBPassword=uuinkkS
+--numWorkers=5
+--autoscalingAlgorithm=NONE
\ No newline at end of file
diff --git a/.github/workflows/performance-tests-job-configs/pubsubIOIT_Python_Streaming.txt b/.github/workflows/performance-tests-job-configs/pubsubIOIT_Python_Streaming.txt
new file mode 100644
index 00000000000..257fc196de7
--- /dev/null
+++ b/.github/workflows/performance-tests-job-configs/pubsubIOIT_Python_Streaming.txt
@@ -0,0 +1,28 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+--region=us-central1
+--temp_location=gs://temp-storage-for-perf-tests/loadtests
+--publish_to_big_query=true
+--metrics_dataset=beam_performance
+--metrics_table=psio_io_2GB_results
+--influx_measurement=python_psio_2GB_results
+--input_options=''{\\"num_records\\":2097152,\\"key_size\\":1,\\"value_size\\":1024,\\"algorithm\\":\\"lcg\\"}''
+--num_workers=5
+--autoscaling_algorithm=NONE
+--pubsub_namespace_prefix=pubsub_io_performance_
+--wait_until_finish_duration=720000
+--runner=TestDataflowRunner
\ No newline at end of file
diff --git a/.github/workflows/performance-tests-job-configs/sparkReceiver_IO.txt b/.github/workflows/performance-tests-job-configs/sparkReceiver_IO.txt
new file mode 100644
index 00000000000..12d2bde6732
--- /dev/null
+++ b/.github/workflows/performance-tests-job-configs/sparkReceiver_IO.txt
@@ -0,0 +1,26 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+--tempRoot=gs://temp-storage-for-perf-tests
+--runner=DataflowRunner
+--sourceOptions=''{\"numRecords\":\"5000000\",\"keySizeBytes\":\"1\",\"valueSizeBytes\":\"90\"}''
+--bigQueryDataset=beam_performance
+--bigQueryTable=sparkreceiverioit_results
+--influxMeasurement=sparkreceiverioit_results
+--streamName=rabbitMqTestStream
+--readTimeout=1800
+--numWorkers=1
+--autoscalingAlgorithm=NONE
\ No newline at end of file
diff --git a/.github/workflows/performance-tests-job-configs/wordCountIT_Python.txt b/.github/workflows/performance-tests-job-configs/wordCountIT_Python.txt
new file mode 100644
index 00000000000..9b9abeeb092
--- /dev/null
+++ b/.github/workflows/performance-tests-job-configs/wordCountIT_Python.txt
@@ -0,0 +1,27 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+--region=us-central1
+--staging_location=gs://temp-storage-for-end-to-end-tests/staging-it
+--temp_location=gs://temp-storage-for-end-to-end-tests/temp-it
+--runner=TestDataflowRunner
+--publish_to_big_query=true
+--metrics_dataset=beam_performance
+--input=gs://apache-beam-samples/input_small_files/ascii_sort_1MB_input.0000*
+--output=gs://temp-storage-for-end-to-end-tests/py-it-cloud/output
+--expect_checksum=ea0ca2e5ee4ea5f218790f28d0b9fe7d09d8d710
+--num_workers=10
+--autoscaling_algorithm=NONE
\ No newline at end of file