This is an automated email from the ASF dual-hosted git repository.
yhu pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/beam.git
The following commit(s) were added to refs/heads/master by this push:
new a6de3033072 Github Workflow Replacement for Jenkins Jobs,
beam_PerformanceTests_ParquetIOIT* (#28582)
a6de3033072 is described below
commit a6de303307281e9ca51b1a9cbc6a74a71aec39ee
Author: magicgoody <[email protected]>
AuthorDate: Fri Sep 29 02:48:15 2023 +0600
Github Workflow Replacement for Jenkins Jobs,
beam_PerformanceTests_ParquetIOIT* (#28582)
---
.../beam_PerformanceTests_ParquetIOIT.yml | 91 +++++++++++++++++
.../beam_PerformanceTests_ParquetIOIT_HDFS.yml | 109 +++++++++++++++++++++
.../config_ParquetIOIT.txt | 26 +++++
.../config_ParquetIOIT_HDFS.txt | 26 +++++
4 files changed, 252 insertions(+)
diff --git a/.github/workflows/beam_PerformanceTests_ParquetIOIT.yml
b/.github/workflows/beam_PerformanceTests_ParquetIOIT.yml
new file mode 100644
index 00000000000..ceb540b16b1
--- /dev/null
+++ b/.github/workflows/beam_PerformanceTests_ParquetIOIT.yml
@@ -0,0 +1,91 @@
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements. See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+name: PerformanceTests ParquetIOIT
+
+# Triggers: manual dispatch, a cron schedule, or a newly created issue/PR
+# comment (the job's `if` then filters on the exact comment phrase).
+on:
+  workflow_dispatch:
+  schedule:
+    - cron: '10 3/12 * * *'
+  issue_comment:
+    types:
+      - created
+
+# Token permissions are listed scope by scope so the run never relies on the
+# default permission set.
+permissions:
+  actions: write
+  checks: write
+  contents: read
+  deployments: read
+  discussions: read
+  id-token: none
+  issues: write
+  packages: read
+  pages: read
+  pull-requests: write
+  repository-projects: read
+  security-events: read
+  statuses: read
+
+# Runs that resolve to the same group key supersede each other: a newly queued
+# run cancels the one still in progress.
+concurrency:
+  cancel-in-progress: true
+  group: '${{ github.workflow }} @ ${{ github.event.issue.number || github.sha || github.head_ref || github.ref }}-${{ github.event.schedule || github.event.comment.body || github.event.sender.login }}'
+
+# Gradle Enterprise credentials, read from repository secrets.
+env:
+  GRADLE_ENTERPRISE_ACCESS_KEY: ${{ secrets.GE_ACCESS_TOKEN }}
+  GRADLE_ENTERPRISE_CACHE_USERNAME: ${{ secrets.GE_CACHE_USERNAME }}
+  GRADLE_ENTERPRISE_CACHE_PASSWORD: ${{ secrets.GE_CACHE_PASSWORD }}
+
+jobs:
+  # Runs the ParquetIO integration test against GCS on the Dataflow runner.
+  beam_PerformanceTests_ParquetIOIT:
+    name: ${{ matrix.job_name }} (${{ matrix.job_phrase }})
+    # Execute only for a manual dispatch, the cron schedule, or a comment whose
+    # body is exactly the trigger phrase.
+    if: |
+      github.event_name == 'workflow_dispatch' ||
+      github.event_name == 'schedule' ||
+      github.event.comment.body == 'Run Java ParquetIO Performance Test'
+    runs-on: [self-hosted, ubuntu-20.04, main]
+    timeout-minutes: 100
+    # Single-entry matrix: it only supplies the job name and trigger phrase
+    # that the job title and the steps below refer to.
+    strategy:
+      matrix:
+        job_name: ["beam_PerformanceTests_ParquetIOIT"]
+        job_phrase: ["Run Java ParquetIO Performance Test"]
+    steps:
+      # Same checkout major version as the ParquetIOIT_HDFS workflow.
+      - uses: actions/checkout@v4
+      - name: Setup repository
+        uses: ./.github/actions/setup-action
+        with:
+          comment_phrase: ${{ matrix.job_phrase }}
+          github_token: ${{ secrets.GITHUB_TOKEN }}
+          github_job: ${{ matrix.job_name }} (${{ matrix.job_phrase }})
+      # The env variable is created and populated in the test-arguments-action
+      # as "<github.job>_test_arguments_<argument_file_paths_index>".
+      - name: Prepare test arguments
+        uses: ./.github/actions/test-arguments-action
+        with:
+          test-type: performance
+          test-language: java
+          argument-file-paths: |
+            ${{ github.workspace }}/.github/workflows/performance-tests-job-configs/config_ParquetIOIT.txt
+          arguments: |
+            --filenamePrefix=gs://temp-storage-for-perf-tests/${{ matrix.job_name }}/${{github.run_id}}/
+      # Passes the env variable described above (index 1, the only argument
+      # file) to Gradle as the pipeline options.
+      - name: run integrationTest
+        uses: ./.github/actions/gradle-command-self-hosted-action
+        with:
+          gradle-command: :sdks:java:io:file-based-io-tests:integrationTest
+          arguments: |
+            --tests org.apache.beam.sdk.io.parquet.ParquetIOIT \
+            --info \
+            -Dfilesystem=gcs \
+            -DintegrationTestRunner=dataflow \
+            -DintegrationTestPipelineOptions='[${{ env.beam_PerformanceTests_ParquetIOIT_test_arguments_1 }}]'
\ No newline at end of file
diff --git a/.github/workflows/beam_PerformanceTests_ParquetIOIT_HDFS.yml
b/.github/workflows/beam_PerformanceTests_ParquetIOIT_HDFS.yml
new file mode 100644
index 00000000000..d0c40599eb6
--- /dev/null
+++ b/.github/workflows/beam_PerformanceTests_ParquetIOIT_HDFS.yml
@@ -0,0 +1,109 @@
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements. See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+name: PerformanceTests ParquetIOIT HDFS
+
+# Triggers: manual dispatch, a cron schedule, or a newly created issue/PR
+# comment (the job's `if` then filters on the exact comment phrase).
+on:
+  workflow_dispatch:
+  schedule:
+    - cron: '50 3/12 * * *'
+  issue_comment:
+    types:
+      - created
+
+# Token permissions are listed scope by scope so the run never relies on the
+# default permission set.
+permissions:
+  actions: write
+  checks: write
+  contents: read
+  deployments: read
+  discussions: read
+  id-token: none
+  issues: write
+  packages: read
+  pages: read
+  pull-requests: write
+  repository-projects: read
+  security-events: read
+  statuses: read
+
+# Runs that resolve to the same group key supersede each other: a newly queued
+# run cancels the one still in progress.
+concurrency:
+  cancel-in-progress: true
+  group: '${{ github.workflow }} @ ${{ github.event.issue.number || github.sha || github.head_ref || github.ref }}-${{ github.event.schedule || github.event.comment.body || github.event.sender.login }}'
+
+# Gradle Enterprise credentials, read from repository secrets.
+env:
+  GRADLE_ENTERPRISE_ACCESS_KEY: ${{ secrets.GE_ACCESS_TOKEN }}
+  GRADLE_ENTERPRISE_CACHE_USERNAME: ${{ secrets.GE_CACHE_USERNAME }}
+  GRADLE_ENTERPRISE_CACHE_PASSWORD: ${{ secrets.GE_CACHE_PASSWORD }}
+
+jobs:
+  # Runs the ParquetIO integration test against an HDFS cluster that is
+  # deployed to Kubernetes earlier in the same job.
+  beam_PerformanceTests_ParquetIOIT_HDFS:
+    name: ${{ matrix.job_name }} (${{ matrix.job_phrase }})
+    # Execute only for a manual dispatch, the cron schedule, or a comment whose
+    # body is exactly the trigger phrase.
+    if: |
+      github.event_name == 'workflow_dispatch' ||
+      github.event_name == 'schedule' ||
+      github.event.comment.body == 'Run Java ParquetIO Performance Test HDFS'
+    runs-on: [self-hosted, ubuntu-20.04, main]
+    timeout-minutes: 100
+    # Single-entry matrix: it only supplies the job name and trigger phrase
+    # that the job title and the steps below refer to.
+    strategy:
+      matrix:
+        job_name: ["beam_PerformanceTests_ParquetIOIT_HDFS"]
+        job_phrase: ["Run Java ParquetIO Performance Test HDFS"]
+    steps:
+      - uses: actions/checkout@v4
+      - name: Setup repository
+        uses: ./.github/actions/setup-action
+        with:
+          comment_phrase: ${{ matrix.job_phrase }}
+          github_token: ${{ secrets.GITHUB_TOKEN }}
+          github_job: ${{ matrix.job_name }} (${{ matrix.job_phrase }})
+      - name: Authenticate on GCP
+        id: auth
+        uses: google-github-actions/auth@v1
+        with:
+          credentials_json: ${{ secrets.GCP_SA_KEY }}
+          project_id: ${{ secrets.GCP_PROJECT_ID }}
+      # The namespace name combines the job name with the run id.
+      - name: Set k8s access
+        uses: ./.github/actions/setup-k8s-access
+        with:
+          k8s_namespace: ${{ matrix.job_name }}-${{ github.run_id }}
+      # Deploys the HDFS cluster, waits (up to 120s) for the `hadoop` service
+      # to report a load-balancer IP, then publishes that IP as the step
+      # output `hadoop_IP`. Expansions are quoted so that neither the
+      # workspace path nor the output file path is word-split or globbed.
+      - name: Install Hadoop
+        id: install_hadoop
+        run: |
+          kubectl apply -f "${{ github.workspace }}/.test-infra/kubernetes/hadoop/LargeITCluster/hdfs-multi-datanode-cluster.yml"
+          kubectl wait svc/hadoop --for=jsonpath='{.status.loadBalancer.ingress[0].ip}' --timeout=120s
+          loadbalancer_IP="$(kubectl get svc hadoop -o jsonpath='{.status.loadBalancer.ingress[0].ip}')"
+          echo "hadoop_IP=$loadbalancer_IP" >> "$GITHUB_OUTPUT"
+      # The env variable is created and populated in the test-arguments-action
+      # as "<github.job>_test_arguments_<argument_file_paths_index>".
+      # NOTE(review): the file prefix is "TEXTIO_IT_" although this is the
+      # Parquet test, and fs.defaultFS is written "hdfs:<ip>:9000" without
+      # "//" - both kept as-is; confirm they are intended.
+      - name: Prepare test arguments
+        uses: ./.github/actions/test-arguments-action
+        with:
+          test-type: performance
+          test-language: java
+          argument-file-paths: |
+            ${{ github.workspace }}/.github/workflows/performance-tests-job-configs/config_ParquetIOIT_HDFS.txt
+          arguments: |
+            --filenamePrefix=hdfs://${{ steps.install_hadoop.outputs.hadoop_IP }}:9000/TEXTIO_IT_
+            --hdfsConfiguration=[{\\\"fs.defaultFS\\\":\\\"hdfs:${{ steps.install_hadoop.outputs.hadoop_IP }}:9000\\\",\\\"dfs.replication\\\":1}]
+      # Passes the env variable described above (index 1, the only argument
+      # file) to Gradle as the pipeline options.
+      - name: run integrationTest
+        uses: ./.github/actions/gradle-command-self-hosted-action
+        with:
+          gradle-command: :sdks:java:io:file-based-io-tests:integrationTest
+          arguments: |
+            --tests org.apache.beam.sdk.io.parquet.ParquetIOIT \
+            --info \
+            -Dfilesystem=hdfs \
+            -DintegrationTestRunner=dataflow \
+            -DintegrationTestPipelineOptions='[${{ env.beam_PerformanceTests_ParquetIOIT_HDFS_test_arguments_1 }}]'
\ No newline at end of file
diff --git
a/.github/workflows/performance-tests-job-configs/config_ParquetIOIT.txt
b/.github/workflows/performance-tests-job-configs/config_ParquetIOIT.txt
new file mode 100644
index 00000000000..10dc0eba73d
--- /dev/null
+++ b/.github/workflows/performance-tests-job-configs/config_ParquetIOIT.txt
@@ -0,0 +1,26 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+--runner=DataflowRunner
+--tempRoot=gs://temp-storage-for-perf-tests
+--bigQueryDataset=beam_performance
+--bigQueryTable=parquetioit_results
+--influxMeasurement=parquetioit_results
+--numberOfRecords=225000000
+--expectedHash=2f9f5ca33ea464b25109c0297eb6aecb
+--datasetSize=1087370000
+--numWorkers=5
+--autoscalingAlgorithm=NONE
\ No newline at end of file
diff --git
a/.github/workflows/performance-tests-job-configs/config_ParquetIOIT_HDFS.txt
b/.github/workflows/performance-tests-job-configs/config_ParquetIOIT_HDFS.txt
new file mode 100644
index 00000000000..ecb725cc12f
--- /dev/null
+++
b/.github/workflows/performance-tests-job-configs/config_ParquetIOIT_HDFS.txt
@@ -0,0 +1,26 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+--runner=DataflowRunner
+--tempRoot=gs://temp-storage-for-perf-tests
+--bigQueryDataset=beam_performance
+--bigQueryTable=parquetioit_hdfs_results
+--influxMeasurement=parquetioit_hdfs_results
+--numberOfRecords=225000000
+--expectedHash=2f9f5ca33ea464b25109c0297eb6aecb
+--datasetSize=1087370000
+--numWorkers=5
+--autoscalingAlgorithm=NONE
\ No newline at end of file