This is an automated email from the ASF dual-hosted git repository.

yhu pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/beam.git


The following commit(s) were added to refs/heads/master by this push:
     new a6de3033072 Github Workflow Replacement for Jenkins Jobs, 
beam_PerformanceTests_ParquetIOIT* (#28582)
a6de3033072 is described below

commit a6de303307281e9ca51b1a9cbc6a74a71aec39ee
Author: magicgoody <[email protected]>
AuthorDate: Fri Sep 29 02:48:15 2023 +0600

    Github Workflow Replacement for Jenkins Jobs, 
beam_PerformanceTests_ParquetIOIT* (#28582)
---
 .../beam_PerformanceTests_ParquetIOIT.yml          |  91 +++++++++++++++++
 .../beam_PerformanceTests_ParquetIOIT_HDFS.yml     | 109 +++++++++++++++++++++
 .../config_ParquetIOIT.txt                         |  26 +++++
 .../config_ParquetIOIT_HDFS.txt                    |  26 +++++
 4 files changed, 252 insertions(+)

diff --git a/.github/workflows/beam_PerformanceTests_ParquetIOIT.yml 
b/.github/workflows/beam_PerformanceTests_ParquetIOIT.yml
new file mode 100644
index 00000000000..ceb540b16b1
--- /dev/null
+++ b/.github/workflows/beam_PerformanceTests_ParquetIOIT.yml
@@ -0,0 +1,91 @@
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements.  See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License.  You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+name: PerformanceTests ParquetIOIT
+
+on:
+  issue_comment:
+    types: [created]
+  schedule:
+    - cron: '10 3/12 * * *'
+  workflow_dispatch:
+
+# Setting explicit permissions for the action to avoid the default permissions, 
which are `write-all` in case of a pull_request_target event
+permissions:
+  actions: write
+  pull-requests: write
+  checks: write
+  contents: read
+  deployments: read
+  id-token: none
+  issues: write
+  discussions: read
+  packages: read
+  pages: read
+  repository-projects: read
+  security-events: read
+  statuses: read
+
+# This allows a subsequently queued workflow run to interrupt previous runs
+concurrency:
+  group: '${{ github.workflow }} @ ${{ github.event.issue.number || github.sha 
|| github.head_ref || github.ref }}-${{ github.event.schedule || 
github.event.comment.body || github.event.sender.login }}'
+  cancel-in-progress: true
+
+env:
+  GRADLE_ENTERPRISE_ACCESS_KEY: ${{ secrets.GE_ACCESS_TOKEN }}
+  GRADLE_ENTERPRISE_CACHE_USERNAME: ${{ secrets.GE_CACHE_USERNAME }}
+  GRADLE_ENTERPRISE_CACHE_PASSWORD: ${{ secrets.GE_CACHE_PASSWORD }}
+
+jobs:
+  beam_PerformanceTests_ParquetIOIT:
+    if: |
+      github.event_name == 'workflow_dispatch' ||
+      github.event_name == 'schedule' ||
+      github.event.comment.body == 'Run Java ParquetIO Performance Test'
+    runs-on: [self-hosted, ubuntu-20.04, main]
+    timeout-minutes: 100
+    name: ${{ matrix.job_name }} (${{ matrix.job_phrase }})
+    strategy:
+      matrix:
+        job_name: ["beam_PerformanceTests_ParquetIOIT"]
+        job_phrase: ["Run Java ParquetIO Performance Test"]
+    steps:
+      - uses: actions/checkout@v4
+      - name: Setup repository
+        uses: ./.github/actions/setup-action
+        with:
+          comment_phrase: ${{ matrix.job_phrase }}
+          github_token: ${{ secrets.GITHUB_TOKEN }}
+          github_job: ${{ matrix.job_name }} (${{ matrix.job_phrase }})
+# The env variable is created and populated in the test-arguments-action as 
"<github.job>_test_arguments_<argument_file_paths_index>"
+      - name: Prepare test arguments
+        uses: ./.github/actions/test-arguments-action
+        with:
+          test-type: performance
+          test-language: java
+          argument-file-paths: |
+            ${{ github.workspace 
}}/.github/workflows/performance-tests-job-configs/config_ParquetIOIT.txt
+          arguments: |
+            --filenamePrefix=gs://temp-storage-for-perf-tests/${{ 
matrix.job_name }}/${{github.run_id}}/
+      - name: run integrationTest
+        uses: ./.github/actions/gradle-command-self-hosted-action
+        with:
+          gradle-command: :sdks:java:io:file-based-io-tests:integrationTest
+          arguments: |
+            --tests org.apache.beam.sdk.io.parquet.ParquetIOIT \
+            --info \
+            -Dfilesystem=gcs \
+            -DintegrationTestRunner=dataflow \
+            -DintegrationTestPipelineOptions='[${{ 
env.beam_PerformanceTests_ParquetIOIT_test_arguments_1 }}]'
\ No newline at end of file
diff --git a/.github/workflows/beam_PerformanceTests_ParquetIOIT_HDFS.yml 
b/.github/workflows/beam_PerformanceTests_ParquetIOIT_HDFS.yml
new file mode 100644
index 00000000000..d0c40599eb6
--- /dev/null
+++ b/.github/workflows/beam_PerformanceTests_ParquetIOIT_HDFS.yml
@@ -0,0 +1,109 @@
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements.  See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License.  You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+name: PerformanceTests ParquetIOIT HDFS
+
+on:
+  issue_comment:
+    types: [created]
+  schedule:
+    - cron: '50 3/12 * * *'
+  workflow_dispatch:
+
+# Setting explicit permissions for the action to avoid the default permissions, 
which are `write-all` in case of a pull_request_target event
+permissions:
+  actions: write
+  pull-requests: write
+  checks: write
+  contents: read
+  deployments: read
+  id-token: none
+  issues: write
+  discussions: read
+  packages: read
+  pages: read
+  repository-projects: read
+  security-events: read
+  statuses: read
+
+# This allows a subsequently queued workflow run to interrupt previous runs
+concurrency:
+  group: '${{ github.workflow }} @ ${{ github.event.issue.number || github.sha 
|| github.head_ref || github.ref }}-${{ github.event.schedule || 
github.event.comment.body || github.event.sender.login }}'
+  cancel-in-progress: true
+
+env:
+  GRADLE_ENTERPRISE_ACCESS_KEY: ${{ secrets.GE_ACCESS_TOKEN }}
+  GRADLE_ENTERPRISE_CACHE_USERNAME: ${{ secrets.GE_CACHE_USERNAME }}
+  GRADLE_ENTERPRISE_CACHE_PASSWORD: ${{ secrets.GE_CACHE_PASSWORD }}
+
+jobs:
+  beam_PerformanceTests_ParquetIOIT_HDFS:
+    if: |
+      github.event_name == 'workflow_dispatch' ||
+      github.event_name == 'schedule' ||
+      github.event.comment.body == 'Run Java ParquetIO Performance Test HDFS'
+    runs-on: [self-hosted, ubuntu-20.04, main]
+    timeout-minutes: 100
+    name: ${{ matrix.job_name }} (${{ matrix.job_phrase }})
+    strategy:
+      matrix:
+        job_name: ["beam_PerformanceTests_ParquetIOIT_HDFS"]
+        job_phrase: ["Run Java ParquetIO Performance Test HDFS"]
+    steps:
+      - uses: actions/checkout@v4
+      - name: Setup repository
+        uses: ./.github/actions/setup-action
+        with:
+          comment_phrase: ${{ matrix.job_phrase }}
+          github_token: ${{ secrets.GITHUB_TOKEN }}
+          github_job: ${{ matrix.job_name }} (${{ matrix.job_phrase }})
+      - name: Authenticate on GCP
+        id: auth
+        uses: google-github-actions/auth@v1
+        with:
+          credentials_json: ${{ secrets.GCP_SA_KEY }}
+          project_id: ${{ secrets.GCP_PROJECT_ID }}
+      - name: Set k8s access
+        uses: ./.github/actions/setup-k8s-access
+        with:
+          k8s_namespace: ${{ matrix.job_name }}-${{ github.run_id }}
+      - name: Install Hadoop
+        id: install_hadoop
+        run: |
+          kubectl apply -f ${{ github.workspace 
}}/.test-infra/kubernetes/hadoop/LargeITCluster/hdfs-multi-datanode-cluster.yml
+          kubectl wait svc/hadoop 
--for=jsonpath='{.status.loadBalancer.ingress[0].ip}' --timeout=120s
+          loadbalancer_IP=$(kubectl get svc hadoop -o 
jsonpath='{.status.loadBalancer.ingress[0].ip}')
+          echo "hadoop_IP=$loadbalancer_IP" >> "$GITHUB_OUTPUT"  # read by "Prepare test arguments"
+# The env variable is created and populated in the test-arguments-action as 
"<github.job>_test_arguments_<argument_file_paths_index>"
+      - name: Prepare test arguments
+        uses: ./.github/actions/test-arguments-action
+        with:
+          test-type: performance
+          test-language: java
+          argument-file-paths: |
+            ${{ github.workspace 
}}/.github/workflows/performance-tests-job-configs/config_ParquetIOIT_HDFS.txt
+          arguments: |
+            --filenamePrefix=hdfs://${{ steps.install_hadoop.outputs.hadoop_IP 
}}:9000/TEXTIO_IT_
+            --hdfsConfiguration=[{\\\"fs.defaultFS\\\":\\\"hdfs:${{ 
steps.install_hadoop.outputs.hadoop_IP }}:9000\\\",\\\"dfs.replication\\\":1}]
+      - name: run integrationTest
+        uses: ./.github/actions/gradle-command-self-hosted-action
+        with:
+          gradle-command: :sdks:java:io:file-based-io-tests:integrationTest
+          arguments: |
+            --tests org.apache.beam.sdk.io.parquet.ParquetIOIT \
+            --info \
+            -Dfilesystem=hdfs \
+            -DintegrationTestRunner=dataflow \
+            -DintegrationTestPipelineOptions='[${{ 
env.beam_PerformanceTests_ParquetIOIT_HDFS_test_arguments_1 }}]'
\ No newline at end of file
diff --git 
a/.github/workflows/performance-tests-job-configs/config_ParquetIOIT.txt 
b/.github/workflows/performance-tests-job-configs/config_ParquetIOIT.txt
new file mode 100644
index 00000000000..10dc0eba73d
--- /dev/null
+++ b/.github/workflows/performance-tests-job-configs/config_ParquetIOIT.txt
@@ -0,0 +1,26 @@
+#  Licensed to the Apache Software Foundation (ASF) under one
+#  or more contributor license agreements.  See the NOTICE file
+#  distributed with this work for additional information
+#  regarding copyright ownership.  The ASF licenses this file
+#  to you under the Apache License, Version 2.0 (the
+#  "License"); you may not use this file except in compliance
+#  with the License.  You may obtain a copy of the License at
+#
+#      http://www.apache.org/licenses/LICENSE-2.0
+#
+#  Unless required by applicable law or agreed to in writing, software
+#  distributed under the License is distributed on an "AS IS" BASIS,
+#  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+#  See the License for the specific language governing permissions and
+#  limitations under the License.
+
+--runner=DataflowRunner
+--tempRoot=gs://temp-storage-for-perf-tests
+--bigQueryDataset=beam_performance
+--bigQueryTable=parquetioit_results
+--influxMeasurement=parquetioit_results
+--numberOfRecords=225000000
+--expectedHash=2f9f5ca33ea464b25109c0297eb6aecb
+--datasetSize=1087370000
+--numWorkers=5
+--autoscalingAlgorithm=NONE
\ No newline at end of file
diff --git 
a/.github/workflows/performance-tests-job-configs/config_ParquetIOIT_HDFS.txt 
b/.github/workflows/performance-tests-job-configs/config_ParquetIOIT_HDFS.txt
new file mode 100644
index 00000000000..ecb725cc12f
--- /dev/null
+++ 
b/.github/workflows/performance-tests-job-configs/config_ParquetIOIT_HDFS.txt
@@ -0,0 +1,26 @@
+#  Licensed to the Apache Software Foundation (ASF) under one
+#  or more contributor license agreements.  See the NOTICE file
+#  distributed with this work for additional information
+#  regarding copyright ownership.  The ASF licenses this file
+#  to you under the Apache License, Version 2.0 (the
+#  "License"); you may not use this file except in compliance
+#  with the License.  You may obtain a copy of the License at
+#
+#      http://www.apache.org/licenses/LICENSE-2.0
+#
+#  Unless required by applicable law or agreed to in writing, software
+#  distributed under the License is distributed on an "AS IS" BASIS,
+#  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+#  See the License for the specific language governing permissions and
+#  limitations under the License.
+
+--runner=DataflowRunner
+--tempRoot=gs://temp-storage-for-perf-tests
+--bigQueryDataset=beam_performance
+--bigQueryTable=parquetioit_hdfs_results
+--influxMeasurement=parquetioit_hdfs_results
+--numberOfRecords=225000000
+--expectedHash=2f9f5ca33ea464b25109c0297eb6aecb
+--datasetSize=1087370000
+--numWorkers=5
+--autoscalingAlgorithm=NONE
\ No newline at end of file

Reply via email to