This is an automated email from the ASF dual-hosted git repository.
dongjoon pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/spark.git
The following commit(s) were added to refs/heads/master by this push:
new 2a329a8 [SPARK-37833][INFRA] Add `precondition` job to skip the main
GitHub Action jobs
2a329a8 is described below
commit 2a329a853e42bb42d874e1736f04b84a70efb561
Author: Dongjoon Hyun <[email protected]>
AuthorDate: Fri Jan 7 01:25:12 2022 -0800
[SPARK-37833][INFRA] Add `precondition` job to skip the main GitHub Action
jobs
### What changes were proposed in this pull request?
This PR aims to introduce a `precondition` job so that the main GitHub
Action jobs can be skipped when they are not required.
### Why are the changes needed?
This will save a large amount of community GitHub Actions resources and
speed up our development and PR review process.
- For example, GitHub Actions will run only the `linter` job for a `docs`-only PR.
<img width="468" alt="Screen Shot 2022-01-07 at 12 10 50 AM"
src="https://user-images.githubusercontent.com/9700541/148512753-bd9b7e49-0e7b-47dd-9ce5-31f684dac666.png">
### Does this PR introduce _any_ user-facing change?
No. This is a dev-only infra.
### How was this patch tested?
Manually reviewed and checked the result on this PR.
Closes #35121 from dongjoon-hyun/SPARK-37833.
Authored-by: Dongjoon Hyun <[email protected]>
Signed-off-by: Dongjoon Hyun <[email protected]>
---
.github/workflows/build_and_test.yml | 60 +++++++++++++++++++++++++++---------
1 file changed, 46 insertions(+), 14 deletions(-)
diff --git a/.github/workflows/build_and_test.yml
b/.github/workflows/build_and_test.yml
index 0f04147..250cd72 100644
--- a/.github/workflows/build_and_test.yml
+++ b/.github/workflows/build_and_test.yml
@@ -96,15 +96,46 @@ jobs:
echo '::set-output name=hadoop::hadoop3'
fi
+ precondition:
+ name: Check changes
+ runs-on: ubuntu-20.04
+ outputs:
+ required: ${{ steps.set-outputs.outputs.required }}
+ steps:
+ - name: Checkout Spark repository
+ uses: actions/checkout@v2
+ with:
+ fetch-depth: 0
+ repository: apache/spark
+ ref: master
+ - name: Sync the current branch with the latest in Apache Spark
+ if: github.repository != 'apache/spark'
+ run: |
+ echo "APACHE_SPARK_REF=$(git rev-parse HEAD)" >> $GITHUB_ENV
+ git fetch https://github.com/$GITHUB_REPOSITORY.git
${GITHUB_REF#refs/heads/}
+ git -c user.name='Apache Spark Test Account' -c
user.email='[email protected]' merge --no-commit --progress --squash
FETCH_HEAD
+ git -c user.name='Apache Spark Test Account' -c
user.email='[email protected]' commit -m "Merged commit"
+ - name: Check all modules
+ id: set-outputs
+ run: |
+ build=`./dev/is-changed.py -m
avro,build,catalyst,core,docker-integration-tests,examples,graphx,hadoop-cloud,hive,hive-thriftserver,kubernetes,kvstore,launcher,mesos,mllib,mllib-local,network-common,network-shuffle,pyspark-core,pyspark-ml,pyspark-mllib,pyspark-pandas,pyspark-pandas-slow,pyspark-resource,pyspark-sql,pyspark-streaming,repl,sketch,spark-ganglia-lgpl,sparkr,sql,sql-kafka-0-10,streaming,streaming-kafka-0-10,streaming-kinesis-asl,tags,unsafe,yarn`
+ pyspark=`./dev/is-changed.py -m
avro,build,catalyst,core,graphx,hive,kvstore,launcher,mllib,mllib-local,network-common,network-shuffle,pyspark-core,pyspark-ml,pyspark-mllib,pyspark-pandas,pyspark-pandas-slow,pyspark-resource,pyspark-sql,pyspark-streaming,repl,sketch,sql,tags,unsafe`
+ sparkr=`./dev/is-changed.py -m
avro,build,catalyst,core,hive,kvstore,launcher,mllib,mllib-local,network-common,network-shuffle,repl,sketch,sparkr,sql,tags,unsafe`
+ tpcds=`./dev/is-changed.py -m
build,catalyst,core,hive,kvstore,launcher,network-common,network-shuffle,repl,sketch,sql,tags,unsafe`
+ docker=`./dev/is-changed.py -m
build,catalyst,core,docker-integration-tests,hive,kvstore,launcher,network-common,network-shuffle,repl,sketch,sql,tags,unsafe`
+ echo "{\"build\": \"$build\", \"pyspark\": \"$pyspark\", \"sparkr\":
\"$sparkr\", \"tpcds\": \"$tpcds\", \"docker\": \"$docker\"}" > required.json
+ cat required.json
+ echo "::set-output name=required::$(cat required.json)"
+
# Build: build Spark and run the tests for specified modules.
build:
name: "Build modules (${{ format('{0}, {1} job',
needs.configure-jobs.outputs.branch, needs.configure-jobs.outputs.type) }}):
${{ matrix.modules }} ${{ matrix.comment }} (JDK ${{ matrix.java }}, ${{
matrix.hadoop }}, ${{ matrix.hive }})"
- needs: configure-jobs
+ needs: [configure-jobs, precondition]
# Run scheduled jobs for Apache Spark only
# Run regular jobs for commit in both Apache Spark and forked repository
if: >-
(github.repository == 'apache/spark' &&
needs.configure-jobs.outputs.type == 'scheduled')
- || needs.configure-jobs.outputs.type == 'regular'
+ || (needs.configure-jobs.outputs.type == 'regular' &&
fromJson(needs.precondition.outputs.required).build == 'true')
# Ubuntu 20.04 is the latest LTS. The next LTS is 22.04.
runs-on: ubuntu-20.04
strategy:
@@ -242,15 +273,16 @@ jobs:
name: unit-tests-log-${{ matrix.modules }}-${{ matrix.comment }}-${{
matrix.java }}-${{ matrix.hadoop }}-${{ matrix.hive }}
path: "**/target/unit-tests.log"
+
pyspark:
- needs: configure-jobs
+ needs: [configure-jobs, precondition]
# Run PySpark coverage scheduled jobs for Apache Spark only
# Run scheduled jobs with JDK 17 in Apache Spark
# Run regular jobs for commit in both Apache Spark and forked repository
if: >-
(github.repository == 'apache/spark' &&
needs.configure-jobs.outputs.type == 'pyspark-coverage-scheduled')
|| (github.repository == 'apache/spark' &&
needs.configure-jobs.outputs.type == 'scheduled' &&
needs.configure-jobs.outputs.java == '17')
- || needs.configure-jobs.outputs.type == 'regular'
+ || (needs.configure-jobs.outputs.type == 'regular' &&
fromJson(needs.precondition.outputs.required).pyspark == 'true')
name: "Build modules (${{ format('{0}, {1} job',
needs.configure-jobs.outputs.branch, needs.configure-jobs.outputs.type) }}):
${{ matrix.modules }}"
runs-on: ubuntu-20.04
container:
@@ -351,9 +383,9 @@ jobs:
path: "**/target/unit-tests.log"
sparkr:
- needs: configure-jobs
+ needs: [configure-jobs, precondition]
if: >-
- needs.configure-jobs.outputs.type == 'regular'
+ (needs.configure-jobs.outputs.type == 'regular' &&
fromJson(needs.precondition.outputs.required).sparkr == 'true')
|| (github.repository == 'apache/spark' &&
needs.configure-jobs.outputs.type == 'scheduled' &&
needs.configure-jobs.outputs.java == '17')
name: "Build modules: sparkr"
runs-on: ubuntu-20.04
@@ -532,8 +564,8 @@ jobs:
bundle exec jekyll build
java-11-17:
- needs: configure-jobs
- if: needs.configure-jobs.outputs.type == 'regular'
+ needs: [configure-jobs, precondition]
+ if: needs.configure-jobs.outputs.type == 'regular' &&
fromJson(needs.precondition.outputs.required).build == 'true'
name: Java ${{ matrix.java }} build with Maven
strategy:
fail-fast: false
@@ -587,8 +619,8 @@ jobs:
rm -rf ~/.m2/repository/org/apache/spark
scala-213:
- needs: configure-jobs
- if: needs.configure-jobs.outputs.type == 'regular'
+ needs: [configure-jobs, precondition]
+ if: needs.configure-jobs.outputs.type == 'regular' &&
fromJson(needs.precondition.outputs.required).build == 'true'
name: Scala 2.13 build with SBT
runs-on: ubuntu-20.04
steps:
@@ -632,8 +664,8 @@ jobs:
./build/sbt -Pyarn -Pmesos -Pkubernetes -Phive -Phive-thriftserver
-Phadoop-cloud -Pkinesis-asl -Pdocker-integration-tests
-Pkubernetes-integration-tests -Pspark-ganglia-lgpl -Pscala-2.13 compile
test:compile
tpcds-1g:
- needs: configure-jobs
- if: needs.configure-jobs.outputs.type == 'regular'
+ needs: [configure-jobs, precondition]
+ if: needs.configure-jobs.outputs.type == 'regular' &&
fromJson(needs.precondition.outputs.required).tpcds == 'true'
name: Run TPC-DS queries with SF=1
runs-on: ubuntu-20.04
env:
@@ -726,8 +758,8 @@ jobs:
path: "**/target/unit-tests.log"
docker-integration-tests:
- needs: configure-jobs
- if: needs.configure-jobs.outputs.type == 'regular'
+ needs: [configure-jobs, precondition]
+ if: needs.configure-jobs.outputs.type == 'regular' &&
fromJson(needs.precondition.outputs.required).docker == 'true'
name: Run Docker integration tests
runs-on: ubuntu-20.04
env:
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]