HyukjinKwon commented on a change in pull request #29057:
URL: https://github.com/apache/spark/pull/29057#discussion_r452278579
##########
File path: .github/workflows/master.yml
##########
@@ -9,148 +9,233 @@ on:
- master
jobs:
+ # TODO(SPARK-32248): Recover JDK 11 builds
+ # Build: build Spark and run the tests for specified modules.
build:
-
+ name: "Build modules: ${{ matrix.modules }} ${{ matrix.comment }} (JDK ${{
matrix.java }}, ${{ matrix.hadoop }}, ${{ matrix.hive }})"
runs-on: ubuntu-latest
strategy:
+ fail-fast: false
matrix:
- java: [ '1.8', '11' ]
- hadoop: [ 'hadoop-2.7', 'hadoop-3.2' ]
- hive: [ 'hive-1.2', 'hive-2.3' ]
- exclude:
- - java: '11'
- hive: 'hive-1.2'
- - hadoop: 'hadoop-3.2'
- hive: 'hive-1.2'
- name: Build Spark - JDK${{ matrix.java }}/${{ matrix.hadoop }}/${{
matrix.hive }}
-
+ java:
+ - 1.8
+ hadoop:
+ - hadoop3.2
+ hive:
+ - hive2.3
+ # TODO(SPARK-32246): We don't test 'streaming-kinesis-asl' for now.
+ # Kinesis tests depends on external Amazon kinesis service.
+ # Note that the modules below are from sparktestsupport/modules.py.
+ modules:
+ - |-
+ core, unsafe, kvstore, avro,
            network_common, network_shuffle, repl, launcher,
+ examples, sketch, graphx
+ - |-
+ catalyst, sql
+ - |-
+ hive-thriftserver
+ - |-
+ streaming, sql-kafka-0-10, streaming-kafka-0-10
+ - |-
+ mllib-local, mllib
+ - |-
+ pyspark-sql, pyspark-mllib, pyspark-resource
+ - |-
+ pyspark-core, pyspark-streaming, pyspark-ml
+ - |-
+ sparkr
+ - |-
+ yarn, mesos, kubernetes, hadoop-cloud,
+ spark-ganglia-lgpl
+ # Here, we split Hive tests into some of heavy ones and the rest of
them.
+ included-tags: [""]
+ excluded-tags: [""]
+ comment: ["- all tests"]
+ include:
+ - modules: hive
+ java: 1.8
+ hadoop: hadoop3.2
+ hive: hive2.3
+ included-tags: org.apache.spark.tags.HeavyHiveTest
+ comment: "- heavy tests"
+ - modules: hive
+ java: 1.8
+ hadoop: hadoop3.2
+ hive: hive2.3
+ excluded-tags: org.apache.spark.tags.HeavyHiveTest
+ comment: "- light tests"
+ env:
+ TEST_ONLY_MODULES: ${{ matrix.modules }}
+ HADOOP_PROFILE: ${{ matrix.hadoop }}
+ HIVE_PROFILE: ${{ matrix.hive }}
+      # GitHub Actions' default miniconda
+ CONDA_PREFIX: /usr/share/miniconda
+ # Don't run the tests in parallel due to flakiness. See
SparkParallelTestGrouping.
+ TEST_ONLY_EXCLUDED_TAGS: ${{ matrix.excluded-tags }}
+ TEST_ONLY_INCLUDED_TAGS: ${{ matrix.included-tags }}
steps:
- - uses: actions/checkout@master
- # We split caches because GitHub Action Cache has a 400MB-size limit.
- - uses: actions/cache@v1
+ - name: Checkout Spark repository
+ uses: actions/checkout@v2
+    # Cache local repositories. Note that GitHub Actions cache has a 2G limit.
+ - name: Cache Scala, SBT, Maven and Zinc
+ uses: actions/cache@v1
with:
path: build
key: build-${{ hashFiles('**/pom.xml') }}
restore-keys: |
build-
- - uses: actions/cache@v1
- with:
- path: ~/.m2/repository/com
- key: ${{ matrix.java }}-${{ matrix.hadoop }}-maven-com-${{
hashFiles('**/pom.xml') }}
- restore-keys: |
- ${{ matrix.java }}-${{ matrix.hadoop }}-maven-com-
- - uses: actions/cache@v1
+ - name: Cache Maven local repository
+ uses: actions/cache@v2
with:
- path: ~/.m2/repository/org
- key: ${{ matrix.java }}-${{ matrix.hadoop }}-maven-org-${{
hashFiles('**/pom.xml') }}
- restore-keys: |
- ${{ matrix.java }}-${{ matrix.hadoop }}-maven-org-
- - uses: actions/cache@v1
- with:
- path: ~/.m2/repository/net
- key: ${{ matrix.java }}-${{ matrix.hadoop }}-maven-net-${{
hashFiles('**/pom.xml') }}
+ path: ~/.m2/repository
+ key: ${{ matrix.java }}-${{ matrix.hadoop }}-maven-${{
hashFiles('**/pom.xml') }}
restore-keys: |
- ${{ matrix.java }}-${{ matrix.hadoop }}-maven-net-
- - uses: actions/cache@v1
+ ${{ matrix.java }}-${{ matrix.hadoop }}-maven-
+ - name: Cache Ivy local repository
+ uses: actions/cache@v2
with:
- path: ~/.m2/repository/io
- key: ${{ matrix.java }}-${{ matrix.hadoop }}-maven-io-${{
hashFiles('**/pom.xml') }}
+ path: ~/.ivy2/cache
+ key: ${{ matrix.java }}-${{ matrix.hadoop }}-ivy-${{
hashFiles('**/pom.xml') }}-${{ hashFiles('**/plugins.sbt') }}
restore-keys: |
- ${{ matrix.java }}-${{ matrix.hadoop }}-maven-io-
- - name: Set up JDK ${{ matrix.java }}
+ ${{ matrix.java }}-${{ matrix.hadoop }}-ivy-
+ - name: Install JDK ${{ matrix.java }}
uses: actions/setup-java@v1
with:
java-version: ${{ matrix.java }}
- - name: Build with Maven
+ # PySpark
+ - name: Install PyPy3
+ # SQL component also has Python related tests, for example,
IntegratedUDFTestUtils.
+ # Note that order of Python installations here matters because default
python3 is
+ # overridden by pypy3.
+ uses: actions/setup-python@v2
+ if: contains(matrix.modules, 'pyspark') || matrix.modules == 'sql'
+ with:
+ python-version: pypy3
+ architecture: x64
+ - name: Install Python 2.7
+ uses: actions/setup-python@v2
+ if: contains(matrix.modules, 'pyspark') || matrix.modules == 'sql'
+ with:
+ python-version: 2.7
+ architecture: x64
+ - name: Install Python 3.6
+ uses: actions/setup-python@v2
+ if: contains(matrix.modules, 'pyspark') || matrix.modules == 'sql'
+ with:
+ python-version: 3.6
+ architecture: x64
+ - name: Install Python packages
+ if: contains(matrix.modules, 'pyspark') || matrix.modules == 'sql'
+ # PyArrow is not supported in PyPy yet, see ARROW-2651.
+ # TODO(SPARK-32247): scipy installation with PyPy fails for an unknown
reason.
+ run: |
+ python3 -m pip install numpy pyarrow pandas scipy
+ python3 -m pip list
+ python2 -m pip install numpy pyarrow pandas scipy
Review comment:
   Because we didn't drop it yet at https://github.com/apache/spark/pull/28957
   😢
----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
For queries about this service, please contact Infrastructure at:
[email protected]
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]