HyukjinKwon commented on a change in pull request #29460:
URL: https://github.com/apache/spark/pull/29460#discussion_r471931078



##########
File path: .github/workflows/master.yml
##########
@@ -9,148 +9,242 @@ on:
     - branch-3.0
 
 jobs:
+  # Build: build Spark and run the tests for specified modules.
   build:
-
+    name: "Build modules: ${{ matrix.modules }} ${{ matrix.comment }} (JDK ${{ 
matrix.java }}, ${{ matrix.hadoop }}, ${{ matrix.hive }})"
     runs-on: ubuntu-latest
     strategy:
+      fail-fast: false
       matrix:
-        java: [ '1.8', '11' ]
-        hadoop: [ 'hadoop-2.7', 'hadoop-3.2' ]
-        hive: [ 'hive-1.2', 'hive-2.3' ]
-        exclude:
-        - java: '11'
-          hive: 'hive-1.2'
-        - hadoop: 'hadoop-3.2'
-          hive: 'hive-1.2'
-    name: Build Spark - JDK${{ matrix.java }}/${{ matrix.hadoop }}/${{ matrix.hive }}
-
+        java:
+          - 1.8
+        hadoop:
+          - hadoop3.2
+        hive:
+          - hive2.3
+        # TODO(SPARK-32246): We don't test 'streaming-kinesis-asl' for now.
+        # Kinesis tests depend on the external Amazon Kinesis service.
+        # Note that the modules below are from sparktestsupport/modules.py.
+        modules:
+          - >-
+            core, unsafe, kvstore, avro,
+            network-common, network-shuffle, repl, launcher,
+            examples, sketch, graphx
+          - >-
+            catalyst, hive-thriftserver
+          - >-
+            streaming, sql-kafka-0-10, streaming-kafka-0-10,
+            mllib-local, mllib,
+            yarn, mesos, kubernetes, hadoop-cloud, spark-ganglia-lgpl
+          - >-
+            pyspark-sql, pyspark-mllib
+          - >-
+            pyspark-core, pyspark-streaming, pyspark-ml
+          - >-
+            sparkr
+        # Here, we split Hive and SQL tests into the slow ones and the rest.
+        included-tags: [""]
+        excluded-tags: [""]
+        comment: [""]
+        include:
+          # Hive tests
+          - modules: hive
+            java: 1.8
+            hadoop: hadoop3.2
+            hive: hive2.3
+            included-tags: org.apache.spark.tags.SlowHiveTest
+            comment: "- slow tests"
+          - modules: hive
+            java: 1.8
+            hadoop: hadoop3.2
+            hive: hive2.3
+            excluded-tags: org.apache.spark.tags.SlowHiveTest
+            comment: "- other tests"
+          # SQL tests
+          - modules: sql
+            java: 1.8
+            hadoop: hadoop3.2
+            hive: hive2.3
+            included-tags: org.apache.spark.tags.ExtendedSQLTest
+            comment: "- slow tests"
+          - modules: sql
+            java: 1.8
+            hadoop: hadoop3.2
+            hive: hive2.3
+            excluded-tags: org.apache.spark.tags.ExtendedSQLTest
+            comment: "- other tests"
+    env:
+      MODULES_TO_TEST: ${{ matrix.modules }}
+      EXCLUDED_TAGS: ${{ matrix.excluded-tags }}
+      INCLUDED_TAGS: ${{ matrix.included-tags }}
+      HADOOP_PROFILE: ${{ matrix.hadoop }}
+      HIVE_PROFILE: ${{ matrix.hive }}
+      # GitHub Actions' default miniconda to use in pip packaging test.
+      CONDA_PREFIX: /usr/share/miniconda
+      GITHUB_PREV_SHA: ${{ github.event.before }}
     steps:
-    - uses: actions/checkout@master
-    # We split caches because GitHub Action Cache has a 400MB-size limit.
-    - uses: actions/cache@v1
+    - name: Checkout Spark repository
+      uses: actions/checkout@v2
+      # In order to fetch changed files
+      with:
+        fetch-depth: 0
+    # Cache local repositories. Note that GitHub Actions cache has a 2G limit.
+    - name: Cache Scala, SBT, Maven and Zinc
+      uses: actions/cache@v1
       with:
         path: build
         key: build-${{ hashFiles('**/pom.xml') }}
         restore-keys: |
           build-
-    - uses: actions/cache@v1
-      with:
-        path: ~/.m2/repository/com
-        key: ${{ matrix.java }}-${{ matrix.hadoop }}-maven-com-${{ hashFiles('**/pom.xml') }}
-        restore-keys: |
-          ${{ matrix.java }}-${{ matrix.hadoop }}-maven-com-
-    - uses: actions/cache@v1
-      with:
-        path: ~/.m2/repository/org
-        key: ${{ matrix.java }}-${{ matrix.hadoop }}-maven-org-${{ hashFiles('**/pom.xml') }}
-        restore-keys: |
-          ${{ matrix.java }}-${{ matrix.hadoop }}-maven-org-
-    - uses: actions/cache@v1
+    - name: Cache Maven local repository
+      uses: actions/cache@v2
       with:
-        path: ~/.m2/repository/net
-        key: ${{ matrix.java }}-${{ matrix.hadoop }}-maven-net-${{ hashFiles('**/pom.xml') }}
+        path: ~/.m2/repository
+        key: ${{ matrix.java }}-${{ matrix.hadoop }}-maven-${{ hashFiles('**/pom.xml') }}
         restore-keys: |
-          ${{ matrix.java }}-${{ matrix.hadoop }}-maven-net-
-    - uses: actions/cache@v1
+          ${{ matrix.java }}-${{ matrix.hadoop }}-maven-
+    - name: Cache Ivy local repository
+      uses: actions/cache@v2
       with:
-        path: ~/.m2/repository/io
-        key: ${{ matrix.java }}-${{ matrix.hadoop }}-maven-io-${{ hashFiles('**/pom.xml') }}
+        path: ~/.ivy2/cache
+        key: ${{ matrix.java }}-${{ matrix.hadoop }}-ivy-${{ hashFiles('**/pom.xml') }}-${{ hashFiles('**/plugins.sbt') }}
         restore-keys: |
-          ${{ matrix.java }}-${{ matrix.hadoop }}-maven-io-
-    - name: Set up JDK ${{ matrix.java }}
+          ${{ matrix.java }}-${{ matrix.hadoop }}-ivy-
+    - name: Install JDK ${{ matrix.java }}
       uses: actions/setup-java@v1
       with:
         java-version: ${{ matrix.java }}
-    - name: Build with Maven
-      run: |
-        export MAVEN_OPTS="-Xmx2g -XX:ReservedCodeCacheSize=1g -Dorg.slf4j.simpleLogger.defaultLogLevel=WARN"
-        export MAVEN_CLI_OPTS="--no-transfer-progress"
-        mkdir -p ~/.m2
-        ./build/mvn $MAVEN_CLI_OPTS -DskipTests -Pyarn -Pmesos -Pkubernetes -Phive -P${{ matrix.hive }} -Phive-thriftserver -P${{ matrix.hadoop }} -Phadoop-cloud -Djava.version=${{ matrix.java }} install
-        rm -rf ~/.m2/repository/org/apache/spark
-
-
-  lint:
-    runs-on: ubuntu-latest
-    name: Linters (Java/Scala/Python), licenses, dependencies
-    steps:
-    - uses: actions/checkout@master
-    - uses: actions/setup-java@v1
-      with:
-        java-version: '11'
-    - uses: actions/setup-python@v1
+    # PySpark
+    - name: Install PyPy3
+      # Note that the order of Python installations here matters because the default python3 is
+      # overridden by pypy3.
+      uses: actions/setup-python@v2
+      if: contains(matrix.modules, 'pyspark')
       with:
-        python-version: '3.x'
-        architecture: 'x64'
-    - name: Scala
-      run: ./dev/lint-scala
-    - name: Java
-      run: ./dev/lint-java
-    - name: Python
-      run: |
-        pip install flake8 sphinx numpy
-        ./dev/lint-python
-    - name: License
-      run: ./dev/check-license
-    - name: Dependencies
-      run: ./dev/test-dependencies.sh
-
-  lintr:
-    runs-on: ubuntu-latest
-    name: Linter (R)
-    steps:
-    - uses: actions/checkout@master
-    - uses: actions/setup-java@v1
+        python-version: pypy3
+        architecture: x64
+    - name: Install Python 2.7

Review comment:
       Spark 3.0 has not dropped Python 2 yet. In master, we test Python 3.6 and 3.8. In branch-3.0, we test Python 3.8 and 2.7.
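
       For reference, a minimal sketch of how both branch-3.0 interpreters could be installed with actions/setup-python (the versions follow the comment above; the step names and the 'pyspark' condition simply mirror the diff and are illustrative, not the actual branch-3.0 configuration):

           # Hypothetical steps for illustration; per the comment in the diff the
           # installation order matters, so these would come after the PyPy3 step.
           - name: Install Python 2.7
             uses: actions/setup-python@v2
             if: contains(matrix.modules, 'pyspark')
             with:
               python-version: 2.7
               architecture: x64
           - name: Install Python 3.8
             uses: actions/setup-python@v2
             if: contains(matrix.modules, 'pyspark')
             with:
               python-version: 3.8
               architecture: x64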



