[spark] branch branch-3.0 updated: [SPARK-32249][INFRA][3.0] Run Github Actions builds in branch-3.0

dongjoon Wed, 19 Aug 2020 07:27:01 -0700

This is an automated email from the ASF dual-hosted git repository.

dongjoon pushed a commit to branch branch-3.0
in repository https://gitbox.apache.org/repos/asf/spark.git



The following commit(s) were added to refs/heads/branch-3.0 by this push:
     new 56ec5dd  [SPARK-32249][INFRA][3.0] Run Github Actions builds in 
branch-3.0
56ec5dd is described below

commit 56ec5ddcac8233011c17fc7d120a284707f0f712
Author: HyukjinKwon <[email protected]>
AuthorDate: Wed Aug 19 07:20:26 2020 -0700

    [SPARK-32249][INFRA][3.0] Run Github Actions builds in branch-3.0
    
    ### What changes were proposed in this pull request?
    
    This PR proposes to backport the following JIRAs:
    
    - SPARK-32245
    - SPARK-32292
    - SPARK-32252
    - SPARK-32316
    - SPARK-32408
    - SPARK-32303
    - SPARK-32363
    - SPARK-32419
    - SPARK-32422
    - SPARK-32491
    - SPARK-32493
    - SPARK-32496
    - SPARK-32497
    - SPARK-32357
    - SPARK-32606
    - SPARK-32605
    - SPARK-32248
    - SPARK-32645
    - Minor renaming d0dfe49#diff-02d9c370a663741451423342d5869b21
    
    in order to enable GitHub Actions in branch-3.0.
    
    ### Why are the changes needed?
    
    To be able to run the tests in branch-3.0. Jenkins jobs are unstable.
    
    ### Does this PR introduce _any_ user-facing change?
    
    No, dev-only.
    
    ### How was this patch tested?
    
    Build in this PR will test.
    
    Closes #29460 from HyukjinKwon/SPARK-32249.
    
    Lead-authored-by: HyukjinKwon <[email protected]>
    Co-authored-by: Hyukjin Kwon <[email protected]>
    Co-authored-by: Dongjoon Hyun <[email protected]>
    Signed-off-by: Dongjoon Hyun <[email protected]>
---
 .github/workflows/build_and_test.yml               | 283 +++++++++++++++++++++
 .github/workflows/master.yml                       | 156 ------------
 .github/workflows/test_report.yml                  |  24 ++
 .../spark/tags/GitHubActionsUnstableTest.java      |  30 +++
 .../java/org/apache/spark/tags/SlowHiveTest.java   |  30 +++
 dev/run-pip-tests                                  |  10 +-
 dev/run-tests.py                                   | 185 ++++++++++----
 dev/sparktestsupport/modules.py                    |  93 +++++--
 project/SparkBuild.scala                           |   9 +
 python/pyspark/sql/tests/test_arrow.py             |   3 +
 python/pyspark/sql/tests/test_types.py             |   3 +-
 python/pyspark/streaming/tests/test_dstream.py     |  15 +-
 python/run-tests.py                                |   2 +-
 .../apache/spark/sql/IntegratedUDFTestUtils.scala  |   2 +-
 .../thriftserver/ThriftServerQueryTestSuite.scala  |   2 +
 .../ThriftServerWithSparkContextSuite.scala        |   3 +
 .../hive/execution/HiveCompatibilitySuite.scala    |   2 +
 .../hive/HiveExternalCatalogVersionsSuite.scala    |   3 +-
 .../spark/sql/hive/HiveSparkSubmitSuite.scala      |   3 +-
 .../spark/sql/hive/client/VersionsSuite.scala      |   3 +-
 .../sql/hive/execution/AggregationQuerySuite.scala |   2 +
 .../spark/sql/hive/execution/HiveDDLSuite.scala    |   3 +
 .../spark/sql/hive/execution/HiveQuerySuite.scala  |   2 +
 .../sql/hive/execution/Hive_2_1_DDLSuite.scala     |   3 +-
 .../spark/sql/hive/execution/SQLQuerySuite.scala   |   3 +
 25 files changed, 639 insertions(+), 235 deletions(-)

diff --git a/.github/workflows/build_and_test.yml 
b/.github/workflows/build_and_test.yml
new file mode 100644
index 0000000..581ad8a
--- /dev/null
+++ b/.github/workflows/build_and_test.yml
@@ -0,0 +1,283 @@
+name: Build and test
+
+on:
+  push:
+    branches:
+    - branch-3.0
+  pull_request:
+    branches:
+    - branch-3.0
+
+jobs:
+  # Build: build Spark and run the tests for specified modules.
+  build:
+    name: "Build modules: ${{ matrix.modules }} ${{ matrix.comment }} (JDK ${{ 
matrix.java }}, ${{ matrix.hadoop }}, ${{ matrix.hive }})"
+    runs-on: ubuntu-latest
+    strategy:
+      fail-fast: false
+      matrix:
+        java:
+          - 1.8
+        hadoop:
+          - hadoop2.7
+        hive:
+          - hive2.3
+        # TODO(SPARK-32246): We don't test 'streaming-kinesis-asl' for now.
+        # Kinesis tests depends on external Amazon kinesis service.
+        # Note that the modules below are from sparktestsupport/modules.py.
+        modules:
+          - >-
+            core, unsafe, kvstore, avro,
+            network-common, network-shuffle, repl, launcher,
+            examples, sketch, graphx
+          - >-
+            catalyst, hive-thriftserver
+          - >-
+            streaming, sql-kafka-0-10, streaming-kafka-0-10,
+            mllib-local, mllib,
+            yarn, mesos, kubernetes, hadoop-cloud, spark-ganglia-lgpl
+          - >-
+            pyspark-sql, pyspark-mllib
+          - >-
+            pyspark-core, pyspark-streaming, pyspark-ml
+          - >-
+            sparkr
+        # Here, we split Hive and SQL tests into some of slow ones and the 
rest of them.
+        included-tags: [""]
+        # Some tests are disabled in GitHun Actions. Ideally, we should remove 
this tag
+        # and run all tests.
+        excluded-tags: ["org.apache.spark.tags.GitHubActionsUnstableTest"]
+        comment: [""]
+        include:
+          # Hive tests
+          - modules: hive
+            java: 1.8
+            hadoop: hadoop2.7
+            hive: hive2.3
+            included-tags: org.apache.spark.tags.SlowHiveTest
+            comment: "- slow tests"
+          - modules: hive
+            java: 1.8
+            hadoop: hadoop2.7
+            hive: hive2.3
+            excluded-tags: 
org.apache.spark.tags.SlowHiveTest,org.apache.spark.tags.GitHubActionsUnstableTest
+            comment: "- other tests"
+          # SQL tests
+          - modules: sql
+            java: 1.8
+            hadoop: hadoop2.7
+            hive: hive2.3
+            included-tags: org.apache.spark.tags.ExtendedSQLTest
+            comment: "- slow tests"
+          - modules: sql
+            java: 1.8
+            hadoop: hadoop2.7
+            hive: hive2.3
+            excluded-tags: 
org.apache.spark.tags.ExtendedSQLTest,org.apache.spark.tags.GitHubActionsUnstableTest
+            comment: "- other tests"
+    env:
+      MODULES_TO_TEST: ${{ matrix.modules }}
+      EXCLUDED_TAGS: ${{ matrix.excluded-tags }}
+      INCLUDED_TAGS: ${{ matrix.included-tags }}
+      HADOOP_PROFILE: ${{ matrix.hadoop }}
+      HIVE_PROFILE: ${{ matrix.hive }}
+      # GitHub Actions' default miniconda to use in pip packaging test.
+      CONDA_PREFIX: /usr/share/miniconda
+      GITHUB_PREV_SHA: ${{ github.event.before }}
+    steps:
+    - name: Checkout Spark repository
+      uses: actions/checkout@v2
+      # In order to fetch changed files
+      with:
+        fetch-depth: 0
+    # Cache local repositories. Note that GitHub Actions cache has a 2G limit.
+    - name: Cache Scala, SBT, Maven and Zinc
+      uses: actions/cache@v1
+      with:
+        path: build
+        key: build-${{ hashFiles('**/pom.xml') }}
+        restore-keys: |
+          build-
+    - name: Cache Maven local repository
+      uses: actions/cache@v2
+      with:
+        path: ~/.m2/repository
+        key: ${{ matrix.java }}-${{ matrix.hadoop }}-maven-${{ 
hashFiles('**/pom.xml') }}
+        restore-keys: |
+          ${{ matrix.java }}-${{ matrix.hadoop }}-maven-
+    - name: Cache Ivy local repository
+      uses: actions/cache@v2
+      with:
+        path: ~/.ivy2/cache
+        key: ${{ matrix.java }}-${{ matrix.hadoop }}-ivy-${{ 
hashFiles('**/pom.xml') }}-${{ hashFiles('**/plugins.sbt') }}
+        restore-keys: |
+          ${{ matrix.java }}-${{ matrix.hadoop }}-ivy-
+    - name: Install JDK ${{ matrix.java }}
+      uses: actions/setup-java@v1
+      with:
+        java-version: ${{ matrix.java }}
+    # PySpark
+    - name: Install PyPy3
+      # Note that order of Python installations here matters because default 
python3 is
+      # overridden by pypy3.
+      uses: actions/setup-python@v2
+      if: contains(matrix.modules, 'pyspark')
+      with:
+        python-version: pypy3
+        architecture: x64
+    - name: Install Python 2.7
+      uses: actions/setup-python@v2
+      if: contains(matrix.modules, 'pyspark')
+      with:
+        python-version: 2.7
+        architecture: x64
+    - name: Install Python 3.8
+      uses: actions/setup-python@v2
+      # We should install one Python that is higher then 3+ for SQL and Yarn 
because:
+      # - SQL component also has Python related tests, for example, 
IntegratedUDFTestUtils.
+      # - Yarn has a Python specific test too, for example, YarnClusterSuite.
+      if: contains(matrix.modules, 'yarn') || contains(matrix.modules, 
'pyspark') || (contains(matrix.modules, 'sql') && !contains(matrix.modules, 
'sql-'))
+      with:
+        python-version: 3.8
+        architecture: x64
+    - name: Install Python packages (Python 2.7 and PyPy3)
+      if: contains(matrix.modules, 'pyspark')
+      # PyArrow is not supported in PyPy yet, see ARROW-2651.
+      # TODO(SPARK-32247): scipy installation with PyPy fails for an unknown 
reason.
+      run: |
+        python2.7 -m pip install numpy pyarrow pandas scipy xmlrunner
+        python2.7 -m pip list
+        # PyPy does not have xmlrunner
+        pypy3 -m pip install numpy pandas
+        pypy3 -m pip list
+    - name: Install Python packages (Python 3.8)
+      if: contains(matrix.modules, 'pyspark') || (contains(matrix.modules, 
'sql') && !contains(matrix.modules, 'sql-'))
+      run: |
+        python3.8 -m pip install numpy pyarrow pandas scipy xmlrunner
+        python3.8 -m pip list
+    # SparkR
+    - name: Install R 4.0
+      if: contains(matrix.modules, 'sparkr')
+      run: |
+        sudo sh -c "echo 'deb https://cloud.r-project.org/bin/linux/ubuntu 
bionic-cran40/' >> /etc/apt/sources.list"
+        curl -sL 
"https://keyserver.ubuntu.com/pks/lookup?op=get&search=0xE298A3A825C0D65DFD57CBB651716619E084DAB9";
 | sudo apt-key add
+        sudo apt-get update
+        sudo apt-get install -y r-base r-base-dev libcurl4-openssl-dev
+    - name: Install R packages
+      if: contains(matrix.modules, 'sparkr')
+      run: |
+        # qpdf is required to reduce the size of PDFs to make CRAN check pass. 
See SPARK-32497.
+        sudo apt-get install -y libcurl4-openssl-dev qpdf
+        sudo Rscript -e "install.packages(c('knitr', 'rmarkdown', 'testthat', 
'devtools', 'e1071', 'survival', 'arrow', 'roxygen2'), 
repos='https://cloud.r-project.org/')"
+        # Show installed packages in R.
+        sudo Rscript -e 'pkg_list <- as.data.frame(installed.packages()[, 
c(1,3:4)]); pkg_list[is.na(pkg_list$Priority), 1:2, drop = FALSE]'
+    # Run the tests.
+    - name: Run tests
+      run: |
+        # Hive tests become flaky when running in parallel as it's too 
intensive.
+        if [[ "$MODULES_TO_TEST" == "hive" ]]; then export SERIAL_SBT_TESTS=1; 
fi
+        mkdir -p ~/.m2
+        ./dev/run-tests --parallelism 2 --modules "$MODULES_TO_TEST" 
--included-tags "$INCLUDED_TAGS" --excluded-tags "$EXCLUDED_TAGS"
+        rm -rf ~/.m2/repository/org/apache/spark
+    - name: Upload test results to report
+      if: always()
+      uses: actions/upload-artifact@v2
+      with:
+        name: test-results-${{ matrix.modules }}-${{ matrix.comment }}-${{ 
matrix.java }}-${{ matrix.hadoop }}-${{ matrix.hive }}
+        path: "**/target/test-reports/*.xml"
+    - name: Upload unit tests log files
+      if: failure()
+      uses: actions/upload-artifact@v2
+      with:
+        name: unit-tests-log-${{ matrix.modules }}-${{ matrix.comment }}-${{ 
matrix.java }}-${{ matrix.hadoop }}-${{ matrix.hive }}
+        path: "**/target/unit-tests.log"
+
+  # Static analysis, and documentation build
+  lint:
+    name: Linters, licenses, dependencies and documentation generation
+    runs-on: ubuntu-latest
+    steps:
+    - name: Checkout Spark repository
+      uses: actions/checkout@v2
+    - name: Cache Maven local repository
+      uses: actions/cache@v2
+      with:
+        path: ~/.m2/repository
+        key: docs-maven-repo-${{ hashFiles('**/pom.xml') }}
+        restore-keys: |
+          docs-maven-
+    - name: Install JDK 1.8
+      uses: actions/setup-java@v1
+      with:
+        java-version: 1.8
+    - name: Install Python 3.6
+      uses: actions/setup-python@v2
+      with:
+        python-version: 3.6
+        architecture: x64
+    - name: Install Python linter dependencies
+      run: |
+        pip3 install flake8 sphinx numpy
+    - name: Install R 4.0
+      run: |
+        sudo sh -c "echo 'deb https://cloud.r-project.org/bin/linux/ubuntu 
bionic-cran40/' >> /etc/apt/sources.list"
+        curl -sL 
"https://keyserver.ubuntu.com/pks/lookup?op=get&search=0xE298A3A825C0D65DFD57CBB651716619E084DAB9";
 | sudo apt-key add
+        sudo apt-get update
+        sudo apt-get install -y r-base r-base-dev libcurl4-openssl-dev
+    - name: Install R linter dependencies and SparkR
+      run: |
+        sudo apt-get install -y libcurl4-openssl-dev
+        sudo Rscript -e "install.packages(c('devtools'), 
repos='https://cloud.r-project.org/')"
+        sudo Rscript -e "devtools::install_github('jimhester/[email protected]')"
+        ./R/install-dev.sh
+    - name: Install Ruby 2.7 for documentation generation
+      uses: actions/setup-ruby@v1
+      with:
+        ruby-version: 2.7
+    - name: Install dependencies for documentation generation
+      run: |
+        sudo apt-get install -y libcurl4-openssl-dev pandoc
+        pip install sphinx mkdocs numpy
+        gem install jekyll jekyll-redirect-from rouge
+        sudo Rscript -e "install.packages(c('devtools', 'testthat', 'knitr', 
'rmarkdown', 'roxygen2'), repos='https://cloud.r-project.org/')"
+    - name: Scala linter
+      run: ./dev/lint-scala
+    - name: Java linter
+      run: ./dev/lint-java
+    - name: Python linter
+      run: ./dev/lint-python
+    - name: R linter
+      run: ./dev/lint-r
+    - name: License test
+      run: ./dev/check-license
+    - name: Dependencies test
+      run: ./dev/test-dependencies.sh
+    - name: Run documentation build
+      run: |
+        cd docs
+        jekyll build
+
+  java11:
+    name: Java 11 build
+    runs-on: ubuntu-latest
+    steps:
+    - name: Checkout Spark repository
+      uses: actions/checkout@v2
+    - name: Cache Maven local repository
+      uses: actions/cache@v2
+      with:
+        path: ~/.m2/repository
+        key: java11-maven-${{ hashFiles('**/pom.xml') }}
+        restore-keys: |
+          java11-maven-
+    - name: Install Java 11
+      uses: actions/setup-java@v1
+      with:
+        java-version: 11
+    - name: Build with Maven
+      run: |
+        export MAVEN_OPTS="-Xmx2g -XX:ReservedCodeCacheSize=1g 
-Dorg.slf4j.simpleLogger.defaultLogLevel=WARN"
+        export MAVEN_CLI_OPTS="--no-transfer-progress"
+        mkdir -p ~/.m2
+        ./build/mvn $MAVEN_CLI_OPTS -DskipTests -Pyarn -Pmesos -Pkubernetes 
-Phive -Phive-thriftserver -Phadoop-cloud -Djava.version=11 install
+        rm -rf ~/.m2/repository/org/apache/spark
diff --git a/.github/workflows/master.yml b/.github/workflows/master.yml
deleted file mode 100644
index 36f9e33..0000000
--- a/.github/workflows/master.yml
+++ /dev/null
@@ -1,156 +0,0 @@
-name: master
-
-on:
-  push:
-    branches:
-    - branch-3.0
-  pull_request:
-    branches:
-    - branch-3.0
-
-jobs:
-  build:
-
-    runs-on: ubuntu-latest
-    strategy:
-      matrix:
-        java: [ '1.8', '11' ]
-        hadoop: [ 'hadoop-2.7', 'hadoop-3.2' ]
-        hive: [ 'hive-1.2', 'hive-2.3' ]
-        exclude:
-        - java: '11'
-          hive: 'hive-1.2'
-        - hadoop: 'hadoop-3.2'
-          hive: 'hive-1.2'
-    name: Build Spark - JDK${{ matrix.java }}/${{ matrix.hadoop }}/${{ 
matrix.hive }}
-
-    steps:
-    - uses: actions/checkout@master
-    # We split caches because GitHub Action Cache has a 400MB-size limit.
-    - uses: actions/cache@v1
-      with:
-        path: build
-        key: build-${{ hashFiles('**/pom.xml') }}
-        restore-keys: |
-          build-
-    - uses: actions/cache@v1
-      with:
-        path: ~/.m2/repository/com
-        key: ${{ matrix.java }}-${{ matrix.hadoop }}-maven-com-${{ 
hashFiles('**/pom.xml') }}
-        restore-keys: |
-          ${{ matrix.java }}-${{ matrix.hadoop }}-maven-com-
-    - uses: actions/cache@v1
-      with:
-        path: ~/.m2/repository/org
-        key: ${{ matrix.java }}-${{ matrix.hadoop }}-maven-org-${{ 
hashFiles('**/pom.xml') }}
-        restore-keys: |
-          ${{ matrix.java }}-${{ matrix.hadoop }}-maven-org-
-    - uses: actions/cache@v1
-      with:
-        path: ~/.m2/repository/net
-        key: ${{ matrix.java }}-${{ matrix.hadoop }}-maven-net-${{ 
hashFiles('**/pom.xml') }}
-        restore-keys: |
-          ${{ matrix.java }}-${{ matrix.hadoop }}-maven-net-
-    - uses: actions/cache@v1
-      with:
-        path: ~/.m2/repository/io
-        key: ${{ matrix.java }}-${{ matrix.hadoop }}-maven-io-${{ 
hashFiles('**/pom.xml') }}
-        restore-keys: |
-          ${{ matrix.java }}-${{ matrix.hadoop }}-maven-io-
-    - name: Set up JDK ${{ matrix.java }}
-      uses: actions/setup-java@v1
-      with:
-        java-version: ${{ matrix.java }}
-    - name: Build with Maven
-      run: |
-        export MAVEN_OPTS="-Xmx2g -XX:ReservedCodeCacheSize=1g 
-Dorg.slf4j.simpleLogger.defaultLogLevel=WARN"
-        export MAVEN_CLI_OPTS="--no-transfer-progress"
-        mkdir -p ~/.m2
-        ./build/mvn $MAVEN_CLI_OPTS -DskipTests -Pyarn -Pmesos -Pkubernetes 
-Phive -P${{ matrix.hive }} -Phive-thriftserver -P${{ matrix.hadoop }} 
-Phadoop-cloud -Djava.version=${{ matrix.java }} install
-        rm -rf ~/.m2/repository/org/apache/spark
-
-
-  lint:
-    runs-on: ubuntu-latest
-    name: Linters (Java/Scala/Python), licenses, dependencies
-    steps:
-    - uses: actions/checkout@master
-    - uses: actions/setup-java@v1
-      with:
-        java-version: '11'
-    - uses: actions/setup-python@v1
-      with:
-        python-version: '3.x'
-        architecture: 'x64'
-    - name: Scala
-      run: ./dev/lint-scala
-    - name: Java
-      run: ./dev/lint-java
-    - name: Python
-      run: |
-        pip install flake8 sphinx numpy
-        ./dev/lint-python
-    - name: License
-      run: ./dev/check-license
-    - name: Dependencies
-      run: ./dev/test-dependencies.sh
-
-  lintr:
-    runs-on: ubuntu-latest
-    name: Linter (R)
-    steps:
-    - uses: actions/checkout@master
-    - uses: actions/setup-java@v1
-      with:
-        java-version: '11'
-    - uses: r-lib/actions/setup-r@v1
-      with:
-        r-version: '3.6.2'
-    - name: Install lib
-      run: |
-        sudo apt-get install -y libcurl4-openssl-dev
-    - name: install R packages
-      run: |
-        sudo Rscript -e "install.packages(c('curl', 'xml2', 'httr', 
'devtools', 'testthat', 'knitr', 'rmarkdown', 'roxygen2', 'e1071', 'survival'), 
repos='https://cloud.r-project.org/')"
-        sudo Rscript -e "devtools::install_github('jimhester/[email protected]')"
-    - name: package and install SparkR
-      run: ./R/install-dev.sh
-    - name: lint-r
-      run: ./dev/lint-r
-
-  docs:
-    runs-on: ubuntu-latest
-    name: Generate documents
-    steps:
-    - uses: actions/checkout@master
-    - uses: actions/cache@v1
-      with:
-        path: ~/.m2/repository
-        key: docs-maven-repo-${{ hashFiles('**/pom.xml') }}
-        restore-keys: |
-          docs-maven-repo-
-    - uses: actions/setup-java@v1
-      with:
-        java-version: '1.8'
-    - uses: actions/setup-python@v1
-      with:
-        python-version: '3.x'
-        architecture: 'x64'
-    - uses: actions/setup-ruby@v1
-      with:
-        ruby-version: '2.7'
-    - uses: r-lib/actions/setup-r@v1
-      with:
-        r-version: '3.6.2'
-    - name: Install lib and pandoc
-      run: |
-        sudo apt-get install -y libcurl4-openssl-dev pandoc
-    - name: Install packages
-      run: |
-        pip install sphinx mkdocs numpy
-        gem install jekyll jekyll-redirect-from rouge
-        sudo Rscript -e "install.packages(c('curl', 'xml2', 'httr', 
'devtools', 'testthat', 'knitr', 'rmarkdown', 'roxygen2', 'e1071', 'survival'), 
repos='https://cloud.r-project.org/')"
-    - name: Run jekyll build
-      run: |
-        cd docs
-        jekyll build
diff --git a/.github/workflows/test_report.yml 
b/.github/workflows/test_report.yml
new file mode 100644
index 0000000..93cdb86
--- /dev/null
+++ b/.github/workflows/test_report.yml
@@ -0,0 +1,24 @@
+name: Report test results
+on:
+  workflow_run:
+    workflows: ["Build and test"]
+    types:
+      - completed
+
+jobs:
+  test_report:
+    runs-on: ubuntu-latest
+    steps:
+    - name: Download test results to report
+      uses: dawidd6/action-download-artifact@v2
+      with:
+        github_token: ${{ secrets.GITHUB_TOKEN }}
+        workflow: ${{ github.event.workflow_run.workflow_id }}
+        commit: ${{ github.event.workflow_run.head_commit.id }}
+    - name: Publish test report
+      uses: scacap/action-surefire-report@v1
+      with:
+        check_name: Report test results
+        github_token: ${{ secrets.GITHUB_TOKEN }}
+        report_paths: "**/target/test-reports/*.xml"
+        commit: ${{ github.event.workflow_run.head_commit.id }}
diff --git 
a/common/tags/src/test/java/org/apache/spark/tags/GitHubActionsUnstableTest.java
 
b/common/tags/src/test/java/org/apache/spark/tags/GitHubActionsUnstableTest.java
new file mode 100644
index 0000000..a602656
--- /dev/null
+++ 
b/common/tags/src/test/java/org/apache/spark/tags/GitHubActionsUnstableTest.java
@@ -0,0 +1,30 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.spark.tags;
+
+import org.scalatest.TagAnnotation;
+
+import java.lang.annotation.ElementType;
+import java.lang.annotation.Retention;
+import java.lang.annotation.RetentionPolicy;
+import java.lang.annotation.Target;
+
+@TagAnnotation
+@Retention(RetentionPolicy.RUNTIME)
+@Target({ElementType.METHOD, ElementType.TYPE})
+public @interface GitHubActionsUnstableTest { }
diff --git a/common/tags/src/test/java/org/apache/spark/tags/SlowHiveTest.java 
b/common/tags/src/test/java/org/apache/spark/tags/SlowHiveTest.java
new file mode 100644
index 0000000..a7e6f35
--- /dev/null
+++ b/common/tags/src/test/java/org/apache/spark/tags/SlowHiveTest.java
@@ -0,0 +1,30 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.spark.tags;
+
+import org.scalatest.TagAnnotation;
+
+import java.lang.annotation.ElementType;
+import java.lang.annotation.Retention;
+import java.lang.annotation.RetentionPolicy;
+import java.lang.annotation.Target;
+
+@TagAnnotation
+@Retention(RetentionPolicy.RUNTIME)
+@Target({ElementType.METHOD, ElementType.TYPE})
+public @interface SlowHiveTest { }
diff --git a/dev/run-pip-tests b/dev/run-pip-tests
index 81e33a6..b322d3f 100755
--- a/dev/run-pip-tests
+++ b/dev/run-pip-tests
@@ -63,7 +63,7 @@ fi
 PYSPARK_VERSION=$(python3 -c 
"exec(open('python/pyspark/version.py').read());print(__version__)")
 PYSPARK_DIST="$FWDIR/python/dist/pyspark-$PYSPARK_VERSION.tar.gz"
 # The pip install options we use for all the pip commands
-PIP_OPTIONS="--upgrade --no-cache-dir --force-reinstall "
+PIP_OPTIONS="--upgrade --no-cache-dir --force-reinstall"
 # Test both regular user and edit/dev install modes.
 PIP_COMMANDS=("pip install $PIP_OPTIONS $PYSPARK_DIST"
              "pip install $PIP_OPTIONS -e python/")
@@ -80,8 +80,12 @@ for python in "${PYTHON_EXECS[@]}"; do
     VIRTUALENV_PATH="$VIRTUALENV_BASE"/$python
     rm -rf "$VIRTUALENV_PATH"
     if [ -n "$USE_CONDA" ]; then
+      if [ -f "$CONDA_PREFIX/etc/profile.d/conda.sh" ]; then
+        # See also https://github.com/conda/conda/issues/7980
+        source "$CONDA_PREFIX/etc/profile.d/conda.sh"
+      fi
       conda create -y -p "$VIRTUALENV_PATH" python=$python numpy pandas pip 
setuptools
-      source activate "$VIRTUALENV_PATH"
+      source activate "$VIRTUALENV_PATH" || conda activate "$VIRTUALENV_PATH"
     else
       mkdir -p "$VIRTUALENV_PATH"
       virtualenv --python=$python "$VIRTUALENV_PATH"
@@ -124,7 +128,7 @@ for python in "${PYTHON_EXECS[@]}"; do
 
     # conda / virtualenv environments need to be deactivated differently
     if [ -n "$USE_CONDA" ]; then
-      source deactivate
+      source deactivate || conda deactivate
     else
       deactivate
     fi
diff --git a/dev/run-tests.py b/dev/run-tests.py
index 5255a77..976854d 100755
--- a/dev/run-tests.py
+++ b/dev/run-tests.py
@@ -49,13 +49,12 @@ def determine_modules_for_files(filenames):
     ['pyspark-core', 'sql']
     >>> [x.name for x in 
determine_modules_for_files(["file_not_matched_by_any_subproject"])]
     ['root']
-    >>> [x.name for x in determine_modules_for_files( \
-            [".github/workflows/master.yml", "appveyor.yml"])]
+    >>> [x.name for x in determine_modules_for_files(["appveyor.yml"])]
     []
     """
     changed_modules = set()
     for filename in filenames:
-        if filename in (".github/workflows/master.yml", "appveyor.yml"):
+        if filename in ("appveyor.yml",):
             continue
         matched_at_least_one_module = False
         for module in modules.all_modules:
@@ -101,28 +100,52 @@ def setup_test_environ(environ):
         os.environ[k] = v
 
 
-def determine_modules_to_test(changed_modules):
+def determine_modules_to_test(changed_modules, deduplicated=True):
     """
     Given a set of modules that have changed, compute the transitive closure 
of those modules'
     dependent modules in order to determine the set of modules that should be 
tested.
 
     Returns a topologically-sorted list of modules (ties are broken by sorting 
on module names).
+    If ``deduplicated`` is disabled, the modules are returned without tacking 
the deduplication
+    by dependencies into account.
 
     >>> [x.name for x in determine_modules_to_test([modules.root])]
     ['root']
     >>> [x.name for x in determine_modules_to_test([modules.build])]
     ['root']
+    >>> [x.name for x in determine_modules_to_test([modules.core])]
+    ['root']
+    >>> [x.name for x in determine_modules_to_test([modules.launcher])]
+    ['root']
     >>> [x.name for x in determine_modules_to_test([modules.graphx])]
     ['graphx', 'examples']
-    >>> x = [x.name for x in determine_modules_to_test([modules.sql])]
-    >>> x # doctest: +NORMALIZE_WHITESPACE
+    >>> [x.name for x in determine_modules_to_test([modules.sql])]
+    ... # doctest: +NORMALIZE_WHITESPACE
     ['sql', 'avro', 'hive', 'mllib', 'sql-kafka-0-10', 'examples', 
'hive-thriftserver',
      'pyspark-sql', 'repl', 'sparkr', 'pyspark-mllib', 'pyspark-ml']
+    >>> sorted([x.name for x in determine_modules_to_test(
+    ...     [modules.sparkr, modules.sql], deduplicated=False)])
+    ... # doctest: +NORMALIZE_WHITESPACE
+    ['avro', 'examples', 'hive', 'hive-thriftserver', 'mllib', 'pyspark-ml',
+     'pyspark-mllib', 'pyspark-sql', 'repl', 'sparkr', 'sql', 'sql-kafka-0-10']
+    >>> sorted([x.name for x in determine_modules_to_test(
+    ...     [modules.sql, modules.core], deduplicated=False)])
+    ... # doctest: +NORMALIZE_WHITESPACE
+    ['avro', 'catalyst', 'core', 'examples', 'graphx', 'hive', 
'hive-thriftserver',
+     'mllib', 'mllib-local', 'pyspark-core', 'pyspark-ml', 'pyspark-mllib',
+     'pyspark-sql', 'pyspark-streaming', 'repl', 'root',
+     'sparkr', 'sql', 'sql-kafka-0-10', 'streaming', 'streaming-kafka-0-10',
+     'streaming-kinesis-asl']
     """
     modules_to_test = set()
     for module in changed_modules:
-        modules_to_test = 
modules_to_test.union(determine_modules_to_test(module.dependent_modules))
+        modules_to_test = modules_to_test.union(
+            determine_modules_to_test(module.dependent_modules, deduplicated))
     modules_to_test = modules_to_test.union(set(changed_modules))
+
+    if not deduplicated:
+        return modules_to_test
+
     # If we need to run all of the tests, then we should short-circuit and 
return 'root'
     if modules.root in modules_to_test:
         return [modules.root]
@@ -415,7 +438,7 @@ def run_scala_tests_sbt(test_modules, test_profiles):
     exec_sbt(profiles_and_goals)
 
 
-def run_scala_tests(build_tool, extra_profiles, test_modules, excluded_tags):
+def run_scala_tests(build_tool, extra_profiles, test_modules, excluded_tags, 
included_tags):
     """Function to properly execute all tests passed in as a set from the
     `determine_test_suites` function"""
     set_title_and_block("Running Spark unit tests", "BLOCK_SPARK_UNIT_TESTS")
@@ -425,6 +448,8 @@ def run_scala_tests(build_tool, extra_profiles, 
test_modules, excluded_tags):
     test_profiles = extra_profiles + \
         list(set(itertools.chain.from_iterable(m.build_profile_flags for m in 
test_modules)))
 
+    if included_tags:
+        test_profiles += ['-Dtest.include.tags=' + ",".join(included_tags)]
     if excluded_tags:
         test_profiles += ['-Dtest.exclude.tags=' + ",".join(excluded_tags)]
 
@@ -532,6 +557,24 @@ def parse_opts():
         "-p", "--parallelism", type=int, default=8,
         help="The number of suites to test in parallel (default %(default)d)"
     )
+    parser.add_argument(
+        "-m", "--modules", type=str,
+        default=None,
+        help="A comma-separated list of modules to test "
+             "(default: %s)" % ",".join(sorted([m.name for m in 
modules.all_modules]))
+    )
+    parser.add_argument(
+        "-e", "--excluded-tags", type=str,
+        default=None,
+        help="A comma-separated list of tags to exclude in the tests, "
+             "e.g., org.apache.spark.tags.ExtendedHiveTest "
+    )
+    parser.add_argument(
+        "-i", "--included-tags", type=str,
+        default=None,
+        help="A comma-separated list of tags to include in the tests, "
+             "e.g., org.apache.spark.tags.ExtendedHiveTest "
+    )
 
     args, unknown = parser.parse_known_args()
     if unknown:
@@ -564,11 +607,20 @@ def main():
               " install one and retry.")
         sys.exit(2)
 
-    # install SparkR
-    if which("R"):
-        run_cmd([os.path.join(SPARK_HOME, "R", "install-dev.sh")])
-    else:
-        print("Cannot install SparkR as R was not found in PATH")
+    # Install SparkR
+    should_only_test_modules = opts.modules is not None
+    test_modules = []
+    if should_only_test_modules:
+        str_test_modules = [m.strip() for m in opts.modules.split(",")]
+        test_modules = [m for m in modules.all_modules if m.name in 
str_test_modules]
+
+    if not should_only_test_modules or modules.sparkr in test_modules:
+        # If tests modules are specified, we will not run R linter.
+        # SparkR needs the manual SparkR installation.
+        if which("R"):
+            run_cmd([os.path.join(SPARK_HOME, "R", "install-dev.sh")])
+        else:
+            print("Cannot install SparkR as R was not found in PATH")
 
     if os.environ.get("AMPLAB_JENKINS"):
         # if we're on the Amplab Jenkins build servers setup variables
@@ -582,27 +634,69 @@ def main():
         # /home/jenkins/anaconda2/envs/py36/bin
         os.environ["PATH"] = "/home/anaconda/envs/py36/bin:" + 
os.environ.get("PATH")
     else:
-        # else we're running locally and can use local settings
+        # else we're running locally or Github Actions.
         build_tool = "sbt"
         hadoop_version = os.environ.get("HADOOP_PROFILE", "hadoop2.7")
         hive_version = os.environ.get("HIVE_PROFILE", "hive2.3")
-        test_env = "local"
+        if "GITHUB_ACTIONS" in os.environ:
+            test_env = "github_actions"
+        else:
+            test_env = "local"
 
     print("[info] Using build tool", build_tool, "with Hadoop profile", 
hadoop_version,
           "and Hive profile", hive_version, "under environment", test_env)
     extra_profiles = get_hadoop_profiles(hadoop_version) + 
get_hive_profiles(hive_version)
 
-    changed_modules = None
-    changed_files = None
-    if test_env == "amplab_jenkins" and os.environ.get("AMP_JENKINS_PRB"):
+    changed_modules = []
+    changed_files = []
+    included_tags = []
+    excluded_tags = []
+    if should_only_test_modules:
+        # If we're running the tests in Github Actions, attempt to detect and 
test
+        # only the affected modules.
+        if test_env == "github_actions":
+            if os.environ["GITHUB_BASE_REF"] != "":
+                # Pull requests
+                changed_files = identify_changed_files_from_git_commits(
+                    os.environ["GITHUB_SHA"], 
target_branch=os.environ["GITHUB_BASE_REF"])
+            else:
+                # Build for each commit.
+                changed_files = identify_changed_files_from_git_commits(
+                    os.environ["GITHUB_SHA"], 
target_ref=os.environ["GITHUB_PREV_SHA"])
+
+            modules_to_test = determine_modules_to_test(
+                determine_modules_for_files(changed_files), deduplicated=False)
+
+            if modules.root not in modules_to_test:
+                # If root module is not found, only test the intersected 
modules.
+                # If root module is found, just run the modules as specified 
initially.
+                test_modules = 
list(set(modules_to_test).intersection(test_modules))
+
+        changed_modules = test_modules
+        if len(changed_modules) == 0:
+            print("[info] There are no modules to test, exiting without 
testing.")
+            return
+
+    # If we're running the tests in AMPLab Jenkins, calculate the diff from 
the targeted branch, and
+    # detect modules to test.
+    elif test_env == "amplab_jenkins" and os.environ.get("AMP_JENKINS_PRB"):
         target_branch = os.environ["ghprbTargetBranch"]
         changed_files = identify_changed_files_from_git_commits("HEAD", 
target_branch=target_branch)
         changed_modules = determine_modules_for_files(changed_files)
+        test_modules = determine_modules_to_test(changed_modules)
         excluded_tags = determine_tags_to_exclude(changed_modules)
 
+    # If there is no changed module found, tests all.
     if not changed_modules:
         changed_modules = [modules.root]
-        excluded_tags = []
+    if not test_modules:
+        test_modules = determine_modules_to_test(changed_modules)
+
+    if opts.excluded_tags:
+        excluded_tags.extend([t.strip() for t in 
opts.excluded_tags.split(",")])
+    if opts.included_tags:
+        included_tags.extend([t.strip() for t in 
opts.included_tags.split(",")])
+
     print("[info] Found the following changed modules:",
           ", ".join(x.name for x in changed_modules))
 
@@ -615,33 +709,32 @@ def main():
         test_environ.update(m.environ)
     setup_test_environ(test_environ)
 
-    test_modules = determine_modules_to_test(changed_modules)
-
-    # license checks
-    run_apache_rat_checks()
-
-    # style checks
-    if not changed_files or any(f.endswith(".scala")
-                                or f.endswith("scalastyle-config.xml")
-                                for f in changed_files):
-        run_scala_style_checks(extra_profiles)
     should_run_java_style_checks = False
-    if not changed_files or any(f.endswith(".java")
-                                or f.endswith("checkstyle.xml")
-                                or f.endswith("checkstyle-suppressions.xml")
-                                for f in changed_files):
-        # Run SBT Checkstyle after the build to prevent a side-effect to the 
build.
-        should_run_java_style_checks = True
-    if not changed_files or any(f.endswith("lint-python")
-                                or f.endswith("tox.ini")
-                                or f.endswith(".py")
-                                for f in changed_files):
-        run_python_style_checks()
-    if not changed_files or any(f.endswith(".R")
-                                or f.endswith("lint-r")
-                                or f.endswith(".lintr")
-                                for f in changed_files):
-        run_sparkr_style_checks()
+    if not should_only_test_modules:
+        # license checks
+        run_apache_rat_checks()
+
+        # style checks
+        if not changed_files or any(f.endswith(".scala")
+                                    or f.endswith("scalastyle-config.xml")
+                                    for f in changed_files):
+            run_scala_style_checks(extra_profiles)
+        if not changed_files or any(f.endswith(".java")
+                                    or f.endswith("checkstyle.xml")
+                                    or 
f.endswith("checkstyle-suppressions.xml")
+                                    for f in changed_files):
+            # Run SBT Checkstyle after the build to prevent a side-effect to 
the build.
+            should_run_java_style_checks = True
+        if not changed_files or any(f.endswith("lint-python")
+                                    or f.endswith("tox.ini")
+                                    or f.endswith(".py")
+                                    for f in changed_files):
+            run_python_style_checks()
+        if not changed_files or any(f.endswith(".R")
+                                    or f.endswith("lint-r")
+                                    or f.endswith(".lintr")
+                                    for f in changed_files):
+            run_sparkr_style_checks()
 
     # determine if docs were changed and if we're inside the amplab environment
     # note - the below commented out until *all* Jenkins workers can get 
`jekyll` installed
@@ -663,7 +756,7 @@ def main():
         build_spark_assembly_sbt(extra_profiles, should_run_java_style_checks)
 
     # run the test suites
-    run_scala_tests(build_tool, extra_profiles, test_modules, excluded_tags)
+    run_scala_tests(build_tool, extra_profiles, test_modules, excluded_tags, 
included_tags)
 
     modules_with_python_tests = [m for m in test_modules if 
m.python_test_goals]
     if modules_with_python_tests:
diff --git a/dev/sparktestsupport/modules.py b/dev/sparktestsupport/modules.py
index 391e4bb..75bdec0 100644
--- a/dev/sparktestsupport/modules.py
+++ b/dev/sparktestsupport/modules.py
@@ -100,9 +100,75 @@ tags = Module(
     ]
 )
 
+kvstore = Module(
+    name="kvstore",
+    dependencies=[tags],
+    source_file_regexes=[
+        "common/kvstore/",
+    ],
+    sbt_test_goals=[
+        "kvstore/test",
+    ],
+)
+
+network_common = Module(
+    name="network-common",
+    dependencies=[tags],
+    source_file_regexes=[
+        "common/network-common/",
+    ],
+    sbt_test_goals=[
+        "network-common/test",
+    ],
+)
+
+network_shuffle = Module(
+    name="network-shuffle",
+    dependencies=[tags],
+    source_file_regexes=[
+        "common/network-shuffle/",
+    ],
+    sbt_test_goals=[
+        "network-shuffle/test",
+    ],
+)
+
+unsafe = Module(
+    name="unsafe",
+    dependencies=[tags],
+    source_file_regexes=[
+        "common/unsafe",
+    ],
+    sbt_test_goals=[
+        "unsafe/test",
+    ],
+)
+
+launcher = Module(
+    name="launcher",
+    dependencies=[tags],
+    source_file_regexes=[
+        "launcher/",
+    ],
+    sbt_test_goals=[
+        "launcher/test",
+    ],
+)
+
+core = Module(
+    name="core",
+    dependencies=[kvstore, network_common, network_shuffle, unsafe, launcher],
+    source_file_regexes=[
+        "core/",
+    ],
+    sbt_test_goals=[
+        "core/test",
+    ],
+)
+
 catalyst = Module(
     name="catalyst",
-    dependencies=[tags],
+    dependencies=[tags, core],
     source_file_regexes=[
         "sql/catalyst/",
     ],
@@ -111,7 +177,6 @@ catalyst = Module(
     ],
 )
 
-
 sql = Module(
     name="sql",
     dependencies=[catalyst],
@@ -123,7 +188,6 @@ sql = Module(
     ],
 )
 
-
 hive = Module(
     name="hive",
     dependencies=[sql],
@@ -142,7 +206,6 @@ hive = Module(
     ]
 )
 
-
 repl = Module(
     name="repl",
     dependencies=[hive],
@@ -154,7 +217,6 @@ repl = Module(
     ],
 )
 
-
 hive_thriftserver = Module(
     name="hive-thriftserver",
     dependencies=[hive],
@@ -192,7 +254,6 @@ sql_kafka = Module(
     ]
 )
 
-
 sketch = Module(
     name="sketch",
     dependencies=[tags],
@@ -204,10 +265,9 @@ sketch = Module(
     ]
 )
 
-
 graphx = Module(
     name="graphx",
-    dependencies=[tags],
+    dependencies=[tags, core],
     source_file_regexes=[
         "graphx/",
     ],
@@ -216,10 +276,9 @@ graphx = Module(
     ]
 )
 
-
 streaming = Module(
     name="streaming",
-    dependencies=[tags],
+    dependencies=[tags, core],
     source_file_regexes=[
         "streaming",
     ],
@@ -235,7 +294,7 @@ streaming = Module(
 # fail other PRs.
 streaming_kinesis_asl = Module(
     name="streaming-kinesis-asl",
-    dependencies=[tags],
+    dependencies=[tags, core],
     source_file_regexes=[
         "external/kinesis-asl/",
         "external/kinesis-asl-assembly/",
@@ -254,21 +313,23 @@ streaming_kinesis_asl = Module(
 
 streaming_kafka_0_10 = Module(
     name="streaming-kafka-0-10",
-    dependencies=[streaming],
+    dependencies=[streaming, core],
     source_file_regexes=[
         # The ending "/" is necessary otherwise it will include "sql-kafka" 
codes
         "external/kafka-0-10/",
         "external/kafka-0-10-assembly",
+        "external/kafka-0-10-token-provider",
     ],
     sbt_test_goals=[
         "streaming-kafka-0-10/test",
+        "token-provider-kafka-0-10/test"
     ]
 )
 
 
 mllib_local = Module(
     name="mllib-local",
-    dependencies=[tags],
+    dependencies=[tags, core],
     source_file_regexes=[
         "mllib-local",
     ],
@@ -302,10 +363,9 @@ examples = Module(
     ]
 )
 
-
 pyspark_core = Module(
     name="pyspark-core",
-    dependencies=[],
+    dependencies=[core],
     source_file_regexes=[
         "python/(?!pyspark/(ml|mllib|sql|streaming))"
     ],
@@ -339,7 +399,6 @@ pyspark_core = Module(
     ]
 )
 
-
 pyspark_sql = Module(
     name="pyspark-sql",
     dependencies=[pyspark_core, hive, avro],
@@ -578,7 +637,7 @@ spark_ganglia_lgpl = Module(
 # No other modules should directly depend on this module.
 root = Module(
     name="root",
-    dependencies=[build],  # Changes to build should trigger all tests.
+    dependencies=[build, core],  # Changes to build should trigger all tests.
     source_file_regexes=[],
     # In order to run all of the tests, enable every test profile:
     build_profile_flags=list(set(
diff --git a/project/SparkBuild.scala b/project/SparkBuild.scala
index a0e138c..7a9634a 100644
--- a/project/SparkBuild.scala
+++ b/project/SparkBuild.scala
@@ -1017,6 +1017,15 @@ object TestSettings {
       sys.props.get("test.exclude.tags").map { tags =>
         Seq("--exclude-categories=" + tags)
       }.getOrElse(Nil): _*),
+    // Include tags defined in a system property
+    testOptions in Test += Tests.Argument(TestFrameworks.ScalaTest,
+      sys.props.get("test.include.tags").map { tags =>
+        tags.split(",").flatMap { tag => Seq("-n", tag) }.toSeq
+      }.getOrElse(Nil): _*),
+    testOptions in Test += Tests.Argument(TestFrameworks.JUnit,
+      sys.props.get("test.include.tags").map { tags =>
+        Seq("--include-categories=" + tags)
+      }.getOrElse(Nil): _*),
     // Show full stack trace and duration in test cases.
     testOptions in Test += Tests.Argument("-oDF"),
     testOptions in Test += Tests.Argument(TestFrameworks.JUnit, "-v", "-a"),
diff --git a/python/pyspark/sql/tests/test_arrow.py 
b/python/pyspark/sql/tests/test_arrow.py
index 42f064a..15c5cf1 100644
--- a/python/pyspark/sql/tests/test_arrow.py
+++ b/python/pyspark/sql/tests/test_arrow.py
@@ -21,6 +21,9 @@ import threading
 import time
 import unittest
 import warnings
+import sys
+if sys.version >= '3':
+    basestring = unicode = str
 
 from pyspark import SparkContext, SparkConf
 from pyspark.sql import Row, SparkSession
diff --git a/python/pyspark/sql/tests/test_types.py 
b/python/pyspark/sql/tests/test_types.py
index 81402f5..016cafd6 100644
--- a/python/pyspark/sql/tests/test_types.py
+++ b/python/pyspark/sql/tests/test_types.py
@@ -725,7 +725,8 @@ class TypesTests(ReusedSQLTestCase):
         if sys.version_info[0] < 3:
             all_types = set(['c', 'b', 'B', 'u', 'h', 'H', 'i', 'I', 'l', 'L', 
'f', 'd'])
         else:
-            all_types = set(array.typecodes)
+            # PyPy seems not having array.typecodes.
+            all_types = set(['b', 'B', 'u', 'h', 'H', 'i', 'I', 'l', 'L', 'q', 
'Q', 'f', 'd'])
         unsupported_types = all_types - set(supported_types)
         # test unsupported types
         for t in unsupported_types:
diff --git a/python/pyspark/streaming/tests/test_dstream.py 
b/python/pyspark/streaming/tests/test_dstream.py
index 7ecdf6b..89edb23 100644
--- a/python/pyspark/streaming/tests/test_dstream.py
+++ b/python/pyspark/streaming/tests/test_dstream.py
@@ -30,8 +30,9 @@ from pyspark.testing.streamingutils import 
PySparkStreamingTestCase
 
 
 @unittest.skipIf(
-    "pypy" in platform.python_implementation().lower() and 
"COVERAGE_PROCESS_START" in os.environ,
-    "PyPy implementation causes to hang DStream tests forever when Coverage 
report is used.")
+    "pypy" in platform.python_implementation().lower(),
+    "The tests fail in PyPy3 implementation for an unknown reason. "
+    "With PyPy, it causes to hang DStream tests forever when Coverage report 
is used.")
 class BasicOperationTests(PySparkStreamingTestCase):
 
     def test_map(self):
@@ -394,8 +395,9 @@ class BasicOperationTests(PySparkStreamingTestCase):
 
 
 @unittest.skipIf(
-    "pypy" in platform.python_implementation().lower() and 
"COVERAGE_PROCESS_START" in os.environ,
-    "PyPy implementation causes to hang DStream tests forever when Coverage 
report is used.")
+    "pypy" in platform.python_implementation().lower(),
+    "The tests fail in PyPy3 implementation for an unknown reason. "
+    "With PyPy, it causes to hang DStream tests forever when Coverage report 
is used.")
 class WindowFunctionTests(PySparkStreamingTestCase):
 
     timeout = 15
@@ -474,8 +476,9 @@ class WindowFunctionTests(PySparkStreamingTestCase):
 
 
 @unittest.skipIf(
-    "pypy" in platform.python_implementation().lower() and 
"COVERAGE_PROCESS_START" in os.environ,
-    "PyPy implementation causes to hang DStream tests forever when Coverage 
report is used.")
+    "pypy" in platform.python_implementation().lower(),
+    "The tests fail in PyPy3 implementation for an unknown reason. "
+    "With PyPy, it causes to hang DStream tests forever when Coverage report 
is used.")
 class CheckpointTests(unittest.TestCase):
 
     setupCalled = False
diff --git a/python/run-tests.py b/python/run-tests.py
index b677a51..a404c53 100755
--- a/python/run-tests.py
+++ b/python/run-tests.py
@@ -161,7 +161,7 @@ def run_individual_python_test(target_dir, test_name, 
pyspark_python):
 
 
 def get_default_python_executables():
-    python_execs = [x for x in ["python3.6", "python2.7", "pypy"] if which(x)]
+    python_execs = [x for x in ["python3.8", "python2.7", "pypy3", "pypy"] if 
which(x)]
 
     if "python3.6" not in python_execs:
         p = which("python3")
diff --git 
a/sql/core/src/test/scala/org/apache/spark/sql/IntegratedUDFTestUtils.scala 
b/sql/core/src/test/scala/org/apache/spark/sql/IntegratedUDFTestUtils.scala
index 6391d56..80346b3 100644
--- a/sql/core/src/test/scala/org/apache/spark/sql/IntegratedUDFTestUtils.scala
+++ b/sql/core/src/test/scala/org/apache/spark/sql/IntegratedUDFTestUtils.scala
@@ -197,7 +197,7 @@ object IntegratedUDFTestUtils extends SQLHelper {
 
   lazy val pythonExec: String = {
     val pythonExec = sys.env.getOrElse(
-      "PYSPARK_DRIVER_PYTHON", sys.env.getOrElse("PYSPARK_PYTHON", 
"python3.6"))
+      "PYSPARK_DRIVER_PYTHON", sys.env.getOrElse("PYSPARK_PYTHON", "python3"))
     if (TestUtils.testCommandAvailable(pythonExec)) {
       pythonExec
     } else {
diff --git 
a/sql/hive-thriftserver/src/test/scala/org/apache/spark/sql/hive/thriftserver/ThriftServerQueryTestSuite.scala
 
b/sql/hive-thriftserver/src/test/scala/org/apache/spark/sql/hive/thriftserver/ThriftServerQueryTestSuite.scala
index 15cc310..8b16674 100644
--- 
a/sql/hive-thriftserver/src/test/scala/org/apache/spark/sql/hive/thriftserver/ThriftServerQueryTestSuite.scala
+++ 
b/sql/hive-thriftserver/src/test/scala/org/apache/spark/sql/hive/thriftserver/ThriftServerQueryTestSuite.scala
@@ -33,6 +33,7 @@ import org.apache.spark.sql.catalyst.util.fileToString
 import org.apache.spark.sql.execution.HiveResult
 import org.apache.spark.sql.internal.SQLConf
 import org.apache.spark.sql.types._
+import org.apache.spark.tags.GitHubActionsUnstableTest
 
 /**
  * Re-run all the tests in SQLQueryTestSuite via Thrift Server.
@@ -52,6 +53,7 @@ import org.apache.spark.sql.types._
  *   2. Support DESC command.
  *   3. Support SHOW command.
  */
+@GitHubActionsUnstableTest
 class ThriftServerQueryTestSuite extends SQLQueryTestSuite with 
SharedThriftServer {
 
   override protected def testFile(fileName: String): String = {
diff --git 
a/sql/hive-thriftserver/src/test/scala/org/apache/spark/sql/hive/thriftserver/ThriftServerWithSparkContextSuite.scala
 
b/sql/hive-thriftserver/src/test/scala/org/apache/spark/sql/hive/thriftserver/ThriftServerWithSparkContextSuite.scala
index 3e1fce7..73547d7 100644
--- 
a/sql/hive-thriftserver/src/test/scala/org/apache/spark/sql/hive/thriftserver/ThriftServerWithSparkContextSuite.scala
+++ 
b/sql/hive-thriftserver/src/test/scala/org/apache/spark/sql/hive/thriftserver/ThriftServerWithSparkContextSuite.scala
@@ -17,6 +17,9 @@
 
 package org.apache.spark.sql.hive.thriftserver
 
+import org.apache.spark.tags.GitHubActionsUnstableTest
+
+@GitHubActionsUnstableTest
 class ThriftServerWithSparkContextSuite extends SharedThriftServer {
 
   test("SPARK-29911: Uncache cached tables when session closed") {
diff --git 
a/sql/hive/compatibility/src/test/scala/org/apache/spark/sql/hive/execution/HiveCompatibilitySuite.scala
 
b/sql/hive/compatibility/src/test/scala/org/apache/spark/sql/hive/execution/HiveCompatibilitySuite.scala
index db1f6fb..4112512 100644
--- 
a/sql/hive/compatibility/src/test/scala/org/apache/spark/sql/hive/execution/HiveCompatibilitySuite.scala
+++ 
b/sql/hive/compatibility/src/test/scala/org/apache/spark/sql/hive/execution/HiveCompatibilitySuite.scala
@@ -26,10 +26,12 @@ import org.apache.spark.sql.hive.HiveUtils
 import org.apache.spark.sql.hive.test.TestHive
 import org.apache.spark.sql.internal.SQLConf
 import org.apache.spark.sql.internal.SQLConf.StoreAssignmentPolicy
+import org.apache.spark.tags.SlowHiveTest
 
 /**
  * Runs the test cases that are included in the hive distribution.
  */
+@SlowHiveTest
 class HiveCompatibilitySuite extends HiveQueryFileTest with BeforeAndAfter {
   // TODO: bundle in jar files... get from classpath
   private lazy val hiveQueryDir = TestHive.getHiveFile(
diff --git 
a/sql/hive/src/test/scala/org/apache/spark/sql/hive/HiveExternalCatalogVersionsSuite.scala
 
b/sql/hive/src/test/scala/org/apache/spark/sql/hive/HiveExternalCatalogVersionsSuite.scala
index 8be3d26..aa96fa0 100644
--- 
a/sql/hive/src/test/scala/org/apache/spark/sql/hive/HiveExternalCatalogVersionsSuite.scala
+++ 
b/sql/hive/src/test/scala/org/apache/spark/sql/hive/HiveExternalCatalogVersionsSuite.scala
@@ -35,7 +35,7 @@ import org.apache.spark.sql.catalyst.TableIdentifier
 import org.apache.spark.sql.catalyst.catalog.CatalogTableType
 import org.apache.spark.sql.internal.StaticSQLConf.WAREHOUSE_PATH
 import org.apache.spark.sql.test.SQLTestUtils
-import org.apache.spark.tags.ExtendedHiveTest
+import org.apache.spark.tags.{ExtendedHiveTest, SlowHiveTest}
 import org.apache.spark.util.Utils
 
 /**
@@ -46,6 +46,7 @@ import org.apache.spark.util.Utils
  * expected version under this local directory, e.g. 
`/tmp/spark-test/spark-2.0.3`, we will skip the
  * downloading for this spark version.
  */
+@SlowHiveTest
 @ExtendedHiveTest
 class HiveExternalCatalogVersionsSuite extends SparkSubmitTestUtils {
   private val isTestAtLeastJava9 = 
SystemUtils.isJavaVersionAtLeast(JavaVersion.JAVA_9)
diff --git 
a/sql/hive/src/test/scala/org/apache/spark/sql/hive/HiveSparkSubmitSuite.scala 
b/sql/hive/src/test/scala/org/apache/spark/sql/hive/HiveSparkSubmitSuite.scala
index 8b97489..3a7e92e 100644
--- 
a/sql/hive/src/test/scala/org/apache/spark/sql/hive/HiveSparkSubmitSuite.scala
+++ 
b/sql/hive/src/test/scala/org/apache/spark/sql/hive/HiveSparkSubmitSuite.scala
@@ -38,12 +38,13 @@ import org.apache.spark.sql.hive.test.{HiveTestJars, 
TestHiveContext}
 import org.apache.spark.sql.internal.SQLConf.SHUFFLE_PARTITIONS
 import org.apache.spark.sql.internal.StaticSQLConf.WAREHOUSE_PATH
 import org.apache.spark.sql.types.{DecimalType, StructType}
-import org.apache.spark.tags.ExtendedHiveTest
+import org.apache.spark.tags.{ExtendedHiveTest, SlowHiveTest}
 import org.apache.spark.util.{ResetSystemProperties, Utils}
 
 /**
  * This suite tests spark-submit with applications using HiveContext.
  */
+@SlowHiveTest
 @ExtendedHiveTest
 class HiveSparkSubmitSuite
   extends SparkSubmitTestUtils
diff --git 
a/sql/hive/src/test/scala/org/apache/spark/sql/hive/client/VersionsSuite.scala 
b/sql/hive/src/test/scala/org/apache/spark/sql/hive/client/VersionsSuite.scala
index 8642a5f..5ddaaf3 100644
--- 
a/sql/hive/src/test/scala/org/apache/spark/sql/hive/client/VersionsSuite.scala
+++ 
b/sql/hive/src/test/scala/org/apache/spark/sql/hive/client/VersionsSuite.scala
@@ -41,7 +41,7 @@ import org.apache.spark.sql.hive.{HiveExternalCatalog, 
HiveUtils}
 import org.apache.spark.sql.hive.test.TestHiveVersion
 import org.apache.spark.sql.types.IntegerType
 import org.apache.spark.sql.types.StructType
-import org.apache.spark.tags.ExtendedHiveTest
+import org.apache.spark.tags.{ExtendedHiveTest, GitHubActionsUnstableTest}
 import org.apache.spark.util.{MutableURLClassLoader, Utils}
 
 /**
@@ -52,6 +52,7 @@ import org.apache.spark.util.{MutableURLClassLoader, Utils}
  */
 // TODO: Refactor this to `HiveClientSuite` and make it a subclass of 
`HiveVersionSuite`
 @ExtendedHiveTest
+@GitHubActionsUnstableTest
 class VersionsSuite extends SparkFunSuite with Logging {
 
   override protected val enableAutoThreadAudit = false
diff --git 
a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/AggregationQuerySuite.scala
 
b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/AggregationQuerySuite.scala
index fac9812..1dd2ad3 100644
--- 
a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/AggregationQuerySuite.scala
+++ 
b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/AggregationQuerySuite.scala
@@ -31,6 +31,7 @@ import org.apache.spark.sql.hive.test.TestHiveSingleton
 import org.apache.spark.sql.internal.SQLConf
 import org.apache.spark.sql.test.SQLTestUtils
 import org.apache.spark.sql.types._
+import org.apache.spark.tags.SlowHiveTest
 import org.apache.spark.unsafe.UnsafeAlignedOffset
 
 
@@ -1054,6 +1055,7 @@ abstract class AggregationQuerySuite extends QueryTest 
with SQLTestUtils with Te
 class HashAggregationQuerySuite extends AggregationQuerySuite
 
 
+@SlowHiveTest
 class HashAggregationQueryWithControlledFallbackSuite extends 
AggregationQuerySuite {
 
   override protected def checkAnswer(actual: => DataFrame, expectedAnswer: 
Seq[Row]): Unit = {
diff --git 
a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveDDLSuite.scala
 
b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveDDLSuite.scala
index e8cf4ad..d0aa618 100644
--- 
a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveDDLSuite.scala
+++ 
b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveDDLSuite.scala
@@ -44,9 +44,11 @@ import 
org.apache.spark.sql.internal.SQLConf.ORC_IMPLEMENTATION
 import org.apache.spark.sql.internal.StaticSQLConf.CATALOG_IMPLEMENTATION
 import org.apache.spark.sql.test.SQLTestUtils
 import org.apache.spark.sql.types._
+import org.apache.spark.tags.SlowHiveTest
 import org.apache.spark.util.Utils
 
 // TODO(gatorsmile): combine HiveCatalogedDDLSuite and HiveDDLSuite
+@SlowHiveTest
 class HiveCatalogedDDLSuite extends DDLSuite with TestHiveSingleton with 
BeforeAndAfterEach {
   override def afterEach(): Unit = {
     try {
@@ -404,6 +406,7 @@ class HiveCatalogedDDLSuite extends DDLSuite with 
TestHiveSingleton with BeforeA
   }
 }
 
+@SlowHiveTest
 class HiveDDLSuite
   extends QueryTest with SQLTestUtils with TestHiveSingleton with 
BeforeAndAfterEach {
   import testImplicits._
diff --git 
a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveQuerySuite.scala
 
b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveQuerySuite.scala
index b10a8cb..e798a35 100644
--- 
a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveQuerySuite.scala
+++ 
b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveQuerySuite.scala
@@ -39,6 +39,7 @@ import org.apache.spark.sql.hive.test.{HiveTestJars, TestHive}
 import org.apache.spark.sql.hive.test.TestHive._
 import org.apache.spark.sql.internal.SQLConf
 import org.apache.spark.sql.test.SQLTestUtils
+import org.apache.spark.tags.SlowHiveTest
 
 case class TestData(a: Int, b: String)
 
@@ -46,6 +47,7 @@ case class TestData(a: Int, b: String)
  * A set of test cases expressed in Hive QL that are not covered by the tests
  * included in the hive distribution.
  */
+@SlowHiveTest
 class HiveQuerySuite extends HiveComparisonTest with SQLTestUtils with 
BeforeAndAfter {
   import org.apache.spark.sql.hive.test.TestHive.implicits._
 
diff --git 
a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/Hive_2_1_DDLSuite.scala
 
b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/Hive_2_1_DDLSuite.scala
index b20ef03..6f37e39 100644
--- 
a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/Hive_2_1_DDLSuite.scala
+++ 
b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/Hive_2_1_DDLSuite.scala
@@ -27,13 +27,14 @@ import org.apache.spark.sql.hive.{HiveExternalCatalog, 
HiveUtils}
 import org.apache.spark.sql.hive.test.TestHiveSingleton
 import org.apache.spark.sql.internal.StaticSQLConf._
 import org.apache.spark.sql.types._
-import org.apache.spark.tags.ExtendedHiveTest
+import org.apache.spark.tags.{ExtendedHiveTest, SlowHiveTest}
 import org.apache.spark.util.Utils
 
 /**
  * A separate set of DDL tests that uses Hive 2.1 libraries, which behave a 
little differently
  * from the built-in ones.
  */
+@SlowHiveTest
 @ExtendedHiveTest
 class Hive_2_1_DDLSuite extends SparkFunSuite with TestHiveSingleton with 
BeforeAndAfterEach
   with BeforeAndAfterAll {
diff --git 
a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/SQLQuerySuite.scala
 
b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/SQLQuerySuite.scala
index d12eae0e..a46db32 100644
--- 
a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/SQLQuerySuite.scala
+++ 
b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/SQLQuerySuite.scala
@@ -43,6 +43,7 @@ import org.apache.spark.sql.internal.SQLConf
 import org.apache.spark.sql.internal.StaticSQLConf.GLOBAL_TEMP_DATABASE
 import org.apache.spark.sql.test.SQLTestUtils
 import org.apache.spark.sql.types._
+import org.apache.spark.tags.SlowHiveTest
 import org.apache.spark.util.Utils
 
 case class Nested1(f1: Nested2)
@@ -2559,6 +2560,8 @@ abstract class SQLQuerySuiteBase extends QueryTest with 
SQLTestUtils with TestHi
   }
 }
 
+@SlowHiveTest
 class SQLQuerySuite extends SQLQuerySuiteBase with 
DisableAdaptiveExecutionSuite
+@SlowHiveTest
 class SQLQuerySuiteAE extends SQLQuerySuiteBase with 
EnableAdaptiveExecutionSuite
 


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

[spark] branch branch-3.0 updated: [SPARK-32249][INFRA][3.0] Run Github Actions builds in branch-3.0

Reply via email to