This is an automated email from the ASF dual-hosted git repository.

dongjoon pushed a commit to branch branch-2.4
in repository https://gitbox.apache.org/repos/asf/spark.git


The following commit(s) were added to refs/heads/branch-2.4 by this push:
     new 7c65f76  [SPARK-32249][INFRA][2.4] Run Github Actions builds in branch-2.4
7c65f76 is described below

commit 7c65f7680ffbe2c03e444ec60358cbf912c27d13
Author: HyukjinKwon <[email protected]>
AuthorDate: Wed Aug 19 18:31:20 2020 -0700

    [SPARK-32249][INFRA][2.4] Run Github Actions builds in branch-2.4
    
    ### What changes were proposed in this pull request?
    
    This PR proposes to backport the following JIRAs:
    
    - SPARK-32245
    - SPARK-32292
    - SPARK-32252
    - SPARK-32408
    - SPARK-32303
    - SPARK-32363
    - SPARK-32419
    - SPARK-32491
    - SPARK-32493
    - SPARK-32496
    - SPARK-32497
    - SPARK-32357
    - SPARK-32606
    - SPARK-32605
    - SPARK-32645
    - Minor renaming https://github.com/apache/spark/commit/d0dfe4986b1c4cb5a47be46b2bbedeea42d81caf#diff-02d9c370a663741451423342d5869b21
    
    in order to enable GitHub Actions in branch-2.4.
    
    ### Why are the changes needed?
    
    To be able to run the tests in branch-2.4. Jenkins jobs are unstable.
    
    ### Does this PR introduce _any_ user-facing change?
    
    No, dev-only.
    
    ### How was this patch tested?
    
    Build in this PR will test.
    
    Closes #29465 from HyukjinKwon/SPARK-32249-2.4.
    
    Lead-authored-by: HyukjinKwon <[email protected]>
    Co-authored-by: Hyukjin Kwon <[email protected]>
    Co-authored-by: Dongjoon Hyun <[email protected]>
    Signed-off-by: Dongjoon Hyun <[email protected]>
---
 .github/workflows/branch-2.4.yml                   | 104 ---------
 .github/workflows/build_and_test.yml               | 241 +++++++++++++++++++++
 .github/workflows/test_report.yml                  |  24 ++
 .../java/org/apache/spark/tags/SlowHiveTest.java   |  30 +++
 .../spark/scheduler/BarrierTaskContextSuite.scala  |   3 +-
 dev/run-pip-tests                                  |  14 +-
 dev/run-tests.py                                   | 192 ++++++++++++----
 dev/sparktestsupport/modules.py                    |  91 ++++++--
 project/SparkBuild.scala                           |   9 +
 python/pyspark/sql/tests.py                        |   3 +-
 python/pyspark/streaming/tests.py                  |  10 +
 .../hive/execution/HiveCompatibilitySuite.scala    |   2 +
 .../hive/HiveExternalCatalogVersionsSuite.scala    |   2 +
 .../spark/sql/hive/HiveSparkSubmitSuite.scala      |   2 +
 .../spark/sql/hive/client/VersionsSuite.scala      |   3 +-
 .../sql/hive/execution/AggregationQuerySuite.scala |   2 +
 .../spark/sql/hive/execution/HiveDDLSuite.scala    |   3 +
 .../spark/sql/hive/execution/HiveQuerySuite.scala  |   2 +
 .../sql/hive/execution/Hive_2_1_DDLSuite.scala     |   3 +-
 .../spark/sql/hive/execution/SQLQuerySuite.scala   |   2 +
 20 files changed, 566 insertions(+), 176 deletions(-)

diff --git a/.github/workflows/branch-2.4.yml b/.github/workflows/branch-2.4.yml
deleted file mode 100644
index 77e8f27..0000000
--- a/.github/workflows/branch-2.4.yml
+++ /dev/null
@@ -1,104 +0,0 @@
-name: branch-2.4
-
-on:
-  push:
-    branches:
-    - branch-2.4
-  pull_request:
-    branches:
-    - branch-2.4
-
-jobs:
-  build:
-
-    runs-on: ubuntu-latest
-    strategy:
-      matrix:
-        scala: [ '2.11', '2.12' ]
-        hadoop: [ 'hadoop-2.6', 'hadoop-2.7' ]
-    name: Build Spark with Scala ${{ matrix.scala }} / Hadoop ${{ matrix.hadoop }}
-
-    steps:
-    - uses: actions/checkout@master
-    # We split caches because GitHub Action Cache has a 400MB-size limit.
-    - uses: actions/cache@v1
-      with:
-        path: build
-        key: build-${{ hashFiles('**/pom.xml') }}
-        restore-keys: |
-          build-
-    - uses: actions/cache@v1
-      with:
-        path: ~/.m2/repository/com
-        key: ${{ matrix.scala }}-${{ matrix.hadoop }}-maven-com-${{ hashFiles('**/pom.xml') }}
-        restore-keys: |
-          ${{ matrix.scala }}-${{ matrix.hadoop }}-maven-com-
-    - uses: actions/cache@v1
-      with:
-        path: ~/.m2/repository/org
-        key: ${{ matrix.scala }}-${{ matrix.hadoop }}-maven-org-${{ hashFiles('**/pom.xml') }}
-        restore-keys: |
-          ${{ matrix.scala }}-${{ matrix.hadoop }}-maven-org-
-    - name: Set up JDK 8
-      uses: actions/setup-java@v1
-      with:
-        java-version: '1.8'
-    - name: Change to Scala ${{ matrix.scala }}
-      run: |
-        dev/change-scala-version.sh ${{ matrix.scala }}
-    - name: Build with Maven
-      run: |
-        export MAVEN_OPTS="-Xmx2g -XX:ReservedCodeCacheSize=512m -Dorg.slf4j.simpleLogger.defaultLogLevel=WARN"
-        export MAVEN_CLI_OPTS="--no-transfer-progress"
-        mkdir -p ~/.m2
-        ./build/mvn $MAVEN_CLI_OPTS -DskipTests -Pyarn -Pmesos -Pkubernetes -Phive -Phive-thriftserver -Pscala-${{ matrix.scala }} -P${{ matrix.hadoop }} -Phadoop-cloud install
-        rm -rf ~/.m2/repository/org/apache/spark
-
-
-  lint:
-    runs-on: ubuntu-latest
-    name: Linters (Java/Scala/Python), licenses, dependencies
-    steps:
-    - uses: actions/checkout@master
-    - uses: actions/setup-java@v1
-      with:
-        java-version: '1.8'
-    - uses: actions/setup-python@v1
-      with:
-        python-version: '3.7'
-        architecture: 'x64'
-    - name: Scala
-      run: ./dev/lint-scala
-    - name: Java
-      run: ./dev/lint-java
-    - name: Python
-      run: |
-        pip install flake8 sphinx numpy
-        ./dev/lint-python
-    - name: License
-      run: ./dev/check-license
-    - name: Dependencies
-      run: ./dev/test-dependencies.sh
-
-  lintr:
-    runs-on: ubuntu-latest
-    name: Linter (R)
-    steps:
-    - uses: actions/checkout@master
-    - uses: actions/setup-java@v1
-      with:
-        java-version: '1.8'
-    - uses: r-lib/actions/setup-r@v1
-      with:
-        r-version: '3.6.2'
-    - name: install lib
-      run: |
-        sudo apt-get install -y libcurl4-openssl-dev
-    - name: install R packages
-      run: |
-        sudo Rscript -e "install.packages(c('curl', 'xml2', 'httr', 'devtools', 'testthat', 'knitr', 'rmarkdown', 'roxygen2', 'e1071', 'survival'), repos='https://cloud.r-project.org/')"
-        sudo Rscript -e "devtools::install_github('jimhester/[email protected]')"
-    - name: package and install SparkR
-      run: ./R/install-dev.sh
-    - name: lint-r
-      run: ./dev/lint-r
diff --git a/.github/workflows/build_and_test.yml b/.github/workflows/build_and_test.yml
new file mode 100644
index 0000000..aefa6f3
--- /dev/null
+++ b/.github/workflows/build_and_test.yml
@@ -0,0 +1,241 @@
+name: Build and test
+
+on:
+  push:
+    branches:
+    - branch-2.4
+  pull_request:
+    branches:
+    - branch-2.4
+
+jobs:
+  # Build: build Spark and run the tests for specified modules.
+  build:
+    name: "Build modules: ${{ matrix.modules }} ${{ matrix.comment }} (JDK ${{ 
matrix.java }}, ${{ matrix.hadoop }})"
+    runs-on: ubuntu-latest
+    strategy:
+      fail-fast: false
+      matrix:
+        java:
+          - 1.8
+        hadoop:
+          - hadoop2.6
+        # TODO(SPARK-32246): We don't test 'streaming-kinesis-asl' for now.
+        # Kinesis tests depend on the external Amazon Kinesis service.
+        # Note that the modules below are from sparktestsupport/modules.py.
+        modules:
+          - >-
+            core, unsafe, kvstore, avro,
+            network-common, network-shuffle, repl, launcher,
+            examples, sketch, graphx
+          - >-
+            catalyst, hive-thriftserver
+          - >-
+            streaming, sql-kafka-0-10, streaming-kafka-0-10,
+            mllib-local, mllib,
+            yarn, mesos, kubernetes, hadoop-cloud, spark-ganglia-lgpl,
+            streaming-flume, streaming-flume-sink, streaming-kafka-0-8
+          - >-
+            pyspark-sql, pyspark-mllib
+          - >-
+            pyspark-core, pyspark-streaming, pyspark-ml
+          - >-
+            sparkr
+          - >-
+            sql
+        # Here, we split the Hive and SQL tests into the slow ones and the rest.
+        included-tags: [""]
+        excluded-tags: [""]
+        comment: [""]
+        include:
+          # Hive tests
+          - modules: hive
+            java: 1.8
+            hadoop: hadoop2.6
+            included-tags: org.apache.spark.tags.SlowHiveTest
+            comment: "- slow tests"
+          - modules: hive
+            java: 1.8
+            hadoop: hadoop2.6
+            excluded-tags: org.apache.spark.tags.SlowHiveTest
+            comment: "- other tests"
+    env:
+      MODULES_TO_TEST: ${{ matrix.modules }}
+      EXCLUDED_TAGS: ${{ matrix.excluded-tags }}
+      INCLUDED_TAGS: ${{ matrix.included-tags }}
+      HADOOP_PROFILE: ${{ matrix.hadoop }}
+      # GitHub Actions' default miniconda to use in pip packaging test.
+      CONDA_PREFIX: /usr/share/miniconda
+      GITHUB_PREV_SHA: ${{ github.event.before }}
+      ARROW_PRE_0_15_IPC_FORMAT: 1
+    steps:
+    - name: Checkout Spark repository
+      uses: actions/checkout@v2
+      # In order to fetch changed files
+      with:
+        fetch-depth: 0
+    # Cache local repositories. Note that GitHub Actions cache has a 2G limit.
+    - name: Cache Scala, SBT, Maven and Zinc
+      uses: actions/cache@v1
+      with:
+        path: build
+        key: build-${{ hashFiles('**/pom.xml') }}
+        restore-keys: |
+          build-
+    - name: Cache Maven local repository
+      uses: actions/cache@v2
+      with:
+        path: ~/.m2/repository
+        key: ${{ matrix.java }}-${{ matrix.hadoop }}-maven-${{ hashFiles('**/pom.xml') }}
+        restore-keys: |
+          ${{ matrix.java }}-${{ matrix.hadoop }}-maven-
+    - name: Cache Ivy local repository
+      uses: actions/cache@v2
+      with:
+        path: ~/.ivy2/cache
+        key: ${{ matrix.java }}-${{ matrix.hadoop }}-ivy-${{ hashFiles('**/pom.xml') }}-${{ hashFiles('**/plugins.sbt') }}
+        restore-keys: |
+          ${{ matrix.java }}-${{ matrix.hadoop }}-ivy-
+    - name: Install JDK ${{ matrix.java }}
+      uses: actions/setup-java@v1
+      with:
+        java-version: ${{ matrix.java }}
+    # PySpark
+    - name: Install PyPy3
+      # Note that order of Python installations here matters because default python is
+      # overridden.
+      uses: actions/setup-python@v2
+      if: contains(matrix.modules, 'pyspark')
+      with:
+        python-version: pypy3
+        architecture: x64
+    - name: Install Python 3.6
+      uses: actions/setup-python@v2
+      if: contains(matrix.modules, 'pyspark')
+      with:
+        python-version: 3.6
+        architecture: x64
+    - name: Install Python 2.7
+      uses: actions/setup-python@v2
+      # Yarn has a Python specific test too, for example, YarnClusterSuite.
+      if: contains(matrix.modules, 'yarn') || contains(matrix.modules, 'pyspark') || (contains(matrix.modules, 'sql') && !contains(matrix.modules, 'sql-'))
+      with:
+        python-version: 2.7
+        architecture: x64
+    - name: Install Python packages (Python 3.6 and PyPy3)
+      if: contains(matrix.modules, 'pyspark')
+      # PyArrow is not supported in PyPy yet, see ARROW-2651.
+      # TODO(SPARK-32247): scipy installation with PyPy fails for an unknown reason.
+      run: |
+        python3.6 -m pip install numpy pyarrow pandas scipy xmlrunner
+        python3.6 -m pip list
+        # PyPy does not have xmlrunner
+        pypy3 -m pip install numpy pandas
+        pypy3 -m pip list
+    - name: Install Python packages (Python 2.7)
+      if: contains(matrix.modules, 'pyspark') || (contains(matrix.modules, 'sql') && !contains(matrix.modules, 'sql-'))
+      run: |
+        # Some tests do not pass in PySpark with PyArrow, for example, pyspark.sql.tests.ArrowTests.
+        python2.7 -m pip install numpy pandas scipy xmlrunner
+        python2.7 -m pip list
+    # SparkR
+    - name: Install R 4.0
+      if: contains(matrix.modules, 'sparkr')
+      run: |
+        sudo sh -c "echo 'deb https://cloud.r-project.org/bin/linux/ubuntu bionic-cran40/' >> /etc/apt/sources.list"
+        curl -sL "https://keyserver.ubuntu.com/pks/lookup?op=get&search=0xE298A3A825C0D65DFD57CBB651716619E084DAB9" | sudo apt-key add
+        sudo apt-get update
+        sudo apt-get install -y r-base r-base-dev libcurl4-openssl-dev
+    - name: Install R packages
+      if: contains(matrix.modules, 'sparkr')
+      run: |
+        # qpdf is required to reduce the size of PDFs to make CRAN check pass. See SPARK-32497.
+        sudo apt-get install -y libcurl4-openssl-dev qpdf
+        sudo Rscript -e "install.packages(c('knitr', 'rmarkdown', 'testthat', 
'devtools', 'e1071', 'survival', 'arrow', 'roxygen2'), 
repos='https://cloud.r-project.org/')"
+        # Show installed packages in R.
+        sudo Rscript -e 'pkg_list <- as.data.frame(installed.packages()[, c(1,3:4)]); pkg_list[is.na(pkg_list$Priority), 1:2, drop = FALSE]'
+    # Run the tests.
+    - name: Run tests
+      run: |
+        # Hive tests become flaky when running in parallel as it's too intensive.
+        if [[ "$MODULES_TO_TEST" == "hive" ]]; then export SERIAL_SBT_TESTS=1; fi
+        mkdir -p ~/.m2
+        ./dev/run-tests --parallelism 2 --modules "$MODULES_TO_TEST" --included-tags "$INCLUDED_TAGS" --excluded-tags "$EXCLUDED_TAGS"
+        rm -rf ~/.m2/repository/org/apache/spark
+    - name: Upload test results to report
+      if: always()
+      uses: actions/upload-artifact@v2
+      with:
+        name: test-results-${{ matrix.modules }}-${{ matrix.comment }}-${{ matrix.java }}-${{ matrix.hadoop }}
+        path: "**/target/test-reports/*.xml"
+    - name: Upload unit tests log files
+      if: failure()
+      uses: actions/upload-artifact@v2
+      with:
+        name: unit-tests-log-${{ matrix.modules }}-${{ matrix.comment }}-${{ matrix.java }}-${{ matrix.hadoop }}-${{ matrix.hive }}
+        path: "**/target/unit-tests.log"
+
+  # Static analysis, and documentation build
+  lint:
+    name: Linters, licenses, dependencies and documentation generation
+    runs-on: ubuntu-latest
+    steps:
+    - name: Checkout Spark repository
+      uses: actions/checkout@v2
+    - name: Cache Maven local repository
+      uses: actions/cache@v2
+      with:
+        path: ~/.m2/repository
+        key: docs-maven-repo-${{ hashFiles('**/pom.xml') }}
+        restore-keys: |
+          docs-maven-
+    - name: Install JDK 1.8
+      uses: actions/setup-java@v1
+      with:
+        java-version: 1.8
+    - name: Install Python 3.6
+      uses: actions/setup-python@v2
+      with:
+        python-version: 3.6
+        architecture: x64
+    - name: Install Python linter dependencies
+      run: |
+        pip3 install flake8 sphinx numpy
+    - name: Install R 4.0
+      run: |
+        sudo sh -c "echo 'deb https://cloud.r-project.org/bin/linux/ubuntu bionic-cran40/' >> /etc/apt/sources.list"
+        curl -sL "https://keyserver.ubuntu.com/pks/lookup?op=get&search=0xE298A3A825C0D65DFD57CBB651716619E084DAB9" | sudo apt-key add
+        sudo apt-get update
+        sudo apt-get install -y r-base r-base-dev libcurl4-openssl-dev
+    - name: Install R linter dependencies and SparkR
+      run: |
+        sudo apt-get install -y libcurl4-openssl-dev
+        sudo Rscript -e "install.packages(c('devtools'), 
repos='https://cloud.r-project.org/')"
+        sudo Rscript -e "devtools::install_github('jimhester/[email protected]')"
+        ./R/install-dev.sh
+    - name: Install Ruby 2.7 for documentation generation
+      uses: actions/setup-ruby@v1
+      with:
+        ruby-version: 2.7
+    - name: Install dependencies for documentation generation
+      run: |
+        sudo apt-get install -y libcurl4-openssl-dev pandoc
+        pip install sphinx mkdocs numpy
+        gem install jekyll jekyll-redirect-from pygments.rb
+        sudo Rscript -e "install.packages(c('devtools', 'testthat', 'knitr', 
'rmarkdown', 'roxygen2'), repos='https://cloud.r-project.org/')"
+    - name: Scala linter
+      run: ./dev/lint-scala
+    - name: Java linter
+      run: ./dev/lint-java
+    - name: Python linter
+      run: ./dev/lint-python
+    - name: R linter
+      run: ./dev/lint-r
+    - name: License test
+      run: ./dev/check-license
+    - name: Dependencies test
+      run: ./dev/test-dependencies.sh
+    - name: Run documentation build
+      run: |
+        cd docs
+        jekyll build
diff --git a/.github/workflows/test_report.yml b/.github/workflows/test_report.yml
new file mode 100644
index 0000000..93cdb86
--- /dev/null
+++ b/.github/workflows/test_report.yml
@@ -0,0 +1,24 @@
+name: Report test results
+on:
+  workflow_run:
+    workflows: ["Build and test"]
+    types:
+      - completed
+
+jobs:
+  test_report:
+    runs-on: ubuntu-latest
+    steps:
+    - name: Download test results to report
+      uses: dawidd6/action-download-artifact@v2
+      with:
+        github_token: ${{ secrets.GITHUB_TOKEN }}
+        workflow: ${{ github.event.workflow_run.workflow_id }}
+        commit: ${{ github.event.workflow_run.head_commit.id }}
+    - name: Publish test report
+      uses: scacap/action-surefire-report@v1
+      with:
+        check_name: Report test results
+        github_token: ${{ secrets.GITHUB_TOKEN }}
+        report_paths: "**/target/test-reports/*.xml"
+        commit: ${{ github.event.workflow_run.head_commit.id }}
diff --git a/common/tags/src/test/java/org/apache/spark/tags/SlowHiveTest.java b/common/tags/src/test/java/org/apache/spark/tags/SlowHiveTest.java
new file mode 100644
index 0000000..a7e6f35
--- /dev/null
+++ b/common/tags/src/test/java/org/apache/spark/tags/SlowHiveTest.java
@@ -0,0 +1,30 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.spark.tags;
+
+import org.scalatest.TagAnnotation;
+
+import java.lang.annotation.ElementType;
+import java.lang.annotation.Retention;
+import java.lang.annotation.RetentionPolicy;
+import java.lang.annotation.Target;
+
+@TagAnnotation
+@Retention(RetentionPolicy.RUNTIME)
+@Target({ElementType.METHOD, ElementType.TYPE})
+public @interface SlowHiveTest { }
diff --git a/core/src/test/scala/org/apache/spark/scheduler/BarrierTaskContextSuite.scala b/core/src/test/scala/org/apache/spark/scheduler/BarrierTaskContextSuite.scala
index 92a97d1..7fcac96 100644
--- a/core/src/test/scala/org/apache/spark/scheduler/BarrierTaskContextSuite.scala
+++ b/core/src/test/scala/org/apache/spark/scheduler/BarrierTaskContextSuite.scala
@@ -153,7 +153,8 @@ class BarrierTaskContextSuite extends SparkFunSuite with LocalSparkContext {
     assert(error.contains("within 1 second(s)"))
   }
 
-  test("SPARK-31485: barrier stage should fail if only partial tasks are 
launched") {
+  // Disabled as it is flaky in GitHub Actions.
+  ignore("SPARK-31485: barrier stage should fail if only partial tasks are 
launched") {
     val conf = new SparkConf()
       .setMaster("local-cluster[2, 1, 1024]")
       .setAppName("test-cluster")
diff --git a/dev/run-pip-tests b/dev/run-pip-tests
index 60cf4d8..00748d5 100755
--- a/dev/run-pip-tests
+++ b/dev/run-pip-tests
@@ -68,11 +68,15 @@ fi
 PYSPARK_VERSION=$(python3 -c "exec(open('python/pyspark/version.py').read());print(__version__)")
 PYSPARK_DIST="$FWDIR/python/dist/pyspark-$PYSPARK_VERSION.tar.gz"
 # The pip install options we use for all the pip commands
-PIP_OPTIONS="--upgrade --no-cache-dir --force-reinstall "
+PIP_OPTIONS="--upgrade --no-cache-dir --force-reinstall"
 # Test both regular user and edit/dev install modes.
 PIP_COMMANDS=("pip install $PIP_OPTIONS $PYSPARK_DIST"
              "pip install $PIP_OPTIONS -e python/")
 
+# Jenkins has PySpark installed under user sitepackages shared for some reason.
+# In this test, explicitly exclude user sitepackages to prevent side effects
+export PYTHONNOUSERSITE=1
+
 for python in "${PYTHON_EXECS[@]}"; do
   for install_command in "${PIP_COMMANDS[@]}"; do
     echo "Testing pip installation with python $python"
@@ -81,8 +85,12 @@ for python in "${PYTHON_EXECS[@]}"; do
     VIRTUALENV_PATH="$VIRTUALENV_BASE"/$python
     rm -rf "$VIRTUALENV_PATH"
     if [ -n "$USE_CONDA" ]; then
+      if [ -f "$CONDA_PREFIX/etc/profile.d/conda.sh" ]; then
+        # See also https://github.com/conda/conda/issues/7980
+        source "$CONDA_PREFIX/etc/profile.d/conda.sh"
+      fi
       conda create -y -p "$VIRTUALENV_PATH" python=$python numpy pandas pip setuptools
-      source activate "$VIRTUALENV_PATH"
+      source activate "$VIRTUALENV_PATH" || conda activate "$VIRTUALENV_PATH"
     else
       mkdir -p "$VIRTUALENV_PATH"
       virtualenv --python=$python "$VIRTUALENV_PATH"
@@ -125,7 +133,7 @@ for python in "${PYTHON_EXECS[@]}"; do
 
     # conda / virtualenv environments need to be deactivated differently
     if [ -n "$USE_CONDA" ]; then
-      source deactivate
+      source deactivate || conda deactivate
     else
       deactivate
     fi
diff --git a/dev/run-tests.py b/dev/run-tests.py
index 5915b52..58acffa 100755
--- a/dev/run-tests.py
+++ b/dev/run-tests.py
@@ -48,13 +48,12 @@ def determine_modules_for_files(filenames):
     ['pyspark-core', 'sql']
     >>> [x.name for x in determine_modules_for_files(["file_not_matched_by_any_subproject"])]
     ['root']
-    >>> [x.name for x in determine_modules_for_files( \
-            [".github/workflows/master.yml", "appveyor.yml"])]
+    >>> [x.name for x in determine_modules_for_files(["appveyor.yml"])]
     []
     """
     changed_modules = set()
     for filename in filenames:
-        if filename in (".github/workflows/master.yml", "appveyor.yml"):
+        if filename in ("appveyor.yml",):
             continue
         matched_at_least_one_module = False
         for module in modules.all_modules:
@@ -100,28 +99,53 @@ def setup_test_environ(environ):
         os.environ[k] = v
 
 
-def determine_modules_to_test(changed_modules):
+def determine_modules_to_test(changed_modules, deduplicated=True):
     """
     Given a set of modules that have changed, compute the transitive closure of those modules'
     dependent modules in order to determine the set of modules that should be tested.
 
     Returns a topologically-sorted list of modules (ties are broken by sorting on module names).
+    If ``deduplicated`` is disabled, the modules are returned without taking the deduplication
+    by dependencies into account.
 
     >>> [x.name for x in determine_modules_to_test([modules.root])]
     ['root']
     >>> [x.name for x in determine_modules_to_test([modules.build])]
     ['root']
+    >>> [x.name for x in determine_modules_to_test([modules.core])]
+    ['root']
+    >>> [x.name for x in determine_modules_to_test([modules.launcher])]
+    ['root']
     >>> [x.name for x in determine_modules_to_test([modules.graphx])]
     ['graphx', 'examples']
-    >>> x = [x.name for x in determine_modules_to_test([modules.sql])]
-    >>> x # doctest: +NORMALIZE_WHITESPACE
+    >>> [x.name for x in determine_modules_to_test([modules.sql])]
+    ... # doctest: +NORMALIZE_WHITESPACE
     ['sql', 'avro', 'hive', 'mllib', 'sql-kafka-0-10', 'examples', 'hive-thriftserver',
      'pyspark-sql', 'repl', 'sparkr', 'pyspark-mllib', 'pyspark-ml']
+    >>> sorted([x.name for x in determine_modules_to_test(
+    ...     [modules.sparkr, modules.sql], deduplicated=False)])
+    ... # doctest: +NORMALIZE_WHITESPACE
+    ['avro', 'examples', 'hive', 'hive-thriftserver', 'mllib', 'pyspark-ml',
+     'pyspark-mllib', 'pyspark-sql', 'repl', 'sparkr', 'sql', 'sql-kafka-0-10']
+    >>> sorted([x.name for x in determine_modules_to_test(
+    ...     [modules.sql, modules.core], deduplicated=False)])
+    ... # doctest: +NORMALIZE_WHITESPACE
+    ['avro', 'catalyst', 'core', 'examples', 'graphx', 'hive', 'hive-thriftserver',
+     'mllib', 'mllib-local', 'pyspark-core', 'pyspark-ml', 'pyspark-mllib',
+     'pyspark-sql', 'pyspark-streaming', 'repl', 'root',
+     'sparkr', 'sql', 'sql-kafka-0-10', 'streaming', 'streaming-flume',
+     'streaming-flume-assembly', 'streaming-flume-sink', 'streaming-kafka-0-10',
+     'streaming-kafka-0-8', 'streaming-kinesis-asl']
     """
     modules_to_test = set()
     for module in changed_modules:
-        modules_to_test = modules_to_test.union(determine_modules_to_test(module.dependent_modules))
+        modules_to_test = modules_to_test.union(
+            determine_modules_to_test(module.dependent_modules, deduplicated))
     modules_to_test = modules_to_test.union(set(changed_modules))
+
+    if not deduplicated:
+        return modules_to_test
+
     # If we need to run all of the tests, then we should short-circuit and return 'root'
     if modules.root in modules_to_test:
         return [modules.root]
@@ -421,7 +445,7 @@ def run_scala_tests_sbt(test_modules, test_profiles):
     exec_sbt(profiles_and_goals)
 
 
-def run_scala_tests(build_tool, hadoop_version, test_modules, excluded_tags):
+def run_scala_tests(build_tool, hadoop_version, test_modules, excluded_tags, included_tags):
     """Function to properly execute all tests passed in as a set from the
     `determine_test_suites` function"""
     set_title_and_block("Running Spark unit tests", "BLOCK_SPARK_UNIT_TESTS")
@@ -431,6 +455,8 @@ def run_scala_tests(build_tool, hadoop_version, test_modules, excluded_tags):
     test_profiles = get_hadoop_profiles(hadoop_version) + \
         list(set(itertools.chain.from_iterable(m.build_profile_flags for m in test_modules)))
 
+    if included_tags:
+        test_profiles += ['-Dtest.include.tags=' + ",".join(included_tags)]
     if excluded_tags:
         test_profiles += ['-Dtest.exclude.tags=' + ",".join(excluded_tags)]
 
@@ -447,6 +473,9 @@ def run_python_tests(test_modules, parallelism):
     if test_modules != [modules.root]:
         command.append("--modules=%s" % ','.join(m.name for m in test_modules))
     command.append("--parallelism=%i" % parallelism)
+    if "GITHUB_ACTIONS" in os.environ:
+        # GitHub Actions has 'pypy3' explicitly and does not have 'pypy' executable.
+        command.append("--python-executables=python2.7,python3.6,pypy3")
     run_cmd(command)
 
 
@@ -479,6 +508,24 @@ def parse_opts():
         "-p", "--parallelism", type="int", default=4,
         help="The number of suites to test in parallel (default %default)"
     )
+    parser.add_option(
+        "-m", "--modules", type="str",
+        default=None,
+        help="A comma-separated list of modules to test "
+             "(default: %s)" % ",".join(sorted([m.name for m in 
modules.all_modules]))
+    )
+    parser.add_option(
+        "-e", "--excluded-tags", type="str",
+        default=None,
+        help="A comma-separated list of tags to exclude in the tests, "
+             "e.g., org.apache.spark.tags.ExtendedHiveTest "
+    )
+    parser.add_option(
+        "-i", "--included-tags", type="str",
+        default=None,
+        help="A comma-separated list of tags to include in the tests, "
+             "e.g., org.apache.spark.tags.ExtendedHiveTest "
+    )
 
     (opts, args) = parser.parse_args()
     if args:
@@ -511,13 +558,20 @@ def main():
               " install one and retry.")
         sys.exit(2)
 
-    java_version = determine_java_version(java_exe)
-
-    # install SparkR
-    if which("R"):
-        run_cmd([os.path.join(SPARK_HOME, "R", "install-dev.sh")])
-    else:
-        print("Cannot install SparkR as R was not found in PATH")
+    # Install SparkR
+    should_only_test_modules = opts.modules is not None
+    test_modules = []
+    if should_only_test_modules:
+        str_test_modules = [m.strip() for m in opts.modules.split(",")]
+        test_modules = [m for m in modules.all_modules if m.name in str_test_modules]
+
+    if not should_only_test_modules or modules.sparkr in test_modules:
+        # If tests modules are specified, we will not run R linter.
+        # SparkR needs the manual SparkR installation.
+        if which("R"):
+            run_cmd([os.path.join(SPARK_HOME, "R", "install-dev.sh")])
+        else:
+            print("Cannot install SparkR as R was not found in PATH")
 
     if os.environ.get("AMPLAB_JENKINS"):
         # if we're on the Amplab Jenkins build servers setup variables
@@ -528,24 +582,67 @@ def main():
         # add path for Python3 in Jenkins if we're calling from a Jenkins machine
         os.environ["PATH"] = "/home/anaconda/envs/py3k/bin:" + os.environ.get("PATH")
     else:
-        # else we're running locally and can use local settings
+        # else we're running locally or in GitHub Actions.
         build_tool = "sbt"
         hadoop_version = os.environ.get("HADOOP_PROFILE", "hadoop2.6")
-        test_env = "local"
+        if "GITHUB_ACTIONS" in os.environ:
+            test_env = "github_actions"
+        else:
+            test_env = "local"
 
     print("[info] Using build tool", build_tool, "with Hadoop profile", 
hadoop_version,
           "under environment", test_env)
 
-    changed_modules = None
-    changed_files = None
-    if test_env == "amplab_jenkins" and os.environ.get("AMP_JENKINS_PRB"):
+    changed_modules = []
+    changed_files = []
+    included_tags = []
+    excluded_tags = []
+    if should_only_test_modules:
+        # If we're running the tests in GitHub Actions, attempt to detect and test
+        # only the affected modules.
+        if test_env == "github_actions":
+            if os.environ["GITHUB_BASE_REF"] != "":
+                # Pull requests
+                changed_files = identify_changed_files_from_git_commits(
+                    os.environ["GITHUB_SHA"], 
target_branch=os.environ["GITHUB_BASE_REF"])
+            else:
+                # Build for each commit.
+                changed_files = identify_changed_files_from_git_commits(
+                    os.environ["GITHUB_SHA"], 
target_ref=os.environ["GITHUB_PREV_SHA"])
+
+            modules_to_test = determine_modules_to_test(
+                determine_modules_for_files(changed_files), deduplicated=False)
+
+            if modules.root not in modules_to_test:
+                # If root module is not found, only test the intersected modules.
+                # If root module is found, just run the modules as specified initially.
+                test_modules = list(set(modules_to_test).intersection(test_modules))
+
+        changed_modules = test_modules
+        if len(changed_modules) == 0:
+            print("[info] There are no modules to test, exiting without 
testing.")
+            return
+
+    # If we're running the tests in AMPLab Jenkins, calculate the diff from the targeted branch, and
+    # detect modules to test.
+    elif test_env == "amplab_jenkins" and os.environ.get("AMP_JENKINS_PRB"):
         target_branch = os.environ["ghprbTargetBranch"]
         changed_files = identify_changed_files_from_git_commits("HEAD", target_branch=target_branch)
         changed_modules = determine_modules_for_files(changed_files)
+        test_modules = determine_modules_to_test(changed_modules)
         excluded_tags = determine_tags_to_exclude(changed_modules)
+
+    # If there is no changed module found, test all.
     if not changed_modules:
         changed_modules = [modules.root]
-        excluded_tags = []
+    if not test_modules:
+        test_modules = determine_modules_to_test(changed_modules)
+
+    if opts.excluded_tags:
+        excluded_tags.extend([t.strip() for t in opts.excluded_tags.split(",")])
+    if opts.included_tags:
+        included_tags.extend([t.strip() for t in opts.included_tags.split(",")])
+
     print("[info] Found the following changed modules:",
           ", ".join(x.name for x in changed_modules))
 
@@ -558,33 +655,32 @@ def main():
         test_environ.update(m.environ)
     setup_test_environ(test_environ)
 
-    test_modules = determine_modules_to_test(changed_modules)
-
-    # license checks
-    run_apache_rat_checks()
-
-    # style checks
-    if not changed_files or any(f.endswith(".scala")
-                                or f.endswith("scalastyle-config.xml")
-                                for f in changed_files):
-        run_scala_style_checks()
     should_run_java_style_checks = False
-    if not changed_files or any(f.endswith(".java")
-                                or f.endswith("checkstyle.xml")
-                                or f.endswith("checkstyle-suppressions.xml")
-                                for f in changed_files):
-        # Run SBT Checkstyle after the build to prevent a side-effect to the build.
-        should_run_java_style_checks = True
-    if not changed_files or any(f.endswith("lint-python")
-                                or f.endswith("tox.ini")
-                                or f.endswith(".py")
-                                for f in changed_files):
-        run_python_style_checks()
-    if not changed_files or any(f.endswith(".R")
-                                or f.endswith("lint-r")
-                                or f.endswith(".lintr")
-                                for f in changed_files):
-        run_sparkr_style_checks()
+    if not should_only_test_modules:
+        # license checks
+        run_apache_rat_checks()
+
+        # style checks
+        if not changed_files or any(f.endswith(".scala")
+                                    or f.endswith("scalastyle-config.xml")
+                                    for f in changed_files):
+            run_scala_style_checks()
+        if not changed_files or any(f.endswith(".java")
+                                    or f.endswith("checkstyle.xml")
+                                    or f.endswith("checkstyle-suppressions.xml")
+                                    for f in changed_files):
+            # Run SBT Checkstyle after the build to prevent a side-effect to the build.
+            should_run_java_style_checks = True
+        if not changed_files or any(f.endswith("lint-python")
+                                    or f.endswith("tox.ini")
+                                    or f.endswith(".py")
+                                    for f in changed_files):
+            run_python_style_checks()
+        if not changed_files or any(f.endswith(".R")
+                                    or f.endswith("lint-r")
+                                    or f.endswith(".lintr")
+                                    for f in changed_files):
+            run_sparkr_style_checks()
 
     # determine if docs were changed and if we're inside the amplab environment
     # note - the below commented out until *all* Jenkins workers can get `jekyll` installed
@@ -606,7 +702,7 @@ def main():
         build_spark_assembly_sbt(hadoop_version, should_run_java_style_checks)
 
     # run the test suites
-    run_scala_tests(build_tool, hadoop_version, test_modules, excluded_tags)
+    run_scala_tests(build_tool, hadoop_version, test_modules, excluded_tags, included_tags)
 
     modules_with_python_tests = [m for m in test_modules if m.python_test_goals]
     if modules_with_python_tests:
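
    For reference, the --modules, --included-tags and --excluded-tags options added above are
    the ones the new workflow passes in. As a rough sketch (the module and tag names below are
    only illustrative of the Hive matrix entries), the two Hive jobs correspond to invocations like:

        # Slow Hive suites only, as in the "hive - slow tests" job
        ./dev/run-tests --parallelism 2 --modules "hive" --included-tags "org.apache.spark.tags.SlowHiveTest"
        # Remaining Hive suites, as in the "hive - other tests" job
        ./dev/run-tests --parallelism 2 --modules "hive" --excluded-tags "org.apache.spark.tags.SlowHiveTest"
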
diff --git a/dev/sparktestsupport/modules.py b/dev/sparktestsupport/modules.py
index 2690527..7ace1b9 100644
--- a/dev/sparktestsupport/modules.py
+++ b/dev/sparktestsupport/modules.py
@@ -100,9 +100,75 @@ tags = Module(
     ]
 )
 
+kvstore = Module(
+    name="kvstore",
+    dependencies=[tags],
+    source_file_regexes=[
+        "common/kvstore/",
+    ],
+    sbt_test_goals=[
+        "kvstore/test",
+    ],
+)
+
+network_common = Module(
+    name="network-common",
+    dependencies=[tags],
+    source_file_regexes=[
+        "common/network-common/",
+    ],
+    sbt_test_goals=[
+        "network-common/test",
+    ],
+)
+
+network_shuffle = Module(
+    name="network-shuffle",
+    dependencies=[tags],
+    source_file_regexes=[
+        "common/network-shuffle/",
+    ],
+    sbt_test_goals=[
+        "network-shuffle/test",
+    ],
+)
+
+unsafe = Module(
+    name="unsafe",
+    dependencies=[tags],
+    source_file_regexes=[
+        "common/unsafe",
+    ],
+    sbt_test_goals=[
+        "unsafe/test",
+    ],
+)
+
+launcher = Module(
+    name="launcher",
+    dependencies=[tags],
+    source_file_regexes=[
+        "launcher/",
+    ],
+    sbt_test_goals=[
+        "launcher/test",
+    ],
+)
+
+core = Module(
+    name="core",
+    dependencies=[kvstore, network_common, network_shuffle, unsafe, launcher],
+    source_file_regexes=[
+        "core/",
+    ],
+    sbt_test_goals=[
+        "core/test",
+    ],
+)
+
 catalyst = Module(
     name="catalyst",
-    dependencies=[tags],
+    dependencies=[tags, core],
     source_file_regexes=[
         "sql/catalyst/",
     ],
@@ -111,7 +177,6 @@ catalyst = Module(
     ],
 )
 
-
 sql = Module(
     name="sql",
     dependencies=[catalyst],
@@ -123,7 +188,6 @@ sql = Module(
     ],
 )
 
-
 hive = Module(
     name="hive",
     dependencies=[sql],
@@ -142,7 +206,6 @@ hive = Module(
     ]
 )
 
-
 repl = Module(
     name="repl",
     dependencies=[hive],
@@ -154,7 +217,6 @@ repl = Module(
     ],
 )
 
-
 hive_thriftserver = Module(
     name="hive-thriftserver",
     dependencies=[hive],
@@ -192,7 +254,6 @@ sql_kafka = Module(
     ]
 )
 
-
 sketch = Module(
     name="sketch",
     dependencies=[tags],
@@ -204,10 +265,9 @@ sketch = Module(
     ]
 )
 
-
 graphx = Module(
     name="graphx",
-    dependencies=[tags],
+    dependencies=[tags, core],
     source_file_regexes=[
         "graphx/",
     ],
@@ -216,10 +276,9 @@ graphx = Module(
     ]
 )
 
-
 streaming = Module(
     name="streaming",
-    dependencies=[tags],
+    dependencies=[tags, core],
     source_file_regexes=[
         "streaming",
     ],
@@ -235,7 +294,7 @@ streaming = Module(
 # fail other PRs.
 streaming_kinesis_asl = Module(
     name="streaming-kinesis-asl",
-    dependencies=[tags],
+    dependencies=[tags, core],
     source_file_regexes=[
         "external/kinesis-asl/",
         "external/kinesis-asl-assembly/",
@@ -275,7 +334,7 @@ streaming_kafka = Module(
 
 streaming_kafka_0_10 = Module(
     name="streaming-kafka-0-10",
-    dependencies=[streaming],
+    dependencies=[streaming, core],
     source_file_regexes=[
         # The ending "/" is necessary otherwise it will include "sql-kafka" 
codes
         "external/kafka-0-10/",
@@ -339,7 +398,7 @@ streaming_flume_assembly = Module(
 
 mllib_local = Module(
     name="mllib-local",
-    dependencies=[tags],
+    dependencies=[tags, core],
     source_file_regexes=[
         "mllib-local",
     ],
@@ -373,10 +432,9 @@ examples = Module(
     ]
 )
 
-
 pyspark_core = Module(
     name="pyspark-core",
-    dependencies=[],
+    dependencies=[core],
     source_file_regexes=[
         "python/(?!pyspark/(ml|mllib|sql|streaming))"
     ],
@@ -396,7 +454,6 @@ pyspark_core = Module(
     ]
 )
 
-
 pyspark_sql = Module(
     name="pyspark-sql",
     dependencies=[pyspark_core, hive],
@@ -574,7 +631,7 @@ spark_ganglia_lgpl = Module(
 # No other modules should directly depend on this module.
 root = Module(
     name="root",
-    dependencies=[build],  # Changes to build should trigger all tests.
+    dependencies=[build, core],  # Changes to build should trigger all tests.
     source_file_regexes=[],
     # In order to run all of the tests, enable every test profile:
     build_profile_flags=list(set(
diff --git a/project/SparkBuild.scala b/project/SparkBuild.scala
index 7ee079c..12b8ede 100644
--- a/project/SparkBuild.scala
+++ b/project/SparkBuild.scala
@@ -922,6 +922,15 @@ object TestSettings {
       sys.props.get("test.exclude.tags").map { tags =>
         Seq("--exclude-categories=" + tags)
       }.getOrElse(Nil): _*),
+    // Include tags defined in a system property
+    testOptions in Test += Tests.Argument(TestFrameworks.ScalaTest,
+      sys.props.get("test.include.tags").map { tags =>
+        tags.split(",").flatMap { tag => Seq("-n", tag) }.toSeq
+      }.getOrElse(Nil): _*),
+    testOptions in Test += Tests.Argument(TestFrameworks.JUnit,
+      sys.props.get("test.include.tags").map { tags =>
+        Seq("--include-categories=" + tags)
+      }.getOrElse(Nil): _*),
     // Show full stack trace and duration in test cases.
     testOptions in Test += Tests.Argument("-oDF"),
     testOptions in Test += Tests.Argument(TestFrameworks.JUnit, "-v", "-a"),
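
A minimal sketch of how the new "test.include.tags" property is consumed (the tag and sbt project names below are illustrative): dev/run-tests adds -Dtest.include.tags=... to the sbt options, and the settings above translate it into ScalaTest "-n" and JUnit "--include-categories" arguments, so a hand-run equivalent would be roughly:

    build/sbt -Dtest.include.tags=org.apache.spark.tags.SlowHiveTest "hive/test"
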
diff --git a/python/pyspark/sql/tests.py b/python/pyspark/sql/tests.py
index c144b41..020542b 100644
--- a/python/pyspark/sql/tests.py
+++ b/python/pyspark/sql/tests.py
@@ -3214,7 +3214,8 @@ class SQLTests(ReusedSQLTestCase):
         if sys.version_info[0] < 3:
             all_types = set(['c', 'b', 'B', 'u', 'h', 'H', 'i', 'I', 'l', 'L', 'f', 'd'])
         else:
-            all_types = set(array.typecodes)
+            # PyPy does not seem to have array.typecodes.
+            all_types = set(['b', 'B', 'u', 'h', 'H', 'i', 'I', 'l', 'L', 'q', 'Q', 'f', 'd'])
         unsupported_types = all_types - set(supported_types)
         # test unsupported types
         for t in unsupported_types:
diff --git a/python/pyspark/streaming/tests.py b/python/pyspark/streaming/tests.py
index 2f7fa83..783ca40 100644
--- a/python/pyspark/streaming/tests.py
+++ b/python/pyspark/streaming/tests.py
@@ -26,6 +26,7 @@ import random
 import struct
 import shutil
 from functools import reduce
+import platform
 
 try:
     import xmlrunner
@@ -167,6 +168,9 @@ class PySparkStreamingTestCase(unittest.TestCase):
             output.sort(key=lambda x: x[0])
 
 
[email protected](
+    "pypy" in platform.python_implementation().lower(),
+    "The tests fail in PyPy3 implementation for an unknown reason.")
 class BasicOperationTests(PySparkStreamingTestCase):
 
     def test_map(self):
@@ -657,6 +661,9 @@ class StreamingListenerTests(PySparkStreamingTestCase):
             self.assertEqual(info.numRecords(), 0)
 
 
[email protected](
+    "pypy" in platform.python_implementation().lower(),
+    "The tests fail in PyPy3 implementation for an unknown reason.")
 class WindowFunctionTests(PySparkStreamingTestCase):
 
     timeout = 15
@@ -884,6 +891,9 @@ class StreamingContextTests(PySparkStreamingTestCase):
         self.assertTrue(self.ssc.awaitTerminationOrTimeout(0.001))
 
 
[email protected](
+    "pypy" in platform.python_implementation().lower(),
+    "The tests fail in PyPy3 implementation for an unknown reason.")
 class CheckpointTests(unittest.TestCase):
 
     setupCalled = False
diff --git a/sql/hive/compatibility/src/test/scala/org/apache/spark/sql/hive/execution/HiveCompatibilitySuite.scala b/sql/hive/compatibility/src/test/scala/org/apache/spark/sql/hive/execution/HiveCompatibilitySuite.scala
index cebaad5..dfe318c 100644
--- a/sql/hive/compatibility/src/test/scala/org/apache/spark/sql/hive/execution/HiveCompatibilitySuite.scala
+++ b/sql/hive/compatibility/src/test/scala/org/apache/spark/sql/hive/execution/HiveCompatibilitySuite.scala
@@ -26,10 +26,12 @@ import org.apache.spark.sql.catalyst.rules.RuleExecutor
 import org.apache.spark.sql.hive.HiveUtils
 import org.apache.spark.sql.hive.test.TestHive
 import org.apache.spark.sql.internal.SQLConf
+import org.apache.spark.tags.SlowHiveTest
 
 /**
  * Runs the test cases that are included in the hive distribution.
  */
+@SlowHiveTest
 class HiveCompatibilitySuite extends HiveQueryFileTest with BeforeAndAfter {
   // TODO: bundle in jar files... get from classpath
   private lazy val hiveQueryDir = TestHive.getHiveFile(
diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/HiveExternalCatalogVersionsSuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/HiveExternalCatalogVersionsSuite.scala
index e33d8ff..6d8ff78 100644
--- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/HiveExternalCatalogVersionsSuite.scala
+++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/HiveExternalCatalogVersionsSuite.scala
@@ -31,6 +31,7 @@ import org.apache.spark.sql.{QueryTest, Row, SparkSession}
 import org.apache.spark.sql.catalyst.TableIdentifier
 import org.apache.spark.sql.catalyst.catalog.CatalogTableType
 import org.apache.spark.sql.test.SQLTestUtils
+import org.apache.spark.tags.SlowHiveTest
 import org.apache.spark.util.Utils
 
 /**
@@ -41,6 +42,7 @@ import org.apache.spark.util.Utils
  * expected version under this local directory, e.g. `/tmp/spark-test/spark-2.0.3`, we will skip the
  * downloading for this spark version.
  */
+@SlowHiveTest
 class HiveExternalCatalogVersionsSuite extends SparkSubmitTestUtils {
   private val wareHousePath = Utils.createTempDir(namePrefix = "warehouse")
   private val tmpDataDir = Utils.createTempDir(namePrefix = "test-data")
diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/HiveSparkSubmitSuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/HiveSparkSubmitSuite.scala
index a676cf6..c62047a 100644
--- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/HiveSparkSubmitSuite.scala
+++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/HiveSparkSubmitSuite.scala
@@ -33,11 +33,13 @@ import org.apache.spark.sql.execution.command.DDLUtils
 import org.apache.spark.sql.expressions.Window
 import org.apache.spark.sql.hive.test.{TestHive, TestHiveContext}
 import org.apache.spark.sql.types.{DecimalType, StructType}
+import org.apache.spark.tags.SlowHiveTest
 import org.apache.spark.util.{ResetSystemProperties, Utils}
 
 /**
  * This suite tests spark-submit with applications using HiveContext.
  */
+@SlowHiveTest
 class HiveSparkSubmitSuite
   extends SparkSubmitTestUtils
   with Matchers
diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/client/VersionsSuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/client/VersionsSuite.scala
index ff4643a..7c66ff6 100644
--- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/client/VersionsSuite.scala
+++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/client/VersionsSuite.scala
@@ -38,7 +38,7 @@ import org.apache.spark.sql.hive.{HiveExternalCatalog, HiveUtils}
 import org.apache.spark.sql.hive.test.TestHiveVersion
 import org.apache.spark.sql.types.IntegerType
 import org.apache.spark.sql.types.StructType
-import org.apache.spark.tags.ExtendedHiveTest
+import org.apache.spark.tags.{ExtendedHiveTest, SlowHiveTest}
 import org.apache.spark.util.{MutableURLClassLoader, Utils}
 
 /**
@@ -48,6 +48,7 @@ import org.apache.spark.util.{MutableURLClassLoader, Utils}
  * is not fully tested.
  */
 // TODO: Refactor this to `HiveClientSuite` and make it a subclass of `HiveVersionSuite`
+@SlowHiveTest
 @ExtendedHiveTest
 class VersionsSuite extends SparkFunSuite with Logging {
 
diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/AggregationQuerySuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/AggregationQuerySuite.scala
index c65bf7c..1df5260 100644
--- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/AggregationQuerySuite.scala
+++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/AggregationQuerySuite.scala
@@ -31,6 +31,7 @@ import org.apache.spark.sql.hive.test.TestHiveSingleton
 import org.apache.spark.sql.internal.SQLConf
 import org.apache.spark.sql.test.SQLTestUtils
 import org.apache.spark.sql.types._
+import org.apache.spark.tags.SlowHiveTest
 
 
 class ScalaAggregateFunction(schema: StructType) extends UserDefinedAggregateFunction {
@@ -1024,6 +1025,7 @@ abstract class AggregationQuerySuite extends QueryTest with SQLTestUtils with Te
 class HashAggregationQuerySuite extends AggregationQuerySuite
 
 
+@SlowHiveTest
 class HashAggregationQueryWithControlledFallbackSuite extends AggregationQuerySuite {
 
   override protected def checkAnswer(actual: => DataFrame, expectedAnswer: Seq[Row]): Unit = {
diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveDDLSuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveDDLSuite.scala
index d590a2c..90915e0 100644
--- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveDDLSuite.scala
+++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveDDLSuite.scala
@@ -44,9 +44,11 @@ import org.apache.spark.sql.internal.SQLConf.ORC_IMPLEMENTATION
 import org.apache.spark.sql.internal.StaticSQLConf.CATALOG_IMPLEMENTATION
 import org.apache.spark.sql.test.SQLTestUtils
 import org.apache.spark.sql.types._
+import org.apache.spark.tags.SlowHiveTest
 import org.apache.spark.util.Utils
 
 // TODO(gatorsmile): combine HiveCatalogedDDLSuite and HiveDDLSuite
+@SlowHiveTest
 class HiveCatalogedDDLSuite extends DDLSuite with TestHiveSingleton with BeforeAndAfterEach {
   override def afterEach(): Unit = {
     try {
@@ -262,6 +264,7 @@ class HiveCatalogedDDLSuite extends DDLSuite with TestHiveSingleton with BeforeA
   }
 }
 
+@SlowHiveTest
 class HiveDDLSuite
   extends QueryTest with SQLTestUtils with TestHiveSingleton with BeforeAndAfterEach {
   import testImplicits._
diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveQuerySuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveQuerySuite.scala
index 6a7932f..e979415 100644
--- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveQuerySuite.scala
+++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveQuerySuite.scala
@@ -38,6 +38,7 @@ import org.apache.spark.sql.hive.test.TestHive
 import org.apache.spark.sql.hive.test.TestHive._
 import org.apache.spark.sql.internal.SQLConf
 import org.apache.spark.sql.test.SQLTestUtils
+import org.apache.spark.tags.SlowHiveTest
 
 case class TestData(a: Int, b: String)
 
@@ -45,6 +46,7 @@ case class TestData(a: Int, b: String)
  * A set of test cases expressed in Hive QL that are not covered by the tests
  * included in the hive distribution.
  */
+@SlowHiveTest
 class HiveQuerySuite extends HiveComparisonTest with SQLTestUtils with BeforeAndAfter {
   private val originalTimeZone = TimeZone.getDefault
   private val originalLocale = Locale.getDefault
diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/Hive_2_1_DDLSuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/Hive_2_1_DDLSuite.scala
index eaedac1..552a6ac 100644
--- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/Hive_2_1_DDLSuite.scala
+++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/Hive_2_1_DDLSuite.scala
@@ -30,13 +30,14 @@ import org.apache.spark.sql.hive.{HiveExternalCatalog, HiveUtils}
 import org.apache.spark.sql.hive.test.TestHiveSingleton
 import org.apache.spark.sql.internal.StaticSQLConf._
 import org.apache.spark.sql.types._
-import org.apache.spark.tags.ExtendedHiveTest
+import org.apache.spark.tags.{ExtendedHiveTest, SlowHiveTest}
 import org.apache.spark.util.Utils
 
 /**
  * A separate set of DDL tests that uses Hive 2.1 libraries, which behave a little differently
  * from the built-in ones.
  */
+@SlowHiveTest
 @ExtendedHiveTest
 class Hive_2_1_DDLSuite extends SparkFunSuite with TestHiveSingleton with BeforeAndAfterEach
   with BeforeAndAfterAll {
diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/SQLQuerySuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/SQLQuerySuite.scala
index f69f589..833a655 100644
--- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/SQLQuerySuite.scala
+++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/SQLQuerySuite.scala
@@ -41,6 +41,7 @@ import org.apache.spark.sql.hive.test.TestHiveSingleton
 import org.apache.spark.sql.internal.SQLConf
 import org.apache.spark.sql.test.SQLTestUtils
 import org.apache.spark.sql.types._
+import org.apache.spark.tags.SlowHiveTest
 import org.apache.spark.unsafe.types.CalendarInterval
 import org.apache.spark.util.Utils
 
@@ -67,6 +68,7 @@ case class Order(
  * Hive to generate them (in contrast to HiveQuerySuite).  Often this is because the query is
  * valid, but Hive currently cannot execute it.
  */
+@SlowHiveTest
 class SQLQuerySuite extends QueryTest with SQLTestUtils with TestHiveSingleton {
   import hiveContext._
   import spark.implicits._

