This is an automated email from the ASF dual-hosted git repository.
dongjoon pushed a commit to branch branch-3.0
in repository https://gitbox.apache.org/repos/asf/spark.git
The following commit(s) were added to refs/heads/branch-3.0 by this push:
new 56ec5dd [SPARK-32249][INFRA][3.0] Run Github Actions builds in
branch-3.0
56ec5dd is described below
commit 56ec5ddcac8233011c17fc7d120a284707f0f712
Author: HyukjinKwon <[email protected]>
AuthorDate: Wed Aug 19 07:20:26 2020 -0700
[SPARK-32249][INFRA][3.0] Run Github Actions builds in branch-3.0
### What changes were proposed in this pull request?
This PR proposes to backport the following JIRAs:
- SPARK-32245
- SPARK-32292
- SPARK-32252
- SPARK-32316
- SPARK-32408
- SPARK-32303
- SPARK-32363
- SPARK-32419
- SPARK-32422
- SPARK-32491
- SPARK-32493
- SPARK-32496
- SPARK-32497
- SPARK-32357
- SPARK-32606
- SPARK-32605
- SPARK-32248
- SPARK-32645
- Minor renaming d0dfe49#diff-02d9c370a663741451423342d5869b21
in order to enable GitHub Actions in branch-3.0.
### Why are the changes needed?
To be able to run the tests in branch-3.0. Jenkins jobs are unstable.
### Does this PR introduce _any_ user-facing change?
No, dev-only.
### How was this patch tested?
Build in this PR will test.
Closes #29460 from HyukjinKwon/SPARK-32249.
Lead-authored-by: HyukjinKwon <[email protected]>
Co-authored-by: Hyukjin Kwon <[email protected]>
Co-authored-by: Dongjoon Hyun <[email protected]>
Signed-off-by: Dongjoon Hyun <[email protected]>
---
.github/workflows/build_and_test.yml | 283 +++++++++++++++++++++
.github/workflows/master.yml | 156 ------------
.github/workflows/test_report.yml | 24 ++
.../spark/tags/GitHubActionsUnstableTest.java | 30 +++
.../java/org/apache/spark/tags/SlowHiveTest.java | 30 +++
dev/run-pip-tests | 10 +-
dev/run-tests.py | 185 ++++++++++----
dev/sparktestsupport/modules.py | 93 +++++--
project/SparkBuild.scala | 9 +
python/pyspark/sql/tests/test_arrow.py | 3 +
python/pyspark/sql/tests/test_types.py | 3 +-
python/pyspark/streaming/tests/test_dstream.py | 15 +-
python/run-tests.py | 2 +-
.../apache/spark/sql/IntegratedUDFTestUtils.scala | 2 +-
.../thriftserver/ThriftServerQueryTestSuite.scala | 2 +
.../ThriftServerWithSparkContextSuite.scala | 3 +
.../hive/execution/HiveCompatibilitySuite.scala | 2 +
.../hive/HiveExternalCatalogVersionsSuite.scala | 3 +-
.../spark/sql/hive/HiveSparkSubmitSuite.scala | 3 +-
.../spark/sql/hive/client/VersionsSuite.scala | 3 +-
.../sql/hive/execution/AggregationQuerySuite.scala | 2 +
.../spark/sql/hive/execution/HiveDDLSuite.scala | 3 +
.../spark/sql/hive/execution/HiveQuerySuite.scala | 2 +
.../sql/hive/execution/Hive_2_1_DDLSuite.scala | 3 +-
.../spark/sql/hive/execution/SQLQuerySuite.scala | 3 +
25 files changed, 639 insertions(+), 235 deletions(-)
diff --git a/.github/workflows/build_and_test.yml
b/.github/workflows/build_and_test.yml
new file mode 100644
index 0000000..581ad8a
--- /dev/null
+++ b/.github/workflows/build_and_test.yml
@@ -0,0 +1,283 @@
+name: Build and test
+
+on:
+ push:
+ branches:
+ - branch-3.0
+ pull_request:
+ branches:
+ - branch-3.0
+
+jobs:
+ # Build: build Spark and run the tests for specified modules.
+ build:
+ name: "Build modules: ${{ matrix.modules }} ${{ matrix.comment }} (JDK ${{
matrix.java }}, ${{ matrix.hadoop }}, ${{ matrix.hive }})"
+ runs-on: ubuntu-latest
+ strategy:
+ fail-fast: false
+ matrix:
+ java:
+ - 1.8
+ hadoop:
+ - hadoop2.7
+ hive:
+ - hive2.3
+ # TODO(SPARK-32246): We don't test 'streaming-kinesis-asl' for now.
+ # Kinesis tests depend on the external Amazon Kinesis service.
+ # Note that the modules below are from sparktestsupport/modules.py.
+ modules:
+ - >-
+ core, unsafe, kvstore, avro,
+ network-common, network-shuffle, repl, launcher,
+ examples, sketch, graphx
+ - >-
+ catalyst, hive-thriftserver
+ - >-
+ streaming, sql-kafka-0-10, streaming-kafka-0-10,
+ mllib-local, mllib,
+ yarn, mesos, kubernetes, hadoop-cloud, spark-ganglia-lgpl
+ - >-
+ pyspark-sql, pyspark-mllib
+ - >-
+ pyspark-core, pyspark-streaming, pyspark-ml
+ - >-
+ sparkr
+ # Here, we split Hive and SQL tests into the slow ones and the
rest of them.
+ included-tags: [""]
+ # Some tests are disabled in GitHub Actions. Ideally, we should remove
this tag
+ # and run all tests.
+ excluded-tags: ["org.apache.spark.tags.GitHubActionsUnstableTest"]
+ comment: [""]
+ include:
+ # Hive tests
+ - modules: hive
+ java: 1.8
+ hadoop: hadoop2.7
+ hive: hive2.3
+ included-tags: org.apache.spark.tags.SlowHiveTest
+ comment: "- slow tests"
+ - modules: hive
+ java: 1.8
+ hadoop: hadoop2.7
+ hive: hive2.3
+ excluded-tags:
org.apache.spark.tags.SlowHiveTest,org.apache.spark.tags.GitHubActionsUnstableTest
+ comment: "- other tests"
+ # SQL tests
+ - modules: sql
+ java: 1.8
+ hadoop: hadoop2.7
+ hive: hive2.3
+ included-tags: org.apache.spark.tags.ExtendedSQLTest
+ comment: "- slow tests"
+ - modules: sql
+ java: 1.8
+ hadoop: hadoop2.7
+ hive: hive2.3
+ excluded-tags:
org.apache.spark.tags.ExtendedSQLTest,org.apache.spark.tags.GitHubActionsUnstableTest
+ comment: "- other tests"
+ env:
+ MODULES_TO_TEST: ${{ matrix.modules }}
+ EXCLUDED_TAGS: ${{ matrix.excluded-tags }}
+ INCLUDED_TAGS: ${{ matrix.included-tags }}
+ HADOOP_PROFILE: ${{ matrix.hadoop }}
+ HIVE_PROFILE: ${{ matrix.hive }}
+ # GitHub Actions' default miniconda to use in pip packaging test.
+ CONDA_PREFIX: /usr/share/miniconda
+ GITHUB_PREV_SHA: ${{ github.event.before }}
+ steps:
+ - name: Checkout Spark repository
+ uses: actions/checkout@v2
+ # In order to fetch changed files
+ with:
+ fetch-depth: 0
+ # Cache local repositories. Note that GitHub Actions cache has a 2G limit.
+ - name: Cache Scala, SBT, Maven and Zinc
+ uses: actions/cache@v1
+ with:
+ path: build
+ key: build-${{ hashFiles('**/pom.xml') }}
+ restore-keys: |
+ build-
+ - name: Cache Maven local repository
+ uses: actions/cache@v2
+ with:
+ path: ~/.m2/repository
+ key: ${{ matrix.java }}-${{ matrix.hadoop }}-maven-${{
hashFiles('**/pom.xml') }}
+ restore-keys: |
+ ${{ matrix.java }}-${{ matrix.hadoop }}-maven-
+ - name: Cache Ivy local repository
+ uses: actions/cache@v2
+ with:
+ path: ~/.ivy2/cache
+ key: ${{ matrix.java }}-${{ matrix.hadoop }}-ivy-${{
hashFiles('**/pom.xml') }}-${{ hashFiles('**/plugins.sbt') }}
+ restore-keys: |
+ ${{ matrix.java }}-${{ matrix.hadoop }}-ivy-
+ - name: Install JDK ${{ matrix.java }}
+ uses: actions/setup-java@v1
+ with:
+ java-version: ${{ matrix.java }}
+ # PySpark
+ - name: Install PyPy3
+ # Note that order of Python installations here matters because default
python3 is
+ # overridden by pypy3.
+ uses: actions/setup-python@v2
+ if: contains(matrix.modules, 'pyspark')
+ with:
+ python-version: pypy3
+ architecture: x64
+ - name: Install Python 2.7
+ uses: actions/setup-python@v2
+ if: contains(matrix.modules, 'pyspark')
+ with:
+ python-version: 2.7
+ architecture: x64
+ - name: Install Python 3.8
+ uses: actions/setup-python@v2
+ # We should install one Python that is higher than 3+ for SQL and Yarn
because:
+ # - SQL component also has Python related tests, for example,
IntegratedUDFTestUtils.
+ # - Yarn has a Python specific test too, for example, YarnClusterSuite.
+ if: contains(matrix.modules, 'yarn') || contains(matrix.modules,
'pyspark') || (contains(matrix.modules, 'sql') && !contains(matrix.modules,
'sql-'))
+ with:
+ python-version: 3.8
+ architecture: x64
+ - name: Install Python packages (Python 2.7 and PyPy3)
+ if: contains(matrix.modules, 'pyspark')
+ # PyArrow is not supported in PyPy yet, see ARROW-2651.
+ # TODO(SPARK-32247): scipy installation with PyPy fails for an unknown
reason.
+ run: |
+ python2.7 -m pip install numpy pyarrow pandas scipy xmlrunner
+ python2.7 -m pip list
+ # PyPy does not have xmlrunner
+ pypy3 -m pip install numpy pandas
+ pypy3 -m pip list
+ - name: Install Python packages (Python 3.8)
+ if: contains(matrix.modules, 'pyspark') || (contains(matrix.modules,
'sql') && !contains(matrix.modules, 'sql-'))
+ run: |
+ python3.8 -m pip install numpy pyarrow pandas scipy xmlrunner
+ python3.8 -m pip list
+ # SparkR
+ - name: Install R 4.0
+ if: contains(matrix.modules, 'sparkr')
+ run: |
+ sudo sh -c "echo 'deb https://cloud.r-project.org/bin/linux/ubuntu
bionic-cran40/' >> /etc/apt/sources.list"
+ curl -sL
"https://keyserver.ubuntu.com/pks/lookup?op=get&search=0xE298A3A825C0D65DFD57CBB651716619E084DAB9"
| sudo apt-key add
+ sudo apt-get update
+ sudo apt-get install -y r-base r-base-dev libcurl4-openssl-dev
+ - name: Install R packages
+ if: contains(matrix.modules, 'sparkr')
+ run: |
+ # qpdf is required to reduce the size of PDFs to make CRAN check pass.
See SPARK-32497.
+ sudo apt-get install -y libcurl4-openssl-dev qpdf
+ sudo Rscript -e "install.packages(c('knitr', 'rmarkdown', 'testthat',
'devtools', 'e1071', 'survival', 'arrow', 'roxygen2'),
repos='https://cloud.r-project.org/')"
+ # Show installed packages in R.
+ sudo Rscript -e 'pkg_list <- as.data.frame(installed.packages()[,
c(1,3:4)]); pkg_list[is.na(pkg_list$Priority), 1:2, drop = FALSE]'
+ # Run the tests.
+ - name: Run tests
+ run: |
+ # Hive tests become flaky when running in parallel as it's too
intensive.
+ if [[ "$MODULES_TO_TEST" == "hive" ]]; then export SERIAL_SBT_TESTS=1;
fi
+ mkdir -p ~/.m2
+ ./dev/run-tests --parallelism 2 --modules "$MODULES_TO_TEST"
--included-tags "$INCLUDED_TAGS" --excluded-tags "$EXCLUDED_TAGS"
+ rm -rf ~/.m2/repository/org/apache/spark
+ - name: Upload test results to report
+ if: always()
+ uses: actions/upload-artifact@v2
+ with:
+ name: test-results-${{ matrix.modules }}-${{ matrix.comment }}-${{
matrix.java }}-${{ matrix.hadoop }}-${{ matrix.hive }}
+ path: "**/target/test-reports/*.xml"
+ - name: Upload unit tests log files
+ if: failure()
+ uses: actions/upload-artifact@v2
+ with:
+ name: unit-tests-log-${{ matrix.modules }}-${{ matrix.comment }}-${{
matrix.java }}-${{ matrix.hadoop }}-${{ matrix.hive }}
+ path: "**/target/unit-tests.log"
+
+ # Static analysis, and documentation build
+ lint:
+ name: Linters, licenses, dependencies and documentation generation
+ runs-on: ubuntu-latest
+ steps:
+ - name: Checkout Spark repository
+ uses: actions/checkout@v2
+ - name: Cache Maven local repository
+ uses: actions/cache@v2
+ with:
+ path: ~/.m2/repository
+ key: docs-maven-repo-${{ hashFiles('**/pom.xml') }}
+ restore-keys: |
+ docs-maven-
+ - name: Install JDK 1.8
+ uses: actions/setup-java@v1
+ with:
+ java-version: 1.8
+ - name: Install Python 3.6
+ uses: actions/setup-python@v2
+ with:
+ python-version: 3.6
+ architecture: x64
+ - name: Install Python linter dependencies
+ run: |
+ pip3 install flake8 sphinx numpy
+ - name: Install R 4.0
+ run: |
+ sudo sh -c "echo 'deb https://cloud.r-project.org/bin/linux/ubuntu
bionic-cran40/' >> /etc/apt/sources.list"
+ curl -sL
"https://keyserver.ubuntu.com/pks/lookup?op=get&search=0xE298A3A825C0D65DFD57CBB651716619E084DAB9"
| sudo apt-key add
+ sudo apt-get update
+ sudo apt-get install -y r-base r-base-dev libcurl4-openssl-dev
+ - name: Install R linter dependencies and SparkR
+ run: |
+ sudo apt-get install -y libcurl4-openssl-dev
+ sudo Rscript -e "install.packages(c('devtools'),
repos='https://cloud.r-project.org/')"
+ sudo Rscript -e "devtools::install_github('jimhester/[email protected]')"
+ ./R/install-dev.sh
+ - name: Install Ruby 2.7 for documentation generation
+ uses: actions/setup-ruby@v1
+ with:
+ ruby-version: 2.7
+ - name: Install dependencies for documentation generation
+ run: |
+ sudo apt-get install -y libcurl4-openssl-dev pandoc
+ pip install sphinx mkdocs numpy
+ gem install jekyll jekyll-redirect-from rouge
+ sudo Rscript -e "install.packages(c('devtools', 'testthat', 'knitr',
'rmarkdown', 'roxygen2'), repos='https://cloud.r-project.org/')"
+ - name: Scala linter
+ run: ./dev/lint-scala
+ - name: Java linter
+ run: ./dev/lint-java
+ - name: Python linter
+ run: ./dev/lint-python
+ - name: R linter
+ run: ./dev/lint-r
+ - name: License test
+ run: ./dev/check-license
+ - name: Dependencies test
+ run: ./dev/test-dependencies.sh
+ - name: Run documentation build
+ run: |
+ cd docs
+ jekyll build
+
+ java11:
+ name: Java 11 build
+ runs-on: ubuntu-latest
+ steps:
+ - name: Checkout Spark repository
+ uses: actions/checkout@v2
+ - name: Cache Maven local repository
+ uses: actions/cache@v2
+ with:
+ path: ~/.m2/repository
+ key: java11-maven-${{ hashFiles('**/pom.xml') }}
+ restore-keys: |
+ java11-maven-
+ - name: Install Java 11
+ uses: actions/setup-java@v1
+ with:
+ java-version: 11
+ - name: Build with Maven
+ run: |
+ export MAVEN_OPTS="-Xmx2g -XX:ReservedCodeCacheSize=1g
-Dorg.slf4j.simpleLogger.defaultLogLevel=WARN"
+ export MAVEN_CLI_OPTS="--no-transfer-progress"
+ mkdir -p ~/.m2
+ ./build/mvn $MAVEN_CLI_OPTS -DskipTests -Pyarn -Pmesos -Pkubernetes
-Phive -Phive-thriftserver -Phadoop-cloud -Djava.version=11 install
+ rm -rf ~/.m2/repository/org/apache/spark
diff --git a/.github/workflows/master.yml b/.github/workflows/master.yml
deleted file mode 100644
index 36f9e33..0000000
--- a/.github/workflows/master.yml
+++ /dev/null
@@ -1,156 +0,0 @@
-name: master
-
-on:
- push:
- branches:
- - branch-3.0
- pull_request:
- branches:
- - branch-3.0
-
-jobs:
- build:
-
- runs-on: ubuntu-latest
- strategy:
- matrix:
- java: [ '1.8', '11' ]
- hadoop: [ 'hadoop-2.7', 'hadoop-3.2' ]
- hive: [ 'hive-1.2', 'hive-2.3' ]
- exclude:
- - java: '11'
- hive: 'hive-1.2'
- - hadoop: 'hadoop-3.2'
- hive: 'hive-1.2'
- name: Build Spark - JDK${{ matrix.java }}/${{ matrix.hadoop }}/${{
matrix.hive }}
-
- steps:
- - uses: actions/checkout@master
- # We split caches because GitHub Action Cache has a 400MB-size limit.
- - uses: actions/cache@v1
- with:
- path: build
- key: build-${{ hashFiles('**/pom.xml') }}
- restore-keys: |
- build-
- - uses: actions/cache@v1
- with:
- path: ~/.m2/repository/com
- key: ${{ matrix.java }}-${{ matrix.hadoop }}-maven-com-${{
hashFiles('**/pom.xml') }}
- restore-keys: |
- ${{ matrix.java }}-${{ matrix.hadoop }}-maven-com-
- - uses: actions/cache@v1
- with:
- path: ~/.m2/repository/org
- key: ${{ matrix.java }}-${{ matrix.hadoop }}-maven-org-${{
hashFiles('**/pom.xml') }}
- restore-keys: |
- ${{ matrix.java }}-${{ matrix.hadoop }}-maven-org-
- - uses: actions/cache@v1
- with:
- path: ~/.m2/repository/net
- key: ${{ matrix.java }}-${{ matrix.hadoop }}-maven-net-${{
hashFiles('**/pom.xml') }}
- restore-keys: |
- ${{ matrix.java }}-${{ matrix.hadoop }}-maven-net-
- - uses: actions/cache@v1
- with:
- path: ~/.m2/repository/io
- key: ${{ matrix.java }}-${{ matrix.hadoop }}-maven-io-${{
hashFiles('**/pom.xml') }}
- restore-keys: |
- ${{ matrix.java }}-${{ matrix.hadoop }}-maven-io-
- - name: Set up JDK ${{ matrix.java }}
- uses: actions/setup-java@v1
- with:
- java-version: ${{ matrix.java }}
- - name: Build with Maven
- run: |
- export MAVEN_OPTS="-Xmx2g -XX:ReservedCodeCacheSize=1g
-Dorg.slf4j.simpleLogger.defaultLogLevel=WARN"
- export MAVEN_CLI_OPTS="--no-transfer-progress"
- mkdir -p ~/.m2
- ./build/mvn $MAVEN_CLI_OPTS -DskipTests -Pyarn -Pmesos -Pkubernetes
-Phive -P${{ matrix.hive }} -Phive-thriftserver -P${{ matrix.hadoop }}
-Phadoop-cloud -Djava.version=${{ matrix.java }} install
- rm -rf ~/.m2/repository/org/apache/spark
-
-
- lint:
- runs-on: ubuntu-latest
- name: Linters (Java/Scala/Python), licenses, dependencies
- steps:
- - uses: actions/checkout@master
- - uses: actions/setup-java@v1
- with:
- java-version: '11'
- - uses: actions/setup-python@v1
- with:
- python-version: '3.x'
- architecture: 'x64'
- - name: Scala
- run: ./dev/lint-scala
- - name: Java
- run: ./dev/lint-java
- - name: Python
- run: |
- pip install flake8 sphinx numpy
- ./dev/lint-python
- - name: License
- run: ./dev/check-license
- - name: Dependencies
- run: ./dev/test-dependencies.sh
-
- lintr:
- runs-on: ubuntu-latest
- name: Linter (R)
- steps:
- - uses: actions/checkout@master
- - uses: actions/setup-java@v1
- with:
- java-version: '11'
- - uses: r-lib/actions/setup-r@v1
- with:
- r-version: '3.6.2'
- - name: Install lib
- run: |
- sudo apt-get install -y libcurl4-openssl-dev
- - name: install R packages
- run: |
- sudo Rscript -e "install.packages(c('curl', 'xml2', 'httr',
'devtools', 'testthat', 'knitr', 'rmarkdown', 'roxygen2', 'e1071', 'survival'),
repos='https://cloud.r-project.org/')"
- sudo Rscript -e "devtools::install_github('jimhester/[email protected]')"
- - name: package and install SparkR
- run: ./R/install-dev.sh
- - name: lint-r
- run: ./dev/lint-r
-
- docs:
- runs-on: ubuntu-latest
- name: Generate documents
- steps:
- - uses: actions/checkout@master
- - uses: actions/cache@v1
- with:
- path: ~/.m2/repository
- key: docs-maven-repo-${{ hashFiles('**/pom.xml') }}
- restore-keys: |
- docs-maven-repo-
- - uses: actions/setup-java@v1
- with:
- java-version: '1.8'
- - uses: actions/setup-python@v1
- with:
- python-version: '3.x'
- architecture: 'x64'
- - uses: actions/setup-ruby@v1
- with:
- ruby-version: '2.7'
- - uses: r-lib/actions/setup-r@v1
- with:
- r-version: '3.6.2'
- - name: Install lib and pandoc
- run: |
- sudo apt-get install -y libcurl4-openssl-dev pandoc
- - name: Install packages
- run: |
- pip install sphinx mkdocs numpy
- gem install jekyll jekyll-redirect-from rouge
- sudo Rscript -e "install.packages(c('curl', 'xml2', 'httr',
'devtools', 'testthat', 'knitr', 'rmarkdown', 'roxygen2', 'e1071', 'survival'),
repos='https://cloud.r-project.org/')"
- - name: Run jekyll build
- run: |
- cd docs
- jekyll build
diff --git a/.github/workflows/test_report.yml
b/.github/workflows/test_report.yml
new file mode 100644
index 0000000..93cdb86
--- /dev/null
+++ b/.github/workflows/test_report.yml
@@ -0,0 +1,24 @@
+name: Report test results
+on:
+ workflow_run:
+ workflows: ["Build and test"]
+ types:
+ - completed
+
+jobs:
+ test_report:
+ runs-on: ubuntu-latest
+ steps:
+ - name: Download test results to report
+ uses: dawidd6/action-download-artifact@v2
+ with:
+ github_token: ${{ secrets.GITHUB_TOKEN }}
+ workflow: ${{ github.event.workflow_run.workflow_id }}
+ commit: ${{ github.event.workflow_run.head_commit.id }}
+ - name: Publish test report
+ uses: scacap/action-surefire-report@v1
+ with:
+ check_name: Report test results
+ github_token: ${{ secrets.GITHUB_TOKEN }}
+ report_paths: "**/target/test-reports/*.xml"
+ commit: ${{ github.event.workflow_run.head_commit.id }}
diff --git
a/common/tags/src/test/java/org/apache/spark/tags/GitHubActionsUnstableTest.java
b/common/tags/src/test/java/org/apache/spark/tags/GitHubActionsUnstableTest.java
new file mode 100644
index 0000000..a602656
--- /dev/null
+++
b/common/tags/src/test/java/org/apache/spark/tags/GitHubActionsUnstableTest.java
@@ -0,0 +1,30 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.spark.tags;
+
+import org.scalatest.TagAnnotation;
+
+import java.lang.annotation.ElementType;
+import java.lang.annotation.Retention;
+import java.lang.annotation.RetentionPolicy;
+import java.lang.annotation.Target;
+
+@TagAnnotation
+@Retention(RetentionPolicy.RUNTIME)
+@Target({ElementType.METHOD, ElementType.TYPE})
+public @interface GitHubActionsUnstableTest { }
diff --git a/common/tags/src/test/java/org/apache/spark/tags/SlowHiveTest.java
b/common/tags/src/test/java/org/apache/spark/tags/SlowHiveTest.java
new file mode 100644
index 0000000..a7e6f35
--- /dev/null
+++ b/common/tags/src/test/java/org/apache/spark/tags/SlowHiveTest.java
@@ -0,0 +1,30 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.spark.tags;
+
+import org.scalatest.TagAnnotation;
+
+import java.lang.annotation.ElementType;
+import java.lang.annotation.Retention;
+import java.lang.annotation.RetentionPolicy;
+import java.lang.annotation.Target;
+
+@TagAnnotation
+@Retention(RetentionPolicy.RUNTIME)
+@Target({ElementType.METHOD, ElementType.TYPE})
+public @interface SlowHiveTest { }
diff --git a/dev/run-pip-tests b/dev/run-pip-tests
index 81e33a6..b322d3f 100755
--- a/dev/run-pip-tests
+++ b/dev/run-pip-tests
@@ -63,7 +63,7 @@ fi
PYSPARK_VERSION=$(python3 -c
"exec(open('python/pyspark/version.py').read());print(__version__)")
PYSPARK_DIST="$FWDIR/python/dist/pyspark-$PYSPARK_VERSION.tar.gz"
# The pip install options we use for all the pip commands
-PIP_OPTIONS="--upgrade --no-cache-dir --force-reinstall "
+PIP_OPTIONS="--upgrade --no-cache-dir --force-reinstall"
# Test both regular user and edit/dev install modes.
PIP_COMMANDS=("pip install $PIP_OPTIONS $PYSPARK_DIST"
"pip install $PIP_OPTIONS -e python/")
@@ -80,8 +80,12 @@ for python in "${PYTHON_EXECS[@]}"; do
VIRTUALENV_PATH="$VIRTUALENV_BASE"/$python
rm -rf "$VIRTUALENV_PATH"
if [ -n "$USE_CONDA" ]; then
+ if [ -f "$CONDA_PREFIX/etc/profile.d/conda.sh" ]; then
+ # See also https://github.com/conda/conda/issues/7980
+ source "$CONDA_PREFIX/etc/profile.d/conda.sh"
+ fi
conda create -y -p "$VIRTUALENV_PATH" python=$python numpy pandas pip
setuptools
- source activate "$VIRTUALENV_PATH"
+ source activate "$VIRTUALENV_PATH" || conda activate "$VIRTUALENV_PATH"
else
mkdir -p "$VIRTUALENV_PATH"
virtualenv --python=$python "$VIRTUALENV_PATH"
@@ -124,7 +128,7 @@ for python in "${PYTHON_EXECS[@]}"; do
# conda / virtualenv environments need to be deactivated differently
if [ -n "$USE_CONDA" ]; then
- source deactivate
+ source deactivate || conda deactivate
else
deactivate
fi
diff --git a/dev/run-tests.py b/dev/run-tests.py
index 5255a77..976854d 100755
--- a/dev/run-tests.py
+++ b/dev/run-tests.py
@@ -49,13 +49,12 @@ def determine_modules_for_files(filenames):
['pyspark-core', 'sql']
>>> [x.name for x in
determine_modules_for_files(["file_not_matched_by_any_subproject"])]
['root']
- >>> [x.name for x in determine_modules_for_files( \
- [".github/workflows/master.yml", "appveyor.yml"])]
+ >>> [x.name for x in determine_modules_for_files(["appveyor.yml"])]
[]
"""
changed_modules = set()
for filename in filenames:
- if filename in (".github/workflows/master.yml", "appveyor.yml"):
+ if filename in ("appveyor.yml",):
continue
matched_at_least_one_module = False
for module in modules.all_modules:
@@ -101,28 +100,52 @@ def setup_test_environ(environ):
os.environ[k] = v
-def determine_modules_to_test(changed_modules):
+def determine_modules_to_test(changed_modules, deduplicated=True):
"""
Given a set of modules that have changed, compute the transitive closure
of those modules'
dependent modules in order to determine the set of modules that should be
tested.
Returns a topologically-sorted list of modules (ties are broken by sorting
on module names).
+ If ``deduplicated`` is disabled, the modules are returned without taking
the deduplication
+ by dependencies into account.
>>> [x.name for x in determine_modules_to_test([modules.root])]
['root']
>>> [x.name for x in determine_modules_to_test([modules.build])]
['root']
+ >>> [x.name for x in determine_modules_to_test([modules.core])]
+ ['root']
+ >>> [x.name for x in determine_modules_to_test([modules.launcher])]
+ ['root']
>>> [x.name for x in determine_modules_to_test([modules.graphx])]
['graphx', 'examples']
- >>> x = [x.name for x in determine_modules_to_test([modules.sql])]
- >>> x # doctest: +NORMALIZE_WHITESPACE
+ >>> [x.name for x in determine_modules_to_test([modules.sql])]
+ ... # doctest: +NORMALIZE_WHITESPACE
['sql', 'avro', 'hive', 'mllib', 'sql-kafka-0-10', 'examples',
'hive-thriftserver',
'pyspark-sql', 'repl', 'sparkr', 'pyspark-mllib', 'pyspark-ml']
+ >>> sorted([x.name for x in determine_modules_to_test(
+ ... [modules.sparkr, modules.sql], deduplicated=False)])
+ ... # doctest: +NORMALIZE_WHITESPACE
+ ['avro', 'examples', 'hive', 'hive-thriftserver', 'mllib', 'pyspark-ml',
+ 'pyspark-mllib', 'pyspark-sql', 'repl', 'sparkr', 'sql', 'sql-kafka-0-10']
+ >>> sorted([x.name for x in determine_modules_to_test(
+ ... [modules.sql, modules.core], deduplicated=False)])
+ ... # doctest: +NORMALIZE_WHITESPACE
+ ['avro', 'catalyst', 'core', 'examples', 'graphx', 'hive',
'hive-thriftserver',
+ 'mllib', 'mllib-local', 'pyspark-core', 'pyspark-ml', 'pyspark-mllib',
+ 'pyspark-sql', 'pyspark-streaming', 'repl', 'root',
+ 'sparkr', 'sql', 'sql-kafka-0-10', 'streaming', 'streaming-kafka-0-10',
+ 'streaming-kinesis-asl']
"""
modules_to_test = set()
for module in changed_modules:
- modules_to_test =
modules_to_test.union(determine_modules_to_test(module.dependent_modules))
+ modules_to_test = modules_to_test.union(
+ determine_modules_to_test(module.dependent_modules, deduplicated))
modules_to_test = modules_to_test.union(set(changed_modules))
+
+ if not deduplicated:
+ return modules_to_test
+
# If we need to run all of the tests, then we should short-circuit and
return 'root'
if modules.root in modules_to_test:
return [modules.root]
@@ -415,7 +438,7 @@ def run_scala_tests_sbt(test_modules, test_profiles):
exec_sbt(profiles_and_goals)
-def run_scala_tests(build_tool, extra_profiles, test_modules, excluded_tags):
+def run_scala_tests(build_tool, extra_profiles, test_modules, excluded_tags,
included_tags):
"""Function to properly execute all tests passed in as a set from the
`determine_test_suites` function"""
set_title_and_block("Running Spark unit tests", "BLOCK_SPARK_UNIT_TESTS")
@@ -425,6 +448,8 @@ def run_scala_tests(build_tool, extra_profiles,
test_modules, excluded_tags):
test_profiles = extra_profiles + \
list(set(itertools.chain.from_iterable(m.build_profile_flags for m in
test_modules)))
+ if included_tags:
+ test_profiles += ['-Dtest.include.tags=' + ",".join(included_tags)]
if excluded_tags:
test_profiles += ['-Dtest.exclude.tags=' + ",".join(excluded_tags)]
@@ -532,6 +557,24 @@ def parse_opts():
"-p", "--parallelism", type=int, default=8,
help="The number of suites to test in parallel (default %(default)d)"
)
+ parser.add_argument(
+ "-m", "--modules", type=str,
+ default=None,
+ help="A comma-separated list of modules to test "
+ "(default: %s)" % ",".join(sorted([m.name for m in
modules.all_modules]))
+ )
+ parser.add_argument(
+ "-e", "--excluded-tags", type=str,
+ default=None,
+ help="A comma-separated list of tags to exclude in the tests, "
+ "e.g., org.apache.spark.tags.ExtendedHiveTest "
+ )
+ parser.add_argument(
+ "-i", "--included-tags", type=str,
+ default=None,
+ help="A comma-separated list of tags to include in the tests, "
+ "e.g., org.apache.spark.tags.ExtendedHiveTest "
+ )
args, unknown = parser.parse_known_args()
if unknown:
@@ -564,11 +607,20 @@ def main():
" install one and retry.")
sys.exit(2)
- # install SparkR
- if which("R"):
- run_cmd([os.path.join(SPARK_HOME, "R", "install-dev.sh")])
- else:
- print("Cannot install SparkR as R was not found in PATH")
+ # Install SparkR
+ should_only_test_modules = opts.modules is not None
+ test_modules = []
+ if should_only_test_modules:
+ str_test_modules = [m.strip() for m in opts.modules.split(",")]
+ test_modules = [m for m in modules.all_modules if m.name in
str_test_modules]
+
+ if not should_only_test_modules or modules.sparkr in test_modules:
+ # If test modules are specified, we will not run the R linter.
+ # SparkR needs the manual SparkR installation.
+ if which("R"):
+ run_cmd([os.path.join(SPARK_HOME, "R", "install-dev.sh")])
+ else:
+ print("Cannot install SparkR as R was not found in PATH")
if os.environ.get("AMPLAB_JENKINS"):
# if we're on the Amplab Jenkins build servers setup variables
@@ -582,27 +634,69 @@ def main():
# /home/jenkins/anaconda2/envs/py36/bin
os.environ["PATH"] = "/home/anaconda/envs/py36/bin:" +
os.environ.get("PATH")
else:
- # else we're running locally and can use local settings
+ # else we're running locally or Github Actions.
build_tool = "sbt"
hadoop_version = os.environ.get("HADOOP_PROFILE", "hadoop2.7")
hive_version = os.environ.get("HIVE_PROFILE", "hive2.3")
- test_env = "local"
+ if "GITHUB_ACTIONS" in os.environ:
+ test_env = "github_actions"
+ else:
+ test_env = "local"
print("[info] Using build tool", build_tool, "with Hadoop profile",
hadoop_version,
"and Hive profile", hive_version, "under environment", test_env)
extra_profiles = get_hadoop_profiles(hadoop_version) +
get_hive_profiles(hive_version)
- changed_modules = None
- changed_files = None
- if test_env == "amplab_jenkins" and os.environ.get("AMP_JENKINS_PRB"):
+ changed_modules = []
+ changed_files = []
+ included_tags = []
+ excluded_tags = []
+ if should_only_test_modules:
+ # If we're running the tests in Github Actions, attempt to detect and
test
+ # only the affected modules.
+ if test_env == "github_actions":
+ if os.environ["GITHUB_BASE_REF"] != "":
+ # Pull requests
+ changed_files = identify_changed_files_from_git_commits(
+ os.environ["GITHUB_SHA"],
target_branch=os.environ["GITHUB_BASE_REF"])
+ else:
+ # Build for each commit.
+ changed_files = identify_changed_files_from_git_commits(
+ os.environ["GITHUB_SHA"],
target_ref=os.environ["GITHUB_PREV_SHA"])
+
+ modules_to_test = determine_modules_to_test(
+ determine_modules_for_files(changed_files), deduplicated=False)
+
+ if modules.root not in modules_to_test:
+ # If root module is not found, only test the intersected
modules.
+ # If root module is found, just run the modules as specified
initially.
+ test_modules =
list(set(modules_to_test).intersection(test_modules))
+
+ changed_modules = test_modules
+ if len(changed_modules) == 0:
+ print("[info] There are no modules to test, exiting without
testing.")
+ return
+
+ # If we're running the tests in AMPLab Jenkins, calculate the diff from
the targeted branch, and
+ # detect modules to test.
+ elif test_env == "amplab_jenkins" and os.environ.get("AMP_JENKINS_PRB"):
target_branch = os.environ["ghprbTargetBranch"]
changed_files = identify_changed_files_from_git_commits("HEAD",
target_branch=target_branch)
changed_modules = determine_modules_for_files(changed_files)
+ test_modules = determine_modules_to_test(changed_modules)
excluded_tags = determine_tags_to_exclude(changed_modules)
+ # If there is no changed module found, test all.
if not changed_modules:
changed_modules = [modules.root]
- excluded_tags = []
+ if not test_modules:
+ test_modules = determine_modules_to_test(changed_modules)
+
+ if opts.excluded_tags:
+ excluded_tags.extend([t.strip() for t in
opts.excluded_tags.split(",")])
+ if opts.included_tags:
+ included_tags.extend([t.strip() for t in
opts.included_tags.split(",")])
+
print("[info] Found the following changed modules:",
", ".join(x.name for x in changed_modules))
@@ -615,33 +709,32 @@ def main():
test_environ.update(m.environ)
setup_test_environ(test_environ)
- test_modules = determine_modules_to_test(changed_modules)
-
- # license checks
- run_apache_rat_checks()
-
- # style checks
- if not changed_files or any(f.endswith(".scala")
- or f.endswith("scalastyle-config.xml")
- for f in changed_files):
- run_scala_style_checks(extra_profiles)
should_run_java_style_checks = False
- if not changed_files or any(f.endswith(".java")
- or f.endswith("checkstyle.xml")
- or f.endswith("checkstyle-suppressions.xml")
- for f in changed_files):
- # Run SBT Checkstyle after the build to prevent a side-effect to the build.
- should_run_java_style_checks = True
- if not changed_files or any(f.endswith("lint-python")
- or f.endswith("tox.ini")
- or f.endswith(".py")
- for f in changed_files):
- run_python_style_checks()
- if not changed_files or any(f.endswith(".R")
- or f.endswith("lint-r")
- or f.endswith(".lintr")
- for f in changed_files):
- run_sparkr_style_checks()
+ if not should_only_test_modules:
+ # license checks
+ run_apache_rat_checks()
+
+ # style checks
+ if not changed_files or any(f.endswith(".scala")
+ or f.endswith("scalastyle-config.xml")
+ for f in changed_files):
+ run_scala_style_checks(extra_profiles)
+ if not changed_files or any(f.endswith(".java")
+ or f.endswith("checkstyle.xml")
+ or
f.endswith("checkstyle-suppressions.xml")
+ for f in changed_files):
+ # Run SBT Checkstyle after the build to prevent a side-effect to the build.
+ should_run_java_style_checks = True
+ if not changed_files or any(f.endswith("lint-python")
+ or f.endswith("tox.ini")
+ or f.endswith(".py")
+ for f in changed_files):
+ run_python_style_checks()
+ if not changed_files or any(f.endswith(".R")
+ or f.endswith("lint-r")
+ or f.endswith(".lintr")
+ for f in changed_files):
+ run_sparkr_style_checks()
# determine if docs were changed and if we're inside the amplab environment
# note - the below commented out until *all* Jenkins workers can get `jekyll` installed
@@ -663,7 +756,7 @@ def main():
build_spark_assembly_sbt(extra_profiles, should_run_java_style_checks)
# run the test suites
- run_scala_tests(build_tool, extra_profiles, test_modules, excluded_tags)
+ run_scala_tests(build_tool, extra_profiles, test_modules, excluded_tags,
included_tags)
modules_with_python_tests = [m for m in test_modules if
m.python_test_goals]
if modules_with_python_tests:
diff --git a/dev/sparktestsupport/modules.py b/dev/sparktestsupport/modules.py
index 391e4bb..75bdec0 100644
--- a/dev/sparktestsupport/modules.py
+++ b/dev/sparktestsupport/modules.py
@@ -100,9 +100,75 @@ tags = Module(
]
)
+kvstore = Module(
+ name="kvstore",
+ dependencies=[tags],
+ source_file_regexes=[
+ "common/kvstore/",
+ ],
+ sbt_test_goals=[
+ "kvstore/test",
+ ],
+)
+
+network_common = Module(
+ name="network-common",
+ dependencies=[tags],
+ source_file_regexes=[
+ "common/network-common/",
+ ],
+ sbt_test_goals=[
+ "network-common/test",
+ ],
+)
+
+network_shuffle = Module(
+ name="network-shuffle",
+ dependencies=[tags],
+ source_file_regexes=[
+ "common/network-shuffle/",
+ ],
+ sbt_test_goals=[
+ "network-shuffle/test",
+ ],
+)
+
+unsafe = Module(
+ name="unsafe",
+ dependencies=[tags],
+ source_file_regexes=[
+ "common/unsafe",
+ ],
+ sbt_test_goals=[
+ "unsafe/test",
+ ],
+)
+
+launcher = Module(
+ name="launcher",
+ dependencies=[tags],
+ source_file_regexes=[
+ "launcher/",
+ ],
+ sbt_test_goals=[
+ "launcher/test",
+ ],
+)
+
+core = Module(
+ name="core",
+ dependencies=[kvstore, network_common, network_shuffle, unsafe, launcher],
+ source_file_regexes=[
+ "core/",
+ ],
+ sbt_test_goals=[
+ "core/test",
+ ],
+)
+
catalyst = Module(
name="catalyst",
- dependencies=[tags],
+ dependencies=[tags, core],
source_file_regexes=[
"sql/catalyst/",
],
@@ -111,7 +177,6 @@ catalyst = Module(
],
)
-
sql = Module(
name="sql",
dependencies=[catalyst],
@@ -123,7 +188,6 @@ sql = Module(
],
)
-
hive = Module(
name="hive",
dependencies=[sql],
@@ -142,7 +206,6 @@ hive = Module(
]
)
-
repl = Module(
name="repl",
dependencies=[hive],
@@ -154,7 +217,6 @@ repl = Module(
],
)
-
hive_thriftserver = Module(
name="hive-thriftserver",
dependencies=[hive],
@@ -192,7 +254,6 @@ sql_kafka = Module(
]
)
-
sketch = Module(
name="sketch",
dependencies=[tags],
@@ -204,10 +265,9 @@ sketch = Module(
]
)
-
graphx = Module(
name="graphx",
- dependencies=[tags],
+ dependencies=[tags, core],
source_file_regexes=[
"graphx/",
],
@@ -216,10 +276,9 @@ graphx = Module(
]
)
-
streaming = Module(
name="streaming",
- dependencies=[tags],
+ dependencies=[tags, core],
source_file_regexes=[
"streaming",
],
@@ -235,7 +294,7 @@ streaming = Module(
# fail other PRs.
streaming_kinesis_asl = Module(
name="streaming-kinesis-asl",
- dependencies=[tags],
+ dependencies=[tags, core],
source_file_regexes=[
"external/kinesis-asl/",
"external/kinesis-asl-assembly/",
@@ -254,21 +313,23 @@ streaming_kinesis_asl = Module(
streaming_kafka_0_10 = Module(
name="streaming-kafka-0-10",
- dependencies=[streaming],
+ dependencies=[streaming, core],
source_file_regexes=[
# The ending "/" is necessary otherwise it will include "sql-kafka" codes
"external/kafka-0-10/",
"external/kafka-0-10-assembly",
+ "external/kafka-0-10-token-provider",
],
sbt_test_goals=[
"streaming-kafka-0-10/test",
+ "token-provider-kafka-0-10/test"
]
)
mllib_local = Module(
name="mllib-local",
- dependencies=[tags],
+ dependencies=[tags, core],
source_file_regexes=[
"mllib-local",
],
@@ -302,10 +363,9 @@ examples = Module(
]
)
-
pyspark_core = Module(
name="pyspark-core",
- dependencies=[],
+ dependencies=[core],
source_file_regexes=[
"python/(?!pyspark/(ml|mllib|sql|streaming))"
],
@@ -339,7 +399,6 @@ pyspark_core = Module(
]
)
-
pyspark_sql = Module(
name="pyspark-sql",
dependencies=[pyspark_core, hive, avro],
@@ -578,7 +637,7 @@ spark_ganglia_lgpl = Module(
# No other modules should directly depend on this module.
root = Module(
name="root",
- dependencies=[build], # Changes to build should trigger all tests.
+ dependencies=[build, core], # Changes to build should trigger all tests.
source_file_regexes=[],
# In order to run all of the tests, enable every test profile:
build_profile_flags=list(set(
diff --git a/project/SparkBuild.scala b/project/SparkBuild.scala
index a0e138c..7a9634a 100644
--- a/project/SparkBuild.scala
+++ b/project/SparkBuild.scala
@@ -1017,6 +1017,15 @@ object TestSettings {
sys.props.get("test.exclude.tags").map { tags =>
Seq("--exclude-categories=" + tags)
}.getOrElse(Nil): _*),
+ // Include tags defined in a system property
+ testOptions in Test += Tests.Argument(TestFrameworks.ScalaTest,
+ sys.props.get("test.include.tags").map { tags =>
+ tags.split(",").flatMap { tag => Seq("-n", tag) }.toSeq
+ }.getOrElse(Nil): _*),
+ testOptions in Test += Tests.Argument(TestFrameworks.JUnit,
+ sys.props.get("test.include.tags").map { tags =>
+ Seq("--include-categories=" + tags)
+ }.getOrElse(Nil): _*),
// Show full stack trace and duration in test cases.
testOptions in Test += Tests.Argument("-oDF"),
testOptions in Test += Tests.Argument(TestFrameworks.JUnit, "-v", "-a"),
diff --git a/python/pyspark/sql/tests/test_arrow.py
b/python/pyspark/sql/tests/test_arrow.py
index 42f064a..15c5cf1 100644
--- a/python/pyspark/sql/tests/test_arrow.py
+++ b/python/pyspark/sql/tests/test_arrow.py
@@ -21,6 +21,9 @@ import threading
import time
import unittest
import warnings
+import sys
+if sys.version >= '3':
+ basestring = unicode = str
from pyspark import SparkContext, SparkConf
from pyspark.sql import Row, SparkSession
diff --git a/python/pyspark/sql/tests/test_types.py
b/python/pyspark/sql/tests/test_types.py
index 81402f5..016cafd6 100644
--- a/python/pyspark/sql/tests/test_types.py
+++ b/python/pyspark/sql/tests/test_types.py
@@ -725,7 +725,8 @@ class TypesTests(ReusedSQLTestCase):
if sys.version_info[0] < 3:
all_types = set(['c', 'b', 'B', 'u', 'h', 'H', 'i', 'I', 'l', 'L',
'f', 'd'])
else:
- all_types = set(array.typecodes)
+ # PyPy seems not having array.typecodes.
+ all_types = set(['b', 'B', 'u', 'h', 'H', 'i', 'I', 'l', 'L', 'q',
'Q', 'f', 'd'])
unsupported_types = all_types - set(supported_types)
# test unsupported types
for t in unsupported_types:
diff --git a/python/pyspark/streaming/tests/test_dstream.py
b/python/pyspark/streaming/tests/test_dstream.py
index 7ecdf6b..89edb23 100644
--- a/python/pyspark/streaming/tests/test_dstream.py
+++ b/python/pyspark/streaming/tests/test_dstream.py
@@ -30,8 +30,9 @@ from pyspark.testing.streamingutils import
PySparkStreamingTestCase
@unittest.skipIf(
- "pypy" in platform.python_implementation().lower() and
"COVERAGE_PROCESS_START" in os.environ,
- "PyPy implementation causes to hang DStream tests forever when Coverage
report is used.")
+ "pypy" in platform.python_implementation().lower(),
+ "The tests fail in PyPy3 implementation for an unknown reason. "
+ "With PyPy, it causes to hang DStream tests forever when Coverage report
is used.")
class BasicOperationTests(PySparkStreamingTestCase):
def test_map(self):
@@ -394,8 +395,9 @@ class BasicOperationTests(PySparkStreamingTestCase):
@unittest.skipIf(
- "pypy" in platform.python_implementation().lower() and
"COVERAGE_PROCESS_START" in os.environ,
- "PyPy implementation causes to hang DStream tests forever when Coverage
report is used.")
+ "pypy" in platform.python_implementation().lower(),
+ "The tests fail in PyPy3 implementation for an unknown reason. "
+ "With PyPy, it causes to hang DStream tests forever when Coverage report
is used.")
class WindowFunctionTests(PySparkStreamingTestCase):
timeout = 15
@@ -474,8 +476,9 @@ class WindowFunctionTests(PySparkStreamingTestCase):
@unittest.skipIf(
- "pypy" in platform.python_implementation().lower() and
"COVERAGE_PROCESS_START" in os.environ,
- "PyPy implementation causes to hang DStream tests forever when Coverage
report is used.")
+ "pypy" in platform.python_implementation().lower(),
+ "The tests fail in PyPy3 implementation for an unknown reason. "
+ "With PyPy, it causes to hang DStream tests forever when Coverage report
is used.")
class CheckpointTests(unittest.TestCase):
setupCalled = False
diff --git a/python/run-tests.py b/python/run-tests.py
index b677a51..a404c53 100755
--- a/python/run-tests.py
+++ b/python/run-tests.py
@@ -161,7 +161,7 @@ def run_individual_python_test(target_dir, test_name,
pyspark_python):
def get_default_python_executables():
- python_execs = [x for x in ["python3.6", "python2.7", "pypy"] if which(x)]
+ python_execs = [x for x in ["python3.8", "python2.7", "pypy3", "pypy"] if
which(x)]
if "python3.6" not in python_execs:
p = which("python3")
diff --git
a/sql/core/src/test/scala/org/apache/spark/sql/IntegratedUDFTestUtils.scala
b/sql/core/src/test/scala/org/apache/spark/sql/IntegratedUDFTestUtils.scala
index 6391d56..80346b3 100644
--- a/sql/core/src/test/scala/org/apache/spark/sql/IntegratedUDFTestUtils.scala
+++ b/sql/core/src/test/scala/org/apache/spark/sql/IntegratedUDFTestUtils.scala
@@ -197,7 +197,7 @@ object IntegratedUDFTestUtils extends SQLHelper {
lazy val pythonExec: String = {
val pythonExec = sys.env.getOrElse(
- "PYSPARK_DRIVER_PYTHON", sys.env.getOrElse("PYSPARK_PYTHON",
"python3.6"))
+ "PYSPARK_DRIVER_PYTHON", sys.env.getOrElse("PYSPARK_PYTHON", "python3"))
if (TestUtils.testCommandAvailable(pythonExec)) {
pythonExec
} else {
diff --git
a/sql/hive-thriftserver/src/test/scala/org/apache/spark/sql/hive/thriftserver/ThriftServerQueryTestSuite.scala
b/sql/hive-thriftserver/src/test/scala/org/apache/spark/sql/hive/thriftserver/ThriftServerQueryTestSuite.scala
index 15cc310..8b16674 100644
---
a/sql/hive-thriftserver/src/test/scala/org/apache/spark/sql/hive/thriftserver/ThriftServerQueryTestSuite.scala
+++
b/sql/hive-thriftserver/src/test/scala/org/apache/spark/sql/hive/thriftserver/ThriftServerQueryTestSuite.scala
@@ -33,6 +33,7 @@ import org.apache.spark.sql.catalyst.util.fileToString
import org.apache.spark.sql.execution.HiveResult
import org.apache.spark.sql.internal.SQLConf
import org.apache.spark.sql.types._
+import org.apache.spark.tags.GitHubActionsUnstableTest
/**
* Re-run all the tests in SQLQueryTestSuite via Thrift Server.
@@ -52,6 +53,7 @@ import org.apache.spark.sql.types._
* 2. Support DESC command.
* 3. Support SHOW command.
*/
+@GitHubActionsUnstableTest
class ThriftServerQueryTestSuite extends SQLQueryTestSuite with
SharedThriftServer {
override protected def testFile(fileName: String): String = {
diff --git
a/sql/hive-thriftserver/src/test/scala/org/apache/spark/sql/hive/thriftserver/ThriftServerWithSparkContextSuite.scala
b/sql/hive-thriftserver/src/test/scala/org/apache/spark/sql/hive/thriftserver/ThriftServerWithSparkContextSuite.scala
index 3e1fce7..73547d7 100644
---
a/sql/hive-thriftserver/src/test/scala/org/apache/spark/sql/hive/thriftserver/ThriftServerWithSparkContextSuite.scala
+++
b/sql/hive-thriftserver/src/test/scala/org/apache/spark/sql/hive/thriftserver/ThriftServerWithSparkContextSuite.scala
@@ -17,6 +17,9 @@
package org.apache.spark.sql.hive.thriftserver
+import org.apache.spark.tags.GitHubActionsUnstableTest
+
+@GitHubActionsUnstableTest
class ThriftServerWithSparkContextSuite extends SharedThriftServer {
test("SPARK-29911: Uncache cached tables when session closed") {
diff --git
a/sql/hive/compatibility/src/test/scala/org/apache/spark/sql/hive/execution/HiveCompatibilitySuite.scala
b/sql/hive/compatibility/src/test/scala/org/apache/spark/sql/hive/execution/HiveCompatibilitySuite.scala
index db1f6fb..4112512 100644
---
a/sql/hive/compatibility/src/test/scala/org/apache/spark/sql/hive/execution/HiveCompatibilitySuite.scala
+++
b/sql/hive/compatibility/src/test/scala/org/apache/spark/sql/hive/execution/HiveCompatibilitySuite.scala
@@ -26,10 +26,12 @@ import org.apache.spark.sql.hive.HiveUtils
import org.apache.spark.sql.hive.test.TestHive
import org.apache.spark.sql.internal.SQLConf
import org.apache.spark.sql.internal.SQLConf.StoreAssignmentPolicy
+import org.apache.spark.tags.SlowHiveTest
/**
* Runs the test cases that are included in the hive distribution.
*/
+@SlowHiveTest
class HiveCompatibilitySuite extends HiveQueryFileTest with BeforeAndAfter {
// TODO: bundle in jar files... get from classpath
private lazy val hiveQueryDir = TestHive.getHiveFile(
diff --git
a/sql/hive/src/test/scala/org/apache/spark/sql/hive/HiveExternalCatalogVersionsSuite.scala
b/sql/hive/src/test/scala/org/apache/spark/sql/hive/HiveExternalCatalogVersionsSuite.scala
index 8be3d26..aa96fa0 100644
---
a/sql/hive/src/test/scala/org/apache/spark/sql/hive/HiveExternalCatalogVersionsSuite.scala
+++
b/sql/hive/src/test/scala/org/apache/spark/sql/hive/HiveExternalCatalogVersionsSuite.scala
@@ -35,7 +35,7 @@ import org.apache.spark.sql.catalyst.TableIdentifier
import org.apache.spark.sql.catalyst.catalog.CatalogTableType
import org.apache.spark.sql.internal.StaticSQLConf.WAREHOUSE_PATH
import org.apache.spark.sql.test.SQLTestUtils
-import org.apache.spark.tags.ExtendedHiveTest
+import org.apache.spark.tags.{ExtendedHiveTest, SlowHiveTest}
import org.apache.spark.util.Utils
/**
@@ -46,6 +46,7 @@ import org.apache.spark.util.Utils
* expected version under this local directory, e.g. `/tmp/spark-test/spark-2.0.3`, we will skip the
* downloading for this spark version.
*/
+@SlowHiveTest
@ExtendedHiveTest
class HiveExternalCatalogVersionsSuite extends SparkSubmitTestUtils {
private val isTestAtLeastJava9 =
SystemUtils.isJavaVersionAtLeast(JavaVersion.JAVA_9)
diff --git
a/sql/hive/src/test/scala/org/apache/spark/sql/hive/HiveSparkSubmitSuite.scala
b/sql/hive/src/test/scala/org/apache/spark/sql/hive/HiveSparkSubmitSuite.scala
index 8b97489..3a7e92e 100644
---
a/sql/hive/src/test/scala/org/apache/spark/sql/hive/HiveSparkSubmitSuite.scala
+++
b/sql/hive/src/test/scala/org/apache/spark/sql/hive/HiveSparkSubmitSuite.scala
@@ -38,12 +38,13 @@ import org.apache.spark.sql.hive.test.{HiveTestJars,
TestHiveContext}
import org.apache.spark.sql.internal.SQLConf.SHUFFLE_PARTITIONS
import org.apache.spark.sql.internal.StaticSQLConf.WAREHOUSE_PATH
import org.apache.spark.sql.types.{DecimalType, StructType}
-import org.apache.spark.tags.ExtendedHiveTest
+import org.apache.spark.tags.{ExtendedHiveTest, SlowHiveTest}
import org.apache.spark.util.{ResetSystemProperties, Utils}
/**
* This suite tests spark-submit with applications using HiveContext.
*/
+@SlowHiveTest
@ExtendedHiveTest
class HiveSparkSubmitSuite
extends SparkSubmitTestUtils
diff --git
a/sql/hive/src/test/scala/org/apache/spark/sql/hive/client/VersionsSuite.scala
b/sql/hive/src/test/scala/org/apache/spark/sql/hive/client/VersionsSuite.scala
index 8642a5f..5ddaaf3 100644
---
a/sql/hive/src/test/scala/org/apache/spark/sql/hive/client/VersionsSuite.scala
+++
b/sql/hive/src/test/scala/org/apache/spark/sql/hive/client/VersionsSuite.scala
@@ -41,7 +41,7 @@ import org.apache.spark.sql.hive.{HiveExternalCatalog,
HiveUtils}
import org.apache.spark.sql.hive.test.TestHiveVersion
import org.apache.spark.sql.types.IntegerType
import org.apache.spark.sql.types.StructType
-import org.apache.spark.tags.ExtendedHiveTest
+import org.apache.spark.tags.{ExtendedHiveTest, GitHubActionsUnstableTest}
import org.apache.spark.util.{MutableURLClassLoader, Utils}
/**
@@ -52,6 +52,7 @@ import org.apache.spark.util.{MutableURLClassLoader, Utils}
*/
// TODO: Refactor this to `HiveClientSuite` and make it a subclass of
`HiveVersionSuite`
@ExtendedHiveTest
+@GitHubActionsUnstableTest
class VersionsSuite extends SparkFunSuite with Logging {
override protected val enableAutoThreadAudit = false
diff --git
a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/AggregationQuerySuite.scala
b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/AggregationQuerySuite.scala
index fac9812..1dd2ad3 100644
---
a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/AggregationQuerySuite.scala
+++
b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/AggregationQuerySuite.scala
@@ -31,6 +31,7 @@ import org.apache.spark.sql.hive.test.TestHiveSingleton
import org.apache.spark.sql.internal.SQLConf
import org.apache.spark.sql.test.SQLTestUtils
import org.apache.spark.sql.types._
+import org.apache.spark.tags.SlowHiveTest
import org.apache.spark.unsafe.UnsafeAlignedOffset
@@ -1054,6 +1055,7 @@ abstract class AggregationQuerySuite extends QueryTest
with SQLTestUtils with Te
class HashAggregationQuerySuite extends AggregationQuerySuite
+@SlowHiveTest
class HashAggregationQueryWithControlledFallbackSuite extends
AggregationQuerySuite {
override protected def checkAnswer(actual: => DataFrame, expectedAnswer:
Seq[Row]): Unit = {
diff --git
a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveDDLSuite.scala
b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveDDLSuite.scala
index e8cf4ad..d0aa618 100644
---
a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveDDLSuite.scala
+++
b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveDDLSuite.scala
@@ -44,9 +44,11 @@ import
org.apache.spark.sql.internal.SQLConf.ORC_IMPLEMENTATION
import org.apache.spark.sql.internal.StaticSQLConf.CATALOG_IMPLEMENTATION
import org.apache.spark.sql.test.SQLTestUtils
import org.apache.spark.sql.types._
+import org.apache.spark.tags.SlowHiveTest
import org.apache.spark.util.Utils
// TODO(gatorsmile): combine HiveCatalogedDDLSuite and HiveDDLSuite
+@SlowHiveTest
class HiveCatalogedDDLSuite extends DDLSuite with TestHiveSingleton with
BeforeAndAfterEach {
override def afterEach(): Unit = {
try {
@@ -404,6 +406,7 @@ class HiveCatalogedDDLSuite extends DDLSuite with
TestHiveSingleton with BeforeA
}
}
+@SlowHiveTest
class HiveDDLSuite
extends QueryTest with SQLTestUtils with TestHiveSingleton with
BeforeAndAfterEach {
import testImplicits._
diff --git
a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveQuerySuite.scala
b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveQuerySuite.scala
index b10a8cb..e798a35 100644
---
a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveQuerySuite.scala
+++
b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveQuerySuite.scala
@@ -39,6 +39,7 @@ import org.apache.spark.sql.hive.test.{HiveTestJars, TestHive}
import org.apache.spark.sql.hive.test.TestHive._
import org.apache.spark.sql.internal.SQLConf
import org.apache.spark.sql.test.SQLTestUtils
+import org.apache.spark.tags.SlowHiveTest
case class TestData(a: Int, b: String)
@@ -46,6 +47,7 @@ case class TestData(a: Int, b: String)
* A set of test cases expressed in Hive QL that are not covered by the tests
* included in the hive distribution.
*/
+@SlowHiveTest
class HiveQuerySuite extends HiveComparisonTest with SQLTestUtils with
BeforeAndAfter {
import org.apache.spark.sql.hive.test.TestHive.implicits._
diff --git
a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/Hive_2_1_DDLSuite.scala
b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/Hive_2_1_DDLSuite.scala
index b20ef03..6f37e39 100644
---
a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/Hive_2_1_DDLSuite.scala
+++
b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/Hive_2_1_DDLSuite.scala
@@ -27,13 +27,14 @@ import org.apache.spark.sql.hive.{HiveExternalCatalog,
HiveUtils}
import org.apache.spark.sql.hive.test.TestHiveSingleton
import org.apache.spark.sql.internal.StaticSQLConf._
import org.apache.spark.sql.types._
-import org.apache.spark.tags.ExtendedHiveTest
+import org.apache.spark.tags.{ExtendedHiveTest, SlowHiveTest}
import org.apache.spark.util.Utils
/**
* A separate set of DDL tests that uses Hive 2.1 libraries, which behave a little differently
* from the built-in ones.
*/
+@SlowHiveTest
@ExtendedHiveTest
class Hive_2_1_DDLSuite extends SparkFunSuite with TestHiveSingleton with
BeforeAndAfterEach
with BeforeAndAfterAll {
diff --git
a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/SQLQuerySuite.scala
b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/SQLQuerySuite.scala
index d12eae0e..a46db32 100644
---
a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/SQLQuerySuite.scala
+++
b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/SQLQuerySuite.scala
@@ -43,6 +43,7 @@ import org.apache.spark.sql.internal.SQLConf
import org.apache.spark.sql.internal.StaticSQLConf.GLOBAL_TEMP_DATABASE
import org.apache.spark.sql.test.SQLTestUtils
import org.apache.spark.sql.types._
+import org.apache.spark.tags.SlowHiveTest
import org.apache.spark.util.Utils
case class Nested1(f1: Nested2)
@@ -2559,6 +2560,8 @@ abstract class SQLQuerySuiteBase extends QueryTest with
SQLTestUtils with TestHi
}
}
+@SlowHiveTest
class SQLQuerySuite extends SQLQuerySuiteBase with
DisableAdaptiveExecutionSuite
+@SlowHiveTest
class SQLQuerySuiteAE extends SQLQuerySuiteBase with
EnableAdaptiveExecutionSuite
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]