[GitHub] [spark] HyukjinKwon commented on a change in pull request #29465: [SPARK-32249][INFRA][2.4] Run Github Actions builds in branch-2.4
HyukjinKwon commented on a change in pull request #29465: URL: https://github.com/apache/spark/pull/29465#discussion_r472710508 ## File path: .github/workflows/build_and_test.yml ## @@ -0,0 +1,241 @@ +name: Build and test + +on: + push: +branches: +- branch-2.4 + pull_request: +branches: +- branch-2.4 + +jobs: + # Build: build Spark and run the tests for specified modules. + build: +name: "Build modules: ${{ matrix.modules }} ${{ matrix.comment }} (JDK ${{ matrix.java }}, ${{ matrix.hadoop }})" +runs-on: ubuntu-latest +strategy: + fail-fast: false + matrix: +java: + - 1.8 +hadoop: + - hadoop2.6 +# TODO(SPARK-32246): We don't test 'streaming-kinesis-asl' for now. +# Kinesis tests depends on external Amazon kinesis service. +# Note that the modules below are from sparktestsupport/modules.py. +modules: + - >- +core, unsafe, kvstore, avro, +network-common, network-shuffle, repl, launcher, +examples, sketch, graphx + - >- +catalyst, hive-thriftserver + - >- +streaming, sql-kafka-0-10, streaming-kafka-0-10, +mllib-local, mllib, +yarn, mesos, kubernetes, hadoop-cloud, spark-ganglia-lgpl, +streaming-flume, streaming-flume-sink, streaming-kafka-0-8 + - >- +pyspark-sql, pyspark-mllib + - >- +pyspark-core, pyspark-streaming, pyspark-ml + - >- +sparkr + - >- +sql +# Here, we split Hive and SQL tests into some of slow ones and the rest of them. +included-tags: [""] +excluded-tags: [""] +comment: [""] +include: + # Hive tests + - modules: hive +java: 1.8 +hadoop: hadoop2.6 +included-tags: org.apache.spark.tags.SlowHiveTest +comment: "- slow tests" + - modules: hive +java: 1.8 +hadoop: hadoop2.6 +excluded-tags: org.apache.spark.tags.SlowHiveTest +comment: "- other tests" +env: + MODULES_TO_TEST: ${{ matrix.modules }} + EXCLUDED_TAGS: ${{ matrix.excluded-tags }} + INCLUDED_TAGS: ${{ matrix.included-tags }} + HADOOP_PROFILE: ${{ matrix.hadoop }} + # GitHub Actions' default miniconda to use in pip packaging test. + CONDA_PREFIX: /usr/share/miniconda + GITHUB_PREV_SHA: ${{ github.event.before }} + ARROW_PRE_0_15_IPC_FORMAT: 1 +steps: +- name: Checkout Spark repository + uses: actions/checkout@v2 + # In order to fetch changed files + with: +fetch-depth: 0 +# Cache local repositories. Note that GitHub Actions cache has a 2G limit. +- name: Cache Scala, SBT, Maven and Zinc + uses: actions/cache@v1 + with: +path: build +key: build-${{ hashFiles('**/pom.xml') }} +restore-keys: | + build- +- name: Cache Maven local repository + uses: actions/cache@v2 + with: +path: ~/.m2/repository +key: ${{ matrix.java }}-${{ matrix.hadoop }}-maven-${{ hashFiles('**/pom.xml') }} +restore-keys: | + ${{ matrix.java }}-${{ matrix.hadoop }}-maven- +- name: Cache Ivy local repository + uses: actions/cache@v2 + with: +path: ~/.ivy2/cache +key: ${{ matrix.java }}-${{ matrix.hadoop }}-ivy-${{ hashFiles('**/pom.xml') }}-${{ hashFiles('**/plugins.sbt') }} +restore-keys: | + ${{ matrix.java }}-${{ matrix.hadoop }}-ivy- +- name: Install JDK ${{ matrix.java }} + uses: actions/setup-java@v1 + with: +java-version: ${{ matrix.java }} +# PySpark +- name: Install PyPy3 + # Note that order of Python installations here matters because default python is + # overridden. + uses: actions/setup-python@v2 + if: contains(matrix.modules, 'pyspark') + with: +python-version: pypy3 +architecture: x64 +- name: Install Python 3.6 + uses: actions/setup-python@v2 + if: contains(matrix.modules, 'pyspark') + with: +python-version: 3.6 +architecture: x64 +- name: Install Python 2.7 + uses: actions/setup-python@v2 + # Yarn has a Python specific test too, for example, YarnClusterSuite. + if: contains(matrix.modules, 'yarn') || contains(matrix.modules, 'pyspark') || (contains(matrix.modules, 'sql') && !contains(matrix.modules, 'sql-')) + with: +python-version: 2.7 +architecture: x64 +- name: Install Python packages (Python 3.6 and PyPy3) + if: contains(matrix.modules, 'pyspark') + # PyArrow is not supported in PyPy yet, see ARROW-2651. + # TODO(SPARK-32247): scipy installation with PyPy fails for an unknown reason. + run: | +python3.6 -m pip install numpy pyarrow pandas scipy xmlrunner +
[GitHub] [spark] HyukjinKwon commented on a change in pull request #29465: [SPARK-32249][INFRA][2.4] Run Github Actions builds in branch-2.4
HyukjinKwon commented on a change in pull request #29465: URL: https://github.com/apache/spark/pull/29465#discussion_r472709812 ## File path: .github/workflows/build_and_test.yml ## @@ -0,0 +1,241 @@ +name: Build and test + +on: + push: +branches: +- branch-2.4 + pull_request: +branches: +- branch-2.4 + +jobs: + # Build: build Spark and run the tests for specified modules. + build: +name: "Build modules: ${{ matrix.modules }} ${{ matrix.comment }} (JDK ${{ matrix.java }}, ${{ matrix.hadoop }})" +runs-on: ubuntu-latest +strategy: + fail-fast: false + matrix: +java: + - 1.8 +hadoop: + - hadoop2.6 +# TODO(SPARK-32246): We don't test 'streaming-kinesis-asl' for now. +# Kinesis tests depends on external Amazon kinesis service. +# Note that the modules below are from sparktestsupport/modules.py. +modules: + - >- +core, unsafe, kvstore, avro, +network-common, network-shuffle, repl, launcher, +examples, sketch, graphx + - >- +catalyst, hive-thriftserver + - >- +streaming, sql-kafka-0-10, streaming-kafka-0-10, +mllib-local, mllib, +yarn, mesos, kubernetes, hadoop-cloud, spark-ganglia-lgpl, +streaming-flume, streaming-flume-sink, streaming-kafka-0-8 + - >- +pyspark-sql, pyspark-mllib + - >- +pyspark-core, pyspark-streaming, pyspark-ml + - >- +sparkr + - >- +sql +# Here, we split Hive and SQL tests into some of slow ones and the rest of them. +included-tags: [""] +excluded-tags: [""] +comment: [""] +include: + # Hive tests + - modules: hive +java: 1.8 +hadoop: hadoop2.6 +included-tags: org.apache.spark.tags.SlowHiveTest +comment: "- slow tests" + - modules: hive +java: 1.8 +hadoop: hadoop2.6 +excluded-tags: org.apache.spark.tags.SlowHiveTest +comment: "- other tests" +env: + MODULES_TO_TEST: ${{ matrix.modules }} + EXCLUDED_TAGS: ${{ matrix.excluded-tags }} + INCLUDED_TAGS: ${{ matrix.included-tags }} + HADOOP_PROFILE: ${{ matrix.hadoop }} + # GitHub Actions' default miniconda to use in pip packaging test. + CONDA_PREFIX: /usr/share/miniconda + GITHUB_PREV_SHA: ${{ github.event.before }} + ARROW_PRE_0_15_IPC_FORMAT: 1 +steps: +- name: Checkout Spark repository + uses: actions/checkout@v2 + # In order to fetch changed files + with: +fetch-depth: 0 +# Cache local repositories. Note that GitHub Actions cache has a 2G limit. +- name: Cache Scala, SBT, Maven and Zinc + uses: actions/cache@v1 + with: +path: build +key: build-${{ hashFiles('**/pom.xml') }} +restore-keys: | + build- +- name: Cache Maven local repository + uses: actions/cache@v2 + with: +path: ~/.m2/repository +key: ${{ matrix.java }}-${{ matrix.hadoop }}-maven-${{ hashFiles('**/pom.xml') }} +restore-keys: | + ${{ matrix.java }}-${{ matrix.hadoop }}-maven- +- name: Cache Ivy local repository + uses: actions/cache@v2 + with: +path: ~/.ivy2/cache +key: ${{ matrix.java }}-${{ matrix.hadoop }}-ivy-${{ hashFiles('**/pom.xml') }}-${{ hashFiles('**/plugins.sbt') }} +restore-keys: | + ${{ matrix.java }}-${{ matrix.hadoop }}-ivy- +- name: Install JDK ${{ matrix.java }} + uses: actions/setup-java@v1 + with: +java-version: ${{ matrix.java }} +# PySpark +- name: Install PyPy3 + # Note that order of Python installations here matters because default python is + # overridden. + uses: actions/setup-python@v2 + if: contains(matrix.modules, 'pyspark') + with: +python-version: pypy3 +architecture: x64 +- name: Install Python 3.6 + uses: actions/setup-python@v2 + if: contains(matrix.modules, 'pyspark') + with: +python-version: 3.6 +architecture: x64 +- name: Install Python 2.7 + uses: actions/setup-python@v2 + # Yarn has a Python specific test too, for example, YarnClusterSuite. + if: contains(matrix.modules, 'yarn') || contains(matrix.modules, 'pyspark') || (contains(matrix.modules, 'sql') && !contains(matrix.modules, 'sql-')) + with: +python-version: 2.7 +architecture: x64 +- name: Install Python packages (Python 3.6 and PyPy3) + if: contains(matrix.modules, 'pyspark') + # PyArrow is not supported in PyPy yet, see ARROW-2651. + # TODO(SPARK-32247): scipy installation with PyPy fails for an unknown reason. + run: | +python3.6 -m pip install numpy pyarrow pandas scipy xmlrunner +