[GitHub] [spark] HyukjinKwon commented on a change in pull request #29465: [SPARK-32249][INFRA][2.4] Run Github Actions builds in branch-2.4

2020-08-18 Thread GitBox


HyukjinKwon commented on a change in pull request #29465:
URL: https://github.com/apache/spark/pull/29465#discussion_r472710508



##
File path: .github/workflows/build_and_test.yml
##
@@ -0,0 +1,241 @@
+name: Build and test
+
+on:
+  push:
+    branches:
+    - branch-2.4
+  pull_request:
+    branches:
+    - branch-2.4
+
+jobs:
+  # Build: build Spark and run the tests for specified modules.
+  build:
+    name: "Build modules: ${{ matrix.modules }} ${{ matrix.comment }} (JDK ${{ matrix.java }}, ${{ matrix.hadoop }})"
+    runs-on: ubuntu-latest
+    strategy:
+      fail-fast: false
+      matrix:
+        java:
+          - 1.8
+        hadoop:
+          - hadoop2.6
+        # TODO(SPARK-32246): We don't test 'streaming-kinesis-asl' for now.
+        # Kinesis tests depend on the external Amazon Kinesis service.
+        # Note that the modules below are from sparktestsupport/modules.py.
+        modules:
+          - >-
+            core, unsafe, kvstore, avro,
+            network-common, network-shuffle, repl, launcher,
+            examples, sketch, graphx
+          - >-
+            catalyst, hive-thriftserver
+          - >-
+            streaming, sql-kafka-0-10, streaming-kafka-0-10,
+            mllib-local, mllib,
+            yarn, mesos, kubernetes, hadoop-cloud, spark-ganglia-lgpl,
+            streaming-flume, streaming-flume-sink, streaming-kafka-0-8
+          - >-
+            pyspark-sql, pyspark-mllib
+          - >-
+            pyspark-core, pyspark-streaming, pyspark-ml
+          - >-
+            sparkr
+          - >-
+            sql
+        # Here, we split the Hive and SQL tests into the slow ones and the rest.
+        included-tags: [""]
+        excluded-tags: [""]
+        comment: [""]
+        include:
+          # Hive tests
+          - modules: hive
+            java: 1.8
+            hadoop: hadoop2.6
+            included-tags: org.apache.spark.tags.SlowHiveTest
+            comment: "- slow tests"
+          - modules: hive
+            java: 1.8
+            hadoop: hadoop2.6
+            excluded-tags: org.apache.spark.tags.SlowHiveTest
+            comment: "- other tests"
+    env:
+      MODULES_TO_TEST: ${{ matrix.modules }}
+      EXCLUDED_TAGS: ${{ matrix.excluded-tags }}
+      INCLUDED_TAGS: ${{ matrix.included-tags }}
+      HADOOP_PROFILE: ${{ matrix.hadoop }}
+      # GitHub Actions' default miniconda to use in the pip packaging test.
+      CONDA_PREFIX: /usr/share/miniconda
+      GITHUB_PREV_SHA: ${{ github.event.before }}
+      ARROW_PRE_0_15_IPC_FORMAT: 1
+    steps:
+    - name: Checkout Spark repository
+      uses: actions/checkout@v2
+      # In order to fetch changed files
+      with:
+        fetch-depth: 0
+    # Cache local repositories. Note that the GitHub Actions cache has a 2GB limit.
+    - name: Cache Scala, SBT, Maven and Zinc
+      uses: actions/cache@v1
+      with:
+        path: build
+        key: build-${{ hashFiles('**/pom.xml') }}
+        restore-keys: |
+          build-
+    - name: Cache Maven local repository
+      uses: actions/cache@v2
+      with:
+        path: ~/.m2/repository
+        key: ${{ matrix.java }}-${{ matrix.hadoop }}-maven-${{ hashFiles('**/pom.xml') }}
+        restore-keys: |
+          ${{ matrix.java }}-${{ matrix.hadoop }}-maven-
+    - name: Cache Ivy local repository
+      uses: actions/cache@v2
+      with:
+        path: ~/.ivy2/cache
+        key: ${{ matrix.java }}-${{ matrix.hadoop }}-ivy-${{ hashFiles('**/pom.xml') }}-${{ hashFiles('**/plugins.sbt') }}
+        restore-keys: |
+          ${{ matrix.java }}-${{ matrix.hadoop }}-ivy-
+    - name: Install JDK ${{ matrix.java }}
+      uses: actions/setup-java@v1
+      with:
+        java-version: ${{ matrix.java }}
+    # PySpark
+    - name: Install PyPy3
+      # Note that the order of the Python installations here matters because the
+      # default python is overridden.
+      uses: actions/setup-python@v2
+      if: contains(matrix.modules, 'pyspark')
+      with:
+        python-version: pypy3
+        architecture: x64
+    - name: Install Python 3.6
+      uses: actions/setup-python@v2
+      if: contains(matrix.modules, 'pyspark')
+      with:
+        python-version: 3.6
+        architecture: x64
+    - name: Install Python 2.7
+      uses: actions/setup-python@v2
+      # Yarn has a Python-specific test too, for example, YarnClusterSuite.
+      if: contains(matrix.modules, 'yarn') || contains(matrix.modules, 'pyspark') || (contains(matrix.modules, 'sql') && !contains(matrix.modules, 'sql-'))
+      with:
+        python-version: 2.7
+        architecture: x64
+    - name: Install Python packages (Python 3.6 and PyPy3)
+      if: contains(matrix.modules, 'pyspark')
+      # PyArrow is not supported in PyPy yet, see ARROW-2651.
+      # TODO(SPARK-32247): scipy installation with PyPy fails for an unknown reason.
+      run: |
+        python3.6 -m pip install numpy pyarrow pandas scipy xmlrunner
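
For readers skimming the matrix above: the seven `modules` entries combine with the single `java` and `hadoop` values into seven jobs, and the two `include` entries append two extra `hive` jobs whose `included-tags`/`excluded-tags` override the empty defaults, which is how the slow Hive tests get a job of their own. A minimal sketch of the same pattern, stripped of the Spark specifics (the module names and tag here are placeholders, not taken from the PR):

    strategy:
      matrix:
        modules: [alpha, beta]     # expands to two ordinary jobs
        included-tags: [""]        # empty default for the ordinary jobs
        include:
          - modules: gamma         # appends a third job with an
            included-tags: some.SlowTag  # overridden tag filter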
+
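The quoted hunk stops before the step that actually consumes MODULES_TO_TEST, INCLUDED_TAGS and EXCLUDED_TAGS. As a minimal sketch of how such matrix-derived env vars would typically be forwarded to Spark's test runner — the step name and the exact flags below are assumptions for illustration, not lines quoted from the PR:

    - name: Run tests
      run: |
        # Hypothetical sketch: forward the matrix-derived env vars to dev/run-tests.
        ./dev/run-tests --parallelism 2 \
          --modules "$MODULES_TO_TEST" \
          --included-tags "$INCLUDED_TAGS" \
          --excluded-tags "$EXCLUDED_TAGS"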

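Each actions/setup-python step prepends its interpreter to PATH, so after the PyPy3 → 3.6 → 2.7 sequence above, bare `python` resolves to the interpreter installed last; that is what the "default python is overridden" comment refers to. An illustrative verification step, not part of the PR:

    - name: Check Python installations (illustrative only)
      if: contains(matrix.modules, 'pyspark')
      run: |
        # The interpreter installed last sits first on PATH.
        python --version
        python3.6 --version
        pypy3 --version
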
[GitHub] [spark] HyukjinKwon commented on a change in pull request #29465: [SPARK-32249][INFRA][2.4] Run Github Actions builds in branch-2.4

2020-08-18 Thread GitBox


HyukjinKwon commented on a change in pull request #29465:
URL: https://github.com/apache/spark/pull/29465#discussion_r472709812


