dongjoon-hyun commented on code in PR #41529:
URL: https://github.com/apache/spark/pull/41529#discussion_r1230143556
##########
.github/workflows/build_and_test.yml:
##########
@@ -267,6 +268,165 @@ jobs:
name: unit-tests-log-${{ matrix.modules }}-${{ matrix.comment }}-${{
matrix.java }}-${{ matrix.hadoop }}-${{ matrix.hive }}
path: "**/target/unit-tests.log"
+ # Maven Build: build Spark and run the tests for specified modules using
maven.
+ maven-build:
+ name: "Maven build modules: ${{ matrix.modules }} ${{ matrix.comment }}"
+ needs: precondition
+ if: fromJson(needs.precondition.outputs.required).maven-build == 'true'
+ runs-on: ubuntu-22.04
+ strategy:
+ fail-fast: false
+ matrix:
+ java:
+ - ${{ inputs.java }}
+ hadoop:
+ - ${{ inputs.hadoop }}
+ hive:
+ - hive2.3
+ modules:
+ - >-
+
core,repl,launcher,common/unsafe,common/kvstore,common/network-common,common/network-shuffle,common/sketch
+ - >-
+ graphx,streaming,mllib-local,mllib,hadoop-cloud
+ - >-
+ sql/catalyst,sql/hive-thriftserver
+ - >-
+
connector/kafka-0-10,connector/kafka-0-10-sql,connector/kafka-0-10-token-provider,connector/spark-ganglia-lgpl,connector/protobuf,connector/avro
+ - >-
+
resource-managers/yarn,resource-managers/mesos,resource-managers/kubernetes
+ - >-
+ connect
+ # Here, we split Hive and SQL tests into some of slow ones and the
rest of them.
+ included-tags: [ "" ]
+ excluded-tags: [ "" ]
+ comment: [ "" ]
+ include:
+ # Hive tests
+ - modules: sql/hive
+ java: ${{ inputs.java }}
+ hadoop: ${{ inputs.hadoop }}
+ hive: hive2.3
+ included-tags: org.apache.spark.tags.SlowHiveTest
+ comment: "- slow tests"
+ - modules: sql/hive
+ java: ${{ inputs.java }}
+ hadoop: ${{ inputs.hadoop }}
+ hive: hive2.3
+ excluded-tags: org.apache.spark.tags.SlowHiveTest
+ comment: "- other tests"
+ # SQL tests
+ - modules: sql/core
+ java: ${{ inputs.java }}
+ hadoop: ${{ inputs.hadoop }}
+ hive: hive2.3
+ included-tags: org.apache.spark.tags.ExtendedSQLTest
+ comment: "- slow tests"
+ - modules: sql/core
+ java: ${{ inputs.java }}
+ hadoop: ${{ inputs.hadoop }}
+ hive: hive2.3
+ excluded-tags: org.apache.spark.tags.ExtendedSQLTest
+ comment: "- other tests"
+ env:
+ MODULES_TO_TEST: ${{ matrix.modules }}
+ EXCLUDED_TAGS: ${{ matrix.excluded-tags }}
+ INCLUDED_TAGS: ${{ matrix.included-tags }}
+ HADOOP_PROFILE: ${{ matrix.hadoop }}
+ HIVE_PROFILE: ${{ matrix.hive }}
+ GITHUB_PREV_SHA: ${{ github.event.before }}
+ SPARK_LOCAL_IP: localhost
+ steps:
+ - name: Checkout Spark repository
+ uses: actions/checkout@v3
+ # In order to fetch changed files
+ with:
+ fetch-depth: 0
+ repository: apache/spark
+ ref: ${{ inputs.branch }}
+ - name: Sync the current branch with the latest in Apache Spark
+ if: github.repository != 'apache/spark'
+ run: |
+ echo "APACHE_SPARK_REF=$(git rev-parse HEAD)" >> $GITHUB_ENV
+ git fetch https://github.com/$GITHUB_REPOSITORY.git
${GITHUB_REF#refs/heads/}
+ git -c user.name='Apache Spark Test Account' -c
user.email='[email protected]' merge --no-commit --progress --squash
FETCH_HEAD
+ git -c user.name='Apache Spark Test Account' -c
user.email='[email protected]' commit -m "Merged commit" --allow-empty
+ # Cache local repositories. Note that GitHub Actions cache has a 2G
limit.
+ - name: Cache Scala, SBT and Maven
+ uses: actions/cache@v3
+ with:
+ path: |
+ build/apache-maven-*
+ build/scala-*
+ build/*.jar
+ ~/.sbt
+ key: build-${{ hashFiles('**/pom.xml', 'project/build.properties',
'build/mvn', 'build/sbt', 'build/sbt-launch-lib.bash',
'build/spark-build-info') }}
+ restore-keys: |
+ build-
+ - name: Cache Coursier local repository
+ uses: actions/cache@v3
+ with:
+ path: ~/.cache/coursier
+ key: ${{ matrix.java }}-${{ matrix.hadoop }}-coursier-${{
hashFiles('**/pom.xml', '**/plugins.sbt') }}
+ restore-keys: |
+ ${{ matrix.java }}-${{ matrix.hadoop }}-coursier-
+ - name: Install Java ${{ matrix.java }}
+ uses: actions/setup-java@v3
+ with:
+ distribution: temurin
+ java-version: ${{ matrix.java }}
+ - name: Install Python 3.8
+ uses: actions/setup-python@v4
+ # We should install one Python that is higher than 3+ for SQL and Yarn
because:
+ # - SQL component also has Python related tests, for example,
IntegratedUDFTestUtils.
+ # - Yarn has a Python specific test too, for example, YarnClusterSuite.
+ if: contains(matrix.modules, 'resource-managers/yarn') ||
(contains(matrix.modules, 'sql/core'))
+ with:
+ python-version: 3.8
+ architecture: x64
+ - name: Install Python packages (Python 3.8)
+ if: (contains(matrix.modules, 'sql/core'))
+ run: |
+ python3.8 -m pip install 'numpy>=1.20.0' pyarrow pandas scipy
unittest-xml-reporting 'grpcio==1.48.1' 'protobuf==3.19.5'
+ python3.8 -m pip list
+ # Run the tests.
+ - name: Run tests
+ env: ${{ fromJSON(inputs.envs) }}
+ shell: 'script -q -e -c "bash {0}"'
+ run: |
+ # Fix for TTY related issues when launching the Ammonite REPL in
tests.
+ export TERM=vt100 && script -qfc 'echo exit | amm -s' && rm
typescript
+ # `set -e` to make the exit status as expected due to use `script -q
-e -c` to run the commands
+ set -e
+ export MAVEN_OPTS="-Xss64m -Xmx4g -Xms4g
-XX:ReservedCodeCacheSize=128m -Dorg.slf4j.simpleLogger.defaultLogLevel=WARN"
+ export MAVEN_CLI_OPTS="--no-transfer-progress"
+ export JAVA_VERSION=${{ matrix.java }}
+ ./build/mvn $MAVEN_CLI_OPTS -DskipTests -Pyarn -Pmesos -Pkubernetes
-Pvolcano -Phive -Phive-thriftserver -Phadoop-cloud
-Djava.version=${JAVA_VERSION/-ea} clean install
+ if [[ "$INCLUDED_TAGS" != "" ]]; then
+ ./build/mvn $MAVEN_CLI_OPTS -pl "$MODULES_TO_TEST" -Pyarn -Pmesos
-Pkubernetes -Pvolcano -Phive -Phive-thriftserver -Phadoop-cloud
-Djava.version=${JAVA_VERSION/-ea} -Dtest.include.tags="$INCLUDED_TAGS" test
Review Comment:
`resource-managers/yarn,resource-managers/mesos,resource-managers/kubernetes`
is irrelevant to this, isn't it?
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]