This is an automated email from the ASF dual-hosted git repository.
mbutrovich pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/datafusion-comet.git
The following commit(s) were added to refs/heads/main by this push:
new 345e23f7d build: optimize CI cache usage and add fast lint gate (#3251)
345e23f7d is described below
commit 345e23f7d799dd1de2989bcfff63ba05ff13d51c
Author: Andy Grove <[email protected]>
AuthorDate: Fri Jan 23 12:12:31 2026 -0700
build: optimize CI cache usage and add fast lint gate (#3251)
* build: optimize CI cache usage and add fast lint gate
This PR addresses GitHub Actions cache storage approaching its 10 GB limit by:
1. Cache optimization (saves 2+ GB):
- Remove Java version from cargo cache key (Rust target is JDK-independent)
- Use the actions/cache/restore + actions/cache/save pattern (see the sketch after this list)
- Only save cache on main branch, not on PRs
2. Reduce Rust test matrix:
- Consolidate from 2 jobs (Java 11 + Java 17) to 1 job (Java 17)
- Rust code is JDK-independent, so no coverage lost
3. Add fast lint gate (~30 seconds):
- New lint job runs cargo fmt --check before expensive builds
- build-native and linux-test-rust depend on lint passing
- Fail fast on formatting errors instead of waiting 5-10 minutes
- The lint job in the macOS workflow runs on ubuntu-latest for cost efficiency
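For reference, the restore/save split from item 1 reduces to this minimal
sketch (abbreviated from the workflow steps in the diff below):

    - name: Restore Cargo cache
      uses: actions/cache/restore@v4
      with:
        path: |
          ~/.cargo/registry
          ~/.cargo/git
          native/target
        key: ${{ runner.os }}-cargo-ci-${{ hashFiles('native/**/Cargo.lock', 'native/**/Cargo.toml') }}

    # ... build and test steps run here ...

    # Save only on main, so PR runs read the cache but never write to it.
    - name: Save Cargo cache
      uses: actions/cache/save@v4
      if: github.ref == 'refs/heads/main'
      with:
        path: |
          ~/.cargo/registry
          ~/.cargo/git
          native/target
        key: ${{ runner.os }}-cargo-ci-${{ hashFiles('native/**/Cargo.lock', 'native/**/Cargo.toml') }}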
Co-Authored-By: Claude Opus 4.5 <[email protected]>
* fix: add missing datafusion-datasource dependency
The csv_scan.rs file added in #3044 uses datafusion_datasource, but the
dependency was not added to core/Cargo.toml.
Co-Authored-By: Claude Opus 4.5 <[email protected]>
* build: merge TPC-DS/TPC-H correctness tests into pr_build_linux
These workflows verify that benchmark queries produce correct results (they
are not actual performance benchmarks), so they can use the CI build profile
and share the native library artifact from build-native.
Changes:
- Add verify-benchmark-results-tpch job to pr_build_linux
- Add verify-benchmark-results-tpcds job to pr_build_linux (3 join strategies)
- Delete standalone benchmark-tpcds.yml and benchmark-tpch.yml workflows
- Jobs reuse the native library artifact instead of rebuilding it (see the sketch below)
This eliminates 4+ redundant native builds per PR.
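The artifact hand-off between jobs is the standard upload/download pattern; a
condensed sketch (the full step definitions are in the diff below):

    # In build-native:
    - name: Upload native library
      uses: actions/upload-artifact@v4
      with:
        name: native-lib-linux
        path: native/target/ci/libcomet.so
        retention-days: 1

    # In each verify-benchmark-results-* job (which declares needs: build-native):
    - name: Download native library
      uses: actions/download-artifact@v4
      with:
        name: native-lib-linux
        path: native/target/release/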
Co-Authored-By: Claude Opus 4.5 <[email protected]>
* fix: compile test classes before generating TPC data
The GenTPCHData and GenTPCDSData classes are test classes that need
to be compiled before running exec:java. Added a build step to compile
the project (including test classes) before data generation.
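In workflow terms, the added step is a plain Maven compile that skips test
execution, matching the "Build project" step in the diff below:

    - name: Build project
      run: |
        ./mvnw -B -Prelease compile test-compile -DskipTests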
Co-Authored-By: Claude Opus 4.5 <[email protected]>
---------
Co-authored-by: Claude Opus 4.5 <[email protected]>
---
.github/actions/rust-test/action.yaml | 6 +-
.github/workflows/benchmark-tpcds.yml | 155 --------------------------
.github/workflows/benchmark-tpch.yml | 119 --------------------
.github/workflows/pr_build_linux.yml | 204 +++++++++++++++++++++++++++++++---
.github/workflows/pr_build_macos.yml | 29 ++++-
.github/workflows/spark_sql_test.yml | 14 ++-
6 files changed, 230 insertions(+), 297 deletions(-)
diff --git a/.github/actions/rust-test/action.yaml b/.github/actions/rust-test/action.yaml
index 10fc1375f..c39c2dcd4 100644
--- a/.github/actions/rust-test/action.yaml
+++ b/.github/actions/rust-test/action.yaml
@@ -21,11 +21,7 @@ description: "Run Rust tests"
runs:
using: "composite"
steps:
- - name: Check Cargo fmt
- shell: bash
- run: |
- cd native
- cargo fmt --all -- --check --color=never
+ # Note: cargo fmt check is now handled by the lint job that gates this workflow
- name: Check Cargo clippy
shell: bash
diff --git a/.github/workflows/benchmark-tpcds.yml b/.github/workflows/benchmark-tpcds.yml
deleted file mode 100644
index db1fce019..000000000
--- a/.github/workflows/benchmark-tpcds.yml
+++ /dev/null
@@ -1,155 +0,0 @@
-# Licensed to the Apache Software Foundation (ASF) under one
-# or more contributor license agreements. See the NOTICE file
-# distributed with this work for additional information
-# regarding copyright ownership. The ASF licenses this file
-# to you under the Apache License, Version 2.0 (the
-# "License"); you may not use this file except in compliance
-# with the License. You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing,
-# software distributed under the License is distributed on an
-# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-# KIND, either express or implied. See the License for the
-# specific language governing permissions and limitations
-# under the License.
-
-name: TPC-DS Correctness
-
-concurrency:
- group: ${{ github.repository }}-${{ github.head_ref || github.sha }}-${{ github.workflow }}
- cancel-in-progress: true
-
-on:
- push:
- paths-ignore:
- - "doc/**"
- - "docs/**"
- - "**.md"
- - "native/core/benches/**"
- - "native/spark-expr/benches/**"
- - "spark/src/test/scala/org/apache/spark/sql/benchmark/**"
- pull_request:
- paths-ignore:
- - "doc/**"
- - "docs/**"
- - "**.md"
- - "native/core/benches/**"
- - "native/spark-expr/benches/**"
- - "spark/src/test/scala/org/apache/spark/sql/benchmark/**"
- # manual trigger
- # https://docs.github.com/en/actions/managing-workflow-runs/manually-running-a-workflow
- workflow_dispatch:
-
-env:
- RUST_VERSION: stable
-
-jobs:
- prepare:
- name: Build native and prepare data
- runs-on: ubuntu-latest
- container:
- image: amd64/rust
- env:
- JAVA_VERSION: 11
- steps:
- - uses: actions/checkout@v6
- - name: Setup Rust & Java toolchain
- uses: ./.github/actions/setup-builder
- with:
- rust-version: ${{env.RUST_VERSION}}
- jdk-version: 11
- - name: Cache Maven dependencies
- uses: actions/cache@v5
- with:
- path: |
- ~/.m2/repository
- /root/.m2/repository
- key: ${{ runner.os }}-java-maven-${{ hashFiles('**/pom.xml') }}
- restore-keys: |
- ${{ runner.os }}-java-maven-
- - name: Build Comet
- run: make release
- - name: Cache TPC-DS generated data
- id: cache-tpcds-sf-1
- uses: actions/cache@v5
- with:
- path: ./tpcds-sf-1
- key: tpcds-${{ hashFiles('.github/workflows/benchmark.yml') }}
- - name: Checkout tpcds-kit repository
- if: steps.cache-tpcds-sf-1.outputs.cache-hit != 'true'
- uses: actions/checkout@v6
- with:
- repository: databricks/tpcds-kit
- path: ./tpcds-kit
- - name: Build tpcds-kit
- if: steps.cache-tpcds-sf-1.outputs.cache-hit != 'true'
- run: |
- apt-get install -y yacc bison flex gcc-12 g++-12
- update-alternatives --install /usr/bin/gcc gcc /usr/bin/gcc-12 120 --slave /usr/bin/g++ g++ /usr/bin/g++-12
- gcc --version
- cd tpcds-kit/tools && make OS=LINUX
- - name: Generate TPC-DS (SF=1) table data
- if: steps.cache-tpcds-sf-1.outputs.cache-hit != 'true'
- run: |
- cd spark && MAVEN_OPTS='-Xmx20g' ../mvnw exec:java -Dexec.mainClass="org.apache.spark.sql.GenTPCDSData" -Dexec.classpathScope="test" -Dexec.cleanupDaemonThreads="false" -Dexec.args="--dsdgenDir `pwd`/../tpcds-kit/tools --location `pwd`/../tpcds-sf-1 --scaleFactor 1 --numPartitions 1"
- cd ..
-
- benchmark:
- name: Run TPCDSQuerySuite
- runs-on: ubuntu-latest
- needs: [prepare]
- container:
- image: amd64/rust
- strategy:
- matrix:
- join: [sort_merge, broadcast, hash]
- steps:
- - uses: actions/checkout@v6
- - name: Setup Rust & Java toolchain
- uses: ./.github/actions/setup-builder
- with:
- rust-version: ${{env.RUST_VERSION}}
- jdk-version: 11
- - name: Cache Maven dependencies
- uses: actions/cache@v5
- with:
- path: |
- ~/.m2/repository
- /root/.m2/repository
- key: ${{ runner.os }}-java-maven-${{ hashFiles('**/pom.xml') }}
- restore-keys: |
- ${{ runner.os }}-java-maven-
- - name: Restore TPC-DS generated data
- id: cache-tpcds-sf-1
- uses: actions/cache/restore@v5
- with:
- path: ./tpcds-sf-1
- key: tpcds-${{ hashFiles('.github/workflows/benchmark.yml') }}
- fail-on-cache-miss: true # it should always be cached, since the prepare job generates it if missing
- - name: Build Comet
- run: make release
- - name: Run TPC-DS queries (Sort merge join)
- if: matrix.join == 'sort_merge'
- run: |
- SPARK_HOME=`pwd` SPARK_TPCDS_DATA=`pwd`/tpcds-sf-1 ./mvnw -B -Prelease -Dsuites=org.apache.spark.sql.CometTPCDSQuerySuite test
- env:
- SPARK_TPCDS_JOIN_CONF: |
- spark.sql.autoBroadcastJoinThreshold=-1
- spark.sql.join.preferSortMergeJoin=true
- - name: Run TPC-DS queries (Broadcast hash join)
- if: matrix.join == 'broadcast'
- run: |
- SPARK_HOME=`pwd` SPARK_TPCDS_DATA=`pwd`/tpcds-sf-1 ./mvnw -B -Prelease -Dsuites=org.apache.spark.sql.CometTPCDSQuerySuite test
- env:
- SPARK_TPCDS_JOIN_CONF: |
- spark.sql.autoBroadcastJoinThreshold=10485760
- - name: Run TPC-DS queries (Shuffled hash join)
- if: matrix.join == 'hash'
- run: |
- SPARK_HOME=`pwd` SPARK_TPCDS_DATA=`pwd`/tpcds-sf-1 ./mvnw -B -Prelease -Dsuites=org.apache.spark.sql.CometTPCDSQuerySuite test
- env:
- SPARK_TPCDS_JOIN_CONF: |
- spark.sql.autoBroadcastJoinThreshold=-1
- spark.sql.join.forceApplyShuffledHashJoin=true
diff --git a/.github/workflows/benchmark-tpch.yml b/.github/workflows/benchmark-tpch.yml
deleted file mode 100644
index 124b0d0c7..000000000
--- a/.github/workflows/benchmark-tpch.yml
+++ /dev/null
@@ -1,119 +0,0 @@
-# Licensed to the Apache Software Foundation (ASF) under one
-# or more contributor license agreements. See the NOTICE file
-# distributed with this work for additional information
-# regarding copyright ownership. The ASF licenses this file
-# to you under the Apache License, Version 2.0 (the
-# "License"); you may not use this file except in compliance
-# with the License. You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing,
-# software distributed under the License is distributed on an
-# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-# KIND, either express or implied. See the License for the
-# specific language governing permissions and limitations
-# under the License.
-
-name: TPC-H Correctness
-
-concurrency:
- group: ${{ github.repository }}-${{ github.head_ref || github.sha }}-${{ github.workflow }}
- cancel-in-progress: true
-
-on:
- push:
- paths-ignore:
- - "doc/**"
- - "docs/**"
- - "**.md"
- - "native/core/benches/**"
- - "native/spark-expr/benches/**"
- - "spark/src/test/scala/org/apache/spark/sql/benchmark/**"
- pull_request:
- paths-ignore:
- - "doc/**"
- - "docs/**"
- - "**.md"
- - "native/core/benches/**"
- - "native/spark-expr/benches/**"
- - "spark/src/test/scala/org/apache/spark/sql/benchmark/**"
- # manual trigger
- # https://docs.github.com/en/actions/managing-workflow-runs/manually-running-a-workflow
- workflow_dispatch:
-
-env:
- RUST_VERSION: stable
-
-jobs:
- prepare:
- name: Build native and prepare data
- runs-on: ubuntu-latest
- container:
- image: amd64/rust
- env:
- JAVA_VERSION: 11
- steps:
- - uses: actions/checkout@v6
- - name: Setup Rust & Java toolchain
- uses: ./.github/actions/setup-builder
- with:
- rust-version: ${{env.RUST_VERSION}}
- jdk-version: 11
- - name: Cache Maven dependencies
- uses: actions/cache@v5
- with:
- path: |
- ~/.m2/repository
- /root/.m2/repository
- key: ${{ runner.os }}-java-maven-${{ hashFiles('**/pom.xml') }}
- restore-keys: |
- ${{ runner.os }}-java-maven-
- - name: Cache TPC-H generated data
- id: cache-tpch-sf-1
- uses: actions/cache@v5
- with:
- path: ./tpch
- key: tpch-${{ hashFiles('.github/workflows/benchmark-tpch.yml') }}
- - name: Build Comet
- run: make release
- - name: Generate TPC-H (SF=1) table data
- if: steps.cache-tpch-sf-1.outputs.cache-hit != 'true'
- run: |
- cd spark && MAVEN_OPTS='-Xmx20g' ../mvnw exec:java -Dexec.mainClass="org.apache.spark.sql.GenTPCHData" -Dexec.classpathScope="test" -Dexec.cleanupDaemonThreads="false" -Dexec.args="--location `pwd`/.. --scaleFactor 1 --numPartitions 1 --overwrite"
- cd ..
-
- benchmark:
- name: Run TPCHQuerySuite
- runs-on: ubuntu-latest
- needs: [prepare]
- container:
- image: amd64/rust
- steps:
- - uses: actions/checkout@v6
- - name: Setup Rust & Java toolchain
- uses: ./.github/actions/setup-builder
- with:
- rust-version: ${{env.RUST_VERSION}}
- jdk-version: 11
- - name: Cache Maven dependencies
- uses: actions/cache@v5
- with:
- path: |
- ~/.m2/repository
- /root/.m2/repository
- key: ${{ runner.os }}-java-maven-${{ hashFiles('**/pom.xml') }}
- restore-keys: |
- ${{ runner.os }}-java-maven-
- - name: Restore TPC-H generated data
- id: cache-tpch-sf-1
- uses: actions/cache/restore@v5
- with:
- path: ./tpch
- key: tpch-${{ hashFiles('.github/workflows/benchmark-tpch.yml') }}
- fail-on-cache-miss: true # it should always be cached, since the prepare job generates it if missing
- - name: Build Comet
- run: make release
- - name: Run TPC-H queries
- run: |
- SPARK_HOME=`pwd` SPARK_TPCH_DATA=`pwd`/tpch/sf1_parquet ./mvnw -B -Prelease -Dsuites=org.apache.spark.sql.CometTPCHQuerySuite test
diff --git a/.github/workflows/pr_build_linux.yml b/.github/workflows/pr_build_linux.yml
index 4a0b27761..bb7e917af 100644
--- a/.github/workflows/pr_build_linux.yml
+++ b/.github/workflows/pr_build_linux.yml
@@ -47,8 +47,23 @@ env:
jobs:
+ # Fast lint check - gates all other jobs
+ lint:
+ name: Lint
+ runs-on: ubuntu-latest
+ container:
+ image: amd64/rust
+ steps:
+ - uses: actions/checkout@v6
+
+ - name: Check Rust formatting
+ run: |
+ rustup component add rustfmt
+ cd native && cargo fmt --all -- --check
+
# Build native library once and share with all test jobs
build-native:
+ needs: lint
name: Build Native Library
runs-on: ubuntu-latest
container:
@@ -62,8 +77,8 @@ jobs:
rust-version: ${{ env.RUST_VERSION }}
jdk-version: 17 # JDK only needed for common module proto generation
- - name: Cache Cargo
- uses: actions/cache@v4
+ - name: Restore Cargo cache
+ uses: actions/cache/restore@v4
with:
path: |
~/.cargo/registry
@@ -87,15 +102,21 @@ jobs:
path: native/target/ci/libcomet.so
retention-days: 1
+ - name: Save Cargo cache
+ uses: actions/cache/save@v4
+ if: github.ref == 'refs/heads/main'
+ with:
+ path: |
+ ~/.cargo/registry
+ ~/.cargo/git
+ native/target
+ key: ${{ runner.os }}-cargo-ci-${{ hashFiles('native/**/Cargo.lock', 'native/**/Cargo.toml') }}
+
# Run Rust tests (runs in parallel with build-native, uses debug builds)
linux-test-rust:
- strategy:
- matrix:
- os: [ubuntu-latest]
- java_version: [11, 17]
- fail-fast: false
- name: ${{ matrix.os }}/java ${{ matrix.java_version }}-rust
- runs-on: ${{ matrix.os }}
+ needs: lint
+ name: ubuntu-latest/rust-test
+ runs-on: ubuntu-latest
container:
image: amd64/rust
steps:
@@ -105,22 +126,33 @@ jobs:
uses: ./.github/actions/setup-builder
with:
rust-version: ${{ env.RUST_VERSION }}
- jdk-version: ${{ matrix.java_version }}
+ jdk-version: 17
- - name: Cache Cargo
- uses: actions/cache@v4
+ - name: Restore Cargo cache
+ uses: actions/cache/restore@v4
with:
path: |
~/.cargo/registry
~/.cargo/git
native/target
- key: ${{ runner.os }}-cargo-debug-java${{ matrix.java_version }}-${{ hashFiles('native/**/Cargo.lock', 'native/**/Cargo.toml') }}
+ # Note: Java version intentionally excluded - Rust target is JDK-independent
+ key: ${{ runner.os }}-cargo-debug-${{ hashFiles('native/**/Cargo.lock', 'native/**/Cargo.toml') }}
restore-keys: |
- ${{ runner.os }}-cargo-debug-java${{ matrix.java_version }}-
+ ${{ runner.os }}-cargo-debug-
- name: Rust test steps
uses: ./.github/actions/rust-test
+ - name: Save Cargo cache
+ uses: actions/cache/save@v4
+ if: github.ref == 'refs/heads/main'
+ with:
+ path: |
+ ~/.cargo/registry
+ ~/.cargo/git
+ native/target
+ key: ${{ runner.os }}-cargo-debug-${{ hashFiles('native/**/Cargo.lock', 'native/**/Cargo.toml') }}
+
linux-test:
needs: build-native
strategy:
@@ -277,3 +309,147 @@ jobs:
scan_impl: ${{ matrix.profile.scan_impl }}
upload-test-reports: true
skip-native-build: true
+
+ # TPC-H correctness test - verifies benchmark queries produce correct results
+ verify-benchmark-results-tpch:
+ needs: build-native
+ name: Verify TPC-H Results
+ runs-on: ubuntu-latest
+ container:
+ image: amd64/rust
+ steps:
+ - uses: actions/checkout@v6
+
+ - name: Setup Rust & Java toolchain
+ uses: ./.github/actions/setup-builder
+ with:
+ rust-version: ${{ env.RUST_VERSION }}
+ jdk-version: 11
+
+ - name: Download native library
+ uses: actions/download-artifact@v4
+ with:
+ name: native-lib-linux
+ path: native/target/release/
+
+ - name: Cache Maven dependencies
+ uses: actions/cache@v4
+ with:
+ path: |
+ ~/.m2/repository
+ /root/.m2/repository
+ key: ${{ runner.os }}-java-maven-${{ hashFiles('**/pom.xml') }}
+ restore-keys: |
+ ${{ runner.os }}-java-maven-
+
+ - name: Cache TPC-H data
+ id: cache-tpch
+ uses: actions/cache@v4
+ with:
+ path: ./tpch
+ key: tpch-${{ hashFiles('.github/workflows/pr_build_linux.yml') }}
+
+ - name: Build project
+ run: |
+ ./mvnw -B -Prelease compile test-compile -DskipTests
+
+ - name: Generate TPC-H data (SF=1)
+ if: steps.cache-tpch.outputs.cache-hit != 'true'
+ run: |
+ cd spark && MAVEN_OPTS='-Xmx20g' ../mvnw -B -Prelease exec:java -Dexec.mainClass="org.apache.spark.sql.GenTPCHData" -Dexec.classpathScope="test" -Dexec.cleanupDaemonThreads="false" -Dexec.args="--location `pwd`/.. --scaleFactor 1 --numPartitions 1 --overwrite"
+
+ - name: Run TPC-H queries
+ run: |
+ SPARK_HOME=`pwd` SPARK_TPCH_DATA=`pwd`/tpch/sf1_parquet ./mvnw -B -Prelease -Dsuites=org.apache.spark.sql.CometTPCHQuerySuite test
+
+ # TPC-DS correctness tests - verify that benchmark queries produce correct results
+ verify-benchmark-results-tpcds:
+ needs: build-native
+ name: Verify TPC-DS Results (${{ matrix.join }})
+ runs-on: ubuntu-latest
+ container:
+ image: amd64/rust
+ strategy:
+ matrix:
+ join: [sort_merge, broadcast, hash]
+ fail-fast: false
+ steps:
+ - uses: actions/checkout@v6
+
+ - name: Setup Rust & Java toolchain
+ uses: ./.github/actions/setup-builder
+ with:
+ rust-version: ${{ env.RUST_VERSION }}
+ jdk-version: 11
+
+ - name: Download native library
+ uses: actions/download-artifact@v4
+ with:
+ name: native-lib-linux
+ path: native/target/release/
+
+ - name: Cache Maven dependencies
+ uses: actions/cache@v4
+ with:
+ path: |
+ ~/.m2/repository
+ /root/.m2/repository
+ key: ${{ runner.os }}-java-maven-${{ hashFiles('**/pom.xml') }}
+ restore-keys: |
+ ${{ runner.os }}-java-maven-
+
+ - name: Cache TPC-DS data
+ id: cache-tpcds
+ uses: actions/cache@v4
+ with:
+ path: ./tpcds-sf-1
+ key: tpcds-${{ hashFiles('.github/workflows/pr_build_linux.yml') }}
+
+ - name: Build project
+ run: |
+ ./mvnw -B -Prelease compile test-compile -DskipTests
+
+ - name: Checkout tpcds-kit
+ if: steps.cache-tpcds.outputs.cache-hit != 'true'
+ uses: actions/checkout@v6
+ with:
+ repository: databricks/tpcds-kit
+ path: ./tpcds-kit
+
+ - name: Build tpcds-kit
+ if: steps.cache-tpcds.outputs.cache-hit != 'true'
+ run: |
+ apt-get update && apt-get install -y yacc bison flex gcc-12 g++-12
+ update-alternatives --install /usr/bin/gcc gcc /usr/bin/gcc-12 120 --slave /usr/bin/g++ g++ /usr/bin/g++-12
+ cd tpcds-kit/tools && make OS=LINUX
+
+ - name: Generate TPC-DS data (SF=1)
+ if: steps.cache-tpcds.outputs.cache-hit != 'true'
+ run: |
+ cd spark && MAVEN_OPTS='-Xmx20g' ../mvnw -B -Prelease exec:java -Dexec.mainClass="org.apache.spark.sql.GenTPCDSData" -Dexec.classpathScope="test" -Dexec.cleanupDaemonThreads="false" -Dexec.args="--dsdgenDir `pwd`/../tpcds-kit/tools --location `pwd`/../tpcds-sf-1 --scaleFactor 1 --numPartitions 1"
+
+ - name: Run TPC-DS queries (Sort merge join)
+ if: matrix.join == 'sort_merge'
+ run: |
+ SPARK_HOME=`pwd` SPARK_TPCDS_DATA=`pwd`/tpcds-sf-1 ./mvnw -B -Prelease -Dsuites=org.apache.spark.sql.CometTPCDSQuerySuite test
+ env:
+ SPARK_TPCDS_JOIN_CONF: |
+ spark.sql.autoBroadcastJoinThreshold=-1
+ spark.sql.join.preferSortMergeJoin=true
+
+ - name: Run TPC-DS queries (Broadcast hash join)
+ if: matrix.join == 'broadcast'
+ run: |
+ SPARK_HOME=`pwd` SPARK_TPCDS_DATA=`pwd`/tpcds-sf-1 ./mvnw -B -Prelease -Dsuites=org.apache.spark.sql.CometTPCDSQuerySuite test
+ env:
+ SPARK_TPCDS_JOIN_CONF: |
+ spark.sql.autoBroadcastJoinThreshold=10485760
+
+ - name: Run TPC-DS queries (Shuffled hash join)
+ if: matrix.join == 'hash'
+ run: |
+ SPARK_HOME=`pwd` SPARK_TPCDS_DATA=`pwd`/tpcds-sf-1 ./mvnw -B -Prelease -Dsuites=org.apache.spark.sql.CometTPCDSQuerySuite test
+ env:
+ SPARK_TPCDS_JOIN_CONF: |
+ spark.sql.autoBroadcastJoinThreshold=-1
+ spark.sql.join.forceApplyShuffledHashJoin=true
diff --git a/.github/workflows/pr_build_macos.yml b/.github/workflows/pr_build_macos.yml
index 37e623456..d76edc008 100644
--- a/.github/workflows/pr_build_macos.yml
+++ b/.github/workflows/pr_build_macos.yml
@@ -47,8 +47,23 @@ env:
jobs:
+ # Fast lint check - gates all other jobs (runs on Linux for cost efficiency)
+ lint:
+ name: Lint
+ runs-on: ubuntu-latest
+ container:
+ image: amd64/rust
+ steps:
+ - uses: actions/checkout@v6
+
+ - name: Check Rust formatting
+ run: |
+ rustup component add rustfmt
+ cd native && cargo fmt --all -- --check
+
# Build native library once and share with all test jobs
build-native:
+ needs: lint
name: Build Native Library (macOS)
runs-on: macos-14
steps:
@@ -62,8 +77,8 @@ jobs:
jdk-architecture: aarch64
protoc-architecture: aarch_64
- - name: Cache Cargo
- uses: actions/cache@v4
+ - name: Restore Cargo cache
+ uses: actions/cache/restore@v4
with:
path: |
~/.cargo/registry
@@ -87,6 +102,16 @@ jobs:
path: native/target/ci/libcomet.dylib
retention-days: 1
+ - name: Save Cargo cache
+ uses: actions/cache/save@v4
+ if: github.ref == 'refs/heads/main'
+ with:
+ path: |
+ ~/.cargo/registry
+ ~/.cargo/git
+ native/target
+ key: ${{ runner.os }}-cargo-ci-${{ hashFiles('native/**/Cargo.lock', 'native/**/Cargo.toml') }}
+
macos-aarch64-test:
needs: build-native
strategy:
diff --git a/.github/workflows/spark_sql_test.yml b/.github/workflows/spark_sql_test.yml
index 955fc6927..fd5429383 100644
--- a/.github/workflows/spark_sql_test.yml
+++ b/.github/workflows/spark_sql_test.yml
@@ -68,8 +68,8 @@ jobs:
rust-version: ${{ env.RUST_VERSION }}
jdk-version: 17
- - name: Cache Cargo
- uses: actions/cache@v4
+ - name: Restore Cargo cache
+ uses: actions/cache/restore@v4
with:
path: |
~/.cargo/registry
@@ -91,6 +91,16 @@ jobs:
path: native/target/ci/libcomet.so
retention-days: 1
+ - name: Save Cargo cache
+ uses: actions/cache/save@v4
+ if: github.ref == 'refs/heads/main'
+ with:
+ path: |
+ ~/.cargo/registry
+ ~/.cargo/git
+ native/target
+ key: ${{ runner.os }}-cargo-ci-${{ hashFiles('native/**/Cargo.lock', 'native/**/Cargo.toml') }}
+
spark-sql-auto-scan:
needs: build-native
strategy:
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]