This is an automated email from the ASF dual-hosted git repository.
agrove pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/datafusion-comet.git
The following commit(s) were added to refs/heads/main by this push:
new ce5de44c7 chore: Enable partial CI checks for `native_datafusion` scan (#1479)
ce5de44c7 is described below
commit ce5de44c705bde9438538ce4a57729d175914dd4
Author: Andy Grove <[email protected]>
AuthorDate: Fri Mar 7 16:41:19 2025 -0700
chore: Enable partial CI checks for `native_datafusion` scan (#1479)
---
.github/workflows/pr_build.yml | 35 ++++++++++++++++++++++
.../main/scala/org/apache/comet/CometConf.scala | 5 ++++
.../apache/comet/CometArrayExpressionSuite.scala | 3 ++
.../scala/org/apache/comet/CometCastSuite.scala | 8 +++++
.../org/apache/comet/CometExpressionSuite.scala | 6 ++++
.../org/apache/comet/exec/CometExecSuite.scala | 2 ++
.../org/apache/comet/exec/CometJoinSuite.scala | 2 ++
.../apache/comet/parquet/ParquetReadSuite.scala | 2 ++
8 files changed, 63 insertions(+)
diff --git a/.github/workflows/pr_build.yml b/.github/workflows/pr_build.yml
index ce81ea2b2..23b684267 100644
--- a/.github/workflows/pr_build.yml
+++ b/.github/workflows/pr_build.yml
@@ -76,6 +76,41 @@ jobs:
# upload test reports only for java 17
upload-test-reports: ${{ matrix.java_version == '17' }}
+ linux-test-native-datafusion-scan:
+ env:
+ COMET_PARQUET_SCAN_IMPL: "native_datafusion"
+ strategy:
+ matrix:
+ os: [ubuntu-latest]
+ java_version: [17]
+ test-target: [rust, java]
+ spark-version: ['3.5']
+ scala-version: ['2.12']
+ is_push_event:
+ - ${{ github.event_name == 'push' }}
+ fail-fast: false
+ name: ${{ matrix.os }}/java ${{ matrix.java_version }}-spark-${{matrix.spark-version}}-scala-${{matrix.scala-version}}/${{ matrix.test-target }}-native-datafusion
+ runs-on: ${{ matrix.os }}
+ container:
+ image: amd64/rust
+ steps:
+ - uses: actions/checkout@v4
+ - name: Setup Rust & Java toolchain
+ uses: ./.github/actions/setup-builder
+ with:
+ rust-version: ${{env.RUST_VERSION}}
+ jdk-version: ${{ matrix.java_version }}
+ - if: matrix.test-target == 'rust'
+ name: Rust test steps
+ uses: ./.github/actions/rust-test
+ - if: matrix.test-target == 'java'
+ name: Java test steps
+ uses: ./.github/actions/java-test
+ with:
+ maven_opts: -Pspark-${{ matrix.spark-version }},scala-${{ matrix.scala-version }}
+ # upload test reports only for java 17
+ upload-test-reports: ${{ matrix.java_version == '17' }}
+
linux-test-with-spark4_0:
strategy:
matrix:
diff --git a/common/src/main/scala/org/apache/comet/CometConf.scala b/common/src/main/scala/org/apache/comet/CometConf.scala
index 32b999745..319b0848c 100644
--- a/common/src/main/scala/org/apache/comet/CometConf.scala
+++ b/common/src/main/scala/org/apache/comet/CometConf.scala
@@ -101,6 +101,11 @@ object CometConf extends ShimCometConf {
.getOrElse("COMET_PARQUET_SCAN_IMPL", SCAN_NATIVE_COMET)
.toLowerCase(Locale.ROOT))
+ def isExperimentalNativeScan: Boolean = COMET_NATIVE_SCAN_IMPL.get() match {
+ case SCAN_NATIVE_DATAFUSION | SCAN_NATIVE_ICEBERG_COMPAT => true
+ case SCAN_NATIVE_COMET => false
+ }
+
val COMET_PARQUET_PARALLEL_IO_ENABLED: ConfigEntry[Boolean] =
conf("spark.comet.parquet.read.parallel.io.enabled")
.doc(
diff --git a/spark/src/test/scala/org/apache/comet/CometArrayExpressionSuite.scala b/spark/src/test/scala/org/apache/comet/CometArrayExpressionSuite.scala
index 8850f2133..f8d709dc6 100644
--- a/spark/src/test/scala/org/apache/comet/CometArrayExpressionSuite.scala
+++ b/spark/src/test/scala/org/apache/comet/CometArrayExpressionSuite.scala
@@ -234,7 +234,10 @@ class CometArrayExpressionSuite extends CometTestBase with AdaptiveSparkPlanHelp
}
test("array_intersect") {
+ // https://github.com/apache/datafusion-comet/issues/1441
+ assume(!CometConf.isExperimentalNativeScan)
withSQLConf(CometConf.COMET_EXPR_ALLOW_INCOMPATIBLE.key -> "true") {
+
Seq(true, false).foreach { dictionaryEnabled =>
withTempDir { dir =>
val path = new Path(dir.toURI.toString, "test.parquet")
diff --git a/spark/src/test/scala/org/apache/comet/CometCastSuite.scala b/spark/src/test/scala/org/apache/comet/CometCastSuite.scala
index 53af89bea..482e602ce 100644
--- a/spark/src/test/scala/org/apache/comet/CometCastSuite.scala
+++ b/spark/src/test/scala/org/apache/comet/CometCastSuite.scala
@@ -900,6 +900,8 @@ class CometCastSuite extends CometTestBase with AdaptiveSparkPlanHelper {
}
test("cast TimestampType to LongType") {
+ // https://github.com/apache/datafusion-comet/issues/1441
+ assume(!CometConf.isExperimentalNativeScan)
assume(CometSparkSessionExtensions.isSpark33Plus)
castTest(generateTimestampsExtended(), DataTypes.LongType)
}
@@ -923,16 +925,22 @@ class CometCastSuite extends CometTestBase with AdaptiveSparkPlanHelper {
}
test("cast TimestampType to StringType") {
+ // https://github.com/apache/datafusion-comet/issues/1441
+ assume(!CometConf.isExperimentalNativeScan)
castTest(generateTimestamps(), DataTypes.StringType)
}
test("cast TimestampType to DateType") {
+ // https://github.com/apache/datafusion-comet/issues/1441
+ assume(!CometConf.isExperimentalNativeScan)
castTest(generateTimestamps(), DataTypes.DateType)
}
// Complex Types
test("cast StructType to StringType") {
+ // https://github.com/apache/datafusion-comet/issues/1441
+ assume(!CometConf.isExperimentalNativeScan)
Seq(true, false).foreach { dictionaryEnabled =>
withTempDir { dir =>
val path = new Path(dir.toURI.toString, "test.parquet")
diff --git a/spark/src/test/scala/org/apache/comet/CometExpressionSuite.scala b/spark/src/test/scala/org/apache/comet/CometExpressionSuite.scala
index e9b42b73a..4836b3604 100644
--- a/spark/src/test/scala/org/apache/comet/CometExpressionSuite.scala
+++ b/spark/src/test/scala/org/apache/comet/CometExpressionSuite.scala
@@ -1266,6 +1266,8 @@ class CometExpressionSuite extends CometTestBase with AdaptiveSparkPlanHelper {
}
test("round") {
+ // https://github.com/apache/datafusion-comet/issues/1441
+ assume(!CometConf.isExperimentalNativeScan)
Seq(true, false).foreach { dictionaryEnabled =>
withTempDir { dir =>
val path = new Path(dir.toURI.toString, "test.parquet")
@@ -1494,6 +1496,8 @@ class CometExpressionSuite extends CometTestBase with AdaptiveSparkPlanHelper {
}
test("hex") {
+ // https://github.com/apache/datafusion-comet/issues/1441
+ assume(!CometConf.isExperimentalNativeScan)
Seq(true, false).foreach { dictionaryEnabled =>
withTempDir { dir =>
val path = new Path(dir.toURI.toString, "hex.parquet")
@@ -2676,6 +2680,8 @@ class CometExpressionSuite extends CometTestBase with AdaptiveSparkPlanHelper {
}
test("test integral divide") {
+ // https://github.com/apache/datafusion-comet/issues/1441
+ assume(!CometConf.isExperimentalNativeScan)
Seq(true, false).foreach { dictionaryEnabled =>
withTempDir { dir =>
val path1 = new Path(dir.toURI.toString, "test1.parquet")
diff --git a/spark/src/test/scala/org/apache/comet/exec/CometExecSuite.scala b/spark/src/test/scala/org/apache/comet/exec/CometExecSuite.scala
index 1c5395d92..781714983 100644
--- a/spark/src/test/scala/org/apache/comet/exec/CometExecSuite.scala
+++ b/spark/src/test/scala/org/apache/comet/exec/CometExecSuite.scala
@@ -818,6 +818,8 @@ class CometExecSuite extends CometTestBase {
}
test("explain native plan") {
+ // https://github.com/apache/datafusion-comet/issues/1441
+ assume(!CometConf.isExperimentalNativeScan)
// there are no assertions in this test to prove that the explain feature
// wrote the expected output to stdout, but we at least test that enabling
// the config does not cause any exceptions.
diff --git a/spark/src/test/scala/org/apache/comet/exec/CometJoinSuite.scala b/spark/src/test/scala/org/apache/comet/exec/CometJoinSuite.scala
index d756da151..e68d63ef1 100644
--- a/spark/src/test/scala/org/apache/comet/exec/CometJoinSuite.scala
+++ b/spark/src/test/scala/org/apache/comet/exec/CometJoinSuite.scala
@@ -201,6 +201,8 @@ class CometJoinSuite extends CometTestBase {
}
test("HashJoin struct key") {
+ // https://github.com/apache/datafusion-comet/issues/1441
+ assume(!CometConf.isExperimentalNativeScan)
withSQLConf(
"spark.sql.join.forceApplyShuffledHashJoin" -> "true",
SQLConf.PREFER_SORTMERGEJOIN.key -> "false",
diff --git a/spark/src/test/scala/org/apache/comet/parquet/ParquetReadSuite.scala b/spark/src/test/scala/org/apache/comet/parquet/ParquetReadSuite.scala
index b4c09a4a5..f6a57b12e 100644
--- a/spark/src/test/scala/org/apache/comet/parquet/ParquetReadSuite.scala
+++ b/spark/src/test/scala/org/apache/comet/parquet/ParquetReadSuite.scala
@@ -337,6 +337,8 @@ abstract class ParquetReadSuite extends CometTestBase {
}
test("test multiple pages with different sizes and nulls") {
+ // https://github.com/apache/datafusion-comet/issues/1441
+ assume(!CometConf.isExperimentalNativeScan)
def makeRawParquetFile(
path: Path,
dictionaryEnabled: Boolean,
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]