This is an automated email from the ASF dual-hosted git repository.
agrove pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/datafusion-comet.git
The following commit(s) were added to refs/heads/main by this push:
new c9af2c6ec chore: Auto scan mode no longer falls back to `native_comet` (#3236)
c9af2c6ec is described below
commit c9af2c6ecc6152081497143a504e6b8fe3e1e67a
Author: Andy Grove <[email protected]>
AuthorDate: Fri Jan 23 10:54:05 2026 -0700
chore: Auto scan mode no longer falls back to `native_comet` (#3236)
---
.../org/apache/comet/rules/CometScanRule.scala | 1 -
.../org/apache/comet/CometExpressionSuite.scala | 86 +++++++++++++---------
2 files changed, 51 insertions(+), 36 deletions(-)
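
A practical note on the behavior change: with the fallback removed, jobs that relied on auto mode silently picking `native_comet` will now run the stock Spark scan when `native_iceberg_compat` cannot be used, so anyone who still needs `native_comet` must select it explicitly. A hedged sketch of doing that from a SparkSession, assuming a Comet-enabled build (the config identifiers come from the diff below; the session setup is illustrative):

import org.apache.spark.sql.SparkSession
import org.apache.comet.CometConf

object PinNativeCometScan {
  def main(args: Array[String]): Unit = {
    val spark = SparkSession.builder().appName("comet-scan-pin").getOrCreate()
    // Explicitly pin the scan implementation; auto mode no longer falls back to it.
    spark.conf.set(CometConf.COMET_NATIVE_SCAN_IMPL.key, CometConf.SCAN_NATIVE_COMET)
    spark.stop()
  }
}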
diff --git a/spark/src/main/scala/org/apache/comet/rules/CometScanRule.scala b/spark/src/main/scala/org/apache/comet/rules/CometScanRule.scala
index 4310605f2..bfcf25074 100644
--- a/spark/src/main/scala/org/apache/comet/rules/CometScanRule.scala
+++ b/spark/src/main/scala/org/apache/comet/rules/CometScanRule.scala
@@ -167,7 +167,6 @@ case class CometScanRule(session: SparkSession) extends Rule[SparkPlan] with Com
case SCAN_AUTO =>
// TODO add support for native_datafusion in the future
nativeIcebergCompatScan(session, scanExec, r, hadoopConf)
- .orElse(nativeCometScan(session, scanExec, r, hadoopConf))
.getOrElse(scanExec)
case SCAN_NATIVE_DATAFUSION =>
nativeDataFusionScan(session, scanExec, r, hadoopConf).getOrElse(scanExec)
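
To make the SCAN_AUTO change above concrete: the arm resolves the scan implementation through an Option chain, so deleting the `.orElse` line means auto mode either selects `native_iceberg_compat` or keeps the unmodified Spark scan. A minimal standalone sketch of the before/after semantics, using hypothetical Option-returning stand-ins for the real scan constructors:

object ScanAutoSketch {
  sealed trait Plan
  case object SparkScan extends Plan         // the unmodified scanExec
  case object IcebergCompatScan extends Plan // native_iceberg_compat
  case object NativeCometScan extends Plan   // native_comet

  // Hypothetical stand-ins: each returns None when it cannot handle the scan.
  def icebergCompat(ok: Boolean): Option[Plan] = if (ok) Some(IcebergCompatScan) else None
  def nativeComet(ok: Boolean): Option[Plan] = if (ok) Some(NativeCometScan) else None

  // Before this commit: try iceberg_compat, then native_comet, then give up.
  def before(icebergOk: Boolean, cometOk: Boolean): Plan =
    icebergCompat(icebergOk).orElse(nativeComet(cometOk)).getOrElse(SparkScan)

  // After this commit: native_comet is no longer tried in auto mode.
  def after(icebergOk: Boolean): Plan =
    icebergCompat(icebergOk).getOrElse(SparkScan)

  def main(args: Array[String]): Unit = {
    assert(before(icebergOk = false, cometOk = true) == NativeCometScan)
    assert(after(icebergOk = false) == SparkScan) // now falls back to Spark
  }
}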
diff --git a/spark/src/test/scala/org/apache/comet/CometExpressionSuite.scala
b/spark/src/test/scala/org/apache/comet/CometExpressionSuite.scala
index 250d2f91c..e0a5c43ae 100644
--- a/spark/src/test/scala/org/apache/comet/CometExpressionSuite.scala
+++ b/spark/src/test/scala/org/apache/comet/CometExpressionSuite.scala
@@ -187,53 +187,69 @@ class CometExpressionSuite extends CometTestBase with AdaptiveSparkPlanHelper {
}
test("basic data type support") {
+ // this test requires native_comet scan due to unsigned u8/u16 issue
+ withSQLConf(CometConf.COMET_NATIVE_SCAN_IMPL.key -> CometConf.SCAN_NATIVE_COMET) {
+ Seq(true, false).foreach { dictionaryEnabled =>
+ withTempDir { dir =>
+ val path = new Path(dir.toURI.toString, "test.parquet")
+ makeParquetFileAllPrimitiveTypes(path, dictionaryEnabled = dictionaryEnabled, 10000)
+ withParquetTable(path.toString, "tbl") {
+ checkSparkAnswerAndOperator("select * FROM tbl WHERE _2 > 100")
+ }
+ }
+ }
+ }
+ }
+
+ test("basic data type support - excluding u8/u16") {
+ // variant that skips _9 (UINT_8) and _10 (UINT_16) for default scan impl
Seq(true, false).foreach { dictionaryEnabled =>
withTempDir { dir =>
val path = new Path(dir.toURI.toString, "test.parquet")
makeParquetFileAllPrimitiveTypes(path, dictionaryEnabled = dictionaryEnabled, 10000)
- withSQLConf(CometConf.COMET_SCAN_ALLOW_INCOMPATIBLE.key -> "false") {
+ withParquetTable(path.toString, "tbl") {
+ // select all columns except _9 (UINT_8) and _10 (UINT_16)
+ checkSparkAnswerAndOperator(
+ """select _1, _2, _3, _4, _5, _6, _7, _8, _11, _12, _13, _14, _15,
_16, _17,
+ |_18, _19, _20, _21, _id FROM tbl WHERE _2 > 100""".stripMargin)
+ }
+ }
+ }
+ }
+
+ test("uint data type support") {
+ // this test requires native_comet scan due to unsigned u8/u16 issue
+ withSQLConf(CometConf.COMET_NATIVE_SCAN_IMPL.key -> CometConf.SCAN_NATIVE_COMET) {
+ Seq(true, false).foreach { dictionaryEnabled =>
+ withTempDir { dir =>
+ val path = new Path(dir.toURI.toString, "testuint.parquet")
+ makeParquetFileAllPrimitiveTypes(
+ path,
+ dictionaryEnabled = dictionaryEnabled,
+ Byte.MinValue,
+ Byte.MaxValue)
withParquetTable(path.toString, "tbl") {
- checkSparkAnswerAndOperator("select * FROM tbl WHERE _2 > 100")
+ val qry = "select _9 from tbl order by _11"
+ checkSparkAnswerAndOperator(qry)
}
}
}
}
}
- test("uint data type support") {
+ test("uint data type support - excluding u8/u16") {
+ // variant that tests UINT_32 and UINT_64, skipping _9 (UINT_8) and _10 (UINT_16)
Seq(true, false).foreach { dictionaryEnabled =>
- // TODO: Once the question of what to get back from uint_8, uint_16 types is resolved,
- // we can also update this test to check for COMET_SCAN_ALLOW_INCOMPATIBLE=true
- Seq(false).foreach { allowIncompatible =>
- {
- withSQLConf(CometConf.COMET_SCAN_ALLOW_INCOMPATIBLE.key -> allowIncompatible.toString) {
- withTempDir { dir =>
- val path = new Path(dir.toURI.toString, "testuint.parquet")
- makeParquetFileAllPrimitiveTypes(
- path,
- dictionaryEnabled = dictionaryEnabled,
- Byte.MinValue,
- Byte.MaxValue)
- withParquetTable(path.toString, "tbl") {
- val qry = "select _9 from tbl order by _11"
- if (usingDataSourceExec(conf)) {
- if (!allowIncompatible) {
- checkSparkAnswerAndOperator(qry)
- } else {
- // need to convert the values to unsigned values
- val expected = (Byte.MinValue to Byte.MaxValue)
- .map(v => {
- if (v < 0) Byte.MaxValue.toShort - v else v
- })
- .toDF("a")
- checkAnswer(sql(qry), expected)
- }
- } else {
- checkSparkAnswerAndOperator(qry)
- }
- }
- }
- }
+ withTempDir { dir =>
+ val path = new Path(dir.toURI.toString, "testuint.parquet")
+ makeParquetFileAllPrimitiveTypes(
+ path,
+ dictionaryEnabled = dictionaryEnabled,
+ Byte.MinValue,
+ Byte.MaxValue)
+ withParquetTable(path.toString, "tbl") {
+ // test UINT_32 (_11) and UINT_64 (_12) only
+ checkSparkAnswerAndOperator("select _11, _12 from tbl order by _11")
}
}
}
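
On the u8/u16 comments in the tests above: Parquet's UINT_8/UINT_16 logical types store the same bit patterns as their signed counterparts, so the difficulty the tests sidestep is one of interpretation, not storage. A small standalone illustration (plain Scala, no Comet involved) of how a single stored byte reads back differently signed versus unsigned:

object UnsignedByteSketch {
  def main(args: Array[String]): Unit = {
    val raw: Byte = -1                                     // stored bit pattern 0xFF
    val signed: Int = raw.toInt                            // -1 when read as signed
    val unsigned: Int = java.lang.Byte.toUnsignedInt(raw)  // 255 when read as UINT_8
    println(s"signed=$signed, unsigned=$unsigned")
  }
}

This is why the tests that exercise columns _9 and _10 pin the scan implementation to `native_comet`, while the new "- excluding u8/u16" variants run under the default implementation and simply skip those columns.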
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]