andygrove commented on code in PR #1474: URL: https://github.com/apache/datafusion-comet/pull/1474#discussion_r1980165468
##########
spark/src/main/scala/org/apache/comet/CometSparkSessionExtensions.scala:
##########
@@ -116,140 +116,111 @@ class CometSparkSessionExtensions
           withInfo(scan, "Metadata column is not supported")
           scan
 
-        case scanExec: FileSourceScanExec
-            if COMET_DPP_FALLBACK_ENABLED.get() &&
-              scanExec.partitionFilters.exists(isDynamicPruningFilter) =>
-          withInfo(scanExec, "DPP not supported")
-          scanExec
+        // data source v1
+        case scanExec: FileSourceScanExec =>
+          if (COMET_DPP_FALLBACK_ENABLED.get() &&
+            scanExec.partitionFilters.exists(isDynamicPruningFilter)) {
+            withInfo(scanExec, "DPP not supported")
+            return scanExec
+          }
+
+          scanExec.relation match {
+            case HadoopFsRelation(_, partitionSchema, _, _, fileFormat, _) =>
+              if (!CometScanExec.isFileFormatSupported(fileFormat)) {
+                withInfo(scanExec, s"fileFormat $fileFormat not supported")
+                return scanExec
+              }
+
+              COMET_NATIVE_SCAN_IMPL.get() match {
+                case SCAN_NATIVE_DATAFUSION =>
+                  // TODO we only enable full native scan if COMET_EXEC_ENABLED is enabled
+                  // but this is not really what we want .. we currently insert `CometScanExec`
+                  // here and then it gets replaced with `CometNativeScanExec` in `CometExecRule`
+                  // but that only happens if `COMET_EXEC_ENABLED` is enabled
+                  if (!COMET_EXEC_ENABLED.get()) {
+                    withInfo(
+                      scanExec,
+                      s"Native scan not enabled when ${COMET_EXEC_ENABLED.key} is not enabled")
+                    return scanExec
+                  }
+                  if (!CometNativeScanExec.isSchemaSupported(scanExec.requiredSchema)) {
+                    withInfo(
+                      scanExec,
+                      s"requiredSchema ${scanExec.requiredSchema} not supported")
+                    return scanExec
+                  }
+                  if (!CometNativeScanExec.isSchemaSupported(partitionSchema)) {
+                    withInfo(scanExec, s"partitionSchema $partitionSchema not supported")
+                    return scanExec
+                  }
+                  CometScanExec(scanExec, session)
+
+                case SCAN_NATIVE_COMET | SCAN_NATIVE_ICEBERG_COMPAT =>

Review Comment:
   This seems incorrect, but matches the original code.

-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: github-unsubscr...@datafusion.apache.org

For queries about this service, please contact Infrastructure at:
us...@infra.apache.org

---------------------------------------------------------------------
To unsubscribe, e-mail: github-unsubscr...@datafusion.apache.org
For additional commands, e-mail: github-h...@datafusion.apache.org