andygrove commented on code in PR #1474:
URL: https://github.com/apache/datafusion-comet/pull/1474#discussion_r1980165468
##########
spark/src/main/scala/org/apache/comet/CometSparkSessionExtensions.scala:
##########
@@ -116,140 +116,111 @@ class CometSparkSessionExtensions
withInfo(scan, "Metadata column is not supported")
scan
- case scanExec: FileSourceScanExec
- if COMET_DPP_FALLBACK_ENABLED.get() &&
- scanExec.partitionFilters.exists(isDynamicPruningFilter) =>
- withInfo(scanExec, "DPP not supported")
- scanExec
+ // data source v1
+ case scanExec: FileSourceScanExec =>
+ if (COMET_DPP_FALLBACK_ENABLED.get() &&
+ scanExec.partitionFilters.exists(isDynamicPruningFilter)) {
+ withInfo(scanExec, "DPP not supported")
+ return scanExec
+ }
+
+ scanExec.relation match {
+ case HadoopFsRelation(_, partitionSchema, _, _, fileFormat, _) =>
+ if (!CometScanExec.isFileFormatSupported(fileFormat)) {
+ withInfo(scanExec, s"fileFormat $fileFormat not supported")
+ return scanExec
+ }
+
+ COMET_NATIVE_SCAN_IMPL.get() match {
+ case SCAN_NATIVE_DATAFUSION =>
+ // TODO we only enable full native scan if COMET_EXEC_ENABLED is enabled
+ // but this is not really what we want .. we currently insert `CometScanExec`
+ // here and then it gets replaced with `CometNativeScanExec` in `CometExecRule`
+ // but that only happens if `COMET_EXEC_ENABLED` is enabled
+ if (!COMET_EXEC_ENABLED.get()) {
+ withInfo(
+ scanExec,
+ s"Native scan not enabled when ${COMET_EXEC_ENABLED.key} is not enabled")
+ return scanExec
+ }
+ if (!CometNativeScanExec.isSchemaSupported(scanExec.requiredSchema)) {
+ withInfo(
+ scanExec,
+ s"requiredSchema ${scanExec.requiredSchema} not supported")
+ return scanExec
+ }
+ if (!CometNativeScanExec.isSchemaSupported(partitionSchema)) {
+ withInfo(scanExec, s"partitionSchema $partitionSchema not supported")
+ return scanExec
+ }
+ CometScanExec(scanExec, session)
+
+ case SCAN_NATIVE_COMET | SCAN_NATIVE_ICEBERG_COMPAT =>
Review Comment:
This seems incorrect, but matches the original code.
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]