andygrove commented on code in PR #3035:
URL: https://github.com/apache/datafusion-comet/pull/3035#discussion_r2872881789
##########
spark/src/main/scala/org/apache/spark/sql/comet/CometScanExec.scala:
##########
@@ -583,7 +584,33 @@ object CometScanExec {
def isFileFormatSupported(fileFormat: FileFormat): Boolean = {
// Only support Spark's built-in Parquet scans, not others such as Delta
which use a subclass
// of ParquetFileFormat.
- fileFormat.getClass().equals(classOf[ParquetFileFormat])
+ fileFormat.getClass().equals(classOf[ParquetFileFormat]) || isSupportedDeltaScan(fileFormat)
+ }
+
+ val unsupportedDeltaReaderFeatures: Set[String] = Set("columnMapping", "deletionVectors")
+
+ def isSupportedDeltaScan(fileFormat: FileFormat): Boolean = {
+ if (fileFormat.getClass().getName() != "org.apache.spark.sql.delta.DeltaParquetFileFormat") {
+ return false
+ }
+
+ // Delta scans without certain features enabled are simply normal Parquet scans that can
+ // take advantage of the native scan, so check to see if it is compatible
+ val deltaMetadata = CometDeltaNativeScanMetadata.extract(fileFormat) match {
+ case Some(m) => m
+ case None => return false
+ }
+
+ // Version 1 has no special features
+ // Version 2 introduced column mapping, which is not supported
+ // Version 3 changes to use the readerFeatures list instead, so we check for incompatible
+ // features
+ deltaMetadata.minReaderVersion match {
+ case 1 => true
+ case 2 => false
+ case 3 =>
Review Comment:
Could you add a `case _` clause to handle future reader versions?
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]