This is an automated email from the ASF dual-hosted git repository.

agrove pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/datafusion-comet.git
The following commit(s) were added to refs/heads/main by this push:
     new bab70d2a9 fix: fall back on nested types for default values (#1799)
bab70d2a9 is described below

commit bab70d2a9d2760fdf71a7749a568135d60272648
Author: Matt Butrovich <mbutrov...@users.noreply.github.com>
AuthorDate: Wed May 28 14:18:55 2025 -0400

    fix: fall back on nested types for default values (#1799)

    * Fall back on nested types for default values.

    * Update compatibility guide.

    * Address PR feedback.

    Co-authored-by: Andy Grove <agr...@apache.org>

    * Fix compilation after applying suggestion.

    * Refactor check for nested types in default values.

    * Add comment.

    ---------

    Co-authored-by: Andy Grove <agr...@apache.org>
---
 docs/source/user-guide/compatibility.md            |  1 +
 docs/templates/compatibility-template.md           |  1 +
 .../org/apache/comet/rules/CometScanRule.scala     | 22 ++++++++++++++++++----
 3 files changed, 20 insertions(+), 4 deletions(-)

diff --git a/docs/source/user-guide/compatibility.md b/docs/source/user-guide/compatibility.md
index 4316c1c8c..b76e2616c 100644
--- a/docs/source/user-guide/compatibility.md
+++ b/docs/source/user-guide/compatibility.md
@@ -65,6 +65,7 @@ types (regardless of the logical type). This behavior can be disabled by setting
 
 - There is a known performance issue when pushing filters down to Parquet. See the [Comet Tuning Guide] for more information.
 - There are failures in the Spark SQL test suite when enabling these new scans (tracking issues: [#1542] and [#1545]).
+- No support for default values that are nested types (e.g., maps, arrays, structs). Literal default values are supported.
 
 [#1545]: https://github.com/apache/datafusion-comet/issues/1545
 [#1542]: https://github.com/apache/datafusion-comet/issues/1542
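For context, a minimal sketch (not part of this commit) of a sequence that would now trigger the documented fallback. It assumes Spark 3.4+ (column DEFAULT support) with a running SparkSession `spark` and the Comet extension enabled; the table and column names are hypothetical:

    // Create a Parquet table and insert a row before the new column exists.
    spark.sql("CREATE TABLE t (id INT) USING parquet")
    spark.sql("INSERT INTO t VALUES (1)")
    // Adding a column with a nested default gives pre-existing rows an
    // "existence default". Spark stores that value in Java-native form
    // (here a GenericArrayData), which the CometScanRule change below detects.
    spark.sql("ALTER TABLE t ADD COLUMN arr ARRAY<INT> DEFAULT array(1, 2, 3)")
    // The scan of `t` now falls back to Spark rather than running natively
    // in Comet; a scalar default such as DEFAULT 42 would remain native.
    spark.sql("SELECT * FROM t").show()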
diff --git a/docs/templates/compatibility-template.md b/docs/templates/compatibility-template.md
index 191507385..9750c6dc8 100644
--- a/docs/templates/compatibility-template.md
+++ b/docs/templates/compatibility-template.md
@@ -65,6 +65,7 @@ The new scans currently have the following limitations:
 
 - There is a known performance issue when pushing filters down to Parquet. See the [Comet Tuning Guide] for more information.
 - There are failures in the Spark SQL test suite when enabling these new scans (tracking issues: [#1542] and [#1545]).
+- No support for default values that are nested types (e.g., maps, arrays, structs). Literal default values are supported.
 
 [#1545]: https://github.com/apache/datafusion-comet/issues/1545
 [#1542]: https://github.com/apache/datafusion-comet/issues/1542
diff --git a/spark/src/main/scala/org/apache/comet/rules/CometScanRule.scala b/spark/src/main/scala/org/apache/comet/rules/CometScanRule.scala
index 677d190b0..b258ea10a 100644
--- a/spark/src/main/scala/org/apache/comet/rules/CometScanRule.scala
+++ b/spark/src/main/scala/org/apache/comet/rules/CometScanRule.scala
@@ -22,16 +22,17 @@ package org.apache.comet.rules
 import scala.collection.mutable.ListBuffer
 
 import org.apache.spark.sql.SparkSession
-import org.apache.spark.sql.catalyst.expressions.{Attribute, Expression, PlanExpression}
+import org.apache.spark.sql.catalyst.expressions.{Attribute, Expression, GenericInternalRow, PlanExpression}
 import org.apache.spark.sql.catalyst.rules.Rule
-import org.apache.spark.sql.catalyst.util.MetadataColumnHelper
+import org.apache.spark.sql.catalyst.util.{ArrayBasedMapData, GenericArrayData, MetadataColumnHelper}
+import org.apache.spark.sql.catalyst.util.ResolveDefaultColumns.getExistenceDefaultValues
 import org.apache.spark.sql.comet.{CometBatchScanExec, CometScanExec}
 import org.apache.spark.sql.execution.{FileSourceScanExec, SparkPlan}
 import org.apache.spark.sql.execution.datasources.HadoopFsRelation
 import org.apache.spark.sql.execution.datasources.v2.BatchScanExec
 import org.apache.spark.sql.execution.datasources.v2.parquet.ParquetScan
 import org.apache.spark.sql.internal.SQLConf
-import org.apache.spark.sql.types.{ArrayType, ByteType, DataType, MapType, ShortType, StructType}
+import org.apache.spark.sql.types._
 
 import org.apache.comet.{CometConf, DataTypeSupport}
 import org.apache.comet.CometConf._
@@ -118,7 +119,20 @@ case class CometScanRule(session: SparkSession) extends Rule[SparkPlan] {
       return withInfos(scanExec, fallbackReasons.toSet)
     }
 
-    val typeChecker = new CometScanTypeChecker(scanImpl)
+    val possibleDefaultValues = getExistenceDefaultValues(scanExec.requiredSchema)
+    if (possibleDefaultValues.exists(d => {
+        d != null && (d.isInstanceOf[ArrayBasedMapData] || d
+          .isInstanceOf[GenericInternalRow] || d.isInstanceOf[GenericArrayData])
+      })) {
+      // Spark already converted these to Java-native types, so we can't check SQL types.
+      // ArrayBasedMapData, GenericInternalRow, GenericArrayData correspond to maps, structs,
+      // and arrays respectively.
+      fallbackReasons +=
+        "Full native scan disabled because nested types for default values are not supported"
+      return withInfos(scanExec, fallbackReasons.toSet)
+    }
+
+    val typeChecker = CometScanTypeChecker(scanImpl)
     val schemaSupported =
       typeChecker.isSchemaSupported(scanExec.requiredSchema, fallbackReasons)
     val partitionSchemaSupported =

---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscr...@datafusion.apache.org
For additional commands, e-mail: commits-h...@datafusion.apache.org