parthchandra commented on code in PR #731: URL: https://github.com/apache/datafusion-comet/pull/731#discussion_r1699207409
##########
spark/src/main/scala/org/apache/spark/sql/comet/CometRowToColumnarExec.scala:
##########
@@ -60,8 +62,17 @@ case class CometRowToColumnarExec(child: SparkPlan)
     val timeZoneId = conf.sessionLocalTimeZone
     val schema = child.schema
-    child
-      .execute()
+    val rdd: RDD[InternalRow] = if (child.supportsColumnar) {
+      child
+        .executeColumnar()
+        .mapPartitionsInternal { iter =>
+          iter.flatMap(_.rowIterator().asScala)
+        }
+    } else {
+      child.execute()
+    }
+
+    rdd

Review Comment:
   > it could be worth thinking about using the Spark reader as a real fallback use case instead of just for testing purposes

   It might be worthwhile, though I think we would like to prioritize the complex type support in the reader.

##########
spark/src/main/scala/org/apache/comet/CometSparkSessionExtensions.scala:
##########
@@ -1115,6 +1119,7 @@ object CometSparkSessionExtensions extends Logging {
         BinaryType | StringType | _: DecimalType | DateType | TimestampType => true
     case t: DataType if t.typeName == "timestamp_ntz" => true
+    case s: StructType => isSchemaSupported(s)

Review Comment:
   Is this change needed? In general, structs are not supported (yet) and since this method is not operator specific, we probably shouldn't have this here.

##########
spark/src/main/scala/org/apache/comet/serde/QueryPlanSerde.scala:
##########
@@ -59,12 +59,13 @@ object QueryPlanSerde extends Logging with ShimQueryPlanSerde with CometExprShim
     logWarning(s"Comet native execution is disabled due to: $reason")
   }
-  def supportedDataType(dt: DataType): Boolean = dt match {
+  def supportedDataType(dt: DataType, allowComplex: Boolean = false): Boolean = dt match {

Review Comment:
   nit: Can we consider renaming this to `allowStruct` to make it explicit that this is only for structs (and not maps and arrays)?

--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: github-unsubscr...@datafusion.apache.org

For queries about this service, please contact Infrastructure at:
us...@infra.apache.org

---------------------------------------------------------------------
To unsubscribe, e-mail: github-unsubscr...@datafusion.apache.org
For additional commands, e-mail: github-h...@datafusion.apache.org