wForget commented on code in PR #3214:
URL: https://github.com/apache/datafusion-comet/pull/3214#discussion_r2703004660
##########
spark/src/test/scala/org/apache/comet/parquet/CometParquetWriterSuite.scala:
##########
@@ -228,4 +230,299 @@ class CometParquetWriterSuite extends CometTestBase {
}
}
}
+
+ // ===== Complex Type Tests =====
+
+ private def writeComplexTypeData(
+ inputDf: DataFrame,
+ outputPath: String,
+ expectedRows: Int): Unit = {
+ withTempPath { inputDir =>
+ val inputPath = new File(inputDir, "input.parquet").getAbsolutePath
+
+ // First write the input data without Comet to get proper Arrow arrays when reading
+ withSQLConf(
+ CometConf.COMET_NATIVE_PARQUET_WRITE_ENABLED.key -> "false",
+ SQLConf.SESSION_LOCAL_TIMEZONE.key -> "America/Denver") {
+ inputDf.write.parquet(inputPath)
+ }
+
+ // Now read and write with Comet native writer
+ // Use auto scan mode so native_iceberg_compat is used (which supports complex types)
+ // instead of native_comet. This overrides the COMET_PARQUET_SCAN_IMPL env var set by CI.
+ withSQLConf(
+ CometConf.COMET_NATIVE_PARQUET_WRITE_ENABLED.key -> "true",
+ CometConf.COMET_NATIVE_SCAN_IMPL.key -> "auto",
+ CometConf.COMET_SCAN_ALLOW_INCOMPATIBLE.key -> "true",
+ SQLConf.SESSION_LOCAL_TIMEZONE.key -> "America/Halifax",
+
CometConf.getOperatorAllowIncompatConfigKey(classOf[DataWritingCommandExec]) ->
"true",
+ CometConf.COMET_EXEC_ENABLED.key -> "true") {
+
+ val parquetDf = spark.read.parquet(inputPath)
+ parquetDf.write.parquet(outputPath)
+
+ // Verify round-trip: read with Spark and Comet, compare results
+ var sparkDf: DataFrame = null
+ var cometDf: DataFrame = null
+ withSQLConf(CometConf.COMET_NATIVE_SCAN_ENABLED.key -> "false") {
+ sparkDf = spark.read.parquet(outputPath)
+ }
+ withSQLConf(CometConf.COMET_NATIVE_SCAN_ENABLED.key -> "true") {
+ cometDf = spark.read.parquet(outputPath)
+ }
+
+ assert(sparkDf.count() == expectedRows, s"Expected $expectedRows rows")
+ checkAnswer(sparkDf, cometDf)
Review Comment:
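Will the configurations set in L267/L270 (the two nested `withSQLConf(CometConf.COMET_NATIVE_SCAN_ENABLED.key -> ...)` blocks) take effect here? `sparkDf` and `cometDf` are only created inside those blocks; the `count()` and `checkAnswer` calls that actually execute them run after both blocks have exited.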
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]