hsiang-c commented on code in PR #1715:
URL: https://github.com/apache/datafusion-comet/pull/1715#discussion_r2078673638
##########
dev/diffs/iceberg/1.8.1.diff:
##########
@@ -0,0 +1,266 @@
+diff --git a/spark/v3.4/build.gradle b/spark/v3.4/build.gradle
+index 6eb26e8..c288e72 100644
+--- a/spark/v3.4/build.gradle
++++ b/spark/v3.4/build.gradle
+@@ -75,7 +75,7 @@ project(":iceberg-spark:iceberg-spark-${sparkMajorVersion}_${scalaVersion}") {
+       exclude group: 'org.roaringbitmap'
+     }
+
+-    compileOnly "org.apache.datafusion:comet-spark-spark${sparkMajorVersion}_${scalaVersion}:0.5.0"
++    compileOnly "org.apache.datafusion:comet-spark-spark${sparkMajorVersion}_${scalaVersion}:0.9.0-SNAPSHOT"
+
+     implementation libs.parquet.column
+     implementation libs.parquet.hadoop
+@@ -185,7 +185,7 @@ project(":iceberg-spark:iceberg-spark-extensions-${sparkMajorVersion}_${scalaVer
+     testImplementation libs.avro.avro
+     testImplementation libs.parquet.hadoop
+     testImplementation libs.junit.vintage.engine
+-    testImplementation "org.apache.datafusion:comet-spark-spark${sparkMajorVersion}_${scalaVersion}:0.5.0"
++    testImplementation "org.apache.datafusion:comet-spark-spark${sparkMajorVersion}_${scalaVersion}:0.9.0-SNAPSHOT"
+
+     // Required because we remove antlr plugin dependencies from the compile configuration, see note above
+     runtimeOnly libs.antlr.runtime
+@@ -260,6 +260,8 @@ project(":iceberg-spark:iceberg-spark-runtime-${sparkMajorVersion}_${scalaVersio
+     integrationImplementation project(path: ':iceberg-hive-metastore', configuration: 'testArtifacts')
+     integrationImplementation project(path: ":iceberg-spark:iceberg-spark-${sparkMajorVersion}_${scalaVersion}", configuration: 'testArtifacts')
+     integrationImplementation project(path: ":iceberg-spark:iceberg-spark-extensions-${sparkMajorVersion}_${scalaVersion}", configuration: 'testArtifacts')
++    integrationImplementation project(path: ':iceberg-parquet')

Review Comment:
Only for Spark 3.4, I need to include `iceberg-parquet`; otherwise the `iceberg-spark-runtime-3.4` tests fail with the following error:

```shell
org.apache.spark.SparkException: Job aborted due to stage failure: Task 0 in stage 2.0 failed 1 times, most recent failure: Lost task 0.0 in stage 2.0 (TID 4) (17.115.161.202 executor driver): java.lang.NoSuchMethodError: 'org.apache.parquet.column.ParquetProperties$Builder org.apache.parquet.column.ParquetProperties$Builder.withBloomFilterFPP(java.lang.String, double)'
    at org.apache.iceberg.parquet.Parquet$WriteBuilder.build(Parquet.java:389)
    at org.apache.iceberg.parquet.Parquet$DataWriteBuilder.build(Parquet.java:787)
    at org.apache.iceberg.data.BaseFileWriterFactory.newDataWriter(BaseFileWriterFactory.java:131)
    at org.apache.iceberg.io.RollingDataWriter.newWriter(RollingDataWriter.java:52)
    at org.apache.iceberg.io.RollingDataWriter.newWriter(RollingDataWriter.java:32)
    at org.apache.iceberg.io.RollingFileWriter.openCurrentWriter(RollingFileWriter.java:108)
    at org.apache.iceberg.io.RollingDataWriter.<init>(RollingDataWriter.java:47)
    at org.apache.iceberg.spark.source.SparkWrite$UnpartitionedDataWriter.<init>(SparkWrite.java:701)
    at org.apache.iceberg.spark.source.SparkWrite$WriterFactory.createWriter(SparkWrite.java:675)
    at org.apache.iceberg.spark.source.SparkWrite$WriterFactory.createWriter(SparkWrite.java:652)
    at org.apache.spark.sql.execution.datasources.v2.WritingSparkTask.run(WriteToDataSourceV2Exec.scala:459)
    at org.apache.spark.sql.execution.datasources.v2.WritingSparkTask.run$(WriteToDataSourceV2Exec.scala:448)
    at org.apache.spark.sql.execution.datasources.v2.DataWritingSparkTask$.run(WriteToDataSourceV2Exec.scala:514)
    at org.apache.spark.sql.execution.datasources.v2.V2TableWriteExec.$anonfun$writeWithV2$2(WriteToDataSourceV2Exec.scala:411)
    at org.apache.spark.scheduler.ResultTask.runTask(ResultTask.scala:92)
    at org.apache.spark.TaskContext.runTaskWithListeners(TaskContext.scala:161)
    at org.apache.spark.scheduler.Task.run(Task.scala:139)
    at org.apache.spark.executor.Executor$TaskRunner.$anonfun$run$3(Executor.scala:554)
    at org.apache.spark.util.Utils$.tryWithSafeFinally(Utils.scala:1529)
    at org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:557)
```
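For anyone hitting the same `NoSuchMethodError`, a quick way to confirm which `parquet-column` version ends up on the integration test classpath is to ask Gradle for the resolved dependency graph. This is a minimal sketch only: the `3.4_2.12` project suffix and the `integrationRuntimeClasspath` configuration name are assumptions based on the `integration*` configurations in this build file, so adjust them to the actual project and configuration names.

```shell
# Hypothetical check: list the resolved parquet-column version on the integration
# classpath before and after adding the ':iceberg-parquet' dependency.
# Project path and configuration name are assumptions; adjust to match the build.
./gradlew :iceberg-spark:iceberg-spark-runtime-3.4_2.12:dependencies \
    --configuration integrationRuntimeClasspath | grep parquet-column
```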
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: github-unsubscr...@datafusion.apache.org
For queries about this service, please contact Infrastructure at:
us...@infra.apache.org