This is an automated email from the ASF dual-hosted git repository.

yihua pushed a commit to branch release-0.12.3-spark-upgrade
in repository https://gitbox.apache.org/repos/asf/hudi.git
commit c54fd2870dc568337bafe81b191ba3dfce09da63
Author: Y Ethan Guo <[email protected]>
AuthorDate: Tue Feb 10 14:30:47 2026 -0800

    fix: Upgrade Spark 3.2.x and 3.3.x and fix parquet schema conversion
---
 README.md                                                          | 2 +-
 .../datasources/parquet/Spark32PlusHoodieParquetFileFormat.scala   | 3 +++
 pom.xml                                                            | 4 ++--
 3 files changed, 6 insertions(+), 3 deletions(-)

diff --git a/README.md b/README.md
index d389754ca216..573d72642ef4 100644
--- a/README.md
+++ b/README.md
@@ -80,7 +80,7 @@ mvn clean javadoc:aggregate -Pjavadocs
 
 ### Build with different Spark versions
 
-The default Spark 2.x version supported is 2.4.4. The default Spark 3.x version, corresponding to `spark3` profile is 3.3.1.
+The default Spark 2.x version supported is 2.4.4. The default Spark 3.x version, corresponding to `spark3` profile is 3.3.4.
 Refer to the table below for building with different Spark and Scala versions.
 
 | Maven build options | Expected Spark bundle jar name | Notes |
diff --git a/hudi-spark-datasource/hudi-spark3.2plus-common/src/main/scala/org/apache/spark/sql/execution/datasources/parquet/Spark32PlusHoodieParquetFileFormat.scala b/hudi-spark-datasource/hudi-spark3.2plus-common/src/main/scala/org/apache/spark/sql/execution/datasources/parquet/Spark32PlusHoodieParquetFileFormat.scala
index ae686d33a31b..8d4bd46cb6e0 100644
--- a/hudi-spark-datasource/hudi-spark3.2plus-common/src/main/scala/org/apache/spark/sql/execution/datasources/parquet/Spark32PlusHoodieParquetFileFormat.scala
+++ b/hudi-spark-datasource/hudi-spark3.2plus-common/src/main/scala/org/apache/spark/sql/execution/datasources/parquet/Spark32PlusHoodieParquetFileFormat.scala
@@ -95,6 +95,9 @@ class Spark32PlusHoodieParquetFileFormat(private val shouldAppendPartitionValues
     hadoopConf.setBoolean(
       SQLConf.PARQUET_INT96_AS_TIMESTAMP.key,
       sparkSession.sessionState.conf.isParquetINT96AsTimestamp)
+    hadoopConf.setBoolean(
+      SQLConf.LEGACY_PARQUET_NANOS_AS_LONG.key,
+      sparkSession.sessionState.conf.legacyParquetNanosAsLong)
 
     val internalSchemaStr = hadoopConf.get(SparkInternalSchemaConverter.HOODIE_QUERY_SCHEMA)
     // For Spark DataSource v1, there's no Physical Plan projection/schema pruning w/in Spark itself,
diff --git a/pom.xml b/pom.xml
index b6ffc2b307b9..9ba76ec95703 100644
--- a/pom.xml
+++ b/pom.xml
@@ -148,8 +148,8 @@
     <flink.hadoop.compatibility.artifactId>flink-hadoop-compatibility_2.12</flink.hadoop.compatibility.artifactId>
     <rocksdbjni.version>5.17.2</rocksdbjni.version>
     <spark31.version>3.1.3</spark31.version>
-    <spark32.version>3.2.3</spark32.version>
-    <spark33.version>3.3.1</spark33.version>
+    <spark32.version>3.2.4</spark32.version>
+    <spark33.version>3.3.4</spark33.version>
     <hudi.spark.module>hudi-spark2</hudi.spark.module>
     <!-- NOTE: Different Spark versions might require different number of shared modules
                being incorporated, hence we're creating multiple placeholders
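
For context on the Spark32PlusHoodieParquetFileFormat hunk above: the added hadoopConf.setBoolean call copies the session-level legacy nanos-as-long setting into the Hadoop configuration that the Parquet schema converter consults in Spark 3.2.4 / 3.3.4, which is what addresses the parquet schema conversion mentioned in the subject. Below is a minimal standalone sketch of the same propagation using the public SparkSession API instead of the internal sessionState used in the patch; the object name, the literal config key, and the "false" default are illustrative assumptions, not part of the commit.

    import org.apache.spark.sql.SparkSession

    object NanosAsLongPropagationSketch {
      def main(args: Array[String]): Unit = {
        val spark = SparkSession.builder()
          .master("local[1]")
          .appName("nanos-as-long-sketch")
          .getOrCreate()

        // Key assumed to back SQLConf.LEGACY_PARQUET_NANOS_AS_LONG in Spark 3.2.4 / 3.3.4;
        // defaulting to "false" when unset is an assumption for illustration.
        val key = "spark.sql.legacy.parquet.nanosAsLong"
        val nanosAsLong = spark.conf.get(key, "false").toBoolean

        // Mirror the session-level flag onto the Hadoop conf consulted when
        // Parquet files are opened, analogous to what the patched file format
        // does internally with its own hadoopConf.
        spark.sparkContext.hadoopConfiguration.setBoolean(key, nanosAsLong)

        println(s"$key -> " + spark.sparkContext.hadoopConfiguration.get(key))
        spark.stop()
      }
    }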
