This is an automated email from the ASF dual-hosted git repository.

yihua pushed a commit to branch release-1.2.0 in repository https://gitbox.apache.org/repos/asf/hudi.git
commit b1799290107e3cfd60681ab320037b416df882f3 Author: Y Ethan Guo <[email protected]> AuthorDate: Tue May 19 21:41:29 2026 -0700 fix(spark): use HoodieStorageUtils factory in Spark 4.1 legacy parquet read (#18785) --- .../datasources/parquet/Spark41LegacyHoodieParquetFileFormat.scala | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/hudi-spark-datasource/hudi-spark4.1.x/src/main/scala/org/apache/spark/sql/execution/datasources/parquet/Spark41LegacyHoodieParquetFileFormat.scala b/hudi-spark-datasource/hudi-spark4.1.x/src/main/scala/org/apache/spark/sql/execution/datasources/parquet/Spark41LegacyHoodieParquetFileFormat.scala index 58406fbdc024..8dff79c1e07b 100644 --- a/hudi-spark-datasource/hudi-spark4.1.x/src/main/scala/org/apache/spark/sql/execution/datasources/parquet/Spark41LegacyHoodieParquetFileFormat.scala +++ b/hudi-spark-datasource/hudi-spark4.1.x/src/main/scala/org/apache/spark/sql/execution/datasources/parquet/Spark41LegacyHoodieParquetFileFormat.scala @@ -24,10 +24,11 @@ import org.apache.hudi.common.table.timeline.versioning.TimelineLayoutVersion import org.apache.hudi.common.util.InternalSchemaCache import org.apache.hudi.common.util.StringUtils.isNullOrEmpty import org.apache.hudi.common.util.collection.Pair +import org.apache.hudi.hadoop.fs.HadoopFSUtils import org.apache.hudi.internal.schema.InternalSchema import org.apache.hudi.internal.schema.action.InternalSchemaMerger import org.apache.hudi.internal.schema.utils.{InternalSchemaUtils, SerDeHelper} -import org.apache.hudi.storage.hadoop.HoodieHadoopStorage +import org.apache.hudi.storage.HoodieStorageUtils import org.apache.hadoop.conf.Configuration import org.apache.hadoop.mapred.FileSplit @@ -176,7 +177,7 @@ class Spark41LegacyHoodieParquetFileFormat(private val shouldAppendPartitionValu val fileSchema = if (shouldUseInternalSchema) { val commitInstantTime = FSUtils.getCommitTime(filePath.getName).toLong; val validCommits = 
sharedConf.get(SparkInternalSchemaConverter.HOODIE_VALID_COMMITS_LIST) - val storage = new HoodieHadoopStorage(tablePath, sharedConf) + val storage = HoodieStorageUtils.getStorage(tablePath, HadoopFSUtils.getStorageConf(sharedConf)) //TODO: HARDCODED TIMELINE OBJECT val layout = TimelineLayout.fromVersion(TimelineLayoutVersion.CURR_LAYOUT_VERSION) InternalSchemaCache.getInternalSchemaByVersionId(
