This is an automated email from the ASF dual-hosted git repository.
yihua pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/hudi.git
The following commit(s) were added to refs/heads/master by this push:
new 77082705069 [HUDI-7073] Fix schema projection in file group
reader-based parquet file format (#10047)
77082705069 is described below
commit 77082705069dad6bf7e427a32a382137881bf335
Author: Lin Liu <[email protected]>
AuthorDate: Fri Nov 10 06:03:32 2023 -0800
[HUDI-7073] Fix schema projection in file group reader-based parquet file
format (#10047)
- To fix the schema projection, we search for fields in the table schema. Previously
we only searched the data schema for a field, but
it can also be contained in the partition schema. We add this logic.
- Update the manually created schema to match the one created through Spark
SQL.
Co-authored-by: Sagar Sumit <[email protected]>
---
.../parquet/HoodieFileGroupReaderBasedParquetFileFormat.scala | 6 +++---
1 file changed, 3 insertions(+), 3 deletions(-)
diff --git
a/hudi-spark-datasource/hudi-spark-common/src/main/scala/org/apache/spark/sql/execution/datasources/parquet/HoodieFileGroupReaderBasedParquetFileFormat.scala
b/hudi-spark-datasource/hudi-spark-common/src/main/scala/org/apache/spark/sql/execution/datasources/parquet/HoodieFileGroupReaderBasedParquetFileFormat.scala
index cee66336ec9..e978d90f1ac 100644
---
a/hudi-spark-datasource/hudi-spark-common/src/main/scala/org/apache/spark/sql/execution/datasources/parquet/HoodieFileGroupReaderBasedParquetFileFormat.scala
+++
b/hudi-spark-datasource/hudi-spark-common/src/main/scala/org/apache/spark/sql/execution/datasources/parquet/HoodieFileGroupReaderBasedParquetFileFormat.scala
@@ -274,9 +274,9 @@ class
HoodieFileGroupReaderBasedParquetFileFormat(tableState: HoodieTableState,
if (requiredSchema.getFieldIndex(field).isEmpty) {
// Support for nested fields
val fieldParts = field.split("\\.")
- val fieldToAdd = findNestedField(dataSchema, fieldParts).getOrElse(
- throw new IllegalArgumentException(s"Field $field does not exist
in the data schema")
- )
+ val fieldToAdd = findNestedField(dataSchema, fieldParts)
+ .orElse(findNestedField(partitionSchema, fieldParts))
+ .getOrElse(throw new IllegalArgumentException(s"Field $field does
not exist in the table schema"))
added.append(fieldToAdd)
}
}