jonvex commented on code in PR #13572:
URL: https://github.com/apache/hudi/pull/13572#discussion_r2238015476


##########
hudi-client/hudi-spark-client/src/main/scala/org/apache/hudi/SparkFileFormatInternalRowReaderContext.scala:
##########
@@ -266,6 +271,24 @@ class 
SparkFileFormatInternalRowReaderContext(parquetFileReader: SparkParquetRea
       }.asInstanceOf[ClosableIterator[InternalRow]]
     }
   }
+
+  override def getDataFileSchema(filePath: StoragePath, storage: 
HoodieStorage): Schema = {
+    val configuration = 
storageConfiguration.asInstanceOf[StorageConfiguration[Configuration]].unwrap()
+    if (configuration.get(AvroSchemaConverter.ADD_LIST_ELEMENT_RECORDS) == 
null) {
+      configuration.set(AvroSchemaConverter.ADD_LIST_ELEMENT_RECORDS, "false")
+    }
+    val path = HadoopFSUtils.convertToHadoopPath(filePath)
+    val readOptions = HadoopReadOptions.builder(configuration, path)
+      .withMetadataFilter(ParquetMetadataConverter.SKIP_ROW_GROUPS).build
+    val inputFile = HadoopInputFile.fromPath(path, configuration)
+    try {
+      val fileReader = ParquetFileReader.open(inputFile, readOptions)
+      try {
+        val footer = fileReader.getFooter
+        new 
AvroSchemaConverter(configuration).convert(footer.getFileMetaData.getSchema)

Review Comment:
   It was a typo in the PR description. This PR is for schema on write.



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]

Reply via email to