the-other-tim-brown commented on code in PR #14340:
URL: https://github.com/apache/hudi/pull/14340#discussion_r2591280236
##########
hudi-client/hudi-spark-client/src/main/scala/org/apache/hudi/SparkFileFormatInternalRowReaderContext.scala:
##########
@@ -130,34 +128,34 @@ class
SparkFileFormatInternalRowReaderContext(baseFileReader: SparkColumnarFileR
* @return iterator that concatenates the skeletonFileIterator and
dataFileIterator
*/
override def mergeBootstrapReaders(skeletonFileIterator:
ClosableIterator[InternalRow],
- skeletonRequiredSchema: Schema,
+ skeletonRequiredSchema: HoodieSchema,
dataFileIterator:
ClosableIterator[InternalRow],
- dataRequiredSchema: Schema,
+ dataRequiredSchema: HoodieSchema,
partitionFieldAndValues:
java.util.List[HPair[String, Object]]): ClosableIterator[InternalRow] = {
doBootstrapMerge(skeletonFileIterator.asInstanceOf[ClosableIterator[Any]],
skeletonRequiredSchema,
dataFileIterator.asInstanceOf[ClosableIterator[Any]],
dataRequiredSchema, partitionFieldAndValues)
}
private def doBootstrapMerge(skeletonFileIterator: ClosableIterator[Any],
- skeletonRequiredSchema: Schema,
+ skeletonRequiredSchema: HoodieSchema,
dataFileIterator: ClosableIterator[Any],
- dataRequiredSchema: Schema,
+ dataRequiredSchema: HoodieSchema,
partitionFieldAndValues:
java.util.List[HPair[String, Object]]): ClosableIterator[InternalRow] = {
if (getRecordContext.supportsParquetRowIndex()) {
- assert(AvroSchemaUtils.containsFieldInSchema(skeletonRequiredSchema,
ROW_INDEX_TEMPORARY_COLUMN_NAME))
- assert(AvroSchemaUtils.containsFieldInSchema(dataRequiredSchema,
ROW_INDEX_TEMPORARY_COLUMN_NAME))
+
assert(AvroSchemaUtils.containsFieldInSchema(skeletonRequiredSchema.toAvroSchema,
ROW_INDEX_TEMPORARY_COLUMN_NAME))
Review Comment:
Let's use a similar check here as well, in place of `containsFieldInSchema`.
##########
hudi-common/src/main/java/org/apache/hudi/common/engine/HoodieReaderContext.java:
##########
@@ -357,8 +357,8 @@ public ClosableIterator<T>
applyInstantRangeFilter(ClosableIterator<T> fileRecor
return fileRecordIterator;
}
InstantRange instantRange = getInstantRange().get();
- final Schema.Field commitTimeField =
getSchemaHandler().getRequiredSchema().getField(HoodieRecord.COMMIT_TIME_METADATA_FIELD);
- final int commitTimePos = commitTimeField.pos();
+ final Option<HoodieSchemaField> commitTimeFieldOpt =
HoodieSchema.fromAvroSchema(getSchemaHandler().getRequiredSchema()).getField(HoodieRecord.COMMIT_TIME_METADATA_FIELD);
+ final int commitTimePos = commitTimeFieldOpt.get().pos();
Review Comment:
Let's also use `orElseThrow` here so that a missing field produces a more meaningful error message.
##########
hudi-client/hudi-spark-client/src/main/scala/org/apache/hudi/SparkFileFormatInternalRowReaderContext.scala:
##########
@@ -75,19 +73,19 @@ class
SparkFileFormatInternalRowReaderContext(baseFileReader: SparkColumnarFileR
override def getFileRecordIterator(filePath: StoragePath,
start: Long,
length: Long,
- dataSchema: Schema, // dataSchema refers
to table schema in most cases(non log file reads).
- requiredSchema: Schema,
+ dataSchema: HoodieSchema, // dataSchema
refers to table schema in most cases(non log file reads).
+ requiredSchema: HoodieSchema,
storage: HoodieStorage):
ClosableIterator[InternalRow] = {
- val hasRowIndexField =
AvroSchemaUtils.containsFieldInSchema(requiredSchema,
ROW_INDEX_TEMPORARY_COLUMN_NAME)
+ val hasRowIndexField =
AvroSchemaUtils.containsFieldInSchema(requiredSchema.toAvroSchema,
ROW_INDEX_TEMPORARY_COLUMN_NAME)
Review Comment:
```suggestion
val hasRowIndexField = requiredSchema.getField(
ROW_INDEX_TEMPORARY_COLUMN_NAME).isPresent
```
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]