the-other-tim-brown commented on code in PR #14340:
URL: https://github.com/apache/hudi/pull/14340#discussion_r2591280236
##########
hudi-client/hudi-spark-client/src/main/scala/org/apache/hudi/SparkFileFormatInternalRowReaderContext.scala:
##########
@@ -130,34 +128,34 @@ class
SparkFileFormatInternalRowReaderContext(baseFileReader: SparkColumnarFileR
* @return iterator that concatenates the skeletonFileIterator and
dataFileIterator
*/
override def mergeBootstrapReaders(skeletonFileIterator:
ClosableIterator[InternalRow],
- skeletonRequiredSchema: Schema,
+ skeletonRequiredSchema: HoodieSchema,
dataFileIterator:
ClosableIterator[InternalRow],
- dataRequiredSchema: Schema,
+ dataRequiredSchema: HoodieSchema,
partitionFieldAndValues:
java.util.List[HPair[String, Object]]): ClosableIterator[InternalRow] = {
doBootstrapMerge(skeletonFileIterator.asInstanceOf[ClosableIterator[Any]],
skeletonRequiredSchema,
dataFileIterator.asInstanceOf[ClosableIterator[Any]],
dataRequiredSchema, partitionFieldAndValues)
}
private def doBootstrapMerge(skeletonFileIterator: ClosableIterator[Any],
- skeletonRequiredSchema: Schema,
+ skeletonRequiredSchema: HoodieSchema,
dataFileIterator: ClosableIterator[Any],
- dataRequiredSchema: Schema,
+ dataRequiredSchema: HoodieSchema,
partitionFieldAndValues:
java.util.List[HPair[String, Object]]): ClosableIterator[InternalRow] = {
if (getRecordContext.supportsParquetRowIndex()) {
- assert(AvroSchemaUtils.containsFieldInSchema(skeletonRequiredSchema,
ROW_INDEX_TEMPORARY_COLUMN_NAME))
- assert(AvroSchemaUtils.containsFieldInSchema(dataRequiredSchema,
ROW_INDEX_TEMPORARY_COLUMN_NAME))
+
assert(AvroSchemaUtils.containsFieldInSchema(skeletonRequiredSchema.toAvroSchema,
ROW_INDEX_TEMPORARY_COLUMN_NAME))
Review Comment:
Let's use a similar check here as well, in place of `containsFieldInSchema`.
##########
hudi-common/src/main/java/org/apache/hudi/common/engine/HoodieReaderContext.java:
##########
@@ -357,8 +357,8 @@ public ClosableIterator<T>
applyInstantRangeFilter(ClosableIterator<T> fileRecor
return fileRecordIterator;
}
InstantRange instantRange = getInstantRange().get();
- final Schema.Field commitTimeField =
getSchemaHandler().getRequiredSchema().getField(HoodieRecord.COMMIT_TIME_METADATA_FIELD);
- final int commitTimePos = commitTimeField.pos();
+ final Option<HoodieSchemaField> commitTimeFieldOpt =
HoodieSchema.fromAvroSchema(getSchemaHandler().getRequiredSchema()).getField(HoodieRecord.COMMIT_TIME_METADATA_FIELD);
+ final int commitTimePos = commitTimeFieldOpt.get().pos();
Review Comment:
Let's also use `orElseThrow` here so that a missing field produces a more meaningful error message.
##########
hudi-client/hudi-spark-client/src/main/scala/org/apache/hudi/SparkFileFormatInternalRowReaderContext.scala:
##########
@@ -75,19 +73,19 @@ class
SparkFileFormatInternalRowReaderContext(baseFileReader: SparkColumnarFileR
override def getFileRecordIterator(filePath: StoragePath,
start: Long,
length: Long,
- dataSchema: Schema, // dataSchema refers
to table schema in most cases(non log file reads).
- requiredSchema: Schema,
+ dataSchema: HoodieSchema, // dataSchema
refers to table schema in most cases(non log file reads).
+ requiredSchema: HoodieSchema,
storage: HoodieStorage):
ClosableIterator[InternalRow] = {
- val hasRowIndexField =
AvroSchemaUtils.containsFieldInSchema(requiredSchema,
ROW_INDEX_TEMPORARY_COLUMN_NAME)
+ val hasRowIndexField =
AvroSchemaUtils.containsFieldInSchema(requiredSchema.toAvroSchema,
ROW_INDEX_TEMPORARY_COLUMN_NAME)
Review Comment:
```suggestion
val hasRowIndexField = requiredSchema.getField(
ROW_INDEX_TEMPORARY_COLUMN_NAME).isPresent
```
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]