danny0405 commented on code in PR #8490:
URL: https://github.com/apache/hudi/pull/8490#discussion_r1226660761
##########
hudi-client/hudi-client-common/src/main/java/org/apache/hudi/index/HoodieIndexUtils.java:
##########
@@ -175,4 +186,106 @@ public static boolean checkIfValidCommit(HoodieTimeline
commitTimeline, String c
// 2) is less than the first commit ts in the timeline
return !commitTimeline.empty() &&
commitTimeline.containsOrBeforeTimelineStarts(commitTs);
}
+
+ /**
+ * Read existing records based on the given partition path and {@link
HoodieRecordLocation} info.
+ * <p>
+ * This will perform merged read for MOR table, in case a FileGroup contains
log files.
+ *
+ * @return {@link HoodieRecord}s that have the current location being set.
+ */
+ private static <R> HoodieData<HoodieRecord<R>> getExistingRecords(
+ HoodieData<Pair<String, HoodieRecordLocation>> partitionLocations,
HoodieWriteConfig config, HoodieTable hoodieTable) {
+ final Option<String> instantTime = hoodieTable
+ .getMetaClient()
+ .getCommitsTimeline()
+ .filterCompletedInstants()
+ .lastInstant()
+ .map(HoodieInstant::getTimestamp);
+ return partitionLocations.flatMap(p -> {
+ String partitionPath = p.getLeft();
+ String fileId = p.getRight().getFileId();
+ return new HoodieMergedReadHandle(config, instantTime, hoodieTable,
Pair.of(partitionPath, fileId))
+ .getMergedRecords().iterator();
+ });
+ }
+
+ /**
+ * Merge the incoming record with the matching existing record loaded via
{@link HoodieMergedReadHandle}. The existing record is the latest version in
the table.
+ */
+ private static <R> Option<HoodieRecord<R>>
mergeIncomingWithExistingRecord(HoodieRecord<R> incoming, HoodieRecord<R>
existing, HoodieWriteConfig config) throws IOException {
+ Schema existingSchema = HoodieAvroUtils.addMetadataFields(new
Schema.Parser().parse(config.getSchema()),
config.allowOperationMetadataField());
+ Schema writeSchema = new Schema.Parser().parse(config.getWriteSchema());
+ Schema writeSchemaWithMetaFields =
HoodieAvroUtils.addMetadataFields(writeSchema,
config.allowOperationMetadataField());
+ // prepend the hoodie meta fields as the incoming record does not have them
+ HoodieRecord incomingPrepended = incoming
+ .prependMetaFields(writeSchema, writeSchemaWithMetaFields, new
MetadataValues().setRecordKey(incoming.getRecordKey()).setPartitionPath(incoming.getPartitionPath()),
config.getProps());
+ // after prepend the meta fields, convert the record back to the original
payload
+ HoodieRecord incomingWithMetaFields = incomingPrepended
+ .wrapIntoHoodieRecordPayloadWithParams(writeSchema, config.getProps(),
Option.empty(), config.allowOperationMetadataField(), Option.empty(), false,
Option.empty());
+ Option<Pair<HoodieRecord, Schema>> mergeResult = config.getRecordMerger()
+ .merge(existing, existingSchema, incomingWithMetaFields,
writeSchemaWithMetaFields, config.getProps());
Review Comment:
The record merger is instantiated each time, which will cause unnecessary
overhead.
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]