codope commented on code in PR #12214:
URL: https://github.com/apache/hudi/pull/12214#discussion_r1832503626
##########
hudi-common/src/main/java/org/apache/hudi/metadata/HoodieBackedTableMetadata.java:
##########
@@ -841,31 +843,41 @@ private Map<String, String>
reverseLookupSecondaryKeys(String partitionName, Lis
Set<String> keySet = new TreeSet<>(recordKeys);
Set<String> deletedRecordsFromLogs = new HashSet<>();
+ // Map of recordKey (primaryKey) -> log record that is not deleted for
all input recordKeys
Map<String, HoodieRecord<HoodieMetadataPayload>> logRecordsMap = new
HashMap<>();
logRecordScanner.getRecords().forEach(record -> {
+ String recordKey =
getRecordKeyFromSecondaryIndex(record.getRecordKey());
HoodieMetadataPayload payload = record.getData();
- if (!payload.isDeleted()) { // process only valid records.
- String recordKey = payload.getRecordKeyFromSecondaryIndex();
- if (keySet.contains(recordKey)) {
- logRecordsMap.put(recordKey, record);
- }
+ if (!payload.isDeleted() && keySet.contains(recordKey)) { // process
only valid records.
+ logRecordsMap.put(recordKey, record);
} else {
- deletedRecordsFromLogs.add(record.getRecordKey());
+ deletedRecordsFromLogs.add(recordKey);
}
});
// Map of (record-key, secondary-index-record)
Map<String, HoodieRecord<HoodieMetadataPayload>> baseFileRecords =
fetchBaseFileAllRecordsByPayload(baseFileReader, keySet, partitionName);
- // Iterate over all provided log-records, merging them into existing
records
- logRecordsMap.forEach((key1, value1) -> baseFileRecords.merge(key1,
value1, (oldRecord, newRecord) -> {
- Option<HoodieRecord<HoodieMetadataPayload>> mergedRecord =
HoodieMetadataPayload.combineSecondaryIndexRecord(oldRecord, newRecord);
- return mergedRecord.orElseGet(null);
- }));
- baseFileRecords.forEach((key, value) -> {
- if (!deletedRecordsFromLogs.contains(key)) {
- recordKeyMap.put(key, value.getRecordKey());
- }
- });
+ if (baseFileRecords.isEmpty()) {
Review Comment:
It will return an empty map in that case, but I'll add a null check anyway.
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]