This is an automated email from the ASF dual-hosted git repository.
nsivabalan pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/hudi.git
The following commit(s) were added to refs/heads/master by this push:
new 173e76eb3ba8 [HUDI] Add input records caching to HoodieGlobalSimpleIndex (#18921)
173e76eb3ba8 is described below
commit 173e76eb3ba8660224e67b36d8e72f6e1184fdf5
Author: Lokesh Jain <[email protected]>
AuthorDate: Thu Jun 11 11:54:59 2026 +0530
[HUDI] Add input records caching to HoodieGlobalSimpleIndex (#18921)
Adding caching to HoodieGlobalSimpleIndex.tagLocationInternal
Co-authored-by: Lokesh Jain <[email protected]>
Co-authored-by: Claude Sonnet 4.6 <[email protected]>
---
.../org/apache/hudi/index/simple/HoodieGlobalSimpleIndex.java | 9 ++++++++-
1 file changed, 8 insertions(+), 1 deletion(-)
diff --git a/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/index/simple/HoodieGlobalSimpleIndex.java b/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/index/simple/HoodieGlobalSimpleIndex.java
index 336f6376bcd0..b712b708a389 100644
--- a/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/index/simple/HoodieGlobalSimpleIndex.java
+++ b/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/index/simple/HoodieGlobalSimpleIndex.java
@@ -68,13 +68,20 @@ public class HoodieGlobalSimpleIndex extends HoodieSimpleIndex {
protected <R> HoodieData<HoodieRecord<R>> tagLocationInternal(
HoodieData<HoodieRecord<R>> inputRecords, HoodieEngineContext context,
HoodieTable hoodieTable) {
+ if (config.getSimpleIndexUseCaching()) {
+ inputRecords.persist(config.getSimpleIndexInputStorageLevel());
+ }
List<Pair<String, HoodieBaseFile>> latestBaseFiles =
getAllBaseFilesInTable(context, hoodieTable);
HoodiePairData<String, HoodieRecordGlobalLocation> allKeysAndLocations =
fetchRecordGlobalLocations(context, hoodieTable, latestBaseFiles);
boolean mayContainDuplicateLookup =
hoodieTable.getMetaClient().getTableType() == MERGE_ON_READ;
boolean shouldUpdatePartitionPath =
config.getGlobalSimpleIndexUpdatePartitionPath() && hoodieTable.isPartitioned();
- return tagGlobalLocationBackToRecords(inputRecords, allKeysAndLocations,
+ HoodieData<HoodieRecord<R>> taggedRecords =
tagGlobalLocationBackToRecords(inputRecords, allKeysAndLocations,
mayContainDuplicateLookup, shouldUpdatePartitionPath, config,
hoodieTable);
+ if (config.getSimpleIndexUseCaching()) {
+ inputRecords.unpersist();
+ }
+ return taggedRecords;
}
private HoodiePairData<String, HoodieRecordGlobalLocation>
fetchRecordGlobalLocations(