This is an automated email from the ASF dual-hosted git repository. sivabalan pushed a commit to branch release-0.12.2-shadow in repository https://gitbox.apache.org/repos/asf/hudi.git
commit 20956a21ac57366acdc75c81b0381a0e4ab10848
Author: LiChuang <[email protected]>
AuthorDate: Tue Dec 13 12:37:38 2022 +0800

    【HUDI-4917】Optimized the way to get HoodieBaseFile of loadColumnRangesFromFiles of Bloom Index (#6793)
---
 .../java/org/apache/hudi/index/bloom/HoodieBloomIndex.java  | 13 +++++++------
 .../main/java/org/apache/hudi/io/HoodieRangeInfoHandle.java |  8 ++++++++
 .../src/main/java/org/apache/hudi/io/HoodieReadHandle.java  |  5 +++++
 3 files changed, 20 insertions(+), 6 deletions(-)

diff --git a/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/index/bloom/HoodieBloomIndex.java b/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/index/bloom/HoodieBloomIndex.java
index 1417e40a9f5..57d9def9b42 100644
--- a/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/index/bloom/HoodieBloomIndex.java
+++ b/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/index/bloom/HoodieBloomIndex.java
@@ -26,6 +26,7 @@ import org.apache.hudi.common.data.HoodieData;
 import org.apache.hudi.common.data.HoodiePairData;
 import org.apache.hudi.common.engine.HoodieEngineContext;
 import org.apache.hudi.common.fs.FSUtils;
+import org.apache.hudi.common.model.HoodieBaseFile;
 import org.apache.hudi.common.model.HoodieKey;
 import org.apache.hudi.common.model.HoodieRecord;
 import org.apache.hudi.common.model.HoodieRecordLocation;
@@ -161,19 +162,19 @@ public class HoodieBloomIndex extends HoodieIndex<Object, Object> {
   List<Pair<String, BloomIndexFileInfo>> loadColumnRangesFromFiles(
       List<String> partitions, final HoodieEngineContext context, final HoodieTable hoodieTable) {
     // Obtain the latest data files from all the partitions.
-    List<Pair<String, String>> partitionPathFileIDList = getLatestBaseFilesForAllPartitions(partitions, context, hoodieTable).stream()
-        .map(pair -> Pair.of(pair.getKey(), pair.getValue().getFileId()))
+    List<Pair<String, Pair<String, HoodieBaseFile>>> partitionPathFileIDList = getLatestBaseFilesForAllPartitions(partitions, context, hoodieTable).stream()
+        .map(pair -> Pair.of(pair.getKey(), Pair.of(pair.getValue().getFileId(), pair.getValue())))
         .collect(toList());
 
     context.setJobStatus(this.getClass().getName(), "Obtain key ranges for file slices (range pruning=on): " + config.getTableName());
     return context.map(partitionPathFileIDList, pf -> {
       try {
-        HoodieRangeInfoHandle rangeInfoHandle = new HoodieRangeInfoHandle(config, hoodieTable, pf);
-        String[] minMaxKeys = rangeInfoHandle.getMinMaxKeys();
-        return Pair.of(pf.getKey(), new BloomIndexFileInfo(pf.getValue(), minMaxKeys[0], minMaxKeys[1]));
+        HoodieRangeInfoHandle rangeInfoHandle = new HoodieRangeInfoHandle(config, hoodieTable, Pair.of(pf.getKey(), pf.getValue().getKey()));
+        String[] minMaxKeys = rangeInfoHandle.getMinMaxKeys(pf.getValue().getValue());
+        return Pair.of(pf.getKey(), new BloomIndexFileInfo(pf.getValue().getKey(), minMaxKeys[0], minMaxKeys[1]));
       } catch (MetadataNotFoundException me) {
         LOG.warn("Unable to find range metadata in file :" + pf);
-        return Pair.of(pf.getKey(), new BloomIndexFileInfo(pf.getValue()));
+        return Pair.of(pf.getKey(), new BloomIndexFileInfo(pf.getValue().getKey()));
       }
     }, Math.max(partitionPathFileIDList.size(), 1));
   }
diff --git a/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/io/HoodieRangeInfoHandle.java b/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/io/HoodieRangeInfoHandle.java
index abe4a9befef..44407335081 100644
--- a/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/io/HoodieRangeInfoHandle.java
+++ b/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/io/HoodieRangeInfoHandle.java
@@ -18,6 +18,7 @@
 
 package org.apache.hudi.io;
 
+import org.apache.hudi.common.model.HoodieBaseFile;
 import org.apache.hudi.common.model.HoodieRecordPayload;
 import org.apache.hudi.common.util.collection.Pair;
 import org.apache.hudi.config.HoodieWriteConfig;
@@ -41,4 +42,11 @@ public class HoodieRangeInfoHandle<T extends HoodieRecordPayload, I, K, O> exten
       return reader.readMinMaxRecordKeys();
     }
   }
+
+  public String[] getMinMaxKeys(HoodieBaseFile baseFile) throws IOException {
+    try (HoodieFileReader reader = createNewFileReader(baseFile)) {
+      return reader.readMinMaxRecordKeys();
+    }
+  }
+
 }
diff --git a/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/io/HoodieReadHandle.java b/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/io/HoodieReadHandle.java
index fee75b22dec..223241bc373 100644
--- a/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/io/HoodieReadHandle.java
+++ b/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/io/HoodieReadHandle.java
@@ -66,4 +66,9 @@ public abstract class HoodieReadHandle<T extends HoodieRecordPayload, I, K, O> e
     return HoodieFileReaderFactory.getFileReader(hoodieTable.getHadoopConf(),
         new Path(getLatestDataFile().getPath()));
   }
+
+  protected HoodieFileReader createNewFileReader(HoodieBaseFile hoodieBaseFile) throws IOException {
+    return HoodieFileReaderFactory.getFileReader(hoodieTable.getHadoopConf(),
+        new Path(hoodieBaseFile.getPath()));
+  }
 }
