manojpec commented on a change in pull request #4352:
URL: https://github.com/apache/hudi/pull/4352#discussion_r797102112



##########
File path: 
hudi-client/hudi-client-common/src/main/java/org/apache/hudi/io/HoodieKeyLookupHandle.java
##########
@@ -46,52 +51,58 @@
 
   private static final Logger LOG = 
LogManager.getLogger(HoodieKeyLookupHandle.class);
 
-  private final HoodieTableType tableType;
-
   private final BloomFilter bloomFilter;
-
   private final List<String> candidateRecordKeys;
-
+  private final boolean useMetadataTableIndex;
+  private Option<String> fileName = Option.empty();
   private long totalKeysChecked;
 
   public HoodieKeyLookupHandle(HoodieWriteConfig config, HoodieTable<T, I, K, 
O> hoodieTable,
-                               Pair<String, String> partitionPathFilePair) {
-    super(config, null, hoodieTable, partitionPathFilePair);
-    this.tableType = hoodieTable.getMetaClient().getTableType();
+                               Pair<String, String> partitionPathFileIDPair) {
+    this(config, hoodieTable, partitionPathFileIDPair, Option.empty(), false);
+  }
+
+  public HoodieKeyLookupHandle(HoodieWriteConfig config, HoodieTable<T, I, K, 
O> hoodieTable,
+                               Pair<String, String> partitionPathFileIDPair, 
Option<String> fileName,
+                               boolean useMetadataTableIndex) {
+    super(config, hoodieTable, partitionPathFileIDPair);
     this.candidateRecordKeys = new ArrayList<>();
     this.totalKeysChecked = 0;
-    HoodieTimer timer = new HoodieTimer().startTimer();
-
-    try {
-      this.bloomFilter = createNewFileReader().readBloomFilter();
-    } catch (IOException e) {
-      throw new HoodieIndexException(String.format("Error reading bloom filter 
from %s: %s", partitionPathFilePair, e));
+    if (fileName.isPresent()) {
+      
ValidationUtils.checkArgument(FSUtils.getFileId(fileName.get()).equals(getFileId()),
+          "File name '" + fileName.get() + "' doesn't match this lookup handle 
fileid '" + getFileId() + "'");
+      this.fileName = fileName;
     }
-    LOG.info(String.format("Read bloom filter from %s in %d ms", 
partitionPathFilePair, timer.endTimer()));
+    this.useMetadataTableIndex = useMetadataTableIndex;
+    this.bloomFilter = getBloomFilter();
   }
 
-  /**
-   * Given a list of row keys and one file, return only row keys existing in 
that file.
-   */
-  public List<String> checkCandidatesAgainstFile(Configuration configuration, 
List<String> candidateRecordKeys,
-                                                 Path filePath) throws 
HoodieIndexException {
-    List<String> foundRecordKeys = new ArrayList<>();
+  private BloomFilter getBloomFilter() {
+    BloomFilter bloomFilter = null;
+    HoodieTimer timer = new HoodieTimer().startTimer();
     try {
-      // Load all rowKeys from the file, to double-confirm
-      if (!candidateRecordKeys.isEmpty()) {
-        HoodieTimer timer = new HoodieTimer().startTimer();
-        Set<String> fileRowKeys = createNewFileReader().filterRowKeys(new 
HashSet<>(candidateRecordKeys));
-        foundRecordKeys.addAll(fileRowKeys);
-        LOG.info(String.format("Checked keys against file %s, in %d ms. 
#candidates (%d) #found (%d)", filePath,
-            timer.endTimer(), candidateRecordKeys.size(), 
foundRecordKeys.size()));
-        if (LOG.isDebugEnabled()) {
-          LOG.debug("Keys matching for file " + filePath + " => " + 
foundRecordKeys);
+      if (this.useMetadataTableIndex) {
+        ValidationUtils.checkArgument(this.fileName.isPresent(),
+            "File name not available to fetch bloom filter from the metadata 
table index.");
+        Option<ByteBuffer> bloomFilterByteBuffer =
+            
hoodieTable.getMetadataTable().getBloomFilter(partitionPathFileIDPair.getLeft(),
 fileName.get());
+        if (!bloomFilterByteBuffer.isPresent()) {
+          throw new HoodieIndexException("BloomFilter missing for " + 
partitionPathFileIDPair.getRight());
+        }
+        bloomFilter =

Review comment:
       Right, a larger refactoring that addresses this is coming as part of 
HUDI-3203.




-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]


Reply via email to