JingsongLi commented on code in PR #6407:
URL: https://github.com/apache/paimon/pull/6407#discussion_r2435842363


##########
paimon-core/src/main/java/org/apache/paimon/index/IndexFileHandler.java:
##########
@@ -87,6 +101,123 @@ public Optional<IndexFileMeta> scanHashIndex(
         return result.isEmpty() ? Optional.empty() : 
Optional.of(result.get(0));
     }
 
+    public void withCacheMetrics(@Nullable CacheMetrics cacheMetrics) {
+        this.cacheMetrics = cacheMetrics;
+    }
+
+    @Nullable
+    // Construct DataFile -> DeletionFile based on IndexFileMeta
+    public Map<String, DeletionFile> extractDeletionFileByMeta(
+            BinaryRow partition, Integer bucket, IndexFileMeta fileMeta) {
+        if (fileMeta.dvRanges() != null && fileMeta.dvRanges().size() > 0) {
+            Map<String, DeletionFile> result = new HashMap<>();
+            for (DeletionVectorMeta dvMeta : fileMeta.dvRanges().values()) {
+                result.put(
+                        dvMeta.dataFileName(),
+                        new DeletionFile(
+                                dvIndex(partition, 
bucket).path(fileMeta).toString(),
+                                dvMeta.offset(),
+                                dvMeta.length(),
+                                dvMeta.cardinality()));
+            }
+            return result;
+        }
+        return null;
+    }
+
+    // Scan DV index file of given partition buckets
+    // returns <DataFile: DeletionFile> map grouped by partition and bucket
+    public Map<Pair<BinaryRow, Integer>, Map<String, DeletionFile>> 
scanDVIndex(
+            Snapshot snapshot, Set<Pair<BinaryRow, Integer>> partitionBuckets) 
{
+        Map<Pair<BinaryRow, Integer>, List<IndexFileMeta>> partitionFileMetas =
+                scan(
+                        snapshot,
+                        DELETION_VECTORS_INDEX,
+                        
partitionBuckets.stream().map(Pair::getLeft).collect(Collectors.toSet()));
+        Map<Pair<BinaryRow, Integer>, Map<String, DeletionFile>> result = new 
HashMap<>();
+        partitionBuckets.forEach(
+                entry -> {
+                    List<IndexFileMeta> fileMetas = 
partitionFileMetas.get(entry);
+                    if (fileMetas != null) {
+                        fileMetas.forEach(
+                                meta -> {
+                                    Map<String, DeletionFile> dvMetas =
+                                            extractDeletionFileByMeta(
+                                                    entry.getLeft(), 
entry.getRight(), meta);
+                                    if (dvMetas != null) {
+                                        result.computeIfAbsent(entry, k -> new 
HashMap<>())
+                                                .putAll(dvMetas);
+                                    }
+                                });
+                    }
+                });
+        return result;
+    }
+
+    // Scan DV Meta Cache first, if not exist, scan DV index file, returns the 
exact deletion file
+    // of the specified partition/buckets
+    public Map<String, DeletionFile> scanDVIndexWithCache(
+            Snapshot snapshot, BinaryRow partition, Integer bucket) {
+        if (snapshot == null || snapshot.indexManifest() == null) {
+            return Collections.emptyMap();
+        }
+        // read from cache
+        String indexManifestName = snapshot.indexManifest();
+        Map<String, DeletionFile> result =
+                indexManifestFile.readFromDVMetaCache(indexManifestName, 
partition, bucket);

Review Comment:
   Inline `indexManifestFile.readFromDVMetaCache`.



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]

Reply via email to