This is an automated email from the ASF dual-hosted git repository.

JingsongLi pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/paimon.git


The following commit(s) were added to refs/heads/master by this push:
     new b36db800f2 [core] Replace O(n*m) list dedup with HashSet-based O(n+m) in SnapshotReaderImpl (#7333)
b36db800f2 is described below

commit b36db800f2bc54528a32a3692cfef27454ab408f
Author: Du Bin <[email protected]>
AuthorDate: Sun May 24 10:06:02 2026 +0800

    [core] Replace O(n*m) list dedup with HashSet-based O(n+m) in SnapshotReaderImpl (#7333)
---
 .../table/source/snapshot/SnapshotReaderImpl.java  | 23 ++++++++++++++++++++--
 1 file changed, 21 insertions(+), 2 deletions(-)

diff --git a/paimon-core/src/main/java/org/apache/paimon/table/source/snapshot/SnapshotReaderImpl.java b/paimon-core/src/main/java/org/apache/paimon/table/source/snapshot/SnapshotReaderImpl.java
index a61f43b4eb..d033e427a6 100644
--- a/paimon-core/src/main/java/org/apache/paimon/table/source/snapshot/SnapshotReaderImpl.java
+++ b/paimon-core/src/main/java/org/apache/paimon/table/source/snapshot/SnapshotReaderImpl.java
@@ -534,8 +534,8 @@ public class SnapshotReaderImpl implements SnapshotReader {
                     totalBuckets = beforeEntries.get(0).totalBuckets();
                 }
 
-                // deduplicate
-                beforeEntries.removeIf(dataEntries::remove);
+                // deduplicate: remove entries common to both lists
+                deduplicate(beforeEntries, dataEntries);
 
                 List<DataFileMeta> before =
                         beforeEntries.stream()
@@ -705,4 +705,23 @@ public class SnapshotReaderImpl implements SnapshotReader {
         }
         return deletionFiles;
     }
+
+    /**
+     * Cancels out entries that appear in both lists, pairing them
+     * one-to-one.
+     *
+     * <p>Each entry of {@code beforeEntries} that has an equal, not yet
+     * paired entry in {@code dataEntries} is removed together with that
+     * entry. The outcome matches
+     * {@code beforeEntries.removeIf(dataEntries::remove)}, including when
+     * a list holds equal duplicates (surplus duplicates stay in their
+     * list), but uses hash lookups instead of one list scan per entry.
+     *
+     * @param beforeEntries mutable list; paired entries are removed
+     * @param dataEntries mutable list; for every paired entry the
+     *     earliest equal occurrence is removed
+     */
+    private static void deduplicate(
+            List<ManifestEntry> beforeEntries,
+            List<ManifestEntry> dataEntries) {
+        // Occurrences in dataEntries that are still open for pairing.
+        java.util.Map<ManifestEntry, Integer> unpaired =
+                new java.util.HashMap<>();
+        for (ManifestEntry entry : dataEntries) {
+            unpaired.merge(entry, 1, Integer::sum);
+        }
+
+        // Pairs formed per entry; as many must leave dataEntries.
+        java.util.Map<ManifestEntry, Integer> paired =
+                new java.util.HashMap<>();
+        beforeEntries.removeIf(
+                entry -> {
+                    Integer open = unpaired.get(entry);
+                    if (open == null || open == 0) {
+                        return false;
+                    }
+                    unpaired.put(entry, open - 1);
+                    paired.merge(entry, 1, Integer::sum);
+                    return true;
+                });
+
+        // Drop the earliest occurrences, as List.remove(Object) would.
+        dataEntries.removeIf(
+                entry -> {
+                    Integer pending = paired.get(entry);
+                    if (pending == null || pending == 0) {
+                        return false;
+                    }
+                    paired.put(entry, pending - 1);
+                    return true;
+                });
+    }
 }

Reply via email to