This is an automated email from the ASF dual-hosted git repository.
JingsongLi pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/paimon.git
The following commit(s) were added to refs/heads/master by this push:
new b36db800f2 [core] Replace O(n*m) list dedup with HashSet-based O(n+m) in SnapshotReaderImpl (#7333)
b36db800f2 is described below
commit b36db800f2bc54528a32a3692cfef27454ab408f
Author: Du Bin <[email protected]>
AuthorDate: Sun May 24 10:06:02 2026 +0800
[core] Replace O(n*m) list dedup with HashSet-based O(n+m) in SnapshotReaderImpl (#7333)
---
.../table/source/snapshot/SnapshotReaderImpl.java | 23 ++++++++++++++++++++--
1 file changed, 21 insertions(+), 2 deletions(-)
diff --git a/paimon-core/src/main/java/org/apache/paimon/table/source/snapshot/SnapshotReaderImpl.java b/paimon-core/src/main/java/org/apache/paimon/table/source/snapshot/SnapshotReaderImpl.java
index a61f43b4eb..d033e427a6 100644
--- a/paimon-core/src/main/java/org/apache/paimon/table/source/snapshot/SnapshotReaderImpl.java
+++ b/paimon-core/src/main/java/org/apache/paimon/table/source/snapshot/SnapshotReaderImpl.java
@@ -534,8 +534,8 @@ public class SnapshotReaderImpl implements SnapshotReader {
totalBuckets = beforeEntries.get(0).totalBuckets();
}
- // deduplicate
- beforeEntries.removeIf(dataEntries::remove);
+ // deduplicate: remove entries common to both lists
+ deduplicate(beforeEntries, dataEntries);
List<DataFileMeta> before =
beforeEntries.stream()
@@ -705,4 +705,23 @@ public class SnapshotReaderImpl implements SnapshotReader {
}
return deletionFiles;
}
+
+ /**
+ * Remove entries common to both lists using HashSet for O(n+m) complexity instead of O(n*m)
+ * with List.remove().
+ */
+ private static void deduplicate(
+ List<ManifestEntry> beforeEntries, List<ManifestEntry> dataEntries) {
+ Set<ManifestEntry> afterSet = new HashSet<>(dataEntries);
+ Set<ManifestEntry> commonEntries = new HashSet<>();
+ beforeEntries.removeIf(
+ entry -> {
+ if (afterSet.contains(entry)) {
+ commonEntries.add(entry);
+ return true;
+ }
+ return false;
+ });
+ dataEntries.removeAll(commonEntries);
+ }
}