This is an automated email from the ASF dual-hosted git repository.

jsedding pushed a commit to branch jsedding/OAK-12068-remove-expensive-uuid-deduplication
in repository https://gitbox.apache.org/repos/asf/jackrabbit-oak.git

commit bc7798a4d0d5caeca3e930f2ee298eb7c7ed4b9d
Author: Julian Sedding <[email protected]>
AuthorDate: Wed Jan 21 12:15:13 2026 +0100

    OAK-12068 - segment graph UUID deduplication (OAK-12005) can be too inefficient
    
    - remove UUID deduplication in segment graphs
    - avoid over-allocation of HashMaps of known bounded size
---
 .../jackrabbit/oak/segment/file/tar/TarFiles.java  | 30 ++++++++--------------
 1 file changed, 11 insertions(+), 19 deletions(-)

diff --git a/oak-segment-tar/src/main/java/org/apache/jackrabbit/oak/segment/file/tar/TarFiles.java b/oak-segment-tar/src/main/java/org/apache/jackrabbit/oak/segment/file/tar/TarFiles.java
index 283d4c9ded..027da0e8f3 100644
--- a/oak-segment-tar/src/main/java/org/apache/jackrabbit/oak/segment/file/tar/TarFiles.java
+++ b/oak-segment-tar/src/main/java/org/apache/jackrabbit/oak/segment/file/tar/TarFiles.java
@@ -42,7 +42,6 @@ import java.util.concurrent.locks.ReentrantReadWriteLock;
 import java.util.function.Consumer;
 import java.util.function.Function;
 import java.util.function.Predicate;
-import java.util.function.UnaryOperator;
 import java.util.regex.Matcher;
 import java.util.regex.Pattern;
 import java.util.stream.Collectors;
@@ -338,7 +337,7 @@ public class TarFiles implements Closeable {
 
     private final long maxFileSize;
 
-    private SegmentArchiveManager archiveManager;
+    private final SegmentArchiveManager archiveManager;
 
     /**
      * Guards access to the {@link #readers} and {@link #writer} references.
@@ -907,22 +906,15 @@ public class TarFiles implements Closeable {
 
         for (TarReader reader : iterable(head)) {
             if (fileName.equals(reader.getFileName())) {
-                Map<String, Set<UUID>> indices = getIndices();
-                Map<UUID, UUID> uuidDeduplicationMap = 
indices.values().stream()
-                        .flatMap(Set::stream)
-                        
.collect(Collectors.toUnmodifiableMap(Function.identity(), 
Function.identity()));
-                UnaryOperator<UUID> uuidDeduplicator = uuid -> 
uuidDeduplicationMap.getOrDefault(uuid, uuid);
-                Set<UUID> uuids = indices.get(reader.getFileName());
-                Map<UUID, Set<UUID>> edges = reader.getGraph().getEdges();
-                // Create a map covering all UUIDs contained in the file's 
index and deduplicate
-                // all UUID instances based on the UUIDs already present in 
_all_ archives' indices.
-                // This helps to keep the memory overhead during the lifetime 
of graph-maps to a minimum.
-                return uuids.stream().collect(Collectors.toUnmodifiableMap(
-                        uuidDeduplicator,
-                        uuid -> edges.getOrDefault(uuid, emptySet()).stream()
-                                .map(uuidDeduplicator)
-                                .collect(Collectors.toUnmodifiableSet())));
-
+                SegmentGraph graph = reader.getGraph();
+                Set<UUID> uuids = reader.getUUIDs();
+                return uuids.stream()
+                        .collect(Collectors.toMap(
+                                Function.identity(),
+                                graph::getEdges,
+                                (a, b) -> { a.addAll(b); return a; },
+                                () -> new 
HashMap<>(Math.toIntExact(uuids.size()), 1.0f)
+                        ));
             }
         }
         return emptyMap();
@@ -938,7 +930,7 @@ public class TarFiles implements Closeable {
             lock.readLock().unlock();
         }
 
-        Map<String, Set<UUID>> index = new HashMap<>();
+        Map<String, Set<UUID>> index = new 
HashMap<>(Math.toIntExact(getSize(head)), 1.0f);
         for (TarReader reader : iterable(head)) {
             index.put(reader.getFileName(), reader.getUUIDs());
         }

Reply via email to