This is an automated email from the ASF dual-hosted git repository.

jsedding pushed a commit to branch jsedding/OAK-12005-re-use-uuid-instances
in repository https://gitbox.apache.org/repos/asf/jackrabbit-oak.git

commit 26c3dfc22af76fc7636de550be09915268a0ec3c
Author: Julian Sedding <[email protected]>
AuthorDate: Thu Nov 6 15:19:14 2025 +0100

    OAK-12005 - segment preloading graph-cache uses too much heap
---
 .../jackrabbit/oak/segment/file/tar/TarFiles.java   | 21 +++++++++++++++++----
 .../jackrabbit/oak/segment/file/tar/TarReader.java  |  2 +-
 2 files changed, 18 insertions(+), 5 deletions(-)

diff --git a/oak-segment-tar/src/main/java/org/apache/jackrabbit/oak/segment/file/tar/TarFiles.java b/oak-segment-tar/src/main/java/org/apache/jackrabbit/oak/segment/file/tar/TarFiles.java
index 2bc7b44576..85ad7bbd9a 100644
--- a/oak-segment-tar/src/main/java/org/apache/jackrabbit/oak/segment/file/tar/TarFiles.java
+++ b/oak-segment-tar/src/main/java/org/apache/jackrabbit/oak/segment/file/tar/TarFiles.java
@@ -40,6 +40,7 @@ import java.util.UUID;
 import java.util.concurrent.locks.ReadWriteLock;
 import java.util.concurrent.locks.ReentrantReadWriteLock;
 import java.util.function.Consumer;
+import java.util.function.Function;
 import java.util.function.Predicate;
 import java.util.regex.Matcher;
 import java.util.regex.Pattern;
@@ -905,10 +906,22 @@ public class TarFiles implements Closeable {
 
         for (TarReader reader : iterable(head)) {
             if (fileName.equals(reader.getFileName())) {
-                Map<UUID, Set<UUID>> result = new HashMap<>();
-                reader.getUUIDs().forEach((uuid -> result.put(uuid, 
emptySet())));
-                result.putAll(reader.getGraph().getEdges());
-                return result;
+                Map<String, Set<UUID>> indices = getIndices();
+                Map<UUID, UUID> uuidDeduplicationMap = 
indices.values().stream()
+                        .flatMap(Set::stream)
+                        
.collect(Collectors.toUnmodifiableMap(Function.identity(), 
Function.identity()));
+                Function<UUID, UUID> uuidDeduplicator = uuid -> 
uuidDeduplicationMap.getOrDefault(uuid, uuid);
+                Set<UUID> uuids = indices.get(reader.getFileName());
+                Map<UUID, Set<UUID>> edges = reader.getGraph().getEdges();
+                // Create a map covering all UUIDs contained in the file's index and deduplicate
+                // all UUID instances based on the UUIDs already present in _all_ archives' indices.
+                // This helps to keep the memory overhead during the lifetime of graph-maps to a minimum.
+                return uuids.stream().collect(Collectors.toUnmodifiableMap(
+                        uuidDeduplicator,
+                        uuid -> edges.getOrDefault(uuid, emptySet()).stream()
+                                .map(uuidDeduplicator)
+                                .collect(Collectors.toUnmodifiableSet())));
+
             }
         }
         return emptyMap();
diff --git a/oak-segment-tar/src/main/java/org/apache/jackrabbit/oak/segment/file/tar/TarReader.java b/oak-segment-tar/src/main/java/org/apache/jackrabbit/oak/segment/file/tar/TarReader.java
index 7d5ca71707..28f2e9f396 100644
--- a/oak-segment-tar/src/main/java/org/apache/jackrabbit/oak/segment/file/tar/TarReader.java
+++ b/oak-segment-tar/src/main/java/org/apache/jackrabbit/oak/segment/file/tar/TarReader.java
@@ -286,7 +286,7 @@ public class TarReader implements Closeable {
         this.segmentUUIDs = archive.listSegments()
                 .stream()
                 .map(e -> new UUID(e.getMsb(), e.getLsb()))
-                .collect(Collectors.toSet());
+                .collect(Collectors.toUnmodifiableSet());
     }
 
     long size() {

Reply via email to