This is an automated email from the ASF dual-hosted git repository. jsedding pushed a commit to branch jsedding/OAK-12005-re-use-uuid-instances in repository https://gitbox.apache.org/repos/asf/jackrabbit-oak.git
commit 26c3dfc22af76fc7636de550be09915268a0ec3c Author: Julian Sedding <[email protected]> AuthorDate: Thu Nov 6 15:19:14 2025 +0100 OAK-12005 - segment preloading graph-cache uses too much heap --- .../jackrabbit/oak/segment/file/tar/TarFiles.java | 21 +++++++++++++++++---- .../jackrabbit/oak/segment/file/tar/TarReader.java | 2 +- 2 files changed, 18 insertions(+), 5 deletions(-) diff --git a/oak-segment-tar/src/main/java/org/apache/jackrabbit/oak/segment/file/tar/TarFiles.java b/oak-segment-tar/src/main/java/org/apache/jackrabbit/oak/segment/file/tar/TarFiles.java index 2bc7b44576..85ad7bbd9a 100644 --- a/oak-segment-tar/src/main/java/org/apache/jackrabbit/oak/segment/file/tar/TarFiles.java +++ b/oak-segment-tar/src/main/java/org/apache/jackrabbit/oak/segment/file/tar/TarFiles.java @@ -40,6 +40,7 @@ import java.util.UUID; import java.util.concurrent.locks.ReadWriteLock; import java.util.concurrent.locks.ReentrantReadWriteLock; import java.util.function.Consumer; +import java.util.function.Function; import java.util.function.Predicate; import java.util.regex.Matcher; import java.util.regex.Pattern; @@ -905,10 +906,22 @@ public class TarFiles implements Closeable { for (TarReader reader : iterable(head)) { if (fileName.equals(reader.getFileName())) { - Map<UUID, Set<UUID>> result = new HashMap<>(); - reader.getUUIDs().forEach((uuid -> result.put(uuid, emptySet()))); - result.putAll(reader.getGraph().getEdges()); - return result; + Map<String, Set<UUID>> indices = getIndices(); + Map<UUID, UUID> uuidDeduplicationMap = indices.values().stream() + .flatMap(Set::stream) + .collect(Collectors.toUnmodifiableMap(Function.identity(), Function.identity())); + Function<UUID, UUID> uuidDeduplicator = uuid -> uuidDeduplicationMap.getOrDefault(uuid, uuid); + Set<UUID> uuids = indices.get(reader.getFileName()); + Map<UUID, Set<UUID>> edges = reader.getGraph().getEdges(); + // Create a map covering all UUIDs contained in the file's index and deduplicate + // all UUID instances based on the UUIDs already present in _all_ archives' indices. + // This helps to keep the memory overhead during the lifetime of graph-maps to a minimum. + return uuids.stream().collect(Collectors.toUnmodifiableMap( + uuidDeduplicator, + uuid -> edges.getOrDefault(uuid, emptySet()).stream() + .map(uuidDeduplicator) + .collect(Collectors.toUnmodifiableSet()))); + } } return emptyMap(); diff --git a/oak-segment-tar/src/main/java/org/apache/jackrabbit/oak/segment/file/tar/TarReader.java b/oak-segment-tar/src/main/java/org/apache/jackrabbit/oak/segment/file/tar/TarReader.java index 7d5ca71707..28f2e9f396 100644 --- a/oak-segment-tar/src/main/java/org/apache/jackrabbit/oak/segment/file/tar/TarReader.java +++ b/oak-segment-tar/src/main/java/org/apache/jackrabbit/oak/segment/file/tar/TarReader.java @@ -286,7 +286,7 @@ public class TarReader implements Closeable { this.segmentUUIDs = archive.listSegments() .stream() .map(e -> new UUID(e.getMsb(), e.getLsb())) - .collect(Collectors.toSet()); + .collect(Collectors.toUnmodifiableSet()); } long size() {
