This is an automated email from the ASF dual-hosted git repository.
jsedding pushed a commit to branch trunk
in repository https://gitbox.apache.org/repos/asf/jackrabbit-oak.git
The following commit(s) were added to refs/heads/trunk by this push:
new c9209db556 OAK-12068 - segment graph UUID deduplication (OAK-12005) can be too inefficient (#2695)
c9209db556 is described below
commit c9209db5562dddd02e56aa860cbf4f8d9a5ad955
Author: Julian Sedding <[email protected]>
AuthorDate: Wed Jan 21 15:23:46 2026 +0100
OAK-12068 - segment graph UUID deduplication (OAK-12005) can be too inefficient (#2695)
- remove UUID deduplication in segment graphs
- avoid over-allocation of HashMaps of known bounded size
---
.../jackrabbit/oak/segment/file/tar/TarFiles.java | 30 ++++++++--------------
1 file changed, 11 insertions(+), 19 deletions(-)
diff --git a/oak-segment-tar/src/main/java/org/apache/jackrabbit/oak/segment/file/tar/TarFiles.java b/oak-segment-tar/src/main/java/org/apache/jackrabbit/oak/segment/file/tar/TarFiles.java
index 283d4c9ded..027da0e8f3 100644
--- a/oak-segment-tar/src/main/java/org/apache/jackrabbit/oak/segment/file/tar/TarFiles.java
+++ b/oak-segment-tar/src/main/java/org/apache/jackrabbit/oak/segment/file/tar/TarFiles.java
@@ -42,7 +42,6 @@ import java.util.concurrent.locks.ReentrantReadWriteLock;
import java.util.function.Consumer;
import java.util.function.Function;
import java.util.function.Predicate;
-import java.util.function.UnaryOperator;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import java.util.stream.Collectors;
@@ -338,7 +337,7 @@ public class TarFiles implements Closeable {
private final long maxFileSize;
- private SegmentArchiveManager archiveManager;
+ private final SegmentArchiveManager archiveManager;
/**
* Guards access to the {@link #readers} and {@link #writer} references.
@@ -907,22 +906,15 @@ public class TarFiles implements Closeable {
for (TarReader reader : iterable(head)) {
if (fileName.equals(reader.getFileName())) {
- Map<String, Set<UUID>> indices = getIndices();
- Map<UUID, UUID> uuidDeduplicationMap = indices.values().stream()
- .flatMap(Set::stream)
- .collect(Collectors.toUnmodifiableMap(Function.identity(), Function.identity()));
- UnaryOperator<UUID> uuidDeduplicator = uuid -> uuidDeduplicationMap.getOrDefault(uuid, uuid);
- Set<UUID> uuids = indices.get(reader.getFileName());
- Map<UUID, Set<UUID>> edges = reader.getGraph().getEdges();
- // Create a map covering all UUIDs contained in the file's index and deduplicate
- // all UUID instances based on the UUIDs already present in _all_ archives' indices.
- // This helps to keep the memory overhead during the lifetime of graph-maps to a minimum.
- return uuids.stream().collect(Collectors.toUnmodifiableMap(
- uuidDeduplicator,
- uuid -> edges.getOrDefault(uuid, emptySet()).stream()
- .map(uuidDeduplicator)
- .collect(Collectors.toUnmodifiableSet())));
-
+ SegmentGraph graph = reader.getGraph();
+ Set<UUID> uuids = reader.getUUIDs();
+ return uuids.stream()
+ .collect(Collectors.toMap(
+ Function.identity(),
+ graph::getEdges,
+ (a, b) -> { a.addAll(b); return a; },
+ () -> new HashMap<>(Math.toIntExact(uuids.size()), 1.0f)
+ ));
}
}
return emptyMap();
@@ -938,7 +930,7 @@ public class TarFiles implements Closeable {
lock.readLock().unlock();
}
- Map<String, Set<UUID>> index = new HashMap<>();
+ Map<String, Set<UUID>> index = new HashMap<>(Math.toIntExact(getSize(head)), 1.0f);
for (TarReader reader : iterable(head)) {
index.put(reader.getFileName(), reader.getUUIDs());
}