This is an automated email from the ASF dual-hosted git repository. jsedding pushed a commit to branch jsedding/OAK-12070-reduce-azure-segmentstore-heap-usage in repository https://gitbox.apache.org/repos/asf/jackrabbit-oak.git
commit 451d1bacc90b3ff10a3415002c5f67c0726173de Author: Julian Sedding <[email protected]> AuthorDate: Fri Jan 23 14:26:32 2026 +0100 OAK-12070 - Reduce memory consumption of azure segment stores - deduplicate in-memory UUID instances (halves number of instances in memory at 32 bytes each) - avoid LinkedHashMap in favour of java.util.ImmutableCollections (eliminates one LinkedHashMap$Entry weighing 40 bytes per segment) - create GCGeneration instance pool (instances now used by RemoteSegmentArchiveEntry) - use UUID and GCGeneration references in RemoteSegmentArchiveEntry (reduces object size from 48 to 32 bytes) --- .../oak/segment/aws/AwsSegmentArchiveReader.java | 58 +++++------- .../segment/azure/AzureSegmentArchiveReader.java | 50 ++++------ .../azure/v8/AzureSegmentArchiveReaderV8.java | 43 +++------ .../remote/AbstractRemoteSegmentArchiveReader.java | 105 ++++++++++++++++++--- .../remote/AbstractRemoteSegmentArchiveWriter.java | 6 +- .../segment/remote/RemoteSegmentArchiveEntry.java | 34 ++++--- .../oak/segment/remote/package-info.java | 2 +- .../oak/segment/file/tar/GCGeneration.java | 28 +++++- .../jackrabbit/oak/segment/file/tar/TarReader.java | 6 +- .../spi/persistence/SegmentArchiveReader.java | 16 ++++ .../CachingSegmentArchiveReader.java | 7 ++ .../persistence/persistentcache/package-info.java | 2 +- .../split/UnclosedSegmentArchiveReader.java | 7 ++ 13 files changed, 223 insertions(+), 141 deletions(-) diff --git a/oak-segment-aws/src/main/java/org/apache/jackrabbit/oak/segment/aws/AwsSegmentArchiveReader.java b/oak-segment-aws/src/main/java/org/apache/jackrabbit/oak/segment/aws/AwsSegmentArchiveReader.java index 9811cbe380..f326b80329 100644 --- a/oak-segment-aws/src/main/java/org/apache/jackrabbit/oak/segment/aws/AwsSegmentArchiveReader.java +++ b/oak-segment-aws/src/main/java/org/apache/jackrabbit/oak/segment/aws/AwsSegmentArchiveReader.java @@ -20,7 +20,7 @@ import static org.apache.jackrabbit.oak.segment.remote.RemoteUtilities.OFF_HEAP; import java.io.File; import java.io.IOException; -import java.util.UUID; +import java.util.Iterator; import org.apache.jackrabbit.oak.commons.Buffer; import org.apache.jackrabbit.oak.segment.remote.AbstractRemoteSegmentArchiveReader; @@ -31,47 +31,31 @@ public class AwsSegmentArchiveReader extends AbstractRemoteSegmentArchiveReader private final S3Directory directory; - private final String archiveName; - - private final long length; - AwsSegmentArchiveReader(S3Directory directory, String archiveName, IOMonitor ioMonitor) throws IOException { - super(ioMonitor); + super(ioMonitor, archiveName, createEntryIterable(directory, archiveName)); this.directory = directory; - this.archiveName = archiveName; - this.length = computeArchiveIndexAndLength(); - } - - @Override - public long length() { - return length; - } - - @Override - public String getName() { - return archiveName; } - @Override - protected long computeArchiveIndexAndLength() throws IOException { - long length = 0; + private static Iterable<ArchiveEntry> createEntryIterable(S3Directory directory, String archiveName) throws IOException{ Buffer buffer = directory.readObjectToBuffer(archiveName + ".idx", OFF_HEAP); - while (buffer.hasRemaining()) { - long msb = buffer.getLong(); - long lsb = buffer.getLong(); - int position = buffer.getInt(); - int contentLength = buffer.getInt(); - int generation = buffer.getInt(); - int fullGeneration = buffer.getInt(); - boolean compacted = buffer.get() != 0; - - RemoteSegmentArchiveEntry indexEntry = new RemoteSegmentArchiveEntry(msb, lsb, position, contentLength, - generation, fullGeneration, compacted); - index.put(new UUID(indexEntry.getMsb(), indexEntry.getLsb()), indexEntry); - length += contentLength; - } - - return length; + return () -> new Iterator<>() { + @Override + public boolean hasNext() { + return buffer.hasRemaining(); + } + + @Override + public ArchiveEntry next() { + long msb = buffer.getLong(); + long lsb = buffer.getLong(); + int position = buffer.getInt(); + int contentLength = buffer.getInt(); + int generation = buffer.getInt(); + int fullGeneration = buffer.getInt(); + boolean compacted = buffer.get() != 0; + return new ArchiveEntry(new RemoteSegmentArchiveEntry(msb, lsb, position, contentLength, generation, fullGeneration, compacted)); + } + }; } @Override diff --git a/oak-segment-azure/src/main/java/org/apache/jackrabbit/oak/segment/azure/AzureSegmentArchiveReader.java b/oak-segment-azure/src/main/java/org/apache/jackrabbit/oak/segment/azure/AzureSegmentArchiveReader.java index 18ca18f0d6..7a8be8198d 100644 --- a/oak-segment-azure/src/main/java/org/apache/jackrabbit/oak/segment/azure/AzureSegmentArchiveReader.java +++ b/oak-segment-azure/src/main/java/org/apache/jackrabbit/oak/segment/azure/AzureSegmentArchiveReader.java @@ -17,19 +17,17 @@ package org.apache.jackrabbit.oak.segment.azure; import com.azure.storage.blob.BlobContainerClient; -import com.azure.storage.blob.models.BlobItem; import com.azure.storage.blob.models.BlobStorageException; import com.azure.storage.blob.models.ListBlobsOptions; import com.azure.storage.blob.specialized.BlockBlobClient; import org.apache.jackrabbit.oak.commons.Buffer; import org.apache.jackrabbit.oak.segment.remote.AbstractRemoteSegmentArchiveReader; -import org.apache.jackrabbit.oak.segment.remote.RemoteSegmentArchiveEntry; import org.apache.jackrabbit.oak.segment.spi.monitor.IOMonitor; +import org.jetbrains.annotations.NotNull; import java.io.File; import java.io.IOException; import java.util.Map; -import java.util.UUID; import static org.apache.jackrabbit.oak.segment.azure.AzureUtilities.readBufferFully; @@ -37,45 +35,29 @@ public class AzureSegmentArchiveReader extends AbstractRemoteSegmentArchiveReade private final BlobContainerClient blobContainerClient; - private final long length; - - private final String archiveName; - private final String archivePathPrefix; - AzureSegmentArchiveReader(BlobContainerClient blobContainerClient, String rootPrefix, String archiveName, IOMonitor ioMonitor) throws IOException { - super(ioMonitor); + AzureSegmentArchiveReader(BlobContainerClient blobContainerClient, String rootPrefix, String archiveName, IOMonitor ioMonitor) { + super(ioMonitor, AzureUtilities.ensureNoTrailingSlash(archiveName), + createEntryIterable(blobContainerClient, AzureUtilities.asAzurePrefix(rootPrefix, archiveName))); this.blobContainerClient = blobContainerClient; - this.archiveName = AzureUtilities.ensureNoTrailingSlash(archiveName); this.archivePathPrefix = AzureUtilities.asAzurePrefix(rootPrefix, archiveName); - this.length = computeArchiveIndexAndLength(); - } - - @Override - public long length() { - return length; - } - - @Override - public String getName() { - return archiveName; } - @Override - protected long computeArchiveIndexAndLength() throws IOException { - long length = 0; + private static Iterable<ArchiveEntry> createEntryIterable(BlobContainerClient blobContainerClient, @NotNull String archivePathPrefix) { ListBlobsOptions listBlobsOptions = new ListBlobsOptions(); listBlobsOptions.setPrefix(archivePathPrefix); - for (BlobItem blob : AzureUtilities.getBlobs(blobContainerClient, listBlobsOptions)) { - Map<String, String> metadata = blob.getMetadata(); - if (AzureBlobMetadata.isSegment(metadata)) { - RemoteSegmentArchiveEntry indexEntry = AzureBlobMetadata.toIndexEntry(metadata, blob.getProperties().getContentLength().intValue()); - index.put(new UUID(indexEntry.getMsb(), indexEntry.getLsb()), indexEntry); - } - length += blob.getProperties().getContentLength(); - } - - return length; + return AzureUtilities.getBlobs(blobContainerClient, listBlobsOptions).stream() + .map(blobItem -> { + Map<String, String> metadata = blobItem.getMetadata(); + int length = blobItem.getProperties().getContentLength().intValue(); + if (AzureBlobMetadata.isSegment(metadata)) { + return new ArchiveEntry(AzureBlobMetadata.toIndexEntry(metadata, length)); + } else { + return new ArchiveEntry(length); + } + }) + ::iterator; } @Override diff --git a/oak-segment-azure/src/main/java/org/apache/jackrabbit/oak/segment/azure/v8/AzureSegmentArchiveReaderV8.java b/oak-segment-azure/src/main/java/org/apache/jackrabbit/oak/segment/azure/v8/AzureSegmentArchiveReaderV8.java index de69711b6e..7242de4fe2 100644 --- a/oak-segment-azure/src/main/java/org/apache/jackrabbit/oak/segment/azure/v8/AzureSegmentArchiveReaderV8.java +++ b/oak-segment-azure/src/main/java/org/apache/jackrabbit/oak/segment/azure/v8/AzureSegmentArchiveReaderV8.java @@ -22,54 +22,37 @@ import java.io.File; import java.io.IOException; import java.net.URISyntaxException; import java.util.Map; -import java.util.UUID; import com.microsoft.azure.storage.StorageException; -import com.microsoft.azure.storage.blob.CloudBlob; import com.microsoft.azure.storage.blob.CloudBlobDirectory; import com.microsoft.azure.storage.blob.CloudBlockBlob; import org.apache.jackrabbit.oak.commons.Buffer; import org.apache.jackrabbit.oak.segment.azure.AzureBlobMetadata; import org.apache.jackrabbit.oak.segment.remote.AbstractRemoteSegmentArchiveReader; -import org.apache.jackrabbit.oak.segment.remote.RemoteSegmentArchiveEntry; import org.apache.jackrabbit.oak.segment.spi.monitor.IOMonitor; public class AzureSegmentArchiveReaderV8 extends AbstractRemoteSegmentArchiveReader { private final CloudBlobDirectory archiveDirectory; - private final long length; - protected AzureSegmentArchiveReaderV8(CloudBlobDirectory archiveDirectory, IOMonitor ioMonitor) throws IOException { - super(ioMonitor); + super(ioMonitor, AzureUtilitiesV8.getName(archiveDirectory), createEntryIterable(archiveDirectory)); this.archiveDirectory = archiveDirectory; - this.length = computeArchiveIndexAndLength(); - } - - @Override - public long length() { - return length; } - @Override - public String getName() { - return AzureUtilitiesV8.getName(archiveDirectory); - } - - @Override - protected long computeArchiveIndexAndLength() throws IOException { - long length = 0; - for (CloudBlob blob : AzureUtilitiesV8.getBlobs(archiveDirectory)) { - Map<String, String> metadata = blob.getMetadata(); - if (AzureBlobMetadata.isSegment(metadata)) { - RemoteSegmentArchiveEntry indexEntry = AzureBlobMetadata.toIndexEntry(metadata, (int) blob.getProperties().getLength()); - index.put(new UUID(indexEntry.getMsb(), indexEntry.getLsb()), indexEntry); - } - length += blob.getProperties().getLength(); - } - - return length; + private static Iterable<ArchiveEntry> createEntryIterable(CloudBlobDirectory archiveDirectory) throws IOException { + return AzureUtilitiesV8.getBlobs(archiveDirectory).stream() + .map(blob -> { + Map<String, String> metadata = blob.getMetadata(); + int length = (int) blob.getProperties().getLength(); + if (AzureBlobMetadata.isSegment(metadata)) { + return new ArchiveEntry(AzureBlobMetadata.toIndexEntry(metadata, length)); + } else { + return new ArchiveEntry(length); + } + }) + ::iterator; } @Override diff --git a/oak-segment-remote/src/main/java/org/apache/jackrabbit/oak/segment/remote/AbstractRemoteSegmentArchiveReader.java b/oak-segment-remote/src/main/java/org/apache/jackrabbit/oak/segment/remote/AbstractRemoteSegmentArchiveReader.java index fe99490e18..a289d39ee7 100644 --- a/oak-segment-remote/src/main/java/org/apache/jackrabbit/oak/segment/remote/AbstractRemoteSegmentArchiveReader.java +++ b/oak-segment-remote/src/main/java/org/apache/jackrabbit/oak/segment/remote/AbstractRemoteSegmentArchiveReader.java @@ -30,20 +30,53 @@ import org.jetbrains.annotations.Nullable; import java.io.File; import java.io.IOException; -import java.util.ArrayList; -import java.util.LinkedHashMap; +import java.util.Collections; +import java.util.Comparator; +import java.util.LinkedList; import java.util.List; import java.util.Map; +import java.util.Set; import java.util.UUID; import java.util.concurrent.TimeUnit; +import java.util.stream.Collectors; public abstract class AbstractRemoteSegmentArchiveReader implements SegmentArchiveReader { + protected final IOMonitor ioMonitor; - protected final Map<UUID, RemoteSegmentArchiveEntry> index = new LinkedHashMap<>(); + /** + * Unordered immutable map of segment UUIDs to their corresponding archive entries. + */ + private final Map<UUID, RemoteSegmentArchiveEntry> index; + + /** + * The name of the archive. + */ + private final String archiveName; - public AbstractRemoteSegmentArchiveReader(IOMonitor ioMonitor) throws IOException { + /** + * The total size of the archive in bytes. + */ + private final long length; + + protected AbstractRemoteSegmentArchiveReader(IOMonitor ioMonitor, String archiveName, Iterable<ArchiveEntry> entries) { this.ioMonitor = ioMonitor; + this.archiveName = archiveName; + + IndexBuilder indexBuilder = new IndexBuilder(); + entries.forEach(indexBuilder::addEntry); + this.index = indexBuilder.createIndex(); + this.length = indexBuilder.getLength(); + } + + @Override + public @NotNull String getName() { + return archiveName; + } + + @Override + public long length() { + return length; } @Override @@ -73,9 +106,16 @@ public abstract class AbstractRemoteSegmentArchiveReader implements SegmentArchi return index.containsKey(new UUID(msb, lsb)); } + @Override + public Set<UUID> getSegmentUUIDs() { + return Collections.unmodifiableSet(index.keySet()); + } + @Override public List<SegmentArchiveEntry> listSegments() { - return new ArrayList<>(index.values()); + return index.values().stream() + .sorted(Comparator.comparing(RemoteSegmentArchiveEntry::getPosition)) + .collect(Collectors.toList()); } @Override @@ -102,12 +142,6 @@ public abstract class AbstractRemoteSegmentArchiveReader implements SegmentArchi return size; } - /** - * Populates the archive index, summing up each entry's length. - * @return length, the total length of the archive - */ - protected abstract long computeArchiveIndexAndLength() throws IOException; - /** * Reads the segment from the remote storage. * @param segmentFileName, the name of the segment (msb + lsb) prefixed by its position in the archive @@ -132,4 +166,53 @@ public abstract class AbstractRemoteSegmentArchiveReader implements SegmentArchi public boolean isRemote() { return true; } + + protected static final class ArchiveEntry { + + private final RemoteSegmentArchiveEntry entry; + + private final int length; + + public ArchiveEntry(RemoteSegmentArchiveEntry entry) { + this.entry = entry; + this.length = entry.getLength(); + } + + public ArchiveEntry(int length) { + this.entry = null; + this.length = length; + } + + int getLength() { + return length; + } + + RemoteSegmentArchiveEntry getRemoteSegmentArchiveEntry() { + return entry; + } + } + + private static final class IndexBuilder { + + private final List<Map.Entry<UUID, RemoteSegmentArchiveEntry>> entries = new LinkedList<>(); + + private long length = 0; + + private void addEntry(ArchiveEntry entry) { + RemoteSegmentArchiveEntry archiveEntry = entry.getRemoteSegmentArchiveEntry(); + if (archiveEntry != null) { + this.entries.add(Map.entry(archiveEntry.getUuid(), archiveEntry)); + } + this.length += entry.getLength(); + } + + @SuppressWarnings("unchecked") + private Map<UUID, RemoteSegmentArchiveEntry> createIndex() { + return Map.ofEntries(entries.toArray(Map.Entry[]::new)); + } + + private long getLength() { + return length; + } + } } diff --git a/oak-segment-remote/src/main/java/org/apache/jackrabbit/oak/segment/remote/AbstractRemoteSegmentArchiveWriter.java b/oak-segment-remote/src/main/java/org/apache/jackrabbit/oak/segment/remote/AbstractRemoteSegmentArchiveWriter.java index eee6d40cbf..3e1964bb26 100644 --- a/oak-segment-remote/src/main/java/org/apache/jackrabbit/oak/segment/remote/AbstractRemoteSegmentArchiveWriter.java +++ b/oak-segment-remote/src/main/java/org/apache/jackrabbit/oak/segment/remote/AbstractRemoteSegmentArchiveWriter.java @@ -66,7 +66,7 @@ public abstract class AbstractRemoteSegmentArchiveWriter implements SegmentArchi } else { doWriteArchiveEntry(entry, data, offset, size); } - index.put(new UUID(msb, lsb), entry); + index.put(entry.getUuid(), entry); totalLength += size; monitor.written(size); @@ -80,7 +80,7 @@ public abstract class AbstractRemoteSegmentArchiveWriter implements SegmentArchi return segment.get().toBuffer(); } - RemoteSegmentArchiveEntry indexEntry = index.get(new UUID(msb, lsb)); + RemoteSegmentArchiveEntry indexEntry = index.get(uuid); if (indexEntry == null) { return null; } @@ -95,7 +95,7 @@ public abstract class AbstractRemoteSegmentArchiveWriter implements SegmentArchi if (segment.isPresent()) { return true; } - return index.containsKey(new UUID(msb, lsb)); + return index.containsKey(uuid); } @Override diff --git a/oak-segment-remote/src/main/java/org/apache/jackrabbit/oak/segment/remote/RemoteSegmentArchiveEntry.java b/oak-segment-remote/src/main/java/org/apache/jackrabbit/oak/segment/remote/RemoteSegmentArchiveEntry.java index f14f793ac7..c3cdafbcf3 100644 --- a/oak-segment-remote/src/main/java/org/apache/jackrabbit/oak/segment/remote/RemoteSegmentArchiveEntry.java +++ b/oak-segment-remote/src/main/java/org/apache/jackrabbit/oak/segment/remote/RemoteSegmentArchiveEntry.java @@ -16,42 +16,36 @@ */ package org.apache.jackrabbit.oak.segment.remote; +import org.apache.jackrabbit.oak.segment.file.tar.GCGeneration; import org.apache.jackrabbit.oak.segment.spi.persistence.SegmentArchiveEntry; -public class RemoteSegmentArchiveEntry implements SegmentArchiveEntry { +import java.util.UUID; - private final long msb; +public class RemoteSegmentArchiveEntry implements SegmentArchiveEntry { - private final long lsb; + private final UUID uuid; private final int position; private final int length; - private final int generation; - - private final int fullGeneration; - - private final boolean compacted; + private final GCGeneration gcGeneration; public RemoteSegmentArchiveEntry(long msb, long lsb, int position, int length, int generation, int fullGeneration, boolean compacted) { - this.msb = msb; - this.lsb = lsb; + this.uuid = new UUID(msb, lsb); this.position = position; this.length = length; - this.generation = generation; - this.fullGeneration = fullGeneration; - this.compacted = compacted; + this.gcGeneration = GCGeneration.newGCGeneration(generation, fullGeneration, compacted); } @Override public long getMsb() { - return msb; + return uuid.getMostSignificantBits(); } @Override public long getLsb() { - return lsb; + return uuid.getLeastSignificantBits(); } public int getPosition() { @@ -65,16 +59,20 @@ public class RemoteSegmentArchiveEntry implements SegmentArchiveEntry { @Override public int getGeneration() { - return generation; + return gcGeneration.getGeneration(); } @Override public int getFullGeneration() { - return fullGeneration; + return gcGeneration.getFullGeneration(); } @Override public boolean isCompacted() { - return compacted; + return gcGeneration.isCompacted(); + } + + UUID getUuid() { + return uuid; } } diff --git a/oak-segment-remote/src/main/java/org/apache/jackrabbit/oak/segment/remote/package-info.java b/oak-segment-remote/src/main/java/org/apache/jackrabbit/oak/segment/remote/package-info.java index fd64aa03ce..5c70f9fbe8 100644 --- a/oak-segment-remote/src/main/java/org/apache/jackrabbit/oak/segment/remote/package-info.java +++ b/oak-segment-remote/src/main/java/org/apache/jackrabbit/oak/segment/remote/package-info.java @@ -15,7 +15,7 @@ * limitations under the License. */ @Internal(since = "1.0.0") -@Version("2.0.0") +@Version("3.0.0") package org.apache.jackrabbit.oak.segment.remote; import org.apache.jackrabbit.oak.commons.annotations.Internal; diff --git a/oak-segment-tar/src/main/java/org/apache/jackrabbit/oak/segment/file/tar/GCGeneration.java b/oak-segment-tar/src/main/java/org/apache/jackrabbit/oak/segment/file/tar/GCGeneration.java index d9784d66a9..176f3f0433 100644 --- a/oak-segment-tar/src/main/java/org/apache/jackrabbit/oak/segment/file/tar/GCGeneration.java +++ b/oak-segment-tar/src/main/java/org/apache/jackrabbit/oak/segment/file/tar/GCGeneration.java @@ -19,7 +19,12 @@ package org.apache.jackrabbit.oak.segment.file.tar; import static java.util.Objects.requireNonNull; +import java.lang.ref.WeakReference; +import java.util.Collections; +import java.util.Iterator; import java.util.Objects; +import java.util.Set; +import java.util.concurrent.ConcurrentHashMap; import org.apache.jackrabbit.oak.segment.spi.persistence.SegmentArchiveEntry; import org.jetbrains.annotations.NotNull; @@ -54,12 +59,31 @@ public final class GCGeneration { public static final GCGeneration NULL = new GCGeneration(0, 0, false); + private static final Set<WeakReference<GCGeneration>> gcGenerations = Collections.newSetFromMap(new ConcurrentHashMap<>()); + public static GCGeneration newGCGeneration(int generation, int fullGeneration, boolean isCompacted) { - return new GCGeneration(generation, fullGeneration, isCompacted); + Iterator<WeakReference<GCGeneration>> iterator = gcGenerations.iterator(); + GCGeneration gen = null; + while(iterator.hasNext()) { + WeakReference<GCGeneration> next = iterator.next(); + GCGeneration gcGeneration = next.get(); + if (gcGeneration == null) { + iterator.remove(); + } else if (gcGeneration.generation == generation + && gcGeneration.fullGeneration == fullGeneration + && gcGeneration.isCompacted == isCompacted) { + gen = gcGeneration; + } + } + if (gen == null) { + gen = new GCGeneration(generation, fullGeneration, isCompacted); + gcGenerations.add(new WeakReference<>(gen)); + } + return gen; } public static GCGeneration newGCGeneration(SegmentArchiveEntry indexEntry) { - return new GCGeneration(indexEntry.getGeneration(), indexEntry.getFullGeneration(), indexEntry.isCompacted()); + return newGCGeneration(indexEntry.getGeneration(), indexEntry.getFullGeneration(), indexEntry.isCompacted()); } private final int generation; diff --git a/oak-segment-tar/src/main/java/org/apache/jackrabbit/oak/segment/file/tar/TarReader.java b/oak-segment-tar/src/main/java/org/apache/jackrabbit/oak/segment/file/tar/TarReader.java index 28f2e9f396..1be18b89a4 100644 --- a/oak-segment-tar/src/main/java/org/apache/jackrabbit/oak/segment/file/tar/TarReader.java +++ b/oak-segment-tar/src/main/java/org/apache/jackrabbit/oak/segment/file/tar/TarReader.java @@ -25,6 +25,7 @@ import java.io.Closeable; import java.io.File; import java.io.IOException; import java.util.ArrayList; +import java.util.Arrays; import java.util.Collections; import java.util.HashMap; import java.util.HashSet; @@ -283,10 +284,7 @@ public class TarReader implements Closeable { private TarReader(SegmentArchiveManager archiveManager, SegmentArchiveReader archive) { this.archiveManager = archiveManager; this.archive = archive; - this.segmentUUIDs = archive.listSegments() - .stream() - .map(e -> new UUID(e.getMsb(), e.getLsb())) - .collect(Collectors.toUnmodifiableSet()); + this.segmentUUIDs = archive.getSegmentUUIDs(); } long size() { diff --git a/oak-segment-tar/src/main/java/org/apache/jackrabbit/oak/segment/spi/persistence/SegmentArchiveReader.java b/oak-segment-tar/src/main/java/org/apache/jackrabbit/oak/segment/spi/persistence/SegmentArchiveReader.java index 3ac897e65b..8b22150516 100644 --- a/oak-segment-tar/src/main/java/org/apache/jackrabbit/oak/segment/spi/persistence/SegmentArchiveReader.java +++ b/oak-segment-tar/src/main/java/org/apache/jackrabbit/oak/segment/spi/persistence/SegmentArchiveReader.java @@ -21,6 +21,9 @@ package org.apache.jackrabbit.oak.segment.spi.persistence; import java.io.Closeable; import java.io.IOException; import java.util.List; +import java.util.Set; +import java.util.UUID; +import java.util.stream.Collectors; import org.apache.jackrabbit.oak.commons.Buffer; import org.apache.jackrabbit.oak.segment.file.tar.SegmentGraph; @@ -52,6 +55,19 @@ public interface SegmentArchiveReader extends Closeable { */ boolean containsSegment(long msb, long lsb); + /** + * Returns an immutable {@code Set} of the UUIDs of all segments contained in this archive. + * No guarantees are made regarding the iteration order of the elements. + * + * @return set of segment UUIDs + */ + default Set<UUID> getSegmentUUIDs() { + return listSegments() + .stream() + .map(e -> new UUID(e.getMsb(), e.getLsb())) + .collect(Collectors.toUnmodifiableSet()); + } + /** * List all the segments, in the order as they have been written to the archive. * diff --git a/oak-segment-tar/src/main/java/org/apache/jackrabbit/oak/segment/spi/persistence/persistentcache/CachingSegmentArchiveReader.java b/oak-segment-tar/src/main/java/org/apache/jackrabbit/oak/segment/spi/persistence/persistentcache/CachingSegmentArchiveReader.java index 8ef42fbf4d..f81eaee782 100644 --- a/oak-segment-tar/src/main/java/org/apache/jackrabbit/oak/segment/spi/persistence/persistentcache/CachingSegmentArchiveReader.java +++ b/oak-segment-tar/src/main/java/org/apache/jackrabbit/oak/segment/spi/persistence/persistentcache/CachingSegmentArchiveReader.java @@ -27,6 +27,8 @@ import org.jetbrains.annotations.Nullable; import java.io.IOException; import java.util.List; +import java.util.Set; +import java.util.UUID; public class CachingSegmentArchiveReader implements SegmentArchiveReader { @@ -63,6 +65,11 @@ public class CachingSegmentArchiveReader implements SegmentArchiveReader { return delegate.listSegments(); } + @Override + public Set<UUID> getSegmentUUIDs() { + return delegate.getSegmentUUIDs(); + } + @Override public @NotNull SegmentGraph getGraph() throws IOException { return delegate.getGraph(); diff --git a/oak-segment-tar/src/main/java/org/apache/jackrabbit/oak/segment/spi/persistence/persistentcache/package-info.java b/oak-segment-tar/src/main/java/org/apache/jackrabbit/oak/segment/spi/persistence/persistentcache/package-info.java index 611279bc26..0ad0171f1b 100644 --- a/oak-segment-tar/src/main/java/org/apache/jackrabbit/oak/segment/spi/persistence/persistentcache/package-info.java +++ b/oak-segment-tar/src/main/java/org/apache/jackrabbit/oak/segment/spi/persistence/persistentcache/package-info.java @@ -15,7 +15,7 @@ * limitations under the License. */ @Internal(since = "1.0.0") -@Version("6.0.0") +@Version("6.1.0") package org.apache.jackrabbit.oak.segment.spi.persistence.persistentcache; import org.apache.jackrabbit.oak.commons.annotations.Internal; diff --git a/oak-segment-tar/src/main/java/org/apache/jackrabbit/oak/segment/spi/persistence/split/UnclosedSegmentArchiveReader.java b/oak-segment-tar/src/main/java/org/apache/jackrabbit/oak/segment/spi/persistence/split/UnclosedSegmentArchiveReader.java index d6902c757d..f50ffd757c 100644 --- a/oak-segment-tar/src/main/java/org/apache/jackrabbit/oak/segment/spi/persistence/split/UnclosedSegmentArchiveReader.java +++ b/oak-segment-tar/src/main/java/org/apache/jackrabbit/oak/segment/spi/persistence/split/UnclosedSegmentArchiveReader.java @@ -18,6 +18,8 @@ package org.apache.jackrabbit.oak.segment.spi.persistence.split; import java.io.IOException; import java.util.List; +import java.util.Set; +import java.util.UUID; import org.apache.jackrabbit.oak.commons.Buffer; import org.apache.jackrabbit.oak.segment.file.tar.SegmentGraph; @@ -52,6 +54,11 @@ class UnclosedSegmentArchiveReader implements SegmentArchiveReader { return delegate.listSegments(); } + @Override + public Set<UUID> getSegmentUUIDs() { + return delegate.getSegmentUUIDs(); + } + @Override public @NotNull SegmentGraph getGraph() throws IOException { return delegate.getGraph();
