This is an automated email from the ASF dual-hosted git repository.

jsedding pushed a commit to branch jsedding/OAK-12070-reduce-azure-segmentstore-heap-usage
in repository https://gitbox.apache.org/repos/asf/jackrabbit-oak.git

commit 451d1bacc90b3ff10a3415002c5f67c0726173de
Author: Julian Sedding <[email protected]>
AuthorDate: Fri Jan 23 14:26:32 2026 +0100

    OAK-12070 - Reduce memory consumption of azure segment stores
    
    - deduplicate in-memory UUID instances (halves number of instances in memory
      at 32 bytes each)
    - avoid LinkedHashMap in favour of java.util.ImmutableCollections
      (eliminates one LinkedHashMap$Entry weighing 40 bytes per segment)
    - create GCGeneration instance pool (instances now used by
      RemoteSegmentArchiveEntry)
    - use UUID and GCGeneration references in RemoteSegmentArchiveEntry (reduces
      object size from 48 to 32 bytes)
---
 .../oak/segment/aws/AwsSegmentArchiveReader.java   |  58 +++++-------
 .../segment/azure/AzureSegmentArchiveReader.java   |  50 ++++------
 .../azure/v8/AzureSegmentArchiveReaderV8.java      |  43 +++------
 .../remote/AbstractRemoteSegmentArchiveReader.java | 105 ++++++++++++++++++---
 .../remote/AbstractRemoteSegmentArchiveWriter.java |   6 +-
 .../segment/remote/RemoteSegmentArchiveEntry.java  |  34 ++++---
 .../oak/segment/remote/package-info.java           |   2 +-
 .../oak/segment/file/tar/GCGeneration.java         |  28 +++++-
 .../jackrabbit/oak/segment/file/tar/TarReader.java |   6 +-
 .../spi/persistence/SegmentArchiveReader.java      |  16 ++++
 .../CachingSegmentArchiveReader.java               |   7 ++
 .../persistence/persistentcache/package-info.java  |   2 +-
 .../split/UnclosedSegmentArchiveReader.java        |   7 ++
 13 files changed, 223 insertions(+), 141 deletions(-)

diff --git 
a/oak-segment-aws/src/main/java/org/apache/jackrabbit/oak/segment/aws/AwsSegmentArchiveReader.java
 
b/oak-segment-aws/src/main/java/org/apache/jackrabbit/oak/segment/aws/AwsSegmentArchiveReader.java
index 9811cbe380..f326b80329 100644
--- 
a/oak-segment-aws/src/main/java/org/apache/jackrabbit/oak/segment/aws/AwsSegmentArchiveReader.java
+++ 
b/oak-segment-aws/src/main/java/org/apache/jackrabbit/oak/segment/aws/AwsSegmentArchiveReader.java
@@ -20,7 +20,7 @@ import static 
org.apache.jackrabbit.oak.segment.remote.RemoteUtilities.OFF_HEAP;
 
 import java.io.File;
 import java.io.IOException;
-import java.util.UUID;
+import java.util.Iterator;
 
 import org.apache.jackrabbit.oak.commons.Buffer;
 import 
org.apache.jackrabbit.oak.segment.remote.AbstractRemoteSegmentArchiveReader;
@@ -31,47 +31,31 @@ public class AwsSegmentArchiveReader extends 
AbstractRemoteSegmentArchiveReader
 
     private final S3Directory directory;
 
-    private final String archiveName;
-
-    private final long length;
-
     AwsSegmentArchiveReader(S3Directory directory, String archiveName, 
IOMonitor ioMonitor) throws IOException {
-        super(ioMonitor);
+        super(ioMonitor, archiveName, createEntryIterable(directory, 
archiveName));
         this.directory = directory;
-        this.archiveName = archiveName;
-        this.length = computeArchiveIndexAndLength();
-    }
-
-    @Override
-    public long length() {
-        return length;
-    }
-
-    @Override
-    public String getName() {
-        return archiveName;
     }
 
-    @Override
-    protected long computeArchiveIndexAndLength() throws IOException {
-        long length = 0;
+    private static Iterable<ArchiveEntry> createEntryIterable(S3Directory 
directory, String archiveName) throws IOException{
         Buffer buffer = directory.readObjectToBuffer(archiveName + ".idx", 
OFF_HEAP);
-        while (buffer.hasRemaining()) {
-            long msb = buffer.getLong();
-            long lsb = buffer.getLong();
-            int position = buffer.getInt();
-            int contentLength = buffer.getInt();
-            int generation = buffer.getInt();
-            int fullGeneration = buffer.getInt();
-            boolean compacted = buffer.get() != 0;
-
-            RemoteSegmentArchiveEntry indexEntry = new 
RemoteSegmentArchiveEntry(msb, lsb, position, contentLength,
-                    generation, fullGeneration, compacted);
-            index.put(new UUID(indexEntry.getMsb(), indexEntry.getLsb()), 
indexEntry);
-            length += contentLength;
-        }
-
-        return length;
+        return () -> new Iterator<>() {
+            @Override
+            public boolean hasNext() {
+                return buffer.hasRemaining();
+            }
+
+            @Override
+            public ArchiveEntry next() {
+                long msb = buffer.getLong();
+                long lsb = buffer.getLong();
+                int position = buffer.getInt();
+                int contentLength = buffer.getInt();
+                int generation = buffer.getInt();
+                int fullGeneration = buffer.getInt();
+                boolean compacted = buffer.get() != 0;
+                return new ArchiveEntry(new RemoteSegmentArchiveEntry(msb, 
lsb, position, contentLength, generation, fullGeneration, compacted));
+            }
+        };
     }
 
     @Override
diff --git 
a/oak-segment-azure/src/main/java/org/apache/jackrabbit/oak/segment/azure/AzureSegmentArchiveReader.java
 
b/oak-segment-azure/src/main/java/org/apache/jackrabbit/oak/segment/azure/AzureSegmentArchiveReader.java
index 18ca18f0d6..7a8be8198d 100644
--- 
a/oak-segment-azure/src/main/java/org/apache/jackrabbit/oak/segment/azure/AzureSegmentArchiveReader.java
+++ 
b/oak-segment-azure/src/main/java/org/apache/jackrabbit/oak/segment/azure/AzureSegmentArchiveReader.java
@@ -17,19 +17,17 @@
 package org.apache.jackrabbit.oak.segment.azure;
 
 import com.azure.storage.blob.BlobContainerClient;
-import com.azure.storage.blob.models.BlobItem;
 import com.azure.storage.blob.models.BlobStorageException;
 import com.azure.storage.blob.models.ListBlobsOptions;
 import com.azure.storage.blob.specialized.BlockBlobClient;
 import org.apache.jackrabbit.oak.commons.Buffer;
 import 
org.apache.jackrabbit.oak.segment.remote.AbstractRemoteSegmentArchiveReader;
-import org.apache.jackrabbit.oak.segment.remote.RemoteSegmentArchiveEntry;
 import org.apache.jackrabbit.oak.segment.spi.monitor.IOMonitor;
+import org.jetbrains.annotations.NotNull;
 
 import java.io.File;
 import java.io.IOException;
 import java.util.Map;
-import java.util.UUID;
 
 import static 
org.apache.jackrabbit.oak.segment.azure.AzureUtilities.readBufferFully;
 
@@ -37,45 +35,29 @@ public class AzureSegmentArchiveReader extends 
AbstractRemoteSegmentArchiveReade
 
     private final BlobContainerClient blobContainerClient;
 
-    private final long length;
-
-    private final String archiveName;
-
     private final String archivePathPrefix;
 
-    AzureSegmentArchiveReader(BlobContainerClient blobContainerClient, String 
rootPrefix, String archiveName, IOMonitor ioMonitor) throws IOException {
-        super(ioMonitor);
+    AzureSegmentArchiveReader(BlobContainerClient blobContainerClient, String 
rootPrefix, String archiveName, IOMonitor ioMonitor) {
+        super(ioMonitor, AzureUtilities.ensureNoTrailingSlash(archiveName),
+                createEntryIterable(blobContainerClient, 
AzureUtilities.asAzurePrefix(rootPrefix, archiveName)));
         this.blobContainerClient = blobContainerClient;
-        this.archiveName = AzureUtilities.ensureNoTrailingSlash(archiveName);
         this.archivePathPrefix = AzureUtilities.asAzurePrefix(rootPrefix, 
archiveName);
-        this.length = computeArchiveIndexAndLength();
-    }
-
-    @Override
-    public long length() {
-        return length;
-    }
-
-    @Override
-    public String getName() {
-        return archiveName;
     }
 
-    @Override
-    protected long computeArchiveIndexAndLength() throws IOException {
-        long length = 0;
+    private static Iterable<ArchiveEntry> 
createEntryIterable(BlobContainerClient blobContainerClient, @NotNull String 
archivePathPrefix) {
         ListBlobsOptions listBlobsOptions = new ListBlobsOptions();
         listBlobsOptions.setPrefix(archivePathPrefix);
-        for (BlobItem blob : AzureUtilities.getBlobs(blobContainerClient, 
listBlobsOptions)) {
-            Map<String, String> metadata = blob.getMetadata();
-            if (AzureBlobMetadata.isSegment(metadata)) {
-                RemoteSegmentArchiveEntry indexEntry = 
AzureBlobMetadata.toIndexEntry(metadata, 
blob.getProperties().getContentLength().intValue());
-                index.put(new UUID(indexEntry.getMsb(), indexEntry.getLsb()), 
indexEntry);
-            }
-            length += blob.getProperties().getContentLength();
-        }
-
-        return length;
+        return AzureUtilities.getBlobs(blobContainerClient, 
listBlobsOptions).stream()
+                .map(blobItem -> {
+                    Map<String, String> metadata = blobItem.getMetadata();
+                    int length = 
blobItem.getProperties().getContentLength().intValue();
+                    if (AzureBlobMetadata.isSegment(metadata)) {
+                        return new 
ArchiveEntry(AzureBlobMetadata.toIndexEntry(metadata, length));
+                    } else {
+                        return new ArchiveEntry(length);
+                    }
+                })
+                ::iterator;
     }
 
     @Override
diff --git 
a/oak-segment-azure/src/main/java/org/apache/jackrabbit/oak/segment/azure/v8/AzureSegmentArchiveReaderV8.java
 
b/oak-segment-azure/src/main/java/org/apache/jackrabbit/oak/segment/azure/v8/AzureSegmentArchiveReaderV8.java
index de69711b6e..7242de4fe2 100644
--- 
a/oak-segment-azure/src/main/java/org/apache/jackrabbit/oak/segment/azure/v8/AzureSegmentArchiveReaderV8.java
+++ 
b/oak-segment-azure/src/main/java/org/apache/jackrabbit/oak/segment/azure/v8/AzureSegmentArchiveReaderV8.java
@@ -22,54 +22,37 @@ import java.io.File;
 import java.io.IOException;
 import java.net.URISyntaxException;
 import java.util.Map;
-import java.util.UUID;
 
 import com.microsoft.azure.storage.StorageException;
-import com.microsoft.azure.storage.blob.CloudBlob;
 import com.microsoft.azure.storage.blob.CloudBlobDirectory;
 import com.microsoft.azure.storage.blob.CloudBlockBlob;
 
 import org.apache.jackrabbit.oak.commons.Buffer;
 import org.apache.jackrabbit.oak.segment.azure.AzureBlobMetadata;
 import 
org.apache.jackrabbit.oak.segment.remote.AbstractRemoteSegmentArchiveReader;
-import org.apache.jackrabbit.oak.segment.remote.RemoteSegmentArchiveEntry;
 import org.apache.jackrabbit.oak.segment.spi.monitor.IOMonitor;
 
 public class AzureSegmentArchiveReaderV8 extends 
AbstractRemoteSegmentArchiveReader {
 
     private final CloudBlobDirectory archiveDirectory;
 
-    private final long length;
-
     protected AzureSegmentArchiveReaderV8(CloudBlobDirectory archiveDirectory, 
IOMonitor ioMonitor) throws IOException {
-        super(ioMonitor);
+        super(ioMonitor, AzureUtilitiesV8.getName(archiveDirectory), 
createEntryIterable(archiveDirectory));
         this.archiveDirectory = archiveDirectory;
-        this.length = computeArchiveIndexAndLength();
-    }
-
-    @Override
-    public long length() {
-        return length;
     }
 
-    @Override
-    public String getName() {
-        return AzureUtilitiesV8.getName(archiveDirectory);
-    }
-
-    @Override
-    protected long computeArchiveIndexAndLength() throws IOException {
-        long length = 0;
-        for (CloudBlob blob : AzureUtilitiesV8.getBlobs(archiveDirectory)) {
-            Map<String, String> metadata = blob.getMetadata();
-            if (AzureBlobMetadata.isSegment(metadata)) {
-                RemoteSegmentArchiveEntry indexEntry = 
AzureBlobMetadata.toIndexEntry(metadata, (int) 
blob.getProperties().getLength());
-                index.put(new UUID(indexEntry.getMsb(), indexEntry.getLsb()), 
indexEntry);
-            }
-            length += blob.getProperties().getLength();
-        }
-
-        return length;
+    private static Iterable<ArchiveEntry> 
createEntryIterable(CloudBlobDirectory archiveDirectory) throws IOException {
+        return AzureUtilitiesV8.getBlobs(archiveDirectory).stream()
+                .map(blob -> {
+                    Map<String, String> metadata = blob.getMetadata();
+                    int length = (int) blob.getProperties().getLength();
+                    if (AzureBlobMetadata.isSegment(metadata)) {
+                        return new 
ArchiveEntry(AzureBlobMetadata.toIndexEntry(metadata, length));
+                    } else {
+                        return new ArchiveEntry(length);
+                    }
+                })
+                ::iterator;
     }
 
     @Override
diff --git 
a/oak-segment-remote/src/main/java/org/apache/jackrabbit/oak/segment/remote/AbstractRemoteSegmentArchiveReader.java
 
b/oak-segment-remote/src/main/java/org/apache/jackrabbit/oak/segment/remote/AbstractRemoteSegmentArchiveReader.java
index fe99490e18..a289d39ee7 100644
--- 
a/oak-segment-remote/src/main/java/org/apache/jackrabbit/oak/segment/remote/AbstractRemoteSegmentArchiveReader.java
+++ 
b/oak-segment-remote/src/main/java/org/apache/jackrabbit/oak/segment/remote/AbstractRemoteSegmentArchiveReader.java
@@ -30,20 +30,53 @@ import org.jetbrains.annotations.Nullable;
 
 import java.io.File;
 import java.io.IOException;
-import java.util.ArrayList;
-import java.util.LinkedHashMap;
+import java.util.Collections;
+import java.util.Comparator;
+import java.util.LinkedList;
 import java.util.List;
 import java.util.Map;
+import java.util.Set;
 import java.util.UUID;
 import java.util.concurrent.TimeUnit;
+import java.util.stream.Collectors;
 
 public abstract class AbstractRemoteSegmentArchiveReader implements 
SegmentArchiveReader {
+
     protected final IOMonitor ioMonitor;
 
-    protected final Map<UUID, RemoteSegmentArchiveEntry> index = new 
LinkedHashMap<>();
+    /**
+     * Unordered immutable map of segment UUIDs to their corresponding archive 
entries.
+     */
+    private final Map<UUID, RemoteSegmentArchiveEntry> index;
+
+    /**
+     * The name of the archive.
+     */
+    private final String archiveName;
 
-    public AbstractRemoteSegmentArchiveReader(IOMonitor ioMonitor) throws 
IOException {
+    /**
+     * The total size of the archive in bytes.
+     */
+    private final long length;
+
+    protected AbstractRemoteSegmentArchiveReader(IOMonitor ioMonitor, String 
archiveName, Iterable<ArchiveEntry> entries) {
         this.ioMonitor = ioMonitor;
+        this.archiveName = archiveName;
+
+        IndexBuilder indexBuilder = new IndexBuilder();
+        entries.forEach(indexBuilder::addEntry);
+        this.index = indexBuilder.createIndex();
+        this.length = indexBuilder.getLength();
+    }
+
+    @Override
+    public @NotNull String getName() {
+        return archiveName;
+    }
+
+    @Override
+    public long length() {
+        return length;
     }
 
     @Override
@@ -73,9 +106,16 @@ public abstract class AbstractRemoteSegmentArchiveReader 
implements SegmentArchi
         return index.containsKey(new UUID(msb, lsb));
     }
 
+    @Override
+    public Set<UUID> getSegmentUUIDs() {
+        return Collections.unmodifiableSet(index.keySet());
+    }
+
     @Override
     public List<SegmentArchiveEntry> listSegments() {
-        return new ArrayList<>(index.values());
+        return index.values().stream()
+                
.sorted(Comparator.comparing(RemoteSegmentArchiveEntry::getPosition))
+                .collect(Collectors.toList());
     }
 
     @Override
@@ -102,12 +142,6 @@ public abstract class AbstractRemoteSegmentArchiveReader 
implements SegmentArchi
         return size;
     }
 
-    /**
-     * Populates the archive index, summing up each entry's length.
-     * @return length, the total length of the archive
-     */
-    protected abstract long computeArchiveIndexAndLength() throws IOException;
-
     /**
      * Reads the segment from the remote storage.
      * @param segmentFileName, the name of the segment (msb + lsb) prefixed by 
its position in the archive
@@ -132,4 +166,53 @@ public abstract class AbstractRemoteSegmentArchiveReader 
implements SegmentArchi
     public boolean isRemote() {
         return true;
     }
+
+    protected static final class ArchiveEntry {
+
+        private final RemoteSegmentArchiveEntry entry;
+
+        private final int length;
+
+        public ArchiveEntry(RemoteSegmentArchiveEntry entry) {
+            this.entry = entry;
+            this.length = entry.getLength();
+        }
+
+        public ArchiveEntry(int length) {
+            this.entry = null;
+            this.length = length;
+        }
+
+        int getLength() {
+            return length;
+        }
+
+        RemoteSegmentArchiveEntry getRemoteSegmentArchiveEntry() {
+            return entry;
+        }
+    }
+
+    private static final class IndexBuilder {
+
+        private final List<Map.Entry<UUID, RemoteSegmentArchiveEntry>> entries 
= new LinkedList<>();
+
+        private long length = 0;
+
+        private void addEntry(ArchiveEntry entry) {
+            RemoteSegmentArchiveEntry archiveEntry = 
entry.getRemoteSegmentArchiveEntry();
+            if (archiveEntry != null) {
+                this.entries.add(Map.entry(archiveEntry.getUuid(), 
archiveEntry));
+            }
+            this.length += entry.getLength();
+        }
+
+        @SuppressWarnings("unchecked")
+        private Map<UUID, RemoteSegmentArchiveEntry> createIndex() {
+            return Map.ofEntries(entries.toArray(Map.Entry[]::new));
+        }
+
+        private long getLength() {
+            return length;
+        }
+    }
 }
diff --git 
a/oak-segment-remote/src/main/java/org/apache/jackrabbit/oak/segment/remote/AbstractRemoteSegmentArchiveWriter.java
 
b/oak-segment-remote/src/main/java/org/apache/jackrabbit/oak/segment/remote/AbstractRemoteSegmentArchiveWriter.java
index eee6d40cbf..3e1964bb26 100644
--- 
a/oak-segment-remote/src/main/java/org/apache/jackrabbit/oak/segment/remote/AbstractRemoteSegmentArchiveWriter.java
+++ 
b/oak-segment-remote/src/main/java/org/apache/jackrabbit/oak/segment/remote/AbstractRemoteSegmentArchiveWriter.java
@@ -66,7 +66,7 @@ public abstract class AbstractRemoteSegmentArchiveWriter 
implements SegmentArchi
         } else {
             doWriteArchiveEntry(entry, data, offset, size);
         }
-        index.put(new UUID(msb, lsb), entry);
+        index.put(entry.getUuid(), entry);
 
         totalLength += size;
         monitor.written(size);
@@ -80,7 +80,7 @@ public abstract class AbstractRemoteSegmentArchiveWriter 
implements SegmentArchi
             return segment.get().toBuffer();
         }
 
-        RemoteSegmentArchiveEntry indexEntry = index.get(new UUID(msb, lsb));
+        RemoteSegmentArchiveEntry indexEntry = index.get(uuid);
         if (indexEntry == null) {
             return null;
         }
@@ -95,7 +95,7 @@ public abstract class AbstractRemoteSegmentArchiveWriter 
implements SegmentArchi
         if (segment.isPresent()) {
             return true;
         }
-        return index.containsKey(new UUID(msb, lsb));
+        return index.containsKey(uuid);
     }
 
     @Override
diff --git 
a/oak-segment-remote/src/main/java/org/apache/jackrabbit/oak/segment/remote/RemoteSegmentArchiveEntry.java
 
b/oak-segment-remote/src/main/java/org/apache/jackrabbit/oak/segment/remote/RemoteSegmentArchiveEntry.java
index f14f793ac7..c3cdafbcf3 100644
--- 
a/oak-segment-remote/src/main/java/org/apache/jackrabbit/oak/segment/remote/RemoteSegmentArchiveEntry.java
+++ 
b/oak-segment-remote/src/main/java/org/apache/jackrabbit/oak/segment/remote/RemoteSegmentArchiveEntry.java
@@ -16,42 +16,36 @@
  */
 package org.apache.jackrabbit.oak.segment.remote;
 
+import org.apache.jackrabbit.oak.segment.file.tar.GCGeneration;
 import org.apache.jackrabbit.oak.segment.spi.persistence.SegmentArchiveEntry;
 
-public class RemoteSegmentArchiveEntry implements SegmentArchiveEntry {
+import java.util.UUID;
 
-    private final long msb;
+public class RemoteSegmentArchiveEntry implements SegmentArchiveEntry {
 
-    private final long lsb;
+    private final UUID uuid;
 
     private final int position;
 
     private final int length;
 
-    private final int generation;
-
-    private final int fullGeneration;
-
-    private final boolean compacted;
+    private final GCGeneration gcGeneration;
 
     public RemoteSegmentArchiveEntry(long msb, long lsb, int position, int 
length, int generation, int fullGeneration, boolean compacted) {
-        this.msb = msb;
-        this.lsb = lsb;
+        this.uuid = new UUID(msb, lsb);
         this.position = position;
         this.length = length;
-        this.generation = generation;
-        this.fullGeneration = fullGeneration;
-        this.compacted = compacted;
+        this.gcGeneration = GCGeneration.newGCGeneration(generation, 
fullGeneration, compacted);
     }
 
     @Override
     public long getMsb() {
-        return msb;
+        return uuid.getMostSignificantBits();
     }
 
     @Override
     public long getLsb() {
-        return lsb;
+        return uuid.getLeastSignificantBits();
     }
 
     public int getPosition() {
@@ -65,16 +59,20 @@ public class RemoteSegmentArchiveEntry implements 
SegmentArchiveEntry {
 
     @Override
     public int getGeneration() {
-        return generation;
+        return gcGeneration.getGeneration();
     }
 
     @Override
     public int getFullGeneration() {
-        return fullGeneration;
+        return gcGeneration.getFullGeneration();
     }
 
     @Override
     public boolean isCompacted() {
-        return compacted;
+        return gcGeneration.isCompacted();
+    }
+
+    UUID getUuid() {
+        return uuid;
     }
 }
diff --git 
a/oak-segment-remote/src/main/java/org/apache/jackrabbit/oak/segment/remote/package-info.java
 
b/oak-segment-remote/src/main/java/org/apache/jackrabbit/oak/segment/remote/package-info.java
index fd64aa03ce..5c70f9fbe8 100644
--- 
a/oak-segment-remote/src/main/java/org/apache/jackrabbit/oak/segment/remote/package-info.java
+++ 
b/oak-segment-remote/src/main/java/org/apache/jackrabbit/oak/segment/remote/package-info.java
@@ -15,7 +15,7 @@
  * limitations under the License.
  */
 @Internal(since = "1.0.0")
-@Version("2.0.0")
+@Version("3.0.0")
 package org.apache.jackrabbit.oak.segment.remote;
 
 import org.apache.jackrabbit.oak.commons.annotations.Internal;
diff --git 
a/oak-segment-tar/src/main/java/org/apache/jackrabbit/oak/segment/file/tar/GCGeneration.java
 
b/oak-segment-tar/src/main/java/org/apache/jackrabbit/oak/segment/file/tar/GCGeneration.java
index d9784d66a9..176f3f0433 100644
--- 
a/oak-segment-tar/src/main/java/org/apache/jackrabbit/oak/segment/file/tar/GCGeneration.java
+++ 
b/oak-segment-tar/src/main/java/org/apache/jackrabbit/oak/segment/file/tar/GCGeneration.java
@@ -19,7 +19,12 @@ package org.apache.jackrabbit.oak.segment.file.tar;
 
 import static java.util.Objects.requireNonNull;
 
+import java.lang.ref.WeakReference;
+import java.util.Collections;
+import java.util.Iterator;
 import java.util.Objects;
+import java.util.Set;
+import java.util.concurrent.ConcurrentHashMap;
 
 import org.apache.jackrabbit.oak.segment.spi.persistence.SegmentArchiveEntry;
 import org.jetbrains.annotations.NotNull;
@@ -54,12 +59,31 @@ public final class GCGeneration {
 
     public static final GCGeneration NULL = new GCGeneration(0, 0, false);
 
+    private static final Set<WeakReference<GCGeneration>> gcGenerations = 
Collections.newSetFromMap(new ConcurrentHashMap<>());
+
     public static GCGeneration newGCGeneration(int generation, int 
fullGeneration, boolean isCompacted) {
-        return new GCGeneration(generation, fullGeneration, isCompacted);
+        Iterator<WeakReference<GCGeneration>> iterator = 
gcGenerations.iterator();
+        GCGeneration gen = null;
+        while(iterator.hasNext()) {
+            WeakReference<GCGeneration> next = iterator.next();
+            GCGeneration gcGeneration = next.get();
+            if (gcGeneration == null) {
+                iterator.remove();
+            } else if (gcGeneration.generation == generation
+                    && gcGeneration.fullGeneration == fullGeneration
+                    && gcGeneration.isCompacted == isCompacted) {
+                gen = gcGeneration;
+            }
+        }
+        if (gen == null) {
+            gen = new GCGeneration(generation, fullGeneration, isCompacted);
+            gcGenerations.add(new WeakReference<>(gen));
+        }
+        return gen;
     }
 
     public static GCGeneration newGCGeneration(SegmentArchiveEntry indexEntry) 
{
-        return new GCGeneration(indexEntry.getGeneration(), 
indexEntry.getFullGeneration(), indexEntry.isCompacted());
+        return newGCGeneration(indexEntry.getGeneration(), 
indexEntry.getFullGeneration(), indexEntry.isCompacted());
     }
 
     private final int generation;
diff --git 
a/oak-segment-tar/src/main/java/org/apache/jackrabbit/oak/segment/file/tar/TarReader.java
 
b/oak-segment-tar/src/main/java/org/apache/jackrabbit/oak/segment/file/tar/TarReader.java
index 28f2e9f396..1be18b89a4 100644
--- 
a/oak-segment-tar/src/main/java/org/apache/jackrabbit/oak/segment/file/tar/TarReader.java
+++ 
b/oak-segment-tar/src/main/java/org/apache/jackrabbit/oak/segment/file/tar/TarReader.java
@@ -25,6 +25,7 @@ import java.io.Closeable;
 import java.io.File;
 import java.io.IOException;
 import java.util.ArrayList;
+import java.util.Arrays;
 import java.util.Collections;
 import java.util.HashMap;
 import java.util.HashSet;
@@ -283,10 +284,7 @@ public class TarReader implements Closeable {
     private TarReader(SegmentArchiveManager archiveManager, 
SegmentArchiveReader archive) {
         this.archiveManager = archiveManager;
         this.archive = archive;
-        this.segmentUUIDs = archive.listSegments()
-                .stream()
-                .map(e -> new UUID(e.getMsb(), e.getLsb()))
-                .collect(Collectors.toUnmodifiableSet());
+        this.segmentUUIDs = archive.getSegmentUUIDs();
     }
 
     long size() {
diff --git 
a/oak-segment-tar/src/main/java/org/apache/jackrabbit/oak/segment/spi/persistence/SegmentArchiveReader.java
 
b/oak-segment-tar/src/main/java/org/apache/jackrabbit/oak/segment/spi/persistence/SegmentArchiveReader.java
index 3ac897e65b..8b22150516 100644
--- 
a/oak-segment-tar/src/main/java/org/apache/jackrabbit/oak/segment/spi/persistence/SegmentArchiveReader.java
+++ 
b/oak-segment-tar/src/main/java/org/apache/jackrabbit/oak/segment/spi/persistence/SegmentArchiveReader.java
@@ -21,6 +21,9 @@ package org.apache.jackrabbit.oak.segment.spi.persistence;
 import java.io.Closeable;
 import java.io.IOException;
 import java.util.List;
+import java.util.Set;
+import java.util.UUID;
+import java.util.stream.Collectors;
 
 import org.apache.jackrabbit.oak.commons.Buffer;
 import org.apache.jackrabbit.oak.segment.file.tar.SegmentGraph;
@@ -52,6 +55,19 @@ public interface SegmentArchiveReader extends Closeable {
      */
     boolean containsSegment(long msb, long lsb);
 
+    /**
+     * Returns an immutable {@code Set} of the UUIDs of all segments contained 
in this archive.
+     * No guarantees are made regarding the iteration order of the elements.
+     *
+     * @return set of segment UUIDs
+     */
+    default Set<UUID> getSegmentUUIDs() {
+        return listSegments()
+                .stream()
+                .map(e -> new UUID(e.getMsb(), e.getLsb()))
+                .collect(Collectors.toUnmodifiableSet());
+    }
+
     /**
      * List all the segments, in the order as they have been written to the 
archive.
      *
diff --git 
a/oak-segment-tar/src/main/java/org/apache/jackrabbit/oak/segment/spi/persistence/persistentcache/CachingSegmentArchiveReader.java
 
b/oak-segment-tar/src/main/java/org/apache/jackrabbit/oak/segment/spi/persistence/persistentcache/CachingSegmentArchiveReader.java
index 8ef42fbf4d..f81eaee782 100644
--- 
a/oak-segment-tar/src/main/java/org/apache/jackrabbit/oak/segment/spi/persistence/persistentcache/CachingSegmentArchiveReader.java
+++ 
b/oak-segment-tar/src/main/java/org/apache/jackrabbit/oak/segment/spi/persistence/persistentcache/CachingSegmentArchiveReader.java
@@ -27,6 +27,8 @@ import org.jetbrains.annotations.Nullable;
 
 import java.io.IOException;
 import java.util.List;
+import java.util.Set;
+import java.util.UUID;
 
 public class CachingSegmentArchiveReader implements SegmentArchiveReader {
 
@@ -63,6 +65,11 @@ public class CachingSegmentArchiveReader implements 
SegmentArchiveReader {
         return delegate.listSegments();
     }
 
+    @Override
+    public Set<UUID> getSegmentUUIDs() {
+        return delegate.getSegmentUUIDs();
+    }
+
     @Override
     public @NotNull SegmentGraph getGraph() throws IOException {
         return delegate.getGraph();
diff --git 
a/oak-segment-tar/src/main/java/org/apache/jackrabbit/oak/segment/spi/persistence/persistentcache/package-info.java
 
b/oak-segment-tar/src/main/java/org/apache/jackrabbit/oak/segment/spi/persistence/persistentcache/package-info.java
index 611279bc26..0ad0171f1b 100644
--- 
a/oak-segment-tar/src/main/java/org/apache/jackrabbit/oak/segment/spi/persistence/persistentcache/package-info.java
+++ 
b/oak-segment-tar/src/main/java/org/apache/jackrabbit/oak/segment/spi/persistence/persistentcache/package-info.java
@@ -15,7 +15,7 @@
  * limitations under the License.
  */
 @Internal(since = "1.0.0")
-@Version("6.0.0")
+@Version("6.1.0")
 package org.apache.jackrabbit.oak.segment.spi.persistence.persistentcache;
 
 import org.apache.jackrabbit.oak.commons.annotations.Internal;
diff --git 
a/oak-segment-tar/src/main/java/org/apache/jackrabbit/oak/segment/spi/persistence/split/UnclosedSegmentArchiveReader.java
 
b/oak-segment-tar/src/main/java/org/apache/jackrabbit/oak/segment/spi/persistence/split/UnclosedSegmentArchiveReader.java
index d6902c757d..f50ffd757c 100644
--- 
a/oak-segment-tar/src/main/java/org/apache/jackrabbit/oak/segment/spi/persistence/split/UnclosedSegmentArchiveReader.java
+++ 
b/oak-segment-tar/src/main/java/org/apache/jackrabbit/oak/segment/spi/persistence/split/UnclosedSegmentArchiveReader.java
@@ -18,6 +18,8 @@ package 
org.apache.jackrabbit.oak.segment.spi.persistence.split;
 
 import java.io.IOException;
 import java.util.List;
+import java.util.Set;
+import java.util.UUID;
 
 import org.apache.jackrabbit.oak.commons.Buffer;
 import org.apache.jackrabbit.oak.segment.file.tar.SegmentGraph;
@@ -52,6 +54,11 @@ class UnclosedSegmentArchiveReader implements 
SegmentArchiveReader {
         return delegate.listSegments();
     }
 
+    @Override
+    public Set<UUID> getSegmentUUIDs() {
+        return delegate.getSegmentUUIDs();
+    }
+
     @Override
     public @NotNull SegmentGraph getGraph() throws IOException {
         return delegate.getGraph();

Reply via email to