This is an automated email from the ASF dual-hosted git repository.
jsedding pushed a commit to branch trunk
in repository https://gitbox.apache.org/repos/asf/jackrabbit-oak.git
The following commit(s) were added to refs/heads/trunk by this push:
new 082c3a3e55 OAK-12070 - Reduce memory consumption of azure segment
stores (#2699)
082c3a3e55 is described below
commit 082c3a3e551bd04663c785c6d78124ec415e784f
Author: Julian Sedding <[email protected]>
AuthorDate: Mon Jan 26 11:39:26 2026 +0100
OAK-12070 - Reduce memory consumption of azure segment stores (#2699)
- deduplicate in-memory UUID instances (halves the number of UUID instances
held in memory, at 32 bytes each)
- avoid LinkedHashMap in favour of java.util.ImmutableCollections (eliminates
one LinkedHashMap$Entry weighing 40 bytes per segment)
- create GCGeneration instance pool (instances now used by
RemoteSegmentArchiveEntry)
- use UUID and GCGeneration references in RemoteSegmentArchiveEntry (reduces
object size from 48 to 32 bytes)
---
.../oak/segment/aws/AwsSegmentArchiveReader.java | 63 +++-----
.../segment/azure/AzureSegmentArchiveReader.java | 50 ++----
.../azure/v8/AzureSegmentArchiveReaderV8.java | 43 ++---
.../remote/AbstractRemoteSegmentArchiveReader.java | 105 ++++++++++--
.../remote/AbstractRemoteSegmentArchiveWriter.java | 6 +-
.../segment/remote/RemoteSegmentArchiveEntry.java | 34 ++--
.../oak/segment/remote/package-info.java | 2 +-
.../AbstractRemoteSegmentArchiveReaderTest.java | 176 +++++++++++++++++++++
.../remote/RemoteSegmentArchiveEntryTest.java | 72 +++++++++
.../oak/segment/file/tar/GCGeneration.java | 28 +++-
.../jackrabbit/oak/segment/file/tar/TarReader.java | 6 +-
.../spi/persistence/SegmentArchiveReader.java | 16 ++
.../CachingSegmentArchiveReader.java | 7 +
.../persistence/persistentcache/package-info.java | 2 +-
.../split/UnclosedSegmentArchiveReader.java | 7 +
15 files changed, 475 insertions(+), 142 deletions(-)
diff --git
a/oak-segment-aws/src/main/java/org/apache/jackrabbit/oak/segment/aws/AwsSegmentArchiveReader.java
b/oak-segment-aws/src/main/java/org/apache/jackrabbit/oak/segment/aws/AwsSegmentArchiveReader.java
index 9811cbe380..7b9fb3a97d 100644
---
a/oak-segment-aws/src/main/java/org/apache/jackrabbit/oak/segment/aws/AwsSegmentArchiveReader.java
+++
b/oak-segment-aws/src/main/java/org/apache/jackrabbit/oak/segment/aws/AwsSegmentArchiveReader.java
@@ -20,7 +20,8 @@ import static
org.apache.jackrabbit.oak.segment.remote.RemoteUtilities.OFF_HEAP;
import java.io.File;
import java.io.IOException;
-import java.util.UUID;
+import java.util.Iterator;
+import java.util.NoSuchElementException;
import org.apache.jackrabbit.oak.commons.Buffer;
import
org.apache.jackrabbit.oak.segment.remote.AbstractRemoteSegmentArchiveReader;
@@ -31,47 +32,35 @@ public class AwsSegmentArchiveReader extends
AbstractRemoteSegmentArchiveReader
private final S3Directory directory;
- private final String archiveName;
-
- private final long length;
-
AwsSegmentArchiveReader(S3Directory directory, String archiveName,
IOMonitor ioMonitor) throws IOException {
- super(ioMonitor);
+ super(ioMonitor, archiveName, createEntryIterable(directory,
archiveName));
this.directory = directory;
- this.archiveName = archiveName;
- this.length = computeArchiveIndexAndLength();
- }
-
- @Override
- public long length() {
- return length;
- }
-
- @Override
- public String getName() {
- return archiveName;
}
- @Override
- protected long computeArchiveIndexAndLength() throws IOException {
- long length = 0;
+ private static Iterable<ArchiveEntry> createEntryIterable(S3Directory
directory, String archiveName) throws IOException{
Buffer buffer = directory.readObjectToBuffer(archiveName + ".idx",
OFF_HEAP);
- while (buffer.hasRemaining()) {
- long msb = buffer.getLong();
- long lsb = buffer.getLong();
- int position = buffer.getInt();
- int contentLength = buffer.getInt();
- int generation = buffer.getInt();
- int fullGeneration = buffer.getInt();
- boolean compacted = buffer.get() != 0;
-
- RemoteSegmentArchiveEntry indexEntry = new
RemoteSegmentArchiveEntry(msb, lsb, position, contentLength,
- generation, fullGeneration, compacted);
- index.put(new UUID(indexEntry.getMsb(), indexEntry.getLsb()),
indexEntry);
- length += contentLength;
- }
-
- return length;
+ return () -> new Iterator<>() {
+ @Override
+ public boolean hasNext() {
+ return buffer.hasRemaining();
+ }
+
+ @Override
+ public ArchiveEntry next() {
+ if (!hasNext()) {
+ throw new NoSuchElementException();
+ }
+
+ long msb = buffer.getLong();
+ long lsb = buffer.getLong();
+ int position = buffer.getInt();
+ int contentLength = buffer.getInt();
+ int generation = buffer.getInt();
+ int fullGeneration = buffer.getInt();
+ boolean compacted = buffer.get() != 0;
+ return new ArchiveEntry(new RemoteSegmentArchiveEntry(msb,
lsb, position, contentLength, generation, fullGeneration, compacted));
+ }
+ };
}
@Override
diff --git
a/oak-segment-azure/src/main/java/org/apache/jackrabbit/oak/segment/azure/AzureSegmentArchiveReader.java
b/oak-segment-azure/src/main/java/org/apache/jackrabbit/oak/segment/azure/AzureSegmentArchiveReader.java
index 18ca18f0d6..7a8be8198d 100644
---
a/oak-segment-azure/src/main/java/org/apache/jackrabbit/oak/segment/azure/AzureSegmentArchiveReader.java
+++
b/oak-segment-azure/src/main/java/org/apache/jackrabbit/oak/segment/azure/AzureSegmentArchiveReader.java
@@ -17,19 +17,17 @@
package org.apache.jackrabbit.oak.segment.azure;
import com.azure.storage.blob.BlobContainerClient;
-import com.azure.storage.blob.models.BlobItem;
import com.azure.storage.blob.models.BlobStorageException;
import com.azure.storage.blob.models.ListBlobsOptions;
import com.azure.storage.blob.specialized.BlockBlobClient;
import org.apache.jackrabbit.oak.commons.Buffer;
import
org.apache.jackrabbit.oak.segment.remote.AbstractRemoteSegmentArchiveReader;
-import org.apache.jackrabbit.oak.segment.remote.RemoteSegmentArchiveEntry;
import org.apache.jackrabbit.oak.segment.spi.monitor.IOMonitor;
+import org.jetbrains.annotations.NotNull;
import java.io.File;
import java.io.IOException;
import java.util.Map;
-import java.util.UUID;
import static
org.apache.jackrabbit.oak.segment.azure.AzureUtilities.readBufferFully;
@@ -37,45 +35,29 @@ public class AzureSegmentArchiveReader extends
AbstractRemoteSegmentArchiveReade
private final BlobContainerClient blobContainerClient;
- private final long length;
-
- private final String archiveName;
-
private final String archivePathPrefix;
- AzureSegmentArchiveReader(BlobContainerClient blobContainerClient, String
rootPrefix, String archiveName, IOMonitor ioMonitor) throws IOException {
- super(ioMonitor);
+ AzureSegmentArchiveReader(BlobContainerClient blobContainerClient, String
rootPrefix, String archiveName, IOMonitor ioMonitor) {
+ super(ioMonitor, AzureUtilities.ensureNoTrailingSlash(archiveName),
+ createEntryIterable(blobContainerClient,
AzureUtilities.asAzurePrefix(rootPrefix, archiveName)));
this.blobContainerClient = blobContainerClient;
- this.archiveName = AzureUtilities.ensureNoTrailingSlash(archiveName);
this.archivePathPrefix = AzureUtilities.asAzurePrefix(rootPrefix,
archiveName);
- this.length = computeArchiveIndexAndLength();
- }
-
- @Override
- public long length() {
- return length;
- }
-
- @Override
- public String getName() {
- return archiveName;
}
- @Override
- protected long computeArchiveIndexAndLength() throws IOException {
- long length = 0;
+ private static Iterable<ArchiveEntry>
createEntryIterable(BlobContainerClient blobContainerClient, @NotNull String
archivePathPrefix) {
ListBlobsOptions listBlobsOptions = new ListBlobsOptions();
listBlobsOptions.setPrefix(archivePathPrefix);
- for (BlobItem blob : AzureUtilities.getBlobs(blobContainerClient,
listBlobsOptions)) {
- Map<String, String> metadata = blob.getMetadata();
- if (AzureBlobMetadata.isSegment(metadata)) {
- RemoteSegmentArchiveEntry indexEntry =
AzureBlobMetadata.toIndexEntry(metadata,
blob.getProperties().getContentLength().intValue());
- index.put(new UUID(indexEntry.getMsb(), indexEntry.getLsb()),
indexEntry);
- }
- length += blob.getProperties().getContentLength();
- }
-
- return length;
+ return AzureUtilities.getBlobs(blobContainerClient,
listBlobsOptions).stream()
+ .map(blobItem -> {
+ Map<String, String> metadata = blobItem.getMetadata();
+ int length =
blobItem.getProperties().getContentLength().intValue();
+ if (AzureBlobMetadata.isSegment(metadata)) {
+ return new
ArchiveEntry(AzureBlobMetadata.toIndexEntry(metadata, length));
+ } else {
+ return new ArchiveEntry(length);
+ }
+ })
+ ::iterator;
}
@Override
diff --git
a/oak-segment-azure/src/main/java/org/apache/jackrabbit/oak/segment/azure/v8/AzureSegmentArchiveReaderV8.java
b/oak-segment-azure/src/main/java/org/apache/jackrabbit/oak/segment/azure/v8/AzureSegmentArchiveReaderV8.java
index de69711b6e..7242de4fe2 100644
---
a/oak-segment-azure/src/main/java/org/apache/jackrabbit/oak/segment/azure/v8/AzureSegmentArchiveReaderV8.java
+++
b/oak-segment-azure/src/main/java/org/apache/jackrabbit/oak/segment/azure/v8/AzureSegmentArchiveReaderV8.java
@@ -22,54 +22,37 @@ import java.io.File;
import java.io.IOException;
import java.net.URISyntaxException;
import java.util.Map;
-import java.util.UUID;
import com.microsoft.azure.storage.StorageException;
-import com.microsoft.azure.storage.blob.CloudBlob;
import com.microsoft.azure.storage.blob.CloudBlobDirectory;
import com.microsoft.azure.storage.blob.CloudBlockBlob;
import org.apache.jackrabbit.oak.commons.Buffer;
import org.apache.jackrabbit.oak.segment.azure.AzureBlobMetadata;
import
org.apache.jackrabbit.oak.segment.remote.AbstractRemoteSegmentArchiveReader;
-import org.apache.jackrabbit.oak.segment.remote.RemoteSegmentArchiveEntry;
import org.apache.jackrabbit.oak.segment.spi.monitor.IOMonitor;
public class AzureSegmentArchiveReaderV8 extends
AbstractRemoteSegmentArchiveReader {
private final CloudBlobDirectory archiveDirectory;
- private final long length;
-
protected AzureSegmentArchiveReaderV8(CloudBlobDirectory archiveDirectory,
IOMonitor ioMonitor) throws IOException {
- super(ioMonitor);
+ super(ioMonitor, AzureUtilitiesV8.getName(archiveDirectory),
createEntryIterable(archiveDirectory));
this.archiveDirectory = archiveDirectory;
- this.length = computeArchiveIndexAndLength();
- }
-
- @Override
- public long length() {
- return length;
}
- @Override
- public String getName() {
- return AzureUtilitiesV8.getName(archiveDirectory);
- }
-
- @Override
- protected long computeArchiveIndexAndLength() throws IOException {
- long length = 0;
- for (CloudBlob blob : AzureUtilitiesV8.getBlobs(archiveDirectory)) {
- Map<String, String> metadata = blob.getMetadata();
- if (AzureBlobMetadata.isSegment(metadata)) {
- RemoteSegmentArchiveEntry indexEntry =
AzureBlobMetadata.toIndexEntry(metadata, (int)
blob.getProperties().getLength());
- index.put(new UUID(indexEntry.getMsb(), indexEntry.getLsb()),
indexEntry);
- }
- length += blob.getProperties().getLength();
- }
-
- return length;
+ private static Iterable<ArchiveEntry>
createEntryIterable(CloudBlobDirectory archiveDirectory) throws IOException {
+ return AzureUtilitiesV8.getBlobs(archiveDirectory).stream()
+ .map(blob -> {
+ Map<String, String> metadata = blob.getMetadata();
+ int length = (int) blob.getProperties().getLength();
+ if (AzureBlobMetadata.isSegment(metadata)) {
+ return new
ArchiveEntry(AzureBlobMetadata.toIndexEntry(metadata, length));
+ } else {
+ return new ArchiveEntry(length);
+ }
+ })
+ ::iterator;
}
@Override
diff --git
a/oak-segment-remote/src/main/java/org/apache/jackrabbit/oak/segment/remote/AbstractRemoteSegmentArchiveReader.java
b/oak-segment-remote/src/main/java/org/apache/jackrabbit/oak/segment/remote/AbstractRemoteSegmentArchiveReader.java
index fe99490e18..a289d39ee7 100644
---
a/oak-segment-remote/src/main/java/org/apache/jackrabbit/oak/segment/remote/AbstractRemoteSegmentArchiveReader.java
+++
b/oak-segment-remote/src/main/java/org/apache/jackrabbit/oak/segment/remote/AbstractRemoteSegmentArchiveReader.java
@@ -30,20 +30,53 @@ import org.jetbrains.annotations.Nullable;
import java.io.File;
import java.io.IOException;
-import java.util.ArrayList;
-import java.util.LinkedHashMap;
+import java.util.Collections;
+import java.util.Comparator;
+import java.util.LinkedList;
import java.util.List;
import java.util.Map;
+import java.util.Set;
import java.util.UUID;
import java.util.concurrent.TimeUnit;
+import java.util.stream.Collectors;
public abstract class AbstractRemoteSegmentArchiveReader implements
SegmentArchiveReader {
+
protected final IOMonitor ioMonitor;
- protected final Map<UUID, RemoteSegmentArchiveEntry> index = new
LinkedHashMap<>();
+ /**
+ * Unordered immutable map of segment UUIDs to their corresponding archive
entries.
+ */
+ private final Map<UUID, RemoteSegmentArchiveEntry> index;
+
+ /**
+ * The name of the archive.
+ */
+ private final String archiveName;
- public AbstractRemoteSegmentArchiveReader(IOMonitor ioMonitor) throws
IOException {
+ /**
+ * The total size of the archive in bytes.
+ */
+ private final long length;
+
+ protected AbstractRemoteSegmentArchiveReader(IOMonitor ioMonitor, String
archiveName, Iterable<ArchiveEntry> entries) {
this.ioMonitor = ioMonitor;
+ this.archiveName = archiveName;
+
+ IndexBuilder indexBuilder = new IndexBuilder();
+ entries.forEach(indexBuilder::addEntry);
+ this.index = indexBuilder.createIndex();
+ this.length = indexBuilder.getLength();
+ }
+
+ @Override
+ public @NotNull String getName() {
+ return archiveName;
+ }
+
+ @Override
+ public long length() {
+ return length;
}
@Override
@@ -73,9 +106,16 @@ public abstract class AbstractRemoteSegmentArchiveReader
implements SegmentArchi
return index.containsKey(new UUID(msb, lsb));
}
+ @Override
+ public Set<UUID> getSegmentUUIDs() {
+ return Collections.unmodifiableSet(index.keySet());
+ }
+
@Override
public List<SegmentArchiveEntry> listSegments() {
- return new ArrayList<>(index.values());
+ return index.values().stream()
+
.sorted(Comparator.comparing(RemoteSegmentArchiveEntry::getPosition))
+ .collect(Collectors.toList());
}
@Override
@@ -102,12 +142,6 @@ public abstract class AbstractRemoteSegmentArchiveReader
implements SegmentArchi
return size;
}
- /**
- * Populates the archive index, summing up each entry's length.
- * @return length, the total length of the archive
- */
- protected abstract long computeArchiveIndexAndLength() throws IOException;
-
/**
* Reads the segment from the remote storage.
* @param segmentFileName, the name of the segment (msb + lsb) prefixed by
its position in the archive
@@ -132,4 +166,53 @@ public abstract class AbstractRemoteSegmentArchiveReader
implements SegmentArchi
public boolean isRemote() {
return true;
}
+
+ protected static final class ArchiveEntry {
+
+ private final RemoteSegmentArchiveEntry entry;
+
+ private final int length;
+
+ public ArchiveEntry(RemoteSegmentArchiveEntry entry) {
+ this.entry = entry;
+ this.length = entry.getLength();
+ }
+
+ public ArchiveEntry(int length) {
+ this.entry = null;
+ this.length = length;
+ }
+
+ int getLength() {
+ return length;
+ }
+
+ RemoteSegmentArchiveEntry getRemoteSegmentArchiveEntry() {
+ return entry;
+ }
+ }
+
+ private static final class IndexBuilder {
+
+ private final List<Map.Entry<UUID, RemoteSegmentArchiveEntry>> entries
= new LinkedList<>();
+
+ private long length = 0;
+
+ private void addEntry(ArchiveEntry entry) {
+ RemoteSegmentArchiveEntry archiveEntry =
entry.getRemoteSegmentArchiveEntry();
+ if (archiveEntry != null) {
+ this.entries.add(Map.entry(archiveEntry.getUuid(),
archiveEntry));
+ }
+ this.length += entry.getLength();
+ }
+
+ @SuppressWarnings("unchecked")
+ private Map<UUID, RemoteSegmentArchiveEntry> createIndex() {
+ return Map.ofEntries(entries.toArray(Map.Entry[]::new));
+ }
+
+ private long getLength() {
+ return length;
+ }
+ }
}
diff --git
a/oak-segment-remote/src/main/java/org/apache/jackrabbit/oak/segment/remote/AbstractRemoteSegmentArchiveWriter.java
b/oak-segment-remote/src/main/java/org/apache/jackrabbit/oak/segment/remote/AbstractRemoteSegmentArchiveWriter.java
index eee6d40cbf..3e1964bb26 100644
---
a/oak-segment-remote/src/main/java/org/apache/jackrabbit/oak/segment/remote/AbstractRemoteSegmentArchiveWriter.java
+++
b/oak-segment-remote/src/main/java/org/apache/jackrabbit/oak/segment/remote/AbstractRemoteSegmentArchiveWriter.java
@@ -66,7 +66,7 @@ public abstract class AbstractRemoteSegmentArchiveWriter
implements SegmentArchi
} else {
doWriteArchiveEntry(entry, data, offset, size);
}
- index.put(new UUID(msb, lsb), entry);
+ index.put(entry.getUuid(), entry);
totalLength += size;
monitor.written(size);
@@ -80,7 +80,7 @@ public abstract class AbstractRemoteSegmentArchiveWriter
implements SegmentArchi
return segment.get().toBuffer();
}
- RemoteSegmentArchiveEntry indexEntry = index.get(new UUID(msb, lsb));
+ RemoteSegmentArchiveEntry indexEntry = index.get(uuid);
if (indexEntry == null) {
return null;
}
@@ -95,7 +95,7 @@ public abstract class AbstractRemoteSegmentArchiveWriter
implements SegmentArchi
if (segment.isPresent()) {
return true;
}
- return index.containsKey(new UUID(msb, lsb));
+ return index.containsKey(uuid);
}
@Override
diff --git
a/oak-segment-remote/src/main/java/org/apache/jackrabbit/oak/segment/remote/RemoteSegmentArchiveEntry.java
b/oak-segment-remote/src/main/java/org/apache/jackrabbit/oak/segment/remote/RemoteSegmentArchiveEntry.java
index f14f793ac7..c3cdafbcf3 100644
---
a/oak-segment-remote/src/main/java/org/apache/jackrabbit/oak/segment/remote/RemoteSegmentArchiveEntry.java
+++
b/oak-segment-remote/src/main/java/org/apache/jackrabbit/oak/segment/remote/RemoteSegmentArchiveEntry.java
@@ -16,42 +16,36 @@
*/
package org.apache.jackrabbit.oak.segment.remote;
+import org.apache.jackrabbit.oak.segment.file.tar.GCGeneration;
import org.apache.jackrabbit.oak.segment.spi.persistence.SegmentArchiveEntry;
-public class RemoteSegmentArchiveEntry implements SegmentArchiveEntry {
+import java.util.UUID;
- private final long msb;
+public class RemoteSegmentArchiveEntry implements SegmentArchiveEntry {
- private final long lsb;
+ private final UUID uuid;
private final int position;
private final int length;
- private final int generation;
-
- private final int fullGeneration;
-
- private final boolean compacted;
+ private final GCGeneration gcGeneration;
public RemoteSegmentArchiveEntry(long msb, long lsb, int position, int
length, int generation, int fullGeneration, boolean compacted) {
- this.msb = msb;
- this.lsb = lsb;
+ this.uuid = new UUID(msb, lsb);
this.position = position;
this.length = length;
- this.generation = generation;
- this.fullGeneration = fullGeneration;
- this.compacted = compacted;
+ this.gcGeneration = GCGeneration.newGCGeneration(generation,
fullGeneration, compacted);
}
@Override
public long getMsb() {
- return msb;
+ return uuid.getMostSignificantBits();
}
@Override
public long getLsb() {
- return lsb;
+ return uuid.getLeastSignificantBits();
}
public int getPosition() {
@@ -65,16 +59,20 @@ public class RemoteSegmentArchiveEntry implements
SegmentArchiveEntry {
@Override
public int getGeneration() {
- return generation;
+ return gcGeneration.getGeneration();
}
@Override
public int getFullGeneration() {
- return fullGeneration;
+ return gcGeneration.getFullGeneration();
}
@Override
public boolean isCompacted() {
- return compacted;
+ return gcGeneration.isCompacted();
+ }
+
+ UUID getUuid() {
+ return uuid;
}
}
diff --git
a/oak-segment-remote/src/main/java/org/apache/jackrabbit/oak/segment/remote/package-info.java
b/oak-segment-remote/src/main/java/org/apache/jackrabbit/oak/segment/remote/package-info.java
index fd64aa03ce..5c70f9fbe8 100644
---
a/oak-segment-remote/src/main/java/org/apache/jackrabbit/oak/segment/remote/package-info.java
+++
b/oak-segment-remote/src/main/java/org/apache/jackrabbit/oak/segment/remote/package-info.java
@@ -15,7 +15,7 @@
* limitations under the License.
*/
@Internal(since = "1.0.0")
-@Version("2.0.0")
+@Version("3.0.0")
package org.apache.jackrabbit.oak.segment.remote;
import org.apache.jackrabbit.oak.commons.annotations.Internal;
diff --git
a/oak-segment-remote/src/test/java/org/apache/jackrabbit/oak/segment/remote/AbstractRemoteSegmentArchiveReaderTest.java
b/oak-segment-remote/src/test/java/org/apache/jackrabbit/oak/segment/remote/AbstractRemoteSegmentArchiveReaderTest.java
new file mode 100644
index 0000000000..09902031c7
--- /dev/null
+++
b/oak-segment-remote/src/test/java/org/apache/jackrabbit/oak/segment/remote/AbstractRemoteSegmentArchiveReaderTest.java
@@ -0,0 +1,176 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.jackrabbit.oak.segment.remote;
+
+import org.apache.jackrabbit.oak.commons.Buffer;
+import org.apache.jackrabbit.oak.segment.file.tar.SegmentGraph;
+import
org.apache.jackrabbit.oak.segment.remote.AbstractRemoteSegmentArchiveReader.ArchiveEntry;
+import org.apache.jackrabbit.oak.segment.spi.monitor.IOMonitorAdapter;
+import org.junit.Before;
+import org.junit.Test;
+
+import java.io.File;
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.Comparator;
+import java.util.List;
+import java.util.Random;
+import java.util.Set;
+import java.util.UUID;
+
+import static org.junit.Assert.assertEquals;
+import static org.junit.Assert.assertFalse;
+import static org.junit.Assert.assertNotNull;
+import static org.junit.Assert.assertNull;
+import static org.junit.Assert.assertTrue;
+import static org.junit.Assert.fail;
+
+public class AbstractRemoteSegmentArchiveReaderTest {
+
+ private static class TestSegmentArchiveReader extends
AbstractRemoteSegmentArchiveReader {
+
+ public TestSegmentArchiveReader(String archiveName,
Iterable<ArchiveEntry> entries) {
+ super(new IOMonitorAdapter(), archiveName, entries);
+ }
+
+ @Override
+ protected void doReadSegmentToBuffer(String segmentFileName, Buffer
buffer) throws IOException {
+ for (int i = 0; i < buffer.limit(); i++) {
+ buffer.put((byte) 1);
+ }
+ }
+
+ @Override
+ protected Buffer doReadDataFile(String extension) throws IOException {
+ return null;
+ }
+
+ @Override
+ protected File archivePathAsFile() {
+ return new File(getName());
+ }
+
+ }
+
+ private static final List<UUID> SEGMENT_UUIDS = List.of(
+ new UUID(0L, 0L),
+ new UUID(0L, 1L),
+ new UUID(0L, 2L),
+ new UUID(0L, 3L),
+ new UUID(0L, 4L)
+ );
+
+ private TestSegmentArchiveReader reader;
+
+ @Before
+ public void setup() {
+
+ ArrayList<ArchiveEntry> archiveEntries = new
ArrayList<>(SEGMENT_UUIDS.size() + 2);
+ archiveEntries.add(new ArchiveEntry(15));
+ archiveEntries.add(new ArchiveEntry(35));
+ for (int i = 0; i < SEGMENT_UUIDS.size(); i++) {
+ UUID uuid = SEGMENT_UUIDS.get(i);
+ archiveEntries.add(new ArchiveEntry(new
RemoteSegmentArchiveEntry(uuid.getMostSignificantBits(),
uuid.getLeastSignificantBits(), i, 20, 0, 0, true)));
+ }
+
+ // sort in random order to make sure the reader sorts them correctly
by their position
+ Random random = new Random();
+ archiveEntries.sort(Comparator.comparing(e -> random.nextInt(2) - 1));
+
+ reader = new TestSegmentArchiveReader("data00000a.tar",
archiveEntries);
+ }
+
+ @Test
+ public void testReadSegment() throws IOException {
+ Buffer buffer = reader.readSegment(0L, 1L);
+ assertNotNull(buffer);
+ assertEquals(20, buffer.limit());
+ for (int i = 0; i < buffer.limit(); i++) {
+ assertEquals(1, buffer.get(i));
+ }
+ }
+
+ @Test
+ public void testReadNonExistentSegment() throws IOException {
+ Buffer buffer = reader.readSegment(1L, 3L);
+ assertNull(buffer);
+ }
+
+ @Test
+ public void testGetArchiveSize() {
+ assertEquals(150, reader.length());
+ }
+
+ @Test
+ public void testIsRemote() {
+ assertTrue(reader.isRemote());
+ }
+
+ @Test
+ public void testGetEntrySize() {
+ assertEquals(10, reader.getEntrySize(10));
+ assertEquals(20, reader.getEntrySize(20));
+ }
+
+ @Test
+ public void testGetBinaryReferences() throws IOException {
+ assertNull(reader.getBinaryReferences());
+ }
+
+ @Test
+ public void testGetGraph() throws IOException {
+ SegmentGraph graph = reader.getGraph();
+ assertNotNull(graph);
+ }
+
+ @Test
+ public void testGetName() {
+ assertEquals("data00000a.tar", reader.getName());
+ }
+
+ @Test
+ public void testClose() {
+ try {
+ reader.close();
+ } catch (Exception e) {
+ fail("Close should not throw an exception");
+ }
+ }
+
+ @Test
+ public void testContainsSegment() {
+ SEGMENT_UUIDS.forEach(uuid ->
assertTrue(reader.containsSegment(uuid.getMostSignificantBits(),
uuid.getLeastSignificantBits())));
+ assertFalse(reader.containsSegment(1L, 3L));
+ }
+
+ @Test
+ public void testListSegments() {
+ var segments = reader.listSegments();
+ assertEquals(5, segments.size());
+ segments.forEach(e -> assertEquals(0L, e.getMsb()));
+ for (int i = 0; i < segments.size(); i++) {
+ assertEquals(i, segments.get(i).getLsb()); // LSBs are set up to
be the same as the position
+ }
+ }
+
+ @Test
+ public void testGetSegmentUUIDs() {
+ assertEquals(Set.copyOf(SEGMENT_UUIDS), reader.getSegmentUUIDs());
+ }
+}
diff --git
a/oak-segment-remote/src/test/java/org/apache/jackrabbit/oak/segment/remote/RemoteSegmentArchiveEntryTest.java
b/oak-segment-remote/src/test/java/org/apache/jackrabbit/oak/segment/remote/RemoteSegmentArchiveEntryTest.java
new file mode 100644
index 0000000000..9cf66524cb
--- /dev/null
+++
b/oak-segment-remote/src/test/java/org/apache/jackrabbit/oak/segment/remote/RemoteSegmentArchiveEntryTest.java
@@ -0,0 +1,72 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.jackrabbit.oak.segment.remote;
+
+import org.junit.Test;
+
+import static org.junit.Assert.assertEquals;
+import static org.junit.Assert.assertSame;
+import static org.junit.Assert.assertTrue;
+
+public class RemoteSegmentArchiveEntryTest {
+
+ private final RemoteSegmentArchiveEntry entry = new
RemoteSegmentArchiveEntry(1L, 2L, 0, 128, 3, 4, true);
+
+ @Test
+ public void getMsb() {
+ assertEquals(1, entry.getMsb());
+ }
+
+ @Test
+ public void getLsb() {
+ assertEquals(2, entry.getLsb());
+ }
+
+ @Test
+ public void getPosition() {
+ assertEquals(0, entry.getPosition());
+ }
+
+ @Test
+ public void getLength() {
+ assertEquals(128, entry.getLength());
+ }
+
+ @Test
+ public void getGeneration() {
+ assertEquals(3, entry.getGeneration());
+ }
+
+ @Test
+ public void getFullGeneration() {
+ assertEquals(4, entry.getFullGeneration());
+ }
+
+ @Test
+ public void isCompacted() {
+ assertTrue(entry.isCompacted());
+ }
+
+ @Test
+ public void getUuid() {
+ assertSame("The same UUID instance must be returned for different
calls", entry.getUuid(), entry.getUuid());
+ assertEquals(entry.getMsb(), entry.getUuid().getMostSignificantBits());
+ assertEquals(entry.getLsb(),
entry.getUuid().getLeastSignificantBits());
+ }
+}
diff --git
a/oak-segment-tar/src/main/java/org/apache/jackrabbit/oak/segment/file/tar/GCGeneration.java
b/oak-segment-tar/src/main/java/org/apache/jackrabbit/oak/segment/file/tar/GCGeneration.java
index d9784d66a9..176f3f0433 100644
---
a/oak-segment-tar/src/main/java/org/apache/jackrabbit/oak/segment/file/tar/GCGeneration.java
+++
b/oak-segment-tar/src/main/java/org/apache/jackrabbit/oak/segment/file/tar/GCGeneration.java
@@ -19,7 +19,12 @@ package org.apache.jackrabbit.oak.segment.file.tar;
import static java.util.Objects.requireNonNull;
+import java.lang.ref.WeakReference;
+import java.util.Collections;
+import java.util.Iterator;
import java.util.Objects;
+import java.util.Set;
+import java.util.concurrent.ConcurrentHashMap;
import org.apache.jackrabbit.oak.segment.spi.persistence.SegmentArchiveEntry;
import org.jetbrains.annotations.NotNull;
@@ -54,12 +59,31 @@ public final class GCGeneration {
public static final GCGeneration NULL = new GCGeneration(0, 0, false);
+ private static final Set<WeakReference<GCGeneration>> gcGenerations =
Collections.newSetFromMap(new ConcurrentHashMap<>());
+
public static GCGeneration newGCGeneration(int generation, int
fullGeneration, boolean isCompacted) {
- return new GCGeneration(generation, fullGeneration, isCompacted);
+ Iterator<WeakReference<GCGeneration>> iterator =
gcGenerations.iterator();
+ GCGeneration gen = null;
+ while(iterator.hasNext()) {
+ WeakReference<GCGeneration> next = iterator.next();
+ GCGeneration gcGeneration = next.get();
+ if (gcGeneration == null) {
+ iterator.remove();
+ } else if (gcGeneration.generation == generation
+ && gcGeneration.fullGeneration == fullGeneration
+ && gcGeneration.isCompacted == isCompacted) {
+ gen = gcGeneration;
+ }
+ }
+ if (gen == null) {
+ gen = new GCGeneration(generation, fullGeneration, isCompacted);
+ gcGenerations.add(new WeakReference<>(gen));
+ }
+ return gen;
}
public static GCGeneration newGCGeneration(SegmentArchiveEntry indexEntry)
{
- return new GCGeneration(indexEntry.getGeneration(),
indexEntry.getFullGeneration(), indexEntry.isCompacted());
+ return newGCGeneration(indexEntry.getGeneration(),
indexEntry.getFullGeneration(), indexEntry.isCompacted());
}
private final int generation;
diff --git
a/oak-segment-tar/src/main/java/org/apache/jackrabbit/oak/segment/file/tar/TarReader.java
b/oak-segment-tar/src/main/java/org/apache/jackrabbit/oak/segment/file/tar/TarReader.java
index 28f2e9f396..9f2575f5b1 100644
---
a/oak-segment-tar/src/main/java/org/apache/jackrabbit/oak/segment/file/tar/TarReader.java
+++
b/oak-segment-tar/src/main/java/org/apache/jackrabbit/oak/segment/file/tar/TarReader.java
@@ -39,7 +39,6 @@ import java.util.TreeMap;
import java.util.UUID;
import java.util.function.Consumer;
import java.util.function.Predicate;
-import java.util.stream.Collectors;
import org.apache.jackrabbit.oak.commons.Buffer;
import org.apache.jackrabbit.oak.segment.Segment;
@@ -283,10 +282,7 @@ public class TarReader implements Closeable {
private TarReader(SegmentArchiveManager archiveManager,
SegmentArchiveReader archive) {
this.archiveManager = archiveManager;
this.archive = archive;
- this.segmentUUIDs = archive.listSegments()
- .stream()
- .map(e -> new UUID(e.getMsb(), e.getLsb()))
- .collect(Collectors.toUnmodifiableSet());
+ this.segmentUUIDs = archive.getSegmentUUIDs();
}
long size() {
diff --git
a/oak-segment-tar/src/main/java/org/apache/jackrabbit/oak/segment/spi/persistence/SegmentArchiveReader.java
b/oak-segment-tar/src/main/java/org/apache/jackrabbit/oak/segment/spi/persistence/SegmentArchiveReader.java
index 3ac897e65b..8b22150516 100644
---
a/oak-segment-tar/src/main/java/org/apache/jackrabbit/oak/segment/spi/persistence/SegmentArchiveReader.java
+++
b/oak-segment-tar/src/main/java/org/apache/jackrabbit/oak/segment/spi/persistence/SegmentArchiveReader.java
@@ -21,6 +21,9 @@ package org.apache.jackrabbit.oak.segment.spi.persistence;
import java.io.Closeable;
import java.io.IOException;
import java.util.List;
+import java.util.Set;
+import java.util.UUID;
+import java.util.stream.Collectors;
import org.apache.jackrabbit.oak.commons.Buffer;
import org.apache.jackrabbit.oak.segment.file.tar.SegmentGraph;
@@ -52,6 +55,19 @@ public interface SegmentArchiveReader extends Closeable {
*/
boolean containsSegment(long msb, long lsb);
+ /**
+ * Returns an immutable {@code Set} of the UUIDs of all segments contained in this archive.
+ * No guarantees are made regarding the iteration order of the elements.
+ *
+ * @return set of segment UUIDs
+ */
+ default Set<UUID> getSegmentUUIDs() {
+ return listSegments()
+ .stream()
+ .map(e -> new UUID(e.getMsb(), e.getLsb()))
+ .collect(Collectors.toUnmodifiableSet());
+ }
+
/**
* List all the segments, in the order as they have been written to the archive.
*
diff --git a/oak-segment-tar/src/main/java/org/apache/jackrabbit/oak/segment/spi/persistence/persistentcache/CachingSegmentArchiveReader.java b/oak-segment-tar/src/main/java/org/apache/jackrabbit/oak/segment/spi/persistence/persistentcache/CachingSegmentArchiveReader.java
index 8ef42fbf4d..f81eaee782 100644
--- a/oak-segment-tar/src/main/java/org/apache/jackrabbit/oak/segment/spi/persistence/persistentcache/CachingSegmentArchiveReader.java
+++ b/oak-segment-tar/src/main/java/org/apache/jackrabbit/oak/segment/spi/persistence/persistentcache/CachingSegmentArchiveReader.java
@@ -27,6 +27,8 @@ import org.jetbrains.annotations.Nullable;
import java.io.IOException;
import java.util.List;
+import java.util.Set;
+import java.util.UUID;
public class CachingSegmentArchiveReader implements SegmentArchiveReader {
@@ -63,6 +65,11 @@ public class CachingSegmentArchiveReader implements SegmentArchiveReader {
return delegate.listSegments();
}
+ @Override
+ public Set<UUID> getSegmentUUIDs() {
+ return delegate.getSegmentUUIDs();
+ }
+
@Override
public @NotNull SegmentGraph getGraph() throws IOException {
return delegate.getGraph();
diff --git a/oak-segment-tar/src/main/java/org/apache/jackrabbit/oak/segment/spi/persistence/persistentcache/package-info.java b/oak-segment-tar/src/main/java/org/apache/jackrabbit/oak/segment/spi/persistence/persistentcache/package-info.java
index 611279bc26..0ad0171f1b 100644
--- a/oak-segment-tar/src/main/java/org/apache/jackrabbit/oak/segment/spi/persistence/persistentcache/package-info.java
+++ b/oak-segment-tar/src/main/java/org/apache/jackrabbit/oak/segment/spi/persistence/persistentcache/package-info.java
@@ -15,7 +15,7 @@
* limitations under the License.
*/
@Internal(since = "1.0.0")
-@Version("6.0.0")
+@Version("6.1.0")
package org.apache.jackrabbit.oak.segment.spi.persistence.persistentcache;
import org.apache.jackrabbit.oak.commons.annotations.Internal;
diff --git a/oak-segment-tar/src/main/java/org/apache/jackrabbit/oak/segment/spi/persistence/split/UnclosedSegmentArchiveReader.java b/oak-segment-tar/src/main/java/org/apache/jackrabbit/oak/segment/spi/persistence/split/UnclosedSegmentArchiveReader.java
index d6902c757d..f50ffd757c 100644
--- a/oak-segment-tar/src/main/java/org/apache/jackrabbit/oak/segment/spi/persistence/split/UnclosedSegmentArchiveReader.java
+++ b/oak-segment-tar/src/main/java/org/apache/jackrabbit/oak/segment/spi/persistence/split/UnclosedSegmentArchiveReader.java
@@ -18,6 +18,8 @@ package org.apache.jackrabbit.oak.segment.spi.persistence.split;
import java.io.IOException;
import java.util.List;
+import java.util.Set;
+import java.util.UUID;
import org.apache.jackrabbit.oak.commons.Buffer;
import org.apache.jackrabbit.oak.segment.file.tar.SegmentGraph;
@@ -52,6 +54,11 @@ class UnclosedSegmentArchiveReader implements SegmentArchiveReader {
return delegate.listSegments();
}
+ @Override
+ public Set<UUID> getSegmentUUIDs() {
+ return delegate.getSegmentUUIDs();
+ }
+
@Override
public @NotNull SegmentGraph getGraph() throws IOException {
return delegate.getGraph();