This is an automated email from the ASF dual-hosted git repository.

jsedding pushed a commit to branch jsedding/OAK-12070-segment-remote-memory-consumption
in repository https://gitbox.apache.org/repos/asf/jackrabbit-oak.git
commit 90fd2a185863bee5db9b5ca680335e58bb7df776 Author: Julian Sedding <[email protected]> AuthorDate: Wed Feb 4 16:15:18 2026 +0100 OAK-12070 - Reduce memory consumption of azure segment stores - restores changes reverted in commit 9bfbce0b7dd8d457a5640dd97545f140931e7b44. --- .../oak/segment/aws/AwsSegmentArchiveReader.java | 63 +++----- .../segment/azure/AzureSegmentArchiveReader.java | 50 ++---- .../azure/v8/AzureSegmentArchiveReaderV8.java | 43 ++--- .../remote/AbstractRemoteSegmentArchiveReader.java | 105 ++++++++++-- .../remote/AbstractRemoteSegmentArchiveWriter.java | 6 +- .../segment/remote/RemoteSegmentArchiveEntry.java | 34 ++-- .../oak/segment/remote/package-info.java | 2 +- .../AbstractRemoteSegmentArchiveReaderTest.java | 176 +++++++++++++++++++++ .../remote/RemoteSegmentArchiveEntryTest.java | 72 +++++++++ .../jackrabbit/oak/segment/file/tar/TarReader.java | 6 +- .../oak/segment/spi/persistence/GCGeneration.java | 27 +++- .../spi/persistence/SegmentArchiveReader.java | 16 ++ .../CachingSegmentArchiveReader.java | 7 + .../persistence/persistentcache/package-info.java | 2 +- .../split/UnclosedSegmentArchiveReader.java | 7 + .../segment/spi/persistence/GCGenerationTest.java | 46 ++++++ 16 files changed, 520 insertions(+), 142 deletions(-) diff --git a/oak-segment-aws/src/main/java/org/apache/jackrabbit/oak/segment/aws/AwsSegmentArchiveReader.java b/oak-segment-aws/src/main/java/org/apache/jackrabbit/oak/segment/aws/AwsSegmentArchiveReader.java index 9811cbe380..7b9fb3a97d 100644 --- a/oak-segment-aws/src/main/java/org/apache/jackrabbit/oak/segment/aws/AwsSegmentArchiveReader.java +++ b/oak-segment-aws/src/main/java/org/apache/jackrabbit/oak/segment/aws/AwsSegmentArchiveReader.java @@ -20,7 +20,8 @@ import static org.apache.jackrabbit.oak.segment.remote.RemoteUtilities.OFF_HEAP; import java.io.File; import java.io.IOException; -import java.util.UUID; +import java.util.Iterator; +import java.util.NoSuchElementException; import 
org.apache.jackrabbit.oak.commons.Buffer; import org.apache.jackrabbit.oak.segment.remote.AbstractRemoteSegmentArchiveReader; @@ -31,47 +32,35 @@ public class AwsSegmentArchiveReader extends AbstractRemoteSegmentArchiveReader private final S3Directory directory; - private final String archiveName; - - private final long length; - AwsSegmentArchiveReader(S3Directory directory, String archiveName, IOMonitor ioMonitor) throws IOException { - super(ioMonitor); + super(ioMonitor, archiveName, createEntryIterable(directory, archiveName)); this.directory = directory; - this.archiveName = archiveName; - this.length = computeArchiveIndexAndLength(); - } - - @Override - public long length() { - return length; - } - - @Override - public String getName() { - return archiveName; } - @Override - protected long computeArchiveIndexAndLength() throws IOException { - long length = 0; + private static Iterable<ArchiveEntry> createEntryIterable(S3Directory directory, String archiveName) throws IOException{ Buffer buffer = directory.readObjectToBuffer(archiveName + ".idx", OFF_HEAP); - while (buffer.hasRemaining()) { - long msb = buffer.getLong(); - long lsb = buffer.getLong(); - int position = buffer.getInt(); - int contentLength = buffer.getInt(); - int generation = buffer.getInt(); - int fullGeneration = buffer.getInt(); - boolean compacted = buffer.get() != 0; - - RemoteSegmentArchiveEntry indexEntry = new RemoteSegmentArchiveEntry(msb, lsb, position, contentLength, - generation, fullGeneration, compacted); - index.put(new UUID(indexEntry.getMsb(), indexEntry.getLsb()), indexEntry); - length += contentLength; - } - - return length; + return () -> new Iterator<>() { + @Override + public boolean hasNext() { + return buffer.hasRemaining(); + } + + @Override + public ArchiveEntry next() { + if (!hasNext()) { + throw new NoSuchElementException(); + } + + long msb = buffer.getLong(); + long lsb = buffer.getLong(); + int position = buffer.getInt(); + int contentLength = buffer.getInt(); + 
int generation = buffer.getInt(); + int fullGeneration = buffer.getInt(); + boolean compacted = buffer.get() != 0; + return new ArchiveEntry(new RemoteSegmentArchiveEntry(msb, lsb, position, contentLength, generation, fullGeneration, compacted)); + } + }; } @Override diff --git a/oak-segment-azure/src/main/java/org/apache/jackrabbit/oak/segment/azure/AzureSegmentArchiveReader.java b/oak-segment-azure/src/main/java/org/apache/jackrabbit/oak/segment/azure/AzureSegmentArchiveReader.java index 18ca18f0d6..7a8be8198d 100644 --- a/oak-segment-azure/src/main/java/org/apache/jackrabbit/oak/segment/azure/AzureSegmentArchiveReader.java +++ b/oak-segment-azure/src/main/java/org/apache/jackrabbit/oak/segment/azure/AzureSegmentArchiveReader.java @@ -17,19 +17,17 @@ package org.apache.jackrabbit.oak.segment.azure; import com.azure.storage.blob.BlobContainerClient; -import com.azure.storage.blob.models.BlobItem; import com.azure.storage.blob.models.BlobStorageException; import com.azure.storage.blob.models.ListBlobsOptions; import com.azure.storage.blob.specialized.BlockBlobClient; import org.apache.jackrabbit.oak.commons.Buffer; import org.apache.jackrabbit.oak.segment.remote.AbstractRemoteSegmentArchiveReader; -import org.apache.jackrabbit.oak.segment.remote.RemoteSegmentArchiveEntry; import org.apache.jackrabbit.oak.segment.spi.monitor.IOMonitor; +import org.jetbrains.annotations.NotNull; import java.io.File; import java.io.IOException; import java.util.Map; -import java.util.UUID; import static org.apache.jackrabbit.oak.segment.azure.AzureUtilities.readBufferFully; @@ -37,45 +35,29 @@ public class AzureSegmentArchiveReader extends AbstractRemoteSegmentArchiveReade private final BlobContainerClient blobContainerClient; - private final long length; - - private final String archiveName; - private final String archivePathPrefix; - AzureSegmentArchiveReader(BlobContainerClient blobContainerClient, String rootPrefix, String archiveName, IOMonitor ioMonitor) throws IOException { - 
super(ioMonitor); + AzureSegmentArchiveReader(BlobContainerClient blobContainerClient, String rootPrefix, String archiveName, IOMonitor ioMonitor) { + super(ioMonitor, AzureUtilities.ensureNoTrailingSlash(archiveName), + createEntryIterable(blobContainerClient, AzureUtilities.asAzurePrefix(rootPrefix, archiveName))); this.blobContainerClient = blobContainerClient; - this.archiveName = AzureUtilities.ensureNoTrailingSlash(archiveName); this.archivePathPrefix = AzureUtilities.asAzurePrefix(rootPrefix, archiveName); - this.length = computeArchiveIndexAndLength(); - } - - @Override - public long length() { - return length; - } - - @Override - public String getName() { - return archiveName; } - @Override - protected long computeArchiveIndexAndLength() throws IOException { - long length = 0; + private static Iterable<ArchiveEntry> createEntryIterable(BlobContainerClient blobContainerClient, @NotNull String archivePathPrefix) { ListBlobsOptions listBlobsOptions = new ListBlobsOptions(); listBlobsOptions.setPrefix(archivePathPrefix); - for (BlobItem blob : AzureUtilities.getBlobs(blobContainerClient, listBlobsOptions)) { - Map<String, String> metadata = blob.getMetadata(); - if (AzureBlobMetadata.isSegment(metadata)) { - RemoteSegmentArchiveEntry indexEntry = AzureBlobMetadata.toIndexEntry(metadata, blob.getProperties().getContentLength().intValue()); - index.put(new UUID(indexEntry.getMsb(), indexEntry.getLsb()), indexEntry); - } - length += blob.getProperties().getContentLength(); - } - - return length; + return AzureUtilities.getBlobs(blobContainerClient, listBlobsOptions).stream() + .map(blobItem -> { + Map<String, String> metadata = blobItem.getMetadata(); + int length = blobItem.getProperties().getContentLength().intValue(); + if (AzureBlobMetadata.isSegment(metadata)) { + return new ArchiveEntry(AzureBlobMetadata.toIndexEntry(metadata, length)); + } else { + return new ArchiveEntry(length); + } + }) + ::iterator; } @Override diff --git 
a/oak-segment-azure/src/main/java/org/apache/jackrabbit/oak/segment/azure/v8/AzureSegmentArchiveReaderV8.java b/oak-segment-azure/src/main/java/org/apache/jackrabbit/oak/segment/azure/v8/AzureSegmentArchiveReaderV8.java index de69711b6e..7242de4fe2 100644 --- a/oak-segment-azure/src/main/java/org/apache/jackrabbit/oak/segment/azure/v8/AzureSegmentArchiveReaderV8.java +++ b/oak-segment-azure/src/main/java/org/apache/jackrabbit/oak/segment/azure/v8/AzureSegmentArchiveReaderV8.java @@ -22,54 +22,37 @@ import java.io.File; import java.io.IOException; import java.net.URISyntaxException; import java.util.Map; -import java.util.UUID; import com.microsoft.azure.storage.StorageException; -import com.microsoft.azure.storage.blob.CloudBlob; import com.microsoft.azure.storage.blob.CloudBlobDirectory; import com.microsoft.azure.storage.blob.CloudBlockBlob; import org.apache.jackrabbit.oak.commons.Buffer; import org.apache.jackrabbit.oak.segment.azure.AzureBlobMetadata; import org.apache.jackrabbit.oak.segment.remote.AbstractRemoteSegmentArchiveReader; -import org.apache.jackrabbit.oak.segment.remote.RemoteSegmentArchiveEntry; import org.apache.jackrabbit.oak.segment.spi.monitor.IOMonitor; public class AzureSegmentArchiveReaderV8 extends AbstractRemoteSegmentArchiveReader { private final CloudBlobDirectory archiveDirectory; - private final long length; - protected AzureSegmentArchiveReaderV8(CloudBlobDirectory archiveDirectory, IOMonitor ioMonitor) throws IOException { - super(ioMonitor); + super(ioMonitor, AzureUtilitiesV8.getName(archiveDirectory), createEntryIterable(archiveDirectory)); this.archiveDirectory = archiveDirectory; - this.length = computeArchiveIndexAndLength(); - } - - @Override - public long length() { - return length; } - @Override - public String getName() { - return AzureUtilitiesV8.getName(archiveDirectory); - } - - @Override - protected long computeArchiveIndexAndLength() throws IOException { - long length = 0; - for (CloudBlob blob : 
AzureUtilitiesV8.getBlobs(archiveDirectory)) { - Map<String, String> metadata = blob.getMetadata(); - if (AzureBlobMetadata.isSegment(metadata)) { - RemoteSegmentArchiveEntry indexEntry = AzureBlobMetadata.toIndexEntry(metadata, (int) blob.getProperties().getLength()); - index.put(new UUID(indexEntry.getMsb(), indexEntry.getLsb()), indexEntry); - } - length += blob.getProperties().getLength(); - } - - return length; + private static Iterable<ArchiveEntry> createEntryIterable(CloudBlobDirectory archiveDirectory) throws IOException { + return AzureUtilitiesV8.getBlobs(archiveDirectory).stream() + .map(blob -> { + Map<String, String> metadata = blob.getMetadata(); + int length = (int) blob.getProperties().getLength(); + if (AzureBlobMetadata.isSegment(metadata)) { + return new ArchiveEntry(AzureBlobMetadata.toIndexEntry(metadata, length)); + } else { + return new ArchiveEntry(length); + } + }) + ::iterator; } @Override diff --git a/oak-segment-remote/src/main/java/org/apache/jackrabbit/oak/segment/remote/AbstractRemoteSegmentArchiveReader.java b/oak-segment-remote/src/main/java/org/apache/jackrabbit/oak/segment/remote/AbstractRemoteSegmentArchiveReader.java index fe99490e18..a289d39ee7 100644 --- a/oak-segment-remote/src/main/java/org/apache/jackrabbit/oak/segment/remote/AbstractRemoteSegmentArchiveReader.java +++ b/oak-segment-remote/src/main/java/org/apache/jackrabbit/oak/segment/remote/AbstractRemoteSegmentArchiveReader.java @@ -30,20 +30,53 @@ import org.jetbrains.annotations.Nullable; import java.io.File; import java.io.IOException; -import java.util.ArrayList; -import java.util.LinkedHashMap; +import java.util.Collections; +import java.util.Comparator; +import java.util.LinkedList; import java.util.List; import java.util.Map; +import java.util.Set; import java.util.UUID; import java.util.concurrent.TimeUnit; +import java.util.stream.Collectors; public abstract class AbstractRemoteSegmentArchiveReader implements SegmentArchiveReader { + protected final IOMonitor 
ioMonitor; - protected final Map<UUID, RemoteSegmentArchiveEntry> index = new LinkedHashMap<>(); + /** + * Unordered immutable map of segment UUIDs to their corresponding archive entries. + */ + private final Map<UUID, RemoteSegmentArchiveEntry> index; + + /** + * The name of the archive. + */ + private final String archiveName; - public AbstractRemoteSegmentArchiveReader(IOMonitor ioMonitor) throws IOException { + /** + * The total size of the archive in bytes. + */ + private final long length; + + protected AbstractRemoteSegmentArchiveReader(IOMonitor ioMonitor, String archiveName, Iterable<ArchiveEntry> entries) { this.ioMonitor = ioMonitor; + this.archiveName = archiveName; + + IndexBuilder indexBuilder = new IndexBuilder(); + entries.forEach(indexBuilder::addEntry); + this.index = indexBuilder.createIndex(); + this.length = indexBuilder.getLength(); + } + + @Override + public @NotNull String getName() { + return archiveName; + } + + @Override + public long length() { + return length; } @Override @@ -73,9 +106,16 @@ public abstract class AbstractRemoteSegmentArchiveReader implements SegmentArchi return index.containsKey(new UUID(msb, lsb)); } + @Override + public Set<UUID> getSegmentUUIDs() { + return Collections.unmodifiableSet(index.keySet()); + } + @Override public List<SegmentArchiveEntry> listSegments() { - return new ArrayList<>(index.values()); + return index.values().stream() + .sorted(Comparator.comparing(RemoteSegmentArchiveEntry::getPosition)) + .collect(Collectors.toList()); } @Override @@ -102,12 +142,6 @@ public abstract class AbstractRemoteSegmentArchiveReader implements SegmentArchi return size; } - /** - * Populates the archive index, summing up each entry's length. - * @return length, the total length of the archive - */ - protected abstract long computeArchiveIndexAndLength() throws IOException; - /** * Reads the segment from the remote storage. 
* @param segmentFileName, the name of the segment (msb + lsb) prefixed by its position in the archive @@ -132,4 +166,53 @@ public abstract class AbstractRemoteSegmentArchiveReader implements SegmentArchi public boolean isRemote() { return true; } + + protected static final class ArchiveEntry { + + private final RemoteSegmentArchiveEntry entry; + + private final int length; + + public ArchiveEntry(RemoteSegmentArchiveEntry entry) { + this.entry = entry; + this.length = entry.getLength(); + } + + public ArchiveEntry(int length) { + this.entry = null; + this.length = length; + } + + int getLength() { + return length; + } + + RemoteSegmentArchiveEntry getRemoteSegmentArchiveEntry() { + return entry; + } + } + + private static final class IndexBuilder { + + private final List<Map.Entry<UUID, RemoteSegmentArchiveEntry>> entries = new LinkedList<>(); + + private long length = 0; + + private void addEntry(ArchiveEntry entry) { + RemoteSegmentArchiveEntry archiveEntry = entry.getRemoteSegmentArchiveEntry(); + if (archiveEntry != null) { + this.entries.add(Map.entry(archiveEntry.getUuid(), archiveEntry)); + } + this.length += entry.getLength(); + } + + @SuppressWarnings("unchecked") + private Map<UUID, RemoteSegmentArchiveEntry> createIndex() { + return Map.ofEntries(entries.toArray(Map.Entry[]::new)); + } + + private long getLength() { + return length; + } + } } diff --git a/oak-segment-remote/src/main/java/org/apache/jackrabbit/oak/segment/remote/AbstractRemoteSegmentArchiveWriter.java b/oak-segment-remote/src/main/java/org/apache/jackrabbit/oak/segment/remote/AbstractRemoteSegmentArchiveWriter.java index eee6d40cbf..3e1964bb26 100644 --- a/oak-segment-remote/src/main/java/org/apache/jackrabbit/oak/segment/remote/AbstractRemoteSegmentArchiveWriter.java +++ b/oak-segment-remote/src/main/java/org/apache/jackrabbit/oak/segment/remote/AbstractRemoteSegmentArchiveWriter.java @@ -66,7 +66,7 @@ public abstract class AbstractRemoteSegmentArchiveWriter implements SegmentArchi } 
else { doWriteArchiveEntry(entry, data, offset, size); } - index.put(new UUID(msb, lsb), entry); + index.put(entry.getUuid(), entry); totalLength += size; monitor.written(size); @@ -80,7 +80,7 @@ public abstract class AbstractRemoteSegmentArchiveWriter implements SegmentArchi return segment.get().toBuffer(); } - RemoteSegmentArchiveEntry indexEntry = index.get(new UUID(msb, lsb)); + RemoteSegmentArchiveEntry indexEntry = index.get(uuid); if (indexEntry == null) { return null; } @@ -95,7 +95,7 @@ public abstract class AbstractRemoteSegmentArchiveWriter implements SegmentArchi if (segment.isPresent()) { return true; } - return index.containsKey(new UUID(msb, lsb)); + return index.containsKey(uuid); } @Override diff --git a/oak-segment-remote/src/main/java/org/apache/jackrabbit/oak/segment/remote/RemoteSegmentArchiveEntry.java b/oak-segment-remote/src/main/java/org/apache/jackrabbit/oak/segment/remote/RemoteSegmentArchiveEntry.java index f14f793ac7..c3cdafbcf3 100644 --- a/oak-segment-remote/src/main/java/org/apache/jackrabbit/oak/segment/remote/RemoteSegmentArchiveEntry.java +++ b/oak-segment-remote/src/main/java/org/apache/jackrabbit/oak/segment/remote/RemoteSegmentArchiveEntry.java @@ -16,42 +16,36 @@ */ package org.apache.jackrabbit.oak.segment.remote; +import org.apache.jackrabbit.oak.segment.file.tar.GCGeneration; import org.apache.jackrabbit.oak.segment.spi.persistence.SegmentArchiveEntry; -public class RemoteSegmentArchiveEntry implements SegmentArchiveEntry { +import java.util.UUID; - private final long msb; +public class RemoteSegmentArchiveEntry implements SegmentArchiveEntry { - private final long lsb; + private final UUID uuid; private final int position; private final int length; - private final int generation; - - private final int fullGeneration; - - private final boolean compacted; + private final GCGeneration gcGeneration; public RemoteSegmentArchiveEntry(long msb, long lsb, int position, int length, int generation, int fullGeneration, boolean 
compacted) { - this.msb = msb; - this.lsb = lsb; + this.uuid = new UUID(msb, lsb); this.position = position; this.length = length; - this.generation = generation; - this.fullGeneration = fullGeneration; - this.compacted = compacted; + this.gcGeneration = GCGeneration.newGCGeneration(generation, fullGeneration, compacted); } @Override public long getMsb() { - return msb; + return uuid.getMostSignificantBits(); } @Override public long getLsb() { - return lsb; + return uuid.getLeastSignificantBits(); } public int getPosition() { @@ -65,16 +59,20 @@ public class RemoteSegmentArchiveEntry implements SegmentArchiveEntry { @Override public int getGeneration() { - return generation; + return gcGeneration.getGeneration(); } @Override public int getFullGeneration() { - return fullGeneration; + return gcGeneration.getFullGeneration(); } @Override public boolean isCompacted() { - return compacted; + return gcGeneration.isCompacted(); + } + + UUID getUuid() { + return uuid; } } diff --git a/oak-segment-remote/src/main/java/org/apache/jackrabbit/oak/segment/remote/package-info.java b/oak-segment-remote/src/main/java/org/apache/jackrabbit/oak/segment/remote/package-info.java index fd64aa03ce..5c70f9fbe8 100644 --- a/oak-segment-remote/src/main/java/org/apache/jackrabbit/oak/segment/remote/package-info.java +++ b/oak-segment-remote/src/main/java/org/apache/jackrabbit/oak/segment/remote/package-info.java @@ -15,7 +15,7 @@ * limitations under the License. 
*/ @Internal(since = "1.0.0") -@Version("2.0.0") +@Version("3.0.0") package org.apache.jackrabbit.oak.segment.remote; import org.apache.jackrabbit.oak.commons.annotations.Internal; diff --git a/oak-segment-remote/src/test/java/org/apache/jackrabbit/oak/segment/remote/AbstractRemoteSegmentArchiveReaderTest.java b/oak-segment-remote/src/test/java/org/apache/jackrabbit/oak/segment/remote/AbstractRemoteSegmentArchiveReaderTest.java new file mode 100644 index 0000000000..09902031c7 --- /dev/null +++ b/oak-segment-remote/src/test/java/org/apache/jackrabbit/oak/segment/remote/AbstractRemoteSegmentArchiveReaderTest.java @@ -0,0 +1,176 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ +package org.apache.jackrabbit.oak.segment.remote; + +import org.apache.jackrabbit.oak.commons.Buffer; +import org.apache.jackrabbit.oak.segment.file.tar.SegmentGraph; +import org.apache.jackrabbit.oak.segment.remote.AbstractRemoteSegmentArchiveReader.ArchiveEntry; +import org.apache.jackrabbit.oak.segment.spi.monitor.IOMonitorAdapter; +import org.junit.Before; +import org.junit.Test; + +import java.io.File; +import java.io.IOException; +import java.util.ArrayList; +import java.util.Comparator; +import java.util.List; +import java.util.Random; +import java.util.Set; +import java.util.UUID; + +import static org.junit.Assert.assertEquals; +import static org.junit.Assert.assertFalse; +import static org.junit.Assert.assertNotNull; +import static org.junit.Assert.assertNull; +import static org.junit.Assert.assertTrue; +import static org.junit.Assert.fail; + +public class AbstractRemoteSegmentArchiveReaderTest { + + private static class TestSegmentArchiveReader extends AbstractRemoteSegmentArchiveReader { + + public TestSegmentArchiveReader(String archiveName, Iterable<ArchiveEntry> entries) { + super(new IOMonitorAdapter(), archiveName, entries); + } + + @Override + protected void doReadSegmentToBuffer(String segmentFileName, Buffer buffer) throws IOException { + for (int i = 0; i < buffer.limit(); i++) { + buffer.put((byte) 1); + } + } + + @Override + protected Buffer doReadDataFile(String extension) throws IOException { + return null; + } + + @Override + protected File archivePathAsFile() { + return new File(getName()); + } + + } + + private static final List<UUID> SEGMENT_UUIDS = List.of( + new UUID(0L, 0L), + new UUID(0L, 1L), + new UUID(0L, 2L), + new UUID(0L, 3L), + new UUID(0L, 4L) + ); + + private TestSegmentArchiveReader reader; + + @Before + public void setup() { + + ArrayList<ArchiveEntry> archiveEntries = new ArrayList<>(SEGMENT_UUIDS.size() + 2); + archiveEntries.add(new ArchiveEntry(15)); + archiveEntries.add(new ArchiveEntry(35)); + for (int i = 0; i 
< SEGMENT_UUIDS.size(); i++) { + UUID uuid = SEGMENT_UUIDS.get(i); + archiveEntries.add(new ArchiveEntry(new RemoteSegmentArchiveEntry(uuid.getMostSignificantBits(), uuid.getLeastSignificantBits(), i, 20, 0, 0, true))); + } + + // sort in random order to make sure the reader sorts them correctly by their position + Random random = new Random(); + archiveEntries.sort(Comparator.comparing(e -> random.nextInt(2) - 1)); + + reader = new TestSegmentArchiveReader("data00000a.tar", archiveEntries); + } + + @Test + public void testReadSegment() throws IOException { + Buffer buffer = reader.readSegment(0L, 1L); + assertNotNull(buffer); + assertEquals(20, buffer.limit()); + for (int i = 0; i < buffer.limit(); i++) { + assertEquals(1, buffer.get(i)); + } + } + + @Test + public void testReadNonExistentSegment() throws IOException { + Buffer buffer = reader.readSegment(1L, 3L); + assertNull(buffer); + } + + @Test + public void testGetArchiveSize() { + assertEquals(150, reader.length()); + } + + @Test + public void testIsRemote() { + assertTrue(reader.isRemote()); + } + + @Test + public void testGetEntrySize() { + assertEquals(10, reader.getEntrySize(10)); + assertEquals(20, reader.getEntrySize(20)); + } + + @Test + public void testGetBinaryReferences() throws IOException { + assertNull(reader.getBinaryReferences()); + } + + @Test + public void testGetGraph() throws IOException { + SegmentGraph graph = reader.getGraph(); + assertNotNull(graph); + } + + @Test + public void testGetName() { + assertEquals("data00000a.tar", reader.getName()); + } + + @Test + public void testClose() { + try { + reader.close(); + } catch (Exception e) { + fail("Close should not throw an exception"); + } + } + + @Test + public void testContainsSegment() { + SEGMENT_UUIDS.forEach(uuid -> assertTrue(reader.containsSegment(uuid.getMostSignificantBits(), uuid.getLeastSignificantBits()))); + assertFalse(reader.containsSegment(1L, 3L)); + } + + @Test + public void testListSegments() { + var segments = 
reader.listSegments(); + assertEquals(5, segments.size()); + segments.forEach(e -> assertEquals(0L, e.getMsb())); + for (int i = 0; i < segments.size(); i++) { + assertEquals(i, segments.get(i).getLsb()); // LSBs are set up to be the same as the position + } + } + + @Test + public void testGetSegmentUUIDs() { + assertEquals(Set.copyOf(SEGMENT_UUIDS), reader.getSegmentUUIDs()); + } +} diff --git a/oak-segment-remote/src/test/java/org/apache/jackrabbit/oak/segment/remote/RemoteSegmentArchiveEntryTest.java b/oak-segment-remote/src/test/java/org/apache/jackrabbit/oak/segment/remote/RemoteSegmentArchiveEntryTest.java new file mode 100644 index 0000000000..9cf66524cb --- /dev/null +++ b/oak-segment-remote/src/test/java/org/apache/jackrabbit/oak/segment/remote/RemoteSegmentArchiveEntryTest.java @@ -0,0 +1,72 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ +package org.apache.jackrabbit.oak.segment.remote; + +import org.junit.Test; + +import static org.junit.Assert.assertEquals; +import static org.junit.Assert.assertSame; +import static org.junit.Assert.assertTrue; + +public class RemoteSegmentArchiveEntryTest { + + private final RemoteSegmentArchiveEntry entry = new RemoteSegmentArchiveEntry(1L, 2L, 0, 128, 3, 4, true); + + @Test + public void getMsb() { + assertEquals(1, entry.getMsb()); + } + + @Test + public void getLsb() { + assertEquals(2, entry.getLsb()); + } + + @Test + public void getPosition() { + assertEquals(0, entry.getPosition()); + } + + @Test + public void getLength() { + assertEquals(128, entry.getLength()); + } + + @Test + public void getGeneration() { + assertEquals(3, entry.getGeneration()); + } + + @Test + public void getFullGeneration() { + assertEquals(4, entry.getFullGeneration()); + } + + @Test + public void isCompacted() { + assertTrue(entry.isCompacted()); + } + + @Test + public void getUuid() { + assertSame("The same UUID instance must be returned for different calls", entry.getUuid(), entry.getUuid()); + assertEquals(entry.getMsb(), entry.getUuid().getMostSignificantBits()); + assertEquals(entry.getLsb(), entry.getUuid().getLeastSignificantBits()); + } +} diff --git a/oak-segment-tar/src/main/java/org/apache/jackrabbit/oak/segment/file/tar/TarReader.java b/oak-segment-tar/src/main/java/org/apache/jackrabbit/oak/segment/file/tar/TarReader.java index 794137f6f0..f9c7382561 100644 --- a/oak-segment-tar/src/main/java/org/apache/jackrabbit/oak/segment/file/tar/TarReader.java +++ b/oak-segment-tar/src/main/java/org/apache/jackrabbit/oak/segment/file/tar/TarReader.java @@ -39,7 +39,6 @@ import java.util.TreeMap; import java.util.UUID; import java.util.function.Consumer; import java.util.function.Predicate; -import java.util.stream.Collectors; import org.apache.jackrabbit.oak.commons.Buffer; import org.apache.jackrabbit.oak.segment.Segment; @@ -284,10 +283,7 @@ public class TarReader 
implements Closeable { private TarReader(SegmentArchiveManager archiveManager, SegmentArchiveReader archive) { this.archiveManager = archiveManager; this.archive = archive; - this.segmentUUIDs = archive.listSegments() - .stream() - .map(e -> new UUID(e.getMsb(), e.getLsb())) - .collect(Collectors.toUnmodifiableSet()); + this.segmentUUIDs = archive.getSegmentUUIDs(); } long size() { diff --git a/oak-segment-tar/src/main/java/org/apache/jackrabbit/oak/segment/spi/persistence/GCGeneration.java b/oak-segment-tar/src/main/java/org/apache/jackrabbit/oak/segment/spi/persistence/GCGeneration.java index a5a401f442..600bb7ed39 100644 --- a/oak-segment-tar/src/main/java/org/apache/jackrabbit/oak/segment/spi/persistence/GCGeneration.java +++ b/oak-segment-tar/src/main/java/org/apache/jackrabbit/oak/segment/spi/persistence/GCGeneration.java @@ -19,7 +19,11 @@ package org.apache.jackrabbit.oak.segment.spi.persistence; import static java.util.Objects.requireNonNull; +import java.lang.ref.WeakReference; +import java.util.Iterator; import java.util.Objects; +import java.util.Set; +import java.util.concurrent.ConcurrentHashMap; import org.jetbrains.annotations.NotNull; @@ -53,12 +57,31 @@ public final class GCGeneration { public static final GCGeneration NULL = new GCGeneration(0, 0, false); + private static final Set<WeakReference<GCGeneration>> gcGenerations = ConcurrentHashMap.newKeySet(); + public static GCGeneration newGCGeneration(int generation, int fullGeneration, boolean isCompacted) { - return new GCGeneration(generation, fullGeneration, isCompacted); + Iterator<WeakReference<GCGeneration>> iterator = gcGenerations.iterator(); + GCGeneration gen = null; + while(iterator.hasNext()) { + WeakReference<GCGeneration> next = iterator.next(); + GCGeneration gcGeneration = next.get(); + if (gcGeneration == null) { + iterator.remove(); + } else if (gcGeneration.generation == generation + && gcGeneration.fullGeneration == fullGeneration + && gcGeneration.isCompacted == isCompacted) 
{ + gen = gcGeneration; + } + } + if (gen == null) { + gen = new GCGeneration(generation, fullGeneration, isCompacted); + gcGenerations.add(new WeakReference<>(gen)); + } + return gen; } public static GCGeneration newGCGeneration(SegmentArchiveEntry indexEntry) { - return new GCGeneration(indexEntry.getGeneration(), indexEntry.getFullGeneration(), indexEntry.isCompacted()); + return newGCGeneration(indexEntry.getGeneration(), indexEntry.getFullGeneration(), indexEntry.isCompacted()); } private final int generation; diff --git a/oak-segment-tar/src/main/java/org/apache/jackrabbit/oak/segment/spi/persistence/SegmentArchiveReader.java b/oak-segment-tar/src/main/java/org/apache/jackrabbit/oak/segment/spi/persistence/SegmentArchiveReader.java index 3ac897e65b..8b22150516 100644 --- a/oak-segment-tar/src/main/java/org/apache/jackrabbit/oak/segment/spi/persistence/SegmentArchiveReader.java +++ b/oak-segment-tar/src/main/java/org/apache/jackrabbit/oak/segment/spi/persistence/SegmentArchiveReader.java @@ -21,6 +21,9 @@ package org.apache.jackrabbit.oak.segment.spi.persistence; import java.io.Closeable; import java.io.IOException; import java.util.List; +import java.util.Set; +import java.util.UUID; +import java.util.stream.Collectors; import org.apache.jackrabbit.oak.commons.Buffer; import org.apache.jackrabbit.oak.segment.file.tar.SegmentGraph; @@ -52,6 +55,19 @@ public interface SegmentArchiveReader extends Closeable { */ boolean containsSegment(long msb, long lsb); + /** + * Returns an immutable {@code Set} of the UUIDs of all segments contained in this archive. + * No guarantees are made regarding the iteration order of the elements. + * + * @return set of segment UUIDs + */ + default Set<UUID> getSegmentUUIDs() { + return listSegments() + .stream() + .map(e -> new UUID(e.getMsb(), e.getLsb())) + .collect(Collectors.toUnmodifiableSet()); + } + /** * List all the segments, in the order as they have been written to the archive. 
* diff --git a/oak-segment-tar/src/main/java/org/apache/jackrabbit/oak/segment/spi/persistence/persistentcache/CachingSegmentArchiveReader.java b/oak-segment-tar/src/main/java/org/apache/jackrabbit/oak/segment/spi/persistence/persistentcache/CachingSegmentArchiveReader.java index 8ef42fbf4d..f81eaee782 100644 --- a/oak-segment-tar/src/main/java/org/apache/jackrabbit/oak/segment/spi/persistence/persistentcache/CachingSegmentArchiveReader.java +++ b/oak-segment-tar/src/main/java/org/apache/jackrabbit/oak/segment/spi/persistence/persistentcache/CachingSegmentArchiveReader.java @@ -27,6 +27,8 @@ import org.jetbrains.annotations.Nullable; import java.io.IOException; import java.util.List; +import java.util.Set; +import java.util.UUID; public class CachingSegmentArchiveReader implements SegmentArchiveReader { @@ -63,6 +65,11 @@ public class CachingSegmentArchiveReader implements SegmentArchiveReader { return delegate.listSegments(); } + @Override + public Set<UUID> getSegmentUUIDs() { + return delegate.getSegmentUUIDs(); + } + @Override public @NotNull SegmentGraph getGraph() throws IOException { return delegate.getGraph(); diff --git a/oak-segment-tar/src/main/java/org/apache/jackrabbit/oak/segment/spi/persistence/persistentcache/package-info.java b/oak-segment-tar/src/main/java/org/apache/jackrabbit/oak/segment/spi/persistence/persistentcache/package-info.java index 611279bc26..0ad0171f1b 100644 --- a/oak-segment-tar/src/main/java/org/apache/jackrabbit/oak/segment/spi/persistence/persistentcache/package-info.java +++ b/oak-segment-tar/src/main/java/org/apache/jackrabbit/oak/segment/spi/persistence/persistentcache/package-info.java @@ -15,7 +15,7 @@ * limitations under the License. 
*/ @Internal(since = "1.0.0") -@Version("6.0.0") +@Version("6.1.0") package org.apache.jackrabbit.oak.segment.spi.persistence.persistentcache; import org.apache.jackrabbit.oak.commons.annotations.Internal; diff --git a/oak-segment-tar/src/main/java/org/apache/jackrabbit/oak/segment/spi/persistence/split/UnclosedSegmentArchiveReader.java b/oak-segment-tar/src/main/java/org/apache/jackrabbit/oak/segment/spi/persistence/split/UnclosedSegmentArchiveReader.java index d6902c757d..f50ffd757c 100644 --- a/oak-segment-tar/src/main/java/org/apache/jackrabbit/oak/segment/spi/persistence/split/UnclosedSegmentArchiveReader.java +++ b/oak-segment-tar/src/main/java/org/apache/jackrabbit/oak/segment/spi/persistence/split/UnclosedSegmentArchiveReader.java @@ -18,6 +18,8 @@ package org.apache.jackrabbit.oak.segment.spi.persistence.split; import java.io.IOException; import java.util.List; +import java.util.Set; +import java.util.UUID; import org.apache.jackrabbit.oak.commons.Buffer; import org.apache.jackrabbit.oak.segment.file.tar.SegmentGraph; @@ -52,6 +54,11 @@ class UnclosedSegmentArchiveReader implements SegmentArchiveReader { return delegate.listSegments(); } + @Override + public Set<UUID> getSegmentUUIDs() { + return delegate.getSegmentUUIDs(); + } + @Override public @NotNull SegmentGraph getGraph() throws IOException { return delegate.getGraph(); diff --git a/oak-segment-tar/src/test/java/org/apache/jackrabbit/oak/segment/spi/persistence/GCGenerationTest.java b/oak-segment-tar/src/test/java/org/apache/jackrabbit/oak/segment/spi/persistence/GCGenerationTest.java index 3f16d4bde7..6a2fe06f83 100644 --- a/oak-segment-tar/src/test/java/org/apache/jackrabbit/oak/segment/spi/persistence/GCGenerationTest.java +++ b/oak-segment-tar/src/test/java/org/apache/jackrabbit/oak/segment/spi/persistence/GCGenerationTest.java @@ -20,9 +20,19 @@ package org.apache.jackrabbit.oak.segment.spi.persistence; import static 
org.apache.jackrabbit.oak.segment.spi.persistence.GCGeneration.newGCGeneration; import static org.junit.Assert.assertEquals; +import static org.junit.Assert.assertNotEquals; +import static org.junit.Assert.assertSame; +import org.awaitility.Awaitility; import org.junit.Test; +import java.util.Map; +import java.util.concurrent.TimeUnit; +import java.util.function.Function; +import java.util.function.IntFunction; +import java.util.stream.Collectors; +import java.util.stream.IntStream; + public class GCGenerationTest { @Test @@ -38,4 +48,40 @@ public class GCGenerationTest { GCGeneration n = newGCGeneration(2, 3, false); assertEquals(3, n.compareFullGenerationWith(m)); } + + @Test + public void testObjectReuse() { + IntFunction<GCGeneration> gcGenerationProducer = + i -> newGCGeneration(i / 10, i / 2, i % 2 == 0); + + Map<Integer, GCGeneration> generations = IntStream.range(0, 50) + .boxed() + .collect(Collectors.toMap(Function.identity(), gcGenerationProducer::apply)); + + Map<Integer, Integer> removed = IntStream.of(5, 22, 37) + .boxed() + .collect(Collectors.toMap(Function.identity(), i -> System.identityHashCode(generations.remove(i)))); + + for (int i = 0; i < 50; i++) { + if (removed.containsKey(i)) { + final int index = i; + Awaitility.await() + .atMost(1, TimeUnit.SECONDS) + .untilAsserted(() -> { + System.gc(); + assertNotEquals( + removed.get(index).intValue(), + System.identityHashCode(gcGenerationProducer.apply(index))); + }); + + } else { + assertSame( + generations.get(i), + gcGenerationProducer.apply(i)); + assertEquals( + System.identityHashCode(generations.get(i)), + System.identityHashCode(gcGenerationProducer.apply(i))); + } + } + } }
