This is an automated email from the ASF dual-hosted git repository.
apolovtsev pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/ignite-3.git
The following commit(s) were added to refs/heads/main by this push:
new 859eab07e43 IGNITE-28218 Fix Raft log GC wrt suffix truncations (#7791)
859eab07e43 is described below
commit 859eab07e43436d35241f4d910751fd766a4494c
Author: Alexander Polovtcev <[email protected]>
AuthorDate: Tue Mar 17 12:16:25 2026 +0200
IGNITE-28218 Fix Raft log GC wrt suffix truncations (#7791)
---
.../raft/storage/segstore/GroupIndexMeta.java | 85 +++++--
.../raft/storage/segstore/IndexFileManager.java | 67 ++---
.../raft/storage/segstore/IndexFileMeta.java | 6 +-
.../raft/storage/segstore/IndexFileMetaArray.java | 34 ++-
.../storage/segstore/RaftLogGarbageCollector.java | 15 +-
.../raft/storage/segstore/GroupIndexMetaTest.java | 124 ++++++++--
.../segstore/RaftLogGarbageCollectorTest.java | 275 ++++++++++++++++-----
7 files changed, 460 insertions(+), 146 deletions(-)
diff --git
a/modules/raft/src/main/java/org/apache/ignite/internal/raft/storage/segstore/GroupIndexMeta.java
b/modules/raft/src/main/java/org/apache/ignite/internal/raft/storage/segstore/GroupIndexMeta.java
index ae386a63224..e0217504469 100644
---
a/modules/raft/src/main/java/org/apache/ignite/internal/raft/storage/segstore/GroupIndexMeta.java
+++
b/modules/raft/src/main/java/org/apache/ignite/internal/raft/storage/segstore/GroupIndexMeta.java
@@ -101,6 +101,23 @@ class GroupIndexMeta {
}
}
}
+
+ boolean onIndexRemoved(FileProperties oldProperties) {
+ while (true) {
+ IndexFileMetaArray fileMetas = this.fileMetas;
+
+ IndexFileMetaArray newFileMetas =
fileMetas.onIndexRemoved(oldProperties);
+
+ // Nothing was updated which means the array does not contain
index meta for the removed file.
+ if (fileMetas == newFileMetas) {
+ return false;
+ }
+
+ if (FILE_METAS_VH.compareAndSet(this, fileMetas,
newFileMetas)) {
+ return true;
+ }
+ }
+ }
}
/**
@@ -136,12 +153,6 @@ class GroupIndexMeta {
long newFirstLogIndex = indexFileMeta.firstLogIndexInclusive();
- assert newFirstLogIndex <= curLastLogIndex :
- String.format(
- "Gaps between Index File Metas are not allowed. Last
log index: %d, new log index: %d",
- curLastLogIndex, newFirstLogIndex
- );
-
int lastFileOrdinal = curFileMetas.lastFileProperties().ordinal();
int newFileOrdinal = indexFileMeta.indexFileProperties().ordinal();
@@ -154,7 +165,7 @@ class GroupIndexMeta {
// Merge consecutive index metas into a single meta block. If there's
an overlap (e.g. due to log truncation), start a new block,
// which will override the previous one during search.
- if (curLastLogIndex == newFirstLogIndex) {
+ if (newFirstLogIndex >= curLastLogIndex) {
curFileMetas.addIndexMeta(indexFileMeta);
} else {
fileMetaDeque.add(new IndexMetaArrayHolder(indexFileMeta));
@@ -191,17 +202,55 @@ class GroupIndexMeta {
return null;
}
+ /**
+ * Computes an "effective" index meta of the given index file. An effective
index meta has altered first and last log indices
+ * that reflect the up-to-date state of the group, rather than the indices
actually present in the file. This is needed during
+ * compaction, when we need to identify which entries have been truncated
from a file.
+ */
@Nullable
- IndexFileMeta indexMetaByFileOrdinal(int fileOrdinal) {
- for (IndexMetaArrayHolder indexMetaArrayHolder : fileMetaDeque) {
- IndexFileMeta indexMeta =
indexMetaArrayHolder.fileMetas.findByFileOrdinal(fileOrdinal);
+ IndexFileMeta effectiveIndexMetaByFileOrdinal(int fileOrdinal) {
+ Iterator<IndexMetaArrayHolder> it = fileMetaDeque.iterator();
- if (indexMeta != null) {
- return indexMeta;
- }
+ // Find the holder/meta pair containing the given file ordinal.
+ IndexMetaArrayHolder holder = null;
+
+ IndexFileMeta indexMeta = null;
+
+ while (it.hasNext() && indexMeta == null) {
+ holder = it.next();
+
+ indexMeta = holder.fileMetas.findByFileOrdinal(fileOrdinal);
}
- return null;
+ if (indexMeta == null || indexMeta.isEmpty()) {
+ return null;
+ }
+
+ // Even if index meta exists, it may still be obsolete due to suffix
truncations (unlike prefix truncations, suffix truncations
+ // do not trim the meta, but add a new meta block instead).
+ long globalFirstLogIndexInclusive = firstLogIndexInclusive();
+
+ // If there are newer blocks, they override this block's entries from their
firstLogIndexInclusive onwards. We need to find the minimum
+ // firstLogIndexInclusive across the blocks that follow the current one.
+ long globalLastLogIndexExclusive = holder.lastLogIndexExclusive();
+
+ while (it.hasNext()) {
+ globalLastLogIndexExclusive =
Math.min(globalLastLogIndexExclusive, it.next().firstLogIndexInclusive());
+ }
+
+ boolean isObsolete = indexMeta.firstLogIndexInclusive() >=
globalLastLogIndexExclusive
+ || indexMeta.lastLogIndexExclusive() <=
globalFirstLogIndexInclusive;
+
+ if (isObsolete) {
+ return null;
+ }
+
+ return new IndexFileMeta(
+ Math.max(globalFirstLogIndexInclusive,
indexMeta.firstLogIndexInclusive()),
+ Math.min(globalLastLogIndexExclusive,
indexMeta.lastLogIndexExclusive()),
+ indexMeta.indexFilePayloadOffset(),
+ indexMeta.indexFileProperties()
+ );
}
/**
@@ -251,6 +300,14 @@ class GroupIndexMeta {
}
}
+ void onIndexRemoved(FileProperties oldProperties) {
+ for (IndexMetaArrayHolder holder : fileMetaDeque) {
+ if (holder.onIndexRemoved(oldProperties)) {
+ return;
+ }
+ }
+ }
+
long firstLogIndexInclusive() {
for (IndexMetaArrayHolder indexMetaArrayHolder : fileMetaDeque) {
long firstLogIndex = indexMetaArrayHolder.firstLogIndexInclusive();
diff --git
a/modules/raft/src/main/java/org/apache/ignite/internal/raft/storage/segstore/IndexFileManager.java
b/modules/raft/src/main/java/org/apache/ignite/internal/raft/storage/segstore/IndexFileManager.java
index ab7d8d0a2e6..b893f7fe643 100644
---
a/modules/raft/src/main/java/org/apache/ignite/internal/raft/storage/segstore/IndexFileManager.java
+++
b/modules/raft/src/main/java/org/apache/ignite/internal/raft/storage/segstore/IndexFileManager.java
@@ -266,6 +266,10 @@ class IndexFileManager {
return newIndexFilePath;
}
+ void onIndexFileRemoved(FileProperties oldIndexFileProperties) {
+ groupIndexMetas.values().forEach(groupIndexMeta ->
groupIndexMeta.onIndexRemoved(oldIndexFileProperties));
+ }
+
/**
* Returns a pointer into a segment file that contains the entry for the
given group's index. Returns {@code null} if the given log
* index could not be found in any of the index files.
@@ -349,27 +353,14 @@ class IndexFileManager {
* truncation, then the mapping will not contain an entry for this group.
Otherwise, it will contain the smallest and largest log
* indices across all index files for this group.
*/
- Long2ObjectMap<GroupDescriptor> describeSegmentFile(int fileOrdinal) {
- var result = new
Long2ObjectOpenHashMap<GroupDescriptor>(groupIndexMetas.size());
+ Long2ObjectMap<IndexFileMeta> describeSegmentFile(int fileOrdinal) {
+ var result = new
Long2ObjectOpenHashMap<IndexFileMeta>(groupIndexMetas.size());
groupIndexMetas.forEach((groupId, groupIndexMeta) -> {
- IndexFileMeta indexFileMeta =
groupIndexMeta.indexMetaByFileOrdinal(fileOrdinal);
-
- if (indexFileMeta != null) {
- // Even if index meta exists, it may still be obsolete due to
suffix truncations (during suffix truncations we do not trim
- // the meta as during prefix truncations, but add a new meta
block).
- long firstLogIndexInclusive =
groupIndexMeta.firstLogIndexInclusive();
-
- long lastLogIndexExclusive =
groupIndexMeta.lastLogIndexExclusive();
-
- boolean isObsolete = indexFileMeta.firstLogIndexInclusive() >=
lastLogIndexExclusive
- || indexFileMeta.lastLogIndexExclusive() <=
firstLogIndexInclusive;
+ IndexFileMeta indexMeta =
groupIndexMeta.effectiveIndexMetaByFileOrdinal(fileOrdinal);
- if (!isObsolete) {
- var groupDescriptor = new
GroupDescriptor(firstLogIndexInclusive, lastLogIndexExclusive);
-
- result.put((long) groupId, groupDescriptor);
- }
+ if (indexMeta != null) {
+ result.put((long) groupId, indexMeta);
}
});
@@ -528,11 +519,19 @@ class IndexFileManager {
private void recoverIndexFileMetas(Path indexFilePath) throws IOException {
FileProperties fileProperties = indexFileProperties(indexFilePath);
- if (curFileOrdinal >= 0 && fileProperties.ordinal() != curFileOrdinal
+ 1) {
- throw new IllegalStateException(String.format(
- "Unexpected index file ordinal. Expected %d, actual %d
(%s).",
- curFileOrdinal + 1, fileProperties.ordinal(), indexFilePath
- ));
+ if (curFileOrdinal >= 0) {
+ int fileOrdinal = fileProperties.ordinal();
+
+ // There can be gaps in file numbering because of suffix
truncations and subsequent GCs.
+ if (fileOrdinal > curFileOrdinal + 1) {
+ LOG.info("Missing index files in [{} : {}) range, creating
empty index metas", curFileOrdinal + 1, fileOrdinal);
+
+ for (int ordinal = curFileOrdinal + 1; ordinal < fileOrdinal;
ordinal++) {
+ var missingFileProperties = new FileProperties(ordinal, 0);
+
+ putIndexFileMetasForMissingGroups(LongSet.of(),
missingFileProperties);
+ }
+ }
}
curFileOrdinal = fileProperties.ordinal();
@@ -659,26 +658,4 @@ class IndexFileManager {
return firstIndexKept;
}
}
-
- /** Class that provides information about a Raft group. */
- static class GroupDescriptor {
- /** First log index for the group across all files (inclusive). */
- private final long firstLogIndexInclusive;
-
- /** Last log index for the group across all files (exclusive). */
- private final long lastLogIndexExclusive;
-
- private GroupDescriptor(long firstLogIndexInclusive, long
lastLogIndexExclusive) {
- this.firstLogIndexInclusive = firstLogIndexInclusive;
- this.lastLogIndexExclusive = lastLogIndexExclusive;
- }
-
- long firstLogIndexInclusive() {
- return firstLogIndexInclusive;
- }
-
- long lastLogIndexExclusive() {
- return lastLogIndexExclusive;
- }
- }
}
diff --git
a/modules/raft/src/main/java/org/apache/ignite/internal/raft/storage/segstore/IndexFileMeta.java
b/modules/raft/src/main/java/org/apache/ignite/internal/raft/storage/segstore/IndexFileMeta.java
index b89a8cc7bfa..4000e53818b 100644
---
a/modules/raft/src/main/java/org/apache/ignite/internal/raft/storage/segstore/IndexFileMeta.java
+++
b/modules/raft/src/main/java/org/apache/ignite/internal/raft/storage/segstore/IndexFileMeta.java
@@ -71,7 +71,7 @@ class IndexFileMeta {
* Returns the offset of the payload for the Raft Group in the index file.
*/
int indexFilePayloadOffset() {
- assert indexFilePayloadOffset != NO_PAYLOAD_OFFSET : "Must not be
called for empty metas.";
+ assert !isEmpty() : "Must not be called for empty metas.";
return indexFilePayloadOffset;
}
@@ -80,6 +80,10 @@ class IndexFileMeta {
return indexFileProperties;
}
+ boolean isEmpty() {
+ return indexFilePayloadOffset == NO_PAYLOAD_OFFSET;
+ }
+
@Override
public boolean equals(Object o) {
if (o == null || getClass() != o.getClass()) {
diff --git
a/modules/raft/src/main/java/org/apache/ignite/internal/raft/storage/segstore/IndexFileMetaArray.java
b/modules/raft/src/main/java/org/apache/ignite/internal/raft/storage/segstore/IndexFileMetaArray.java
index 44484b5e938..bcb5f821fa5 100644
---
a/modules/raft/src/main/java/org/apache/ignite/internal/raft/storage/segstore/IndexFileMetaArray.java
+++
b/modules/raft/src/main/java/org/apache/ignite/internal/raft/storage/segstore/IndexFileMetaArray.java
@@ -50,8 +50,8 @@ class IndexFileMetaArray {
}
IndexFileMetaArray add(IndexFileMeta indexFileMeta) {
- assert indexFileMeta.firstLogIndexInclusive() == array[size -
1].lastLogIndexExclusive() :
- String.format("Index File Metas must be contiguous. Expected
log index: %d, actual log index: %d",
+ assert indexFileMeta.firstLogIndexInclusive() >= array[size -
1].lastLogIndexExclusive() :
+ String.format("Index File Metas must be increasing. Expected
log index: %d, actual log index: %d",
array[size - 1].lastLogIndexExclusive(),
indexFileMeta.firstLogIndexInclusive()
);
@@ -181,8 +181,8 @@ class IndexFileMetaArray {
if (updateIndex > 0) {
IndexFileMeta prevOldMeta = array[updateIndex - 1];
- assert newMeta.firstLogIndexInclusive() ==
prevOldMeta.lastLogIndexExclusive() :
- String.format("Index File Metas must be contiguous.
Expected log index: %d, actual log index: %d",
+ assert newMeta.firstLogIndexInclusive() >=
prevOldMeta.lastLogIndexExclusive() :
+ String.format("Index File Metas must be increasing.
Expected log index: %d, actual log index: %d",
prevOldMeta.lastLogIndexExclusive(),
newMeta.firstLogIndexInclusive()
);
@@ -191,8 +191,8 @@ class IndexFileMetaArray {
if (updateIndex < size - 1) {
IndexFileMeta nextOldMeta = array[updateIndex + 1];
- assert newMeta.lastLogIndexExclusive() ==
nextOldMeta.firstLogIndexInclusive() :
- String.format("Index File Metas must be contiguous.
Expected log index: %d, actual log index: %d",
+ assert newMeta.lastLogIndexExclusive() <=
nextOldMeta.firstLogIndexInclusive() :
+ String.format("Index File Metas must be increasing.
Expected log index: %d, actual log index: %d",
nextOldMeta.firstLogIndexInclusive(),
newMeta.lastLogIndexExclusive()
);
@@ -205,6 +205,28 @@ class IndexFileMetaArray {
return new IndexFileMetaArray(newArray, size);
}
+ /**
+ * Replaces the meta for the given file with an empty meta when an index
file gets removed to preserve contiguous file ordinals.
+ */
+ IndexFileMetaArray onIndexRemoved(FileProperties oldProperties) {
+ int updateIndex = arrayIndexByFileOrdinal(oldProperties.ordinal());
+
+ if (updateIndex == MISSING_ARRAY_INDEX) {
+ return this;
+ }
+
+ IndexFileMeta oldMeta = array[updateIndex];
+
+ assert oldMeta.indexFileProperties().equals(oldProperties)
+ : String.format("File properties mismatch [expected=%s,
actual=%s].", oldMeta.indexFileProperties(), oldProperties);
+
+ IndexFileMeta[] newArray = array.clone();
+
+ newArray[updateIndex] =
IndexFileMeta.empty(oldMeta.lastLogIndexExclusive(),
oldMeta.indexFileProperties());
+
+ return new IndexFileMetaArray(newArray, size);
+ }
+
@Nullable
IndexFileMeta findByFileOrdinal(int fileOrdinal) {
int arrayIndex = arrayIndexByFileOrdinal(fileOrdinal);
diff --git
a/modules/raft/src/main/java/org/apache/ignite/internal/raft/storage/segstore/RaftLogGarbageCollector.java
b/modules/raft/src/main/java/org/apache/ignite/internal/raft/storage/segstore/RaftLogGarbageCollector.java
index 9402a0ae0f1..a1b5e9cb5a1 100644
---
a/modules/raft/src/main/java/org/apache/ignite/internal/raft/storage/segstore/RaftLogGarbageCollector.java
+++
b/modules/raft/src/main/java/org/apache/ignite/internal/raft/storage/segstore/RaftLogGarbageCollector.java
@@ -42,7 +42,6 @@ import java.util.concurrent.atomic.AtomicLong;
import java.util.stream.Stream;
import org.apache.ignite.internal.logger.IgniteLogger;
import org.apache.ignite.internal.logger.Loggers;
-import
org.apache.ignite.internal.raft.storage.segstore.IndexFileManager.GroupDescriptor;
import org.apache.ignite.internal.raft.util.VarlenEncoder;
import org.apache.ignite.raft.jraft.storage.LogStorage;
import org.jetbrains.annotations.VisibleForTesting;
@@ -141,7 +140,7 @@ class RaftLogGarbageCollector {
void runCompaction(SegmentFile segmentFile) throws IOException {
LOG.info("Compacting segment file [path = {}].", segmentFile.path());
- Long2ObjectMap<GroupDescriptor> segmentFileDescription
+ Long2ObjectMap<IndexFileMeta> segmentFileDescription
=
indexFileManager.describeSegmentFile(segmentFile.fileProperties().ordinal());
boolean canRemoveSegmentFile = segmentFileDescription.isEmpty();
@@ -151,6 +150,8 @@ class RaftLogGarbageCollector {
long logSizeDelta;
if (canRemoveSegmentFile) {
+ indexFileManager.onIndexFileRemoved(segmentFile.fileProperties());
+
logSizeDelta = Files.size(segmentFile.path()) +
Files.size(indexFilePath);
} else {
logSizeDelta = compactSegmentFile(segmentFile, indexFilePath,
segmentFileDescription);
@@ -181,7 +182,7 @@ class RaftLogGarbageCollector {
private long compactSegmentFile(
SegmentFile segmentFile,
Path indexFilePath,
- Long2ObjectMap<GroupDescriptor> segmentFileDescription
+ Long2ObjectMap<IndexFileMeta> segmentFileDescription
) throws IOException {
ByteBuffer buffer = segmentFile.buffer();
@@ -233,9 +234,9 @@ class RaftLogGarbageCollector {
long index = VarlenEncoder.readLong(buffer);
- GroupDescriptor groupDescriptor =
segmentFileDescription.get(groupId);
+ IndexFileMeta indexFileMeta =
segmentFileDescription.get(groupId);
- if (groupDescriptor == null || !isLogIndexInRange(index,
groupDescriptor)) {
+ if (indexFileMeta == null || !isLogIndexInRange(index,
indexFileMeta)) {
// We found a truncated entry, it should be skipped.
buffer.position(endOfRecordOffset);
@@ -281,8 +282,8 @@ class RaftLogGarbageCollector {
}
}
- private static boolean isLogIndexInRange(long index, GroupDescriptor
groupDescriptor) {
- return index >= groupDescriptor.firstLogIndexInclusive() && index <
groupDescriptor.lastLogIndexExclusive();
+ private static boolean isLogIndexInRange(long index, IndexFileMeta
indexFileMeta) {
+ return index >= indexFileMeta.firstLogIndexInclusive() && index <
indexFileMeta.lastLogIndexExclusive();
}
private class TmpSegmentFile implements AutoCloseable {
diff --git
a/modules/raft/src/test/java/org/apache/ignite/internal/raft/storage/segstore/GroupIndexMetaTest.java
b/modules/raft/src/test/java/org/apache/ignite/internal/raft/storage/segstore/GroupIndexMetaTest.java
index e210910b96c..e5089861af2 100644
---
a/modules/raft/src/test/java/org/apache/ignite/internal/raft/storage/segstore/GroupIndexMetaTest.java
+++
b/modules/raft/src/test/java/org/apache/ignite/internal/raft/storage/segstore/GroupIndexMetaTest.java
@@ -340,13 +340,13 @@ class GroupIndexMetaTest extends BaseIgniteAbstractTest {
groupMeta.addIndexMeta(meta2);
groupMeta.addIndexMeta(meta3);
- assertThat(groupMeta.indexMetaByFileOrdinal(1), is(meta1));
- assertThat(groupMeta.indexMetaByFileOrdinal(2), is(meta2));
- assertThat(groupMeta.indexMetaByFileOrdinal(3), is(meta3));
+ assertThat(groupMeta.effectiveIndexMetaByFileOrdinal(1), is(meta1));
+ assertThat(groupMeta.effectiveIndexMetaByFileOrdinal(2), is(meta2));
+ assertThat(groupMeta.effectiveIndexMetaByFileOrdinal(3), is(meta3));
// Ordinals before the first and after the last return null.
- assertThat(groupMeta.indexMetaByFileOrdinal(0), is(nullValue()));
- assertThat(groupMeta.indexMetaByFileOrdinal(4), is(nullValue()));
+ assertThat(groupMeta.effectiveIndexMetaByFileOrdinal(0),
is(nullValue()));
+ assertThat(groupMeta.effectiveIndexMetaByFileOrdinal(4),
is(nullValue()));
}
@Test
@@ -363,13 +363,80 @@ class GroupIndexMetaTest extends BaseIgniteAbstractTest {
var meta3 = new IndexFileMeta(100, 150, 84, new FileProperties(3));
groupMeta.addIndexMeta(meta3);
- assertThat(groupMeta.indexMetaByFileOrdinal(1), is(meta1));
- assertThat(groupMeta.indexMetaByFileOrdinal(2), is(meta2));
- assertThat(groupMeta.indexMetaByFileOrdinal(3), is(meta3));
+ // Effective meta1 should be truncated on top by the second block.
+ var effectiveMeta1 = new IndexFileMeta(
+ meta1.firstLogIndexInclusive(),
+ meta2.firstLogIndexInclusive(),
+ meta1.indexFilePayloadOffset(),
+ meta1.indexFileProperties()
+ );
+
+ assertThat(groupMeta.effectiveIndexMetaByFileOrdinal(1),
is(effectiveMeta1));
+ assertThat(groupMeta.effectiveIndexMetaByFileOrdinal(2), is(meta2));
+ assertThat(groupMeta.effectiveIndexMetaByFileOrdinal(3), is(meta3));
+
+ // Ordinals before the first and after the last return null.
+ assertThat(groupMeta.effectiveIndexMetaByFileOrdinal(0),
is(nullValue()));
+ assertThat(groupMeta.effectiveIndexMetaByFileOrdinal(4),
is(nullValue()));
+ }
+
+ @Test
+ void testIndexMetaByFileOrdinalWithThreeBlocks() {
+ // meta1 is in block 0.
+ var meta1 = new IndexFileMeta(1, 100, 0, new FileProperties(1));
+ var groupMeta = new GroupIndexMeta(meta1);
+
+ // meta2 overlaps meta1, creating a second deque block (first suffix
truncation).
+ var meta2 = new IndexFileMeta(50, 100, 42, new FileProperties(2));
+ groupMeta.addIndexMeta(meta2);
+
+ // meta3 overlaps meta2 (and meta1), creating a third deque block
(second suffix truncation).
+ var meta3 = new IndexFileMeta(30, 100, 84, new FileProperties(3));
+ groupMeta.addIndexMeta(meta3);
+
+ // Effective meta1 range is capped by the minimum of all subsequent
blocks' firstLogIndexInclusive.
+ var effectiveMeta1 = new IndexFileMeta(
+ meta1.firstLogIndexInclusive(),
+ meta3.firstLogIndexInclusive(),
+ meta1.indexFilePayloadOffset(),
+ meta1.indexFileProperties()
+ );
+
+ assertThat(groupMeta.effectiveIndexMetaByFileOrdinal(1),
is(effectiveMeta1));
+ assertThat(groupMeta.effectiveIndexMetaByFileOrdinal(2),
is(nullValue()));
+ assertThat(groupMeta.effectiveIndexMetaByFileOrdinal(3), is(meta3));
// Ordinals before the first and after the last return null.
- assertThat(groupMeta.indexMetaByFileOrdinal(0), is(nullValue()));
- assertThat(groupMeta.indexMetaByFileOrdinal(4), is(nullValue()));
+ assertThat(groupMeta.effectiveIndexMetaByFileOrdinal(0),
is(nullValue()));
+ assertThat(groupMeta.effectiveIndexMetaByFileOrdinal(4),
is(nullValue()));
+ }
+
+ @Test
+ void testIndexMetaByFileOrdinalWithThreeBlocksWhenSecondBlockIsObsolete() {
+ // meta1 is in block 0.
+ var meta1 = new IndexFileMeta(1, 100, 0, new FileProperties(1));
+ var groupMeta = new GroupIndexMeta(meta1);
+
+ // meta2 overlaps meta1, creating a second deque block.
+ var meta2 = new IndexFileMeta(50, 100, 42, new FileProperties(2));
+ groupMeta.addIndexMeta(meta2);
+
+ // meta3's firstLogIndexInclusive is lower than meta2's, creating a
third block.
+ var meta3 = new IndexFileMeta(10, 100, 84, new FileProperties(3));
+ groupMeta.addIndexMeta(meta3);
+
+ assertThat(groupMeta.effectiveIndexMetaByFileOrdinal(2),
is(nullValue()));
+
+ var effectiveMeta1 = new IndexFileMeta(
+ meta1.firstLogIndexInclusive(),
+ meta3.firstLogIndexInclusive(),
+ meta1.indexFilePayloadOffset(),
+ meta1.indexFileProperties()
+ );
+
+ assertThat(groupMeta.effectiveIndexMetaByFileOrdinal(1),
is(effectiveMeta1));
+ assertThat(groupMeta.effectiveIndexMetaByFileOrdinal(2),
is(nullValue()));
+ assertThat(groupMeta.effectiveIndexMetaByFileOrdinal(3), is(meta3));
}
@Test
@@ -385,15 +452,15 @@ class GroupIndexMetaTest extends BaseIgniteAbstractTest {
// After prefix truncation, ordinal 0 is dropped; ordinals 1 and 2
must still be found correctly.
groupMeta.truncatePrefix(75);
- assertThat(groupMeta.indexMetaByFileOrdinal(0), is(nullValue()));
+ assertThat(groupMeta.effectiveIndexMetaByFileOrdinal(0),
is(nullValue()));
// meta2 was trimmed – the ordinal is the same but
firstLogIndexInclusive changed.
- IndexFileMeta trimmedMeta2 = groupMeta.indexMetaByFileOrdinal(1);
+ IndexFileMeta trimmedMeta2 =
groupMeta.effectiveIndexMetaByFileOrdinal(1);
assertThat(trimmedMeta2, is(notNullValue()));
assertThat(trimmedMeta2.firstLogIndexInclusive(), is(75L));
assertThat(trimmedMeta2.indexFileProperties(), is(new
FileProperties(1)));
- assertThat(groupMeta.indexMetaByFileOrdinal(2), is(meta3));
+ assertThat(groupMeta.effectiveIndexMetaByFileOrdinal(2), is(meta3));
}
@Test
@@ -413,15 +480,15 @@ class GroupIndexMetaTest extends BaseIgniteAbstractTest {
// After prefix truncation, ordinal 0 is dropped; ordinals 1 and 2
must still be found correctly.
groupMeta.truncatePrefix(75);
- assertThat(groupMeta.indexMetaByFileOrdinal(1), is(nullValue()));
+ assertThat(groupMeta.effectiveIndexMetaByFileOrdinal(1),
is(nullValue()));
// meta2 was trimmed – the ordinal is the same but
firstLogIndexInclusive changed.
- IndexFileMeta trimmedMeta2 = groupMeta.indexMetaByFileOrdinal(2);
+ IndexFileMeta trimmedMeta2 =
groupMeta.effectiveIndexMetaByFileOrdinal(2);
assertThat(trimmedMeta2, is(notNullValue()));
assertThat(trimmedMeta2.firstLogIndexInclusive(), is(75L));
assertThat(trimmedMeta2.indexFileProperties(), is(new
FileProperties(2)));
- assertThat(groupMeta.indexMetaByFileOrdinal(3), is(meta3));
+ assertThat(groupMeta.effectiveIndexMetaByFileOrdinal(3), is(meta3));
}
@Test
@@ -436,8 +503,29 @@ class GroupIndexMetaTest extends BaseIgniteAbstractTest {
var compactedMeta1 = new IndexFileMeta(1, 50, 0, new FileProperties(0,
1));
groupMeta.onIndexCompacted(new FileProperties(0), compactedMeta1);
- assertThat(groupMeta.indexMetaByFileOrdinal(0), is(compactedMeta1));
- assertThat(groupMeta.indexMetaByFileOrdinal(1), is(meta2));
+ assertThat(groupMeta.effectiveIndexMetaByFileOrdinal(0),
is(compactedMeta1));
+ assertThat(groupMeta.effectiveIndexMetaByFileOrdinal(1), is(meta2));
+ }
+
+ @Test
+ void testAddIndexMetaAfterFileRemoval() {
+ var meta1 = new IndexFileMeta(0, 50, 0, new FileProperties(0));
+ var meta2 = new IndexFileMeta(50, 100, 42, new FileProperties(1));
+
+ var groupMeta = new GroupIndexMeta(meta1);
+ groupMeta.addIndexMeta(meta2);
+
+ groupMeta.onIndexRemoved(meta2.indexFileProperties());
+
+ var meta3 = new IndexFileMeta(50, 150, 84, new FileProperties(2));
+ groupMeta.addIndexMeta(meta3);
+
+ assertThat(groupMeta.effectiveIndexMetaByFileOrdinal(0), is(meta1));
+ assertThat(groupMeta.effectiveIndexMetaByFileOrdinal(1),
is(nullValue()));
+ assertThat(groupMeta.effectiveIndexMetaByFileOrdinal(2), is(meta3));
+
+ assertThat(groupMeta.indexMetaByLogIndex(25), is(meta1));
+ assertThat(groupMeta.indexMetaByLogIndex(50), is(meta3));
}
@RepeatedTest(100)
diff --git
a/modules/raft/src/test/java/org/apache/ignite/internal/raft/storage/segstore/RaftLogGarbageCollectorTest.java
b/modules/raft/src/test/java/org/apache/ignite/internal/raft/storage/segstore/RaftLogGarbageCollectorTest.java
index a476d09c614..733ff153ba2 100644
---
a/modules/raft/src/test/java/org/apache/ignite/internal/raft/storage/segstore/RaftLogGarbageCollectorTest.java
+++
b/modules/raft/src/test/java/org/apache/ignite/internal/raft/storage/segstore/RaftLogGarbageCollectorTest.java
@@ -129,12 +129,7 @@ class RaftLogGarbageCollectorTest extends
IgniteAbstractTest {
FileProperties fileProperties =
SegmentFile.fileProperties(segmentFilePath);
- SegmentFile segmentFile = SegmentFile.openExisting(segmentFilePath,
false);
- try {
- garbageCollector.runCompaction(segmentFile);
- } finally {
- segmentFile.close();
- }
+ runCompaction(segmentFilePath);
assertThat(segmentFilePath, not(exists()));
assertThat(indexFileManager.indexFilePath(fileProperties),
not(exists()));
@@ -170,12 +165,7 @@ class RaftLogGarbageCollectorTest extends
IgniteAbstractTest {
FileProperties originalFileProperties =
SegmentFile.fileProperties(firstSegmentFile);
- SegmentFile segmentFile = SegmentFile.openExisting(firstSegmentFile,
false);
- try {
- garbageCollector.runCompaction(segmentFile);
- } finally {
- segmentFile.close();
- }
+ runCompaction(firstSegmentFile);
// Segment file should be replaced by a new one with increased
generation.
assertThat(firstSegmentFile, not(exists()));
@@ -208,13 +198,7 @@ class RaftLogGarbageCollectorTest extends
IgniteAbstractTest {
triggerAndAwaitCheckpoint(batches.size() / 2);
for (Path segmentFilePath : segmentFiles) {
- SegmentFile segmentFile =
SegmentFile.openExisting(segmentFilePath, false);
-
- try {
- garbageCollector.runCompaction(segmentFile);
- } finally {
- segmentFile.close();
- }
+ runCompaction(segmentFilePath);
assertThat(segmentFilePath, not(exists()));
}
@@ -244,12 +228,7 @@ class RaftLogGarbageCollectorTest extends
IgniteAbstractTest {
while (!segmentFiles.get(0).equals(lastSegmentFile)) {
fileManager.truncatePrefix(GROUP_ID_1, ++aliveIndex);
- SegmentFile segmentFile =
SegmentFile.openExisting(segmentFiles.get(0), false);
- try {
- garbageCollector.runCompaction(segmentFile);
- } finally {
- segmentFile.close();
- }
+ runCompaction(segmentFiles.get(0));
segmentFiles = segmentFiles();
}
@@ -308,12 +287,7 @@ class RaftLogGarbageCollectorTest extends
IgniteAbstractTest {
long sizeBeforeCompaction = Files.size(curSegmentFilePath);
- SegmentFile segmentFile =
SegmentFile.openExisting(curSegmentFilePath, false);
- try {
- garbageCollector.runCompaction(segmentFile);
- } finally {
- segmentFile.close();
- }
+ runCompaction(curSegmentFilePath);
FileProperties newFileProperties = new
FileProperties(fileProperties.ordinal(), fileProperties.generation() + 1);
@@ -387,25 +361,9 @@ class RaftLogGarbageCollectorTest extends
IgniteAbstractTest {
List<Path> segmentFiles = segmentFiles();
- SegmentFile segmentFile =
SegmentFile.openExisting(segmentFiles.get(0), false);
- try {
- garbageCollector.runCompaction(segmentFile);
- } finally {
- segmentFile.close();
- }
-
- fileManager.close();
+ runCompaction(segmentFiles.get(0));
- fileManager = new SegmentFileManager(
- NODE_NAME,
- workDir,
- STRIPES,
- new NoOpFailureManager(),
- raftConfiguration,
- storageConfiguration
- );
-
- fileManager.start();
+ restartSegmentFileManager();
for (int i = 0; i < batches.size(); i++) {
int index = i;
@@ -477,7 +435,7 @@ class RaftLogGarbageCollectorTest extends
IgniteAbstractTest {
}
@Test
- void testRunCompactionDoesNotRetainEntriesTruncatedBySuffix() throws
Exception {
+ void testRunCompactionWithFullyCompactedFileByTruncatedSuffix() throws
Exception {
List<byte[]> batches = createRandomData(FILE_SIZE / 4, 10);
for (int i = 0; i < batches.size(); i++) {
@@ -493,14 +451,196 @@ class RaftLogGarbageCollectorTest extends
IgniteAbstractTest {
assertThat(segmentFiles, hasSize(greaterThan(2)));
- SegmentFile segmentFile =
SegmentFile.openExisting(segmentFiles.get(1), false);
- try {
- garbageCollector.runCompaction(segmentFile);
- } finally {
- segmentFile.close();
+ runCompaction(segmentFiles.get(1));
+
+ assertThat(segmentFiles.get(1), not(exists()));
+
+ // Check that no other generations of this file exist.
+ var differentGenerationFileProperties = new FileProperties(1, 1);
+ Path differentGenerationFile =
fileManager.segmentFilesDir().resolve(SegmentFile.fileName(differentGenerationFileProperties));
+
+ assertThat(differentGenerationFile, not(exists()));
+ }
+
+ @Test
+ void testRunCompactionWithPartiallyCompactedFileByTruncatedSuffix() throws
Exception {
+ List<byte[]> batches = createRandomData(FILE_SIZE / 4, 10);
+
+ // Use two groups to guarantee that no files will be fully truncated.
+ for (int i = 0; i < batches.size(); i++) {
+ appendBytes(GROUP_ID_1, batches.get(i), i);
+ appendBytes(GROUP_ID_2, batches.get(i), i);
}
+ fileManager.truncateSuffix(GROUP_ID_1, 1);
+
+ triggerAndAwaitCheckpoint(1);
+
+ // Since we truncated GROUP_ID_1 entries after log index 1, the second
segment file will be partially compacted:
+ // GROUP_ID_1 entries are dropped but GROUP_ID_2 entries survive in a
new generation file.
+ List<Path> segmentFiles = segmentFiles();
+
+ assertThat(segmentFiles, hasSize(greaterThan(2)));
+
+ runCompaction(segmentFiles.get(1));
+
assertThat(segmentFiles.get(1), not(exists()));
+
+ // Check that a new generation file has been created.
+ var differentGenerationFileProperties = new FileProperties(1, 1);
+ Path differentGenerationFile =
fileManager.segmentFilesDir().resolve(SegmentFile.fileName(differentGenerationFileProperties));
+
+ assertThat(differentGenerationFile, exists());
+ }
+
+ /**
+ * Similar to {@link
#testRunCompactionWithFullyCompactedFileByTruncatedSuffix()} but includes
recovery validation.
+ */
+ @Test
+ void testRecoveryAfterCompactionWithFullyCompactedFileByTruncatedSuffix()
throws Exception {
+ List<byte[]> staleBatches = createRandomData(FILE_SIZE / 4, 10);
+
+ for (int i = 0; i < staleBatches.size(); i++) {
+ appendBytes(GROUP_ID_1, staleBatches.get(i), i);
+ }
+
+ long lastLogIndexKept = 1;
+
+ fileManager.truncateSuffix(GROUP_ID_1, lastLogIndexKept);
+
+ List<byte[]> newBatches = createRandomData(FILE_SIZE / 4, 5);
+
+ for (int i = 0; i < newBatches.size(); i++) {
+ appendBytes(GROUP_ID_1, newBatches.get(i), i + lastLogIndexKept +
1);
+ }
+
+ await().until(this::indexFiles,
hasSize(greaterThanOrEqualTo(segmentFiles().size() - 1)));
+
+ List<Path> segmentFiles = segmentFiles();
+
+ runCompaction(segmentFiles.get(0));
+
+ runCompaction(segmentFiles.get(1));
+
+ restartSegmentFileManager();
+
+ for (int i = 0; i <= lastLogIndexKept; i++) {
+ int finalI = i;
+
+ fileManager.getEntry(GROUP_ID_1, i, bs -> {
+ assertThat(bs, is(staleBatches.get(finalI)));
+ return null;
+ });
+ }
+
+ for (int i = 0; i < newBatches.size(); i++) {
+ int finalI = i;
+
+ fileManager.getEntry(GROUP_ID_1, lastLogIndexKept + i + 1, bs -> {
+ assertThat(bs, is(newBatches.get(finalI)));
+ return null;
+ });
+ }
+ }
+
+ /**
+ * Similar to {@link #testRunCompactionWithPartiallyCompactedFileByTruncatedSuffix()} but includes recovery validation.
+ *
+ * <p>Writes the same 10 batches to {@code GROUP_ID_1} and {@code GROUP_ID_2} in an interleaved fashion, truncates the suffix
+ * of {@code GROUP_ID_1} at index 1, compacts the first two segment files and restarts the file manager. After the restart
+ * entries 0 and 1 of {@code GROUP_ID_1} and all entries of {@code GROUP_ID_2} must be readable with their original content.
+ */
+ @Test
+ void
testRecoveryAfterCompactionWithPartiallyCompactedFileByTruncatedSuffix() throws
Exception {
+ List<byte[]> batches = createRandomData(FILE_SIZE / 4, 10);
+
+ // Both groups receive identical payloads at identical log indexes.
+ for (int i = 0; i < batches.size(); i++) {
+ appendBytes(GROUP_ID_1, batches.get(i), i);
+ appendBytes(GROUP_ID_2, batches.get(i), i);
+ }
+
+ // Only GROUP_ID_1 is truncated; GROUP_ID_2 is left untouched.
+ fileManager.truncateSuffix(GROUP_ID_1, 1);
+
+ triggerAndAwaitCheckpoint(1);
+
+ List<Path> segmentFiles = segmentFiles();
+
+ runCompaction(segmentFiles.get(0));
+
+ runCompaction(segmentFiles.get(1));
+
+ // Recovery validation: everything below is read through a freshly started file manager.
+ restartSegmentFileManager();
+
+ // GROUP_ID_1: only entries 0 and 1 are within the suffix truncation
boundary and should be readable.
+ fileManager.getEntry(GROUP_ID_1, 0, bs -> {
+ assertThat(bs, is(batches.get(0)));
+ return null;
+ });
+
+ fileManager.getEntry(GROUP_ID_1, 1, bs -> {
+ assertThat(bs, is(batches.get(1)));
+ return null;
+ });
+
+ // GROUP_ID_2: all original entries should still be accessible.
+ for (int i = 0; i < batches.size(); i++) {
+ int index = i;
+
+ fileManager.getEntry(GROUP_ID_2, i, bs -> {
+ assertThat(bs, is(batches.get(index)));
+ return null;
+ });
+ }
+ }
+
+ /**
+ * Reproducer for the stale-deque-entry corruption bug: after fully deleting a middle segment file, its {@link IndexFileMeta}
+ * remains in the same deque block as the preceding file's meta. When the preceding file is subsequently partially compacted
+ * (its live log range shrinks), {@link IndexFileMetaArray#onIndexCompacted} asserts that the new meta's
+ * {@code lastLogIndexExclusive} equals the next entry's {@code firstLogIndexInclusive}.
+ *
+ * <p>The test appends 20 entries for {@code GROUP_ID_1}, truncates the suffix at index 1, compacts segment files 0, 1 and 2
+ * and then checks that entries 0 and 1 are still readable, i.e. it passes when no corruption occurs.
+ *
+ * <p>NOTE(review): the body compacts file 0 before files 1 and 2, whereas the scenario above describes the preceding file
+ * being compacted after the middle file was deleted - confirm that this order is the intended one.
+ */
+ @Test
+ void testCompactionOfFileAdjacentToStaleEntryInDequeCausesCorruption()
throws Exception {
+ // Use FILE_SIZE / 8 batches so that ~4 entries fit per segment file.
This ensures file 0 covers [0,4),
+ // file 1 covers [4,8), etc. After suffix-truncation at index 1, file
0's live range shrinks to [0,2)
+ // while the stale file 1 meta still starts at 4.
+ List<byte[]> batches = createRandomData(FILE_SIZE / 8, 20);
+
+ for (int i = 0; i < batches.size(); i++) {
+ appendBytes(GROUP_ID_1, batches.get(i), i);
+ }
+
+ // Entries > 1 are now stale. File 0 has live entries [0,1] plus stale
entries, file 1 is fully stale.
+ fileManager.truncateSuffix(GROUP_ID_1, 1);
+
+ triggerAndAwaitCheckpoint(1);
+
+ List<Path> segmentFiles = segmentFiles();
+
+ // The scenario needs at least four segment files so that files 0, 1 and 2 can all be compacted below.
+ assertThat(segmentFiles, hasSize(greaterThan(3)));
+
+ // Partially compact file 0.
+ runCompaction(segmentFiles.get(0));
+
+ assertThat(segmentFiles.get(0), not(exists()));
+
+ // The surviving entries of file 0 are expected in a file named after FileProperties(0, 1).
+ var newFile0Properties = new FileProperties(0, 1);
+
+
assertThat(fileManager.segmentFilesDir().resolve(SegmentFile.fileName(newFile0Properties)),
exists());
+
+ // Fully delete file 1 and 2 (all entries stale). Its IndexFileMeta
stays in block 0 of the GroupIndexMeta deque.
+ runCompaction(segmentFiles.get(1));
+ runCompaction(segmentFiles.get(2));
+
+ assertThat(segmentFiles.get(1), not(exists()));
+ assertThat(segmentFiles.get(2), not(exists()));
+
+ // Entries 0 and 1 must still be readable.
+ fileManager.getEntry(GROUP_ID_1, 0, bs -> {
+ assertThat(bs, is(batches.get(0)));
+ return null;
+ });
+
+ fileManager.getEntry(GROUP_ID_1, 1, bs -> {
+ assertThat(bs, is(batches.get(1)));
+ return null;
+ });
}
private List<Path> segmentFiles() throws IOException {
@@ -566,4 +706,29 @@ class RaftLogGarbageCollectorTest extends
IgniteAbstractTest {
// Wait for the checkpoint process to complete.
await().until(this::indexFiles,
hasSize(greaterThanOrEqualTo(segmentFilesAfterCheckpoint.size() - 1)));
}
+
+ /**
+ * Closes the current {@code fileManager} and replaces it with a newly constructed {@link SegmentFileManager}, which is
+ * then started. The new instance is built from this test's {@code NODE_NAME}, {@code workDir}, {@code STRIPES},
+ * {@code raftConfiguration} and {@code storageConfiguration}, together with a fresh {@code NoOpFailureManager}.
+ *
+ * @throws Exception Propagated from closing the old manager or from constructing/starting the new one.
+ */
+ private void restartSegmentFileManager() throws Exception {
+ fileManager.close();
+
+ fileManager = new SegmentFileManager(
+ NODE_NAME,
+ workDir,
+ STRIPES,
+ new NoOpFailureManager(),
+ raftConfiguration,
+ storageConfiguration
+ );
+
+ fileManager.start();
+ }
+
+ /**
+ * Opens the segment file at the given path, passes it to {@code garbageCollector.runCompaction} and closes it afterwards,
+ * even if the compaction throws.
+ *
+ * @param segmentFilePath Path of the segment file to open and compact.
+ * @throws IOException Propagated from the calls made in this method.
+ */
+ private void runCompaction(Path segmentFilePath) throws IOException {
+ // NOTE(review): the meaning of the 'false' flag is not visible from here - confirm against SegmentFile.openExisting.
+ SegmentFile segmentFile = SegmentFile.openExisting(segmentFilePath,
false);
+
+ try {
+ garbageCollector.runCompaction(segmentFile);
+ } finally {
+ // Always release the file handle opened above.
+ segmentFile.close();
+ }
+ }
}