sashapolo commented on code in PR #6753:
URL: https://github.com/apache/ignite-3/pull/6753#discussion_r2440179589


##########
modules/raft/src/main/java/org/apache/ignite/internal/raft/storage/segstore/GroupIndexMeta.java:
##########
@@ -19,57 +19,117 @@
 
 import java.lang.invoke.MethodHandles;
 import java.lang.invoke.VarHandle;
+import java.util.Deque;
+import java.util.Iterator;
+import java.util.concurrent.ConcurrentLinkedDeque;
 import org.jetbrains.annotations.Nullable;
 
 /**
  * Represents in-memory meta information about a particular Raft group stored 
in an index file.
  */
 class GroupIndexMeta {
-    private static final VarHandle FILE_METAS_VH;
+    private static class IndexMetaArrayHolder {
+        private static final VarHandle FILE_METAS_VH;
 
-    static {
-        try {
-            FILE_METAS_VH = 
MethodHandles.lookup().findVarHandle(GroupIndexMeta.class, "fileMetas", 
IndexFileMetaArray.class);
-        } catch (ReflectiveOperationException e) {
-            throw new ExceptionInInitializerError(e);
+        static {
+            try {
+                FILE_METAS_VH = 
MethodHandles.lookup().findVarHandle(IndexMetaArrayHolder.class, "fileMetas", 
IndexFileMetaArray.class);
+            } catch (ReflectiveOperationException e) {
+                throw new ExceptionInInitializerError(e);
+            }
+        }
+
+        @SuppressWarnings("FieldMayBeFinal") // Updated through a VarHandle.
+        volatile IndexFileMetaArray fileMetas;
+
+        IndexMetaArrayHolder(IndexFileMeta startFileMeta) {
+            this.fileMetas = new IndexFileMetaArray(startFileMeta);
+        }
+
+        void addIndexMeta(IndexFileMeta indexFileMeta) {
+            IndexFileMetaArray fileMetas = this.fileMetas;
+
+            IndexFileMetaArray newFileMetas = fileMetas.add(indexFileMeta);
+
+            // Simple assignment would suffice, since we only have one thread 
writing to this field, but we use compareAndSet to verify
+            // this invariant, just in case.
+            boolean updated = FILE_METAS_VH.compareAndSet(this, fileMetas, 
newFileMetas);
+
+            assert updated : "Concurrent writes detected";
         }
     }
 
-    @SuppressWarnings("FieldMayBeFinal") // Updated through a VarHandle.
-    private volatile IndexFileMetaArray fileMetas;
+    private final Deque<IndexMetaArrayHolder> fileMetaDeque = new 
ConcurrentLinkedDeque<>();
 
     GroupIndexMeta(IndexFileMeta startFileMeta) {
-        this.fileMetas = new IndexFileMetaArray(startFileMeta);
+        fileMetaDeque.add(new IndexMetaArrayHolder(startFileMeta));
     }
 
     void addIndexMeta(IndexFileMeta indexFileMeta) {
-        IndexFileMetaArray fileMetas = this.fileMetas;
+        IndexMetaArrayHolder curFileMetas = fileMetaDeque.getLast();
+
+        long curLastLogIndex = curFileMetas.fileMetas.lastLogIndexExclusive();
 
-        IndexFileMetaArray newFileMetas = fileMetas.add(indexFileMeta);
+        long newFirstLogIndex = indexFileMeta.firstLogIndexInclusive();
 
-        // Simple assignment would suffice, since we only have one thread 
writing to this field, but we use compareAndSet to verify
-        // this invariant, just in case.
-        boolean updated = FILE_METAS_VH.compareAndSet(this, fileMetas, 
newFileMetas);
+        assert newFirstLogIndex <= curLastLogIndex :
+                String.format(
+                        "Gaps between Index File Metas are not allowed. Last 
log index: %d, new log index: %d",
+                        curLastLogIndex, newFirstLogIndex
+                );
 
-        assert updated : "Concurrent writes detected";
+        // Merge consecutive index metas into a single meta block. If there's 
an overlap (e.g. due to log truncation), start a new block,
+        // which will override the previous one during search.
+        if (curLastLogIndex == newFirstLogIndex) {
+            curFileMetas.addIndexMeta(indexFileMeta);
+        } else {
+            fileMetaDeque.add(new IndexMetaArrayHolder(indexFileMeta));
+        }
     }
 
     /**
-     * Returns a file pointer that uniquely identifies the index file for the 
given log index. Returns {@code null} if the given log index
+     * Returns index file meta that uniquely identifies the index file for the 
given log index. Returns {@code null} if the given log index
      * is not found in any of the index files in this group.
      */
     @Nullable
     IndexFileMeta indexMeta(long logIndex) {
-        return fileMetas.find(logIndex);
+        Iterator<IndexMetaArrayHolder> it = fileMetaDeque.descendingIterator();
+
+        while (it.hasNext()) {
+            IndexFileMetaArray fileMetas = it.next().fileMetas;
+
+            // Log suffix might have been truncated, so we can have an entry 
on the top of the queue that cuts off part of the search range.
+            if (logIndex >= fileMetas.lastLogIndexExclusive()) {
+                return null;
+            }
+
+            if (logIndex < fileMetas.firstLogIndexInclusive()) {
+                continue;
+            }
+
+            IndexFileMeta indexMeta = fileMetas.find(logIndex);
+
+            if (indexMeta != null) {
+                return indexMeta;
+            }
+        }
+
+        return null;
     }
 
     long firstLogIndexInclusive() {
-        return fileMetas.get(0).firstLogIndexInclusive();
+        for (IndexMetaArrayHolder indexMetaArrayHolder : fileMetaDeque) {
+            IndexFileMetaArray fileMetas = indexMetaArrayHolder.fileMetas;
+
+            if (fileMetas.size() > 0) {
+                return fileMetas.firstLogIndexInclusive();
+            }

Review Comment:
   This condition is not correct, I've confused myself, thanks for pointing it 
out



##########
modules/raft/src/main/java/org/apache/ignite/internal/raft/storage/segstore/RaftLogCheckpointer.java:
##########
@@ -137,7 +137,8 @@ long firstLogIndexInclusive(long groupId) {
         while (it.hasNext()) {
             SegmentInfo segmentInfo = 
it.next().memTable().segmentInfo(groupId);

Review Comment:
   > Is it possible that a constant need to resolve groupId into various 
structures will affect the performance?
   
   Probably yes, but I don't see an easy way to optimize this at the moment.
   
   > I thought that a single checkpoint only corresponds to a single memtable, 
is this assumption false?
   
   This is correct, but I don't understand how this is related to this method. 
We can have multiple queued checkpoints and we search inside the queue during 
reads.



##########
modules/raft/src/main/java/org/apache/ignite/internal/raft/storage/segstore/IndexMemTable.java:
##########
@@ -64,6 +64,22 @@ public SegmentInfo segmentInfo(long groupId) {
         return stripe(groupId).memTable.get(groupId);
     }
 
+    @Override
+    public void truncateSuffix(long groupId, long lastLogIndexKept) {
+        ConcurrentMap<Long, SegmentInfo> memtable = stripe(groupId).memTable;
+
+        SegmentInfo segmentInfo = memtable.get(groupId);
+
+        if (segmentInfo == null || lastLogIndexKept < 
segmentInfo.firstLogIndexInclusive()) {
+            // If the current memtable does not have information for the given 
group or if we are truncating everything currently present
+            // in the memtable, we need to write a special "empty" SegmentInfo 
into the memtable to override existing persisted data during
+            // search.
+            memtable.put(groupId, new SegmentInfo(lastLogIndexKept + 1));

Review Comment:
   I just don't understand why it should be specified in the comment, this is 
an implementation detail, which is kind of obvious since we can't reduce the 
`logIndexBase`. I wanted to emphasize in this comment that this SegmentInfo is 
special, because there's no entry in the storage for "lastLogIndexKept + 1", 
while most of the time SegmentInfo points to a segment record



##########
modules/raft/src/main/java/org/apache/ignite/internal/raft/storage/segstore/SegmentInfo.java:
##########
@@ -159,4 +172,23 @@ void saveOffsetsTo(ByteBuffer buffer) {
 
         buffer.asIntBuffer().put(offsets.array, 0, offsets.size);
     }
+
+    void truncateSuffix(long lastLogIndexKept) {
+        ArrayWithSize segmentFileOffsets = this.segmentFileOffsets;
+
+        long newSize = max(lastLogIndexKept - logIndexBase + 1, 0);
+
+        if (newSize >= segmentFileOffsets.size()) {

Review Comment:
   I would expect it to never be larger or even equal (it's strange to call 
`truncateSuffix` with the latest index), this is just in case. I can replace 
this check with an assertion



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]

Reply via email to