This is an automated email from the ASF dual-hosted git repository.

tkalkirill pushed a commit to branch ignite-26693
in repository https://gitbox.apache.org/repos/asf/ignite-3.git

commit 0c8dbddaabd592efba83660566a955ee2e02be7d
Author: Kirill Tkalenko <[email protected]>
AuthorDate: Tue Oct 14 17:47:42 2025 +0300

    IGNITE-26693 wip
---
 .../persistence/checkpoint/CheckpointWorkflow.java | 10 ++--
 .../PersistentPageMemoryMvTableStorageTest.java    | 64 +++++++++++++++++++++-
 2 files changed, 68 insertions(+), 6 deletions(-)

diff --git 
a/modules/page-memory/src/main/java/org/apache/ignite/internal/pagememory/persistence/checkpoint/CheckpointWorkflow.java
 
b/modules/page-memory/src/main/java/org/apache/ignite/internal/pagememory/persistence/checkpoint/CheckpointWorkflow.java
index a915fec0011..b80299c3347 100644
--- 
a/modules/page-memory/src/main/java/org/apache/ignite/internal/pagememory/persistence/checkpoint/CheckpointWorkflow.java
+++ 
b/modules/page-memory/src/main/java/org/apache/ignite/internal/pagememory/persistence/checkpoint/CheckpointWorkflow.java
@@ -193,7 +193,7 @@ class CheckpointWorkflow {
      * @param curr Current checkpoint event info.
      * @param tracker Checkpoint metrics tracker.
      * @param updateHeartbeat Update heartbeat callback.
-     * @param onReleaseWriteLock Callback on write lock release.
+     * @param onBeforeWriteLockRelease Callback before write lock release.
      * @return Checkpoint collected info.
      * @throws IgniteInternalCheckedException If failed.
      */
@@ -202,7 +202,7 @@ class CheckpointWorkflow {
             CheckpointProgressImpl curr,
             CheckpointMetricsTracker tracker,
             Runnable updateHeartbeat,
-            Runnable onReleaseWriteLock
+            Runnable onBeforeWriteLockRelease
     ) throws IgniteInternalCheckedException {
         List<CheckpointListener> listeners = 
collectCheckpointListeners(dataRegions);
 
@@ -267,11 +267,13 @@ class CheckpointWorkflow {
 
             curr.transitTo(PAGES_SNAPSHOT_TAKEN);
         } finally {
+            // It must be called strictly before releasing the write lock, 
otherwise it can lead to a very rare race condition on updating
+            // the partition meta and writing it to disk, which can cause 
problems when restarting the partition.
+            onBeforeWriteLockRelease.run();
+
             checkpointReadWriteLock.writeUnlock();
 
             tracker.onWriteLockHoldEnd();
-
-            onReleaseWriteLock.run();
         }
 
         curr.transitTo(LOCK_RELEASED);
diff --git 
a/modules/storage-page-memory/src/test/java/org/apache/ignite/internal/storage/pagememory/PersistentPageMemoryMvTableStorageTest.java
 
b/modules/storage-page-memory/src/test/java/org/apache/ignite/internal/storage/pagememory/PersistentPageMemoryMvTableStorageTest.java
index 3e4d419a51b..9369c26a845 100644
--- 
a/modules/storage-page-memory/src/test/java/org/apache/ignite/internal/storage/pagememory/PersistentPageMemoryMvTableStorageTest.java
+++ 
b/modules/storage-page-memory/src/test/java/org/apache/ignite/internal/storage/pagememory/PersistentPageMemoryMvTableStorageTest.java
@@ -23,6 +23,7 @@ import static 
org.apache.ignite.internal.catalog.commands.CatalogUtils.DEFAULT_P
 import static 
org.apache.ignite.internal.pagememory.persistence.checkpoint.CheckpointState.FINISHED;
 import static 
org.apache.ignite.internal.pagememory.persistence.checkpoint.CheckpointState.PAGES_SORTED;
 import static 
org.apache.ignite.internal.storage.pagememory.PersistentPageMemoryStorageEngine.ENGINE_NAME;
+import static 
org.apache.ignite.internal.testframework.IgniteTestUtils.runAsync;
 import static org.apache.ignite.internal.testframework.IgniteTestUtils.runRace;
 import static 
org.apache.ignite.internal.testframework.matchers.CompletableFutureMatcher.willCompleteSuccessfully;
 import static org.apache.ignite.internal.util.ArrayUtils.BYTE_EMPTY_ARRAY;
@@ -30,15 +31,20 @@ import static org.hamcrest.MatcherAssert.assertThat;
 import static org.hamcrest.Matchers.allOf;
 import static org.hamcrest.Matchers.equalTo;
 import static org.hamcrest.Matchers.greaterThan;
+import static org.junit.jupiter.api.Assertions.assertDoesNotThrow;
 import static org.junit.jupiter.api.Assertions.assertEquals;
 import static org.junit.jupiter.api.Assertions.assertInstanceOf;
 import static org.junit.jupiter.api.Assertions.assertNotNull;
+import static org.junit.jupiter.api.Assertions.assertTrue;
 import static org.mockito.Mockito.mock;
 
 import java.nio.file.Path;
 import java.util.List;
+import java.util.UUID;
 import java.util.concurrent.CompletableFuture;
+import java.util.concurrent.CountDownLatch;
 import java.util.concurrent.ExecutorService;
+import java.util.concurrent.TimeUnit;
 import java.util.function.Supplier;
 import java.util.stream.IntStream;
 import org.apache.ignite.internal.components.LogSyncer;
@@ -52,6 +58,7 @@ import org.apache.ignite.internal.metrics.TestMetricManager;
 import org.apache.ignite.internal.pagememory.io.PageIoRegistry;
 import org.apache.ignite.internal.pagememory.persistence.GroupPartitionId;
 import 
org.apache.ignite.internal.pagememory.persistence.checkpoint.CheckpointProgress;
+import 
org.apache.ignite.internal.pagememory.persistence.checkpoint.CheckpointState;
 import 
org.apache.ignite.internal.pagememory.persistence.checkpoint.CheckpointTimeoutLock;
 import org.apache.ignite.internal.pagememory.persistence.store.FilePageStore;
 import org.apache.ignite.internal.schema.BinaryRow;
@@ -63,6 +70,7 @@ import 
org.apache.ignite.internal.storage.configurations.StorageProfileConfigura
 import org.apache.ignite.internal.storage.engine.MvPartitionMeta;
 import org.apache.ignite.internal.storage.engine.MvTableStorage;
 import org.apache.ignite.internal.storage.engine.StorageTableDescriptor;
+import org.apache.ignite.internal.storage.lease.LeaseInfo;
 import 
org.apache.ignite.internal.storage.pagememory.configuration.schema.PersistentPageMemoryProfileConfiguration;
 import 
org.apache.ignite.internal.storage.pagememory.mv.PersistentPageMemoryMvPartitionStorage;
 import org.apache.ignite.internal.testframework.ExecutorServiceExtension;
@@ -91,14 +99,19 @@ public class PersistentPageMemoryMvTableStorageTest extends 
AbstractMvTableStora
     @InjectExecutorService
     private ExecutorService executorService;
 
-    private final TestMetricManager metricManager = new TestMetricManager();
+    private TestMetricManager metricManager;
+
+    @WorkDirectory
+    private Path workDir;
 
     @BeforeEach
-    void setUp(@WorkDirectory Path workDir) {
+    void setUp() {
         var ioRegistry = new PageIoRegistry();
 
         ioRegistry.loadFromServiceLoader();
 
+        metricManager = new TestMetricManager();
+
         engine = new PersistentPageMemoryStorageEngine(
                 "test",
                 metricManager,
@@ -452,6 +465,53 @@ public class PersistentPageMemoryMvTableStorageTest 
extends AbstractMvTableStora
         }
     }
 
+    /**
+     * Checks for a very rare race condition where, after releasing the 
checkpoint write lock, the partition meta is updated with an
+     * incorrect empty last checkpoint ID ({@code lastCheckpointId == 
null}), which can lead to error
+     * "IGN-CMN-65535 Unknown page IO type: 0" on node restart.
+     */
+    @Test
+    void testSuccessfulPartitionRestartAfterParallelUpdateLeaseAndCheckpoint() 
throws Exception {
+        for (int i = 0; i < 100; i++) {
+            MvPartitionStorage mvPartition = 
getOrCreateMvPartition(PARTITION_ID);
+
+            addWriteCommitted(mvPartition);
+
+            CountDownLatch readyToUpdateLeaseLatch = new CountDownLatch(1);
+            CountDownLatch updateLeaseLatch = new CountDownLatch(1);
+
+            CompletableFuture<Void> updateLeaseFuture = runAsync(() -> {
+                readyToUpdateLeaseLatch.countDown();
+
+                assertTrue(updateLeaseLatch.await(10, TimeUnit.SECONDS));
+
+                mvPartition.runConsistently(locker -> {
+                    mvPartition.updateLease(new LeaseInfo(100, 
UUID.randomUUID(), "node"));
+
+                    return null;
+                });
+            });
+
+            assertTrue(readyToUpdateLeaseLatch.await(10, TimeUnit.SECONDS));
+
+            CheckpointProgress checkpointProgress = 
engine.checkpointManager().forceCheckpoint("test");
+
+            CompletableFuture<Void> updateLeaseLatchFuture = checkpointProgress
+                    .futureFor(CheckpointState.LOCK_TAKEN)
+                    .thenAccept(unused -> updateLeaseLatch.countDown());
+
+            assertThat(
+                    CompletableFuture.allOf(updateLeaseLatchFuture, 
updateLeaseFuture, checkpointProgress.futureFor(FINISHED)),
+                    willCompleteSuccessfully()
+            );
+
+            tearDown();
+            setUp();
+
+            assertDoesNotThrow(() -> getOrCreateMvPartition(PARTITION_ID));
+        }
+    }
+
     private CompletableFuture<Void> forceCheckpointAsync() {
         return 
engine.checkpointManager().forceCheckpoint("test").futureFor(FINISHED);
     }

Reply via email to