This is an automated email from the ASF dual-hosted git repository.

sanpwc pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/ignite-3.git


The following commit(s) were added to refs/heads/main by this push:
     new 6a8bd89f6e5 IGNITE-24412 Extend test coverage of basic HA 
functionality (#5220)
6a8bd89f6e5 is described below

commit 6a8bd89f6e5a4c662e95fc31dbaf6dd05717cf1a
Author: Mirza Aliev <[email protected]>
AuthorDate: Thu Feb 13 19:55:15 2025 +0400

    IGNITE-24412 Extend test coverage of basic HA functionality (#5220)
---
 ...bstractHighAvailablePartitionsRecoveryTest.java |  18 +-
 ...AvailablePartitionSequentialRecoveriesTest.java |  16 ++
 ...ilablePartitionsRecoveryByFilterUpdateTest.java | 193 +++++++++++++++++++++
 .../ItHighAvailablePartitionsRecoveryTest.java     |  51 ++++++
 4 files changed, 275 insertions(+), 3 deletions(-)

diff --git 
a/modules/table/src/integrationTest/java/org/apache/ignite/internal/table/distributed/disaster/AbstractHighAvailablePartitionsRecoveryTest.java
 
b/modules/table/src/integrationTest/java/org/apache/ignite/internal/table/distributed/disaster/AbstractHighAvailablePartitionsRecoveryTest.java
index 753da2324e2..55ec350d3fb 100644
--- 
a/modules/table/src/integrationTest/java/org/apache/ignite/internal/table/distributed/disaster/AbstractHighAvailablePartitionsRecoveryTest.java
+++ 
b/modules/table/src/integrationTest/java/org/apache/ignite/internal/table/distributed/disaster/AbstractHighAvailablePartitionsRecoveryTest.java
@@ -54,6 +54,7 @@ import java.util.concurrent.TimeoutException;
 import java.util.concurrent.atomic.AtomicInteger;
 import java.util.concurrent.atomic.AtomicLong;
 import java.util.concurrent.atomic.AtomicReference;
+import java.util.function.Predicate;
 import java.util.stream.Collectors;
 import java.util.stream.IntStream;
 import org.apache.ignite.Ignite;
@@ -476,11 +477,22 @@ public abstract class 
AbstractHighAvailablePartitionsRecoveryTest extends Cluste
 
     void assertValuesPresentOnNodes(HybridTimestamp ts, Table table, 
Integer... indexes) {
         for (Integer index : indexes) {
-            assertValuesPresentOnNode(table, ts, index);
+            assertValuesOnNode(table, ts, index, fut -> fut.join() != null);
         }
     }
 
-    private void assertValuesPresentOnNode(Table table, HybridTimestamp ts, 
int targetNodeIndex) {
+    void assertValuesNotPresentOnNodes(HybridTimestamp ts, Table table, 
Integer... indexes) {
+        for (Integer index : indexes) {
+            assertValuesOnNode(table, ts, index, rowFut -> rowFut.join() == 
null);
+        }
+    }
+
+    private void assertValuesOnNode(
+            Table table,
+            HybridTimestamp ts,
+            int targetNodeIndex,
+            Predicate<CompletableFuture<BinaryRow>> dataCondition
+    ) {
         IgniteImpl targetNode = unwrapIgniteImpl(node(targetNodeIndex));
 
         TableImpl tableImpl = unwrapTableImpl(table);
@@ -491,7 +503,7 @@ public abstract class 
AbstractHighAvailablePartitionsRecoveryTest extends Cluste
                     internalTable.get(marshalKey(tableImpl, 
Tuple.create(of("id", i))), ts, targetNode.node());
             assertThat(fut, willCompleteSuccessfully());
 
-            assertNotNull(fut.join());
+            assertTrue(dataCondition.test(fut));
         }
     }
 
diff --git 
a/modules/table/src/integrationTest/java/org/apache/ignite/internal/table/distributed/disaster/ItHighAvailablePartitionSequentialRecoveriesTest.java
 
b/modules/table/src/integrationTest/java/org/apache/ignite/internal/table/distributed/disaster/ItHighAvailablePartitionSequentialRecoveriesTest.java
index afeede74197..8ad9db355c0 100644
--- 
a/modules/table/src/integrationTest/java/org/apache/ignite/internal/table/distributed/disaster/ItHighAvailablePartitionSequentialRecoveriesTest.java
+++ 
b/modules/table/src/integrationTest/java/org/apache/ignite/internal/table/distributed/disaster/ItHighAvailablePartitionSequentialRecoveriesTest.java
@@ -17,7 +17,13 @@
 
 package org.apache.ignite.internal.table.distributed.disaster;
 
+import static org.hamcrest.MatcherAssert.assertThat;
+import static org.hamcrest.Matchers.empty;
+import static org.hamcrest.Matchers.is;
+
+import java.util.List;
 import org.apache.ignite.internal.app.IgniteImpl;
+import org.apache.ignite.table.Table;
 import org.junit.jupiter.api.Disabled;
 import org.junit.jupiter.api.Test;
 
@@ -40,6 +46,14 @@ public class 
ItHighAvailablePartitionSequentialRecoveriesTest extends AbstractHi
 
         IgniteImpl node = igniteImpl(0);
 
+        Table table = node.tables().table(HA_TABLE_NAME);
+
+        List<Throwable> errors = insertValues(table, 0);
+
+        assertThat(errors, is(empty()));
+
+        assertValuesPresentOnNodes(node.clock().now(), table, 0, 1, 2, 3, 4);
+
         assertRecoveryKeyIsEmpty(node);
 
         stopNodes(2, 3, 4);
@@ -49,5 +63,7 @@ public class ItHighAvailablePartitionSequentialRecoveriesTest 
extends AbstractHi
         stopNode(1);
 
         waitAndAssertStableAssignmentsOfPartitionEqualTo(node, HA_TABLE_NAME, 
PARTITION_IDS, nodeNames(0));
+
+        assertValuesPresentOnNodes(node.clock().now(), table, 0);
     }
 }
diff --git 
a/modules/table/src/integrationTest/java/org/apache/ignite/internal/table/distributed/disaster/ItHighAvailablePartitionsRecoveryByFilterUpdateTest.java
 
b/modules/table/src/integrationTest/java/org/apache/ignite/internal/table/distributed/disaster/ItHighAvailablePartitionsRecoveryByFilterUpdateTest.java
index 18d1a24ed7c..24a7712c44f 100644
--- 
a/modules/table/src/integrationTest/java/org/apache/ignite/internal/table/distributed/disaster/ItHighAvailablePartitionsRecoveryByFilterUpdateTest.java
+++ 
b/modules/table/src/integrationTest/java/org/apache/ignite/internal/table/distributed/disaster/ItHighAvailablePartitionsRecoveryByFilterUpdateTest.java
@@ -17,10 +17,17 @@
 
 package org.apache.ignite.internal.table.distributed.disaster;
 
+import static org.hamcrest.MatcherAssert.assertThat;
+import static org.hamcrest.Matchers.empty;
+import static org.hamcrest.Matchers.is;
+
+import java.util.List;
 import java.util.Set;
 import org.apache.ignite.internal.app.IgniteImpl;
+import org.apache.ignite.table.Table;
 import org.intellij.lang.annotations.Language;
 import org.jetbrains.annotations.Nullable;
+import org.junit.jupiter.api.Disabled;
 import org.junit.jupiter.api.Test;
 
 /** Test suite for the cases with a recovery of the group replication factor 
after reset by zone filter update. */
@@ -32,6 +39,8 @@ public class 
ItHighAvailablePartitionsRecoveryByFilterUpdateTest extends Abstrac
 
     private static final String GLOBAL_NODES_CONFIG = nodeConfig("{zone = 
global}", null);
 
+    private static final String CUSTOM_NODES_CONFIG = nodeConfig("{zone = 
custom}", null);
+
     private static final String ROCKS_NODES_CONFIG = nodeConfig(null, 
"{lru_rocks.engine = rocksdb}");
 
     private static final String AIPERSIST_NODES_CONFIG = nodeConfig(null, 
"{segmented_aipersist.engine = aipersist}");
@@ -122,6 +131,190 @@ public class 
ItHighAvailablePartitionsRecoveryByFilterUpdateTest extends Abstrac
         
waitThatAllRebalancesHaveFinishedAndStableAssignmentsEqualsToExpected(node, 
HA_TABLE_NAME, PARTITION_IDS, nodeNames(0));
     }
 
+    /**
+     * Test scenario.
+     * <ol>
+     *   <li>Create a zone in HA mode (7 nodes, A, B, C, D, E, F, G) - phase 
1</li>
+     *   <li>Insert data and wait for replication to all nodes.</li>
+     *   <li>Stop a majority of nodes (4 nodes A, B, C, D)</li>
+     *   <li>Wait for the partition to become available (E, F, G), no new 
writes - phase 2</li>
+     *   <li>Stop a majority of nodes once again (E, F)</li>
+     *   <li>Wait for the partition to become available (G), no new writes - 
phase 3</li>
+     *   <li>Stop the last node G</li>
+     *   <li>Start one node from phase 1, A</li>
+     *   <li>Start one node from phase 3, G</li>
+     *   <li>Start one node from phase 2, E</li>
+     *   <li>No data should be lost (reads from partition on A and E must be 
consistent with G)</li>
+     * </ol>
+     *
+     * @throws Exception If failed.
+     */
+    @Test
+    @Disabled("https://issues.apache.org/jira/browse/IGNITE-24513")
+    void testSeveralHaResetsAndSomeNodeRestart() throws Exception {
+        for (int i = 1; i < 8; i++) {
+            startNode(i, CUSTOM_NODES_CONFIG);
+        }
+
+        String globalFilter = "$[?(@.zone == \"custom\")]";
+        createHaZoneWithTable(7, globalFilter, nodeNames(1, 2, 3, 4, 5, 6, 7));
+
+        IgniteImpl node0 = igniteImpl(0);
+        Table table = node0.tables().table(HA_TABLE_NAME);
+
+        List<Throwable> errors = insertValues(table, 0);
+        assertThat(errors, is(empty()));
+        assertValuesPresentOnNodes(node0.clock().now(), table, 1, 2, 3, 4, 5, 
6, 7);
+
+        // Stop 4 nodes (A, B, C, D)
+        stopNodes(4, 5, 6, 7);
+
+        // Wait for the partition to become available on the remaining nodes 
(E, F, G)
+        waitAndAssertStableAssignmentsOfPartitionEqualTo(node0, HA_TABLE_NAME, 
PARTITION_IDS, nodeNames(1, 2, 3));
+
+        // Stop 2 more nodes (E, F)
+        stopNodes(2, 3);
+
+        // Wait for the partition to become available on the last node (G)
+        waitAndAssertStableAssignmentsOfPartitionEqualTo(node0, HA_TABLE_NAME, 
PARTITION_IDS, nodeNames(1));
+
+        // Stop the last node (G)
+        stopNode(1);
+
+        // Start one node from phase 1 (A)
+        startNode(4);
+
+        // Start one node from phase 3 (G)
+        startNode(1);
+
+        //  Start one node from phase 2 (E)
+        startNode(2);
+
+        waitAndAssertStableAssignmentsOfPartitionEqualTo(node0, HA_TABLE_NAME, 
PARTITION_IDS, nodeNames(1, 2, 4));
+
+        // Verify that no data is lost and reads from partition on nodes A and 
E are consistent with node G
+        assertValuesPresentOnNodes(node0.clock().now(), table, 1, 2, 4);
+    }
+
+    /**
+     * Test scenario, where we start nodes from the previous assignments 
chain, with new writes.
+     * The whole scenario will be possible once the second phase of the HA feature 
is implemented.
+     *
+     * <ol>
+     *   <li>Create a zone in HA mode with 7 nodes (A, B, C, D, E, F, G).</li>
+     *   <li>Stop a majority of nodes (4 nodes A, B, C, D).</li>
+     *   <li>Wait for the partition to become available on the remaining nodes 
(E, F, G).</li>
+     *   <li>Stop a majority of nodes (E, F).</li>
+     *   <li>Write data to node G.</li>
+     *   <li>Stop node G.</li>
+     *   <li>Start nodes E and F.</li>
+     *   <li>Nodes should wait for node G to come back online.</li>
+     * </ol>
+     *
+     * @throws Exception If failed.
+     */
+    @Test
+    @Disabled("https://issues.apache.org/jira/browse/IGNITE-24509")
+    void testNodesWaitForLastNodeFromChainToComeBackOnlineAfterMajorityStops() 
throws Exception {
+        for (int i = 1; i < 8; i++) {
+            startNode(i, CUSTOM_NODES_CONFIG);
+        }
+
+        String globalFilter = "$[?(@.zone == \"custom\")]";
+        createHaZoneWithTable(7, globalFilter, nodeNames(1, 2, 3, 4, 5, 6, 7));
+
+        IgniteImpl node0 = igniteImpl(0);
+        Table table = node0.tables().table(HA_TABLE_NAME);
+
+        List<Throwable> errors = insertValues(table, 0);
+        assertThat(errors, is(empty()));
+        assertValuesPresentOnNodes(node0.clock().now(), table, 1, 2, 3, 4, 5, 
6, 7);
+
+        // Stop 4 nodes (A, B, C, D)
+        stopNodes(4, 5, 6, 7);
+
+        // Wait for the partition to become available on the remaining nodes 
(E, F, G)
+        waitAndAssertStableAssignmentsOfPartitionEqualTo(node0, HA_TABLE_NAME, 
PARTITION_IDS, nodeNames(1, 2, 3));
+
+        // Stop 2 more nodes (E, F)
+        stopNodes(2, 3);
+
+        // Wait for the partition to become available on the last node (G)
+        waitAndAssertStableAssignmentsOfPartitionEqualTo(node0, HA_TABLE_NAME, 
PARTITION_IDS, nodeNames(1));
+
+        errors = insertValues(table, 1000);
+        assertThat(errors, is(empty()));
+
+        assertValuesPresentOnNodes(node0.clock().now(), table, 1);
+
+        // Stop the last node (G)
+        stopNode(1);
+
+        // Start one node from phase 2 (E)
+        startNode(2);
+
+        //  Start one node from phase 2 (F)
+        startNode(3);
+
+        assertValuesNotPresentOnNodes(node0.clock().now(), table,  2, 3);
+    }
+
+    /**
+     * Test scenario, where we start nodes from the previous assignments 
chain, without new writes.
+     * The whole scenario will be possible once the second phase of the HA feature 
is implemented.
+     * <ol>
+     *   <li>Create a zone in HA mode with 7 nodes (A, B, C, D, E, F, G).</li>
+     *   <li>Stop a majority of nodes (4 nodes A, B, C, D).</li>
+     *   <li>Wait for the partition to become available on the remaining nodes 
(E, F, G).</li>
+     *   <li>Stop a majority of nodes (E, F).</li>
+     *   <li>Stop node G.</li>
+     *   <li>Start nodes E and F.</li>
+     *   <li>Nodes should wait for nodes A, B, C, D, E, F, G to come back 
online.</li>
+     * </ol>
+     *
+     * @throws Exception If failed.
+     */
+    @Test
+    @Disabled("https://issues.apache.org/jira/browse/IGNITE-24509")
+    void 
testNodesWaitForNodesFromGracefulChainToComeBackOnlineAfterMajorityStops() 
throws Exception {
+        for (int i = 1; i < 8; i++) {
+            startNode(i, CUSTOM_NODES_CONFIG);
+        }
+
+        String globalFilter = "$[?(@.zone == \"custom\")]";
+        createHaZoneWithTable(7, globalFilter, nodeNames(1, 2, 3, 4, 5, 6, 7));
+
+        IgniteImpl node0 = igniteImpl(0);
+        Table table = node0.tables().table(HA_TABLE_NAME);
+
+        List<Throwable> errors = insertValues(table, 0);
+        assertThat(errors, is(empty()));
+        assertValuesPresentOnNodes(node0.clock().now(), table, 1, 2, 3, 4, 5, 
6, 7);
+
+        // Stop 4 nodes (A, B, C, D)
+        stopNodes(4, 5, 6, 7);
+
+        // Wait for the partition to become available on the remaining nodes 
(E, F, G)
+        waitAndAssertStableAssignmentsOfPartitionEqualTo(node0, HA_TABLE_NAME, 
PARTITION_IDS, nodeNames(1, 2, 3));
+
+        // Stop 2 more nodes (E, F)
+        stopNodes(2, 3);
+
+        // Wait for the partition to become available on the last node (G)
+        waitAndAssertStableAssignmentsOfPartitionEqualTo(node0, HA_TABLE_NAME, 
PARTITION_IDS, nodeNames(1));
+
+        // Stop the last node (G)
+        stopNode(1);
+
+        // Start one node from phase 2 (E)
+        startNode(2);
+
+        //  Start one node from phase 2 (F)
+        startNode(3);
+
+        assertValuesNotPresentOnNodes(node0.clock().now(), table,  2, 3);
+    }
+
     private void alterZoneSql(String filter, String zoneName) {
         executeSql(String.format("ALTER ZONE \"%s\" SET \"DATA_NODES_FILTER\" 
= '%s'", zoneName, filter));
     }
diff --git 
a/modules/table/src/integrationTest/java/org/apache/ignite/internal/table/distributed/disaster/ItHighAvailablePartitionsRecoveryTest.java
 
b/modules/table/src/integrationTest/java/org/apache/ignite/internal/table/distributed/disaster/ItHighAvailablePartitionsRecoveryTest.java
index 5b2424b24a5..3e8f78565e8 100644
--- 
a/modules/table/src/integrationTest/java/org/apache/ignite/internal/table/distributed/disaster/ItHighAvailablePartitionsRecoveryTest.java
+++ 
b/modules/table/src/integrationTest/java/org/apache/ignite/internal/table/distributed/disaster/ItHighAvailablePartitionsRecoveryTest.java
@@ -21,6 +21,7 @@ import static java.lang.String.format;
 import static org.apache.ignite.internal.TestWrappers.unwrapIgniteImpl;
 import static 
org.apache.ignite.internal.catalog.commands.CatalogUtils.IMMEDIATE_TIMER_VALUE;
 import static 
org.apache.ignite.internal.catalog.commands.CatalogUtils.INFINITE_TIMER_VALUE;
+import static 
org.apache.ignite.internal.distributionzones.DistributionZonesTestUtil.alterZone;
 import static 
org.apache.ignite.internal.distributionzones.DistributionZonesUtil.PARTITION_DISTRIBUTION_RESET_TIMEOUT;
 import static 
org.apache.ignite.internal.distributionzones.DistributionZonesUtil.zoneScaleDownChangeTriggerKey;
 import static 
org.apache.ignite.internal.testframework.IgniteTestUtils.waitForCondition;
@@ -47,6 +48,7 @@ import 
org.apache.ignite.internal.distributionzones.DistributionZonesTestUtil;
 import org.apache.ignite.internal.metastorage.impl.MetaStorageManagerImpl;
 import org.apache.ignite.internal.metastorage.server.KeyValueStorage;
 import org.apache.ignite.table.Table;
+import org.junit.jupiter.api.Disabled;
 import org.junit.jupiter.api.Test;
 
 /** Test for the HA zones recovery. */
@@ -257,6 +259,12 @@ public class ItHighAvailablePartitionsRecoveryTest extends 
AbstractHighAvailable
 
         stopNodes(1, 2, 3, 4);
 
+        Table table = node.tables().table(HA_TABLE_NAME);
+
+        List<Throwable> errors = insertValues(table, 0);
+
+        assertThat(errors, is(empty()));
+
         waitAndAssertRecoveryKeyIsNotEmpty(node);
 
         assertRecoveryRequestForHaZoneTable(node);
@@ -280,6 +288,8 @@ public class ItHighAvailablePartitionsRecoveryTest extends 
AbstractHighAvailable
                 PARTITION_IDS,
                 Set.of(node.name(), node1.name(), node2.name())
         );
+
+        assertValuesPresentOnNodes(node.clock().now(), table, 0, 1, 2);
     }
 
     @Test
@@ -397,4 +407,45 @@ public class ItHighAvailablePartitionsRecoveryTest extends 
AbstractHighAvailable
                 keyValueStorage
         );
     }
+
+    /**
+     * Test scenario.
+     * <ol>
+     *   <li>Create a zone in HA mode with 7 nodes (A, B, C, D, E, F, G).</li>
+     *   <li>Insert data and wait for replication to all nodes.</li>
+     *   <li>Stop a majority of nodes (A, B, C, D).</li>
+     *   <li>Wait for the partition to become available on the remaining nodes 
(E, F, G).</li>
+     *   <li>Start node A.</li>
+     *   <li>Verify that node A cleans up its state.</li>
+     * </ol>
+     *
+     * @throws Exception If failed.
+     */
+    @Test
+    @Disabled("https://issues.apache.org/jira/browse/IGNITE-24509")
+    void testNodeStateCleanupAfterRestartInHaMode() throws Exception {
+        startNode(3);
+        startNode(4);
+        startNode(5);
+        startNode(6);
+
+        createHaZoneWithTable();
+
+        IgniteImpl node0 = igniteImpl(0);
+        Table table = node0.tables().table(HA_TABLE_NAME);
+
+        List<Throwable> errors = insertValues(table, 0);
+        assertThat(errors, is(empty()));
+        assertValuesPresentOnNodes(node0.clock().now(), table, 0, 1, 2, 3, 4, 
5, 6);
+
+        alterZone(node0.catalogManager(), HA_ZONE_NAME, INFINITE_TIMER_VALUE, 
null, null);
+
+        stopNodes(3, 4, 5, 6);
+
+        waitAndAssertStableAssignmentsOfPartitionEqualTo(node0, HA_TABLE_NAME, 
PARTITION_IDS, nodeNames(0, 1, 2));
+
+        startNode(6);
+
+        assertValuesNotPresentOnNodes(node0.clock().now(), table, 6);
+    }
 }

Reply via email to