Cyrill commented on code in PR #6408: URL: https://github.com/apache/ignite-3/pull/6408#discussion_r2285232579
########## modules/cluster-management/src/integrationTest/java/org/apache/ignite/internal/cluster/management/ItClusterManagerTest.java: ########## @@ -290,6 +296,126 @@ void testInitInvalidNodesAsync() throws Exception { ); } + @Test + void testNoConfigurationReordering() throws Exception { + startCluster(5); + + ClusterManagementGroupManager clusterManager = cluster.get(0).clusterManager(); + + List<String> nodes = cluster.stream().map(MockNode::name).limit(3).collect(Collectors.toList()); + + // successful init + assertThat( + clusterManager.initClusterAsync(nodes, List.of(), "cluster"), + willCompleteSuccessfully() + ); + + for (MockNode node : cluster) { + assertThat(node.clusterManager().joinFuture(), willCompleteSuccessfully()); + } + + // Wait for the initial cluster reconfiguration to complete. + assertLearnerSize(2); + + String node3Name = cluster.get(3).name(); + + AtomicBoolean blockMessage = new AtomicBoolean(true); + + // Block the first reconfiguration to simulate network issues. + // We stop node 4, that should produce a ResetLearnersRequest with only one learner - node 3. + blockMessage((recipientName, networkMessage) -> { + if (!blockMessage.get()) { + return false; + } + + if (networkMessage instanceof ResetLearnersRequest) { + ResetLearnersRequest rlr = (ResetLearnersRequest) networkMessage; + + if (rlr.learnersList().contains(node3Name) && rlr.learnersList().size() == 1) { + logger().info("Block message {} to {}", networkMessage, recipientName); + return true; + } + } + + return false; + }); + + logger().info("Stop last node [4]."); + MockNode last = cluster.remove(cluster.size() - 1); + stopNodes(List.of(last)); + + logger().info("Stop last node [3]."); + MockNode last2 = cluster.remove(cluster.size() - 1); + stopNodes(List.of(last2)); + + // There should be still two learner nodes since the previous reconfiguration was blocked. 
+ assertLearnerSize(2); + + // Start nodes 3 and 4 back, so that the topology is back to normal and no node availability issues are expected. + logger().info("Start nodes [3] and [4]."); + // Start node 4 first to avoid clashing with the earlier blocked message. + startNode(4, 5); + startNode(3, 5); + + logger().info("Nodes started."); + + // Wait for the nodes 3 and 4 to start. + for (MockNode node : cluster) { + assertThat(node.clusterManager().joinFuture(), willCompleteSuccessfully()); + } + + assertLearnerSize(2); + + for (MockNode node : cluster) { + Boolean leader = node.clusterManager().isCmgLeader().get(); + if (leader) { + logger().info("lerner nodes {}", node.clusterManager().learnerNodes().get()); + } + } + + // Unblock the first reconfiguration. + logger().info("Unblock message."); + blockMessage.set(false); + + assertLearnerSize(2); + } + + private void assertLearnerSize(int size) throws InterruptedException { + assertTrue(waitForCondition(() -> + cluster.stream() + .filter(node -> { + try { + return node.clusterManager().isCmgLeader().get(); Review Comment: done -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: notifications-unsubscr...@ignite.apache.org For queries about this service, please contact Infrastructure at: us...@infra.apache.org