denis-chudov commented on code in PR #2036:
URL: https://github.com/apache/ignite-3/pull/2036#discussion_r1187220388


##########
modules/placement-driver/src/integrationTest/java/org/apache/ignite/internal/placementdriver/MultiActorPlacementDriverTest.java:
##########
@@ -101,16 +101,16 @@ public class MultiActorPlacementDriverTest extends 
IgniteAbstractTest {
     @InjectConfiguration
     private DistributionZonesConfiguration dstZnsCfg;
 
-    List<String> placementDriverNodeNames;
-    List<String> nodeNames;
+    private List<String> placementDriverNodeNames;
+    private List<String> nodeNames;
 
-    List<Closeable> servicesToClose;
+    private List<Closeable> servicesToClose;
 
     /** The manager is used to read a data from Meta storage in the tests. */
-    MetaStorageManagerImpl metaStorageManager;
+    private MetaStorageManagerImpl metaStorageManager;
 
     /** Cluster service by node name. */
-    Map<String, ClusterService> clusterServices;
+    private Map<String, ClusterService> clusterServices;
 
     private TestInfo testInfo;

Review Comment:
   seems it's never used



##########
modules/replicator/src/integrationTest/java/org/apache/ignite/internal/replicator/ItPlacementDriverReplicaSideTest.java:
##########
@@ -0,0 +1,460 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.ignite.internal.replicator;
+
+import static java.util.concurrent.CompletableFuture.allOf;
+import static java.util.concurrent.CompletableFuture.completedFuture;
+import static java.util.stream.Collectors.toSet;
+import static 
org.apache.ignite.internal.raft.PeersAndLearners.fromConsistentIds;
+import static 
org.apache.ignite.internal.testframework.IgniteTestUtils.testNodeName;
+import static 
org.apache.ignite.internal.testframework.IgniteTestUtils.waitForCondition;
+import static org.junit.jupiter.api.Assertions.assertEquals;
+import static org.junit.jupiter.api.Assertions.assertNotNull;
+import static org.junit.jupiter.api.Assertions.assertTrue;
+import static org.junit.jupiter.api.Assertions.fail;
+import static org.mockito.Mockito.mock;
+import static org.mockito.Mockito.when;
+
+import java.io.Closeable;
+import java.util.ArrayList;
+import java.util.Collections;
+import java.util.HashMap;
+import java.util.HashSet;
+import java.util.List;
+import java.util.Map;
+import java.util.Set;
+import java.util.concurrent.CompletableFuture;
+import java.util.concurrent.ConcurrentHashMap;
+import java.util.stream.Collectors;
+import java.util.stream.IntStream;
+import 
org.apache.ignite.internal.cluster.management.ClusterManagementGroupManager;
+import 
org.apache.ignite.internal.configuration.testframework.ConfigurationExtension;
+import 
org.apache.ignite.internal.configuration.testframework.InjectConfiguration;
+import org.apache.ignite.internal.hlc.HybridClock;
+import org.apache.ignite.internal.hlc.HybridClockImpl;
+import 
org.apache.ignite.internal.placementdriver.message.PlacementDriverActorMessage;
+import 
org.apache.ignite.internal.placementdriver.message.PlacementDriverMessageGroup;
+import 
org.apache.ignite.internal.placementdriver.message.StopLeaseProlongationMessage;
+import org.apache.ignite.internal.raft.Loza;
+import org.apache.ignite.internal.raft.Peer;
+import org.apache.ignite.internal.raft.RaftGroupEventsListener;
+import org.apache.ignite.internal.raft.RaftNodeId;
+import org.apache.ignite.internal.raft.TestRaftGroupListener;
+import org.apache.ignite.internal.raft.client.TopologyAwareRaftGroupService;
+import 
org.apache.ignite.internal.raft.client.TopologyAwareRaftGroupServiceFactory;
+import org.apache.ignite.internal.raft.configuration.RaftConfiguration;
+import org.apache.ignite.internal.raft.server.RaftGroupOptions;
+import org.apache.ignite.internal.replicator.message.ReplicaMessageTestGroup;
+import org.apache.ignite.internal.replicator.message.ReplicaMessagesFactory;
+import 
org.apache.ignite.internal.replicator.message.TestReplicaMessagesFactory;
+import org.apache.ignite.internal.testframework.IgniteAbstractTest;
+import org.apache.ignite.internal.topology.LogicalTopologyServiceTestImpl;
+import org.apache.ignite.internal.util.PendingComparableValuesTracker;
+import org.apache.ignite.lang.IgniteTriConsumer;
+import org.apache.ignite.lang.NodeStoppingException;
+import org.apache.ignite.network.ClusterService;
+import org.apache.ignite.network.NetworkAddress;
+import org.apache.ignite.network.NetworkMessageHandler;
+import org.apache.ignite.network.StaticNodeFinder;
+import org.apache.ignite.raft.jraft.rpc.impl.RaftGroupEventsClientListener;
+import org.apache.ignite.utils.ClusterServiceTestUtils;
+import org.junit.jupiter.api.AfterEach;
+import org.junit.jupiter.api.BeforeEach;
+import org.junit.jupiter.api.Test;
+import org.junit.jupiter.api.TestInfo;
+import org.junit.jupiter.api.extension.ExtendWith;
+
+/**
+ * These test are using an honest connection to test interconnection between 
replicas with placement driver.
+ */
+@ExtendWith(ConfigurationExtension.class)
+public class ItPlacementDriverReplicaSideTest extends IgniteAbstractTest {
+    public static final int BASE_PORT = 1234;
+
+    private static final TestReplicationGroupId GROUP_ID = new 
TestReplicationGroupId("group_1");
+
+    private static final ReplicaMessagesFactory REPLICA_MESSAGES_FACTORY = new 
ReplicaMessagesFactory();
+
+    private static final TestReplicaMessagesFactory 
TEST_REPLICA_MESSAGES_FACTORY = new TestReplicaMessagesFactory();
+
+    @InjectConfiguration("mock {retryTimeout=2000, responseTimeout=1000}")
+    private RaftConfiguration raftConfiguration;
+
+    private HybridClock clock = new HybridClockImpl();
+
+    private Set<String> placementDriverNodeNames;
+    private Set<String> nodeNames;
+
+    /** This closure handles {@link StopLeaseProlongationMessage} to check the 
replica behavior. */
+    private IgniteTriConsumer<StopLeaseProlongationMessage, String, String> 
denyLeaseHandler;
+
+    /** Cluster service by node name. */
+    private Map<String, ClusterService> clusterServices;
+
+    private Map<String, ReplicaManager> replicaManagers = new HashMap<>();
+    private Map<String, Loza> raftManagers = new HashMap<>();
+    private Map<String, TopologyAwareRaftGroupServiceFactory> 
raftClientFactory = new HashMap<>();
+
+    /** List of services to have to close before the test will be completed. */
+    private List<Closeable> servicesToClose = new ArrayList<>();
+
+    @BeforeEach
+    public void beforeTest(TestInfo testInfo) {
+        placementDriverNodeNames = IntStream.range(BASE_PORT, BASE_PORT + 
3).mapToObj(port -> testNodeName(testInfo, port))
+                .collect(toSet());
+        nodeNames = IntStream.range(BASE_PORT, BASE_PORT + 5).mapToObj(port -> 
testNodeName(testInfo, port))
+                .collect(toSet());
+
+        clusterServices = startNodes();
+
+        var cmgManager = mock(ClusterManagementGroupManager.class);
+
+        
when(cmgManager.metaStorageNodes()).thenReturn(completedFuture(placementDriverNodeNames));
+
+        for (String nodeName : nodeNames) {
+            var clusterService = clusterServices.get(nodeName);
+
+            RaftGroupEventsClientListener eventsClientListener = new 
RaftGroupEventsClientListener();
+
+            var raftManager = new Loza(
+                    clusterService,
+                    raftConfiguration,
+                    workDir.resolve(nodeName + "_loza"),
+                    clock,
+                    eventsClientListener
+            );
+
+            raftManagers.put(nodeName, raftManager);
+
+            TopologyAwareRaftGroupServiceFactory 
topologyAwareRaftGroupServiceFactory = new TopologyAwareRaftGroupServiceFactory(
+                    clusterService,
+                    new LogicalTopologyServiceTestImpl(clusterService),
+                    Loza.FACTORY,
+                    eventsClientListener
+            );
+
+            raftClientFactory.put(nodeName, 
topologyAwareRaftGroupServiceFactory);
+
+            var replicaManager = new ReplicaManager(
+                    clusterService,
+                    cmgManager,
+                    clock,
+                    Set.of(ReplicaMessageTestGroup.class)
+            );
+
+            replicaManagers.put(nodeName, replicaManager);
+
+            clusterService.start();
+            raftManager.start();
+            replicaManager.start();
+
+            servicesToClose.add(() -> {
+                try {
+                    replicaManager.beforeNodeStop();
+                    raftManager.beforeNodeStop();
+                    clusterService.beforeNodeStop();
+
+                    replicaManager.stop();
+                    raftManager.stop();
+                    clusterService.stop();
+                } catch (Exception e) {
+                    log.info("Fail to stop services [node={}]", e, nodeName);
+                }
+            });
+        }
+    }
+
+    @AfterEach
+    public void afterTest() throws Exception {
+        for (Closeable cl : servicesToClose) {
+            cl.close();
+        }
+    }
+
+    /**
+     * Starts cluster nodes.
+     *
+     * @return Cluster services.
+     */
+    public Map<String, ClusterService> startNodes() {
+        var res = new HashMap<String, ClusterService>(nodeNames.size());
+
+        var nodeFinder = new StaticNodeFinder(IntStream.range(BASE_PORT, 
BASE_PORT + 5)
+                .mapToObj(p -> new NetworkAddress("localhost", p))
+                .collect(Collectors.toList()));
+
+        int port = BASE_PORT;
+
+        for (String nodeName : nodeNames) {
+            var srvc = ClusterServiceTestUtils.clusterService(nodeName, 
port++, nodeFinder);
+
+            
srvc.messagingService().addMessageHandler(PlacementDriverMessageGroup.class, 
leaseDenyMessageHandler(srvc));
+
+            res.put(nodeName, srvc);
+        }
+
+        return res;
+    }
+
+    /**
+     * Creates a network handler to intercept {@link 
StopLeaseProlongationMessage}.
+     *
+     * @param handlerService Cluster service to handle message.
+     * @return Network handler.
+     */
+    private NetworkMessageHandler leaseDenyMessageHandler(ClusterService 
handlerService) {
+        return (msg, sender, correlationId) -> {
+            if (!(msg instanceof PlacementDriverActorMessage)) {
+                return;
+            }
+
+            var handlerNode = handlerService.topologyService().localMember();
+
+            log.info("Lease is denied [replica={}, actor={}, redirect={}]", 
sender, handlerNode.name(),
+                    ((StopLeaseProlongationMessage) msg).redirectProposal());
+
+            if (denyLeaseHandler != null) {
+                denyLeaseHandler.accept((StopLeaseProlongationMessage) msg, 
sender, handlerNode.name());
+            }
+        };
+    }
+
+    @Test
+    public void testNotificationToPlacementDriverAboutChangeLeader() throws 
Exception {
+        Set<String> grpNodes = chooseRandomNodes(3);
+
+        log.info("Replication group is based on {}", grpNodes);
+
+        var raftClientFut = createReplicationGroup(GROUP_ID, grpNodes);
+
+        var raftClient = raftClientFut.get();
+
+        raftClient.refreshLeader().get();
+
+        var leaderNodeName = raftClient.leader().consistentId();
+
+        var newLeaderNodeName = grpNodes.stream().filter(n -> 
!n.equals(leaderNodeName)).findAny().get();
+
+        log.info("Leader is moving form {} to {}", leaderNodeName, 
newLeaderNodeName);
+
+        ConcurrentHashMap<String, String> nodesToReceivedDeclineMsg = new 
ConcurrentHashMap<>();
+
+        denyLeaseHandler = (msg, from, to) -> {
+            nodesToReceivedDeclineMsg.put(to, from);
+        };
+
+        raftClient.transferLeadership(new Peer(newLeaderNodeName)).get();
+
+        var anyNode = randomNode(Set.of());
+
+        log.info("Message sent from {} to {}", anyNode, leaderNodeName);
+
+        var clusterService = clusterServices.get(anyNode);
+
+        new ReplicaService(clusterService.messagingService(), clock).invoke(
+                
clusterService.topologyService().getByConsistentId(leaderNodeName),
+                TEST_REPLICA_MESSAGES_FACTORY.primaryReplicaTestRequest()
+                        .groupId(GROUP_ID)
+                        .build()
+        );
+
+        assertTrue(waitForCondition(() -> nodesToReceivedDeclineMsg.size() == 
placementDriverNodeNames.size(), 10_000));
+
+        for (String nodeName : nodesToReceivedDeclineMsg.keySet()) {
+            assertEquals(leaderNodeName, 
nodesToReceivedDeclineMsg.get(nodeName));
+
+            assertTrue(placementDriverNodeNames.contains(nodeName));
+        }
+
+        stopReplicationGroup(GROUP_ID, grpNodes);
+    }
+
+    @Test
+    public void testNotificationToPlacementDriverAboutMajorityLoss() throws 
Exception {

Review Comment:
   Both tests are still almost the same; maybe extract a common method 
with the basic scenario and checks?



##########
modules/replicator/src/main/java/org/apache/ignite/internal/replicator/ReplicaManager.java:
##########
@@ -170,17 +189,29 @@ private void onReplicaMessageReceived(NetworkMessage 
message, String senderConsi
             }
 
             // replicaFut is always completed here.
-            CompletableFuture<?> result = 
replicaFut.join().processRequest(request);
+            Replica replica = replicaFut.join();
+
+            CompletableFuture<?> result = replica.processRequest(request);
 
             result.handle((res, ex) -> {
                 NetworkMessage msg;
 
                 if (ex == null) {
                     msg = prepareReplicaResponse(requestTimestamp, res);
+
+                    ClusterNode localNode = 
clusterNetSvc.topologyService().localMember();
+
+                    if (request instanceof PrimaryReplicaRequest && 
!localNode.name().equals(replica.proposedPrimary())) {
+                        declineLeaseProlongation(request.groupId(), 
replica.proposedPrimary());
+                    }
                 } else {
                     LOG.warn("Failed to process replica request [request={}]", 
ex, request);
 
                     msg = prepareReplicaErrorResponse(requestTimestamp, ex);
+
+                    if (request instanceof PrimaryReplicaRequest && 
isTimeoutException(ex)) {
+                        declineLeaseProlongation(request.groupId(), null);
+                    }

Review Comment:
   Don't we need a ticket for throttling here?



##########
modules/placement-driver/src/integrationTest/java/org/apache/ignite/internal/placementdriver/MultiActorPlacementDriverTest.java:
##########
@@ -101,16 +101,16 @@ public class MultiActorPlacementDriverTest extends 
IgniteAbstractTest {
     @InjectConfiguration
     private DistributionZonesConfiguration dstZnsCfg;
 
-    List<String> placementDriverNodeNames;
-    List<String> nodeNames;
+    private List<String> placementDriverNodeNames;
+    private List<String> nodeNames;

Review Comment:
   Please add an empty line.



##########
modules/replicator/src/main/java/org/apache/ignite/internal/replicator/ReplicaManager.java:
##########
@@ -73,15 +80,23 @@ public class ReplicaManager implements IgniteComponent {
     /** Replicator network message factory. */
     private static final ReplicaMessagesFactory REPLICA_MESSAGES_FACTORY = new 
ReplicaMessagesFactory();
 
+    private static final PlacementDriverMessagesFactory 
PLACEMENT_DRIVER_MESSAGES_FACTORY = new PlacementDriverMessagesFactory();
+
     /** Busy lock to stop synchronously. */
     private final IgniteSpinBusyLock busyLock = new IgniteSpinBusyLock();
 
     /** Prevents double stopping of the component. */
     private final AtomicBoolean stopGuard = new AtomicBoolean();
 
+    /** Meta storage service. */
+    private final CompletableFuture<Set<String>> msNodes = new 
CompletableFuture<>();

Review Comment:
   let's rename it to metaStorageNodes.



##########
modules/replicator/src/main/java/org/apache/ignite/internal/replicator/ReplicaManager.java:
##########
@@ -192,6 +223,20 @@ private void onReplicaMessageReceived(NetworkMessage 
message, String senderConsi
         }
     }
 
+    /**
+     * Checks this exception is caused of timeout or connectivity issue.
+     *
+     * @param ex An exception
+     * @return True if this exception has thrown due to timeout or connection 
problem, false otherwise.
+     */
+    private static boolean isTimeoutException(Throwable ex) {

Review Comment:
   let's rename it to `isConnectivityRelatedException`



##########
modules/replicator/src/main/java/org/apache/ignite/internal/replicator/ReplicaManager.java:
##########
@@ -170,17 +189,29 @@ private void onReplicaMessageReceived(NetworkMessage 
message, String senderConsi
             }
 
             // replicaFut is always completed here.
-            CompletableFuture<?> result = 
replicaFut.join().processRequest(request);
+            Replica replica = replicaFut.join();
+
+            CompletableFuture<?> result = replica.processRequest(request);
 
             result.handle((res, ex) -> {
                 NetworkMessage msg;
 
                 if (ex == null) {
                     msg = prepareReplicaResponse(requestTimestamp, res);
+
+                    ClusterNode localNode = 
clusterNetSvc.topologyService().localMember();
+
+                    if (request instanceof PrimaryReplicaRequest && 
!localNode.name().equals(replica.proposedPrimary())) {
+                        declineLeaseProlongation(request.groupId(), 
replica.proposedPrimary());
+                    }

Review Comment:
   Why so? It seems to me that the presence of the leader on the local node 
should be checked every time, not only when the exception is null.
   But essentially, this can be done by the replica itself. The 
`declineLeaseProlongation` method is also a part of the internal replica logic, 
so it should be moved there. After getting this done, we won't need 
`proposedPrimary` as a public method.
   Also, I suggest adding a call to `declineLeaseProlongation` (moved to 
Replica) in onLeaderChange, to react properly to this event even if 
there is no RW load.



##########
modules/replicator/src/testFixtures/java/org/apache/ignite/internal/replicator/message/ReplicaMessageTestGroup.java:
##########
@@ -0,0 +1,27 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.ignite.internal.replicator.message;
+
+import org.apache.ignite.network.annotations.MessageGroup;
+
+/** Test replica message group. */
+@MessageGroup(groupType = 31, groupName = "TestReplicaMessages")
+public interface ReplicaMessageTestGroup {
+    /** Message type for {@link PrimaryReplicaTestRequest}. */
+    short PRIMARY_REPLICA_TEST_REQUEST = 41;

Review Comment:
   Maybe use some big number instead? Otherwise we will have a clash with the 
next number in ReplicaMessageGroup when it's added.



##########
modules/placement-driver-api/src/main/java/org/apache/ignite/internal/placementdriver/message/StopLeaseProlongationMessage.java:
##########
@@ -24,4 +24,5 @@
  */
 @Transferable(PlacementDriverMessageGroup.STOP_LEASE_PROLONGATION)
 public interface StopLeaseProlongationMessage extends 
PlacementDriverActorMessage {
+    String redirectProposal();

Review Comment:
   please add javadoc



##########
modules/replicator/src/main/java/org/apache/ignite/internal/replicator/ReplicaManager.java:
##########
@@ -222,6 +267,29 @@ private void 
onPlacementDriverMessageReceived(NetworkMessage msg0, String sender
         }
     }
 
+    /**
+     * Sends stop lease prolongation message to all participants of placement 
driver group.
+     *
+     * @param groupId Replication group id.
+     * @param redirectNodeId Node consistent id to redirect.
+     */
+    private void declineLeaseProlongation(ReplicationGroupId groupId, String 
redirectNodeId) {

Review Comment:
   maybe `stopLeaseProlongation`?



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]

Reply via email to