ivandasch commented on a change in pull request #8142:
URL: https://github.com/apache/ignite/pull/8142#discussion_r468646361



##########
File path: modules/ducktests/tests/ignitetest/tests/discovery_test.py
##########
@@ -117,46 +163,81 @@ def __basic_test__(self, version, with_zk=False):
 
         start = self.monotonic()
         self.servers.start()
-        data = {'Ignite cluster start time (s)': self.monotonic() - start}
+        data = {'Ignite cluster start time (s)': round(self.monotonic() - 
start, 1)}
         self.stage("Topology is ready")
 
-        # Node failure detection
-        fail_node, survived_node = 
self.choose_random_node_to_kill(self.servers)
+        if nodes_to_kill > self.servers.num_nodes - 1 or coordinator and 
nodes_to_kill > 1:
+            raise Exception("Too many nodes to kill: " + str(nodes_to_kill))
+
+        if coordinator:
+            node_chooser = lambda nodes: \
+                next(node for node in nodes if node.discovery_info().node_id 
== nodes[0].discovery_info().coordinator)
+        else:
+            node_chooser = lambda nodes: \
+                random.sample([n for n in self.servers.nodes if 
n.discovery_info().node_id !=
+                               
self.servers.nodes[0].discovery_info().coordinator], nodes_to_kill)
 
-        data["nodes"] = [node.node_id() for node in self.servers.nodes]
+        failed_nodes, survived_node = 
self.choose_node_to_kill(self.servers.nodes, node_chooser)
 
-        disco_infos = []
-        for node in self.servers.nodes:
-            disco_info = node.discovery_info()
-            disco_infos.append({
-                "id": disco_info.node_id,
-                "consistent_id": disco_info.consistent_id,
-                "coordinator": disco_info.coordinator,
-                "order": disco_info.order,
-                "int_order": disco_info.int_order,
-                "is_client": disco_info.is_client
-            })
+        ids_to_wait = [node.discovery_info().node_id for node in failed_nodes]
 
-        data["node_disco_info"] = disco_infos
+        self.stage("Stopping " + str(len(failed_nodes)) + " nodes.")
 
-        self.servers.stop_node(fail_node, clean_shutdown=False)
+        first_terminated = self.servers.stop_nodes_async(failed_nodes, 
clean_shutdown=False, wait_for_stop=False)
 
-        start = self.monotonic()
-        self.servers.await_event_on_node("Node FAILED", 
random.choice(survived_node), 60, True)
+        self.stage("Waiting for failure detection of " + 
str(len(failed_nodes)) + " nodes.")
+
+        # Keeps dates of logged node failures.
+        last_failure_detected = 0
+        logged_timestamps = []
+
+        for failed_id in ids_to_wait:
+            pattern = "Node FAILED: " + ("ZookeeperClusterNode" if with_zk 
else "TcpDiscoveryNode") + " \\[id=" \

Review comment:
       May be try to pass a regexp to grep? 

##########
File path: modules/ducktests/tests/ignitetest/tests/discovery_test.py
##########
@@ -81,28 +90,65 @@ def teardown(self):
     @cluster(num_nodes=NUM_NODES)
     @parametrize(version=str(DEV_BRANCH))
     @parametrize(version=str(LATEST_2_7))
-    def test_tcp(self, version):
+    def test_tcp_not_coordinator_single(self, version):
+        """
+        Test single-node-failure scenario (not the coordinator) with 
TcpDiscoverySpi.
+        """
+        return self.__simulate_nodes_failure(version, nodes_to_kill=1)
+
+    @cluster(num_nodes=NUM_NODES)
+    @parametrize(version=str(DEV_BRANCH))
+    @parametrize(version=str(LATEST_2_7))
+    def test_tcp_not_coordinator_two(self, version):
+        """
+        Test two-node-failure scenario (not the coordinator) with 
TcpDiscoverySpi.
+        """
+        return self.__simulate_nodes_failure(version, nodes_to_kill=2)
+
+    @cluster(num_nodes=NUM_NODES)
+    @parametrize(version=str(DEV_BRANCH))
+    @parametrize(version=str(LATEST_2_7))
+    def test_tcp_coordinator(self, version):
+        """
+        Test coordinator-failure scenario with TcpDiscoverySpi.
+        """
+        return self.__simulate_nodes_failure(version, True)
+
+    @cluster(num_nodes=NUM_NODES + 3)
+    @parametrize(version=str(DEV_BRANCH))
+    @parametrize(version=str(LATEST_2_7))
+    def test_zk_not_coordinator_single(self, version):
         """
-        Test basic discovery scenario with TcpDiscoverySpi.
+        Test single node failure scenario (not the coordinator) with ZooKeeper.
         """
-        return self.__basic_test__(version, False)
+        return self.__simulate_nodes_failure(version, nodes_to_kill=1, 
coordinator=False, with_zk=True)
 
     @cluster(num_nodes=NUM_NODES + 3)
     @parametrize(version=str(DEV_BRANCH))
     @parametrize(version=str(LATEST_2_7))
-    def test_zk(self, version):
+    def test_zk_not_coordinator_two(self, version):
         """
-        Test basic discovery scenario with ZookeeperDiscoverySpi.
+        Test two-node-failure scenario (not the coordinator) with ZooKeeper.
         """
-        return self.__basic_test__(version, True)
+        return self.__simulate_nodes_failure(version, nodes_to_kill=2, 
coordinator=False, with_zk=True)
 
-    def __basic_test__(self, version, with_zk=False):
+    @cluster(num_nodes=NUM_NODES+3)
+    @parametrize(version=str(DEV_BRANCH))
+    @parametrize(version=str(LATEST_2_7))
+    def test_zk_coordinator(self, version):
+        """
+        Test coordinator-failure scenario with ZooKeeper.
+        """
+        return self.__simulate_nodes_failure(version, coordinator=True, 
with_zk=True)
+
+    # pylint: disable=R0913,R0914
+    def __simulate_nodes_failure(self, version, coordinator=False, 
with_zk=False, nodes_to_kill=1):
         if with_zk:

Review comment:
       We can move this part to additional function and call it in each zk 
steps. 

##########
File path: modules/ducktests/tests/ignitetest/tests/discovery_test.py
##########
@@ -81,28 +90,65 @@ def teardown(self):
     @cluster(num_nodes=NUM_NODES)
     @parametrize(version=str(DEV_BRANCH))
     @parametrize(version=str(LATEST_2_7))
-    def test_tcp(self, version):
+    def test_tcp_not_coordinator_single(self, version):
+        """
+        Test single-node-failure scenario (not the coordinator) with 
TcpDiscoverySpi.
+        """
+        return self.__simulate_nodes_failure(version, nodes_to_kill=1)
+
+    @cluster(num_nodes=NUM_NODES)
+    @parametrize(version=str(DEV_BRANCH))
+    @parametrize(version=str(LATEST_2_7))
+    def test_tcp_not_coordinator_two(self, version):
+        """
+        Test two-node-failure scenario (not the coordinator) with 
TcpDiscoverySpi.
+        """
+        return self.__simulate_nodes_failure(version, nodes_to_kill=2)
+
+    @cluster(num_nodes=NUM_NODES)
+    @parametrize(version=str(DEV_BRANCH))
+    @parametrize(version=str(LATEST_2_7))
+    def test_tcp_coordinator(self, version):
+        """
+        Test coordinator-failure scenario with TcpDiscoverySpi.
+        """
+        return self.__simulate_nodes_failure(version, True)
+
+    @cluster(num_nodes=NUM_NODES + 3)
+    @parametrize(version=str(DEV_BRANCH))
+    @parametrize(version=str(LATEST_2_7))
+    def test_zk_not_coordinator_single(self, version):
         """
-        Test basic discovery scenario with TcpDiscoverySpi.
+        Test single node failure scenario (not the coordinator) with ZooKeeper.
         """
-        return self.__basic_test__(version, False)
+        return self.__simulate_nodes_failure(version, nodes_to_kill=1, 
coordinator=False, with_zk=True)
 
     @cluster(num_nodes=NUM_NODES + 3)
     @parametrize(version=str(DEV_BRANCH))
     @parametrize(version=str(LATEST_2_7))
-    def test_zk(self, version):
+    def test_zk_not_coordinator_two(self, version):
         """
-        Test basic discovery scenario with ZookeeperDiscoverySpi.
+        Test two-node-failure scenario (not the coordinator) with ZooKeeper.
         """
-        return self.__basic_test__(version, True)
+        return self.__simulate_nodes_failure(version, nodes_to_kill=2, 
coordinator=False, with_zk=True)
 
-    def __basic_test__(self, version, with_zk=False):
+    @cluster(num_nodes=NUM_NODES+3)
+    @parametrize(version=str(DEV_BRANCH))
+    @parametrize(version=str(LATEST_2_7))
+    def test_zk_coordinator(self, version):
+        """
+        Test coordinator-failure scenario with ZooKeeper.
+        """
+        return self.__simulate_nodes_failure(version, coordinator=True, 
with_zk=True)
+
+    # pylint: disable=R0913,R0914

Review comment:
       I suppose, that we can rewrite a little bit test, it's really 
overcomplicated a little bit. 
   May be it is not a good idea to turn off linter warnings here?

##########
File path: modules/ducktests/tests/ignitetest/tests/discovery_test.py
##########
@@ -117,46 +163,81 @@ def __basic_test__(self, version, with_zk=False):
 
         start = self.monotonic()
         self.servers.start()
-        data = {'Ignite cluster start time (s)': self.monotonic() - start}
+        data = {'Ignite cluster start time (s)': round(self.monotonic() - 
start, 1)}
         self.stage("Topology is ready")
 
-        # Node failure detection
-        fail_node, survived_node = 
self.choose_random_node_to_kill(self.servers)
+        if nodes_to_kill > self.servers.num_nodes - 1 or coordinator and 
nodes_to_kill > 1:
+            raise Exception("Too many nodes to kill: " + str(nodes_to_kill))
+
+        if coordinator:
+            node_chooser = lambda nodes: \
+                next(node for node in nodes if node.discovery_info().node_id 
== nodes[0].discovery_info().coordinator)
+        else:
+            node_chooser = lambda nodes: \
+                random.sample([n for n in self.servers.nodes if 
n.discovery_info().node_id !=
+                               
self.servers.nodes[0].discovery_info().coordinator], nodes_to_kill)
 
-        data["nodes"] = [node.node_id() for node in self.servers.nodes]
+        failed_nodes, survived_node = 
self.choose_node_to_kill(self.servers.nodes, node_chooser)
 
-        disco_infos = []
-        for node in self.servers.nodes:
-            disco_info = node.discovery_info()
-            disco_infos.append({
-                "id": disco_info.node_id,
-                "consistent_id": disco_info.consistent_id,
-                "coordinator": disco_info.coordinator,
-                "order": disco_info.order,
-                "int_order": disco_info.int_order,
-                "is_client": disco_info.is_client
-            })
+        ids_to_wait = [node.discovery_info().node_id for node in failed_nodes]
 
-        data["node_disco_info"] = disco_infos
+        self.stage("Stopping " + str(len(failed_nodes)) + " nodes.")
 
-        self.servers.stop_node(fail_node, clean_shutdown=False)
+        first_terminated = self.servers.stop_nodes_async(failed_nodes, 
clean_shutdown=False, wait_for_stop=False)
 
-        start = self.monotonic()
-        self.servers.await_event_on_node("Node FAILED", 
random.choice(survived_node), 60, True)
+        self.stage("Waiting for failure detection of " + 
str(len(failed_nodes)) + " nodes.")
+
+        # Keeps dates of logged node failures.
+        last_failure_detected = 0
+        logged_timestamps = []
+
+        for failed_id in ids_to_wait:
+            pattern = "Node FAILED: " + ("ZookeeperClusterNode" if with_zk 
else "TcpDiscoveryNode") + " \\[id=" \
+                      + failed_id
+
+            self.servers.await_event_on_node(pattern, survived_node, 10, 
from_the_beginning=True, backoff_sec=0.01)
 
-        data['Failure of node detected in time (s)'] = self.monotonic() - start
+            last_failure_detected = self.monotonic()
+
+            self.stage("Failure detection measured.")
+
+        for failed_id in ids_to_wait:
+            pattern = "Node FAILED: " + ("ZookeeperClusterNode" if with_zk 
else "TcpDiscoveryNode") + " \\[id=" \

Review comment:
       Same as above. Seems that it is a good candidate to separate function, 
isn't it?




----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

For queries about this service, please contact Infrastructure at:
[email protected]


Reply via email to