Vladsz83 commented on a change in pull request #8211:
URL: https://github.com/apache/ignite/pull/8211#discussion_r499571435



##########
File path: modules/ducktests/tests/ignitetest/tests/discovery_test.py
##########
@@ -149,18 +177,136 @@ def _perform_node_fail_scenario(self, test_config):
 
             start_load_app(self.test_context, ignite_config=load_config, 
params=params, modules=modules)
 
-        data = simulate_nodes_failure(servers, failed_nodes, survived_node)
+        for node in failed_nodes:
+            self.logger.info(
+                "Simulating failure of node '%s' (order %d) on '%s'" % 
(node_id(node), order(node), node.name))
+
+        data = self._simulate_nodes_failure(servers, 
node_fail_task(ignite_config, test_config), failed_nodes,
+                                            survived_node)
 
         data['Ignite cluster start time (s)'] = start_servers_sec
 
         return data
 
+    def _simulate_nodes_failure(self, servers, kill_node_task, failed_nodes, 
survived_node):
+        """
+        Perform node failure scenario
+        """
+        ids_to_wait = [node_id(n) for n in failed_nodes]
+
+        _, first_terminated = servers.exec_on_nodes_async(failed_nodes, 
kill_node_task)
+
+        for node in failed_nodes:
+            self.logger.debug(
+                "Netfilter activated on '%s': %s" % (node.name, 
dump_netfilter_settings(node)))
+
+        # Keeps dates of logged node failures.
+        logged_timestamps = []
+        data = {}
+
+        for failed_id in ids_to_wait:
+            servers.await_event_on_node(failed_pattern(failed_id), 
survived_node, 15, from_the_beginning=True,
+                                        backoff_sec=0.3)
+
+            _, stdout, _ = survived_node.account.ssh_client.exec_command(
+                "grep '%s' %s" % (failed_pattern(failed_id), 
IgniteAwareService.STDOUT_STDERR_CAPTURE))
+
+            logged_timestamps.append(
+                datetime.strptime(re.match("^\\[[^\\[]+\\]", 
stdout.read().decode("utf-8")).group(),
+                                  "[%Y-%m-%d %H:%M:%S,%f]"))
+
+        self._check_results(failed_nodes, survived_node)
+
+        logged_timestamps.sort(reverse=True)
+
+        first_kill_time = epoch_mills(first_terminated)
+        detection_delay = epoch_mills(logged_timestamps[0]) - first_kill_time
+
+        data['Detection of node(s) failure (ms)'] = detection_delay
+        data['All detection delays (ms):'] = str([epoch_mills(ts) - 
first_kill_time for ts in logged_timestamps])
+        data['Nodes failed'] = len(failed_nodes)
+
+        return data
+
+    def _check_results(self, failed_nodes, survived_node):
+        """Ensures test finishes correctly."""
+        cmd = "grep '%s' %s | wc -l" % (failed_pattern(), 
IgniteAwareService.STDOUT_STDERR_CAPTURE)
+
+        failed_cnt = 
int(str(survived_node.account.ssh_client.exec_command(cmd)[1].read(), 
sys.getdefaultencoding()))
+
+        if failed_cnt != len(failed_nodes):
+            failed = str(survived_node.account.ssh_client.exec_command(
+                "grep '%s' %s" % (failed_pattern(), 
IgniteAwareService.STDOUT_STDERR_CAPTURE))[1].read(),
+                         sys.getdefaultencoding())
+
+            self.logger.warn("Node '%s' (%s) has detected the following 
failures:%s%s" % (
+                survived_node.name, node_id(survived_node), os.linesep, 
failed))
+
+            raise AssertionError(
+                "Wrong number of failed nodes: %d. Expected: %d. Check the 
logs." % (failed_cnt, len(failed_nodes)))
+
+        for service in [srv for srv in self.test_context.services if 
isinstance(srv, IgniteAwareService)]:
+            for node in [srv_node for srv_node in service.nodes if srv_node 
not in failed_nodes]:
+                cmd = "grep -i '%s' %s | wc -l" % ("local no1de segmented", 
IgniteAwareService.STDOUT_STDERR_CAPTURE)
+
+                failed = 
str(node.account.ssh_client.exec_command(cmd)[1].read(), 
sys.getdefaultencoding())
+
+                if int(failed) > 0:
+                    raise AssertionError(
+                        "Wrong node failed (segmented) on '%s'. Check the 
logs." % node.name)
+
+    def setup(self):
+        super().setup()
+
+        self.netfilter_saved_settings = tempfile.mkdtemp()

Review comment:
       Reverted, re-implemented.




----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

For queries about this service, please contact Infrastructure at:
[email protected]


Reply via email to