Vladsz83 commented on a change in pull request #8211:
URL: https://github.com/apache/ignite/pull/8211#discussion_r507630622
##########
File path: modules/ducktests/tests/ignitetest/tests/discovery_test.py
##########
@@ -149,18 +179,134 @@ def _perform_node_fail_scenario(self, test_config):
start_load_app(self.test_context, ignite_config=load_config,
params=params, modules=modules)
- data = simulate_nodes_failure(servers, failed_nodes, survived_node)
+ for node in failed_nodes:
+ self.logger.info(
+ "Simulating failure of node '%s' (order %d) on '%s'" %
(node_id(node), order(node), node.name))
+
+ data = self._simulate_nodes_failure(servers,
node_fail_task(ignite_config, test_config), failed_nodes,
+ survived_node)
data['Ignite cluster start time (s)'] = start_servers_sec
return data
+ def _simulate_nodes_failure(self, servers, kill_node_task, failed_nodes,
survived_node):
+ """
+ Perform node failure scenario
+ """
+ ids_to_wait = [node_id(n) for n in failed_nodes]
+
+ _, first_terminated = servers.exec_on_nodes_async(failed_nodes,
kill_node_task)
+
+ for node in failed_nodes:
+ self.logger.debug(
+ "Netfilter activated on '%s': %s" % (node.name,
dump_netfilter_settings(node)))
+
+ # Keeps dates of logged node failures.
+ logged_timestamps = []
+ data = {}
+
+ for failed_id in ids_to_wait:
+ servers.await_event_on_node(failed_pattern(failed_id),
survived_node, 15, from_the_beginning=True,
+ backoff_sec=0.3)
+
+ _, stdout, _ = survived_node.account.ssh_client.exec_command(
+ "grep '%s' %s" % (failed_pattern(failed_id),
IgniteAwareService.STDOUT_STDERR_CAPTURE))
+
+ logged_timestamps.append(
+ datetime.strptime(re.match("^\\[[^\\[]+\\]",
stdout.read().decode("utf-8")).group(),
+ "[%Y-%m-%d %H:%M:%S,%f]"))
+
+ self._check_results(failed_nodes, survived_node)
+
+ logged_timestamps.sort(reverse=True)
+
+ first_kill_time = epoch_mills(first_terminated)
+ detection_delay = epoch_mills(logged_timestamps[0]) - first_kill_time
+
+ data['Detection of node(s) failure (ms)'] = detection_delay
+ data['All detection delays (ms):'] = str([epoch_mills(ts) -
first_kill_time for ts in logged_timestamps])
+ data['Nodes failed'] = len(failed_nodes)
+
+ return data
+
+ def _check_results(self, failed_nodes, survived_node):
Review comment:
Fixed
----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
For queries about this service, please contact Infrastructure at:
[email protected]