Vladsz83 commented on a change in pull request #8159:
URL: https://github.com/apache/ignite/pull/8159#discussion_r474621436
##########
File path: modules/ducktests/tests/ignitetest/tests/discovery_test.py
##########
@@ -210,37 +177,78 @@ def __simulate_nodes_failure(self, version, properties,
nodes_to_kill=1):
logged_timestamps.sort(reverse=True)
- # Failure detection delay.
- time_holder = int((time_holder - first_terminated[0]) * 1000)
- # Failure detection delay by log.
- by_log = epoch_mills(logged_timestamps[0]) -
epoch_mills(first_terminated[1])
-
- assert by_log > 0, "Negative node failure detection delay: " + by_log
+ ". Probably it is a timezone issue."
- assert by_log <= time_holder, "Value of node failure detection delay
taken from by the node log (" + \
- str(by_log) + "ms) must be lesser than
measured value (" + str(time_holder) + \
- "ms) because watching this event
consumes extra time."
+ self.__check_and_store_results(data, int((time_holder -
first_terminated[0]) * 1000),
+ epoch_mills(logged_timestamps[0]) -
epoch_mills(first_terminated[1]))
- data['Detection of node(s) failure, measured (ms)'] = time_holder
- data['Detection of node(s) failure, by the log (ms)'] = by_log
data['Nodes failed'] = len(failed_nodes)
return data
+ @staticmethod
+ def __check_and_store_results(data, measured, delay_by_log):
+ assert delay_by_log > 0, \
+ "Negative failure detection delay from the survived node log (" +
str(delay_by_log) + "ms). It is \
+ probably an issue of the timezone or system clock settings."
+ assert delay_by_log <= measured, \
+ "Failure detection delay from the survived node log (" +
str(delay_by_log) + "ms) must be lesser than \
+ measured value (" + str(measured) + "ms) because watching this
event consumes extra time. It is \
+ probably an issue of the timezone or system clock settings."
+
+ data['Detection of node(s) failure, measured (ms)'] = measured
+ data['Detection of node(s) failure, by the log (ms)'] = delay_by_log
+
@staticmethod
def __failed_pattern(failed_node_id):
return "Node FAILED: .\\{1,\\}Node \\[id=" + failed_node_id
- def __choose_node_to_kill(self, nodes_to_kill):
+ def __choose_node_to_kill(self, kill_coordinator, nodes_to_kill):
nodes = self.servers.nodes
coordinator = nodes[0].discovery_info().coordinator
+ to_kill = []
- if nodes_to_kill < 1:
- to_kill = next(node for node in nodes if
node.discovery_info().node_id == coordinator)
- else:
- to_kill = random.sample([n for n in nodes if
n.discovery_info().node_id != coordinator], nodes_to_kill)
+ if kill_coordinator:
Review comment:
We decided to separate coordinator failure. It should fail too, but we
wanted to see the difference from an ordinary node. Didn't we? There is a set of
parametrized tests for it: kill only coordinator, kill coordinator + 1 node,
kill coordinator + 2 nodes. All under load and without. Also: kill 1 node
without coordinator, kill 2 nodes without coordinator. Did we change our mind
and no longer check coordinator failure specifically?
----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
For queries about this service, please contact Infrastructure at:
[email protected]