In some rare circumstances, it is possible for the membership algorithm
of Totem to deliver duplicate configuration changes.  This results in
some difficulty for application programmers who have to deal with a
configuration change with no apparent change in state.

This patch removes duplicate configuration changes if it is possible to
do so.  This is possible when both of the following conditions hold:
1) the previous membership and current membership are the same
2) all ring identifiers in the commit token match the current
processor's last ring id recorded at the previous entry to the
operational state

The reason for these requirements is to preserve virtual synchrony and
ensure messages are properly recovered.  When both conditions hold, all
messages will still be recovered per the protocol
(OP->GATHER->COMMIT->RECOVER->OP), but no configuration change will be
delivered, since we are guaranteed that all processors have the necessary
copies to recover all messages in the stream.

Regards
-steve
Index: exec/totemsrp.c
===================================================================
--- exec/totemsrp.c	(revision 2398)
+++ exec/totemsrp.c	(working copy)
@@ -345,6 +345,8 @@
 
 	struct memb_ring_id my_old_ring_id;
 
+	struct memb_ring_id my_last_ring_id;
+
 	int my_aru_count;
 
 	int my_merge_detect_timeout_outstanding;
@@ -397,6 +399,8 @@
 
 	unsigned int my_token_seq;
 
+	unsigned int my_deliver_confchg;
+
 	/*
 	 * Timers
 	 */
@@ -1675,10 +1679,12 @@
 		instance->my_left_memb_entries);
 	srp_addr_to_nodeid (trans_memb_list_totemip,
 		instance->my_trans_memb_list, instance->my_trans_memb_entries);
-	instance->totemsrp_confchg_fn (TOTEM_CONFIGURATION_TRANSITIONAL,
-		trans_memb_list_totemip, instance->my_trans_memb_entries,
-		left_list, instance->my_left_memb_entries,
-		0, 0, &instance->my_ring_id);
+	if (instance->my_deliver_confchg) {
+		instance->totemsrp_confchg_fn (TOTEM_CONFIGURATION_TRANSITIONAL,
+			trans_memb_list_totemip, instance->my_trans_memb_entries,
+			left_list, instance->my_left_memb_entries,
+			0, 0, &instance->my_ring_id);
+	}
 
 // TODO we need to filter to ensure we only deliver those
 // messages which are part of instance->my_deliver_memb
@@ -1693,10 +1699,12 @@
 		instance->my_new_memb_list, instance->my_new_memb_entries);
 	srp_addr_to_nodeid (joined_list_totemip, joined_list,
 		joined_list_entries);
-	instance->totemsrp_confchg_fn (TOTEM_CONFIGURATION_REGULAR,
-		new_memb_list_totemip, instance->my_new_memb_entries,
-		0, 0,
-		joined_list_totemip, joined_list_entries, &instance->my_ring_id);
+	if (instance->my_deliver_confchg) {
+		instance->totemsrp_confchg_fn (TOTEM_CONFIGURATION_REGULAR,
+			new_memb_list_totemip, instance->my_new_memb_entries,
+			0, 0,
+			joined_list_totemip, joined_list_entries, &instance->my_ring_id);
+	}
 
 	/*
 	 * The recovery sort queue now becomes the regular
@@ -1734,6 +1742,9 @@
 
 	reset_pause_timeout (instance);
 
+	memcpy (&instance->my_last_ring_id, &instance->my_ring_id,
+		sizeof (struct memb_ring_id));
+
 	return;
 }
 
@@ -1857,6 +1868,7 @@
 	char seqno_string_hex[10];
 	const struct srp_addr *addr;
 	struct memb_commit_token_memb_entry *memb_list;
+	int broken_config;
 
 	addr = (const struct srp_addr *)commit_token->end_of_commit_token;
 	memb_list = (struct memb_commit_token_memb_entry *)(addr + commit_token->addr_entries);
@@ -1889,6 +1901,31 @@
 		instance->my_memb_list, instance->my_memb_entries,
 		instance->my_trans_memb_list, &instance->my_trans_memb_entries);
 
+	/*
+	 * Determine if all processors from previous regular configuration are
+	 * transitioning to new regular configuration.  If so, don't deliver
+	 * an empty configuration change
+	 */
+	instance->my_deliver_confchg = 1;
+	if (memb_set_equal (instance->my_new_memb_list,
+		instance->my_new_memb_entries,
+		instance->my_memb_list, instance->my_memb_entries)) {
+
+		broken_config = 0;
+		for (i = 0; i < instance->my_new_memb_entries; i++) {
+			if (memcmp (&memb_list[i].ring_id,
+				&instance->my_last_ring_id,
+				sizeof (struct memb_ring_id))) {
+
+				broken_config = 1;
+				break;
+			}
+		}
+		if (broken_config == 0) {
+			instance->my_deliver_confchg = 0;
+		}
+	}
+	
 	for (i = 0; i < instance->my_new_memb_entries; i++) {
 		log_printf (instance->totemsrp_log_level_debug,
 			"position [%d] member %s:\n", i, totemip_print (&addr[i].addr[0]));
_______________________________________________
Openais mailing list
[email protected]
https://lists.linux-foundation.org/mailman/listinfo/openais

Reply via email to