This is a backport of corosync commit 69ff770544038ea813. When doing a controlled shutdown of corosync, we now send out a JOIN message with our node removed. This should speed up the case where a lot of nodes leave at the same time, as they don't need to wait for the token timeout for each node.
Original-Author: Christine Caulfield <[email protected]> Signed-off-by: Angus Salkeld <[email protected]> --- exec/totemnet.c | 14 +++++++++++ exec/totemsrp.c | 69 ++++++++++++++++++++++++++++++++++++++++++++++++++++++- 2 files changed, 82 insertions(+), 1 deletions(-) diff --git a/exec/totemnet.c b/exec/totemnet.c index 3686fd1..154aa4f 100644 --- a/exec/totemnet.c +++ b/exec/totemnet.c @@ -594,6 +594,20 @@ int totemnet_finalize ( worker_thread_group_exit (&instance->worker_thread_group); + if (instance->totemnet_sockets.mcast_recv > 0) { + close (instance->totemnet_sockets.mcast_recv); + poll_dispatch_delete (instance->totemnet_poll_handle, + instance->totemnet_sockets.mcast_recv); + } + if (instance->totemnet_sockets.mcast_send > 0) { + close (instance->totemnet_sockets.mcast_send); + } + if (instance->totemnet_sockets.token > 0) { + close (instance->totemnet_sockets.token); + poll_dispatch_delete (instance->totemnet_poll_handle, + instance->totemnet_sockets.token); + } + hdb_handle_put (&totemnet_instance_database, handle); error_exit: diff --git a/exec/totemsrp.c b/exec/totemsrp.c index aeaf1b9..9a65790 100644 --- a/exec/totemsrp.c +++ b/exec/totemsrp.c @@ -89,6 +89,7 @@ #define MAXIOVS 10 #define RETRANSMIT_ENTRIES_MAX 30 #define TOKEN_SIZE_MAX 64000 /* bytes */ +#define LEAVE_DUMMY_NODEID 0 /* * Rollover handling: @@ -545,6 +546,8 @@ static int message_handler_token_hold_cancel ( int msg_len, int endian_conversion_needed); +static void memb_leave_message_send (struct totemsrp_instance *instance); + static void memb_ring_id_create_or_load (struct totemsrp_instance *, struct memb_ring_id *); static void token_callbacks_execute (struct totemsrp_instance *instance, enum totem_callback_token_type type); @@ -851,6 +854,7 @@ void totemsrp_finalize ( if (res != 0) { return; } + memb_leave_message_send (instance); hdb_handle_put (&totemsrp_instance_database, handle); } @@ -1055,6 +1059,9 @@ static void memb_consensus_set ( int found = 0; int i; + if 
(addr->addr[0].nodeid == LEAVE_DUMMY_NODEID) + return; + for (i = 0; i < instance->consensus_list_entries; i++) { if (srp_addr_equal(addr, &instance->consensus_list[i].addr)) { found = 1; @@ -2872,7 +2879,67 @@ static void memb_join_message_send (struct totemsrp_instance *instance) iovs); } -static void memb_merge_detect_transmit (struct totemsrp_instance *instance) +static void memb_leave_message_send (struct totemsrp_instance *instance) +{ + char memb_join_data[10000]; + struct memb_join *memb_join = (struct memb_join *)memb_join_data; + int active_memb_entries; + struct srp_addr active_memb[PROCESSOR_COUNT_MAX]; + struct iovec iovec[3]; + + log_printf (instance->totemsrp_log_level_debug, + "sending join/leave message\n"); + + /* + * add us to the failed list, and remove us from + * the members list + */ + memb_set_merge( + &instance->my_id, 1, + instance->my_failed_list, &instance->my_failed_list_entries); + + memb_set_subtract (active_memb, &active_memb_entries, + instance->my_proc_list, instance->my_proc_list_entries, + &instance->my_id, 1); + + + memb_join->header.type = MESSAGE_TYPE_MEMB_JOIN; + memb_join->header.endian_detector = ENDIAN_LOCAL; + memb_join->header.encapsulated = 0; + memb_join->header.nodeid = LEAVE_DUMMY_NODEID; + + memb_join->ring_seq = instance->my_ring_id.seq; + memb_join->proc_list_entries = active_memb_entries; + memb_join->failed_list_entries = instance->my_failed_list_entries; + srp_addr_copy (&memb_join->system_from, &instance->my_id); + memb_join->system_from.addr[0].nodeid = LEAVE_DUMMY_NODEID; + + // TODO: CC Maybe use the actual join send routine. 
+ /* + * This mess adds the joined and failed processor lists into the join + * message + */ + iovec[0].iov_base = memb_join; + iovec[0].iov_len = sizeof (struct memb_join); + + iovec[1].iov_base = active_memb; + iovec[1].iov_len = active_memb_entries * sizeof (struct srp_addr); + + iovec[2].iov_base = instance->my_failed_list; + iovec[2].iov_len = instance->my_failed_list_entries * + sizeof (struct srp_addr); + + + if (instance->totem_config->send_join_timeout) { + usleep (random() % (instance->totem_config->send_join_timeout * 1000)); + } + + totemrrp_mcast_flush_send ( + instance->totemrrp_handle, + iovec, 3); +} + +static void memb_merge_detect_transmit (struct totemsrp_instance *instance) { struct memb_merge_detect memb_merge_detect; struct iovec iovec[2]; -- 1.7.3.1 _______________________________________________ Openais mailing list [email protected] https://lists.linux-foundation.org/mailman/listinfo/openais
