Hi, I think there is still a slight memory-leak when recovery is entered repeatedly. The recovery messages usually get freed when the operational state is entered. However if recovery is entered several times, without entering the operational state, then some messages can be leaked.
Attached is a patch that fixes the problem for me. I tested it on v1.3.1, but the patch should apply to trunk. Let me know if I've misunderstood anything, or if any of the patch needs fixing up. Cheers, Tim
From: Tim Beale <[email protected]> Fix memory leak when entering recovery repeatedly If recovery is entered repeatedly (i.e. without entering the operational state in between), then some messages were still leaked. + memb_state_operational_enter - the recovery queue was left with hanging message pointers, so when recovery was entered next it wasn't save to free these messages. Now the messages are cleared off the recovery queue first, and then the recovery queue is copied into the regular queue. + memb_state_recovery_enter - free any messages that were already on the recovery message queue or retrans message queue before we reinit them. These recovery messages get freed when operational state is entered, but if recovery were entered again before it gets to the operational state, some messages could be leaked. --- exec/totemsrp.c | 76 +++++++++++++++++++++++++++++++++++++++++++++---------- 1 files changed, 62 insertions(+), 14 deletions(-) diff --git a/exec/totemsrp.c b/exec/totemsrp.c index 16de74d..5aedc61 100644 --- a/exec/totemsrp.c +++ b/exec/totemsrp.c @@ -1787,12 +1787,6 @@ static void memb_state_operational_enter (struct totemsrp_instance *instance) 0, 0, joined_list_totemip, joined_list_entries, &instance->my_ring_id); - /* - * The recovery sort queue now becomes the regular - * sort queue. It is necessary to copy the state - * into the regular sort queue. - */ - sq_copy (&instance->regular_sort_queue, &instance->recovery_sort_queue); instance->my_last_aru = SEQNO_START_MSG; /* When making my_proc_list smaller, ensure that the @@ -1811,20 +1805,26 @@ static void memb_state_operational_enter (struct totemsrp_instance *instance) instance->my_failed_list_entries = 0; instance->my_high_delivered = instance->my_high_seq_received; + /* Free all messages on the recovery queue. Note that recovery messages are + * encapsulated differently, so shouldn't be copied onto the regular queue */ for (i = 0; i <= instance->my_high_delivered; i++) { void *ptr; - res = sq_item_get (&instance->regular_sort_queue, i, &ptr); + res = sq_item_get (&instance->recovery_sort_queue, i, &ptr); if (res == 0) { - struct sort_queue_item *regular_message; + struct sort_queue_item *recovery_message; - regular_message = ptr; - free (regular_message->mcast); + recovery_message = ptr; + free (recovery_message->mcast); } } - sq_items_release (&instance->regular_sort_queue, instance->my_high_delivered); + sq_items_release (&instance->recovery_sort_queue, instance->my_high_delivered); instance->last_released = instance->my_high_delivered; + /* The recovery sort queue now becomes the regular sort queue, so copy the + * state info (i.e. seq numbers). Note recovery queue should be empty now */ + sq_copy (&instance->regular_sort_queue, &instance->recovery_sort_queue); + log_printf (instance->totemsrp_log_level_debug, "entering OPERATIONAL state.\n"); log_printf (instance->totemsrp_log_level_notice, @@ -1972,6 +1972,55 @@ static void memb_state_commit_enter ( */ } +static void reinit_recovery_sort_queue (struct totemsrp_instance *instance) +{ + unsigned int i; + struct sort_queue_item *recovery_message_item; + unsigned int range; + unsigned int start; + unsigned int del_count = 0; + void *ptr; + + start = instance->recovery_sort_queue.head_seqid; + range = instance->recovery_sort_queue.pos_max; + + for (i = 0; i <= range; i++) { + if (sq_item_get(&instance->recovery_sort_queue, + i + start, &ptr) != 0) { + continue; + } + recovery_message_item = ptr; + free (recovery_message_item->mcast); + del_count++; + } + + if (del_count != 0) { + log_printf (instance->totemsrp_log_level_debug, + "Deleted %u messages from the recovery queue.\n", del_count); + } + + sq_reinit (&instance->recovery_sort_queue, SEQNO_START_MSG); +} + +static void reinit_retransmit_message_queue (struct totemsrp_instance *instance) +{ + struct message_item *msg_item; + + if (!cs_queue_is_empty (&instance->retrans_message_queue)) { + log_printf (instance->totemsrp_log_level_debug, + "Deleting %u messages from the retransmit queue.\n", + cs_queue_used (&instance->retrans_message_queue)); + } + + while (!cs_queue_is_empty (&instance->retrans_message_queue)) { + msg_item = (struct message_item *)cs_queue_item_get (&instance->retrans_message_queue); + cs_queue_item_remove (&instance->retrans_message_queue); + free (msg_item->mcast); + } + + cs_queue_reinit (&instance->retrans_message_queue); +} + static void memb_state_recovery_enter ( struct totemsrp_instance *instance, struct memb_commit_token *commit_token) @@ -1995,8 +2044,8 @@ static void memb_state_recovery_enter ( instance->my_high_ring_delivered = 0; - sq_reinit (&instance->recovery_sort_queue, SEQNO_START_MSG); - cs_queue_reinit (&instance->retrans_message_queue); + reinit_recovery_sort_queue (instance); + reinit_retransmit_message_queue (instance); low_ring_aru = instance->old_ring_state_high_seq_received; @@ -2120,7 +2169,6 @@ static void memb_state_recovery_enter ( sort_queue_item = ptr; messages_originated++; memset (&message_item, 0, sizeof (struct message_item)); - // TODO LEAK message_item.mcast = totemsrp_buffer_alloc (instance); assert (message_item.mcast); message_item.mcast->header.type = MESSAGE_TYPE_MCAST;
_______________________________________________ Openais mailing list [email protected] https://lists.linux-foundation.org/mailman/listinfo/openais
