Patch looks good, but please make sure it works with old versions of
corosync and openais before committing.

Regards
-steve

On Mon, 2010-05-03 at 20:31 +1000, Angus Salkeld wrote:
> This patch fixes the case where you do the following:
> 1: start all nodes
> 2: isolate node n1
> 3: Kill corosync on n1
> 4: unisolate node n1
> 5: start corosync on n1
> 6: start cpg on all nodes
> 7: isolate node n1
> 8: Kill corosync on n1
> 9: unisolate node n1
> 10: start corosync on n1
> 11: Waiting for config change on n2
> 
> 
> To achieve this we need to mcast some info around the
> cluster when the cpg service gets a sync event.
> We need to do this to determine the correct downlist
> to use.
> 
> The problem is that the nodes do not all have a consistent view
> of the cluster (a joining node thinks lots of other nodes are joining
> whilst the others think only one is joining). So I have
> used the number of "old_members" and then the nodeid as
> a way of deciding the sync master. So each node sends its
> nodeid, number of old_members and downlist. The sync master
> is the node with the largest num_old_members and, on a tie, the smallest nodeid.
> 
> Would it be useful to make this information public?
> i.e. in objdb "runtime/cpg/sync_master=<nodeid>"
> Then if you are interested you could read this value,
> get notifications etc...
> 
> Regards
> Angus
> 
> Signed-off-by: Angus Salkeld <[email protected]>
> ---
>  services/cpg.c |  215 
> ++++++++++++++++++++++++++++++++++++++++----------------
>  1 files changed, 153 insertions(+), 62 deletions(-)
> 
> diff --git a/services/cpg.c b/services/cpg.c
> index 8ce74ae..e076942 100644
> --- a/services/cpg.c
> +++ b/services/cpg.c
> @@ -75,7 +75,8 @@ enum cpg_message_req_types {
>       MESSAGE_REQ_EXEC_CPG_PROCLEAVE = 1,
>       MESSAGE_REQ_EXEC_CPG_JOINLIST = 2,
>       MESSAGE_REQ_EXEC_CPG_MCAST = 3,
> -     MESSAGE_REQ_EXEC_CPG_DOWNLIST = 4
> +     MESSAGE_REQ_EXEC_CPG_DOWNLIST_OLD = 4,
> +     MESSAGE_REQ_EXEC_CPG_DOWNLIST = 5
>  };
>  
>  /*
> @@ -127,6 +128,13 @@ enum cpg_sync_state {
>       CPGSYNC_JOINLIST
>  };
>  
> +enum cpg_downlist_state_e {
> +       CPG_DOWNLIST_NONE,
> +       CPG_DOWNLIST_WAITING_FOR_MESSAGES,
> +       CPG_DOWNLIST_APPLYING,
> +};
> +static enum cpg_downlist_state_e downlist_state;
> +static struct list_head downlist_messages_head;
>  
>  struct cpg_pd {
>       void *conn;
> @@ -202,6 +210,10 @@ static void message_handler_req_exec_cpg_mcast (
>       const void *message,
>       unsigned int nodeid);
>  
> +static void message_handler_req_exec_cpg_downlist_old (
> +     const void *message,
> +     unsigned int nodeid);
> +
>  static void message_handler_req_exec_cpg_downlist (
>       const void *message,
>       unsigned int nodeid);
> @@ -212,6 +224,8 @@ static void exec_cpg_joinlist_endian_convert (void *msg);
>  
>  static void exec_cpg_mcast_endian_convert (void *msg);
>  
> +static void exec_cpg_downlist_endian_convert_old (void *msg);
> +
>  static void exec_cpg_downlist_endian_convert (void *msg);
>  
>  static void message_handler_req_lib_cpg_join (void *conn, const void 
> *message);
> @@ -326,6 +340,10 @@ static struct corosync_exec_handler cpg_exec_engine[] =
>               .exec_endian_convert_fn = exec_cpg_mcast_endian_convert
>       },
>       { /* 4 */
> +             .exec_handler_fn        = 
> message_handler_req_exec_cpg_downlist_old,
> +             .exec_endian_convert_fn = exec_cpg_downlist_endian_convert_old
> +     },
> +     { /* 5 */
>               .exec_handler_fn        = message_handler_req_exec_cpg_downlist,
>               .exec_endian_convert_fn = exec_cpg_downlist_endian_convert
>       },
> @@ -415,34 +433,30 @@ struct req_exec_cpg_mcast {
>       mar_uint8_t message[] __attribute__((aligned(8)));
>  };
>  
> -struct req_exec_cpg_downlist {
> +struct req_exec_cpg_downlist_old {
>       coroipc_request_header_t header __attribute__((aligned(8)));
>       mar_uint32_t left_nodes __attribute__((aligned(8)));
>       mar_uint32_t nodeids[PROCESSOR_COUNT_MAX]  __attribute__((aligned(8)));
>  };
>  
> -static struct req_exec_cpg_downlist g_req_exec_cpg_downlist;
> +struct req_exec_cpg_downlist {
> +     coroipc_request_header_t header __attribute__((aligned(8)));
> +     /* merge decisions */
> +     mar_uint32_t old_members __attribute__((aligned(8)));
> +     /* downlist below */
> +     mar_uint32_t left_nodes __attribute__((aligned(8)));
> +     mar_uint32_t nodeids[PROCESSOR_COUNT_MAX]  __attribute__((aligned(8)));
> +};
>  
> -static int memb_list_remove_value (unsigned int *list,
> -     size_t list_entries, int value)
> -{
> -     int j;
> -     int found = 0;
> +struct downlist_msg {
> +     mar_uint32_t sender_nodeid;
> +     mar_uint32_t old_members __attribute__((aligned(8)));
> +     mar_uint32_t left_nodes __attribute__((aligned(8)));
> +     mar_uint32_t nodeids[PROCESSOR_COUNT_MAX]  __attribute__((aligned(8)));
> +     struct list_head list;
> +};
>  
> -     for (j = 0; j < list_entries; j++) {
> -             if (list[j] == value) {
> -                     /* mark next values to be copied down */
> -                     found = 1;
> -             }
> -             else if (found) {
> -                     list[j-1] = list[j];
> -             }
> -     }
> -     if (found)
> -             return (list_entries - 1);
> -     else
> -             return list_entries;
> -}
> +static struct req_exec_cpg_downlist g_req_exec_cpg_downlist;
>  
>  static void cpg_sync_init_v2 (
>       const unsigned int *trans_list,
> @@ -451,7 +465,6 @@ static void cpg_sync_init_v2 (
>       size_t member_list_entries,
>       const struct memb_ring_id *ring_id)
>  {
> -     unsigned int lowest_nodeid = 0xffffffff;
>       int entries;
>       int i, j;
>       int found;
> @@ -465,6 +478,11 @@ static void cpg_sync_init_v2 (
>       last_sync_ring_id.nodeid = ring_id->rep.nodeid;
>       last_sync_ring_id.seq = ring_id->seq;
>  
> +     downlist_state = CPG_DOWNLIST_WAITING_FOR_MESSAGES;
> +     entries = 0;
> +     /*
> +      * Determine list of nodeids for downlist message
> +      */
>       for (i = 0; i < my_old_member_list_entries; i++) {
>               found = 0;
>               for (j = 0; j < trans_list_entries; j++) {
> @@ -474,35 +492,8 @@ static void cpg_sync_init_v2 (
>                       }
>               }
>               if (found == 0) {
> -                     my_member_list_entries = memb_list_remove_value (
> -                             my_member_list, my_member_list_entries,
> -                             my_old_member_list[i]);
> -             }
> -     }
> -
> -     for (i = 0; i < my_member_list_entries; i++) {
> -             if (my_member_list[i] < lowest_nodeid) {
> -                     lowest_nodeid = my_member_list[i];
> -             }
> -     }
> -
> -     entries = 0;
> -     if (lowest_nodeid == api->totem_nodeid_get()) {
> -             /*
> -              * Determine list of nodeids for downlist message
> -              */
> -             for (i = 0; i < my_old_member_list_entries; i++) {
> -                     found = 0;
> -                     for (j = 0; j < trans_list_entries; j++) {
> -                             if (my_old_member_list[i] == trans_list[j]) {
> -                                     found = 1;
> -                                     break;
> -                             }
> -                     }
> -                     if (found == 0) {
> -                             g_req_exec_cpg_downlist.nodeids[entries++] =
> -                                     my_old_member_list[i];
> -                     }
> +                     g_req_exec_cpg_downlist.nodeids[entries++] =
> +                             my_old_member_list[i];
>               }
>       }
>       g_req_exec_cpg_downlist.left_nodes = entries;
> @@ -525,17 +516,69 @@ static int cpg_sync_process (void)
>       return (res);
>  }
>  
> +static struct downlist_msg* downlist_master_choose (void)
> +{
> +     struct downlist_msg *cmp;
> +     struct downlist_msg *best = NULL;
> +     struct list_head *iter;
> +
> +     for (iter = downlist_messages_head.next;
> +             iter != &downlist_messages_head;
> +             iter = iter->next) {
> +
> +             cmp = list_entry(iter, struct downlist_msg, list);
> +             if (best == NULL) {
> +                     best = cmp;
> +                     continue;
> +             }
> +
> +             if (cmp->old_members < best->old_members) {
> +                     continue;
> +             }
> +             else if (cmp->old_members > best->old_members) {
> +                     best = cmp;
> +             }
> +             else if (cmp->sender_nodeid < best->sender_nodeid) {
> +                     best = cmp;
> +             }
> +
> +     }
> +     return best;
> +}
> +
> +static void downlist_messages_delete (void)
> +{
> +     struct downlist_msg *stored_msg;
> +     struct list_head *iter, *iter_next;
> +
> +     for (iter = downlist_messages_head.next;
> +             iter != &downlist_messages_head;
> +             iter = iter_next) {
> +
> +             iter_next = iter->next;
> +
> +             stored_msg = list_entry(iter, struct downlist_msg, list);
> +             list_del (&stored_msg->list);
> +             free (stored_msg);
> +     }
> +}
> +
>  static void cpg_sync_activate (void)
>  {
>       memcpy (my_old_member_list, my_member_list,
>               my_member_list_entries * sizeof (unsigned int));
>       my_old_member_list_entries = my_member_list_entries;
>  
> +     downlist_messages_delete ();
> +     downlist_state = CPG_DOWNLIST_NONE;
> +
>       notify_lib_totem_membership (NULL, my_member_list_entries, 
> my_member_list);
>  }
>  
>  static void cpg_sync_abort (void)
>  {
> +     downlist_messages_delete ();
> +     downlist_state = CPG_DOWNLIST_NONE;
>  }
>  
>  static int notify_lib_totem_membership (
> @@ -711,6 +754,7 @@ static int cpg_exec_init_fn (struct corosync_api_v1 
> *corosync_api)
>  #ifdef COROSYNC_SOLARIS
>       logsys_subsys_init();
>  #endif
> +     list_init (&downlist_messages_head);
>       api = corosync_api;
>       return (0);
>  }
> @@ -817,12 +861,17 @@ static void exec_cpg_joinlist_endian_convert (void 
> *msg_v)
>       }
>  }
>  
> +static void exec_cpg_downlist_endian_convert_old (void *msg)
> +{
> +}
> +
>  static void exec_cpg_downlist_endian_convert (void *msg)
>  {
>       struct req_exec_cpg_downlist *req_exec_cpg_downlist = msg;
>       unsigned int i;
>  
>       req_exec_cpg_downlist->left_nodes = 
> swab32(req_exec_cpg_downlist->left_nodes);
> +     req_exec_cpg_downlist->old_members = 
> swab32(req_exec_cpg_downlist->old_members);
>  
>       for (i = 0; i < req_exec_cpg_downlist->left_nodes; i++) {
>               req_exec_cpg_downlist->nodeids[i] = 
> swab32(req_exec_cpg_downlist->nodeids[i]);
> @@ -908,7 +957,13 @@ static void do_proc_join(
>                           MESSAGE_RES_CPG_CONFCHG_CALLBACK);
>  }
>  
> -static void message_handler_req_exec_cpg_downlist (
> +static void message_handler_req_exec_cpg_downlist_old (
> +     const void *message,
> +     unsigned int nodeid)
> +{
> +}
> +
> +static void message_handler_req_exec_cpg_downlist(
>       const void *message,
>       unsigned int nodeid)
>  {
> @@ -916,27 +971,61 @@ static void message_handler_req_exec_cpg_downlist (
>       int i;
>       mar_cpg_address_t left_list[1];
>       struct list_head *iter;
> +     struct downlist_msg *stored_msg;
> +     int found;
>  
> -     /*
> -             FOR OPTIMALIZATION - Make list of lists
> -     */
> +     if (downlist_state != CPG_DOWNLIST_WAITING_FOR_MESSAGES) {
> +             log_printf (LOGSYS_LEVEL_WARNING, "downlist left_list: %d 
> received in state %d",
> +                     req_exec_cpg_downlist->left_nodes, downlist_state);
> +             return;
> +     }
> +
> +     stored_msg = malloc (sizeof (struct downlist_msg));
> +     stored_msg->sender_nodeid = nodeid;
> +     stored_msg->old_members = req_exec_cpg_downlist->old_members;
> +     stored_msg->left_nodes = req_exec_cpg_downlist->left_nodes;
> +     memcpy (stored_msg->nodeids, req_exec_cpg_downlist->nodeids,
> +             req_exec_cpg_downlist->left_nodes * sizeof (mar_uint32_t));
> +     list_init (&stored_msg->list);
> +     list_add (&stored_msg->list, &downlist_messages_head);
>  
> -     log_printf (LOGSYS_LEVEL_DEBUG, "downlist left_list: %d\n", 
> req_exec_cpg_downlist->left_nodes);
> +     for (i = 0; i < my_member_list_entries; i++) {
> +             found = 0;
> +             for (iter = downlist_messages_head.next;
> +                     iter != &downlist_messages_head;
> +                     iter = iter->next) {
>  
> +                     stored_msg = list_entry(iter, struct downlist_msg, 
> list);
> +                     if (my_member_list[i] == stored_msg->sender_nodeid) {
> +                             found = 1;
> +                     }
> +             }
> +             if (!found) {
> +                     return;
> +             }
> +     }
> +
> +     downlist_state = CPG_DOWNLIST_APPLYING;
> +     stored_msg = downlist_master_choose ();
> +
> +     log_printf (LOGSYS_LEVEL_DEBUG, "chosen downlist from node %s",
> +             api->totem_ifaces_print(stored_msg->sender_nodeid));
> +
> +     /* send events */
>       for (iter = process_info_list_head.next; iter != 
> &process_info_list_head; ) {
>               struct process_info *pi = list_entry(iter, struct process_info, 
> list);
>               iter = iter->next;
>  
> -             for (i = 0; i < req_exec_cpg_downlist->left_nodes; i++) {
> -                     if (pi->nodeid == req_exec_cpg_downlist->nodeids[i]) {
> +             for (i = 0; i < stored_msg->left_nodes; i++) {
> +                     if (pi->nodeid == stored_msg->nodeids[i]) {
>                               left_list[0].nodeid = pi->nodeid;
>                               left_list[0].pid = pi->pid;
>                               left_list[0].reason = 
> CONFCHG_CPG_REASON_NODEDOWN;
>  
>                               notify_lib_joinlist(&pi->group, NULL,
> -                                                 0, NULL,
> -                                                 1, left_list,
> -                                                 
> MESSAGE_RES_CPG_CONFCHG_CALLBACK);
> +                                     0, NULL,
> +                                     1, left_list,
> +                                     MESSAGE_RES_CPG_CONFCHG_CALLBACK);
>                               list_del (&pi->list);
>                               free (pi);
>                               break;
> @@ -1081,6 +1170,8 @@ static int cpg_exec_send_downlist(void)
>       g_req_exec_cpg_downlist.header.id = SERVICE_ID_MAKE(CPG_SERVICE, 
> MESSAGE_REQ_EXEC_CPG_DOWNLIST);
>       g_req_exec_cpg_downlist.header.size = sizeof(struct 
> req_exec_cpg_downlist);
>  
> +     g_req_exec_cpg_downlist.old_members = my_old_member_list_entries;
> +
>       iov.iov_base = (void *)&g_req_exec_cpg_downlist;
>       iov.iov_len = g_req_exec_cpg_downlist.header.size;
>  

_______________________________________________
Openais mailing list
[email protected]
https://lists.linux-foundation.org/mailman/listinfo/openais

Reply via email to