Cpg synchronization patch for conf change messages.
The root of the theoretical problem is that cpg_join or cpg_leave
messages are being sent via the C APIs between synchronization steps. With
the current cpg, synchronization happens in confchg_fn, and then later
in cpg_sync_process. cpg_sync_process is called much later than
confchg_fn, which opens a small window of time in which cpg_join and
cpg_leave operations that are queued in totem (but not yet ordered by
totem) can interact with the synchronization process. Synchronization
should happen in one atomic operation, but currently it is two distinct
operations.
Regards,
Honza
Index: services/cpg.c
===================================================================
--- services/cpg.c (revision 2359)
+++ services/cpg.c (working copy)
@@ -122,6 +122,12 @@
CPD_STATE_JOIN_COMPLETED
};
+enum cpg_sync_state {
+ CPGSYNC_DOWNLIST,
+ CPGSYNC_JOINLIST
+};
+
+
struct cpg_pd {
void *conn;
mar_cpg_name_t group_name;
@@ -129,8 +135,21 @@
enum cpd_state cpd_state;
struct list_head list;
};
+
DECLARE_LIST_INIT(cpg_pd_list_head);
+static unsigned int my_member_list[PROCESSOR_COUNT_MAX];
+
+static unsigned int my_member_list_entries;
+
+static unsigned int my_old_member_list[PROCESSOR_COUNT_MAX];
+
+static unsigned int my_old_member_list_entries = 0;
+
+static struct corosync_api_v1 *api = NULL;
+
+static enum cpg_sync_state my_sync_state = CPGSYNC_DOWNLIST;
+
struct process_info {
unsigned int nodeid;
uint32_t pid;
@@ -144,18 +163,9 @@
mar_cpg_name_t group_name;
};
-static struct corosync_api_v1 *api = NULL;
-
/*
* Service Interfaces required by service_message_handler struct
*/
-static void cpg_confchg_fn (
- enum totem_configuration_type configuration_type,
- const unsigned int *member_list, size_t member_list_entries,
- const unsigned int *left_list, size_t left_list_entries,
- const unsigned int *joined_list, size_t joined_list_entries,
- const struct memb_ring_id *ring_id);
-
static int cpg_exec_init_fn (struct corosync_api_v1 *);
static int cpg_lib_init_fn (void *conn);
@@ -204,6 +214,8 @@
static int cpg_node_joinleave_send (unsigned int pid, const mar_cpg_name_t *group_name, int fn, int reason);
+static int cpg_exec_send_downlist(void);
+
static int cpg_exec_send_joinlist(void);
static void cpg_sync_init (
@@ -212,8 +224,11 @@
const struct memb_ring_id *ring_id);
static int cpg_sync_process (void);
+
static void cpg_sync_activate (void);
+
static void cpg_sync_abort (void);
+
/*
* Library Handler Definition
*/
@@ -280,7 +295,6 @@
.exec_dump_fn = NULL,
.exec_engine = cpg_exec_engine,
.exec_engine_count = sizeof (cpg_exec_engine) / sizeof (struct corosync_exec_handler),
- .confchg_fn = cpg_confchg_fn,
.sync_mode = CS_SYNC_V1,
.sync_init = cpg_sync_init,
.sync_process = cpg_sync_process,
@@ -363,20 +377,71 @@
size_t member_list_entries,
const struct memb_ring_id *ring_id)
{
+ unsigned int lowest_nodeid = 0xffffffff;
+ int entries;
+ int i, j;
+ int found;
+
+ my_sync_state = CPGSYNC_DOWNLIST;
+
+ memcpy (my_member_list, member_list, member_list_entries *
+ sizeof (unsigned int));
+ my_member_list_entries = member_list_entries;
+
+ for (i = 0; i < my_member_list_entries; i++) {
+ if (my_member_list[i] < lowest_nodeid) {
+ lowest_nodeid = my_member_list[i];
+ }
+ }
+
+ entries = 0;
+ if (lowest_nodeid == api->totem_nodeid_get()) {
+ /*
+ * Determine list of nodeids for downlist message
+ */
+ for (i = 0; i < my_old_member_list_entries; i++) {
+ found = 0;
+ for (j = 0; j < my_member_list_entries; j++) {
+ if (my_old_member_list[i] == my_member_list[j]) {
+ found = 1;
+ break;
+ }
+ }
+ if (found == 0) {
+ g_req_exec_cpg_downlist.nodeids[entries++] =
+ my_old_member_list[i];
+ }
+ }
+ }
+ g_req_exec_cpg_downlist.left_nodes = entries;
}
static int cpg_sync_process (void)
{
- return cpg_exec_send_joinlist();
+ int res = -1;
+
+ if (my_sync_state == CPGSYNC_DOWNLIST) {
+ res = cpg_exec_send_downlist();
+ if (res == -1) {
+ return (-1);
+ }
+ my_sync_state = CPGSYNC_JOINLIST;
+ }
+ if (my_sync_state == CPGSYNC_JOINLIST) {
+ res = cpg_exec_send_joinlist();
+ }
+ return (res);
}
static void cpg_sync_activate (void)
{
+ memcpy (my_old_member_list, my_member_list,
+ my_member_list_entries * sizeof (unsigned int));
+ my_old_member_list_entries = my_member_list_entries;
+}
-}
static void cpg_sync_abort (void)
{
-
}
@@ -543,56 +608,6 @@
return (result);
}
-static void cpg_confchg_fn (
- enum totem_configuration_type configuration_type,
- const unsigned int *member_list, size_t member_list_entries,
- const unsigned int *left_list, size_t left_list_entries,
- const unsigned int *joined_list, size_t joined_list_entries,
- const struct memb_ring_id *ring_id)
-{
- int i;
- uint32_t lowest_nodeid = 0xffffffff;
- struct iovec req_exec_cpg_iovec;
-
- /* We don't send the library joinlist in here because it can end up
- out of order with the rest of the messages (which are totem ordered).
- So we get the lowest nodeid to send out a list of left nodes instead.
- On receipt of that message, all nodes will then notify their local clients
- of the new joinlist */
-
- if (left_list_entries) {
- for (i = 0; i < member_list_entries; i++) {
- if (member_list[i] < lowest_nodeid)
- lowest_nodeid = member_list[i];
- }
-
- log_printf(LOGSYS_LEVEL_DEBUG, "confchg, low nodeid=%d, us = %d\n", lowest_nodeid, api->totem_nodeid_get());
- if (lowest_nodeid == api->totem_nodeid_get()) {
-
- g_req_exec_cpg_downlist.header.id = SERVICE_ID_MAKE(CPG_SERVICE, MESSAGE_REQ_EXEC_CPG_DOWNLIST);
- g_req_exec_cpg_downlist.header.size = sizeof(struct req_exec_cpg_downlist);
-
- g_req_exec_cpg_downlist.left_nodes = left_list_entries;
- for (i = 0; i < left_list_entries; i++) {
- g_req_exec_cpg_downlist.nodeids[i] = left_list[i];
- }
- log_printf(LOGSYS_LEVEL_DEBUG,
- "confchg, build downlist: %lu nodes\n",
- (long unsigned int) left_list_entries);
- }
- }
-
- /* Don't send this message until we get the final configuration message */
- if (configuration_type == TOTEM_CONFIGURATION_REGULAR && g_req_exec_cpg_downlist.left_nodes) {
- req_exec_cpg_iovec.iov_base = (char *)&g_req_exec_cpg_downlist;
- req_exec_cpg_iovec.iov_len = g_req_exec_cpg_downlist.header.size;
-
- api->totem_mcast (&req_exec_cpg_iovec, 1, TOTEM_AGREED);
- g_req_exec_cpg_downlist.left_nodes = 0;
- log_printf(LOGSYS_LEVEL_DEBUG, "confchg, sent downlist\n");
- }
-}
-
/* Can byteswap join & leave messages */
static void exec_cpg_procjoin_endian_convert (void *msg)
{
@@ -874,6 +889,19 @@
}
+static int cpg_exec_send_downlist(void)
+{
+ struct iovec iov;
+
+ g_req_exec_cpg_downlist.header.id = SERVICE_ID_MAKE(CPG_SERVICE, MESSAGE_REQ_EXEC_CPG_DOWNLIST);
+ g_req_exec_cpg_downlist.header.size = sizeof(struct req_exec_cpg_downlist);
+
+ iov.iov_base = (void *)&g_req_exec_cpg_downlist;
+ iov.iov_len = g_req_exec_cpg_downlist.header.size;
+
+ return (api->totem_mcast (&iov, 1, TOTEM_AGREED));
+}
+
static int cpg_exec_send_joinlist(void)
{
int count = 0;
_______________________________________________
Openais mailing list
[email protected]
https://lists.linux-foundation.org/mailman/listinfo/openais