When cross-communicating with whitetank, we need to determine a list of
members for the syncv2 protocol to operate upon. The reason is that the
syncv2-capable machines (i.e., corosync in whitetank compat mode) may be
only a subset of the cluster when run with other openais whitetank
cluster members.
This patch sends messages that determine the membership of the syncv2 members. It
relies on the barrier functionality in the regular sync operation to
ensure that all syncv2 determination messages are received before
starting its operation.
Regards
-steve
Index: exec/syncv2.c
===================================================================
--- exec/syncv2.c (revision 2309)
+++ exec/syncv2.c (working copy)
@@ -65,6 +65,7 @@
#define MESSAGE_REQ_SYNC_BARRIER 0
#define MESSAGE_REQ_SYNC_SERVICE_BUILD 1
+#define MESSAGE_REQ_SYNC_MEMB_DETERMINE 2
enum sync_process_state {
INIT,
@@ -96,11 +97,16 @@
int received;
};
+struct req_exec_memb_determine_message {
+ coroipc_request_header_t header;
+ struct memb_ring_id ring_id;
+};
+
struct req_exec_service_build_message {
coroipc_request_header_t header;
struct memb_ring_id ring_id;
+ int service_list_entries;
int service_list[128];
- int service_list_entries;
};
struct req_exec_barrier_message {
@@ -112,6 +118,14 @@
static struct memb_ring_id my_ring_id;
+static struct memb_ring_id my_memb_determine_ring_id;
+
+static int my_memb_determine = 0;
+
+static unsigned int my_memb_determine_list[PROCESSOR_COUNT_MAX];
+
+static unsigned int my_memb_determine_list_entries = 0;
+
static int my_processing_idx = 0;
static hdb_handle_t my_schedwrk_handle;
@@ -229,6 +243,7 @@
if (barrier_reached) {
my_processing_idx += 1;
if (my_service_list_entries == my_processing_idx) {
+ my_memb_determine_list_entries = 0;
sync_synchronization_completed ();
} else {
sync_process_enter ();
@@ -238,7 +253,7 @@
static void dummy_sync_init (
const unsigned int *member_list,
- unsigned int member_list_entries,
+ size_t member_list_entries,
const struct memb_ring_id *ring_id)
{
}
@@ -264,6 +279,30 @@
return (service_entry_a->service_id > service_entry_b->service_id);
}
+static void sync_memb_determine (unsigned int nodeid, const void *msg)
+{
+ const struct req_exec_memb_determine_message *req_exec_memb_determine_message = msg;
+ int found = 0;
+ int i;
+
+ if (memcmp (&req_exec_memb_determine_message->ring_id,
+ &my_memb_determine_ring_id, sizeof (struct memb_ring_id)) != 0) {
+
+ return;
+ }
+
+ my_memb_determine = 1;
+ for (i = 0; i < my_memb_determine_list_entries; i++) {
+ if (my_memb_determine_list[i] == nodeid) {
+ found = 1;
+ }
+ }
+ if (found == 0) {
+ my_memb_determine_list[my_memb_determine_list_entries] = nodeid;
+ my_memb_determine_list_entries += 1;
+ }
+}
+
static void sync_service_build_handler (unsigned int nodeid, const void *msg)
{
const struct req_exec_service_build_message *req_exec_service_build_message = msg;
@@ -342,9 +381,32 @@
case MESSAGE_REQ_SYNC_SERVICE_BUILD:
sync_service_build_handler (nodeid, msg);
break;
+ case MESSAGE_REQ_SYNC_MEMB_DETERMINE:
+ sync_memb_determine (nodeid, msg);
+ break;
}
}
+static void memb_determine_message_transmit (void)
+{
+ struct iovec iovec;
+ struct req_exec_memb_determine_message req_exec_memb_determine_message;
+ int res;
+
+ req_exec_memb_determine_message.header.size = sizeof (struct req_exec_memb_determine_message);
+ req_exec_memb_determine_message.header.id = MESSAGE_REQ_SYNC_MEMB_DETERMINE;
+
+ memcpy (&req_exec_memb_determine_message.ring_id,
+ &my_memb_determine_ring_id,
+ sizeof (struct memb_ring_id));
+
+ iovec.iov_base = (char *)&req_exec_memb_determine_message;
+ iovec.iov_len = sizeof (req_exec_memb_determine_message);
+
+ res = totempg_groups_mcast_joined (sync_group_handle,
+ &iovec, 1, TOTEMPG_AGREED);
+}
+
static void barrier_message_transmit (void)
{
struct iovec iovec;
@@ -397,9 +459,9 @@
/*
* No syncv2 services
*/
-assert (my_service_list_entries);
if (my_service_list_entries == 0) {
my_state = SYNC_SERVICELIST_BUILD;
+ my_memb_determine_list_entries = 0;
sync_synchronization_completed ();
return;
}
@@ -448,7 +510,8 @@
if (my_service_list[my_processing_idx].state == INIT) {
my_service_list[my_processing_idx].state = PROCESS;
- my_service_list[my_processing_idx].sync_init (my_member_list, my_member_list_entries,
+ my_service_list[my_processing_idx].sync_init (my_member_list,
+ my_member_list_entries,
&my_ring_id);
}
if (my_service_list[my_processing_idx].state == PROCESS) {
@@ -477,7 +540,14 @@
{
memcpy (&my_ring_id, ring_id, sizeof (struct memb_ring_id));
- sync_servicelist_build_enter (member_list, member_list_entries, ring_id);
+ if (my_memb_determine) {
+ my_memb_determine = 0;
+ sync_servicelist_build_enter (my_memb_determine_list,
+ my_memb_determine_list_entries, ring_id);
+ } else {
+ sync_servicelist_build_enter (member_list, member_list_entries,
+ ring_id);
+ }
}
void sync_v2_abort (void)
@@ -487,3 +557,16 @@
my_service_list[my_processing_idx].sync_abort ();
}
}
+
+void sync_v2_memb_list_determine (const struct memb_ring_id *ring_id)
+{
+ memcpy (&my_memb_determine_ring_id, ring_id,
+ sizeof (struct memb_ring_id));
+
+ memb_determine_message_transmit ();
+}
+
+void sync_v2_memb_list_abort (void)
+{
+ my_memb_determine_list_entries = 0;
+}
Index: exec/syncv2.h
===================================================================
--- exec/syncv2.h (revision 2309)
+++ exec/syncv2.h (working copy)
@@ -50,4 +50,8 @@
extern void sync_v2_abort (void);
+extern void sync_v2_memb_list_determine (const struct memb_ring_id *ring_id);
+
+extern void sync_v2_memb_list_abort (void);
+
#endif /* SYNC_H_DEFINED */
Index: exec/sync.c
===================================================================
--- exec/sync.c (revision 2309)
+++ exec/sync.c (working copy)
@@ -73,6 +73,11 @@
static int (*sync_callbacks_retrieve) (int sync_id, struct sync_callbacks *callack);
+static void (*sync_started) (
+ const struct memb_ring_id *ring_id);
+
+static void (*sync_aborted) (void);
+
static struct sync_callbacks sync_callbacks;
static int sync_processing = 0;
@@ -263,6 +268,11 @@
int sync_id,
struct sync_callbacks *callbacks),
+ void (*started) (
+ const struct memb_ring_id *ring_id),
+
+ void (*aborted) (void),
+
void (*next_start) (
const unsigned int *member_list,
size_t member_list_entries,
@@ -291,6 +301,8 @@
sync_callbacks_retrieve = callbacks_retrieve;
sync_next_start = next_start;
+ sync_started = started;
+ sync_aborted = aborted;
return (0);
}
@@ -454,10 +466,14 @@
my_member_list_entries = member_list_entries;
if (sync_processing && sync_callbacks.sync_abort != NULL) {
+ sync_aborted ();
sync_callbacks.sync_abort ();
sync_callbacks.sync_activate = NULL;
}
+ sync_started (
+ ring_id);
+
sync_primary_callback_fn (
member_list,
member_list_entries,
Index: exec/sync.h
===================================================================
--- exec/sync.h (revision 2309)
+++ exec/sync.h (working copy)
@@ -55,6 +55,11 @@
int sync_id,
struct sync_callbacks *callbacks),
+ void (*sync_started) (
+ const struct memb_ring_id *ring_id),
+
+ void (*sync_aborted) (void),
+
void (*next_start) (
const unsigned int *member_list,
size_t member_list_entries,
Index: exec/main.c
===================================================================
--- exec/main.c (revision 2309)
+++ exec/main.c (working copy)
@@ -988,6 +1000,8 @@
log_printf (LOGSYS_LEVEL_NOTICE, "Compatibility mode set to whitetank. Using V1 and V2 of the synchronization engine.\n");
sync_register (
corosync_sync_callbacks_retrieve,
+ sync_v2_memb_list_determine,
+ sync_v2_memb_list_abort,
sync_v2_start);
sync_v2_init (
_______________________________________________
Openais mailing list
[email protected]
https://lists.linux-foundation.org/mailman/listinfo/openais