From: Liu Yuan <tailai...@taobao.com> If the master goes down before sending its response during the join phase, we have to revoke its mastership to prevent the cluster from hanging.
Signed-off-by: Liu Yuan <tailai...@taobao.com> --- sheep/cluster/corosync.c | 8 ++++++++ 1 files changed, 8 insertions(+), 0 deletions(-) diff --git a/sheep/cluster/corosync.c b/sheep/cluster/corosync.c index 6f1eda4..01b3673 100644 --- a/sheep/cluster/corosync.c +++ b/sheep/cluster/corosync.c @@ -541,6 +541,7 @@ static void cdrv_cpg_confchg(cpg_handle_t handle, /* dispatch leave_handler */ for (i = 0; i < left_list_entries; i++) { + int master; cevent = find_block_event(COROSYNC_EVENT_TYPE_JOIN, left_sheep + i); if (cevent) { @@ -564,6 +565,13 @@ static void cdrv_cpg_confchg(cpg_handle_t handle, if (!cevent) panic("failed to allocate memory\n"); + master = is_master(&left_sheep[i]); + if (master >= 0) + /* Master is down before new nodes finish joining. + * We have to revoke its mastership to avoid cluster hanging + */ + cpg_nodes[master].gone = 1; + cevent->type = COROSYNC_EVENT_TYPE_LEAVE; cevent->sender = left_sheep[i]; -- 1.7.8.rc3 -- sheepdog mailing list sheepdog@lists.wpkg.org http://lists.wpkg.org/mailman/listinfo/sheepdog