Due to the way the cluster status and inc_epoch work, this can give us different historic epochs for different sheep in a cluster. Instead, only create a previous-generation in-core node list if we don't already have a valid one. Now that all recovery code operates only on the in-memory node lists, that is easily possible.
Signed-off-by: Christoph Hellwig <[email protected]> diff --git a/sheep/group.c b/sheep/group.c index 4e83b1e..0edb97e 100644 --- a/sheep/group.c +++ b/sheep/group.c @@ -611,23 +611,18 @@ int log_current_epoch(void) current_vnode_info->nr_nodes); } -static void log_last_epoch(struct join_message *msg, struct sd_node *joined, +static struct vnode_info *alloc_old_vnode_info(struct sd_node *joined, struct sd_node *nodes, size_t nr_nodes) { - if ((msg->cluster_status == SD_STATUS_OK || - msg->cluster_status == SD_STATUS_HALT) && msg->inc_epoch) { - struct sd_node old_nodes[SD_MAX_NODES]; - size_t count = 0, i; - - /* exclude the newly added one */ - for (i = 0; i < nr_nodes; i++) { - if (!node_eq(nodes + i, joined)) - old_nodes[count++] = nodes[i]; - } - put_vnode_info(current_vnode_info); - current_vnode_info = alloc_vnode_info(old_nodes, count); - log_current_epoch(); + struct sd_node old_nodes[SD_MAX_NODES]; + size_t count = 0, i; + + /* exclude the newly added one */ + for (i = 0; i < nr_nodes; i++) { + if (!node_eq(nodes + i, joined)) + old_nodes[count++] = nodes[i]; } + return alloc_vnode_info(old_nodes, count); } static void finish_join(struct join_message *msg, struct sd_node *joined, @@ -639,12 +634,6 @@ static void finish_join(struct join_message *msg, struct sd_node *joined, sys->nr_copies = msg->nr_copies; sys->epoch = msg->epoch; - /* - * Make sure we have an epoch log record for the epoch before - * this node joins, as recovery expects this record to exist. 
- */ - log_last_epoch(msg, joined, nodes, nr_nodes); - if (msg->cluster_status != SD_STATUS_OK) { int nr_leave_nodes; uint32_t le; @@ -740,6 +729,12 @@ static void update_cluster_info(struct join_message *msg, if (msg->inc_epoch) { list_for_each_entry_safe(n, t, &sys->leave_list, list) list_del(&n->list); + + if (!old_vnode_info) { + old_vnode_info = + alloc_old_vnode_info(joined, nodes, nr_nodes); + } + start_recovery(current_vnode_info, old_vnode_info); } -- sheepdog mailing list [email protected] http://lists.wpkg.org/mailman/listinfo/sheepdog
