Currently we can easily get into a situation where we can't read objects
after losing a node in an offline cluster and then doing a manual recovery.

To fix this, call start_recovery from cluster_manual_recover.  Also move
get_vnodes_from_epoch into group.c and rename it to fit with the rest of
the vnode_info functions now that it is used outside of recovery.c.

Signed-off-by: Christoph Hellwig <[email protected]>

diff --git a/sheep/group.c b/sheep/group.c
index c2679f2..a83590c 100644
--- a/sheep/group.c
+++ b/sheep/group.c
@@ -201,7 +201,8 @@ void oid_to_vnodes(struct vnode_info *vnode_info, uint64_t oid, int nr_copies,
        }
 }
 
-struct vnode_info *alloc_vnode_info(struct sd_node *nodes, size_t nr_nodes)
+static struct vnode_info *alloc_vnode_info(struct sd_node *nodes,
+               size_t nr_nodes)
 {
        struct vnode_info *vnode_info;
 
@@ -218,6 +219,23 @@ struct vnode_info *alloc_vnode_info(struct sd_node *nodes, size_t nr_nodes)
        return vnode_info;
 }
 
+struct vnode_info *get_vnode_info_epoch(uint32_t epoch)
+{
+       struct sd_node nodes[SD_MAX_NODES];
+       int nr_nodes;
+
+       nr_nodes = epoch_log_read_nr(epoch, (void *)nodes, sizeof(nodes));
+       if (nr_nodes < 0) {
+               nr_nodes = epoch_log_read_remote(epoch, (void *)nodes,
+                                                sizeof(nodes));
+               if (nr_nodes == 0)
+                       return NULL;
+               nr_nodes /= sizeof(nodes[0]);
+       }
+
+       return alloc_vnode_info(nodes, nr_nodes);
+}
+
 int local_get_node_list(const struct sd_req *req, struct sd_rsp *rsp,
                               void *data)
 {
diff --git a/sheep/ops.c b/sheep/ops.c
index 27ca07d..89870a1 100644
--- a/sheep/ops.c
+++ b/sheep/ops.c
@@ -420,6 +420,7 @@ static int local_get_epoch(struct request *req)
 static int cluster_manual_recover(const struct sd_req *req, struct sd_rsp *rsp,
                                void *data)
 {
+       struct vnode_info *old_vnode_info, *vnode_info;
        int ret = SD_RES_SUCCESS;
        uint8_t c;
        uint16_t f;
@@ -429,21 +430,25 @@ static int cluster_manual_recover(const struct sd_req *req, struct sd_rsp *rsp,
         * 2) some nodes are physically down (same epoch condition).
         * In both case, the nodes(s) stat is WAIT_FOR_JOIN.
         */
-       if (!sys_stat_wait_join()) {
-               ret = SD_RES_MANUAL_RECOVER;
-               goto out;
-       }
+       if (!sys_stat_wait_join())
+               return SD_RES_MANUAL_RECOVER;
 
        ret = get_cluster_copies(&c);
        if (ret)
-               goto out;
+               return ret;
        ret = get_cluster_flags(&f);
        if (ret)
-               goto out;
+               return ret;
 
        sys->nr_copies = c;
        sys->flags = f;
 
+       old_vnode_info = get_vnode_info_epoch(sys->epoch);
+       if (!old_vnode_info) {
+               eprintf("cannot get vnode info for epoch %d\n", sys->epoch);
+               return SD_RES_EIO;
+       }
+
        sys->epoch++; /* some nodes are left, so we get a new epoch */
        ret = log_current_epoch();
        if (ret) {
@@ -456,7 +461,12 @@ static int cluster_manual_recover(const struct sd_req *req, struct sd_rsp *rsp,
                sys_stat_set(SD_STATUS_OK);
        else
                sys_stat_set(SD_STATUS_HALT);
+
+       vnode_info = get_vnode_info();
+       start_recovery(vnode_info, old_vnode_info);
+       put_vnode_info(vnode_info);
 out:
+       put_vnode_info(old_vnode_info);
        return ret;
 }
 
diff --git a/sheep/recovery.c b/sheep/recovery.c
index 9b715ea..64309df 100644
--- a/sheep/recovery.c
+++ b/sheep/recovery.c
@@ -55,23 +55,6 @@ static int obj_cmp(const void *oid1, const void *oid2)
        return 0;
 }
 
-static struct vnode_info *get_vnodes_from_epoch(uint32_t epoch)
-{
-       struct sd_node nodes[SD_MAX_NODES];
-       int nr_nodes;
-
-       nr_nodes = epoch_log_read_nr(epoch, (void *)nodes, sizeof(nodes));
-       if (nr_nodes < 0) {
-               nr_nodes = epoch_log_read_remote(epoch, (void *)nodes,
-                                                sizeof(nodes));
-               if (nr_nodes == 0)
-                       return NULL;
-               nr_nodes /= sizeof(nodes[0]);
-       }
-
-       return alloc_vnode_info(nodes, nr_nodes);
-}
-
 static int recover_object_from_replica(uint64_t oid,
                                       struct sd_vnode *entry,
                                       uint32_t epoch, uint32_t tgt_epoch)
@@ -225,7 +208,7 @@ again:
                        goto err;
                }
 
-               new_old = get_vnodes_from_epoch(tgt_epoch);
+               new_old = get_vnode_info_epoch(tgt_epoch);
                if (!new_old) {
                        ret = -1;
                        goto err;
diff --git a/sheep/sheep_priv.h b/sheep/sheep_priv.h
index 60432c7..45d3852 100644
--- a/sheep/sheep_priv.h
+++ b/sheep/sheep_priv.h
@@ -232,10 +232,10 @@ int local_get_node_list(const struct sd_req *req, struct sd_rsp *rsp,
                void *data);
 
 bool have_enough_zones(void);
-struct vnode_info *alloc_vnode_info(struct sd_node *nodes, size_t nr_nodes);
 struct vnode_info *grab_vnode_info(struct vnode_info *vnode_info);
 struct vnode_info *get_vnode_info(void);
 void put_vnode_info(struct vnode_info *vnodes);
+struct vnode_info *get_vnode_info_epoch(uint32_t epoch);
 
 struct sd_vnode *oid_to_vnode(struct vnode_info *vnode_info, uint64_t oid,
                int copy_idx);
-- 
sheepdog mailing list
[email protected]
http://lists.wpkg.org/mailman/listinfo/sheepdog

Reply via email to