From: MORITA Kazutaka <[email protected]>

If we stop sheepdog when we have no replicas in the working directory,
sheep cannot recover the objects after restarting.  This is because
sheep doesn't look into the stale directory when creating an object
list.  This patch fixes the problem.

Signed-off-by: MORITA Kazutaka <[email protected]>
---
 sheep/md.c          |   19 +++++++++++++++++++
 sheep/plain_store.c |    2 ++
 sheep/recovery.c    |   11 -----------
 sheep/sheep_priv.h  |    2 ++
 4 files changed, 23 insertions(+), 11 deletions(-)

diff --git a/sheep/md.c b/sheep/md.c
index 69210e2..14d563d 100644
--- a/sheep/md.c
+++ b/sheep/md.c
@@ -378,6 +378,25 @@ int for_each_object_in_wd(int (*func)(uint64_t oid, char 
*path, void *arg),
        return ret;
 }
 
+int for_each_object_in_stale(int (*func)(uint64_t oid, char *path, void *arg),
+                            void *arg)
+{
+       int i, ret = SD_RES_SUCCESS;
+       char path[PATH_MAX];
+
+       pthread_rwlock_rdlock(&md_lock);
+       for (i = 0; i < md_nr_disks; i++) {
+               snprintf(path, sizeof(path), "%s/.stale", md_disks[i].path);
+               sd_eprintf("%s", path);
+               ret = for_each_object_in_path(path, func, false, arg);
+               if (ret != SD_RES_SUCCESS)
+                       break;
+       }
+       pthread_rwlock_unlock(&md_lock);
+       return ret;
+}
+
+
 int for_each_obj_path(int (*func)(char *path))
 {
        int i, ret = SD_RES_SUCCESS;
diff --git a/sheep/plain_store.c b/sheep/plain_store.c
index 5428dbe..7f6b52d 100644
--- a/sheep/plain_store.c
+++ b/sheep/plain_store.c
@@ -225,6 +225,8 @@ int default_init(void)
        if (ret != SD_RES_SUCCESS)
                return ret;
 
+       for_each_object_in_stale(init_objlist_and_vdi_bitmap, NULL);
+
        return for_each_object_in_wd(init_objlist_and_vdi_bitmap, true, NULL);
 }
 
diff --git a/sheep/recovery.c b/sheep/recovery.c
index 11253a0..20e930f 100644
--- a/sheep/recovery.c
+++ b/sheep/recovery.c
@@ -731,14 +731,6 @@ static void screen_object_list(struct recovery_list_work 
*rlw,
        qsort(rlw->oids, rlw->count, sizeof(uint64_t), obj_cmp);
 }
 
-static bool newly_joined(struct sd_node *node, struct recovery_work *rw)
-{
-       if (bsearch(node, rw->old_vinfo->nodes, rw->old_vinfo->nr_nodes,
-                   sizeof(struct sd_node), node_id_cmp))
-               return false;
-       return true;
-}
-
 /* Prepare the object list that belongs to this node */
 static void prepare_object_list(struct work *work)
 {
@@ -764,9 +756,6 @@ again:
                        sd_dprintf("go to the next recovery");
                        return;
                }
-               if (newly_joined(node, rw))
-                       /* new node doesn't have a list file */
-                       continue;
 
                oids = fetch_object_list(node, rw->epoch, &nr_oids);
                if (!oids)
diff --git a/sheep/sheep_priv.h b/sheep/sheep_priv.h
index f1a8c44..173cd55 100644
--- a/sheep/sheep_priv.h
+++ b/sheep/sheep_priv.h
@@ -179,6 +179,8 @@ int default_remove_object(uint64_t oid);
 int default_get_hash(uint64_t oid, uint32_t epoch, uint8_t *sha1);
 int default_purge_obj(void);
 int for_each_object_in_wd(int (*func)(uint64_t, char *, void *), bool, void *);
+int for_each_object_in_stale(int (*func)(uint64_t oid, char *path, void *arg),
+                            void *arg);
 int for_each_obj_path(int (*func)(char *path));
 
 extern struct list_head store_drivers;
-- 
1.7.9.5

-- 
sheepdog mailing list
[email protected]
http://lists.wpkg.org/mailman/listinfo/sheepdog

Reply via email to