[sheepdog] [PATCH] sheep: recovery without using priority list

FUKUDA Yasuhito Wed, 17 Dec 2014 00:35:21 -0800

In the current sheepdog, I/O requests are processed by way of the priority
list during auto-recovery.
However, this may waste performance.


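This patch removes that unnecessary procedure: instead of adding the requested
object to the priority list, recovery work for it is queued directly.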

Signed-off-by: Yasuhito Fukuda <[email protected]>
---
 sheep/recovery.c |  113 ++++++++++++++---------------------------------------
 1 files changed, 30 insertions(+), 83 deletions(-)

diff --git a/sheep/recovery.c b/sheep/recovery.c
index 85dad21..319fde3 100644
--- a/sheep/recovery.c
+++ b/sheep/recovery.c
@@ -66,9 +66,6 @@ struct recovery_info {
 
        uint64_t count;
        uint64_t *oids;
-       uint64_t *prio_oids;
-       uint64_t nr_prio_oids;
-       uint64_t nr_scheduled_prio_oids;
 
        struct vnode_info *old_vinfo;
        struct vnode_info *cur_vinfo;
@@ -84,6 +81,7 @@ static struct recovery_info *next_rinfo;
 static main_thread(struct recovery_info *) current_rinfo;
 
 static void queue_recovery_work(struct recovery_info *rinfo);
+static void free_recovery_obj_work(struct recovery_obj_work *row);
 
 /* Dynamically grown list buffer default as 4M (2T storage) */
 #define DEFAULT_LIST_BUFFER_SIZE (UINT64_C(1) << 22)
@@ -606,22 +604,38 @@ bool node_in_recovery(void)
        return main_thread_get(current_rinfo) != NULL;
 }
 
-static inline void prepare_schedule_oid(uint64_t oid)
+static void direct_recover_object_main(struct work *work)
+{
+       struct recovery_work *rw = container_of(work, struct recovery_work,
+                                               work);
+       struct recovery_obj_work *row = container_of(rw,
+                                               struct recovery_obj_work,
+                                               base);
+
+       wakeup_requests_on_oid(row->oid);
+       free_recovery_obj_work(row);
+}
+
+static inline void direct_queue_recovery_work(uint64_t oid)
 {
        struct recovery_info *rinfo = main_thread_get(current_rinfo);
 
-       if (xlfind(&oid, rinfo->prio_oids, rinfo->nr_prio_oids, oid_cmp)) {
-               sd_debug("%" PRIx64 " has been already in prio_oids", oid);
-               return;
-       }
+       struct recovery_work *rw;
+       struct recovery_obj_work *row;
+       row = xzalloc(sizeof(*row));
+       row->oid = oid;
 
-       rinfo->nr_prio_oids++;
-       rinfo->prio_oids = xrealloc(rinfo->prio_oids,
-                                   rinfo->nr_prio_oids * sizeof(uint64_t));
-       rinfo->prio_oids[rinfo->nr_prio_oids - 1] = oid;
-       sd_debug("%"PRIx64" nr_prio_oids %"PRIu64, oid, rinfo->nr_prio_oids);
+       rw = &row->base;
+       rw->work.fn = recover_object_work;
+       rw->work.done = direct_recover_object_main;
 
-       resume_suspended_recovery();
+       rw->epoch = rinfo->epoch;
+       rw->tgt_epoch = rinfo->tgt_epoch;
+       rw->rinfo = rinfo;
+       rw->cur_vinfo = grab_vnode_info(rinfo->cur_vinfo);
+       rw->old_vinfo = grab_vnode_info(rinfo->old_vinfo);
+
+       queue_work(sys->recovery_wqueue, &rw->work);
 }
 
 main_fn bool oid_in_recovery(uint64_t oid)
@@ -688,7 +702,7 @@ main_fn bool oid_in_recovery(uint64_t oid)
                return false;
        }
 
-       prepare_schedule_oid(oid);
+       direct_queue_recovery_work(oid);
        return true;
 }
 
@@ -719,7 +733,6 @@ static void free_recovery_info(struct recovery_info *rinfo)
        put_vnode_info(rinfo->cur_vinfo);
        put_vnode_info(rinfo->old_vinfo);
        free(rinfo->oids);
-       free(rinfo->prio_oids);
        for (int i = 0; i < rinfo->max_epoch; i++)
                put_vnode_info(rinfo->vinfo_array[i]);
        free(rinfo->vinfo_array);
@@ -803,78 +816,12 @@ static inline void finish_recovery(struct recovery_info *rinfo)
        sd_debug("recovery complete: new epoch %"PRIu32, recovered_epoch);
 }
 
-static inline bool oid_in_prio_oids(struct recovery_info *rinfo, uint64_t oid)
-{
-       for (uint64_t i = 0; i < rinfo->nr_prio_oids; i++)
-               if (rinfo->prio_oids[i] == oid)
-                       return true;
-       return false;
-}
-
-/*
- * Schedule prio_oids to be recovered first in FIFO order
- *
- * rw->next is index of the original next object to be recovered and also the
- * number of objects already recovered and being recovered.
- * we just move rw->prio_oids in between:
- *   new_oids = [0..rw->next - 1] + [rw->prio_oids] + [rw->next]
- */
-static inline void finish_schedule_oids(struct recovery_info *rinfo)
-{
-       uint64_t i, nr_recovered = rinfo->next, new_idx;
-       uint64_t *new_oids;
-
-       /* If I am the last oid, done */
-       if (nr_recovered == rinfo->count - 1)
-               goto done;
-
-       new_oids = xmalloc(list_buffer_size);
-       memcpy(new_oids, rinfo->oids, nr_recovered * sizeof(uint64_t));
-       memcpy(new_oids + nr_recovered, rinfo->prio_oids,
-              rinfo->nr_prio_oids * sizeof(uint64_t));
-       new_idx = nr_recovered + rinfo->nr_prio_oids;
-
-       for (i = rinfo->next; i < rinfo->count; i++) {
-               if (oid_in_prio_oids(rinfo, rinfo->oids[i]))
-                       continue;
-               new_oids[new_idx++] = rinfo->oids[i];
-       }
-       /* rw->count should eq new_idx, otherwise something is wrong */
-       sd_debug("%snr_recovered %" PRIu64 ", nr_prio_oids %" PRIu64 ", count %"
-                PRIu64 " = new %" PRIu64,
-                rinfo->count == new_idx ? "" : "WARN: ", nr_recovered,
-                rinfo->nr_prio_oids, rinfo->count, new_idx);
-
-       free(rinfo->oids);
-       rinfo->oids = new_oids;
-done:
-       free(rinfo->prio_oids);
-       rinfo->prio_oids = NULL;
-       rinfo->nr_scheduled_prio_oids += rinfo->nr_prio_oids;
-       rinfo->nr_prio_oids = 0;
-}
-
-/*
- * When automatic object recovery is disabled, the behavior of the
- * recovery process is like 'lazy recovery'.  This function returns
- * true if the recovery queue contains objects being accessed by
- * clients.  Sheep recovers such objects for availability even when
- * automatic object recovery is not enabled.
- */
-static bool has_scheduled_objects(struct recovery_info *rinfo)
-{
-       return rinfo->done < rinfo->nr_scheduled_prio_oids;
-}
-
 static void recover_next_object(struct recovery_info *rinfo)
 {
        if (run_next_rw())
                return;
 
-       if (rinfo->nr_prio_oids)
-               finish_schedule_oids(rinfo);
-
-       if (sys->cinfo.disable_recovery && !has_scheduled_objects(rinfo)) {
+       if (sys->cinfo.disable_recovery) {
                sd_debug("suspended");
                rinfo->suspended = true;
                /* suspend until resume_suspended_recovery() is called */
-- 
1.7.1



-- 
NTTソフトウェア株式会社
クラウド事業部 第一事業ユニット(C一BU)
福田康人(FUKUDA Yasuhito)
E-mail:[email protected]
〒220-0012 横浜市西区みなとみらい4-4-5
横浜アイマークプレイス13階
TEL:045-212-7393/FAX:045-662-7856


-- 
sheepdog mailing list
[email protected]
http://lists.wpkg.org/mailman/listinfo/sheepdog

[sheepdog] [PATCH] sheep: recovery without using priority list

Reply via email to