At Thu, 25 Dec 2014 17:17:00 +0900, FUKUDA Yasuhito wrote: > > On Thu, 18 Dec 2014 14:00:07 +0900 > Hitoshi Mitake <[email protected]> wrote: > > > At Wed, 17 Dec 2014 17:34:04 +0900, > > FUKUDA Yasuhito wrote: > > > > > > In the current sheepdog, I/O requests are processed by way of the Priority Lists > > > during auto-recovery. > > > But this may waste performance. > > > > > > This patch omits the unnecessary procedure. > > > > Could you provide brief data which can show the performance improvement? > > Mitake-san, > > I measured the effect using the vdi write command during recovery. > The results are as follows.
Thanks for sharing. Applied, thanks. Hitoshi > > [results (response time of vdi write command)] > /* sheepdog-master */ > 1st: 2m38.765s > 2nd: 2m25.439s > 3rd: 2m32.993s > 4th: 2m30.355s > 5th: 2m27.301s > > average: 2m30.971s > > /* this-patch */ > 1st: 2m15.284s > 2nd: 2m13.842s > 3rd: 2m15.636s > 4th: 2m14.967s > 5th: 2m15.100s > > average: 2m14.966s > > [Environment] > sheepdog node: 3 > zookeeper node: 3 (same machine with sheepdog node) > copies: 3 > > [operations] > /* all sheepdog start */ > # sheep /var/lib/sheepdog > > /* create VDI for test */ > # dog cluster format -c 3 > # dog vdi create test1G 1G -P > # dog vdi create test10G 10G -P > > /* random data insert in test10G VDI */ > # cat CentOS-6.5-x86_64-bin-DVD1.iso | dog vdi write test10G 0 11000000000 > > /* delete object in one node */ > # pkill -9 sheep > # rm -rf /var/lib/sheepdog/obj/* > > /* cache clear */ > # echo 1 > /proc/sys/vm/drop_caches > > /* sheepdog start in one node */ > # sheep /var/lib/sheepdog > > ----->auto-recovery starts in the backend > > /* vdi write start */ > # time cat CentOS-6.5-x86_64-bin-DVD1.iso | dog vdi write test1G 0 1073741824 > > > Regards > > > > > Thanks, > > Hitoshi > > > > > > > > Signed-off-by: Yasuhito Fukuda <[email protected]> > > > --- > > > sheep/recovery.c | 113 > > > ++++++++++++++--------------------------------------- > > > 1 files changed, 30 insertions(+), 83 deletions(-) > > > > > > diff --git a/sheep/recovery.c b/sheep/recovery.c > > > index 85dad21..319fde3 100644 > > > --- a/sheep/recovery.c > > > +++ b/sheep/recovery.c > > > @@ -66,9 +66,6 @@ struct recovery_info { > > > > > > uint64_t count; > > > uint64_t *oids; > > > - uint64_t *prio_oids; > > > - uint64_t nr_prio_oids; > > > - uint64_t nr_scheduled_prio_oids; > > > > > > struct vnode_info *old_vinfo; > > > struct vnode_info *cur_vinfo; > > > @@ -84,6 +81,7 @@ static struct recovery_info *next_rinfo; > > > static main_thread(struct recovery_info *) current_rinfo; > > > > > > static void 
queue_recovery_work(struct recovery_info *rinfo); > > > +static void free_recovery_obj_work(struct recovery_obj_work *row); > > > > > > /* Dynamically grown list buffer default as 4M (2T storage) */ > > > #define DEFAULT_LIST_BUFFER_SIZE (UINT64_C(1) << 22) > > > @@ -606,22 +604,38 @@ bool node_in_recovery(void) > > > return main_thread_get(current_rinfo) != NULL; > > > } > > > > > > -static inline void prepare_schedule_oid(uint64_t oid) > > > +static void direct_recover_object_main(struct work *work) > > > +{ > > > + struct recovery_work *rw = container_of(work, struct recovery_work, > > > + work); > > > + struct recovery_obj_work *row = container_of(rw, > > > + struct recovery_obj_work, > > > + base); > > > + > > > + wakeup_requests_on_oid(row->oid); > > > + free_recovery_obj_work(row); > > > +} > > > + > > > +static inline void direct_queue_recovery_work(uint64_t oid) > > > { > > > struct recovery_info *rinfo = main_thread_get(current_rinfo); > > > > > > - if (xlfind(&oid, rinfo->prio_oids, rinfo->nr_prio_oids, oid_cmp)) { > > > - sd_debug("%" PRIx64 " has been already in prio_oids", oid); > > > - return; > > > - } > > > + struct recovery_work *rw; > > > + struct recovery_obj_work *row; > > > + row = xzalloc(sizeof(*row)); > > > + row->oid = oid; > > > > > > - rinfo->nr_prio_oids++; > > > - rinfo->prio_oids = xrealloc(rinfo->prio_oids, > > > - rinfo->nr_prio_oids * sizeof(uint64_t)); > > > - rinfo->prio_oids[rinfo->nr_prio_oids - 1] = oid; > > > - sd_debug("%"PRIx64" nr_prio_oids %"PRIu64, oid, rinfo->nr_prio_oids); > > > + rw = &row->base; > > > + rw->work.fn = recover_object_work; > > > + rw->work.done = direct_recover_object_main; > > > > > > - resume_suspended_recovery(); > > > + rw->epoch = rinfo->epoch; > > > + rw->tgt_epoch = rinfo->tgt_epoch; > > > + rw->rinfo = rinfo; > > > + rw->cur_vinfo = grab_vnode_info(rinfo->cur_vinfo); > > > + rw->old_vinfo = grab_vnode_info(rinfo->old_vinfo); > > > + > > > + queue_work(sys->recovery_wqueue, &rw->work); > > > } > 
> > > > > main_fn bool oid_in_recovery(uint64_t oid) > > > @@ -688,7 +702,7 @@ main_fn bool oid_in_recovery(uint64_t oid) > > > return false; > > > } > > > > > > - prepare_schedule_oid(oid); > > > + direct_queue_recovery_work(oid); > > > return true; > > > } > > > > > > @@ -719,7 +733,6 @@ static void free_recovery_info(struct recovery_info > > > *rinfo) > > > put_vnode_info(rinfo->cur_vinfo); > > > put_vnode_info(rinfo->old_vinfo); > > > free(rinfo->oids); > > > - free(rinfo->prio_oids); > > > for (int i = 0; i < rinfo->max_epoch; i++) > > > put_vnode_info(rinfo->vinfo_array[i]); > > > free(rinfo->vinfo_array); > > > @@ -803,78 +816,12 @@ static inline void finish_recovery(struct > > > recovery_info *rinfo) > > > sd_debug("recovery complete: new epoch %"PRIu32, recovered_epoch); > > > } > > > > > > -static inline bool oid_in_prio_oids(struct recovery_info *rinfo, > > > uint64_t oid) > > > -{ > > > - for (uint64_t i = 0; i < rinfo->nr_prio_oids; i++) > > > - if (rinfo->prio_oids[i] == oid) > > > - return true; > > > - return false; > > > -} > > > - > > > -/* > > > - * Schedule prio_oids to be recovered first in FIFO order > > > - * > > > - * rw->next is index of the original next object to be recovered and > > > also the > > > - * number of objects already recovered and being recovered. 
> > > - * we just move rw->prio_oids in between: > > > - * new_oids = [0..rw->next - 1] + [rw->prio_oids] + [rw->next] > > > - */ > > > -static inline void finish_schedule_oids(struct recovery_info *rinfo) > > > -{ > > > - uint64_t i, nr_recovered = rinfo->next, new_idx; > > > - uint64_t *new_oids; > > > - > > > - /* If I am the last oid, done */ > > > - if (nr_recovered == rinfo->count - 1) > > > - goto done; > > > - > > > - new_oids = xmalloc(list_buffer_size); > > > - memcpy(new_oids, rinfo->oids, nr_recovered * sizeof(uint64_t)); > > > - memcpy(new_oids + nr_recovered, rinfo->prio_oids, > > > - rinfo->nr_prio_oids * sizeof(uint64_t)); > > > - new_idx = nr_recovered + rinfo->nr_prio_oids; > > > - > > > - for (i = rinfo->next; i < rinfo->count; i++) { > > > - if (oid_in_prio_oids(rinfo, rinfo->oids[i])) > > > - continue; > > > - new_oids[new_idx++] = rinfo->oids[i]; > > > - } > > > - /* rw->count should eq new_idx, otherwise something is wrong */ > > > - sd_debug("%snr_recovered %" PRIu64 ", nr_prio_oids %" PRIu64 ", count %" > > > - PRIu64 " = new %" PRIu64, > > > - rinfo->count == new_idx ? "" : "WARN: ", nr_recovered, > > > - rinfo->nr_prio_oids, rinfo->count, new_idx); > > > - > > > - free(rinfo->oids); > > > - rinfo->oids = new_oids; > > > -done: > > > - free(rinfo->prio_oids); > > > - rinfo->prio_oids = NULL; > > > - rinfo->nr_scheduled_prio_oids += rinfo->nr_prio_oids; > > > - rinfo->nr_prio_oids = 0; > > > -} > > > - > > > -/* > > > - * When automatic object recovery is disabled, the behavior of the > > > - * recovery process is like 'lazy recovery'. This function returns > > > - * true if the recovery queue contains objects being accessed by > > > - * clients. Sheep recovers such objects for availability even when > > > - * automatic object recovery is not enabled. 
> > > - */ > > > -static bool has_scheduled_objects(struct recovery_info *rinfo) > > > -{ > > > - return rinfo->done < rinfo->nr_scheduled_prio_oids; > > > -} > > > - > > > static void recover_next_object(struct recovery_info *rinfo) > > > { > > > if (run_next_rw()) > > > return; > > > > > > - if (rinfo->nr_prio_oids) > > > - finish_schedule_oids(rinfo); > > > - > > > - if (sys->cinfo.disable_recovery && !has_scheduled_objects(rinfo)) { > > > + if (sys->cinfo.disable_recovery) { > > > sd_debug("suspended"); > > > rinfo->suspended = true; > > > /* suspend until resume_suspended_recovery() is called */ > > > -- > > > 1.7.1 > > > > > > > > > > > > -- > > > NTTソフトウェア株式会社 > > > クラウド事業部 第一事業ユニット(C一BU) > > > 福田康人(FUKUDA Yasuhito) > > > E-mail:[email protected] > > > 〒220-0012 横浜市西区みなとみらい4-4-5 > > > 横浜アイマークプレイス13階 > > > TEL:045-212-7393/FAX:045-662-7856 > > > > > > > > > -- > > > sheepdog mailing list > > > [email protected] > > > http://lists.wpkg.org/mailman/listinfo/sheepdog > > > -- > NTTソフトウェア株式会社 > クラウド事業部 第一事業ユニット(C一BU) > 福田康人(FUKUDA Yasuhito) > E-mail:[email protected] > 〒220-0012 横浜市西区みなとみらい4-4-5 > 横浜アイマークプレイス13階 > TEL:045-212-7393/FAX:045-662-7856 > > > -- > sheepdog mailing list > [email protected] > https://lists.wpkg.org/mailman/listinfo/sheepdog -- sheepdog mailing list [email protected] https://lists.wpkg.org/mailman/listinfo/sheepdog
