On Tue, Jun 17, 2014 at 06:08:40PM +0800, Ruoyu wrote: > In the stage of preparing object list, every node tries to fetch > entire object lists from all nodes of the cluster, and then, > screens out objects that don't belong to it's own. This behavior > transfers quite a lot of unnecessary object id over the network. > > This patch resolve the issue by: > 1. receiver node prepare objects that belong to the sender node. > 2. sender node don't screen out objects any longer because it is > already done by the receiver, but merging object lists fetched > from different nodes is essential. > > Signed-off-by: Ruoyu <lian...@ucweb.com> > --- > include/sheepdog_proto.h | 4 ++++ > sheep/object_list_cache.c | 38 +++++++++++++++++++++++++++++++++----- > sheep/recovery.c | 37 ++++++++++++++----------------------- > sheep/sheep_priv.h | 6 ++++++ > 4 files changed, 57 insertions(+), 28 deletions(-) > > diff --git a/include/sheepdog_proto.h b/include/sheepdog_proto.h > index 8b5834b..76fad51 100644 > --- a/include/sheepdog_proto.h > +++ b/include/sheepdog_proto.h > @@ -176,6 +176,10 @@ struct sd_req { > uint32_t generation; > uint32_t count; > } ref; > + struct { > + uint8_t addr[16]; > + uint16_t port; > + } node_addr; > > uint32_t __pad[8]; > }; > diff --git a/sheep/object_list_cache.c b/sheep/object_list_cache.c > index eefa40a..9a7073d 100644 > --- a/sheep/object_list_cache.c > +++ b/sheep/object_list_cache.c > @@ -97,8 +97,17 @@ int objlist_cache_insert(uint64_t oid) > > int get_obj_list(const struct sd_req *hdr, struct sd_rsp *rsp, void *data) > { > - int nr = 0; > + int i = 0, j, copies; > struct objlist_cache_entry *entry; > + struct node_id peer_nid; > + struct request *req = container_of(hdr, struct request, rq); > + struct vnode_info *peer_vinfo = req->vinfo; > + const struct sd_vnode *vnodes[SD_MAX_COPIES]; > + int last = 0, end = 4096; > + uint64_t *oids = xmalloc(end * sizeof(uint64_t)); > + > + memcpy(peer_nid.addr, hdr->node_addr.addr, sizeof(peer_nid.addr)); > + peer_nid.port = hdr->node_addr.port; > > /* first try getting the cached buffer with only a read lock held */ > sd_read_lock(&obj_list_cache.lock); > @@ -116,19 +125,38 @@ int get_obj_list(const struct sd_req *hdr, struct > sd_rsp *rsp, void *data) > obj_list_cache.cache_size * sizeof(uint64_t)); > > rb_for_each_entry(entry, &obj_list_cache.root, node) { > - obj_list_cache.buf[nr++] = entry->oid; > + obj_list_cache.buf[i++] = entry->oid; > } > > out: > - if (hdr->data_length < obj_list_cache.cache_size * sizeof(uint64_t)) { > + /* Screen out objects that don't belong to that node */ > + for (i = 0; i < obj_list_cache.cache_size; i++) { > + copies = get_obj_copy_number(obj_list_cache.buf[i], > + peer_vinfo->nr_zones); > + oid_to_vnodes(obj_list_cache.buf[i], > + &peer_vinfo->vroot, copies, vnodes); > + for (j = 0; j < copies; j++) { > + if (!vnode_is_peer(vnodes[j], &peer_nid)) > + continue; > + oids[last++] = obj_list_cache.buf[i]; > + if (last >= end) { > + end *= 2; > + oids = xrealloc(oids, end * sizeof(uint64_t)); > + } > + } > + } > + > + if (hdr->data_length < last * sizeof(uint64_t)) { > sd_rw_unlock(&obj_list_cache.lock); > sd_err("GET_OBJ_LIST buffer too small"); > + free(oids); > return SD_RES_BUFFER_SMALL; > } > > - rsp->data_length = obj_list_cache.cache_size * sizeof(uint64_t); > - memcpy(data, obj_list_cache.buf, rsp->data_length); > + rsp->data_length = last * sizeof(uint64_t); > + memcpy(data, oids, rsp->data_length); > sd_rw_unlock(&obj_list_cache.lock); > + free(oids); > return SD_RES_SUCCESS; > } > > diff --git a/sheep/recovery.c b/sheep/recovery.c > index 4648966..ea67b5f 100644 > --- a/sheep/recovery.c > +++ b/sheep/recovery.c > @@ -978,6 +978,9 @@ retry: > sd_init_req(&hdr, SD_OP_GET_OBJ_LIST); > hdr.data_length = buf_size; > hdr.epoch = epoch; > + memcpy(hdr.node_addr.addr, sys->this_node.nid.addr, > + sizeof(hdr.node_addr.addr)); > + hdr.node_addr.port = sys->this_node.nid.port; > ret = sheep_exec_req(&e->nid, &hdr, buf); > > switch (ret) { > @@ -997,40 +1000,28 @@ retry: > } > > *nr_oids = rsp->data_length / sizeof(uint64_t); > - sd_debug("%zu", *nr_oids); > + sd_debug("%s: %zu objects to be fetched", > + addr_to_str(e->nid.addr, e->nid.port), *nr_oids); > return buf; > } > > -/* Screen out objects that don't belong to this node */ > -static void screen_object_list(struct recovery_list_work *rlw, > +/* Merge object lists fetched from other nodes */ > +static void merge_object_list(struct recovery_list_work *rlw, > uint64_t *oids, size_t nr_oids) > { > - struct recovery_work *rw = &rlw->base; > - const struct sd_vnode *vnodes[SD_MAX_COPIES]; > uint64_t old_count = rlw->count; > - uint64_t nr_objs; > - uint64_t i, j; > + uint64_t i; > > for (i = 0; i < nr_oids; i++) { > if (xbsearch(&oids[i], rlw->oids, old_count, obj_cmp)) > /* the object is already scheduled to be recovered */ > continue; > > - nr_objs = get_obj_copy_number(oids[i], rw->cur_vinfo->nr_zones); > - > - oid_to_vnodes(oids[i], &rw->cur_vinfo->vroot, nr_objs, vnodes); > - for (j = 0; j < nr_objs; j++) { > - if (!vnode_is_local(vnodes[j])) > - continue; > - > - rlw->oids[rlw->count++] = oids[i]; > - /* enlarge the list buffer if full */ > - if (rlw->count == list_buffer_size / sizeof(uint64_t)) { > - list_buffer_size *= 2; > - rlw->oids = xrealloc(rlw->oids, > - list_buffer_size); > - } > - break; > + rlw->oids[rlw->count++] = oids[i]; > + /* enlarge the list buffer if full */ > + if (rlw->count == list_buffer_size / sizeof(uint64_t)) { > + list_buffer_size *= 2; > + rlw->oids = xrealloc(rlw->oids, list_buffer_size); > } > } > > @@ -1072,7 +1063,7 @@ again: > oids = fetch_object_list(node, rw->epoch, &nr_oids); > if (!oids) > continue; > - screen_object_list(rlw, oids, nr_oids); > + merge_object_list(rlw, oids, nr_oids); > free(oids); > } > > diff --git a/sheep/sheep_priv.h b/sheep/sheep_priv.h > index 7b33f11..02f75ad 100644 > --- a/sheep/sheep_priv.h > +++ b/sheep/sheep_priv.h > @@ -461,6 +461,12 @@ int gateway_to_peer_opcode(int opcode); > > extern uint32_t last_gathered_epoch; > > +static inline bool vnode_is_peer(const struct sd_vnode *v, > + const struct node_id *peer_nid) > +{ > + return node_id_cmp(&v->node->nid, peer_nid) == 0; > +} > + > static inline bool vnode_is_local(const struct sd_vnode *v) > { > return node_id_cmp(&v->node->nid, &sys->this_node.nid) == 0; > -- > 1.8.3.2 > > > -- > sheepdog mailing list > sheepdog@lists.wpkg.org > http://lists.wpkg.org/mailman/listinfo/sheepdog
Applied thanks Yuan -- sheepdog mailing list sheepdog@lists.wpkg.org http://lists.wpkg.org/mailman/listinfo/sheepdog