Signed-off-by: Ruoyu <lian...@ucweb.com>
---
include/sheepdog_proto.h | 4 ++++
sheep/object_list_cache.c | 38 +++++++++++++++++++++++++++++++++-----
sheep/recovery.c | 41 ++++++++++++++++++-----------------------
sheep/sheep_priv.h | 6 ++++++
4 files changed, 61 insertions(+), 28 deletions(-)
diff --git a/include/sheepdog_proto.h b/include/sheepdog_proto.h
index 8b5834b..76fad51 100644
--- a/include/sheepdog_proto.h
+++ b/include/sheepdog_proto.h
@@ -176,6 +176,10 @@ struct sd_req {
uint32_t generation;
uint32_t count;
} ref;
+ struct {
+ uint8_t addr[16];
+ uint16_t port;
+ } node_addr;
uint32_t __pad[8];
};
diff --git a/sheep/object_list_cache.c b/sheep/object_list_cache.c
index eefa40a..9a7073d 100644
--- a/sheep/object_list_cache.c
+++ b/sheep/object_list_cache.c
@@ -97,8 +97,17 @@ int objlist_cache_insert(uint64_t oid)
int get_obj_list(const struct sd_req *hdr, struct sd_rsp *rsp, void *data)
{
- int nr = 0;
+ int i = 0, j, copies;
struct objlist_cache_entry *entry;
+ struct node_id peer_nid;
+ struct request *req = container_of(hdr, struct request, rq);
+ struct vnode_info *peer_vinfo = req->vinfo;
+ const struct sd_vnode *vnodes[SD_MAX_COPIES];
+ int last = 0, end = 4096;
+ uint64_t *oids = xmalloc(end * sizeof(uint64_t));
+
+ memcpy(peer_nid.addr, hdr->node_addr.addr, sizeof(peer_nid.addr));
+ peer_nid.port = hdr->node_addr.port;
/* first try getting the cached buffer with only a read lock held */
sd_read_lock(&obj_list_cache.lock);
@@ -116,19 +125,38 @@ int get_obj_list(const struct sd_req *hdr, struct sd_rsp
*rsp, void *data)
obj_list_cache.cache_size * sizeof(uint64_t));
rb_for_each_entry(entry, &obj_list_cache.root, node) {
- obj_list_cache.buf[nr++] = entry->oid;
+ obj_list_cache.buf[i++] = entry->oid;
}
out:
- if (hdr->data_length < obj_list_cache.cache_size * sizeof(uint64_t)) {
+ /* Screen out objects that don't belong to that node */
+ for (i = 0; i < obj_list_cache.cache_size; i++) {
+ copies = get_obj_copy_number(obj_list_cache.buf[i],
+ peer_vinfo->nr_zones);
+ oid_to_vnodes(obj_list_cache.buf[i],
+ &peer_vinfo->vroot, copies, vnodes);
+ for (j = 0; j < copies; j++) {
+ if (!vnode_is_peer(vnodes[j], &peer_nid))
+ continue;
+ oids[last++] = obj_list_cache.buf[i];
+ if (last >= end) {
+ end *= 2;
+ oids = xrealloc(oids, end * sizeof(uint64_t));
+ }
+ }
+ }
+
+ if (hdr->data_length < last * sizeof(uint64_t)) {
sd_rw_unlock(&obj_list_cache.lock);
sd_err("GET_OBJ_LIST buffer too small");
+ free(oids);
return SD_RES_BUFFER_SMALL;
}
- rsp->data_length = obj_list_cache.cache_size * sizeof(uint64_t);
- memcpy(data, obj_list_cache.buf, rsp->data_length);
+ rsp->data_length = last * sizeof(uint64_t);
+ memcpy(data, oids, rsp->data_length);
sd_rw_unlock(&obj_list_cache.lock);
+ free(oids);
return SD_RES_SUCCESS;
}
diff --git a/sheep/recovery.c b/sheep/recovery.c
index 4648966..dfd27bb 100644
--- a/sheep/recovery.c
+++ b/sheep/recovery.c
@@ -978,6 +978,9 @@ retry:
sd_init_req(&hdr, SD_OP_GET_OBJ_LIST);
hdr.data_length = buf_size;
hdr.epoch = epoch;
+ memcpy(hdr.node_addr.addr, sys->this_node.nid.addr,
+ sizeof(hdr.node_addr.addr));
+ hdr.node_addr.port = sys->this_node.nid.port;
ret = sheep_exec_req(&e->nid, &hdr, buf);
switch (ret) {
@@ -997,40 +1000,28 @@ retry:
}
*nr_oids = rsp->data_length / sizeof(uint64_t);
- sd_debug("%zu", *nr_oids);
+ sd_debug("%s: %zu objects to be fetched",
+ addr_to_str(e->nid.addr, e->nid.port), *nr_oids);
return buf;
}
-/* Screen out objects that don't belong to this node */
-static void screen_object_list(struct recovery_list_work *rlw,
+/* Merge object lists fetched from other nodes */
+static void merge_object_list(struct recovery_list_work *rlw,
uint64_t *oids, size_t nr_oids)
{
- struct recovery_work *rw = &rlw->base;
- const struct sd_vnode *vnodes[SD_MAX_COPIES];
uint64_t old_count = rlw->count;
- uint64_t nr_objs;
- uint64_t i, j;
+ uint64_t i;
for (i = 0; i < nr_oids; i++) {
if (xbsearch(&oids[i], rlw->oids, old_count, obj_cmp))
/* the object is already scheduled to be recovered */
continue;
- nr_objs = get_obj_copy_number(oids[i], rw->cur_vinfo->nr_zones);
-
- oid_to_vnodes(oids[i], &rw->cur_vinfo->vroot, nr_objs, vnodes);
- for (j = 0; j < nr_objs; j++) {
- if (!vnode_is_local(vnodes[j]))
- continue;
-
- rlw->oids[rlw->count++] = oids[i];
- /* enlarge the list buffer if full */
- if (rlw->count == list_buffer_size / sizeof(uint64_t)) {
- list_buffer_size *= 2;
- rlw->oids = xrealloc(rlw->oids,
- list_buffer_size);
- }
- break;
+ rlw->oids[rlw->count++] = oids[i];
+ /* enlarge the list buffer if full */
+ if (rlw->count == list_buffer_size / sizeof(uint64_t)) {
+ list_buffer_size *= 2;
+ rlw->oids = xrealloc(rlw->oids, list_buffer_size);
}
}
@@ -1064,6 +1055,10 @@ again:
size_t nr_oids;
struct sd_node *node = nodes + i;
+ /* No need to fetch from local node */
+ if (node_is_local(node))
+ continue;