From: HaiTing Yao <[email protected]>

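Node A receives a COW write request and forwards it to replica nodes
B, C and D, each of which performs the COW separately. Under some
conditions, nodes B, C and D may need to read the base data from node A
while node A is still waiting for their replies. If the I/O threads are
all busy, this results in a deadlock.

To avoid this, node A now does the COW itself before forwarding: it reads
the base object, merges the write data into it, and forwards a
full-object write with the COW flag cleared.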

Signed-off-by: HaiTing Yao <[email protected]>
---
 sheep/gateway.c    |   46 ++++++++++++++++++++++++++++++++++++++++++++++
 sheep/ops.c        |    2 +-
 sheep/sheep_priv.h |    3 +++
 3 files changed, 50 insertions(+), 1 deletions(-)

diff --git a/sheep/gateway.c b/sheep/gateway.c
index e92f3ed..3e1de75 100644
--- a/sheep/gateway.c
+++ b/sheep/gateway.c
@@ -82,6 +82,44 @@ read_remote:
        return ret;
 }
 
+/*
+ * Do the copy-on-write merge locally before the request is forwarded.
+ *
+ * A buffer of SD_DATA_OBJ_SIZE bytes is built: unless the write already
+ * covers SD_DATA_OBJ_SIZE bytes, the object cow_hdr->obj.cow_oid is first
+ * read into it with read_copy_from_replica(); the request payload is then
+ * copied in at cow_hdr->obj.offset.
+ *
+ * On success the old req->data is freed and replaced by the merged buffer,
+ * and both cow_hdr and req->rq are rewritten to describe a write of
+ * SD_DATA_OBJ_SIZE bytes at offset 0.
+ *
+ * Returns 0 on success and -1 on failure.  On failure req and cow_hdr are
+ * left unmodified.
+ */
+static int do_cow_at_local(struct request *req, struct sd_req *cow_hdr)
+{
+       int ret = 0;
+       char *buf;

+       dprintf("%" PRIx64 ", %" PRIx64 "\n", req->rq.obj.oid,
+               cow_hdr->obj.cow_oid);

+       /*
+        * The payload is copied to buf + offset below, so offset + length
+        * must fit inside one object.  The comparison is arranged so that
+        * the sum is never computed and cannot wrap around (assumes both
+        * header fields are unsigned -- TODO confirm against struct sd_req).
+        */
+       if (cow_hdr->obj.offset > SD_DATA_OBJ_SIZE ||
+           cow_hdr->data_length > SD_DATA_OBJ_SIZE - cow_hdr->obj.offset) {
+               eprintf("invalid cow write range\n");
+               return -1;
+       }

+       buf = valloc(SD_DATA_OBJ_SIZE);
+       if (!buf) {
+               eprintf("can not allocate memory\n");
+               return -1;
+       }

+       /* A partial write needs the base object underneath it */
+       if (cow_hdr->data_length != SD_DATA_OBJ_SIZE) {
+               ret = read_copy_from_replica(req->vnodes, cow_hdr->epoch,
+                       cow_hdr->obj.cow_oid, buf);
+               if (ret != SD_RES_SUCCESS) {
+                       eprintf("failed to read cow object\n");
+                       free(buf);
+                       return -1;
+               }
+       }

+       /* Overlay the new data on top of the base image */
+       memcpy(buf + cow_hdr->obj.offset, req->data, cow_hdr->data_length);

+       /* The request now owns the merged buffer instead of the payload */
+       free(req->data);
+       req->data = buf;

+       cow_hdr->data_length = SD_DATA_OBJ_SIZE;
+       cow_hdr->obj.offset = 0;

+       req->rq.data_length = SD_DATA_OBJ_SIZE;
+       req->rq.obj.offset = 0;

+       return ret;
+}
+
 int forward_write_obj_req(struct request *req)
 {
        int i, fd, ret, pollret;
@@ -106,6 +144,14 @@ int forward_write_obj_req(struct request *req)
        memcpy(&fwd_hdr, &req->rq, sizeof(fwd_hdr));
        fwd_hdr.flags |= SD_FLAG_CMD_IO_LOCAL;
 
+       if (fwd_hdr.flags & SD_FLAG_CMD_COW) {
+               ret = do_cow_at_local(req, &fwd_hdr);
+               if (!ret) {
+                       fwd_hdr.flags &= ~SD_FLAG_CMD_COW;
+                       req->rq.flags &= ~SD_FLAG_CMD_COW;
+               }
+       }
+
        wlen = fwd_hdr.data_length;
 
        nr_copies = get_nr_copies(req->vnodes);
diff --git a/sheep/ops.c b/sheep/ops.c
index e164cbc..68f7605 100644
--- a/sheep/ops.c
+++ b/sheep/ops.c
@@ -553,7 +553,7 @@ static int local_trace_cat_ops(const struct sd_req *req, 
struct sd_rsp *rsp, voi
        return SD_RES_SUCCESS;
 }
 
-static int read_copy_from_replica(struct vnode_info *vnodes, uint32_t epoch,
+int read_copy_from_replica(struct vnode_info *vnodes, uint32_t epoch,
                                  uint64_t oid, char *buf)
 {
        struct request read_req;
diff --git a/sheep/sheep_priv.h b/sheep/sheep_priv.h
index 7a86533..ef78ace 100644
--- a/sheep/sheep_priv.h
+++ b/sheep/sheep_priv.h
@@ -415,4 +415,7 @@ void object_cache_delete(uint32_t vid);
 
 int object_cache_init(const char *p);
 
+int read_copy_from_replica(struct vnode_info *vnodes, uint32_t epoch,
+                                 uint64_t oid, char *buf);
+
 #endif
-- 
1.7.1

-- 
sheepdog mailing list
[email protected]
http://lists.wpkg.org/mailman/listinfo/sheepdog

Reply via email to