Hi,

Thanks for reporting the bug.
Could you try the patch below against the current git HEAD?

> PS. another question since I've used --copies=2 , i've expected to find
> each sheepdog  VM block repliacted to 2 nodes not 3 but
> under /sheepdog/0/  i see that exactly the same number (with same names)
> of files where created on all 3 nodes - the only exceptions
> is that /sheepdog/0/vdi/zopa was created only on 2 nodes.
> Is that expected and what is the actual meaning --copies=N ? 

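--copies=N means exactly what you expected: each object should be replicated to
N nodes. Until now the number of copies was hard-coded to 3 in collie/vdi.c,
which is why you see the files on all three nodes. I think this patch fixes
that problem as well.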

==
>From 7a45f310bd6b81f0c655217f3f1dfc63fd68c634 Mon Sep 17 00:00:00 2001
From: MORITA Kazutaka <[email protected]>
Date: Sun, 27 Dec 2009 06:08:09 +0900
Subject: [PATCH] use ANAME_COPIES as a number of replication

Signed-off-by: MORITA Kazutaka <[email protected]>
---
 collie/net.c             |    1 +
 collie/store.c           |   27 ++++++++++++++++++++
 collie/vdi.c             |   60 +++++++++++++++++++++++++++++++++------------
 include/net.h            |    3 +-
 include/sheepdog_proto.h |    5 +++-
 lib/net.c                |   36 +++++++++++++++++----------
 shepherd/shepherd.c      |    3 +-
 7 files changed, 103 insertions(+), 32 deletions(-)

diff --git a/collie/net.c b/collie/net.c
index 5505613..be19bcc 100644
--- a/collie/net.c
+++ b/collie/net.c
@@ -65,6 +65,7 @@ static void queue_request(struct request *req)
        case SD_OP_SO_NEW_VDI:
        case SD_OP_SO_LOOKUP_VDI:
        case SD_OP_SO_READ_VDIS:
+       case SD_OP_SO_STAT:
                req->work.fn = so_queue_request;
                break;
        default:
diff --git a/collie/store.c b/collie/store.c
index 4e95469..fce71ff 100644
--- a/collie/store.c
+++ b/collie/store.c
@@ -606,6 +606,33 @@ void so_queue_request(struct work *work, int idx)
        case SD_OP_SO_READ_VDIS:
                ret = so_read_vdis(req);
                break;
+       case SD_OP_SO_STAT:
+               fd = open(path, O_RDONLY);
+               if (fd < 0) {
+                       result = SD_RES_EIO;
+                       goto out;
+               }
+
+               rsp->oid = 0;
+               ret = fgetxattr(fd, ANAME_LAST_OID, &rsp->oid,
+                               sizeof(rsp->oid));
+               if (ret != sizeof(rsp->oid)) {
+                       close(fd);
+                       result = SD_RES_SYSTEM_ERROR;
+                       goto out;
+               }
+
+               rsp->copies = 0;
+               ret = fgetxattr(fd, ANAME_COPIES, &rsp->copies,
+                               sizeof(rsp->copies));
+               if (ret != sizeof(rsp->copies)) {
+                       close(fd);
+                       result = SD_RES_SYSTEM_ERROR;
+                       goto out;
+               }
+
+               result = SD_RES_SUCCESS;
+               break;
        }
 
 out:
diff --git a/collie/vdi.c b/collie/vdi.c
index 31567d0..290d919 100644
--- a/collie/vdi.c
+++ b/collie/vdi.c
@@ -84,11 +84,12 @@ int add_vdi(struct cluster_info *ci, char *name, int len, 
uint64_t size,
            uint64_t *added_oid, uint64_t base_oid, uint32_t tag)
 {
        struct sheepdog_node_list_entry entries[SD_MAX_NODES];
-       int nr_nodes;
+       int nr_nodes, nr_reqs;
        uint64_t oid = 0;
        int ret;
        int copies;
        struct sd_so_req req;
+       struct sd_so_rsp *rsp = (struct sd_so_rsp *)&req;
 
        memset(&req, 0, sizeof(req));
 
@@ -97,22 +98,31 @@ int add_vdi(struct cluster_info *ci, char *name, int len, 
uint64_t size,
        dprintf("%s (%d) %" PRIu64 ", base: %" PRIu64 "\n", name, len, size,
                base_oid);
 
-       /* todo */
-/*     copies = sb->default_nr_copies; */
-       copies = 3;
-       if (copies > nr_nodes)
-               copies = nr_nodes;
+       req.opcode = SD_OP_SO_STAT;
+       ret = exec_reqs(entries, nr_nodes, ci->epoch,
+                       SD_DIR_OID, (struct sd_req *)&req, NULL, 0, 0,
+                       nr_nodes, 1);
+       if (ret < 0)
+               return rsp->result;
+
+       copies = rsp->copies;
+       nr_reqs = copies;
+       if (nr_reqs > nr_nodes)
+               nr_reqs = nr_nodes;
+
+       memset(&req, 0, sizeof(req));
 
        req.opcode = SD_OP_SO_NEW_VDI;
        req.copies = copies;
        req.tag = tag;
 
        ret = exec_reqs(entries, nr_nodes, ci->epoch,
-                       SD_DIR_OID, (struct sd_req *)&req, name, len, 0, 
copies);
+                       SD_DIR_OID, (struct sd_req *)&req, name, len, 0,
+                       nr_reqs, nr_reqs);
 
        /* todo: error handling */
 
-       oid = ((struct sd_so_rsp *)&req)->oid;
+       oid = rsp->oid;
        *added_oid = oid;
 
        dprintf("%s (%d) %" PRIu64 ", base: %" PRIu64 "\n", name, len, size,
@@ -134,7 +144,7 @@ int lookup_vdi(struct cluster_info *ci,
               int *current)
 {
        struct sheepdog_node_list_entry entries[SD_MAX_NODES];
-       int nr_nodes;
+       int nr_nodes, nr_reqs;
        int ret, copies;
        struct sd_so_req req;
        struct sd_so_rsp *rsp = (struct sd_so_rsp *)&req;
@@ -147,16 +157,30 @@ int lookup_vdi(struct cluster_info *ci,
 
        dprintf("looking for %s %zd\n", filename, strlen(filename));
 
-       /* todo */
-       copies = 3;
-       if (copies > nr_nodes)
-               copies = nr_nodes;
+       req.opcode = SD_OP_SO_STAT;
+       ret = exec_reqs(entries, nr_nodes, ci->epoch,
+                       SD_DIR_OID, (struct sd_req *)&req, NULL, 0, 0,
+                       nr_nodes, 1);
+       if (ret < 0)
+               return rsp->result;
+
+       copies = rsp->copies;
+       nr_reqs = copies;
+       if (nr_reqs > nr_nodes)
+               nr_reqs = nr_nodes;
+
+       memset(&req, 0, sizeof(req));
+       copies = rsp->copies;
+       nr_reqs = copies;
+       if (nr_reqs > nr_nodes)
+               nr_reqs = nr_nodes;
 
        req.opcode = SD_OP_SO_LOOKUP_VDI;
        req.tag = tag;
 
        ret = exec_reqs(entries, nr_nodes, ci->epoch,
-                       SD_DIR_OID, (struct sd_req *)&req, filename, 
strlen(filename), 0, copies);
+                       SD_DIR_OID, (struct sd_req *)&req, filename, 
strlen(filename), 0,
+                       nr_reqs, 1);
 
        *oid = rsp->oid;
        if (rsp->flags & SD_VDI_RSP_FLAG_CURRENT)
@@ -186,7 +210,11 @@ int make_super_object(struct cluster_info *ci, struct 
sd_vdi_req *hdr)
        nr_nodes = build_node_list(&ci->node_list, entries);
 
        ret = exec_reqs(entries, nr_nodes, ci->epoch,
-                       SD_DIR_OID, (struct sd_req *)&req, NULL, 0, 0, 
req.copies);
+                       SD_DIR_OID, (struct sd_req *)&req, NULL, 0, 0, 
req.copies,
+                       req.copies);
 
-       return ret;
+       if (ret < 0)
+               return SD_RES_EIO;
+
+       return SD_RES_SUCCESS;
 }
diff --git a/include/net.h b/include/net.h
index b0e3df0..7bf0dbb 100644
--- a/include/net.h
+++ b/include/net.h
@@ -46,7 +46,8 @@ int read_object(struct sheepdog_node_list_entry *e,
 
 int exec_reqs(struct sheepdog_node_list_entry *e,
              int nodes, uint32_t node_version, uint64_t oid, struct sd_req 
*hdr,
-             char *data, unsigned int wdatalen, unsigned int rdatalen, int nr);
+             char *data, unsigned int wdatalen, unsigned int rdatalen, int nr,
+             int quorum);
 
 int create_listen_ports(int port, int (*callback)(int fd, void *), void *data);
 
diff --git a/include/sheepdog_proto.h b/include/sheepdog_proto.h
index 4bfb4e5..9557cd8 100644
--- a/include/sheepdog_proto.h
+++ b/include/sheepdog_proto.h
@@ -48,6 +48,7 @@
 #define SD_OP_SO_DEL_VDI     0x62
 #define SD_OP_SO_LOOKUP_VDI  0x63
 #define SD_OP_SO_READ_VDIS   0x64
+#define SD_OP_SO_STAT        0x65
 
 #define SD_OP_STAT_SHEEP     0xB0
 
@@ -125,8 +126,10 @@ struct sd_so_rsp {
        uint32_t        id;
        uint32_t        data_length;
        uint32_t        result;
+       uint32_t        copies;
+       uint64_t        ctime;
        uint64_t        oid;
-       uint32_t        opcode_specific[5];
+       uint32_t        opcode_specific[2];
 };
 
 struct sd_obj_req {
diff --git a/lib/net.c b/lib/net.c
index caf592f..5e26f46 100644
--- a/lib/net.c
+++ b/lib/net.c
@@ -433,16 +433,19 @@ int read_object(struct sheepdog_node_list_entry *e,
 /* TODO: clean up with the above functions */
 int exec_reqs(struct sheepdog_node_list_entry *e,
              int nodes, uint32_t node_version, uint64_t oid, struct sd_req 
*hdr,
-             char *data, unsigned int wdatalen, unsigned int rdatalen, int nr)
+             char *data, unsigned int wdatalen, unsigned int rdatalen, int nr,
+             int quorum)
 {
        char name[128];
        int i = 0, n, fd, ret;
        int success = 0;
        struct sd_req tmp;
        struct sd_rsp *rsp = (struct sd_rsp *)&tmp;
+       unsigned wlen, rlen;
 
        for (i = 0; i < nr; i++) {
-               unsigned wlen = wdatalen, rlen = rdatalen;
+               wlen = wdatalen;
+               rlen = rdatalen;
 
                n = obj_to_sheep(e, nodes, oid, i);
 
@@ -453,8 +456,10 @@ int exec_reqs(struct sheepdog_node_list_entry *e,
                         e[n].addr[15]);
 
                fd = connect_to(name, e[n].port);
-               if (fd < 0)
+               if (fd < 0) {
+                       ((struct sd_rsp *) hdr)->result = SD_RES_EIO;
                        return -1;
+               }
 
                hdr->epoch = node_version;
                if (wdatalen) {
@@ -470,18 +475,23 @@ int exec_reqs(struct sheepdog_node_list_entry *e,
                close(fd);
 
                rsp = (struct sd_rsp *)&tmp;
-               if (rdatalen) {
-                       if (!ret) {
-                               if (rsp->result == SD_RES_SUCCESS) {
-                                       memcpy(hdr, rsp, sizeof(*rsp));
-                                       return rlen;
-                               }
-                       }
-               } else
-                       if (!ret)
+
+               if (!ret) {
+                       if (rsp->result == SD_RES_SUCCESS)
                                success++;
+               }
+
+               if (success >= quorum)
+                       break;
        }
+
        memcpy(hdr, rsp, sizeof(*rsp));
 
-       return !success;
+       if (success < quorum)
+               return -1;
+
+       if (rdatalen)
+               return rlen;
+       else
+               return wlen;
 }
diff --git a/shepherd/shepherd.c b/shepherd/shepherd.c
index 9654888..5bbf29d 100644
--- a/shepherd/shepherd.c
+++ b/shepherd/shepherd.c
@@ -398,7 +398,8 @@ int parse_vdi(vdi_parser_func_t func, void *data)
        req.opcode = SD_OP_SO_READ_VDIS;
 
        ret = exec_reqs(node_list_entries, nr_nodes, node_list_version,
-                       SD_DIR_OID, (struct sd_req *)&req, buf, 0, 
DIR_BUF_LEN,nr_nodes);
+                       SD_DIR_OID, (struct sd_req *)&req, buf, 0, DIR_BUF_LEN,
+                       nr_nodes, 1);
 
        if (ret < 0) {
                ret = 1;
-- 
1.6.5

-- 
sheepdog mailing list
[email protected]
http://lists.wpkg.org/mailman/listinfo/sheepdog

Reply via email to