Signed-off-by: FUJITA Tomonori <[email protected]>
---
 block/sheepdog.c |  210 ++++++++++++++++++++++++++---------------------------
 1 files changed, 103 insertions(+), 107 deletions(-)

diff --git a/block/sheepdog.c b/block/sheepdog.c
index 6a45cfa..e049463 100644
--- a/block/sheepdog.c
+++ b/block/sheepdog.c
@@ -36,6 +36,7 @@
 #define SD_OP_GET_NODE_LIST  0x19
 #define SD_OP_GET_VM_LIST    0x20
 #define SD_OP_MAKE_FS        0x21
+#define SD_OP_READ_VDIS      0x26
 
 #define SD_OP_DEBUG_INC_NVER 0xA0
 #define SD_OP_DEBUG_SET_NODE 0xA1
@@ -49,8 +50,6 @@
 #define SD_FLAG_CMD_WRITE    0x01
 #define SD_FLAG_CMD_COW      0x02
 
-#define SD_FLAG_CMD_SNAPSHOT (1U << 8)
-
 #define SD_RES_SUCCESS       0x00 /* Success */
 #define SD_RES_UNKNOWN       0x01 /* Unknown error */
 #define SD_RES_NO_OBJ        0x02 /* No object found */
@@ -77,21 +76,21 @@
 #define SD_RES_VDI_NOT_LOCKED   0x17 /* Vdi is not locked */
 #define SD_RES_SHUTDOWN      0x18 /* Sheepdog is shutting down */
 
-#define MAX_DATA_OBJS (1ULL << 18)
-#define MAX_CHILDREN 1024
-#define MAX_AIO_REQS 4096
-
 /* should be configurable? */
 #define MAX_RETRIES 6
 
+#define SD_NR_VDIS   (1U << 24)
+#define VDI_SPACE_SHIFT   32
+#define VDI_BIT (UINT64_C(1) << 63)
+#define DEAFAULT_NR_COPIES 1
+#define SD_MAX_VDI_LEN 256
+#define MAX_DATA_OBJS (1ULL << 20)
+#define MAX_CHILDREN 1024
 #define SD_DATA_OBJ_SIZE (1UL << 22)
 
 #define SD_INODE_SIZE (sizeof(struct sd_inode))
-
 #define CURRENT_VDI_ID 0
 
-#define SD_MAX_VDI_LEN 256
-
 #undef eprintf
 #define eprintf(fmt, args...)                                          \
 do {                                                                   \
@@ -162,13 +161,12 @@ struct sd_vdi_req {
        uint32_t        id;
        uint32_t        data_length;
        uint64_t        base_oid;
-       uint64_t        tag;
        uint64_t        vdi_size;
+       uint32_t        copies;
+       uint32_t        snapid;
        uint32_t        pad[2];
 };
 
-#define SD_VDI_RSP_FLAG_CURRENT 0x01;
-
 struct sd_vdi_rsp {
        uint8_t         proto_ver;
        uint8_t         opcode;
@@ -207,12 +205,15 @@ struct sd_node_rsp {
 };
 
 struct sd_inode {
+       char name[SD_MAX_VDI_LEN];
        uint64_t oid;
        uint64_t ctime;
+       uint64_t snap_ctime;
        uint64_t vdi_size;
-       uint64_t block_size;
-       uint32_t copy_policy;
-       uint32_t nr_copies;
+       uint16_t copy_policy;
+       uint8_t  nr_copies;
+       uint8_t  block_size_shift;
+       uint32_t snap_id;
        uint64_t parent_oid;
        uint64_t child_oid[MAX_CHILDREN];
        uint64_t data_oid[MAX_DATA_OBJS];
@@ -252,6 +253,8 @@ struct sd_aiocb {
        QLIST_HEAD(aioreq_head, aio_req) aioreq_head;
 };
 
+#define MAX_AIO_REQS 4096
+
 struct sd_aiostate {
        struct bdrv_sd_state *s;
        int fd;
@@ -322,19 +325,15 @@ static inline int after(uint32_t seq1, uint32_t seq2)
        return (int32_t)(seq2 - seq1) < 0;
 }
 
-static inline uint64_t oid_to_ino(uint64_t inode_oid)
-{
-       return (inode_oid >> 18) & ((1ULL << 37) - 1);
-}
-
-static inline int is_data_obj_writeable(uint64_t inode_oid, uint64_t data_oid)
+static inline int is_data_obj_writeable(struct sd_inode *inode, unsigned int idx)
 {
-       return oid_to_ino(inode_oid) == oid_to_ino(data_oid);
+       return (inode->oid >> VDI_SPACE_SHIFT) ==
+               (inode->data_oid[idx] >> VDI_SPACE_SHIFT);
 }
 
 static inline int is_data_obj(uint64_t oid)
 {
-       return oid & ((1ULL << 18) - 1);
+       return !(VDI_BIT & oid);
 }
 
 /*
@@ -872,7 +871,7 @@ static int get_sheep_fd(struct bdrv_sd_state *s)
 }
 
 static int parse_vdiname(const char *filename, char *vdi, int vdi_len,
-                        uint64_t *tag)
+                        uint32_t *snapid)
 {
        char *p, *q;
 
@@ -888,35 +887,37 @@ static int parse_vdiname(const char *filename, char *vdi, int vdi_len,
        p = strchr(vdi, ':');
        if (p) {
                *p++ = '\0';
-               *tag = strtol(p, NULL, 16);
+               *snapid = strtol(p, NULL, 16);
        } else
-               *tag = CURRENT_VDI_ID; /* search current vdi */
+               *snapid = CURRENT_VDI_ID; /* search current vdi */
 
        free(q);
 
        return 0;
 }
 
-static int find_vdi_name(struct bdrv_sd_state *s, char *filename, uint64_t tag,
-                        uint64_t *oid, int for_snapshot, int *current)
+static int find_vdi_name(struct bdrv_sd_state *s, char *filename, uint32_t snapid,
+                        uint64_t *oid)
 {
        int ret, fd;
        struct sd_vdi_req hdr;
        struct sd_vdi_rsp *rsp = (struct sd_vdi_rsp *)&hdr;
        unsigned int wlen, rlen = 0;
+       char buf[SD_MAX_VDI_LEN];
 
        fd = connect_to_vost();
        if (fd < 0)
                return -1;
 
        memset(&hdr, 0, sizeof(hdr));
+       snprintf(buf, sizeof(buf), "%s", filename);
        hdr.opcode = SD_OP_GET_VDI_INFO;
-       wlen = strlen(filename) + 1;
-       hdr.data_length = wlen;
-       hdr.tag = tag;
+       wlen = SD_MAX_VDI_LEN;
+       hdr.data_length = SD_MAX_VDI_LEN;
+       hdr.snapid = snapid;
        hdr.flags = SD_FLAG_CMD_WRITE;
 
-       ret = do_req(fd, (struct sd_req *)&hdr, filename, &wlen, &rlen);
+       ret = do_req(fd, (struct sd_req *)&hdr, buf, &wlen, &rlen);
        if (ret) {
                ret = -1;
                goto out;
@@ -927,9 +928,8 @@ static int find_vdi_name(struct bdrv_sd_state *s, char *filename, uint64_t tag,
                ret = -1;
                goto out;
        }
-
        *oid = rsp->oid;
-       s->is_current = rsp->flags & SD_VDI_RSP_FLAG_CURRENT;
+
        ret = 0;
 out:
        close(fd);
@@ -1045,7 +1045,7 @@ static int sd_open(BlockDriverState *bs, const char *filename, int flags)
        uint64_t oid = 0;
        struct bdrv_sd_state *s = bs->opaque;
        char vdi[256];
-       uint64_t tag;
+       uint32_t snapid;
        int for_snapshot = 0, dummy;
        char *buf;
 
@@ -1070,15 +1070,17 @@ static int sd_open(BlockDriverState *bs, const char *filename, int flags)
                for_snapshot = 1;
 
        memset(vdi, 0, sizeof(vdi));
-       if (parse_vdiname(filename, vdi, sizeof(vdi), &tag) < 0)
+       if (parse_vdiname(filename, vdi, sizeof(vdi), &snapid) < 0)
                goto out;
 
-       ret = find_vdi_name(s, vdi, tag, &oid, for_snapshot, &s->is_current);
+       ret = find_vdi_name(s, vdi, snapid, &oid);
        if (ret)
                goto out;
 
-       if (!s->is_current)
+       if (snapid)
                eprintf("%" PRIx64 " non current inode was open.\n", oid);
+       else
+               s->is_current = 1;
 
        ret = read_vdi_obj(buf, oid, &dummy);
        if (ret)
@@ -1105,28 +1107,23 @@ static int do_sd_create(char *filename, char *tag, int64_t total_sectors,
        struct sd_vdi_rsp *rsp = (struct sd_vdi_rsp *)&hdr;
        int fd, ret;
        unsigned int wlen, rlen = 0;
-       char buf[SD_MAX_VDI_LEN * 2];
+       char buf[SD_MAX_VDI_LEN];
 
        fd = connect_to_vost();
        if (fd < 0)
                return -1;
 
-       memset(buf, 0, sizeof(buf));
        strncpy(buf, filename, SD_MAX_VDI_LEN);
-       if (tag)
-               strncpy(buf + SD_MAX_VDI_LEN, tag, SD_MAX_VDI_LEN);
 
        memset(&hdr, 0, sizeof(hdr));
        hdr.opcode = SD_OP_NEW_VDI;
        hdr.base_oid = base_oid;
 
        wlen = SD_MAX_VDI_LEN;
-       if (tag)
-               wlen += SD_MAX_VDI_LEN;
 
        hdr.flags = SD_FLAG_CMD_WRITE;
-       if (snapshot)
-               hdr.flags |= SD_FLAG_CMD_SNAPSHOT;
+       hdr.snapid = snapshot;
+
        hdr.data_length = wlen;
        hdr.vdi_size = total_sectors * 512;
 
@@ -1166,8 +1163,8 @@ static int sd_create(const char *filename, QEMUOptionParameter *options)
 
        if (backing_file) {
                BlockDriverState bs;
-               char vdi[256];
-               uint64_t tag;
+               char vdi[SD_MAX_VDI_LEN];
+               uint32_t snapid;
 
                memset(&bs, 0, sizeof(bs));
 
@@ -1179,16 +1176,15 @@ static int sd_create(const char *filename, QEMUOptionParameter *options)
                if (ret < 0)
                        return -1;
 
-               if (parse_vdiname(backing_file, vdi, sizeof(vdi), &tag) < 0)
+               if (parse_vdiname(backing_file, vdi, sizeof(vdi), &snapid) < 0)
                        return -1;
 
                /* cannot clone from a current inode */
-               if (tag == CURRENT_VDI_ID)
+               if (snapid == CURRENT_VDI_ID)
                        return -1;
 
-               ret = find_vdi_name(bs.opaque, vdi, tag, &oid, 1, NULL);
-               struct bdrv_sd_state *s = bs.opaque;
-               if (ret || s->is_current)
+               ret = find_vdi_name(bs.opaque, vdi, snapid, &oid);
+               if (ret)
                        return -1;
        }
 
@@ -1199,8 +1195,6 @@ static void sd_close(BlockDriverState *bs)
 {
        struct bdrv_sd_state *s = bs->opaque;
 
-       eprintf("%s\n", s->name);
-
        free(s->name);
 }
 
@@ -1211,6 +1205,7 @@ static int sd_claim(BlockDriverState *bs)
        struct sd_vdi_req hdr;
        struct sd_vdi_rsp *rsp = (struct sd_vdi_rsp *)&hdr;
        unsigned int wlen, rlen = 0;
+       char buf[SD_MAX_VDI_LEN];
 
        eprintf("%s\n", s->name);
 
@@ -1218,14 +1213,16 @@ static int sd_claim(BlockDriverState *bs)
        if (fd < 0)
                return -1;
 
+       memset(buf, 0, sizeof(buf));
+       strncpy(buf, s->name, SD_MAX_VDI_LEN);
        memset(&hdr, 0, sizeof(hdr));
        hdr.opcode = SD_OP_LOCK_VDI;
-       wlen = strlen(s->name) + 1;
-       hdr.data_length = wlen;
-       hdr.tag = CURRENT_VDI_ID;
+       wlen = SD_MAX_VDI_LEN;
+       hdr.data_length = SD_MAX_VDI_LEN;
+       hdr.snapid = CURRENT_VDI_ID;
        hdr.flags = SD_FLAG_CMD_WRITE;
 
-       ret = do_req(fd, (struct sd_req *)&hdr, s->name, &wlen, &rlen);
+       ret = do_req(fd, (struct sd_req *)&hdr, buf, &wlen, &rlen);
        if (ret) {
                ret = -1;
                goto out;
@@ -1407,19 +1404,19 @@ static void sd_write_bh_cb(void *p)
 
                len = min_t(unsigned long, total - done, CHUNK_SIZE - offset);
 
-               if (!oid || !is_data_obj_writeable(inode->oid, oid)) {
+               if (!oid || !is_data_obj_writeable(inode, idx)) {
                        if (!acb->write)
                                goto done;
 
                        create = 1;
                        dprintf("update ino (%" PRIu64") %"
                                PRIu64 " %" PRIu64 " %" PRIu64 "\n",
-                               inode->oid, oid, inode->oid + (idx + 1), idx);
-                       if (oid && !is_data_obj_writeable(inode->oid, oid)) {
+                               inode->oid, oid, inode->oid + idx, idx);
+                       if (oid && !is_data_obj_writeable(inode, idx)) {
                                old_oid = oid;
                                flags = SD_FLAG_CMD_COW;
                        }
-                       oid = inode->oid + (idx + 1);
+                       oid = inode->oid + idx;
                        acb->oid[i] = oid;
                        dprintf("new oid %lx\n", acb->oid[i]);
                }
@@ -1619,96 +1616,95 @@ struct sd_so_rsp {
        uint32_t        opcode_specific[2];
 };
 
-struct sheepdog_vdi_info {
-       uint64_t oid;
-       uint16_t id;
-       uint16_t name_len;
-       uint16_t tag_len;
-       uint8_t type;
-       uint8_t flags;
-       uint32_t epoch;
-       char name[SD_MAX_VDI_LEN];
-       char tag[SD_MAX_VDI_LEN];
-};
+#define DIV_ROUND_UP(n,d) (((n) + (d) - 1) / (d))
+#define BITS_PER_BYTE          8
+#define BITS_TO_LONGS(nr)      DIV_ROUND_UP(nr, BITS_PER_BYTE * sizeof(long))
+#define DECLARE_BITMAP(name,bits) \
+       unsigned long name[BITS_TO_LONGS(bits)]
+
+#define BITS_PER_LONG (BITS_PER_BYTE * sizeof(long))
+
+static inline int test_bit(unsigned int nr, const unsigned long *addr)
+{
+       return ((1UL << (nr % BITS_PER_LONG)) &
+               (((unsigned long *)addr)[nr / BITS_PER_LONG])) != 0;
+}
 
-#define SD_OP_SO_READ_VDIS   0x64
+static inline uint64_t bit_to_oid(unsigned long nr)
+{
+       return ((unsigned long long)nr << VDI_SPACE_SHIFT) | VDI_BIT;
+}
 
 static int sd_snapshot_list(BlockDriverState *bs, QEMUSnapshotInfo **psn_tab)
 {
        struct bdrv_sd_state *s = bs->opaque;
-       struct sd_so_req req;
-       struct sd_rsp *rsp;
-       struct sheepdog_vdi_info *vi;
-       int i, fd, nr = 0, ret, max = 1024; /* FIXME */
-       char name[SD_MAX_VDI_LEN];
+       struct sd_req req;
+       int i, fd, nr = 1024, ret, max = BITS_TO_LONGS(SD_NR_VDIS) * sizeof(long);
        QEMUSnapshotInfo *sn_tab = NULL;
        unsigned wlen, rlen;
        int found = 0;
-       struct sd_inode inode;
+       static struct sd_inode inode;
+       unsigned long *vdi_inuse;
+       unsigned int start_nr;
 
-       vi = malloc(max * sizeof(*vi));
-       if (!vi)
+       vdi_inuse = malloc(max);
+       if (!vdi_inuse)
                return 0;
 
-       memset(name, 0, sizeof(name));
-       snprintf(name, sizeof(name), "%s", s->name);
-
        fd = connect_to_vost();
        if (fd < 0)
                goto out;
 
+       rlen = max;
        wlen = 0;
-       rlen = max * sizeof(*vi);
 
        memset(&req, 0, sizeof(req));
 
-       req.opcode = SD_OP_SO_READ_VDIS;
-       req.data_length = rlen;
+       req.opcode = SD_OP_READ_VDIS;
+       req.data_length = max;
 
-       ret = do_req(fd, (struct sd_req *)&req, vi, &wlen, &rlen);
+       ret = do_req(fd, (struct sd_req *)&req, vdi_inuse, &wlen, &rlen);
 
        close(fd);
        if (ret)
                goto out;
 
-       rsp = (struct sd_rsp *)&req;
-       if (rsp->result != SD_RES_SUCCESS)
-               goto out;
-
-       nr = rsp->data_length / sizeof(*vi);
        sn_tab = malloc(nr * sizeof(*sn_tab));
        if (!sn_tab)
                goto out;
 
        memset(sn_tab, 0, nr * sizeof(*sn_tab));
 
-       for (i = 0; i < nr; i++) {
+       start_nr = fnv_64a_buf(s->name, strlen(s->name), FNV1A_64_INIT) & (SD_NR_VDIS - 1);
+
+       /* TODO: round up */
+       for (i = start_nr; i < SD_NR_VDIS && found < nr; i++) {
                int copies;
 
-               if (strcmp(vi[i].name, s->name) || !vi[i].id)
-                       continue;
+               if (!test_bit(i, vdi_inuse))
+                       break;
 
-               ret = read_vdi_obj((char *)&inode, vi[i].oid, &copies);
+               ret = read_vdi_obj((char *)&inode, bit_to_oid(i), &copies);
                if (ret)
                        continue;
 
-               sn_tab[found].date_sec = inode.ctime >> 32;
-               sn_tab[found].date_nsec = inode.ctime & 0xffffffff;
+               if (!strcmp(inode.name, s->name) && inode.snap_ctime) {
+                       sn_tab[found].date_sec = inode.snap_ctime >> 32;
+                       sn_tab[found].date_nsec = inode.snap_ctime & 0xffffffff;
 
-               snprintf(sn_tab[found].id_str, sizeof(sn_tab[found].id_str), "%u",
-                       vi[i].id);
-               strncpy(sn_tab[found].name, vi[i].tag,  sizeof(sn_tab[found].name));
-               found++;
+                       snprintf(sn_tab[found].id_str, sizeof(sn_tab[found].id_str), "%u",
+                                inode.snap_id);
+                       found++;
+               }
        }
 out:
        *psn_tab = sn_tab;
 
-       free(vi);
+       free(vdi_inuse);
 
        return found;
 }
 
-
 static QEMUOptionParameter sd_create_options[] = {
        {
                .name = BLOCK_OPT_SIZE,
-- 
1.7.0

-- 
sheepdog mailing list
[email protected]
http://lists.wpkg.org/mailman/listinfo/sheepdog

Reply via email to