A local collie daemon now forwards all requests, so qemu no longer needs to know any node information.
Signed-off-by: FUJITA Tomonori <[email protected]> --- block/sheepdog.c | 333 ++++++++++++++---------------------------------------- 1 files changed, 87 insertions(+), 246 deletions(-) diff --git a/block/sheepdog.c b/block/sheepdog.c index e353756..6a45cfa 100644 --- a/block/sheepdog.c +++ b/block/sheepdog.c @@ -267,20 +267,9 @@ struct bdrv_sd_state { char *name; int is_current; - struct sd_aiostate aio_state_array[FD_SETSIZE]; + struct sd_aiostate aio_state_array[1]; }; -struct sheepdog_node_list_entry { - uint64_t id; - uint8_t addr[16]; - uint16_t port; - uint16_t pad; -}; - -static uint32_t s_epoch; -static int nr_nodes; -static struct sheepdog_node_list_entry *node_list_entries; - static const char * sd_strerror(int err) { int i; @@ -333,15 +322,6 @@ static inline int after(uint32_t seq1, uint32_t seq2) return (int32_t)(seq2 - seq1) < 0; } -static void set_hostname(char *name, size_t len, - struct sheepdog_node_list_entry *e) -{ - /* TODO: ipv6 */ - - snprintf(name, len, "%d.%d.%d.%d", - e->addr[12], e->addr[13], e->addr[14], e->addr[15]); -} - static inline uint64_t oid_to_ino(uint64_t inode_oid) { return (inode_oid >> 18) & ((1ULL << 37) - 1); @@ -377,24 +357,6 @@ static inline uint64_t fnv_64a_buf(void *buf, size_t len, uint64_t hval) return hval; } -static inline int obj_to_sheep(struct sheepdog_node_list_entry *entries, - int nr_entries, uint64_t oid, int idx) -{ - uint64_t id; - int i; - struct sheepdog_node_list_entry *e = entries, *n; - - id = fnv_64a_buf(&oid, sizeof(oid), FNV1A_64_INIT); - - for (i = 0; i < nr_entries - 1; i++, e++) { - n = e + 1; - if (id > e->id && id <= n->id) - break; - } - - return (i + 1 + idx) % nr_entries; -} - static inline struct aio_req *alloc_aio_req(struct sd_aiostate *s, struct sd_aiocb *acb) { @@ -508,12 +470,14 @@ static int sd_schedule_bh(QEMUBHFunc *cb, struct sd_aiocb *acb) return 0; } -static int connect_to_vost(char *name, int port) +static int connect_to_vost(void) { char buf[64]; char hbuf[NI_MAXHOST], 
sbuf[NI_MAXSERV]; + char name[] = "localhost"; int fd, ret; struct addrinfo hints, *res, *res0; + int port = DOG_PORT; memset(&hints, 0, sizeof(hints)); snprintf(buf, sizeof(buf), "%d", port); @@ -723,74 +687,6 @@ static void retry_aiocb(struct bdrv_sd_state *s) } } -static int get_node_list(void *buf, unsigned int size, unsigned int *epoch) -{ - int fd, ret; - unsigned int wlen; - struct sd_node_req hdr; - struct sd_node_rsp *rsp = (struct sd_node_rsp *)&hdr; - char host[] = "localhost"; - - fd = connect_to_vost(host, DOG_PORT); - if (fd < 0) - return -1; - - memset(&hdr, 0, sizeof(hdr)); - hdr.opcode = SD_OP_GET_NODE_LIST; - hdr.data_length = size; - hdr.epoch = *epoch; - - wlen = 0; - - ret = do_req(fd, (struct sd_req *)&hdr, buf, &wlen, &size); - if (ret) { - ret = -1; - goto out; - } - - if (!size) { - ret = 0; - goto out; - } - - ret = rsp->nr_nodes; - *epoch = rsp->epoch; -out: - close(fd); - - return ret; -} - -static int update_node_list(struct bdrv_sd_state *s) -{ - char *buf; - int ret; - unsigned int size, epoch = s_epoch; - - size = FD_SETSIZE * sizeof(struct sheepdog_node_list_entry); - - buf = malloc(size); - if (!buf) - return -ENOMEM; - - ret = get_node_list(buf, size, &epoch); - - if (ret <= 0) - goto out; - - memcpy(node_list_entries, buf, size); - nr_nodes = ret; - s_epoch = epoch; - - if (s && s_epoch != epoch) - retry_aiocb(s); - -out: - free(buf); - - return ret; -} - static void aio_read_response(void *opaque) { struct sd_obj_req hdr; @@ -904,7 +800,9 @@ static void aio_read_response(void *opaque) return; /* TODO: update node list and resend request */ new_node_list: - update_node_list(s->s); + eprintf("\n"); + exit(1); + return; } @@ -939,38 +837,36 @@ static int set_nodelay(int fd) return ret; } -static int get_sheep_fd(struct bdrv_sd_state *s, uint16_t idx, int *cached) +static int get_sheep_fd(struct bdrv_sd_state *s) { int ret, fd; - char name[128]; - - if (s->aio_state_array[idx].fd != -1) { - *cached = 1; - return 
s->aio_state_array[idx].fd; - } else - *cached = 0; - set_hostname(name, sizeof(name), &node_list_entries[idx]); + if (s->aio_state_array[0].fd != -1) + return s->aio_state_array[0].fd; - fd = connect_to_vost(name, node_list_entries[idx].port); - if (fd < 0) + fd = connect_to_vost(); + if (fd < 0) { + eprintf("%m\n"); return -1; + } ret = set_nonblocking(fd); if (ret) { + eprintf("%m\n"); close(fd); return -1; } ret = set_nodelay(fd); if (ret) { + eprintf("%m\n"); close(fd); return -1; } qemu_aio_set_fd_handler(fd, aio_read_response, NULL, aio_flush_request, - NULL, &s->aio_state_array[idx]); - s->aio_state_array[idx].fd = fd; + NULL, &s->aio_state_array[0]); + s->aio_state_array[0].fd = fd; return fd; } @@ -1002,16 +898,14 @@ static int parse_vdiname(const char *filename, char *vdi, int vdi_len, } static int find_vdi_name(struct bdrv_sd_state *s, char *filename, uint64_t tag, - uint64_t *oid, int for_snapshot, int *current, - unsigned int *epoch) + uint64_t *oid, int for_snapshot, int *current) { int ret, fd; struct sd_vdi_req hdr; struct sd_vdi_rsp *rsp = (struct sd_vdi_rsp *)&hdr; - char hostname[] = "localhost"; unsigned int wlen, rlen = 0; - fd = connect_to_vost(hostname, DOG_PORT); + fd = connect_to_vost(); if (fd < 0) return -1; @@ -1021,7 +915,6 @@ static int find_vdi_name(struct bdrv_sd_state *s, char *filename, uint64_t tag, hdr.data_length = wlen; hdr.tag = tag; hdr.flags = SD_FLAG_CMD_WRITE; - hdr.epoch = s_epoch; ret = do_req(fd, (struct sd_req *)&hdr, filename, &wlen, &rlen); if (ret) { @@ -1037,7 +930,6 @@ static int find_vdi_name(struct bdrv_sd_state *s, char *filename, uint64_t tag, *oid = rsp->oid; s->is_current = rsp->flags & SD_VDI_RSP_FLAG_CURRENT; - *epoch = rsp->epoch; ret = 0; out: close(fd); @@ -1048,110 +940,80 @@ static int add_aio_request(struct bdrv_sd_state *s, struct sd_aiocb *acb, uint64_t oid, void *data, unsigned int datalen, uint64_t offset, uint8_t flags, uint64_t old_oid, int create, int write, - unsigned int iov_offset, - 
struct sheepdog_node_list_entry *e, int nr) + unsigned int iov_offset) { - int i = 0; int nr_copies = s->inode.nr_copies; + struct sd_obj_req hdr; + int fd; + unsigned int wlen; + int ret; + struct aio_req *aio_req; if (!nr_copies) eprintf("bug\n"); - if (!e) { - e = node_list_entries; - nr = nr_nodes; - } - - if (!write) - nr_copies = 1; - - { - struct sd_obj_req hdr; - int cached, sidx, fd; - unsigned int wlen; - int ret; - struct aio_req *aio_req; - - memset(&hdr, 0, sizeof(hdr)); - hdr.epoch = s_epoch; - - if (!write) { - wlen = 0; - hdr.opcode = SD_OP_READ_OBJ; - hdr.flags = flags; - } else if (create) { - wlen = datalen; - hdr.opcode = SD_OP_CREATE_AND_WRITE_OBJ; - hdr.flags = SD_FLAG_CMD_WRITE | flags; - } else { - wlen = datalen; - hdr.opcode = SD_OP_WRITE_OBJ; - hdr.flags = SD_FLAG_CMD_WRITE | flags; - } + memset(&hdr, 0, sizeof(hdr)); - hdr.oid = oid; - hdr.cow_oid = old_oid; - hdr.copies = s->inode.nr_copies; + if (!write) { + wlen = 0; + hdr.opcode = SD_OP_READ_OBJ; + hdr.flags = flags; + } else if (create) { + wlen = datalen; + hdr.opcode = SD_OP_CREATE_AND_WRITE_OBJ; + hdr.flags = SD_FLAG_CMD_WRITE | flags; + } else { + wlen = datalen; + hdr.opcode = SD_OP_WRITE_OBJ; + hdr.flags = SD_FLAG_CMD_WRITE | flags; + } - hdr.data_length = datalen; - hdr.offset = offset; + hdr.oid = oid; + hdr.cow_oid = old_oid; + hdr.copies = s->inode.nr_copies; - /* - * In the case of read, we try a different sheep for - * retry. 
- */ - if (write) - sidx = obj_to_sheep(e, nr, oid, i); - else - sidx = obj_to_sheep(e, nr, oid, acb->retries); + hdr.data_length = datalen; + hdr.offset = offset; - fd = get_sheep_fd(s, sidx, &cached); - if (fd < 0) - return -EIO; + fd = get_sheep_fd(s); + if (fd < 0) + return -EIO; - struct sd_aiostate *aio_state = &s->aio_state_array[sidx]; - aio_req = alloc_aio_req(aio_state, acb); - if (!aio_req) { - eprintf("too many requests\n"); - return -ENOMEM; - } - aio_req->iov_offset = iov_offset; - hdr.id = get_id_from_req(aio_state, aio_req); + struct sd_aiostate *aio_state = &s->aio_state_array[0]; + aio_req = alloc_aio_req(aio_state, acb); + if (!aio_req) { + eprintf("too many requests\n"); + return -ENOMEM; + } + aio_req->iov_offset = iov_offset; + hdr.id = get_id_from_req(aio_state, aio_req); - ret = send_req(fd, (struct sd_req *)&hdr, data, &wlen); - if (ret) { - free_aio_req(aio_state, aio_req); - return -EIO; - } + ret = send_req(fd, (struct sd_req *)&hdr, data, &wlen); + if (ret) { + free_aio_req(aio_state, aio_req); + return -EIO; } return 0; } -static int read_vdi_obj(char *buf, uint64_t oid, - struct sheepdog_node_list_entry *e, int nr, int *copies) +static int read_vdi_obj(char *buf, uint64_t oid, int *copies) { struct sd_obj_req hdr; struct sd_obj_rsp *rsp = (struct sd_obj_rsp *)&hdr; unsigned int wlen, rlen; - int ret, fd, sidx, i = 0; - char host[128]; + int ret, fd, i = 0; wlen = 0; rlen = SD_INODE_SIZE; memset(&hdr, 0, sizeof(hdr)); - hdr.epoch = s_epoch; hdr.opcode = SD_OP_READ_OBJ; hdr.oid = oid; hdr.data_length = rlen; - sidx = obj_to_sheep(e, nr, oid, i); - - set_hostname(host, sizeof(host), &e[sidx]); - - fd = connect_to_vost(host, e[sidx].port); + fd = connect_to_vost(); if (fd < 0) { eprintf("failed to connect to a sheep, %d\n", i); return -1; @@ -1179,13 +1041,12 @@ static int read_vdi_obj(char *buf, uint64_t oid, /* TODO: error cleanups */ static int sd_open(BlockDriverState *bs, const char *filename, int flags) { - int nr, ret, i, j; + int 
ret, i, j; uint64_t oid = 0; struct bdrv_sd_state *s = bs->opaque; char vdi[256]; uint64_t tag; int for_snapshot = 0, dummy; - unsigned int epoch; char *buf; buf = malloc(SD_INODE_SIZE); @@ -1194,7 +1055,7 @@ static int sd_open(BlockDriverState *bs, const char *filename, int flags) return -1; } - for (i = 0; i < FD_SETSIZE; i++) { + for (i = 0; i < ARRAY_SIZE(s->aio_state_array); i++) { struct sd_aiostate *aio_state = &s->aio_state_array[i]; for (j = 0; j < MAX_AIO_REQS; j++) { aio_state->aio_req_free[j] = &aio_state->aio_req_list[j]; @@ -1208,22 +1069,18 @@ static int sd_open(BlockDriverState *bs, const char *filename, int flags) if (strstart(filename, "sheepdog:", NULL)) for_snapshot = 1; - nr = update_node_list(s); - if (nr < 0 || !nr) - goto out; - memset(vdi, 0, sizeof(vdi)); if (parse_vdiname(filename, vdi, sizeof(vdi), &tag) < 0) goto out; - ret = find_vdi_name(s, vdi, tag, &oid, for_snapshot, &s->is_current, &epoch); + ret = find_vdi_name(s, vdi, tag, &oid, for_snapshot, &s->is_current); if (ret) goto out; if (!s->is_current) eprintf("%" PRIx64 " non current inode was open.\n", oid); - ret = read_vdi_obj(buf, oid, node_list_entries, nr_nodes, &dummy); + ret = read_vdi_obj(buf, oid, &dummy); if (ret) goto out; @@ -1247,11 +1104,10 @@ static int do_sd_create(char *filename, char *tag, int64_t total_sectors, struct sd_vdi_req hdr; struct sd_vdi_rsp *rsp = (struct sd_vdi_rsp *)&hdr; int fd, ret; - char hostname[] = "localhost"; unsigned int wlen, rlen = 0; char buf[SD_MAX_VDI_LEN * 2]; - fd = connect_to_vost(hostname, DOG_PORT); + fd = connect_to_vost(); if (fd < 0) return -1; @@ -1262,7 +1118,6 @@ static int do_sd_create(char *filename, char *tag, int64_t total_sectors, memset(&hdr, 0, sizeof(hdr)); hdr.opcode = SD_OP_NEW_VDI; - hdr.epoch = s_epoch; hdr.base_oid = base_oid; wlen = SD_MAX_VDI_LEN; @@ -1295,7 +1150,7 @@ static int do_sd_create(char *filename, char *tag, int64_t total_sectors, static int sd_create(const char *filename, QEMUOptionParameter 
*options) { - int nr, ret; + int ret; uint64_t oid = 0; int64_t total_sectors = 0; char *backing_file = NULL; @@ -1309,16 +1164,10 @@ static int sd_create(const char *filename, QEMUOptionParameter *options) options++; } - /* needs to set up s_epoch */ - nr = update_node_list(NULL); - if (nr < 0 || !nr) - return -1; - if (backing_file) { BlockDriverState bs; char vdi[256]; uint64_t tag; - unsigned int dummy; memset(&bs, 0, sizeof(bs)); @@ -1337,7 +1186,7 @@ static int sd_create(const char *filename, QEMUOptionParameter *options) if (tag == CURRENT_VDI_ID) return -1; - ret = find_vdi_name(bs.opaque, vdi, tag, &oid, 1, NULL, &dummy); + ret = find_vdi_name(bs.opaque, vdi, tag, &oid, 1, NULL); struct bdrv_sd_state *s = bs.opaque; if (ret || s->is_current) return -1; @@ -1361,12 +1210,11 @@ static int sd_claim(BlockDriverState *bs) int ret, fd; struct sd_vdi_req hdr; struct sd_vdi_rsp *rsp = (struct sd_vdi_rsp *)&hdr; - char hostname[] = "localhost"; unsigned int wlen, rlen = 0; eprintf("%s\n", s->name); - fd = connect_to_vost(hostname, DOG_PORT); + fd = connect_to_vost(); if (fd < 0) return -1; @@ -1376,7 +1224,6 @@ static int sd_claim(BlockDriverState *bs) hdr.data_length = wlen; hdr.tag = CURRENT_VDI_ID; hdr.flags = SD_FLAG_CMD_WRITE; - hdr.epoch = s_epoch; ret = do_req(fd, (struct sd_req *)&hdr, s->name, &wlen, &rlen); if (ret) { @@ -1401,13 +1248,12 @@ static void sd_release(BlockDriverState *bs) struct bdrv_sd_state *s = bs->opaque; struct sd_vdi_req hdr; struct sd_vdi_rsp *rsp = (struct sd_vdi_rsp *)&hdr; - char hostname[] = "localhost"; unsigned int wlen, rlen = 0; int fd, ret; eprintf("%s\n", s->name); - fd = connect_to_vost(hostname, DOG_PORT); + fd = connect_to_vost(); if (fd < 0) return; @@ -1416,7 +1262,6 @@ static void sd_release(BlockDriverState *bs) hdr.opcode = SD_OP_RELEASE_VDI; wlen = strlen(s->name) + 1; hdr.data_length = wlen; - hdr.epoch = s_epoch; hdr.flags = SD_FLAG_CMD_WRITE; ret = do_req(fd, (struct sd_req *)&hdr, s->name, &wlen, &rlen); @@ 
-1433,9 +1278,13 @@ static inline int nr_chunks(struct sd_aiocb *acb) (acb->sector_num * 512 / CHUNK_SIZE) + 1; } +/* FIXME */ +#define reset_all_aios(s) \ + { eprintf(""); exit(1); } + static void sd_write_done(struct sd_aiocb *acb) { - int i, ret, inode_dirty = 0, epoch_dirty = 0; + int i, ret, inode_dirty = 0; unsigned long idx = acb->sector_num * 512 / CHUNK_SIZE; struct bdrv_sd_state *s = acb->common.bs->opaque; @@ -1452,12 +1301,12 @@ static void sd_write_done(struct sd_aiocb *acb) ret = add_aio_request(s, acb, s->inode.oid, &s->inode, sizeof(s->inode), - 0, 0, 0, 0, 1, 0, NULL, 0); + 0, 0, 0, 0, 1, 0); if (ret) goto new_node_list; } - if (!inode_dirty && !epoch_dirty) + if (!inode_dirty) sd_finish_aiocb(acb); else { acb->aio_done_func = sd_finish_aiocb; @@ -1466,7 +1315,7 @@ static void sd_write_done(struct sd_aiocb *acb) return; new_node_list: - update_node_list(s); + reset_all_aios(s); } static int sd_create_branch(struct bdrv_sd_state *s) @@ -1490,7 +1339,7 @@ static int sd_create_branch(struct bdrv_sd_state *s) copies = s->inode.nr_copies; - ret = read_vdi_obj(buf, oid, node_list_entries, nr_nodes, &copies); + ret = read_vdi_obj(buf, oid, &copies); if (ret < 0) goto out; @@ -1576,7 +1425,7 @@ static void sd_write_bh_cb(void *p) } ret = add_aio_request(s, acb, oid, buf + done, len, offset, flags, old_oid, - create, 1, 0, NULL, 0); + create, 1, 0); if (ret < 0) { eprintf("may be add_aio_request is faled\n"); @@ -1602,7 +1451,7 @@ abort: sd_schedule_bh(sd_aio_bh_cb, acb); return; new_node_list: - update_node_list(s); + reset_all_aios(s); } static BlockDriverAIOCB *sd_aio_writev(BlockDriverState *bs, @@ -1639,7 +1488,7 @@ static void sd_read_done(struct sd_aiocb *acb) if (oid) { ret = add_aio_request(s, acb, oid, NULL, len, offset, 0, 0, - 0, 0, done, NULL, 0); + 0, 0, done); if (ret) goto new_node_list; } @@ -1651,7 +1500,7 @@ static void sd_read_done(struct sd_aiocb *acb) } return; new_node_list: - update_node_list(s); + reset_all_aios(s); } static void 
sd_readv_bh_cb(void *p) @@ -1794,7 +1643,6 @@ static int sd_snapshot_list(BlockDriverState *bs, QEMUSnapshotInfo **psn_tab) char name[SD_MAX_VDI_LEN]; QEMUSnapshotInfo *sn_tab = NULL; unsigned wlen, rlen; - char hostname[] = "localhost"; int found = 0; struct sd_inode inode; @@ -1805,7 +1653,7 @@ static int sd_snapshot_list(BlockDriverState *bs, QEMUSnapshotInfo **psn_tab) memset(name, 0, sizeof(name)); snprintf(name, sizeof(name), "%s", s->name); - fd = connect_to_vost(hostname, DOG_PORT); + fd = connect_to_vost(); if (fd < 0) goto out; @@ -1816,7 +1664,6 @@ static int sd_snapshot_list(BlockDriverState *bs, QEMUSnapshotInfo **psn_tab) req.opcode = SD_OP_SO_READ_VDIS; req.data_length = rlen; - req.epoch = s_epoch; ret = do_req(fd, (struct sd_req *)&req, vi, &wlen, &rlen); @@ -1841,8 +1688,7 @@ static int sd_snapshot_list(BlockDriverState *bs, QEMUSnapshotInfo **psn_tab) if (strcmp(vi[i].name, s->name) || !vi[i].id) continue; - ret = read_vdi_obj((char *)&inode, vi[i].oid, node_list_entries, - nr_nodes, &copies); + ret = read_vdi_obj((char *)&inode, vi[i].oid, &copies); if (ret) continue; @@ -1897,11 +1743,6 @@ BlockDriver bdrv_sheepdog = { static void bdrv_sheepdog_init(void) { - int size = FD_SETSIZE * sizeof(struct sheepdog_node_list_entry); - - node_list_entries = malloc(size); - memset(node_list_entries, 0, size); - bdrv_register(&bdrv_sheepdog); } block_init(bdrv_sheepdog_init); -- 1.5.6.5 -- sheepdog mailing list [email protected] http://lists.wpkg.org/mailman/listinfo/sheepdog
