From: Robin Dong <[email protected]> The 'append' opreation in sheepdog contains:
1. lock(data_vid) 2. read onode 3. allocate space and set o_extent.data_len to zero 4. write onode down 5. unlock(data_vid) 6. read uploading data and set o_extent.data_len in terms of req->data_length 7. write onode down We unlock(data_vid) too early to protect the updating of 'o_extent.data_len' so one process may overwrite the space allocated by another process. To solve this problem, we move set operation of 'data_len' into lock region and modify the code to write data_vid. Signed-off-by: Robin Dong <[email protected]> --- v2-->v1: 1. remove faulty modification of md.c sheep/http/kv.c | 106 ++++++++++++++++++++++++++++++++++---------------------- 1 file changed, 64 insertions(+), 42 deletions(-) diff --git a/sheep/http/kv.c b/sheep/http/kv.c index 04ee462..4611e05 100644 --- a/sheep/http/kv.c +++ b/sheep/http/kv.c @@ -727,8 +727,11 @@ static int onode_allocate_extents(struct kv_onode *onode, * if we can put whole request data into extra space of last * o_extent, it don't need to allocate new extent. */ - if (req->data_length <= reserv_len) + if (req->data_length <= reserv_len) { + onode->o_extent[idx - 1].data_len += req->data_length; goto out; + } else + onode->o_extent[idx - 1].data_len += reserv_len; } count = DIV_ROUND_UP((req->data_length - reserv_len), SD_DATA_OBJ_SIZE); sys->cdrv->lock(data_vid); @@ -751,52 +754,22 @@ static int onode_allocate_extents(struct kv_onode *onode, onode->o_extent[idx].start = start; onode->o_extent[idx].count = count; - onode->o_extent[idx].data_len = 0; + onode->o_extent[idx].data_len = req->data_length - reserv_len; onode->nr_extent++; out: return ret; } -static int onode_populate_extents(struct kv_onode *onode, - struct http_request *req) +static int do_vdi_write(struct http_request *req, uint32_t data_vid, + uint64_t offset, uint64_t total, char *data_buf, + bool create) { - struct onode_extent *ext; - struct onode_extent *last_ext = onode->o_extent + onode->nr_extent - 1; - ssize_t size; - uint64_t done = 0, total, offset = 0, reserv_len; - uint64_t write_buffer_size = MIN(kv_rw_buffer, req->data_length); + uint64_t done = 0, size; int ret = SD_RES_SUCCESS; - char *data_buf = NULL; - uint32_t data_vid = onode->data_vid; - bool create = true; - - data_buf = xmalloc(write_buffer_size); - if (last_ext->data_len == 0 && onode->nr_extent == 1) { - offset = last_ext->start * SD_DATA_OBJ_SIZE + - last_ext->data_len; - last_ext->data_len += req->data_length; - } else if (last_ext->data_len > 0) { - offset = last_ext->start * SD_DATA_OBJ_SIZE + - last_ext->data_len; - last_ext->data_len += req->data_length; - create = false; - } else { - ext = last_ext - 1; - reserv_len = ext->count * SD_DATA_OBJ_SIZE - ext->data_len; - offset = ext->start * SD_DATA_OBJ_SIZE + ext->data_len; - ext->data_len += reserv_len; - last_ext->data_len = req->data_length - reserv_len; - /* - * if the previous oid has extra space, we don't need - * to use SD_OP_CREATE_AND_WRITE_OBJ on this oid. - */ - if (reserv_len > 0) - create = false; - } - total = req->data_length; while (done < total) { - size = http_request_read(req, data_buf, write_buffer_size); + size = http_request_read(req, data_buf, + MIN(kv_rw_buffer, total - done)); if (size <= 0) { sd_err("Failed to read http request: %ld", size); ret = SD_RES_EIO; @@ -804,17 +777,66 @@ static int onode_populate_extents(struct kv_onode *onode, } ret = vdi_read_write(data_vid, data_buf, size, offset, false, create); - sd_debug("vdi_write size: %"PRIu64", offset: %" - PRIu64", ret:%d", size, offset, ret); + sd_debug("vdi_write offset: %"PRIu64", size: %" PRIu64 + ", for %" PRIx32 "ret: %d", offset, size, + data_vid, ret); if (ret != SD_RES_SUCCESS) { - sd_err("Failed to write data object for %s, %s", - onode->name, sd_strerror(ret)); + sd_err("Failed to write data object for %" PRIx32 + ", %s", data_vid, sd_strerror(ret)); goto out; } done += size; offset += size; } out: + return ret; +} + +static int onode_populate_extents(struct kv_onode *onode, + struct http_request *req) +{ + struct onode_extent *ext; + struct onode_extent *last_ext = onode->o_extent + onode->nr_extent - 1; + uint64_t total, offset = 0, reserv_len; + uint64_t write_buffer_size = MIN(kv_rw_buffer, req->data_length); + int ret = SD_RES_SUCCESS; + char *data_buf = NULL; + uint32_t data_vid = onode->data_vid; + bool create = true; + + data_buf = xmalloc(write_buffer_size); + + if (last_ext->data_len < req->data_length) { + ext = last_ext - 1; + reserv_len = (req->data_length - last_ext->data_len); + offset = (ext->start + ext->count) * SD_DATA_OBJ_SIZE - + reserv_len; + ret = do_vdi_write(req, data_vid, offset, reserv_len, + data_buf, false); + if (ret != SD_RES_SUCCESS) { + sd_err("Failed to do_vdi_write data_vid: %" PRIx32 + ", offset: %" PRIx64 ", total: %" PRIx64 + ", ret: %s", data_vid, offset, reserv_len, + sd_strerror(ret)); + goto out; + } + offset = last_ext->start * SD_DATA_OBJ_SIZE; + total = last_ext->data_len; + } else { + reserv_len = (last_ext->data_len - req->data_length); + offset = last_ext->start * SD_DATA_OBJ_SIZE + reserv_len; + total = req->data_length; + if (last_ext->data_len > req->data_length) + create = false; + } + + ret = do_vdi_write(req, data_vid, offset, total, data_buf, create); + if (ret != SD_RES_SUCCESS) + sd_err("Failed to do_vdi_write data_vid: %" PRIx32 + ", offset: %" PRIx64 ", total: %" PRIx64 + ", ret: %s", data_vid, offset, total, + sd_strerror(ret)); +out: free(data_buf); return ret; } -- 1.9.1 -- sheepdog mailing list [email protected] http://lists.wpkg.org/mailman/listinfo/sheepdog
