From: Robin Dong <[email protected]>

The 'append' opreation in sheepdog contains:

1. lock(data_vid)
2. read onode
3. allocate space and set o_extent.data_len to zero
4. write onode down
5. unlock(data_vid)
6. read uploading data and set o_extent.data_len in terms of req->data_length
7. write onode down

We unlock(data_vid) too early to protect the updating of 'o_extent.data_len'
so one process may overwrite the space allocated by another process.
To solve this problem, we move set operation of 'data_len' into lock region
and modify the code to write data_vid.

Signed-off-by: Robin Dong <[email protected]>
---
v2-->v1:
  1. remove faulty modification of md.c

 sheep/http/kv.c | 106 ++++++++++++++++++++++++++++++++++----------------------
 1 file changed, 64 insertions(+), 42 deletions(-)

diff --git a/sheep/http/kv.c b/sheep/http/kv.c
index 04ee462..4611e05 100644
--- a/sheep/http/kv.c
+++ b/sheep/http/kv.c
@@ -727,8 +727,11 @@ static int onode_allocate_extents(struct kv_onode *onode,
                 * if we can put whole request data into extra space of last
                 * o_extent, it don't need to allocate new extent.
                 */
-               if (req->data_length <= reserv_len)
+               if (req->data_length <= reserv_len) {
+                       onode->o_extent[idx - 1].data_len += req->data_length;
                        goto out;
+               } else
+                       onode->o_extent[idx - 1].data_len += reserv_len;
        }
        count = DIV_ROUND_UP((req->data_length - reserv_len), SD_DATA_OBJ_SIZE);
        sys->cdrv->lock(data_vid);
@@ -751,52 +754,22 @@ static int onode_allocate_extents(struct kv_onode *onode,
 
        onode->o_extent[idx].start = start;
        onode->o_extent[idx].count = count;
-       onode->o_extent[idx].data_len = 0;
+       onode->o_extent[idx].data_len = req->data_length - reserv_len;
        onode->nr_extent++;
 out:
        return ret;
 }
 
-static int onode_populate_extents(struct kv_onode *onode,
-                                 struct http_request *req)
+static int do_vdi_write(struct http_request *req, uint32_t data_vid,
+                       uint64_t offset, uint64_t total, char *data_buf,
+                       bool create)
 {
-       struct onode_extent *ext;
-       struct onode_extent *last_ext = onode->o_extent + onode->nr_extent - 1;
-       ssize_t size;
-       uint64_t done = 0, total, offset = 0, reserv_len;
-       uint64_t write_buffer_size = MIN(kv_rw_buffer, req->data_length);
+       uint64_t done = 0, size;
        int ret = SD_RES_SUCCESS;
-       char *data_buf = NULL;
-       uint32_t data_vid = onode->data_vid;
-       bool create = true;
-
-       data_buf = xmalloc(write_buffer_size);
-       if (last_ext->data_len == 0 && onode->nr_extent == 1) {
-               offset = last_ext->start * SD_DATA_OBJ_SIZE +
-                        last_ext->data_len;
-               last_ext->data_len += req->data_length;
-       } else if (last_ext->data_len > 0) {
-               offset = last_ext->start * SD_DATA_OBJ_SIZE +
-                        last_ext->data_len;
-               last_ext->data_len += req->data_length;
-               create = false;
-       } else {
-               ext = last_ext - 1;
-               reserv_len = ext->count * SD_DATA_OBJ_SIZE - ext->data_len;
-               offset = ext->start * SD_DATA_OBJ_SIZE + ext->data_len;
-               ext->data_len += reserv_len;
-               last_ext->data_len = req->data_length - reserv_len;
-               /*
-                * if the previous oid has extra space, we don't need
-                * to use SD_OP_CREATE_AND_WRITE_OBJ on this oid.
-                */
-               if (reserv_len > 0)
-                       create = false;
-       }
 
-       total = req->data_length;
        while (done < total) {
-               size = http_request_read(req, data_buf, write_buffer_size);
+               size = http_request_read(req, data_buf,
+                                        MIN(kv_rw_buffer, total - done));
                if (size <= 0) {
                        sd_err("Failed to read http request: %ld", size);
                        ret = SD_RES_EIO;
@@ -804,17 +777,66 @@ static int onode_populate_extents(struct kv_onode *onode,
                }
                ret = vdi_read_write(data_vid, data_buf, size, offset,
                                     false, create);
-               sd_debug("vdi_write size: %"PRIu64", offset: %"
-                        PRIu64", ret:%d", size, offset, ret);
+               sd_debug("vdi_write offset: %"PRIu64", size: %" PRIu64
+                        ", for %" PRIx32 "ret: %d", offset, size,
+                        data_vid, ret);
                if (ret != SD_RES_SUCCESS) {
-                       sd_err("Failed to write data object for %s, %s",
-                              onode->name, sd_strerror(ret));
+                       sd_err("Failed to write data object for %" PRIx32
+                              ", %s", data_vid, sd_strerror(ret));
                        goto out;
                }
                done += size;
                offset += size;
        }
 out:
+       return ret;
+}
+
+static int onode_populate_extents(struct kv_onode *onode,
+                                 struct http_request *req)
+{
+       struct onode_extent *ext;
+       struct onode_extent *last_ext = onode->o_extent + onode->nr_extent - 1;
+       uint64_t total, offset = 0, reserv_len;
+       uint64_t write_buffer_size = MIN(kv_rw_buffer, req->data_length);
+       int ret = SD_RES_SUCCESS;
+       char *data_buf = NULL;
+       uint32_t data_vid = onode->data_vid;
+       bool create = true;
+
+       data_buf = xmalloc(write_buffer_size);
+
+       if (last_ext->data_len < req->data_length) {
+               ext = last_ext - 1;
+               reserv_len = (req->data_length - last_ext->data_len);
+               offset = (ext->start + ext->count) * SD_DATA_OBJ_SIZE -
+                        reserv_len;
+               ret = do_vdi_write(req, data_vid, offset, reserv_len,
+                                  data_buf, false);
+               if (ret != SD_RES_SUCCESS) {
+                       sd_err("Failed to do_vdi_write data_vid: %" PRIx32
+                              ", offset: %" PRIx64 ", total: %" PRIx64
+                              ", ret: %s", data_vid, offset, reserv_len,
+                              sd_strerror(ret));
+                       goto out;
+               }
+               offset = last_ext->start * SD_DATA_OBJ_SIZE;
+               total = last_ext->data_len;
+       } else {
+               reserv_len = (last_ext->data_len - req->data_length);
+               offset = last_ext->start * SD_DATA_OBJ_SIZE + reserv_len;
+               total = req->data_length;
+               if (last_ext->data_len > req->data_length)
+                       create = false;
+       }
+
+       ret = do_vdi_write(req, data_vid, offset, total, data_buf, create);
+       if (ret != SD_RES_SUCCESS)
+               sd_err("Failed to do_vdi_write data_vid: %" PRIx32
+                      ", offset: %" PRIx64 ", total: %" PRIx64
+                      ", ret: %s", data_vid, offset, total,
+                      sd_strerror(ret));
+out:
        free(data_buf);
        return ret;
 }
-- 
1.9.1

-- 
sheepdog mailing list
[email protected]
http://lists.wpkg.org/mailman/listinfo/sheepdog

Reply via email to