It looks like the recovery code wrongly assumes that the size of every
object is SD_DATA_OBJ_SIZE. Use sizeof(struct sheepdog_inode) for
non-data objects instead.

Signed-off-by: FUJITA Tomonori <[email protected]>
---
 collie/store.c |   27 ++++++++++++++++++++-------
 1 files changed, 20 insertions(+), 7 deletions(-)

diff --git a/collie/store.c b/collie/store.c
index 339dc54..f1dbdc3 100644
--- a/collie/store.c
+++ b/collie/store.c
@@ -865,7 +865,7 @@ static int __recover_one(struct recovery_work *rw,
        struct sd_obj_req hdr;
        struct sd_obj_rsp *rsp = (struct sd_obj_rsp *)&hdr;
        char name[128];
-       unsigned wlen = 0, rlen = SD_DATA_OBJ_SIZE;
+       unsigned wlen = 0, rlen;
        int fd, ret;
        struct sheepdog_node_list_entry old_entry[SD_MAX_NODES],
                cur_entry[SD_MAX_NODES], *next_entry;
@@ -920,6 +920,11 @@ next:
                return -1;
        }
 
+       if (is_data_obj(oid))
+               rlen = SD_DATA_OBJ_SIZE;
+       else
+               rlen = sizeof(struct sheepdog_inode);
+
        memset(&hdr, 0, sizeof(hdr));
        hdr.opcode = SD_OP_READ_OBJ;
        hdr.oid = oid;
@@ -941,8 +946,8 @@ next:
 
        if (rsp->result == SD_RES_SUCCESS) {
                fd = ob_open(epoch, oid, O_CREAT, &ret);
-               ret = write(fd, buf, SD_DATA_OBJ_SIZE);
-               if (ret != SD_DATA_OBJ_SIZE) {
+               ret = write(fd, buf, rlen);
+               if (ret != rlen) {
                        eprintf("failed to write object\n");
                        return -1;
                }
@@ -991,7 +996,7 @@ not_found:
 static void recover_one(struct work *work, int idx)
 {
        struct recovery_work *rw = container_of(work, struct recovery_work, work);
-       char *buf = zero_block + idx * SD_DATA_OBJ_SIZE;
+       char *buf = NULL;
        int ret;
        uint64_t oid = *(((uint64_t *)rw->buf) + rw->done);
        struct sheepdog_node_list_entry old_entry[SD_MAX_NODES],
@@ -1002,10 +1007,15 @@ static void recover_one(struct work *work, int idx)
 
        eprintf("%d %d, %16lx\n", rw->done, rw->count, oid);
 
+       if (is_data_obj(oid))
+               buf = malloc(SD_DATA_OBJ_SIZE);
+       else
+               buf = malloc(sizeof(struct sheepdog_inode));
+
        cur_nr = epoch_log_read(epoch, (char *)cur_entry, sizeof(cur_entry));
        if (cur_nr <= 0) {
                eprintf("failed to read current epoch, %d\n", epoch);
-               return;
+               goto out;
        }
        cur_nr /= sizeof(struct sheepdog_node_list_entry);
 
@@ -1033,7 +1043,7 @@ static void recover_one(struct work *work, int idx)
        ret = __recover_one(rw, old_entry, old_nr, cur_entry, cur_nr, cur_idx,
                            copy_idx, epoch, epoch - 1, oid, buf, SD_DATA_OBJ_SIZE);
        if (ret == 0)
-               return;
+               goto out;
 
        for (i = 0; i < sys->nr_sobjs; i++) {
                if (i == copy_idx)
@@ -1042,10 +1052,13 @@ static void recover_one(struct work *work, int idx)
                                    cur_entry, cur_nr, cur_idx, i,
                                    epoch, epoch - 1, oid, buf, SD_DATA_OBJ_SIZE);
                if (ret == 0)
-                       return;
+                       goto out;
        }
 fail:
        eprintf("failed to recover object %lx\n", oid);
+out:
+       if (buf)
+               free(buf);
 }
 
 static void __start_recovery(struct work *work, int idx);
-- 
1.6.5

-- 
sheepdog mailing list
[email protected]
http://lists.wpkg.org/mailman/listinfo/sheepdog

Reply via email to