This adds checksums to the meta/data/parity blocks resident on the
raid5/6 log, so that recovery can now verify them and detect whether
anything inside meta/data/parity has been changed.

If anything is wrong in a meta block, we stop replaying data/parity at
that position, while if anything is wrong in a data/parity block, we
just skip this meta/data/parity pair and move on to the next one.

Signed-off-by: Liu Bo <bo.li....@oracle.com>
---
 fs/btrfs/raid56.c | 235 ++++++++++++++++++++++++++++++++++++++++++++----------
 fs/btrfs/raid56.h |   4 +
 2 files changed, 197 insertions(+), 42 deletions(-)

diff --git a/fs/btrfs/raid56.c b/fs/btrfs/raid56.c
index 8f47e56..8bc7ba4 100644
--- a/fs/btrfs/raid56.c
+++ b/fs/btrfs/raid56.c
@@ -43,6 +43,7 @@
 #include "async-thread.h"
 #include "check-integrity.h"
 #include "rcu-string.h"
+#include "hash.h"
 
 /* set when additional merges to this rbio are not allowed */
 #define RBIO_RMW_LOCKED_BIT    1
@@ -197,6 +198,7 @@ struct btrfs_r5l_log {
        u64 last_cp_seq;
        u64 seq;
        u64 log_start;
+       u32 uuid_csum;
        struct btrfs_r5l_io_unit *current_io;
 };
 
@@ -1309,7 +1311,7 @@ static int btrfs_r5l_get_meta(struct btrfs_r5l_log *log, 
struct btrfs_raid_bio *
        return 0;
 }
 
-static void btrfs_r5l_append_payload_meta(struct btrfs_r5l_log *log, u16 type, 
u64 location, u64 devid)
+static void btrfs_r5l_append_payload_meta(struct btrfs_r5l_log *log, u16 type, 
u64 location, u64 devid, u32 csum)
 {
        struct btrfs_r5l_io_unit *io = log->current_io;
        struct btrfs_r5l_payload *payload;
@@ -1326,11 +1328,11 @@ static void btrfs_r5l_append_payload_meta(struct 
btrfs_r5l_log *log, u16 type, u
                payload->size = cpu_to_le32(16); /* stripe_len / PAGE_SIZE */
        payload->devid = cpu_to_le64(devid);
        payload->location = cpu_to_le64(location);
+       payload->csum = cpu_to_le32(csum);
        kunmap(io->meta_page);
 
-       /* XXX: add checksum later */
        io->meta_offset += sizeof(*payload);
-       //io->meta_offset += sizeof(__le32);
+
 #ifdef BTRFS_DEBUG_R5LOG
        trace_printk("io->meta_offset %d\n", io->meta_offset);
 #endif
@@ -1380,6 +1382,10 @@ static void btrfs_r5l_log_stripe(struct btrfs_r5l_log 
*log, int data_pages, int
        int meta_size;
        int stripe, pagenr;
        struct page *page;
+       char *kaddr;
+       u32 csum;
+       u64 location;
+       u64 devid;
 
        /*
         * parity pages are contiguous on disk, thus only one
@@ -1394,8 +1400,6 @@ static void btrfs_r5l_log_stripe(struct btrfs_r5l_log 
*log, int data_pages, int
        /* add data blocks which need to be written */
        for (stripe = 0; stripe < rbio->nr_data; stripe++) {
                for (pagenr = 0; pagenr < rbio->stripe_npages; pagenr++) {
-                       u64 location;
-                       u64 devid;
                        if (stripe < rbio->nr_data) {
                                page = page_in_rbio(rbio, stripe, pagenr, 1);
                                if (!page)
@@ -1406,7 +1410,11 @@ static void btrfs_r5l_log_stripe(struct btrfs_r5l_log 
*log, int data_pages, int
 #ifdef BTRFS_DEBUG_R5LOG
                                trace_printk("data: stripe %d pagenr %d 
location 0x%llx devid %llu\n", stripe, pagenr, location, devid);
 #endif
-                               btrfs_r5l_append_payload_meta(log, 
R5LOG_PAYLOAD_DATA, location, devid);
+                               kaddr = kmap(page);
+                               csum = btrfs_crc32c(log->uuid_csum, kaddr, 
PAGE_SIZE);
+                               kunmap(page);
+
+                               btrfs_r5l_append_payload_meta(log, 
R5LOG_PAYLOAD_DATA, location, devid, csum);
                                btrfs_r5l_append_payload_page(log, page);
                        }
                }
@@ -1414,17 +1422,26 @@ static void btrfs_r5l_log_stripe(struct btrfs_r5l_log 
*log, int data_pages, int
 
        /* add the whole parity blocks */
        for (; stripe < rbio->real_stripes; stripe++) {
-               u64 location = btrfs_compute_location(rbio, stripe, 0);
-               u64 devid = btrfs_compute_devid(rbio, stripe);
+               location = btrfs_compute_location(rbio, stripe, 0);
+               devid = btrfs_compute_devid(rbio, stripe);
 
 #ifdef BTRFS_DEBUG_R5LOG
                trace_printk("parity: stripe %d location 0x%llx devid %llu\n", 
stripe, location, devid);
 #endif
-               btrfs_r5l_append_payload_meta(log, R5LOG_PAYLOAD_PARITY, 
location, devid);
                for (pagenr = 0; pagenr < rbio->stripe_npages; pagenr++) {
                        page = rbio_stripe_page(rbio, stripe, pagenr);
+
+                       kaddr = kmap(page);
+                       if (pagenr == 0)
+                               csum = btrfs_crc32c(log->uuid_csum, kaddr, 
PAGE_SIZE);
+                       else
+                               csum = btrfs_crc32c(csum, kaddr, PAGE_SIZE);
+                       kunmap(page);
+
                        btrfs_r5l_append_payload_page(log, page);
                }
+
+               btrfs_r5l_append_payload_meta(log, R5LOG_PAYLOAD_PARITY, 
location, devid, csum);
        }
 }
 
@@ -1432,12 +1449,16 @@ static void btrfs_r5l_submit_current_io(struct 
btrfs_r5l_log *log)
 {
        struct btrfs_r5l_io_unit *io = log->current_io;
        struct btrfs_r5l_meta_block *mb;
+       u32 csum;
 
        if (!io)
                return;
 
        mb = kmap(io->meta_page);
        mb->meta_size = cpu_to_le32(io->meta_offset);
+       ASSERT(mb->csum == 0);
+       csum = btrfs_crc32c(log->uuid_csum, mb, PAGE_SIZE);
+       mb->csum = cpu_to_le32(csum);
        kunmap(io->meta_page);
 
        log->current_io = NULL;
@@ -1506,6 +1527,7 @@ static int btrfs_r5l_write_empty_meta_block(struct 
btrfs_r5l_log *log, u64 pos,
 {
        struct page *page;
        struct btrfs_r5l_meta_block *mb;
+       u32 csum;
        int ret = 0;
 
 #ifdef BTRFS_DEBUG_R5LOG
@@ -1520,6 +1542,9 @@ static int btrfs_r5l_write_empty_meta_block(struct 
btrfs_r5l_log *log, u64 pos,
        mb->meta_size = cpu_to_le32(sizeof(struct btrfs_r5l_meta_block));
        mb->seq = cpu_to_le64(seq);
        mb->position = cpu_to_le64(pos);
+
+       csum = btrfs_crc32c(log->uuid_csum, mb, PAGE_SIZE);
+       mb->csum = cpu_to_le32(csum);
        kunmap(page);
 
        if (!btrfs_r5l_sync_page_io(log, log->dev, (pos >> 9), PAGE_SIZE, page, 
REQ_OP_WRITE | REQ_FUA)) {
@@ -1607,6 +1632,9 @@ static int btrfs_r5l_recover_read_page(struct 
btrfs_r5l_recover_ctx *ctx, struct
 static int btrfs_r5l_recover_load_meta(struct btrfs_r5l_recover_ctx *ctx)
 {
        struct btrfs_r5l_meta_block *mb;
+       u32 csum;
+       u32 expected;
+       int ret = 0;
 
        ret = btrfs_r5l_recover_read_page(ctx, ctx->meta_page, ctx->pos);
        if (ret)
@@ -1623,25 +1651,131 @@ static int btrfs_r5l_recover_load_meta(struct 
btrfs_r5l_recover_ctx *ctx)
 #ifdef BTRFS_DEBUG_R5LOG
                trace_printk("%s: mismatch magic %llu default %llu\n", 
__func__, le32_to_cpu(mb->magic), BTRFS_R5LOG_MAGIC);
 #endif
-               return -EINVAL;
+               ret = -EINVAL;
+               goto out;
        }
 
-       ASSERT(le32_to_cpu(mb->meta_size) <= PAGE_SIZE);
-       kunmap(ctx->meta_page);
+       expected = le32_to_cpu(mb->csum);
+       /*
+        * when we calculate mb->csum, it's zero, so we need to zero
+        * it back.
+        */
+       mb->csum = 0;
+       csum = btrfs_crc32c(ctx->log->uuid_csum, mb, PAGE_SIZE);
+       if (csum != expected) {
+#ifdef BTRFS_DEBUG_R5LOG
+               pr_info("%s: mismatch checksum for r5l meta block\n", __func__);
+#endif
+               ret = -EINVAL;
+               goto out;
+       }
 
+       ASSERT(le32_to_cpu(mb->meta_size) <= PAGE_SIZE);
        /* meta_block */
        ctx->total_size = PAGE_SIZE;
 
-       return 0;
+out:
+       kunmap(ctx->meta_page);
+
+       return ret;
+}
+
+static int btrfs_r5l_recover_verify_checksum(struct btrfs_r5l_recover_ctx *ctx)
+{
+       u64 offset;
+       u32 meta_size;
+       u64 csum_io_offset;
+       u64 read_pos;
+       char *kaddr;
+       u32 csum;
+       int type;
+       struct btrfs_r5l_meta_block *mb;
+       struct btrfs_r5l_payload *payload;
+       struct btrfs_r5l_log *log = ctx->log;
+       struct btrfs_device *dev;
+       int ret = 0;
+
+       mb = kmap(ctx->meta_page);
+       meta_size = le32_to_cpu(mb->meta_size);
+       csum_io_offset = PAGE_SIZE;
+
+       for (offset = sizeof(struct btrfs_r5l_meta_block);
+            offset < meta_size;
+            offset += sizeof(struct btrfs_r5l_payload)) {
+               payload = (void *)mb + offset;
+
+               /* check if there is any invalid device, if so, skip writing 
this mb. */
+               dev = btrfs_find_device(log->fs_info, 
le64_to_cpu(payload->devid), NULL, NULL);
+               if (!dev || dev->missing) {
+                       ret = -EINVAL;
+                       goto out;
+               }
+
+               type = le16_to_cpu(payload->type);
+               if (type == R5LOG_PAYLOAD_DATA) {
+                       read_pos = btrfs_r5l_ring_add(log, ctx->pos, 
csum_io_offset);
+                       csum_io_offset += PAGE_SIZE;
+
+                       ASSERT(le32_to_cpu(payload->size) == 1);
+                       ret = btrfs_r5l_recover_read_page(ctx, ctx->io_page, 
read_pos);
+                       if (ret) {
+                               ret = -EIO;
+                               goto out;
+                       }
+
+                       kaddr = kmap(ctx->io_page);
+                       csum = btrfs_crc32c(log->uuid_csum, kaddr, PAGE_SIZE);
+                       kunmap(ctx->io_page);
+               } else if (type == R5LOG_PAYLOAD_PARITY) {
+                       int i;
+                       for (i = 0; i < le32_to_cpu(payload->size); i++) {
+                               read_pos = btrfs_r5l_ring_add(log, ctx->pos, 
csum_io_offset);
+                               csum_io_offset += PAGE_SIZE;
+
+                               ret = btrfs_r5l_recover_read_page(ctx, 
ctx->io_page, read_pos);
+                               if (ret) {
+                                       ret = -EIO;
+                                       goto out;
+                               }
+
+                               kaddr = kmap(ctx->io_page);
+                               if (i == 0)
+                                       csum = btrfs_crc32c(log->uuid_csum, 
kaddr, PAGE_SIZE);
+                               else
+                                       csum = btrfs_crc32c(csum, kaddr, 
PAGE_SIZE);
+                               kunmap(ctx->io_page);
+                       }
+               } else {
+                       ASSERT(0);
+               }
+
+               if (csum != le32_to_cpu(payload->csum)) {
+                       trace_printk("r5l data csum fails location 0x%llx devid 
%llu\n", le64_to_cpu(payload->location), le64_to_cpu(payload->devid));
+                       ret = -EAGAIN;
+                       goto out;
+               }
+       }
+out:
+       kunmap(ctx->meta_page);
+       return ret;
 }
 
-static int btrfs_r5l_recover_load_data(struct btrfs_r5l_log *log, struct 
btrfs_r5l_recover_ctx *ctx)
+static int btrfs_r5l_recover_load_data(struct btrfs_r5l_recover_ctx *ctx)
 {
        u64 offset;
        struct btrfs_r5l_meta_block *mb;
-       u64 meta_size;
+       u32 meta_size;
        u64 io_offset;
+       u64 read_pos;
        struct btrfs_device *dev;
+       struct btrfs_r5l_payload *payload;
+       struct btrfs_r5l_log *log = ctx->log;
+       int ret = 0;
+
+       /* if any checksum fails, skip writing this mb. */
+       ret = btrfs_r5l_recover_verify_checksum(ctx);
+       if (ret)
+               return ret;
 
        mb = kmap(ctx->meta_page);
 
@@ -1649,67 +1783,81 @@ static int btrfs_r5l_recover_load_data(struct 
btrfs_r5l_log *log, struct btrfs_r
        offset = sizeof(struct btrfs_r5l_meta_block);
        meta_size = le32_to_cpu(mb->meta_size);
 
-       while (offset < meta_size) {
-               struct btrfs_r5l_payload *payload = (void *)mb + offset;
+       for (offset = sizeof(struct btrfs_r5l_meta_block);
+            offset < meta_size;
+            offset += sizeof(struct btrfs_r5l_payload)) {
+               payload = (void *)mb + offset;
 
                /* read data from log disk and write to payload->location */
 #ifdef BTRFS_DEBUG_R5LOG
                trace_printk("payload type %d flags %d size %d location 0x%llx 
devid %llu\n", le16_to_cpu(payload->type), le16_to_cpu(payload->flags), 
le32_to_cpu(payload->size), le64_to_cpu(payload->location), 
le64_to_cpu(payload->devid));
 #endif
 
+               /* liubo: how to handle the case where dev is suddenly off? */
                dev = btrfs_find_device(log->fs_info, 
le64_to_cpu(payload->devid), NULL, NULL);
-               if (!dev || dev->missing) {
-                       ASSERT(0);
-               }
+               ASSERT(dev && !dev->missing);
 
                if (le16_to_cpu(payload->type) == R5LOG_PAYLOAD_DATA) {
-                       ASSERT(le32_to_cpu(payload->size) == 1);
-                       btrfs_r5l_sync_page_io(log, log->dev, (ctx->pos + 
io_offset) >> 9, PAGE_SIZE, ctx->io_page, REQ_OP_READ);
-                       btrfs_r5l_sync_page_io(log, dev, 
le64_to_cpu(payload->location) >> 9, PAGE_SIZE, ctx->io_page, REQ_OP_WRITE);
+                       read_pos = btrfs_r5l_ring_add(log, ctx->pos, io_offset);
                        io_offset += PAGE_SIZE;
+
+                       ret = btrfs_r5l_recover_read_page(ctx, ctx->io_page, 
read_pos);
+                       if (ret)
+                               goto out;
+
+                       if (!btrfs_r5l_sync_page_io(log, dev, 
le64_to_cpu(payload->location) >> 9, PAGE_SIZE, ctx->io_page, REQ_OP_WRITE)) {
+                               ret = -EIO;
+                               goto out;
+                       }
                } else if (le16_to_cpu(payload->type) == R5LOG_PAYLOAD_PARITY) {
                        int i;
-                       ASSERT(le32_to_cpu(payload->size) == 16);
+
+                       ASSERT(offset + sizeof(struct btrfs_r5l_payload) == 
meta_size);
+
                        for (i = 0; i < le32_to_cpu(payload->size); i++) {
-                               /* liubo: parity are guaranteed to be
-                                * contiguous, use just one bio to
-                                * hold all pages and flush them. */
                                u64 parity_off = le64_to_cpu(payload->location) 
+ i * PAGE_SIZE;
-                               btrfs_r5l_sync_page_io(log, log->dev, (ctx->pos 
+ io_offset) >> 9, PAGE_SIZE, ctx->io_page, REQ_OP_READ);
-                               btrfs_r5l_sync_page_io(log, dev, parity_off >> 
9, PAGE_SIZE, ctx->io_page, REQ_OP_WRITE);
+                               read_pos = btrfs_r5l_ring_add(log, ctx->pos, 
io_offset);
                                io_offset += PAGE_SIZE;
+
+                               ret = btrfs_r5l_recover_read_page(ctx, 
ctx->io_page, read_pos);
+                               if (ret)
+                                       goto out;
+
+                               if (!btrfs_r5l_sync_page_io(log, dev, 
parity_off >> 9, PAGE_SIZE, ctx->io_page, REQ_OP_WRITE)) {
+                                       ret = -EIO;
+                                       goto out;
+                               }
                        }
                } else {
                        ASSERT(0);
                }
-
-               offset += sizeof(struct btrfs_r5l_payload);
        }
-       kunmap(ctx->meta_page);
 
        ctx->total_size += (io_offset - PAGE_SIZE);
-       return 0;
+out:
+       kunmap(ctx->meta_page);
+       return ret;
 }
 
-static int btrfs_r5l_recover_flush_log(struct btrfs_r5l_log *log, struct 
btrfs_r5l_recover_ctx *ctx)
+static int btrfs_r5l_recover_flush_log(struct btrfs_r5l_recover_ctx *ctx)
 {
        int ret;
 
        while (1) {
-               ret = btrfs_r5l_recover_load_meta(log, ctx);
+               ret = btrfs_r5l_recover_load_meta(ctx);
                if (ret)
                        break;
 
-               ret = btrfs_r5l_recover_load_data(log, ctx);
-               ASSERT(!ret || ret > 0);
-               if (ret)
+               ret = btrfs_r5l_recover_load_data(ctx);
+               if (ret && ret != -EAGAIN)
                        break;
 
                ctx->seq++;
-               ctx->pos = btrfs_r5l_ring_add(log, ctx->pos, ctx->total_size);
+               ctx->pos = btrfs_r5l_ring_add(ctx->log, ctx->pos, 
ctx->total_size);
        }
 
-       return ret;
+       return 0;
+}
 
 static int btrfs_r5l_recover_allocate_ra(struct btrfs_r5l_recover_ctx *ctx)
 {
@@ -1801,6 +1949,7 @@ int btrfs_r5l_load_log(struct btrfs_fs_info *fs_info, u64 
cp)
        struct page *page;
        struct btrfs_r5l_meta_block *mb;
        bool create_new = false;
+       int ret;
 
        ASSERT(log);
 
@@ -1856,10 +2005,10 @@ int btrfs_r5l_load_log(struct btrfs_fs_info *fs_info, 
u64 cp)
                log->seq = log->last_cp_seq + 1;
                log->next_checkpoint = cp;
        } else {
-               btrfs_r5l_recover_log(log);
+               ret = btrfs_r5l_recover_log(log);
        }
 
-       return 0;
+       return ret;
 }
 
 /*
@@ -3576,6 +3725,8 @@ int btrfs_set_r5log(struct btrfs_fs_info *fs_info, struct 
btrfs_device *device)
        log->device_size = round_down(log->device_size, PAGE_SIZE);
        log->dev = device;
        log->fs_info = fs_info;
+       ASSERT(sizeof(device->uuid) == BTRFS_UUID_SIZE);
+       log->uuid_csum = btrfs_crc32c(~0, device->uuid, sizeof(device->uuid));
        mutex_init(&log->io_mutex);
 
        cmpxchg(&fs_info->r5log, NULL, log);
diff --git a/fs/btrfs/raid56.h b/fs/btrfs/raid56.h
index 314d299..569cec8 100644
--- a/fs/btrfs/raid56.h
+++ b/fs/btrfs/raid56.h
@@ -87,6 +87,8 @@ struct btrfs_r5l_payload {
        /* data or parity */
        __le64 location;
        __le64 devid;
+
+       __le32 csum;
 };
 
 /* io unit starts from a meta block. */
@@ -96,6 +98,8 @@ struct btrfs_r5l_meta_block {
        /* the whole size of the block */
        __le32 meta_size;
 
+       __le32 csum;
+
        __le64 seq;
        __le64 position;
 
-- 
2.9.4

--
To unsubscribe from this list: send the line "unsubscribe linux-btrfs" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

Reply via email to