During recovery, blocks are read from the raid5/6 log device one by
one. Add readahead so that up to 256 (BIO_MAX_PAGES) contiguous
blocks can be fetched in a single read IO.
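
To illustrate the scheme, here is a minimal user-space sketch with
hypothetical names (ra_window, ra_fill, ra_read_page); pread() stands
in for the single large bio the kernel code submits. A page lookup
that misses the cached window refills the whole window with one big
read, and every hit is a plain memcpy:

#include <stdint.h>
#include <string.h>
#include <unistd.h>

#define RA_POOL_SIZE    256     /* pages per readahead window */
#define PAGE_SZ         4096

struct ra_window {
        int fd;                 /* log device */
        uint64_t start;         /* disk offset the window starts at */
        int valid;              /* number of pages currently cached */
        char pages[RA_POOL_SIZE][PAGE_SZ];
};

/* refill the whole window with one large read instead of 256 small ones */
static int ra_fill(struct ra_window *w, uint64_t offset)
{
        ssize_t ret = pread(w->fd, w->pages, sizeof(w->pages), offset);

        if (ret < 0)
                return -1;
        w->start = offset;
        w->valid = ret / PAGE_SZ;
        return 0;
}

/* copy one (PAGE_SZ-aligned) page out of the window, refilling on a miss */
static int ra_read_page(struct ra_window *w, char *dst, uint64_t offset)
{
        if (offset < w->start ||
            offset >= w->start + (uint64_t)w->valid * PAGE_SZ) {
                if (ra_fill(w, offset) < 0 || w->valid == 0)
                        return -1;
        }
        memcpy(dst, w->pages[(offset - w->start) / PAGE_SZ], PAGE_SZ);
        return 0;
}

The kernel side below keeps the same window in ctx->ra_pages and
refills it in btrfs_r5l_recover_read_ra().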

Signed-off-by: Liu Bo <bo.li....@oracle.com>
---
 fs/btrfs/raid56.c | 119 +++++++++++++++++++++++++++++++++++++++++++++++++++---
 1 file changed, 114 insertions(+), 5 deletions(-)

diff --git a/fs/btrfs/raid56.c b/fs/btrfs/raid56.c
index dea33c4..24f7cbb 100644
--- a/fs/btrfs/raid56.c
+++ b/fs/btrfs/raid56.c
@@ -1530,15 +1530,84 @@ static int btrfs_r5l_write_empty_meta_block(struct btrfs_r5l_log *log, u64 pos,
        return ret;
 }
 
+#define BTRFS_R5L_RECOVER_IO_POOL_SIZE BIO_MAX_PAGES
 struct btrfs_r5l_recover_ctx {
        u64 pos;
        u64 seq;
        u64 total_size;
        struct page *meta_page;
        struct page *io_page;
+
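+       /* readahead window: pages filled by a single large read bio */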
+       struct page *ra_pages[BTRFS_R5L_RECOVER_IO_POOL_SIZE];
+       struct bio *ra_bio;
+       int total;
+       int valid;
+       u64 start_offset;
+
+       struct btrfs_r5l_log *log;
 };
 
-static int btrfs_r5l_recover_load_meta(struct btrfs_r5l_log *log, struct btrfs_r5l_recover_ctx *ctx)
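+/* fill the readahead window with one large read IO starting at @offset */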
+static int btrfs_r5l_recover_read_ra(struct btrfs_r5l_recover_ctx *ctx, u64 offset)
+{
+       bio_reset(ctx->ra_bio);
+       ctx->ra_bio->bi_bdev = ctx->log->dev->bdev;
+       ctx->ra_bio->bi_opf = REQ_OP_READ;
+       ctx->ra_bio->bi_iter.bi_sector = (ctx->log->data_offset + offset) >> 9;
+
+       ctx->valid = 0;
+       ctx->start_offset = offset;
+
+       while (ctx->valid < ctx->total) {
+               bio_add_page(ctx->ra_bio, ctx->ra_pages[ctx->valid++], PAGE_SIZE, 0);
+
+               offset = btrfs_r5l_ring_add(ctx->log, offset, PAGE_SIZE);
+               if (offset == 0)
+                       break;
+       }
+
+#ifdef BTRFS_DEBUG_R5LOG
+       trace_printk("to read %d pages starting from 0x%llx\n", ctx->valid, ctx->log->data_offset + ctx->start_offset);
+#endif
+       return submit_bio_wait(ctx->ra_bio);
+}
+
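+/* copy the log page at @offset into @page, refilling the window on a miss */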
+static int btrfs_r5l_recover_read_page(struct btrfs_r5l_recover_ctx *ctx, struct page *page, u64 offset)
+{
+       struct page *tmp;
+       int index;
+       char *src;
+       char *dst;
+       int ret;
+
+       if (offset < ctx->start_offset || offset >= (ctx->start_offset + ctx->valid * PAGE_SIZE)) {
+               ret = btrfs_r5l_recover_read_ra(ctx, offset);
+               if (ret)
+                       return ret;
+       }
+
+#ifdef BTRFS_DEBUG_R5LOG
+       trace_printk("offset 0x%llx ctx->start_offset 0x%llx ctx->valid %d\n", offset, ctx->start_offset, ctx->valid);
+#endif
+
+       ASSERT(IS_ALIGNED(ctx->start_offset, PAGE_SIZE));
+       ASSERT(IS_ALIGNED(offset, PAGE_SIZE));
+
+       index = (offset - ctx->start_offset) >> PAGE_SHIFT;
+       ASSERT(index < ctx->valid);
+
+       tmp = ctx->ra_pages[index];
+       src = kmap(tmp);
+       dst = kmap(page);
+       memcpy(dst, src, PAGE_SIZE);
+       kunmap(page);
+       kunmap(tmp);
+       return 0;
+}
+
+static int btrfs_r5l_recover_load_meta(struct btrfs_r5l_recover_ctx *ctx)
 {
        struct btrfs_r5l_meta_block *mb;
 
@@ -1642,6 +1711,44 @@ static int btrfs_r5l_recover_flush_log(struct btrfs_r5l_log *log, struct btrfs_r
        }
 
        return ret;
+}
+
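+/* allocate the page pool and bio that back the recovery readahead window */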
+static int btrfs_r5l_recover_allocate_ra(struct btrfs_r5l_recover_ctx *ctx)
+{
+       struct page *page;
+       ctx->ra_bio = btrfs_io_bio_alloc(GFP_NOFS, BIO_MAX_PAGES);
+
+       ctx->total = 0;
+       ctx->valid = 0;
+       while (ctx->total < BTRFS_R5L_RECOVER_IO_POOL_SIZE) {
+               page = alloc_page(GFP_NOFS | __GFP_HIGHMEM);
+               if (!page)
+                       break;
+
+               ctx->ra_pages[ctx->total++] = page;
+       }
+
+       if (ctx->total == 0) {
+               bio_put(ctx->ra_bio);
+               return -ENOMEM;
+       }
+
+#ifdef BTRFS_DEBUG_R5LOG
+       trace_printk("readahead: %d allocated pages\n", ctx->total);
+#endif
+       return 0;
+}
+
+static void btrfs_r5l_recover_free_ra(struct btrfs_r5l_recover_ctx *ctx)
+{
+       int i;
+#ifdef BTRFS_DEBUG_R5LOG
+       trace_printk("readahead: %d to free pages\n", ctx->total);
+#endif
+       for (i = 0; i < ctx->total; i++)
+               __free_page(ctx->ra_pages[i]);
+       bio_put(ctx->ra_bio);
 }
 
 static void btrfs_r5l_write_super(struct btrfs_fs_info *fs_info, u64 cp);
@@ -1655,6 +1762,7 @@ static int btrfs_r5l_recover_log(struct btrfs_r5l_log *log)
        ctx = kzalloc(sizeof(*ctx), GFP_NOFS);
        ASSERT(ctx);
 
+       ctx->log = log;
        ctx->pos = log->last_checkpoint;
        ctx->seq = log->last_cp_seq;
        ctx->meta_page = alloc_page(GFP_NOFS | __GFP_HIGHMEM);
@@ -1662,10 +1770,10 @@ static int btrfs_r5l_recover_log(struct btrfs_r5l_log *log)
        ctx->io_page = alloc_page(GFP_NOFS | __GFP_HIGHMEM);
        ASSERT(ctx->io_page);
 
-       ret = btrfs_r5l_recover_flush_log(log, ctx);
-       if (ret) {
-               ;
-       }
+       ret = btrfs_r5l_recover_allocate_ra(ctx);
+       ASSERT(ret == 0);
+
+       btrfs_r5l_recover_flush_log(ctx);
 
        pos = ctx->pos;
        log->next_checkpoint = ctx->pos;
@@ -1684,6 +1792,7 @@ static int btrfs_r5l_recover_log(struct btrfs_r5l_log *log)
 #endif
        __free_page(ctx->meta_page);
        __free_page(ctx->io_page);
+       btrfs_r5l_recover_free_ra(ctx);
        kfree(ctx);
        return 0;
 }
-- 
2.9.4
