During recovery, blocks are read from the raid5/6 disk one at a time. Add readahead so that up to 256 contiguous blocks can be read in a single read IO.
Signed-off-by: Liu Bo <bo.li....@oracle.com> --- fs/btrfs/raid56.c | 114 +++++++++++++++++++++++++++++++++++++++++++++++++++--- 1 file changed, 109 insertions(+), 5 deletions(-) diff --git a/fs/btrfs/raid56.c b/fs/btrfs/raid56.c index dea33c4..24f7cbb 100644 --- a/fs/btrfs/raid56.c +++ b/fs/btrfs/raid56.c @@ -1530,15 +1530,81 @@ static int btrfs_r5l_write_empty_meta_block(struct btrfs_r5l_log *log, u64 pos, return ret; } +#define BTRFS_R5L_RECOVER_IO_POOL_SIZE BIO_MAX_PAGES struct btrfs_r5l_recover_ctx { u64 pos; u64 seq; u64 total_size; struct page *meta_page; struct page *io_page; + + struct page *ra_pages[BTRFS_R5L_RECOVER_IO_POOL_SIZE]; + struct bio *ra_bio; + int total; + int valid; + u64 start_offset; + + struct btrfs_r5l_log *log; }; -static int btrfs_r5l_recover_load_meta(struct btrfs_r5l_log *log, struct btrfs_r5l_recover_ctx *ctx) +static int btrfs_r5l_recover_read_ra(struct btrfs_r5l_recover_ctx *ctx, u64 offset) +{ + bio_reset(ctx->ra_bio); + ctx->ra_bio->bi_bdev = ctx->log->dev->bdev; + ctx->ra_bio->bi_opf = REQ_OP_READ; + ctx->ra_bio->bi_iter.bi_sector = (ctx->log->data_offset + offset) >> 9; + + ctx->valid = 0; + ctx->start_offset = offset; + + while (ctx->valid < ctx->total) { + bio_add_page(ctx->ra_bio, ctx->ra_pages[ctx->valid++], PAGE_SIZE, 0); + + offset = btrfs_r5l_ring_add(ctx->log, offset, PAGE_SIZE); + if (offset == 0) + break; + } + +#ifdef BTRFS_DEBUG_R5LOG + trace_printk("to read %d pages starting from 0x%llx\n", ctx->valid, ctx->log->data_offset + ctx->start_offset); +#endif + return submit_bio_wait(ctx->ra_bio); +} + +static int btrfs_r5l_recover_read_page(struct btrfs_r5l_recover_ctx *ctx, struct page *page, u64 offset) +{ + struct page *tmp; + int index; + char *src; + char *dst; + int ret; + + if (offset < ctx->start_offset || offset >= (ctx->start_offset + ctx->valid * PAGE_SIZE)) { + ret = btrfs_r5l_recover_read_ra(ctx, offset); + if (ret) + return ret; + } + +#ifdef BTRFS_DEBUG_R5LOG + trace_printk("offset 0x%llx start->offset 
0x%llx ctx->valid %d\n", offset, ctx->start_offset, ctx->valid); +#endif + + ASSERT(IS_ALIGNED(ctx->start_offset, PAGE_SIZE)); + ASSERT(IS_ALIGNED(offset, PAGE_SIZE)); + + index = (offset - ctx->start_offset) >> PAGE_SHIFT; + ASSERT(index < ctx->valid); + + tmp = ctx->ra_pages[index]; + src = kmap(tmp); + dst = kmap(page); + memcpy(dst, src, PAGE_SIZE); + kunmap(page); + kunmap(tmp); + return 0; +} + +static int btrfs_r5l_recover_load_meta(struct btrfs_r5l_recover_ctx *ctx) { struct btrfs_r5l_meta_block *mb; @@ -1642,6 +1708,42 @@ static int btrfs_r5l_recover_flush_log(struct btrfs_r5l_log *log, struct btrfs_r } return ret; + +static int btrfs_r5l_recover_allocate_ra(struct btrfs_r5l_recover_ctx *ctx) +{ + struct page *page; + ctx->ra_bio = btrfs_io_bio_alloc(GFP_NOFS, BIO_MAX_PAGES); + + ctx->total = 0; + ctx->valid = 0; + while (ctx->total < BTRFS_R5L_RECOVER_IO_POOL_SIZE) { + page = alloc_page(GFP_NOFS | __GFP_HIGHMEM); + if (!page) + break; + + ctx->ra_pages[ctx->total++] = page; + } + + if (ctx->total == 0) { + bio_put(ctx->ra_bio); + return -ENOMEM; + } + +#ifdef BTRFS_DEBUG_R5LOG + trace_printk("readahead: %d allocated pages\n", ctx->total); +#endif + return 0; +} + +static void btrfs_r5l_recover_free_ra(struct btrfs_r5l_recover_ctx *ctx) +{ + int i; +#ifdef BTRFS_DEBUG_R5LOG + trace_printk("readahead: %d to free pages\n", ctx->total); +#endif + for (i = 0; i < ctx->total; i++) + __free_page(ctx->ra_pages[i]); + bio_put(ctx->ra_bio); } static void btrfs_r5l_write_super(struct btrfs_fs_info *fs_info, u64 cp); @@ -1655,6 +1757,7 @@ static int btrfs_r5l_recover_log(struct btrfs_r5l_log *log) ctx = kzalloc(sizeof(*ctx), GFP_NOFS); ASSERT(ctx); + ctx->log = log; ctx->pos = log->last_checkpoint; ctx->seq = log->last_cp_seq; ctx->meta_page = alloc_page(GFP_NOFS | __GFP_HIGHMEM); @@ -1662,10 +1765,10 @@ static int btrfs_r5l_recover_log(struct btrfs_r5l_log *log) ctx->io_page = alloc_page(GFP_NOFS | __GFP_HIGHMEM); ASSERT(ctx->io_page); - ret = 
btrfs_r5l_recover_flush_log(log, ctx); - if (ret) { - ; - } + ret = btrfs_r5l_recover_allocate_ra(ctx); + ASSERT(ret == 0); + + btrfs_r5l_recover_flush_log(ctx); pos = ctx->pos; log->next_checkpoint = ctx->pos; @@ -1684,6 +1787,7 @@ static int btrfs_r5l_recover_log(struct btrfs_r5l_log *log) #endif __free_page(ctx->meta_page); __free_page(ctx->io_page); + btrfs_r5l_recover_free_ra(ctx); kfree(ctx); return 0; } -- 2.9.4 -- To unsubscribe from this list: send the line "unsubscribe linux-btrfs" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html