On Tue, Mar 06, 2018 at 11:47:47AM +0100, David Sterba wrote: > On Fri, Mar 02, 2018 at 04:10:37PM -0700, Liu Bo wrote: > > In case of raid56, writes and rebuilds always take BTRFS_STRIPE_LEN(64K) > > as unit, however, scrub_extent() sets blocksize as unit, so rebuild > > process may be triggered on every block on a same stripe. > > > > A typical example would be that when we're replacing a disappeared disk, > > all reads on the disks get -EIO, every block (size is 4K if blocksize is > > 4K) would go thru these, > > > > scrub_handle_errored_block > > scrub_recheck_block # re-read pages one by one > > scrub_recheck_block # rebuild by calling raid56_parity_recover() > > page by page > > > > Although with raid56 stripe cache most of reads during rebuild can be > > avoided, the parity recover calculation(xor or raid6 algorithms) needs to > > be done $(BTRFS_STRIPE_LEN / blocksize) times. > > > > This makes it less stupid by doing raid56 scrub/replace on stripe length. > > missing s-o-b >
I'm surprised that checkpatch.pl didn't complain. > > --- > > fs/btrfs/scrub.c | 78 > > +++++++++++++++++++++++++++++++++++++++++++------------- > > 1 file changed, 60 insertions(+), 18 deletions(-) > > > > diff --git a/fs/btrfs/scrub.c b/fs/btrfs/scrub.c > > index 9882513..e3203a1 100644 > > --- a/fs/btrfs/scrub.c > > +++ b/fs/btrfs/scrub.c > > @@ -1718,6 +1718,44 @@ static int scrub_submit_raid56_bio_wait(struct > > btrfs_fs_info *fs_info, > > return blk_status_to_errno(bio->bi_status); > > } > > > > +static void scrub_recheck_block_on_raid56(struct btrfs_fs_info *fs_info, > > + struct scrub_block *sblock) > > +{ > > + struct scrub_page *first_page = sblock->pagev[0]; > > + struct bio *bio = btrfs_io_bio_alloc(BIO_MAX_PAGES); > > nontrivial initializations (variable to variable) are better put into > the statement section. > OK. > > + int page_num; > > + > > + /* All pages in sblock belongs to the same stripe on the same device. */ > > + ASSERT(first_page->dev); > > + if (first_page->dev->bdev == NULL) > > + goto out; > > + > > + bio_set_dev(bio, first_page->dev->bdev); > > + > > + for (page_num = 0; page_num < sblock->page_count; page_num++) { > > + struct scrub_page *page = sblock->pagev[page_num]; > > + > > + WARN_ON(!page->page); > > + bio_add_page(bio, page->page, PAGE_SIZE, 0); > > + } > > + > > + if (scrub_submit_raid56_bio_wait(fs_info, bio, first_page)) { > > + bio_put(bio); > > + goto out; > > + } > > + > > + bio_put(bio); > > + > > + scrub_recheck_block_checksum(sblock); > > + > > + return; > > +out: > > + for (page_num = 0; page_num < sblock->page_count; page_num++) > > + sblock->pagev[page_num]->io_error = 1; > > + > > + sblock->no_io_error_seen = 0; > > +} > > + > > /* > > * this function will check the on disk data for checksum errors, header > > * errors and read I/O errors. If any I/O errors happen, the exact pages > > @@ -1733,6 +1771,10 @@ static void scrub_recheck_block(struct btrfs_fs_info > > *fs_info, > > > > sblock->no_io_error_seen = 1; > > > > + /* short cut for raid56 */ > > + if (!retry_failed_mirror && scrub_is_page_on_raid56(sblock->pagev[0])) > > + return scrub_recheck_block_on_raid56(fs_info, sblock); > > + > > for (page_num = 0; page_num < sblock->page_count; page_num++) { > > struct bio *bio; > > struct scrub_page *page = sblock->pagev[page_num]; > > @@ -1748,19 +1790,12 @@ static void scrub_recheck_block(struct > > btrfs_fs_info *fs_info, > > bio_set_dev(bio, page->dev->bdev); > > > > bio_add_page(bio, page->page, PAGE_SIZE, 0); > > - if (!retry_failed_mirror && scrub_is_page_on_raid56(page)) { > > - if (scrub_submit_raid56_bio_wait(fs_info, bio, page)) { > > - page->io_error = 1; > > - sblock->no_io_error_seen = 0; > > - } > > - } else { > > - bio->bi_iter.bi_sector = page->physical >> 9; > > - bio_set_op_attrs(bio, REQ_OP_READ, 0); > > + bio->bi_iter.bi_sector = page->physical >> 9; > > + bio_set_op_attrs(bio, REQ_OP_READ, 0); > > https://elixir.bootlin.com/linux/latest/source/include/linux/blk_types.h#L270 > > bio_set_op_attrs should not be used OK. Thanks, -liubo > > > > > - if (btrfsic_submit_bio_wait(bio)) { > > - page->io_error = 1; > > - sblock->no_io_error_seen = 0; > > - } > > + if (btrfsic_submit_bio_wait(bio)) { > > + page->io_error = 1; > > + sblock->no_io_error_seen = 0; > > } > > > > bio_put(bio); > > @@ -2728,7 +2763,8 @@ static int scrub_find_csum(struct scrub_ctx *sctx, > > u64 logical, u8 *csum) > > } > > > > /* scrub extent tries to collect up to 64 kB for each bio */ > > -static int scrub_extent(struct scrub_ctx *sctx, u64 logical, u64 len, > > +static int scrub_extent(struct scrub_ctx *sctx, struct map_lookup *map, > > + u64 logical, u64 len, > > u64 physical, struct btrfs_device *dev, u64 flags, > > u64 gen, int mirror_num, u64 physical_for_dev_replace) > > { > > @@ -2737,13 +2773,19 @@ static int scrub_extent(struct scrub_ctx *sctx, u64 > > logical, u64 len, > > u32 blocksize; > > > > if (flags & BTRFS_EXTENT_FLAG_DATA) { > > - blocksize = sctx->fs_info->sectorsize; > > + if (map->type & BTRFS_BLOCK_GROUP_RAID56_MASK) > > + blocksize = map->stripe_len; > > + else > > + blocksize = sctx->fs_info->sectorsize; > > spin_lock(&sctx->stat_lock); > > sctx->stat.data_extents_scrubbed++; > > sctx->stat.data_bytes_scrubbed += len; > > spin_unlock(&sctx->stat_lock); > > } else if (flags & BTRFS_EXTENT_FLAG_TREE_BLOCK) { > > - blocksize = sctx->fs_info->nodesize; > > + if (map->type & BTRFS_BLOCK_GROUP_RAID56_MASK) > > + blocksize = map->stripe_len; > > + else > > + blocksize = sctx->fs_info->nodesize; > > spin_lock(&sctx->stat_lock); > > sctx->stat.tree_extents_scrubbed++; > > sctx->stat.tree_bytes_scrubbed += len; > > @@ -2883,9 +2925,9 @@ static int scrub_extent_for_parity(struct > > scrub_parity *sparity, > > } > > > > if (flags & BTRFS_EXTENT_FLAG_DATA) { > > - blocksize = sctx->fs_info->sectorsize; > > + blocksize = sparity->stripe_len; > > } else if (flags & BTRFS_EXTENT_FLAG_TREE_BLOCK) { > > - blocksize = sctx->fs_info->nodesize; > > + blocksize = sparity->stripe_len; > > } else { > > blocksize = sctx->fs_info->sectorsize; > > WARN_ON(1); > > @@ -3595,7 +3637,7 @@ static noinline_for_stack int scrub_stripe(struct > > scrub_ctx *sctx, > > if (ret) > > goto out; > > > > - ret = scrub_extent(sctx, extent_logical, extent_len, > > + ret = scrub_extent(sctx, map, extent_logical, > > extent_len, > > extent_physical, extent_dev, flags, > > generation, extent_mirror_num, > > extent_logical - logical + physical); > > -- > > 2.9.4 > > > > -- > > To unsubscribe from this list: send the line "unsubscribe linux-btrfs" in > > the body of a message to majord...@vger.kernel.org > > More majordomo info at http://vger.kernel.org/majordomo-info.html -- To unsubscribe from this list: send the line "unsubscribe linux-btrfs" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html