Re: [PATCH] block: note about cloned bios and bio_for_each_segment_all

2017-07-14 Thread Filipe Manana


On 07/14/2017 04:03 PM, David Sterba wrote:
> On Fri, Jul 14, 2017 at 09:47:30PM +0800, Ming Lei wrote:
>> On Fri, Jul 14, 2017 at 9:40 PM, David Sterba  wrote:
>>> We've switched to cloned bios in btrfs and hit a nasty bug leading to
>>> corruptions, when cloned bios are iterated by bio_for_each_segment_all.
>>
>> No, you simply can't use bio_for_each_segment_all on cloned bio, and the
>> reason is obviously.
> 
> This was not obvious to us, speaking for the btrfs developers trying to
> make more use of the of the bio API, so we had to find out the hard way.

Yep, it might be obvious to those familiar with the block layer's
internals, but for those not so familiar, it's not. There's no mention
in bio_clone_fast() that the cloned bio's bi_vcnt shouldn't be used, and
after finding that, one has to check which bio APIs use it and which
don't. In this specific btrfs issue, it lead to silent write
corruptions, making it harder to find (as opposed to crashes or other
immediate failures).

> 
> The proposed WARN_ON, possibly more sanity checks or documentation would
> help us not to trip over similar problems in the future. I try to take
> great care when dealing with code changing bios on our side so I read
> the headers, and partially the implementation, but still can miss
> something.
> 


Re: [PATCH v3 46/49] fs/btrfs: convert to bio_for_each_segment_all_sp()

2017-08-08 Thread Filipe Manana
On Tue, Aug 8, 2017 at 9:45 AM, Ming Lei  wrote:
> Cc: Chris Mason 
> Cc: Josef Bacik 
> Cc: David Sterba 
> Cc: linux-bt...@vger.kernel.org
> Signed-off-by: Ming Lei 

Can you please add some meaningful changelog? E.g., why is this
conversion needed.

> ---
>  fs/btrfs/compression.c |  3 ++-
>  fs/btrfs/disk-io.c |  3 ++-
>  fs/btrfs/extent_io.c   | 12 
>  fs/btrfs/inode.c   |  6 --
>  fs/btrfs/raid56.c  |  1 +
>  5 files changed, 17 insertions(+), 8 deletions(-)
>
> diff --git a/fs/btrfs/compression.c b/fs/btrfs/compression.c
> index 28746588f228..55f251a83d0b 100644
> --- a/fs/btrfs/compression.c
> +++ b/fs/btrfs/compression.c
> @@ -147,13 +147,14 @@ static void end_compressed_bio_read(struct bio *bio)
> } else {
> int i;
> struct bio_vec *bvec;
> +   struct bvec_iter_all bia;
>
> /*
>  * we have verified the checksum already, set page
>  * checked so the end_io handlers know about it
>  */
> ASSERT(!bio_flagged(bio, BIO_CLONED));
> -   bio_for_each_segment_all(bvec, cb->orig_bio, i)
> +   bio_for_each_segment_all_sp(bvec, cb->orig_bio, i, bia)
> SetPageChecked(bvec->bv_page);
>
> bio_endio(cb->orig_bio);
> diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c
> index 080e2ebb8aa0..a9cd75e6383d 100644
> --- a/fs/btrfs/disk-io.c
> +++ b/fs/btrfs/disk-io.c
> @@ -963,9 +963,10 @@ static blk_status_t btree_csum_one_bio(struct bio *bio)
> struct bio_vec *bvec;
> struct btrfs_root *root;
> int i, ret = 0;
> +   struct bvec_iter_all bia;
>
> ASSERT(!bio_flagged(bio, BIO_CLONED));
> -   bio_for_each_segment_all(bvec, bio, i) {
> +   bio_for_each_segment_all_sp(bvec, bio, i, bia) {
> root = BTRFS_I(bvec->bv_page->mapping->host)->root;
> ret = csum_dirty_buffer(root->fs_info, bvec->bv_page);
> if (ret)
> diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c
> index c8f6a8657bf2..4de9cfd1c385 100644
> --- a/fs/btrfs/extent_io.c
> +++ b/fs/btrfs/extent_io.c
> @@ -2359,8 +2359,9 @@ static unsigned int get_bio_pages(struct bio *bio)
>  {
> unsigned i;
> struct bio_vec *bv;
> +   struct bvec_iter_all bia;
>
> -   bio_for_each_segment_all(bv, bio, i)
> +   bio_for_each_segment_all_sp(bv, bio, i, bia)
> ;
>
> return i;
> @@ -2463,9 +2464,10 @@ static void end_bio_extent_writepage(struct bio *bio)
> u64 start;
> u64 end;
> int i;
> +   struct bvec_iter_all bia;
>
> ASSERT(!bio_flagged(bio, BIO_CLONED));
> -   bio_for_each_segment_all(bvec, bio, i) {
> +   bio_for_each_segment_all_sp(bvec, bio, i, bia) {
> struct page *page = bvec->bv_page;
> struct inode *inode = page->mapping->host;
> struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
> @@ -2534,9 +2536,10 @@ static void end_bio_extent_readpage(struct bio *bio)
> int mirror;
> int ret;
> int i;
> +   struct bvec_iter_all bia;
>
> ASSERT(!bio_flagged(bio, BIO_CLONED));
> -   bio_for_each_segment_all(bvec, bio, i) {
> +   bio_for_each_segment_all_sp(bvec, bio, i, bia) {
> struct page *page = bvec->bv_page;
> struct inode *inode = page->mapping->host;
> struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
> @@ -3693,9 +3696,10 @@ static void end_bio_extent_buffer_writepage(struct bio 
> *bio)
> struct bio_vec *bvec;
> struct extent_buffer *eb;
> int i, done;
> +   struct bvec_iter_all bia;
>
> ASSERT(!bio_flagged(bio, BIO_CLONED));
> -   bio_for_each_segment_all(bvec, bio, i) {
> +   bio_for_each_segment_all_sp(bvec, bio, i, bia) {
> struct page *page = bvec->bv_page;
>
> eb = (struct extent_buffer *)page->private;
> diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c
> index 084ed99dd308..eeb2ff662ec4 100644
> --- a/fs/btrfs/inode.c
> +++ b/fs/btrfs/inode.c
> @@ -8047,6 +8047,7 @@ static void btrfs_retry_endio_nocsum(struct bio *bio)
> struct bio_vec *bvec;
> struct extent_io_tree *io_tree, *failure_tree;
> int i;
> +   struct bvec_iter_all bia;
>
> if (bio->bi_status)
> goto end;
> @@ -8064,7 +8065,7 @@ static void btrfs_retry_endio_nocsum(struct bio *bio)
>
> done->uptodate = 1;
> ASSERT(!bio_flagged(bio, BIO_CLONED));
> -   bio_for_each_segment_all(bvec, bio, i)
> +   bio_for_each_segment_all_sp(bvec, bio, i, bia)
> clean_io_failure(BTRFS_I(inode)->root->fs_info, failure_tree,
>  io_tree, done->start, bvec->bv_page,
>