Ping.

On Wed, Jun 20, 2018 at 07:56:12AM -0700, Chris Mason wrote:
> For COW, btrfs expects pages dirty pages to have been through a few setup
> steps.  This includes reserving space for the new block allocations and 
> marking
> the range in the state tree for delayed allocation.
> 
> A few places outside btrfs will dirty pages directly, especially when 
> unmapping
> mmap'd pages.  In order for these to properly go through COW, we run them
> through a fixup worker to wait for stable pages, and do the delalloc prep.
> 
> 87826df0ec36 added a window where the dirty pages were cleaned, but pending
> more action from the fixup worker.  During this window, page migration can 
> jump
> in and relocate the page.  Once our fixup work actually starts, it finds
> page->mapping is NULL and we end up freeing the page without ever writing it.
> 
> This leads to crc errors and other exciting problems, since it screws up the
> whole statemachine for waiting for ordered extents.  The fix here is to keep
> the page dirty while we're waiting for the fixup worker to get to work.  This
> also makes sure the error handling in btrfs_writepage_fixup_worker does the
> right thing with dirty bits when we run out of space.
> 
> Signed-off-by: Chris Mason <[email protected]>
> ---
>  fs/btrfs/inode.c | 67 
> +++++++++++++++++++++++++++++++++++++++++---------------
>  1 file changed, 49 insertions(+), 18 deletions(-)
> 
> diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c
> index 0b86cf1..5538900 100644
> --- a/fs/btrfs/inode.c
> +++ b/fs/btrfs/inode.c
> @@ -2100,11 +2100,21 @@ static void btrfs_writepage_fixup_worker(struct 
> btrfs_work *work)
>       page = fixup->page;
>  again:
>       lock_page(page);
> -     if (!page->mapping || !PageDirty(page) || !PageChecked(page)) {
> -             ClearPageChecked(page);
> +
> +     /*
> +      * before we queued this fixup, we took a reference on the page.
> +      * page->mapping may go NULL, but it shouldn't be moved to a
> +      * different address space.
> +      */
> +     if (!page->mapping || !PageDirty(page) || !PageChecked(page))
>               goto out_page;
> -     }
>  
> +     /*
> +      * we keep the PageChecked() bit set until we're done with the
> +      * btrfs_start_ordered_extent() dance that we do below.  That
> +      * drops and retakes the page lock, so we don't want new
> +      * fixup workers queued for this page during the churn.
> +      */
>       inode = page->mapping->host;
>       page_start = page_offset(page);
>       page_end = page_offset(page) + PAGE_SIZE - 1;
> @@ -2129,33 +2139,46 @@ static void btrfs_writepage_fixup_worker(struct 
> btrfs_work *work)
>  
>       ret = btrfs_delalloc_reserve_space(inode, &data_reserved, page_start,
>                                          PAGE_SIZE);
> -     if (ret) {
> -             mapping_set_error(page->mapping, ret);
> -             end_extent_writepage(page, ret, page_start, page_end);
> -             ClearPageChecked(page);
> -             goto out;
> -      }
> +     if (ret)
> +             goto out_error;
>  
>       ret = btrfs_set_extent_delalloc(inode, page_start, page_end, 0,
>                                       &cached_state, 0);
> -     if (ret) {
> -             mapping_set_error(page->mapping, ret);
> -             end_extent_writepage(page, ret, page_start, page_end);
> -             ClearPageChecked(page);
> -             goto out;
> -     }
> +     if (ret)
> +             goto out_error;
>  
> -     ClearPageChecked(page);
> -     set_page_dirty(page);
>       btrfs_delalloc_release_extents(BTRFS_I(inode), PAGE_SIZE, false);
> +
> +     /*
> +      * everything went as planned, we're now the proud owners of a
> +      * Dirty page with delayed allocation bits set and space reserved
> +      * for our COW destination.
> +      *
> +      * The page was dirty when we started, nothing should have cleaned it.
> +      */
> +     BUG_ON(!PageDirty(page));
> +
>  out:
>       unlock_extent_cached(&BTRFS_I(inode)->io_tree, page_start, page_end,
>                            &cached_state);
>  out_page:
> +     ClearPageChecked(page);
>       unlock_page(page);
>       put_page(page);
>       kfree(fixup);
>       extent_changeset_free(data_reserved);
> +     return;
> +
> +out_error:
> +     /*
> +      * We hit ENOSPC or other errors.  Update the mapping and page to
> +      * reflect the errors and clean the page.
> +      */
> +     mapping_set_error(page->mapping, ret);
> +     end_extent_writepage(page, ret, page_start, page_end);
> +     clear_page_dirty_for_io(page);
> +     SetPageError(page);
> +     goto out;
>  }
>  
>  /*
> @@ -2179,6 +2202,13 @@ static int btrfs_writepage_start_hook(struct page 
> *page, u64 start, u64 end)
>       if (TestClearPagePrivate2(page))
>               return 0;
>  
> +     /*
> +      * PageChecked is set below when we create a fixup worker for this page,
> +      * don't try to create another one if we're already PageChecked()
> +      *
> +      * The extent_io writepage code will redirty the page if we send
> +      * back EAGAIN.
> +      */
>       if (PageChecked(page))
>               return -EAGAIN;
>  
> @@ -2192,7 +2222,8 @@ static int btrfs_writepage_start_hook(struct page 
> *page, u64 start, u64 end)
>                       btrfs_writepage_fixup_worker, NULL, NULL);
>       fixup->page = page;
>       btrfs_queue_work(fs_info->fixup_workers, &fixup->work);
> -     return -EBUSY;
> +
> +     return -EAGAIN;
>  }
>  
>  static int insert_reserved_file_extent(struct btrfs_trans_handle *trans,
> -- 
> 2.9.5
> 
> --
> To unsubscribe from this list: send the line "unsubscribe linux-btrfs" in
> the body of a message to [email protected]
> More majordomo info at  http://vger.kernel.org/majordomo-info.html

Reply via email to