Ping.
On Wed, Jun 20, 2018 at 07:56:12AM -0700, Chris Mason wrote: > For COW, btrfs expects pages dirty pages to have been through a few setup > steps. This includes reserving space for the new block allocations and > marking > the range in the state tree for delayed allocation. > > A few places outside btrfs will dirty pages directly, especially when > unmapping > mmap'd pages. In order for these to properly go through COW, we run them > through a fixup worker to wait for stable pages, and do the delalloc prep. > > 87826df0ec36 added a window where the dirty pages were cleaned, but pending > more action from the fixup worker. During this window, page migration can > jump > in and relocate the page. Once our fixup work actually starts, it finds > page->mapping is NULL and we end up freeing the page without ever writing it. > > This leads to crc errors and other exciting problems, since it screws up the > whole statemachine for waiting for ordered extents. The fix here is to keep > the page dirty while we're waiting for the fixup worker to get to work. This > also makes sure the error handling in btrfs_writepage_fixup_worker does the > right thing with dirty bits when we run out of space. > > Signed-off-by: Chris Mason <[email protected]> > --- > fs/btrfs/inode.c | 67 > +++++++++++++++++++++++++++++++++++++++++--------------- > 1 file changed, 49 insertions(+), 18 deletions(-) > > diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c > index 0b86cf1..5538900 100644 > --- a/fs/btrfs/inode.c > +++ b/fs/btrfs/inode.c > @@ -2100,11 +2100,21 @@ static void btrfs_writepage_fixup_worker(struct > btrfs_work *work) > page = fixup->page; > again: > lock_page(page); > - if (!page->mapping || !PageDirty(page) || !PageChecked(page)) { > - ClearPageChecked(page); > + > + /* > + * before we queued this fixup, we took a reference on the page. > + * page->mapping may go NULL, but it shouldn't be moved to a > + * different address space. > + */ > + if (!page->mapping || !PageDirty(page) || !PageChecked(page)) > goto out_page; > - } > > + /* > + * we keep the PageChecked() bit set until we're done with the > + * btrfs_start_ordered_extent() dance that we do below. That > + * drops and retakes the page lock, so we don't want new > + * fixup workers queued for this page during the churn. > + */ > inode = page->mapping->host; > page_start = page_offset(page); > page_end = page_offset(page) + PAGE_SIZE - 1; > @@ -2129,33 +2139,46 @@ static void btrfs_writepage_fixup_worker(struct > btrfs_work *work) > > ret = btrfs_delalloc_reserve_space(inode, &data_reserved, page_start, > PAGE_SIZE); > - if (ret) { > - mapping_set_error(page->mapping, ret); > - end_extent_writepage(page, ret, page_start, page_end); > - ClearPageChecked(page); > - goto out; > - } > + if (ret) > + goto out_error; > > ret = btrfs_set_extent_delalloc(inode, page_start, page_end, 0, > &cached_state, 0); > - if (ret) { > - mapping_set_error(page->mapping, ret); > - end_extent_writepage(page, ret, page_start, page_end); > - ClearPageChecked(page); > - goto out; > - } > + if (ret) > + goto out_error; > > - ClearPageChecked(page); > - set_page_dirty(page); > btrfs_delalloc_release_extents(BTRFS_I(inode), PAGE_SIZE, false); > + > + /* > + * everything went as planned, we're now the proud owners of a > + * Dirty page with delayed allocation bits set and space reserved > + * for our COW destination. > + * > + * The page was dirty when we started, nothing should have cleaned it. > + */ > + BUG_ON(!PageDirty(page)); > + > out: > unlock_extent_cached(&BTRFS_I(inode)->io_tree, page_start, page_end, > &cached_state); > out_page: > + ClearPageChecked(page); > unlock_page(page); > put_page(page); > kfree(fixup); > extent_changeset_free(data_reserved); > + return; > + > +out_error: > + /* > + * We hit ENOSPC or other errors. Update the mapping and page to > + * reflect the errors and clean the page. > + */ > + mapping_set_error(page->mapping, ret); > + end_extent_writepage(page, ret, page_start, page_end); > + clear_page_dirty_for_io(page); > + SetPageError(page); > + goto out; > } > > /* > @@ -2179,6 +2202,13 @@ static int btrfs_writepage_start_hook(struct page > *page, u64 start, u64 end) > if (TestClearPagePrivate2(page)) > return 0; > > + /* > + * PageChecked is set below when we create a fixup worker for this page, > + * don't try to create another one if we're already PageChecked() > + * > + * The extent_io writepage code will redirty the page if we send > + * back EAGAIN. > + */ > if (PageChecked(page)) > return -EAGAIN; > > @@ -2192,7 +2222,8 @@ static int btrfs_writepage_start_hook(struct page > *page, u64 start, u64 end) > btrfs_writepage_fixup_worker, NULL, NULL); > fixup->page = page; > btrfs_queue_work(fs_info->fixup_workers, &fixup->work); > - return -EBUSY; > + > + return -EAGAIN; > } > > static int insert_reserved_file_extent(struct btrfs_trans_handle *trans, > -- > 2.9.5 > > -- > To unsubscribe from this list: send the line "unsubscribe linux-btrfs" in > the body of a message to [email protected] > More majordomo info at http://vger.kernel.org/majordomo-info.html
