On Thu, Feb 21, 2019 at 01:57:12PM +0200, Nikolay Borisov wrote: > This commit changes the implementation of cow_file_range_async in order > to get rid of the BUG_ON in the middle of the loop. Additionally it > reworks the inner loop in the hopes of making it more understandable. > > The idea is to make async_cow be a top-level structure, shared > amongst all chunks being sent for compression. This allows performing > one memory allocation at the beginning and gracefully fail the IO if > there isn't enough memory. Now, each chunk is going to be described > by an async_chunk struct. It's the responsibility of the final chunk > to actually free the memory.
Right, preallocation is the way to fix it. Please pick a more descriptive subject, that one is too generic. > Signed-off-by: Nikolay Borisov <[email protected]> > --- > fs/btrfs/inode.c | 108 +++++++++++++++++++++++++++++++---------------- > 1 file changed, 71 insertions(+), 37 deletions(-) > > diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c > index 05bbfd02ea49..0f82ea348164 100644 > --- a/fs/btrfs/inode.c > +++ b/fs/btrfs/inode.c > @@ -366,7 +366,7 @@ struct async_extent { > struct list_head list; > }; > > -struct async_cow { > +struct async_chunk { > struct inode *inode; > struct btrfs_fs_info *fs_info; > struct page *locked_page; > @@ -375,9 +375,15 @@ struct async_cow { > unsigned int write_flags; > struct list_head extents; > struct btrfs_work work; > + atomic_t *pending; > +}; > + > +struct async_cow { > + atomic_t num_chunks; /* Number of chunks in flight */ > + struct async_chunk chunks[]; > }; > > -static noinline int add_async_extent(struct async_cow *cow, > +static noinline int add_async_extent(struct async_chunk *cow, > u64 start, u64 ram_size, > u64 compressed_size, > struct page **pages, > @@ -447,7 +453,7 @@ static inline void inode_should_defrag(struct btrfs_inode > *inode, > static noinline void compress_file_range(struct inode *inode, > struct page *locked_page, > u64 start, u64 end, > - struct async_cow *async_cow, > + struct async_chunk *async_cow, > int *num_added) > { > struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb); > @@ -713,7 +719,7 @@ static void free_async_extent_pages(struct async_extent > *async_extent) > * queued. We walk all the async extents created by compress_file_range > * and send them down to the disk. 
> */ > -static noinline void submit_compressed_extents(struct async_cow *async_cow) > +static noinline void submit_compressed_extents(struct async_chunk *async_cow) > { > struct inode *inode = async_cow->inode; > struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb); > @@ -1132,9 +1138,9 @@ static noinline int cow_file_range(struct inode *inode, > */ > static noinline void async_cow_start(struct btrfs_work *work) > { > - struct async_cow *async_cow; > + struct async_chunk *async_cow; > int num_added = 0; > - async_cow = container_of(work, struct async_cow, work); > + async_cow = container_of(work, struct async_chunk, work); > > compress_file_range(async_cow->inode, async_cow->locked_page, > async_cow->start, async_cow->end, async_cow, > @@ -1151,10 +1157,10 @@ static noinline void async_cow_start(struct > btrfs_work *work) > static noinline void async_cow_submit(struct btrfs_work *work) > { > struct btrfs_fs_info *fs_info; > - struct async_cow *async_cow; > + struct async_chunk *async_cow; > unsigned long nr_pages; > > - async_cow = container_of(work, struct async_cow, work); > + async_cow = container_of(work, struct async_chunk, work); > > fs_info = async_cow->fs_info; > nr_pages = (async_cow->end - async_cow->start + PAGE_SIZE) >> > @@ -1177,11 +1183,16 @@ static noinline void async_cow_submit(struct > btrfs_work *work) > > static noinline void async_cow_free(struct btrfs_work *work) > { > - struct async_cow *async_cow; > - async_cow = container_of(work, struct async_cow, work); > + struct async_chunk *async_cow; > + async_cow = container_of(work, struct async_chunk, work); > if (async_cow->inode) > btrfs_add_delayed_iput(async_cow->inode); > - kfree(async_cow); > + /* > + * Since the pointer to 'pending' is at the beginning of the array of > + * async_cow's, freeing it ensures the whole array has been freed. 
> + */ > + if (atomic_dec_and_test(async_cow->pending)) > + kfree(async_cow->pending); > } > > static int cow_file_range_async(struct inode *inode, struct page > *locked_page, > @@ -1190,45 +1201,68 @@ static int cow_file_range_async(struct inode *inode, > struct page *locked_page, > unsigned int write_flags) > { > struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb); > - struct async_cow *async_cow; > + struct async_cow *ctx; > + struct async_chunk *async_cow; > unsigned long nr_pages; > u64 cur_end; > + u64 num_chunks = DIV_ROUND_UP(end - start, SZ_512K); > + int i; > + bool should_compress; > > clear_extent_bit(&BTRFS_I(inode)->io_tree, start, end, EXTENT_LOCKED, > 1, 0, NULL); > - while (start < end) { > - async_cow = kmalloc(sizeof(*async_cow), GFP_NOFS); > - BUG_ON(!async_cow); /* -ENOMEM */ > - /* > - * igrab is called higher up in the call chain, take only the > - * lightweight reference for the callback lifetime > - */ > - ihold(inode); > - async_cow->inode = inode; > - async_cow->fs_info = fs_info; > - async_cow->locked_page = locked_page; > - async_cow->start = start; > - async_cow->write_flags = write_flags; > - > - if (BTRFS_I(inode)->flags & BTRFS_INODE_NOCOMPRESS && > - !btrfs_test_opt(fs_info, FORCE_COMPRESS)) > - cur_end = end; > - else > - cur_end = min(end, start + SZ_512K - 1); > > - async_cow->end = cur_end; > - INIT_LIST_HEAD(&async_cow->extents); > + if (BTRFS_I(inode)->flags & BTRFS_INODE_NOCOMPRESS && > + !btrfs_test_opt(fs_info, FORCE_COMPRESS)) { > + num_chunks = 1; > + should_compress = false; > + } else { > + should_compress = true; > + } > + > + ctx = kmalloc(sizeof(struct async_cow) + num_chunks*sizeof(*async_cow), > + GFP_NOFS); There's a recently introduced macro, 'struct_size', for calculating the size of a struct with a fixed header followed by an array; please use it. 
> + if (!ctx) { > + unsigned clear_bits = EXTENT_LOCKED | EXTENT_DELALLOC | > + EXTENT_DELALLOC_NEW | EXTENT_DEFRAG | > + EXTENT_DO_ACCOUNTING; > + unsigned long page_ops = PAGE_UNLOCK | PAGE_CLEAR_DIRTY | > + PAGE_SET_WRITEBACK | PAGE_END_WRITEBACK | > + PAGE_SET_ERROR; > + extent_clear_unlock_delalloc(inode, start, end, 0, locked_page, > + clear_bits, page_ops); > + return -ENOMEM; > + } > + > + async_cow = ctx->chunks; > + atomic_set(&ctx->num_chunks, num_chunks); > + > + for (i = 0; i < num_chunks; i++) { > > - btrfs_init_work(&async_cow->work, > + if (should_compress) > + cur_end = min(end, start + SZ_512K - 1); > + else > + cur_end = end; > + > + ihold(inode); You should preserve the comment, the refactoring does not make it obsolete or wrong. > + async_cow[i].pending= &ctx->num_chunks; > + async_cow[i].inode = inode; > + async_cow[i].start = start; > + async_cow[i].end = cur_end; > + async_cow[i].fs_info = fs_info; > + async_cow[i].locked_page = locked_page; > + async_cow[i].write_flags = write_flags; > + INIT_LIST_HEAD(&async_cow[i].extents); > + > + btrfs_init_work(&async_cow[i].work, > btrfs_delalloc_helper, > async_cow_start, async_cow_submit, > async_cow_free); > > - nr_pages = (cur_end - start + PAGE_SIZE) >> > - PAGE_SHIFT; > + nr_pages = DIV_ROUND_UP(cur_end - start, PAGE_SIZE); > atomic_add(nr_pages, &fs_info->async_delalloc_pages); > > - btrfs_queue_work(fs_info->delalloc_workers, &async_cow->work); > + btrfs_queue_work(fs_info->delalloc_workers, &async_cow[i].work); > > *nr_written += nr_pages; > start = cur_end + 1; > -- > 2.17.1
