On Thu, Jan 21, 2021 at 6:27 AM Qu Wenruo <w...@suse.com> wrote: > > [BUG] > There is a long existing bug in the last parameter of > btrfs_add_ordered_extent(), in commit 771ed689d2cd ("Btrfs: Optimize > compressed writeback and reads") back to 2008. > > In that ancient commit btrfs_add_ordered_extent() expects the @type > parameter to be one of the following: > - BTRFS_ORDERED_REGULAR > - BTRFS_ORDERED_NOCOW > - BTRFS_ORDERED_PREALLOC > - BTRFS_ORDERED_COMPRESSED > > But we pass 0 in cow_file_range(), which means BTRFS_ORDERED_IO_DONE. > > Ironically, the extra check in __btrfs_add_ordered_extent() won't set the bit > if we're seeing (type == IO_DONE || type == IO_COMPLETE), which avoids any > obvious bug. > > But this still leads to regular COW ordered extent having no bit to > indicate its type in various trace events, rendering REGULAR bit > useless. > > [FIX] > This patch will change the following aspects to avoid such problem: > - Reorder btrfs_ordered_extent::flags > Now the type bits go first (REGULAR/NOCOW/PREALLOC/COMPRESSED), then > DIRECT bit, finally extra status bits like IO_DONE/COMPLETE/IOERR. > > - Add extra ASSERT() for btrfs_add_ordered_extent_*() > > - Remove @type parameter for btrfs_add_ordered_extent_compress() > As the only valid @type here is BTRFS_ORDERED_COMPRESSED. > > - Remove the unnecessary special check for IO_DONE/COMPLETE in > __btrfs_add_ordered_extent() > This is just to make the code work, with extra ASSERT(), only a > limited set of values can be passed in. 
> > Signed-off-by: Qu Wenruo <w...@suse.com> > --- > fs/btrfs/inode.c | 4 ++-- > fs/btrfs/ordered-data.c | 18 +++++++++++++----- > fs/btrfs/ordered-data.h | 37 +++++++++++++++++++++++------------- > include/trace/events/btrfs.h | 7 ++++--- > 4 files changed, 43 insertions(+), 23 deletions(-) > > diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c > index ef6cb7b620d0..ea9056cc5559 100644 > --- a/fs/btrfs/inode.c > +++ b/fs/btrfs/inode.c > @@ -917,7 +917,6 @@ static noinline void submit_compressed_extents(struct > async_chunk *async_chunk) > ins.objectid, > async_extent->ram_size, > ins.offset, > - BTRFS_ORDERED_COMPRESSED, > async_extent->compress_type); > if (ret) { > btrfs_drop_extent_cache(inode, async_extent->start, > @@ -1127,7 +1126,8 @@ static noinline int cow_file_range(struct btrfs_inode > *inode, > free_extent_map(em); > > ret = btrfs_add_ordered_extent(inode, start, ins.objectid, > - ram_size, cur_alloc_size, 0); > + ram_size, cur_alloc_size, > + BTRFS_ORDERED_REGULAR); > if (ret) > goto out_drop_extent_cache; > > diff --git a/fs/btrfs/ordered-data.c b/fs/btrfs/ordered-data.c > index d5d326c674b1..bd7e187d9b16 100644 > --- a/fs/btrfs/ordered-data.c > +++ b/fs/btrfs/ordered-data.c > @@ -199,8 +199,11 @@ static int __btrfs_add_ordered_extent(struct btrfs_inode > *inode, u64 file_offset > entry->compress_type = compress_type; > entry->truncated_len = (u64)-1; > entry->qgroup_rsv = ret; > - if (type != BTRFS_ORDERED_IO_DONE && type != BTRFS_ORDERED_COMPLETE) > - set_bit(type, &entry->flags); > + > + ASSERT(type == BTRFS_ORDERED_REGULAR || type == BTRFS_ORDERED_NOCOW || > + type == BTRFS_ORDERED_PREALLOC || > + type == BTRFS_ORDERED_COMPRESSED); > + set_bit(type, &entry->flags); > > if (dio) { > percpu_counter_add_batch(&fs_info->dio_bytes, num_bytes, > @@ -256,6 +259,8 @@ int btrfs_add_ordered_extent(struct btrfs_inode *inode, > u64 file_offset, > u64 disk_bytenr, u64 num_bytes, u64 > disk_num_bytes, > int type) > { > + ASSERT(type == BTRFS_ORDERED_REGULAR || 
type == BTRFS_ORDERED_NOCOW || > + type == BTRFS_ORDERED_PREALLOC); > return __btrfs_add_ordered_extent(inode, file_offset, disk_bytenr, > num_bytes, disk_num_bytes, type, 0, > BTRFS_COMPRESS_NONE); > @@ -265,6 +270,8 @@ int btrfs_add_ordered_extent_dio(struct btrfs_inode > *inode, u64 file_offset, > u64 disk_bytenr, u64 num_bytes, > u64 disk_num_bytes, int type) > { > + ASSERT(type == BTRFS_ORDERED_REGULAR || type == BTRFS_ORDERED_NOCOW || > + type == BTRFS_ORDERED_PREALLOC); > return __btrfs_add_ordered_extent(inode, file_offset, disk_bytenr, > num_bytes, disk_num_bytes, type, 1, > BTRFS_COMPRESS_NONE); > @@ -272,11 +279,12 @@ int btrfs_add_ordered_extent_dio(struct btrfs_inode > *inode, u64 file_offset, > > int btrfs_add_ordered_extent_compress(struct btrfs_inode *inode, u64 > file_offset, > u64 disk_bytenr, u64 num_bytes, > - u64 disk_num_bytes, int type, > - int compress_type) > + u64 disk_num_bytes, int compress_type) > { > + ASSERT(compress_type != BTRFS_COMPRESS_NONE); > return __btrfs_add_ordered_extent(inode, file_offset, disk_bytenr, > - num_bytes, disk_num_bytes, type, 0, > + num_bytes, disk_num_bytes, > + BTRFS_ORDERED_COMPRESSED, 0, > compress_type); > } > > diff --git a/fs/btrfs/ordered-data.h b/fs/btrfs/ordered-data.h > index 46194c2c05d4..151ec6bba405 100644 > --- a/fs/btrfs/ordered-data.h > +++ b/fs/btrfs/ordered-data.h > @@ -27,7 +27,7 @@ struct btrfs_ordered_sum { > }; > > /* > - * bits for the flags field: > + * Bits for btrfs_ordered_extent::flags. > * > * BTRFS_ORDERED_IO_DONE is set when all of the blocks are written. > * It is used to make sure metadata is inserted into the tree only once > @@ -38,24 +38,36 @@ struct btrfs_ordered_sum { > * IO is done and any metadata is inserted into the tree. > */ > enum { > + /* > + * Different types for direct io, one and only one of the 4 type can
Different types for both buffered and direct IO (except the compressed type). Also "4 type" -> "4 types". Other than that, it looks good, thanks. > + * be set when creating ordered extent. > + * > + * REGULAR: For regular non-compressed COW write > + * NOCOW: For NOCOW write into existing non-hole extent > + * PREALLOC: For NOCOW write into preallocated extent > + * COMPRESSED: For compressed COW write > + */ > + BTRFS_ORDERED_REGULAR, > + BTRFS_ORDERED_NOCOW, > + BTRFS_ORDERED_PREALLOC, > + BTRFS_ORDERED_COMPRESSED, > + > + /* > + * Extra bit for DirectIO, can only be set for > + * REGULAR/NOCOW/PREALLOC. No DIO for compressed extent. > + */ > + BTRFS_ORDERED_DIRECT, > + > + /* Extra status bits for ordered extents */ > + > /* set when all the pages are written */ > BTRFS_ORDERED_IO_DONE, > /* set when removed from the tree */ > BTRFS_ORDERED_COMPLETE, > - /* set when we want to write in place */ > - BTRFS_ORDERED_NOCOW, > - /* writing a zlib compressed extent */ > - BTRFS_ORDERED_COMPRESSED, > - /* set when writing to preallocated extent */ > - BTRFS_ORDERED_PREALLOC, > - /* set when we're doing DIO with this extent */ > - BTRFS_ORDERED_DIRECT, > /* We had an io error when writing this out */ > BTRFS_ORDERED_IOERR, > /* Set when we have to truncate an extent */ > BTRFS_ORDERED_TRUNCATED, > - /* Regular IO for COW */ > - BTRFS_ORDERED_REGULAR, > /* Used during fsync to track already logged extents */ > BTRFS_ORDERED_LOGGED, > /* We have already logged all the csums of the ordered extent */ > @@ -167,8 +179,7 @@ int btrfs_add_ordered_extent_dio(struct btrfs_inode > *inode, u64 file_offset, > u64 disk_num_bytes, int type); > int btrfs_add_ordered_extent_compress(struct btrfs_inode *inode, u64 > file_offset, > u64 disk_bytenr, u64 num_bytes, > - u64 disk_num_bytes, int type, > - int compress_type); > + u64 disk_num_bytes, int compress_type); > void btrfs_add_ordered_sum(struct btrfs_ordered_extent *entry, > struct btrfs_ordered_sum *sum); > struct 
btrfs_ordered_extent *btrfs_lookup_ordered_extent(struct btrfs_inode > *inode, > diff --git a/include/trace/events/btrfs.h b/include/trace/events/btrfs.h > index ecd24c719de4..b9896fc06160 100644 > --- a/include/trace/events/btrfs.h > +++ b/include/trace/events/btrfs.h > @@ -499,12 +499,13 @@ DEFINE_EVENT( > > #define show_ordered_flags(flags) \ > __print_flags(flags, "|", \ > - { (1 << BTRFS_ORDERED_IO_DONE), "IO_DONE" }, \ > - { (1 << BTRFS_ORDERED_COMPLETE), "COMPLETE" }, \ > + { (1 << BTRFS_ORDERED_REGULAR), "REGULAR" }, \ > { (1 << BTRFS_ORDERED_NOCOW), "NOCOW" }, \ > - { (1 << BTRFS_ORDERED_COMPRESSED), "COMPRESSED" }, \ > { (1 << BTRFS_ORDERED_PREALLOC), "PREALLOC" }, \ > + { (1 << BTRFS_ORDERED_COMPRESSED), "COMPRESSED" }, \ > { (1 << BTRFS_ORDERED_DIRECT), "DIRECT" }, \ > + { (1 << BTRFS_ORDERED_IO_DONE), "IO_DONE" }, \ > + { (1 << BTRFS_ORDERED_COMPLETE), "COMPLETE" }, \ > { (1 << BTRFS_ORDERED_IOERR), "IOERR" }, \ > { (1 << BTRFS_ORDERED_TRUNCATED), "TRUNCATED" }) > > -- > 2.30.0 > -- Filipe David Manana, “Whether you think you can, or you think you can't — you're right.”