Re: [PATCH 5/6] btrfs-progs: check: Add support for freespace tree fixing

2018-09-21 Thread Omar Sandoval
On Fri, Jun 15, 2018 at 02:06:01PM +0300, Nikolay Borisov wrote:
> Now that all the prerequisite code for proper support of free space
> tree repair is in, it's time to wire it in. This is achieved by first
> hooking the freespace tree to the __free_extent/alloc_reserved_tree_block
> functions. And then introducing a wrapper function to contains the
> existing check_space_cache and the newly introduced repair code.
> Finally, it's important to note that FST repair code first clears the
> existing FST in case of any problem found and rebuilds it from scratch.
> 
> Signed-off-by: Nikolay Borisov 
> ---
>  check/main.c  | 61 
> +--
>  extent-tree.c |  9 +
>  2 files changed, 47 insertions(+), 23 deletions(-)
> 
> diff --git a/check/main.c b/check/main.c
> index 3a5efaf615a9..44d734ff4254 100644
> --- a/check/main.c
> +++ b/check/main.c
> @@ -5321,19 +5321,6 @@ static int check_space_cache(struct btrfs_root *root)
>   int ret;
>   int error = 0;
>  
> - if (btrfs_super_cache_generation(root->fs_info->super_copy) != -1ULL &&
> - btrfs_super_generation(root->fs_info->super_copy) !=
> - btrfs_super_cache_generation(root->fs_info->super_copy)) {
> - printf("cache and super generation don't match, space cache "
> -"will be invalidated\n");
> - return 0;
> - }
> -
> - if (ctx.progress_enabled) {
> - ctx.tp = TASK_FREE_SPACE;
> - task_start(ctx.info);
> - }
> -
>   while (1) {
>   cache = btrfs_lookup_first_block_group(root->fs_info, start);
>   if (!cache)
> @@ -5383,11 +5370,11 @@ static int check_space_cache(struct btrfs_root *root)
>   }
>   }
>  
> - task_stop(ctx.info);
>  
>   return error ? -EINVAL : 0;
>  }
>  
> +

Stray newline.

>  /*
>   * Check data checksum for [@bytenr, @bytenr + @num_bytes).
>   *
> @@ -9338,7 +9325,6 @@ static int do_clear_free_space_cache(struct 
> btrfs_fs_info *fs_info,
>   ret = 1;
>   goto close_out;
>   }
> - printf("Clearing free space cache\n");
>   ret = clear_free_space_cache(fs_info);
>   if (ret) {
>   error("failed to clear free space cache");
> @@ -9365,6 +9351,41 @@ static int do_clear_free_space_cache(struct 
> btrfs_fs_info *fs_info,
>   return ret;
>  }
>  
> +static int validate_free_space_cache(struct btrfs_root *root)
> +{
> +
> + int ret;
> +
> + if (btrfs_super_cache_generation(root->fs_info->super_copy) != -1ULL &&
> + btrfs_super_generation(root->fs_info->super_copy) !=
> + btrfs_super_cache_generation(root->fs_info->super_copy)) {
> + printf("cache and super generation don't match, space cache "
> +"will be invalidated\n");
> + return 0;
> + }
> +
> + if (ctx.progress_enabled) {
> + ctx.tp = TASK_FREE_SPACE;
> + task_start(ctx.info);
> + }
> +
> + ret = check_space_cache(root);
> + if (ret && btrfs_fs_compat_ro(global_info, FREE_SPACE_TREE)
> + && repair) {
> + ret = do_clear_free_space_cache(global_info, 2);
> + if (ret)
> + goto out;
> +
> + ret = btrfs_create_free_space_tree(global_info);
> + if (ret)
> + error("couldn't repair freespace tree");
> + }
> +
> +out:
> + task_stop(ctx.info);
> + return ret ? -EINVAL : 0;
> +}
> +
>  const char * const cmd_check_usage[] = {
>   "btrfs check [options] ",
>   "Check structural integrity of a filesystem (unmounted).",
> @@ -9768,15 +9789,9 @@ int cmd_check(int argc, char **argv)
>   else
>   fprintf(stderr, "checking free space cache\n");
>   }
> - ret = check_space_cache(root);
> +
> + ret = validate_free_space_cache(root);
>   err |= !!ret;
> - if (ret) {
> - if (btrfs_fs_compat_ro(info, FREE_SPACE_TREE))
> - error("errors found in free space tree");
> - else
> - error("errors found in free space cache");
> - goto out;
> - }
>  
>   /*
>* We used to have to have these hole extents in between our real

This approach seems reasonable.

> diff --git a/extent-tree.c b/extent-tree.c
> index b9d51b388c9a..40117f81352e 100644
> --- a/extent-tree.c
> +++ b/extent-tree.c
> @@ -29,6 +29,7 @@
>  #include "crc32c.h"
>  #include "volumes.h"
>  #include "free-space-cache.h"
> +#include "free-space-tree.h"
>  #include "utils.h"
>  
>  #define PENDING_EXTENT_INSERT 0
> @@ -2292,6 +2293,11 @@ static int __free_extent(struct btrfs_trans_handle 
> *trans,
>   BUG_ON(ret);
>   }
>  
> + ret = add_to_free_space_tree(trans, bytenr, num_bytes);
> + if (ret) {
> + 

Re: [PATCH 4/6] btrfs-progs: Add freespace tree as compat_ro supported feature

2018-09-21 Thread Omar Sandoval
On Fri, Jun 15, 2018 at 02:06:00PM +0300, Nikolay Borisov wrote:
> The RO_FREE_SPACE_TREE(_VALID) flags are required in order to be able
> to open an FST filesystem in repair mode. Add them to
> BTRFS_FEATURE_COMPAT_RO_SUPP.
> 
> Signed-off-by: Nikolay Borisov 
> ---
>  ctree.h | 4 +++-
>  1 file changed, 3 insertions(+), 1 deletion(-)
> 
> diff --git a/ctree.h b/ctree.h
> index ade883fecbd6..ef05e8122982 100644
> --- a/ctree.h
> +++ b/ctree.h
> @@ -497,7 +497,9 @@ struct btrfs_super_block {
>   * added here until read-write support for the free space tree is 
> implemented in
>   * btrfs-progs.
>   */
> -#define BTRFS_FEATURE_COMPAT_RO_SUPP 0ULL
> +#define BTRFS_FEATURE_COMPAT_RO_SUPP \
> + (BTRFS_FEATURE_COMPAT_RO_FREE_SPACE_TREE |  \
> +  BTRFS_FEATURE_COMPAT_RO_FREE_SPACE_TREE_VALID)


Have you tested whether btrfs-progs commands that modify the filesystem
(e.g., btrfstune or btrfs fi label) work with this series? Because that
is a requirement for claiming that we support this bit (at which point
we can delete the comment above). Also, this needs to happen _after_ we
hook up the free space tree with the extent tree.

See here for some historical context:
https://www.mail-archive.com/linux-btrfs@vger.kernel.org/msg57738.html


Re: [PATCH 3/6] btrfs-progs: Pull free space tree related code from kernel

2018-09-21 Thread Nikolay Borisov



On 21.09.2018 23:19, Omar Sandoval wrote:
> On Fri, Jun 15, 2018 at 02:05:59PM +0300, Nikolay Borisov wrote:
>> To help implement free space tree checker in user space some kernel
>> function are necessary, namely iterating/deleting/adding freespace
>> items, some internal search functions. Functions to populate a block
>> group based on the extent tree. The code is largely copy/paste from
>> the kernel with locking eliminated (i.e free_space_lock). It supports
>> reading/writing of both bitmap and extent based FST trees.
>>
>> Signed-off-by: Nikolay Borisov 
> 
> Why doesn't this include the bitmap <-> extent conversions? If we end up
> rebuilding the free space tree, we're never going to use the bitmap
> format, which sucks if the free space is fragmented.
> 

The idea was to have *something* working initially and then add as
deemed appropriate.


Re: [PATCH 3/6] btrfs-progs: Pull free space tree related code from kernel

2018-09-21 Thread Omar Sandoval
On Fri, Jun 15, 2018 at 02:05:59PM +0300, Nikolay Borisov wrote:
> To help implement free space tree checker in user space some kernel
> function are necessary, namely iterating/deleting/adding freespace
> items, some internal search functions. Functions to populate a block
> group based on the extent tree. The code is largely copy/paste from
> the kernel with locking eliminated (i.e free_space_lock). It supports
> reading/writing of both bitmap and extent based FST trees.
> 
> Signed-off-by: Nikolay Borisov 

Why doesn't this include the bitmap <-> extent conversions? If we end up
rebuilding the free space tree, we're never going to use the bitmap
format, which sucks if the free space is fragmented.


Re: [PATCH 1/6] btrfs-progs: Add support for freespace tree in btrfs_read_fs_root

2018-09-21 Thread Omar Sandoval
On Fri, Jun 15, 2018 at 02:05:57PM +0300, Nikolay Borisov wrote:
> For completeness sake add code to btrfs_read_fs_root so that it can
> handle the freespace tree.

Reviewed-by: Omar Sandoval 

> Signed-off-by: Nikolay Borisov 
> ---
>  disk-io.c | 3 +++
>  1 file changed, 3 insertions(+)
> 
> diff --git a/disk-io.c b/disk-io.c
> index 8da6e3ce5fc8..9ad826b83b3e 100644
> --- a/disk-io.c
> +++ b/disk-io.c
> @@ -664,6 +664,9 @@ struct btrfs_root *btrfs_read_fs_root(struct 
> btrfs_fs_info *fs_info,
>   if (location->objectid == BTRFS_QUOTA_TREE_OBJECTID)
>   return fs_info->quota_enabled ? fs_info->quota_root :
>   ERR_PTR(-ENOENT);
> + if (location->objectid == BTRFS_FREE_SPACE_TREE_OBJECTID)
> +return fs_info->free_space_root ? fs_info->free_space_root :
> +  ERR_PTR(-ENOENT);
>  
>   BUG_ON(location->objectid == BTRFS_TREE_RELOC_OBJECTID ||
>  location->offset != (u64)-1);
> -- 
> 2.7.4
> 


Re: [PATCH v8 0/6] Btrfs: implement swap file support

2018-09-21 Thread Omar Sandoval
On Fri, Sep 21, 2018 at 05:17:35PM +0200, David Sterba wrote:
> On Thu, Sep 20, 2018 at 10:41:24AM -0700, Omar Sandoval wrote:
> > On Thu, Sep 20, 2018 at 07:22:55PM +0200, David Sterba wrote:
> > > On Wed, Sep 19, 2018 at 10:02:11PM -0700, Omar Sandoval wrote:
> > > > From: Omar Sandoval 
> > > > Changes from v7 [1]:
> > > > 
> > > > - Expanded a few commit messages
> > > > - Added Johannes' acked-by on patches 1 and 2
> > > > - Rebased on v4.19-rc4
> > > 
> > > I've sent my comments, it's mostly about the usability or small
> > > enhancements. As you've got acks from MM people, I hope it would be ok
> > > if I add this series to for-next so we can give it some testing.
> > 
> > That'd be great. Feel free to grab it from my git tree
> > (https://github.com/osandov/linux/tree/btrfs-swap) if you want the
> > version with your comments addressed.
> 
> Updates looks good, branch added to the for-next snapshot and will be in
> upcoming for-next.

I got a kbuild error when building with CONFIG_SWAP=n, just pushed the
fix below on patch 6:

diff --git b/fs/btrfs/inode.c a/fs/btrfs/inode.c
index ffe266e612e3..6de98bb30c27 100644
--- b/fs/btrfs/inode.c
+++ a/fs/btrfs/inode.c
@@ -10489,6 +10489,7 @@ void btrfs_set_range_writeback(struct extent_io_tree 
*tree, u64 start, u64 end)
}
 }
 
+#ifdef CONFIG_SWAP
 /*
  * Add an entry indicating a block group or device which is pinned by a
  * swapfile. Returns 0 on success, 1 if there is already an entry for it, or a
@@ -10812,6 +10813,17 @@ static int btrfs_swap_activate(struct swap_info_struct 
*sis, struct file *file,
sis->highest_bit = bsi.nr_pages - 1;
return bsi.nr_extents;
 }
+#else
+static void btrfs_swap_deactivate(struct file *file)
+{
+}
+
+static int btrfs_swap_activate(struct swap_info_struct *sis, struct file *file,
+  sector_t *span)
+{
+   return -EOPNOTSUPP;
+}
+#endif
 
 static const struct inode_operations btrfs_dir_inode_operations = {
.getattr= btrfs_getattr,

> > > The MM patches would better go separately to 4.20 via the mm tree.  I
> > > did only build tests so 4.20 target is still feasible but given that
> > > it's rc4 it's a bit too close. There are some limitations posed by the
> > > swapfiles so I'd like to have a chance to do some actual tests myself
> > > and check the usability status.
> > 
> > 4.20 would be nice, but I could live with 4.21. I'll just be backporting
> > it to our internal kernel here anyways ;) Let me know how the tests go
> > and which way you want to go.
> 
> Backporting to your kernel is fine, your users will complain to you, but
> once it's in the mainline the complaints will go my way :)
> 
> As for the merge of the non-btrfs patches, I checked again and there are
> the VFS/documentation patches that haven't been CCed to the relvant
> people.  For that reason I'm not much comfortable to take them through
> my tree for the final merge. The MM part looks fine from that
> perspective.

There aren't any VFS changes, just the trivial documentation fixes.
fsdevel was Cc'd for the first four versions, but it's hard enough to
get Al to look at actual changes, let alone a documentation fix.


Re: [PATCH 3/3] btrfs: Add zstd support to btrfs

2018-09-21 Thread Daniel Kiper
On Mon, Aug 27, 2018 at 06:36:54PM -0700, Nick Terrell wrote:
> Adds zstd support to the btrfs module. I'm not sure that my changes to the
> Makefiles are correct, please let me know if I need to do something
> differently.
>
> Tested on Ubuntu-18.04 with a btrfs /boot partition with and without zstd
> compression. A test case was also added to the test suite that fails before
> the patch, and passes after.
>
> Signed-off-by: Nick Terrell 
> ---
>  Makefile.util.def|  8 -
>  grub-core/Makefile.core.def  | 10 --
>  grub-core/fs/btrfs.c | 85 
> +++-
>  tests/btrfs_test.in  |  1 +
>  tests/util/grub-fs-tester.in |  4 +--
>  5 files changed, 102 insertions(+), 6 deletions(-)
>
> diff --git a/Makefile.util.def b/Makefile.util.def
> index 3180ac880..b987dc637 100644
> --- a/Makefile.util.def
> +++ b/Makefile.util.def
> @@ -54,7 +54,7 @@ library = {
>  library = {
>name = libgrubmods.a;
>cflags = '-fno-builtin -Wno-undef';
> -  cppflags = '-I$(top_srcdir)/grub-core/lib/minilzo 
> -I$(srcdir)/grub-core/lib/xzembed -DMINILZO_HAVE_CONFIG_H';
> +  cppflags = '-I$(top_srcdir)/grub-core/lib/minilzo 
> -I$(srcdir)/grub-core/lib/xzembed -I$(top_srcdir)/grub-core/lib/zstd 
> -DMINILZO_HAVE_CONFIG_H';
>
>common_nodist = grub_script.tab.c;
>common_nodist = grub_script.yy.c;
> @@ -165,6 +165,12 @@ library = {
>common = grub-core/lib/xzembed/xz_dec_bcj.c;
>common = grub-core/lib/xzembed/xz_dec_lzma2.c;
>common = grub-core/lib/xzembed/xz_dec_stream.c;
> +  common = grub-core/lib/zstd/zstd_common.c;
> +  common = grub-core/lib/zstd/huf_decompress.c;
> +  common = grub-core/lib/zstd/fse_decompress.c;
> +  common = grub-core/lib/zstd/entropy_common.c;
> +  common = grub-core/lib/zstd/decompress.c;
> +  common = grub-core/lib/zstd/xxhash.c;
>  };
>
>  program = {
> diff --git a/grub-core/Makefile.core.def b/grub-core/Makefile.core.def
> index 9590e87d9..c24bf9147 100644
> --- a/grub-core/Makefile.core.def
> +++ b/grub-core/Makefile.core.def
> @@ -404,7 +404,7 @@ image = {
>i386_pc = boot/i386/pc/boot.S;
>
>cppflags = '-DHYBRID_BOOT=1';
> -
> +

Please do not introduce such noise...

>i386_pc_ldflags = '$(TARGET_IMG_LDFLAGS)';
>i386_pc_ldflags = '$(TARGET_IMG_BASE_LDOPT),0x7C00';
>
> @@ -1264,8 +1264,14 @@ module = {
>name = btrfs;
>common = fs/btrfs.c;
>common = lib/crc.c;
> +  common = lib/zstd/zstd_common.c;
> +  common = lib/zstd/huf_decompress.c;
> +  common = lib/zstd/fse_decompress.c;
> +  common = lib/zstd/entropy_common.c;
> +  common = lib/zstd/decompress.c;
> +  common = lib/zstd/xxhash.c;
>cflags = '$(CFLAGS_POSIX) -Wno-undef';
> -  cppflags = '-I$(srcdir)/lib/posix_wrap -I$(srcdir)/lib/minilzo 
> -DMINILZO_HAVE_CONFIG_H';
> +  cppflags = '-I$(srcdir)/lib/posix_wrap -I$(srcdir)/lib/minilzo 
> -I$(srcdir)/lib/zstd -DMINILZO_HAVE_CONFIG_H';
>  };
>
>  module = {
> diff --git a/grub-core/fs/btrfs.c b/grub-core/fs/btrfs.c
> index be195448d..89ed4884e 100644
> --- a/grub-core/fs/btrfs.c
> +++ b/grub-core/fs/btrfs.c
> @@ -27,6 +27,7 @@
>  #include 
>  #include 
>  #include 
> +#include 
>  #include 
>  #include 
>
> @@ -45,6 +46,9 @@ GRUB_MOD_LICENSE ("GPLv3+");
>  #define GRUB_BTRFS_LZO_BLOCK_MAX_CSIZE (GRUB_BTRFS_LZO_BLOCK_SIZE + \
>(GRUB_BTRFS_LZO_BLOCK_SIZE / 16) + 64 + 3)
>
> +#define ZSTD_BTRFS_MAX_WINDOWLOG 17
> +#define ZSTD_BTRFS_MAX_INPUT (1 << ZSTD_BTRFS_MAX_WINDOWLOG)

Please align stuff properly, i.e.

#define ZSTD_BTRFS_MAX_WINDOWLOG 17
#define ZSTD_BTRFS_MAX_INPUT (1 << ZSTD_BTRFS_MAX_WINDOWLOG)

> +
>  typedef grub_uint8_t grub_btrfs_checksum_t[0x20];
>  typedef grub_uint16_t grub_btrfs_uuid_t[8];
>
> @@ -212,6 +216,7 @@ struct grub_btrfs_extent_data
>  #define GRUB_BTRFS_COMPRESSION_NONE 0
>  #define GRUB_BTRFS_COMPRESSION_ZLIB 1
>  #define GRUB_BTRFS_COMPRESSION_LZO  2
> +#define GRUB_BTRFS_COMPRESSION_ZSTD  3

Ditto.

>  #define GRUB_BTRFS_OBJECT_ID_CHUNK 0x100
>
> @@ -912,6 +917,70 @@ grub_btrfs_read_inode (struct grub_btrfs_data *data,
>return grub_btrfs_read_logical (data, elemaddr, inode, sizeof (*inode), 0);
>  }
>
> +static grub_ssize_t
> +grub_btrfs_zstd_decompress(char *ibuf, grub_size_t isize, grub_off_t off,
> +   char *obuf, grub_size_t osize)
> +{
> + void *allocated = NULL;
> + char *otmpbuf = obuf;
> + grub_size_t otmpsize = osize;
> + void *wmem = NULL;
> + const grub_size_t wmem_size = ZSTD_DCtxWorkspaceBound ();
> + ZSTD_DCtx *dctx;
> + grub_size_t zstd_ret;
> + grub_ssize_t ret = -1;
> +
> + /* Zstd will fail if it can't fit the entire output in the destination
> +  * buffer, so if osize isn't large enough, allocate a temporary buffer.
> +  */
> + if (otmpsize < ZSTD_BTRFS_MAX_INPUT) {
> + allocated = grub_malloc (ZSTD_BTRFS_MAX_INPUT);
> + if (!allocated) {
> + grub_dprintf ("zstd", "outtmpbuf 

Re: [PATCH] btrfs: use common helper instead of open coding a bit test

2018-09-21 Thread Nikolay Borisov



On 21.09.2018 15:26, David Sterba wrote:
> The helper does the same math and we take care about the special case
> when flags is 0 too.
> 
> Signed-off-by: David Sterba 
Reviewed-by: Nikolay Borisov 
> ---
>  fs/btrfs/volumes.c | 2 +-
>  1 file changed, 1 insertion(+), 1 deletion(-)
> 
> diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c
> index 909c578506ee..26eb388db343 100644
> --- a/fs/btrfs/volumes.c
> +++ b/fs/btrfs/volumes.c
> @@ -3691,7 +3691,7 @@ static int alloc_profile_is_valid(u64 flags, int 
> extended)
>   return !extended; /* "0" is valid for usual profiles */
>  
>   /* true if exactly one bit set */
> - return (flags & (flags - 1)) == 0;
> + return is_power_of_2(flags);
>  }
>  
>  static inline int balance_need_close(struct btrfs_fs_info *fs_info)
> 


Re: [PATCH v2] Btrfs: remove wait_ordered_rane in btrfs_evict_inode

2018-09-21 Thread David Sterba
On Sat, Sep 15, 2018 at 05:03:28AM +0800, Liu Bo wrote:
> When we delete an inode,
> 
> btrfs_evict_inode() {
> truncate_inode_pages_final()
> truncate_inode_pages_range()
> lock_page()
> truncate_cleanup_page()
>  btrfs_invalidatepage()
>   wait_on_page_writeback
>btrfs_lookup_ordered_range()
>  cancel_dirty_page()
>unlock_page()
>  ...
>  btrfs_wait_ordered_range()
>  ...
> 
> As VFS has called ->invalidatepage() to get all ordered extents done
> (if there is any) and truncated all page cache pages (no dirty pages
> to writeback after this step), wait_ordered_range() is just a noop.
> 
> Reviewed-by: David Sterba 
> Signed-off-by: Liu Bo 
> ---
> v2: More details in the description.

Thanks, patch updated.


Re: [PATCH v8 6/6] Btrfs: support swap files

2018-09-21 Thread David Sterba
On Thu, Sep 20, 2018 at 10:22:57AM -0700, Omar Sandoval wrote:
> > > + /*
> > > +  * Balance or device remove/replace/resize can move stuff around from
> > > +  * under us. The EXCL_OP flag makes sure they aren't running/won't run
> > > +  * concurrently while we are mapping the swap extents, and
> > > +  * fs_info->swapfile_pins prevents them from running while the swap file
> > > +  * is active and moving the extents. Note that this also prevents a
> > > +  * concurrent device add which isn't actually necessary, but it's not
> > > +  * really worth the trouble to allow it.
> > > +  */
> > > + if (test_and_set_bit(BTRFS_FS_EXCL_OP, _info->flags))
> > > + return -EBUSY;
> > 
> > This could be also accompanied by a message, "why does not my swapfile
> > activate?" -> "there's an exclusive operation running". I've checked if
> > there are similar messages for the other exclusive ops. There are.
> 
> Sounds good. I addressed all of your comments and pushed to
> https://github.com/osandov/linux/tree/btrfs-swap. The only thing I
> didn't change was the btrfs_info about not being able to relocate an
> active swapfile. I think it makes sense as btrfs_info since we already
> log every block group we are relocating as info (see
> describe_relocation()).

Ok, then it's consistent with the relocation mesages. I'll do another
pass just to see if the log level is sane for all messages but this can
be tuned later too, so this is only a minor issue.


Re: [PATCH v8 0/6] Btrfs: implement swap file support

2018-09-21 Thread David Sterba
On Thu, Sep 20, 2018 at 10:41:24AM -0700, Omar Sandoval wrote:
> On Thu, Sep 20, 2018 at 07:22:55PM +0200, David Sterba wrote:
> > On Wed, Sep 19, 2018 at 10:02:11PM -0700, Omar Sandoval wrote:
> > > From: Omar Sandoval 
> > > Changes from v7 [1]:
> > > 
> > > - Expanded a few commit messages
> > > - Added Johannes' acked-by on patches 1 and 2
> > > - Rebased on v4.19-rc4
> > 
> > I've sent my comments, it's mostly about the usability or small
> > enhancements. As you've got acks from MM people, I hope it would be ok
> > if I add this series to for-next so we can give it some testing.
> 
> That'd be great. Feel free to grab it from my git tree
> (https://github.com/osandov/linux/tree/btrfs-swap) if you want the
> version with your comments addressed.

Updates looks good, branch added to the for-next snapshot and will be in
upcoming for-next.

> > The MM patches would better go separately to 4.20 via the mm tree.  I
> > did only build tests so 4.20 target is still feasible but given that
> > it's rc4 it's a bit too close. There are some limitations posed by the
> > swapfiles so I'd like to have a chance to do some actual tests myself
> > and check the usability status.
> 
> 4.20 would be nice, but I could live with 4.21. I'll just be backporting
> it to our internal kernel here anyways ;) Let me know how the tests go
> and which way you want to go.

Backporting to your kernel is fine, your users will complain to you, but
once it's in the mainline the complaints will go my way :)

As for the merge of the non-btrfs patches, I checked again and there are
the VFS/documentation patches that haven't been CCed to the relvant
people.  For that reason I'm not much comfortable to take them through
my tree for the final merge. The MM part looks fine from that
perspective.


Re: [PATCH 0/3] Refactor delayed refs processing loop

2018-09-21 Thread David Sterba
On Wed, Aug 15, 2018 at 10:39:53AM +0300, Nikolay Borisov wrote:
> Here is a small series which aims to rectify the eye sore that delayed refs 
> processing loop currently is. In fact, it's actually 2 loops in the guise of 
> a 
> single 'while' construct. All in all this should bring no functional changes 
> and I've verified this with multiple xfstest runs with no problems. 
> 
> Patch 1 factors out the code which deals with selecting a ref head which has 
> delayed refs pending and locking it. 
> 
> Patch 2 introduces a new function which comprises the internal loop aka 
> processing delayed refs of a delayed head, which is more or less most of the 
> code in __btrfs_run_delayed_refs copied. The only difference is that the 
> function can return EAGAIN if we detect a delayed ref which has sequence 
> number
> higher than what is currently in the tree mod list. 
> 
> Patch 3 Finaly makes the loop in __btrfs_run_delayed_refs use the function 
> introduced in the previous patch, meaning deleting most of the code and 
> changing the loop to a 'do {} while' construct. I strived to retain all the 
> semantics of the old code so there should be no surprises. 
> 
> Nikolay Borisov (3):
>   btrfs: Factor out ref head locking code in __btrfs_run_delayed_refs
>   btrfs: Factor out loop processing all refs of a head
>   btrfs: refactor __btrfs_run_delayed_refs loop

Added to misc-next. It's been in next for some time and I haven't
noticed problems related to that patchset.


Re: [PATCH 2/3] btrfs: Factor out loop processing all refs of a head

2018-09-21 Thread David Sterba
On Wed, Aug 15, 2018 at 10:39:55AM +0300, Nikolay Borisov wrote:
> This patch introduces a new helper encompassing the implicit inner loop
> in __btrfs_run_delayed_refs which processes all the refs for a given
> head. The code is mostly copy/paste, the only difference is that if we
> detect a newer reference then -EAGAIN is returned so that callers can
> react correctly. Also at the end of the loop the head is relocked and
> btrfs_merge_delayed_refs is run again to retain the pre-refactoring
> semantics.
> 
> Signed-off-by: Nikolay Borisov 

Reviewed-by: David Sterba 


Re: [PATCH 3/3] btrfs: refactor __btrfs_run_delayed_refs loop

2018-09-21 Thread David Sterba
On Wed, Aug 15, 2018 at 10:39:56AM +0300, Nikolay Borisov wrote:
> Refactor the delayed refs loop by using the newly introduced
> btrfs_run_delayed_refs_for_head function. This greatly simplifies
> __btrfs_run_delayed_refs and makes it more obvious what is happening.
> We now have 1 loop which iterates the existing delayed_heads and then
> each selected ref head is processed by the new helper. All existing
> semantics of the code are preserved so no functional changes.

What a mess, took me some time to understand and find the hidden loop,
this is a perfect counter example. Thanks for fixing it up.

Reviewed-by: David Sterba 


> - rb_erase(>ref_node, _ref->ref_tree);

There was a merge conflict with the rb_root_cached tree update, but the
fixup was trivial.

> + } while ((nr != -1 && count < nr) || locked_ref);

I can't be the first to notice that the '-1' is signed long compared to
an unsigned long. This part is not obvious as this means to process all
delayed refs, called from the contexts like commit.  Replacing that with
something more explicit would be good.


Re: [PATCH 1/3] btrfs: Factor out ref head locking code in __btrfs_run_delayed_refs

2018-09-21 Thread David Sterba
On Wed, Aug 15, 2018 at 10:39:54AM +0300, Nikolay Borisov wrote:
> This is in preparation to refactor the giant loop in
> __btrfs_run_delayed_refs. As a first step define a new function
> which implements acquiring a reference to a btrfs_delayed_refs_head and
> use it. No functional changes.
> 
> Signed-off-by: Nikolay Borisov 
> ---
>  fs/btrfs/extent-tree.c | 54 
> ++
>  1 file changed, 37 insertions(+), 17 deletions(-)
> 
> diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c
> index de6f75f5547b..6a2c86c8a756 100644
> --- a/fs/btrfs/extent-tree.c
> +++ b/fs/btrfs/extent-tree.c
> @@ -2502,6 +2502,40 @@ static int cleanup_ref_head(struct btrfs_trans_handle 
> *trans,
>   return 0;
>  }
>  
> +STATIC
> +struct btrfs_delayed_ref_head *btrfs_obtain_ref_head(
> + struct btrfs_trans_handle *trans)

The STATIC is now gone from misc-next so it's just 'static'. Please
don't split the type and function name. It's allowed to fixup the style
once the function prototype is updated.

> +{
> + struct btrfs_delayed_ref_root *delayed_refs =
> + >transaction->delayed_refs;
> + struct btrfs_delayed_ref_head *head = NULL;
> + int ret;
> +
> + spin_lock(_refs->lock);
> + head = btrfs_select_ref_head(trans);
> + if (!head) {
> + spin_unlock(_refs->lock);
> + return head;
> + }
> +
> + /*
> +  * grab the lock that says we are going to process  all the refs for
> +  * this head
> +  */
> + ret = btrfs_delayed_ref_lock(trans, head);
> + spin_unlock(_refs->lock);
> + /*
> +  * we may have dropped the spin lock to get the head
> +  * mutex lock, and that might have given someone else
> +  * time to free the head.  If that's true, it has been
> +  * removed from our list and we can move on.
> +  */

As you move the entire comment, you can reformat it or reword if you
find a better way what's is supposed to say.

Otherwise

Reviewed-by: David Sterba 


Re: [PATCH RFC] btrfs: delayed-inode: Use spinlock to protect btrfs_inode::delayed_node

2018-09-21 Thread David Sterba
On Wed, Sep 19, 2018 at 02:59:58PM +0800, Qu Wenruo wrote:
> In the following case, we could trigger a use-after-free bug:
> 
>  CPU0|   CPU1
> -
> btrfs_remove_delayed_node| btrfs_get_delayed_node
> |- delayed_node =| |- node = btrfs_inode->delayed_node;
> |btrfs_inode->delayed_node   | |
> |- btrfs_release_delaedy_node()  | |
>|- ref_count_dev_and_test()   | |
>|- kmem_cache_free()  | |
>   Now delayed node is freed  | |
>  | |- refcount_inc(>refs)
> 
> In that case sine delayed_node is using kmem cache, such use-after-free
> bug won't directly cause problem, but could leads to corrupted data
> structure of other kmem cache user.
> 
> Fix it by adding btrfs_inode::delayed_node_lock to protect such
> operation.
> 
> Reported-by: sunny.s.zhang 
> Signed-off-by: Qu Wenruo 
> ---
> Please don't merge this patch yet.
> 
> The patch caused random slow down for a lot of quick test cases.
> Old tests can be executed in 1s or so now randomly needs near 20s.
> 
> It looks like the spin_lock() with root->inode_lock hold is causing the
> problem but I can't see what's going wrong.
> As the operation done with @delayed_node_lock hold is literatly tiny.
> 
> Any comment on this is welcomed.

I found the original report and discussion, so the resume is that it's
possibly caused by the refcount_t rework and the missing fix
ec35e48b2869599 .

As in time of evict there can be no active operation running and the
crash happened inside atime update. I take the bug in refcounting as a
plausible explanation so your patch does not seem to be necessary,
unless there's a reproducer on a recent kernel.


btrfs send receive ERROR: chown failed: No such file or directory

2018-09-21 Thread Leonard Lausen
Hello!

I observe the following issue with btrfs send | btrfs receive in a setup
with 2 machines and 3 btrfs file-systems. All machines run Linux 4.18.9.
Machine 1 runs btrfs-progs 4.17.1, machine 2 runs btrfs-progs 4.17 (via
https://packages.debian.org/stretch-backports/btrfs-progs).

1) Machine 1 takes regular snapshots and sends them to machine 2. btrfs 
   btrfs send ... | ssh user@machine2 "btrfs receive /path1"
2) Machine 2 backups all subvolumes stored at /path1 to a second
   independent btrfs filesystem. Let /path1/rootsnapshot be the first
   snapshot stored at /path1 (ie. it has no Parent UUID). Let
   /path1/incrementalsnapshot be a snapshot that has /path1/rootsnapshot
   as a parent. Then
   btrfs send -v /path1/rootsnapshot | btrfs receive /path2
   works without issues, but
   btrfs send -v -p /path1/rootsnapshot /path1/incrementalsnapshot | btrfs 
receive /path2
   fails as follows:
   ERROR: chown o257-4639416-0 failed: No such file or directory

No error is shown in dmesg. /path1 and /path2 denote two independent
btrfs filesystems.

Note that there was no issue with transferring incrementalsnapshot from
machine 1 to machine 2. No error is shown in dmesg.

Best regards
Leonard


[PATCH] btrfs: use common helper instead of open coding a bit test

2018-09-21 Thread David Sterba
The helper does the same math and we take care about the special case
when flags is 0 too.

Signed-off-by: David Sterba 
---
 fs/btrfs/volumes.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c
index 909c578506ee..26eb388db343 100644
--- a/fs/btrfs/volumes.c
+++ b/fs/btrfs/volumes.c
@@ -3691,7 +3691,7 @@ static int alloc_profile_is_valid(u64 flags, int extended)
return !extended; /* "0" is valid for usual profiles */
 
/* true if exactly one bit set */
-   return (flags & (flags - 1)) == 0;
+   return is_power_of_2(flags);
 }
 
 static inline int balance_need_close(struct btrfs_fs_info *fs_info)
-- 
2.18.0



Re: [RFC PATCH v2 0/4] btrfs-progs: build distinct binaries for specific btrfs subcommands

2018-09-21 Thread Axel Burri
On 20/09/2018 10.32, Duncan wrote:
> Axel Burri posted on Thu, 20 Sep 2018 00:02:22 +0200 as excerpted:
> 
>> Now not everybody wants to install these with fscaps or setuid, but it
>> might also make sense to provide "/usr/bin/btrfs-subvolume-{show,list}",
>> as they now work for a regular user. Having both root/user binaries
>> concurrently is not an issue (e.g. in gentoo the full-featured btrfs
>> command is in "/sbin/").
> 
> That's going to be a problem for distros (or users like me with advanced 
> layouts, on gentoo too FWIW) that have the bin/sbin merge, where one is a 
> symlink to the other.

I think you got me wrong here: There will not be binaries with the same
filename. I totally agree that this would be a bad thing, no matter if
you have bin/sbin merged or not, you'll end up in either having a
collision or (even worse) rely on the order in $PATH.

With this "separated" patchset, you can install a binary
"btrfs-subvolume-show", which has the same functionality as "btrfs
subvolume show" (note the whitespace/dash), ending up with:

/sbin/btrfs
/usr/bin/btrfs-subvolume-show
/usr/bin/btrfs-subvolume-list

thus allowing a user to run "btrfs-subvolume-show" while "btrfs" is
still only visible for root. With /usr merge it would look like this:

/bin/btrfs
/bin/btrfs-subvolume-show
/bin/btrfs-subvolume-list


What I'm thinking of is a distro package which installs the separated
binaries, either with suid/fscaps (if the admin wants to provide their
users with non-restricted versions of specific commands), or without
special flags (if the admin wants to just provide the binaries that also
work for non-root user, enabled by Misono's patchset.

See my gentoo ebuild:
https://github.com/digint/gentoo/tree/btrfs-progs-separated/sys-fs/btrfs-progs-separated


> 
> FWIW I have both the /usr merge (tho reversed for me, so /usr -> . 
> instead of having to have /bin and /sbin symlinks to /usr/bin) and the 
> bin/sbin merge, along with, since I'm on amd64-nomultilib, the lib/lib64 
> merge.  So:
> 
> $$ dir -gGd /bin /sbin /usr /lib /lib64
> drwxr-xr-x 1 35688 Sep 18 22:56 /bin
> lrwxrwxrwx 1 5 Aug  7 00:29 /lib -> lib64
> drwxr-xr-x 1 78560 Sep 18 22:56 /lib64
> lrwxrwxrwx 1 3 Mar 11  2018 /sbin -> bin
> lrwxrwxrwx 1 1 Mar 11  2018 /usr -> .
> 
> 
> Of course that last one (/usr -> .) leads to /share and /include hanging 
> directly off of / as well, but it works.
> 
> But in that scheme /bin, /sbin, /usr/bin and /usr/sbin, are all the same 
> dir, so only one executable of a particularly name can exist therein.
> 


Re: [PATCH v2 2/2] btrfs: relocation: Remove redundant tree level check

2018-09-21 Thread Qu Wenruo



On 2018/9/21 下午3:33, Nikolay Borisov wrote:
> 
> 
> On 21.09.2018 10:20, Qu Wenruo wrote:
>> Commit 581c1760415c ("btrfs: Validate child tree block's level and first
>> key") has made tree block level check mandatory.
>>
>> So if tree block level doesn't match, we won't get a valid extent
>> buffer.
>> The extra WARN_ON() check can be removed completely.
>>
>> Signed-off-by: Qu Wenruo 
>> ---
>> Changelog:
>> v2:
>>   Added tags.
> 
> Nope you didn't :)

Face palm, only added in formatted patch, and just re-formatted patch again.

Anyway, David could handle it.

Thanks,
Qu

> 
>> ---
>>  fs/btrfs/relocation.c | 1 -
>>  1 file changed, 1 deletion(-)
>>
>> diff --git a/fs/btrfs/relocation.c b/fs/btrfs/relocation.c
>> index 94aa148ccde6..a554e7861b6e 100644
>> --- a/fs/btrfs/relocation.c
>> +++ b/fs/btrfs/relocation.c
>> @@ -2911,7 +2911,6 @@ static int get_tree_block_key(struct btrfs_fs_info 
>> *fs_info,
>>  free_extent_buffer(eb);
>>  return -EIO;
>>  }
>> -WARN_ON(btrfs_header_level(eb) != block->level);
>>  if (block->level == 0)
>>  btrfs_item_key_to_cpu(eb, >key, 0);
>>  else
>>


Re: [PATCH v2 1/2] btrfs: relocation: Cleanup while() loop using rbtree_postorder_for_each_entry_safe()

2018-09-21 Thread Nikolay Borisov



On 21.09.2018 10:20, Qu Wenruo wrote:
> And add one line comment explaining what we're doing for each loop.
> 
> Signed-off-by: Qu Wenruo 
> ---

Reviewed-by: Nikolay Borisov 

> changelog:
> v2:
>   Use rbtree_postorder_for_each_entry_safe() to replace for() loop.
> ---
>  fs/btrfs/relocation.c | 24 +---
>  1 file changed, 9 insertions(+), 15 deletions(-)
> 
> diff --git a/fs/btrfs/relocation.c b/fs/btrfs/relocation.c
> index 8783a1776540..94aa148ccde6 100644
> --- a/fs/btrfs/relocation.c
> +++ b/fs/btrfs/relocation.c
> @@ -2987,7 +2987,7 @@ int relocate_tree_blocks(struct btrfs_trans_handle 
> *trans,
>   struct backref_node *node;
>   struct btrfs_path *path;
>   struct tree_block *block;
> - struct rb_node *rb_node;
> + struct tree_block *next;
>   int ret;
>   int err = 0;
>  
> @@ -2997,29 +2997,23 @@ int relocate_tree_blocks(struct btrfs_trans_handle 
> *trans,
>   goto out_free_blocks;
>   }
>  
> - rb_node = rb_first(blocks);
> - while (rb_node) {
> - block = rb_entry(rb_node, struct tree_block, rb_node);
> + /* Kick in readahead for tree blocks with missing keys */
> + rbtree_postorder_for_each_entry_safe(block, next, blocks, rb_node) {
>   if (!block->key_ready)
>   readahead_tree_block(fs_info, block->bytenr);
> - rb_node = rb_next(rb_node);
>   }
>  
> - rb_node = rb_first(blocks);
> - while (rb_node) {
> - block = rb_entry(rb_node, struct tree_block, rb_node);
> + /* Get first keys */
> + rbtree_postorder_for_each_entry_safe(block, next, blocks, rb_node) {
>   if (!block->key_ready) {
>   err = get_tree_block_key(fs_info, block);
>   if (err)
>   goto out_free_path;
>   }
> - rb_node = rb_next(rb_node);
>   }
>  
> - rb_node = rb_first(blocks);
> - while (rb_node) {
> - block = rb_entry(rb_node, struct tree_block, rb_node);
> -
> + /* Do tree relocation */
> + rbtree_postorder_for_each_entry_safe(block, next, blocks, rb_node) {
>   node = build_backref_tree(rc, >key,
> block->level, block->bytenr);
>   if (IS_ERR(node)) {
> @@ -3030,11 +3024,11 @@ int relocate_tree_blocks(struct btrfs_trans_handle 
> *trans,
>   ret = relocate_tree_block(trans, rc, node, >key,
> path);
>   if (ret < 0) {
> - if (ret != -EAGAIN || rb_node == rb_first(blocks))
> + if (ret != -EAGAIN || >rb_node ==
> +   rb_first(blocks))
>   err = ret;
>   goto out;
>   }
> - rb_node = rb_next(rb_node);
>   }
>  out:
>   err = finish_pending_nodes(trans, rc, path, err);
> 


Re: [PATCH v2 2/2] btrfs: relocation: Remove redundant tree level check

2018-09-21 Thread Nikolay Borisov



On 21.09.2018 10:20, Qu Wenruo wrote:
> Commit 581c1760415c ("btrfs: Validate child tree block's level and first
> key") has made tree block level check mandatory.
> 
> So if tree block level doesn't match, we won't get a valid extent
> buffer.
> The extra WARN_ON() check can be removed completely.
> 
> Signed-off-by: Qu Wenruo 
> ---
> Changelog:
> v2:
>   Added tags.

Nope you didn't :)

> ---
>  fs/btrfs/relocation.c | 1 -
>  1 file changed, 1 deletion(-)
> 
> diff --git a/fs/btrfs/relocation.c b/fs/btrfs/relocation.c
> index 94aa148ccde6..a554e7861b6e 100644
> --- a/fs/btrfs/relocation.c
> +++ b/fs/btrfs/relocation.c
> @@ -2911,7 +2911,6 @@ static int get_tree_block_key(struct btrfs_fs_info 
> *fs_info,
>   free_extent_buffer(eb);
>   return -EIO;
>   }
> - WARN_ON(btrfs_header_level(eb) != block->level);
>   if (block->level == 0)
>   btrfs_item_key_to_cpu(eb, >key, 0);
>   else
> 


[PATCH v2 2/2] btrfs: relocation: Remove redundant tree level check

2018-09-21 Thread Qu Wenruo
Commit 581c1760415c ("btrfs: Validate child tree block's level and first
key") has made tree block level check mandatory.

So if tree block level doesn't match, we won't get a valid extent
buffer.
The extra WARN_ON() check can be removed completely.

Signed-off-by: Qu Wenruo 
---
Changelog:
v2:
  Added tags.
---
 fs/btrfs/relocation.c | 1 -
 1 file changed, 1 deletion(-)

diff --git a/fs/btrfs/relocation.c b/fs/btrfs/relocation.c
index 94aa148ccde6..a554e7861b6e 100644
--- a/fs/btrfs/relocation.c
+++ b/fs/btrfs/relocation.c
@@ -2911,7 +2911,6 @@ static int get_tree_block_key(struct btrfs_fs_info 
*fs_info,
free_extent_buffer(eb);
return -EIO;
}
-   WARN_ON(btrfs_header_level(eb) != block->level);
if (block->level == 0)
btrfs_item_key_to_cpu(eb, >key, 0);
else
-- 
2.19.0



[PATCH v2 1/2] btrfs: relocation: Cleanup while() loop using rbtree_postorder_for_each_entry_safe()

2018-09-21 Thread Qu Wenruo
And add one line comment explaining what we're doing for each loop.

Signed-off-by: Qu Wenruo 
---
changelog:
v2:
  Use rbtree_postorder_for_each_entry_safe() to replace for() loop.
---
 fs/btrfs/relocation.c | 24 +---
 1 file changed, 9 insertions(+), 15 deletions(-)

diff --git a/fs/btrfs/relocation.c b/fs/btrfs/relocation.c
index 8783a1776540..94aa148ccde6 100644
--- a/fs/btrfs/relocation.c
+++ b/fs/btrfs/relocation.c
@@ -2987,7 +2987,7 @@ int relocate_tree_blocks(struct btrfs_trans_handle *trans,
struct backref_node *node;
struct btrfs_path *path;
struct tree_block *block;
-   struct rb_node *rb_node;
+   struct tree_block *next;
int ret;
int err = 0;
 
@@ -2997,29 +2997,23 @@ int relocate_tree_blocks(struct btrfs_trans_handle 
*trans,
goto out_free_blocks;
}
 
-   rb_node = rb_first(blocks);
-   while (rb_node) {
-   block = rb_entry(rb_node, struct tree_block, rb_node);
+   /* Kick in readahead for tree blocks with missing keys */
+   rbtree_postorder_for_each_entry_safe(block, next, blocks, rb_node) {
if (!block->key_ready)
readahead_tree_block(fs_info, block->bytenr);
-   rb_node = rb_next(rb_node);
}
 
-   rb_node = rb_first(blocks);
-   while (rb_node) {
-   block = rb_entry(rb_node, struct tree_block, rb_node);
+   /* Get first keys */
+   rbtree_postorder_for_each_entry_safe(block, next, blocks, rb_node) {
if (!block->key_ready) {
err = get_tree_block_key(fs_info, block);
if (err)
goto out_free_path;
}
-   rb_node = rb_next(rb_node);
}
 
-   rb_node = rb_first(blocks);
-   while (rb_node) {
-   block = rb_entry(rb_node, struct tree_block, rb_node);
-
+   /* Do tree relocation */
+   rbtree_postorder_for_each_entry_safe(block, next, blocks, rb_node) {
node = build_backref_tree(rc, >key,
  block->level, block->bytenr);
if (IS_ERR(node)) {
@@ -3030,11 +3024,11 @@ int relocate_tree_blocks(struct btrfs_trans_handle 
*trans,
ret = relocate_tree_block(trans, rc, node, >key,
  path);
if (ret < 0) {
-   if (ret != -EAGAIN || rb_node == rb_first(blocks))
+   if (ret != -EAGAIN || >rb_node ==
+ rb_first(blocks))
err = ret;
goto out;
}
-   rb_node = rb_next(rb_node);
}
 out:
err = finish_pending_nodes(trans, rc, path, err);
-- 
2.19.0



Re: [PATCH 1/2] btrfs: relocation: Cleanup while() loop using for()

2018-09-21 Thread Qu Wenruo



On 2018/9/21 下午2:53, Nikolay Borisov wrote:
> 
> 
> On 21.09.2018 09:45, Qu Wenruo wrote:
>> And add one line comment explaining what we're doing for each loop.
>>
>> Signed-off-by: Qu Wenruo 
>> ---
>>  fs/btrfs/relocation.c | 15 ++-
>>  1 file changed, 6 insertions(+), 9 deletions(-)
>>
>> diff --git a/fs/btrfs/relocation.c b/fs/btrfs/relocation.c
>> index 8783a1776540..d7f5a11dde20 100644
>> --- a/fs/btrfs/relocation.c
>> +++ b/fs/btrfs/relocation.c
>> @@ -2997,27 +2997,25 @@ int relocate_tree_blocks(struct btrfs_trans_handle 
>> *trans,
>>  goto out_free_blocks;
>>  }
>>  
>> -rb_node = rb_first(blocks);
>> -while (rb_node) {
>> +/* Kick in readahead for tree blocks with missing keys */
>> +for (rb_node = rb_first(blocks); rb_node; rb_node = rb_next(rb_node)) {
> 
> FYI there is already a rbtree_postorder_for_each_entry_safe functino
> which allows to iterate the rb tree in post order. It provides stronger
> guarantees than you actually need (namely the _safe ) but in any case it
> will allows you to get rid of the "block = " line as well as the
> "rb_node =" line. This will remove all superfluous code that's needed to
> handle the iteration.

Great thanks!

This is much better solution, I'll use them in next version.

Thanks,
Qu

> 
>>  block = rb_entry(rb_node, struct tree_block, rb_node);
>>  if (!block->key_ready)
>>  readahead_tree_block(fs_info, block->bytenr);
>> -rb_node = rb_next(rb_node);
>>  }
>>  
>> -rb_node = rb_first(blocks);
>> -while (rb_node) {
>> +/* Get first keys */
>> +for (rb_node = rb_first(blocks); rb_node; rb_node = rb_next(rb_node)) {
>>  block = rb_entry(rb_node, struct tree_block, rb_node);
>>  if (!block->key_ready) {
>>  err = get_tree_block_key(fs_info, block);
>>  if (err)
>>  goto out_free_path;
>>  }
>> -rb_node = rb_next(rb_node);
>>  }
>>  
>> -rb_node = rb_first(blocks);
>> -while (rb_node) {
>> +/* Do tree relocation */
>> +for (rb_node = rb_first(blocks); rb_node; rb_node = rb_next(rb_node)) {
>>  block = rb_entry(rb_node, struct tree_block, rb_node);
>>  
>>  node = build_backref_tree(rc, >key,
>> @@ -3034,7 +3032,6 @@ int relocate_tree_blocks(struct btrfs_trans_handle 
>> *trans,
>>  err = ret;
>>  goto out;
>>  }
>> -rb_node = rb_next(rb_node);
>>  }
>>  out:
>>  err = finish_pending_nodes(trans, rc, path, err);
>>


Re: [PATCH 2/2] btrfs: relocation: Remove redundant tree level check

2018-09-21 Thread Nikolay Borisov



On 21.09.2018 09:45, Qu Wenruo wrote:
> Commit 581c1760415c ("btrfs: Validate child tree block's level and first
> key") has made tree block level check mandatory.
> 
> So if tree block level doesn't match, we won't get a valid extent
> buffer.
> The extra WARN_ON() check can be removed completely.
> 
> Signed-off-by: Qu Wenruo 
Reviewed-by: Nikolay Borisov 
> ---
>  fs/btrfs/relocation.c | 1 -
>  1 file changed, 1 deletion(-)
> 
> diff --git a/fs/btrfs/relocation.c b/fs/btrfs/relocation.c
> index d7f5a11dde20..56e2fab2926d 100644
> --- a/fs/btrfs/relocation.c
> +++ b/fs/btrfs/relocation.c
> @@ -2911,7 +2911,6 @@ static int get_tree_block_key(struct btrfs_fs_info 
> *fs_info,
>   free_extent_buffer(eb);
>   return -EIO;
>   }
> - WARN_ON(btrfs_header_level(eb) != block->level);
>   if (block->level == 0)
>   btrfs_item_key_to_cpu(eb, >key, 0);
>   else
> 


Re: [PATCH 1/2] btrfs: relocation: Cleanup while() loop using for()

2018-09-21 Thread Nikolay Borisov



On 21.09.2018 09:45, Qu Wenruo wrote:
> And add one line comment explaining what we're doing for each loop.
> 
> Signed-off-by: Qu Wenruo 
> ---
>  fs/btrfs/relocation.c | 15 ++-
>  1 file changed, 6 insertions(+), 9 deletions(-)
> 
> diff --git a/fs/btrfs/relocation.c b/fs/btrfs/relocation.c
> index 8783a1776540..d7f5a11dde20 100644
> --- a/fs/btrfs/relocation.c
> +++ b/fs/btrfs/relocation.c
> @@ -2997,27 +2997,25 @@ int relocate_tree_blocks(struct btrfs_trans_handle 
> *trans,
>   goto out_free_blocks;
>   }
>  
> - rb_node = rb_first(blocks);
> - while (rb_node) {
> + /* Kick in readahead for tree blocks with missing keys */
> + for (rb_node = rb_first(blocks); rb_node; rb_node = rb_next(rb_node)) {

FYI there is already a rbtree_postorder_for_each_entry_safe functino
which allows to iterate the rb tree in post order. It provides stronger
guarantees than you actually need (namely the _safe ) but in any case it
will allows you to get rid of the "block = " line as well as the
"rb_node =" line. This will remove all superfluous code that's needed to
handle the iteration.

>   block = rb_entry(rb_node, struct tree_block, rb_node);
>   if (!block->key_ready)
>   readahead_tree_block(fs_info, block->bytenr);
> - rb_node = rb_next(rb_node);
>   }
>  
> - rb_node = rb_first(blocks);
> - while (rb_node) {
> + /* Get first keys */
> + for (rb_node = rb_first(blocks); rb_node; rb_node = rb_next(rb_node)) {
>   block = rb_entry(rb_node, struct tree_block, rb_node);
>   if (!block->key_ready) {
>   err = get_tree_block_key(fs_info, block);
>   if (err)
>   goto out_free_path;
>   }
> - rb_node = rb_next(rb_node);
>   }
>  
> - rb_node = rb_first(blocks);
> - while (rb_node) {
> + /* Do tree relocation */
> + for (rb_node = rb_first(blocks); rb_node; rb_node = rb_next(rb_node)) {
>   block = rb_entry(rb_node, struct tree_block, rb_node);
>  
>   node = build_backref_tree(rc, >key,
> @@ -3034,7 +3032,6 @@ int relocate_tree_blocks(struct btrfs_trans_handle 
> *trans,
>   err = ret;
>   goto out;
>   }
> - rb_node = rb_next(rb_node);
>   }
>  out:
>   err = finish_pending_nodes(trans, rc, path, err);
> 


[PATCH 2/2] btrfs: relocation: Remove redundant tree level check

2018-09-21 Thread Qu Wenruo
Commit 581c1760415c ("btrfs: Validate child tree block's level and first
key") has made tree block level check mandatory.

So if tree block level doesn't match, we won't get a valid extent
buffer.
The extra WARN_ON() check can be removed completely.

Signed-off-by: Qu Wenruo 
---
 fs/btrfs/relocation.c | 1 -
 1 file changed, 1 deletion(-)

diff --git a/fs/btrfs/relocation.c b/fs/btrfs/relocation.c
index d7f5a11dde20..56e2fab2926d 100644
--- a/fs/btrfs/relocation.c
+++ b/fs/btrfs/relocation.c
@@ -2911,7 +2911,6 @@ static int get_tree_block_key(struct btrfs_fs_info 
*fs_info,
free_extent_buffer(eb);
return -EIO;
}
-   WARN_ON(btrfs_header_level(eb) != block->level);
if (block->level == 0)
btrfs_item_key_to_cpu(eb, >key, 0);
else
-- 
2.19.0



[PATCH 1/2] btrfs: relocation: Cleanup while() loop using for()

2018-09-21 Thread Qu Wenruo
And add one line comment explaining what we're doing for each loop.

Signed-off-by: Qu Wenruo 
---
 fs/btrfs/relocation.c | 15 ++-
 1 file changed, 6 insertions(+), 9 deletions(-)

diff --git a/fs/btrfs/relocation.c b/fs/btrfs/relocation.c
index 8783a1776540..d7f5a11dde20 100644
--- a/fs/btrfs/relocation.c
+++ b/fs/btrfs/relocation.c
@@ -2997,27 +2997,25 @@ int relocate_tree_blocks(struct btrfs_trans_handle 
*trans,
goto out_free_blocks;
}
 
-   rb_node = rb_first(blocks);
-   while (rb_node) {
+   /* Kick in readahead for tree blocks with missing keys */
+   for (rb_node = rb_first(blocks); rb_node; rb_node = rb_next(rb_node)) {
block = rb_entry(rb_node, struct tree_block, rb_node);
if (!block->key_ready)
readahead_tree_block(fs_info, block->bytenr);
-   rb_node = rb_next(rb_node);
}
 
-   rb_node = rb_first(blocks);
-   while (rb_node) {
+   /* Get first keys */
+   for (rb_node = rb_first(blocks); rb_node; rb_node = rb_next(rb_node)) {
block = rb_entry(rb_node, struct tree_block, rb_node);
if (!block->key_ready) {
err = get_tree_block_key(fs_info, block);
if (err)
goto out_free_path;
}
-   rb_node = rb_next(rb_node);
}
 
-   rb_node = rb_first(blocks);
-   while (rb_node) {
+   /* Do tree relocation */
+   for (rb_node = rb_first(blocks); rb_node; rb_node = rb_next(rb_node)) {
block = rb_entry(rb_node, struct tree_block, rb_node);
 
node = build_backref_tree(rc, >key,
@@ -3034,7 +3032,6 @@ int relocate_tree_blocks(struct btrfs_trans_handle *trans,
err = ret;
goto out;
}
-   rb_node = rb_next(rb_node);
}
 out:
err = finish_pending_nodes(trans, rc, path, err);
-- 
2.19.0