On Tue 18-07-17 18:46:01, Davidlohr Bueso wrote:
> This patch replaces two uses of rbtrees to the cached version for
> better rb_first() performance during:
> 
> * Tree traversals to mark blocks as used during ext4_mb_init_cache()
> looping. (struct ext4_group_info)
> 
> * Storing ->curr_node for dx_readdir(). (struct dir_private_info)
> 
> Caching the leftmost node comes at negligible runtime cost, thus the
> only downside is that the aforementioned structures are enlarged for
> the extra pointer. No changes in semantics whatsoever.
> 
> Cc: [email protected]
> Signed-off-by: Davidlohr Bueso <[email protected]>

Hum, so I'm somewhat undecided whether this is worth the churn. For free
blocks rb_tree we use rb_first() only in ext4_mb_generate_from_freelist()
which gets called only when generating new buddy bitmap from on-disk bitmap
and we traverse the whole tree after that - thus the extra cost of
rb_first() is a) well hidden in the total cost of iteration, b) rather rare
anyway.

Similarly for the H-tree directory code, we call rb_first() in
ext4_dx_readdir() only to start an iteration over the whole B-tree and in
such case I don't think optimizing rb_first() makes a big difference
(maintaining cached value is going to have about the same cost as we save
by using it).

                                                                Honza

> ---
> This is part of the rbtree internal caching series:
> https://lwn.net/Articles/726809/
> 
>  fs/ext4/dir.c     | 24 ++++++++++++++----------
>  fs/ext4/ext4.h    |  4 ++--
>  fs/ext4/mballoc.c | 22 ++++++++++++----------
>  3 files changed, 28 insertions(+), 22 deletions(-)
> 
> diff --git a/fs/ext4/dir.c b/fs/ext4/dir.c
> index e8b365000d73..011df6bba13c 100644
> --- a/fs/ext4/dir.c
> +++ b/fs/ext4/dir.c
> @@ -391,18 +391,18 @@ struct fname {
>   * This function implements a non-recursive way of freeing all of the
>   * nodes in the red-black tree.
>   */
> -static void free_rb_tree_fname(struct rb_root *root)
> +static void free_rb_tree_fname(struct rb_root_cached *root)
>  {
>       struct fname *fname, *next;
>  
> -     rbtree_postorder_for_each_entry_safe(fname, next, root, rb_hash)
> +     rbtree_postorder_for_each_entry_safe(fname, next, &root->rb_root, rb_hash)
>               while (fname) {
>                       struct fname *old = fname;
>                       fname = fname->next;
>                       kfree(old);
>               }
>  
> -     *root = RB_ROOT;
> +     *root = RB_ROOT_CACHED;
>  }
>  
>  
> @@ -441,9 +441,10 @@ int ext4_htree_store_dirent(struct file *dir_file, __u32 hash,
>       struct fname *fname, *new_fn;
>       struct dir_private_info *info;
>       int len;
> +     bool leftmost = true;
>  
>       info = dir_file->private_data;
> -     p = &info->root.rb_node;
> +     p = &info->root.rb_root.rb_node;
>  
>       /* Create and allocate the fname structure */
>       len = sizeof(struct fname) + ent_name->len + 1;
> @@ -475,16 +476,19 @@ int ext4_htree_store_dirent(struct file *dir_file, __u32 hash,
>  
>               if (new_fn->hash < fname->hash)
>                       p = &(*p)->rb_left;
> -             else if (new_fn->hash > fname->hash)
> +             else if (new_fn->hash > fname->hash) {
>                       p = &(*p)->rb_right;
> -             else if (new_fn->minor_hash < fname->minor_hash)
> +                     leftmost = false;
> +             } else if (new_fn->minor_hash < fname->minor_hash)
>                       p = &(*p)->rb_left;
> -             else /* if (new_fn->minor_hash > fname->minor_hash) */
> +             else { /* if (new_fn->minor_hash > fname->minor_hash) */
>                       p = &(*p)->rb_right;
> +                     leftmost = false;
> +             }
>       }
>  
>       rb_link_node(&new_fn->rb_hash, parent, p);
> -     rb_insert_color(&new_fn->rb_hash, &info->root);
> +     rb_insert_color_cached(&new_fn->rb_hash, &info->root, leftmost);
>       return 0;
>  }
>  
> @@ -558,7 +562,7 @@ static int ext4_dx_readdir(struct file *file, struct dir_context *ctx)
>               info->extra_fname = NULL;
>               goto next_node;
>       } else if (!info->curr_node)
> -             info->curr_node = rb_first(&info->root);
> +             info->curr_node = rb_first_cached(&info->root);
>  
>       while (1) {
>               /*
> @@ -580,7 +584,7 @@ static int ext4_dx_readdir(struct file *file, struct dir_context *ctx)
>                               ctx->pos = ext4_get_htree_eof(file);
>                               break;
>                       }
> -                     info->curr_node = rb_first(&info->root);
> +                     info->curr_node = rb_first_cached(&info->root);
>               }
>  
>               fname = rb_entry(info->curr_node, struct fname, rb_hash);
> diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h
> index 9ebde0cd632e..0242c6801600 100644
> --- a/fs/ext4/ext4.h
> +++ b/fs/ext4/ext4.h
> @@ -2116,7 +2116,7 @@ static inline bool ext4_is_quota_file(struct inode *inode)
>   * readdir operations in hash tree order.
>   */
>  struct dir_private_info {
> -     struct rb_root  root;
> +     struct          rb_root_cached  root;
>       struct rb_node  *curr_node;
>       struct fname    *extra_fname;
>       loff_t          last_pos;
> @@ -2883,7 +2883,7 @@ int ext4_update_disksize_before_punch(struct inode *inode, loff_t offset,
>  
>  struct ext4_group_info {
>       unsigned long   bb_state;
> -     struct rb_root  bb_free_root;
> +     struct          rb_root_cached  bb_free_root;
>       ext4_grpblk_t   bb_first_free;  /* first free block */
>       ext4_grpblk_t   bb_free;        /* total free blocks */
>       ext4_grpblk_t   bb_fragments;   /* nr of freespace fragments */
> diff --git a/fs/ext4/mballoc.c b/fs/ext4/mballoc.c
> index 581e357e8406..df36f9123a99 100644
> --- a/fs/ext4/mballoc.c
> +++ b/fs/ext4/mballoc.c
> @@ -2462,7 +2462,7 @@ int ext4_mb_add_groupinfo(struct super_block *sb, ext4_group_t group,
>  
>       INIT_LIST_HEAD(&meta_group_info[i]->bb_prealloc_list);
>       init_rwsem(&meta_group_info[i]->alloc_sem);
> -     meta_group_info[i]->bb_free_root = RB_ROOT;
> +     meta_group_info[i]->bb_free_root = RB_ROOT_CACHED;
>       meta_group_info[i]->bb_largest_free_order = -1;  /* uninit */
>  
>  #ifdef DOUBLE_CHECK
> @@ -2824,7 +2824,7 @@ static void ext4_free_data_in_buddy(struct super_block *sb,
>       count2++;
>       ext4_lock_group(sb, entry->efd_group);
>       /* Take it out of per group rb tree */
> -     rb_erase(&entry->efd_node, &(db->bb_free_root));
> +     rb_erase_cached(&entry->efd_node, &(db->bb_free_root));
>       mb_free_blocks(NULL, &e4b, entry->efd_start_cluster, entry->efd_count);
>  
>       /*
> @@ -2836,7 +2836,7 @@ static void ext4_free_data_in_buddy(struct super_block *sb,
>       if (!test_opt(sb, DISCARD))
>               EXT4_MB_GRP_CLEAR_TRIMMED(db);
>  
> -     if (!db->bb_free_root.rb_node) {
> +     if (!db->bb_free_root.rb_root.rb_node) {
>               /* No more items in the per group rb tree
>                * balance refcounts from ext4_mb_free_metadata()
>                */
> @@ -3519,7 +3519,7 @@ static void ext4_mb_generate_from_freelist(struct super_block *sb, void *bitmap,
>       struct ext4_free_data *entry;
>  
>       grp = ext4_get_group_info(sb, group);
> -     n = rb_first(&(grp->bb_free_root));
> +     n = rb_first_cached(&(grp->bb_free_root));
>  
>       while (n) {
>               entry = rb_entry(n, struct ext4_free_data, efd_node);
> @@ -4624,7 +4624,7 @@ ext4_fsblk_t ext4_mb_new_blocks(handle_t *handle,
>  static void ext4_try_merge_freed_extent(struct ext4_sb_info *sbi,
>                                       struct ext4_free_data *entry,
>                                       struct ext4_free_data *new_entry,
> -                                     struct rb_root *entry_rb_root)
> +                                     struct rb_root_cached *entry_rb_root)
>  {
>       if ((entry->efd_tid != new_entry->efd_tid) ||
>           (entry->efd_group != new_entry->efd_group))
> @@ -4641,7 +4641,7 @@ static void ext4_try_merge_freed_extent(struct ext4_sb_info *sbi,
>       spin_lock(&sbi->s_md_lock);
>       list_del(&entry->efd_list);
>       spin_unlock(&sbi->s_md_lock);
> -     rb_erase(&entry->efd_node, entry_rb_root);
> +     rb_erase_cached(&entry->efd_node, entry_rb_root);
>       kmem_cache_free(ext4_free_data_cachep, entry);
>  }
>  
> @@ -4656,8 +4656,9 @@ ext4_mb_free_metadata(handle_t *handle, struct ext4_buddy *e4b,
>       struct ext4_group_info *db = e4b->bd_info;
>       struct super_block *sb = e4b->bd_sb;
>       struct ext4_sb_info *sbi = EXT4_SB(sb);
> -     struct rb_node **n = &db->bb_free_root.rb_node, *node;
> +     struct rb_node **n = &db->bb_free_root.rb_root.rb_node, *node;
>       struct rb_node *parent = NULL, *new_node;
> +     bool leftmost = true;
>  
>       BUG_ON(!ext4_handle_valid(handle));
>       BUG_ON(e4b->bd_bitmap_page == NULL);
> @@ -4680,9 +4681,10 @@ ext4_mb_free_metadata(handle_t *handle, struct ext4_buddy *e4b,
>               entry = rb_entry(parent, struct ext4_free_data, efd_node);
>               if (cluster < entry->efd_start_cluster)
>                       n = &(*n)->rb_left;
> -             else if (cluster >= (entry->efd_start_cluster + entry->efd_count))
> +             else if (cluster >= (entry->efd_start_cluster + entry->efd_count)) {
>                       n = &(*n)->rb_right;
> -             else {
> +                     leftmost = false;
> +             } else {
>                       ext4_grp_locked_error(sb, group, 0,
>                               ext4_group_first_block_no(sb, group) +
>                               EXT4_C2B(sbi, cluster),
> @@ -4692,7 +4694,7 @@ ext4_mb_free_metadata(handle_t *handle, struct ext4_buddy *e4b,
>       }
>  
>       rb_link_node(new_node, parent, n);
> -     rb_insert_color(new_node, &db->bb_free_root);
> +     rb_insert_color_cached(new_node, &db->bb_free_root, leftmost);
>  
>       /* Now try to see the extent can be merged to left and right */
>       node = rb_prev(new_node);
> -- 
> 2.12.0
> 
-- 
Jan Kara <[email protected]>
SUSE Labs, CR

Reply via email to