On Mon, Jun 15, 2015 at 2:41 PM,  <[email protected]> wrote:
> From: Jeff Mahoney <[email protected]>
>
> When we clear the dirty bits in btrfs_delete_unused_bgs for extents
> in the empty block group, it results in btrfs_finish_extent_commit being
> unable to discard the freed extents.
>
> The block group removal patch added an alternate path to forget extents
> other than btrfs_finish_extent_commit.  As a result, any extents that
> would be freed when the block group is removed aren't discarded.  In my
> test run, with a large copy of mixed sized files followed by removal, it
> left nearly 2/3 of extents undiscarded.
>
> To clean up the block groups, we add the removed block group onto a list
> that will be discarded after transaction commit.
>
> Signed-off-by: Jeff Mahoney <[email protected]>
Reviewed-by: Filipe Manana <[email protected]>
Tested-by: Filipe Manana <[email protected]>

> ---
>  fs/btrfs/ctree.h            |  3 ++
>  fs/btrfs/extent-tree.c      | 68 
> ++++++++++++++++++++++++++++++++++++++++++---
>  fs/btrfs/free-space-cache.c | 57 +++++++++++++++++++++----------------
>  fs/btrfs/super.c            |  2 +-
>  fs/btrfs/transaction.c      |  2 ++
>  fs/btrfs/transaction.h      |  2 ++
>  6 files changed, 105 insertions(+), 29 deletions(-)
>
> diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h
> index 6f364e1..780acf1 100644
> --- a/fs/btrfs/ctree.h
> +++ b/fs/btrfs/ctree.h
> @@ -3438,6 +3438,8 @@ int btrfs_remove_block_group(struct btrfs_trans_handle 
> *trans,
>                              struct btrfs_root *root, u64 group_start,
>                              struct extent_map *em);
>  void btrfs_delete_unused_bgs(struct btrfs_fs_info *fs_info);
> +void btrfs_get_block_group_trimming(struct btrfs_block_group_cache *cache);
> +void btrfs_put_block_group_trimming(struct btrfs_block_group_cache *cache);
>  void btrfs_create_pending_block_groups(struct btrfs_trans_handle *trans,
>                                        struct btrfs_root *root);
>  u64 btrfs_get_alloc_profile(struct btrfs_root *root, int data);
> @@ -4068,6 +4070,7 @@ __printf(5, 6)
>  void __btrfs_std_error(struct btrfs_fs_info *fs_info, const char *function,
>                      unsigned int line, int errno, const char *fmt, ...);
>
> +const char *btrfs_decode_error(int errno);
>
>  void __btrfs_abort_transaction(struct btrfs_trans_handle *trans,
>                                struct btrfs_root *root, const char *function,
> diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c
> index 24b48df..3598440 100644
> --- a/fs/btrfs/extent-tree.c
> +++ b/fs/btrfs/extent-tree.c
> @@ -6103,20 +6103,19 @@ int btrfs_finish_extent_commit(struct 
> btrfs_trans_handle *trans,
>                                struct btrfs_root *root)
>  {
>         struct btrfs_fs_info *fs_info = root->fs_info;
> +       struct btrfs_block_group_cache *block_group, *tmp;
> +       struct list_head *deleted_bgs;
>         struct extent_io_tree *unpin;
>         u64 start;
>         u64 end;
>         int ret;
>
> -       if (trans->aborted)
> -               return 0;
> -
>         if (fs_info->pinned_extents == &fs_info->freed_extents[0])
>                 unpin = &fs_info->freed_extents[1];
>         else
>                 unpin = &fs_info->freed_extents[0];
>
> -       while (1) {
> +       while (!trans->aborted) {
>                 mutex_lock(&fs_info->unused_bg_unpin_mutex);
>                 ret = find_first_extent_bit(unpin, 0, &start, &end,
>                                             EXTENT_DIRTY, NULL);
> @@ -6135,6 +6134,34 @@ int btrfs_finish_extent_commit(struct 
> btrfs_trans_handle *trans,
>                 cond_resched();
>         }
>
> +       /*
> +        * Transaction is finished.  We don't need the lock anymore.  We
> +        * do need to clean up the block groups in case of a transaction
> +        * abort.
> +        */
> +       deleted_bgs = &trans->transaction->deleted_bgs;
> +       list_for_each_entry_safe(block_group, tmp, deleted_bgs, bg_list) {
> +               u64 trimmed = 0;
> +
> +               ret = -EROFS;
> +               if (!trans->aborted)
> +                       ret = btrfs_discard_extent(root,
> +                                                  block_group->key.objectid,
> +                                                  block_group->key.offset,
> +                                                  &trimmed);
> +
> +               list_del_init(&block_group->bg_list);
> +               btrfs_put_block_group_trimming(block_group);
> +               btrfs_put_block_group(block_group);
> +
> +               if (ret) {
> +                       const char *errstr = btrfs_decode_error(ret);
> +                       btrfs_warn(fs_info,
> +                                  "Discard failed while removing blockgroup: 
> errno=%d %s\n",
> +                                  ret, errstr);
> +               }
> +       }
> +
>         return 0;
>  }
>
> @@ -9914,6 +9941,11 @@ int btrfs_remove_block_group(struct btrfs_trans_handle 
> *trans,
>          * currently running transaction might finish and a new one start,
>          * allowing for new block groups to be created that can reuse the same
>          * physical device locations unless we take this special care.
> +        *
> +        * There may also be an implicit trim operation if the file system
> +        * is mounted with -odiscard. The same protections must remain
> +        * in place until the extents have been discarded completely when
> +        * the transaction commit has completed.
>          */
>         remove_em = (atomic_read(&block_group->trimming) == 0);
>         /*
> @@ -9988,6 +10020,7 @@ void btrfs_delete_unused_bgs(struct btrfs_fs_info 
> *fs_info)
>         spin_lock(&fs_info->unused_bgs_lock);
>         while (!list_empty(&fs_info->unused_bgs)) {
>                 u64 start, end;
> +               int trimming;
>
>                 block_group = list_first_entry(&fs_info->unused_bgs,
>                                                struct btrfs_block_group_cache,
> @@ -10085,12 +10118,39 @@ void btrfs_delete_unused_bgs(struct btrfs_fs_info 
> *fs_info)
>                 spin_unlock(&block_group->lock);
>                 spin_unlock(&space_info->lock);
>
> +               /* DISCARD can flip during remount */
> +               trimming = btrfs_test_opt(root, DISCARD);
> +
> +               /* Implicit trim during transaction commit. */
> +               if (trimming)
> +                       btrfs_get_block_group_trimming(block_group);
> +
>                 /*
>                  * Btrfs_remove_chunk will abort the transaction if things go
>                  * horribly wrong.
>                  */
>                 ret = btrfs_remove_chunk(trans, root,
>                                          block_group->key.objectid);
> +
> +               if (ret) {
> +                       if (trimming)
> +                               btrfs_put_block_group_trimming(block_group);
> +                       goto end_trans;
> +               }
> +
> +               /*
> +                * If we're not mounted with -odiscard, we can just forget
> +                * about this block group. Otherwise we'll need to wait
> +                * until transaction commit to do the actual discard.
> +                */
> +               if (trimming) {
> +                       WARN_ON(!list_empty(&block_group->bg_list));
> +                       spin_lock(&trans->transaction->deleted_bgs_lock);
> +                       list_move(&block_group->bg_list,
> +                                 &trans->transaction->deleted_bgs);
> +                       spin_unlock(&trans->transaction->deleted_bgs_lock);
> +                       btrfs_get_block_group(block_group);
> +               }
>  end_trans:
>                 btrfs_end_transaction(trans, root);
>  next:
> diff --git a/fs/btrfs/free-space-cache.c b/fs/btrfs/free-space-cache.c
> index 9dbe5b5..c79253e 100644
> --- a/fs/btrfs/free-space-cache.c
> +++ b/fs/btrfs/free-space-cache.c
> @@ -3274,35 +3274,23 @@ next:
>         return ret;
>  }
>
> -int btrfs_trim_block_group(struct btrfs_block_group_cache *block_group,
> -                          u64 *trimmed, u64 start, u64 end, u64 minlen)
> +void btrfs_get_block_group_trimming(struct btrfs_block_group_cache *cache)
>  {
> -       int ret;
> +       atomic_inc(&cache->trimming);
> +}
>
> -       *trimmed = 0;
> +void btrfs_put_block_group_trimming(struct btrfs_block_group_cache 
> *block_group)
> +{
> +       struct extent_map_tree *em_tree;
> +       struct extent_map *em;
> +       bool cleanup;
>
>         spin_lock(&block_group->lock);
> -       if (block_group->removed) {
> -               spin_unlock(&block_group->lock);
> -               return 0;
> -       }
> -       atomic_inc(&block_group->trimming);
> +       cleanup = (atomic_dec_and_test(&block_group->trimming) &&
> +                  block_group->removed);
>         spin_unlock(&block_group->lock);
>
> -       ret = trim_no_bitmap(block_group, trimmed, start, end, minlen);
> -       if (ret)
> -               goto out;
> -
> -       ret = trim_bitmaps(block_group, trimmed, start, end, minlen);
> -out:
> -       spin_lock(&block_group->lock);
> -       if (atomic_dec_and_test(&block_group->trimming) &&
> -           block_group->removed) {
> -               struct extent_map_tree *em_tree;
> -               struct extent_map *em;
> -
> -               spin_unlock(&block_group->lock);
> -
> +       if (cleanup) {
>                 lock_chunks(block_group->fs_info->chunk_root);
>                 em_tree = &block_group->fs_info->mapping_tree.map_tree;
>                 write_lock(&em_tree->lock);
> @@ -3326,10 +3314,31 @@ out:
>                  * this block group have left 1 entry each one. Free them.
>                  */
>                 __btrfs_remove_free_space_cache(block_group->free_space_ctl);
> -       } else {
> +       }
> +}
> +
> +int btrfs_trim_block_group(struct btrfs_block_group_cache *block_group,
> +                          u64 *trimmed, u64 start, u64 end, u64 minlen)
> +{
> +       int ret;
> +
> +       *trimmed = 0;
> +
> +       spin_lock(&block_group->lock);
> +       if (block_group->removed) {
>                 spin_unlock(&block_group->lock);
> +               return 0;
>         }
> +       btrfs_get_block_group_trimming(block_group);
> +       spin_unlock(&block_group->lock);
> +
> +       ret = trim_no_bitmap(block_group, trimmed, start, end, minlen);
> +       if (ret)
> +               goto out;
>
> +       ret = trim_bitmaps(block_group, trimmed, start, end, minlen);
> +out:
> +       btrfs_put_block_group_trimming(block_group);
>         return ret;
>  }
>
> diff --git a/fs/btrfs/super.c b/fs/btrfs/super.c
> index 2ccd8d4..a80da03 100644
> --- a/fs/btrfs/super.c
> +++ b/fs/btrfs/super.c
> @@ -69,7 +69,7 @@ static struct file_system_type btrfs_fs_type;
>
>  static int btrfs_remount(struct super_block *sb, int *flags, char *data);
>
> -static const char *btrfs_decode_error(int errno)
> +const char *btrfs_decode_error(int errno)
>  {
>         char *errstr = "unknown";
>
> diff --git a/fs/btrfs/transaction.c b/fs/btrfs/transaction.c
> index 5628e25..2005262 100644
> --- a/fs/btrfs/transaction.c
> +++ b/fs/btrfs/transaction.c
> @@ -256,6 +256,8 @@ loop:
>         mutex_init(&cur_trans->cache_write_mutex);
>         cur_trans->num_dirty_bgs = 0;
>         spin_lock_init(&cur_trans->dirty_bgs_lock);
> +       INIT_LIST_HEAD(&cur_trans->deleted_bgs);
> +       spin_lock_init(&cur_trans->deleted_bgs_lock);
>         list_add_tail(&cur_trans->list, &fs_info->trans_list);
>         extent_io_tree_init(&cur_trans->dirty_pages,
>                              fs_info->btree_inode->i_mapping);
> diff --git a/fs/btrfs/transaction.h b/fs/btrfs/transaction.h
> index 0b24755..14325f2 100644
> --- a/fs/btrfs/transaction.h
> +++ b/fs/btrfs/transaction.h
> @@ -74,6 +74,8 @@ struct btrfs_transaction {
>          */
>         struct mutex cache_write_mutex;
>         spinlock_t dirty_bgs_lock;
> +       struct list_head deleted_bgs;
> +       spinlock_t deleted_bgs_lock;
>         struct btrfs_delayed_ref_root delayed_refs;
>         int aborted;
>         int dirty_bg_run;
> --
> 2.4.3
>
> --
> To unsubscribe from this list: send the line "unsubscribe linux-btrfs" in
> the body of a message to [email protected]
> More majordomo info at  http://vger.kernel.org/majordomo-info.html



-- 
Filipe David Manana,

"Reasonable men adapt themselves to the world.
 Unreasonable men adapt the world to themselves.
 That's why all progress depends on unreasonable men."
--
To unsubscribe from this list: send the line "unsubscribe linux-btrfs" in
the body of a message to [email protected]
More majordomo info at  http://vger.kernel.org/majordomo-info.html

Reply via email to