On Mon, Jun 15, 2015 at 2:41 PM, <[email protected]> wrote:
> From: Jeff Mahoney <[email protected]>
>
> When we clear the dirty bits in btrfs_delete_unused_bgs for extents
> in the empty block group, it results in btrfs_finish_extent_commit being
> unable to discard the freed extents.
>
> The block group removal patch added an alternate path to forget extents
> other than btrfs_finish_extent_commit. As a result, any extents that
> would be freed when the block group is removed aren't discarded. In my
> test run, with a large copy of mixed sized files followed by removal, it
> left nearly 2/3 of extents undiscarded.
>
> To clean up the block groups, we add the removed block group onto a list
> that will be discarded after transaction commit.
>
> Signed-off-by: Jeff Mahoney <[email protected]>
Reviewed-by: Filipe Manana <[email protected]>
Tested-by: Filipe Manana <[email protected]>
> ---
> fs/btrfs/ctree.h | 3 ++
> fs/btrfs/extent-tree.c | 68
> ++++++++++++++++++++++++++++++++++++++++++---
> fs/btrfs/free-space-cache.c | 57 +++++++++++++++++++++----------------
> fs/btrfs/super.c | 2 +-
> fs/btrfs/transaction.c | 2 ++
> fs/btrfs/transaction.h | 2 ++
> 6 files changed, 105 insertions(+), 29 deletions(-)
>
> diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h
> index 6f364e1..780acf1 100644
> --- a/fs/btrfs/ctree.h
> +++ b/fs/btrfs/ctree.h
> @@ -3438,6 +3438,8 @@ int btrfs_remove_block_group(struct btrfs_trans_handle
> *trans,
> struct btrfs_root *root, u64 group_start,
> struct extent_map *em);
> void btrfs_delete_unused_bgs(struct btrfs_fs_info *fs_info);
> +void btrfs_get_block_group_trimming(struct btrfs_block_group_cache *cache);
> +void btrfs_put_block_group_trimming(struct btrfs_block_group_cache *cache);
> void btrfs_create_pending_block_groups(struct btrfs_trans_handle *trans,
> struct btrfs_root *root);
> u64 btrfs_get_alloc_profile(struct btrfs_root *root, int data);
> @@ -4068,6 +4070,7 @@ __printf(5, 6)
> void __btrfs_std_error(struct btrfs_fs_info *fs_info, const char *function,
> unsigned int line, int errno, const char *fmt, ...);
>
> +const char *btrfs_decode_error(int errno);
>
> void __btrfs_abort_transaction(struct btrfs_trans_handle *trans,
> struct btrfs_root *root, const char *function,
> diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c
> index 24b48df..3598440 100644
> --- a/fs/btrfs/extent-tree.c
> +++ b/fs/btrfs/extent-tree.c
> @@ -6103,20 +6103,19 @@ int btrfs_finish_extent_commit(struct
> btrfs_trans_handle *trans,
> struct btrfs_root *root)
> {
> struct btrfs_fs_info *fs_info = root->fs_info;
> + struct btrfs_block_group_cache *block_group, *tmp;
> + struct list_head *deleted_bgs;
> struct extent_io_tree *unpin;
> u64 start;
> u64 end;
> int ret;
>
> - if (trans->aborted)
> - return 0;
> -
> if (fs_info->pinned_extents == &fs_info->freed_extents[0])
> unpin = &fs_info->freed_extents[1];
> else
> unpin = &fs_info->freed_extents[0];
>
> - while (1) {
> + while (!trans->aborted) {
> mutex_lock(&fs_info->unused_bg_unpin_mutex);
> ret = find_first_extent_bit(unpin, 0, &start, &end,
> EXTENT_DIRTY, NULL);
> @@ -6135,6 +6134,34 @@ int btrfs_finish_extent_commit(struct
> btrfs_trans_handle *trans,
> cond_resched();
> }
>
> + /*
> + * Transaction is finished. We don't need the lock anymore. We
> + * do need to clean up the block groups in case of a transaction
> + * abort.
> + */
> + deleted_bgs = &trans->transaction->deleted_bgs;
> + list_for_each_entry_safe(block_group, tmp, deleted_bgs, bg_list) {
> + u64 trimmed = 0;
> +
> + ret = -EROFS;
> + if (!trans->aborted)
> + ret = btrfs_discard_extent(root,
> + block_group->key.objectid,
> + block_group->key.offset,
> + &trimmed);
> +
> + list_del_init(&block_group->bg_list);
> + btrfs_put_block_group_trimming(block_group);
> + btrfs_put_block_group(block_group);
> +
> + if (ret) {
> + const char *errstr = btrfs_decode_error(ret);
> + btrfs_warn(fs_info,
> + "Discard failed while removing blockgroup:
> errno=%d %s\n",
> + ret, errstr);
> + }
> + }
> +
> return 0;
> }
>
> @@ -9914,6 +9941,11 @@ int btrfs_remove_block_group(struct btrfs_trans_handle
> *trans,
> * currently running transaction might finish and a new one start,
> * allowing for new block groups to be created that can reuse the same
> * physical device locations unless we take this special care.
> + *
> + * There may also be an implicit trim operation if the file system
> + * is mounted with -odiscard. The same protections must remain
> + * in place until the extents have been discarded completely when
> + * the transaction commit has completed.
> */
> remove_em = (atomic_read(&block_group->trimming) == 0);
> /*
> @@ -9988,6 +10020,7 @@ void btrfs_delete_unused_bgs(struct btrfs_fs_info
> *fs_info)
> spin_lock(&fs_info->unused_bgs_lock);
> while (!list_empty(&fs_info->unused_bgs)) {
> u64 start, end;
> + int trimming;
>
> block_group = list_first_entry(&fs_info->unused_bgs,
> struct btrfs_block_group_cache,
> @@ -10085,12 +10118,39 @@ void btrfs_delete_unused_bgs(struct btrfs_fs_info
> *fs_info)
> spin_unlock(&block_group->lock);
> spin_unlock(&space_info->lock);
>
> + /* DISCARD can flip during remount */
> + trimming = btrfs_test_opt(root, DISCARD);
> +
> + /* Implicit trim during transaction commit. */
> + if (trimming)
> + btrfs_get_block_group_trimming(block_group);
> +
> /*
> * Btrfs_remove_chunk will abort the transaction if things go
> * horribly wrong.
> */
> ret = btrfs_remove_chunk(trans, root,
> block_group->key.objectid);
> +
> + if (ret) {
> + if (trimming)
> + btrfs_put_block_group_trimming(block_group);
> + goto end_trans;
> + }
> +
> + /*
> + * If we're not mounted with -odiscard, we can just forget
> + * about this block group. Otherwise we'll need to wait
> + * until transaction commit to do the actual discard.
> + */
> + if (trimming) {
> + WARN_ON(!list_empty(&block_group->bg_list));
> + spin_lock(&trans->transaction->deleted_bgs_lock);
> + list_move(&block_group->bg_list,
> + &trans->transaction->deleted_bgs);
> + spin_unlock(&trans->transaction->deleted_bgs_lock);
> + btrfs_get_block_group(block_group);
> + }
> end_trans:
> btrfs_end_transaction(trans, root);
> next:
> diff --git a/fs/btrfs/free-space-cache.c b/fs/btrfs/free-space-cache.c
> index 9dbe5b5..c79253e 100644
> --- a/fs/btrfs/free-space-cache.c
> +++ b/fs/btrfs/free-space-cache.c
> @@ -3274,35 +3274,23 @@ next:
> return ret;
> }
>
> -int btrfs_trim_block_group(struct btrfs_block_group_cache *block_group,
> - u64 *trimmed, u64 start, u64 end, u64 minlen)
> +void btrfs_get_block_group_trimming(struct btrfs_block_group_cache *cache)
> {
> - int ret;
> + atomic_inc(&cache->trimming);
> +}
>
> - *trimmed = 0;
> +void btrfs_put_block_group_trimming(struct btrfs_block_group_cache
> *block_group)
> +{
> + struct extent_map_tree *em_tree;
> + struct extent_map *em;
> + bool cleanup;
>
> spin_lock(&block_group->lock);
> - if (block_group->removed) {
> - spin_unlock(&block_group->lock);
> - return 0;
> - }
> - atomic_inc(&block_group->trimming);
> + cleanup = (atomic_dec_and_test(&block_group->trimming) &&
> + block_group->removed);
> spin_unlock(&block_group->lock);
>
> - ret = trim_no_bitmap(block_group, trimmed, start, end, minlen);
> - if (ret)
> - goto out;
> -
> - ret = trim_bitmaps(block_group, trimmed, start, end, minlen);
> -out:
> - spin_lock(&block_group->lock);
> - if (atomic_dec_and_test(&block_group->trimming) &&
> - block_group->removed) {
> - struct extent_map_tree *em_tree;
> - struct extent_map *em;
> -
> - spin_unlock(&block_group->lock);
> -
> + if (cleanup) {
> lock_chunks(block_group->fs_info->chunk_root);
> em_tree = &block_group->fs_info->mapping_tree.map_tree;
> write_lock(&em_tree->lock);
> @@ -3326,10 +3314,31 @@ out:
> * this block group have left 1 entry each one. Free them.
> */
> __btrfs_remove_free_space_cache(block_group->free_space_ctl);
> - } else {
> + }
> +}
> +
> +int btrfs_trim_block_group(struct btrfs_block_group_cache *block_group,
> + u64 *trimmed, u64 start, u64 end, u64 minlen)
> +{
> + int ret;
> +
> + *trimmed = 0;
> +
> + spin_lock(&block_group->lock);
> + if (block_group->removed) {
> spin_unlock(&block_group->lock);
> + return 0;
> }
> + btrfs_get_block_group_trimming(block_group);
> + spin_unlock(&block_group->lock);
> +
> + ret = trim_no_bitmap(block_group, trimmed, start, end, minlen);
> + if (ret)
> + goto out;
>
> + ret = trim_bitmaps(block_group, trimmed, start, end, minlen);
> +out:
> + btrfs_put_block_group_trimming(block_group);
> return ret;
> }
>
> diff --git a/fs/btrfs/super.c b/fs/btrfs/super.c
> index 2ccd8d4..a80da03 100644
> --- a/fs/btrfs/super.c
> +++ b/fs/btrfs/super.c
> @@ -69,7 +69,7 @@ static struct file_system_type btrfs_fs_type;
>
> static int btrfs_remount(struct super_block *sb, int *flags, char *data);
>
> -static const char *btrfs_decode_error(int errno)
> +const char *btrfs_decode_error(int errno)
> {
> char *errstr = "unknown";
>
> diff --git a/fs/btrfs/transaction.c b/fs/btrfs/transaction.c
> index 5628e25..2005262 100644
> --- a/fs/btrfs/transaction.c
> +++ b/fs/btrfs/transaction.c
> @@ -256,6 +256,8 @@ loop:
> mutex_init(&cur_trans->cache_write_mutex);
> cur_trans->num_dirty_bgs = 0;
> spin_lock_init(&cur_trans->dirty_bgs_lock);
> + INIT_LIST_HEAD(&cur_trans->deleted_bgs);
> + spin_lock_init(&cur_trans->deleted_bgs_lock);
> list_add_tail(&cur_trans->list, &fs_info->trans_list);
> extent_io_tree_init(&cur_trans->dirty_pages,
> fs_info->btree_inode->i_mapping);
> diff --git a/fs/btrfs/transaction.h b/fs/btrfs/transaction.h
> index 0b24755..14325f2 100644
> --- a/fs/btrfs/transaction.h
> +++ b/fs/btrfs/transaction.h
> @@ -74,6 +74,8 @@ struct btrfs_transaction {
> */
> struct mutex cache_write_mutex;
> spinlock_t dirty_bgs_lock;
> + struct list_head deleted_bgs;
> + spinlock_t deleted_bgs_lock;
> struct btrfs_delayed_ref_root delayed_refs;
> int aborted;
> int dirty_bg_run;
> --
> 2.4.3
>
> --
> To unsubscribe from this list: send the line "unsubscribe linux-btrfs" in
> the body of a message to [email protected]
> More majordomo info at http://vger.kernel.org/majordomo-info.html
--
Filipe David Manana,
"Reasonable men adapt themselves to the world.
Unreasonable men adapt the world to themselves.
That's why all progress depends on unreasonable men."
--
To unsubscribe from this list: send the line "unsubscribe linux-btrfs" in
the body of a message to [email protected]
More majordomo info at http://vger.kernel.org/majordomo-info.html