Re: [PATCH] Btrfs: allow us to overcommit our enospc reservations TEST THIS PLEASE!!!

Christian Brunner Thu, 13 Oct 2011 08:03:20 -0700

2011/10/13 Josef Bacik <jo...@redhat.com>:
[...]
>> >> [  175.956273] kernel BUG at fs/btrfs/inode.c:2176!
>> >
>> > OK, I think I see what's happening. This patch replaces the previous one;
>> > let me know how it goes.  Thanks,
>> >
>>
>> Getting a slightly different BUG this time:
>>
>
> OK, it looks like I've fixed the original problem and now we're hitting a
> problem with the free space cache.  This patch will replace the last one; it's
> all the fixes up to now plus a new set of BUG_ON()s to figure out which free
> space cache inode is screwing us up.  Thanks,
>
> Josef
>
>
> diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c
> index fc0de68..e595372 100644
> --- a/fs/btrfs/extent-tree.c
> +++ b/fs/btrfs/extent-tree.c
> @@ -3334,7 +3334,7 @@ out:
>  * shrink metadata reservation for delalloc
>  */
>  static int shrink_delalloc(struct btrfs_trans_handle *trans,
> -                          struct btrfs_root *root, u64 to_reclaim, int sync)
> +                          struct btrfs_root *root, u64 to_reclaim, int 
> retries)
>  {
>        struct btrfs_block_rsv *block_rsv;
>        struct btrfs_space_info *space_info;
> @@ -3365,12 +3365,10 @@ static int shrink_delalloc(struct btrfs_trans_handle 
> *trans,
>        }
>
>        max_reclaim = min(reserved, to_reclaim);
> +       if (max_reclaim > (2 * 1024 * 1024))
> +               nr_pages = max_reclaim >> PAGE_CACHE_SHIFT;
>
>        while (loops < 1024) {
> -               /* have the flusher threads jump in and do some IO */
> -               smp_mb();
> -               nr_pages = min_t(unsigned long, nr_pages,
> -                      root->fs_info->delalloc_bytes >> PAGE_CACHE_SHIFT);
>                writeback_inodes_sb_nr_if_idle(root->fs_info->sb, nr_pages);
>
>                spin_lock(&space_info->lock);
> @@ -3384,14 +3382,22 @@ static int shrink_delalloc(struct btrfs_trans_handle 
> *trans,
>                if (reserved == 0 || reclaimed >= max_reclaim)
>                        break;
>
> -               if (trans && trans->transaction->blocked)
> +               if (trans)
>                        return -EAGAIN;
>
> -               time_left = schedule_timeout_interruptible(1);
> +               if (!retries) {
> +                       time_left = schedule_timeout_interruptible(1);
>
> -               /* We were interrupted, exit */
> -               if (time_left)
> -                       break;
> +                       /* We were interrupted, exit */
> +                       if (time_left)
> +                               break;
> +               } else {
> +                       /*
> +                        * We've already done this song and dance once, let's
> +                        * really wait for some work to get done.
> +                        */
> +                       btrfs_wait_ordered_extents(root, 0, 0);
> +               }
>
>                /* we've kicked the IO a few times, if anything has been freed,
>                 * exit.  There is no sense in looping here for a long time
> @@ -3399,15 +3405,13 @@ static int shrink_delalloc(struct btrfs_trans_handle 
> *trans,
>                 * just too many writers without enough free space
>                 */
>
> -               if (loops > 3) {
> +               if (!retries && loops > 3) {
>                        smp_mb();
>                        if (progress != space_info->reservation_progress)
>                                break;
>                }
>
>        }
> -       if (reclaimed < to_reclaim && !trans)
> -               btrfs_wait_ordered_extents(root, 0, 0);
>        return reclaimed >= to_reclaim;
>  }
>
> @@ -3552,7 +3556,7 @@ again:
>         * We do synchronous shrinking since we don't actually unreserve
>         * metadata until after the IO is completed.
>         */
> -       ret = shrink_delalloc(trans, root, num_bytes, 1);
> +       ret = shrink_delalloc(trans, root, num_bytes, retries);
>        if (ret < 0)
>                goto out;
>
> @@ -3568,17 +3572,6 @@ again:
>                goto again;
>        }
>
> -       /*
> -        * Not enough space to be reclaimed, don't bother committing the
> -        * transaction.
> -        */
> -       spin_lock(&space_info->lock);
> -       if (space_info->bytes_pinned < orig_bytes)
> -               ret = -ENOSPC;
> -       spin_unlock(&space_info->lock);
> -       if (ret)
> -               goto out;
> -
>        ret = -EAGAIN;
>        if (trans)
>                goto out;
> diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c
> index d6ba353..cb63904 100644
> --- a/fs/btrfs/inode.c
> +++ b/fs/btrfs/inode.c
> @@ -782,7 +782,8 @@ static noinline int cow_file_range(struct inode *inode,
>        struct extent_map_tree *em_tree = &BTRFS_I(inode)->extent_tree;
>        int ret = 0;
>
> -       BUG_ON(btrfs_is_free_space_inode(root, inode));
> +       BUG_ON(root == root->fs_info->tree_root);
> +       BUG_ON(BTRFS_I(inode)->location.objectid == BTRFS_FREE_INO_OBJECTID);
>        trans = btrfs_join_transaction(root);
>        BUG_ON(IS_ERR(trans));
>        trans->block_rsv = &root->fs_info->delalloc_block_rsv;
> @@ -2790,7 +2791,8 @@ static struct btrfs_trans_handle 
> *__unlink_start_trans(struct inode *dir,
>                return ERR_PTR(-ENOMEM);
>        }
>
> -       trans = btrfs_start_transaction(root, 0);
> +       /* 1 for the orphan item */
> +       trans = btrfs_start_transaction(root, 1);
>        if (IS_ERR(trans)) {
>                btrfs_free_path(path);
>                root->fs_info->enospc_unlink = 0;


Could it be that the missing space for the orphan item is the reason
for our warning?

[  105.209232] ------------[ cut here ]------------
[  105.214458] WARNING: at fs/btrfs/inode.c:2114
btrfs_orphan_commit_root+0xb0/0xc0 [btrfs]()
[  105.223794] Hardware name: ProLiant DL180 G6
[  105.228930] Modules linked in: btrfs zlib_deflate libcrc32c bonding
ipv6 serio_raw pcspkr ghes hed iTCO_wdt iTCO_vendor_support
i7core_edac edac_core ixgbe dca mdio iomemory_vsl(P) hpsa squashfs
[last unloaded: scsi_wait_scan]
[  105.253539] Pid: 1774, comm: kworker/0:2 Tainted: P
3.0.6-1.fits.2.el6.x86_64 #1
[  105.263015] Call Trace:
[  105.265956]  [<ffffffff8106344f>] warn_slowpath_common+0x7f/0xc0
[  105.272841]  [<ffffffff810634aa>] warn_slowpath_null+0x1a/0x20
[  105.279503]  [<ffffffffa022bef0>] btrfs_orphan_commit_root+0xb0/0xc0 [btrfs]
[  105.287564]  [<ffffffffa0226ce5>] commit_fs_roots+0xc5/0x1b0 [btrfs]
[  105.294824]  [<ffffffffa0227c36>]
btrfs_commit_transaction+0x3c6/0x820 [btrfs]
[  105.303044]  [<ffffffff810507c0>] ? __dequeue_entity+0x30/0x50
[  105.309745]  [<ffffffff81086410>] ? wake_up_bit+0x40/0x40
[  105.315944]  [<ffffffffa0228090>] ?
btrfs_commit_transaction+0x820/0x820 [btrfs]
[  105.324404]  [<ffffffffa02280af>] do_async_commit+0x1f/0x30 [btrfs]
[  105.331590]  [<ffffffff8107e8b8>] process_one_work+0x128/0x450
[  105.338291]  [<ffffffff810816cb>] worker_thread+0x17b/0x3c0
[  105.344708]  [<ffffffff81081550>] ? manage_workers+0x220/0x220
[  105.351407]  [<ffffffff81085d96>] kthread+0x96/0xa0
[  105.357040]  [<ffffffff815639c4>] kernel_thread_helper+0x4/0x10
[  105.363824]  [<ffffffff81085d00>] ? kthread_worker_fn+0x1a0/0x1a0
[  105.370776]  [<ffffffff815639c0>] ? gs_change+0x13/0x13
[  105.376771] ---[ end trace 144230b62b45be67 ]---

Thanks,
Christian

> @@ -2901,6 +2903,11 @@ out:
>                return ERR_PTR(err);
>        }
>
> +       ret = btrfs_block_rsv_migrate(trans->block_rsv,
> +                                     &root->fs_info->global_block_rsv,
> +                                     btrfs_calc_trans_metadata_size(root, 
> 1));
> +       BUG_ON(ret);
> +
>        trans->block_rsv = &root->fs_info->global_block_rsv;
>        return trans;
>  }
> --
> To unsubscribe from this list: send the line "unsubscribe linux-btrfs" in
> the body of a message to majord...@vger.kernel.org
> More majordomo info at  http://vger.kernel.org/majordomo-info.html
>
--
To unsubscribe from this list: send the line "unsubscribe linux-btrfs" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

Re: [PATCH] Btrfs: allow us to overcommit our enospc reservations TEST THIS PLEASE!!!

Reply via email to