On 4.12.18 г. 17:11 ч., David Sterba wrote:
> The scrub context is allocated with GFP_KERNEL and called from
> btrfs_scrub_dev under the fs_info::device_list_mutex. This is not safe
> regarding reclaim that could try to flush filesystem data in order to
> get the memory. And the device_list_mutex is held during superblock
> commit, so this would cause a lockup.
>
> Move the allocation and initialization before any changes that require
> the mutex.
>
> Signed-off-by: David Sterba <dste...@suse.com>
> ---
> fs/btrfs/scrub.c | 30 ++++++++++++++++++------------
> 1 file changed, 18 insertions(+), 12 deletions(-)
>
> diff --git a/fs/btrfs/scrub.c b/fs/btrfs/scrub.c
> index ffcab263e057..051d14c9f013 100644
> --- a/fs/btrfs/scrub.c
> +++ b/fs/btrfs/scrub.c
> @@ -3834,13 +3834,18 @@ int btrfs_scrub_dev(struct btrfs_fs_info *fs_info,
> u64 devid, u64 start,
> return -EINVAL;
> }
>
> + /* Allocate outside of device_list_mutex */
> + sctx = scrub_setup_ctx(fs_info, is_dev_replace);
> + if (IS_ERR(sctx))
> + return PTR_ERR(sctx);
>
> mutex_lock(&fs_info->fs_devices->device_list_mutex);
> dev = btrfs_find_device(fs_info, devid, NULL, NULL);
> if (!dev || (test_bit(BTRFS_DEV_STATE_MISSING, &dev->dev_state) &&
> !is_dev_replace)) {
> mutex_unlock(&fs_info->fs_devices->device_list_mutex);
> - return -ENODEV;
> + ret = -ENODEV;
> + goto out_free_ctx;
> }
>
> if (!is_dev_replace && !readonly &&
> @@ -3848,7 +3853,8 @@ int btrfs_scrub_dev(struct btrfs_fs_info *fs_info, u64
> devid, u64 start,
> mutex_unlock(&fs_info->fs_devices->device_list_mutex);
> btrfs_err_in_rcu(fs_info, "scrub: device %s is not writable",
> rcu_str_deref(dev->name));
> - return -EROFS;
> + ret = -EROFS;
> + goto out_free_ctx;
> }
>
> mutex_lock(&fs_info->scrub_lock);
> @@ -3856,7 +3862,8 @@ int btrfs_scrub_dev(struct btrfs_fs_info *fs_info, u64
> devid, u64 start,
> test_bit(BTRFS_DEV_STATE_REPLACE_TGT, &dev->dev_state)) {
> mutex_unlock(&fs_info->scrub_lock);
> mutex_unlock(&fs_info->fs_devices->device_list_mutex);
> - return -EIO;
> + ret = -EIO;
> + goto out_free_ctx;
> }
>
> btrfs_dev_replace_read_lock(&fs_info->dev_replace);
> @@ -3866,7 +3873,8 @@ int btrfs_scrub_dev(struct btrfs_fs_info *fs_info, u64
> devid, u64 start,
> btrfs_dev_replace_read_unlock(&fs_info->dev_replace);
> mutex_unlock(&fs_info->scrub_lock);
> mutex_unlock(&fs_info->fs_devices->device_list_mutex);
> - return -EINPROGRESS;
> + ret = -EINPROGRESS;
> + goto out_free_ctx;
> }
> btrfs_dev_replace_read_unlock(&fs_info->dev_replace);
>
> @@ -3874,16 +3882,9 @@ int btrfs_scrub_dev(struct btrfs_fs_info *fs_info, u64
> devid, u64 start,
> if (ret) {
> mutex_unlock(&fs_info->scrub_lock);
> mutex_unlock(&fs_info->fs_devices->device_list_mutex);
> - return ret;
> + goto out_free_ctx;
Don't we suffer from the same issue when calling scrub_workers_get(), since
it calls btrfs_alloc_workqueue(), which also does a kzalloc with GFP_KERNEL?
> }
>
> - sctx = scrub_setup_ctx(fs_info, is_dev_replace);
> - if (IS_ERR(sctx)) {
> - mutex_unlock(&fs_info->scrub_lock);
> - mutex_unlock(&fs_info->fs_devices->device_list_mutex);
> - scrub_workers_put(fs_info);
> - return PTR_ERR(sctx);
> - }
> sctx->readonly = readonly;
> dev->scrub_ctx = sctx;
> mutex_unlock(&fs_info->fs_devices->device_list_mutex);
> @@ -3936,6 +3937,11 @@ int btrfs_scrub_dev(struct btrfs_fs_info *fs_info, u64
> devid, u64 start,
>
> scrub_put_ctx(sctx);
>
> + return ret;
> +
> +out_free_ctx:
> + scrub_free_ctx(sctx);
> +
> return ret;
> }
>
>