On 4.12.18 г. 17:11 ч., David Sterba wrote:
> The scrub context is allocated with GFP_KERNEL and called from
> btrfs_scrub_dev under the fs_info::device_list_mutex. This is not safe
> regarding reclaim that could try to flush filesystem data in order to
> get the memory. And the device_list_mutex is held during superblock
> commit, so this would cause a lockup.
> 
> Move the alocation and initialization before any changes that require
> the mutex.
> 
> Signed-off-by: David Sterba <dste...@suse.com>
> ---
>  fs/btrfs/scrub.c | 30 ++++++++++++++++++------------
>  1 file changed, 18 insertions(+), 12 deletions(-)
> 
> diff --git a/fs/btrfs/scrub.c b/fs/btrfs/scrub.c
> index ffcab263e057..051d14c9f013 100644
> --- a/fs/btrfs/scrub.c
> +++ b/fs/btrfs/scrub.c
> @@ -3834,13 +3834,18 @@ int btrfs_scrub_dev(struct btrfs_fs_info *fs_info, 
> u64 devid, u64 start,
>               return -EINVAL;
>       }
>  
> +     /* Allocate outside of device_list_mutex */
> +     sctx = scrub_setup_ctx(fs_info, is_dev_replace);
> +     if (IS_ERR(sctx))
> +             return PTR_ERR(sctx);
>  
>       mutex_lock(&fs_info->fs_devices->device_list_mutex);
>       dev = btrfs_find_device(fs_info, devid, NULL, NULL);
>       if (!dev || (test_bit(BTRFS_DEV_STATE_MISSING, &dev->dev_state) &&
>                    !is_dev_replace)) {
>               mutex_unlock(&fs_info->fs_devices->device_list_mutex);
> -             return -ENODEV;
> +             ret = -ENODEV;
> +             goto out_free_ctx;
>       }
>  
>       if (!is_dev_replace && !readonly &&
> @@ -3848,7 +3853,8 @@ int btrfs_scrub_dev(struct btrfs_fs_info *fs_info, u64 
> devid, u64 start,
>               mutex_unlock(&fs_info->fs_devices->device_list_mutex);
>               btrfs_err_in_rcu(fs_info, "scrub: device %s is not writable",
>                               rcu_str_deref(dev->name));
> -             return -EROFS;
> +             ret = -EROFS;
> +             goto out_free_ctx;
>       }
>  
>       mutex_lock(&fs_info->scrub_lock);
> @@ -3856,7 +3862,8 @@ int btrfs_scrub_dev(struct btrfs_fs_info *fs_info, u64 
> devid, u64 start,
>           test_bit(BTRFS_DEV_STATE_REPLACE_TGT, &dev->dev_state)) {
>               mutex_unlock(&fs_info->scrub_lock);
>               mutex_unlock(&fs_info->fs_devices->device_list_mutex);
> -             return -EIO;
> +             ret = -EIO;
> +             goto out_free_ctx;
>       }
>  
>       btrfs_dev_replace_read_lock(&fs_info->dev_replace);
> @@ -3866,7 +3873,8 @@ int btrfs_scrub_dev(struct btrfs_fs_info *fs_info, u64 
> devid, u64 start,
>               btrfs_dev_replace_read_unlock(&fs_info->dev_replace);
>               mutex_unlock(&fs_info->scrub_lock);
>               mutex_unlock(&fs_info->fs_devices->device_list_mutex);
> -             return -EINPROGRESS;
> +             ret = -EINPROGRESS;
> +             goto out_free_ctx;
>       }
>       btrfs_dev_replace_read_unlock(&fs_info->dev_replace);
>  
> @@ -3874,16 +3882,9 @@ int btrfs_scrub_dev(struct btrfs_fs_info *fs_info, u64 
> devid, u64 start,
>       if (ret) {
>               mutex_unlock(&fs_info->scrub_lock);
>               mutex_unlock(&fs_info->fs_devices->device_list_mutex);
> -             return ret;
> +             goto out_free_ctx;

Don't we suffer the same issue when calling scrub_workers_get since in
it we do btrfs_alloc_workqueue which also calls kzalloc with GFP_KERNEL?


>       }
>  
> -     sctx = scrub_setup_ctx(fs_info, is_dev_replace);
> -     if (IS_ERR(sctx)) {
> -             mutex_unlock(&fs_info->scrub_lock);
> -             mutex_unlock(&fs_info->fs_devices->device_list_mutex);
> -             scrub_workers_put(fs_info);
> -             return PTR_ERR(sctx);
> -     }
>       sctx->readonly = readonly;
>       dev->scrub_ctx = sctx;
>       mutex_unlock(&fs_info->fs_devices->device_list_mutex);
> @@ -3936,6 +3937,11 @@ int btrfs_scrub_dev(struct btrfs_fs_info *fs_info, u64 
> devid, u64 start,
>  
>       scrub_put_ctx(sctx);
>  
> +     return ret;
> +
> +out_free_ctx:
> +     scrub_free_ctx(sctx);
> +
>       return ret;
>  }
>  
> 

Reply via email to