On Tue, Nov 20, 2018 at 10:34:17PM +0800, Gao Xiang wrote:
> When the managed cache is enabled, the last reference count
> of a workgroup must be used for its workstation.
> 
> Otherwise, it could lead to incorrect (un)freezes in
> the reclaim path, and it would be harmful.
> 
> A typical race as follows:
> 
> Thread 1 (In the reclaim path)  Thread 2
> workgroup_freeze(grp, 1)                                refcnt = 1
> ...
> workgroup_unfreeze(grp, 1)                              refcnt = 1
>                                 workgroup_get(grp)      refcnt = 2 (x)
> workgroup_put(grp)                                      refcnt = 1 (x)
>                                 ...unexpected behaviors
> 
> * grp is detached but still used, which violates cache-managed
>   freeze constraint.
> 
> Reviewed-by: Chao Yu <yuch...@huawei.com>
> Signed-off-by: Gao Xiang <gaoxian...@huawei.com>
> ---
>  drivers/staging/erofs/internal.h |   1 +
>  drivers/staging/erofs/utils.c    | 131 
> +++++++++++++++++++++++++++------------
>  2 files changed, 93 insertions(+), 39 deletions(-)
> 
> diff --git a/drivers/staging/erofs/internal.h 
> b/drivers/staging/erofs/internal.h
> index 57575c7f5635..89dbd0888e53 100644
> --- a/drivers/staging/erofs/internal.h
> +++ b/drivers/staging/erofs/internal.h
> @@ -250,6 +250,7 @@ static inline bool erofs_workgroup_get(struct 
> erofs_workgroup *grp, int *ocnt)
>  }
>  
>  #define __erofs_workgroup_get(grp)   atomic_inc(&(grp)->refcount)
> +#define __erofs_workgroup_put(grp)   atomic_dec(&(grp)->refcount)
>  
>  extern int erofs_workgroup_put(struct erofs_workgroup *grp);
>  
> diff --git a/drivers/staging/erofs/utils.c b/drivers/staging/erofs/utils.c
> index ea8a962e5c95..90de8d9195b7 100644
> --- a/drivers/staging/erofs/utils.c
> +++ b/drivers/staging/erofs/utils.c
> @@ -83,12 +83,21 @@ int erofs_register_workgroup(struct super_block *sb,
>  
>       grp = xa_tag_pointer(grp, tag);
>  
> -     err = radix_tree_insert(&sbi->workstn_tree,
> -             grp->index, grp);
> +     /*
> +      * Bump up reference count before making this workgroup
> +      * visible to other users in order to avoid potential UAF
> +      * without serialized by erofs_workstn_lock.
> +      */
> +     __erofs_workgroup_get(grp);
>  
> -     if (!err) {
> -             __erofs_workgroup_get(grp);
> -     }
> +     err = radix_tree_insert(&sbi->workstn_tree,
> +                             grp->index, grp);
> +     if (unlikely(err))
> +             /*
> +              * it's safe to decrease since the workgroup isn't visible
> +              * and refcount >= 2 (cannot be freezed).
> +              */
> +             __erofs_workgroup_put(grp);
>  
>       erofs_workstn_unlock(sbi);
>       radix_tree_preload_end();
> @@ -97,19 +106,91 @@ int erofs_register_workgroup(struct super_block *sb,
>  
>  extern void erofs_workgroup_free_rcu(struct erofs_workgroup *grp);
>  
> +static void  __erofs_workgroup_free(struct erofs_workgroup *grp)
> +{
> +     atomic_long_dec(&erofs_global_shrink_cnt);
> +     erofs_workgroup_free_rcu(grp);
> +}
> +
>  int erofs_workgroup_put(struct erofs_workgroup *grp)
>  {
>       int count = atomic_dec_return(&grp->refcount);
>  
>       if (count == 1)
>               atomic_long_inc(&erofs_global_shrink_cnt);
> -     else if (!count) {
> -             atomic_long_dec(&erofs_global_shrink_cnt);
> -             erofs_workgroup_free_rcu(grp);
> -     }
> +     else if (!count)
> +             __erofs_workgroup_free(grp);
>       return count;
>  }
>  
> +#ifdef EROFS_FS_HAS_MANAGED_CACHE
> +/* for cache-managed case, customized reclaim paths exist */
> +static void erofs_workgroup_unfreeze_final(struct erofs_workgroup *grp)
> +{
> +     erofs_workgroup_unfreeze(grp, 0);
> +     __erofs_workgroup_free(grp);
> +}
> +
> +bool erofs_try_to_release_workgroup(struct erofs_sb_info *sbi,
> +                                 struct erofs_workgroup *grp,
> +                                 bool cleanup)
> +{
> +     /*
> +      * for managed cache enabled, the refcount of workgroups
> +      * themselves could be < 0 (freezed). So there is no guarantee
> +      * that all refcount > 0 if managed cache is enabled.
> +      */
> +     if (!erofs_workgroup_try_to_freeze(grp, 1))
> +             return false;
> +
> +     /*
> +      * note that all cached pages should be unlinked
> +      * before delete it from the radix tree.
> +      * Otherwise some cached pages of an orphan old workgroup
> +      * could be still linked after the new one is available.
> +      */
> +     if (erofs_try_to_free_all_cached_pages(sbi, grp)) {
> +             erofs_workgroup_unfreeze(grp, 1);
> +             return false;
> +     }
> +
> +     /* it is impossible to fail after we freeze the workgroup */
> +     BUG_ON(xa_untag_pointer(radix_tree_delete(&sbi->workstn_tree,
> +                                               grp->index)) != grp);
> +

It is not a good idea to crash the system.  Please try to recover from
this, a BUG_ON() implies that the developer has no idea how to handle
this type of error so either it can never happen (which means that the
BUG_ON can be removed), or you need to fix the logic to handle this type
of issue when it does happen.

I'm not going to take this as-is, sorry.

greg k-h

Reply via email to