On Wed, 2021-05-12 at 15:46 +0200, Jan Kara wrote:
> Ceph has the following race between hole punching and page fault:
> 
> CPU1                                  CPU2
> ceph_fallocate()
>   ...
>   ceph_zero_pagecache_range()
>                                       ceph_filemap_fault()
>                                         faults in page in the range being
>                                         punched
>   ceph_zero_objects()
> 
> And now we have a page in punched range with invalid data. Fix the
> problem by using mapping->invalidate_lock similarly to other
> filesystems. Note that using invalidate_lock also fixes a similar race
> wrt ->readpage().
> 
> CC: Jeff Layton <jlay...@kernel.org>
> CC: ceph-de...@vger.kernel.org
> Signed-off-by: Jan Kara <j...@suse.cz>
> ---
>  fs/ceph/addr.c | 9 ++++++---
>  fs/ceph/file.c | 2 ++
>  2 files changed, 8 insertions(+), 3 deletions(-)
> 
> diff --git a/fs/ceph/addr.c b/fs/ceph/addr.c
> index c1570fada3d8..6d868faf97b5 100644
> --- a/fs/ceph/addr.c
> +++ b/fs/ceph/addr.c
> @@ -1401,9 +1401,11 @@ static vm_fault_t ceph_filemap_fault(struct vm_fault *vmf)
>               ret = VM_FAULT_SIGBUS;
>       } else {
>               struct address_space *mapping = inode->i_mapping;
> -             struct page *page = find_or_create_page(mapping, 0,
> -                                             mapping_gfp_constraint(mapping,
> -                                             ~__GFP_FS));
> +             struct page *page;
> +
> +             down_read(&mapping->invalidate_lock);
> +             page = find_or_create_page(mapping, 0,
> +                             mapping_gfp_constraint(mapping, ~__GFP_FS));
>               if (!page) {
>                       ret = VM_FAULT_OOM;
>                       goto out_inline;
> @@ -1424,6 +1426,7 @@ static vm_fault_t ceph_filemap_fault(struct vm_fault *vmf)
>               vmf->page = page;
>               ret = VM_FAULT_MAJOR | VM_FAULT_LOCKED;
>  out_inline:
> +             up_read(&mapping->invalidate_lock);
>               dout("filemap_fault %p %llu read inline data ret %x\n",
>                    inode, off, ret);
>       }
> diff --git a/fs/ceph/file.c b/fs/ceph/file.c
> index 77fc037d5beb..91693d8b458e 100644
> --- a/fs/ceph/file.c
> +++ b/fs/ceph/file.c
> @@ -2083,6 +2083,7 @@ static long ceph_fallocate(struct file *file, int mode,
>       if (ret < 0)
>               goto unlock;
>  
> +     down_write(&inode->i_mapping->invalidate_lock);
>       ceph_zero_pagecache_range(inode, offset, length);
>       ret = ceph_zero_objects(inode, offset, length);
>  
> @@ -2095,6 +2096,7 @@ static long ceph_fallocate(struct file *file, int mode,
>               if (dirty)
>                       __mark_inode_dirty(inode, dirty);
>       }
> +     up_write(&inode->i_mapping->invalidate_lock);
>  
>       ceph_put_cap_refs(ci, got);
>  unlock:

Assuming the basic concept is sound, then this looks reasonable. 

Reviewed-by: Jeff Layton <jlay...@kernel.org>



_______________________________________________
Linux-f2fs-devel mailing list
Linux-f2fs-devel@lists.sourceforge.net
https://lists.sourceforge.net/lists/listinfo/linux-f2fs-devel

Reply via email to