On Tue, Sep 24, 2019 at 01:15:18PM -0400, Johannes Weiner wrote:
> +static int fault_dirty_shared_page(struct vm_fault *vmf)

vm_fault_t, shirley?

> @@ -2239,16 +2241,36 @@ static void fault_dirty_shared_page(struct 
> vm_area_struct *vma,
>       mapping = page_rmapping(page);
>       unlock_page(page);
>  
> +     if (!page_mkwrite)
> +             file_update_time(vma->vm_file);
> +
> +     /*
> +      * Throttle page dirtying rate down to writeback speed.
> +      *
> +      * mapping may be NULL here because some device drivers do not
> +      * set page.mapping but still dirty their pages
> +      *
> +      * Drop the mmap_sem before waiting on IO, if we can. The file
> +      * is pinning the mapping, as per above.
> +      */
>       if ((dirtied || page_mkwrite) && mapping) {
> -             /*
> -              * Some device drivers do not set page.mapping
> -              * but still dirty their pages
> -              */
> +             struct file *fpin = NULL;
> +
> +             if ((vmf->flags &
> +                  (FAULT_FLAG_ALLOW_RETRY | FAULT_FLAG_RETRY_NOWAIT)) ==
> +                 FAULT_FLAG_ALLOW_RETRY) {
> +                     fpin = get_file(vma->vm_file);
> +                     up_read(&vma->vm_mm->mmap_sem);
> +                     ret = VM_FAULT_RETRY;
> +             }
> +
>               balance_dirty_pages_ratelimited(mapping);
> +
> +             if (fpin)
> +                     fput(fpin);
>       }
>  
> -     if (!page_mkwrite)
> -             file_update_time(vma->vm_file);
> +     return ret;
>  }

I'm not a fan of moving file_update_time() to _before_ the
balance_dirty_pages call.  Also, this is now the third place that needs
maybe_unlock_mmap_for_io, see
https://lore.kernel.org/linux-mm/20190917120852.x6x3aypwvh573kfa@box/

How about:

+       struct file *fpin = NULL;

        if ((dirtied || page_mkwrite) && mapping) {
                fpin = maybe_unlock_mmap_for_io(vmf, fpin);
                balance_dirty_pages_ratelimited(mapping);
        }
+
+       if (fpin) {
+               file_update_time(fpin);
+               fput(fpin);
+               return VM_FAULT_RETRY;
+       }

        if (!page_mkwrite)
                file_update_time(vma->vm_file);
+       return 0;
 }

>  /*
> @@ -2491,6 +2513,7 @@ static vm_fault_t wp_page_shared(struct vm_fault *vmf)
>       __releases(vmf->ptl)
>  {
>       struct vm_area_struct *vma = vmf->vma;
> +     int ret = VM_FAULT_WRITE;

vm_fault_t again.

> @@ -3576,7 +3599,6 @@ static vm_fault_t do_shared_fault(struct vm_fault *vmf)
>  static vm_fault_t do_fault(struct vm_fault *vmf)
>  {
>       struct vm_area_struct *vma = vmf->vma;
> -     struct mm_struct *vm_mm = vma->vm_mm;
>       vm_fault_t ret;
>  
>       /*
> @@ -3617,7 +3639,12 @@ static vm_fault_t do_fault(struct vm_fault *vmf)
>  
>       /* preallocated pagetable is unused: free it */
>       if (vmf->prealloc_pte) {
> -             pte_free(vm_mm, vmf->prealloc_pte);
> +             /*
> +              * XXX: Accessing vma->vm_mm now is not safe. The page
> +              * fault handler may have dropped the mmap_sem a long
> +              * time ago. Only s390 derefs that parameter.
> +              */
> +             pte_free(vma->vm_mm, vmf->prealloc_pte);

I'm confused.  This code looks like it was safe before (as it was caching
vma->vm_mm in a local variable), and now you've made it unsafe ... ?

Reply via email to