On Thu, 12 Mar 2026 20:27:19 +0000 "Lorenzo Stoakes (Oracle)" <[email protected]> 
wrote:

> Previously, when a driver needed to do something like establish a reference
> count, it could do so in the mmap hook in the knowledge that the mapping
> would succeed.
> 
> With the introduction of f_op->mmap_prepare this is no longer the case, as
> it is invoked prior to actually establishing the mapping.
> 
> To take this into account, introduce a new vm_ops->mapped callback which is
> invoked when the VMA is first mapped (though notably - not when it is
> merged - which is correct and mirrors existing mmap/open/close behaviour).
> 
> We do better than vm_ops->open() here, as this callback can return an
> error, at which point the VMA will be unmapped.
> 
> Note that vm_ops->mapped() is invoked after any mmap action is
> complete (such as I/O remapping).
> 
> We intentionally do not expose the VMA at this point, exposing only the
> fields that could be used, and an output parameter in case the operation
> needs to update the vma->vm_private_data field.
> 
> In order to deal with stacked filesystems which invoke an inner
> filesystem's mmap() hook, add __compat_vma_mapped() and invoke it on
> vfs_mmap() (via compat_vma_mmap()) to ensure that the mapped callback is
> handled when an mmap() caller invokes a nested filesystem's mmap_prepare()
> callback.
> 
> We can now also remove call_action_complete() and invoke
> mmap_action_complete() directly, as we separate out the rmap lock logic to
> be called in __mmap_region() instead via maybe_drop_file_rmap_lock().
> 
> We also abstract unmapping of a VMA on mmap action completion into its own
> helper function, unmap_vma_locked().
> 
> Additionally, update VMA userland test headers to reflect the change.
> 
> Signed-off-by: Lorenzo Stoakes (Oracle) <[email protected]>
> ---
>  include/linux/fs.h              |  9 +++-
>  include/linux/mm.h              | 17 +++++++
>  mm/internal.h                   | 10 ++++
>  mm/util.c                       | 86 ++++++++++++++++++++++++---------
>  mm/vma.c                        | 41 +++++++++++-----
>  tools/testing/vma/include/dup.h | 34 ++++++++++++-
>  6 files changed, 158 insertions(+), 39 deletions(-)
> 
> diff --git a/include/linux/fs.h b/include/linux/fs.h
> index a2628a12bd2b..c390f5c667e3 100644
> --- a/include/linux/fs.h
> +++ b/include/linux/fs.h
> @@ -2059,13 +2059,20 @@ static inline bool can_mmap_file(struct file *file)
>  }
>  
>  int compat_vma_mmap(struct file *file, struct vm_area_struct *vma);
> +int __vma_check_mmap_hook(struct vm_area_struct *vma);
>  
>  static inline int vfs_mmap(struct file *file, struct vm_area_struct *vma)
>  {
> +     int err;
> +
>       if (file->f_op->mmap_prepare)
>               return compat_vma_mmap(file, vma);
>  
> -     return file->f_op->mmap(file, vma);
> +     err = file->f_op->mmap(file, vma);
> +     if (err)
> +             return err;
> +
> +     return __vma_check_mmap_hook(vma);
>  }
>  
>  static inline int vfs_mmap_prepare(struct file *file, struct vm_area_desc 
> *desc)
> diff --git a/include/linux/mm.h b/include/linux/mm.h
> index 12a0b4c63736..7333d5db1221 100644
> --- a/include/linux/mm.h
> +++ b/include/linux/mm.h
> @@ -759,6 +759,23 @@ struct vm_operations_struct {
>        * Context: User context.  May sleep.  Caller holds mmap_lock.
>        */
>       void (*close)(struct vm_area_struct *vma);
> +     /**
> +      * @mapped: Called when the VMA is first mapped in the MM. Not called if
> +      * the new VMA is merged with an adjacent VMA.
> +      *
> +      * The @vm_private_data field is an output field allowing the user to
> +      * modify vma->vm_private_data as necessary.
> +      *
> +      * ONLY valid if set from f_op->mmap_prepare. Will result in an error if
> +      * set from f_op->mmap.
> +      *
> +      * Returns %0 on success, or an error otherwise. On error, the VMA will
> +      * be unmapped.
> +      *
> +      * Context: User context.  May sleep.  Caller holds mmap_lock.
> +      */
> +     int (*mapped)(unsigned long start, unsigned long end, pgoff_t pgoff,
> +                   const struct file *file, void **vm_private_data);
>       /* Called any time before splitting to check if it's allowed */
>       int (*may_split)(struct vm_area_struct *vma, unsigned long addr);
>       int (*mremap)(struct vm_area_struct *vma);
> diff --git a/mm/internal.h b/mm/internal.h
> index 7bfa85b5e78b..f0f2cf1caa36 100644
> --- a/mm/internal.h
> +++ b/mm/internal.h
> @@ -158,6 +158,8 @@ static inline void *folio_raw_mapping(const struct folio 
> *folio)
>   * mmap hook and safely handle error conditions. On error, VMA hooks will be
>   * mutated.
>   *
> + * IMPORTANT: f_op->mmap() is deprecated, prefer f_op->mmap_prepare().
> + *
>   * @file: File which backs the mapping.
>   * @vma:  VMA which we are mapping.
>   *
> @@ -201,6 +203,14 @@ static inline void vma_close(struct vm_area_struct *vma)
>  /* unmap_vmas is in mm/memory.c */
>  void unmap_vmas(struct mmu_gather *tlb, struct unmap_desc *unmap);
>  
> +static inline void unmap_vma_locked(struct vm_area_struct *vma)
> +{
> +     const size_t len = vma_pages(vma) << PAGE_SHIFT;
> +
> +     mmap_assert_locked(vma->vm_mm);
> +     do_munmap(vma->vm_mm, vma->vm_start, len, NULL);
> +}
> +
>  #ifdef CONFIG_MMU
>  
>  static inline void get_anon_vma(struct anon_vma *anon_vma)
> diff --git a/mm/util.c b/mm/util.c
> index dba1191725b6..2b0ed54008d6 100644
> --- a/mm/util.c
> +++ b/mm/util.c
> @@ -1163,6 +1163,55 @@ void flush_dcache_folio(struct folio *folio)
>  EXPORT_SYMBOL(flush_dcache_folio);
>  #endif
>  
> +static int __compat_vma_mmap(struct file *file, struct vm_area_struct *vma)
> +{
> +     struct vm_area_desc desc = {
> +             .mm = vma->vm_mm,
> +             .file = file,
> +             .start = vma->vm_start,
> +             .end = vma->vm_end,
> +
> +             .pgoff = vma->vm_pgoff,
> +             .vm_file = vma->vm_file,
> +             .vma_flags = vma->flags,
> +             .page_prot = vma->vm_page_prot,
> +
> +             .action.type = MMAP_NOTHING, /* Default */
> +     };
> +     int err;
> +
> +     err = vfs_mmap_prepare(file, &desc);
> +     if (err)
> +             return err;
> +
> +     err = mmap_action_prepare(&desc, &desc.action);
> +     if (err)
> +             return err;
> +
> +     set_vma_from_desc(vma, &desc);
> +     return mmap_action_complete(vma, &desc.action);
> +}
> +
> +static int __compat_vma_mapped(struct file *file, struct vm_area_struct *vma)
> +{
> +     const struct vm_operations_struct *vm_ops = vma->vm_ops;
> +     void *vm_private_data = vma->vm_private_data;
> +     int err;
> +
> +     if (!vm_ops->mapped)
> +             return 0;
> +

Hello!

Can vm_ops be NULL here?  __compat_vma_mapped() is called from
compat_vma_mmap(), which is reached when a filesystem provides
mmap_prepare.  If the mmap_prepare hook does not set desc->vm_ops,
vma->vm_ops will be NULL and this dereferences a NULL pointer.

For example, in drivers/char/mem.c, mmap_zero_prepare() would trigger
a NULL pointer dereference here.

This would need to be

        if (!vm_ops || !vm_ops->mapped)
                return 0;

here.


> +     err = vm_ops->mapped(vma->vm_start, vma->vm_end, vma->vm_pgoff, file,
> +                          &vm_private_data);
> +     if (err)
> +             unmap_vma_locked(vma);

When mapped() returns an error, unmap_vma_locked(vma) is called,
but execution continues into the vm_private_data update below.  After
unmap_vma_locked(), the VMA may already have been freed (do_munmap()
can remove the VMA entirely), so accessing vma->vm_private_data after
that is a use-after-free.

Probably need to do:
        if (err) {
                unmap_vma_locked(vma);
                return err;
        }

> +     /* Update private data if changed. */
> +     if (vm_private_data != vma->vm_private_data)
> +             vma->vm_private_data = vm_private_data;
> +
> +     return err;
> +}
> +
>  /**
>   * compat_vma_mmap() - Apply the file's .mmap_prepare() hook to an
>   * existing VMA and execute any requested actions.
> @@ -1191,34 +1240,26 @@ EXPORT_SYMBOL(flush_dcache_folio);
>   */
>  int compat_vma_mmap(struct file *file, struct vm_area_struct *vma)
>  {
> -     struct vm_area_desc desc = {
> -             .mm = vma->vm_mm,
> -             .file = file,
> -             .start = vma->vm_start,
> -             .end = vma->vm_end,
> -
> -             .pgoff = vma->vm_pgoff,
> -             .vm_file = vma->vm_file,
> -             .vma_flags = vma->flags,
> -             .page_prot = vma->vm_page_prot,
> -
> -             .action.type = MMAP_NOTHING, /* Default */
> -     };
>       int err;
>  
> -     err = vfs_mmap_prepare(file, &desc);
> -     if (err)
> -             return err;
> -
> -     err = mmap_action_prepare(&desc, &desc.action);
> +     err = __compat_vma_mmap(file, vma);
>       if (err)
>               return err;
>  
> -     set_vma_from_desc(vma, &desc);
> -     return mmap_action_complete(vma, &desc.action);
> +     return __compat_vma_mapped(file, vma);
>  }
>  EXPORT_SYMBOL(compat_vma_mmap);
>  
> +int __vma_check_mmap_hook(struct vm_area_struct *vma)
> +{
> +     /* vm_ops->mapped is not valid if mmap() is specified. */
> +     if (WARN_ON_ONCE(vma->vm_ops->mapped))
> +             return -EINVAL;

I think vma->vm_ops can be NULL here as well — an f_op->mmap()
implementation is not required to set vma->vm_ops. This should be:

        if (vma->vm_ops && WARN_ON_ONCE(vma->vm_ops->mapped))
                return -EINVAL;

> +
> +     return 0;
> +}
> +EXPORT_SYMBOL(__vma_check_mmap_hook);
> +
>  static void set_ps_flags(struct page_snapshot *ps, const struct folio *folio,
>                        const struct page *page)
>  {
> @@ -1316,10 +1357,7 @@ static int mmap_action_finish(struct vm_area_struct 
> *vma,
>        * invoked if we do NOT merge, so we only clean up the VMA we created.
>        */
>       if (err) {
> -             const size_t len = vma_pages(vma) << PAGE_SHIFT;
> -
> -             do_munmap(current->mm, vma->vm_start, len, NULL);
> -
> +             unmap_vma_locked(vma);
>               if (action->error_hook) {
>                       /* We may want to filter the error. */
>                       err = action->error_hook(err);
> diff --git a/mm/vma.c b/mm/vma.c
> index 054cf1d262fb..ef9f5a5365d1 100644
> --- a/mm/vma.c
> +++ b/mm/vma.c
> @@ -2705,21 +2705,35 @@ static bool can_set_ksm_flags_early(struct mmap_state 
> *map)
>       return false;
>  }
>  
> -static int call_action_complete(struct mmap_state *map,
> -                             struct mmap_action *action,
> -                             struct vm_area_struct *vma)
> +static int call_mapped_hook(struct vm_area_struct *vma)
>  {
> -     int ret;
> +     const struct vm_operations_struct *vm_ops = vma->vm_ops;
> +     void *vm_private_data = vma->vm_private_data;
> +     int err;
>  
> -     ret = mmap_action_complete(vma, action);
> +     if (!vm_ops || !vm_ops->mapped)
> +             return 0;
> +     err = vm_ops->mapped(vma->vm_start, vma->vm_end, vma->vm_pgoff,
> +                          vma->vm_file, &vm_private_data);
> +     if (err) {
> +             unmap_vma_locked(vma);
> +             return err;
> +     }
> +     /* Update private data if changed. */
> +     if (vm_private_data != vma->vm_private_data)
> +             vma->vm_private_data = vm_private_data;
> +     return 0;
> +}
>  
> -     /* If we held the file rmap we need to release it. */
> -     if (map->hold_file_rmap_lock) {
> -             struct file *file = vma->vm_file;
> +static void maybe_drop_file_rmap_lock(struct mmap_state *map,
> +                                   struct vm_area_struct *vma)
> +{
> +     struct file *file;
>  
> -             i_mmap_unlock_write(file->f_mapping);
> -     }
> -     return ret;
> +     if (!map->hold_file_rmap_lock)
> +             return;
> +     file = vma->vm_file;
> +     i_mmap_unlock_write(file->f_mapping);
>  }
>  
>  static unsigned long __mmap_region(struct file *file, unsigned long addr,
> @@ -2773,8 +2787,11 @@ static unsigned long __mmap_region(struct file *file, 
> unsigned long addr,
>       __mmap_complete(&map, vma);
>  
>       if (have_mmap_prepare && allocated_new) {
> -             error = call_action_complete(&map, &desc.action, vma);
> +             error = mmap_action_complete(vma, &desc.action);
> +             if (!error)
> +                     error = call_mapped_hook(vma);
>  
> +             maybe_drop_file_rmap_lock(&map, vma);
>               if (error)
>                       return error;
>       }
> diff --git a/tools/testing/vma/include/dup.h b/tools/testing/vma/include/dup.h
> index 908beb263307..47d8db809f31 100644
> --- a/tools/testing/vma/include/dup.h
> +++ b/tools/testing/vma/include/dup.h
> @@ -606,12 +606,34 @@ struct vm_area_struct {
>  } __randomize_layout;
>  
>  struct vm_operations_struct {
> -     void (*open)(struct vm_area_struct * area);
> +     /**
> +      * @open: Called when a VMA is remapped or split. Not called upon first
> +      * mapping a VMA.
> +      * Context: User context.  May sleep.  Caller holds mmap_lock.
> +      */
> +     void (*open)(struct vm_area_struct *vma);
>       /**
>        * @close: Called when the VMA is being removed from the MM.
>        * Context: User context.  May sleep.  Caller holds mmap_lock.
>        */
> -     void (*close)(struct vm_area_struct * area);
> +     void (*close)(struct vm_area_struct *vma);
> +     /**
> +      * @mapped: Called when the VMA is first mapped in the MM. Not called if
> +      * the new VMA is merged with an adjacent VMA.
> +      *
> +      * The @vm_private_data field is an output field allowing the user to
> +      * modify vma->vm_private_data as necessary.
> +      *
> +      * ONLY valid if set from f_op->mmap_prepare. Will result in an error if
> +      * set from f_op->mmap.
> +      *
> +      * Returns %0 on success, or an error otherwise. On error, the VMA will
> +      * be unmapped.
> +      *
> +      * Context: User context.  May sleep.  Caller holds mmap_lock.
> +      */
> +     int (*mapped)(unsigned long start, unsigned long end, pgoff_t pgoff,
> +                   const struct file *file, void **vm_private_data);
>       /* Called any time before splitting to check if it's allowed */
>       int (*may_split)(struct vm_area_struct *area, unsigned long addr);
>       int (*mremap)(struct vm_area_struct *area);
> @@ -1345,3 +1367,11 @@ static inline void vma_set_file(struct vm_area_struct 
> *vma, struct file *file)
>       swap(vma->vm_file, file);
>       fput(file);
>  }
> +
> +static inline void unmap_vma_locked(struct vm_area_struct *vma)
> +{
> +     const size_t len = vma_pages(vma) << PAGE_SHIFT;
> +
> +     mmap_assert_locked(vma->vm_mm);
> +     do_munmap(vma->vm_mm, vma->vm_start, len, NULL);
> +}
> -- 
> 2.53.0
> 
> 

Reply via email to