On Mon, Mar 16, 2026 at 2:14 PM Lorenzo Stoakes (Oracle) <[email protected]> wrote: > > While the conversion of mmap hooks to mmap_prepare is underway, we wil
nit: s/wil/will > encounter situations where mmap hooks need to invoke nested mmap_prepare > hooks. > > The nesting of mmap hooks is termed 'stacking'. In order to flexibly > facilitate the conversion of custom mmap hooks in drivers which stack, we > must split up the existing compat_vma_mapped() function into two separate > functions: > > * compat_set_desc_from_vma() - This allows the setting of a vm_area_desc > object's fields to the relevant fields of a VMA. > > * __compat_vma_mmap() - Once an mmap_prepare hook has been executed upon a > vm_area_desc object, this function performs any mmap actions specified by > the mmap_prepare hook and then invokes its vm_ops->mapped() hook if any > were specified. > > In ordinary cases, where a file's f_op->mmap_prepare() hook simply needs to > be invoked in a stacked mmap() hook, compat_vma_mmap() can be used. > > However some drivers define their own nested hooks, which are invoked in > turn by another hook. > > A concrete example is vmbus_channel->mmap_ring_buffer(), which is invoked > in turn by bin_attribute->mmap(): > > vmbus_channel->mmap_ring_buffer() has a signature of: > > int (*mmap_ring_buffer)(struct vmbus_channel *channel, > struct vm_area_struct *vma); > > And bin_attribute->mmap() has a signature of: > > int (*mmap)(struct file *, struct kobject *, > const struct bin_attribute *attr, > struct vm_area_struct *vma); > > And so compat_vma_mmap() cannot be used here for incremental conversion of > hooks from mmap() to mmap_prepare(). > > There are many such instances like this, where conversion to mmap_prepare > would otherwise cascade to a huge change set due to nesting of this kind. > > The changes in this patch mean we could now instead convert > vmbus_channel->mmap_ring_buffer() to > vmbus_channel->mmap_prepare_ring_buffer(), and implement something like: > > struct vm_area_desc desc; > int err; > > compat_set_desc_from_vm(&desc, file, vma); > err = channel->mmap_prepare_ring_buffer(channel, &desc); > if (err) > return err; > > return __compat_vma_mmap(&desc, vma); > > Allowing us to incrementally update this logic, and other logic like it. The way I understand this and the next 2 patches is that they are preperations for later replacement of mmap() with mmap_prepare() but they don't yet do that completely. Is that right? To clarify what I mean, in [1] for example, you are replacing struct uio_info.mmap with uio_info.mmap_prepare but it's still being called from uio_mmap(). IOW, you are not replacing uio_mmap with uio_mmap_prepare. Is that the next step that's not yet implemented? [1] https://lore.kernel.org/all/892a8b32e5ef64c69239ccc2d1bd364716fd7fdf.1773695307.git....@kernel.org/ > > Unfortunately, as part of this change, we need to be able to flexibly > assign to the VMA descriptor, so have to remove some of the const > declarations within the structure. > > Also update the VMA tests to reflect the changes. > > Signed-off-by: Lorenzo Stoakes (Oracle) <[email protected]> > --- > include/linux/fs.h | 3 + > include/linux/mm_types.h | 4 +- > mm/util.c | 111 +++++++++++++++++++++++--------- > mm/vma.h | 2 +- > tools/testing/vma/include/dup.h | 111 ++++++++++++++++++++------------ > 5 files changed, 157 insertions(+), 74 deletions(-) > > diff --git a/include/linux/fs.h b/include/linux/fs.h > index c390f5c667e3..0bdccfa70b44 100644 > --- a/include/linux/fs.h > +++ b/include/linux/fs.h > @@ -2058,6 +2058,9 @@ static inline bool can_mmap_file(struct file *file) > return true; > } > > +void compat_set_desc_from_vma(struct vm_area_desc *desc, const struct file > *file, > + const struct vm_area_struct *vma); > +int __compat_vma_mmap(struct vm_area_desc *desc, struct vm_area_struct *vma); > int compat_vma_mmap(struct file *file, struct vm_area_struct *vma); > int __vma_check_mmap_hook(struct vm_area_struct *vma); > > diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h > index 50685cf29792..7538d64f8848 100644 > --- a/include/linux/mm_types.h > +++ b/include/linux/mm_types.h > @@ -891,8 +891,8 @@ static __always_inline bool vma_flags_empty(vma_flags_t > *flags) > */ > struct vm_area_desc { > /* Immutable state. */ > - const struct mm_struct *const mm; > - struct file *const file; /* May vary from vm_file in stacked callers. > */ > + struct mm_struct *mm; > + struct file *file; /* May vary from vm_file in stacked callers. */ > unsigned long start; > unsigned long end; > > diff --git a/mm/util.c b/mm/util.c > index aa92e471afe1..a166c48fe894 100644 > --- a/mm/util.c > +++ b/mm/util.c > @@ -1163,34 +1163,38 @@ void flush_dcache_folio(struct folio *folio) > EXPORT_SYMBOL(flush_dcache_folio); > #endif > > -static int __compat_vma_mmap(struct file *file, struct vm_area_struct *vma) > +/** > + * compat_set_desc_from_vma() - assigns VMA descriptor @desc fields from a > VMA. > + * @desc: A VMA descriptor whose fields need to be set. > + * @file: The file object describing the file being mmap()'d. > + * @vma: The VMA whose fields we wish to assign to @desc. > + * > + * This is a compatibility function to allow an mmap() hook to call > + * mmap_prepare() hooks when drivers nest these. This function specifically > + * allows the construction of a vm_area_desc value, @desc, from a VMA @vma > for > + * the purposes of doing this. > + * > + * Once the conversion of drivers is complete this function will no longer be > + * required and will be removed. > + */ > +void compat_set_desc_from_vma(struct vm_area_desc *desc, > + const struct file *file, > + const struct vm_area_struct *vma) > { > - struct vm_area_desc desc = { > - .mm = vma->vm_mm, > - .file = file, > - .start = vma->vm_start, > - .end = vma->vm_end, > - > - .pgoff = vma->vm_pgoff, > - .vm_file = vma->vm_file, > - .vma_flags = vma->flags, > - .page_prot = vma->vm_page_prot, > - > - .action.type = MMAP_NOTHING, /* Default */ > - }; > - int err; > + desc->mm = vma->vm_mm; > + desc->file = (struct file *)file; > + desc->start = vma->vm_start; > + desc->end = vma->vm_end; > > - err = vfs_mmap_prepare(file, &desc); > - if (err) > - return err; > + desc->pgoff = vma->vm_pgoff; > + desc->vm_file = vma->vm_file; > + desc->vma_flags = vma->flags; > + desc->page_prot = vma->vm_page_prot; > > - err = mmap_action_prepare(&desc); > - if (err) > - return err; > - > - set_vma_from_desc(vma, &desc); > - return mmap_action_complete(vma, &desc.action); > + /* Default. */ > + desc->action.type = MMAP_NOTHING; > } > +EXPORT_SYMBOL(compat_set_desc_from_vma); > > static int __compat_vma_mapped(struct file *file, struct vm_area_struct *vma) > { > @@ -1211,6 +1215,49 @@ static int __compat_vma_mapped(struct file *file, > struct vm_area_struct *vma) > return err; > } > > +/** > + * __compat_vma_mmap() - Similar to compat_vma_mmap(), only it allows > + * flexibility as to how the mmap_prepare callback is invoked, which is > useful > + * for drivers which invoke nested mmap_prepare callbacks in an mmap() hook. > + * @desc: A VMA descriptor upon which an mmap_prepare() hook has already been > + * executed. > + * @vma: The VMA to which @desc should be applied. > + * > + * The function assumes that you have obtained a VMA descriptor @desc from > + * compt_set_desc_from_vma(), and already executed the mmap_prepare() hook > upon > + * it. > + * > + * It then performs any specified mmap actions, and invokes the > vm_ops->mapped() > + * hook if one is present. > + * > + * See the description of compat_vma_mmap() for more details. > + * > + * Once the conversion of drivers is complete this function will no longer be > + * required and will be removed. > + * > + * Returns: 0 on success or error. > + */ > +int __compat_vma_mmap(struct vm_area_desc *desc, > + struct vm_area_struct *vma) > +{ > + int err; > + > + /* Perform any preparatory tasks for mmap action. */ > + err = mmap_action_prepare(desc); > + if (err) > + return err; > + /* Update the VMA from the descriptor. */ > + compat_set_vma_from_desc(vma, desc); > + /* Complete any specified mmap actions. */ > + err = mmap_action_complete(vma, &desc->action); > + if (err) > + return err; > + > + /* Invoke vm_ops->mapped callback. */ > + return __compat_vma_mapped(desc->file, vma); > +} > +EXPORT_SYMBOL(__compat_vma_mmap); > + > /** > * compat_vma_mmap() - Apply the file's .mmap_prepare() hook to an > * existing VMA and execute any requested actions. > @@ -1218,10 +1265,10 @@ static int __compat_vma_mapped(struct file *file, > struct vm_area_struct *vma) > * @vma: The VMA to apply the .mmap_prepare() hook to. > * > * Ordinarily, .mmap_prepare() is invoked directly upon mmap(). However, > certain > - * stacked filesystems invoke a nested mmap hook of an underlying file. > + * stacked drivers invoke a nested mmap hook of an underlying file. > * > - * Until all filesystems are converted to use .mmap_prepare(), we must be > - * conservative and continue to invoke these stacked filesystems using the > + * Until all drivers are converted to use .mmap_prepare(), we must be > + * conservative and continue to invoke these stacked drivers using the > * deprecated .mmap() hook. > * > * However we have a problem if the underlying file system possesses an > @@ -1232,20 +1279,22 @@ static int __compat_vma_mapped(struct file *file, > struct vm_area_struct *vma) > * establishes a struct vm_area_desc descriptor, passes to the underlying > * .mmap_prepare() hook and applies any changes performed by it. > * > - * Once the conversion of filesystems is complete this function will no > longer > - * be required and will be removed. > + * Once the conversion of drivers is complete this function will no longer be > + * required and will be removed. > * > * Returns: 0 on success or error. > */ > int compat_vma_mmap(struct file *file, struct vm_area_struct *vma) > { > + struct vm_area_desc desc; > int err; > > - err = __compat_vma_mmap(file, vma); > + compat_set_desc_from_vma(&desc, file, vma); > + err = vfs_mmap_prepare(file, &desc); > if (err) > return err; > > - return __compat_vma_mapped(file, vma); > + return __compat_vma_mmap(&desc, vma); > } > EXPORT_SYMBOL(compat_vma_mmap); > > diff --git a/mm/vma.h b/mm/vma.h > index adc18f7dd9f1..a76046c39b14 100644 > --- a/mm/vma.h > +++ b/mm/vma.h > @@ -300,7 +300,7 @@ static inline int vma_iter_store_gfp(struct vma_iterator > *vmi, > * f_op->mmap() but which might have an underlying file system which > implements > * f_op->mmap_prepare(). > */ > -static inline void set_vma_from_desc(struct vm_area_struct *vma, > +static inline void compat_set_vma_from_desc(struct vm_area_struct *vma, > struct vm_area_desc *desc) > { > /* > diff --git a/tools/testing/vma/include/dup.h b/tools/testing/vma/include/dup.h > index 114daaef4f73..6658df26698a 100644 > --- a/tools/testing/vma/include/dup.h > +++ b/tools/testing/vma/include/dup.h > @@ -519,8 +519,8 @@ enum vma_operation { > */ > struct vm_area_desc { > /* Immutable state. */ > - const struct mm_struct *const mm; > - struct file *const file; /* May vary from vm_file in stacked callers. > */ > + struct mm_struct *mm; > + struct file *file; /* May vary from vm_file in stacked callers. */ > unsigned long start; > unsigned long end; > > @@ -1272,43 +1272,92 @@ static inline void vma_set_anonymous(struct > vm_area_struct *vma) > } > > /* Declared in vma.h. */ > -static inline void set_vma_from_desc(struct vm_area_struct *vma, > +static inline void compat_set_vma_from_desc(struct vm_area_struct *vma, > struct vm_area_desc *desc); > > -static inline int __compat_vma_mmap(const struct file_operations *f_op, > - struct file *file, struct vm_area_struct *vma) > +static inline void compat_set_desc_from_vma(struct vm_area_desc *desc, > + const struct file *file, > + const struct vm_area_struct *vma) > { > - struct vm_area_desc desc = { > - .mm = vma->vm_mm, > - .file = file, > - .start = vma->vm_start, > - .end = vma->vm_end, > + desc->mm = vma->vm_mm; > + desc->file = (struct file *)file; > + desc->start = vma->vm_start; > + desc->end = vma->vm_end; > > - .pgoff = vma->vm_pgoff, > - .vm_file = vma->vm_file, > - .vma_flags = vma->flags, > - .page_prot = vma->vm_page_prot, > + desc->pgoff = vma->vm_pgoff; > + desc->vm_file = vma->vm_file; > + desc->vma_flags = vma->flags; > + desc->page_prot = vma->vm_page_prot; > > - .action.type = MMAP_NOTHING, /* Default */ > - }; > + /* Default. */ > + desc->action.type = MMAP_NOTHING; > +} > + > +static inline unsigned long vma_pages(const struct vm_area_struct *vma) > +{ > + return (vma->vm_end - vma->vm_start) >> PAGE_SHIFT; > +} > + > +static inline void unmap_vma_locked(struct vm_area_struct *vma) > +{ > + const size_t len = vma_pages(vma) << PAGE_SHIFT; > + > + mmap_assert_write_locked(vma->vm_mm); > + do_munmap(vma->vm_mm, vma->vm_start, len, NULL); > +} > + > +static inline int __compat_vma_mapped(struct file *file, struct > vm_area_struct *vma) > +{ > + const struct vm_operations_struct *vm_ops = vma->vm_ops; > int err; > > - err = f_op->mmap_prepare(&desc); > + if (!vm_ops->mapped) > + return 0; > + > + err = vm_ops->mapped(vma->vm_start, vma->vm_end, vma->vm_pgoff, file, > + &vma->vm_private_data); > if (err) > - return err; > + unmap_vma_locked(vma); > + return err; > +} > > - err = mmap_action_prepare(&desc); > +static inline int __compat_vma_mmap(struct vm_area_desc *desc, > + struct vm_area_struct *vma) > +{ > + int err; > + > + /* Perform any preparatory tasks for mmap action. */ > + err = mmap_action_prepare(desc); > + if (err) > + return err; > + /* Update the VMA from the descriptor. */ > + compat_set_vma_from_desc(vma, desc); > + /* Complete any specified mmap actions. */ > + err = mmap_action_complete(vma, &desc->action); > if (err) > return err; > > - set_vma_from_desc(vma, &desc); > - return mmap_action_complete(vma, &desc.action); > + /* Invoke vm_ops->mapped callback. */ > + return __compat_vma_mapped(desc->file, vma); > +} > + > +static inline int vfs_mmap_prepare(struct file *file, struct vm_area_desc > *desc) > +{ > + return file->f_op->mmap_prepare(desc); > } > > static inline int compat_vma_mmap(struct file *file, > struct vm_area_struct *vma) > { > - return __compat_vma_mmap(file->f_op, file, vma); > + struct vm_area_desc desc; > + int err; > + > + compat_set_desc_from_vma(&desc, file, vma); > + err = vfs_mmap_prepare(file, &desc); > + if (err) > + return err; > + > + return __compat_vma_mmap(&desc, vma); > } > > > @@ -1318,11 +1367,6 @@ static inline void vma_iter_init(struct vma_iterator > *vmi, > mas_init(&vmi->mas, &mm->mm_mt, addr); > } > > -static inline unsigned long vma_pages(struct vm_area_struct *vma) > -{ > - return (vma->vm_end - vma->vm_start) >> PAGE_SHIFT; > -} > - > static inline void mmap_assert_locked(struct mm_struct *); > static inline struct vm_area_struct *find_vma_intersection(struct mm_struct > *mm, > unsigned long start_addr, > @@ -1492,11 +1536,6 @@ static inline int vfs_mmap(struct file *file, struct > vm_area_struct *vma) > return file->f_op->mmap(file, vma); > } > > -static inline int vfs_mmap_prepare(struct file *file, struct vm_area_desc > *desc) > -{ > - return file->f_op->mmap_prepare(desc); > -} > - > static inline void vma_set_file(struct vm_area_struct *vma, struct file > *file) > { > /* Changing an anonymous vma with this is illegal */ > @@ -1521,11 +1560,3 @@ static inline pgprot_t vma_get_page_prot(vma_flags_t > vma_flags) > > return vm_get_page_prot(vm_flags); > } > - > -static inline void unmap_vma_locked(struct vm_area_struct *vma) > -{ > - const size_t len = vma_pages(vma) << PAGE_SHIFT; > - > - mmap_assert_write_locked(vma->vm_mm); > - do_munmap(vma->vm_mm, vma->vm_start, len, NULL); > -} > -- > 2.53.0 >

