We can now update hugetlb to make use of the new .mmap_prepare() hook by
deferring the reservation of pages until the VMA is fully established,
handling this in the f_op->mmap_complete() hook.

We hold the VMA write lock throughout so we can't race with faults. rmap
can discover the VMA, but this should not cause a problem.
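
To illustrate the shape of the split (a sketch only, not part of this
patch; the example_* names are hypothetical), .mmap_prepare() receives a
struct vm_area_desc describing the proposed mapping and is limited to
validation and setting fields such as desc->vm_flags and desc->vm_ops,
while .mmap_complete() runs once the VMA is fully established and
write-locked, making it the place to reserve or allocate resources:

  static const struct vm_operations_struct example_vm_ops = {
  };

  static int example_mmap_prepare(struct vm_area_desc *desc)
  {
          /* Validation and flag/ops setup only - no allocation yet. */
          desc->vm_flags |= VM_DONTEXPAND;
          desc->vm_ops = &example_vm_ops;
          return 0;
  }

  static int example_mmap_complete(struct file *file,
                                   struct vm_area_struct *vma,
                                   const void *context)
  {
          /*
           * The VMA is now inserted and held write-locked, so faults
           * cannot race with us; reserve/allocate resources here.
           */
          return 0;
  }

  static const struct file_operations example_fops = {
          .mmap_prepare   = example_mmap_prepare,
          .mmap_complete  = example_mmap_complete,
  };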

Signed-off-by: Lorenzo Stoakes <[email protected]>
---
 fs/hugetlbfs/inode.c | 86 ++++++++++++++++++++++++--------------------
 1 file changed, 47 insertions(+), 39 deletions(-)

diff --git a/fs/hugetlbfs/inode.c b/fs/hugetlbfs/inode.c
index 3cfdf4091001..46d1ddc654c2 100644
--- a/fs/hugetlbfs/inode.c
+++ b/fs/hugetlbfs/inode.c
@@ -96,39 +96,14 @@ static const struct fs_parameter_spec hugetlb_fs_parameters[] = {
 #define PGOFF_LOFFT_MAX \
        (((1UL << (PAGE_SHIFT + 1)) - 1) <<  (BITS_PER_LONG - (PAGE_SHIFT + 1)))
 
-static int hugetlbfs_file_mmap(struct file *file, struct vm_area_struct *vma)
+static int hugetlb_file_mmap_complete(struct file *file, struct vm_area_struct *vma,
+                                     const void *context)
 {
        struct inode *inode = file_inode(file);
-       loff_t len, vma_len;
-       int ret;
        struct hstate *h = hstate_file(file);
-       vm_flags_t vm_flags;
-
-       /*
-        * vma address alignment (but not the pgoff alignment) has
-        * already been checked by prepare_hugepage_range.  If you add
-        * any error returns here, do so after setting VM_HUGETLB, so
-        * is_vm_hugetlb_page tests below unmap_region go the right
-        * way when do_mmap unwinds (may be important on powerpc
-        * and ia64).
-        */
-       vm_flags_set(vma, VM_HUGETLB | VM_DONTEXPAND);
-       vma->vm_ops = &hugetlb_vm_ops;
-
-       /*
-        * page based offset in vm_pgoff could be sufficiently large to
-        * overflow a loff_t when converted to byte offset.  This can
-        * only happen on architectures where sizeof(loff_t) ==
-        * sizeof(unsigned long).  So, only check in those instances.
-        */
-       if (sizeof(unsigned long) == sizeof(loff_t)) {
-               if (vma->vm_pgoff & PGOFF_LOFFT_MAX)
-                       return -EINVAL;
-       }
-
-       /* must be huge page aligned */
-       if (vma->vm_pgoff & (~huge_page_mask(h) >> PAGE_SHIFT))
-               return -EINVAL;
+       vm_flags_t vm_flags = vma->vm_flags;
+       loff_t len, vma_len;
+       int ret = 0;
 
        vma_len = (loff_t)(vma->vm_end - vma->vm_start);
        len = vma_len + ((loff_t)vma->vm_pgoff << PAGE_SHIFT);
@@ -139,9 +114,6 @@ static int hugetlbfs_file_mmap(struct file *file, struct vm_area_struct *vma)
        inode_lock(inode);
        file_accessed(file);
 
-       ret = -ENOMEM;
-
-       vm_flags = vma->vm_flags;
        /*
         * for SHM_HUGETLB, the pages are reserved in the shmget() call so skip
         * reserving here. Note: only for SHM hugetlbfs file, the inode
@@ -151,20 +123,55 @@ static int hugetlbfs_file_mmap(struct file *file, struct vm_area_struct *vma)
                vm_flags |= VM_NORESERVE;
 
        if (hugetlb_reserve_pages(inode,
-                               vma->vm_pgoff >> huge_page_order(h),
-                               len >> huge_page_shift(h), vma,
-                               vm_flags) < 0)
+                       vma->vm_pgoff >> huge_page_order(h),
+                       len >> huge_page_shift(h), vma,
+                       vm_flags) < 0) {
+               ret = -ENOMEM;
                goto out;
+       }
 
-       ret = 0;
        if (vma->vm_flags & VM_WRITE && inode->i_size < len)
                i_size_write(inode, len);
+
 out:
        inode_unlock(inode);
-
        return ret;
 }
 
+static int hugetlbfs_file_mmap_prepare(struct vm_area_desc *desc)
+{
+       struct file *file = desc->file;
+       struct hstate *h = hstate_file(file);
+
+       /*
+        * vma address alignment (but not the pgoff alignment) has
+        * already been checked by prepare_hugepage_range.  If you add
+        * any error returns here, do so after setting VM_HUGETLB, so
+        * is_vm_hugetlb_page tests below unmap_region go the right
+        * way when do_mmap unwinds (may be important on powerpc
+        * and ia64).
+        */
+       desc->vm_flags |= VM_HUGETLB | VM_DONTEXPAND;
+       desc->vm_ops = &hugetlb_vm_ops;
+
+       /*
+        * page based offset in vm_pgoff could be sufficiently large to
+        * overflow a loff_t when converted to byte offset.  This can
+        * only happen on architectures where sizeof(loff_t) ==
+        * sizeof(unsigned long).  So, only check in those instances.
+        */
+       if (sizeof(unsigned long) == sizeof(loff_t)) {
+               if (desc->pgoff & PGOFF_LOFFT_MAX)
+                       return -EINVAL;
+       }
+
+       /* must be huge page aligned */
+       if (desc->pgoff & (~huge_page_mask(h) >> PAGE_SHIFT))
+               return -EINVAL;
+
+       return 0;
+}
+
 /*
  * Called under mmap_write_lock(mm).
  */
@@ -1219,7 +1226,8 @@ static void init_once(void *foo)
 
 static const struct file_operations hugetlbfs_file_operations = {
        .read_iter              = hugetlbfs_read_iter,
-       .mmap                   = hugetlbfs_file_mmap,
+       .mmap_prepare           = hugetlbfs_file_mmap_prepare,
+       .mmap_complete          = hugetlb_file_mmap_complete,
        .fsync                  = noop_fsync,
        .get_unmapped_area      = hugetlb_get_unmapped_area,
        .llseek                 = default_llseek,
-- 
2.51.0

