+/* Can we retract page tables for this file-backed VMA? */
+static bool file_backed_vma_is_retractable(struct vm_area_struct *vma)

It's not really the VMA that is retractable :)

Given that the function we are called this from is called "retract_page_tables" (and not file_backed_...) I guess I would just have called this

"page_tables_are_retractable"

"page_tables_support_retract"

Or sth. along those lines.

+{
+       /*
+        * Check vma->anon_vma to exclude MAP_PRIVATE mappings that
+        * got written to. These VMAs are likely not worth removing
+        * page tables from, as PMD-mapping is likely to be split later.
+        */
+       if (READ_ONCE(vma->anon_vma))
+               return false;
+
+       /*
+        * When a vma is registered with uffd-wp, we cannot recycle
+        * the page table because there may be pte markers installed.
+        * Other vmas can still have the same file mapped hugely, but
+        * skip this one: it will always be mapped in small page size
+        * for uffd-wp registered ranges.
+        */
+       if (userfaultfd_wp(vma))
+               return false;
+
+       /*
+        * If the VMA contains guard regions then we can't collapse it.
+        *
+        * This is set atomically on guard marker installation under mmap/VMA
+        * read lock, and here we may not hold any VMA or mmap lock at all.
+        *
+        * This is therefore serialised on the PTE page table lock, which is
+        * obtained on guard region installation after the flag is set, so this
+        * check being performed under this lock excludes races.
+        */
+       if (vma_flag_test_atomic(vma, VM_MAYBE_GUARD_BIT))
+               return false;
+
+       return true;
+}
+
  static void retract_page_tables(struct address_space *mapping, pgoff_t pgoff)
  {
        struct vm_area_struct *vma;
@@ -1724,14 +1761,6 @@ static void retract_page_tables(struct address_space 
*mapping, pgoff_t pgoff)
                spinlock_t *ptl;
                bool success = false;
- /*
-                * Check vma->anon_vma to exclude MAP_PRIVATE mappings that
-                * got written to. These VMAs are likely not worth removing
-                * page tables from, as PMD-mapping is likely to be split later.
-                */
-               if (READ_ONCE(vma->anon_vma))
-                       continue;
-
                addr = vma->vm_start + ((pgoff - vma->vm_pgoff) << PAGE_SHIFT);
                if (addr & ~HPAGE_PMD_MASK ||
                    vma->vm_end < addr + HPAGE_PMD_SIZE)
@@ -1743,14 +1772,8 @@ static void retract_page_tables(struct address_space 
*mapping, pgoff_t pgoff)
if (hpage_collapse_test_exit(mm))
                        continue;
-               /*
-                * When a vma is registered with uffd-wp, we cannot recycle
-                * the page table because there may be pte markers installed.
-                * Other vmas can still have the same file mapped hugely, but
-                * skip this one: it will always be mapped in small page size
-                * for uffd-wp registered ranges.
-                */
-               if (userfaultfd_wp(vma))
+
+               if (!file_backed_vma_is_retractable(vma))
                        continue;
/* PTEs were notified when unmapped; but now for the PMD? */
@@ -1777,15 +1800,15 @@ static void retract_page_tables(struct address_space 
*mapping, pgoff_t pgoff)
                        spin_lock_nested(ptl, SINGLE_DEPTH_NESTING);
/*
-                * Huge page lock is still held, so normally the page table
-                * must remain empty; and we have already skipped anon_vma
-                * and userfaultfd_wp() vmas.  But since the mmap_lock is not
-                * held, it is still possible for a racing userfaultfd_ioctl()
-                * to have inserted ptes or markers.  Now that we hold ptlock,
-                * repeating the anon_vma check protects from one category,
-                * and repeating the userfaultfd_wp() check from another.
+                * Huge page lock is still held, so normally the page table must
+                * remain empty; and we have already skipped anon_vma and
+                * userfaultfd_wp() vmas.  But since the mmap_lock is not held,
+                * it is still possible for a racing userfaultfd_ioctl() or
+                * madvise() to have inserted ptes or markers.  Now that we hold
+                * ptlock, repeating the retractable checks protects us from
+                * races against the prior checks.
                 */
-               if (likely(!vma->anon_vma && !userfaultfd_wp(vma))) {
+               if (likely(file_backed_vma_is_retractable(vma))) {
                        pgt_pmd = pmdp_collapse_flush(vma, addr, pmd);
                        pmdp_get_lockless_sync();
                        success = true;
diff --git a/mm/madvise.c b/mm/madvise.c
index 0b3280752bfb..5dbe40be7c65 100644
--- a/mm/madvise.c
+++ b/mm/madvise.c
@@ -1141,15 +1141,21 @@ static long madvise_guard_install(struct 
madvise_behavior *madv_behavior)
                return -EINVAL;
/*
-        * If we install guard markers, then the range is no longer
-        * empty from a page table perspective and therefore it's
-        * appropriate to have an anon_vma.
-        *
-        * This ensures that on fork, we copy page tables correctly.
+        * Set atomically under read lock. All pertinent readers will need to
+        * acquire an mmap/VMA write lock to read it. All remaining readers may
+        * or may not see the flag set, but we don't care.
+        */
+       vma_flag_set_atomic(vma, VM_MAYBE_GUARD_BIT);
+

In general LGTM.

+       /*
+        * If anonymous and we are establishing page tables the VMA ought to
+        * have an anon_vma associated with it.

Do you know why? I know that as soon as we have anon folios in there we need it, but is it still required for guard regions? Patch #5 should handle the for case I guess.

Which other code depends on that?

--
Cheers

David

Reply via email to