Add a new PTE marker that results in any access causing the accessing
process to segfault.

This is preferable to PTE_MARKER_POISONED, which results in the same
handling as hardware poisoned memory, and is thus undesirable for cases
where we simply wish to 'soft' poison a range.

This is in preparation for implementing the ability to specify guard pages
at the page table level, i.e. ranges that, when accessed, should cause
process termination.

Additionally, rename zap_drop_file_uffd_wp() to zap_drop_markers() - the
function checks the ZAP_FLAG_DROP_MARKER flag so naming it for this single
purpose was simply incorrect.

We then reuse the same logic to determine whether a zap should clear a
guard entry - this should only be performed on teardown and never on
MADV_DONTNEED or the like.

Signed-off-by: Lorenzo Stoakes <[email protected]>
---
 include/linux/mm_inline.h |  2 +-
 include/linux/swapops.h   | 26 ++++++++++++++++++++++++--
 mm/hugetlb.c              |  3 +++
 mm/memory.c               | 18 +++++++++++++++---
 4 files changed, 43 insertions(+), 6 deletions(-)

diff --git a/include/linux/mm_inline.h b/include/linux/mm_inline.h
index 6f801c7b36e2..0d97a14bf051 100644
--- a/include/linux/mm_inline.h
+++ b/include/linux/mm_inline.h
@@ -531,7 +531,7 @@ static inline pte_marker copy_pte_marker(
 {
        pte_marker srcm = pte_marker_get(entry);
        /* Always copy error entries. */
-       pte_marker dstm = srcm & PTE_MARKER_POISONED;
+       pte_marker dstm = srcm & (PTE_MARKER_POISONED | PTE_MARKER_GUARD);
 
        /* Only copy PTE markers if UFFD register matches. */
        if ((srcm & PTE_MARKER_UFFD_WP) && userfaultfd_wp(dst_vma))
diff --git a/include/linux/swapops.h b/include/linux/swapops.h
index cb468e418ea1..4d0606df0791 100644
--- a/include/linux/swapops.h
+++ b/include/linux/swapops.h
@@ -426,9 +426,15 @@ typedef unsigned long pte_marker;
  * "Poisoned" here is meant in the very general sense of "future accesses are
  * invalid", instead of referring very specifically to hardware memory errors.
  * This marker is meant to represent any of various different causes of this.
+ *
+ * Note that, when encountered by the faulting logic, PTEs with this marker 
will
+ * result in VM_FAULT_HWPOISON and thus regardless trigger hardware memory 
error
+ * logic.
  */
 #define  PTE_MARKER_POISONED                   BIT(1)
-#define  PTE_MARKER_MASK                       (BIT(2) - 1)
+/* Indicates that, on fault, this PTE will case a SIGSEGV signal to be sent. */
+#define  PTE_MARKER_GUARD                      BIT(2)
+#define  PTE_MARKER_MASK                       (BIT(3) - 1)
 
 static inline swp_entry_t make_pte_marker_entry(pte_marker marker)
 {
@@ -461,9 +467,25 @@ static inline swp_entry_t make_poisoned_swp_entry(void)
 }
 
 static inline int is_poisoned_swp_entry(swp_entry_t entry)
+{
+       /*
+        * We treat guard pages as poisoned too as these have the same semantics
+        * as poisoned ranges, only with different fault handling.
+        */
+       return is_pte_marker_entry(entry) &&
+               (pte_marker_get(entry) &
+                (PTE_MARKER_POISONED | PTE_MARKER_GUARD));
+}
+
+static inline swp_entry_t make_guard_swp_entry(void)
+{
+       return make_pte_marker_entry(PTE_MARKER_GUARD);
+}
+
+static inline int is_guard_swp_entry(swp_entry_t entry)
 {
        return is_pte_marker_entry(entry) &&
-           (pte_marker_get(entry) & PTE_MARKER_POISONED);
+               (pte_marker_get(entry) & PTE_MARKER_GUARD);
 }
 
 /*
diff --git a/mm/hugetlb.c b/mm/hugetlb.c
index 906294ac85dc..50e3f6ed73ac 100644
--- a/mm/hugetlb.c
+++ b/mm/hugetlb.c
@@ -6353,6 +6353,9 @@ vm_fault_t hugetlb_fault(struct mm_struct *mm, struct 
vm_area_struct *vma,
                                ret = VM_FAULT_HWPOISON_LARGE |
                                      VM_FAULT_SET_HINDEX(hstate_index(h));
                                goto out_mutex;
+                       } else if (marker & PTE_MARKER_GUARD) {
+                               ret = VM_FAULT_SIGSEGV;
+                               goto out_mutex;
                        }
                }
 
diff --git a/mm/memory.c b/mm/memory.c
index 0f614523b9f4..551455cd453f 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -1455,7 +1455,7 @@ static inline bool should_zap_folio(struct zap_details 
*details,
        return !folio_test_anon(folio);
 }
 
-static inline bool zap_drop_file_uffd_wp(struct zap_details *details)
+static inline bool zap_drop_markers(struct zap_details *details)
 {
        if (!details)
                return false;
@@ -1476,7 +1476,7 @@ zap_install_uffd_wp_if_needed(struct vm_area_struct *vma,
        if (vma_is_anonymous(vma))
                return;
 
-       if (zap_drop_file_uffd_wp(details))
+       if (zap_drop_markers(details))
                return;
 
        for (;;) {
@@ -1671,7 +1671,15 @@ static unsigned long zap_pte_range(struct mmu_gather 
*tlb,
                         * drop the marker if explicitly requested.
                         */
                        if (!vma_is_anonymous(vma) &&
-                           !zap_drop_file_uffd_wp(details))
+                           !zap_drop_markers(details))
+                               continue;
+               } else if (is_guard_swp_entry(entry)) {
+                       /*
+                        * Ordinary zapping should not remove guard PTE
+                        * markers. Only do so if we should remove PTE markers
+                        * in general.
+                        */
+                       if (!zap_drop_markers(details))
                                continue;
                } else if (is_hwpoison_entry(entry) ||
                           is_poisoned_swp_entry(entry)) {
@@ -4003,6 +4011,10 @@ static vm_fault_t handle_pte_marker(struct vm_fault *vmf)
        if (marker & PTE_MARKER_POISONED)
                return VM_FAULT_HWPOISON;
 
+       /* Hitting a guard page is always a fatal condition. */
+       if (marker & PTE_MARKER_GUARD)
+               return VM_FAULT_SIGSEGV;
+
        if (pte_marker_entry_uffd_wp(entry))
                return pte_marker_handle_uffd_wp(vmf);
 
-- 
2.46.2


Reply via email to