We saw a suspected spurious fault that failed the VMA access check
but passed the spurious page table check for a user space address
access (rw, exec). This patch tries to catch that case, which might
indicate either a stale TLB (a software bug to be found) or a harmful
spurious fault.
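
In other words, when the VMA check in access_error() fails, the patch
re-walks the kernel page tables before signalling the task; if the page
tables actually permit the access, the fault is treated as spurious and
only a one-time warning is emitted. A minimal standalone C model of that
decision is sketched below; the helper page_tables_permit() is only an
illustrative stand-in for the kernel's spurious_page_table_check(), not
kernel code:

	#include <stdbool.h>
	#include <stdio.h>

	/* Stand-in for spurious_page_table_check(): in the kernel this
	 * walks the page tables and reports whether they already permit
	 * the faulting access. */
	static bool page_tables_permit(unsigned long error_code, unsigned long address)
	{
		(void)error_code;
		(void)address;
		return false;
	}

	/* Simplified model of the patched access_error() flow. */
	static int model_access_error(bool vma_allows, unsigned long error_code,
				      unsigned long address)
	{
		if (vma_allows)
			return 0;	/* access is fine, handle the fault */

		if (page_tables_permit(error_code, address)) {
			/* VMA said no, page tables said yes: likely a stale
			 * TLB entry, so warn instead of killing the task. */
			fprintf(stderr, "spurious fault? Failed VMA check, passed page table check!\n");
			return 0;
		}

		return 1;	/* genuine access error */
	}

	int main(void)
	{
		/* VMA check fails and page tables also deny -> real error. */
		printf("%d\n", model_access_error(false, 0, 0));
		return 0;
	}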

Signed-off-by: Luming Yu <luming...@intel.com>
Signed-off-by: Yongkai Wu <yongka...@tencent.com>
---
 arch/x86/mm/fault.c | 95 ++++++++++++++++++++++++++++++-----------------------
 1 file changed, 54 insertions(+), 41 deletions(-)

diff --git a/arch/x86/mm/fault.c b/arch/x86/mm/fault.c
index 71d4b9d4d43f..5e2a49010d87 100644
--- a/arch/x86/mm/fault.c
+++ b/arch/x86/mm/fault.c
@@ -1004,29 +1004,7 @@ static int spurious_kernel_fault_check(unsigned long error_code, pte_t *pte)
        return 1;
 }
 
-/*
- * Handle a spurious fault caused by a stale TLB entry.
- *
- * This allows us to lazily refresh the TLB when increasing the
- * permissions of a kernel page (RO -> RW or NX -> X).  Doing it
- * eagerly is very expensive since that implies doing a full
- * cross-processor TLB flush, even if no stale TLB entries exist
- * on other processors.
- *
- * Spurious faults may only occur if the TLB contains an entry with
- * fewer permission than the page table entry.  Non-present (P = 0)
- * and reserved bit (R = 1) faults are never spurious.
- *
- * There are no security implications to leaving a stale TLB when
- * increasing the permissions on a page.
- *
- * Returns non-zero if a spurious fault was handled, zero otherwise.
- *
- * See Intel Developer's Manual Vol 3 Section 4.10.4.3, bullet 3
- * (Optional Invalidation).
- */
-static noinline int
-spurious_kernel_fault(unsigned long error_code, unsigned long address)
+static int spurious_page_table_check(unsigned long error_code, unsigned long address)
 {
        pgd_t *pgd;
        p4d_t *p4d;
@@ -1035,19 +1013,6 @@ spurious_kernel_fault(unsigned long error_code, unsigned long address)
        pte_t *pte;
        int ret;
 
-       /*
-        * Only writes to RO or instruction fetches from NX may cause
-        * spurious faults.
-        *
-        * These could be from user or supervisor accesses but the TLB
-        * is only lazily flushed after a kernel mapping protection
-        * change, so user accesses are not expected to cause spurious
-        * faults.
-        */
-       if (error_code != (X86_PF_WRITE | X86_PF_PROT) &&
-           error_code != (X86_PF_INSTR | X86_PF_PROT))
-               return 0;
-
        pgd = init_mm.pgd + pgd_index(address);
        if (!pgd_present(*pgd))
                return 0;
@@ -1090,12 +1055,52 @@ spurious_kernel_fault(unsigned long error_code, unsigned long address)
 
        return ret;
 }
+
+/*
+ * Handle a spurious fault caused by a stale TLB entry.
+ *
+ * This allows us to lazily refresh the TLB when increasing the
+ * permissions of a kernel page (RO -> RW or NX -> X).  Doing it
+ * eagerly is very expensive since that implies doing a full
+ * cross-processor TLB flush, even if no stale TLB entries exist
+ * on other processors.
+ *
+ * Spurious faults may only occur if the TLB contains an entry with
+ * fewer permission than the page table entry.  Non-present (P = 0)
+ * and reserved bit (R = 1) faults are never spurious.
+ *
+ * There are no security implications to leaving a stale TLB when
+ * increasing the permissions on a page.
+ *
+ * Returns non-zero if a spurious fault was handled, zero otherwise.
+ *
+ * See Intel Developer's Manual Vol 3 Section 4.10.4.3, bullet 3
+ * (Optional Invalidation).
+ */
+static noinline int
+spurious_kernel_fault(unsigned long error_code, unsigned long address)
+{
+       /*
+        * Only writes to RO or instruction fetches from NX may cause
+        * spurious faults.
+        *
+        * These could be from user or supervisor accesses but the TLB
+        * is only lazily flushed after a kernel mapping protection
+        * change, so user accesses are not expected to cause spurious
+        * faults.
+        */
+       if (error_code != (X86_PF_WRITE | X86_PF_PROT) &&
+           error_code != (X86_PF_INSTR | X86_PF_PROT))
+               return 0;
+
+       return spurious_page_table_check(error_code, address);
+}
 NOKPROBE_SYMBOL(spurious_kernel_fault);
 
 int show_unhandled_signals = 1;
 
 static inline int
-access_error(unsigned long error_code, struct vm_area_struct *vma)
+access_error(unsigned long error_code, unsigned long address, struct vm_area_struct *vma)
 {
        /* This is only called for the current mm, so: */
        bool foreign = false;
@@ -1120,19 +1125,27 @@ access_error(unsigned long error_code, struct vm_area_struct *vma)
        if (error_code & X86_PF_WRITE) {
                /* write, present and write, not present: */
                if (unlikely(!(vma->vm_flags & VM_WRITE)))
-                       return 1;
+                       goto maybe_spurious;
                return 0;
        }
 
        /* read, present: */
        if (unlikely(error_code & X86_PF_PROT))
-               return 1;
+               goto maybe_spurious;
 
        /* read, not present: */
        if (unlikely(!(vma->vm_flags & (VM_READ | VM_EXEC | VM_WRITE))))
-               return 1;
+               goto maybe_spurious;
 
        return 0;
+
+maybe_spurious:
+       if (spurious_page_table_check(error_code, address)) {
+               WARN_ONCE(1, "spurious fault? Failed VMA check, passed page table check!\n");
+               return 0;
+       }
+
+       return 1;
 }
 
 static int fault_in_kernel_space(unsigned long address)
@@ -1402,7 +1415,7 @@ void do_user_addr_fault(struct pt_regs *regs,
         * we can handle it..
         */
 good_area:
-       if (unlikely(access_error(sw_error_code, vma))) {
+       if (unlikely(access_error(sw_error_code, address, vma))) {
                bad_area_access_error(regs, sw_error_code, address, vma);
                return;
        }
-- 
2.14.4
