From: Dave Hansen <dave.han...@linux.intel.com>

Add the pagetable helper functions to manage the separate user space page
tables.

[ tglx: Split out from the big combo kaiser patch. Folded Andy's
        simplification ]

Signed-off-by: Dave Hansen <dave.han...@linux.intel.com>
Signed-off-by: Thomas Gleixner <t...@linutronix.de>
Signed-off-by: Ingo Molnar <mi...@kernel.org>
Cc: Andy Lutomirski <l...@kernel.org>
Cc: Boris Ostrovsky <boris.ostrov...@oracle.com>
Cc: Borislav Petkov <b...@alien8.de>
Cc: Brian Gerst <brge...@gmail.com>
Cc: Dave Hansen <dave.han...@linux.intel.com>
Cc: David Laight <david.lai...@aculab.com>
Cc: Denys Vlasenko <dvlas...@redhat.com>
Cc: Eduardo Valentin <edu...@amazon.com>
Cc: Greg KH <gre...@linuxfoundation.org>
Cc: H. Peter Anvin <h...@zytor.com>
Cc: Josh Poimboeuf <jpoim...@redhat.com>
Cc: Juergen Gross <jgr...@suse.com>
Cc: Linus Torvalds <torva...@linux-foundation.org>
Cc: Peter Zijlstra <pet...@infradead.org>
Cc: Will Deacon <will.dea...@arm.com>
Cc: aligu...@amazon.com
Cc: daniel.gr...@iaik.tugraz.at
Cc: hu...@google.com
Cc: keesc...@google.com
---
 arch/x86/include/asm/pgtable_64.h |  123 ++++++++++++++++++++++++++++++++++++++
 1 file changed, 123 insertions(+)

--- a/arch/x86/include/asm/pgtable_64.h
+++ b/arch/x86/include/asm/pgtable_64.h
@@ -131,9 +131,128 @@ static inline pud_t native_pudp_get_and_
 #endif
 }
 
+#ifdef CONFIG_PAGE_TABLE_ISOLATION
+/*
+ * All top-level PAGE_TABLE_ISOLATION page tables are order-1 pages
+ * (8k-aligned and 8k in size).  The kernel one is at the beginning 4k and
+ * the user one is in the last 4k.  To switch between them, you
+ * just need to flip the 12th bit in their addresses.
+ */
+#define PTI_PGTABLE_SWITCH_BIT PAGE_SHIFT
+
+/*
+ * This generates better code than the inline assembly in
+ * __set_bit().
+ */
+static inline void *ptr_set_bit(void *ptr, int bit)
+{
+       unsigned long __ptr = (unsigned long)ptr;
+
+       __ptr |= BIT(bit);
+       return (void *)__ptr;
+}
+static inline void *ptr_clear_bit(void *ptr, int bit)
+{
+       unsigned long __ptr = (unsigned long)ptr;
+
+       __ptr &= ~BIT(bit);
+       return (void *)__ptr;
+}
+
+static inline pgd_t *kernel_to_user_pgdp(pgd_t *pgdp)
+{
+       return ptr_set_bit(pgdp, PTI_PGTABLE_SWITCH_BIT);
+}
+
+static inline pgd_t *user_to_kernel_pgdp(pgd_t *pgdp)
+{
+       return ptr_clear_bit(pgdp, PTI_PGTABLE_SWITCH_BIT);
+}
+
+static inline p4d_t *kernel_to_user_p4dp(p4d_t *p4dp)
+{
+       return ptr_set_bit(p4dp, PTI_PGTABLE_SWITCH_BIT);
+}
+
+static inline p4d_t *user_to_kernel_p4dp(p4d_t *p4dp)
+{
+       return ptr_clear_bit(p4dp, PTI_PGTABLE_SWITCH_BIT);
+}
+#endif /* CONFIG_PAGE_TABLE_ISOLATION */
+
+/*
+ * Page table pages are page-aligned.  The lower half of the top
+ * level is used for userspace and the top half for the kernel.
+ *
+ * Returns true for parts of the PGD that map userspace and
+ * false for the parts that map the kernel.
+ */
+static inline bool pgdp_maps_userspace(void *__ptr)
+{
+       unsigned long ptr = (unsigned long)__ptr;
+
+       return (ptr & ~PAGE_MASK) < (PAGE_SIZE / 2);
+}
+
+/*
+ * Take a PGD location (pgdp) and a pgd value that needs to be set there.
+ * Populates the user and returns the resulting PGD that must be set in
+ * the kernel copy of the page tables.
+ */
+static inline pgd_t pti_set_user_pgd(pgd_t *pgdp, pgd_t pgd)
+{
+#ifdef CONFIG_PAGE_TABLE_ISOLATION
+       if (!static_cpu_has_bug(X86_BUG_CPU_SECURE_MODE_PTI))
+               return pgd;
+
+       if (pgdp_maps_userspace(pgdp)) {
+               /*
+                * The user page tables get the full PGD,
+                * accessible from userspace:
+                */
+               kernel_to_user_pgdp(pgdp)->pgd = pgd.pgd;
+
+               /*
+                * If this is normal user memory, make it NX in the kernel
+                * pagetables so that, if we somehow screw up and return to
+                * usermode with the kernel CR3 loaded, we'll get a page
+                * fault instead of allowing user code to execute with
+                * the wrong CR3.
+                *
+                * As exceptions, we don't set NX if:
+                *  - this is EFI or similar, the kernel may execute from it
+                *  - we don't have NX support
+                *  - we're clearing the PGD (i.e. pgd.pgd == 0).
+                */
+               if ((pgd.pgd & _PAGE_USER) && (__supported_pte_mask & _PAGE_NX))
+                       pgd.pgd |= _PAGE_NX;
+       } else {
+               /*
+                * Changes to the high (kernel) portion of the kernelmode
+                * page tables are not automatically propagated to the
+                * usermode tables.
+                *
+                * Users should keep in mind that, unlike the kernelmode
+                * tables, there is no vmalloc_fault equivalent for the
+                * usermode tables.  Top-level entries added to init_mm's
+                * usermode pgd after boot will not be automatically
+                * propagated to other mms.
+                */
+       }
+#endif
+
+       /* return the copy of the PGD we want the kernel to use: */
+       return pgd;
+}
+
+
 static inline void native_set_p4d(p4d_t *p4dp, p4d_t p4d)
 {
+#if defined(CONFIG_PAGE_TABLE_ISOLATION) && !defined(CONFIG_X86_5LEVEL)
+       p4dp->pgd = pti_set_user_pgd(&p4dp->pgd, p4d.pgd);
+#else
        *p4dp = p4d;
+#endif
 }
 
 static inline void native_p4d_clear(p4d_t *p4d)
@@ -147,7 +266,11 @@ static inline void native_p4d_clear(p4d_
 
 static inline void native_set_pgd(pgd_t *pgdp, pgd_t pgd)
 {
+#ifdef CONFIG_PAGE_TABLE_ISOLATION
+       *pgdp = pti_set_user_pgd(pgdp, pgd);
+#else
        *pgdp = pgd;
+#endif
 }
 
 static inline void native_pgd_clear(pgd_t *pgd)


Reply via email to