Commit-ID:  8c06c7740d191b9055cb9be920579d5ecdd26303
Gitweb:     https://git.kernel.org/tip/8c06c7740d191b9055cb9be920579d5ecdd26303
Author:     Dave Hansen <dave.han...@linux.intel.com>
AuthorDate: Fri, 6 Apr 2018 13:55:18 -0700
Committer:  Ingo Molnar <mi...@kernel.org>
CommitDate: Thu, 12 Apr 2018 09:06:00 +0200

x86/pti: Leave kernel text global for !PCID

Global pages are bad for hardening because they potentially let an
exploit read the kernel image via a Meltdown-style attack which
makes it easier to find gadgets.

But, global pages are good for performance because they reduce TLB
misses when making user/kernel transitions, especially when PCIDs
are not available, such as on older hardware, or where a hypervisor
has disabled them for some reason.

This patch implements a basic, sane policy: If you have PCIDs, you
only map a minimal amount of kernel text global.  If you do not have
PCIDs, you map all kernel text global.

This policy effectively makes PCIDs something that not only adds
performance but a little bit of hardening as well.

I ran a simple "lseek" microbenchmark[1] to test the benefit on
a modern Atom microserver.  Most of the benefit comes from applying
the series before this patch ("entry only"), but there is still a
significant benefit from this patch.

  No Global Lines (baseline  ): 6077741 lseeks/sec
  88 Global Lines (entry only): 7528609 lseeks/sec (+23.9%)
  94 Global Lines (this patch): 8433111 lseeks/sec (+38.8%)

[1.] https://github.com/antonblanchard/will-it-scale/blob/master/tests/lseek1.c

Signed-off-by: Dave Hansen <dave.han...@linux.intel.com>
Cc: Andrea Arcangeli <aarca...@redhat.com>
Cc: Andy Lutomirski <l...@kernel.org>
Cc: Arjan van de Ven <ar...@linux.intel.com>
Cc: Borislav Petkov <b...@alien8.de>
Cc: Dan Williams <dan.j.willi...@intel.com>
Cc: David Woodhouse <dw...@infradead.org>
Cc: Greg Kroah-Hartman <gre...@linuxfoundation.org>
Cc: Hugh Dickins <hu...@google.com>
Cc: Josh Poimboeuf <jpoim...@redhat.com>
Cc: Juergen Gross <jgr...@suse.com>
Cc: Kees Cook <keesc...@google.com>
Cc: Linus Torvalds <torva...@linux-foundation.org>
Cc: Nadav Amit <na...@vmware.com>
Cc: Peter Zijlstra <pet...@infradead.org>
Cc: Thomas Gleixner <t...@linutronix.de>
Cc: linux...@kvack.org
Link: http://lkml.kernel.org/r/20180406205518.e3d98...@viggo.jf.intel.com
Signed-off-by: Ingo Molnar <mi...@kernel.org>
---
 arch/x86/include/asm/pti.h |  2 ++
 arch/x86/mm/init_64.c      |  6 ++++
 arch/x86/mm/pti.c          | 78 +++++++++++++++++++++++++++++++++++++++++++---
 3 files changed, 82 insertions(+), 4 deletions(-)

diff --git a/arch/x86/include/asm/pti.h b/arch/x86/include/asm/pti.h
index 0b5ef05b2d2d..38a17f1d5c9d 100644
--- a/arch/x86/include/asm/pti.h
+++ b/arch/x86/include/asm/pti.h
@@ -6,8 +6,10 @@
 #ifdef CONFIG_PAGE_TABLE_ISOLATION
 extern void pti_init(void);
 extern void pti_check_boottime_disable(void);
+extern void pti_clone_kernel_text(void);
 #else
 static inline void pti_check_boottime_disable(void) { }
+static inline void pti_clone_kernel_text(void) { }
 #endif
 
 #endif /* __ASSEMBLY__ */
diff --git a/arch/x86/mm/init_64.c b/arch/x86/mm/init_64.c
index e6c52dbbf649..6d1ff39c2438 100644
--- a/arch/x86/mm/init_64.c
+++ b/arch/x86/mm/init_64.c
@@ -1290,6 +1290,12 @@ void mark_rodata_ro(void)
                        (unsigned long) __va(__pa_symbol(_sdata)));
 
        debug_checkwx();
+
+       /*
+        * Do this after all of the manipulation of the
+        * kernel text page tables are complete.
+        */
+       pti_clone_kernel_text();
 }
 
 int kern_addr_valid(unsigned long addr)
diff --git a/arch/x86/mm/pti.c b/arch/x86/mm/pti.c
index 1470b173963f..f1fd52f449e0 100644
--- a/arch/x86/mm/pti.c
+++ b/arch/x86/mm/pti.c
@@ -66,12 +66,22 @@ static void __init pti_print_if_secure(const char *reason)
                pr_info("%s\n", reason);
 }
 
+enum pti_mode {
+       PTI_AUTO = 0,
+       PTI_FORCE_OFF,
+       PTI_FORCE_ON
+} pti_mode;
+
 void __init pti_check_boottime_disable(void)
 {
        char arg[5];
        int ret;
 
+       /* Assume mode is auto unless overridden. */
+       pti_mode = PTI_AUTO;
+
        if (hypervisor_is_type(X86_HYPER_XEN_PV)) {
+               pti_mode = PTI_FORCE_OFF;
                pti_print_if_insecure("disabled on XEN PV.");
                return;
        }
@@ -79,18 +89,23 @@ void __init pti_check_boottime_disable(void)
        ret = cmdline_find_option(boot_command_line, "pti", arg, sizeof(arg));
        if (ret > 0)  {
                if (ret == 3 && !strncmp(arg, "off", 3)) {
+                       pti_mode = PTI_FORCE_OFF;
                        pti_print_if_insecure("disabled on command line.");
                        return;
                }
                if (ret == 2 && !strncmp(arg, "on", 2)) {
+                       pti_mode = PTI_FORCE_ON;
                        pti_print_if_secure("force enabled on command line.");
                        goto enable;
                }
-               if (ret == 4 && !strncmp(arg, "auto", 4))
+               if (ret == 4 && !strncmp(arg, "auto", 4)) {
+                       pti_mode = PTI_AUTO;
                        goto autosel;
+               }
        }
 
        if (cmdline_find_option_bool(boot_command_line, "nopti")) {
+               pti_mode = PTI_FORCE_OFF;
                pti_print_if_insecure("disabled on command line.");
                return;
        }
@@ -149,7 +164,7 @@ pgd_t __pti_set_user_pgd(pgd_t *pgdp, pgd_t pgd)
  *
  * Returns a pointer to a P4D on success, or NULL on failure.
  */
-static __init p4d_t *pti_user_pagetable_walk_p4d(unsigned long address)
+static p4d_t *pti_user_pagetable_walk_p4d(unsigned long address)
 {
        pgd_t *pgd = kernel_to_user_pgdp(pgd_offset_k(address));
        gfp_t gfp = (GFP_KERNEL | __GFP_NOTRACK | __GFP_ZERO);
@@ -177,7 +192,7 @@ static __init p4d_t *pti_user_pagetable_walk_p4d(unsigned long address)
  *
  * Returns a pointer to a PMD on success, or NULL on failure.
  */
-static __init pmd_t *pti_user_pagetable_walk_pmd(unsigned long address)
+static pmd_t *pti_user_pagetable_walk_pmd(unsigned long address)
 {
        gfp_t gfp = (GFP_KERNEL | __GFP_NOTRACK | __GFP_ZERO);
        p4d_t *p4d = pti_user_pagetable_walk_p4d(address);
@@ -267,7 +282,7 @@ static void __init pti_setup_vsyscall(void)
 static void __init pti_setup_vsyscall(void) { }
 #endif
 
-static void __init
+static void
 pti_clone_pmds(unsigned long start, unsigned long end, pmdval_t clear)
 {
        unsigned long addr;
@@ -372,6 +387,58 @@ static void __init pti_clone_entry_text(void)
                       _PAGE_RW);
 }
 
+/*
+ * Global pages and PCIDs are both ways to make kernel TLB entries
+ * live longer, reduce TLB misses and improve kernel performance.
+ * But, leaving all kernel text Global makes it potentially accessible
+ * to Meltdown-style attacks which make it trivial to find gadgets or
+ * defeat KASLR.
+ *
+ * Only use global pages when it is really worth it.
+ */
+static inline bool pti_kernel_image_global_ok(void)
+{
+       /*
+        * Systems with PCIDs get little benefit from global
+        * kernel text and are not worth the downsides.
+        */
+       if (cpu_feature_enabled(X86_FEATURE_PCID))
+               return false;
+
+       /*
+        * Only do global kernel image for pti=auto.  Do the most
+        * secure thing (not global) if pti=on specified.
+        */
+       if (pti_mode != PTI_AUTO)
+               return false;
+
+       /*
+        * K8 may not tolerate the cleared _PAGE_RW on the userspace
+        * global kernel image pages.  Do the safe thing (disable
+        * global kernel image).  This is unlikely to ever be
+        * noticed because PTI is disabled by default on AMD CPUs.
+        */
+       if (boot_cpu_has(X86_FEATURE_K8))
+               return false;
+
+       return true;
+}
+
+/*
+ * For some configurations, map all of kernel text into the user page
+ * tables.  This reduces TLB misses, especially on non-PCID systems.
+ */
+void pti_clone_kernel_text(void)
+{
+       unsigned long start = PFN_ALIGN(_text);
+       unsigned long end = ALIGN((unsigned long)_end, PMD_PAGE_SIZE);
+
+       if (!pti_kernel_image_global_ok())
+               return;
+
+       pti_clone_pmds(start, end, _PAGE_RW);
+}
+
 /*
  * This is the only user for it and it is not arch-generic like
  * the other set_memory.h functions.  Just extern it.
@@ -388,6 +455,9 @@ void pti_set_kernel_image_nonglobal(void)
        unsigned long start = PFN_ALIGN(_text);
        unsigned long end = ALIGN((unsigned long)_end, PMD_PAGE_SIZE);
 
+       if (pti_kernel_image_global_ok())
+               return;
+
        pr_debug("set kernel image non-global\n");
 
        set_memory_nonglobal(start, (end - start) >> PAGE_SHIFT);

Reply via email to