From: Dave Hansen <dave.han...@linux.intel.com>

The kernel image starts out with the Global bit set across the entire
image.  The bit is cleared with set_memory_nonglobal() in
configurations with PCIDs, where we do not need the performance
benefits of the Global bit.
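
For reference, the existing opt-out path looks roughly like this (a
simplified sketch of the current pti_set_kernel_image_nonglobal();
see the diff below for the real code):

	void pti_set_kernel_image_nonglobal(void)
	{
		unsigned long start = PFN_ALIGN(_text);
		unsigned long end = ALIGN((unsigned long)_end, PMD_PAGE_SIZE);

		/* Keep Global set (opt out of clearing) where it is OK: */
		if (pti_kernel_image_global_ok())
			return;

		/* Otherwise (e.g. PCID systems), clear it everywhere: */
		set_memory_nonglobal(start, (end - start) >> PAGE_SHIFT);
	}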

However, this is fragile.  It means that we are stuck opting *out* of
the less-secure (Global bit set) configuration, which seems backwards.
Let's start more secure (Global bit clear) and then let things opt
back in if they want performance, or are truly mapping common data
between kernel and userspace.
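
Conceptually, the new flow becomes (a sketch only; the call sites
themselves are unchanged by this patch):

	/* 1. Start secure: clear _PAGE_GLOBAL across the whole kernel image. */
	pti_set_kernel_image_nonglobal();

	/*
	 * 2. Opt back in: pti_clone_kernel_text() later sets _PAGE_GLOBAL
	 *    again, but only on the non-__init kernel text that is actually
	 *    cloned into the user page tables, and only when
	 *    pti_kernel_image_global_ok() says it is worthwhile.
	 */
	pti_clone_kernel_text();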

This fixes a bug.  Before this patch, there are areas that are
unmapped from the user page tables (like everything above
0xffffffff82600000 in the example below) but still marked Global.
These have the hallmark of being wrongly Global: they are not
identical in the 'current_kernel' and 'current_user' page table
dumps.  They are also read-write, which means they are much more
likely to contain secrets.

Before this patch:

current_kernel:---[ High Kernel Mapping ]---
current_kernel-0xffffffff80000000-0xffffffff81000000          16M                               pmd
current_kernel-0xffffffff81000000-0xffffffff81e00000          14M     ro         PSE     GLB x  pmd
current_kernel-0xffffffff81e00000-0xffffffff81e11000          68K     ro                 GLB x  pte
current_kernel-0xffffffff81e11000-0xffffffff82000000        1980K     RW                 GLB NX pte
current_kernel-0xffffffff82000000-0xffffffff82600000           6M     ro         PSE     GLB NX pmd
current_kernel-0xffffffff82600000-0xffffffff82c00000           6M     RW         PSE     GLB NX pmd
current_kernel-0xffffffff82c00000-0xffffffff82e00000           2M     RW                 GLB NX pte
current_kernel-0xffffffff82e00000-0xffffffff83200000           4M     RW         PSE     GLB NX pmd
current_kernel-0xffffffff83200000-0xffffffffa0000000         462M                               pmd

  current_user:---[ High Kernel Mapping ]---
  current_user-0xffffffff80000000-0xffffffff81000000          16M                               pmd
  current_user-0xffffffff81000000-0xffffffff81e00000          14M     ro         PSE     GLB x  pmd
  current_user-0xffffffff81e00000-0xffffffff81e11000          68K     ro                 GLB x  pte
  current_user-0xffffffff81e11000-0xffffffff82000000        1980K     RW                 GLB NX pte
  current_user-0xffffffff82000000-0xffffffff82600000           6M     ro         PSE     GLB NX pmd
  current_user-0xffffffff82600000-0xffffffffa0000000         474M                               pmd

After this patch:

current_kernel:---[ High Kernel Mapping ]---
current_kernel-0xffffffff80000000-0xffffffff81000000          16M                               pmd
current_kernel-0xffffffff81000000-0xffffffff81e00000          14M     ro         PSE     GLB x  pmd
current_kernel-0xffffffff81e00000-0xffffffff81e11000          68K     ro                 GLB x  pte
current_kernel-0xffffffff81e11000-0xffffffff82000000        1980K     RW                     NX pte
current_kernel-0xffffffff82000000-0xffffffff82600000           6M     ro         PSE     GLB NX pmd
current_kernel-0xffffffff82600000-0xffffffff82c00000           6M     RW         PSE         NX pmd
current_kernel-0xffffffff82c00000-0xffffffff82e00000           2M     RW                     NX pte
current_kernel-0xffffffff82e00000-0xffffffff83200000           4M     RW         PSE         NX pmd
current_kernel-0xffffffff83200000-0xffffffffa0000000         462M                               pmd

  current_user:---[ High Kernel Mapping ]---
  current_user-0xffffffff80000000-0xffffffff81000000          16M                               pmd
  current_user-0xffffffff81000000-0xffffffff81e00000          14M     ro         PSE     GLB x  pmd
  current_user-0xffffffff81e00000-0xffffffff81e11000          68K     ro                 GLB x  pte
  current_user-0xffffffff81e11000-0xffffffff82000000        1980K     RW                     NX pte
  current_user-0xffffffff82000000-0xffffffff82600000           6M     ro         PSE     GLB NX pmd
  current_user-0xffffffff82600000-0xffffffffa0000000         474M                               pmd

Signed-off-by: Dave Hansen <dave.han...@linux.intel.com>
Reported-by: Hugh Dickins <hu...@google.com>
Cc: Kees Cook <keesc...@google.com>
Cc: Thomas Gleixner <t...@linutronix.de>
Cc: Ingo Molnar <mi...@kernel.org>
Cc: Andrea Arcangeli <aarca...@redhat.com>
Cc: Juergen Gross <jgr...@suse.com>
Cc: Josh Poimboeuf <jpoim...@redhat.com>
Cc: Greg Kroah-Hartman <gre...@linuxfoundation.org>
Cc: Peter Zijlstra <pet...@infradead.org>
Cc: Linus Torvalds <torva...@linux-foundation.org>
Cc: Borislav Petkov <b...@alien8.de>
Cc: Andy Lutomirski <l...@kernel.org>
Cc: Andi Kleen <a...@linux.intel.com>
---

 b/arch/x86/mm/pageattr.c |    6 ++++++
 b/arch/x86/mm/pti.c      |   34 ++++++++++++++++++++++++----------
 2 files changed, 30 insertions(+), 10 deletions(-)

diff -puN arch/x86/mm/pageattr.c~pti-non-pcid-clear-more-global-in-kernel-mapping arch/x86/mm/pageattr.c
--- a/arch/x86/mm/pageattr.c~pti-non-pcid-clear-more-global-in-kernel-mapping  2018-08-02 14:42:35.905479118 -0700
+++ b/arch/x86/mm/pageattr.c    2018-08-02 14:42:35.912479118 -0700
@@ -1784,6 +1784,12 @@ int set_memory_nonglobal(unsigned long a
                                      __pgprot(_PAGE_GLOBAL), 0);
 }
 
+int set_memory_global(unsigned long addr, int numpages)
+{
+       return change_page_attr_set(&addr, numpages,
+                                   __pgprot(_PAGE_GLOBAL), 0);
+}
+
 static int __set_memory_enc_dec(unsigned long addr, int numpages, bool enc)
 {
        struct cpa_data cpa;
diff -puN arch/x86/mm/pti.c~pti-non-pcid-clear-more-global-in-kernel-mapping arch/x86/mm/pti.c
--- a/arch/x86/mm/pti.c~pti-non-pcid-clear-more-global-in-kernel-mapping       2018-08-02 14:42:35.909479118 -0700
+++ b/arch/x86/mm/pti.c 2018-08-02 14:42:35.913479118 -0700
@@ -435,6 +435,13 @@ static inline bool pti_kernel_image_glob
 }
 
 /*
+ * This is the only user for these and it is not arch-generic
+ * like the other set_memory.h functions.  Just extern them.
+ */
+extern int set_memory_nonglobal(unsigned long addr, int numpages);
+extern int set_memory_global(unsigned long addr, int numpages);
+
+/*
  * For some configurations, map all of kernel text into the user page
  * tables.  This reduces TLB misses, especially on non-PCID systems.
  */
@@ -446,7 +453,8 @@ void pti_clone_kernel_text(void)
         * clone the areas past rodata, they might contain secrets.
         */
        unsigned long start = PFN_ALIGN(_text);
-       unsigned long end = (unsigned long)__end_rodata_hpage_align;
+       unsigned long end_clone  = (unsigned long)__end_rodata_hpage_align;
+       unsigned long end_global = PFN_ALIGN((unsigned long)__stop___ex_table);
 
        if (!pti_kernel_image_global_ok())
                return;
@@ -458,14 +466,18 @@ void pti_clone_kernel_text(void)
         * pti_set_kernel_image_nonglobal() did to clear the
         * global bit.
         */
-       pti_clone_pmds(start, end, _PAGE_RW);
+       pti_clone_pmds(start, end_clone, _PAGE_RW);
+
+       /*
+        * pti_clone_pmds() will set the global bit in any PMDs
+        * that it clones, but we also need to get any PTEs in
+        * the last level for areas that are not huge-page-aligned.
+        */
+
+       /* Set the global bit for normal non-__init kernel text: */
+       set_memory_global(start, (end_global - start) >> PAGE_SHIFT);
 }
 
-/*
- * This is the only user for it and it is not arch-generic like
- * the other set_memory.h functions.  Just extern it.
- */
-extern int set_memory_nonglobal(unsigned long addr, int numpages);
 void pti_set_kernel_image_nonglobal(void)
 {
        /*
@@ -477,9 +489,11 @@ void pti_set_kernel_image_nonglobal(void
        unsigned long start = PFN_ALIGN(_text);
        unsigned long end = ALIGN((unsigned long)_end, PMD_PAGE_SIZE);
 
-       if (pti_kernel_image_global_ok())
-               return;
-
+       /*
+        * This clears _PAGE_GLOBAL from the entire kernel image.
+        * pti_clone_kernel_text() may put _PAGE_GLOBAL back for
+        * areas that are mapped to userspace.
+        */
        set_memory_nonglobal(start, (end - start) >> PAGE_SHIFT);
 }
 
_
