We were manually configuring cpu_entry_area in the usermode tables.
This was error-prone and wasted memory.  (Not much memory, but
still.)  Instead, just reference the same pagetables.

This avoids needing to keep the KPTI code and the normal
cpu_entry_area code in sync, since the KPTI code no longer cares
what's in cpu_entry_area.
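
For anyone who hasn't seen the trick: sharing works because a PMD
entry is just a reference to a last-level page table, so copying the
entry makes both hierarchies point at the same table.  A minimal
userspace sketch of the idea (not kernel code; every name below is
invented for illustration):

  /* Toy model: two "top-level" tables share one last-level table. */
  #include <assert.h>
  #include <stdio.h>
  #include <stdlib.h>

  #define ENTRIES 4

  struct toy_pte_table { unsigned long pte[ENTRIES]; };
  struct toy_pmd_table { struct toy_pte_table *pmd[ENTRIES]; };

  int main(void)
  {
          struct toy_pmd_table kernel_pmd = { 0 }, user_pmd = { 0 };

          /* The "kernel" side allocates and fills a last-level table. */
          kernel_pmd.pmd[0] = calloc(1, sizeof(*kernel_pmd.pmd[0]));
          if (!kernel_pmd.pmd[0])
                  return 1;
          kernel_pmd.pmd[0]->pte[1] = 0x1000;

          /*
           * The clone step: copy the PMD entry itself.  Nothing is
           * duplicated; both hierarchies now reference one table.
           */
          user_pmd.pmd[0] = kernel_pmd.pmd[0];

          /* A later "kernel"-side update shows up on the "user" side. */
          kernel_pmd.pmd[0]->pte[2] = 0x2000;
          assert(user_pmd.pmd[0]->pte[2] == 0x2000);

          printf("shared pte: %#lx\n", user_pmd.pmd[0]->pte[2]);
          free(kernel_pmd.pmd[0]);
          return 0;
  }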

[This does *not* work on the current KPTI series.  It requires that
 all the kernelmode cpu_entry_area page tables are pre-allocated.
 That happens in the series as I submitted it, but tglx changed it
 for reasons that I haven't figured out.]

Signed-off-by: Andy Lutomirski <[email protected]>
---
 arch/x86/include/asm/fixmap.h | 14 +++++---
 arch/x86/include/asm/kpti.h   |  8 +++--
 arch/x86/kernel/cpu/common.c  |  3 --
 arch/x86/mm/kpti.c            | 82 ++++++++++++++++++++++++++-----------------
 4 files changed, 64 insertions(+), 43 deletions(-)

diff --git a/arch/x86/include/asm/fixmap.h b/arch/x86/include/asm/fixmap.h
index 839addd1eaec..a630cd2861f7 100644
--- a/arch/x86/include/asm/fixmap.h
+++ b/arch/x86/include/asm/fixmap.h
@@ -142,16 +142,20 @@ enum fixed_addresses {
 #ifdef CONFIG_PARAVIRT
        FIX_PARAVIRT_BOOTMAP,
 #endif
-       FIX_TEXT_POKE1, /* reserve 2 pages for text_poke() */
-       FIX_TEXT_POKE0, /* first page is last, because allocation is backward */
 #ifdef CONFIG_X86_INTEL_MID
        FIX_LNW_VRTC,
 #endif
-       /* Fixmap entries to remap the GDTs, one per processor. */
-       FIX_CPU_ENTRY_AREA_TOP,
+       FIX_TEXT_POKE1, /* reserve 2 pages for text_poke() */
+       FIX_TEXT_POKE0, /* first page is last, because allocation is backward */
+
+       /*
+        * Fixmap entries to remap the GDTs, one per processor.  Align
+        * to a PMD boundary.
+        */
+       FIX_CPU_ENTRY_AREA_TOP = round_up(FIX_TEXT_POKE0 + 1, PTRS_PER_PMD),
        FIX_CPU_ENTRY_AREA_BOTTOM = FIX_CPU_ENTRY_AREA_TOP + (CPU_ENTRY_AREA_PAGES * NR_CPUS) - 1,
 
-       __end_of_permanent_fixed_addresses,
+       __end_of_permanent_fixed_addresses = round_up(FIX_CPU_ENTRY_AREA_BOTTOM + 1, PTRS_PER_PMD),
 
        /*
         * 512 temporary boot-time mappings, used by early_ioremap(),
diff --git a/arch/x86/include/asm/kpti.h b/arch/x86/include/asm/kpti.h
index 0c10e86ae3f8..df52cec2a53b 100644
--- a/arch/x86/include/asm/kpti.h
+++ b/arch/x86/include/asm/kpti.h
@@ -1,5 +1,8 @@
 #ifndef _ASM_X86_KPTI_H
 #define _ASM_X86_KPTI_H
+
+#include <linux/init.h>
+
 /*
  * Copyright(c) 2017 Intel Corporation. All rights reserved.
  *
@@ -34,10 +37,9 @@ extern int kpti_add_mapping(unsigned long addr, unsigned long size,
                              unsigned long flags);
 
 /**
- *  kpti_add_mapping_cpu_entry - map the cpu entry area
- *  @cpu: the CPU for which the entry area is being mapped
+ *  kpti_clone_cpu_entry_areas - clone cpu_entry_areas to the usermode tables
  */
-extern void kpti_add_mapping_cpu_entry(int cpu);
+extern void __init kpti_clone_cpu_entry_areas(void);
 
 /**
  *  kpti_remove_mapping - remove a kernel mapping from the userpage tables
diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c
index 00697119f983..3dc814519c92 100644
--- a/arch/x86/kernel/cpu/common.c
+++ b/arch/x86/kernel/cpu/common.c
@@ -606,9 +606,6 @@ void __init setup_cpu_entry_area(int cpu)
                                sizeof(struct debug_store) / PAGE_SIZE,
                                PAGE_KERNEL);
 #endif
-       /* CPU 0's mapping is done in kpti_init() */
-       if (cpu)
-               kpti_add_mapping_cpu_entry(cpu);
 }
 
 /* Load the original GDT from the per-cpu structure */
diff --git a/arch/x86/mm/kpti.c b/arch/x86/mm/kpti.c
index 52fd833845ba..cd81a7432f49 100644
--- a/arch/x86/mm/kpti.c
+++ b/arch/x86/mm/kpti.c
@@ -240,7 +240,7 @@ static pmd_t *kpti_shadow_pagetable_walk_pmd(unsigned long address,
  * Returns a pointer to a PTE on success, or NULL on failure.
  */
 static pte_t *kpti_shadow_pagetable_walk(unsigned long address,
-                                          unsigned long flags)
+                                        unsigned long flags)
 {
        pmd_t *pmd = kpti_shadow_pagetable_walk_pmd(address, flags);
        pte_t *pte;
@@ -401,28 +401,55 @@ static void __init kpti_init_all_pgds(void)
        WARN_ON(__ret);                                                 \
 } while (0)
 
-void kpti_add_mapping_cpu_entry(int cpu)
+void __init kpti_clone_cpu_entry_areas(void)
 {
-       kpti_add_user_map_early(get_cpu_gdt_ro(cpu), PAGE_SIZE,
-                               __PAGE_KERNEL_RO);
-
-       kpti_add_user_map_early(&get_cpu_entry_area(cpu)->tss,
-                               sizeof(get_cpu_entry_area(cpu)->tss),
-                               __PAGE_KERNEL | _PAGE_GLOBAL);
-
-       /* entry stack */
-       kpti_add_user_map_early(&get_cpu_entry_area(cpu)->SYSENTER_stack_page,
-                               sizeof(get_cpu_entry_area(cpu)->SYSENTER_stack_page),
-                               __PAGE_KERNEL | _PAGE_GLOBAL);
-
-       /* Entry code, so needs to be EXEC */
-       kpti_add_user_map_early(&get_cpu_entry_area(cpu)->entry_trampoline,
-                               sizeof(get_cpu_entry_area(cpu)->entry_trampoline),
-                               __PAGE_KERNEL_RX | _PAGE_GLOBAL);
-
-       kpti_add_user_map_early(&get_cpu_entry_area(cpu)->exception_stacks,
-                               sizeof(get_cpu_entry_area(cpu)->exception_stacks),
-                               __PAGE_KERNEL | _PAGE_GLOBAL);
+       int cpu;
+       unsigned long last_pmd_addr = 0;
+
+       /* The top of the cpu_entry_area block is meant to be PMD-aligned. */
+       WARN_ON((unsigned long)(get_cpu_entry_area(NR_CPUS - 1) + 1) & ~PMD_MASK);
+
+       /*
+        * Iterate over possible CPUs, not addresses: it's possible that
+        * NR_CPUS is sufficiently larger than the actual number of possible CPUs
+        * that we have unpopulated PMDs in the cpu_entry_area range.
+        */
+       for_each_possible_cpu(cpu) {
+               pgd_t *pgd;
+               p4d_t *p4d;
+               pud_t *pud;
+               pmd_t *pmd, *target_pmd;
+               unsigned long addr =
+                       (unsigned long)get_cpu_entry_area(cpu) & PMD_MASK;
+
+               if (addr == last_pmd_addr)
+                       continue;
+               last_pmd_addr = addr;
+
+               pgd = pgd_offset_k(addr);
+               if (WARN_ON(pgd_none(*pgd)))
+                       return;
+               p4d = p4d_offset(pgd, addr);
+               if (WARN_ON(p4d_none(*p4d)))
+                       return;
+               pud = pud_offset(p4d, addr);
+               if (WARN_ON(pud_none(*pud)))
+                       return;
+               pmd = pmd_offset(pud, addr);
+               if (WARN_ON(pmd_none(*pmd)))
+                       return;
+
+               target_pmd = kpti_shadow_pagetable_walk_pmd(addr, 0);
+               if (WARN_ON(!target_pmd))
+                       return;
+
+               /*
+                * Copy the PMD.  That is, the kernelmode and usermode tables
+                * will share all last-level page tables containing
+                * cpu_entry_area mappings.
+                */
+               *target_pmd = *pmd;
+       }
 }
 
 /*
@@ -459,16 +486,7 @@ void __init kpti_init(void)
                                  sizeof(gate_desc) * NR_VECTORS,
                                  __PAGE_KERNEL_RO | _PAGE_GLOBAL);
 
-       /*
-        * We delay CPU 0's mappings because these structures are created
-        * before the page allocator is up.  Deferring it until here lets
-        * us use the plain page allocator unconditionally in the page
-        * table code above.
-        *
-        * This is OK because kpti_init() is called long before we ever run
-        * userspace and need the KERNEL_PAGE_TABLE_ISOLATION mappings.
-        */
-       kpti_add_mapping_cpu_entry(0);
+       kpti_clone_cpu_entry_areas();
 }
 
 int kpti_add_mapping(unsigned long addr, unsigned long size,
-- 
2.13.6
