From: Thomas Gleixner <t...@linutronix.de>

LDT is not commonly used on 64-bit, so the overhead of populating the
fixmap entries on context switch for the rare modify_ldt() syscall users is
a reasonable trade-off vs. having extra dynamically managed mapping space
per process.

Signed-off-by: Thomas Gleixner <t...@linutronix.de>
---
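
[Illustration, not part of the patch: a minimal userspace sketch of the kind
of rare modify_ldt() user this path serves. It installs a single 32-bit data
segment into the LDT via the raw syscall; the descriptor layout is struct
user_desc from <asm/ldt.h>, and the program is purely an example.]

#include <asm/ldt.h>		/* struct user_desc */
#include <stdio.h>
#include <string.h>
#include <sys/syscall.h>
#include <unistd.h>

int main(void)
{
	struct user_desc desc;

	memset(&desc, 0, sizeof(desc));
	desc.entry_number   = 0;
	desc.limit          = 0xfffff;
	desc.seg_32bit      = 1;
	desc.limit_in_pages = 1;
	desc.useable        = 1;

	/* func 1 == write an LDT entry; kernel-side this allocates the LDT */
	if (syscall(SYS_modify_ldt, 1, &desc, sizeof(desc)) != 0) {
		perror("modify_ldt");
		return 1;
	}
	printf("LDT entry 0 installed\n");
	return 0;
}
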
 arch/x86/include/asm/mmu_context.h |   44 ++++--------------
 arch/x86/kernel/ldt.c              |   87 +++++++++++++++++++++++++++++++------
 2 files changed, 84 insertions(+), 47 deletions(-)

--- a/arch/x86/include/asm/mmu_context.h
+++ b/arch/x86/include/asm/mmu_context.h
@@ -45,13 +45,17 @@ static inline void load_mm_cr4(struct mm
  */
 struct ldt_struct {
        /*
-        * Xen requires page-aligned LDTs with special permissions.  This is
-        * needed to prevent us from installing evil descriptors such as
+        * Xen requires page-aligned LDTs with special permissions.  This
+        * is needed to prevent us from installing evil descriptors such as
         * call gates.  On native, we could merge the ldt_struct and LDT
-        * allocations, but it's not worth trying to optimize.
+        * allocations, but it's not worth trying to optimize and it does
+        * not work with page table isolation enabled, which requires
+        * page-aligned LDT entries as well.
         */
-       struct desc_struct *entries_va;
-       unsigned int nr_entries;
+       struct desc_struct      *entries_va;
+       phys_addr_t             entries_pa;
+       unsigned int            nr_entries;
+       unsigned int            order;
 };
 
 /*
@@ -59,6 +63,7 @@ struct ldt_struct {
  */
 int init_new_context_ldt(struct task_struct *tsk, struct mm_struct *mm);
 void destroy_context_ldt(struct mm_struct *mm);
+void load_mm_ldt(struct mm_struct *mm);
 #else  /* CONFIG_MODIFY_LDT_SYSCALL */
 static inline int init_new_context_ldt(struct task_struct *tsk,
                                       struct mm_struct *mm)
@@ -66,38 +71,11 @@ static inline int init_new_context_ldt(s
        return 0;
 }
 static inline void destroy_context_ldt(struct mm_struct *mm) {}
-#endif
-
 static inline void load_mm_ldt(struct mm_struct *mm)
 {
-#ifdef CONFIG_MODIFY_LDT_SYSCALL
-       struct ldt_struct *ldt;
-
-       /* READ_ONCE synchronizes with smp_store_release */
-       ldt = READ_ONCE(mm->context.ldt);
-
-       /*
-        * Any change to mm->context.ldt is followed by an IPI to all
-        * CPUs with the mm active.  The LDT will not be freed until
-        * after the IPI is handled by all such CPUs.  This means that,
-        * if the ldt_struct changes before we return, the values we see
-        * will be safe, and the new values will be loaded before we run
-        * any user code.
-        *
-        * NB: don't try to convert this to use RCU without extreme care.
-        * We would still need IRQs off, because we don't want to change
-        * the local LDT after an IPI loaded a newer value than the one
-        * that we can see.
-        */
-
-       if (unlikely(ldt))
-               set_ldt(ldt->entries_va, ldt->nr_entries);
-       else
-               clear_LDT();
-#else
        clear_LDT();
-#endif
 }
+#endif
 
 static inline void switch_ldt(struct mm_struct *prev, struct mm_struct *next)
 {
--- a/arch/x86/kernel/ldt.c
+++ b/arch/x86/kernel/ldt.c
@@ -22,6 +22,7 @@
 #include <asm/desc.h>
 #include <asm/mmu_context.h>
 #include <asm/syscalls.h>
+#include <asm/fixmap.h>
 
 static void refresh_ldt_segments(void)
 {
@@ -42,6 +43,61 @@ static void refresh_ldt_segments(void)
 #endif
 }
 
+#ifdef CONFIG_KERNEL_PAGE_TABLE_ISOLATION
+
+#define LDT_EPP                (PAGE_SIZE / LDT_ENTRY_SIZE)
+
+static void set_ldt_and_map(struct ldt_struct *ldt)
+{
+       phys_addr_t pa = ldt->entries_pa;
+       void *fixva;
+       int idx, i;
+
+       if (!static_cpu_has_bug(X86_BUG_CPU_SECURE_MODE_KPTI)) {
+               set_ldt(ldt->entries_va, ldt->nr_entries);
+               return;
+       }
+
+       idx = get_cpu_entry_area_index(smp_processor_id(), ldt_entries);
+       fixva = (void *) __fix_to_virt(idx);
+       for (i = 0; i < ldt->nr_entries; idx--, i += LDT_EPP, pa += PAGE_SIZE)
+               __set_fixmap(idx, pa, PAGE_KERNEL);
+       set_ldt(fixva, ldt->nr_entries);
+}
+#else
+static void set_ldt_and_map(struct ldt_struct *ldt)
+{
+       set_ldt(ldt->entries_va, ldt->nr_entries);
+}
+#endif
+
+void load_mm_ldt(struct mm_struct *mm)
+{
+       struct ldt_struct *ldt;
+
+       /* READ_ONCE synchronizes with smp_store_release */
+       ldt = READ_ONCE(mm->context.ldt);
+
+       /*
+        * Any change to mm->context.ldt is followed by an IPI to all
+        * CPUs with the mm active.  The LDT will not be freed until
+        * after the IPI is handled by all such CPUs.  This means that,
+        * if the ldt_struct changes before we return, the values we see
+        * will be safe, and the new values will be loaded before we run
+        * any user code.
+        *
+        * NB: don't try to convert this to use RCU without extreme care.
+        * We would still need IRQs off, because we don't want to change
+        * the local LDT after an IPI loaded a newer value than the one
+        * that we can see.
+        */
+
+       if (unlikely(ldt))
+               set_ldt_and_map(ldt);
+       else
+               clear_LDT();
+}
+
 /* context.lock is held for us, so we don't need any locking. */
 static void flush_ldt(void *__mm)
 {
@@ -52,26 +108,35 @@ static void flush_ldt(void *__mm)
                return;
 
        pc = &mm->context;
-       set_ldt(pc->ldt->entries_va, pc->ldt->nr_entries);
+       set_ldt_and_map(pc->ldt);
 
        refresh_ldt_segments();
 }
 
+static void __free_ldt_struct(struct ldt_struct *ldt)
+{
+       free_pages((unsigned long)ldt->entries_va, ldt->order);
+       kfree(ldt);
+}
+
 /* The caller must call finalize_ldt_struct on the result. LDT starts zeroed. */
 static struct ldt_struct *alloc_ldt_struct(unsigned int num_entries)
 {
        struct ldt_struct *new_ldt;
        unsigned int alloc_size;
+       struct page *page;
+       int order;
 
        if (num_entries > LDT_ENTRIES)
                return NULL;
 
-       new_ldt = kmalloc(sizeof(struct ldt_struct), GFP_KERNEL);
+       new_ldt = kzalloc(sizeof(struct ldt_struct), GFP_KERNEL);
        if (!new_ldt)
                return NULL;
 
        BUILD_BUG_ON(LDT_ENTRY_SIZE != sizeof(struct desc_struct));
        alloc_size = num_entries * LDT_ENTRY_SIZE;
+       order = get_order(alloc_size);
 
        /*
         * Xen is very picky: it requires a page-aligned LDT that has no
@@ -79,16 +144,14 @@ static struct ldt_struct *alloc_ldt_stru
         * Keep it simple: zero the whole allocation and never allocate less
         * than PAGE_SIZE.
         */
-       if (alloc_size > PAGE_SIZE)
-               new_ldt->entries_va = vzalloc(alloc_size);
-       else
-               new_ldt->entries_va = (void *)get_zeroed_page(GFP_KERNEL);
-
-       if (!new_ldt->entries_va) {
+       page = alloc_pages(GFP_KERNEL | __GFP_ZERO, order);
+       if (!page) {
                kfree(new_ldt);
                return NULL;
        }
-
+       new_ldt->entries_va = page_address(page);
+       new_ldt->entries_pa = virt_to_phys(new_ldt->entries_va);
+       new_ldt->order = order;
        new_ldt->nr_entries = num_entries;
        return new_ldt;
 }
@@ -116,11 +179,7 @@ static void free_ldt_struct(struct ldt_s
                return;
 
        paravirt_free_ldt(ldt->entries_va, ldt->nr_entries);
-       if (ldt->nr_entries * LDT_ENTRY_SIZE > PAGE_SIZE)
-               vfree_atomic(ldt->entries_va);
-       else
-               free_page((unsigned long)ldt->entries_va);
-       kfree(ldt);
+       __free_ldt_struct(ldt);
 }
 
 /*
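

[Illustration, not part of the patch: a quick sizing check for the per-CPU
fixmap window that set_ldt_and_map() fills. The constants below are the
usual x86 values (LDT_ENTRIES == 8192, LDT_ENTRY_SIZE == 8) and are assumed
here rather than taken from this diff; they give at most 16 pages, i.e. at
most 16 fixmap slots per CPU.]

#include <stdio.h>

#define LDT_ENTRIES	8192	/* assumed x86 maximum */
#define LDT_ENTRY_SIZE	8	/* bytes per descriptor */
#define PAGE_SIZE	4096

int main(void)
{
	unsigned int max_bytes = LDT_ENTRIES * LDT_ENTRY_SIZE;
	unsigned int pages = (max_bytes + PAGE_SIZE - 1) / PAGE_SIZE;

	printf("max LDT: %u bytes -> %u fixmap pages per CPU\n",
	       max_bytes, pages);
	return 0;
}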

