For mitigation of Meltdown the current L4 page table is copied to the
cpu local root page table each time a 64 bit pv guest is entered.

Copying can be avoided in cases where the guest L4 page table hasn't
been modified while running the hypervisor, e.g. when handling
interrupts or any hypercall not modifying the L4 page table or %cr3.

So add a per-cpu flag whether the copying should be performed and set
that flag only when loading a new %cr3 or modifying the L4 page table.
This includes synchronization of the cpu local root page table with
other cpus, so add a special synchronization flag for that case.

A simple performance check (compiling the hypervisor via "make -j 4")
in dom0 with 4 vcpus shows a significant improvement:

- real time drops from 113 seconds to 109 seconds
- system time drops from 165 seconds to 155 seconds

Signed-off-by: Juergen Gross <jgr...@suse.com>
Reviewed-by: Jan Beulich <jbeul...@suse.com>
---
V4:
- move setting of root_pgt_changed flag in flush_area_local() out of
  irq disabled section (Jan Beulich)
- move setting of root_pgt_changed in make_cr3() to _toggle_guest_pt()
  (Jan Beulich)
- remove most conditionals in write_ptbase() (Jan Beulich)
- don't set root_pgt_changed in do_mmu_update() for modification of
  the user page table (Jan Beulich)

V3:
- set flag locally only if affected L4 is active (Jan Beulich)
- add setting flag to flush_area_mask() (Jan Beulich)
- set flag in make_cr3() only if called for current active vcpu

To be applied on top of Jan's "Meltdown band-aid overhead reduction"
series
---
 xen/arch/x86/flushtlb.c           |  3 +++
 xen/arch/x86/mm.c                 | 38 +++++++++++++++++++++++++-------------
 xen/arch/x86/pv/domain.c          |  2 ++
 xen/arch/x86/smp.c                |  2 +-
 xen/arch/x86/x86_64/asm-offsets.c |  1 +
 xen/arch/x86/x86_64/entry.S       |  8 ++++++--
 xen/include/asm-x86/current.h     |  8 ++++++++
 xen/include/asm-x86/flushtlb.h    |  2 ++
 8 files changed, 48 insertions(+), 16 deletions(-)

diff --git a/xen/arch/x86/flushtlb.c b/xen/arch/x86/flushtlb.c
index 8a7a76b8ff..38cedf3b22 100644
--- a/xen/arch/x86/flushtlb.c
+++ b/xen/arch/x86/flushtlb.c
@@ -160,5 +160,8 @@ unsigned int flush_area_local(const void *va, unsigned int 
flags)
 
     local_irq_restore(irqfl);
 
+    if ( flags & FLUSH_ROOT_PGTBL )
+        get_cpu_info()->root_pgt_changed = true;
+
     return flags;
 }
diff --git a/xen/arch/x86/mm.c b/xen/arch/x86/mm.c
index 6d39d2c8ab..fd89685486 100644
--- a/xen/arch/x86/mm.c
+++ b/xen/arch/x86/mm.c
@@ -504,6 +504,8 @@ void make_cr3(struct vcpu *v, mfn_t mfn)
 
 void write_ptbase(struct vcpu *v)
 {
+    if ( this_cpu(root_pgt) )
+        get_cpu_info()->root_pgt_changed = true;
     write_cr3(v->arch.cr3);
 }
 
@@ -3701,18 +3703,28 @@ long do_mmu_update(
                         break;
                     rc = mod_l4_entry(va, l4e_from_intpte(req.val), mfn,
                                       cmd == MMU_PT_UPDATE_PRESERVE_AD, v);
-                    /*
-                     * No need to sync if all uses of the page can be accounted
-                     * to the page lock we hold, its pinned status, and uses on
-                     * this (v)CPU.
-                     */
-                    if ( !rc && !cpu_has_no_xpti &&
-                         ((page->u.inuse.type_info & PGT_count_mask) >
-                          (1 + !!(page->u.inuse.type_info & PGT_pinned) +
-                           (pagetable_get_pfn(curr->arch.guest_table) == mfn) +
-                           (pagetable_get_pfn(curr->arch.guest_table_user) ==
-                            mfn))) )
-                        sync_guest = true;
+                    if ( !rc && !cpu_has_no_xpti )
+                    {
+                        bool local_in_use = false;
+
+                        if ( pagetable_get_pfn(curr->arch.guest_table) == mfn )
+                        {
+                            local_in_use = true;
+                            get_cpu_info()->root_pgt_changed = true;
+                        }
+
+                        /*
+                         * No need to sync if all uses of the page can be
+                         * accounted to the page lock we hold, its pinned
+                         * status, and uses on this (v)CPU.
+                         */
+                        if ( (page->u.inuse.type_info & PGT_count_mask) >
+                             (1 + !!(page->u.inuse.type_info & PGT_pinned) +
+                              (pagetable_get_pfn(curr->arch.guest_table_user) 
==
+                               mfn) +
+                              local_in_use) )
+                            sync_guest = true;
+                    }
                     break;
 
                 case PGT_writable_page:
@@ -3827,7 +3839,7 @@ long do_mmu_update(
 
         cpumask_andnot(mask, pt_owner->dirty_cpumask, cpumask_of(cpu));
         if ( !cpumask_empty(mask) )
-            flush_mask(mask, FLUSH_TLB_GLOBAL);
+            flush_mask(mask, FLUSH_TLB_GLOBAL | FLUSH_ROOT_PGTBL);
     }
 
     perfc_add(num_page_updates, i);
diff --git a/xen/arch/x86/pv/domain.c b/xen/arch/x86/pv/domain.c
index 01c62e2d45..42522a2db3 100644
--- a/xen/arch/x86/pv/domain.c
+++ b/xen/arch/x86/pv/domain.c
@@ -223,6 +223,8 @@ static void _toggle_guest_pt(struct vcpu *v)
 {
     v->arch.flags ^= TF_kernel_mode;
     update_cr3(v);
+    get_cpu_info()->root_pgt_changed = true;
+
     /* Don't flush user global mappings from the TLB. Don't tick TLB clock. */
     asm volatile ( "mov %0, %%cr3" : : "r" (v->arch.cr3) : "memory" );
 
diff --git a/xen/arch/x86/smp.c b/xen/arch/x86/smp.c
index 033dd05958..63e819ca38 100644
--- a/xen/arch/x86/smp.c
+++ b/xen/arch/x86/smp.c
@@ -208,7 +208,7 @@ void invalidate_interrupt(struct cpu_user_regs *regs)
     ack_APIC_irq();
     perfc_incr(ipis);
     if ( (flags & FLUSH_VCPU_STATE) && __sync_local_execstate() )
-        flags &= ~(FLUSH_TLB | FLUSH_TLB_GLOBAL);
+        flags &= ~(FLUSH_TLB | FLUSH_TLB_GLOBAL | FLUSH_ROOT_PGTBL);
     if ( flags & ~(FLUSH_VCPU_STATE | FLUSH_ORDER_MASK) )
         flush_area_local(flush_va, flags);
     cpumask_clear_cpu(smp_processor_id(), &flush_cpumask);
diff --git a/xen/arch/x86/x86_64/asm-offsets.c 
b/xen/arch/x86/x86_64/asm-offsets.c
index a2fea94f4c..9e2aefb00f 100644
--- a/xen/arch/x86/x86_64/asm-offsets.c
+++ b/xen/arch/x86/x86_64/asm-offsets.c
@@ -143,6 +143,7 @@ void __dummy__(void)
     OFFSET(CPUINFO_shadow_spec_ctrl, struct cpu_info, shadow_spec_ctrl);
     OFFSET(CPUINFO_use_shadow_spec_ctrl, struct cpu_info, 
use_shadow_spec_ctrl);
     OFFSET(CPUINFO_bti_ist_info, struct cpu_info, bti_ist_info);
+    OFFSET(CPUINFO_root_pgt_changed, struct cpu_info, root_pgt_changed);
     DEFINE(CPUINFO_sizeof, sizeof(struct cpu_info));
     BLANK();
 
diff --git a/xen/arch/x86/x86_64/entry.S b/xen/arch/x86/x86_64/entry.S
index 45d9842d09..30c9da5446 100644
--- a/xen/arch/x86/x86_64/entry.S
+++ b/xen/arch/x86/x86_64/entry.S
@@ -128,10 +128,13 @@ restore_all_guest:
         /* Copy guest mappings and switch to per-CPU root page table. */
         mov   VCPU_cr3(%rbx), %r9
         GET_STACK_END(dx)
-        mov   STACK_CPUINFO_FIELD(pv_cr3)(%rdx), %rdi
+        mov   STACK_CPUINFO_FIELD(pv_cr3)(%rdx), %rax
+        cmpb  $0, STACK_CPUINFO_FIELD(root_pgt_changed)(%rdx)
+        je    .Lrag_copy_done
+        movb  $0, STACK_CPUINFO_FIELD(root_pgt_changed)(%rdx)
         movabs $PADDR_MASK & PAGE_MASK, %rsi
         movabs $DIRECTMAP_VIRT_START, %rcx
-        mov   %rdi, %rax
+        mov   %rax, %rdi
         and   %rsi, %rdi
         jz    .Lrag_keep_cr3
         and   %r9, %rsi
@@ -148,6 +151,7 @@ restore_all_guest:
         sub   $(ROOT_PAGETABLE_FIRST_XEN_SLOT - \
                 ROOT_PAGETABLE_LAST_XEN_SLOT - 1) * 8, %rdi
         rep movsq
+.Lrag_copy_done:
         mov   STACK_CPUINFO_FIELD(cr4)(%rdx), %rdi
         mov   %r9, STACK_CPUINFO_FIELD(xen_cr3)(%rdx)
         mov   %rdi, %rsi
diff --git a/xen/include/asm-x86/current.h b/xen/include/asm-x86/current.h
index 3a0e1eef36..f2491b4423 100644
--- a/xen/include/asm-x86/current.h
+++ b/xen/include/asm-x86/current.h
@@ -59,6 +59,14 @@ struct cpu_info {
     bool         use_shadow_spec_ctrl;
     uint8_t      bti_ist_info;
 
+    /*
+     * The following field controls copying of the L4 page table of 64-bit
+     * PV guests to the per-cpu root page table on entering the guest context.
+     * If set the L4 page table is being copied to the root page table and
+     * the field will be reset.
+     */
+    bool         root_pgt_changed;
+
     unsigned long __pad;
     /* get_stack_bottom() must be 16-byte aligned */
 };
diff --git a/xen/include/asm-x86/flushtlb.h b/xen/include/asm-x86/flushtlb.h
index 2cade9cbfb..052f0fa403 100644
--- a/xen/include/asm-x86/flushtlb.h
+++ b/xen/include/asm-x86/flushtlb.h
@@ -103,6 +103,8 @@ void write_cr3(unsigned long cr3);
 #define FLUSH_VA_VALID   0x800
  /* Flush CPU state */
 #define FLUSH_VCPU_STATE 0x1000
+ /* Flush the per-cpu root page table */
+#define FLUSH_ROOT_PGTBL 0x2000
 
 /* Flush local TLBs/caches. */
 unsigned int flush_area_local(const void *va, unsigned int flags);
-- 
2.13.6


_______________________________________________
Xen-devel mailing list
Xen-devel@lists.xenproject.org
https://lists.xenproject.org/mailman/listinfo/xen-devel

Reply via email to