Module Name: src
Committed By: cherry
Date: Sat Jul 16 10:59:46 UTC 2011
Modified Files:
src/sys/arch/x86/include [cherry-xenmp]: cpu.h
src/sys/arch/x86/x86 [cherry-xenmp]: pmap.c
src/sys/arch/xen/x86 [cherry-xenmp]: cpu.c x86_xpmap.c xen_pmap.c
Log Message:
Introduce a per-cpu "shadow" for pmap_kernel()'s L4 page
To generate a diff of this commit:
cvs rdiff -u -r1.34.2.2 -r1.34.2.3 src/sys/arch/x86/include/cpu.h
cvs rdiff -u -r1.121.2.2 -r1.121.2.3 src/sys/arch/x86/x86/pmap.c
cvs rdiff -u -r1.56.2.2 -r1.56.2.3 src/sys/arch/xen/x86/cpu.c
cvs rdiff -u -r1.26.2.3 -r1.26.2.4 src/sys/arch/xen/x86/x86_xpmap.c
cvs rdiff -u -r1.2.2.1 -r1.2.2.2 src/sys/arch/xen/x86/xen_pmap.c
Please note that diffs are not public domain; they are subject to the
copyright notices on the relevant files.
Modified files:
Index: src/sys/arch/x86/include/cpu.h
diff -u src/sys/arch/x86/include/cpu.h:1.34.2.2 src/sys/arch/x86/include/cpu.h:1.34.2.3
--- src/sys/arch/x86/include/cpu.h:1.34.2.2 Thu Jun 23 14:19:48 2011
+++ src/sys/arch/x86/include/cpu.h Sat Jul 16 10:59:45 2011
@@ -1,4 +1,4 @@
-/* $NetBSD: cpu.h,v 1.34.2.2 2011/06/23 14:19:48 cherry Exp $ */
+/* $NetBSD: cpu.h,v 1.34.2.3 2011/07/16 10:59:45 cherry Exp $ */
/*-
* Copyright (c) 1990 The Regents of the University of California.
@@ -184,6 +184,8 @@
#if defined(XEN) && defined(__x86_64__)
/* Currently active user PGD (can't use rcr3() with Xen) */
+ pd_entry_t * ci_kpm_pdir; /* per-cpu L4 PD (va) */
+ paddr_t ci_kpm_pdirpa; /* per-cpu L4 PD (pa) */
paddr_t ci_xen_current_user_pgd;
#endif
@@ -232,6 +234,11 @@
int ci_padout __aligned(64);
};
+#if defined(XEN) && defined(__x86_64__)
+#define ci_pdirpa(ci, index) \
+ ((ci)->ci_kpm_pdirpa + (index) * sizeof(pd_entry_t))
+#endif /* XEN && __x86_64__ */
+
/*
* Macros to handle (some) trapframe registers for common x86 code.
*/
Index: src/sys/arch/x86/x86/pmap.c
diff -u src/sys/arch/x86/x86/pmap.c:1.121.2.2 src/sys/arch/x86/x86/pmap.c:1.121.2.3
--- src/sys/arch/x86/x86/pmap.c:1.121.2.2 Thu Jun 23 14:19:48 2011
+++ src/sys/arch/x86/x86/pmap.c Sat Jul 16 10:59:46 2011
@@ -1,4 +1,4 @@
-/* $NetBSD: pmap.c,v 1.121.2.2 2011/06/23 14:19:48 cherry Exp $ */
+/* $NetBSD: pmap.c,v 1.121.2.3 2011/07/16 10:59:46 cherry Exp $ */
/*-
* Copyright (c) 2008, 2010 The NetBSD Foundation, Inc.
@@ -171,7 +171,7 @@
*/
#include <sys/cdefs.h>
-__KERNEL_RCSID(0, "$NetBSD: pmap.c,v 1.121.2.2 2011/06/23 14:19:48 cherry Exp $");
+__KERNEL_RCSID(0, "$NetBSD: pmap.c,v 1.121.2.3 2011/07/16 10:59:46 cherry Exp $");
#include "opt_user_ldt.h"
#include "opt_lockdebug.h"
@@ -778,7 +778,7 @@
ci->ci_tlbstate = TLBSTATE_VALID;
atomic_or_32(&pmap->pm_cpus, cpumask);
atomic_or_32(&pmap->pm_kernel_cpus, cpumask);
- lcr3(pmap_pdirpa(pmap, 0));
+ cpu_load_pmap(pmap);
}
pmap->pm_ncsw = l->l_ncsw;
*pmap2 = curpmap;
@@ -1515,6 +1515,15 @@
break;
pdes_pa = newp;
}
+
+ /* sync to per-cpu PD */
+ xpq_queue_lock();
+ xpq_queue_pte_update(
+ xpmap_ptom_masked(ci_pdirpa(&cpu_info_primary,
+ pl_i(0, PTP_LEVELS))),
+ pmap_kernel()->pm_pdir[pl_i(0, PTP_LEVELS)]);
+ xpq_queue_unlock();
+ pmap_pte_flush();
#else /* XEN */
pd_entry_t *pdes;
@@ -1575,15 +1584,18 @@
void
pmap_cpu_init_late(struct cpu_info *ci)
{
+ /*
+ * The BP already has its own PD page allocated during early
+ * MD startup.
+ */
+ if (ci == &cpu_info_primary)
+ return;
+
#ifdef PAE
int ret;
struct pglist pg;
struct vm_page *vmap;
- /* The BP has already its own L3 page allocated in locore.S. */
- if (ci == &cpu_info_primary)
- return;
-
/*
* Allocate a page for the per-CPU L3 PD. cr3 being 32 bits, PA musts
* resides below the 4GB boundary.
@@ -1607,7 +1619,35 @@
VM_PROT_READ | VM_PROT_WRITE, 0);
pmap_update(pmap_kernel());
+
+ xpq_queue_lock();
+ xpq_queue_pin_l3_table(xpmap_ptom_masked(ci->ci_pae_l3_pdirpa));
+ xpq_queue_unlock();
#endif
+#if defined(XEN) && defined(__x86_64__)
+ KASSERT(ci != NULL);
+
+ ci->ci_kpm_pdir = (pd_entry_t *)uvm_km_alloc(kernel_map,
+ PAGE_SIZE, 0, UVM_KMF_WIRED | UVM_KMF_ZERO | UVM_KMF_NOWAIT);
+ if (ci->ci_kpm_pdir == NULL) {
+ panic("%s: failed to allocate L4 per-cpu PD for CPU %d\n",
+ __func__, cpu_index(ci));
+ }
+ ci->ci_kpm_pdirpa = vtophys((vaddr_t) ci->ci_kpm_pdir);
+ KASSERT(ci->ci_kpm_pdirpa != 0);
+
+ cpu_load_pmap(pmap_kernel());
+
+ pmap_kenter_pa((vaddr_t)ci->ci_kpm_pdir, ci->ci_kpm_pdirpa,
+ VM_PROT_READ, 0);
+
+ pmap_update(pmap_kernel());
+
+ xpq_queue_lock();
+ xpq_queue_pin_l4_table(xpmap_ptom_masked(ci->ci_kpm_pdirpa));
+ xpq_queue_unlock();
+
+#endif /* defined(XEN) && defined (__x86_64__) */
}
/*
@@ -1825,8 +1865,24 @@
* clear it before freeing
*/
if (pmap_pdirpa(pmap, 0) == curcpu()->ci_xen_current_user_pgd
- && level == PTP_LEVELS - 1)
+ && level == PTP_LEVELS - 1) {
pmap_pte_set(&pmap_kernel()->pm_pdir[index], 0);
+ /*
+ * Update the per-cpu PD on all cpus the current
+ * pmap is active on
+ */
+ CPU_INFO_ITERATOR cii;
+ struct cpu_info *ci;
+ for (CPU_INFO_FOREACH(cii, ci)) {
+ if (ci == NULL) {
+ continue;
+ }
+ if (ci->ci_cpumask & pmap->pm_cpus) {
+ pmap_pte_set(&ci->ci_kpm_pdir[index], 0);
+ }
+ }
+ }
+
# endif /*__x86_64__ */
invaladdr = level == 1 ? (vaddr_t)ptes :
(vaddr_t)pdes[level - 2];
@@ -1926,6 +1982,21 @@
pmap_pte_set(&pmap_kernel()->pm_pdir[index],
(pd_entry_t) (pmap_pa2pte(pa)
| PG_u | PG_RW | PG_V));
+ /*
+ * Update the per-cpu PD on all cpus the current
+ * pmap is active on
+ */
+ CPU_INFO_ITERATOR cii;
+ struct cpu_info *ci;
+ for (CPU_INFO_FOREACH(cii, ci)) {
+ if (ci == NULL) {
+ continue;
+ }
+ if (ci->ci_cpumask & pmap->pm_cpus) {
+ pmap_pte_set(&ci->ci_kpm_pdir[index],
+ (pd_entry_t) (pmap_pa2pte(pa) | PG_u | PG_RW | PG_V));
+ }
+ }
}
#endif /* XEN && __x86_64__ */
pmap_pte_flush();
@@ -4098,10 +4169,13 @@
#endif
for (level = lvl; level > 1; level--) {
- if (level == PTP_LEVELS)
+ if (level == PTP_LEVELS){
pdep = pmap_kernel()->pm_pdir;
- else
+ }
+ else {
+
pdep = pdes[level - 2];
+ }
va = kva;
index = pl_i_roundup(kva, level);
endindex = index + needed_ptps[level - 1] - 1;
@@ -4112,10 +4186,21 @@
pmap_get_physpage(va, level - 1, &pa);
#ifdef XEN
xpq_queue_lock();
- xpq_queue_pte_update((level == PTP_LEVELS) ?
- xpmap_ptom(pmap_pdirpa(pmap_kernel(), i)) :
- xpmap_ptetomach(&pdep[i]),
- pmap_pa2pte(pa) | PG_k | PG_V | PG_RW);
+ switch (level) {
+ case PTP_LEVELS: /* L4 */
+ xpq_queue_pte_update(
+ xpmap_ptom(pmap_pdirpa(pmap_kernel(), i)),
+ pmap_pa2pte(pa) | PG_k | PG_V | PG_RW);
+ xpq_queue_pte_update(
+ xpmap_ptom(ci_pdirpa(&cpu_info_primary, i)),
+ pmap_pa2pte(pa) | PG_k | PG_V | PG_RW);
+
+ break;
+ default: /* All other levels */
+ xpq_queue_pte_update(
+ xpmap_ptetomach(&pdep[i]),
+ pmap_pa2pte(pa) | PG_k | PG_V | PG_RW);
+ }
#ifdef PAE
if (level == PTP_LEVELS && i > L2_SLOT_KERN) {
/* update real kernel PD too */
Index: src/sys/arch/xen/x86/cpu.c
diff -u src/sys/arch/xen/x86/cpu.c:1.56.2.2 src/sys/arch/xen/x86/cpu.c:1.56.2.3
--- src/sys/arch/xen/x86/cpu.c:1.56.2.2 Thu Jun 23 14:19:50 2011
+++ src/sys/arch/xen/x86/cpu.c Sat Jul 16 10:59:46 2011
@@ -1,4 +1,4 @@
-/* $NetBSD: cpu.c,v 1.56.2.2 2011/06/23 14:19:50 cherry Exp $ */
+/* $NetBSD: cpu.c,v 1.56.2.3 2011/07/16 10:59:46 cherry Exp $ */
/* NetBSD: cpu.c,v 1.18 2004/02/20 17:35:01 yamt Exp */
/*-
@@ -66,7 +66,7 @@
*/
#include <sys/cdefs.h>
-__KERNEL_RCSID(0, "$NetBSD: cpu.c,v 1.56.2.2 2011/06/23 14:19:50 cherry Exp $");
+__KERNEL_RCSID(0, "$NetBSD: cpu.c,v 1.56.2.3 2011/07/16 10:59:46 cherry Exp $");
#include "opt_ddb.h"
#include "opt_multiprocessor.h"
@@ -456,7 +456,7 @@
cpu_get_tsc_freq(ci);
cpu_init(ci);
cpu_set_tss_gates(ci);
- pmap_cpu_init_late(ci);
+ pmap_cpu_init_late(ci); /* XXX: cosmetic */
/* Every processor needs to init it's own ipi h/w (similar to lapic) */
xen_ipi_init();
@@ -507,6 +507,7 @@
//gdt_init_cpu(ci);
cpu_set_tss_gates(ci);
+ pmap_cpu_init_late(ci);
cpu_start_secondary(ci);
if (ci->ci_flags & CPUF_PRESENT) {
@@ -782,8 +783,6 @@
aprint_debug_dev(ci->ci_dev, "running\n");
- printf("\n\nAbout to switch to idle_loop()\n\n");
-
cpu_switchto(NULL, ci->ci_data.cpu_idlelwp, true);
panic("switch to idle_loop context returned!\n");
@@ -999,6 +998,10 @@
initctx->ctrlreg[0] = pcb->pcb_cr0;
initctx->ctrlreg[1] = 0; /* "resuming" from kernel - no User cr3. */
initctx->ctrlreg[2] = pcb->pcb_cr2; /* XXX: */
+ /*
+ * Use pmap_kernel() L4 PD directly, until we setup the
+ * per-cpu L4 PD in pmap_cpu_init_late()
+ */
initctx->ctrlreg[3] = xpmap_ptom(pcb->pcb_cr3);
initctx->ctrlreg[4] = CR4_PAE | CR4_OSFXSR | CR4_OSXMMEXCPT;
@@ -1183,7 +1186,6 @@
x86_disable_intr();
if (!__predict_false(ci->ci_want_resched)) {
idle_block();
-
} else {
x86_enable_intr();
}
@@ -1219,34 +1221,45 @@
#ifdef __x86_64__
int i, s;
- pd_entry_t *old_pgd, *new_pgd;
- paddr_t addr;
+ pd_entry_t *new_pgd;
struct cpu_info *ci;
+ paddr_t l3_shadow_pa;
- /* kernel pmap always in cr3 and should never go in user cr3 */
- if (pmap_pdirpa(pmap, 0) != pmap_pdirpa(pmap_kernel(), 0)) {
- ci = curcpu();
- /*
- * Map user space address in kernel space and load
- * user cr3
- */
- s = splvm();
- new_pgd = pmap->pm_pdir;
- old_pgd = pmap_kernel()->pm_pdir;
- addr = xpmap_ptom(pmap_pdirpa(pmap_kernel(), 0));
- xpq_queue_lock();
- for (i = 0; i < PDIR_SLOT_PTE;
- i++, addr += sizeof(pd_entry_t)) {
- if ((new_pgd[i] & PG_V) || (old_pgd[i] & PG_V))
- xpq_queue_pte_update(addr, new_pgd[i]);
- }
- xpq_queue_unlock();
- tlbflush();
- xpq_queue_lock();
+ ci = curcpu();
+ l3_shadow_pa = xpmap_ptom_masked(ci->ci_kpm_pdirpa);
+
+ /*
+ * Map user space address in kernel space and load
+ * user cr3
+ */
+ s = splvm();
+ new_pgd = pmap->pm_pdir;
+
+ xpq_queue_lock();
+ /* Copy source pmap L4 PDEs (in user addr. range) to shadow */
+ for (i = 0; i < PDIR_SLOT_PTE; i++) {
+ xpq_queue_pte_update(l3_shadow_pa + i * sizeof(pd_entry_t), new_pgd[i]);
+ }
+
+ /* Copy kernel mappings */
+ new_pgd = pmap_kernel()->pm_pdir;
+ for (i = PDIR_SLOT_KERN; i < nkptp[PTP_LEVELS - 1]; i++) {
+ xpq_queue_pte_update(l3_shadow_pa + i * sizeof(pd_entry_t), new_pgd[i]);
+ }
+
+ xpq_queue_unlock();
+ tlbflush();
+ xpq_queue_lock();
+ if (__predict_true(pmap != pmap_kernel())) {
xen_set_user_pgd(pmap_pdirpa(pmap, 0));
ci->ci_xen_current_user_pgd = pmap_pdirpa(pmap, 0);
- xpq_queue_unlock();
- splx(s);
}
+ else {
+ xpq_queue_pt_switch(l3_shadow_pa);
+ ci->ci_xen_current_user_pgd = 0;
+ }
+ xpq_queue_unlock();
+ splx(s);
+
#endif /* __x86_64__ */
}
Index: src/sys/arch/xen/x86/x86_xpmap.c
diff -u src/sys/arch/xen/x86/x86_xpmap.c:1.26.2.3 src/sys/arch/xen/x86/x86_xpmap.c:1.26.2.4
--- src/sys/arch/xen/x86/x86_xpmap.c:1.26.2.3 Mon Jun 27 10:23:21 2011
+++ src/sys/arch/xen/x86/x86_xpmap.c Sat Jul 16 10:59:46 2011
@@ -1,4 +1,4 @@
-/* $NetBSD: x86_xpmap.c,v 1.26.2.3 2011/06/27 10:23:21 cherry Exp $ */
+/* $NetBSD: x86_xpmap.c,v 1.26.2.4 2011/07/16 10:59:46 cherry Exp $ */
/*
* Copyright (c) 2006 Mathieu Ropert <[email protected]>
@@ -69,7 +69,7 @@
#include <sys/cdefs.h>
-__KERNEL_RCSID(0, "$NetBSD: x86_xpmap.c,v 1.26.2.3 2011/06/27 10:23:21 cherry Exp $");
+__KERNEL_RCSID(0, "$NetBSD: x86_xpmap.c,v 1.26.2.4 2011/07/16 10:59:46 cherry Exp $");
#include "opt_xen.h"
#include "opt_ddb.h"
@@ -555,6 +555,8 @@
* for L3[3].
*/
static const int l2_4_count = 6;
+#elif defined(__x86_64__)
+static const int l2_4_count = PTP_LEVELS;
#else
static const int l2_4_count = PTP_LEVELS - 1;
#endif
@@ -665,7 +667,6 @@
return (init_tables + ((count + l2_4_count) * PAGE_SIZE));
}
-
/*
* Build a new table and switch to it
* old_count is # of old tables (including PGD, PDTPE and PDE)
@@ -740,13 +741,19 @@
memset (bt_pgd, 0, PAGE_SIZE);
avail = new_pgd + PAGE_SIZE;
#if PTP_LEVELS > 3
+ /* per-cpu "shadow" pmd */
+ pd_entry_t *bt_cpu_pgd = bt_pgd;
+ bt_pgd = (pd_entry_t *) avail;
+ memset(bt_pgd, 0, PAGE_SIZE);
+ avail += PAGE_SIZE;
+
/* Install level 3 */
pdtpe = (pd_entry_t *) avail;
memset (pdtpe, 0, PAGE_SIZE);
avail += PAGE_SIZE;
addr = ((u_long) pdtpe) - KERNBASE;
- bt_pgd[pl4_pi(KERNTEXTOFF)] =
+ bt_pgd[pl4_pi(KERNTEXTOFF)] = bt_cpu_pgd[pl4_pi(KERNTEXTOFF)] =
xpmap_ptom_masked(addr) | PG_k | PG_RW | PG_V;
__PRINTK(("L3 va %#lx pa %#" PRIxPADDR " entry %#" PRIxPADDR
@@ -938,12 +945,14 @@
xpq_queue_pin_l2_table(xpmap_ptom_masked(addr));
#endif
#else /* PAE */
- /* recursive entry in higher-level PD */
- bt_pgd[PDIR_SLOT_PTE] =
+ /* recursive entry in higher-level per-cpu PD and pmap_kernel() */
+ bt_pgd[PDIR_SLOT_PTE] = bt_cpu_pgd[PDIR_SLOT_PTE] =
xpmap_ptom_masked(new_pgd - KERNBASE) | PG_k | PG_V;
- __PRINTK(("bt_pgd[PDIR_SLOT_PTE] va %#" PRIxVADDR " pa %#" PRIxPADDR
+ __PRINTK(("bt_cpu_pgd[PDIR_SLOT_PTE] va %#" PRIxVADDR " pa %#" PRIxPADDR
" entry %#" PRIxPADDR "\n", new_pgd, (paddr_t)new_pgd - KERNBASE,
- bt_pgd[PDIR_SLOT_PTE]));
+ bt_cpu_pgd[PDIR_SLOT_PTE]));
+
+
/* Mark tables RO */
xen_bt_set_readonly((vaddr_t) pde);
#endif
@@ -966,7 +975,7 @@
#ifdef PAE
PDPpaddr = (u_long)pde - KERNBASE; /* PDP is the L2 with PAE */
#else
- PDPpaddr = (u_long)new_pgd - KERNBASE;
+ PDPpaddr = (u_long)bt_pgd - KERNBASE;
#endif
/* Switch to new tables */
@@ -988,6 +997,12 @@
xpmap_ptom_masked(addr) | PG_k | PG_V);
xpq_flush_queue();
}
+#elif defined(__x86_64__)
+ if (final) {
+ /* save the address of the shadow L4 pgd page */
+ cpu_info_primary.ci_kpm_pdir = bt_cpu_pgd;
+ cpu_info_primary.ci_kpm_pdirpa = ((paddr_t) bt_cpu_pgd - KERNBASE);
+ }
#endif
/* Now we can safely reclaim space taken by old tables */
Index: src/sys/arch/xen/x86/xen_pmap.c
diff -u src/sys/arch/xen/x86/xen_pmap.c:1.2.2.1 src/sys/arch/xen/x86/xen_pmap.c:1.2.2.2
--- src/sys/arch/xen/x86/xen_pmap.c:1.2.2.1 Thu Jun 23 14:19:50 2011
+++ src/sys/arch/xen/x86/xen_pmap.c Sat Jul 16 10:59:46 2011
@@ -1,4 +1,4 @@
-/* $NetBSD: xen_pmap.c,v 1.2.2.1 2011/06/23 14:19:50 cherry Exp $ */
+/* $NetBSD: xen_pmap.c,v 1.2.2.2 2011/07/16 10:59:46 cherry Exp $ */
/*
* Copyright (c) 2007 Manuel Bouyer.
@@ -102,7 +102,7 @@
*/
#include <sys/cdefs.h>
-__KERNEL_RCSID(0, "$NetBSD: xen_pmap.c,v 1.2.2.1 2011/06/23 14:19:50 cherry Exp $");
+__KERNEL_RCSID(0, "$NetBSD: xen_pmap.c,v 1.2.2.2 2011/07/16 10:59:46 cherry Exp $");
#include "opt_user_ldt.h"
#include "opt_lockdebug.h"
@@ -275,18 +275,29 @@
for (i = 0; i < PDP_SIZE; i++) {
npde = pmap_pa2pte(
pmap_pdirpa(pmap, i * NPDPG)) | PG_k | PG_V;
+
+ xpq_queue_pte_update(xpmap_ptetomach(&APDP_PDE[i]),
+ npde);
+
+ /* APDP_PDE is per-cpu */
+ xpq_queue_invlpg((vaddr_t) &APDP_PDE[i]);
+
+ /*
+ * Install temporary recursive mapping L4 in
+ * the user pmap. XXX: What's this for?
+ */
xpq_queue_pte_update(
xpmap_ptom(pmap_pdirpa(pmap, PDIR_SLOT_PTE + i)),
npde);
- xpq_queue_pte_update(xpmap_ptetomach(&APDP_PDE[i]),
- npde);
+
+ xen_bcast_invlpg((vaddr_t) &pmap->pm_pdir[PDIR_SLOT_PTE + i]);
+
#ifdef PAE
/* update shadow entry too */
xpq_queue_pte_update(
xpmap_ptetomach(&APDP_PDE_SHADOW[i]), npde);
#endif /* PAE */
- xpq_queue_invlpg(
- (vaddr_t)&pmap->pm_pdir[PDIR_SLOT_PTE + i]);
+
}
if (pmap_valid_entry(opde))
pmap_apte_flush(ourpmap);