Module Name:    src
Committed By:   maxv
Date:           Sun Feb 11 09:39:37 UTC 2018

Modified Files:
        src/sys/arch/amd64/amd64: machdep.c
        src/sys/arch/x86/conf: files.x86
Added Files:
        src/sys/arch/x86/x86: svs.c

Log Message:
Move SVS into x86/svs.c


To generate a diff of this commit:
cvs rdiff -u -r1.297 -r1.298 src/sys/arch/amd64/amd64/machdep.c
cvs rdiff -u -r1.92 -r1.93 src/sys/arch/x86/conf/files.x86
cvs rdiff -u -r0 -r1.1 src/sys/arch/x86/x86/svs.c

Please note that diffs are not public domain; they are subject to the
copyright notices on the relevant files.

Modified files:

Index: src/sys/arch/amd64/amd64/machdep.c
diff -u src/sys/arch/amd64/amd64/machdep.c:1.297 src/sys/arch/amd64/amd64/machdep.c:1.298
--- src/sys/arch/amd64/amd64/machdep.c:1.297	Sun Feb  4 17:03:21 2018
+++ src/sys/arch/amd64/amd64/machdep.c	Sun Feb 11 09:39:36 2018
@@ -1,4 +1,4 @@
-/*	$NetBSD: machdep.c,v 1.297 2018/02/04 17:03:21 maxv Exp $	*/
+/*	$NetBSD: machdep.c,v 1.298 2018/02/11 09:39:36 maxv Exp $	*/
 
 /*
  * Copyright (c) 1996, 1997, 1998, 2000, 2006, 2007, 2008, 2011
@@ -110,7 +110,7 @@
  */
 
 #include <sys/cdefs.h>
-__KERNEL_RCSID(0, "$NetBSD: machdep.c,v 1.297 2018/02/04 17:03:21 maxv Exp $");
+__KERNEL_RCSID(0, "$NetBSD: machdep.c,v 1.298 2018/02/11 09:39:36 maxv Exp $");
 
 /* #define XENDEBUG_LOW  */
 
@@ -123,7 +123,6 @@ __KERNEL_RCSID(0, "$NetBSD: machdep.c,v 
 #include "opt_realmem.h"
 #include "opt_xen.h"
 #include "opt_kaslr.h"
-#include "opt_svs.h"
 #ifndef XEN
 #include "opt_physmem.h"
 #endif
@@ -2236,391 +2235,3 @@ mm_md_direct_mapped_phys(paddr_t paddr, 
 	return true;
 }
 #endif
-
-/* -------------------------------------------------------------------------- */
-
-#ifdef SVS
-/*
- * Separate Virtual Space
- *
- * A per-cpu L4 page is maintained in ci_svs_updirpa. During each context
- * switch to a user pmap, updirpa is populated with the entries of the new
- * pmap, minus what we don't want to have mapped in userland.
- *
- * Note on locking/synchronization here:
- *
- * (a) Touching ci_svs_updir without holding ci_svs_mtx first is *not*
- *     allowed.
- *
- * (b) pm_kernel_cpus contains the set of CPUs that have the pmap loaded
- *     in their CR3 register. It must *not* be replaced by pm_cpus.
- *
- * (c) When a context switch on the current CPU is made from a user LWP
- *     towards a kernel LWP, CR3 is not updated. Therefore, the pmap's
- *     pm_kernel_cpus still contains the current CPU. It implies that the
- *     remote CPUs that execute other threads of the user process we just
- *     left will keep synchronizing us against their changes.
- *
- * List of areas that are removed from userland:
- *     PTE Space         [OK]
- *     Direct Map        [OK]
- *     Remote PCPU Areas [OK]
- *     Kernel Heap       [OK]
- *     Kernel Image      [OK]
- *
- * TODO:
- *
- * (a) The NMI stack is not double-entered. Therefore if we ever receive
- *     an NMI and leave it, the content of the stack will be visible to
- *     userland (via Meltdown). Normally we never leave NMIs, unless a
- *     privileged user launched PMCs. That's unlikely to happen, our PMC
- *     support is pretty minimal.
- *
- * (b) Enable SVS depending on the CPU model, and add a sysctl to disable
- *     it dynamically.
- *
- * (c) Narrow down the entry points: hide the 'jmp handler' instructions.
- *     This makes sense on GENERIC_KASLR kernels.
- *
- * (d) Right now there is only one global LDT, and that's not compatible
- *     with USER_LDT.
- */
-
-struct svs_utls {
-	paddr_t kpdirpa;
-	uint64_t scratch;
-	vaddr_t rsp0;
-};
-
-static pd_entry_t *
-svs_tree_add(struct cpu_info *ci, vaddr_t va)
-{
-	extern const vaddr_t ptp_masks[];
-	extern const int ptp_shifts[];
-	extern const long nbpd[];
-	pd_entry_t *dstpde;
-	size_t i, pidx, mod;
-	struct vm_page *pg;
-	paddr_t pa;
-
-	dstpde = ci->ci_svs_updir;
-	mod = (size_t)-1;
-
-	for (i = PTP_LEVELS; i > 1; i--) {
-		pidx = pl_i(va % mod, i);
-
-		if (!pmap_valid_entry(dstpde[pidx])) {
-			pg = uvm_pagealloc(NULL, 0, NULL, UVM_PGA_ZERO);
-			if (pg == 0)
-				panic("%s: failed to allocate PA for CPU %d\n",
-					__func__, cpu_index(ci));
-			pa = VM_PAGE_TO_PHYS(pg);
-
-			dstpde[pidx] = PG_V | PG_RW | pa;
-		}
-
-		pa = (paddr_t)(dstpde[pidx] & PG_FRAME);
-		dstpde = (pd_entry_t *)PMAP_DIRECT_MAP(pa);
-		mod = nbpd[i-1];
-	}
-
-	return dstpde;
-}
-
-static void
-svs_page_add(struct cpu_info *ci, vaddr_t va)
-{
-	pd_entry_t *srcpde, *dstpde, pde;
-	size_t idx, pidx;
-	paddr_t pa;
-
-	/* Create levels L4, L3 and L2. */
-	dstpde = svs_tree_add(ci, va);
-
-	pidx = pl1_i(va % NBPD_L2);
-
-	/*
-	 * If 'va' is in a large page, we need to compute its physical
-	 * address manually.
-	 */
-	idx = pl2_i(va);
-	srcpde = L2_BASE;
-	if (!pmap_valid_entry(srcpde[idx])) {
-		panic("%s: L2 page not mapped", __func__);
-	}
-	if (srcpde[idx] & PG_PS) {
-		pa = srcpde[idx] & PG_2MFRAME;
-		pa += (paddr_t)(va % NBPD_L2);
-		pde = (srcpde[idx] & ~(PG_PS|PG_2MFRAME)) | pa;
-
-		if (pmap_valid_entry(dstpde[pidx])) {
-			panic("%s: L1 page already mapped", __func__);
-		}
-		dstpde[pidx] = pde;
-		return;
-	}
-
-	/*
-	 * Normal page, just copy the PDE.
-	 */
-	idx = pl1_i(va);
-	srcpde = L1_BASE;
-	if (!pmap_valid_entry(srcpde[idx])) {
-		panic("%s: L1 page not mapped", __func__);
-	}
-	if (pmap_valid_entry(dstpde[pidx])) {
-		panic("%s: L1 page already mapped", __func__);
-	}
-	dstpde[pidx] = srcpde[idx];
-}
-
-static void
-svs_rsp0_init(struct cpu_info *ci)
-{
-	const cpuid_t cid = cpu_index(ci);
-	vaddr_t va, rsp0;
-	pd_entry_t *pd;
-	size_t pidx;
-
-	rsp0 = (vaddr_t)&pcpuarea->ent[cid].rsp0;
-
-	/* The first page is a redzone. */
-	va = rsp0 + PAGE_SIZE;
-
-	/* Create levels L4, L3 and L2. */
-	pd = svs_tree_add(ci, va);
-
-	/* Get the info for L1. */
-	pidx = pl1_i(va % NBPD_L2);
-	if (pmap_valid_entry(pd[pidx])) {
-		panic("%s: rsp0 page already mapped", __func__);
-	}
-
-	ci->ci_svs_rsp0_pte = (pt_entry_t *)&pd[pidx];
-	ci->ci_svs_rsp0 = rsp0 + PAGE_SIZE + sizeof(struct trapframe);
-	ci->ci_svs_ursp0 = ci->ci_svs_rsp0 - sizeof(struct trapframe);
-	ci->ci_svs_krsp0 = 0;
-}
-
-static void
-svs_utls_init(struct cpu_info *ci)
-{
-	const vaddr_t utlsva = (vaddr_t)&pcpuarea->utls;
-	struct svs_utls *utls;
-	struct vm_page *pg;
-	pd_entry_t *pd;
-	size_t pidx;
-	paddr_t pa;
-	vaddr_t va;
-
-	/* Create levels L4, L3 and L2. */
-	pd = svs_tree_add(ci, utlsva);
-
-	/* Allocate L1. */
-	pg = uvm_pagealloc(NULL, 0, NULL, UVM_PGA_ZERO);
-	if (pg == 0)
-		panic("%s: failed to allocate PA for CPU %d\n", __func__,
-		    cpu_index(ci));
-	pa = VM_PAGE_TO_PHYS(pg);
-
-	/* Enter L1. */
-	if (pmap_valid_entry(L1_BASE[pl1_i(utlsva)])) {
-		panic("%s: local page already mapped", __func__);
-	}
-	pidx = pl1_i(utlsva % NBPD_L2);
-	if (pmap_valid_entry(pd[pidx])) {
-		panic("%s: L1 page already mapped", __func__);
-	}
-	pd[pidx] = PG_V | PG_RW | pmap_pg_nx | pa;
-
-	/*
-	 * Now, allocate a VA in the kernel map, that points to the UTLS
-	 * page.
-	 */
-	va = uvm_km_alloc(kernel_map, PAGE_SIZE, 0,
-	    UVM_KMF_VAONLY|UVM_KMF_NOWAIT);
-	if (va == 0) {
-		panic("%s: unable to allocate VA\n", __func__);
-	}
-	pmap_kenter_pa(va, pa, VM_PROT_READ|VM_PROT_WRITE, 0);
-	pmap_update(pmap_kernel());
-
-	ci->ci_svs_utls = va;
-
-	/* Initialize the constant fields of the UTLS page */
-	utls = (struct svs_utls *)ci->ci_svs_utls;
-	utls->rsp0 = ci->ci_svs_rsp0;
-}
-
-static void
-svs_range_add(struct cpu_info *ci, vaddr_t va, size_t size)
-{
-	size_t i, n;
-
-	KASSERT(size % PAGE_SIZE == 0);
-	n = size / PAGE_SIZE;
-	for (i = 0; i < n; i++) {
-		svs_page_add(ci, va + i * PAGE_SIZE);
-	}
-}
-
-void
-cpu_svs_init(struct cpu_info *ci)
-{
-	extern char __text_user_start;
-	extern char __text_user_end;
-	const cpuid_t cid = cpu_index(ci);
-	struct vm_page *pg;
-
-	KASSERT(ci != NULL);
-
-	pg = uvm_pagealloc(NULL, 0, NULL, UVM_PGA_ZERO);
-	if (pg == 0)
-		panic("%s: failed to allocate L4 PA for CPU %d\n",
-			__func__, cpu_index(ci));
-	ci->ci_svs_updirpa = VM_PAGE_TO_PHYS(pg);
-
-	ci->ci_svs_updir = (pt_entry_t *)uvm_km_alloc(kernel_map, PAGE_SIZE, 0,
-		UVM_KMF_VAONLY | UVM_KMF_NOWAIT);
-	if (ci->ci_svs_updir == NULL)
-		panic("%s: failed to allocate L4 VA for CPU %d\n",
-			__func__, cpu_index(ci));
-
-	pmap_kenter_pa((vaddr_t)ci->ci_svs_updir, ci->ci_svs_updirpa,
-		VM_PROT_READ | VM_PROT_WRITE, 0);
-
-	pmap_update(pmap_kernel());
-
-	ci->ci_svs_kpdirpa = pmap_pdirpa(pmap_kernel(), 0);
-
-	mutex_init(&ci->ci_svs_mtx, MUTEX_DEFAULT, IPL_VM);
-
-	svs_page_add(ci, (vaddr_t)&pcpuarea->idt);
-	svs_page_add(ci, (vaddr_t)&pcpuarea->ldt);
-	svs_range_add(ci, (vaddr_t)&pcpuarea->ent[cid],
-	    offsetof(struct pcpu_entry, rsp0));
-	svs_range_add(ci, (vaddr_t)&__text_user_start,
-	    (vaddr_t)&__text_user_end - (vaddr_t)&__text_user_start);
-
-	svs_rsp0_init(ci);
-	svs_utls_init(ci);
-}
-
-void
-svs_pmap_sync(struct pmap *pmap, int index)
-{
-	CPU_INFO_ITERATOR cii;
-	struct cpu_info *ci;
-	cpuid_t cid;
-
-	KASSERT(pmap != NULL);
-	KASSERT(pmap != pmap_kernel());
-	KASSERT(mutex_owned(pmap->pm_lock));
-	KASSERT(kpreempt_disabled());
-	KASSERT(index < 255);
-
-	for (CPU_INFO_FOREACH(cii, ci)) {
-		cid = cpu_index(ci);
-
-		if (!kcpuset_isset(pmap->pm_kernel_cpus, cid)) {
-			continue;
-		}
-
-		/* take the lock and check again */
-		mutex_enter(&ci->ci_svs_mtx);
-		if (kcpuset_isset(pmap->pm_kernel_cpus, cid)) {
-			ci->ci_svs_updir[index] = pmap->pm_pdir[index];
-		}
-		mutex_exit(&ci->ci_svs_mtx);
-	}
-}
-
-void
-svs_lwp_switch(struct lwp *oldlwp, struct lwp *newlwp)
-{
-	struct cpu_info *ci = curcpu();
-	struct svs_utls *utls;
-	struct pcb *pcb;
-	pt_entry_t *pte;
-	uintptr_t rsp0;
-	vaddr_t va;
-
-	if (newlwp->l_flag & LW_SYSTEM) {
-		return;
-	}
-
-#ifdef DIAGNOSTIC
-	if (oldlwp != NULL && !(oldlwp->l_flag & LW_SYSTEM)) {
-		pcb = lwp_getpcb(oldlwp);
-		rsp0 = pcb->pcb_rsp0;
-		va = rounddown(rsp0, PAGE_SIZE);
-		KASSERT(ci->ci_svs_krsp0 == rsp0 - sizeof(struct trapframe));
-		pte = ci->ci_svs_rsp0_pte;
-		KASSERT(*pte == L1_BASE[pl1_i(va)]);
-	}
-#endif
-
-	pcb = lwp_getpcb(newlwp);
-	rsp0 = pcb->pcb_rsp0;
-	va = rounddown(rsp0, PAGE_SIZE);
-
-	/* Update the kernel rsp0 in cpu_info */
-	ci->ci_svs_krsp0 = rsp0 - sizeof(struct trapframe);
-	KASSERT((ci->ci_svs_krsp0 % PAGE_SIZE) ==
-	    (ci->ci_svs_ursp0 % PAGE_SIZE));
-
-	utls = (struct svs_utls *)ci->ci_svs_utls;
-	utls->scratch = 0;
-
-	/*
-	 * Enter the user rsp0. We don't need to flush the TLB here, since
-	 * the user page tables are not loaded.
-	 */
-	pte = ci->ci_svs_rsp0_pte;
-	*pte = L1_BASE[pl1_i(va)];
-}
-
-static inline pt_entry_t
-svs_pte_atomic_read(struct pmap *pmap, size_t idx)
-{
-	/*
-	 * XXX: We don't have a basic atomic_fetch_64 function?
-	 */
-	return atomic_cas_64(&pmap->pm_pdir[idx], 666, 666);
-}
-
-/*
- * We may come here with the pmap unlocked. So read its PTEs atomically. If
- * a remote CPU is updating them at the same time, it's not a problem: the
- * remote CPU will call svs_pmap_sync afterwards, and our updirpa will be
- * synchronized properly.
- */
-void
-svs_pdir_switch(struct pmap *pmap)
-{
-	struct cpu_info *ci = curcpu();
-	struct svs_utls *utls;
-	pt_entry_t pte;
-	size_t i;
-
-	KASSERT(kpreempt_disabled());
-	KASSERT(pmap != pmap_kernel());
-
-	ci->ci_svs_kpdirpa = pmap_pdirpa(pmap, 0);
-
-	/* Update the info in the UTLS page */
-	utls = (struct svs_utls *)ci->ci_svs_utls;
-	utls->kpdirpa = ci->ci_svs_kpdirpa;
-
-	mutex_enter(&ci->ci_svs_mtx);
-
-	/* User slots. */
-	for (i = 0; i < 255; i++) {
-		pte = svs_pte_atomic_read(pmap, i);
-		ci->ci_svs_updir[i] = pte;
-	}
-
-	mutex_exit(&ci->ci_svs_mtx);
-}
-#endif
-

Index: src/sys/arch/x86/conf/files.x86
diff -u src/sys/arch/x86/conf/files.x86:1.92 src/sys/arch/x86/conf/files.x86:1.93
--- src/sys/arch/x86/conf/files.x86:1.92	Mon Jan 22 19:37:45 2018
+++ src/sys/arch/x86/conf/files.x86	Sun Feb 11 09:39:37 2018
@@ -1,4 +1,4 @@
-#	$NetBSD: files.x86,v 1.92 2018/01/22 19:37:45 jdolecek Exp $
+#	$NetBSD: files.x86,v 1.93 2018/02/11 09:39:37 maxv Exp $
 
 # options for MP configuration through the MP spec
 defflag opt_mpbios.h MPBIOS MPVERBOSE MPDEBUG MPBIOS_SCANPCI
@@ -97,6 +97,7 @@ file 	arch/x86/x86/pmap.c		machdep
 file 	arch/x86/x86/x86_tlb.c		machdep
 file 	arch/x86/x86/pmc.c		machdep
 file	arch/x86/x86/procfs_machdep.c	procfs
+file 	arch/x86/x86/svs.c		machdep & svs
 file	arch/x86/x86/sys_machdep.c	machdep
 file	arch/x86/x86/syscall.c		machdep
 file	arch/x86/x86/tsc.c		machdep

Added files:

Index: src/sys/arch/x86/x86/svs.c
diff -u /dev/null src/sys/arch/x86/x86/svs.c:1.1
--- /dev/null	Sun Feb 11 09:39:37 2018
+++ src/sys/arch/x86/x86/svs.c	Sun Feb 11 09:39:37 2018
@@ -0,0 +1,426 @@
+/*	$NetBSD: svs.c,v 1.1 2018/02/11 09:39:37 maxv Exp $	*/
+
+/*
+ * Copyright (c) 2018 The NetBSD Foundation, Inc.
+ * All rights reserved.
+ *
+ * This code is derived from software contributed to The NetBSD Foundation
+ * by Maxime Villard.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
+ * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
+ * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+ * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
+ * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include <sys/cdefs.h>
+__KERNEL_RCSID(0, "$NetBSD: svs.c,v 1.1 2018/02/11 09:39:37 maxv Exp $");
+
+#include "opt_svs.h"
+
+#include <sys/param.h>
+#include <sys/systm.h>
+#include <sys/proc.h>
+#include <sys/cpu.h>
+
+#include <uvm/uvm.h>
+#include <uvm/uvm_page.h>
+
+/*
+ * Separate Virtual Space
+ *
+ * A per-cpu L4 page is maintained in ci_svs_updirpa. During each context
+ * switch to a user pmap, updirpa is populated with the entries of the new
+ * pmap, minus what we don't want to have mapped in userland.
+ *
+ * Note on locking/synchronization here:
+ *
+ * (a) Touching ci_svs_updir without holding ci_svs_mtx first is *not*
+ *     allowed.
+ *
+ * (b) pm_kernel_cpus contains the set of CPUs that have the pmap loaded
+ *     in their CR3 register. It must *not* be replaced by pm_cpus.
+ *
+ * (c) When a context switch on the current CPU is made from a user LWP
+ *     towards a kernel LWP, CR3 is not updated. Therefore, the pmap's
+ *     pm_kernel_cpus still contains the current CPU. It implies that the
+ *     remote CPUs that execute other threads of the user process we just
+ *     left will keep synchronizing us against their changes.
+ *
+ * List of areas that are removed from userland:
+ *     PTE Space         [OK]
+ *     Direct Map        [OK]
+ *     Remote PCPU Areas [OK]
+ *     Kernel Heap       [OK]
+ *     Kernel Image      [OK]
+ *
+ * TODO:
+ *
+ * (a) The NMI stack is not double-entered. Therefore if we ever receive
+ *     an NMI and leave it, the content of the stack will be visible to
+ *     userland (via Meltdown). Normally we never leave NMIs, unless a
+ *     privileged user launched PMCs. That's unlikely to happen, our PMC
+ *     support is pretty minimal.
+ *
+ * (b) Enable SVS depending on the CPU model, and add a sysctl to disable
+ *     it dynamically.
+ *
+ * (c) Narrow down the entry points: hide the 'jmp handler' instructions.
+ *     This makes sense on GENERIC_KASLR kernels.
+ *
+ * (d) Right now there is only one global LDT, and that's not compatible
+ *     with USER_LDT.
+ */
+
+struct svs_utls {
+	paddr_t kpdirpa;
+	uint64_t scratch;
+	vaddr_t rsp0;
+};
+
+static pd_entry_t *
+svs_tree_add(struct cpu_info *ci, vaddr_t va)
+{
+	extern const vaddr_t ptp_masks[];
+	extern const int ptp_shifts[];
+	extern const long nbpd[];
+	pd_entry_t *dstpde;
+	size_t i, pidx, mod;
+	struct vm_page *pg;
+	paddr_t pa;
+
+	dstpde = ci->ci_svs_updir;
+	mod = (size_t)-1;
+
+	for (i = PTP_LEVELS; i > 1; i--) {
+		pidx = pl_i(va % mod, i);
+
+		if (!pmap_valid_entry(dstpde[pidx])) {
+			pg = uvm_pagealloc(NULL, 0, NULL, UVM_PGA_ZERO);
+			if (pg == 0)
+				panic("%s: failed to allocate PA for CPU %d\n",
+					__func__, cpu_index(ci));
+			pa = VM_PAGE_TO_PHYS(pg);
+
+			dstpde[pidx] = PG_V | PG_RW | pa;
+		}
+
+		pa = (paddr_t)(dstpde[pidx] & PG_FRAME);
+		dstpde = (pd_entry_t *)PMAP_DIRECT_MAP(pa);
+		mod = nbpd[i-1];
+	}
+
+	return dstpde;
+}
+
+static void
+svs_page_add(struct cpu_info *ci, vaddr_t va)
+{
+	pd_entry_t *srcpde, *dstpde, pde;
+	size_t idx, pidx;
+	paddr_t pa;
+
+	/* Create levels L4, L3 and L2. */
+	dstpde = svs_tree_add(ci, va);
+
+	pidx = pl1_i(va % NBPD_L2);
+
+	/*
+	 * If 'va' is in a large page, we need to compute its physical
+	 * address manually.
+	 */
+	idx = pl2_i(va);
+	srcpde = L2_BASE;
+	if (!pmap_valid_entry(srcpde[idx])) {
+		panic("%s: L2 page not mapped", __func__);
+	}
+	if (srcpde[idx] & PG_PS) {
+		pa = srcpde[idx] & PG_2MFRAME;
+		pa += (paddr_t)(va % NBPD_L2);
+		pde = (srcpde[idx] & ~(PG_PS|PG_2MFRAME)) | pa;
+
+		if (pmap_valid_entry(dstpde[pidx])) {
+			panic("%s: L1 page already mapped", __func__);
+		}
+		dstpde[pidx] = pde;
+		return;
+	}
+
+	/*
+	 * Normal page, just copy the PDE.
+	 */
+	idx = pl1_i(va);
+	srcpde = L1_BASE;
+	if (!pmap_valid_entry(srcpde[idx])) {
+		panic("%s: L1 page not mapped", __func__);
+	}
+	if (pmap_valid_entry(dstpde[pidx])) {
+		panic("%s: L1 page already mapped", __func__);
+	}
+	dstpde[pidx] = srcpde[idx];
+}
+
+static void
+svs_rsp0_init(struct cpu_info *ci)
+{
+	const cpuid_t cid = cpu_index(ci);
+	vaddr_t va, rsp0;
+	pd_entry_t *pd;
+	size_t pidx;
+
+	rsp0 = (vaddr_t)&pcpuarea->ent[cid].rsp0;
+
+	/* The first page is a redzone. */
+	va = rsp0 + PAGE_SIZE;
+
+	/* Create levels L4, L3 and L2. */
+	pd = svs_tree_add(ci, va);
+
+	/* Get the info for L1. */
+	pidx = pl1_i(va % NBPD_L2);
+	if (pmap_valid_entry(pd[pidx])) {
+		panic("%s: rsp0 page already mapped", __func__);
+	}
+
+	ci->ci_svs_rsp0_pte = (pt_entry_t *)&pd[pidx];
+	ci->ci_svs_rsp0 = rsp0 + PAGE_SIZE + sizeof(struct trapframe);
+	ci->ci_svs_ursp0 = ci->ci_svs_rsp0 - sizeof(struct trapframe);
+	ci->ci_svs_krsp0 = 0;
+}
+
+static void
+svs_utls_init(struct cpu_info *ci)
+{
+	const vaddr_t utlsva = (vaddr_t)&pcpuarea->utls;
+	struct svs_utls *utls;
+	struct vm_page *pg;
+	pd_entry_t *pd;
+	size_t pidx;
+	paddr_t pa;
+	vaddr_t va;
+
+	/* Create levels L4, L3 and L2. */
+	pd = svs_tree_add(ci, utlsva);
+
+	/* Allocate L1. */
+	pg = uvm_pagealloc(NULL, 0, NULL, UVM_PGA_ZERO);
+	if (pg == 0)
+		panic("%s: failed to allocate PA for CPU %d\n", __func__,
+		    cpu_index(ci));
+	pa = VM_PAGE_TO_PHYS(pg);
+
+	/* Enter L1. */
+	if (pmap_valid_entry(L1_BASE[pl1_i(utlsva)])) {
+		panic("%s: local page already mapped", __func__);
+	}
+	pidx = pl1_i(utlsva % NBPD_L2);
+	if (pmap_valid_entry(pd[pidx])) {
+		panic("%s: L1 page already mapped", __func__);
+	}
+	pd[pidx] = PG_V | PG_RW | pmap_pg_nx | pa;
+
+	/*
+	 * Now, allocate a VA in the kernel map, that points to the UTLS
+	 * page.
+	 */
+	va = uvm_km_alloc(kernel_map, PAGE_SIZE, 0,
+	    UVM_KMF_VAONLY|UVM_KMF_NOWAIT);
+	if (va == 0) {
+		panic("%s: unable to allocate VA\n", __func__);
+	}
+	pmap_kenter_pa(va, pa, VM_PROT_READ|VM_PROT_WRITE, 0);
+	pmap_update(pmap_kernel());
+
+	ci->ci_svs_utls = va;
+
+	/* Initialize the constant fields of the UTLS page */
+	utls = (struct svs_utls *)ci->ci_svs_utls;
+	utls->rsp0 = ci->ci_svs_rsp0;
+}
+
+static void
+svs_range_add(struct cpu_info *ci, vaddr_t va, size_t size)
+{
+	size_t i, n;
+
+	KASSERT(size % PAGE_SIZE == 0);
+	n = size / PAGE_SIZE;
+	for (i = 0; i < n; i++) {
+		svs_page_add(ci, va + i * PAGE_SIZE);
+	}
+}
+
+void
+cpu_svs_init(struct cpu_info *ci)
+{
+	extern char __text_user_start;
+	extern char __text_user_end;
+	const cpuid_t cid = cpu_index(ci);
+	struct vm_page *pg;
+
+	KASSERT(ci != NULL);
+
+	pg = uvm_pagealloc(NULL, 0, NULL, UVM_PGA_ZERO);
+	if (pg == 0)
+		panic("%s: failed to allocate L4 PA for CPU %d\n",
+			__func__, cpu_index(ci));
+	ci->ci_svs_updirpa = VM_PAGE_TO_PHYS(pg);
+
+	ci->ci_svs_updir = (pt_entry_t *)uvm_km_alloc(kernel_map, PAGE_SIZE, 0,
+		UVM_KMF_VAONLY | UVM_KMF_NOWAIT);
+	if (ci->ci_svs_updir == NULL)
+		panic("%s: failed to allocate L4 VA for CPU %d\n",
+			__func__, cpu_index(ci));
+
+	pmap_kenter_pa((vaddr_t)ci->ci_svs_updir, ci->ci_svs_updirpa,
+		VM_PROT_READ | VM_PROT_WRITE, 0);
+
+	pmap_update(pmap_kernel());
+
+	ci->ci_svs_kpdirpa = pmap_pdirpa(pmap_kernel(), 0);
+
+	mutex_init(&ci->ci_svs_mtx, MUTEX_DEFAULT, IPL_VM);
+
+	svs_page_add(ci, (vaddr_t)&pcpuarea->idt);
+	svs_page_add(ci, (vaddr_t)&pcpuarea->ldt);
+	svs_range_add(ci, (vaddr_t)&pcpuarea->ent[cid],
+	    offsetof(struct pcpu_entry, rsp0));
+	svs_range_add(ci, (vaddr_t)&__text_user_start,
+	    (vaddr_t)&__text_user_end - (vaddr_t)&__text_user_start);
+
+	svs_rsp0_init(ci);
+	svs_utls_init(ci);
+}
+
+void
+svs_pmap_sync(struct pmap *pmap, int index)
+{
+	CPU_INFO_ITERATOR cii;
+	struct cpu_info *ci;
+	cpuid_t cid;
+
+	KASSERT(pmap != NULL);
+	KASSERT(pmap != pmap_kernel());
+	KASSERT(mutex_owned(pmap->pm_lock));
+	KASSERT(kpreempt_disabled());
+	KASSERT(index < 255);
+
+	for (CPU_INFO_FOREACH(cii, ci)) {
+		cid = cpu_index(ci);
+
+		if (!kcpuset_isset(pmap->pm_kernel_cpus, cid)) {
+			continue;
+		}
+
+		/* take the lock and check again */
+		mutex_enter(&ci->ci_svs_mtx);
+		if (kcpuset_isset(pmap->pm_kernel_cpus, cid)) {
+			ci->ci_svs_updir[index] = pmap->pm_pdir[index];
+		}
+		mutex_exit(&ci->ci_svs_mtx);
+	}
+}
+
+void
+svs_lwp_switch(struct lwp *oldlwp, struct lwp *newlwp)
+{
+	struct cpu_info *ci = curcpu();
+	struct svs_utls *utls;
+	struct pcb *pcb;
+	pt_entry_t *pte;
+	uintptr_t rsp0;
+	vaddr_t va;
+
+	if (newlwp->l_flag & LW_SYSTEM) {
+		return;
+	}
+
+#ifdef DIAGNOSTIC
+	if (oldlwp != NULL && !(oldlwp->l_flag & LW_SYSTEM)) {
+		pcb = lwp_getpcb(oldlwp);
+		rsp0 = pcb->pcb_rsp0;
+		va = rounddown(rsp0, PAGE_SIZE);
+		KASSERT(ci->ci_svs_krsp0 == rsp0 - sizeof(struct trapframe));
+		pte = ci->ci_svs_rsp0_pte;
+		KASSERT(*pte == L1_BASE[pl1_i(va)]);
+	}
+#endif
+
+	pcb = lwp_getpcb(newlwp);
+	rsp0 = pcb->pcb_rsp0;
+	va = rounddown(rsp0, PAGE_SIZE);
+
+	/* Update the kernel rsp0 in cpu_info */
+	ci->ci_svs_krsp0 = rsp0 - sizeof(struct trapframe);
+	KASSERT((ci->ci_svs_krsp0 % PAGE_SIZE) ==
+	    (ci->ci_svs_ursp0 % PAGE_SIZE));
+
+	utls = (struct svs_utls *)ci->ci_svs_utls;
+	utls->scratch = 0;
+
+	/*
+	 * Enter the user rsp0. We don't need to flush the TLB here, since
+	 * the user page tables are not loaded.
+	 */
+	pte = ci->ci_svs_rsp0_pte;
+	*pte = L1_BASE[pl1_i(va)];
+}
+
+static inline pt_entry_t
+svs_pte_atomic_read(struct pmap *pmap, size_t idx)
+{
+	/*
+	 * XXX: We don't have a basic atomic_fetch_64 function?
+	 */
+	return atomic_cas_64(&pmap->pm_pdir[idx], 666, 666);
+}
+
+/*
+ * We may come here with the pmap unlocked. So read its PTEs atomically. If
+ * a remote CPU is updating them at the same time, it's not a problem: the
+ * remote CPU will call svs_pmap_sync afterwards, and our updirpa will be
+ * synchronized properly.
+ */
+void
+svs_pdir_switch(struct pmap *pmap)
+{
+	struct cpu_info *ci = curcpu();
+	struct svs_utls *utls;
+	pt_entry_t pte;
+	size_t i;
+
+	KASSERT(kpreempt_disabled());
+	KASSERT(pmap != pmap_kernel());
+
+	ci->ci_svs_kpdirpa = pmap_pdirpa(pmap, 0);
+
+	/* Update the info in the UTLS page */
+	utls = (struct svs_utls *)ci->ci_svs_utls;
+	utls->kpdirpa = ci->ci_svs_kpdirpa;
+
+	mutex_enter(&ci->ci_svs_mtx);
+
+	/* User slots. */
+	for (i = 0; i < 255; i++) {
+		pte = svs_pte_atomic_read(pmap, i);
+		ci->ci_svs_updir[i] = pte;
+	}
+
+	mutex_exit(&ci->ci_svs_mtx);
+}

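For readers following the new svs.c above: svs_tree_add() and svs_page_add() walk the per-CPU shadow page tables using the standard amd64 4-level layout, in which each level is indexed by 9 bits of the virtual address above a 12-bit page offset (this is what pl_i()/pl1_i() compute in the kernel). Below is a minimal standalone sketch of that index arithmetic, as an illustration only and not kernel code; the macro names are hypothetical and the chosen VA is arbitrary.

	/*
	 * Standalone illustration of amd64 4-level page-table indexing,
	 * mirroring the walk done by svs_tree_add()/svs_page_add().
	 * Not kernel code; macro names here are made up for the example.
	 */
	#include <stdio.h>
	#include <stdint.h>

	#define PGSHIFT		12	/* 4096-byte pages */
	#define NPTEPGSHIFT	9	/* 512 entries per table */
	#define LX_SHIFT(lvl)	(PGSHIFT + NPTEPGSHIFT * ((lvl) - 1))
	#define LX_INDEX(va, lvl) (((va) >> LX_SHIFT(lvl)) & 0x1ff)

	int
	main(void)
	{
		uint64_t va = 0x00007f7ff7b40000ULL;	/* arbitrary user VA */
		int lvl;

		/* L4 down to L1, the same order svs_tree_add() descends. */
		for (lvl = 4; lvl >= 1; lvl--) {
			printf("L%d index: %llu\n", lvl,
			    (unsigned long long)LX_INDEX(va, lvl));
		}
		printf("page offset: %llu\n",
		    (unsigned long long)(va & ((1ULL << PGSHIFT) - 1)));
		return 0;
	}

Similarly, svs_pte_atomic_read() emulates an atomic 64-bit load with atomic_cas_64(): a compare-and-swap whose old and new values are identical never changes memory, yet always returns the current contents. A hedged standalone sketch of the same idiom using C11 atomics follows (illustration only; the kernel uses NetBSD's atomic_cas_64, and in plain C11 one would simply use atomic_load).

	/*
	 * "Fetch via no-op CAS", sketched with C11 atomics. If *p already
	 * holds 666 the exchange stores 666 back (no change); otherwise it
	 * fails and 'expected' is loaded with the current value. Either
	 * way the caller gets an atomic snapshot of *p, which is what
	 * svs_pte_atomic_read() relies on.
	 */
	#include <stdatomic.h>
	#include <stdint.h>

	static uint64_t
	fetch_64(_Atomic uint64_t *p)
	{
		uint64_t expected = 666;	/* arbitrary, mirrors the kernel code */

		atomic_compare_exchange_strong(p, &expected, 666);
		return expected;
	}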