Module Name:    src
Committed By:   matt
Date:           Mon Jul 11 16:06:09 UTC 2016

Modified Files:
        src/sys/uvm/pmap: pmap.c pmap.h pmap_segtab.c pmap_synci.c pmap_tlb.c
            pmap_tlb.h tlb.h vmpagemd.h
Added Files:
        src/sys/uvm/pmap: pmap_synci.h

Log Message:
Changes so that MIPS can use the common pmap.
Change/augment the virtual cache alias callbacks.

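For reference, the interface changes behind the cache-alias rework, with
declarations copied verbatim from the pmap.h/pmap.c hunks appended below
(the surrounding types come from the existing pmap headers):

	/* pvlist locking now hands back the mutex rather than a generation */
	kmutex_t *pmap_pvlist_lock_addr(struct vm_page_md *);

	/* pmap_enter_pv() now takes the new PTE and PV flags (e.g. PV_KENTER) */
	void	pmap_enter_pv(pmap_t, vaddr_t, struct vm_page *,
		    pt_entry_t *, u_int);

	/* all mappings of a managed page can be switched cached/uncached */
	void	pmap_page_cache(struct vm_page *, bool cached);

The machine-dependent hooks are called differently as well: pmap_md_vca_clean()
drops its virtual address argument (pmap_md_vca_clean(pg, PMAP_WBINV)) and
pmap_md_vca_remove() gains dirty/last flags (pmap_md_vca_remove(pg, va,
dirty, last)); see the pmap_enter_pv() and pmap_remove_pv() hunks below.
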

To generate a diff of this commit:
cvs rdiff -u -r1.14 -r1.15 src/sys/uvm/pmap/pmap.c
cvs rdiff -u -r1.6 -r1.7 src/sys/uvm/pmap/pmap.h
cvs rdiff -u -r1.2 -r1.3 src/sys/uvm/pmap/pmap_segtab.c \
    src/sys/uvm/pmap/pmap_synci.c src/sys/uvm/pmap/tlb.h \
    src/sys/uvm/pmap/vmpagemd.h
cvs rdiff -u -r0 -r1.1 src/sys/uvm/pmap/pmap_synci.h
cvs rdiff -u -r1.12 -r1.13 src/sys/uvm/pmap/pmap_tlb.c
cvs rdiff -u -r1.8 -r1.9 src/sys/uvm/pmap/pmap_tlb.h

Please note that diffs are not public domain; they are subject to the
copyright notices on the relevant files.

Modified files:

Index: src/sys/uvm/pmap/pmap.c
diff -u src/sys/uvm/pmap/pmap.c:1.14 src/sys/uvm/pmap/pmap.c:1.15
--- src/sys/uvm/pmap/pmap.c:1.14	Thu Jul  7 06:55:44 2016
+++ src/sys/uvm/pmap/pmap.c	Mon Jul 11 16:06:09 2016
@@ -1,4 +1,4 @@
-/*	$NetBSD: pmap.c,v 1.14 2016/07/07 06:55:44 msaitoh Exp $	*/
+/*	$NetBSD: pmap.c,v 1.15 2016/07/11 16:06:09 matt Exp $	*/
 
 /*-
  * Copyright (c) 1998, 2001 The NetBSD Foundation, Inc.
@@ -67,7 +67,7 @@
 
 #include <sys/cdefs.h>
 
-__KERNEL_RCSID(0, "$NetBSD: pmap.c,v 1.14 2016/07/07 06:55:44 msaitoh Exp $");
+__KERNEL_RCSID(0, "$NetBSD: pmap.c,v 1.15 2016/07/11 16:06:09 matt Exp $");
 
 /*
  *	Manages physical address maps.
@@ -102,22 +102,22 @@ __KERNEL_RCSID(0, "$NetBSD: pmap.c,v 1.1
 #define __PMAP_PRIVATE
 
 #include <sys/param.h>
-#include <sys/systm.h>
-#include <sys/proc.h>
+#include <sys/atomic.h>
 #include <sys/buf.h>
+#include <sys/cpu.h>
+#include <sys/mutex.h>
 #include <sys/pool.h>
 #include <sys/atomic.h>
 #include <sys/mutex.h>
 #include <sys/atomic.h>
-#include <sys/socketvar.h>	/* XXX: for sock_loan_thresh */
 
 #include <uvm/uvm.h>
 
-#define	PMAP_COUNT(name)	(pmap_evcnt_##name.ev_count++ + 0)
-#define PMAP_COUNTER(name, desc) \
-static struct evcnt pmap_evcnt_##name = \
-	EVCNT_INITIALIZER(EVCNT_TYPE_MISC, NULL, "pmap", desc); \
-EVCNT_ATTACH_STATIC(pmap_evcnt_##name)
+#if defined(MULTIPROCESSOR) && defined(PMAP_VIRTUAL_CACHE_ALIASES) \
+    && !defined(PMAP_NO_PV_UNCACHED)
+#error PMAP_VIRTUAL_CACHE_ALIASES with MULTIPROCESSOR requires \
+ PMAP_NO_PV_UNCACHED to be defined
+#endif
 
 PMAP_COUNTER(remove_kernel_calls, "remove kernel calls");
 PMAP_COUNTER(remove_kernel_pages, "kernel pages unmapped");
@@ -132,8 +132,6 @@ PMAP_COUNTER(prefer_requests, "prefer re
 PMAP_COUNTER(prefer_adjustments, "prefer adjustments");
 
 PMAP_COUNTER(idlezeroed_pages, "pages idle zeroed");
-PMAP_COUNTER(zeroed_pages, "pages zeroed");
-PMAP_COUNTER(copied_pages, "pages copied");
 
 PMAP_COUNTER(kenter_pa, "kernel fast mapped pages");
 PMAP_COUNTER(kenter_pa_bad, "kernel fast mapped pages (bad color)");
@@ -190,20 +188,22 @@ PMAP_COUNTER(page_protect, "page_protect
 #define PMAP_ASID_RESERVED 0
 CTASSERT(PMAP_ASID_RESERVED == 0);
 
-/*
- * Initialize the kernel pmap.
- */
-#ifdef MULTIPROCESSOR
-#define	PMAP_SIZE	offsetof(struct pmap, pm_pai[PMAP_TLB_MAX])
-#else
-#define	PMAP_SIZE	sizeof(struct pmap)
-kmutex_t pmap_pvlist_mutex __aligned(COHERENCY_UNIT);
+#ifndef PMAP_SEGTAB_ALIGN
+#define PMAP_SEGTAB_ALIGN	/* nothing */
+#endif
+#ifdef _LP64
+pmap_segtab_t	pmap_kstart_segtab PMAP_SEGTAB_ALIGN; /* first mid-level segtab for kernel */
+#endif
+pmap_segtab_t	pmap_kern_segtab PMAP_SEGTAB_ALIGN = { /* top level segtab for kernel */
+#ifdef _LP64
+	.seg_seg[(VM_MIN_KERNEL_ADDRESS & XSEGOFSET) >> SEGSHIFT] = &pmap_kstart_segtab,
 #endif
+};
 
 struct pmap_kernel kernel_pmap_store = {
 	.kernel_pmap = {
 		.pm_count = 1,
-		.pm_segtab = PMAP_INVALID_SEGTAB_ADDRESS,
+		.pm_segtab = &pmap_kern_segtab,
 		.pm_minaddr = VM_MIN_KERNEL_ADDRESS,
 		.pm_maxaddr = VM_MAX_KERNEL_ADDRESS,
 	},
@@ -211,7 +211,7 @@ struct pmap_kernel kernel_pmap_store = {
 
 struct pmap * const kernel_pmap_ptr = &kernel_pmap_store.kernel_pmap;
 
-struct pmap_limits pmap_limits = {
+struct pmap_limits pmap_limits = {	/* VA and PA limits */
 	.virtual_start = VM_MIN_KERNEL_ADDRESS,
 };
 
@@ -231,23 +231,24 @@ struct pool pmap_pv_pool;
 #ifndef PMAP_PV_LOWAT
 #define	PMAP_PV_LOWAT	16
 #endif
-int		pmap_pv_lowat = PMAP_PV_LOWAT;
+int	pmap_pv_lowat = PMAP_PV_LOWAT;
 
-bool		pmap_initialized = false;
+bool	pmap_initialized = false;
 #define	PMAP_PAGE_COLOROK_P(a, b) \
 		((((int)(a) ^ (int)(b)) & pmap_page_colormask) == 0)
-u_int		pmap_page_colormask;
+u_int	pmap_page_colormask;
 
-#define PAGE_IS_MANAGED(pa)	\
-	(pmap_initialized == true && vm_physseg_find(atop(pa), NULL) != -1)
+#define PAGE_IS_MANAGED(pa)	(pmap_initialized && uvm_pageismanaged(pa))
 
 #define PMAP_IS_ACTIVE(pm)						\
 	((pm) == pmap_kernel() || 					\
 	 (pm) == curlwp->l_proc->p_vmspace->vm_map.pmap)
 
 /* Forward function declarations */
+void pmap_page_remove(struct vm_page *);
+static void pmap_pvlist_check(struct vm_page_md *);
 void pmap_remove_pv(pmap_t, vaddr_t, struct vm_page *, bool);
-void pmap_enter_pv(pmap_t, vaddr_t, struct vm_page *, u_int *);
+void pmap_enter_pv(pmap_t, vaddr_t, struct vm_page *, pt_entry_t *, u_int);
 
 /*
  * PV table management functions.
@@ -265,7 +266,50 @@ struct pool_allocator pmap_pv_page_alloc
 #if !defined(MULTIPROCESSOR) || !defined(PMAP_MD_NEED_TLB_MISS_LOCK)
 #define	pmap_md_tlb_miss_lock_enter()	do { } while(/*CONSTCOND*/0)
 #define	pmap_md_tlb_miss_lock_exit()	do { } while(/*CONSTCOND*/0)
-#endif	/* !MULTIPROCESSOR || !PMAP_MD_NEED_TLB_MISS_LOCK */
+#endif /* !MULTIPROCESSOR || !PMAP_MD_NEED_TLB_MISS_LOCK */
+
+#ifndef MULTIPROCESSOR
+kmutex_t pmap_pvlist_mutex	__cacheline_aligned;
+#endif
+
+/*
+ * Debug functions.
+ */
+
+static inline void
+pmap_asid_check(pmap_t pm, const char *func)
+{
+#ifdef DEBUG
+	if (!PMAP_IS_ACTIVE(pm))
+		return;
+
+	struct pmap_asid_info * const pai = PMAP_PAI(pm, cpu_tlb_info(curcpu()));
+	tlb_asid_t asid = tlb_get_asid();
+	if (asid != pai->pai_asid)
+		panic("%s: inconsistency for active TLB update: %u <-> %u",
+		    func, asid, pai->pai_asid);
+#endif
+}
+
+static void
+pmap_addr_range_check(pmap_t pmap, vaddr_t sva, vaddr_t eva, const char *func)
+{
+#ifdef DEBUG
+	if (pmap == pmap_kernel()) {
+		if (sva < VM_MIN_KERNEL_ADDRESS)
+			panic("%s: kva %#"PRIxVADDR" not in range",
+			    func, sva);
+		if (eva >= pmap_limits.virtual_end)
+			panic("%s: kva %#"PRIxVADDR" not in range",
+			    func, eva);
+	} else {
+		if (eva > VM_MAXUSER_ADDRESS)
+			panic("%s: uva %#"PRIxVADDR" not in range",
+			    func, eva);
+		pmap_asid_check(pmap, func);
+	}
+#endif
+}
 
 /*
  * Misc. functions.
@@ -274,18 +318,18 @@ struct pool_allocator pmap_pv_page_alloc
 bool
 pmap_page_clear_attributes(struct vm_page_md *mdpg, u_int clear_attributes)
 {
-	volatile u_int * const attrp = &mdpg->mdpg_attrs;
+	volatile unsigned long * const attrp = &mdpg->mdpg_attrs;
 #ifdef MULTIPROCESSOR
 	for (;;) {
 		u_int old_attr = *attrp;
 		if ((old_attr & clear_attributes) == 0)
 			return false;
 		u_int new_attr = old_attr & ~clear_attributes;
-		if (old_attr == atomic_cas_uint(attrp, old_attr, new_attr))
+		if (old_attr == atomic_cas_ulong(attrp, old_attr, new_attr))
 			return true;
 	}
 #else
-	u_int old_attr = *attrp;
+	unsigned long old_attr = *attrp;
 	if ((old_attr & clear_attributes) == 0)
 		return false;
 	*attrp &= ~clear_attributes;
@@ -297,7 +341,7 @@ void
 pmap_page_set_attributes(struct vm_page_md *mdpg, u_int set_attributes)
 {
 #ifdef MULTIPROCESSOR
-	atomic_or_uint(&mdpg->mdpg_attrs, set_attributes);
+	atomic_or_ulong(&mdpg->mdpg_attrs, set_attributes);
 #else
 	mdpg->mdpg_attrs |= set_attributes;
 #endif
@@ -307,17 +351,19 @@ static void
 pmap_page_syncicache(struct vm_page *pg)
 {
 #ifndef MULTIPROCESSOR
-	struct pmap * const curpmap = curcpu()->ci_curpm;
+	struct pmap * const curpmap = curlwp->l_proc->p_vmspace->vm_map.pmap;
 #endif
 	struct vm_page_md * const mdpg = VM_PAGE_TO_MD(pg);
 	pv_entry_t pv = &mdpg->mdpg_first;
 	kcpuset_t *onproc;
 #ifdef MULTIPROCESSOR
 	kcpuset_create(&onproc, true);
+	KASSERT(onproc != NULL);
 #else
 	onproc = NULL;
 #endif
-	(void)VM_PAGEMD_PVLIST_LOCK(mdpg, false);
+	VM_PAGEMD_PVLIST_READLOCK(mdpg);
+	pmap_pvlist_check(mdpg);
 
 	if (pv->pv_pmap != NULL) {
 		for (; pv != NULL; pv = pv->pv_next) {
@@ -334,13 +380,14 @@ pmap_page_syncicache(struct vm_page *pg)
 #endif
 		}
 	}
+	pmap_pvlist_check(mdpg);
 	VM_PAGEMD_PVLIST_UNLOCK(mdpg);
 	kpreempt_disable();
 	pmap_md_page_syncicache(pg, onproc);
+	kpreempt_enable();
 #ifdef MULTIPROCESSOR
 	kcpuset_destroy(onproc);
 #endif
-	kpreempt_enable();
 }
 
 /*
@@ -402,24 +449,58 @@ pmap_growkernel(vaddr_t maxkvaddr)
 vaddr_t
 pmap_steal_memory(vsize_t size, vaddr_t *vstartp, vaddr_t *vendp)
 {
-	u_int npgs;
+	size_t npgs;
 	paddr_t pa;
 	vaddr_t va;
+	struct vm_physseg *maybe_seg = NULL;
+	u_int maybe_bank = vm_nphysseg;
 
 	size = round_page(size);
 	npgs = atop(size);
 
+	aprint_debug("%s: need %zu pages\n", __func__, npgs);
+
 	for (u_int bank = 0; bank < vm_nphysseg; bank++) {
 		struct vm_physseg * const seg = VM_PHYSMEM_PTR(bank);
 		if (uvm.page_init_done == true)
 			panic("pmap_steal_memory: called _after_ bootstrap");
 
-		if (seg->avail_start != seg->start ||
-		    seg->avail_start >= seg->avail_end)
+		aprint_debug("%s: seg %u: %#"PRIxPADDR" %#"PRIxPADDR" %#"PRIxPADDR" %#"PRIxPADDR"\n",
+		    __func__, bank,
+		    seg->avail_start, seg->start,
+		    seg->avail_end, seg->end);
+
+		if (seg->avail_start != seg->start
+		    || seg->avail_start >= seg->avail_end) {
+			aprint_debug("%s: seg %u: bad start\n", __func__, bank);
 			continue;
+		}
 
-		if ((seg->avail_end - seg->avail_start) < npgs)
+		if (seg->avail_end - seg->avail_start < npgs) {
+			aprint_debug("%s: seg %u: too small for %zu pages\n",
+			    __func__, bank, npgs);
 			continue;
+		}
+
+		if (!pmap_md_ok_to_steal_p(seg, npgs)) {
+			continue;
+		}
+
+		/*
+		 * Always try to allocate from the segment with the least
+		 * amount of space left.
+		 */
+#define VM_PHYSMEM_SPACE(s)	((s)->avail_end - (s)->avail_start)
+		if (maybe_seg == NULL 
+		    || VM_PHYSMEM_SPACE(seg) < VM_PHYSMEM_SPACE(maybe_seg)) {
+			maybe_seg = seg;
+			maybe_bank = bank;
+		}
+	}
+
+	if (maybe_seg) {
+		struct vm_physseg * const seg = maybe_seg;
+		u_int bank = maybe_bank;
 
 		/*
 		 * There are enough pages here; steal them!
@@ -435,11 +516,17 @@ pmap_steal_memory(vsize_t size, vaddr_t 
 			if (vm_nphysseg == 1)
 				panic("pmap_steal_memory: out of memory!");
 
+			aprint_debug("%s: seg %u: %zu pages stolen (removed)\n",
+			    __func__, bank, npgs);
 			/* Remove this segment from the list. */
 			vm_nphysseg--;
-			if (bank < vm_nphysseg)
-				memmove(seg, seg+1,
-				    sizeof(*seg) * (vm_nphysseg - bank));
+			for (u_int x = bank; x < vm_nphysseg; x++) {
+				/* structure copy */
+				VM_PHYSMEM_PTR_SWAP(x, x + 1);
+			}
+		} else {
+			aprint_debug("%s: seg %u: %zu pages stolen (%#"PRIxPADDR" left)\n",
+			    __func__, bank, npgs, VM_PHYSMEM_SPACE(seg));
 		}
 
 		va = pmap_md_map_poolpage(pa, size);
@@ -450,7 +537,7 @@ pmap_steal_memory(vsize_t size, vaddr_t 
 	/*
 	 * If we got here, there was no memory left.
 	 */
-	panic("pmap_steal_memory: no memory to steal");
+	panic("pmap_steal_memory: no memory to steal %zu pages", npgs);
 }
 
 /*
@@ -478,6 +565,11 @@ pmap_init(void)
 	 */
 	pool_setlowat(&pmap_pv_pool, pmap_pv_lowat);
 
+	/*
+	 * Set the page colormask but allow pmap_md_init to override it.
+	 */
+	pmap_page_colormask = ptoa(uvmexp.colormask);
+
 	pmap_md_init();
 
 	/*
@@ -501,12 +593,10 @@ pmap_init(void)
 pmap_t
 pmap_create(void)
 {
-	pmap_t pmap;
-
 	UVMHIST_FUNC(__func__); UVMHIST_CALLED(pmaphist);
 	PMAP_COUNT(create);
 
-	pmap = pool_get(&pmap_pmap_pool, PR_WAITOK);
+	pmap_t pmap = pool_get(&pmap_pmap_pool, PR_WAITOK);
 	memset(pmap, 0, PMAP_SIZE);
 
 	KASSERT(pmap->pm_pai[0].pai_link.le_prev == NULL);
@@ -520,9 +610,12 @@ pmap_create(void)
 #ifdef MULTIPROCESSOR
 	kcpuset_create(&pmap->pm_active, true);
 	kcpuset_create(&pmap->pm_onproc, true);
+	KASSERT(pmap->pm_active != NULL);
+	KASSERT(pmap->pm_onproc != NULL);
 #endif
 
-	UVMHIST_LOG(pmaphist, "<- pmap %p", pmap,0,0,0);
+	UVMHIST_LOG(pmaphist, " <-- done (pmap=%p)", pmap, 0, 0, 0);
+
 	return pmap;
 }
 
@@ -535,15 +628,16 @@ void
 pmap_destroy(pmap_t pmap)
 {
 	UVMHIST_FUNC(__func__); UVMHIST_CALLED(pmaphist);
-	UVMHIST_LOG(pmaphist, "(pmap=%p)", pmap, 0,0,0);
+	UVMHIST_LOG(pmaphist, "(pmap=%p)", pmap, 0, 0, 0);
 
 	if (atomic_dec_uint_nv(&pmap->pm_count) > 0) {
 		PMAP_COUNT(dereference);
+		UVMHIST_LOG(pmaphist, " <-- done (deref)", 0, 0, 0, 0);
 		return;
 	}
 
-	KASSERT(pmap->pm_count == 0);
 	PMAP_COUNT(destroy);
+	KASSERT(pmap->pm_count == 0);
 	kpreempt_disable();
 	pmap_md_tlb_miss_lock_enter();
 	pmap_tlb_asid_release_all(pmap);
@@ -553,12 +647,14 @@ pmap_destroy(pmap_t pmap)
 #ifdef MULTIPROCESSOR
 	kcpuset_destroy(pmap->pm_active);
 	kcpuset_destroy(pmap->pm_onproc);
+	pmap->pm_active = NULL;
+	pmap->pm_onproc = NULL;
 #endif
 
 	pool_put(&pmap_pmap_pool, pmap);
 	kpreempt_enable();
 
-	UVMHIST_LOG(pmaphist, "<- done", 0,0,0,0);
+	UVMHIST_LOG(pmaphist, " <-- done (freed)", 0, 0, 0, 0);
 }
 
 /*
@@ -567,16 +663,15 @@ pmap_destroy(pmap_t pmap)
 void
 pmap_reference(pmap_t pmap)
 {
-
 	UVMHIST_FUNC(__func__); UVMHIST_CALLED(pmaphist);
-	UVMHIST_LOG(pmaphist, "(pmap=%p)", pmap, 0,0,0);
+	UVMHIST_LOG(pmaphist, "(pmap=%p)", pmap, 0, 0, 0);
 	PMAP_COUNT(reference);
 
 	if (pmap != NULL) {
 		atomic_inc_uint(&pmap->pm_count);
 	}
 
-	UVMHIST_LOG(pmaphist, "<- done", 0,0,0,0);
+	UVMHIST_LOG(pmaphist, " <-- done", 0, 0, 0, 0);
 }
 
 /*
@@ -588,7 +683,7 @@ pmap_activate(struct lwp *l)
 	pmap_t pmap = l->l_proc->p_vmspace->vm_map.pmap;
 
 	UVMHIST_FUNC(__func__); UVMHIST_CALLED(pmaphist);
-	UVMHIST_LOG(pmaphist, "(l=%p (pmap=%p))", l, pmap, 0,0);
+	UVMHIST_LOG(pmaphist, "(l=%p (pmap=%p))", l, pmap, 0, 0);
 	PMAP_COUNT(activate);
 
 	kpreempt_disable();
@@ -600,9 +695,122 @@ pmap_activate(struct lwp *l)
 	pmap_md_tlb_miss_lock_exit();
 	kpreempt_enable();
 
-	UVMHIST_LOG(pmaphist, "<- done", 0,0,0,0);
+	UVMHIST_LOG(pmaphist, " <-- done", 0, 0, 0, 0);
+}
+
+/*
+ * Remove this page from all physical maps in which it resides.
+ * Reflects back modify bits to the pager.
+ */
+void
+pmap_page_remove(struct vm_page *pg)
+{
+	struct vm_page_md * const mdpg = VM_PAGE_TO_MD(pg);
+
+	kpreempt_disable();
+	VM_PAGEMD_PVLIST_LOCK(mdpg);
+	pmap_pvlist_check(mdpg);
+
+	UVMHIST_FUNC(__func__); UVMHIST_CALLED(pmaphist);
+
+	pv_entry_t pv = &mdpg->mdpg_first;
+	if (pv->pv_pmap == NULL) {
+		VM_PAGEMD_PVLIST_UNLOCK(mdpg);
+		kpreempt_enable();
+		UVMHIST_LOG(pmaphist, " <-- done (empty)", 0, 0, 0, 0);
+		return;
+	}
+
+	pv_entry_t npv;
+	pv_entry_t pvp = NULL;
+
+	for (; pv != NULL; pv = npv) {
+		npv = pv->pv_next;
+#ifdef PMAP_VIRTUAL_CACHE_ALIASES
+		if (pv->pv_va & PV_KENTER) {
+			UVMHIST_LOG(pmaphist, " pv %p pmap %p va %"
+			    PRIxVADDR" skip", pv, pv->pv_pmap, pv->pv_va, 0);
+
+			KASSERT(pv->pv_pmap == pmap_kernel());
+
+			/* Assume no more - it'll get fixed if there are */
+			pv->pv_next = NULL;
+
+			/*
+			 * pvp is non-null when we already have a PV_KENTER
+			 * pv in pvh_first; otherwise we haven't seen a
+			 * PV_KENTER pv and we need to copy this one to
+			 * pvh_first
+			 */
+			if (pvp) {
+				/*
+				 * The previous PV_KENTER pv needs to point to
+				 * this PV_KENTER pv
+				 */
+				pvp->pv_next = pv;
+			} else {
+				pv_entry_t fpv = &mdpg->mdpg_first;
+				*fpv = *pv;
+				KASSERT(fpv->pv_pmap == pmap_kernel());
+			}
+			pvp = pv;
+			continue;
+		}
+#endif
+		const pmap_t pmap = pv->pv_pmap;
+		vaddr_t va = trunc_page(pv->pv_va);
+		pt_entry_t * const ptep = pmap_pte_lookup(pmap, va);
+		KASSERTMSG(ptep != NULL, "%#"PRIxVADDR " %#"PRIxVADDR, va,
+		    pmap_limits.virtual_end);
+		pt_entry_t pte = *ptep;
+		UVMHIST_LOG(pmaphist, " pv %p pmap %p va %"PRIxVADDR
+		    " pte %#"PRIxPTE, pv, pmap, va, pte_value(pte));
+		if (!pte_valid_p(pte))
+			continue;
+		const bool is_kernel_pmap_p = (pmap == pmap_kernel());
+		if (is_kernel_pmap_p) {
+			PMAP_COUNT(remove_kernel_pages);
+		} else {
+			PMAP_COUNT(remove_user_pages);
+		}
+		if (pte_wired_p(pte))
+			pmap->pm_stats.wired_count--;
+		pmap->pm_stats.resident_count--;
+
+		pmap_md_tlb_miss_lock_enter();
+		const pt_entry_t npte = pte_nv_entry(is_kernel_pmap_p);
+		*ptep = npte;
+		/*
+		 * Flush the TLB for the given address.
+		 */
+		pmap_tlb_invalidate_addr(pmap, va);
+		pmap_md_tlb_miss_lock_exit();
+
+		/*
+		 * non-null means this is a non-pvh_first pv, so we should
+		 * free it.
+		 */
+		if (pvp) {
+			KASSERT(pvp->pv_pmap == pmap_kernel());
+			KASSERT(pvp->pv_next == NULL);
+			pmap_pv_free(pv);
+		} else {
+			pv->pv_pmap = NULL;
+			pv->pv_next = NULL;
+		}
+	}
+
+#ifdef PMAP_VIRTUAL_CACHE_ALIASES
+	pmap_page_clear_attributes(mdpg, VM_PAGEMD_UNCACHED);
+#endif
+	pmap_pvlist_check(mdpg);
+	VM_PAGEMD_PVLIST_UNLOCK(mdpg);
+	kpreempt_enable();
+
+	UVMHIST_LOG(pmaphist, " <-- done", 0, 0, 0, 0);
 }
 
+
 /*
  *	Make a previously active pmap (vmspace) inactive.
  */
@@ -612,25 +820,28 @@ pmap_deactivate(struct lwp *l)
 	pmap_t pmap = l->l_proc->p_vmspace->vm_map.pmap;
 
 	UVMHIST_FUNC(__func__); UVMHIST_CALLED(pmaphist);
-	UVMHIST_LOG(pmaphist, "(l=%p (pmap=%p))", l, pmap, 0,0);
+	UVMHIST_LOG(pmaphist, "(l=%p (pmap=%p))", l, pmap, 0, 0);
 	PMAP_COUNT(deactivate);
 
 	kpreempt_disable();
+	KASSERT(l == curlwp || l->l_cpu == curlwp->l_cpu);
 	pmap_md_tlb_miss_lock_enter();
 	curcpu()->ci_pmap_user_segtab = PMAP_INVALID_SEGTAB_ADDRESS;
+#ifdef _LP64
+	curcpu()->ci_pmap_user_seg0tab = NULL;
+#endif
 	pmap_tlb_asid_deactivate(pmap);
 	pmap_md_tlb_miss_lock_exit();
 	kpreempt_enable();
 
-	UVMHIST_LOG(pmaphist, "<- done", 0,0,0,0);
+	UVMHIST_LOG(pmaphist, " <-- done", 0, 0, 0, 0);
 }
 
 void
 pmap_update(struct pmap *pmap)
 {
-
 	UVMHIST_FUNC(__func__); UVMHIST_CALLED(pmaphist);
-	UVMHIST_LOG(pmaphist, "(pmap=%p)", pmap, 0,0,0);
+	UVMHIST_LOG(pmaphist, "(pmap=%p)", pmap, 0, 0, 0);
 	PMAP_COUNT(update);
 
 	kpreempt_disable();
@@ -656,7 +867,8 @@ pmap_update(struct pmap *pmap)
 	pmap_md_tlb_miss_lock_exit();
 	kpreempt_enable();
 
-	UVMHIST_LOG(pmaphist, "<- done", 0,0,0,0);
+	UVMHIST_LOG(pmaphist, " <-- done%s",
+	    (pmap == pmap_kernel()) ? " (kernel)" : "", 0, 0, 0);
 }
 
 /*
@@ -674,7 +886,7 @@ pmap_pte_remove(pmap_t pmap, vaddr_t sva
 	const bool is_kernel_pmap_p = (pmap == pmap_kernel());
 
 	UVMHIST_FUNC(__func__); UVMHIST_CALLED(pmaphist);
-	UVMHIST_LOG(pmaphist, "(pmap=%p %sva=%"PRIxVADDR"..%"PRIxVADDR,
+	UVMHIST_LOG(pmaphist, "(pmap=%p %sva=%#"PRIxVADDR"..%#"PRIxVADDR,
 	    pmap, (is_kernel_pmap_p ? "(kernel) " : ""), sva, eva);
 	UVMHIST_LOG(pmaphist, "ptep=%p, flags(npte)=%#"PRIxPTR")",
 	    ptep, flags, 0, 0);
@@ -682,20 +894,20 @@ pmap_pte_remove(pmap_t pmap, vaddr_t sva
 	KASSERT(kpreempt_disabled());
 
 	for (; sva < eva; sva += NBPG, ptep++) {
-		pt_entry_t pt_entry = *ptep;
-		if (!pte_valid_p(pt_entry))
+		const pt_entry_t pte = *ptep;
+		if (!pte_valid_p(pte))
 			continue;
-		if (is_kernel_pmap_p)
-			PMAP_COUNT(remove_kernel_calls);
-		else
+		if (is_kernel_pmap_p) {
+			PMAP_COUNT(remove_kernel_pages);
+		} else {
 			PMAP_COUNT(remove_user_pages);
-		if (pte_wired_p(pt_entry))
+		}
+		if (pte_wired_p(pte))
 			pmap->pm_stats.wired_count--;
 		pmap->pm_stats.resident_count--;
-		struct vm_page *pg = PHYS_TO_VM_PAGE(pte_to_paddr(pt_entry));
+		struct vm_page * const pg = PHYS_TO_VM_PAGE(pte_to_paddr(pte));
 		if (__predict_true(pg != NULL)) {
-			pmap_remove_pv(pmap, sva, pg,
-			   pte_modified_p(pt_entry));
+			pmap_remove_pv(pmap, sva, pg, pte_modified_p(pte));
 		}
 		pmap_md_tlb_miss_lock_enter();
 		*ptep = npte;
@@ -705,6 +917,9 @@ pmap_pte_remove(pmap_t pmap, vaddr_t sva
 		pmap_tlb_invalidate_addr(pmap, sva);
 		pmap_md_tlb_miss_lock_exit();
 	}
+
+	UVMHIST_LOG(pmaphist, " <-- done", 0, 0, 0, 0);
+
 	return false;
 }
 
@@ -718,28 +933,22 @@ pmap_remove(pmap_t pmap, vaddr_t sva, va
 	UVMHIST_LOG(pmaphist, "(pmap=%p, va=%#"PRIxVADDR"..%#"PRIxVADDR")",
 	    pmap, sva, eva, 0);
 
-	if (is_kernel_pmap_p)
+	if (is_kernel_pmap_p) {
 		PMAP_COUNT(remove_kernel_calls);
-	else
+	} else {
 		PMAP_COUNT(remove_user_calls);
-#ifdef PARANOIADIAG
-	if (sva < pm->pm_minaddr || eva > pm->pm_maxaddr)
-		panic("%s: va range %#"PRIxVADDR"-%#"PRIxVADDR" not in range",
-		    __func__, sva, eva - 1);
-	if (PMAP_IS_ACTIVE(pmap)) {
-		struct pmap_asid_info * const pai = PMAP_PAI(pmap, curcpu());
-		uint32_t asid = tlb_get_asid();
-		if (asid != pai->pai_asid) {
-			panic("%s: inconsistency for active TLB flush"
-			    ": %d <-> %d", __func__, asid, pai->pai_asid);
-		}
 	}
+#ifdef PMAP_FAULTINFO
+	curpcb->pcb_faultinfo.pfi_faultaddr = 0;
+	curpcb->pcb_faultinfo.pfi_repeats = 0;
+	curpcb->pcb_faultinfo.pfi_faultpte = NULL;
 #endif
 	kpreempt_disable();
+	pmap_addr_range_check(pmap, sva, eva, __func__);
 	pmap_pte_process(pmap, sva, eva, pmap_pte_remove, npte);
 	kpreempt_enable();
 
-	UVMHIST_LOG(pmaphist, "<- done", 0,0,0,0);
+	UVMHIST_LOG(pmaphist, " <-- done", 0, 0, 0, 0);
 }
 
 /*
@@ -767,55 +976,47 @@ pmap_page_protect(struct vm_page *pg, vm
 	/* copy_on_write */
 	case VM_PROT_READ:
 	case VM_PROT_READ|VM_PROT_EXECUTE:
-		(void)VM_PAGEMD_PVLIST_LOCK(mdpg, false);
 		pv = &mdpg->mdpg_first;
+		kpreempt_disable();
+		VM_PAGEMD_PVLIST_READLOCK(mdpg);
+		pmap_pvlist_check(mdpg);
 		/*
-		 * Loop over all current mappings setting/clearing as appropriate.
+		 * Loop over all current mappings setting/clearing as apropos.
 		 */
 		if (pv->pv_pmap != NULL) {
 			while (pv != NULL) {
+#ifdef PMAP_VIRTUAL_CACHE_ALIASES
+				if (pv->pv_va & PV_KENTER) {
+					pv = pv->pv_next;
+					continue;
+				}
+#endif
 				const pmap_t pmap = pv->pv_pmap;
-				const uint16_t gen = VM_PAGEMD_PVLIST_GEN(mdpg);
-				va = pv->pv_va;
-				VM_PAGEMD_PVLIST_UNLOCK(mdpg);
+				va = trunc_page(pv->pv_va);
+				const uintptr_t gen =
+				    VM_PAGEMD_PVLIST_UNLOCK(mdpg);
 				pmap_protect(pmap, va, va + PAGE_SIZE, prot);
 				KASSERT(pv->pv_pmap == pmap);
 				pmap_update(pmap);
-				if (gen != VM_PAGEMD_PVLIST_LOCK(mdpg, false)) {
+				if (gen != VM_PAGEMD_PVLIST_READLOCK(mdpg)) {
 					pv = &mdpg->mdpg_first;
 				} else {
 					pv = pv->pv_next;
 				}
+				pmap_pvlist_check(mdpg);
 			}
 		}
+		pmap_pvlist_check(mdpg);
 		VM_PAGEMD_PVLIST_UNLOCK(mdpg);
+		kpreempt_enable();
 		break;
 
 	/* remove_all */
 	default:
-		/*
-		 * Do this first so that for each unmapping, pmap_remove_pv
-		 * won't try to sync the icache.
-		 */
-		if (pmap_page_clear_attributes(mdpg, VM_PAGEMD_EXECPAGE)) {
-			UVMHIST_LOG(pmapexechist, "pg %p (pa %#"PRIxPADDR
-			    "): execpage cleared", pg, VM_PAGE_TO_PHYS(pg),0,0);
-			PMAP_COUNT(exec_uncached_page_protect);
-		}
-		(void)VM_PAGEMD_PVLIST_LOCK(mdpg, false);
-		pv = &mdpg->mdpg_first;
-		while (pv->pv_pmap != NULL) {
-			const pmap_t pmap = pv->pv_pmap;
-			va = pv->pv_va;
-			VM_PAGEMD_PVLIST_UNLOCK(mdpg);
-			pmap_remove(pmap, va, va + PAGE_SIZE);
-			pmap_update(pmap);
-			(void)VM_PAGEMD_PVLIST_LOCK(mdpg, false);
-		}
-		VM_PAGEMD_PVLIST_UNLOCK(mdpg);
+		pmap_page_remove(pg);
 	}
 
-	UVMHIST_LOG(pmaphist, "<- done", 0,0,0,0);
+	UVMHIST_LOG(pmaphist, " <-- done", 0, 0, 0, 0);
 }
 
 static bool
@@ -825,7 +1026,7 @@ pmap_pte_protect(pmap_t pmap, vaddr_t sv
 	const vm_prot_t prot = (flags & VM_PROT_ALL);
 
 	UVMHIST_FUNC(__func__); UVMHIST_CALLED(pmaphist);
-	UVMHIST_LOG(pmaphist, "(pmap=%p %sva=%"PRIxVADDR"..%"PRIxVADDR,
+	UVMHIST_LOG(pmaphist, "(pmap=%p %sva=%#"PRIxVADDR"..%#"PRIxVADDR,
 	    pmap, (pmap == pmap_kernel() ? "(kernel) " : ""), sva, eva);
 	UVMHIST_LOG(pmaphist, "ptep=%p, flags(npte)=%#"PRIxPTR")",
 	    ptep, flags, 0, 0);
@@ -835,38 +1036,42 @@ pmap_pte_protect(pmap_t pmap, vaddr_t sv
 	 * Change protection on every valid mapping within this segment.
 	 */
 	for (; sva < eva; sva += NBPG, ptep++) {
-		pt_entry_t pt_entry = *ptep;
-		if (!pte_valid_p(pt_entry))
+		pt_entry_t pte = *ptep;
+		if (!pte_valid_p(pte))
 			continue;
-		struct vm_page * const pg =
-		    PHYS_TO_VM_PAGE(pte_to_paddr(pt_entry));
-		if (pg != NULL && pte_modified_p(pt_entry)) {
+		struct vm_page * const pg = PHYS_TO_VM_PAGE(pte_to_paddr(pte));
+		if (pg != NULL && pte_modified_p(pte)) {
 			struct vm_page_md * const mdpg = VM_PAGE_TO_MD(pg);
-			pmap_md_vca_clean(pg, sva, PMAP_WBINV);
 			if (VM_PAGEMD_EXECPAGE_P(mdpg)) {
 				KASSERT(mdpg->mdpg_first.pv_pmap != NULL);
-				if (pte_cached_p(pt_entry)) {
+#ifdef PMAP_VIRTUAL_CACHE_ALIASES
+				if (VM_PAGEMD_CACHED_P(mdpg)) {
+#endif
 					UVMHIST_LOG(pmapexechist,
 					    "pg %p (pa %#"PRIxPADDR"): %s",
 					    pg, VM_PAGE_TO_PHYS(pg),
 					    "syncicached performed", 0);
 					pmap_page_syncicache(pg);
 					PMAP_COUNT(exec_synced_protect);
+#ifdef PMAP_VIRTUAL_CACHE_ALIASES
 				}
+#endif
 			}
 		}
-		pt_entry = pte_prot_downgrade(pt_entry, prot);
-		if (*ptep != pt_entry) {
+		pte = pte_prot_downgrade(pte, prot);
+		if (*ptep != pte) {
 			pmap_md_tlb_miss_lock_enter();
-			*ptep = pt_entry;
+			*ptep = pte;
 			/*
 			 * Update the TLB if needed.
 			 */
-			pmap_tlb_update_addr(pmap, sva, pt_entry,
-			    PMAP_TLB_NEED_IPI);
+			pmap_tlb_update_addr(pmap, sva, pte, PMAP_TLB_NEED_IPI);
 			pmap_md_tlb_miss_lock_exit();
 		}
 	}
+
+	UVMHIST_LOG(pmaphist, " <-- done", 0, 0, 0, 0);
+
 	return false;
 }
 
@@ -877,57 +1082,46 @@ pmap_pte_protect(pmap_t pmap, vaddr_t sv
 void
 pmap_protect(pmap_t pmap, vaddr_t sva, vaddr_t eva, vm_prot_t prot)
 {
-
 	UVMHIST_FUNC(__func__); UVMHIST_CALLED(pmaphist);
 	UVMHIST_LOG(pmaphist,
-	    "  pmap=%p, va=%#"PRIxVADDR"..%#"PRIxVADDR" port=%#x)",
+	    "(pmap=%p, va=%#"PRIxVADDR"..%#"PRIxVADDR", prot=%u)",
 	    pmap, sva, eva, prot);
 	PMAP_COUNT(protect);
 
 	if ((prot & VM_PROT_READ) == VM_PROT_NONE) {
 		pmap_remove(pmap, sva, eva);
-		UVMHIST_LOG(pmaphist, "<- done", 0,0,0,0);
+		UVMHIST_LOG(pmaphist, " <-- done", 0, 0, 0, 0);
 		return;
 	}
 
-#ifdef PARANOIADIAG
-	if (sva < pm->pm_minaddr || eva > pm->pm_maxaddr)
-		panic("%s: va range %#"PRIxVADDR"-%#"PRIxVADDR" not in range",
-		    __func__, sva, eva - 1);
-	if (PMAP_IS_ACTIVE(pmap)) {
-		struct pmap_asid_info * const pai = PMAP_PAI(pmap, curcpu());
-		uint32_t asid = tlb_get_asid();
-		if (asid != pai->pai_asid) {
-			panic("%s: inconsistency for active TLB update"
-			    ": %d <-> %d", __func__, asid, pai->pai_asid);
-		}
-	}
-#endif
-
 	/*
 	 * Change protection on every valid mapping within this segment.
 	 */
 	kpreempt_disable();
+	pmap_addr_range_check(pmap, sva, eva, __func__);
 	pmap_pte_process(pmap, sva, eva, pmap_pte_protect, prot);
 	kpreempt_enable();
 
-	UVMHIST_LOG(pmaphist, "<- done", 0,0,0,0);
+	UVMHIST_LOG(pmaphist, " <-- done", 0, 0, 0, 0);
 }
 
-#if defined(__PMAP_VIRTUAL_CACHE_ALIASES)
+#if defined(PMAP_VIRTUAL_CACHE_ALIASES) && !defined(PMAP_NO_PV_UNCACHED)
 /*
  *	pmap_page_cache:
  *
  *	Change all mappings of a managed page to cached/uncached.
  */
-static void
+void
 pmap_page_cache(struct vm_page *pg, bool cached)
 {
 	struct vm_page_md * const mdpg = VM_PAGE_TO_MD(pg);
+
 	UVMHIST_FUNC(__func__); UVMHIST_CALLED(pmaphist);
 	UVMHIST_LOG(pmaphist, "(pg=%p (pa %#"PRIxPADDR") cached=%s)",
 	    pg, VM_PAGE_TO_PHYS(pg), cached ? "true" : "false", 0);
+
 	KASSERT(kpreempt_disabled());
+	KASSERT(VM_PAGEMD_PVLIST_LOCKED_P(mdpg));
 
 	if (cached) {
 		pmap_page_clear_attributes(mdpg, VM_PAGEMD_UNCACHED);
@@ -937,32 +1131,28 @@ pmap_page_cache(struct vm_page *pg, bool
 		PMAP_COUNT(page_cache_evictions);
 	}
 
-	KASSERT(VM_PAGEMD_PVLIST_LOCKED_P(mdpg));
-	KASSERT(kpreempt_disabled());
-	for (pv_entry_t pv = &mdpg->mdpg_first;
-	     pv != NULL;
-	     pv = pv->pv_next) {
+	for (pv_entry_t pv = &mdpg->mdpg_first; pv != NULL; pv = pv->pv_next) {
 		pmap_t pmap = pv->pv_pmap;
-		vaddr_t va = pv->pv_va;
+		vaddr_t va = trunc_page(pv->pv_va);
 
 		KASSERT(pmap != NULL);
 		KASSERT(pmap != pmap_kernel() || !pmap_md_direct_mapped_vaddr_p(va));
 		pt_entry_t * const ptep = pmap_pte_lookup(pmap, va);
 		if (ptep == NULL)
 			continue;
-		pt_entry_t pt_entry = *ptep;
-		if (pte_valid_p(pt_entry)) {
-			pt_entry = pte_cached_change(pt_entry, cached);
+		pt_entry_t pte = *ptep;
+		if (pte_valid_p(pte)) {
+			pte = pte_cached_change(pte, cached);
 			pmap_md_tlb_miss_lock_enter();
-			*ptep = pt_entry;
-			pmap_tlb_update_addr(pmap, va, pt_entry,
-			    PMAP_TLB_NEED_IPI);
+			*ptep = pte;
+			pmap_tlb_update_addr(pmap, va, pte, PMAP_TLB_NEED_IPI);
 			pmap_md_tlb_miss_lock_exit();
 		}
 	}
-	UVMHIST_LOG(pmaphist, "<- done", 0,0,0,0);
+
+	UVMHIST_LOG(pmaphist, " <-- done", 0, 0, 0, 0);
 }
-#endif	/* __PMAP_VIRTUAL_CACHE_ALIASES */
+#endif	/* PMAP_VIRTUAL_CACHE_ALIASES && !PMAP_NO_PV_UNCACHED */
 
 /*
  *	Insert the given physical page (p) at
@@ -979,18 +1169,25 @@ pmap_page_cache(struct vm_page *pg, bool
 int
 pmap_enter(pmap_t pmap, vaddr_t va, paddr_t pa, vm_prot_t prot, u_int flags)
 {
-	pt_entry_t npte;
 	const bool wired = (flags & PMAP_WIRED) != 0;
 	const bool is_kernel_pmap_p = (pmap == pmap_kernel());
+	u_int update_flags = (flags & VM_PROT_ALL) != 0 ? PMAP_TLB_INSERT : 0;
 #ifdef UVMHIST
-	struct kern_history * const histp = 
+	struct kern_history * const histp =
 	    ((prot & VM_PROT_EXECUTE) ? &pmapexechist : &pmaphist);
 #endif
 
-	UVMHIST_FUNC(__func__);
+	UVMHIST_FUNC(__func__); UVMHIST_CALLED(*histp);
 #define VM_PROT_STRING(prot) \
-	&"\0    (R)\0  (W)\0  (RW)\0 (X)\0  (RX)\0 (WX)\0 (RWX)\0"[UVM_PROTECTION(prot)*6]
-	UVMHIST_CALLED(*histp);
+	&"\0     " \
+	 "(R)\0  " \
+	 "(W)\0  " \
+	 "(RW)\0 " \
+	 "(X)\0  " \
+	 "(RX)\0 " \
+	 "(WX)\0 " \
+	 "(RWX)\0"[UVM_PROTECTION(prot)*6]
+ 	UVMHIST_LOG(*histp, "(pmap=%p, va=%#"PRIxVADDR", pa=%#"PRIxPADDR,
 	UVMHIST_LOG(*histp, "(pmap=%p, va=%#"PRIxVADDR", pa=%#"PRIxPADDR,
 	    pmap, va, pa, 0);
 	UVMHIST_LOG(*histp, "prot=%#x%s flags=%#x%s)",
@@ -1006,29 +1203,27 @@ pmap_enter(pmap_t pmap, vaddr_t va, padd
 		if (!good_color)
 			PMAP_COUNT(user_mappings_bad);
 	}
-#if defined(DEBUG) || defined(DIAGNOSTIC) || defined(PARANOIADIAG)
-	if (va < pmap->pm_minaddr || va >= pmap->pm_maxaddr)
-		panic("%s: %s %#"PRIxVADDR" too big",
-		    __func__, is_kernel_pmap_p ? "kva" : "uva", va);
-#endif
+	pmap_addr_range_check(pmap, va, va, __func__);
 
-	KASSERTMSG(prot & VM_PROT_READ,
-	    "%s: no READ (%#x) in prot %#x", __func__, VM_PROT_READ, prot);
+	KASSERTMSG(prot & VM_PROT_READ, "no READ (%#x) in prot %#x",
+	    VM_PROT_READ, prot);
 
 	struct vm_page * const pg = PHYS_TO_VM_PAGE(pa);
-	struct vm_page_md *mdpg;
+	struct vm_page_md * const mdpg = (pg ? VM_PAGE_TO_MD(pg) : NULL);
 
 	if (pg) {
-		mdpg = VM_PAGE_TO_MD(pg);
 		/* Set page referenced/modified status based on flags */
-		if (flags & VM_PROT_WRITE)
+		if (flags & VM_PROT_WRITE) {
 			pmap_page_set_attributes(mdpg, VM_PAGEMD_MODIFIED|VM_PAGEMD_REFERENCED);
-		else if (flags & VM_PROT_ALL)
+		} else if (flags & VM_PROT_ALL) {
 			pmap_page_set_attributes(mdpg, VM_PAGEMD_REFERENCED);
+		}
 
-#ifdef __PMAP_VIRTUAL_CACHE_ALIASES
-		if (!VM_PAGEMD_CACHED(pg))
+#ifdef PMAP_VIRTUAL_CACHE_ALIASES
+		if (!VM_PAGEMD_CACHED_P(mdpg)) {
 			flags |= PMAP_NOCACHE;
+			PMAP_COUNT(uncached_mappings);
+		}
 #endif
 
 		PMAP_COUNT(managed_mappings);
@@ -1037,25 +1232,27 @@ pmap_enter(pmap_t pmap, vaddr_t va, padd
 		 * Assumption: if it is not part of our managed memory
 		 * then it must be device memory which may be volatile.
 		 */
-		mdpg = NULL;
-		flags |= PMAP_NOCACHE;
+		if ((flags & PMAP_CACHE_MASK) == 0)
+			flags |= PMAP_NOCACHE;
 		PMAP_COUNT(unmanaged_mappings);
 	}
 
-	npte = pte_make_enter(pa, mdpg, prot, flags, is_kernel_pmap_p);
+	pt_entry_t npte = pte_make_enter(pa, mdpg, prot, flags,
+	    is_kernel_pmap_p);
 
 	kpreempt_disable();
+
 	pt_entry_t * const ptep = pmap_pte_reserve(pmap, va, flags);
 	if (__predict_false(ptep == NULL)) {
 		kpreempt_enable();
-		UVMHIST_LOG(*histp, "<- ENOMEM", 0,0,0,0);
+		UVMHIST_LOG(*histp, " <-- ENOMEM", 0, 0, 0, 0);
 		return ENOMEM;
 	}
-	pt_entry_t opte = *ptep;
+	const pt_entry_t opte = *ptep;
 
 	/* Done after case that may sleep/return. */
 	if (pg)
-		pmap_enter_pv(pmap, va, pg, &npte);
+		pmap_enter_pv(pmap, va, pg, &npte, 0);
 
 	/*
 	 * Now validate mapping with desired protection/wiring.
@@ -1067,7 +1264,8 @@ pmap_enter(pmap_t pmap, vaddr_t va, padd
 		npte = pte_wire_entry(npte);
 	}
 
-	UVMHIST_LOG(*histp, "new pte %#x (pa %#"PRIxPADDR")", npte, pa, 0,0);
+	UVMHIST_LOG(*histp, "new pte %#"PRIxPTE" (pa %#"PRIxPADDR")",
+	    pte_value(npte), pa, 0, 0);
 
 	if (pte_valid_p(opte) && pte_to_paddr(opte) != pa) {
 		pmap_remove(pmap, va, va + NBPG);
@@ -1075,15 +1273,16 @@ pmap_enter(pmap_t pmap, vaddr_t va, padd
 	}
 
 	KASSERT(pte_valid_p(npte));
-	bool resident = pte_valid_p(opte);
-	if (!resident)
+	const bool resident = pte_valid_p(opte);
+	if (resident) {
+		update_flags |= PMAP_TLB_NEED_IPI;
+	} else {
 		pmap->pm_stats.resident_count++;
+	}
+
 	pmap_md_tlb_miss_lock_enter();
 	*ptep = npte;
-
-	pmap_tlb_update_addr(pmap, va, npte,
-	    ((flags & VM_PROT_ALL) ? PMAP_TLB_INSERT : 0)
-	    | (resident ? PMAP_TLB_NEED_IPI : 0));
+	pmap_tlb_update_addr(pmap, va, npte, update_flags);
 	pmap_md_tlb_miss_lock_exit();
 	kpreempt_enable();
 
@@ -1118,52 +1317,59 @@ pmap_enter(pmap_t pmap, vaddr_t va, padd
 		PMAP_COUNT(exec_mappings);
 		pmap_page_syncicache(pg);
 		pmap_page_clear_attributes(mdpg, VM_PAGEMD_EXECPAGE);
-		UVMHIST_LOG(pmapexechist,
+		UVMHIST_LOG(*histp,
 		    "va=%#"PRIxVADDR" pg %p: %s syncicache%s",
 		    va, pg, "immediate", " (writeable)");
 	}
 
-	if (prot & VM_PROT_EXECUTE) {
-		UVMHIST_LOG(pmapexechist, "<- 0 (OK)", 0,0,0,0);
-	} else {
-		UVMHIST_LOG(pmaphist, "<- 0 (OK)", 0,0,0,0);
-	}
+	UVMHIST_LOG(*histp, " <-- 0 (OK)", 0, 0, 0, 0);
 	return 0;
 }
 
 void
 pmap_kenter_pa(vaddr_t va, paddr_t pa, vm_prot_t prot, u_int flags)
 {
+	pmap_t pmap = pmap_kernel();
 	struct vm_page * const pg = PHYS_TO_VM_PAGE(pa);
-	struct vm_page_md *mdpg;
+	struct vm_page_md * const mdpg = (pg ? VM_PAGE_TO_MD(pg) : NULL);
 
 	UVMHIST_FUNC(__func__); UVMHIST_CALLED(pmaphist);
-	UVMHIST_LOG(pmaphist, "(va=%#"PRIxVADDR" pa=%#"PRIxPADDR
-	    ", prot=%#x, flags=%#x)", va, pa, prot, flags);
+	UVMHIST_LOG(pmaphist,
+	    "(va=%#"PRIxVADDR", pa=%#"PRIxPADDR", prot=%u, flags=%#x)",
+	    va, pa, prot, flags);
 	PMAP_COUNT(kenter_pa);
 
-	if (pg == NULL) {
-		mdpg = NULL;
+	if (mdpg == NULL) {
 		PMAP_COUNT(kenter_pa_unmanaged);
-		flags |= PMAP_NOCACHE;
+		if ((flags & PMAP_CACHE_MASK) == 0)
+			flags |= PMAP_NOCACHE;
 	} else {
-		mdpg = VM_PAGE_TO_MD(pg);
+		if ((flags & PMAP_NOCACHE) == 0 && !PMAP_PAGE_COLOROK_P(pa, va))
+			PMAP_COUNT(kenter_pa_bad);
 	}
 
-	if ((flags & PMAP_NOCACHE) == 0 && !PMAP_PAGE_COLOROK_P(pa, va))
-		PMAP_COUNT(kenter_pa_bad);
-
-	const pt_entry_t npte = pte_make_kenter_pa(pa, mdpg, prot, flags);
+	pt_entry_t npte = pte_make_kenter_pa(pa, mdpg, prot, flags);
 	kpreempt_disable();
-	pt_entry_t * const ptep = pmap_pte_reserve(pmap_kernel(), va, 0);
-	KASSERT(ptep != NULL);
+	pt_entry_t * const ptep = pmap_pte_lookup(pmap, va);
+	KASSERTMSG(ptep != NULL, "%#"PRIxVADDR " %#"PRIxVADDR, va,
+	    pmap_limits.virtual_end);
 	KASSERT(!pte_valid_p(*ptep));
-	pmap_md_tlb_miss_lock_enter();
-	*ptep = npte;
+
+	/*
+	 * No need to track non-managed pages or PMAP_KMPAGEs pages for aliases
+	 */
+#ifdef PMAP_VIRTUAL_CACHE_ALIASES
+	if (pg != NULL && (flags & PMAP_KMPAGE) == 0) {
+		pmap_enter_pv(pmap, va, pg, &npte, PV_KENTER);
+	}
+#endif
+
 	/*
 	 * We have the option to force this mapping into the TLB but we
 	 * don't.  Instead let the next reference to the page do it.
 	 */
+	pmap_md_tlb_miss_lock_enter();
+	*ptep = npte;
 	pmap_tlb_update_addr(pmap_kernel(), va, npte, 0);
 	pmap_md_tlb_miss_lock_exit();
 	kpreempt_enable();
@@ -1175,38 +1381,49 @@ pmap_kenter_pa(vaddr_t va, paddr_t pa, v
 			    ((long *)va)[i], va, ((long *)pa)[i], pa);
 	}
 #endif
-	UVMHIST_LOG(pmaphist, "<- done", 0,0,0,0);
+
+	UVMHIST_LOG(pmaphist, " <-- done (ptep=%p)", ptep, 0, 0, 0);
 }
 
+/*
+ *	Remove the given range of addresses from the kernel map.
+ *
+ *	It is assumed that the start and end are properly
+ *	rounded to the page size.
+ */
+
 static bool
 pmap_pte_kremove(pmap_t pmap, vaddr_t sva, vaddr_t eva, pt_entry_t *ptep,
 	uintptr_t flags)
 {
-	const pt_entry_t new_pt_entry = pte_nv_entry(true);
+	const pt_entry_t new_pte = pte_nv_entry(true);
+
+	UVMHIST_FUNC(__func__); UVMHIST_CALLED(pmaphist);
+	UVMHIST_LOG(pmaphist,
+	    "(pmap=%p, sva=%#"PRIxVADDR", eva=%#"PRIxVADDR", ptep=%p)",
+	    pmap, sva, eva, ptep);
 
 	KASSERT(kpreempt_disabled());
 
-	/*
-	 * Set every pt on every valid mapping within this segment.
-	 */
 	for (; sva < eva; sva += NBPG, ptep++) {
-		pt_entry_t pt_entry = *ptep;
-		if (!pte_valid_p(pt_entry)) {
+		pt_entry_t pte = *ptep;
+		if (!pte_valid_p(pte))
 			continue;
-		}
 
 		PMAP_COUNT(kremove_pages);
-		struct vm_page * const pg =
-		    PHYS_TO_VM_PAGE(pte_to_paddr(pt_entry));
-		if (pg != NULL)
-			pmap_md_vca_clean(pg, sva, PMAP_WBINV);
+		struct vm_page * const pg = PHYS_TO_VM_PAGE(pte_to_paddr(pte));
+		if (pg != NULL) {
+			pmap_remove_pv(pmap, sva, pg, !pte_readonly_p(pte));
+		}
 
 		pmap_md_tlb_miss_lock_enter();
-		*ptep = new_pt_entry;
-		pmap_tlb_invalidate_addr(pmap_kernel(), sva);
+		*ptep = new_pte;
+		pmap_tlb_invalidate_addr(pmap, sva);
 		pmap_md_tlb_miss_lock_exit();
 	}
 
+	UVMHIST_LOG(pmaphist, " <-- done", 0, 0, 0, 0);
+
 	return false;
 }
 
@@ -1217,19 +1434,22 @@ pmap_kremove(vaddr_t va, vsize_t len)
 	const vaddr_t eva = round_page(va + len);
 
 	UVMHIST_FUNC(__func__); UVMHIST_CALLED(pmaphist);
-	UVMHIST_LOG(pmaphist, "(va=%#"PRIxVADDR" len=%#"PRIxVSIZE")",
-	    va, len, 0,0);
+	UVMHIST_LOG(pmaphist, "(va=%#"PRIxVADDR", len=%#"PRIxVSIZE")",
+	    va, len, 0, 0);
 
 	kpreempt_disable();
 	pmap_pte_process(pmap_kernel(), sva, eva, pmap_pte_kremove, 0);
 	kpreempt_enable();
 
-	UVMHIST_LOG(pmaphist, "<- done", 0,0,0,0);
+	UVMHIST_LOG(pmaphist, " <-- done", 0, 0, 0, 0);
 }
 
 void
 pmap_remove_all(struct pmap *pmap)
 {
+	UVMHIST_FUNC(__func__); UVMHIST_CALLED(pmaphist);
+	UVMHIST_LOG(pmaphist, "(pm=%p)", pmap, 0, 0, 0);
+
 	KASSERT(pmap != pmap_kernel());
 
 	kpreempt_disable();
@@ -1238,12 +1458,27 @@ pmap_remove_all(struct pmap *pmap)
 	 * tlb_invalidate_addrs().
 	 */
 	pmap_md_tlb_miss_lock_enter();
-	pmap_tlb_asid_deactivate(pmap);
+#ifdef MULTIPROCESSOR
+	// This should be the last CPU with this pmap onproc
+	KASSERT(!kcpuset_isotherset(pmap->pm_onproc, cpu_index(curcpu())));
+	if (kcpuset_isset(pmap->pm_onproc, cpu_index(curcpu())))
+#endif
+		pmap_tlb_asid_deactivate(pmap);
+#ifdef MULTIPROCESSOR
+	KASSERT(kcpuset_iszero(pmap->pm_onproc));
+#endif
 	pmap_tlb_asid_release_all(pmap);
 	pmap_md_tlb_miss_lock_exit();
 	pmap->pm_flags |= PMAP_DEFERRED_ACTIVATE;
 
+#ifdef PMAP_FAULTINFO
+	curpcb->pcb_faultinfo.pfi_faultaddr = 0;
+	curpcb->pcb_faultinfo.pfi_repeats = 0;
+	curpcb->pcb_faultinfo.pfi_faultpte = NULL;
+#endif
 	kpreempt_enable();
+
+	UVMHIST_LOG(pmaphist, " <-- done", 0, 0, 0, 0);
 }
 
 /*
@@ -1256,36 +1491,26 @@ pmap_remove_all(struct pmap *pmap)
 void
 pmap_unwire(pmap_t pmap, vaddr_t va)
 {
-
 	UVMHIST_FUNC(__func__); UVMHIST_CALLED(pmaphist);
-	UVMHIST_LOG(pmaphist, "(pmap=%p va=%#"PRIxVADDR")", pmap, va, 0,0);
+	UVMHIST_LOG(pmaphist, "(pmap=%p, va=%#"PRIxVADDR")", pmap, va, 0, 0);
 	PMAP_COUNT(unwire);
 
 	/*
 	 * Don't need to flush the TLB since PG_WIRED is only in software.
 	 */
-#ifdef PARANOIADIAG
-	if (va < pmap->pm_minaddr || pmap->pm_maxaddr <= va)
-		panic("pmap_unwire");
-#endif
 	kpreempt_disable();
+	pmap_addr_range_check(pmap, va, va, __func__);
 	pt_entry_t * const ptep = pmap_pte_lookup(pmap, va);
-	pt_entry_t pt_entry = *ptep;
-#ifdef DIAGNOSTIC
-	if (ptep == NULL)
-		panic("%s: pmap %p va %#"PRIxVADDR" invalid STE",
-		    __func__, pmap, va);
-#endif
+	KASSERTMSG(ptep != NULL, "pmap %p va %#"PRIxVADDR" invalid STE",
+	    pmap, va);
+	pt_entry_t pte = *ptep;
+	KASSERTMSG(pte_valid_p(pte),
+	    "pmap %p va %#"PRIxVADDR" invalid PTE %#"PRIxPTE" @ %p",
+	    pmap, va, pte_value(pte), ptep);
 
-#ifdef DIAGNOSTIC
-	if (!pte_valid_p(pt_entry))
-		panic("pmap_unwire: pmap %p va %#"PRIxVADDR" invalid PTE",
-		    pmap, va);
-#endif
-
-	if (pte_wired_p(pt_entry)) {
+	if (pte_wired_p(pte)) {
 		pmap_md_tlb_miss_lock_enter();
-		*ptep = pte_unwire_entry(*ptep);
+		*ptep = pte_unwire_entry(pte);
 		pmap_md_tlb_miss_lock_exit();
 		pmap->pm_stats.wired_count--;
 	}
@@ -1296,6 +1521,8 @@ pmap_unwire(pmap_t pmap, vaddr_t va)
 	}
 #endif
 	kpreempt_enable();
+
+	UVMHIST_LOG(pmaphist, " <-- done", 0, 0, 0, 0);
 }
 
 /*
@@ -1309,8 +1536,6 @@ pmap_extract(pmap_t pmap, vaddr_t va, pa
 {
 	paddr_t pa;
 
-	//UVMHIST_FUNC(__func__); UVMHIST_CALLED(pmaphist);
-	//UVMHIST_LOG(pmaphist, "(pmap=%p va=%#"PRIxVADDR")", pmap, va, 0,0);
 	if (pmap == pmap_kernel()) {
 		if (pmap_md_direct_mapped_vaddr_p(va)) {
 			pa = pmap_md_direct_mapped_vaddr_to_paddr(va);
@@ -1318,16 +1543,14 @@ pmap_extract(pmap_t pmap, vaddr_t va, pa
 		}
 		if (pmap_md_io_vaddr_p(va))
 			panic("pmap_extract: io address %#"PRIxVADDR"", va);
+
+		if (va >= pmap_limits.virtual_end)
+			panic("%s: illegal kernel mapped address %#"PRIxVADDR,
+			    __func__, va);
 	}
 	kpreempt_disable();
-	pt_entry_t * const ptep = pmap_pte_lookup(pmap, va);
-	if (ptep == NULL) {
-		//UVMHIST_LOG(pmaphist, "<- false (not in segmap)", 0,0,0,0);
-		kpreempt_enable();
-		return false;
-	}
-	if (!pte_valid_p(*ptep)) {
-		//UVMHIST_LOG(pmaphist, "<- false (PTE not valid)", 0,0,0,0);
+	const pt_entry_t * const ptep = pmap_pte_lookup(pmap, va);
+	if (ptep == NULL || !pte_valid_p(*ptep)) {
 		kpreempt_enable();
 		return false;
 	}
@@ -1337,7 +1560,6 @@ done:
 	if (pap != NULL) {
 		*pap = pa;
 	}
-	//UVMHIST_LOG(pmaphist, "<- true (pa %#"PRIxPADDR")", pa, 0,0,0);
 	return true;
 }
 
@@ -1352,7 +1574,6 @@ void
 pmap_copy(pmap_t dst_pmap, pmap_t src_pmap, vaddr_t dst_addr, vsize_t len,
     vaddr_t src_addr)
 {
-
 	UVMHIST_FUNC(__func__); UVMHIST_CALLED(pmaphist);
 	PMAP_COUNT(copy);
 }
@@ -1373,7 +1594,7 @@ pmap_clear_reference(struct vm_page *pg)
 
 	bool rv = pmap_page_clear_attributes(mdpg, VM_PAGEMD_REFERENCED);
 
-	UVMHIST_LOG(pmaphist, "<- %s", rv ? "true" : "false", 0,0,0);
+	UVMHIST_LOG(pmaphist, " <-- %s", rv ? "true" : "false", 0, 0, 0);
 
 	return rv;
 }
@@ -1387,7 +1608,6 @@ pmap_clear_reference(struct vm_page *pg)
 bool
 pmap_is_referenced(struct vm_page *pg)
 {
-
 	return VM_PAGEMD_REFERENCED_P(VM_PAGE_TO_MD(pg));
 }
 
@@ -1400,7 +1620,6 @@ pmap_clear_modify(struct vm_page *pg)
 	struct vm_page_md * const mdpg = VM_PAGE_TO_MD(pg);
 	pv_entry_t pv = &mdpg->mdpg_first;
 	pv_entry_t pv_next;
-	uint16_t gen;
 
 	UVMHIST_FUNC(__func__); UVMHIST_CALLED(pmaphist);
 	UVMHIST_LOG(pmaphist, "(pg=%p (%#"PRIxPADDR"))",
@@ -1423,11 +1642,11 @@ pmap_clear_modify(struct vm_page *pg)
 		}
 	}
 	if (!pmap_page_clear_attributes(mdpg, VM_PAGEMD_MODIFIED)) {
-		UVMHIST_LOG(pmaphist, "<- false", 0,0,0,0);
+		UVMHIST_LOG(pmaphist, " <-- false", 0, 0, 0, 0);
 		return false;
 	}
 	if (pv->pv_pmap == NULL) {
-		UVMHIST_LOG(pmaphist, "<- true (no mappings)", 0,0,0,0);
+		UVMHIST_LOG(pmaphist, " <-- true (no mappings)", 0, 0, 0, 0);
 		return true;
 	}
 
@@ -1437,35 +1656,44 @@ pmap_clear_modify(struct vm_page *pg)
 	 * flush the VAC first if there is one.
 	 */
 	kpreempt_disable();
-	gen = VM_PAGEMD_PVLIST_LOCK(mdpg, false);
+	KASSERT(!VM_PAGEMD_PVLIST_LOCKED_P(mdpg));
+	VM_PAGEMD_PVLIST_READLOCK(mdpg);
+	pmap_pvlist_check(mdpg);
 	for (; pv != NULL; pv = pv_next) {
 		pmap_t pmap = pv->pv_pmap;
-		vaddr_t va = pv->pv_va;
+		vaddr_t va = trunc_page(pv->pv_va);
+
+		pv_next = pv->pv_next;
+#ifdef PMAP_VIRTUAL_CACHE_ALIASES
+		if (pv->pv_va & PV_KENTER)
+			continue;
+#endif
 		pt_entry_t * const ptep = pmap_pte_lookup(pmap, va);
 		KASSERT(ptep);
-		pv_next = pv->pv_next;
-		pt_entry_t pt_entry = pte_prot_nowrite(*ptep);
-		if (*ptep == pt_entry) {
+		pt_entry_t pte = pte_prot_nowrite(*ptep);
+		if (*ptep == pte) {
 			continue;
 		}
-		pmap_md_vca_clean(pg, va, PMAP_WBINV);
+		KASSERT(pte_valid_p(pte));
+		const uintptr_t gen = VM_PAGEMD_PVLIST_UNLOCK(mdpg);
 		pmap_md_tlb_miss_lock_enter();
-		*ptep = pt_entry;
-		VM_PAGEMD_PVLIST_UNLOCK(mdpg);
+		*ptep = pte;
 		pmap_tlb_invalidate_addr(pmap, va);
 		pmap_md_tlb_miss_lock_exit();
 		pmap_update(pmap);
-		if (__predict_false(gen != VM_PAGEMD_PVLIST_LOCK(mdpg, false))) {
+		if (__predict_false(gen != VM_PAGEMD_PVLIST_READLOCK(mdpg))) {
 			/*
 			 * The list changed!  So restart from the beginning.
 			 */
 			pv_next = &mdpg->mdpg_first;
+			pmap_pvlist_check(mdpg);
 		}
 	}
+	pmap_pvlist_check(mdpg);
 	VM_PAGEMD_PVLIST_UNLOCK(mdpg);
 	kpreempt_enable();
 
-	UVMHIST_LOG(pmaphist, "<- true (mappings changed)", 0,0,0,0);
+	UVMHIST_LOG(pmaphist, " <-- true (mappings changed)", 0, 0, 0, 0);
 	return true;
 }
 
@@ -1478,7 +1706,6 @@ pmap_clear_modify(struct vm_page *pg)
 bool
 pmap_is_modified(struct vm_page *pg)
 {
-
 	return VM_PAGEMD_MODIFIED_P(VM_PAGE_TO_MD(pg));
 }
 
@@ -1498,17 +1725,31 @@ pmap_set_modified(paddr_t pa)
 /******************** pv_entry management ********************/
 
 static void
-pmap_check_pvlist(struct vm_page *pg)
+pmap_pvlist_check(struct vm_page_md *mdpg)
 {
-#ifdef PARANOIADIAG
-	struct vm_page_md * const mdpg = VM_PAGE_TO_MD(pg);
-	pt_entry_t pv = &mdpg->mdpg_first;
+#ifdef DEBUG
+	pv_entry_t pv = &mdpg->mdpg_first;
 	if (pv->pv_pmap != NULL) {
+#ifdef PMAP_VIRTUAL_CACHE_ALIASES
+		const u_int colormask = uvmexp.colormask;
+		u_int colors = 0;
+#endif
 		for (; pv != NULL; pv = pv->pv_next) {
-			KASSERT(!pmap_md_direct_mapped_vaddr_p(pv->pv_va));
+			KASSERT(pv->pv_pmap != pmap_kernel() || !pmap_md_direct_mapped_vaddr_p(pv->pv_va));
+#ifdef PMAP_VIRTUAL_CACHE_ALIASES
+			colors |= __BIT(atop(pv->pv_va) & colormask);
+#endif
 		}
+#ifdef PMAP_VIRTUAL_CACHE_ALIASES
+		// Assert there if there more than 1 color mapped, that they
+		// are uncached.
+		KASSERTMSG(!pmap_md_virtual_cache_aliasing_p()
+		    || colors == 0 || (colors & (colors-1)) == 0
+		    || VM_PAGEMD_UNCACHED_P(mdpg), "colors=%#x uncached=%u",
+		    colors, VM_PAGEMD_UNCACHED_P(mdpg));
+#endif
 	}
-#endif /* PARANOIADIAG */
+#endif /* DEBUG */
 }
 
 /*
@@ -1516,27 +1757,32 @@ pmap_check_pvlist(struct vm_page *pg)
  * physical to virtual map table.
  */
 void
-pmap_enter_pv(pmap_t pmap, vaddr_t va, struct vm_page *pg, u_int *npte)
+pmap_enter_pv(pmap_t pmap, vaddr_t va, struct vm_page *pg, pt_entry_t *nptep,
+    u_int flags)
 {
 	struct vm_page_md * const mdpg = VM_PAGE_TO_MD(pg);
 	pv_entry_t pv, npv, apv;
-	int16_t gen;
-	bool first __unused = false;
+#ifdef UVMHIST
+	bool first = false;
+#endif
 
 	UVMHIST_FUNC(__func__); UVMHIST_CALLED(pmaphist);
 	UVMHIST_LOG(pmaphist,
 	    "(pmap=%p va=%#"PRIxVADDR" pg=%p (%#"PRIxPADDR")",
 	    pmap, va, pg, VM_PAGE_TO_PHYS(pg));
-	UVMHIST_LOG(pmaphist, "nptep=%p (%#x))", npte, *npte, 0, 0);
+	UVMHIST_LOG(pmaphist, "nptep=%p (%#"PRIxPTE"))",
+	    nptep, pte_value(*nptep), 0, 0);
 
 	KASSERT(kpreempt_disabled());
 	KASSERT(pmap != pmap_kernel() || !pmap_md_direct_mapped_vaddr_p(va));
+	KASSERTMSG(pmap != pmap_kernel() || !pmap_md_io_vaddr_p(va),
+	    "va %#"PRIxVADDR, va);
 
 	apv = NULL;
-	pv = &mdpg->mdpg_first;
-	gen = VM_PAGEMD_PVLIST_LOCK(mdpg, true);
-	pmap_check_pvlist(pg);
+	VM_PAGEMD_PVLIST_LOCK(mdpg);
 again:
+	pv = &mdpg->mdpg_first;
+	pmap_pvlist_check(mdpg);
 	if (pv->pv_pmap == NULL) {
 		KASSERT(pv->pv_next == NULL);
 		/*
@@ -1544,15 +1790,25 @@ again:
 		 */
 		PMAP_COUNT(primary_mappings);
 		PMAP_COUNT(mappings);
+#ifdef UVMHIST
 		first = true;
-#ifdef __PMAP_VIRTUAL_CACHE_ALIASES
-		pmap_page_clear_attributes(pg, VM_PAGEMD_UNCACHED);
+#endif
+#ifdef PMAP_VIRTUAL_CACHE_ALIASES
+		KASSERT(VM_PAGEMD_CACHED_P(mdpg));
+		// If the new mapping has an incompatible color the last
+		// mapping of this page, clean the page before using it.
+		if (!PMAP_PAGE_COLOROK_P(va, pv->pv_va)) {
+			pmap_md_vca_clean(pg, PMAP_WBINV);
+		}
 #endif
 		pv->pv_pmap = pmap;
-		pv->pv_va = va;
+		pv->pv_va = va | flags;
 	} else {
-		if (pmap_md_vca_add(pg, va, npte))
+#ifdef PMAP_VIRTUAL_CACHE_ALIASES
+		if (pmap_md_vca_add(pg, va, nptep)) {
 			goto again;
+		}
+#endif
 
 		/*
 		 * There is at least one other VA mapping this page.
@@ -1566,20 +1822,24 @@ again:
 		const paddr_t pa = VM_PAGE_TO_PHYS(pg);
 #endif
 		for (npv = pv; npv; npv = npv->pv_next) {
-			if (pmap == npv->pv_pmap && va == npv->pv_va) {
+			if (pmap == npv->pv_pmap
+			    && va == trunc_page(npv->pv_va)) {
 #ifdef PARANOIADIAG
 				pt_entry_t *ptep = pmap_pte_lookup(pmap, va);
-				pt_entry_t pt_entry = (ptep ? *ptep : 0);
-				if (!pte_valid_p(pt_entry)
-				    || pte_to_paddr(pt_entry) != pa)
-					printf(
-		"pmap_enter_pv: found va %#"PRIxVADDR" pa %#"PRIxPADDR" in pv_table but != %x\n",
-					    va, pa, pt_entry);
+				pt_entry_t pte = (ptep != NULL) ? *ptep : 0;
+				if (!pte_valid_p(pte) || pte_to_paddr(pte) != pa)
+					printf("%s: found va %#"PRIxVADDR
+					    " pa %#"PRIxPADDR
+					    " in pv_table but != %#"PRIxPTE"\n",
+					    __func__, va, pa, pte_value(pte));
 #endif
 				PMAP_COUNT(remappings);
 				VM_PAGEMD_PVLIST_UNLOCK(mdpg);
 				if (__predict_false(apv != NULL))
 					pmap_pv_free(apv);
+
+				UVMHIST_LOG(pmaphist, " <-- done pv=%p%s",
+				    pv, " (reused)", 0, 0);
 				return;
 			}
 		}
@@ -1587,9 +1847,10 @@ again:
 			/*
 			 * To allocate a PV, we have to release the PVLIST lock
 			 * so get the page generation.  We allocate the PV, and
-			 * then reacquire the lock.  
+			 * then reacquire the lock.
 			 */
-			VM_PAGEMD_PVLIST_UNLOCK(mdpg);
+			pmap_pvlist_check(mdpg);
+			const uintptr_t gen = VM_PAGEMD_PVLIST_UNLOCK(mdpg);
 
 			apv = (pv_entry_t)pmap_pv_alloc();
 			if (apv == NULL)
@@ -1597,28 +1858,39 @@ again:
 
 			/*
 			 * If the generation has changed, then someone else
-			 * tinkered with this page so we should
-			 * start over.
+			 * tinkered with this page so we should start over.
 			 */
-			uint16_t oldgen = gen;
-			gen = VM_PAGEMD_PVLIST_LOCK(mdpg, true);
-			if (gen != oldgen)
+			if (gen != VM_PAGEMD_PVLIST_LOCK(mdpg))
 				goto again;
 		}
 		npv = apv;
 		apv = NULL;
-		npv->pv_va = va;
+#ifdef PMAP_VIRTUAL_CACHE_ALIASES
+		/*
+		 * If need to deal with virtual cache aliases, keep mappings
+		 * in the kernel pmap at the head of the list.  This allows
+		 * the VCA code to easily use them for cache operations if
+		 * present.
+		 */
+		pmap_t kpmap = pmap_kernel();
+		if (pmap != kpmap) {
+			while (pv->pv_pmap == kpmap && pv->pv_next != NULL) {
+				pv = pv->pv_next;
+			}
+		}
+#endif
+		npv->pv_va = va | flags;
 		npv->pv_pmap = pmap;
 		npv->pv_next = pv->pv_next;
 		pv->pv_next = npv;
 		PMAP_COUNT(mappings);
 	}
-	pmap_check_pvlist(pg);
+	pmap_pvlist_check(mdpg);
 	VM_PAGEMD_PVLIST_UNLOCK(mdpg);
 	if (__predict_false(apv != NULL))
 		pmap_pv_free(apv);
 
-	UVMHIST_LOG(pmaphist, "<- done pv=%p%s",
+	UVMHIST_LOG(pmaphist, " <-- done pv=%p%s",
 	    pv, first ? " (first pv)" : "",0,0);
 }
 
@@ -1638,15 +1910,16 @@ pmap_remove_pv(pmap_t pmap, vaddr_t va, 
 
 	UVMHIST_FUNC(__func__); UVMHIST_CALLED(pmaphist);
 	UVMHIST_LOG(pmaphist,
-	    "(pmap=%p va=%#"PRIxVADDR" pg=%p (pa %#"PRIxPADDR")\n",
+	    "(pmap=%p, va=%#"PRIxVADDR", pg=%p (pa %#"PRIxPADDR")",
 	    pmap, va, pg, VM_PAGE_TO_PHYS(pg));
-	UVMHIST_LOG(pmaphist, "dirty=%s)", dirty ? "true" : "false", 0,0,0);
+	UVMHIST_LOG(pmaphist, "dirty=%s)", dirty ? "true" : "false", 0, 0, 0);
 
 	KASSERT(kpreempt_disabled());
+	KASSERT((va & PAGE_MASK) == 0);
 	pv = &mdpg->mdpg_first;
 
-	(void)VM_PAGEMD_PVLIST_LOCK(mdpg, true);
-	pmap_check_pvlist(pg);
+	VM_PAGEMD_PVLIST_LOCK(mdpg);
+	pmap_pvlist_check(mdpg);
 
 	/*
 	 * If it is the first entry on the list, it is actually
@@ -1656,14 +1929,14 @@ pmap_remove_pv(pmap_t pmap, vaddr_t va, 
 	 */
 
 	last = false;
-	if (pmap == pv->pv_pmap && va == pv->pv_va) {
+	if (pmap == pv->pv_pmap && va == trunc_page(pv->pv_va)) {
 		npv = pv->pv_next;
 		if (npv) {
 			*pv = *npv;
 			KASSERT(pv->pv_pmap != NULL);
 		} else {
-#ifdef __PMAP_VIRTUAL_CACHE_ALIASES
-			pmap_page_clear_attributes(pg, VM_PAGEMD_UNCACHED);
+#ifdef PMAP_VIRTUAL_CACHE_ALIASES
+			pmap_page_clear_attributes(mdpg, VM_PAGEMD_UNCACHED);
 #endif
 			pv->pv_pmap = NULL;
 			last = true;	/* Last mapping removed */
@@ -1672,18 +1945,21 @@ pmap_remove_pv(pmap_t pmap, vaddr_t va, 
 	} else {
 		for (npv = pv->pv_next; npv; pv = npv, npv = npv->pv_next) {
 			PMAP_COUNT(remove_pvsearch);
-			if (pmap == npv->pv_pmap && va == npv->pv_va)
+			if (pmap == npv->pv_pmap && va == trunc_page(npv->pv_va))
 				break;
 		}
 		if (npv) {
 			pv->pv_next = npv->pv_next;
 		}
 	}
-	pmap_md_vca_remove(pg, va);
 
-	pmap_check_pvlist(pg);
+	pmap_pvlist_check(mdpg);
 	VM_PAGEMD_PVLIST_UNLOCK(mdpg);
 
+#ifdef PMAP_VIRTUAL_CACHE_ALIASES
+	pmap_md_vca_remove(pg, va, dirty, last);
+#endif
+
 	/*
 	 * Free the pv_entry if needed.
 	 */
@@ -1716,7 +1992,8 @@ pmap_remove_pv(pmap_t pmap, vaddr_t va, 
 			PMAP_COUNT(exec_synced_remove);
 		}
 	}
-	UVMHIST_LOG(pmaphist, "<- done", 0,0,0,0);
+
+	UVMHIST_LOG(pmaphist, " <-- done", 0, 0, 0, 0);
 }
 
 #if defined(MULTIPROCESSOR)
@@ -1743,18 +2020,17 @@ pmap_pvlist_lock_init(size_t cache_line_
 	 */
 	for (size_t i = 0; i < nlocks; lock_va += cache_line_size, i++) {
 		kmutex_t * const lock = (kmutex_t *)lock_va;
-		mutex_init(lock, MUTEX_DEFAULT, IPL_VM);
+		mutex_init(lock, MUTEX_DEFAULT, IPL_HIGH);
 		pli->pli_locks[i] = lock;
 	}
 	pli->pli_lock_mask = nlocks - 1;
 }
 
-uint16_t
-pmap_pvlist_lock(struct vm_page_md *mdpg, bool list_change)
+kmutex_t *
+pmap_pvlist_lock_addr(struct vm_page_md *mdpg)
 {
 	struct pmap_pvlist_info * const pli = &pmap_pvlist_info;
 	kmutex_t *lock = mdpg->mdpg_lock;
-	int16_t gen;
 
 	/*
 	 * Allocate a lock on an as-needed basis.  This will hopefully give us
@@ -1776,33 +2052,20 @@ pmap_pvlist_lock(struct vm_page_md *mdpg
 	}
 
 	/*
-	 * Now finally lock the pvlists.
+	 * Now finally provide the lock.
 	 */
-	mutex_spin_enter(lock);
-
-	/*
-	 * If the locker will be changing the list, increment the high 16 bits
-	 * of attrs so we use that as a generation number.
-	 */
-	gen = VM_PAGEMD_PVLIST_GEN(mdpg);		/* get old value */
-	if (list_change)
-		atomic_add_int(&mdpg->mdpg_attrs, 0x10000);
-
-	/*
-	 * Return the generation number.
-	 */
-	return gen;
+	return lock;
 }
 #else /* !MULTIPROCESSOR */
 void
 pmap_pvlist_lock_init(size_t cache_line_size)
 {
-	mutex_init(&pmap_pvlist_mutex, MUTEX_DEFAULT, IPL_VM);
+	mutex_init(&pmap_pvlist_mutex, MUTEX_DEFAULT, IPL_HIGH);
 }
 
 #ifdef MODULAR
-uint16_t
-pmap_pvlist_lock(struct vm_page_md *mdpg, bool list_change)
+kmutex_t *
+pmap_pvlist_lock_addr(struct vm_page_md *mdpg)
 {
 	/*
 	 * We just use a global lock.
@@ -1812,11 +2075,9 @@ pmap_pvlist_lock(struct vm_page_md *mdpg
 	}
 
 	/*
-	 * Now finally lock the pvlists.
+	 * Now finally provide the lock.
 	 */
-	mutex_spin_enter(mdpg->mdpg_lock);
-
-	return 0;
+	return mdpg->mdpg_lock;
 }
 #endif /* MODULAR */
 #endif /* !MULTIPROCESSOR */
@@ -1829,7 +2090,7 @@ pmap_pvlist_lock(struct vm_page_md *mdpg
 void *
 pmap_pv_page_alloc(struct pool *pp, int flags)
 {
-	struct vm_page *pg = PMAP_ALLOC_POOLPAGE(UVM_PGA_USERESERVE);
+	struct vm_page * const pg = PMAP_ALLOC_POOLPAGE(UVM_PGA_USERESERVE);
 	if (pg == NULL)
 		return NULL;
 
@@ -1849,9 +2110,13 @@ pmap_pv_page_free(struct pool *pp, void 
 	KASSERT(pmap_md_direct_mapped_vaddr_p(va));
 	const paddr_t pa = pmap_md_direct_mapped_vaddr_to_paddr(va);
 	struct vm_page * const pg = PHYS_TO_VM_PAGE(pa);
-	struct vm_page_md * const mdpg = VM_PAGE_TO_MD(pg);
-	pmap_md_vca_remove(pg, va);
-	pmap_page_clear_attributes(mdpg, VM_PAGEMD_POOLPAGE);
+	KASSERT(pg != NULL);
+#ifdef PMAP_VIRTUAL_CACHE_ALIASES
+	kpreempt_disable();
+	pmap_md_vca_remove(pg, va, true, true);
+	kpreempt_enable();
+#endif
+	pmap_page_clear_attributes(VM_PAGE_TO_MD(pg), VM_PAGEMD_POOLPAGE);
 	uvm_pagefree(pg);
 }
 
@@ -1863,8 +2128,6 @@ pmap_pv_page_free(struct pool *pp, void 
 void
 pmap_prefer(vaddr_t foff, vaddr_t *vap, vsize_t sz, int td)
 {
-	vaddr_t	va;
-	vsize_t d;
 	vsize_t prefer_mask = ptoa(uvmexp.colormask);
 
 	PMAP_COUNT(prefer_requests);
@@ -1872,13 +2135,11 @@ pmap_prefer(vaddr_t foff, vaddr_t *vap, 
 	prefer_mask |= pmap_md_cache_prefer_mask();
 
 	if (prefer_mask) {
-		va = *vap;
-
-		d = foff - va;
-		d &= prefer_mask;
+		vaddr_t	va = *vap;
+		vsize_t d = (foff - va) & prefer_mask;
 		if (d) {
 			if (td)
-				*vap = trunc_page(va -((-d) & prefer_mask));
+				*vap = trunc_page(va - ((-d) & prefer_mask));
 			else
 				*vap = round_page(va + d);
 			PMAP_COUNT(prefer_adjustments);
@@ -1891,30 +2152,24 @@ pmap_prefer(vaddr_t foff, vaddr_t *vap, 
 vaddr_t
 pmap_map_poolpage(paddr_t pa)
 {
-
 	struct vm_page * const pg = PHYS_TO_VM_PAGE(pa);
 	KASSERT(pg);
 	struct vm_page_md * const mdpg = VM_PAGE_TO_MD(pg);
 	pmap_page_set_attributes(mdpg, VM_PAGEMD_POOLPAGE);
 
-	const vaddr_t va = pmap_md_map_poolpage(pa, NBPG);
-	pmap_md_vca_add(pg, va, NULL);
-	return va;
+	return pmap_md_map_poolpage(pa, NBPG);
 }
 
 paddr_t
 pmap_unmap_poolpage(vaddr_t va)
 {
-
 	KASSERT(pmap_md_direct_mapped_vaddr_p(va));
 	paddr_t pa = pmap_md_direct_mapped_vaddr_to_paddr(va);
 
 	struct vm_page * const pg = PHYS_TO_VM_PAGE(pa);
-	KASSERT(pg);
-	struct vm_page_md * const mdpg = VM_PAGE_TO_MD(pg);
-	pmap_page_clear_attributes(mdpg, VM_PAGEMD_POOLPAGE);
+	KASSERT(pg != NULL);
+	pmap_page_clear_attributes(VM_PAGE_TO_MD(pg), VM_PAGEMD_POOLPAGE);
 	pmap_md_unmap_poolpage(va, NBPG);
-	pmap_md_vca_remove(pg, va);
 
 	return pa;
 }
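
[Note on the pmap_prefer() change above: it picks the nearest page-aligned address whose low prefer_mask bits match those of the file offset, so virtually indexed caches see the same color. A worked example with an assumed prefer_mask of 0x7000 (colormask 7, 4 KiB pages); the numbers are illustrative only:]

	/* assume prefer_mask == 0x7000, foff == 0x3000, *vap == 0x10000 */
	d = (0x3000 - 0x10000) & 0x7000;	/* d == 0x3000 */
	/* td == 0 (grow up):   round_page(0x10000 + 0x3000)               == 0x13000 */
	/* td != 0 (grow down): trunc_page(0x10000 - ((-0x3000) & 0x7000)) == 0x0b000 */
	/* either way (foff - *vap) & 0x7000 == 0, i.e. the new va and foff share a color */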

Index: src/sys/uvm/pmap/pmap.h
diff -u src/sys/uvm/pmap/pmap.h:1.6 src/sys/uvm/pmap/pmap.h:1.7
--- src/sys/uvm/pmap/pmap.h:1.6	Thu Jul  7 06:55:44 2016
+++ src/sys/uvm/pmap/pmap.h	Mon Jul 11 16:06:09 2016
@@ -1,4 +1,4 @@
-/*	$NetBSD: pmap.h,v 1.6 2016/07/07 06:55:44 msaitoh Exp $	*/
+/*	$NetBSD: pmap.h,v 1.7 2016/07/11 16:06:09 matt Exp $	*/
 
 /*
  * Copyright (c) 1992, 1993
@@ -152,6 +152,15 @@ struct pmap_limits {
 	vaddr_t virtual_end;
 };
 
+/*
+ * Initialize the kernel pmap.
+ */      
+#ifdef MULTIPROCESSOR
+#define PMAP_SIZE	offsetof(struct pmap, pm_pai[PMAP_TLB_MAX])
+#else       
+#define PMAP_SIZE	sizeof(struct pmap)
+#endif      
+
 /* 
  * The pools from which pmap structures and sub-structures are allocated.
  */
@@ -162,6 +171,10 @@ extern struct pool_allocator pmap_pv_pag
 extern struct pmap_kernel kernel_pmap_store;
 extern struct pmap_limits pmap_limits;
 
+extern u_int pmap_page_colormask;
+
+extern pmap_segtab_t pmap_kern_segtab;
+
 #define	pmap_wired_count(pmap) 	((pmap)->pm_stats.wired_count)
 #define pmap_resident_count(pmap) ((pmap)->pm_stats.resident_count)
 
@@ -173,12 +186,17 @@ void	pmap_set_modified(paddr_t);
 bool	pmap_page_clear_attributes(struct vm_page_md *, u_int);
 void	pmap_page_set_attributes(struct vm_page_md *, u_int);
 void	pmap_pvlist_lock_init(size_t);
+#ifdef PMAP_VIRTUAL_CACHE_ALIASES
+void	pmap_page_cache(struct vm_page *, bool cached);
+#endif
+
 
 #define	PMAP_WB		0
 #define	PMAP_WBINV	1
 #define	PMAP_INV	2
 
-uint16_t pmap_pvlist_lock(struct vm_page_md *, bool);
+//uint16_t pmap_pvlist_lock(struct vm_page_md *, bool);
+kmutex_t *pmap_pvlist_lock_addr(struct vm_page_md *);
 
 #define	PMAP_STEAL_MEMORY	/* enable pmap_steal_memory() */
 #define	PMAP_GROWKERNEL		/* enable pmap_growkernel() */
@@ -193,5 +211,11 @@ struct vm_page *pmap_md_alloc_poolpage(i
 #define	PMAP_MAP_POOLPAGE(pa)		pmap_map_poolpage(pa)
 #define	PMAP_UNMAP_POOLPAGE(va)		pmap_unmap_poolpage(va)
 
+#define PMAP_COUNT(name)	(pmap_evcnt_##name.ev_count++ + 0)
+#define PMAP_COUNTER(name, desc) \
+struct evcnt pmap_evcnt_##name = \
+	EVCNT_INITIALIZER(EVCNT_TYPE_MISC, NULL, "pmap", desc); \
+EVCNT_ATTACH_STATIC(pmap_evcnt_##name)
+
 #endif	/* _KERNEL */
 #endif	/* _COMMON_PMAP_H_ */
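
[Note: PMAP_COUNT()/PMAP_COUNTER() now live in this header rather than in pmap.c. A minimal usage sketch, assuming <sys/evcnt.h> is already pulled in; the counter name below is made up for illustration:]

	/* at file scope in a pmap source file */
	PMAP_COUNTER(example_events, "example events");

	/* in a function: bumps pmap_evcnt_example_events, visible via vmstat -e */
	PMAP_COUNT(example_events);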

Index: src/sys/uvm/pmap/pmap_segtab.c
diff -u src/sys/uvm/pmap/pmap_segtab.c:1.2 src/sys/uvm/pmap/pmap_segtab.c:1.3
--- src/sys/uvm/pmap/pmap_segtab.c:1.2	Thu Jun 11 08:04:44 2015
+++ src/sys/uvm/pmap/pmap_segtab.c	Mon Jul 11 16:06:09 2016
@@ -1,4 +1,4 @@
-/*	$NetBSD: pmap_segtab.c,v 1.2 2015/06/11 08:04:44 matt Exp $	*/
+/*	$NetBSD: pmap_segtab.c,v 1.3 2016/07/11 16:06:09 matt Exp $	*/
 
 /*-
  * Copyright (c) 1998, 2001 The NetBSD Foundation, Inc.
@@ -67,7 +67,7 @@
 
 #include <sys/cdefs.h>
 
-__KERNEL_RCSID(0, "$NetBSD: pmap_segtab.c,v 1.2 2015/06/11 08:04:44 matt Exp $");
+__KERNEL_RCSID(0, "$NetBSD: pmap_segtab.c,v 1.3 2016/07/11 16:06:09 matt Exp $");
 
 /*
  *	Manages physical address maps.
@@ -346,16 +346,17 @@ void
 pmap_segtab_activate(struct pmap *pm, struct lwp *l)
 {
 	if (l == curlwp) {
+		struct cpu_info * const ci = l->l_cpu;
 		KASSERT(pm == l->l_proc->p_vmspace->vm_map.pmap);
 		if (pm == pmap_kernel()) {
-			l->l_cpu->ci_pmap_user_segtab = (void*)0xdeadbabe;
+			ci->ci_pmap_user_segtab = PMAP_INVALID_SEGTAB_ADDRESS;
 #ifdef _LP64
-			l->l_cpu->ci_pmap_user_seg0tab = (void*)0xdeadbabe;
+			ci->ci_pmap_user_seg0tab = PMAP_INVALID_SEGTAB_ADDRESS;
 #endif
 		} else {
-			l->l_cpu->ci_pmap_user_segtab = pm->pm_segtab;
+			ci->ci_pmap_user_segtab = pm->pm_segtab;
 #ifdef _LP64
-			l->l_cpu->ci_pmap_user_seg0tab = pm->pm_segtab->seg_seg[0];
+			ci->ci_pmap_user_seg0tab = pm->pm_segtab->seg_seg[0];
 #endif
 		}
 	}
Index: src/sys/uvm/pmap/pmap_synci.c
diff -u src/sys/uvm/pmap/pmap_synci.c:1.2 src/sys/uvm/pmap/pmap_synci.c:1.3
--- src/sys/uvm/pmap/pmap_synci.c:1.2	Tue Jul  2 09:35:48 2013
+++ src/sys/uvm/pmap/pmap_synci.c	Mon Jul 11 16:06:09 2016
@@ -29,7 +29,7 @@
 
 #include <sys/cdefs.h>
 
-__KERNEL_RCSID(0, "$NetBSD: pmap_synci.c,v 1.2 2013/07/02 09:35:48 matt Exp $");
+__KERNEL_RCSID(0, "$NetBSD: pmap_synci.c,v 1.3 2016/07/11 16:06:09 matt Exp $");
 
 #define __PMAP_PRIVATE
 
@@ -44,8 +44,11 @@ __KERNEL_RCSID(0, "$NetBSD: pmap_synci.c
 #include <uvm/uvm.h>
 
 #if defined(MULTIPROCESSOR)
+u_int	pmap_tlb_synci_page_mask;
+u_int	pmap_tlb_synci_map_mask;
+
 void
-pmap_syncicache_ast(struct cpu_info *ci)
+pmap_tlb_syncicache_ast(struct cpu_info *ci)
 {
 	struct pmap_tlb_info * const ti = cpu_tlb_info(ci);
 
@@ -63,7 +66,6 @@ pmap_syncicache_ast(struct cpu_info *ci)
 		pmap_md_icache_sync_all();
 		ti->ti_evcnt_synci_all.ev_count++;
 		ti->ti_evcnt_synci_pages.ev_count += pmap_tlb_synci_page_mask+1;
-		kpreempt_enable();
 		return;
 	}
 
@@ -81,12 +83,10 @@ pmap_syncicache_ast(struct cpu_info *ci)
 			ti->ti_evcnt_synci_pages.ev_count++;
 		}
 	}
-
-	kpreempt_enable();
 }
 
 void
-pmap_tlb_syncicache(vaddr_t va, uint32_t page_onproc)
+pmap_tlb_syncicache(vaddr_t va, const kcpuset_t *page_onproc)
 {
 	KASSERT(kpreempt_disabled());
 	/*
@@ -108,10 +108,11 @@ pmap_tlb_syncicache(vaddr_t va, uint32_t
 	 * then become equal but that's a one in 4 billion cache and will
 	 * just cause an extra sync of the icache.
 	 */
-	const uint32_t cpu_mask = 1L << cpu_index(curcpu());
+	struct cpu_info * const ci = curcpu();
+	kcpuset_t *onproc;
+	kcpuset_create(&onproc, true);
 	const uint32_t page_mask =
 	    1L << ((va >> PGSHIFT) & pmap_tlb_synci_page_mask);
-	uint32_t onproc = 0;
 	for (size_t i = 0; i < pmap_ntlbs; i++) {
 		struct pmap_tlb_info * const ti = pmap_tlbs[i];
 		TLBINFO_LOCK(ti);
@@ -128,7 +129,7 @@ pmap_tlb_syncicache(vaddr_t va, uint32_t
 
 			if (orig_page_bitmap == old_page_bitmap) {
 				if (old_page_bitmap == 0) {
-					onproc |= ti->ti_cpu_mask;
+					kcpuset_merge(onproc, ti->ti_kcpuset);
 				} else {
 					ti->ti_evcnt_synci_deferred.ev_count++;
 				}
@@ -143,20 +144,20 @@ pmap_tlb_syncicache(vaddr_t va, uint32_t
 #endif
 		TLBINFO_UNLOCK(ti);
 	}
-	onproc &= page_onproc;
-	if (__predict_false(onproc != 0)) {
+	kcpuset_intersect(onproc, page_onproc);
+	if (__predict_false(!kcpuset_iszero(onproc))) {
 		/*
 		 * If the cpu need to sync this page, tell the current lwp
 		 * to sync the icache before it returns to userspace.
 		 */
-		if (onproc & cpu_mask) {
-			if (curcpu()->ci_flags & CPUF_USERPMAP) {
+		if (kcpuset_isset(onproc, cpu_index(ci))) {
+			if (ci->ci_flags & CPUF_USERPMAP) {
 				curlwp->l_md.md_astpending = 1;	/* force call to ast() */
-				curcpu()->ci_evcnt_synci_onproc_rqst.ev_count++;
+				ci->ci_evcnt_synci_onproc_rqst.ev_count++;
 			} else {
-				curcpu()->ci_evcnt_synci_deferred_rqst.ev_count++;
+				ci->ci_evcnt_synci_deferred_rqst.ev_count++;
 			}
-			onproc ^= cpu_mask;
+			kcpuset_clear(onproc, cpu_index(ci));
 		}
 
 		/*
@@ -165,12 +166,14 @@ pmap_tlb_syncicache(vaddr_t va, uint32_t
 		 * We might cause some spurious icache syncs but that's not
 		 * going to break anything.
 		 */
-		for (u_int n = ffs(onproc);
-		     onproc != 0;
-		     onproc >>= n, onproc <<= n, n = ffs(onproc)) {
-			cpu_send_ipi(cpu_lookup(n-1), IPI_SYNCICACHE);
+		for (cpuid_t n = kcpuset_ffs(onproc);
+		     n-- > 0;
+		     n = kcpuset_ffs(onproc)) {
+			kcpuset_clear(onproc, n);
+			cpu_send_ipi(cpu_lookup(n), IPI_SYNCICACHE);
 		}
 	}
+	kcpuset_destroy(onproc);
 }
 
 void
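
[Note: the syncicache path above now tracks CPUs with kcpuset_t instead of a 32-bit mask. A self-contained sketch of the same iterate-and-IPI idiom, using the kcpuset(9) calls and the MD cpu_send_ipi()/IPI_SYNCICACHE already used in this file; the helper name is made up:]

	#include <sys/kcpuset.h>
	#include <sys/cpu.h>

	static void
	ipi_cpus_in_set(kcpuset_t *want)	/* hypothetical helper */
	{
		kcpuset_t *work;

		kcpuset_create(&work, true);	/* allocate a zeroed set */
		kcpuset_merge(work, want);	/* work |= want */
		/* kcpuset_ffs() is 1-based and returns 0 for an empty set */
		for (cpuid_t n = kcpuset_ffs(work); n-- > 0;
		     n = kcpuset_ffs(work)) {
			kcpuset_clear(work, n);
			cpu_send_ipi(cpu_lookup(n), IPI_SYNCICACHE);
		}
		kcpuset_destroy(work);
	}
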
Index: src/sys/uvm/pmap/tlb.h
diff -u src/sys/uvm/pmap/tlb.h:1.2 src/sys/uvm/pmap/tlb.h:1.3
--- src/sys/uvm/pmap/tlb.h:1.2	Mon Sep 21 15:50:19 2015
+++ src/sys/uvm/pmap/tlb.h	Mon Jul 11 16:06:09 2016
@@ -1,4 +1,4 @@
-/*	$NetBSD: tlb.h,v 1.2 2015/09/21 15:50:19 matt Exp $	*/
+/*	$NetBSD: tlb.h,v 1.3 2016/07/11 16:06:09 matt Exp $	*/
 /*-
  * Copyright (c) 2011 The NetBSD Foundation, Inc.
  * All rights reserved.
@@ -34,6 +34,8 @@
 
 struct tlbmask;
 
+typedef bool	(*tlb_walkfunc_t)(void *, vaddr_t, tlb_asid_t, pt_entry_t);
+
 struct tlb_md_ops {
 	void	(*md_tlb_set_asid)(tlb_asid_t);
 	tlb_asid_t
@@ -45,10 +47,9 @@ struct tlb_md_ops {
 	bool	(*md_tlb_update_addr)(vaddr_t, tlb_asid_t, pt_entry_t, bool);
 	void	(*md_tlb_read_entry)(size_t, struct tlbmask *);
 	void	(*md_tlb_write_entry)(size_t, const struct tlbmask *);
-	u_int	(*md_tlb_record_asids)(u_long *);
+	u_int	(*md_tlb_record_asids)(u_long *, tlb_asid_t);
 	void	(*md_tlb_dump)(void (*)(const char *, ...));
-	void	(*md_tlb_walk)(void *, bool (*)(void *, vaddr_t, tlb_asid_t,
-		    pt_entry_t));
+	void	(*md_tlb_walk)(void *, tlb_walkfunc_t);
 };
 
 tlb_asid_t
@@ -59,11 +60,11 @@ void	tlb_invalidate_globals(void);
 void	tlb_invalidate_asids(tlb_asid_t, tlb_asid_t);
 void	tlb_invalidate_addr(vaddr_t, tlb_asid_t);
 bool	tlb_update_addr(vaddr_t, tlb_asid_t, pt_entry_t, bool);
-u_int	tlb_record_asids(u_long *);
+u_int	tlb_record_asids(u_long *, tlb_asid_t);
 void	tlb_enter_addr(size_t, const struct tlbmask *);
 void	tlb_read_entry(size_t, struct tlbmask *);
 void	tlb_write_entry(size_t, const struct tlbmask *);
-void	tlb_walk(void *, bool (*)(void *, vaddr_t, tlb_asid_t, pt_entry_t));
+void	tlb_walk(void *, tlb_walkfunc_t);
 void	tlb_dump(void (*)(const char *, ...));
 
 #endif /* _KERNEL || _KMEMUSER */
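
[Note: a hedged sketch of a callback matching the new tlb_walkfunc_t; the callback name is invented, and the return convention (true to keep walking) is an assumption, not something this header spells out:]

	static bool
	tlb_dump_cb(void *ctx, vaddr_t va, tlb_asid_t asid, pt_entry_t pte)
	{
		printf("va %#"PRIxVADDR" asid %u pte %#"PRIxPTE"\n",
		    va, asid, pte_value(pte));
		return true;	/* assumed: true == continue the walk */
	}

	/* ... */
	tlb_walk(NULL, tlb_dump_cb);
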
Index: src/sys/uvm/pmap/vmpagemd.h
diff -u src/sys/uvm/pmap/vmpagemd.h:1.2 src/sys/uvm/pmap/vmpagemd.h:1.3
--- src/sys/uvm/pmap/vmpagemd.h:1.2	Tue Mar  4 06:14:53 2014
+++ src/sys/uvm/pmap/vmpagemd.h	Mon Jul 11 16:06:09 2016
@@ -1,4 +1,4 @@
-/*	$NetBSD: vmpagemd.h,v 1.2 2014/03/04 06:14:53 matt Exp $	*/
+/*	$NetBSD: vmpagemd.h,v 1.3 2016/07/11 16:06:09 matt Exp $	*/
 
 /*-
  * Copyright (c) 2011 The NetBSD Foundation, Inc.
@@ -42,15 +42,16 @@
 #error use assym.h instead
 #endif
 
-#ifdef _MODULE
-#error this file should not be included by loadable kernel modules
-#endif
+//#ifdef _MODULE
+//#error this file should not be included by loadable kernel modules
+//#endif
 
 #ifdef _KERNEL_OPT
 #include "opt_modular.h"
 #include "opt_multiprocessor.h"
 #endif
 
+#include <sys/atomic.h>
 #include <sys/mutex.h>
 
 #define	__HAVE_VM_PAGE_MD
@@ -59,17 +60,20 @@ typedef struct pv_entry {
 	struct pv_entry *pv_next;
 	struct pmap *pv_pmap;
 	vaddr_t pv_va;
+#define	PV_KENTER		0x0001
 } *pv_entry_t;
 
+#ifndef _MODULE
+
 #define	VM_PAGEMD_REFERENCED	0x0001	/* page has been recently referenced */
 #define	VM_PAGEMD_MODIFIED	0x0002	/* page has been modified */
 #define	VM_PAGEMD_POOLPAGE	0x0004	/* page is used as a poolpage */
 #define	VM_PAGEMD_EXECPAGE	0x0008	/* page is exec mapped */
-#ifdef __PMAP_VIRTUAL_CACHE_ALIASES
+#ifdef PMAP_VIRTUAL_CACHE_ALIASES
 #define	VM_PAGEMD_UNCACHED	0x0010	/* page is mapped uncached */
 #endif
 
-#ifdef __PMAP_VIRTUAL_CACHE_ALIASES
+#ifdef PMAP_VIRTUAL_CACHE_ALIASES
 #define	VM_PAGEMD_CACHED_P(mdpg)	(((mdpg)->mdpg_attrs & VM_PAGEMD_UNCACHED) == 0)
 #define	VM_PAGEMD_UNCACHED_P(mdpg)	(((mdpg)->mdpg_attrs & VM_PAGEMD_UNCACHED) != 0)
 #endif
@@ -78,29 +82,70 @@ typedef struct pv_entry {
 #define	VM_PAGEMD_POOLPAGE_P(mdpg)	(((mdpg)->mdpg_attrs & VM_PAGEMD_POOLPAGE) != 0)
 #define	VM_PAGEMD_EXECPAGE_P(mdpg)	(((mdpg)->mdpg_attrs & VM_PAGEMD_EXECPAGE) != 0)
 
+#endif /* !_MODULE */
+
 struct vm_page_md {
-	volatile u_int mdpg_attrs;	/* page attributes */
+	volatile unsigned long mdpg_attrs;	/* page attributes */
 	struct pv_entry mdpg_first;	/* pv_entry first */
-#if defined(MULTIPROCESSOR) || defined(MODULAR)
+#if defined(MULTIPROCESSOR) || defined(MODULAR) || defined(_MODULE)
 	kmutex_t *mdpg_lock;		/* pv list lock */
+#endif
+};
+
+#ifndef _MODULE
+#if defined(MULTIPROCESSOR) || defined(MODULAR)
 #define	VM_PAGEMD_PVLIST_LOCK_INIT(mdpg) 	\
 	(mdpg)->mdpg_lock = NULL
-#define	VM_PAGEMD_PVLIST_LOCK(pg, list_change)	\
-	pmap_pvlist_lock(mdpg, list_change)
+#else
+#define	VM_PAGEMD_PVLIST_LOCK_INIT(mdpg)	do { } while (/*CONSTCOND*/ 0)
+#endif /* MULTIPROCESSOR || MODULAR */
+
+#define	VM_PAGEMD_PVLIST_LOCK(mdpg)		\
+	pmap_pvlist_lock(mdpg, 1)
+#define	VM_PAGEMD_PVLIST_READLOCK(mdpg)		\
+	pmap_pvlist_lock(mdpg, 0)
 #define	VM_PAGEMD_PVLIST_UNLOCK(mdpg)		\
-	mutex_spin_exit((mdpg)->mdpg_lock)
+	pmap_pvlist_unlock(mdpg)
 #define	VM_PAGEMD_PVLIST_LOCKED_P(mdpg)		\
-	mutex_owner((mdpg)->mdpg_lock)
+	pmap_pvlist_locked_p(mdpg)
 #define	VM_PAGEMD_PVLIST_GEN(mdpg)		\
-	((uint16_t)((mdpg)->mdpg_attrs >> 16))
+	((mdpg)->mdpg_attrs >> 16)
+
+#ifdef _KERNEL
+#if defined(MULTIPROCESSOR) || defined(MODULAR)
+kmutex_t *pmap_pvlist_lock_addr(struct vm_page_md *);
 #else
-#define	VM_PAGEMD_PVLIST_LOCK_INIT(mdpg)	do { } while (/*CONSTCOND*/ 0)
-#define	VM_PAGEMD_PVLIST_LOCK(mdpg, lc)	(mutex_spin_enter(&pmap_pvlist_mutex), 0)
-#define	VM_PAGEMD_PVLIST_UNLOCK(mdpg)	mutex_spin_exit(&pmap_pvlist_mutex)
-#define	VM_PAGEMD_PVLIST_LOCKED_P(mdpg)	true
-#define	VM_PAGEMD_PVLIST_GEN(mdpg)		(0)
-#endif /* MULTIPROCESSOR || MODULAR */
-};
+extern kmutex_t pmap_pvlist_mutex;
+static inline kmutex_t *
+pmap_pvlist_lock_addr(struct vm_page_md *mdpg)
+{
+	return &pmap_pvlist_mutex;
+}
+#endif
+
+static inline uintptr_t
+pmap_pvlist_lock(struct vm_page_md *mdpg, uintptr_t increment)
+{
+	mutex_spin_enter(pmap_pvlist_lock_addr(mdpg));
+	const uintptr_t gen = VM_PAGEMD_PVLIST_GEN(mdpg);
+	mdpg->mdpg_attrs += increment << 16;
+	return gen;
+}
+
+static inline uintptr_t
+pmap_pvlist_unlock(struct vm_page_md *mdpg)
+{
+	const uintptr_t gen = VM_PAGEMD_PVLIST_GEN(mdpg);
+	mutex_spin_exit(pmap_pvlist_lock_addr(mdpg));
+	return gen;
+}
+
+static inline bool
+pmap_pvlist_locked_p(struct vm_page_md *mdpg)
+{
+	return mutex_owned(pmap_pvlist_lock_addr(mdpg));
+}
+#endif /* _KERNEL */
 
 #define VM_MDPAGE_INIT(pg)						\
 do {									\
@@ -111,4 +156,6 @@ do {									\
 	VM_PAGEMD_PVLIST_LOCK_INIT(&(pg)->mdpage);			\
 } while (/* CONSTCOND */ 0)
 
+#endif /* _MODULE */
+
 #endif /* __COMMON_PMAP_TLB_VMPAGEMD_H_ */
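
[Note: the inline pmap_pvlist_lock()/pmap_pvlist_unlock() above hide a generation count in the upper bits of mdpg_attrs: the write-lock flavour bumps it, unlock reports the current value, and a caller can compare generations to notice that someone else changed the pv list while the lock was temporarily dropped. An illustrative sketch only; the allocator helper is hypothetical and the retry policy is not taken from this commit:]

	(void)VM_PAGEMD_PVLIST_LOCK(mdpg);		/* bumps the generation */
	/* ... discover that a new pv_entry is needed ... */
	uintptr_t gen = VM_PAGEMD_PVLIST_UNLOCK(mdpg);	/* remember generation */
	pv_entry_t npv = allocate_pv_entry();		/* hypothetical, may sleep */
	if (gen != VM_PAGEMD_PVLIST_LOCK(mdpg)) {
		/* another writer bumped the generation while the lock was
		 * dropped: rescan the pv list before linking in npv */
	}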

Index: src/sys/uvm/pmap/pmap_tlb.c
diff -u src/sys/uvm/pmap/pmap_tlb.c:1.12 src/sys/uvm/pmap/pmap_tlb.c:1.13
--- src/sys/uvm/pmap/pmap_tlb.c:1.12	Thu Jun 11 05:28:42 2015
+++ src/sys/uvm/pmap/pmap_tlb.c	Mon Jul 11 16:06:09 2016
@@ -1,4 +1,4 @@
-/*	$NetBSD: pmap_tlb.c,v 1.12 2015/06/11 05:28:42 matt Exp $	*/
+/*	$NetBSD: pmap_tlb.c,v 1.13 2016/07/11 16:06:09 matt Exp $	*/
 
 /*-
  * Copyright (c) 2010 The NetBSD Foundation, Inc.
@@ -31,7 +31,7 @@
 
 #include <sys/cdefs.h>
 
-__KERNEL_RCSID(0, "$NetBSD: pmap_tlb.c,v 1.12 2015/06/11 05:28:42 matt Exp $");
+__KERNEL_RCSID(0, "$NetBSD: pmap_tlb.c,v 1.13 2016/07/11 16:06:09 matt Exp $");
 
 /*
  * Manages address spaces in a TLB.
@@ -181,20 +181,6 @@ u_int pmap_ntlbs = 1;
 #define	TLBINFO_ASID_INUSE_P(ti, asid) \
 	__BITMAP_ISSET_P((ti)->ti_asid_bitmap, (asid))
 
-static void
-pmap_pai_check(struct pmap_tlb_info *ti)
-{
-#ifdef DIAGNOSTIC
-	struct pmap_asid_info *pai;
-	LIST_FOREACH(pai, &ti->ti_pais, pai_link) {
-		KASSERT(pai != NULL);
-		KASSERT(PAI_PMAP(pai, ti) != pmap_kernel());
-		KASSERT(pai->pai_asid > KERNEL_PID);
-		KASSERT(TLBINFO_ASID_INUSE_P(ti, pai->pai_asid));
-	}
-#endif
-}
-
 #ifdef MULTIPROCESSOR
 __unused static inline bool
 pmap_tlb_intersecting_active_p(pmap_t pm, struct pmap_tlb_info *ti)
@@ -217,15 +203,41 @@ pmap_tlb_intersecting_onproc_p(pmap_t pm
 }
 #endif
 
-static inline void
-pmap_pai_reset(struct pmap_tlb_info *ti, struct pmap_asid_info *pai,
+static void
+pmap_tlb_pai_check(struct pmap_tlb_info *ti)
+{
+#ifdef DIAGNOSTIC
+	struct pmap_asid_info *pai;
+	LIST_FOREACH(pai, &ti->ti_pais, pai_link) {
+		KASSERT(pai != NULL);
+		KASSERT(PAI_PMAP(pai, ti) != pmap_kernel());
+		KASSERT(pai->pai_asid > KERNEL_PID);
+		KASSERTMSG(pai->pai_asid <= ti->ti_asid_max,
+		    "pm %p asid %#x", PAI_PMAP(pai, ti), pai->pai_asid);
+		KASSERTMSG(TLBINFO_ASID_INUSE_P(ti, pai->pai_asid),
+		    "pm %p asid %u", PAI_PMAP(pai, ti), pai->pai_asid);
+#ifdef MULTIPROCESSOR
+		KASSERT(pmap_tlb_intersecting_active_p(PAI_PMAP(pai, ti), ti));
+#endif
+	}
+#endif
+}
+
+static void
+pmap_tlb_pai_reset(struct pmap_tlb_info *ti, struct pmap_asid_info *pai,
 	struct pmap *pm)
 {
+	UVMHIST_FUNC(__func__); UVMHIST_CALLED(maphist);
+	UVMHIST_LOG(maphist, "(ti=%p, pai=%p, pm=%p): asid %u",
+	    ti, pai, pm, pai->pai_asid);
+
 	/*
 	 * We must have an ASID but it must not be onproc (on a processor).
 	 */
 	KASSERT(pai->pai_asid > KERNEL_PID);
+	KASSERT(pai->pai_asid <= ti->ti_asid_max);
 #if defined(MULTIPROCESSOR)
+	KASSERT(pmap_tlb_intersecting_active_p(pm, ti));
 	KASSERT(!pmap_tlb_intersecting_onproc_p(pm, ti));
 #endif
 	LIST_REMOVE(pai, pai_link);
@@ -269,9 +281,12 @@ pmap_pai_reset(struct pmap_tlb_info *ti,
 #if PMAP_TLB_MAX == 1
 	kcpuset_zero(pm->pm_active);
 #else
-	kcpuset_atomicly_remove(pm->pm_active, ti->ti_kcpuset);
+	kcpuset_remove(pm->pm_active, ti->ti_kcpuset);
 #endif
+	KASSERT(!pmap_tlb_intersecting_active_p(pm, ti));
 #endif /* MULTIPROCESSOR */
+
+	UVMHIST_LOG(maphist, " <-- done", 0, 0, 0, 0);
 }
 
 void
@@ -394,7 +409,10 @@ pmap_tlb_asid_reinitialize(struct pmap_t
 	const size_t asid_bitmap_words =
 	    ti->ti_asid_max / (8 * sizeof(ti->ti_asid_bitmap[0]));
 
-	pmap_pai_check(ti);
+	UVMHIST_FUNC(__func__); UVMHIST_CALLED(maphist);
+	UVMHIST_LOG(maphist, "(ti=%p, op=%u)", ti, op, 0, 0);
+
+	pmap_tlb_pai_check(ti);
 
 	ti->ti_evcnt_asid_reinits.ev_count++;
 
@@ -410,14 +428,14 @@ pmap_tlb_asid_reinitialize(struct pmap_t
 	}
 
 	switch (op) {
-#if defined(MULTIPROCESSOR) && defined(PMAP_NEED_TLB_SHOOTDOWN)
+#if defined(MULTIPROCESSOR) && defined(PMAP_TLB_NEED_SHOOTDOWN)
 	case TLBINV_ALL:
 		tlb_invalidate_all();
 		break;
 	case TLBINV_ALLUSER:
 		tlb_invalidate_asids(KERNEL_PID + 1, ti->ti_asid_max);
 		break;
-#endif /* MULTIPROCESSOR && PMAP_NEED_TLB_SHOOTDOWN */
+#endif /* MULTIPROCESSOR && PMAP_TLB_NEED_SHOOTDOWN */
 	case TLBINV_NOBODY: {
 		/*
 		 * If we are just reclaiming ASIDs in the TLB, let's go find
@@ -428,14 +446,15 @@ pmap_tlb_asid_reinitialize(struct pmap_t
 		 * and clear the ASID bitmap.  That will force everyone to
 		 * allocate a new ASID.
 		 */
-#if !defined(MULTIPROCESSOR) || defined(PMAP_NEED_TLB_SHOOTDOWN)
+#if !defined(MULTIPROCESSOR) || defined(PMAP_TLB_NEED_SHOOTDOWN)
 		pmap_tlb_asid_check();
-		const u_int asids_found = tlb_record_asids(ti->ti_asid_bitmap);
+		const u_int asids_found = tlb_record_asids(ti->ti_asid_bitmap,
+		    ti->ti_asid_max);
 		pmap_tlb_asid_check();
 		KASSERT(asids_found == pmap_tlb_asid_count(ti));
 		if (__predict_false(asids_found >= ti->ti_asid_max / 2)) {
 			tlb_invalidate_asids(KERNEL_PID + 1, ti->ti_asid_max);
-#else /* MULTIPROCESSOR && !PMAP_NEED_TLB_SHOOTDOWN */
+#else /* MULTIPROCESSOR && !PMAP_TLB_NEED_SHOOTDOWN */
 			/*
 			 * For those systems (PowerPC) that don't require
 			 * cross cpu TLB shootdowns, we have to invalidate the
@@ -446,7 +465,7 @@ pmap_tlb_asid_reinitialize(struct pmap_t
 			 * nightmare).
 			 */
 			tlb_invalidate_all();
-#endif /* MULTIPROCESSOR && !PMAP_NEED_TLB_SHOOTDOWN */
+#endif /* MULTIPROCESSOR && !PMAP_TLB_NEED_SHOOTDOWN */
 			ti->ti_asid_bitmap[0] = (2 << KERNEL_PID) - 1;
 			for (size_t word = 1;
 			     word <= asid_bitmap_words;
@@ -454,11 +473,11 @@ pmap_tlb_asid_reinitialize(struct pmap_t
 				ti->ti_asid_bitmap[word] = 0;
 			}
 			ti->ti_asids_free = ti->ti_asid_max - KERNEL_PID;
-#if !defined(MULTIPROCESSOR) || defined(PMAP_NEED_TLB_SHOOTDOWN)
+#if !defined(MULTIPROCESSOR) || defined(PMAP_TLB_NEED_SHOOTDOWN)
 		} else {
 			ti->ti_asids_free -= asids_found;
 		}
-#endif /* !MULTIPROCESSOR || PMAP_NEED_TLB_SHOOTDOWN */
+#endif /* !MULTIPROCESSOR || PMAP_TLB_NEED_SHOOTDOWN */
 		KASSERTMSG(ti->ti_asids_free <= ti->ti_asid_max, "%u",
 		    ti->ti_asids_free);
 		break;
@@ -490,7 +509,7 @@ pmap_tlb_asid_reinitialize(struct pmap_t
 		if (TLBINFO_ASID_INUSE_P(ti, pai->pai_asid)) {
 			KASSERT(op == TLBINV_NOBODY);
 		} else {
-			pmap_pai_reset(ti, pai, pm);
+			pmap_tlb_pai_reset(ti, pai, pm);
 		}
 	}
 #ifdef DIAGNOSTIC
@@ -498,9 +517,10 @@ pmap_tlb_asid_reinitialize(struct pmap_t
 	KASSERTMSG(free_count == ti->ti_asids_free,
 	    "bitmap error: %zu != %u", free_count, ti->ti_asids_free);
 #endif
+	UVMHIST_LOG(maphist, " <-- done", 0, 0, 0, 0);
 }
 
-#if defined(MULTIPROCESSOR) && defined(PMAP_NEED_TLB_SHOOTDOWN)
+#if defined(MULTIPROCESSOR) && defined(PMAP_TLB_NEED_SHOOTDOWN)
 #if PMAP_MAX_TLB == 1
 #error shootdown not required for single TLB systems
 #endif
@@ -527,7 +547,7 @@ pmap_tlb_shootdown_process(void)
 		 */
 		struct pmap_asid_info * const pai = PMAP_PAI(ti->ti_victim, ti);
 		KASSERT(ti->ti_victim != pmap_kernel());
-		if (!pmap_tlb_intersecting_onproc_p(ti_victim->pm_onproc, ti)) {
+		if (!pmap_tlb_intersecting_onproc_p(ti->ti_victim, ti)) {
 			/*
 			 * The victim is an active pmap so we will just
 			 * invalidate its TLB entries.
@@ -544,7 +564,7 @@ pmap_tlb_shootdown_process(void)
 			 * ASID.
 			 */
 			KASSERT(!pmap_tlb_intersecting_onproc_p(pm, ti));
-			pmap_pai_reset(ti, pai, PAI_PMAP(pai, ti));
+			pmap_tlb_pai_reset(ti, pai, PAI_PMAP(pai, ti));
 		}
 		break;
 	}
@@ -610,11 +630,12 @@ pmap_tlb_shootdown_bystanders(pmap_t pm)
 	/*
 	 * We don't need to deal our own TLB.
 	 */
-	kcpuset_t *pm_active;
 
+	UVMHIST_FUNC(__func__); UVMHIST_CALLED(maphist);
+
+	kcpuset_t *pm_active;
 	kcpuset_clone(&pm_active, pm->pm_active);
-	kcpuset_atomicly_remove(pm->pm_active,
-	    cpu_tlb_info(curcpu())->ti_kcpuset);
+	kcpuset_remove(pm_active, cpu_tlb_info(curcpu())->ti_kcpuset);
 	const bool kernel_p = (pm == pmap_kernel());
 	bool ipi_sent = false;
 
@@ -682,7 +703,7 @@ pmap_tlb_shootdown_bystanders(pmap_t pm)
 			 * And best of all, we avoid an IPI.
 			 */
 			KASSERT(!kernel_p);
-			pmap_pai_reset(ti, pai, pm);
+			pmap_tlb_pai_reset(ti, pai, pm);
 			//ti->ti_evcnt_lazy_shots.ev_count++;
 		}
 		TLBINFO_UNLOCK(ti);
@@ -690,32 +711,51 @@ pmap_tlb_shootdown_bystanders(pmap_t pm)
 
 	kcpuset_destroy(pm_active);
 
+	UVMHIST_LOG(maphist, " <-- done (ipi_sent=%d)", ipi_sent, 0, 0, 0);
+
 	return ipi_sent;
 }
-#endif /* MULTIPROCESSOR && PMAP_NEED_TLB_SHOOTDOWN */
+#endif /* MULTIPROCESSOR && PMAP_TLB_NEED_SHOOTDOWN */
 
 #ifndef PMAP_TLB_HWPAGEWALKER
 int
-pmap_tlb_update_addr(pmap_t pm, vaddr_t va, pt_entry_t pt_entry, u_int flags)
+pmap_tlb_update_addr(pmap_t pm, vaddr_t va, pt_entry_t pte, u_int flags)
 {
 	struct pmap_tlb_info * const ti = cpu_tlb_info(curcpu());
 	struct pmap_asid_info * const pai = PMAP_PAI(pm, ti);
 	int rv = -1;
 
+	UVMHIST_FUNC(__func__); UVMHIST_CALLED(maphist);
+	UVMHIST_LOG(maphist,
+	    " (pm=%p va=%#"PRIxVADDR", pte=%#"PRIxPTE" flags=%#x)",
+	    pm, va, pte_value(pte), flags);
+
 	KASSERT(kpreempt_disabled());
 
+	KASSERTMSG(pte_valid_p(pte), "va %#"PRIxVADDR" %#"PRIxPTE,
+	    va, pte_value(pte));
+
 	TLBINFO_LOCK(ti);
 	if (pm == pmap_kernel() || PMAP_PAI_ASIDVALID_P(pai, ti)) {
 		pmap_tlb_asid_check();
-		rv = tlb_update_addr(va, pai->pai_asid, pt_entry,
+		rv = tlb_update_addr(va, pai->pai_asid, pte,
 		    (flags & PMAP_TLB_INSERT) != 0);
 		pmap_tlb_asid_check();
-	}
-#if defined(MULTIPROCESSOR) && defined(PMAP_NEED_TLB_SHOOTDOWN)
-	pm->pm_shootdown_pending = (flags & PMAP_TLB_NEED_IPI) != 0;
+		UVMHIST_LOG(maphist,
+		     "   %d <-- tlb_update_addr(%#"PRIxVADDR", %#x, %#"PRIxPTE", ...)",
+		     rv, va, pai->pai_asid, pte_value(pte));
+		KASSERTMSG((flags & PMAP_TLB_INSERT) == 0 || rv == 1,
+		    "pmap %p (asid %u) va %#"PRIxVADDR" pte %#"PRIxPTE" rv %d",
+		    pm, pai->pai_asid, va, pte_value(pte), rv);
+	}
+#if defined(MULTIPROCESSOR) && defined(PMAP_TLB_NEED_SHOOTDOWN)
+	if (flags & PMAP_TLB_NEED_IPI)
+		pm->pm_shootdown_pending = 1;
 #endif
 	TLBINFO_UNLOCK(ti);
 
+	UVMHIST_LOG(maphist, "   <-- done (rv=%d)", rv, 0, 0, 0);
+
 	return rv;
 }
 #endif /* !PMAP_TLB_HWPAGEWALKER */
@@ -727,21 +767,20 @@ pmap_tlb_invalidate_addr(pmap_t pm, vadd
 	struct pmap_asid_info * const pai = PMAP_PAI(pm, ti);
 
 	UVMHIST_FUNC(__func__); UVMHIST_CALLED(maphist);
+	UVMHIST_LOG(maphist, " (pm=%p va=%#"PRIxVADDR") ti=%p asid=%#x",
+	    pm, va, ti, pai->pai_asid);
 
 	KASSERT(kpreempt_disabled());
 
-	UVMHIST_LOG(maphist, " (pm=%#x va=%#x) ti=%#x asid=%#x",
-	    pm, va, ti, pai->pai_asid);
-
 	TLBINFO_LOCK(ti);
 	if (pm == pmap_kernel() || PMAP_PAI_ASIDVALID_P(pai, ti)) {
 		pmap_tlb_asid_check();
-		UVMHIST_LOG(maphist, " invalidating %#x asid %#x", 
+		UVMHIST_LOG(maphist, " invalidating %#"PRIxVADDR" asid %#x", 
 		    va, pai->pai_asid, 0, 0);
 		tlb_invalidate_addr(va, pai->pai_asid);
 		pmap_tlb_asid_check();
 	}
-#if defined(MULTIPROCESSOR) && defined(PMAP_NEED_TLB_SHOOTDOWN)
+#if defined(MULTIPROCESSOR) && defined(PMAP_TLB_NEED_SHOOTDOWN)
 	pm->pm_shootdown_pending = 1;
 #endif
 	TLBINFO_UNLOCK(ti);
@@ -836,7 +875,7 @@ pmap_tlb_asid_alloc(struct pmap_tlb_info
 #if PMAP_TLB_MAX == 1
 	kcpuset_copy(pm->pm_active, kcpuset_running);
 #else
-	kcpuset_atomicly_merge(pm->pm_active, ti->ti_kcpuset);
+	kcpuset_merge(pm->pm_active, ti->ti_kcpuset);
 #endif
 #endif
 }
@@ -852,9 +891,10 @@ pmap_tlb_asid_acquire(pmap_t pm, struct 
 	struct pmap_tlb_info * const ti = cpu_tlb_info(ci);
 	struct pmap_asid_info * const pai = PMAP_PAI(pm, ti);
 
-	KASSERT(kpreempt_disabled());
-
 	UVMHIST_FUNC(__func__); UVMHIST_CALLED(maphist);
+	UVMHIST_LOG(maphist, "(pm=%p, l=%p, ti=%p)", pm, l, ti, 0);
+
+	KASSERT(kpreempt_disabled());
 
 	/*
 	 * Kernels use a fixed ASID and thus doesn't need to acquire one.
@@ -864,11 +904,10 @@ pmap_tlb_asid_acquire(pmap_t pm, struct 
 		return;
 	}
 
-	UVMHIST_LOG(maphist, " (pm=%#x, l=%#x, ti=%#x)", pm, l, ti, 0);
 	TLBINFO_LOCK(ti);
 	KASSERT(pai->pai_asid <= KERNEL_PID || pai->pai_link.le_prev != NULL);
 	KASSERT(pai->pai_asid > KERNEL_PID || pai->pai_link.le_prev == NULL);
-	pmap_pai_check(ti);
+	pmap_tlb_pai_check(ti);
 	if (__predict_false(!PMAP_PAI_ASIDVALID_P(pai, ti))) {
 		/*
 		 * If we've run out ASIDs, reinitialize the ASID space.
@@ -886,6 +925,10 @@ pmap_tlb_asid_acquire(pmap_t pm, struct 
 		pmap_tlb_asid_alloc(ti, pm, pai);
 		UVMHIST_LOG(maphist, "allocated asid %#x", pai->pai_asid, 0, 0, 0);
 	}
+	pmap_tlb_pai_check(ti);
+#if defined(MULTIPROCESSOR)
+	KASSERT(kcpuset_isset(pm->pm_active, cpu_index(ci)));
+#endif
 
 	if (l == curlwp) {
 #if defined(MULTIPROCESSOR)
@@ -912,6 +955,7 @@ void
 pmap_tlb_asid_deactivate(pmap_t pm)
 {
 	UVMHIST_FUNC(__func__); UVMHIST_CALLED(maphist);
+
 	KASSERT(kpreempt_disabled());
 #if defined(MULTIPROCESSOR)
 	/*
@@ -935,8 +979,9 @@ pmap_tlb_asid_deactivate(pmap_t pm)
 	}
 #endif
 	curcpu()->ci_pmap_asid_cur = 0;
-	UVMHIST_LOG(maphist, " <-- done (pm=%#x)", pm, 0, 0, 0);
+	UVMHIST_LOG(maphist, " <-- done (pm=%p)", pm, 0, 0, 0);
 	tlb_set_asid(KERNEL_PID);
+	pmap_tlb_pai_check(cpu_tlb_info(curcpu()));
 #if defined(DEBUG)
 	pmap_tlb_asid_check();
 #endif
@@ -945,11 +990,15 @@ pmap_tlb_asid_deactivate(pmap_t pm)
 void
 pmap_tlb_asid_release_all(struct pmap *pm)
 {
+	UVMHIST_FUNC(__func__); UVMHIST_CALLED(maphist);
+	UVMHIST_LOG(maphist, "(pm=%p)", pm, 0, 0, 0);
+
 	KASSERT(pm != pmap_kernel());
 #if defined(MULTIPROCESSOR)
 	//KASSERT(!kcpuset_iszero(pm->pm_onproc)); // XXX
-#if PMAP_TLB_MAX > 1
 	struct cpu_info * const ci __diagused = curcpu();
+	KASSERT(!kcpuset_isotherset(pm->pm_onproc, cpu_index(ci)));
+#if PMAP_TLB_MAX > 1
 	for (u_int i = 0; !kcpuset_iszero(pm->pm_active); i++) {
 		KASSERT(i < pmap_ntlbs);
 		struct pmap_tlb_info * const ti = pmap_tlbs[i];
@@ -960,31 +1009,25 @@ pmap_tlb_asid_release_all(struct pmap *p
 		TLBINFO_LOCK(ti);
 		if (PMAP_PAI_ASIDVALID_P(pai, ti)) {
 			/*
-			 * If this pmap isn't onproc on any of the cpus
-			 * belonging to this tlb domain, we can just reset
-			 * the ASID and be done.
+			 * This pmap should not be in use by any other cpu so
+			 * we can just reset and be happy.
 			 */
-			if (!pmap_tlb_intersecting_onproc_p(pm, ti)) {
-				KASSERT(ti->ti_victim != pm);
-				pmap_pai_reset(ti, pai, pm);
-#if PMAP_TLB_MAX == 1
-			} else {
-				KASSERT(cpu_tlb_info(ci) == ti);
-				tlb_invalidate_asids(pai->pai_asid,
-				    pai->pai_asid);
-#else
-			} else if (cpu_tlb_info(ci) == ti) {
-				tlb_invalidate_asids(pai->pai_asid,
-				    pai->pai_asid);
-			} else {
-				pm->pm_shootdown_needed = 1;
-#endif
-			}
+			if (ti->ti_victim == pm)
+				ti->ti_victim = NULL;
+			pmap_tlb_pai_reset(ti, pai, pm);
 		}
+		KASSERT(pai->pai_link.le_prev == NULL);
 		TLBINFO_UNLOCK(ti);
 #if PMAP_TLB_MAX > 1
 	}
 #endif
+#ifdef DIAGNOSTIC
+	for (size_t i = 0; i < (PMAP_TLB_MAX > 1 ? pmap_ntlbs : 1); i++) {
+		KASSERTMSG(pm->pm_pai[i].pai_asid == 0,
+		    "pm %p i %zu asid %u",
+		    pm, i, pm->pm_pai[i].pai_asid);
+	}
+#endif
 #else
 	/*
 	 * Handle the case of an UP kernel which only has, at most, one ASID.
@@ -997,11 +1040,12 @@ pmap_tlb_asid_release_all(struct pmap *p
 		if (curcpu()->ci_pmap_asid_cur == pai->pai_asid) {
 			tlb_invalidate_asids(pai->pai_asid, pai->pai_asid);
 		} else {
-			pmap_pai_reset(ti, pai, pm);
+			pmap_tlb_pai_reset(ti, pai, pm);
 		}
 	}
 	TLBINFO_UNLOCK(ti);
 #endif /* MULTIPROCESSOR */
+	UVMHIST_LOG(maphist, " <-- done", 0, 0, 0, 0);
 }
 
 void
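
[Note: one detail in the ASID reinitialization above that is easy to misread is the bitmap seed; spelled out for a couple of assumed KERNEL_PID values:]

	/* ti_asid_bitmap[0] = (2 << KERNEL_PID) - 1 reserves ASIDs 0..KERNEL_PID */
	/* KERNEL_PID == 0: (2 << 0) - 1 == 0x1  -> only ASID 0 stays in use      */
	/* KERNEL_PID == 1: (2 << 1) - 1 == 0x3  -> ASIDs 0 and 1 stay in use     */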

Index: src/sys/uvm/pmap/pmap_tlb.h
diff -u src/sys/uvm/pmap/pmap_tlb.h:1.8 src/sys/uvm/pmap/pmap_tlb.h:1.9
--- src/sys/uvm/pmap/pmap_tlb.h:1.8	Thu Apr  2 06:17:52 2015
+++ src/sys/uvm/pmap/pmap_tlb.h	Mon Jul 11 16:06:09 2016
@@ -1,4 +1,4 @@
-/*	$NetBSD: pmap_tlb.h,v 1.8 2015/04/02 06:17:52 matt Exp $	*/
+/*	$NetBSD: pmap_tlb.h,v 1.9 2016/07/11 16:06:09 matt Exp $	*/
 
 /*
  * Copyright (c) 1992, 1993
@@ -171,7 +171,7 @@ void	pmap_tlb_info_evcnt_attach(struct p
 void	pmap_tlb_asid_acquire(pmap_t, struct lwp *l);
 void	pmap_tlb_asid_deactivate(pmap_t);
 void	pmap_tlb_asid_release_all(pmap_t);
-int	pmap_tlb_update_addr(pmap_t, vaddr_t, uint32_t, u_int);
+int	pmap_tlb_update_addr(pmap_t, vaddr_t, pt_entry_t, u_int);
 #define	PMAP_TLB_NEED_IPI	0x01
 #define	PMAP_TLB_INSERT		0x02
 void	pmap_tlb_invalidate_addr(pmap_t, vaddr_t);
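
[Note: with the prototype change above, callers now hand over the full pt_entry_t together with the flag bits. A minimal sketch, assuming the usual pmap_enter-style context; npte must already be a valid PTE, and need_ipi is an illustrative flag, not part of this commit:]

	kpreempt_disable();		/* pmap_tlb_update_addr asserts this */
	pmap_tlb_update_addr(pm, va, npte,
	    PMAP_TLB_INSERT | (need_ipi ? PMAP_TLB_NEED_IPI : 0));
	kpreempt_enable();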

Added files:

Index: src/sys/uvm/pmap/pmap_synci.h
diff -u /dev/null src/sys/uvm/pmap/pmap_synci.h:1.1
--- /dev/null	Mon Jul 11 16:06:09 2016
+++ src/sys/uvm/pmap/pmap_synci.h	Mon Jul 11 16:06:09 2016
@@ -0,0 +1,41 @@
+/* $NetBSD: pmap_synci.h,v 1.1 2016/07/11 16:06:09 matt Exp $ */
+/*-
+ * Copyright (c) 2015 The NetBSD Foundation, Inc.
+ * All rights reserved.
+ *
+ * This code is derived from software contributed to The NetBSD Foundation
+ * by Matt Thomas of 3am Software Foundry.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
+ * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
+ * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+ * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
+ * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef _UVM_PMAP_PMAP_SYNCI_H_
+#define _UVM_PMAP_PMAP_SYNCI_H_
+
+extern u_int pmap_tlb_synci_map_mask;
+extern u_int pmap_tlb_synci_page_mask;
+
+void	pmap_tlb_syncicache(vaddr_t, const kcpuset_t *);
+void	pmap_tlb_syncicache_ast(struct cpu_info *);
+void	pmap_tlb_syncicache_wanted(struct cpu_info *);
+
+#endif /* _UVM_PMAP_PMAP_SYNCI_H_ */
