Module Name:    src
Committed By:   ad
Date:           Sun Jun 14 21:47:15 UTC 2020

Modified Files:
        src/sys/arch/aarch64/aarch64: pmap.c
        src/sys/arch/aarch64/include: pmap.h

Log Message:
- Fix a lock order reversal in pmap_page_protect(); the deadlock-avoidance
  idea behind the new pm_reverse_lock() is sketched after this list.

- Make sure the pmap is always locked when updating stats; atomics are no
  longer needed for that.

- Remove unneeded traversal of pv list in pmap_enter_pv().

- Shrink struct vm_page from 136 to 128 bytes (cache line sized) and struct
  pv_entry from 48 to 32 bytes (power of 2 sized).

- Embed a pv_entry in each vm_page.  This means PV entries don't need to
  be allocated for private anonymous memory / COW pages / most UBC mappings.
  Dynamic PV entries are then used only for things like shared libraries and
  shared memory; a small sketch of the scheme follows below.

Proposed on port-arm@.


To generate a diff of this commit:
cvs rdiff -u -r1.77 -r1.78 src/sys/arch/aarch64/aarch64/pmap.c
cvs rdiff -u -r1.39 -r1.40 src/sys/arch/aarch64/include/pmap.h

Please note that diffs are not public domain; they are subject to the
copyright notices on the relevant files.

Modified files:

Index: src/sys/arch/aarch64/aarch64/pmap.c
diff -u src/sys/arch/aarch64/aarch64/pmap.c:1.77 src/sys/arch/aarch64/aarch64/pmap.c:1.78
--- src/sys/arch/aarch64/aarch64/pmap.c:1.77	Wed Jun 10 22:24:22 2020
+++ src/sys/arch/aarch64/aarch64/pmap.c	Sun Jun 14 21:47:14 2020
@@ -1,4 +1,4 @@
-/*	$NetBSD: pmap.c,v 1.77 2020/06/10 22:24:22 ad Exp $	*/
+/*	$NetBSD: pmap.c,v 1.78 2020/06/14 21:47:14 ad Exp $	*/
 
 /*
  * Copyright (c) 2017 Ryo Shimizu <r...@nerv.org>
@@ -27,7 +27,7 @@
  */
 
 #include <sys/cdefs.h>
-__KERNEL_RCSID(0, "$NetBSD: pmap.c,v 1.77 2020/06/10 22:24:22 ad Exp $");
+__KERNEL_RCSID(0, "$NetBSD: pmap.c,v 1.78 2020/06/14 21:47:14 ad Exp $");
 
 #include "opt_arm_debug.h"
 #include "opt_ddb.h"
@@ -102,8 +102,9 @@ PMAP_COUNTER(pdp_alloc_boot, "page table
 PMAP_COUNTER(pdp_alloc, "page table page allocate (uvm_pagealloc)");
 PMAP_COUNTER(pdp_free, "page table page free (uvm_pagefree)");
 
-PMAP_COUNTER(pv_enter, "pv_entry allocate and link");
-PMAP_COUNTER(pv_remove, "pv_entry free and unlink");
+PMAP_COUNTER(pv_enter, "pv_entry fill");
+PMAP_COUNTER(pv_remove_dyn, "pv_entry free and unlink dynamic");
+PMAP_COUNTER(pv_remove_emb, "pv_entry clear embedded");
 PMAP_COUNTER(pv_remove_nopv, "no pv_entry found when removing pv");
 
 PMAP_COUNTER(activate, "pmap_activate call");
@@ -184,15 +185,6 @@ PMAP_COUNTER(unwire_failure, "pmap_unwir
 
 #define VM_PAGE_TO_PP(pg)	(&(pg)->mdpage.mdpg_pp)
 
-struct pv_entry {
-	LIST_ENTRY(pv_entry) pv_link;
-	struct pmap *pv_pmap;
-	vaddr_t pv_va;
-	paddr_t pv_pa;		/* debug */
-	pt_entry_t *pv_ptep;	/* for fast pte lookup */
-};
-#define pv_next	pv_link.le_next
-
 #define L3INDEXMASK	(L3_SIZE * Ln_ENTRIES - 1)
 #define PDPSWEEP_TRIGGER	512
 
@@ -204,7 +196,7 @@ static void _pmap_remove(struct pmap *, 
     struct pv_entry **);
 static int _pmap_enter(struct pmap *, vaddr_t, paddr_t, vm_prot_t, u_int, bool);
 
-static struct pmap kernel_pmap;
+static struct pmap kernel_pmap __cacheline_aligned;
 
 struct pmap * const kernel_pmap_ptr = &kernel_pmap;
 static vaddr_t pmap_maxkvaddr;
@@ -223,27 +215,48 @@ static inline void
 pmap_pv_lock(struct pmap_page *pp)
 {
 
-	mutex_enter(&pp->pp_pvlock);
+	mutex_spin_enter(&pp->pp_pvlock);
 }
 
 static inline void
 pmap_pv_unlock(struct pmap_page *pp)
 {
 
-	mutex_exit(&pp->pp_pvlock);
+	mutex_spin_exit(&pp->pp_pvlock);
 }
 
 
 static inline void
 pm_lock(struct pmap *pm)
 {
-	mutex_enter(&pm->pm_lock);
+	mutex_spin_enter(&pm->pm_lock);
 }
 
 static inline void
 pm_unlock(struct pmap *pm)
 {
-	mutex_exit(&pm->pm_lock);
+	mutex_spin_exit(&pm->pm_lock);
+}
+
+static bool
+pm_reverse_lock(struct pmap *pm, struct pmap_page *pp)
+{
+
+	KASSERT(mutex_owned(&pp->pp_pvlock));
+
+	if (__predict_true(mutex_tryenter(&pm->pm_lock)))
+		return true;
+
+	if (pm != pmap_kernel())
+		pmap_reference(pm);
+	mutex_spin_exit(&pp->pp_pvlock);
+	mutex_spin_enter(&pm->pm_lock);
+	/* nothing, just wait for lock */
+	mutex_spin_exit(&pm->pm_lock);
+	if (pm != pmap_kernel())
+		pmap_destroy(pm);
+	mutex_spin_enter(&pp->pp_pvlock);
+	return false;
 }
 
 static inline struct pmap_page *
@@ -466,14 +479,22 @@ pmap_bootstrap(vaddr_t vstart, vaddr_t v
 
 	CTASSERT(sizeof(kpm->pm_stats.wired_count) == sizeof(long));
 	CTASSERT(sizeof(kpm->pm_stats.resident_count) == sizeof(long));
-#define PMSTAT_INC_WIRED_COUNT(pm)	\
-	atomic_inc_ulong(&(pm)->pm_stats.wired_count)
-#define PMSTAT_DEC_WIRED_COUNT(pm)	\
-	atomic_dec_ulong(&(pm)->pm_stats.wired_count)
-#define PMSTAT_INC_RESIDENT_COUNT(pm)	\
-	atomic_inc_ulong(&(pm)->pm_stats.resident_count)
-#define PMSTAT_DEC_RESIDENT_COUNT(pm)	\
-	atomic_dec_ulong(&(pm)->pm_stats.resident_count)
+#define PMSTAT_INC_WIRED_COUNT(pm) do { \
+	KASSERT(mutex_owned(&(pm)->pm_lock)); \
+	(pm)->pm_stats.wired_count++; \
+} while (/* CONSTCOND */ 0);
+#define PMSTAT_DEC_WIRED_COUNT(pm) do{ \
+	KASSERT(mutex_owned(&(pm)->pm_lock)); \
+	(pm)->pm_stats.wired_count--; \
+} while (/* CONSTCOND */ 0);
+#define PMSTAT_INC_RESIDENT_COUNT(pm) do { \
+	KASSERT(mutex_owned(&(pm)->pm_lock)); \
+	(pm)->pm_stats.resident_count++; \
+} while (/* CONSTCOND */ 0);
+#define PMSTAT_DEC_RESIDENT_COUNT(pm) do { \
+	KASSERT(mutex_owned(&(pm)->pm_lock)); \
+	(pm)->pm_stats.resident_count--; \
+} while (/* CONSTCOND */ 0);
 }
 
 inline static int
@@ -501,10 +522,12 @@ pmap_init(void)
 {
 
 	pool_cache_bootstrap(&_pmap_cache, sizeof(struct pmap),
-	    0, 0, 0, "pmappl", NULL, IPL_NONE, _pmap_pmap_ctor, NULL, NULL);
-	pool_cache_bootstrap(&_pmap_pv_pool, sizeof(struct pv_entry),
-	    0, 0, 0, "pvpl", NULL, IPL_VM, _pmap_pv_ctor, NULL, NULL);
+	    coherency_unit, 0, 0, "pmappl", NULL, IPL_NONE, _pmap_pmap_ctor,
+	    NULL, NULL);
 
+	pool_cache_bootstrap(&_pmap_pv_pool, sizeof(struct pv_entry),
+	    32, 0, PR_LARGECACHE, "pvpl", NULL, IPL_NONE, _pmap_pv_ctor,
+	    NULL, NULL);
 }
 
 void
@@ -584,17 +607,12 @@ pmap_alloc_pdp(struct pmap *pm, struct v
 			return POOL_PADDR_INVALID;
 		}
 
-		LIST_INSERT_HEAD(&pm->pm_vmlist, pg, mdpage.mdpg_vmlist);
+		LIST_INSERT_HEAD(&pm->pm_vmlist, pg, pageq.list);
 		pg->flags &= ~PG_BUSY;	/* never busy */
 		pg->wire_count = 1;	/* max = 1 + Ln_ENTRIES = 513 */
 		pa = VM_PAGE_TO_PHYS(pg);
 		PMAP_COUNT(pdp_alloc);
-
-		VM_PAGE_TO_MD(pg)->mdpg_ptep_parent = NULL;
-
-		struct pmap_page *pp = VM_PAGE_TO_PP(pg);
-		pp->pp_flags = 0;
-
+		PMAP_PAGE_INIT(VM_PAGE_TO_PP(pg));
 	} else {
 		/* uvm_pageboot_alloc() returns AARCH64 KSEG address */
 		pg = NULL;
@@ -614,13 +632,13 @@ pmap_alloc_pdp(struct pmap *pm, struct v
 static void
 pmap_free_pdp(struct pmap *pm, struct vm_page *pg)
 {
-	LIST_REMOVE(pg, mdpage.mdpg_vmlist);
-	pg->flags |= PG_BUSY;
-	pg->wire_count = 0;
 
-	struct pmap_page *pp __diagused = VM_PAGE_TO_PP(pg);
-	KASSERT(LIST_EMPTY(&pp->pp_pvhead));
+	KASSERT(pm != pmap_kernel());
+	KASSERT(VM_PAGE_TO_PP(pg)->pp_pv.pv_pmap == NULL);
+	KASSERT(VM_PAGE_TO_PP(pg)->pp_pv.pv_next == NULL);
 
+	LIST_REMOVE(pg, pageq.list);
+	pg->wire_count = 0;
 	uvm_pagefree(pg);
 	PMAP_COUNT(pdp_free);
 }
@@ -635,8 +653,10 @@ _pmap_sweep_pdp(struct pmap *pm)
 	int nsweep;
 	uint16_t wirecount __diagused;
 
+	KASSERT(mutex_owned(&pm->pm_lock) || pm->pm_refcnt == 0);
+
 	nsweep = 0;
-	LIST_FOREACH_SAFE(pg, &pm->pm_vmlist, mdpage.mdpg_vmlist, tmp) {
+	LIST_FOREACH_SAFE(pg, &pm->pm_vmlist, pageq.list, tmp) {
 		if (pg->wire_count != 1)
 			continue;
 
@@ -655,7 +675,7 @@ _pmap_sweep_pdp(struct pmap *pm)
 		/* unlink from parent */
 		opte = atomic_swap_64(ptep_in_parent, 0);
 		KASSERT(lxpde_valid(opte));
-		wirecount = atomic_add_32_nv(&pg->wire_count, -1); /* 1 -> 0 */
+		wirecount = --pg->wire_count; /* 1 -> 0 */
 		KASSERT(wirecount == 0);
 		pmap_free_pdp(pm, pg);
 		nsweep++;
@@ -670,12 +690,12 @@ _pmap_sweep_pdp(struct pmap *pm)
 		KASSERTMSG(pg->wire_count >= 1,
 		    "wire_count=%d", pg->wire_count);
 		/* decrement wire_count of parent */
-		wirecount = atomic_add_32_nv(&pg->wire_count, -1);
+		wirecount = --pg->wire_count;
 		KASSERTMSG(pg->wire_count <= (Ln_ENTRIES + 1),
 		    "pm=%p[%d], pg=%p, wire_count=%d",
 		    pm, pm->pm_asid, pg, pg->wire_count);
 	}
-	atomic_swap_uint(&pm->pm_idlepdp, 0);
+	pm->pm_idlepdp = 0;
 
 	return nsweep;
 }
@@ -683,9 +703,9 @@ _pmap_sweep_pdp(struct pmap *pm)
 static void
 _pmap_free_pdp_all(struct pmap *pm)
 {
-	struct vm_page *pg, *tmp;
+	struct vm_page *pg;
 
-	LIST_FOREACH_SAFE(pg, &pm->pm_vmlist, mdpage.mdpg_vmlist, tmp) {
+	while ((pg = LIST_FIRST(&pm->pm_vmlist)) != NULL) {
 		pmap_free_pdp(pm, pg);
 	}
 }
@@ -1015,9 +1035,10 @@ _pmap_pte_adjust_cacheflags(pt_entry_t p
 }
 
 static struct pv_entry *
-_pmap_remove_pv(struct pmap_page *pp, struct pmap *pm, vaddr_t va, pt_entry_t pte)
+_pmap_remove_pv(struct pmap_page *pp, struct pmap *pm, vaddr_t va,
+    pt_entry_t pte)
 {
-	struct pv_entry *pv;
+	struct pv_entry *pv, *ppv;
 
 	UVMHIST_FUNC(__func__);
 	UVMHIST_CALLED(pmaphist);
@@ -1025,18 +1046,26 @@ _pmap_remove_pv(struct pmap_page *pp, st
 	UVMHIST_LOG(pmaphist, "pp=%p, pm=%p, va=%llx, pte=%llx",
 	    pp, pm, va, pte);
 
-	LIST_FOREACH(pv, &pp->pp_pvhead, pv_link) {
-		if ((pm == pv->pv_pmap) && (va == pv->pv_va)) {
-			LIST_REMOVE(pv, pv_link);
-			PMAP_COUNT(pv_remove);
+	KASSERT(mutex_owned(&pp->pp_pvlock));
+
+	for (ppv = NULL, pv = &pp->pp_pv; pv != NULL; pv = pv->pv_next) {
+		if (pv->pv_pmap == pm && trunc_page(pv->pv_va) == va) {
 			break;
 		}
+		ppv = pv;
 	}
-#ifdef PMAPCOUNTERS
-	if (pv == NULL) {
+	if (ppv == NULL) {
+		/* embedded in pmap_page */
+		pv->pv_pmap = NULL;
+		pv = NULL;
+		PMAP_COUNT(pv_remove_emb);
+	} else if (pv != NULL) {
+		/* dynamically allocated */
+		ppv->pv_next = pv->pv_next;
+		PMAP_COUNT(pv_remove_dyn);
+	} else {
 		PMAP_COUNT(pv_remove_nopv);
 	}
-#endif
 
 	return pv;
 }
@@ -1082,23 +1111,25 @@ static void
 pv_dump(struct pmap_page *pp, void (*pr)(const char *, ...) __printflike(1, 2))
 {
 	struct pv_entry *pv;
-	int i;
+	int i, flags;
 
 	i = 0;
+	flags = pp->pp_pv.pv_va & (PAGE_SIZE - 1);
 
 	pr("pp=%p\n", pp);
-	pr(" pp->pp_flags=%08x %s\n", pp->pp_flags,
-	    str_vmflags(pp->pp_flags));
+	pr(" pp flags=%08x %s\n", flags, str_vmflags(flags));
 
-	LIST_FOREACH(pv, &pp->pp_pvhead, pv_link) {
+	for (pv = &pp->pp_pv; pv != NULL; pv = pv->pv_next) {
+		if (pv->pv_pmap == NULL) {
+			KASSERT(pv == &pp->pp_pv);
+			continue;
+		}
 		pr("  pv[%d] pv=%p\n",
 		    i, pv);
 		pr("    pv[%d].pv_pmap = %p (asid=%d)\n",
 		    i, pv->pv_pmap, pv->pv_pmap->pm_asid);
 		pr("    pv[%d].pv_va   = %016lx (color=%d)\n",
-		    i, pv->pv_va, _pmap_color(pv->pv_va));
-		pr("    pv[%d].pv_pa   = %016lx (color=%d)\n",
-		    i, pv->pv_pa, _pmap_color(pv->pv_pa));
+		    i, trunc_page(pv->pv_va), _pmap_color(pv->pv_va));
 		pr("    pv[%d].pv_ptep = %p\n",
 		    i, pv->pv_ptep);
 		i++;
@@ -1118,14 +1149,20 @@ _pmap_enter_pv(struct pmap_page *pp, str
 	UVMHIST_LOG(pmaphist, "pp=%p, pm=%p, va=%llx, pa=%llx", pp, pm, va, pa);
 	UVMHIST_LOG(pmaphist, "ptep=%p, flags=%08x", ptep, flags, 0, 0);
 
-	/* pv is already registered? */
-	LIST_FOREACH(pv, &pp->pp_pvhead, pv_link) {
-		if ((pm == pv->pv_pmap) && (va == pv->pv_va)) {
-			break;
-		}
-	}
+	KASSERT(mutex_owned(&pp->pp_pvlock));
+	KASSERT(trunc_page(va) == va);
 
-	if (pv == NULL) {
+	/*
+	 * mapping cannot be already registered at this VA.
+	 */
+	if (pp->pp_pv.pv_pmap == NULL) {
+		/*
+		 * claim pv_entry embedded in pmap_page.
+		 * take care not to wipe out acc/mod flags.
+		 */
+		pv = &pp->pp_pv;
+		pv->pv_va = (pv->pv_va & (PAGE_SIZE - 1)) | va;
+	} else {
 		/*
 		 * create and link new pv.
 		 * pv is already allocated at beginning of _pmap_enter().
@@ -1134,23 +1171,18 @@ _pmap_enter_pv(struct pmap_page *pp, str
 		if (pv == NULL)
 			return ENOMEM;
 		*pvp = NULL;
-
-		pv->pv_pmap = pm;
+		pv->pv_next = pp->pp_pv.pv_next;
+		pp->pp_pv.pv_next = pv;
 		pv->pv_va = va;
-		pv->pv_pa = pa;
-		pv->pv_ptep = ptep;
-
-		LIST_INSERT_HEAD(&pp->pp_pvhead, pv, pv_link);
-		PMAP_COUNT(pv_enter);
+	}
+	pv->pv_pmap = pm;
+	pv->pv_ptep = ptep;
+	PMAP_COUNT(pv_enter);
 
 #ifdef PMAP_PV_DEBUG
-		if (!LIST_EMPTY(&pp->pp_pvhead)){
-			printf("pv %p alias added va=%016lx -> pa=%016lx\n",
-			    pv, va, pa);
-			pv_dump(pp, printf);
-		}
+	printf("pv %p alias added va=%016lx -> pa=%016lx\n", pv, va, pa);
+	pv_dump(pp, printf);
 #endif
-	}
 
 	return 0;
 }
@@ -1158,18 +1190,14 @@ _pmap_enter_pv(struct pmap_page *pp, str
 void
 pmap_kenter_pa(vaddr_t va, paddr_t pa, vm_prot_t prot, u_int flags)
 {
-	int s;
 
-	s = splvm();
 	_pmap_enter(pmap_kernel(), va, pa, prot, flags | PMAP_WIRED, true);
-	splx(s);
 }
 
 void
 pmap_kremove(vaddr_t va, vsize_t size)
 {
 	struct pmap *kpm = pmap_kernel();
-	int s;
 
 	UVMHIST_FUNC(__func__);
 	UVMHIST_CALLED(pmaphist);
@@ -1182,11 +1210,9 @@ pmap_kremove(vaddr_t va, vsize_t size)
 	KDASSERT(!IN_KSEG_ADDR(va));
 	KDASSERT(IN_RANGE(va, VM_MIN_KERNEL_ADDRESS, VM_MAX_KERNEL_ADDRESS));
 
-	s = splvm();
 	pm_lock(kpm);
 	_pmap_remove(kpm, va, va + size, true, NULL);
 	pm_unlock(kpm);
-	splx(s);
 }
 
 static void
@@ -1201,13 +1227,10 @@ _pmap_protect_pv(struct pmap_page *pp, s
 	UVMHIST_CALLED(pmaphist);
 
 	UVMHIST_LOG(pmaphist, "pp=%p, pv=%p, prot=%08x", pp, pv, prot, 0);
+	KASSERT(mutex_owned(&pv->pv_pmap->pm_lock));
 
 	/* get prot mask from referenced/modified */
-	mdattr = pp->pp_flags &
-	    (VM_PROT_READ | VM_PROT_WRITE);
-
-	pm_lock(pv->pv_pmap);
-
+	mdattr = pp->pp_pv.pv_va & (VM_PROT_READ | VM_PROT_WRITE);
 	ptep = pv->pv_ptep;
 	pte = *ptep;
 
@@ -1223,9 +1246,8 @@ _pmap_protect_pv(struct pmap_page *pp, s
 	/* new prot = prot & pteprot & mdattr */
 	pte = _pmap_pte_adjust_prot(pte, prot & pteprot, mdattr, user);
 	atomic_swap_64(ptep, pte);
-	AARCH64_TLBI_BY_ASID_VA(pv->pv_pmap->pm_asid, pv->pv_va, true);
-
-	pm_unlock(pv->pv_pmap);
+	AARCH64_TLBI_BY_ASID_VA(pv->pv_pmap->pm_asid, trunc_page(pv->pv_va),
+	    true);
 }
 
 void
@@ -1307,7 +1329,8 @@ pmap_protect(struct pmap *pm, vaddr_t sv
 
 		if (pp != NULL) {
 			/* get prot mask from referenced/modified */
-			mdattr = pp->pp_flags & (VM_PROT_READ | VM_PROT_WRITE);
+			mdattr = pp->pp_pv.pv_va &
+			    (VM_PROT_READ | VM_PROT_WRITE);
 		} else {
 			/* unmanaged page */
 			mdattr = VM_PROT_ALL;
@@ -1471,8 +1494,11 @@ pmap_destroy(struct pmap *pm)
 static inline void
 _pmap_pdp_setparent(struct pmap *pm, struct vm_page *pg, pt_entry_t *ptep)
 {
-	if ((pm != pmap_kernel()) && (pg != NULL))
+
+	if ((pm != pmap_kernel()) && (pg != NULL)) {
+		KASSERT(mutex_owned(&pm->pm_lock));
 		VM_PAGE_TO_MD(pg)->mdpg_ptep_parent = ptep;
+	}
 }
 
 /*
@@ -1488,6 +1514,9 @@ _pmap_pdp_addref(struct pmap *pm, paddr_
 	/* kernel L0-L3 page will be never freed */
 	if (pm == pmap_kernel())
 		return;
+
+	KASSERT(mutex_owned(&pm->pm_lock));
+
 	/* no need for L0 page */
 	if (pm->pm_l0table_pa == pdppa)
 		return;
@@ -1497,8 +1526,7 @@ _pmap_pdp_addref(struct pmap *pm, paddr_
 		pg = PHYS_TO_VM_PAGE(pdppa);
 	KASSERT(pg != NULL);
 
-	CTASSERT(sizeof(pg->wire_count) == sizeof(uint32_t));
-	atomic_add_32(&pg->wire_count, 1);
+	pg->wire_count++;
 
 	KASSERTMSG(pg->wire_count <= (Ln_ENTRIES + 1),
 	    "pg=%p, wire_count=%d", pg, pg->wire_count);
@@ -1520,6 +1548,9 @@ _pmap_pdp_delref(struct pmap *pm, paddr_
 	/* kernel L0-L3 page will be never freed */
 	if (pm == pmap_kernel())
 		return false;
+
+	KASSERT(mutex_owned(&pm->pm_lock));
+
 	/* no need for L0 page */
 	if (pm->pm_l0table_pa == pdppa)
 		return false;
@@ -1527,7 +1558,7 @@ _pmap_pdp_delref(struct pmap *pm, paddr_
 	pg = PHYS_TO_VM_PAGE(pdppa);
 	KASSERT(pg != NULL);
 
-	wirecount = atomic_add_32_nv(&pg->wire_count, -1);
+	wirecount = --pg->wire_count;
 
 	if (!do_free_pdp) {
 		/*
@@ -1536,7 +1567,7 @@ _pmap_pdp_delref(struct pmap *pm, paddr_
 		 * pmap_enter(), but useful hint to just sweep.
 		 */
 		if (wirecount == 1)
-			atomic_inc_uint(&pm->pm_idlepdp);
+			pm->pm_idlepdp++;
 		return false;
 	}
 
@@ -1854,9 +1885,8 @@ _pmap_enter(struct pmap *pm, vaddr_t va,
 
 	if (pp != NULL) {
 		/* update referenced/modified flags */
-		pp->pp_flags |=
-		    (flags & (VM_PROT_READ | VM_PROT_WRITE));
-		mdattr &= pp->pp_flags;
+		pp->pp_pv.pv_va |= (flags & (VM_PROT_READ | VM_PROT_WRITE));
+		mdattr &= (uint32_t)pp->pp_pv.pv_va;
 	}
 
 #ifdef PMAPCOUNTERS
@@ -2028,37 +2058,53 @@ pmap_page_remove(struct pmap_page *pp, v
 {
 	struct pv_entry *pv, *pvtmp;
 	struct pv_entry *pvtofree = NULL;
+	struct pmap *pm;
 	pt_entry_t opte;
 
-		/* remove all pages reference to this physical page */
-		pmap_pv_lock(pp);
-		LIST_FOREACH_SAFE(pv, &pp->pp_pvhead, pv_link, pvtmp) {
-
-			opte = atomic_swap_64(pv->pv_ptep, 0);
-			if (lxpde_valid(opte)) {
-				_pmap_pdp_delref(pv->pv_pmap,
-				    AARCH64_KVA_TO_PA(trunc_page(
-				    (vaddr_t)pv->pv_ptep)), false);
-				AARCH64_TLBI_BY_ASID_VA(pv->pv_pmap->pm_asid,
-				    pv->pv_va, true);
+	/* remove all pages reference to this physical page */
+	pmap_pv_lock(pp);
+	for (pv = &pp->pp_pv; pv != NULL;) {
+		if ((pm = pv->pv_pmap) == NULL) {
+			KASSERT(pv == &pp->pp_pv);
+			pv = pp->pp_pv.pv_next;
+			continue;
+		}
+		if (!pm_reverse_lock(pm, pp)) {
+			/* now retry */
+			pv = &pp->pp_pv;
+			continue;
+		}
+		opte = atomic_swap_64(pv->pv_ptep, 0);
+		if (lxpde_valid(opte)) {
+			_pmap_pdp_delref(pv->pv_pmap,
+			    AARCH64_KVA_TO_PA(trunc_page(
+			    (vaddr_t)pv->pv_ptep)), false);
+			AARCH64_TLBI_BY_ASID_VA(pv->pv_pmap->pm_asid,
+			    trunc_page(pv->pv_va), true);
 
-				if ((opte & LX_BLKPAG_OS_WIRED) != 0) {
-					PMSTAT_DEC_WIRED_COUNT(pv->pv_pmap);
-				}
-				PMSTAT_DEC_RESIDENT_COUNT(pv->pv_pmap);
+			if ((opte & LX_BLKPAG_OS_WIRED) != 0) {
+				PMSTAT_DEC_WIRED_COUNT(pv->pv_pmap);
 			}
-			LIST_REMOVE(pv, pv_link);
-			PMAP_COUNT(pv_remove);
-
+			PMSTAT_DEC_RESIDENT_COUNT(pv->pv_pmap);
+		}
+		pvtmp = _pmap_remove_pv(pp, pm, trunc_page(pv->pv_va), opte);
+		if (pvtmp == NULL) {
+			KASSERT(pv == &pp->pp_pv);
+		} else {
+			KASSERT(pv == pvtmp);
+			pp->pp_pv.pv_next = pv->pv_next;
 			pv->pv_next = pvtofree;
 			pvtofree = pv;
 		}
-		pmap_pv_unlock(pp);
+		pm_unlock(pm);
+		pv = pp->pp_pv.pv_next;
+	}
+	pmap_pv_unlock(pp);
 
-		for (pv = pvtofree; pv != NULL; pv = pvtmp) {
-			pvtmp = pv->pv_next;
-			pool_cache_put(&_pmap_pv_pool, pv);
-		}
+	for (pv = pvtofree; pv != NULL; pv = pvtmp) {
+		pvtmp = pv->pv_next;
+		pool_cache_put(&_pmap_pv_pool, pv);
+	}
 }
 
 #ifdef __HAVE_PMAP_PV_TRACK
@@ -2087,6 +2133,7 @@ pmap_page_protect(struct vm_page *pg, vm
 {
 	struct pv_entry *pv;
 	struct pmap_page *pp;
+	struct pmap *pm;
 
 	KASSERT((prot & VM_PROT_READ) || !(prot & VM_PROT_WRITE));
 
@@ -2098,13 +2145,32 @@ pmap_page_protect(struct vm_page *pg, vm
 	UVMHIST_LOG(pmaphist, "pg=%p, pp=%p, pa=%016lx, prot=%08x",
 	    pg, pp, VM_PAGE_TO_PHYS(pg), prot);
 
+	/* do an unlocked check first */
+	if (atomic_load_relaxed(&pp->pp_pv.pv_pmap) == NULL &&
+	    atomic_load_relaxed(&pp->pp_pv.pv_next) == NULL) {
+		return;
+	}
+
 	if ((prot & (VM_PROT_READ|VM_PROT_WRITE|VM_PROT_EXECUTE)) ==
 	    VM_PROT_NONE) {
 		pmap_page_remove(pp, prot);
 	} else {
 		pmap_pv_lock(pp);
-		LIST_FOREACH(pv, &pp->pp_pvhead, pv_link) {
+		pv = &pp->pp_pv;
+		while (pv != NULL) {
+			if ((pm = pv->pv_pmap) == NULL) {
+				KASSERT(pv == &pp->pp_pv);
+				pv = pv->pv_next;
+				continue;
+			}
+			if (!pm_reverse_lock(pm, pp)) {
+				/* retry */
+				pv = &pp->pp_pv;
+				continue;
+			}
 			_pmap_protect_pv(pp, pv, prot);
+			pm_unlock(pm);
+			pv = pv->pv_next;
 		}
 		pmap_pv_unlock(pp);
 	}
@@ -2243,7 +2309,7 @@ pmap_fault_fixup(struct pmap *pm, vaddr_
 		    "REFERENCED:"
 		    " va=%016lx, pa=%016lx, pte_prot=%08x, accessprot=%08x",
 		    va, pa, pmap_prot, accessprot);
-		pp->pp_flags |= VM_PROT_READ;		/* set referenced */
+		pp->pp_pv.pv_va |= VM_PROT_READ;	/* set referenced */
 		pte |= LX_BLKPAG_AF;
 
 		PMAP_COUNT(fixup_referenced);
@@ -2255,7 +2321,7 @@ pmap_fault_fixup(struct pmap *pm, vaddr_
 		UVMHIST_LOG(pmaphist, "MODIFIED:"
 		    " va=%016lx, pa=%016lx, pte_prot=%08x, accessprot=%08x",
 		    va, pa, pmap_prot, accessprot);
-		pp->pp_flags |= VM_PROT_WRITE;	/* set modified */
+		pp->pp_pv.pv_va |= VM_PROT_WRITE;	/* set modified */
 		pte &= ~LX_BLKPAG_AP;
 		pte |= LX_BLKPAG_AP_RW;
 
@@ -2284,23 +2350,40 @@ pmap_clear_modify(struct vm_page *pg)
 	UVMHIST_FUNC(__func__);
 	UVMHIST_CALLED(pmaphist);
 
-	UVMHIST_LOG(pmaphist, "pg=%p, pp_flags=%08x",
-	    pg, pp->pp_flags, 0, 0);
+	UVMHIST_LOG(pmaphist, "pg=%p, flags=%08x",
+	    pg, (int)(pp->pp_pv.pv_va & (PAGE_SIZE - 1)), 0, 0);
+
+	PMAP_COUNT(clear_modify);
+
+	/*
+	 * if this is a new page, assert it has no mappings and simply zap
+	 * the stored attributes without taking any locks.
+	 */
+	if ((pg->flags & PG_FAKE) != 0) {
+		KASSERT(atomic_load_relaxed(&pp->pp_pv.pv_pmap) == NULL);
+		KASSERT(atomic_load_relaxed(&pp->pp_pv.pv_next) == NULL);
+		atomic_store_relaxed(&pp->pp_pv.pv_va, 0);
+		return false;
+	}
 
 	pmap_pv_lock(pp);
 
-	if ((pp->pp_flags & VM_PROT_WRITE) == 0) {
+	if ((pp->pp_pv.pv_va & VM_PROT_WRITE) == 0) {
 		pmap_pv_unlock(pp);
 		return false;
 	}
 
-	pp->pp_flags &= ~VM_PROT_WRITE;
+	pp->pp_pv.pv_va &= ~(vaddr_t)VM_PROT_WRITE;
+
+	for (pv = &pp->pp_pv; pv != NULL; pv = pv->pv_next) {
+		if (pv->pv_pmap == NULL) {
+			KASSERT(pv == &pp->pp_pv);
+			continue;
+		}
 
-	PMAP_COUNT(clear_modify);
-	LIST_FOREACH(pv, &pp->pp_pvhead, pv_link) {
 		PMAP_COUNT(clear_modify_pages);
 
-		va = pv->pv_va;
+		va = trunc_page(pv->pv_va);
 
 		ptep = pv->pv_ptep;
 		opte = pte = *ptep;
@@ -2341,22 +2424,27 @@ pmap_clear_reference(struct vm_page *pg)
 	UVMHIST_FUNC(__func__);
 	UVMHIST_CALLED(pmaphist);
 
-	UVMHIST_LOG(pmaphist, "pg=%p, pp=%p, pp_flags=%08x",
-	    pg, pp, pp->pp_flags, 0);
+	UVMHIST_LOG(pmaphist, "pg=%p, pp=%p, flags=%08x",
+	    pg, pp, (int)(pp->pp_pv.pv_va & (PAGE_SIZE - 1)), 0);
 
 	pmap_pv_lock(pp);
 
-	if ((pp->pp_flags & VM_PROT_READ) == 0) {
+	if ((pp->pp_pv.pv_va & VM_PROT_READ) == 0) {
 		pmap_pv_unlock(pp);
 		return false;
 	}
-	pp->pp_flags &= ~VM_PROT_READ;
+	pp->pp_pv.pv_va &= ~(vaddr_t)VM_PROT_READ;
 
 	PMAP_COUNT(clear_reference);
-	LIST_FOREACH(pv, &pp->pp_pvhead, pv_link) {
+	for (pv = &pp->pp_pv; pv != NULL; pv = pv->pv_next) {
+		if (pv->pv_pmap == NULL) {
+			KASSERT(pv == &pp->pp_pv);
+			continue;
+		}
+
 		PMAP_COUNT(clear_reference_pages);
 
-		va = pv->pv_va;
+		va = trunc_page(pv->pv_va);
 
 		ptep = pv->pv_ptep;
 		opte = pte = *ptep;
@@ -2389,7 +2477,7 @@ pmap_is_modified(struct vm_page *pg)
 {
 	struct pmap_page * const pp = VM_PAGE_TO_PP(pg);
 
-	return (pp->pp_flags & VM_PROT_WRITE);
+	return (pp->pp_pv.pv_va & VM_PROT_WRITE);
 }
 
 bool
@@ -2397,7 +2485,7 @@ pmap_is_referenced(struct vm_page *pg)
 {
 	struct pmap_page * const pp = VM_PAGE_TO_PP(pg);
 
-	return (pp->pp_flags & VM_PROT_READ);
+	return (pp->pp_pv.pv_va & VM_PROT_READ);
 }
 
 #ifdef DDB

Index: src/sys/arch/aarch64/include/pmap.h
diff -u src/sys/arch/aarch64/include/pmap.h:1.39 src/sys/arch/aarch64/include/pmap.h:1.40
--- src/sys/arch/aarch64/include/pmap.h:1.39	Thu May 14 07:59:03 2020
+++ src/sys/arch/aarch64/include/pmap.h	Sun Jun 14 21:47:15 2020
@@ -1,4 +1,4 @@
-/* $NetBSD: pmap.h,v 1.39 2020/05/14 07:59:03 skrll Exp $ */
+/* $NetBSD: pmap.h,v 1.40 2020/06/14 21:47:15 ad Exp $ */
 
 /*-
  * Copyright (c) 2014 The NetBSD Foundation, Inc.
@@ -84,34 +84,38 @@ struct pmap {
 	bool pm_activated;
 };
 
-struct pv_entry;
+/* sized to reduce memory consumption & cache misses (32 bytes) */
+struct pv_entry {
+	struct pv_entry *pv_next;
+	struct pmap *pv_pmap;
+	vaddr_t pv_va;	/* for embedded entry (pp_pv) also includes flags */
+	void *pv_ptep;	/* pointer for fast pte lookup */
+};
 
 struct pmap_page {
 	kmutex_t pp_pvlock;
-	LIST_HEAD(, pv_entry) pp_pvhead;
-
-	/* VM_PROT_READ means referenced, VM_PROT_WRITE means modified */
-	uint32_t pp_flags;
+	struct pv_entry pp_pv;
 };
 
+/* try to keep vm_page at or under 128 bytes to reduce cache misses */
 struct vm_page_md {
-	LIST_ENTRY(vm_page) mdpg_vmlist;	/* L[0123] table vm_page list */
-	pd_entry_t *mdpg_ptep_parent;	/* for page descriptor page only */
-
 	struct pmap_page mdpg_pp;
 };
+/* for page descriptor page only */
+#define	mdpg_ptep_parent	mdpg_pp.pp_pv.pv_ptep
 
 #define VM_MDPAGE_INIT(pg)					\
 	do {							\
-		(pg)->mdpage.mdpg_ptep_parent = NULL;		\
 		PMAP_PAGE_INIT(&(pg)->mdpage.mdpg_pp);		\
 	} while (/*CONSTCOND*/ 0)
 
 #define PMAP_PAGE_INIT(pp)						\
 	do {								\
 		mutex_init(&(pp)->pp_pvlock, MUTEX_NODEBUG, IPL_VM);	\
-		LIST_INIT(&(pp)->pp_pvhead);				\
-		(pp)->pp_flags = 0;					\
+		(pp)->pp_pv.pv_next = NULL;				\
+		(pp)->pp_pv.pv_pmap = NULL;				\
+		(pp)->pp_pv.pv_va = 0;					\
+		(pp)->pp_pv.pv_ptep = NULL;				\
 	} while (/*CONSTCOND*/ 0)
 
 /* saved permission bit for referenced/modified emulation */
