Module Name:    src
Committed By:   ryo
Date:           Sat Apr  6 18:30:20 UTC 2019

Modified Files:
        src/sys/arch/aarch64/aarch64: pmap.c

Log Message:
Fix race conditions between pmap_page_protect() and pmap_enter().

When these functions handle the same PTE at the same time, there
is a window in which the number of valid PTEs and the wire_count
become inconsistent, triggering a KASSERT.
The pv_lock must be held while modifying them.
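
For readers skimming the diff, the net effect is roughly the following
pattern (a hand-written sketch, not code from the tree: the function name
is invented, the helpers are the pmap-internal ones visible in the hunks
below, error handling is omitted, and the fragment is not compilable on
its own):

	/*
	 * Sketch of the locking pattern after this change.
	 * _pmap_remove_pv()/_pmap_enter_pv() no longer take the pv lock
	 * themselves; the caller holds it across the pv-list update so
	 * that the PTE, the pv list and the wire_count stay consistent
	 * with respect to a concurrent pmap_page_protect().
	 */
	static void
	pv_update_sketch(struct pmap *pm, vaddr_t va, pt_entry_t pte)
	{
		struct vm_page *pg = PHYS_TO_VM_PAGE(lxpde_pa(pte));

		if (pg == NULL)
			return;

		pmap_pv_lock(VM_PAGE_TO_MD(pg));
		(void)_pmap_remove_pv(pg, pm, va, pte);
		pmap_pv_unlock(VM_PAGE_TO_MD(pg));
	}

In other words, the pv lock is pushed up from the helpers into their
callers (_pmap_enter() and _pmap_remove()), which can then hold it across
the whole pv-list and wire_count update instead of only inside the helper.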


To generate a diff of this commit:
cvs rdiff -u -r1.38 -r1.39 src/sys/arch/aarch64/aarch64/pmap.c

Please note that diffs are not public domain; they are subject to the
copyright notices on the relevant files.

Modified files:

Index: src/sys/arch/aarch64/aarch64/pmap.c
diff -u src/sys/arch/aarch64/aarch64/pmap.c:1.38 src/sys/arch/aarch64/aarch64/pmap.c:1.39
--- src/sys/arch/aarch64/aarch64/pmap.c:1.38	Wed Mar 20 07:05:06 2019
+++ src/sys/arch/aarch64/aarch64/pmap.c	Sat Apr  6 18:30:20 2019
@@ -1,4 +1,4 @@
-/*	$NetBSD: pmap.c,v 1.38 2019/03/20 07:05:06 ryo Exp $	*/
+/*	$NetBSD: pmap.c,v 1.39 2019/04/06 18:30:20 ryo Exp $	*/
 
 /*
  * Copyright (c) 2017 Ryo Shimizu <r...@nerv.org>
@@ -27,7 +27,7 @@
  */
 
 #include <sys/cdefs.h>
-__KERNEL_RCSID(0, "$NetBSD: pmap.c,v 1.38 2019/03/20 07:05:06 ryo Exp $");
+__KERNEL_RCSID(0, "$NetBSD: pmap.c,v 1.39 2019/04/06 18:30:20 ryo Exp $");
 
 #include "opt_arm_debug.h"
 #include "opt_ddb.h"
@@ -908,8 +908,6 @@ _pmap_remove_pv(struct vm_page *pg, stru
 
 	md = VM_PAGE_TO_MD(pg);
 
-	pmap_pv_lock(md);
-
 	TAILQ_FOREACH(pv, &md->mdpg_pvhead, pv_link) {
 		if ((pm == pv->pv_pmap) && (va == pv->pv_va)) {
 			TAILQ_REMOVE(&md->mdpg_pvhead, pv, pv_link);
@@ -923,8 +921,6 @@ _pmap_remove_pv(struct vm_page *pg, stru
 	}
 #endif
 
-	pmap_pv_unlock(md);
-
 	return pv;
 }
 
@@ -1008,8 +1004,6 @@ _pmap_enter_pv(struct vm_page *pg, struc
 
 	md = VM_PAGE_TO_MD(pg);
 
-	pmap_pv_lock(md);
-
 	/* pv is already registered? */
 	TAILQ_FOREACH(pv, &md->mdpg_pvhead, pv_link) {
 		if ((pm == pv->pv_pmap) && (va == pv->pv_va)) {
@@ -1018,8 +1012,6 @@ _pmap_enter_pv(struct vm_page *pg, struc
 	}
 
 	if (pv == NULL) {
-		pmap_pv_unlock(md);
-
 		/*
 		 * create and link new pv.
 		 * pv is already allocated at beginning of _pmap_enter().
@@ -1034,7 +1026,6 @@ _pmap_enter_pv(struct vm_page *pg, struc
 		pv->pv_pa = pa;
 		pv->pv_ptep = ptep;
 
-		pmap_pv_lock(md);
 		TAILQ_INSERT_HEAD(&md->mdpg_pvhead, pv, pv_link);
 		PMAP_COUNT(pv_enter);
 
@@ -1047,7 +1038,6 @@ _pmap_enter_pv(struct vm_page *pg, struc
 #endif
 	}
 
-	pmap_pv_unlock(md);
 	return 0;
 }
 
@@ -1283,6 +1273,7 @@ pmap_create(void)
 	pm->pm_asid = -1;
 	TAILQ_INIT(&pm->pm_vmlist);
 	mutex_init(&pm->pm_lock, MUTEX_DEFAULT, IPL_VM);
+
 	pm->pm_l0table_pa = pmap_alloc_pdp(pm, NULL, true);
 	KASSERT(pm->pm_l0table_pa != POOL_PADDR_INVALID);
 	pm->pm_l0table = (pd_entry_t *)AARCH64_PA_TO_KVA(pm->pm_l0table_pa);
@@ -1438,17 +1429,14 @@ _pmap_enter(struct pmap *pm, vaddr_t va,
 	struct vm_page *pg, *pdppg, *pdppg0;
 	struct pv_entry *spv, *opv = NULL;
 	pd_entry_t pde;
-	pt_entry_t attr, pte, *ptep;
-#ifdef UVMHIST
-	pt_entry_t opte;
-#endif
+	pt_entry_t attr, pte, opte, *ptep;
 	pd_entry_t *l0, *l1, *l2, *l3;
 	paddr_t pdppa, pdppa0;
 	uint32_t mdattr;
 	unsigned int idx;
 	int error = 0;
 	const bool user = (pm != pmap_kernel());
-	bool need_sync_icache, exists;
+	bool need_sync_icache, need_update_pv;
 	bool l3only = true;
 
 	UVMHIST_FUNC(__func__);
@@ -1496,9 +1484,11 @@ _pmap_enter(struct pmap *pm, vaddr_t va,
 		 * pool_cache_get() may call pmap_kenter() internally.
 		 */
 		spv = pool_cache_get(&_pmap_pv_pool, PR_NOWAIT);
+		need_update_pv = true;
 	} else {
 		PMAP_COUNT(unmanaged_mappings);
 		spv = NULL;
+		need_update_pv = false;
 	}
 
 	pm_lock(pm);
@@ -1512,12 +1502,13 @@ _pmap_enter(struct pmap *pm, vaddr_t va,
 	pde = l0[idx];
 	if (!l0pde_valid(pde)) {
 		/* no need to increment L0 occupancy. L0 page never freed */
-		pdppa = pmap_alloc_pdp(pm, &pdppg, false);	/* L1 pdp */
+		pdppa = pmap_alloc_pdp(pm, &pdppg, flags);	/* L1 pdp */
 		if (pdppa == POOL_PADDR_INVALID) {
 			if (flags & PMAP_CANFAIL) {
 				error = ENOMEM;
-				goto done;
+				goto fail0;
 			}
+			pm_unlock(pm);
 			panic("%s: cannot allocate L1 table", __func__);
 		}
 		atomic_swap_64(&l0[idx], pdppa | L0_TABLE);
@@ -1534,12 +1525,13 @@ _pmap_enter(struct pmap *pm, vaddr_t va,
 	if (!l1pde_valid(pde)) {
 		pdppa0 = pdppa;
 		pdppg0 = pdppg;
-		pdppa = pmap_alloc_pdp(pm, &pdppg, false);	/* L2 pdp */
+		pdppa = pmap_alloc_pdp(pm, &pdppg, flags);	/* L2 pdp */
 		if (pdppa == POOL_PADDR_INVALID) {
 			if (flags & PMAP_CANFAIL) {
 				error = ENOMEM;
-				goto done;
+				goto fail0;
 			}
+			pm_unlock(pm);
 			panic("%s: cannot allocate L2 table", __func__);
 		}
 		atomic_swap_64(&l1[idx], pdppa | L1_TABLE);
@@ -1557,12 +1549,13 @@ _pmap_enter(struct pmap *pm, vaddr_t va,
 	if (!l2pde_valid(pde)) {
 		pdppa0 = pdppa;
 		pdppg0 = pdppg;
-		pdppa = pmap_alloc_pdp(pm, &pdppg, false);	/* L3 pdp */
+		pdppa = pmap_alloc_pdp(pm, &pdppg, flags);	/* L3 pdp */
 		if (pdppa == POOL_PADDR_INVALID) {
 			if (flags & PMAP_CANFAIL) {
 				error = ENOMEM;
-				goto done;
+				goto fail0;
 			}
+			pm_unlock(pm);
 			panic("%s: cannot allocate L3 table", __func__);
 		}
 		atomic_swap_64(&l2[idx], pdppa | L2_TABLE);
@@ -1578,10 +1571,7 @@ _pmap_enter(struct pmap *pm, vaddr_t va,
 	idx = l3pte_index(va);
 	ptep = &l3[idx];	/* as PTE */
 
-	pte = *ptep;
-#ifdef UVMHIST
-	opte = pte;
-#endif
+	opte = atomic_swap_64(ptep, 0);
 	need_sync_icache = (prot & VM_PROT_EXECUTE);
 
 #ifdef KASAN
@@ -1590,45 +1580,47 @@ _pmap_enter(struct pmap *pm, vaddr_t va,
 	}
 #endif
 
-	if (l3pte_valid(pte)) {
-		KASSERT(!kenter);	/* pmap_kenter_pa() cannot override */
-
-		PMAP_COUNT(remappings);
-
-		/* pte is Already mapped */
-		if (l3pte_pa(pte) == pa) {
-			if (need_sync_icache && l3pte_executable(pte, user))
-				need_sync_icache = false;
+	if (pg != NULL)
+		pmap_pv_lock(VM_PAGE_TO_MD(pg));
 
-		} else {
-			struct vm_page *opg;
+	/* remap? */
+	if (l3pte_valid(opte)) {
+		struct vm_page *opg;
+		bool need_remove_pv;
 
+		KASSERT(!kenter);	/* pmap_kenter_pa() cannot override */
 #ifdef PMAPCOUNTERS
-			if (user) {
-				PMAP_COUNT(user_mappings_changed);
-			} else {
-				PMAP_COUNT(kern_mappings_changed);
-			}
-#endif
-
-			UVMHIST_LOG(pmaphist,
-			    "va=%016lx has already mapped."
-			    " old-pa=%016lx new-pa=%016lx, pte=%016llx\n",
-			    va, l3pte_pa(pte), pa, pte);
-
-			opg = PHYS_TO_VM_PAGE(l3pte_pa(pte));
-			if (opg != NULL)
-				opv = _pmap_remove_pv(opg, pm, va, pte);
-		}
-
-		if (pte & LX_BLKPAG_OS_WIRED) {
+		PMAP_COUNT(remappings);
+		if (opte & LX_BLKPAG_OS_WIRED) {
 			PMSTAT_DEC_WIRED_COUNT(pm);
 		}
 		PMSTAT_DEC_RESIDENT_COUNT(pm);
-
-		exists = true;	/* already exists */
-	} else {
-		exists = false;
+		if (user) {
+			PMAP_COUNT(user_mappings_changed);
+		} else {
+			PMAP_COUNT(kern_mappings_changed);
+		}
+#endif
+		UVMHIST_LOG(pmaphist,
+		    "va=%016lx has already mapped."
+		    " old-pa=%016lx new-pa=%016lx, old-pte=%016llx\n",
+		    va, l3pte_pa(opte), pa, opte);
+
+		if (pa == l3pte_pa(opte)) {
+			/* old and new pte have same pa, no need to update pv */
+			need_remove_pv = (pg == NULL);
+			need_update_pv = false;
+			if (need_sync_icache && l3pte_executable(opte, user))
+				need_sync_icache = false;
+		} else {
+			need_remove_pv = true;
+		}
+		if (need_remove_pv) {
+			opg = PHYS_TO_VM_PAGE(l3pte_pa(opte));
+			if (opg != NULL) {
+				opv = _pmap_remove_pv(opg, pm, va, opte);
+			}
+		}
 	}
 
 	/*
@@ -1640,23 +1632,28 @@ _pmap_enter(struct pmap *pm, vaddr_t va,
 		prot |= VM_PROT_READ;
 
 	mdattr = VM_PROT_READ | VM_PROT_WRITE;
-	if (pg != NULL) {
+	if (need_update_pv) {
 		error = _pmap_enter_pv(pg, pm, &spv, va, ptep, pa, flags);
-
 		if (error != 0) {
 			/*
 			 * If pmap_enter() fails,
 			 * it must not leave behind an existing pmap entry.
 			 */
-			if (!kenter && ((pte & LX_BLKPAG_OS_WIRED) == 0))
-				atomic_swap_64(ptep, 0);
-
+			if (lxpde_valid(opte)) {
+				bool pdpremoved = _pmap_pdp_delref(pm,
+				    AARCH64_KVA_TO_PA(trunc_page(
+				    (vaddr_t)ptep)), true);
+				AARCH64_TLBI_BY_ASID_VA(pm->pm_asid,
+				    va, !pdpremoved);
+			}
 			PMAP_COUNT(pv_entry_cannotalloc);
 			if (flags & PMAP_CANFAIL)
-				goto done;
+				goto fail1;
 			panic("pmap_enter: failed to allocate pv_entry");
 		}
+	}
 
+	if (pg != NULL) {
 		/* update referenced/modified flags */
 		VM_PAGE_TO_MD(pg)->mdpg_flags |=
 		    (flags & (VM_PROT_READ | VM_PROT_WRITE));
@@ -1702,7 +1699,8 @@ _pmap_enter(struct pmap *pm, vaddr_t va,
 		atomic_swap_64(ptep, pte);
 		AARCH64_TLBI_BY_ASID_VA(pm->pm_asid, va, l3only);
 	}
-	if (!exists)
+
+	if (!l3pte_valid(opte))
 		_pmap_pdp_addref(pm, pdppa, pdppg);	/* L3 occupancy++ */
 
 	if (pte & LX_BLKPAG_OS_WIRED) {
@@ -1710,7 +1708,10 @@ _pmap_enter(struct pmap *pm, vaddr_t va,
 	}
 	PMSTAT_INC_RESIDENT_COUNT(pm);
 
- done:
+ fail1:
+	if (pg != NULL)
+		pmap_pv_unlock(VM_PAGE_TO_MD(pg));
+ fail0:
 	pm_unlock(pm);
 
 	/* spare pv was not used. discard */
@@ -1772,7 +1773,10 @@ _pmap_remove(struct pmap *pm, vaddr_t sv
 			pa = lxpde_pa(pte);
 			pg = PHYS_TO_VM_PAGE(pa);
 			if (pg != NULL) {
+
+				pmap_pv_lock(VM_PAGE_TO_MD(pg));
 				opv = _pmap_remove_pv(pg, pm, va, pte);
+				pmap_pv_unlock(VM_PAGE_TO_MD(pg));
 				if (opv != NULL) {
 					opv->pv_next = *pvtofree;
 					*pvtofree = opv;
