Module Name:	src
Committed By:	ryo
Date:		Sat Apr  6 18:30:20 UTC 2019
Modified Files:
	src/sys/arch/aarch64/aarch64: pmap.c

Log Message:
Fix race conditions between pmap_page_protect() and pmap_enter().
When these functions handle the same PTE at the same time, there is a
window in which the number of valid PTEs and wire_count become
inconsistent, which could trigger a KASSERT.  The pv_lock must be held
while modifying them.

To generate a diff of this commit:
cvs rdiff -u -r1.38 -r1.39 src/sys/arch/aarch64/aarch64/pmap.c

Please note that diffs are not public domain; they are subject to
the copyright notices on the relevant files.
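The invariant behind the KASSERT can be shown in miniature outside the
kernel.  The sketch below is not NetBSD code: a pthread mutex stands in
for pmap_pv_lock(), and struct fake_page and its fields are invented for
illustration.  It demonstrates why the PTE state and the wire_count
bookkeeping have to change inside a single critical section; if either
thread updated them outside the lock, the other thread's assert (the
stand-in for the kernel's KASSERT) could fire.

#include <assert.h>
#include <pthread.h>
#include <stdbool.h>
#include <stdio.h>

/* One "page": a fake PTE plus the bookkeeping that must stay in sync. */
struct fake_page {
	pthread_mutex_t pv_lock;	/* stands in for pmap_pv_lock(md) */
	bool pte_valid;			/* stands in for l3pte_valid(pte) */
	int wire_count;			/* must match the number of valid PTEs */
};

static struct fake_page page = {
	.pv_lock = PTHREAD_MUTEX_INITIALIZER,
};

/* Analogue of pmap_enter(): install the PTE and account for it. */
static void *
enter_loop(void *arg)
{
	(void)arg;
	for (int i = 0; i < 100000; i++) {
		pthread_mutex_lock(&page.pv_lock);
		if (!page.pte_valid) {
			page.pte_valid = true;
			page.wire_count++;
		}
		/* the KASSERT-style invariant holds while the lock is held */
		assert(page.wire_count == (page.pte_valid ? 1 : 0));
		pthread_mutex_unlock(&page.pv_lock);
	}
	return NULL;
}

/* Analogue of pmap_page_protect(): tear the PTE down and unaccount it. */
static void *
protect_loop(void *arg)
{
	(void)arg;
	for (int i = 0; i < 100000; i++) {
		pthread_mutex_lock(&page.pv_lock);
		if (page.pte_valid) {
			page.pte_valid = false;
			page.wire_count--;
		}
		assert(page.wire_count == (page.pte_valid ? 1 : 0));
		pthread_mutex_unlock(&page.pv_lock);
	}
	return NULL;
}

int
main(void)
{
	pthread_t t1, t2;

	pthread_create(&t1, NULL, enter_loop, NULL);
	pthread_create(&t2, NULL, protect_loop, NULL);
	pthread_join(t1, NULL);
	pthread_join(t2, NULL);
	printf("final: valid=%d wire_count=%d\n",
	    (int)page.pte_valid, page.wire_count);
	return 0;
}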
Modified files:

Index: src/sys/arch/aarch64/aarch64/pmap.c
diff -u src/sys/arch/aarch64/aarch64/pmap.c:1.38 src/sys/arch/aarch64/aarch64/pmap.c:1.39
--- src/sys/arch/aarch64/aarch64/pmap.c:1.38	Wed Mar 20 07:05:06 2019
+++ src/sys/arch/aarch64/aarch64/pmap.c	Sat Apr  6 18:30:20 2019
@@ -1,4 +1,4 @@
-/*	$NetBSD: pmap.c,v 1.38 2019/03/20 07:05:06 ryo Exp $	*/
+/*	$NetBSD: pmap.c,v 1.39 2019/04/06 18:30:20 ryo Exp $	*/
 
 /*
  * Copyright (c) 2017 Ryo Shimizu <r...@nerv.org>
@@ -27,7 +27,7 @@
  */
 
 #include <sys/cdefs.h>
-__KERNEL_RCSID(0, "$NetBSD: pmap.c,v 1.38 2019/03/20 07:05:06 ryo Exp $");
+__KERNEL_RCSID(0, "$NetBSD: pmap.c,v 1.39 2019/04/06 18:30:20 ryo Exp $");
 
 #include "opt_arm_debug.h"
 #include "opt_ddb.h"
@@ -908,8 +908,6 @@ _pmap_remove_pv(struct vm_page *pg, stru
 
 	md = VM_PAGE_TO_MD(pg);
 
-	pmap_pv_lock(md);
-
 	TAILQ_FOREACH(pv, &md->mdpg_pvhead, pv_link) {
 		if ((pm == pv->pv_pmap) && (va == pv->pv_va)) {
 			TAILQ_REMOVE(&md->mdpg_pvhead, pv, pv_link);
@@ -923,8 +921,6 @@ _pmap_remove_pv(struct vm_page *pg, stru
 	}
 #endif
 
-	pmap_pv_unlock(md);
-
 	return pv;
 }
 
@@ -1008,8 +1004,6 @@ _pmap_enter_pv(struct vm_page *pg, struc
 
 	md = VM_PAGE_TO_MD(pg);
 
-	pmap_pv_lock(md);
-
 	/* pv is already registered? */
 	TAILQ_FOREACH(pv, &md->mdpg_pvhead, pv_link) {
 		if ((pm == pv->pv_pmap) && (va == pv->pv_va)) {
@@ -1018,8 +1012,6 @@ _pmap_enter_pv(struct vm_page *pg, struc
 	}
 
 	if (pv == NULL) {
-		pmap_pv_unlock(md);
-
 		/*
 		 * create and link new pv.
 		 * pv is already allocated at beginning of _pmap_enter().
 		 */
@@ -1034,7 +1026,6 @@ _pmap_enter_pv(struct vm_page *pg, struc
 		pv->pv_pa = pa;
 		pv->pv_ptep = ptep;
 
-		pmap_pv_lock(md);
 		TAILQ_INSERT_HEAD(&md->mdpg_pvhead, pv, pv_link);
 		PMAP_COUNT(pv_enter);
 
@@ -1047,7 +1038,6 @@ _pmap_enter_pv(struct vm_page *pg, struc
 #endif
 	}
 
-	pmap_pv_unlock(md);
 	return 0;
 }
 
@@ -1283,6 +1273,7 @@ pmap_create(void)
 	pm->pm_asid = -1;
 	TAILQ_INIT(&pm->pm_vmlist);
 	mutex_init(&pm->pm_lock, MUTEX_DEFAULT, IPL_VM);
+
 	pm->pm_l0table_pa = pmap_alloc_pdp(pm, NULL, true);
 	KASSERT(pm->pm_l0table_pa != POOL_PADDR_INVALID);
 	pm->pm_l0table = (pd_entry_t *)AARCH64_PA_TO_KVA(pm->pm_l0table_pa);
@@ -1438,17 +1429,14 @@ _pmap_enter(struct pmap *pm, vaddr_t va,
 	struct vm_page *pg, *pdppg, *pdppg0;
 	struct pv_entry *spv, *opv = NULL;
 	pd_entry_t pde;
-	pt_entry_t attr, pte, *ptep;
-#ifdef UVMHIST
-	pt_entry_t opte;
-#endif
+	pt_entry_t attr, pte, opte, *ptep;
 	pd_entry_t *l0, *l1, *l2, *l3;
 	paddr_t pdppa, pdppa0;
 	uint32_t mdattr;
 	unsigned int idx;
 	int error = 0;
 	const bool user = (pm != pmap_kernel());
-	bool need_sync_icache, exists;
+	bool need_sync_icache, need_update_pv;
 	bool l3only = true;
 
 	UVMHIST_FUNC(__func__);
@@ -1496,9 +1484,11 @@ _pmap_enter(struct pmap *pm, vaddr_t va,
 		 * pool_cache_get() may call pmap_kenter() internally.
 		 */
 		spv = pool_cache_get(&_pmap_pv_pool, PR_NOWAIT);
+		need_update_pv = true;
 	} else {
 		PMAP_COUNT(unmanaged_mappings);
 		spv = NULL;
+		need_update_pv = false;
 	}
 
 	pm_lock(pm);
@@ -1512,12 +1502,13 @@ _pmap_enter(struct pmap *pm, vaddr_t va,
 	pde = l0[idx];
 	if (!l0pde_valid(pde)) {
 		/* no need to increment L0 occupancy. L0 page never freed */
-		pdppa = pmap_alloc_pdp(pm, &pdppg, false);	/* L1 pdp */
+		pdppa = pmap_alloc_pdp(pm, &pdppg, flags);	/* L1 pdp */
 		if (pdppa == POOL_PADDR_INVALID) {
 			if (flags & PMAP_CANFAIL) {
 				error = ENOMEM;
-				goto done;
+				goto fail0;
 			}
+			pm_unlock(pm);
 			panic("%s: cannot allocate L1 table", __func__);
 		}
 		atomic_swap_64(&l0[idx], pdppa | L0_TABLE);
@@ -1534,12 +1525,13 @@ _pmap_enter(struct pmap *pm, vaddr_t va,
 	if (!l1pde_valid(pde)) {
 		pdppa0 = pdppa;
 		pdppg0 = pdppg;
-		pdppa = pmap_alloc_pdp(pm, &pdppg, false);	/* L2 pdp */
+		pdppa = pmap_alloc_pdp(pm, &pdppg, flags);	/* L2 pdp */
 		if (pdppa == POOL_PADDR_INVALID) {
 			if (flags & PMAP_CANFAIL) {
 				error = ENOMEM;
-				goto done;
+				goto fail0;
 			}
+			pm_unlock(pm);
 			panic("%s: cannot allocate L2 table", __func__);
 		}
 		atomic_swap_64(&l1[idx], pdppa | L1_TABLE);
@@ -1557,12 +1549,13 @@ _pmap_enter(struct pmap *pm, vaddr_t va,
 	if (!l2pde_valid(pde)) {
 		pdppa0 = pdppa;
 		pdppg0 = pdppg;
-		pdppa = pmap_alloc_pdp(pm, &pdppg, false);	/* L3 pdp */
+		pdppa = pmap_alloc_pdp(pm, &pdppg, flags);	/* L3 pdp */
 		if (pdppa == POOL_PADDR_INVALID) {
 			if (flags & PMAP_CANFAIL) {
 				error = ENOMEM;
-				goto done;
+				goto fail0;
 			}
+			pm_unlock(pm);
 			panic("%s: cannot allocate L3 table", __func__);
 		}
 		atomic_swap_64(&l2[idx], pdppa | L2_TABLE);
@@ -1578,10 +1571,7 @@ _pmap_enter(struct pmap *pm, vaddr_t va,
 
 	idx = l3pte_index(va);
 	ptep = &l3[idx];	/* as PTE */
-	pte = *ptep;
-#ifdef UVMHIST
-	opte = pte;
-#endif
+	opte = atomic_swap_64(ptep, 0);
 	need_sync_icache = (prot & VM_PROT_EXECUTE);
 
 #ifdef KASAN
@@ -1590,45 +1580,47 @@ _pmap_enter(struct pmap *pm, vaddr_t va,
 	}
 #endif
 
-	if (l3pte_valid(pte)) {
-		KASSERT(!kenter);	/* pmap_kenter_pa() cannot override */
-
-		PMAP_COUNT(remappings);
-
-		/* pte is Already mapped */
-		if (l3pte_pa(pte) == pa) {
-			if (need_sync_icache && l3pte_executable(pte, user))
-				need_sync_icache = false;
+	if (pg != NULL)
+		pmap_pv_lock(VM_PAGE_TO_MD(pg));
 
-		} else {
-			struct vm_page *opg;
+	/* remap? */
+	if (l3pte_valid(opte)) {
+		struct vm_page *opg;
+		bool need_remove_pv;
 
+		KASSERT(!kenter);	/* pmap_kenter_pa() cannot override */
#ifdef PMAPCOUNTERS
-			if (user) {
-				PMAP_COUNT(user_mappings_changed);
-			} else {
-				PMAP_COUNT(kern_mappings_changed);
-			}
-#endif
-
-			UVMHIST_LOG(pmaphist,
-			    "va=%016lx has already mapped."
-			    " old-pa=%016lx new-pa=%016lx, pte=%016llx\n",
-			    va, l3pte_pa(pte), pa, pte);
-
-			opg = PHYS_TO_VM_PAGE(l3pte_pa(pte));
-			if (opg != NULL)
-				opv = _pmap_remove_pv(opg, pm, va, pte);
-		}
-
-		if (pte & LX_BLKPAG_OS_WIRED) {
+		PMAP_COUNT(remappings);
+		if (opte & LX_BLKPAG_OS_WIRED) {
 			PMSTAT_DEC_WIRED_COUNT(pm);
 		}
 		PMSTAT_DEC_RESIDENT_COUNT(pm);
-
-		exists = true;	/* already exists */
-	} else {
-		exists = false;
+		if (user) {
+			PMAP_COUNT(user_mappings_changed);
+		} else {
+			PMAP_COUNT(kern_mappings_changed);
+		}
+#endif
+		UVMHIST_LOG(pmaphist,
+		    "va=%016lx has already mapped."
+		    " old-pa=%016lx new-pa=%016lx, old-pte=%016llx\n",
+		    va, l3pte_pa(opte), pa, opte);
+
+		if (pa == l3pte_pa(opte)) {
+			/* old and new pte have same pa, no need to update pv */
+			need_remove_pv = (pg == NULL);
+			need_update_pv = false;
+			if (need_sync_icache && l3pte_executable(opte, user))
+				need_sync_icache = false;
+		} else {
+			need_remove_pv = true;
+		}
+		if (need_remove_pv) {
+			opg = PHYS_TO_VM_PAGE(l3pte_pa(opte));
+			if (opg != NULL) {
+				opv = _pmap_remove_pv(opg, pm, va, opte);
+			}
+		}
 	}
 
 	/*
@@ -1640,23 +1632,28 @@ _pmap_enter(struct pmap *pm, vaddr_t va,
 		prot |= VM_PROT_READ;
 
 	mdattr = VM_PROT_READ | VM_PROT_WRITE;
-	if (pg != NULL) {
+	if (need_update_pv) {
 		error = _pmap_enter_pv(pg, pm, &spv, va, ptep, pa, flags);
-
 		if (error != 0) {
 			/*
 			 * If pmap_enter() fails,
 			 * it must not leave behind an existing pmap entry.
 			 */
-			if (!kenter && ((pte & LX_BLKPAG_OS_WIRED) == 0))
-				atomic_swap_64(ptep, 0);
-
+			if (lxpde_valid(opte)) {
+				bool pdpremoved = _pmap_pdp_delref(pm,
+				    AARCH64_KVA_TO_PA(trunc_page(
+				    (vaddr_t)ptep)), true);
+				AARCH64_TLBI_BY_ASID_VA(pm->pm_asid,
+				    va, !pdpremoved);
+			}
 			PMAP_COUNT(pv_entry_cannotalloc);
 			if (flags & PMAP_CANFAIL)
-				goto done;
+				goto fail1;
 			panic("pmap_enter: failed to allocate pv_entry");
 		}
+	}
 
+	if (pg != NULL) {
 		/* update referenced/modified flags */
 		VM_PAGE_TO_MD(pg)->mdpg_flags |=
 		    (flags & (VM_PROT_READ | VM_PROT_WRITE));
@@ -1702,7 +1699,8 @@ _pmap_enter(struct pmap *pm, vaddr_t va,
 		atomic_swap_64(ptep, pte);
 		AARCH64_TLBI_BY_ASID_VA(pm->pm_asid, va, l3only);
 	}
-	if (!exists)
+
+	if (!l3pte_valid(opte))
 		_pmap_pdp_addref(pm, pdppa, pdppg);	/* L3 occupancy++ */
 
 	if (pte & LX_BLKPAG_OS_WIRED) {
@@ -1710,7 +1708,10 @@ _pmap_enter(struct pmap *pm, vaddr_t va,
 	}
 	PMSTAT_INC_RESIDENT_COUNT(pm);
 
- done:
+ fail1:
+	if (pg != NULL)
+		pmap_pv_unlock(VM_PAGE_TO_MD(pg));
+ fail0:
 	pm_unlock(pm);
 
 	/* spare pv was not used. discard */
@@ -1772,7 +1773,10 @@ _pmap_remove(struct pmap *pm, vaddr_t sv
 		pa = lxpde_pa(pte);
 		pg = PHYS_TO_VM_PAGE(pa);
 		if (pg != NULL) {
+
+			pmap_pv_lock(VM_PAGE_TO_MD(pg));
 			opv = _pmap_remove_pv(pg, pm, va, pte);
+			pmap_pv_unlock(VM_PAGE_TO_MD(pg));
 			if (opv != NULL) {
 				opv->pv_next = *pvtofree;
 				*pvtofree = opv;