Module Name:	src
Committed By:	ad
Date:		Tue Mar 17 18:40:35 UTC 2020
Modified Files:
	src/sys/arch/x86/x86: pmap.c

Log Message:
- Add more assertions.

- Range clipping for pmap_remove(): only need to keep track of the lowest
  VA in the PTP, as ptp->wire_count provides an upper bound.  D'oh.  Move
  setting of the range to where there is already a writeback to the PTP.

- pmap_pp_remove(): panic if pmap_sync_pv() returns an error, because it
  means something has gone very wrong.  The PTE should not change here,
  since the pmap is locked.

- pmap_pp_clear_attrs(): wait for the competing V->P operation by
  acquiring and releasing the pmap's lock, rather than busy looping.

- pmap_test_attrs(): this needs to wait for any competing operations,
  otherwise it could return without all necessary updates reflected in
  pp_attrs.

- pmap_enter(): fix a cut-and-paste screwup in an error path for Xen.


To generate a diff of this commit:
cvs rdiff -u -r1.371 -r1.372 src/sys/arch/x86/x86/pmap.c

Please note that diffs are not public domain; they are subject to the
copyright notices on the relevant files.
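For context, the idiom that replaces the SPINLOCK_BACKOFF() loops in
pmap_test_attrs() and pmap_pp_clear_attrs() can be shown outside the
kernel.  The sketch below is a userspace analogue using POSIX threads,
not NetBSD kernel code; "oplock" and "shared_attrs" are illustrative
stand-ins for the pmap's pm_lock and pp_attrs.  The loser of the race
waits by acquiring and releasing the lock held by the competing
operation, instead of spinning:

#include <pthread.h>
#include <stdio.h>
#include <unistd.h>

static pthread_mutex_t oplock = PTHREAD_MUTEX_INITIALIZER;
static unsigned shared_attrs;           /* stands in for pp_attrs */

/* The competing "V->P" side: updates shared_attrs under oplock. */
static void *
competing_op(void *arg)
{

        (void)arg;
        pthread_mutex_lock(&oplock);
        usleep(100 * 1000);             /* long critical section */
        shared_attrs |= 0x1;            /* update made while holding lock */
        pthread_mutex_unlock(&oplock);
        return NULL;
}

int
main(void)
{
        pthread_t t;

        pthread_create(&t, NULL, competing_op, NULL);
        usleep(10 * 1000);              /* let the other side take oplock */

        /*
         * Wait for the competing operation by acquiring and releasing
         * its lock; once the acquire returns, the critical section has
         * completed and its update is visible.
         */
        pthread_mutex_lock(&oplock);
        /* nothing. */
        pthread_mutex_unlock(&oplock);

        printf("attrs = %#x\n", shared_attrs);
        pthread_join(t, NULL);
        return 0;
}

Sleeping on the mutex instead of spinning avoids burning CPU under
contention, and the acquire/release pair doubles as a memory barrier,
so the waiter is guaranteed to observe the competing operation's
updates when it restarts its loop.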
Modified files:

Index: src/sys/arch/x86/x86/pmap.c
diff -u src/sys/arch/x86/x86/pmap.c:1.371 src/sys/arch/x86/x86/pmap.c:1.372
--- src/sys/arch/x86/x86/pmap.c:1.371	Tue Mar 17 13:34:50 2020
+++ src/sys/arch/x86/x86/pmap.c	Tue Mar 17 18:40:35 2020
@@ -1,4 +1,4 @@
-/*	$NetBSD: pmap.c,v 1.371 2020/03/17 13:34:50 ad Exp $	*/
+/*	$NetBSD: pmap.c,v 1.372 2020/03/17 18:40:35 ad Exp $	*/
 
 /*
  * Copyright (c) 2008, 2010, 2016, 2017, 2019, 2020 The NetBSD Foundation, Inc.
@@ -130,7 +130,7 @@
  */
 
 #include <sys/cdefs.h>
-__KERNEL_RCSID(0, "$NetBSD: pmap.c,v 1.371 2020/03/17 13:34:50 ad Exp $");
+__KERNEL_RCSID(0, "$NetBSD: pmap.c,v 1.372 2020/03/17 18:40:35 ad Exp $");
 
 #include "opt_user_ldt.h"
 #include "opt_lockdebug.h"
@@ -640,13 +640,8 @@ pmap_compare_key(void *context, const vo
 static inline void
 pmap_ptp_init(struct vm_page *ptp)
 {
-	uint16_t *minidx, *maxidx;
 
-	minidx = (uint16_t *)&ptp->uanon;
-	maxidx = (uint16_t *)&ptp->uanon + 1;
-
-	*minidx = UINT16_MAX;
-	*maxidx = 0;
+	ptp->uanon = (struct vm_anon *)(vaddr_t)~0L;
 	rb_tree_init(&VM_PAGE_TO_PP(ptp)->pp_rb, &pmap_rbtree_ops);
 	PMAP_CHECK_PP(VM_PAGE_TO_PP(ptp));
 }
@@ -664,24 +659,15 @@ pmap_ptp_fini(struct vm_page *ptp)
 }
 
 /*
- * pmap_ptp_range_set: abuse ptp->uanon to record min/max idx of PTE
+ * pmap_ptp_range_set: abuse ptp->uanon to record minimum VA of PTE
  */
 static inline void
 pmap_ptp_range_set(struct vm_page *ptp, vaddr_t va)
 {
-	uint16_t *minidx, *maxidx;
-	u_int idx;
-
-	idx = pl1_pi(va);
-	KASSERT(idx < UINT16_MAX);
-	minidx = (uint16_t *)&ptp->uanon;
-	maxidx = (uint16_t *)&ptp->uanon + 1;
+	vaddr_t *min = (vaddr_t *)&ptp->uanon;
 
-	if (idx < *minidx) {
-		*minidx = idx;
-	}
-	if (idx > *maxidx) {
-		*maxidx = idx;
+	if (va < *min) {
+		*min = va;
 	}
 }
 
@@ -689,27 +675,18 @@ pmap_ptp_range_set(struct vm_page *ptp,
  * pmap_ptp_range_clip: abuse ptp->uanon to clip range of PTEs to remove
  */
 static inline void
-pmap_ptp_range_clip(struct vm_page *ptp, vaddr_t *startva, vaddr_t *endva,
-    pt_entry_t **pte)
+pmap_ptp_range_clip(struct vm_page *ptp, vaddr_t *startva, pt_entry_t **pte)
 {
-	vaddr_t sclip, eclip;
+	vaddr_t sclip;
 
 	if (ptp == NULL) {
 		return;
 	}
 
-	sclip = (*startva & L2_FRAME) +
-	    x86_ptob(*(uint16_t *)&ptp->uanon);
-	eclip = (*startva & L2_FRAME) +
-	    x86_ptob(*((uint16_t *)&ptp->uanon + 1) + 1);
-
-	KASSERT(sclip < eclip);
-
+	sclip = (vaddr_t)ptp->uanon;
 	sclip = (*startva < sclip ? sclip : *startva);
-	eclip = (*endva > eclip ? eclip : *endva);
 	*pte += (sclip - *startva) / PAGE_SIZE;
 	*startva = sclip;
-	*endva = eclip;
 }
 
 /*
@@ -2966,7 +2943,7 @@ pmap_remove_all(struct pmap *pmap)
 			    blkendva, &pv_tofree);
 
 			/* PTP should now be unused - free it. */
-			KASSERT(ptps[i]->wire_count <= 1);
+			KASSERT(ptps[i]->wire_count == 1);
 			pmap_free_ptp(pmap, ptps[i], va, ptes, pdes);
 		}
 		pmap_unmap_ptes(pmap, pmap2);
@@ -2982,6 +2959,9 @@ pmap_remove_all(struct pmap *pmap)
 
 	/* Verify that the pmap is now completely empty. */
 	pmap_check_ptps(pmap);
+	KASSERTMSG(pmap->pm_stats.resident_count == PDP_SIZE,
+	    "pmap %p not empty", pmap);
+
 	return true;
 }
 
@@ -3817,7 +3797,7 @@ pmap_remove_ptes(struct pmap *pmap, stru
 	 * mappings are very often sparse, so clip the given range to the
	 * range of PTEs that are known present in the PTP.
	 */
-	pmap_ptp_range_clip(ptp, &startva, &endva, &pte);
+	pmap_ptp_range_clip(ptp, &startva, &pte);
 
	/*
	 * note that ptpva points to the PTE that maps startva. this may
@@ -4168,9 +4148,9 @@ pmap_pp_remove(struct pmap_page *pp, pad
 {
 	struct pv_pte *pvpte;
 	struct vm_page *ptp;
+	uintptr_t sum;
 	uint8_t oattrs;
 	bool locked;
-	int count;
 
 	/*
 	 * Do an unlocked check to see if the page has no mappings, eg when
@@ -4179,20 +4159,19 @@ pmap_pp_remove(struct pmap_page *pp, pad
 	 * out, so we don't have to worry about concurrent attempts to enter
 	 * it (otherwise the caller either doesn't care or has screwed up).
 	 */
-	if (atomic_load_relaxed(&pp->pp_pte.pte_va) == 0 &&
-	    atomic_load_relaxed(&pp->pp_pte.pte_ptp) == NULL &&
-	    atomic_load_relaxed(&pp->pp_pvlist.lh_first) == NULL) {
+	sum = (uintptr_t)atomic_load_relaxed(&pp->pp_pte.pte_va);
+	sum |= (uintptr_t)atomic_load_relaxed(&pp->pp_pte.pte_ptp);
+	sum |= (uintptr_t)atomic_load_relaxed(&pp->pp_pvlist.lh_first);
+	if (sum == 0) {
 		return;
 	}
 
-	count = SPINLOCK_BACKOFF_MIN;
 	kpreempt_disable();
 	for (;;) {
 		struct pmap *pmap;
 		struct pv_entry *pve;
 		pt_entry_t opte;
 		vaddr_t va;
-		int error;
 
 		mutex_spin_enter(&pp->pp_lock);
 		if ((pvpte = pv_pte_first(pp)) == NULL) {
@@ -4228,25 +4207,35 @@ pmap_pp_remove(struct pmap_page *pp, pad
 			}
 			continue;
 		}
+		va = pvpte->pte_va;
 
-		error = pmap_sync_pv(pvpte, pa, ~0, &oattrs, &opte);
-		if (error == EAGAIN) {
-			/*
-			 * XXXAD Shouldn't need to loop here, as the mapping
-			 * can't disappear, seen as the pmap's lock is held.
-			 */
-			int hold_count;
-			KERNEL_UNLOCK_ALL(curlwp, &hold_count);
-			mutex_exit(&pmap->pm_lock);
-			if (ptp != NULL) {
-				pmap_destroy(pmap);
-			}
-			SPINLOCK_BACKOFF(count);
-			KERNEL_LOCK(hold_count, curlwp);
-			continue;
+		KASSERTMSG(pmap->pm_stats.resident_count > PDP_SIZE,
+		    "va %lx pmap %p ptp %p is empty", va, pmap, ptp);
+		KASSERTMSG(ptp == NULL || (ptp->flags & PG_FREE) == 0,
+		    "va %lx pmap %p ptp %p is free", va, pmap, ptp);
+		KASSERTMSG(ptp == NULL || ptp->wire_count > 1,
+		    "va %lx pmap %p ptp %p is empty", va, pmap, ptp);
+
+#ifdef DIAGNOSTIC	/* XXX Too expensive make DEBUG before April 2020 */
+		pmap_check_pv(pmap, ptp, pp, pvpte->pte_va, true);
+		rb_tree_t *tree = (ptp != NULL ?
+		    &VM_PAGE_TO_PP(ptp)->pp_rb : &pmap_kernel_rb);
+		pve = pmap_treelookup_pv(pmap, ptp, tree, va);
+		if (pve == NULL) {
+			KASSERTMSG(&pp->pp_pte == pvpte,
+			    "va %lx pmap %p ptp %p pvpte %p pve %p oops 1",
+			    va, pmap, ptp, pvpte, pve);
+		} else {
+			KASSERTMSG(&pve->pve_pte == pvpte,
+			    "va %lx pmap %p ptp %p pvpte %p pve %p oops 2",
+			    va, pmap, ptp, pvpte, pve);
+		}
+#endif
+
+		if (pmap_sync_pv(pvpte, pa, ~0, &oattrs, &opte)) {
+			panic("pmap_pp_remove: mapping not present");
 		}
 
-		va = pvpte->pte_va;
 		pve = pmap_lookup_pv(pmap, ptp, pp, va);
 		pmap_remove_pv(pmap, pp, ptp, va, pve, oattrs);
 
@@ -4322,6 +4311,7 @@ pmap_test_attrs(struct vm_page *pg, unsi
 {
 	struct pmap_page *pp;
 	struct pv_pte *pvpte;
+	struct pmap *pmap;
 	uint8_t oattrs;
 	u_int result;
 	paddr_t pa;
@@ -4331,17 +4321,29 @@ pmap_test_attrs(struct vm_page *pg, unsi
 		return true;
 	}
 	pa = VM_PAGE_TO_PHYS(pg);
+startover:
 	mutex_spin_enter(&pp->pp_lock);
 	for (pvpte = pv_pte_first(pp); pvpte; pvpte = pv_pte_next(pp, pvpte)) {
-		int error;
-
 		if ((pp->pp_attrs & testbits) != 0) {
 			break;
 		}
-		error = pmap_sync_pv(pvpte, pa, 0, &oattrs, NULL);
-		if (error == 0) {
-			pp->pp_attrs |= oattrs;
+		if (pmap_sync_pv(pvpte, pa, 0, &oattrs, NULL)) {
+			/*
+			 * raced with a V->P operation. wait for the other
+			 * side to finish by acquring pmap's lock. if no
+			 * wait, updates to pp_attrs by the other side may
+			 * go unseen.
+			 */
+			pmap = ptp_to_pmap(pvpte->pte_ptp);
+			pmap_reference(pmap);
+			mutex_spin_exit(&pp->pp_lock);
+			mutex_enter(&pmap->pm_lock);
+			/* nothing. */
+			mutex_exit(&pmap->pm_lock);
+			pmap_destroy(pmap);
+			goto startover;
 		}
+		pp->pp_attrs |= oattrs;
 	}
 	result = pp->pp_attrs & testbits;
 	mutex_spin_exit(&pp->pp_lock);
@@ -4358,24 +4360,27 @@ static bool
 pmap_pp_clear_attrs(struct pmap_page *pp, paddr_t pa, unsigned clearbits)
 {
 	struct pv_pte *pvpte;
+	struct pmap *pmap;
 	uint8_t oattrs;
 	u_int result;
-	int count;
 
-	count = SPINLOCK_BACKOFF_MIN;
 startover:
 	mutex_spin_enter(&pp->pp_lock);
 	for (pvpte = pv_pte_first(pp); pvpte; pvpte = pv_pte_next(pp, pvpte)) {
-		int error;
-
-		error = pmap_sync_pv(pvpte, pa, clearbits, &oattrs, NULL);
-		if (error == EAGAIN) {
-			int hold_count;
-			/* XXXAD wait for the pmap's lock instead. */
+		if (pmap_sync_pv(pvpte, pa, clearbits, &oattrs, NULL)) {
+			/*
+			 * raced with a V->P operation. wait for the other
+			 * side to finish by acquring pmap's lock. it is
+			 * probably unmapping the page, and it will be gone
+			 * when the loop is restarted.
+			 */
+			pmap = ptp_to_pmap(pvpte->pte_ptp);
+			pmap_reference(pmap);
 			mutex_spin_exit(&pp->pp_lock);
-			KERNEL_UNLOCK_ALL(curlwp, &hold_count);
-			SPINLOCK_BACKOFF(count);
-			KERNEL_LOCK(hold_count, curlwp);
+			mutex_enter(&pmap->pm_lock);
+			/* nothing. */
+			mutex_exit(&pmap->pm_lock);
+			pmap_destroy(pmap);
 			goto startover;
 		}
 		pp->pp_attrs |= oattrs;
@@ -4705,8 +4710,6 @@ pmap_enter_ma(struct pmap *pmap, vaddr_t
 				    error);
 			}
 		}
-		/* Remember min/max index of PTE in PTP. */
-		pmap_ptp_range_set(ptp, va);
 		tree = &VM_PAGE_TO_PP(ptp)->pp_rb;
 	} else {
 		/* Embedded PV entries rely on this. */
@@ -4800,14 +4803,14 @@ pmap_enter_ma(struct pmap *pmap, vaddr_t
 				new_pp->pp_pte.pte_va = 0;
 			}
 			mutex_spin_exit(&new_pp->pp_lock);
-			pmap_unmap_ptes(pmap, pmap2);
-			mutex_exit(&pmap->pm_lock);
 		}
+		pmap_unmap_ptes(pmap, pmap2);
 
 		/* Free new PTP. */
 		if (ptp != NULL && ptp->wire_count <= 1) {
 			pmap_free_ptp(pmap, ptp, va, ptes, pdes);
 		}
+		mutex_exit(&pmap->pm_lock);
 		return error;
 	}
 	break;
@@ -4824,8 +4827,12 @@ pmap_enter_ma(struct pmap *pmap, vaddr_t
 	 * Update statistics and PTP's reference count.
 	 */
 	pmap_stats_update_bypte(pmap, npte, opte);
-	if (ptp != NULL && !have_oldpa) {
-		ptp->wire_count++;
+	if (ptp != NULL) {
+		if (!have_oldpa) {
+			ptp->wire_count++;
+		}
+		/* Remember minimum VA in PTP. */
+		pmap_ptp_range_set(ptp, va);
 	}
 
 	KASSERT(ptp == NULL || ptp->wire_count > 1);
@@ -5636,8 +5643,6 @@ pmap_ept_enter(struct pmap *pmap, vaddr_
 				    error);
 			}
 		}
-		/* Remember min/max index of PTE in PTP. */
-		pmap_ptp_range_set(ptp, va);
 		tree = &VM_PAGE_TO_PP(ptp)->pp_rb;
 	} else {
 		/* Embedded PV entries rely on this. */
@@ -5719,8 +5724,12 @@ pmap_ept_enter(struct pmap *pmap, vaddr_
 	 * Update statistics and PTP's reference count.
 	 */
 	pmap_ept_stats_update_bypte(pmap, npte, opte);
-	if (ptp != NULL && !have_oldpa) {
-		ptp->wire_count++;
+	if (ptp != NULL) {
+		if (!have_oldpa) {
+			ptp->wire_count++;
+		}
+		/* Remember minimum VA in PTP. */
+		pmap_ptp_range_set(ptp, va);
 	}
 
 	KASSERT(ptp == NULL || ptp->wire_count > 1);
@@ -5952,7 +5961,7 @@ pmap_ept_remove_ptes(struct pmap *pmap,
 	 * mappings are very often sparse, so clip the given range to the
	 * range of PTEs that are known present in the PTP.
	 */
-	pmap_ptp_range_clip(ptp, &startva, &endva, &pte);
+	pmap_ptp_range_clip(ptp, &startva, &pte);
 
	/*
	 * note that ptpva points to the PTE that maps startva. this may