Module Name:	src
Committed By:	ad
Date:		Sun Mar 22 00:16:16 UTC 2020
Modified Files:
	src/sys/arch/x86/include: pmap.h
	src/sys/arch/x86/x86: pmap.c x86_tlb.c
	src/sys/dev/nvmm/x86: nvmm_x86_svm.c nvmm_x86_vmx.c

Log Message:
x86 pmap:

- Give pmap_remove_all() its own version of pmap_remove_ptes() that on
  native x86 does the bare minimum needed to clear out PTPs.  Cuts ~4%
  sys time on 'build.sh release' for me.

- pmap_sync_pv(): there's no need to issue a redundant TLB shootdown.
  The caller waits for the competing operation to finish.

- Bring 'options TLBSTATS' up to date.


To generate a diff of this commit:
cvs rdiff -u -r1.115 -r1.116 src/sys/arch/x86/include/pmap.h
cvs rdiff -u -r1.379 -r1.380 src/sys/arch/x86/x86/pmap.c
cvs rdiff -u -r1.17 -r1.18 src/sys/arch/x86/x86/x86_tlb.c
cvs rdiff -u -r1.57 -r1.58 src/sys/dev/nvmm/x86/nvmm_x86_svm.c
cvs rdiff -u -r1.51 -r1.52 src/sys/dev/nvmm/x86/nvmm_x86_vmx.c

Please note that diffs are not public domain; they are subject to the
copyright notices on the relevant files.
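For readers skimming the diff below, the first log item amounts to a change of
shape: rather than invalidating every PTE and queueing a per-page TLB
shootdown, pmap_remove_all() now has pmap_zap_ptp() disconnect the PV entries
and patch the pmap's stats without writing the PTEs at all, and then issues one
pmap-wide shootdown (TLBSHOOT_REMOVE_ALL).  What follows is a minimal userland
sketch of that trade-off, not kernel code; the toy_* names and counters are
invented for illustration only:

#include <stdint.h>
#include <stdio.h>

#define NPTE 8

struct toy_pmap {
	uint64_t pte[NPTE];	/* page table entries */
	unsigned resident;	/* stats to patch up */
	unsigned shootdowns;	/* TLB IPIs we would send */
};

/* Old shape: write each PTE and shoot it down individually. */
static void
toy_remove_ptes(struct toy_pmap *pm)
{
	for (unsigned i = 0; i < NPTE; i++) {
		if (pm->pte[i] == 0)
			continue;
		pm->pte[i] = 0;		/* modify the PTE */
		pm->resident--;
		pm->shootdowns++;	/* one shootdown per page */
	}
}

/* New shape: bookkeeping only, PTEs untouched, one shootdown at the end. */
static void
toy_zap_ptp(struct toy_pmap *pm)
{
	for (unsigned i = 0; i < NPTE; i++) {
		if (pm->pte[i] == 0)
			continue;
		pm->resident--;		/* PV entries and stats only */
	}
	pm->shootdowns++;		/* single TLBSHOOT_REMOVE_ALL */
}

int
main(void)
{
	struct toy_pmap a = { .pte = {1, 1, 1, 1, 1, 1, 1, 1}, .resident = 8 };
	struct toy_pmap b = a;

	toy_remove_ptes(&a);
	toy_zap_ptp(&b);
	printf("old: %u shootdowns, new: %u shootdown\n",
	    a.shootdowns, b.shootdowns);
	return 0;
}

The real function is subtler (wired counts, embedded vs. dynamic PV entries,
the RB tree), but the win is the same: the PTPs are torn down and the TLB
flushed once, instead of once per mapping.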
Modified files:

Index: src/sys/arch/x86/include/pmap.h
diff -u src/sys/arch/x86/include/pmap.h:1.115 src/sys/arch/x86/include/pmap.h:1.116
--- src/sys/arch/x86/include/pmap.h:1.115	Tue Mar 17 22:29:19 2020
+++ src/sys/arch/x86/include/pmap.h	Sun Mar 22 00:16:16 2020
@@ -1,4 +1,4 @@
-/*	$NetBSD: pmap.h,v 1.115 2020/03/17 22:29:19 ad Exp $	*/
+/*	$NetBSD: pmap.h,v 1.116 2020/03/22 00:16:16 ad Exp $	*/
 
 /*
  * Copyright (c) 1997 Charles D. Cranor and Washington University.
@@ -399,18 +399,15 @@ vaddr_t slotspace_rand(int, size_t, siz
 vaddr_t reserve_dumppages(vaddr_t); /* XXX: not a pmap fn */
 
 typedef enum tlbwhy {
-	TLBSHOOT_APTE,
+	TLBSHOOT_REMOVE_ALL,
 	TLBSHOOT_KENTER,
 	TLBSHOOT_KREMOVE,
-	TLBSHOOT_FREE_PTP1,
-	TLBSHOOT_FREE_PTP2,
+	TLBSHOOT_FREE_PTP,
 	TLBSHOOT_REMOVE_PTE,
-	TLBSHOOT_REMOVE_PTES,
-	TLBSHOOT_SYNC_PV1,
-	TLBSHOOT_SYNC_PV2,
+	TLBSHOOT_SYNC_PV,
 	TLBSHOOT_WRITE_PROTECT,
 	TLBSHOOT_ENTER,
-	TLBSHOOT_UPDATE,
+	TLBSHOOT_NVMM,
 	TLBSHOOT_BUS_DMA,
 	TLBSHOOT_BUS_SPACE,
 	TLBSHOOT__MAX,

Index: src/sys/arch/x86/x86/pmap.c
diff -u src/sys/arch/x86/x86/pmap.c:1.379 src/sys/arch/x86/x86/pmap.c:1.380
--- src/sys/arch/x86/x86/pmap.c:1.379	Fri Mar 20 19:06:14 2020
+++ src/sys/arch/x86/x86/pmap.c	Sun Mar 22 00:16:16 2020
@@ -1,4 +1,4 @@
-/*	$NetBSD: pmap.c,v 1.379 2020/03/20 19:06:14 ad Exp $	*/
+/*	$NetBSD: pmap.c,v 1.380 2020/03/22 00:16:16 ad Exp $	*/
 
 /*
  * Copyright (c) 2008, 2010, 2016, 2017, 2019, 2020 The NetBSD Foundation, Inc.
@@ -130,7 +130,7 @@
  */
 
 #include <sys/cdefs.h>
-__KERNEL_RCSID(0, "$NetBSD: pmap.c,v 1.379 2020/03/20 19:06:14 ad Exp $");
+__KERNEL_RCSID(0, "$NetBSD: pmap.c,v 1.380 2020/03/22 00:16:16 ad Exp $");
 
 #include "opt_user_ldt.h"
 #include "opt_lockdebug.h"
@@ -579,6 +579,32 @@ pv_pte_next(struct pmap_page *pp, struct
 	return pve_to_pvpte(LIST_NEXT(pvpte_to_pve(pvpte), pve_list));
 }
 
+static inline uint8_t
+pmap_pte_to_pp_attrs(pt_entry_t pte)
+{
+	uint8_t ret = 0;
+	if (pte & PTE_D)
+		ret |= PP_ATTRS_D;
+	if (pte & PTE_A)
+		ret |= PP_ATTRS_A;
+	if (pte & PTE_W)
+		ret |= PP_ATTRS_W;
+	return ret;
+}
+
+static inline pt_entry_t
+pmap_pp_attrs_to_pte(uint8_t attrs)
+{
+	pt_entry_t pte = 0;
+	if (attrs & PP_ATTRS_D)
+		pte |= PTE_D;
+	if (attrs & PP_ATTRS_A)
+		pte |= PTE_A;
+	if (attrs & PP_ATTRS_W)
+		pte |= PTE_W;
+	return pte;
+}
+
 /*
  * pmap_is_curpmap: is this pmap the one currently loaded [in %cr3]?
  * of course the kernel is always loaded
@@ -2033,7 +2059,7 @@ pmap_lookup_pv(const struct pmap *pmap,
  * [This mostly deals with shared mappings, for example shared libs
  * and executables.]
  *
- * Optimise for pmap_remove_all() which works by ascending scan:
+ * Optimise for pmap_remove_ptes() which works by ascending scan:
  * look at the lowest numbered node in the tree first.  The tree is
 * known non-empty because of the check above.  For short lived
 * processes where pmap_remove() isn't used much this gets close to
@@ -2287,7 +2313,7 @@ pmap_free_ptp(struct pmap *pmap, struct
 		invaladdr = level == 1 ? (vaddr_t)ptes :
 		    (vaddr_t)pdes[level - 2];
 		pmap_tlb_shootdown(pmap, invaladdr + index * PAGE_SIZE,
-		    opde, TLBSHOOT_FREE_PTP1);
+		    opde, TLBSHOOT_FREE_PTP);
 
 #if defined(XENPV)
 		pmap_tlb_shootnow();
@@ -2858,6 +2884,134 @@ pmap_destroy(struct pmap *pmap)
 }
 
 /*
+ * pmap_zap_ptp: clear out an entire PTP without modifying PTEs
+ *
+ * => caller must hold pmap's lock
+ * => PTP must be mapped into KVA
+ * => must be called with kernel preemption disabled
+ * => does as little work as possible
+ */
+static void
+pmap_zap_ptp(struct pmap *pmap, struct vm_page *ptp, pt_entry_t *pte,
+    vaddr_t startva, vaddr_t blkendva, struct pv_entry **pv_tofree)
+{
+#ifndef XEN
+	struct pv_entry *pve;
+	struct vm_page *pg;
+	struct pmap_page *pp;
+	pt_entry_t opte;
+	rb_tree_t *tree;
+	vaddr_t va;
+	int wired;
+	uint8_t oattrs;
+	u_int cnt;
+
+	KASSERT(mutex_owned(&pmap->pm_lock));
+	KASSERT(kpreempt_disabled());
+	KASSERT(pmap != pmap_kernel());
+	KASSERT(ptp->wire_count > 1);
+	KASSERT(ptp->wire_count - 1 <= PAGE_SIZE / sizeof(pt_entry_t));
+
+	/*
+	 * Start at the lowest entered VA, and scan until there are no more
+	 * PTEs in the PTPs.  The goal is to disconnect PV entries and patch
+	 * up the pmap's stats.  No PTEs will be modified.
+	 */
+	tree = &VM_PAGE_TO_PP(ptp)->pp_rb;
+	pve = RB_TREE_MIN(tree);
+	wired = 0;
+	va = (vaddr_t)ptp->uanon;
+	pte += ((va - startva) >> PAGE_SHIFT);
+
+	for (cnt = ptp->wire_count; cnt > 1; pte++, va += PAGE_SIZE) {
+		opte = *pte;
+		if (!pmap_valid_entry(opte)) {
+			continue;
+		}
+
+		/*
+		 * Count the PTE.  If it's not for a managed mapping
+		 * there's noting more to do.
+		 */
+		cnt--;
+		wired -= (opte & PTE_WIRED);
+		if ((opte & PTE_PVLIST) == 0) {
+#ifndef DOM0OPS
+			KASSERTMSG((PHYS_TO_VM_PAGE(pmap_pte2pa(opte)) == NULL),
+			    "managed page without PTE_PVLIST for %#"
+			    PRIxVADDR, va);
+			KASSERTMSG((pmap_pv_tracked(pmap_pte2pa(opte)) == NULL),
+			    "pv-tracked page without PTE_PVLIST for %#"
+			    PRIxVADDR, va);
+#endif
+			KASSERT(pmap_treelookup_pv(pmap, ptp, (ptp != NULL ?
+			    &VM_PAGE_TO_PP(ptp)->pp_rb : &pmap_kernel_rb),
+			    va) == NULL);
+			continue;
+		}
+
+		/*
+		 * "pve" now points to the lowest (by VA) dynamic PV entry
+		 * in the PTP.  If it's for this VA, take advantage of it to
+		 * avoid calling PHYS_TO_VM_PAGE().  Avoid modifying the RB
+		 * tree by skipping to the next VA in the tree whenever
+		 * there is a match here.  The tree will be cleared out in
+		 * one pass before return to pmap_remove_all().
+		 */
+		oattrs = pmap_pte_to_pp_attrs(opte);
+		if (pve != NULL && pve->pve_pte.pte_va == va) {
+			pp = pve->pve_pp;
+			KASSERT(pve->pve_pte.pte_ptp == ptp);
+			KASSERT(pp->pp_pte.pte_ptp != ptp ||
+			    pp->pp_pte.pte_va != va);
+			mutex_spin_enter(&pp->pp_lock);
+			pp->pp_attrs |= oattrs;
+			LIST_REMOVE(pve, pve_list);
+			mutex_spin_exit(&pp->pp_lock);
+			pve->pve_next = *pv_tofree;
+			*pv_tofree = pve;
+			pve = RB_TREE_NEXT(tree, pve);
+			continue;
+		}
+
+		/*
+		 * No entry in the tree so it must be embedded.  Look up the
+		 * page and cancel the embedded entry.
+		 */
+		if ((pg = PHYS_TO_VM_PAGE(pmap_pte2pa(opte))) != NULL) {
+			pp = VM_PAGE_TO_PP(pg);
+		} else if ((pp = pmap_pv_tracked(pmap_pte2pa(opte))) == NULL) {
+			paddr_t pa = pmap_pte2pa(opte);
+			panic("%s: PTE_PVLIST with pv-untracked page"
+			    " va = %#"PRIxVADDR"pa = %#"PRIxPADDR
+			    "(%#"PRIxPADDR")", __func__, va, pa, atop(pa));
+		}
+		mutex_spin_enter(&pp->pp_lock);
+		KASSERT(pp->pp_pte.pte_ptp == ptp);
+		KASSERT(pp->pp_pte.pte_va == va);
+		pp->pp_attrs |= oattrs;
+		pp->pp_pte.pte_ptp = NULL;
+		pp->pp_pte.pte_va = 0;
+		mutex_spin_exit(&pp->pp_lock);
+	}
+
+	/* PTP now empty - adjust the tree & stats to match. */
+	pmap_stats_update(pmap, -(ptp->wire_count - 1), wired / PTE_WIRED);
+	ptp->wire_count = 1;
+#ifdef DIAGNOSTIC
+	rb_tree_init(tree, &pmap_rbtree_ops);
+#endif
+#else	/* !XEN */
+	/*
+	 * XXXAD For XEN, it's not clear to me that we can do this, because
+	 * I guess the hypervisor keeps track of PTEs too.
+	 */
+	pmap_remove_ptes(pmap, ptp, (vaddr_t)pte, startva, blkendva,
+	    pv_tofree);
+#endif	/* !XEN */
+}
+
+/*
  * pmap_remove_all: remove all mappings from pmap in bulk.
  *
  * Ordinarily when removing mappings it's important to hold the UVM object's
@@ -2912,8 +3066,7 @@ pmap_remove_all(struct pmap *pmap)
 		KASSERT(pmap_find_ptp(pmap, va, 1) == ptps[i]);
 
 		/* Zap! */
-		pmap_remove_ptes(pmap, ptps[i],
-		    (vaddr_t)&ptes[pl1_i(va)], va,
+		pmap_zap_ptp(pmap, ptps[i], &ptes[pl1_i(va)], va,
 		    blkendva, &pv_tofree);
 
 		/* PTP should now be unused - free it. */
@@ -2922,6 +3075,7 @@ pmap_remove_all(struct pmap *pmap)
 	}
 	pmap_unmap_ptes(pmap, pmap2);
 	pmap_free_pvs(pmap, pv_tofree);
+	pmap_tlb_shootdown(pmap, -1L, 0, TLBSHOOT_REMOVE_ALL);
 	mutex_exit(&pmap->pm_lock);
 
 	/* Process deferred frees. */
@@ -3767,32 +3921,6 @@ pmap_remove_ptes(struct pmap *pmap, stru
 	}
 }
 
-static inline uint8_t
-pmap_pte_to_pp_attrs(pt_entry_t pte)
-{
-	uint8_t ret = 0;
-	if (pte & PTE_D)
-		ret |= PP_ATTRS_D;
-	if (pte & PTE_A)
-		ret |= PP_ATTRS_A;
-	if (pte & PTE_W)
-		ret |= PP_ATTRS_W;
-	return ret;
-}
-
-static inline pt_entry_t
-pmap_pp_attrs_to_pte(uint8_t attrs)
-{
-	pt_entry_t pte = 0;
-	if (attrs & PP_ATTRS_D)
-		pte |= PTE_D;
-	if (attrs & PP_ATTRS_A)
-		pte |= PTE_A;
-	if (attrs & PP_ATTRS_W)
-		pte |= PTE_W;
-	return pte;
-}
-
 /*
  * pmap_remove_pte: remove a single PTE from a PTP.
 *
@@ -4024,16 +4152,8 @@ pmap_sync_pv(struct pv_pte *pvpte, paddr
 			 * We lost a race with a V->P operation like
 			 * pmap_remove().  Wait for the competitor
 			 * reflecting pte bits into mp_attrs.
-			 *
-			 * Issue a redundant TLB shootdown so that
-			 * we can wait for its completion.
 			 */
 			pmap_unmap_pte();
-			if (clearbits != 0) {
-				pmap_tlb_shootdown(pmap, va,
-				    (pmap == pmap_kernel() ? PTE_G : 0),
-				    TLBSHOOT_SYNC_PV1);
-			}
 			return EAGAIN;
 		}
 
@@ -4067,7 +4187,7 @@ pmap_sync_pv(struct pv_pte *pvpte, paddr
 	} while (pmap_pte_cas(ptep, opte, npte) != opte);
 
 	if (need_shootdown) {
-		pmap_tlb_shootdown(pmap, va, opte, TLBSHOOT_SYNC_PV2);
+		pmap_tlb_shootdown(pmap, va, opte, TLBSHOOT_SYNC_PV);
 	}
 	pmap_unmap_pte();
 
@@ -5204,8 +5324,10 @@ pmap_update(struct pmap *pmap)
 			uvm_pagerealloc(ptp, NULL, 0);
 			PMAP_DUMMY_UNLOCK(pmap);
 
-			/* pmap zeros all pages before freeing */
-			ptp->flags |= PG_ZERO;
+			/*
+			 * XXX for PTPs freed by pmap_remove_ptes() but not
+			 * pmap_zap_ptp(), we could mark them PG_ZERO.
+			 */
 			uvm_pagefree(ptp);
 		}
 		mutex_exit(&pmap->pm_lock);
@@ -6016,15 +6138,8 @@ pmap_ept_sync_pv(struct vm_page *ptp, va
 			 * We lost a race with a V->P operation like
 			 * pmap_remove().  Wait for the competitor
 			 * reflecting pte bits into mp_attrs.
-			 *
-			 * Issue a redundant TLB shootdown so that
-			 * we can wait for its completion.
 			 */
 			pmap_unmap_pte();
-			if (clearbits != 0) {
-				pmap_tlb_shootdown(pmap, va, 0,
-				    TLBSHOOT_SYNC_PV1);
-			}
 			return EAGAIN;
 		}
 
@@ -6062,7 +6177,7 @@ pmap_ept_sync_pv(struct vm_page *ptp, va
 	} while (pmap_pte_cas(ptep, opte, npte) != opte);
 
 	if (need_shootdown) {
-		pmap_tlb_shootdown(pmap, va, 0, TLBSHOOT_SYNC_PV);
+		pmap_tlb_shootdown(pmap, va, 0, TLBSHOOT_SYNC_PV);
 	}
 	pmap_unmap_pte();
 

Index: src/sys/arch/x86/x86/x86_tlb.c
diff -u src/sys/arch/x86/x86/x86_tlb.c:1.17 src/sys/arch/x86/x86/x86_tlb.c:1.18
--- src/sys/arch/x86/x86/x86_tlb.c:1.17	Sun Feb 23 18:57:28 2020
+++ src/sys/arch/x86/x86/x86_tlb.c	Sun Mar 22 00:16:16 2020
@@ -1,4 +1,4 @@
-/*	$NetBSD: x86_tlb.c,v 1.17 2020/02/23 18:57:28 ad Exp $	*/
+/*	$NetBSD: x86_tlb.c,v 1.18 2020/03/22 00:16:16 ad Exp $	*/
 
 /*-
  * Copyright (c) 2008-2020 The NetBSD Foundation, Inc.
@@ -40,7 +40,7 @@
  */
 
 #include <sys/cdefs.h>
-__KERNEL_RCSID(0, "$NetBSD: x86_tlb.c,v 1.17 2020/02/23 18:57:28 ad Exp $");
+__KERNEL_RCSID(0, "$NetBSD: x86_tlb.c,v 1.18 2020/03/22 00:16:16 ad Exp $");
 
 #include <sys/param.h>
 #include <sys/kernel.h>
@@ -124,20 +124,17 @@ static struct evcnt tlbstat_kernel[TLBS
 static struct evcnt		tlbstat_single_req;
 static struct evcnt		tlbstat_single_issue;
 static const char *		tlbstat_name[ ] = {
-	"APTE",
+	"REMOVE_ALL",
 	"KENTER",
 	"KREMOVE",
-	"FREE_PTP1",
-	"FREE_PTP2",
+	"FREE_PTP",
 	"REMOVE_PTE",
-	"REMOVE_PTES",
-	"SYNC_PV1",
-	"SYNC_PV2",
+	"SYNC_PV",
 	"WRITE_PROTECT",
 	"ENTER",
-	"UPDATE",
+	"NVMM",
 	"BUS_DMA",
-	"BUS_SPACE"
+	"BUS_SPACE",
 };
 #endif
 

Index: src/sys/dev/nvmm/x86/nvmm_x86_svm.c
diff -u src/sys/dev/nvmm/x86/nvmm_x86_svm.c:1.57 src/sys/dev/nvmm/x86/nvmm_x86_svm.c:1.58
--- src/sys/dev/nvmm/x86/nvmm_x86_svm.c:1.57	Sat Mar 14 18:08:39 2020
+++ src/sys/dev/nvmm/x86/nvmm_x86_svm.c	Sun Mar 22 00:16:16 2020
@@ -1,4 +1,4 @@
-/*	$NetBSD: nvmm_x86_svm.c,v 1.57 2020/03/14 18:08:39 ad Exp $	*/
+/*	$NetBSD: nvmm_x86_svm.c,v 1.58 2020/03/22 00:16:16 ad Exp $	*/
 
 /*
  * Copyright (c) 2018-2020 The NetBSD Foundation, Inc.
@@ -30,7 +30,7 @@
  */
 
 #include <sys/cdefs.h>
-__KERNEL_RCSID(0, "$NetBSD: nvmm_x86_svm.c,v 1.57 2020/03/14 18:08:39 ad Exp $");
+__KERNEL_RCSID(0, "$NetBSD: nvmm_x86_svm.c,v 1.58 2020/03/22 00:16:16 ad Exp $");
 
 #include <sys/param.h>
 #include <sys/systm.h>
@@ -2214,7 +2214,7 @@ svm_tlb_flush(struct pmap *pm)
 	atomic_inc_64(&machdata->mach_htlb_gen);
 
 	/* Generates IPIs, which cause #VMEXITs. */
-	pmap_tlb_shootdown(pmap_kernel(), -1, PTE_G, TLBSHOOT_UPDATE);
+	pmap_tlb_shootdown(pmap_kernel(), -1, PTE_G, TLBSHOOT_NVMM);
 }
 
 static void

Index: src/sys/dev/nvmm/x86/nvmm_x86_vmx.c
diff -u src/sys/dev/nvmm/x86/nvmm_x86_vmx.c:1.51 src/sys/dev/nvmm/x86/nvmm_x86_vmx.c:1.52
--- src/sys/dev/nvmm/x86/nvmm_x86_vmx.c:1.51	Sat Mar 14 18:08:39 2020
+++ src/sys/dev/nvmm/x86/nvmm_x86_vmx.c	Sun Mar 22 00:16:16 2020
@@ -1,4 +1,4 @@
-/*	$NetBSD: nvmm_x86_vmx.c,v 1.51 2020/03/14 18:08:39 ad Exp $	*/
+/*	$NetBSD: nvmm_x86_vmx.c,v 1.52 2020/03/22 00:16:16 ad Exp $	*/
 
 /*
  * Copyright (c) 2018-2020 The NetBSD Foundation, Inc.
@@ -30,7 +30,7 @@
  */
 
 #include <sys/cdefs.h>
-__KERNEL_RCSID(0, "$NetBSD: nvmm_x86_vmx.c,v 1.51 2020/03/14 18:08:39 ad Exp $");
+__KERNEL_RCSID(0, "$NetBSD: nvmm_x86_vmx.c,v 1.52 2020/03/22 00:16:16 ad Exp $");
 
 #include <sys/param.h>
 #include <sys/systm.h>
@@ -2866,7 +2866,7 @@ vmx_tlb_flush(struct pmap *pm)
 	atomic_inc_64(&machdata->mach_htlb_gen);
 
 	/* Generates IPIs, which cause #VMEXITs. */
-	pmap_tlb_shootdown(pmap_kernel(), -1, PTE_G, TLBSHOOT_UPDATE);
+	pmap_tlb_shootdown(pmap_kernel(), -1, PTE_G, TLBSHOOT_NVMM);
 }
 
 static void
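One footnote on the pmap_sync_pv() hunks above: the deleted code issued a
shootdown purely so the loser of a P->V vs. V->P race had something to wait
on.  The callers already spin on EAGAIN until the competitor finishes, which
is why that shootdown was redundant.  A toy model of the caller-side contract
(again userland, with invented toy_* names, not the kernel's actual loop):

#include <errno.h>
#include <stdbool.h>
#include <stdio.h>

static bool competitor_done;	/* stands in for a racing pmap_remove() */

/* Fails EAGAIN-style until the competing operation has completed. */
static int
toy_sync_pv(void)
{
	return competitor_done ? 0 : EAGAIN;
}

int
main(void)
{
	int spins = 0;

	/* The caller's contract: back off and retry on EAGAIN. */
	while (toy_sync_pv() == EAGAIN) {
		if (++spins == 3)
			competitor_done = true;	/* competitor finishes */
	}
	printf("synced after %d retries, no shootdown needed to wait\n", spins);
	return 0;
}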