Module Name:	src
Committed By:	ad
Date:		Wed May 27 19:26:43 UTC 2020
Modified Files:
	src/sys/arch/x86/x86: pmap.c

Log Message:
Reported-by: syzbot+c1770938bb3fa7c08...@syzkaller.appspotmail.com
Reported-by: syzbot+ae26209c7d7f06e0b...@syzkaller.appspotmail.com

Can't defer freeing PV entries for the kernel's pmap until pmap_update(),
as that means taking locks and potentially recursing, and pmap_update()
for the kernel is used in all sorts of sensitive places.

To generate a diff of this commit:
cvs rdiff -u -r1.394 -r1.395 src/sys/arch/x86/x86/pmap.c

Please note that diffs are not public domain; they are subject to the
copyright notices on the relevant files.
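The idiom the change introduces can be shown outside the kernel: each
operation that already holds the pmap lock drains its own deferred frees
before dropping the lock, so nothing is left for a later caller that may
not be allowed to block.  A minimal stand-alone C sketch of that pattern
(all names - obj, objmap, objmap_drain - are invented for illustration
and are not NetBSD interfaces):

#include <assert.h>
#include <pthread.h>
#include <stdlib.h>

struct obj {
	struct obj *next;
};

struct objmap {
	pthread_mutex_t lock;
	struct obj *deferred;	/* objects queued for freeing */
};

/* Queue an object for freeing; caller holds om->lock. */
static void
objmap_defer_free(struct objmap *om, struct obj *o)
{
	o->next = om->deferred;
	om->deferred = o;
}

/*
 * Drain deferred frees; caller holds om->lock.  The analogue of
 * pmap_drain_pv(): since the lock is already held, there is no
 * try-acquire, and the drain is safe wherever the operation itself is.
 */
static void
objmap_drain(struct objmap *om)
{
	struct obj *o;

	while ((o = om->deferred) != NULL) {
		om->deferred = o->next;
		free(o);
	}
}

/* An operation: mutate under the lock, then drain before unlocking. */
static void
objmap_remove(struct objmap *om, struct obj *o)
{
	pthread_mutex_lock(&om->lock);
	objmap_defer_free(om, o);	/* work that queues a free */
	objmap_drain(om);		/* drain while the lock is held */
	pthread_mutex_unlock(&om->lock);
}

int
main(void)
{
	struct objmap om = { PTHREAD_MUTEX_INITIALIZER, NULL };
	struct obj *o = malloc(sizeof(*o));

	assert(o != NULL);
	objmap_remove(&om, o);
	assert(om.deferred == NULL);
	return 0;
}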
Modified files:

Index: src/sys/arch/x86/x86/pmap.c
diff -u src/sys/arch/x86/x86/pmap.c:1.394 src/sys/arch/x86/x86/pmap.c:1.395
--- src/sys/arch/x86/x86/pmap.c:1.394	Tue May 26 10:10:31 2020
+++ src/sys/arch/x86/x86/pmap.c	Wed May 27 19:26:43 2020
@@ -1,4 +1,4 @@
-/*	$NetBSD: pmap.c,v 1.394 2020/05/26 10:10:31 bouyer Exp $	*/
+/*	$NetBSD: pmap.c,v 1.395 2020/05/27 19:26:43 ad Exp $	*/
 
 /*
  * Copyright (c) 2008, 2010, 2016, 2017, 2019, 2020 The NetBSD Foundation, Inc.
@@ -130,7 +130,7 @@
  */
 
 #include <sys/cdefs.h>
-__KERNEL_RCSID(0, "$NetBSD: pmap.c,v 1.394 2020/05/26 10:10:31 bouyer Exp $");
+__KERNEL_RCSID(0, "$NetBSD: pmap.c,v 1.395 2020/05/27 19:26:43 ad Exp $");
 
 #include "opt_user_ldt.h"
 #include "opt_lockdebug.h"
@@ -494,6 +494,7 @@ static int pmap_pvp_ctor(void *, void *,
 static void pmap_pvp_dtor(void *, void *);
 static struct pv_entry *pmap_alloc_pv(struct pmap *);
 static void pmap_free_pv(struct pmap *, struct pv_entry *);
+static void pmap_drain_pv(struct pmap *);
 
 static void pmap_alloc_level(struct pmap *, vaddr_t, long *);
 
@@ -2073,6 +2074,25 @@ pmap_free_pv(struct pmap *pmap, struct p
 }
 
 /*
+ * pmap_drain_pv: free full PV pages.
+ */
+static void
+pmap_drain_pv(struct pmap *pmap)
+{
+	struct pv_page *pvp;
+
+	KASSERT(mutex_owned(&pmap->pm_lock));
+
+	while ((pvp = LIST_FIRST(&pmap->pm_pvp_full)) != NULL) {
+		LIST_REMOVE(pvp, pvp_list);
+		KASSERT(pvp->pvp_pmap == pmap);
+		KASSERT(pvp->pvp_nfree == PVE_PER_PVP);
+		pvp->pvp_pmap = NULL;
+		pool_cache_put(&pmap_pvp_cache, pvp);
+	}
+}
+
+/*
  * pmap_check_pv: verify {VA, PTP} pair is either tracked/untracked by page
  */
 static void
@@ -2940,12 +2960,13 @@ pmap_destroy(struct pmap *pmap)
 	 * handle any deferred frees.
 	 */
 
+	mutex_enter(&pmap->pm_lock);
 	if (pmap->pm_pve != NULL) {
-		mutex_enter(&pmap->pm_lock);
 		pmap_free_pv(pmap, pmap->pm_pve);
-		mutex_exit(&pmap->pm_lock);
 		pmap->pm_pve = NULL;
 	}
+	pmap_drain_pv(pmap);
+	mutex_exit(&pmap->pm_lock);
 	pmap_update(pmap);
 
 	/*
@@ -3088,7 +3109,7 @@ pmap_zap_ptp(struct pmap *pmap, struct v
 			mutex_spin_exit(&pp->pp_lock);
 
 			/*
-			 * pve won't be touched again until pmap_update(),
+			 * pve won't be touched again until pmap_drain_pv(),
 			 * so it's still safe to traverse the tree.
 			 */
 			pmap_free_pv(pmap, pve);
@@ -3123,6 +3144,7 @@ pmap_zap_ptp(struct pmap *pmap, struct v
 #ifdef DIAGNOSTIC
 	rb_tree_init(tree, &pmap_rbtree_ops);
 #endif
+	pmap_drain_pv(pmap);
 #else	/* !XENPV */
 	/*
 	 * XXXAD For XEN, it's not clear to me that we can do this, because
@@ -4187,6 +4209,7 @@ pmap_remove_locked(struct pmap *pmap, va
 		}
 	}
 	pmap_unmap_ptes(pmap, pmap2);
+	pmap_drain_pv(pmap);
 }
 
 /*
@@ -4432,6 +4455,7 @@ pmap_pp_remove(struct pmap_page *pp, pad
 			pmap_stats_update_bypte(pmap, 0, opte);
 		}
 		pmap_tlb_shootnow();
+		pmap_drain_pv(pmap);
 		mutex_exit(&pmap->pm_lock);
 		if (ptp != NULL) {
 			pmap_destroy(pmap);
@@ -5076,6 +5100,7 @@ same_pa:
 	    ((opte ^ npte) & (PTE_FRAME | PTE_W)) != 0) {
 		pmap_tlb_shootdown(pmap, va, opte, TLBSHOOT_ENTER);
 	}
+	pmap_drain_pv(pmap);
 	mutex_exit(&pmap->pm_lock);
 	return 0;
 }
@@ -5322,6 +5347,7 @@ pmap_enter_gnt(struct pmap *pmap, vaddr_
 		KASSERT(pmap_treelookup_pv(pmap, ptp, tree, va) == NULL);
 	}
 
+	pmap_drain_pv(pmap);
 	mutex_exit(&pmap->pm_lock);
 	return op->status;
 }
@@ -5716,10 +5742,8 @@ pmap_dump(struct pmap *pmap, vaddr_t sva
 void
 pmap_update(struct pmap *pmap)
 {
-	struct pv_page *pvp;
 	struct pmap_page *pp;
 	struct vm_page *ptp;
-	uintptr_t sum;
 
 	/*
 	 * Initiate any pending TLB shootdowns.  Wait for them to
@@ -5733,18 +5757,12 @@ pmap_update(struct pmap *pmap)
 	 * Now that shootdowns are complete, process deferred frees.  This
 	 * is an unlocked check, but is safe as we're only interested in
 	 * work done in this LWP - we won't get a false negative.
-	 *
-	 * If pmap_kernel(), this can be called from interrupt context or
-	 * while holding a spinlock so we can't wait on the pmap lock.  No
-	 * big deal as we'll catch up eventually (even for user pmaps, in
-	 * pmap_destroy() when there's never contention on the lock).
-	 */
-	sum = (uintptr_t)atomic_load_relaxed(&pmap->pm_gc_ptp.lh_first);
-	sum |= (uintptr_t)atomic_load_relaxed(&pmap->pm_pvp_full.lh_first);
-	if (__predict_true(sum == 0 || cpu_intr_p() ||
-	    !mutex_tryenter(&pmap->pm_lock))) {
+	 */
+	if (atomic_load_relaxed(&pmap->pm_gc_ptp.lh_first) == NULL) {
 		return;
-	}
+	}
+
+	mutex_enter(&pmap->pm_lock);
 	while ((ptp = LIST_FIRST(&pmap->pm_gc_ptp)) != NULL) {
 		KASSERT(ptp->wire_count == 0);
 		KASSERT(ptp->uanon == NULL);
@@ -5768,13 +5786,6 @@ pmap_update(struct pmap *pmap)
 		ptp->flags |= PG_ZERO;
 		uvm_pagefree(ptp);
 	}
-	while ((pvp = LIST_FIRST(&pmap->pm_pvp_full)) != NULL) {
-		LIST_REMOVE(pvp, pvp_list);
-		KASSERT(pvp->pvp_pmap == pmap);
-		KASSERT(pvp->pvp_nfree == PVE_PER_PVP);
-		pvp->pvp_pmap = NULL;
-		pool_cache_put(&pmap_pvp_cache, pvp);
-	}
 	mutex_exit(&pmap->pm_lock);
 }
 
@@ -6311,6 +6322,7 @@ same_pa:
 	if (accessed && ((opte ^ npte) & (PTE_FRAME | EPT_W)) != 0) {
 		pmap_tlb_shootdown(pmap, va, 0, TLBSHOOT_ENTER);
 	}
+	pmap_drain_pv(pmap);
 	mutex_exit(&pmap->pm_lock);
 	return 0;
 }
@@ -6533,6 +6545,7 @@ pmap_ept_remove(struct pmap *pmap, vaddr
 	}
 
 	kpreempt_enable();
+	pmap_drain_pv(pmap);
 	mutex_exit(&pmap->pm_lock);
 }
 
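The rewritten pmap_update() fast path above is the check-then-lock idiom:
an unlocked relaxed load of the list head decides whether the lock needs
to be taken at all.  A stale non-NULL only costs a needless lock
acquisition, and work queued by the current LWP is always visible to it,
so there is no false negative for this thread's own work.  A stand-alone
C11 sketch of the idiom (the names gc_head, gc_defer, gc_process are
invented for illustration, not taken from the kernel):

#include <assert.h>
#include <pthread.h>
#include <stdatomic.h>
#include <stdlib.h>

struct node {
	struct node *next;
};

static _Atomic(struct node *) gc_head;	/* deferred-free list head */
static pthread_mutex_t gc_lock = PTHREAD_MUTEX_INITIALIZER;

/* Queue a node for later freeing (single producer shown for brevity). */
static void
gc_defer(struct node *n)
{
	n->next = atomic_load_explicit(&gc_head, memory_order_relaxed);
	atomic_store_explicit(&gc_head, n, memory_order_relaxed);
}

static void
gc_process(void)
{
	struct node *n;

	/* Fast path: nothing queued, so don't touch the lock at all. */
	if (atomic_load_explicit(&gc_head, memory_order_relaxed) == NULL)
		return;

	/* Slow path: take the lock and drain the list. */
	pthread_mutex_lock(&gc_lock);
	while ((n = atomic_load_explicit(&gc_head,
	    memory_order_relaxed)) != NULL) {
		atomic_store_explicit(&gc_head, n->next,
		    memory_order_relaxed);
		free(n);
	}
	pthread_mutex_unlock(&gc_lock);
}

int
main(void)
{
	struct node *n = malloc(sizeof(*n));

	assert(n != NULL);
	gc_defer(n);
	gc_process();	/* slow path: takes the lock and frees the node */
	gc_process();	/* fast path: relaxed load sees NULL and returns */
	return 0;
}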