Module Name:    src
Committed By:   ad
Date:           Sun Jan 12 13:01:12 UTC 2020

Modified Files:
        src/sys/arch/x86/include: pmap.h pmap_pv.h
        src/sys/arch/x86/x86: pmap.c vm_machdep.c x86_tlb.c

Log Message:
x86 pmap:

- It turns out that every page the pmap frees is necessarily zeroed.  Tell
  the VM system about this and use the pmap as a source of pre-zeroed pages.

- Redo deferred freeing of PTPs more elegantly, including the integration with
  pmap_remove_all().  This fixes problems with nvmm, and possibly also a crash
  discovered during fuzzing.

Reported-by: syzbot+a97186518c84f1d85...@syzkaller.appspotmail.com
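
For context, a minimal userspace sketch (not the committed code) of the
deferred PTP-free pattern the diff below introduces: each pmap keeps a
LIST of PTPs queued for freeing (marked PP_FREEING with wire_count of 0),
and pmap_update() drains that queue once TLB shootdowns are complete,
handing the pages back marked PG_ZERO since the pmap only frees zeroed
pages.  The struct layouts and the stand-ins for uvm_pagefree() are
simplifications, not the kernel's definitions.

#include <sys/queue.h>
#include <assert.h>
#include <stdio.h>
#include <stdlib.h>

#define PP_FREEING	2	/* page is queued for deferred free */
#define PG_ZERO		0x01	/* page contents are known to be zeroed */

struct vm_page {
	LIST_ENTRY(vm_page) pp_link;	/* stand-in for mdpage.mp_pp.pp_link */
	unsigned int wire_count;
	unsigned int pp_flags;
	unsigned int flags;
};

struct pmap {
	LIST_HEAD(, vm_page) pm_gc_ptp;	/* PTPs queued for free */
};

/* Queue a no-longer-used PTP; the actual free is deferred to pmap_update(). */
static void
pmap_freepage(struct pmap *pmap, struct vm_page *ptp)
{
	assert(ptp->wire_count == 1);
	ptp->wire_count = 0;
	assert((ptp->pp_flags & PP_FREEING) == 0);
	ptp->pp_flags |= PP_FREEING;
	LIST_INSERT_HEAD(&pmap->pm_gc_ptp, ptp, pp_link);
}

/* Drain the queue after shootdowns: pages go back to the allocator zeroed. */
static void
pmap_update(struct pmap *pmap)
{
	struct vm_page *ptp;

	while ((ptp = LIST_FIRST(&pmap->pm_gc_ptp)) != NULL) {
		LIST_REMOVE(ptp, pp_link);
		assert((ptp->pp_flags & PP_FREEING) != 0);
		assert(ptp->wire_count == 0);
		ptp->pp_flags &= ~PP_FREEING;
		ptp->flags |= PG_ZERO;	/* pmap zeros all pages before freeing */
		free(ptp);		/* stand-in for uvm_pagefree() */
	}
}

int
main(void)
{
	struct pmap pm;
	struct vm_page *ptp = calloc(1, sizeof(*ptp));

	LIST_INIT(&pm.pm_gc_ptp);
	ptp->wire_count = 1;
	pmap_freepage(&pm, ptp);
	pmap_update(&pm);
	printf("queue drained: %s\n", LIST_EMPTY(&pm.pm_gc_ptp) ? "yes" : "no");
	return 0;
}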


To generate a diff of this commit:
cvs rdiff -u -r1.108 -r1.109 src/sys/arch/x86/include/pmap.h
cvs rdiff -u -r1.9 -r1.10 src/sys/arch/x86/include/pmap_pv.h
cvs rdiff -u -r1.354 -r1.355 src/sys/arch/x86/x86/pmap.c
cvs rdiff -u -r1.39 -r1.40 src/sys/arch/x86/x86/vm_machdep.c
cvs rdiff -u -r1.13 -r1.14 src/sys/arch/x86/x86/x86_tlb.c

Please note that diffs are not public domain; they are subject to the
copyright notices on the relevant files.

Modified files:

Index: src/sys/arch/x86/include/pmap.h
diff -u src/sys/arch/x86/include/pmap.h:1.108 src/sys/arch/x86/include/pmap.h:1.109
--- src/sys/arch/x86/include/pmap.h:1.108	Sat Jan  4 22:49:20 2020
+++ src/sys/arch/x86/include/pmap.h	Sun Jan 12 13:01:11 2020
@@ -1,4 +1,4 @@
-/*	$NetBSD: pmap.h,v 1.108 2020/01/04 22:49:20 ad Exp $	*/
+/*	$NetBSD: pmap.h,v 1.109 2020/01/12 13:01:11 ad Exp $	*/
 
 /*
  * Copyright (c) 1997 Charles D. Cranor and Washington University.
@@ -262,7 +262,7 @@ struct pmap {
 #if !defined(__x86_64__)
 	vaddr_t pm_hiexec;		/* highest executable mapping */
 #endif /* !defined(__x86_64__) */
-	int pm_flags;			/* see below */
+	struct lwp *pm_remove_all;	/* who's emptying the pmap */
 
 	union descriptor *pm_ldt;	/* user-set LDT */
 	size_t pm_ldt_len;		/* size of LDT in bytes */
@@ -273,7 +273,7 @@ struct pmap {
 	kcpuset_t *pm_xen_ptp_cpus;	/* mask of CPUs which have this pmap's
 					 ptp mapped */
 	uint64_t pm_ncsw;		/* for assertions */
-	struct vm_page *pm_gc_ptp;	/* pages from pmap g/c */
+	LIST_HEAD(,vm_page) pm_gc_ptp;	/* PTPs queued for free */
 
 	/* Used by NVMM. */
 	int (*pm_enter)(struct pmap *, vaddr_t, paddr_t, vm_prot_t, u_int);
@@ -580,7 +580,6 @@ void	pmap_kenter_ma(vaddr_t, paddr_t, vm
 int	pmap_enter_ma(struct pmap *, vaddr_t, paddr_t, paddr_t,
 	    vm_prot_t, u_int, int);
 bool	pmap_extract_ma(pmap_t, vaddr_t, paddr_t *);
-void	pmap_free_ptps(struct vm_page *);
 
 paddr_t pmap_get_physpage(void);
 

Index: src/sys/arch/x86/include/pmap_pv.h
diff -u src/sys/arch/x86/include/pmap_pv.h:1.9 src/sys/arch/x86/include/pmap_pv.h:1.10
--- src/sys/arch/x86/include/pmap_pv.h:1.9	Sat Jan  4 22:49:20 2020
+++ src/sys/arch/x86/include/pmap_pv.h	Sun Jan 12 13:01:11 2020
@@ -1,4 +1,4 @@
-/*	$NetBSD: pmap_pv.h,v 1.9 2020/01/04 22:49:20 ad Exp $	*/
+/*	$NetBSD: pmap_pv.h,v 1.10 2020/01/12 13:01:11 ad Exp $	*/
 
 /*-
  * Copyright (c)2008 YAMAMOTO Takashi,
@@ -69,7 +69,7 @@ struct pmap_page {
 		struct pv_pte u_pte;
 
 		/* PTPs */
-		struct vm_page *u_link;
+		LIST_ENTRY(vm_page) u_link;
 	} pp_u;
 	LIST_HEAD(, pv_entry) pp_pvlist;
 #define	pp_pte	pp_u.u_pte
@@ -83,6 +83,7 @@ struct pmap_page {
 
 /* pp_flags */
 #define	PP_EMBEDDED	1
+#define	PP_FREEING	2
 
 #define	PMAP_PAGE_INIT(pp)	LIST_INIT(&(pp)->pp_pvlist)
 

Index: src/sys/arch/x86/x86/pmap.c
diff -u src/sys/arch/x86/x86/pmap.c:1.354 src/sys/arch/x86/x86/pmap.c:1.355
--- src/sys/arch/x86/x86/pmap.c:1.354	Tue Jan  7 21:18:24 2020
+++ src/sys/arch/x86/x86/pmap.c	Sun Jan 12 13:01:11 2020
@@ -1,7 +1,7 @@
-/*	$NetBSD: pmap.c,v 1.354 2020/01/07 21:18:24 ad Exp $	*/
+/*	$NetBSD: pmap.c,v 1.355 2020/01/12 13:01:11 ad Exp $	*/
 
 /*
- * Copyright (c) 2008, 2010, 2016, 2017, 2019 The NetBSD Foundation, Inc.
+ * Copyright (c) 2008, 2010, 2016, 2017, 2019, 2020 The NetBSD Foundation, Inc.
  * All rights reserved.
  *
  * This code is derived from software contributed to The NetBSD Foundation
@@ -130,7 +130,7 @@
  */
 
 #include <sys/cdefs.h>
-__KERNEL_RCSID(0, "$NetBSD: pmap.c,v 1.354 2020/01/07 21:18:24 ad Exp $");
+__KERNEL_RCSID(0, "$NetBSD: pmap.c,v 1.355 2020/01/12 13:01:11 ad Exp $");
 
 #include "opt_user_ldt.h"
 #include "opt_lockdebug.h"
@@ -419,9 +419,9 @@ static void pmap_unget_ptp(struct pmap *
 static void pmap_install_ptp(struct pmap *, struct pmap_ptparray *, vaddr_t,
     pd_entry_t * const *);
 static struct vm_page *pmap_find_ptp(struct pmap *, vaddr_t, paddr_t, int);
-static void pmap_freepages(struct pmap *, struct vm_page *);
+static void pmap_freepage(struct pmap *, struct vm_page *, int);
 static void pmap_free_ptp(struct pmap *, struct vm_page *, vaddr_t,
-    pt_entry_t *, pd_entry_t * const *, struct vm_page **);
+    pt_entry_t *, pd_entry_t * const *);
 static bool pmap_remove_pte(struct pmap *, struct vm_page *, pt_entry_t *,
     vaddr_t, struct pv_entry **);
 static void pmap_remove_ptes(struct pmap *, struct vm_page *, vaddr_t, vaddr_t,
@@ -435,6 +435,7 @@ static void pmap_alloc_level(struct pmap
 
 static void pmap_load1(struct lwp *, struct pmap *, struct pmap *);
 static void pmap_reactivate(struct pmap *);
+static void pmap_dropref(struct pmap *);
 
 /*
  * p m a p   h e l p e r   f u n c t i o n s
@@ -667,7 +668,7 @@ pmap_unmap_ptes(struct pmap *pmap, struc
 
 	/* Toss reference to other pmap taken earlier. */
 	if (pmap2 != NULL) {
-		pmap_destroy(pmap2);
+		pmap_dropref(pmap2);
 	}
 }
 
@@ -1953,40 +1954,51 @@ pmap_find_ptp(struct pmap *pmap, vaddr_t
 		return (pmap->pm_ptphint[lidx]);
 	}
 	pg = uvm_pagelookup(&pmap->pm_obj[lidx], ptp_va2o(va, level));
-
-	KASSERT(pg == NULL || pg->wire_count >= 1);
+	if (pg != NULL) {
+		if (__predict_false(pg->wire_count == 0)) {
+			/* This page is queued to be freed - ignore. */
+			KASSERT((VM_PAGE_TO_PP(pg)->pp_flags &
+			    PP_FREEING) != 0);
+			pg = NULL;
+		} else {
+			KASSERT((VM_PAGE_TO_PP(pg)->pp_flags &
+			    PP_FREEING) == 0);
+		}
+	}
 	return pg;
 }
 
-static void
-pmap_freepages(struct pmap *pmap, struct vm_page *ptp_tofree)
+static inline void
+pmap_freepage(struct pmap *pmap, struct vm_page *ptp, int level)
 {
-	struct vm_page *ptp;
-	lwp_t *l;
+	struct pmap_page *pp;
 	int lidx;
 
-	while ((ptp = ptp_tofree) != NULL) {
-		KASSERT(ptp->wire_count == 1);
-		for (lidx = 0; lidx < __arraycount(pmap->pm_obj); lidx++) {
-			if (pmap->pm_ptphint[lidx] == ptp) {
-				pmap->pm_ptphint[lidx] = NULL;
-			}
-		}
-		pmap_stats_update(pmap, -1, 0);
-		ptp->wire_count = 0;
-		uvm_pagerealloc(ptp, NULL, 0);
-		l = curlwp;
-		KASSERT((l->l_pflag & LP_INTR) == 0);
-		ptp_tofree = VM_PAGE_TO_PP(ptp)->pp_link;
-		VM_PAGE_TO_PP(ptp)->pp_link = l->l_md.md_gc_ptp;
-		l->l_md.md_gc_ptp = ptp;
-	}
+	KASSERT(ptp->wire_count == 1);
+
+	lidx = level - 1;
+	pmap_stats_update(pmap, -1, 0);
+	if (pmap->pm_ptphint[lidx] == ptp)
+		pmap->pm_ptphint[lidx] = NULL;
+	ptp->wire_count = 0;
+
+	/*
+	 * Enqueue the PTP to be freed by pmap_update().  We can't remove
+	 * the page from the uvm_object, as that can take further locks
+	 * (intolerable right now because the PTEs are likely mapped in). 
+	 * Instead mark the PTP as free and if we bump into it again, we'll
+	 * either ignore or reuse (depending on what's tolerable at the
+	 * time).
+	 */
+	pp = VM_PAGE_TO_PP(ptp);
+	KASSERT((pp->pp_flags & PP_FREEING) == 0);
+	pp->pp_flags |= PP_FREEING;
+	LIST_INSERT_HEAD(&pmap->pm_gc_ptp, ptp, mdpage.mp_pp.pp_link);
 }
 
 static void
 pmap_free_ptp(struct pmap *pmap, struct vm_page *ptp, vaddr_t va,
-	      pt_entry_t *ptes, pd_entry_t * const *pdes,
-	      struct vm_page **ptp_tofree)
+	      pt_entry_t *ptes, pd_entry_t * const *pdes)
 {
 	unsigned long index;
 	int level;
@@ -2025,8 +2037,7 @@ pmap_free_ptp(struct pmap *pmap, struct 
 		pmap_tlb_shootnow();
 #endif
 
-		VM_PAGE_TO_PP(ptp)->pp_link = *ptp_tofree;
-		*ptp_tofree = ptp;
+		pmap_freepage(pmap, ptp, level);
 		if (level < PTP_LEVELS - 1) {
 			ptp = pmap_find_ptp(pmap, va, (paddr_t)-1, level + 1);
 			ptp->wire_count--;
@@ -2071,6 +2082,15 @@ pmap_get_ptp(struct pmap *pmap, struct p
 		if (pt->pg[i] == NULL) {
 			pt->pg[i] = uvm_pagealloc(obj, off, NULL, aflags);
 			pt->alloced[i] = true;
+		} else if (pt->pg[i]->wire_count == 0) {
+			/* This page was queued to be freed; dequeue it. */
+			KASSERT((VM_PAGE_TO_PP(pt->pg[i])->pp_flags &
+			    PP_FREEING) != 0);
+			VM_PAGE_TO_PP(pt->pg[i])->pp_flags &= ~PP_FREEING;
+			LIST_REMOVE(pt->pg[i], mdpage.mp_pp.pp_link);
+		} else {		
+			KASSERT((VM_PAGE_TO_PP(pt->pg[i])->pp_flags &
+			    PP_FREEING) == 0);
 		}
 		if (pt->pg[i] == NULL) {
 			pmap_unget_ptp(pmap, pt);
@@ -2175,6 +2195,11 @@ pmap_unget_ptp(struct pmap *pmap, struct
 		if (!pt->alloced[i]) {
 			continue;
 		}
+		KASSERT((VM_PAGE_TO_PP(pt->pg[i])->pp_flags &
+		    PP_FREEING) == 0);
+		KASSERT(pt->pg[i]->wire_count == 0);
+		/* pmap zeros all pages before freeing. */
+		pt->pg[i]->flags |= PG_ZERO; 
 		uvm_pagefree(pt->pg[i]);
 		pt->pg[i] = NULL;
 		pmap->pm_ptphint[0] = NULL;
@@ -2366,6 +2391,8 @@ pmap_ctor(void *arg, void *obj, int flag
 #ifdef XENPV
 	kcpuset_create(&pmap->pm_xen_ptp_cpus, true);
 #endif
+	LIST_INIT(&pmap->pm_gc_ptp);
+	pmap->pm_remove_all = NULL;
 
 	/* allocate and init PDP */
 	pmap->pm_pdir = pool_get(&pmap_pdp_pool, PR_WAITOK);
@@ -2436,8 +2463,6 @@ pmap_create(void)
 #if !defined(__x86_64__)
 	pmap->pm_hiexec = 0;
 #endif
-	pmap->pm_flags = 0;
-	pmap->pm_gc_ptp = NULL;
 
 	/* Used by NVMM. */
 	pmap->pm_enter = NULL;
@@ -2459,23 +2484,6 @@ pmap_create(void)
 }
 
 /*
- * pmap_free_ptps: put a list of ptps back to the freelist.
- */
-void
-pmap_free_ptps(struct vm_page *empty_ptps)
-{
-	struct vm_page *ptp;
-	struct pmap_page *pp;
-
-	while ((ptp = empty_ptps) != NULL) {
-		pp = VM_PAGE_TO_PP(ptp);
-		empty_ptps = pp->pp_link;
-		LIST_INIT(&pp->pp_pvlist);
-		uvm_pagefree(ptp);
-	}
-}
-
-/*
  * pmap_check_ptps: verify that none of the pmap's page table objects
  * have any pages allocated to them.
  */
@@ -2503,7 +2511,7 @@ pmap_check_inuse(struct pmap *pmap)
 		for (int i = 0; i < PDIR_SLOT_USERLIM; i++) {
 			if (pmap->pm_pdir[i] != 0 &&
 			    ci->ci_kpm_pdir[i] == pmap->pm_pdir[i]) {
-				printf("pmap_destroy(%p) pmap_kernel %p "
+				printf("pmap_dropref(%p) pmap_kernel %p "
 				    "curcpu %d cpu %d ci_pmap %p "
 				    "ci->ci_kpm_pdir[%d]=%" PRIx64
 				    " pmap->pm_pdir[%d]=%" PRIx64 "\n",
@@ -2520,33 +2528,30 @@ pmap_check_inuse(struct pmap *pmap)
 }
 
 /*
- * pmap_destroy: drop reference count on pmap.   free pmap if
- * reference count goes to zero.
+ * pmap_destroy:  pmap is being destroyed by UVM.
+ */
+void
+pmap_destroy(struct pmap *pmap)
+{
+
+	/* Undo pmap_remove_all(), then drop the reference. */
+	pmap_update(pmap);
+	pmap_dropref(pmap);
+}
+
+/*
+ * pmap_dropref:  drop reference count on pmap.  free pmap if reference
+ * count goes to zero.
  *
  * => we can be called from pmap_unmap_ptes() with a different, unrelated
  *    pmap's lock held.  be careful!
  */
-void
-pmap_destroy(struct pmap *pmap)
+static void
+pmap_dropref(struct pmap *pmap)
 {
-	lwp_t *l;
 	int i;
 
 	/*
-	 * If we have torn down this pmap, process deferred frees and
-	 * invalidations.  Free when the pmap is destroyed thus avoiding
-	 * TLB shootdowns.
-	 */
-	l = curlwp;
-	if (__predict_false(l->l_md.md_gc_pmap == pmap)) {
-		pmap_check_ptps(pmap);
-		KASSERT(pmap->pm_gc_ptp == NULL);
-		pmap->pm_gc_ptp = l->l_md.md_gc_ptp;
-		l->l_md.md_gc_ptp = NULL;
-		l->l_md.md_gc_pmap = NULL;
-	}
-
-	/*
 	 * drop reference count
 	 */
 
@@ -2560,12 +2565,9 @@ pmap_destroy(struct pmap *pmap)
 	 * Reference count is zero, free pmap resources and then free pmap.
 	 */
 
-	/*
-	 * Process deferred PTP frees.  No TLB shootdown required, as the
-	 * PTP pages are no longer visible to any CPU.
-	 */
-
-	pmap_free_ptps(pmap->pm_gc_ptp);
+	KASSERT(pmap->pm_remove_all == NULL);
+	pmap_check_ptps(pmap);
+	KASSERT(LIST_EMPTY(&pmap->pm_gc_ptp));
 
 #ifdef USER_LDT
 	if (pmap->pm_ldt != NULL) {
@@ -2576,6 +2578,7 @@ pmap_destroy(struct pmap *pmap)
 		 * No need to lock the pmap for ldt_free (or anything else),
 		 * we're the last one to use it.
 		 */
+		/* XXXAD can't take cpu_lock here - fix soon. */
 		mutex_enter(&cpu_lock);
 		ldt_free(pmap->pm_ldt_sel);
 		mutex_exit(&cpu_lock);
@@ -2610,11 +2613,13 @@ pmap_destroy(struct pmap *pmap)
 void
 pmap_remove_all(struct pmap *pmap)
 {
-	lwp_t *l = curlwp;
-
-	KASSERT(l->l_md.md_gc_pmap == NULL);
 
-	l->l_md.md_gc_pmap = pmap;
+	/*
+	 * No locking needed; at this point it should only ever be checked
+	 * by curlwp.
+	 */
+	KASSERT(pmap->pm_remove_all == NULL);
+	pmap->pm_remove_all = curlwp;
 }
 
 #if defined(PMAP_FORK)
@@ -2933,7 +2938,7 @@ pmap_load(void)
 	 * to the old pmap.  if we block, we need to go around again.
 	 */
 
-	pmap_destroy(oldpmap);
+	pmap_dropref(oldpmap);
 	__insn_barrier();
 	if (l->l_ncsw != ncsw) {
 		goto retry;
@@ -3581,7 +3586,6 @@ pmap_remove(struct pmap *pmap, vaddr_t s
 	pd_entry_t pde;
 	pd_entry_t * const *pdes;
 	struct pv_entry *pv_tofree = NULL;
-	struct vm_page *ptp_tofree = NULL;
 	bool result;
 	paddr_t ptppa;
 	vaddr_t blkendva, va = sva;
@@ -3626,10 +3630,8 @@ pmap_remove(struct pmap *pmap, vaddr_t s
 			 * being used, free it!
 			 */
 
-			if (result && ptp && ptp->wire_count <= 1) {
-				pmap_free_ptp(pmap, ptp, va, ptes, pdes,
-				    &ptp_tofree);
-			}
+			if (result && ptp && ptp->wire_count <= 1)
+				pmap_free_ptp(pmap, ptp, va, ptes, pdes);
 		}
 	} else for (/* null */ ; va < eva ; va = blkendva) {
 		/* determine range of block */
@@ -3660,25 +3662,9 @@ pmap_remove(struct pmap *pmap, vaddr_t s
 		pmap_remove_ptes(pmap, ptp, (vaddr_t)&ptes[pl1_i(va)], va,
 		    blkendva, &pv_tofree);
 
-		/*
-		 * If PTP is no longer being used, free it.  We need to unmap
-		 * and re-map to do this, then continue on at the next VA,
-		 * because we can't tolerate blocking with the PTEs mapped in.
-		 */
-		if (ptp == NULL || ptp->wire_count > 1) {
-			continue;
-		}
-		pmap_free_ptp(pmap, ptp, va, ptes, pdes, &ptp_tofree);
-		if (ptp_tofree != NULL) {
-			pmap_unmap_ptes(pmap, pmap2);
-			/* Now safe to free, with the pmap still locked. */
-			pmap_freepages(pmap, ptp_tofree);
-			ptp_tofree = NULL;
-			if (pv_tofree != NULL) {
-				pmap_free_pvs(pmap, pv_tofree);
-				pv_tofree = NULL;
-			}
-			pmap_map_ptes(pmap, &pmap2, &ptes, &pdes);
+		/* If PTP is no longer being used, free it. */
+		if (ptp && ptp->wire_count <= 1) {
+			pmap_free_ptp(pmap, ptp, va, ptes, pdes);
 		}
 	}
 	pmap_unmap_ptes(pmap, pmap2);
@@ -3687,9 +3673,6 @@ pmap_remove(struct pmap *pmap, vaddr_t s
 	 * block again.  Radix tree nodes are removed here, so we need to
 	 * continue holding the pmap locked until complete.
 	 */
-	if (ptp_tofree != NULL) {
-		pmap_freepages(pmap, ptp_tofree);
-	}
 	if (pv_tofree != NULL) {
 		pmap_free_pvs(pmap, pv_tofree);
 	}
@@ -3803,7 +3786,6 @@ static void
 pmap_pp_remove_ent(struct pmap *pmap, struct vm_page *ptp, pt_entry_t opte,
     vaddr_t va)
 {
-	struct vm_page *ptp_tofree = NULL;
 	struct pmap *pmap2;
 	pt_entry_t *ptes;
 	pd_entry_t * const *pdes;
@@ -3814,13 +3796,9 @@ pmap_pp_remove_ent(struct pmap *pmap, st
 	pmap_stats_update_bypte(pmap, 0, opte);
 	ptp->wire_count--;
 	if (ptp->wire_count <= 1) {
-		pmap_free_ptp(pmap, ptp, va, ptes, pdes, &ptp_tofree);
+		pmap_free_ptp(pmap, ptp, va, ptes, pdes);
 	}
 	pmap_unmap_ptes(pmap, pmap2);
-	/* Now safe to free PTPs, with the pmap still locked. */
-	if (ptp_tofree != NULL) {
-		pmap_freepages(pmap, ptp_tofree);
-	}
 }
 
 static void
@@ -3857,7 +3835,7 @@ startover:
 			KERNEL_UNLOCK_ALL(curlwp, &hold_count);
 			mutex_exit(&pmap->pm_lock);
 			if (ptp != NULL) {
-				pmap_destroy(pmap);
+				pmap_dropref(pmap);
 			}
 			SPINLOCK_BACKOFF(count);
 			KERNEL_LOCK(hold_count, curlwp);
@@ -3891,7 +3869,7 @@ startover:
 		}
 		mutex_exit(&pmap->pm_lock);
 		if (ptp != NULL) {
-			pmap_destroy(pmap);
+			pmap_dropref(pmap);
 		}
 	}
 	pmap_tlb_shootnow();
@@ -4096,7 +4074,7 @@ pmap_write_protect(struct pmap *pmap, va
 	vaddr_t blockend, va;
 	int lvl, i;
 
-	KASSERT(curlwp->l_md.md_gc_pmap != pmap);
+	KASSERT(pmap->pm_remove_all == NULL);
 
 	if (__predict_false(pmap->pm_write_protect != NULL)) {
 		(*pmap->pm_write_protect)(pmap, sva, eva, prot);
@@ -4262,7 +4240,7 @@ pmap_enter_ma(struct pmap *pmap, vaddr_t
 	struct pmap_ptparray pt;
 
 	KASSERT(pmap_initialized);
-	KASSERT(curlwp->l_md.md_gc_pmap != pmap);
+	KASSERT(pmap->pm_remove_all == NULL);
 	KASSERT(va < VM_MAX_KERNEL_ADDRESS);
 	KASSERTMSG(va != (vaddr_t)PDP_BASE, "%s: trying to map va=%#"
 	    PRIxVADDR " over PDP!", __func__, va);
@@ -4394,10 +4372,9 @@ pmap_enter_ma(struct pmap *pmap, vaddr_t
 			    vtomach((vaddr_t)ptep), npte, domid);
 			splx(s);
 			if (error) {
-				struct vm_page *ptp_tofree = NULL;
 				if (ptp != NULL && ptp->wire_count <= 1) {
-					pmap_free_ptp(pmap, ptp, va,
-					    ptes, pdes, &ptp_tofree);
+					pmap_free_ptp(pmap, ptp, va, ptes,
+					    pdes);
 				}
 				goto out;
 			}
@@ -4785,16 +4762,18 @@ pmap_dump(struct pmap *pmap, vaddr_t sva
 void
 pmap_update(struct pmap *pmap)
 {
-	struct vm_page *empty_ptps;
-	lwp_t *l = curlwp;
+	struct pmap_page *pp;
+	struct vm_page *ptp;
 
 	/*
-	 * If we have torn down this pmap, invalidate non-global TLB
-	 * entries on any processors using it.
+	 * If pmap_remove_all() was in effect, re-enable invalidations from
+	 * this point on; issue a shootdown for all the mappings just
+	 * removed.
 	 */
 	kpreempt_disable();
-	if (__predict_false(l->l_md.md_gc_pmap == pmap)) {
-		l->l_md.md_gc_pmap = NULL;
+	if (pmap->pm_remove_all != NULL) {
+		KASSERT(pmap->pm_remove_all == curlwp);
+		pmap->pm_remove_all = NULL;
 		pmap_tlb_shootdown(pmap, (vaddr_t)-1LL, 0, TLBSHOOT_UPDATE);
 	}
 
@@ -4806,17 +4785,32 @@ pmap_update(struct pmap *pmap)
 	kpreempt_enable();
 
 	/*
-	 * Now that shootdowns are complete, process deferred frees,
-	 * but not from interrupt context.
+	 * Now that shootdowns are complete, process deferred frees.  This
+	 * is an unlocked check, but is safe as we're only interested in
+	 * work done in this LWP - we won't get a false negative.
 	 */
-	if (l->l_md.md_gc_ptp != NULL) {
-		KASSERT((l->l_pflag & LP_INTR) == 0);
-		if (cpu_intr_p()) {
-			return;
+	if (!LIST_EMPTY(&pmap->pm_gc_ptp)) {
+		mutex_enter(&pmap->pm_lock);
+		while ((ptp = LIST_FIRST(&pmap->pm_gc_ptp)) != NULL) {
+			LIST_REMOVE(ptp, mdpage.mp_pp.pp_link);
+			pp = VM_PAGE_TO_PP(ptp);
+			LIST_INIT(&pp->pp_pvlist);
+			KASSERT((pp->pp_flags & PP_FREEING) != 0);
+			KASSERT(ptp->wire_count == 0);
+			pp->pp_flags &= ~PP_FREEING;
+	
+			/*
+			 * XXX Hack to avoid extra locking, and lock
+			 * assertions in uvm_pagefree().  Despite uobject
+			 * being set, this isn't a managed page.
+			 */
+			uvm_pagerealloc(ptp, NULL, 0);
+
+			/* pmap zeros all pages before freeing */
+			ptp->flags |= PG_ZERO;
+			uvm_pagefree(ptp);
 		}
-		empty_ptps = l->l_md.md_gc_ptp;
-		l->l_md.md_gc_ptp = NULL;
-		pmap_free_ptps(empty_ptps);
+		mutex_exit(&pmap->pm_lock);
 	}
 }
 
@@ -5082,7 +5076,7 @@ pmap_ept_free_ptp(struct pmap *pmap, str
 	do {
 		(void)pmap_pte_testset(tree[level - 1], 0);
 
-		pmap_freepages(pmap, ptp);
+		pmap_freepage(pmap, ptp, level);
 		if (level < PTP_LEVELS - 1) {
 			ptp = pmap_find_ptp(pmap, va, (paddr_t)-1, level + 1);
 			ptp->wire_count--;
@@ -5158,7 +5152,7 @@ pmap_ept_enter(struct pmap *pmap, vaddr_
 	int error;
 
 	KASSERT(pmap_initialized);
-	KASSERT(curlwp->l_md.md_gc_pmap != pmap);
+	KASSERT(pmap->pm_remove_all == NULL);
 	KASSERT(va < VM_MAXUSER_ADDRESS);
 
 	npte = pa | pmap_ept_prot(prot) | pmap_ept_type(flags);

Index: src/sys/arch/x86/x86/vm_machdep.c
diff -u src/sys/arch/x86/x86/vm_machdep.c:1.39 src/sys/arch/x86/x86/vm_machdep.c:1.40
--- src/sys/arch/x86/x86/vm_machdep.c:1.39	Fri Oct 18 16:26:38 2019
+++ src/sys/arch/x86/x86/vm_machdep.c	Sun Jan 12 13:01:11 2020
@@ -1,4 +1,4 @@
-/*	$NetBSD: vm_machdep.c,v 1.39 2019/10/18 16:26:38 maxv Exp $	*/
+/*	$NetBSD: vm_machdep.c,v 1.40 2020/01/12 13:01:11 ad Exp $	*/
 
 /*-
  * Copyright (c) 1982, 1986 The Regents of the University of California.
@@ -80,7 +80,7 @@
  */
 
 #include <sys/cdefs.h>
-__KERNEL_RCSID(0, "$NetBSD: vm_machdep.c,v 1.39 2019/10/18 16:26:38 maxv Exp $");
+__KERNEL_RCSID(0, "$NetBSD: vm_machdep.c,v 1.40 2020/01/12 13:01:11 ad Exp $");
 
 #include "opt_mtrr.h"
 
@@ -253,12 +253,6 @@ cpu_lwp_free(struct lwp *l, int proc)
 	if (proc && l->l_proc->p_md.md_flags & MDP_USEDMTRR)
 		mtrr_clean(l->l_proc);
 #endif
-	/*
-	 * Free deferred mappings if any.
-	 */
-	struct vm_page *empty_ptps = l->l_md.md_gc_ptp;
-	l->l_md.md_gc_ptp = NULL;
-	pmap_free_ptps(empty_ptps);
 }
 
 /*
@@ -270,9 +264,6 @@ cpu_lwp_free2(struct lwp *l)
 {
 	struct pcb *pcb;
 
-	KASSERT(l->l_md.md_gc_ptp == NULL);
-	KASSERT(l->l_md.md_gc_pmap == NULL);
-
 	pcb = lwp_getpcb(l);
 	KASSERT((pcb->pcb_flags & PCB_DBREGS) == 0);
 	if (pcb->pcb_dbregs) {

Index: src/sys/arch/x86/x86/x86_tlb.c
diff -u src/sys/arch/x86/x86/x86_tlb.c:1.13 src/sys/arch/x86/x86/x86_tlb.c:1.14
--- src/sys/arch/x86/x86/x86_tlb.c:1.13	Mon Dec 16 19:17:25 2019
+++ src/sys/arch/x86/x86/x86_tlb.c	Sun Jan 12 13:01:11 2020
@@ -1,4 +1,4 @@
-/*	$NetBSD: x86_tlb.c,v 1.13 2019/12/16 19:17:25 ad Exp $	*/
+/*	$NetBSD: x86_tlb.c,v 1.14 2020/01/12 13:01:11 ad Exp $	*/
 
 /*-
  * Copyright (c) 2008-2019 The NetBSD Foundation, Inc.
@@ -40,7 +40,7 @@
  */
 
 #include <sys/cdefs.h>
-__KERNEL_RCSID(0, "$NetBSD: x86_tlb.c,v 1.13 2019/12/16 19:17:25 ad Exp $");
+__KERNEL_RCSID(0, "$NetBSD: x86_tlb.c,v 1.14 2020/01/12 13:01:11 ad Exp $");
 
 #include <sys/param.h>
 #include <sys/kernel.h>
@@ -235,14 +235,6 @@ pmap_tlb_shootdown(struct pmap *pm, vadd
 		return;
 	}
 
-	/*
-	 * If tearing down the pmap, do nothing.  We will flush later
-	 * when we are ready to recycle/destroy it.
-	 */
-	if (__predict_false(curlwp->l_md.md_gc_pmap == pm)) {
-		return;
-	}
-
 	if ((pte & PTE_PS) != 0) {
 		va &= PTE_LGFRAME;
 	}
