Module Name:	src
Committed By:	ad
Date:		Wed Jan 15 17:55:45 UTC 2020
Modified Files:
	src/external/cddl/osnet/dist/uts/common/fs/zfs: zfs_vnops.c
	src/sys/external/bsd/drm2/dist/drm: drm_gem.c
	src/sys/external/bsd/drm2/dist/drm/i915: i915_gem.c i915_gem_fence.c
	src/sys/external/bsd/drm2/include/linux: mm.h
	src/sys/miscfs/genfs: genfs_io.c genfs_node.h
	src/sys/nfs: nfs_bio.c
	src/sys/rump/librump/rumpkern: Makefile.rumpkern vm.c
	src/sys/rump/librump/rumpvfs: vm_vfs.c
	src/sys/sys: cpu_data.h
	src/sys/ufs/lfs: lfs_pages.c lfs_segment.c lfs_vfsops.c ulfs_inode.c
	src/sys/ufs/ufs: ufs_inode.c
	src/sys/uvm: files.uvm uvm_anon.c uvm_aobj.c uvm_bio.c uvm_extern.h
	    uvm_fault.c uvm_loan.c uvm_meter.c uvm_object.c uvm_object.h
	    uvm_page.c uvm_page.h uvm_page_array.c uvm_pager.c uvm_pdaemon.c
	    uvm_vnode.c
Added Files:
	src/sys/uvm: uvm_page_status.c

Log Message:
Merge from yamt-pagecache (after much testing):

- Reduce unnecessary page scan in putpages, especially when an object has
  a ton of pages cached but only a few of them are dirty.

- Reduce the number of pmap operations by tracking page dirtiness more
  precisely in the uvm layer.


To generate a diff of this commit:
cvs rdiff -u -r1.54 -r1.55 \
    src/external/cddl/osnet/dist/uts/common/fs/zfs/zfs_vnops.c
cvs rdiff -u -r1.10 -r1.11 src/sys/external/bsd/drm2/dist/drm/drm_gem.c
cvs rdiff -u -r1.54 -r1.55 src/sys/external/bsd/drm2/dist/drm/i915/i915_gem.c
cvs rdiff -u -r1.5 -r1.6 \
    src/sys/external/bsd/drm2/dist/drm/i915/i915_gem_fence.c
cvs rdiff -u -r1.9 -r1.10 src/sys/external/bsd/drm2/include/linux/mm.h
cvs rdiff -u -r1.83 -r1.84 src/sys/miscfs/genfs/genfs_io.c
cvs rdiff -u -r1.22 -r1.23 src/sys/miscfs/genfs/genfs_node.h
cvs rdiff -u -r1.192 -r1.193 src/sys/nfs/nfs_bio.c
cvs rdiff -u -r1.181 -r1.182 src/sys/rump/librump/rumpkern/Makefile.rumpkern
cvs rdiff -u -r1.182 -r1.183 src/sys/rump/librump/rumpkern/vm.c
cvs rdiff -u -r1.35 -r1.36 src/sys/rump/librump/rumpvfs/vm_vfs.c
cvs rdiff -u -r1.48 -r1.49 src/sys/sys/cpu_data.h
cvs rdiff -u -r1.19 -r1.20 src/sys/ufs/lfs/lfs_pages.c
cvs rdiff -u -r1.280 -r1.281 src/sys/ufs/lfs/lfs_segment.c
cvs rdiff -u -r1.367 -r1.368 src/sys/ufs/lfs/lfs_vfsops.c
cvs rdiff -u -r1.23 -r1.24 src/sys/ufs/lfs/ulfs_inode.c
cvs rdiff -u -r1.107 -r1.108 src/sys/ufs/ufs/ufs_inode.c
cvs rdiff -u -r1.32 -r1.33 src/sys/uvm/files.uvm
cvs rdiff -u -r1.70 -r1.71 src/sys/uvm/uvm_anon.c
cvs rdiff -u -r1.133 -r1.134 src/sys/uvm/uvm_aobj.c
cvs rdiff -u -r1.102 -r1.103 src/sys/uvm/uvm_bio.c
cvs rdiff -u -r1.218 -r1.219 src/sys/uvm/uvm_extern.h
cvs rdiff -u -r1.214 -r1.215 src/sys/uvm/uvm_fault.c
cvs rdiff -u -r1.93 -r1.94 src/sys/uvm/uvm_loan.c
cvs rdiff -u -r1.73 -r1.74 src/sys/uvm/uvm_meter.c
cvs rdiff -u -r1.19 -r1.20 src/sys/uvm/uvm_object.c
cvs rdiff -u -r1.35 -r1.36 src/sys/uvm/uvm_object.h
cvs rdiff -u -r1.223 -r1.224 src/sys/uvm/uvm_page.c
cvs rdiff -u -r1.95 -r1.96 src/sys/uvm/uvm_page.h
cvs rdiff -u -r1.2 -r1.3 src/sys/uvm/uvm_page_array.c
cvs rdiff -u -r0 -r1.2 src/sys/uvm/uvm_page_status.c
cvs rdiff -u -r1.119 -r1.120 src/sys/uvm/uvm_pager.c
cvs rdiff -u -r1.122 -r1.123 src/sys/uvm/uvm_pdaemon.c
cvs rdiff -u -r1.104 -r1.105 src/sys/uvm/uvm_vnode.c

Please note that diffs are not public domain; they are subject to the
copyright notices on the relevant files.
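[Editor's note] The common thread in the diffs below is that per-page dirtiness
is now tracked in the uvm layer as one of three states (UVM_PAGE_STATUS_CLEAN,
UVM_PAGE_STATUS_UNKNOWN, UVM_PAGE_STATUS_DIRTY, queried and set via
uvm_pagegetdirty()/uvm_pagemarkdirty()/uvm_pagecheckdirty()), instead of a
single PG_CLEAN flag plus frequent pmap_clear_modify() calls.  What follows is
an illustrative, self-contained C sketch of that state model only -- it is not
the committed kernel code; struct page, pmap_clear_modify_stub() and
page_check_dirty() here are simplified stand-ins invented for the example.

    /*
     * Illustrative sketch of the three-state dirtiness model described in
     * this commit.  Simplified stand-ins; not the NetBSD kernel code.
     */
    #include <stdbool.h>
    #include <stdio.h>

    enum page_status {              /* mirrors UVM_PAGE_STATUS_* in spirit */
            PAGE_STATUS_UNKNOWN,    /* may be dirty; must consult the pmap */
            PAGE_STATUS_CLEAN,      /* known clean; writes will fault first */
            PAGE_STATUS_DIRTY,      /* known dirty; must be written out */
    };

    struct page {
            enum page_status status;
    };

    /* Stand-in for pmap_clear_modify(): read and clear the MMU modified bit. */
    static bool
    pmap_clear_modify_stub(struct page *pg)
    {
            (void)pg;
            return false;           /* pretend the hardware bit was not set */
    }

    /*
     * Decide whether a page needs cleaning.  Only the UNKNOWN state pays
     * for a pmap query; CLEAN and DIRTY are resolved from uvm-level state
     * alone, which is the pmap-traffic reduction the log message mentions.
     */
    static bool
    page_check_dirty(struct page *pg, bool write_protected)
    {
            switch (pg->status) {
            case PAGE_STATUS_DIRTY:
                    return true;
            case PAGE_STATUS_CLEAN:
                    return false;
            case PAGE_STATUS_UNKNOWN:
            default:
                    if (pmap_clear_modify_stub(pg)) {
                            pg->status = PAGE_STATUS_DIRTY;
                            return true;
                    }
                    /* Only downgrade to CLEAN if new writes will fault. */
                    if (write_protected)
                            pg->status = PAGE_STATUS_CLEAN;
                    return false;
            }
    }

    int
    main(void)
    {
            struct page pg = { .status = PAGE_STATUS_UNKNOWN };

            printf("needs clean: %d\n", page_check_dirty(&pg, true));
            printf("status now clean: %d\n", pg.status == PAGE_STATUS_CLEAN);
            return 0;
    }

In the real change, moving a page between these states also sets or clears
UVM_PAGE_DIRTY_TAG on the owning object's radix tree, which is what lets
genfs_do_putpages() short-circuit via radix_tree_empty_tagged_tree_p() and
iterate only possibly-dirty pages (UVM_PAGE_ARRAY_FILL_DIRTY) instead of
scanning every resident page.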
Modified files: Index: src/external/cddl/osnet/dist/uts/common/fs/zfs/zfs_vnops.c diff -u src/external/cddl/osnet/dist/uts/common/fs/zfs/zfs_vnops.c:1.54 src/external/cddl/osnet/dist/uts/common/fs/zfs/zfs_vnops.c:1.55 --- src/external/cddl/osnet/dist/uts/common/fs/zfs/zfs_vnops.c:1.54 Fri Dec 13 20:10:21 2019 +++ src/external/cddl/osnet/dist/uts/common/fs/zfs/zfs_vnops.c Wed Jan 15 17:55:43 2020 @@ -746,7 +746,8 @@ mappedread(vnode_t *vp, int nbytes, uio_ pp = NULL; npages = 1; mutex_enter(mtx); - found = uvn_findpages(uobj, start, &npages, &pp, UFP_NOALLOC); + found = uvn_findpages(uobj, start, &npages, &pp, NULL, + UFP_NOALLOC); mutex_exit(mtx); /* XXXNETBSD shouldn't access userspace with the page busy */ @@ -792,7 +793,8 @@ update_pages(vnode_t *vp, int64_t start, pp = NULL; npages = 1; - found = uvn_findpages(uobj, start, &npages, &pp, UFP_NOALLOC); + found = uvn_findpages(uobj, start, &npages, &pp, NULL, + UFP_NOALLOC); if (found) { mutex_exit(mtx); @@ -5976,7 +5978,7 @@ zfs_netbsd_getpages(void *v) } npages = 1; pg = NULL; - uvn_findpages(uobj, offset, &npages, &pg, UFP_ALL); + uvn_findpages(uobj, offset, &npages, &pg, NULL, UFP_ALL); if (pg->flags & PG_FAKE) { mutex_exit(mtx); @@ -6224,7 +6226,7 @@ zfs_netbsd_setsize(vnode_t *vp, off_t si mutex_enter(mtx); count = 1; pg = NULL; - if (uvn_findpages(uobj, tsize, &count, &pg, UFP_NOALLOC)) { + if (uvn_findpages(uobj, tsize, &count, &pg, NULL, UFP_NOALLOC)) { va = zfs_map_page(pg, S_WRITE); pgoff = size - tsize; memset(va + pgoff, 0, PAGESIZE - pgoff); Index: src/sys/external/bsd/drm2/dist/drm/drm_gem.c diff -u src/sys/external/bsd/drm2/dist/drm/drm_gem.c:1.10 src/sys/external/bsd/drm2/dist/drm/drm_gem.c:1.11 --- src/sys/external/bsd/drm2/dist/drm/drm_gem.c:1.10 Mon Aug 27 15:22:53 2018 +++ src/sys/external/bsd/drm2/dist/drm/drm_gem.c Wed Jan 15 17:55:43 2020 @@ -1,4 +1,4 @@ -/* $NetBSD: drm_gem.c,v 1.10 2018/08/27 15:22:53 riastradh Exp $ */ +/* $NetBSD: drm_gem.c,v 1.11 2020/01/15 17:55:43 ad Exp $ */ /* * Copyright © 2008 Intel Corporation @@ -28,7 +28,7 @@ */ #include <sys/cdefs.h> -__KERNEL_RCSID(0, "$NetBSD: drm_gem.c,v 1.10 2018/08/27 15:22:53 riastradh Exp $"); +__KERNEL_RCSID(0, "$NetBSD: drm_gem.c,v 1.11 2020/01/15 17:55:43 ad Exp $"); #include <linux/types.h> #include <linux/slab.h> @@ -612,8 +612,10 @@ drm_gem_put_pages(struct drm_gem_object unsigned i; for (i = 0; i < (obj->size >> PAGE_SHIFT); i++) { - if (dirty) - pages[i]->p_vmp.flags &= ~PG_CLEAN; + if (dirty) { + uvm_pagemarkdirty(&pages[i]->p_vmp, + UVM_PAGE_STATUS_DIRTY); + } } uvm_obj_unwirepages(obj->filp, 0, obj->size); Index: src/sys/external/bsd/drm2/dist/drm/i915/i915_gem.c diff -u src/sys/external/bsd/drm2/dist/drm/i915/i915_gem.c:1.54 src/sys/external/bsd/drm2/dist/drm/i915/i915_gem.c:1.55 --- src/sys/external/bsd/drm2/dist/drm/i915/i915_gem.c:1.54 Mon Aug 27 15:22:54 2018 +++ src/sys/external/bsd/drm2/dist/drm/i915/i915_gem.c Wed Jan 15 17:55:43 2020 @@ -1,4 +1,4 @@ -/* $NetBSD: i915_gem.c,v 1.54 2018/08/27 15:22:54 riastradh Exp $ */ +/* $NetBSD: i915_gem.c,v 1.55 2020/01/15 17:55:43 ad Exp $ */ /* * Copyright © 2008-2015 Intel Corporation @@ -28,7 +28,7 @@ */ #include <sys/cdefs.h> -__KERNEL_RCSID(0, "$NetBSD: i915_gem.c,v 1.54 2018/08/27 15:22:54 riastradh Exp $"); +__KERNEL_RCSID(0, "$NetBSD: i915_gem.c,v 1.55 2020/01/15 17:55:43 ad Exp $"); #ifdef __NetBSD__ #if 0 /* XXX uvmhist option? 
*/ @@ -2644,7 +2644,7 @@ i915_gem_object_put_pages_gtt(struct drm if (obj->dirty) { TAILQ_FOREACH(page, &obj->pageq, pageq.queue) { - page->flags &= ~PG_CLEAN; + uvm_pagemarkdirty(page, UVM_PAGE_STATUS_DIRTY); /* XXX mark page accessed */ } } Index: src/sys/external/bsd/drm2/dist/drm/i915/i915_gem_fence.c diff -u src/sys/external/bsd/drm2/dist/drm/i915/i915_gem_fence.c:1.5 src/sys/external/bsd/drm2/dist/drm/i915/i915_gem_fence.c:1.6 --- src/sys/external/bsd/drm2/dist/drm/i915/i915_gem_fence.c:1.5 Mon Aug 27 15:09:35 2018 +++ src/sys/external/bsd/drm2/dist/drm/i915/i915_gem_fence.c Wed Jan 15 17:55:43 2020 @@ -1,4 +1,4 @@ -/* $NetBSD: i915_gem_fence.c,v 1.5 2018/08/27 15:09:35 riastradh Exp $ */ +/* $NetBSD: i915_gem_fence.c,v 1.6 2020/01/15 17:55:43 ad Exp $ */ /* * Copyright © 2008-2015 Intel Corporation @@ -24,7 +24,7 @@ */ #include <sys/cdefs.h> -__KERNEL_RCSID(0, "$NetBSD: i915_gem_fence.c,v 1.5 2018/08/27 15:09:35 riastradh Exp $"); +__KERNEL_RCSID(0, "$NetBSD: i915_gem_fence.c,v 1.6 2020/01/15 17:55:43 ad Exp $"); #include <drm/drmP.h> #include <drm/i915_drm.h> @@ -769,7 +769,7 @@ i915_gem_object_do_bit_17_swizzle(struct (test_bit(i, obj->bit_17) != 0)) { i915_gem_swizzle_page(container_of(page, struct page, p_vmp)); - page->flags &= ~PG_CLEAN; + uvm_pagemarkdirty(page, UVM_PAGE_STATUS_DIRTY); } i += 1; } Index: src/sys/external/bsd/drm2/include/linux/mm.h diff -u src/sys/external/bsd/drm2/include/linux/mm.h:1.9 src/sys/external/bsd/drm2/include/linux/mm.h:1.10 --- src/sys/external/bsd/drm2/include/linux/mm.h:1.9 Mon Aug 27 13:44:54 2018 +++ src/sys/external/bsd/drm2/include/linux/mm.h Wed Jan 15 17:55:44 2020 @@ -1,4 +1,4 @@ -/* $NetBSD: mm.h,v 1.9 2018/08/27 13:44:54 riastradh Exp $ */ +/* $NetBSD: mm.h,v 1.10 2020/01/15 17:55:44 ad Exp $ */ /*- * Copyright (c) 2013 The NetBSD Foundation, Inc. @@ -96,7 +96,7 @@ static inline void set_page_dirty(struct page *page) { - page->p_vmp.flags &= ~PG_CLEAN; + uvm_pagemarkdirty(&page->p_vmp, UVM_PAGE_STATUS_DIRTY); } #endif /* _LINUX_MM_H_ */ Index: src/sys/miscfs/genfs/genfs_io.c diff -u src/sys/miscfs/genfs/genfs_io.c:1.83 src/sys/miscfs/genfs/genfs_io.c:1.84 --- src/sys/miscfs/genfs/genfs_io.c:1.83 Tue Dec 31 22:42:50 2019 +++ src/sys/miscfs/genfs/genfs_io.c Wed Jan 15 17:55:44 2020 @@ -1,4 +1,4 @@ -/* $NetBSD: genfs_io.c,v 1.83 2019/12/31 22:42:50 ad Exp $ */ +/* $NetBSD: genfs_io.c,v 1.84 2020/01/15 17:55:44 ad Exp $ */ /* * Copyright (c) 1982, 1986, 1989, 1993 @@ -31,7 +31,7 @@ */ #include <sys/cdefs.h> -__KERNEL_RCSID(0, "$NetBSD: genfs_io.c,v 1.83 2019/12/31 22:42:50 ad Exp $"); +__KERNEL_RCSID(0, "$NetBSD: genfs_io.c,v 1.84 2020/01/15 17:55:44 ad Exp $"); #include <sys/param.h> #include <sys/systm.h> @@ -86,10 +86,8 @@ genfs_rel_pages(struct vm_page **pgs, un static void genfs_markdirty(struct vnode *vp) { - struct genfs_node * const gp = VTOG(vp); KASSERT(mutex_owned(vp->v_interlock)); - gp->g_dirtygen++; if ((vp->v_iflag & VI_ONWORKLST) == 0) { vn_syncer_add_to_worklist(vp, filedelay); } @@ -137,6 +135,7 @@ genfs_getpages(void *v) UVMHIST_LOG(ubchist, "vp %#jx off 0x%jx/%jx count %jd", (uintptr_t)vp, ap->a_offset >> 32, ap->a_offset, *ap->a_count); + KASSERT(memwrite >= overwrite); KASSERT(vp->v_type == VREG || vp->v_type == VDIR || vp->v_type == VLNK || vp->v_type == VBLK); @@ -231,12 +230,17 @@ startover: } #endif /* defined(DEBUG) */ nfound = uvn_findpages(uobj, origoffset, &npages, - ap->a_m, UFP_NOWAIT|UFP_NOALLOC|(memwrite ? UFP_NORDONLY : 0)); + ap->a_m, NULL, + UFP_NOWAIT|UFP_NOALLOC|(memwrite ? 
UFP_NORDONLY : 0)); KASSERT(npages == *ap->a_count); if (nfound == 0) { error = EBUSY; goto out_err; } + /* + * lock and unlock g_glock to ensure that no one is truncating + * the file behind us. + */ if (!genfs_node_rdtrylock(vp)) { genfs_rel_pages(ap->a_m, npages); @@ -258,6 +262,17 @@ startover: } error = (ap->a_m[ap->a_centeridx] == NULL ? EBUSY : 0); if (error == 0 && memwrite) { + for (i = 0; i < npages; i++) { + pg = ap->a_m[i]; + if (pg == NULL || pg == PGO_DONTCARE) { + continue; + } + if (uvm_pagegetdirty(pg) == + UVM_PAGE_STATUS_CLEAN) { + uvm_pagemarkdirty(pg, + UVM_PAGE_STATUS_UNKNOWN); + } + } genfs_markdirty(vp); } goto out_err; @@ -351,7 +366,7 @@ startover: goto startover; } - if (uvn_findpages(uobj, origoffset, &npages, &pgs[ridx], + if (uvn_findpages(uobj, origoffset, &npages, &pgs[ridx], NULL, async ? UFP_NOWAIT : UFP_ALL) != orignmempages) { if (!glocked) { genfs_node_unlock(vp); @@ -364,41 +379,57 @@ startover: } /* - * if the pages are already resident, just return them. + * if PGO_OVERWRITE is set, don't bother reading the pages. */ - for (i = 0; i < npages; i++) { - struct vm_page *pg = pgs[ridx + i]; - - if ((pg->flags & PG_FAKE) || - (blockalloc && (pg->flags & PG_RDONLY))) { - break; - } - } - if (i == npages) { + if (overwrite) { if (!glocked) { genfs_node_unlock(vp); } - UVMHIST_LOG(ubchist, "returning cached pages", 0,0,0,0); + UVMHIST_LOG(ubchist, "PGO_OVERWRITE",0,0,0,0); + + for (i = 0; i < npages; i++) { + struct vm_page *pg = pgs[ridx + i]; + + /* + * it's caller's responsibility to allocate blocks + * beforehand for the overwrite case. + */ + + KASSERT((pg->flags & PG_RDONLY) == 0 || !blockalloc); + pg->flags &= ~PG_RDONLY; + + /* + * mark the page DIRTY. + * otherwise another thread can do putpages and pull + * our vnode from syncer's queue before our caller does + * ubc_release. note that putpages won't see CLEAN + * pages even if they are BUSY. + */ + + uvm_pagemarkdirty(pg, UVM_PAGE_STATUS_DIRTY); + } npages += ridx; goto out; } /* - * if PGO_OVERWRITE is set, don't bother reading the pages. + * if the pages are already resident, just return them. */ - if (overwrite) { + for (i = 0; i < npages; i++) { + struct vm_page *pg = pgs[ridx + i]; + + if ((pg->flags & PG_FAKE) || + (blockalloc && (pg->flags & PG_RDONLY) != 0)) { + break; + } + } + if (i == npages) { if (!glocked) { genfs_node_unlock(vp); } - UVMHIST_LOG(ubchist, "PGO_OVERWRITE",0,0,0,0); - - for (i = 0; i < npages; i++) { - struct vm_page *pg = pgs[ridx + i]; - - pg->flags &= ~(PG_RDONLY|PG_CLEAN); - } + UVMHIST_LOG(ubchist, "returning cached pages", 0,0,0,0); npages += ridx; goto out; } @@ -425,7 +456,7 @@ startover: UVMHIST_LOG(ubchist, "reset npages start 0x%jx end 0x%jx", startoffset, endoffset, 0,0); npgs = npages; - if (uvn_findpages(uobj, startoffset, &npgs, pgs, + if (uvn_findpages(uobj, startoffset, &npgs, pgs, NULL, async ? UFP_NOWAIT : UFP_ALL) != npages) { if (!glocked) { genfs_node_unlock(vp); @@ -473,8 +504,16 @@ out: UVMHIST_LOG(ubchist, "examining pg %#jx flags 0x%jx", (uintptr_t)pg, pg->flags, 0,0); if (pg->flags & PG_FAKE && !overwrite) { - pg->flags &= ~(PG_FAKE); - pmap_clear_modify(pgs[i]); + /* + * we've read page's contents from the backing storage. + * + * for a read fault, we keep them CLEAN; if we + * encountered a hole while reading, the pages can + * already been dirtied with zeros. 
+ */ + KASSERTMSG(blockalloc || uvm_pagegetdirty(pg) == + UVM_PAGE_STATUS_CLEAN, "page %p not clean", pg); + pg->flags &= ~PG_FAKE; } KASSERT(!memwrite || !blockalloc || (pg->flags & PG_RDONLY) == 0); if (i < ridx || i >= ridx + orignmempages || async) { @@ -496,6 +535,13 @@ out: uvm_pageunlock(pg); pg->flags &= ~(PG_WANTED|PG_BUSY|PG_FAKE); UVM_PAGE_OWN(pg, NULL); + } else if (memwrite && !overwrite && + uvm_pagegetdirty(pg) == UVM_PAGE_STATUS_CLEAN) { + /* + * for a write fault, start dirtiness tracking of + * requested pages. + */ + uvm_pagemarkdirty(pg, UVM_PAGE_STATUS_UNKNOWN); } } if (memwrite) { @@ -690,16 +736,13 @@ genfs_getpages_read(struct vnode *vp, st iobytes); skipbytes += iobytes; - mutex_enter(uobj->vmobjlock); - for (i = 0; i < holepages; i++) { - if (memwrite) { - pgs[pidx + i]->flags &= ~PG_CLEAN; - } - if (!blockalloc) { + if (!blockalloc) { + mutex_enter(uobj->vmobjlock); + for (i = 0; i < holepages; i++) { pgs[pidx + i]->flags |= PG_RDONLY; } + mutex_exit(uobj->vmobjlock); } - mutex_exit(uobj->vmobjlock); continue; } @@ -764,7 +807,8 @@ loopdone: if (pg == NULL) { continue; } - pg->flags &= ~(PG_CLEAN|PG_RDONLY); + pg->flags &= ~PG_RDONLY; + uvm_pagemarkdirty(pg, UVM_PAGE_STATUS_DIRTY); UVMHIST_LOG(ubchist, "mark dirty pg %#jx", (uintptr_t)pg, 0, 0, 0); } @@ -793,11 +837,11 @@ loopdone: * this routine is holding the lock on the object. the only time * that it can run into a PG_BUSY page that it does not own is if * some other process has started I/O on the page (e.g. either - * a pagein, or a pageout). if the PG_BUSY page is being paged - * in, then it can not be dirty (!PG_CLEAN) because no one has - * had a chance to modify it yet. if the PG_BUSY page is being - * paged out then it means that someone else has already started - * cleaning the page for us (how nice!). in this case, if we + * a pagein, or a pageout). if the PG_BUSY page is being paged + * in, then it can not be dirty (!UVM_PAGE_STATUS_CLEAN) because no + * one has had a chance to modify it yet. if the PG_BUSY page is + * being paged out then it means that someone else has already started + * cleaning the page for us (how nice!). in this case, if we * have syncio specified, then after we make our pass through the * object we need to wait for the other PG_BUSY pages to clear * off (i.e. we need to do an iosync). also note that once a @@ -839,14 +883,13 @@ genfs_do_putpages(struct vnode *vp, off_ bool async = (origflags & PGO_SYNCIO) == 0; bool pagedaemon = curlwp == uvm.pagedaemon_lwp; struct lwp * const l = curlwp ? curlwp : &lwp0; - struct genfs_node * const gp = VTOG(vp); struct mount *trans_mp; int flags; - int dirtygen; - bool modified; + bool modified; /* if we write out any pages */ bool holds_wapbl; - bool cleanall; + bool cleanall; /* try to pull off from the syncer's list */ bool onworklst; + const bool dirtyonly = (origflags & (PGO_DEACTIVATE|PGO_FREE)) == 0; UVMHIST_FUNC("genfs_putpages"); UVMHIST_CALLED(ubchist); @@ -870,7 +913,14 @@ retry: flags = origflags; KASSERT((vp->v_iflag & VI_ONWORKLST) != 0 || (vp->v_iflag & VI_WRMAPDIRTY) == 0); - if (uobj->uo_npages == 0) { + + /* + * shortcut if we have no pages to process. 
+ */ + + if (uobj->uo_npages == 0 || (dirtyonly && + radix_tree_empty_tagged_tree_p(&uobj->uo_pages, + UVM_PAGE_DIRTY_TAG))) { if (vp->v_iflag & VI_ONWORKLST) { vp->v_iflag &= ~VI_WRMAPDIRTY; if (LIST_FIRST(&vp->v_dirtyblkhd) == NULL) @@ -940,7 +990,7 @@ retry: if ((vp->v_iflag & VI_ONWORKLST) == 0) { #if !defined(DEBUG) - if ((flags & (PGO_FREE|PGO_DEACTIVATE)) == 0) { + if (dirtyonly) { goto skip_scan; } #endif /* !defined(DEBUG) */ @@ -951,18 +1001,23 @@ retry: * start the loop to scan pages. */ - cleanall = (flags & PGO_CLEANIT) != 0 && wasclean && - startoff == 0 && endoff == trunc_page(LLONG_MAX) && - (vp->v_iflag & VI_ONWORKLST) != 0; - dirtygen = gp->g_dirtygen; + cleanall = true; freeflag = pagedaemon ? PG_PAGEOUT : PG_RELEASED; uvm_page_array_init(&a); for (;;) { + bool pgprotected; + /* - * if the current page is not interesting, move on to the next. + * if !dirtyonly, iterate over all resident pages in the range. + * + * if dirtyonly, only possibly dirty pages are interesting. + * however, if we are asked to sync for integrity, we should + * wait on pages being written back by other threads as well. */ - pg = uvm_page_array_fill_and_peek(&a, uobj, nextoff, 0, 0); + pg = uvm_page_array_fill_and_peek(&a, uobj, nextoff, 0, + dirtyonly ? (UVM_PAGE_ARRAY_FILL_DIRTY | + (!async ? UVM_PAGE_ARRAY_FILL_WRITEBACK : 0)) : 0); if (pg == NULL) { break; } @@ -972,18 +1027,15 @@ retry: (pg->flags & (PG_BUSY)) != 0); KASSERT(pg->offset >= startoff); KASSERT(pg->offset >= nextoff); + KASSERT(!dirtyonly || + uvm_pagegetdirty(pg) != UVM_PAGE_STATUS_CLEAN || + radix_tree_get_tag(&uobj->uo_pages, + pg->offset >> PAGE_SHIFT, UVM_PAGE_WRITEBACK_TAG)); if (pg->offset >= endoff) { break; } - if (pg->flags & (PG_RELEASED|PG_PAGEOUT)) { - wasclean = false; - nextoff = pg->offset + PAGE_SIZE; - uvm_page_array_advance(&a); - continue; - } - /* * a preempt point. */ @@ -1003,14 +1055,14 @@ retry: } /* - * if the current page needs to be cleaned and it's busy, - * wait for it to become unbusy. + * if the current page is busy, wait for it to become unbusy. */ - if (pg->flags & PG_BUSY) { + if ((pg->flags & PG_BUSY) != 0) { UVMHIST_LOG(ubchist, "busy %#jx", (uintptr_t)pg, 0, 0, 0); - if (flags & PGO_BUSYFAIL && pg->flags & PG_BUSY) { + if ((pg->flags & (PG_RELEASED|PG_PAGEOUT)) != 0 + && (flags & PGO_BUSYFAIL) != 0) { UVMHIST_LOG(ubchist, "busyfail %#jx", (uintptr_t)pg, 0, 0, 0); error = EDEADLK; @@ -1025,6 +1077,16 @@ retry: */ break; } + /* + * don't bother to wait on other's activities + * unless we are asked to sync for integrity. + */ + if (!async && (flags & PGO_RECLAIM) == 0) { + wasclean = false; + nextoff = pg->offset + PAGE_SIZE; + uvm_page_array_advance(&a); + continue; + } nextoff = pg->offset; /* visit this page again */ pg->flags |= PG_WANTED; UVM_UNLOCK_AND_WAIT(pg, slock, 0, "genput", 0); @@ -1045,8 +1107,10 @@ retry: * if we're cleaning, check if the page is needs to be cleaned. */ + pgprotected = false; if (flags & PGO_FREE) { pmap_page_protect(pg, VM_PROT_NONE); + pgprotected = true; } else if (flags & PGO_CLEANIT) { /* @@ -1054,8 +1118,7 @@ retry: * from the syncer queue, write-protect the page. 
*/ - if (cleanall && wasclean && - gp->g_dirtygen == dirtygen) { + if (cleanall && wasclean) { /* * uobj pages get wired only by uvm_fault @@ -1065,6 +1128,7 @@ retry: if (pg->wire_count == 0) { pmap_page_protect(pg, VM_PROT_READ|VM_PROT_EXECUTE); + pgprotected = true; } else { cleanall = false; } @@ -1072,17 +1136,14 @@ retry: } if (flags & PGO_CLEANIT) { - needs_clean = pmap_clear_modify(pg) || - (pg->flags & PG_CLEAN) == 0; - pg->flags |= PG_CLEAN; + needs_clean = uvm_pagecheckdirty(pg, pgprotected); } else { needs_clean = false; } /* * if we're cleaning, build a cluster. - * the cluster will consist of pages which are currently dirty, - * but they will be returned to us marked clean. + * the cluster will consist of pages which are currently dirty. * if not cleaning, just operate on the one page. */ @@ -1118,7 +1179,8 @@ retry: npages = (off - lo) >> PAGE_SHIFT; nback = npages; - uvn_findpages(uobj, off - PAGE_SIZE, &nback, &pgs[0], + uvn_findpages(uobj, off - PAGE_SIZE, &nback, + &pgs[0], NULL, UFP_NOWAIT|UFP_NOALLOC|UFP_DIRTYONLY|UFP_BACKWARD); if (nback) { memmove(&pgs[0], &pgs[npages - nback], @@ -1140,6 +1202,14 @@ retry: /* * then look forward to fill in the remaining space in * the array of pages. + * + * pass our cached array of pages so that hopefully + * uvn_findpages can find some good pages in it. + * the array a was filled above with the one of + * following sets of flags: + * 0 + * UVM_PAGE_ARRAY_FILL_DIRTY + * UVM_PAGE_ARRAY_FILL_DIRTY|WRITEBACK */ npages = MAXPAGES - nback - 1; @@ -1147,7 +1217,7 @@ retry: npages = MIN(npages, (fshi - off - 1) >> PAGE_SHIFT); uvn_findpages(uobj, off + PAGE_SIZE, &npages, - &pgs[nback + 1], + &pgs[nback + 1], NULL, UFP_NOWAIT|UFP_NOALLOC|UFP_DIRTYONLY); npages += nback + 1; } else { @@ -1163,6 +1233,19 @@ retry: for (i = 0; i < npages; i++) { tpg = pgs[i]; KASSERT(tpg->uobject == uobj); + KASSERT(i == 0 || + pgs[i-1]->offset + PAGE_SIZE == tpg->offset); + KASSERT(!needs_clean || uvm_pagegetdirty(pgs[i]) != + UVM_PAGE_STATUS_DIRTY); + if (needs_clean) { + /* + * mark pages as WRITEBACK so that concurrent + * fsync can find and wait for our activities. + */ + radix_tree_set_tag(&uobj->uo_pages, + pgs[i]->offset >> PAGE_SHIFT, + UVM_PAGE_WRITEBACK_TAG); + } if (tpg->offset < startoff || tpg->offset >= endoff) continue; if (flags & PGO_DEACTIVATE && tpg->wire_count == 0) { @@ -1224,6 +1307,16 @@ retry: } uvm_page_array_fini(&a); + /* + * update ctime/mtime if the modification we started writing out might + * be from mmap'ed write. + * + * this is necessary when an application keeps a file mmaped and + * repeatedly modifies it via the window. note that, because we + * don't always write-protect pages when cleaning, such modifications + * might not involve any page faults. + */ + if (modified && (vp->v_iflag & VI_WRMAPDIRTY) != 0 && (vp->v_type != VBLK || (vp->v_mount->mnt_flag & MNT_NODEVMTIME) == 0)) { @@ -1231,34 +1324,13 @@ retry: } /* - * if we're cleaning and there was nothing to clean, - * take us off the syncer list. if we started any i/o - * and we're doing sync i/o, wait for all writes to finish. + * if we no longer have any possibly dirty pages, take us off the + * syncer list. 
*/ - if (cleanall && wasclean && gp->g_dirtygen == dirtygen && - (vp->v_iflag & VI_ONWORKLST) != 0) { -#if defined(DEBUG) - uvm_page_array_init(&a); - for (nextoff = 0;; nextoff = pg->offset + PAGE_SIZE) { - pg = uvm_page_array_fill_and_peek(&a, uobj, nextoff, - 0, 0); - if (pg == NULL) { - break; - } - uvm_page_array_advance(&a); - if ((pg->flags & (PG_FAKE | PG_MARKER)) != 0) { - continue; - } - if ((pg->flags & PG_CLEAN) == 0) { - printf("%s: %p: !CLEAN\n", __func__, pg); - } - if (pmap_is_modified(pg)) { - printf("%s: %p: modified\n", __func__, pg); - } - } - uvm_page_array_fini(&a); -#endif /* defined(DEBUG) */ + if ((vp->v_iflag & VI_ONWORKLST) != 0 && + radix_tree_empty_tagged_tree_p(&uobj->uo_pages, + UVM_PAGE_DIRTY_TAG)) { vp->v_iflag &= ~VI_WRMAPDIRTY; if (LIST_FIRST(&vp->v_dirtyblkhd) == NULL) vn_syncer_remove_from_worklist(vp); @@ -1557,7 +1629,7 @@ genfs_compat_getpages(void *v) pgs = ap->a_m; if (ap->a_flags & PGO_LOCKED) { - uvn_findpages(uobj, origoffset, ap->a_count, ap->a_m, + uvn_findpages(uobj, origoffset, ap->a_count, ap->a_m, NULL, UFP_NOWAIT|UFP_NOALLOC| (memwrite ? UFP_NORDONLY : 0)); error = ap->a_m[ap->a_centeridx] == NULL ? EBUSY : 0; @@ -1575,7 +1647,7 @@ genfs_compat_getpages(void *v) return 0; } npages = orignpages; - uvn_findpages(uobj, origoffset, &npages, pgs, UFP_ALL); + uvn_findpages(uobj, origoffset, &npages, pgs, NULL, UFP_ALL); mutex_exit(uobj->vmobjlock); kva = uvm_pagermapin(pgs, npages, UVMPAGER_MAPIN_READ | UVMPAGER_MAPIN_WAITOK); @@ -1608,7 +1680,7 @@ genfs_compat_getpages(void *v) if (error && (pg->flags & PG_FAKE) != 0) { pg->flags |= PG_RELEASED; } else { - pmap_clear_modify(pg); + uvm_pagemarkdirty(pg, UVM_PAGE_STATUS_UNKNOWN); uvm_pagelock(pg); uvm_pageactivate(pg); uvm_pageunlock(pg); Index: src/sys/miscfs/genfs/genfs_node.h diff -u src/sys/miscfs/genfs/genfs_node.h:1.22 src/sys/miscfs/genfs/genfs_node.h:1.23 --- src/sys/miscfs/genfs/genfs_node.h:1.22 Mon May 28 21:04:38 2018 +++ src/sys/miscfs/genfs/genfs_node.h Wed Jan 15 17:55:44 2020 @@ -1,4 +1,4 @@ -/* $NetBSD: genfs_node.h,v 1.22 2018/05/28 21:04:38 chs Exp $ */ +/* $NetBSD: genfs_node.h,v 1.23 2020/01/15 17:55:44 ad Exp $ */ /* * Copyright (c) 2001 Chuck Silvers. 
@@ -80,7 +80,6 @@ struct genfs_ops { struct genfs_node { const struct genfs_ops *g_op; /* ops vector */ krwlock_t g_glock; /* getpages lock */ - int g_dirtygen; }; #define VTOG(vp) ((struct genfs_node *)(vp)->v_data) Index: src/sys/nfs/nfs_bio.c diff -u src/sys/nfs/nfs_bio.c:1.192 src/sys/nfs/nfs_bio.c:1.193 --- src/sys/nfs/nfs_bio.c:1.192 Fri Dec 13 20:10:21 2019 +++ src/sys/nfs/nfs_bio.c Wed Jan 15 17:55:44 2020 @@ -1,4 +1,4 @@ -/* $NetBSD: nfs_bio.c,v 1.192 2019/12/13 20:10:21 ad Exp $ */ +/* $NetBSD: nfs_bio.c,v 1.193 2020/01/15 17:55:44 ad Exp $ */ /* * Copyright (c) 1989, 1993 @@ -35,7 +35,7 @@ */ #include <sys/cdefs.h> -__KERNEL_RCSID(0, "$NetBSD: nfs_bio.c,v 1.192 2019/12/13 20:10:21 ad Exp $"); +__KERNEL_RCSID(0, "$NetBSD: nfs_bio.c,v 1.193 2020/01/15 17:55:44 ad Exp $"); #ifdef _KERNEL_OPT #include "opt_nfs.h" @@ -1120,7 +1120,8 @@ again: */ mutex_enter(uobj->vmobjlock); for (i = 0; i < npages; i++) { - pgs[i]->flags &= ~PG_CLEAN; + uvm_pagemarkdirty(pgs[i], + UVM_PAGE_STATUS_DIRTY); } mutex_exit(uobj->vmobjlock); } Index: src/sys/rump/librump/rumpkern/Makefile.rumpkern diff -u src/sys/rump/librump/rumpkern/Makefile.rumpkern:1.181 src/sys/rump/librump/rumpkern/Makefile.rumpkern:1.182 --- src/sys/rump/librump/rumpkern/Makefile.rumpkern:1.181 Fri Dec 20 21:20:09 2019 +++ src/sys/rump/librump/rumpkern/Makefile.rumpkern Wed Jan 15 17:55:44 2020 @@ -1,4 +1,4 @@ -# $NetBSD: Makefile.rumpkern,v 1.181 2019/12/20 21:20:09 ad Exp $ +# $NetBSD: Makefile.rumpkern,v 1.182 2020/01/15 17:55:44 ad Exp $ # IOCONFDIR:= ${.PARSEDIR} @@ -139,7 +139,7 @@ SRCS+= init_sysctl_base.c \ # sys/uvm SRCS+= uvm_aobj.c uvm_readahead.c uvm_object.c uvm_swapstub.c -SRCS+= uvm_page_array.c +SRCS+= uvm_page_array.c uvm_page_status.c # 4.4BSD secmodel. selection is hardcoded for now SRCS+= secmodel.c Index: src/sys/rump/librump/rumpkern/vm.c diff -u src/sys/rump/librump/rumpkern/vm.c:1.182 src/sys/rump/librump/rumpkern/vm.c:1.183 --- src/sys/rump/librump/rumpkern/vm.c:1.182 Sun Jan 5 15:57:15 2020 +++ src/sys/rump/librump/rumpkern/vm.c Wed Jan 15 17:55:44 2020 @@ -1,4 +1,4 @@ -/* $NetBSD: vm.c,v 1.182 2020/01/05 15:57:15 para Exp $ */ +/* $NetBSD: vm.c,v 1.183 2020/01/15 17:55:44 ad Exp $ */ /* * Copyright (c) 2007-2011 Antti Kantee. All Rights Reserved. @@ -41,7 +41,7 @@ */ #include <sys/cdefs.h> -__KERNEL_RCSID(0, "$NetBSD: vm.c,v 1.182 2020/01/05 15:57:15 para Exp $"); +__KERNEL_RCSID(0, "$NetBSD: vm.c,v 1.183 2020/01/15 17:55:44 ad Exp $"); #include <sys/param.h> #include <sys/atomic.h> @@ -235,7 +235,7 @@ void uvm_pagezero(struct vm_page *pg) { - pg->flags &= ~PG_CLEAN; + uvm_pagemarkdirty(pg, UVM_PAGE_STATUS_DIRTY); memset((void *)pg->uanon, 0, PAGE_SIZE); } Index: src/sys/rump/librump/rumpvfs/vm_vfs.c diff -u src/sys/rump/librump/rumpvfs/vm_vfs.c:1.35 src/sys/rump/librump/rumpvfs/vm_vfs.c:1.36 --- src/sys/rump/librump/rumpvfs/vm_vfs.c:1.35 Fri Dec 13 20:10:22 2019 +++ src/sys/rump/librump/rumpvfs/vm_vfs.c Wed Jan 15 17:55:44 2020 @@ -1,4 +1,4 @@ -/* $NetBSD: vm_vfs.c,v 1.35 2019/12/13 20:10:22 ad Exp $ */ +/* $NetBSD: vm_vfs.c,v 1.36 2020/01/15 17:55:44 ad Exp $ */ /* * Copyright (c) 2008-2011 Antti Kantee. All Rights Reserved. 
@@ -26,7 +26,7 @@ */ #include <sys/cdefs.h> -__KERNEL_RCSID(0, "$NetBSD: vm_vfs.c,v 1.35 2019/12/13 20:10:22 ad Exp $"); +__KERNEL_RCSID(0, "$NetBSD: vm_vfs.c,v 1.36 2020/01/15 17:55:44 ad Exp $"); #include <sys/param.h> @@ -141,7 +141,7 @@ ubc_zerorange(struct uvm_object *uobj, o start = (uint8_t *)pg->uanon + chunkoff; memset(start, 0, chunklen); - pg->flags &= ~PG_CLEAN; + uvm_pagemarkdirty(pg, UVM_PAGE_STATUS_DIRTY); off += chunklen; len -= chunklen; @@ -210,8 +210,10 @@ ubc_uiomove(struct uvm_object *uobj, str mutex_exit(uobj->vmobjlock); goto out; } - if (uio->uio_rw == UIO_WRITE) - pg->flags &= ~(PG_CLEAN | PG_FAKE); + if (uio->uio_rw == UIO_WRITE) { + pg->flags &= ~PG_FAKE; + uvm_pagemarkdirty(pg, UVM_PAGE_STATUS_DIRTY); + } todo -= xfersize; } uvm_page_unbusy(pgs, npages); Index: src/sys/sys/cpu_data.h diff -u src/sys/sys/cpu_data.h:1.48 src/sys/sys/cpu_data.h:1.49 --- src/sys/sys/cpu_data.h:1.48 Sun Jan 12 13:29:24 2020 +++ src/sys/sys/cpu_data.h Wed Jan 15 17:55:44 2020 @@ -1,4 +1,4 @@ -/* $NetBSD: cpu_data.h,v 1.48 2020/01/12 13:29:24 ad Exp $ */ +/* $NetBSD: cpu_data.h,v 1.49 2020/01/15 17:55:44 ad Exp $ */ /*- * Copyright (c) 2004, 2006, 2007, 2008, 2019 The NetBSD Foundation, Inc. @@ -87,7 +87,15 @@ enum cpu_count { CPU_COUNT_FLTNOANON, CPU_COUNT_FLTNORAM, CPU_COUNT_FLTPGRELE, - CPU_COUNT_MAX /* 40 */ + CPU_COUNT_ANONUNKNOWN, /* 40 */ + CPU_COUNT_ANONCLEAN, + CPU_COUNT_ANONDIRTY, + CPU_COUNT_FILEUNKNOWN, + CPU_COUNT_FILECLEAN, + CPU_COUNT_FILEDIRTY, + CPU_COUNT__UNUSED1, + CPU_COUNT__UNUSED2, + CPU_COUNT_MAX /* 48 */ }; /* Index: src/sys/ufs/lfs/lfs_pages.c diff -u src/sys/ufs/lfs/lfs_pages.c:1.19 src/sys/ufs/lfs/lfs_pages.c:1.20 --- src/sys/ufs/lfs/lfs_pages.c:1.19 Tue Dec 31 22:42:51 2019 +++ src/sys/ufs/lfs/lfs_pages.c Wed Jan 15 17:55:44 2020 @@ -1,4 +1,4 @@ -/* $NetBSD: lfs_pages.c,v 1.19 2019/12/31 22:42:51 ad Exp $ */ +/* $NetBSD: lfs_pages.c,v 1.20 2020/01/15 17:55:44 ad Exp $ */ /*- * Copyright (c) 1999, 2000, 2001, 2002, 2003, 2019 The NetBSD Foundation, Inc. 
@@ -60,7 +60,7 @@ */ #include <sys/cdefs.h> -__KERNEL_RCSID(0, "$NetBSD: lfs_pages.c,v 1.19 2019/12/31 22:42:51 ad Exp $"); +__KERNEL_RCSID(0, "$NetBSD: lfs_pages.c,v 1.20 2020/01/15 17:55:44 ad Exp $"); #ifdef _KERNEL_OPT #include "opt_compat_netbsd.h" @@ -306,8 +306,10 @@ check_dirty(struct lfs *fs, struct vnode UVM_PAGE_OWN(pg, "lfs_putpages"); pmap_page_protect(pg, VM_PROT_NONE); - tdirty = (pmap_clear_modify(pg) || - (pg->flags & PG_CLEAN) == 0); + tdirty = + uvm_pagegetdirty(pg) != UVM_PAGE_STATUS_CLEAN && + (uvm_pagegetdirty(pg) == UVM_PAGE_STATUS_DIRTY || + pmap_clear_modify(pg)); dirty += tdirty; } if ((pages_per_block > 0 && nonexistent >= pages_per_block) || @@ -329,10 +331,11 @@ check_dirty(struct lfs *fs, struct vnode for (i = 0; i == 0 || i < pages_per_block; i++) { KASSERT(mutex_owned(vp->v_interlock)); pg = pgs[i]; - KASSERT(!((pg->flags & PG_CLEAN) && (pg->flags & PG_DELWRI))); + KASSERT(!(uvm_pagegetdirty(pg) != UVM_PAGE_STATUS_DIRTY + && (pg->flags & PG_DELWRI))); KASSERT(pg->flags & PG_BUSY); if (dirty) { - pg->flags &= ~PG_CLEAN; + uvm_pagemarkdirty(pg, UVM_PAGE_STATUS_DIRTY); if (flags & PGO_FREE) { /* * Wire the page so that Index: src/sys/ufs/lfs/lfs_segment.c diff -u src/sys/ufs/lfs/lfs_segment.c:1.280 src/sys/ufs/lfs/lfs_segment.c:1.281 --- src/sys/ufs/lfs/lfs_segment.c:1.280 Sun Dec 8 19:52:37 2019 +++ src/sys/ufs/lfs/lfs_segment.c Wed Jan 15 17:55:44 2020 @@ -1,4 +1,4 @@ -/* $NetBSD: lfs_segment.c,v 1.280 2019/12/08 19:52:37 ad Exp $ */ +/* $NetBSD: lfs_segment.c,v 1.281 2020/01/15 17:55:44 ad Exp $ */ /*- * Copyright (c) 1999, 2000, 2001, 2002, 2003 The NetBSD Foundation, Inc. @@ -60,7 +60,7 @@ */ #include <sys/cdefs.h> -__KERNEL_RCSID(0, "$NetBSD: lfs_segment.c,v 1.280 2019/12/08 19:52:37 ad Exp $"); +__KERNEL_RCSID(0, "$NetBSD: lfs_segment.c,v 1.281 2020/01/15 17:55:44 ad Exp $"); #ifdef DEBUG # define vndebug(vp, str) do { \ @@ -241,7 +241,8 @@ lfs_vflush(struct vnode *vp) pg = uvm_pagelookup(&vp->v_uobj, off); if (pg == NULL) continue; - if ((pg->flags & PG_CLEAN) == 0 || + if (uvm_pagegetdirty(pg) + == UVM_PAGE_STATUS_DIRTY || pmap_is_modified(pg)) { lfs_sb_addavail(fs, lfs_btofsb(fs, Index: src/sys/ufs/lfs/lfs_vfsops.c diff -u src/sys/ufs/lfs/lfs_vfsops.c:1.367 src/sys/ufs/lfs/lfs_vfsops.c:1.368 --- src/sys/ufs/lfs/lfs_vfsops.c:1.367 Tue Dec 31 22:42:51 2019 +++ src/sys/ufs/lfs/lfs_vfsops.c Wed Jan 15 17:55:44 2020 @@ -1,4 +1,4 @@ -/* $NetBSD: lfs_vfsops.c,v 1.367 2019/12/31 22:42:51 ad Exp $ */ +/* $NetBSD: lfs_vfsops.c,v 1.368 2020/01/15 17:55:44 ad Exp $ */ /*- * Copyright (c) 1999, 2000, 2001, 2002, 2003, 2007, 2007 @@ -61,7 +61,7 @@ */ #include <sys/cdefs.h> -__KERNEL_RCSID(0, "$NetBSD: lfs_vfsops.c,v 1.367 2019/12/31 22:42:51 ad Exp $"); +__KERNEL_RCSID(0, "$NetBSD: lfs_vfsops.c,v 1.368 2020/01/15 17:55:44 ad Exp $"); #if defined(_KERNEL_OPT) #include "opt_lfs.h" @@ -2249,7 +2249,8 @@ lfs_gop_write(struct vnode *vp, struct v } uvm_pageactivate(pg); uvm_pageunlock(pg); - pg->flags &= ~(PG_CLEAN|PG_DELWRI|PG_PAGEOUT|PG_RELEASED); + pg->flags &= ~(PG_DELWRI|PG_PAGEOUT|PG_RELEASED); + uvm_pagemarkdirty(pg, UVM_PAGE_STATUS_DIRTY); DLOG((DLOG_PAGE, "pg[%d] = %p (vp %p off %" PRIx64 ")\n", i, pg, vp, pg->offset)); DLOG((DLOG_PAGE, "pg[%d]->flags = %x\n", i, pg->flags)); Index: src/sys/ufs/lfs/ulfs_inode.c diff -u src/sys/ufs/lfs/ulfs_inode.c:1.23 src/sys/ufs/lfs/ulfs_inode.c:1.24 --- src/sys/ufs/lfs/ulfs_inode.c:1.23 Tue Dec 31 22:42:51 2019 +++ src/sys/ufs/lfs/ulfs_inode.c Wed Jan 15 17:55:44 2020 @@ -1,4 +1,4 @@ -/* $NetBSD: ulfs_inode.c,v 1.23 
2019/12/31 22:42:51 ad Exp $ */ +/* $NetBSD: ulfs_inode.c,v 1.24 2020/01/15 17:55:44 ad Exp $ */ /* from NetBSD: ufs_inode.c,v 1.95 2015/06/13 14:56:45 hannken Exp */ /* @@ -38,7 +38,7 @@ */ #include <sys/cdefs.h> -__KERNEL_RCSID(0, "$NetBSD: ulfs_inode.c,v 1.23 2019/12/31 22:42:51 ad Exp $"); +__KERNEL_RCSID(0, "$NetBSD: ulfs_inode.c,v 1.24 2020/01/15 17:55:44 ad Exp $"); #if defined(_KERNEL_OPT) #include "opt_lfs.h" @@ -223,13 +223,13 @@ ulfs_balloc_range(struct vnode *vp, off_ genfs_node_unlock(vp); /* - * if the allocation succeeded, clear PG_CLEAN on all the pages - * and clear PG_RDONLY on any pages that are now fully backed - * by disk blocks. if the allocation failed, we do not invalidate - * the pages since they might have already existed and been dirty, - * in which case we need to keep them around. if we created the pages, - * they will be clean and read-only, and leaving such pages - * in the cache won't cause any problems. + * if the allocation succeeded, mark all pages dirty and clear + * PG_RDONLY on any pages that are now fully backed by disk blocks. + * if the allocation failed, we do not invalidate the pages since + * they might have already existed and been dirty, in which case we + * need to keep them around. if we created the pages, they will be + * clean and read-only, and leaving such pages in the cache won't + * cause any problems. */ GOP_SIZE(vp, off + len, &eob, 0); @@ -241,7 +241,7 @@ ulfs_balloc_range(struct vnode *vp, off_ pagestart + ((i + 1) << PAGE_SHIFT) <= eob) { pgs[i]->flags &= ~PG_RDONLY; } - pgs[i]->flags &= ~PG_CLEAN; + uvm_pagemarkdirty(pgs[i], UVM_PAGE_STATUS_DIRTY); } uvm_pagelock(pgs[i]); uvm_pageactivate(pgs[i]); Index: src/sys/ufs/ufs/ufs_inode.c diff -u src/sys/ufs/ufs/ufs_inode.c:1.107 src/sys/ufs/ufs/ufs_inode.c:1.108 --- src/sys/ufs/ufs/ufs_inode.c:1.107 Tue Dec 31 22:42:51 2019 +++ src/sys/ufs/ufs/ufs_inode.c Wed Jan 15 17:55:44 2020 @@ -1,4 +1,4 @@ -/* $NetBSD: ufs_inode.c,v 1.107 2019/12/31 22:42:51 ad Exp $ */ +/* $NetBSD: ufs_inode.c,v 1.108 2020/01/15 17:55:44 ad Exp $ */ /* * Copyright (c) 1991, 1993 @@ -37,7 +37,7 @@ */ #include <sys/cdefs.h> -__KERNEL_RCSID(0, "$NetBSD: ufs_inode.c,v 1.107 2019/12/31 22:42:51 ad Exp $"); +__KERNEL_RCSID(0, "$NetBSD: ufs_inode.c,v 1.108 2020/01/15 17:55:44 ad Exp $"); #if defined(_KERNEL_OPT) #include "opt_ffs.h" @@ -259,7 +259,7 @@ ufs_balloc_range(struct vnode *vp, off_t genfs_node_unlock(vp); /* - * if the allocation succeeded, clear PG_CLEAN on all the pages + * if the allocation succeeded, mark all the pages dirty * and clear PG_RDONLY on any pages that are now fully backed * by disk blocks. 
if the allocation failed, we do not invalidate * the pages since they might have already existed and been dirty, @@ -277,7 +277,7 @@ ufs_balloc_range(struct vnode *vp, off_t pagestart + ((i + 1) << PAGE_SHIFT) <= eob) { pgs[i]->flags &= ~PG_RDONLY; } - pgs[i]->flags &= ~PG_CLEAN; + uvm_pagemarkdirty(pgs[i], UVM_PAGE_STATUS_DIRTY); } uvm_pagelock(pgs[i]); uvm_pageactivate(pgs[i]); Index: src/sys/uvm/files.uvm diff -u src/sys/uvm/files.uvm:1.32 src/sys/uvm/files.uvm:1.33 --- src/sys/uvm/files.uvm:1.32 Fri Dec 27 12:51:57 2019 +++ src/sys/uvm/files.uvm Wed Jan 15 17:55:45 2020 @@ -1,4 +1,4 @@ -# $NetBSD: files.uvm,v 1.32 2019/12/27 12:51:57 ad Exp $ +# $NetBSD: files.uvm,v 1.33 2020/01/15 17:55:45 ad Exp $ # # UVM options @@ -38,6 +38,7 @@ file uvm/uvm_mremap.c uvm file uvm/uvm_object.c uvm file uvm/uvm_page.c uvm file uvm/uvm_page_array.c uvm +file uvm/uvm_page_status.c uvm file uvm/uvm_pager.c uvm file uvm/uvm_pdaemon.c uvm file uvm/uvm_pdpolicy_clock.c !pdpolicy_clockpro Index: src/sys/uvm/uvm_anon.c diff -u src/sys/uvm/uvm_anon.c:1.70 src/sys/uvm/uvm_anon.c:1.71 --- src/sys/uvm/uvm_anon.c:1.70 Tue Dec 31 22:42:51 2019 +++ src/sys/uvm/uvm_anon.c Wed Jan 15 17:55:45 2020 @@ -1,4 +1,4 @@ -/* $NetBSD: uvm_anon.c,v 1.70 2019/12/31 22:42:51 ad Exp $ */ +/* $NetBSD: uvm_anon.c,v 1.71 2020/01/15 17:55:45 ad Exp $ */ /* * Copyright (c) 1997 Charles D. Cranor and Washington University. @@ -30,7 +30,7 @@ */ #include <sys/cdefs.h> -__KERNEL_RCSID(0, "$NetBSD: uvm_anon.c,v 1.70 2019/12/31 22:42:51 ad Exp $"); +__KERNEL_RCSID(0, "$NetBSD: uvm_anon.c,v 1.71 2020/01/15 17:55:45 ad Exp $"); #include "opt_uvmhist.h" @@ -346,7 +346,7 @@ uvm_anon_pagein(struct vm_amap *amap, st uvm_swap_free(anon->an_swslot, 1); } anon->an_swslot = 0; - pg->flags &= ~PG_CLEAN; + uvm_pagemarkdirty(pg, UVM_PAGE_STATUS_DIRTY); /* * Deactivate the page (to put it on a page queue). Index: src/sys/uvm/uvm_aobj.c diff -u src/sys/uvm/uvm_aobj.c:1.133 src/sys/uvm/uvm_aobj.c:1.134 --- src/sys/uvm/uvm_aobj.c:1.133 Tue Dec 31 22:42:51 2019 +++ src/sys/uvm/uvm_aobj.c Wed Jan 15 17:55:45 2020 @@ -1,4 +1,4 @@ -/* $NetBSD: uvm_aobj.c,v 1.133 2019/12/31 22:42:51 ad Exp $ */ +/* $NetBSD: uvm_aobj.c,v 1.134 2020/01/15 17:55:45 ad Exp $ */ /* * Copyright (c) 1998 Chuck Silvers, Charles D. Cranor and @@ -38,7 +38,7 @@ */ #include <sys/cdefs.h> -__KERNEL_RCSID(0, "$NetBSD: uvm_aobj.c,v 1.133 2019/12/31 22:42:51 ad Exp $"); +__KERNEL_RCSID(0, "$NetBSD: uvm_aobj.c,v 1.134 2020/01/15 17:55:45 ad Exp $"); #ifdef _KERNEL_OPT #include "opt_uvmhist.h" @@ -849,7 +849,8 @@ uao_get(struct uvm_object *uobj, voff_t if (ptmp) { /* new page */ ptmp->flags &= ~(PG_FAKE); - ptmp->flags |= PG_AOBJ; + uvm_pagemarkdirty(ptmp, + UVM_PAGE_STATUS_UNKNOWN); goto gotpage; } } @@ -870,6 +871,8 @@ uao_get(struct uvm_object *uobj, voff_t * useful page: busy/lock it and plug it in our * result array */ + KASSERT(uvm_pagegetdirty(ptmp) != + UVM_PAGE_STATUS_CLEAN); /* caller must un-busy this page */ ptmp->flags |= PG_BUSY; @@ -951,8 +954,6 @@ gotpage: continue; } - ptmp->flags |= PG_AOBJ; - /* * got new page ready for I/O. break pps while * loop. pps[lcv] is still NULL. @@ -980,6 +981,8 @@ gotpage: * loop). 
*/ + KASSERT(uvm_pagegetdirty(ptmp) != + UVM_PAGE_STATUS_CLEAN); /* we own it, caller must un-busy */ ptmp->flags |= PG_BUSY; UVM_PAGE_OWN(ptmp, "uao_get2"); @@ -1060,10 +1063,11 @@ gotpage: #endif /* defined(VMSWAP) */ } - if ((access_type & VM_PROT_WRITE) == 0) { - ptmp->flags |= PG_CLEAN; - pmap_clear_modify(ptmp); - } + /* + * note that we will allow the page being writably-mapped + * (!PG_RDONLY) regardless of access_type. + */ + uvm_pagemarkdirty(ptmp, UVM_PAGE_STATUS_UNKNOWN); /* * we got the page! clear the fake flag (indicates valid @@ -1075,7 +1079,8 @@ gotpage: * => unbusy the page * => activate the page */ - + KASSERT(uvm_pagegetdirty(ptmp) != UVM_PAGE_STATUS_CLEAN); + KASSERT((ptmp->flags & PG_FAKE) != 0); ptmp->flags &= ~PG_FAKE; pps[lcv] = ptmp; } @@ -1308,7 +1313,8 @@ uao_pagein_page(struct uvm_aobj *aobj, i if (pg->flags & PG_WANTED) { wakeup(pg); } - pg->flags &= ~(PG_WANTED|PG_BUSY|PG_CLEAN|PG_FAKE); + pg->flags &= ~(PG_WANTED|PG_BUSY|PG_FAKE); + uvm_pagemarkdirty(pg, UVM_PAGE_STATUS_DIRTY); UVM_PAGE_OWN(pg, NULL); return false; Index: src/sys/uvm/uvm_bio.c diff -u src/sys/uvm/uvm_bio.c:1.102 src/sys/uvm/uvm_bio.c:1.103 --- src/sys/uvm/uvm_bio.c:1.102 Tue Dec 31 22:42:51 2019 +++ src/sys/uvm/uvm_bio.c Wed Jan 15 17:55:45 2020 @@ -1,4 +1,4 @@ -/* $NetBSD: uvm_bio.c,v 1.102 2019/12/31 22:42:51 ad Exp $ */ +/* $NetBSD: uvm_bio.c,v 1.103 2020/01/15 17:55:45 ad Exp $ */ /* * Copyright (c) 1998 Chuck Silvers. @@ -34,7 +34,7 @@ */ #include <sys/cdefs.h> -__KERNEL_RCSID(0, "$NetBSD: uvm_bio.c,v 1.102 2019/12/31 22:42:51 ad Exp $"); +__KERNEL_RCSID(0, "$NetBSD: uvm_bio.c,v 1.103 2020/01/15 17:55:45 ad Exp $"); #include "opt_uvmhist.h" #include "opt_ubc.h" @@ -230,13 +230,11 @@ static inline int ubc_fault_page(const struct uvm_faultinfo *ufi, const struct ubc_map *umap, struct vm_page *pg, vm_prot_t prot, vm_prot_t access_type, vaddr_t va) { - struct uvm_object *uobj; vm_prot_t mask; int error; bool rdonly; - uobj = pg->uobject; - KASSERT(mutex_owned(uobj->vmobjlock)); + KASSERT(mutex_owned(pg->uobject->vmobjlock)); if (pg->flags & PG_WANTED) { wakeup(pg); @@ -270,6 +268,9 @@ ubc_fault_page(const struct uvm_faultinf /* * Note that a page whose backing store is partially allocated * is marked as PG_RDONLY. + * + * it's a responsibility of ubc_alloc's caller to allocate backing + * blocks before writing to the window. */ KASSERT((pg->flags & PG_RDONLY) == 0 || @@ -277,9 +278,7 @@ ubc_fault_page(const struct uvm_faultinf pg->offset < umap->writeoff || pg->offset + PAGE_SIZE > umap->writeoff + umap->writelen); - rdonly = ((access_type & VM_PROT_WRITE) == 0 && - (pg->flags & PG_RDONLY) != 0) || - UVM_OBJ_NEEDS_WRITEFAULT(uobj); + rdonly = uvm_pagereadonly_p(pg); mask = rdonly ? ~VM_PROT_WRITE : VM_PROT_ALL; error = pmap_enter(ufi->orig_map->pmap, va, VM_PAGE_TO_PHYS(pg), @@ -665,7 +664,10 @@ ubc_release(void *va, int flags) umapva + slot_offset + (i << PAGE_SHIFT), &pa); KASSERT(rv); pgs[i] = PHYS_TO_VM_PAGE(pa); - pgs[i]->flags &= ~(PG_FAKE|PG_CLEAN); + pgs[i]->flags &= ~PG_FAKE; + KASSERTMSG(uvm_pagegetdirty(pgs[i]) == + UVM_PAGE_STATUS_DIRTY, + "page %p not dirty", pgs[i]); KASSERT(pgs[i]->loan_count == 0); uvm_pagelock(pgs[i]); uvm_pageactivate(pgs[i]); @@ -896,9 +898,18 @@ ubc_direct_release(struct uvm_object *uo uvm_pageactivate(pg); uvm_pageunlock(pg); - /* Page was changed, no longer fake and neither clean */ - if (flags & UBC_WRITE) - pg->flags &= ~(PG_FAKE|PG_CLEAN); + /* + * Page was changed, no longer fake and neither clean. 
+ * There's no managed mapping in the direct case, so + * mark the page dirty manually. + */ + if (flags & UBC_WRITE) { + pg->flags &= ~PG_FAKE; + KASSERTMSG(uvm_pagegetdirty(pg) == + UVM_PAGE_STATUS_DIRTY, + "page %p not dirty", pg); + uvm_pagemarkdirty(pg, UVM_PAGE_STATUS_DIRTY); + } } uvm_page_unbusy(pgs, npages); mutex_exit(uobj->vmobjlock); Index: src/sys/uvm/uvm_extern.h diff -u src/sys/uvm/uvm_extern.h:1.218 src/sys/uvm/uvm_extern.h:1.219 --- src/sys/uvm/uvm_extern.h:1.218 Tue Dec 31 22:42:51 2019 +++ src/sys/uvm/uvm_extern.h Wed Jan 15 17:55:45 2020 @@ -1,4 +1,4 @@ -/* $NetBSD: uvm_extern.h,v 1.218 2019/12/31 22:42:51 ad Exp $ */ +/* $NetBSD: uvm_extern.h,v 1.219 2020/01/15 17:55:45 ad Exp $ */ /* * Copyright (c) 1997 Charles D. Cranor and Washington University. @@ -500,6 +500,12 @@ struct uvmexp_sysctl { int64_t poolpages; int64_t countsyncone; int64_t countsyncall; + int64_t anonunknown; + int64_t anonclean; + int64_t anondirty; + int64_t fileunknown; + int64_t fileclean; + int64_t filedirty; }; #ifdef _KERNEL @@ -779,10 +785,12 @@ int uvm_grow(struct proc *, vaddr_t); void uvm_deallocate(struct vm_map *, vaddr_t, vsize_t); /* uvm_vnode.c */ +struct uvm_page_array; void uvm_vnp_setsize(struct vnode *, voff_t); void uvm_vnp_setwritesize(struct vnode *, voff_t); int uvn_findpages(struct uvm_object *, voff_t, - int *, struct vm_page **, int); + unsigned int *, struct vm_page **, + struct uvm_page_array *, unsigned int); bool uvn_text_p(struct uvm_object *); bool uvn_clean_p(struct uvm_object *); bool uvn_needs_writefault_p(struct uvm_object *); Index: src/sys/uvm/uvm_fault.c diff -u src/sys/uvm/uvm_fault.c:1.214 src/sys/uvm/uvm_fault.c:1.215 --- src/sys/uvm/uvm_fault.c:1.214 Tue Dec 31 22:42:51 2019 +++ src/sys/uvm/uvm_fault.c Wed Jan 15 17:55:45 2020 @@ -1,4 +1,4 @@ -/* $NetBSD: uvm_fault.c,v 1.214 2019/12/31 22:42:51 ad Exp $ */ +/* $NetBSD: uvm_fault.c,v 1.215 2020/01/15 17:55:45 ad Exp $ */ /* * Copyright (c) 1997 Charles D. Cranor and Washington University. @@ -32,7 +32,7 @@ */ #include <sys/cdefs.h> -__KERNEL_RCSID(0, "$NetBSD: uvm_fault.c,v 1.214 2019/12/31 22:42:51 ad Exp $"); +__KERNEL_RCSID(0, "$NetBSD: uvm_fault.c,v 1.215 2020/01/15 17:55:45 ad Exp $"); #include "opt_uvmhist.h" @@ -378,7 +378,7 @@ uvmfault_anonget(struct uvm_faultinfo *u uvmfault_unlockall(ufi, amap, NULL); /* - * Pass a PG_BUSY+PG_FAKE+PG_CLEAN page into + * Pass a PG_BUSY+PG_FAKE clean page into * the uvm_swap_get() function with all data * structures unlocked. 
Note that it is OK * to read an_swslot here, because we hold @@ -488,6 +488,7 @@ released: uvm_pageactivate(pg); uvm_pageunlock(pg); pg->flags &= ~(PG_WANTED|PG_BUSY|PG_FAKE); + uvm_pagemarkdirty(pg, UVM_PAGE_STATUS_UNKNOWN); UVM_PAGE_OWN(pg, NULL); #else panic("%s: we_own", __func__); @@ -640,6 +641,7 @@ uvmfault_promote(struct uvm_faultinfo *u if (opg) { uvm_pagecopy(opg, pg); } + KASSERT(uvm_pagegetdirty(pg) == UVM_PAGE_STATUS_DIRTY); amap_add(&ufi->entry->aref, ufi->orig_rvaddr - ufi->entry->start, anon, oanon != NULL); @@ -782,7 +784,7 @@ static inline void uvm_fault_lower_looku struct vm_page **); static inline void uvm_fault_lower_neighbor( struct uvm_faultinfo *, const struct uvm_faultctx *, - vaddr_t, struct vm_page *, bool); + vaddr_t, struct vm_page *); static inline int uvm_fault_lower_io( struct uvm_faultinfo *, const struct uvm_faultctx *, struct uvm_object **, struct vm_page **); @@ -1256,6 +1258,11 @@ uvm_fault_upper_neighbor( /* locked: amap, anon */ + KASSERT(pg->uobject == NULL); + KASSERT(pg->uanon != NULL); + KASSERT(mutex_owned(pg->uanon->an_lock)); + KASSERT(uvm_pagegetdirty(pg) != UVM_PAGE_STATUS_CLEAN); + uvm_pagelock(pg); uvm_pageenqueue(pg); uvm_pageunlock(pg); @@ -1535,6 +1542,7 @@ uvm_fault_upper_enter( KASSERT(anon->an_lock == amap->am_lock); KASSERT(oanon->an_lock == amap->am_lock); KASSERT(uobj == NULL || mutex_owned(uobj->vmobjlock)); + KASSERT(uvm_pagegetdirty(pg) != UVM_PAGE_STATUS_CLEAN); /* * now map the page in. @@ -1612,21 +1620,20 @@ uvm_fault_upper_done( uvm_pagelock(pg); if (wire_paging) { uvm_pagewire(pg); - - /* - * since the now-wired page cannot be paged out, - * release its swap resources for others to use. - * since an anon with no swap cannot be PG_CLEAN, - * clear its clean flag now. - */ - - pg->flags &= ~(PG_CLEAN); } else { uvm_pageactivate(pg); } uvm_pageunlock(pg); if (wire_paging) { + /* + * since the now-wired page cannot be paged out, + * release its swap resources for others to use. + * and since an anon with no swap cannot be clean, + * mark it dirty now. 
+ */ + + uvm_pagemarkdirty(pg, UVM_PAGE_STATUS_DIRTY); uvm_anon_dropswap(anon); } } @@ -1744,7 +1751,7 @@ uvm_fault_lower( KASSERT(uobjpage != NULL); KASSERT(uobj == NULL || uobj == uobjpage->uobject); KASSERT(uobj == NULL || !UVM_OBJ_IS_CLEAN(uobjpage->uobject) || - (uobjpage->flags & PG_CLEAN) != 0); + uvm_pagegetdirty(uobjpage) == UVM_PAGE_STATUS_CLEAN); if (!flt->promote) { error = uvm_fault_lower_direct(ufi, flt, uobj, uobjpage); @@ -1813,12 +1820,7 @@ uvm_fault_lower_lookup( UVMHIST_LOG(maphist, " got uobjpage (0x%#jx) " "with locked get", (uintptr_t)curpg, 0, 0, 0); } else { - bool readonly = (curpg->flags & PG_RDONLY) - || (curpg->loan_count > 0) - || UVM_OBJ_NEEDS_WRITEFAULT(curpg->uobject); - - uvm_fault_lower_neighbor(ufi, flt, - currva, curpg, readonly); + uvm_fault_lower_neighbor(ufi, flt, currva, curpg); } } pmap_update(ufi->orig_map->pmap); @@ -1831,8 +1833,9 @@ uvm_fault_lower_lookup( static void uvm_fault_lower_neighbor( struct uvm_faultinfo *ufi, const struct uvm_faultctx *flt, - vaddr_t currva, struct vm_page *pg, bool readonly) + vaddr_t currva, struct vm_page *pg) { + const bool readonly = uvm_pagereadonly_p(pg) || pg->loan_count > 0; UVMHIST_FUNC(__func__); UVMHIST_CALLED(maphist); /* locked: maps(read), amap(if there), uobj */ @@ -1861,7 +1864,8 @@ uvm_fault_lower_neighbor( KASSERT((pg->flags & PG_PAGEOUT) == 0); KASSERT((pg->flags & PG_RELEASED) == 0); KASSERT((pg->flags & PG_WANTED) == 0); - KASSERT(!UVM_OBJ_IS_CLEAN(pg->uobject) || (pg->flags & PG_CLEAN) != 0); + KASSERT(!UVM_OBJ_IS_CLEAN(pg->uobject) || + uvm_pagegetdirty(pg) == UVM_PAGE_STATUS_CLEAN); pg->flags &= ~(PG_BUSY); UVM_PAGE_OWN(pg, NULL); @@ -2216,6 +2220,7 @@ uvm_fault_lower_enter( struct vm_anon *anon, struct vm_page *pg) { struct vm_amap * const amap = ufi->entry->aref.ar_amap; + const bool readonly = uvm_pagereadonly_p(pg); int error; UVMHIST_FUNC("uvm_fault_lower_enter"); UVMHIST_CALLED(maphist); @@ -2241,12 +2246,16 @@ uvm_fault_lower_enter( " MAPPING: case2: pm=%#jx, va=%#jx, pg=%#jx, promote=%jd", (uintptr_t)ufi->orig_map->pmap, ufi->orig_rvaddr, (uintptr_t)pg, flt->promote); - KASSERT((flt->access_type & VM_PROT_WRITE) == 0 || - (pg->flags & PG_RDONLY) == 0); + KASSERTMSG((flt->access_type & VM_PROT_WRITE) == 0 || !readonly, + "promote=%u cow_now=%u access_type=%x enter_prot=%x cow=%u " + "entry=%p map=%p orig_rvaddr=%p pg=%p", + flt->promote, flt->cow_now, flt->access_type, flt->enter_prot, + UVM_ET_ISCOPYONWRITE(ufi->entry), ufi->entry, ufi->orig_map, + (void *)ufi->orig_rvaddr, pg); + KASSERT((flt->access_type & VM_PROT_WRITE) == 0 || !readonly); if (pmap_enter(ufi->orig_map->pmap, ufi->orig_rvaddr, VM_PAGE_TO_PHYS(pg), - (pg->flags & PG_RDONLY) != 0 ? - flt->enter_prot & ~VM_PROT_WRITE : flt->enter_prot, + readonly ? flt->enter_prot & ~VM_PROT_WRITE : flt->enter_prot, flt->access_type | PMAP_CANFAIL | (flt->wire_mapping ? PMAP_WIRED : 0)) != 0) { @@ -2332,12 +2341,12 @@ uvm_fault_lower_done( /* * since the now-wired page cannot be paged out, * release its swap resources for others to use. - * since an aobj page with no swap cannot be PG_CLEAN, - * clear its clean flag now. + * since an aobj page with no swap cannot be clean, + * mark it dirty now. 
*/ KASSERT(uobj != NULL); - pg->flags &= ~(PG_CLEAN); + uvm_pagemarkdirty(pg, UVM_PAGE_STATUS_DIRTY); dropswap = true; } } else { Index: src/sys/uvm/uvm_loan.c diff -u src/sys/uvm/uvm_loan.c:1.93 src/sys/uvm/uvm_loan.c:1.94 --- src/sys/uvm/uvm_loan.c:1.93 Tue Dec 31 22:42:51 2019 +++ src/sys/uvm/uvm_loan.c Wed Jan 15 17:55:45 2020 @@ -1,4 +1,4 @@ -/* $NetBSD: uvm_loan.c,v 1.93 2019/12/31 22:42:51 ad Exp $ */ +/* $NetBSD: uvm_loan.c,v 1.94 2020/01/15 17:55:45 ad Exp $ */ /* * Copyright (c) 1997 Charles D. Cranor and Washington University. @@ -32,7 +32,7 @@ */ #include <sys/cdefs.h> -__KERNEL_RCSID(0, "$NetBSD: uvm_loan.c,v 1.93 2019/12/31 22:42:51 ad Exp $"); +__KERNEL_RCSID(0, "$NetBSD: uvm_loan.c,v 1.94 2020/01/15 17:55:45 ad Exp $"); #include <sys/param.h> #include <sys/systm.h> @@ -1126,22 +1126,13 @@ uvm_loanbreak(struct vm_page *uobjpage) * one and clear the fake flags on the new page (keep it busy). * force a reload of the old page by clearing it from all * pmaps. - * transfer dirtiness of the old page to the new page. * then rename the pages. */ uvm_pagecopy(uobjpage, pg); /* old -> new */ pg->flags &= ~PG_FAKE; + KASSERT(uvm_pagegetdirty(pg) == UVM_PAGE_STATUS_DIRTY); pmap_page_protect(uobjpage, VM_PROT_NONE); - if ((uobjpage->flags & PG_CLEAN) != 0 && !pmap_clear_modify(uobjpage)) { - pmap_clear_modify(pg); - pg->flags |= PG_CLEAN; - } else { - /* uvm_pagecopy marked it dirty */ - KASSERT((pg->flags & PG_CLEAN) == 0); - /* a object with a dirty page should be dirty. */ - KASSERT(!UVM_OBJ_IS_CLEAN(uobj)); - } if (uobjpage->flags & PG_WANTED) wakeup(uobjpage); /* uobj still locked */ @@ -1184,9 +1175,11 @@ int uvm_loanbreak_anon(struct vm_anon *anon, struct uvm_object *uobj) { struct vm_page *newpg, *oldpg; + unsigned oldstatus; KASSERT(mutex_owned(anon->an_lock)); KASSERT(uobj == NULL || mutex_owned(uobj->vmobjlock)); + KASSERT(anon->an_page->loan_count > 0); /* get new un-owned replacement page */ newpg = uvm_pagealloc(NULL, 0, NULL, 0); @@ -1197,24 +1190,29 @@ uvm_loanbreak_anon(struct vm_anon *anon, oldpg = anon->an_page; /* copy old -> new */ uvm_pagecopy(oldpg, newpg); + KASSERT(uvm_pagegetdirty(newpg) == UVM_PAGE_STATUS_DIRTY); /* force reload */ pmap_page_protect(oldpg, VM_PROT_NONE); + oldstatus = uvm_pagegetdirty(anon->an_page); uvm_pagelock2(oldpg, newpg); if (uobj == NULL) { /* * we were the lender (A->K); need to remove the page from * pageq's. + * + * PG_ANON is updated by the caller. */ + KASSERT((oldpg->flags & PG_ANON) != 0); + oldpg->flags &= ~PG_ANON; uvm_pagedequeue(oldpg); } oldpg->uanon = NULL; - /* in case we owned */ - oldpg->flags &= ~PG_ANON; if (uobj) { /* if we were receiver of loan */ + KASSERT((oldpg->pqflags & PG_ANON) == 0); oldpg->loan_count--; } @@ -1234,6 +1232,13 @@ uvm_loanbreak_anon(struct vm_anon *anon, } /* done! */ - + kpreempt_disable(); + if (uobj != NULL) { + CPU_COUNT(CPU_COUNT_ANONPAGES, 1); + } else { + CPU_COUNT(CPU_COUNT_ANONUNKNOWN + oldstatus, -1); + } + CPU_COUNT(CPU_COUNT_ANONDIRTY, 1); + kpreempt_enable(); return 0; } Index: src/sys/uvm/uvm_meter.c diff -u src/sys/uvm/uvm_meter.c:1.73 src/sys/uvm/uvm_meter.c:1.74 --- src/sys/uvm/uvm_meter.c:1.73 Tue Dec 31 13:07:14 2019 +++ src/sys/uvm/uvm_meter.c Wed Jan 15 17:55:45 2020 @@ -1,4 +1,4 @@ -/* $NetBSD: uvm_meter.c,v 1.73 2019/12/31 13:07:14 ad Exp $ */ +/* $NetBSD: uvm_meter.c,v 1.74 2020/01/15 17:55:45 ad Exp $ */ /* * Copyright (c) 1997 Charles D. Cranor and Washington University. 
@@ -36,7 +36,7 @@ */ #include <sys/cdefs.h> -__KERNEL_RCSID(0, "$NetBSD: uvm_meter.c,v 1.73 2019/12/31 13:07:14 ad Exp $"); +__KERNEL_RCSID(0, "$NetBSD: uvm_meter.c,v 1.74 2020/01/15 17:55:45 ad Exp $"); #include <sys/param.h> #include <sys/systm.h> @@ -180,6 +180,12 @@ sysctl_vm_uvmexp2(SYSCTLFN_ARGS) u.poolpages = pool_totalpages(); u.countsyncone = cpu_count_get(CPU_COUNT_SYNC_ONE); u.countsyncall = cpu_count_get(CPU_COUNT_SYNC_ALL); + u.anonunknown = (int)cpu_count_get(CPU_COUNT_ANONUNKNOWN); + u.anonclean = (int)cpu_count_get(CPU_COUNT_ANONCLEAN); + u.anondirty = (int)cpu_count_get(CPU_COUNT_ANONDIRTY); + u.fileunknown = (int)cpu_count_get(CPU_COUNT_FILEUNKNOWN); + u.fileclean = (int)cpu_count_get(CPU_COUNT_FILECLEAN); + u.filedirty = (int)cpu_count_get(CPU_COUNT_FILEDIRTY); node = *rnode; node.sysctl_data = &u; Index: src/sys/uvm/uvm_object.c diff -u src/sys/uvm/uvm_object.c:1.19 src/sys/uvm/uvm_object.c:1.20 --- src/sys/uvm/uvm_object.c:1.19 Tue Dec 31 22:42:51 2019 +++ src/sys/uvm/uvm_object.c Wed Jan 15 17:55:45 2020 @@ -1,4 +1,4 @@ -/* $NetBSD: uvm_object.c,v 1.19 2019/12/31 22:42:51 ad Exp $ */ +/* $NetBSD: uvm_object.c,v 1.20 2020/01/15 17:55:45 ad Exp $ */ /* * Copyright (c) 2006, 2010, 2019 The NetBSD Foundation, Inc. @@ -37,7 +37,7 @@ */ #include <sys/cdefs.h> -__KERNEL_RCSID(0, "$NetBSD: uvm_object.c,v 1.19 2019/12/31 22:42:51 ad Exp $"); +__KERNEL_RCSID(0, "$NetBSD: uvm_object.c,v 1.20 2020/01/15 17:55:45 ad Exp $"); #ifdef _KERNEL_OPT #include "opt_ddb.h" @@ -174,7 +174,8 @@ uvm_obj_wirepages(struct uvm_object *uob } if (pgs[i]->flags & PG_AOBJ) { - pgs[i]->flags &= ~(PG_CLEAN); + uvm_pagemarkdirty(pgs[i], + UVM_PAGE_STATUS_DIRTY); uao_dropswap(uobj, i); } } Index: src/sys/uvm/uvm_object.h diff -u src/sys/uvm/uvm_object.h:1.35 src/sys/uvm/uvm_object.h:1.36 --- src/sys/uvm/uvm_object.h:1.35 Sun Dec 15 21:11:35 2019 +++ src/sys/uvm/uvm_object.h Wed Jan 15 17:55:45 2020 @@ -1,4 +1,4 @@ -/* $NetBSD: uvm_object.h,v 1.35 2019/12/15 21:11:35 ad Exp $ */ +/* $NetBSD: uvm_object.h,v 1.36 2020/01/15 17:55:45 ad Exp $ */ /* * Copyright (c) 1997 Charles D. Cranor and Washington University. @@ -63,6 +63,13 @@ struct uvm_object { }; /* + * tags for uo_pages + */ + +#define UVM_PAGE_DIRTY_TAG 1 /* might be dirty (!PG_CLEAN) */ +#define UVM_PAGE_WRITEBACK_TAG 2 /* being written back */ + +/* * UVM_OBJ_KERN is a 'special' uo_refs value which indicates that the * object is a kernel memory object rather than a normal one (kernel * memory objects don't have reference counts -- they never die). Index: src/sys/uvm/uvm_page.c diff -u src/sys/uvm/uvm_page.c:1.223 src/sys/uvm/uvm_page.c:1.224 --- src/sys/uvm/uvm_page.c:1.223 Sat Jan 11 19:51:01 2020 +++ src/sys/uvm/uvm_page.c Wed Jan 15 17:55:45 2020 @@ -1,7 +1,7 @@ -/* $NetBSD: uvm_page.c,v 1.223 2020/01/11 19:51:01 ad Exp $ */ +/* $NetBSD: uvm_page.c,v 1.224 2020/01/15 17:55:45 ad Exp $ */ /*- - * Copyright (c) 2019 The NetBSD Foundation, Inc. + * Copyright (c) 2019, 2020 The NetBSD Foundation, Inc. * All rights reserved. 
* * This code is derived from software contributed to The NetBSD Foundation @@ -95,7 +95,7 @@ */ #include <sys/cdefs.h> -__KERNEL_RCSID(0, "$NetBSD: uvm_page.c,v 1.223 2020/01/11 19:51:01 ad Exp $"); +__KERNEL_RCSID(0, "$NetBSD: uvm_page.c,v 1.224 2020/01/15 17:55:45 ad Exp $"); #include "opt_ddb.h" #include "opt_uvm.h" @@ -217,19 +217,31 @@ uvm_pageinsert_object(struct uvm_object KASSERT(mutex_owned(uobj->vmobjlock)); KASSERT((pg->flags & PG_TABLED) == 0); - if (UVM_OBJ_IS_VNODE(uobj)) { - if (uobj->uo_npages == 0) { - struct vnode *vp = (struct vnode *)uobj; + if ((pg->flags & PG_STAT) != 0) { + /* Cannot use uvm_pagegetdirty(): not yet in radix tree. */ + const unsigned int status = pg->flags & (PG_CLEAN | PG_DIRTY); + const bool isaobj = (pg->flags & PG_AOBJ) != 0; + + if (!isaobj) { + KASSERT((pg->flags & PG_FILE) != 0); + if (uobj->uo_npages == 0) { + struct vnode *vp = (struct vnode *)uobj; - vholdl(vp); - } - if (UVM_OBJ_IS_VTEXT(uobj)) { - cpu_count(CPU_COUNT_EXECPAGES, 1); + vholdl(vp); + } + kpreempt_disable(); + if (UVM_OBJ_IS_VTEXT(uobj)) { + CPU_COUNT(CPU_COUNT_EXECPAGES, 1); + } else { + CPU_COUNT(CPU_COUNT_FILEPAGES, 1); + } + CPU_COUNT(CPU_COUNT_FILEUNKNOWN + status, 1); } else { - cpu_count(CPU_COUNT_FILEPAGES, 1); + kpreempt_disable(); + CPU_COUNT(CPU_COUNT_ANONPAGES, 1); + CPU_COUNT(CPU_COUNT_ANONUNKNOWN + status, 1); } - } else if (UVM_OBJ_IS_AOBJ(uobj)) { - cpu_count(CPU_COUNT_ANONPAGES, 1); + kpreempt_enable(); } pg->flags |= PG_TABLED; uobj->uo_npages++; @@ -245,6 +257,11 @@ uvm_pageinsert_tree(struct uvm_object *u if (error != 0) { return error; } + if ((pg->flags & PG_CLEAN) == 0) { + radix_tree_set_tag(&uobj->uo_pages, idx, UVM_PAGE_DIRTY_TAG); + } + KASSERT(((pg->flags & PG_CLEAN) == 0) == + radix_tree_get_tag(&uobj->uo_pages, idx, UVM_PAGE_DIRTY_TAG)); return 0; } @@ -262,22 +279,32 @@ uvm_pageremove_object(struct uvm_object KASSERT(mutex_owned(uobj->vmobjlock)); KASSERT(pg->flags & PG_TABLED); - if (UVM_OBJ_IS_VNODE(uobj)) { - if (uobj->uo_npages == 1) { - struct vnode *vp = (struct vnode *)uobj; + if ((pg->flags & PG_STAT) != 0) { + /* Cannot use uvm_pagegetdirty(): no longer in radix tree. */ + const unsigned int status = pg->flags & (PG_CLEAN | PG_DIRTY); + const bool isaobj = (pg->flags & PG_AOBJ) != 0; + + if (!isaobj) { + KASSERT((pg->flags & PG_FILE) != 0); + if (uobj->uo_npages == 1) { + struct vnode *vp = (struct vnode *)uobj; - holdrelel(vp); - } - if (UVM_OBJ_IS_VTEXT(uobj)) { - cpu_count(CPU_COUNT_EXECPAGES, -1); + holdrelel(vp); + } + kpreempt_disable(); + if (UVM_OBJ_IS_VTEXT(uobj)) { + CPU_COUNT(CPU_COUNT_EXECPAGES, -1); + } else { + CPU_COUNT(CPU_COUNT_FILEPAGES, -1); + } + CPU_COUNT(CPU_COUNT_FILEUNKNOWN + status, -1); } else { - cpu_count(CPU_COUNT_FILEPAGES, -1); + kpreempt_disable(); + CPU_COUNT(CPU_COUNT_ANONPAGES, -1); + CPU_COUNT(CPU_COUNT_ANONUNKNOWN + status, -1); } - } else if (UVM_OBJ_IS_AOBJ(uobj)) { - cpu_count(CPU_COUNT_ANONPAGES, -1); + kpreempt_enable(); } - - /* object should be locked */ uobj->uo_npages--; pg->flags &= ~PG_TABLED; pg->uobject = NULL; @@ -1290,6 +1317,7 @@ uvm_pagealloc_strat(struct uvm_object *o } if (anon) { CPU_COUNT(CPU_COUNT_ANONPAGES, 1); + CPU_COUNT(CPU_COUNT_ANONCLEAN, 1); } splx(s); KASSERT((pg->flags & ~(PG_ZERO|PG_FREE)) == 0); @@ -1312,6 +1340,14 @@ uvm_pagealloc_strat(struct uvm_object *o pg->flags |= PG_ANON; mutex_exit(&pg->interlock); } else if (obj) { + /* + * set PG_FILE|PG_AOBJ before the first uvm_pageinsert. 
+ */ + if (UVM_OBJ_IS_VNODE(obj)) { + pg->flags |= PG_FILE; + } else { + pg->flags |= PG_AOBJ; + } uvm_pageinsert_object(obj, pg); mutex_exit(&pg->interlock); error = uvm_pageinsert_tree(obj, pg); @@ -1334,9 +1370,12 @@ uvm_pagealloc_strat(struct uvm_object *o * A zero'd page is not clean. If we got a page not already * zero'd, then we have to zero it ourselves. */ - pg->flags &= ~PG_CLEAN; - if (zeroit) + if (obj != NULL || anon != NULL) { + uvm_pagemarkdirty(pg, UVM_PAGE_STATUS_DIRTY); + } + if (zeroit) { pmap_zero_page(VM_PAGE_TO_PHYS(pg)); + } } return(pg); @@ -1354,6 +1393,7 @@ uvm_pagereplace(struct vm_page *oldpg, s { struct uvm_object *uobj = oldpg->uobject; struct vm_page *pg __diagused; + uint64_t idx; KASSERT((oldpg->flags & PG_TABLED) != 0); KASSERT(uobj != NULL); @@ -1363,12 +1403,25 @@ uvm_pagereplace(struct vm_page *oldpg, s KASSERT(mutex_owned(&oldpg->interlock)); KASSERT(mutex_owned(&newpg->interlock)); + newpg->uobject = uobj; newpg->offset = oldpg->offset; - pg = radix_tree_replace_node(&uobj->uo_pages, - newpg->offset >> PAGE_SHIFT, newpg); + idx = newpg->offset >> PAGE_SHIFT; + pg = radix_tree_replace_node(&uobj->uo_pages, idx, newpg); KASSERT(pg == oldpg); - - newpg->uobject = uobj; + if (((oldpg->flags ^ newpg->flags) & PG_CLEAN) != 0) { + if ((newpg->flags & PG_CLEAN) != 0) { + radix_tree_clear_tag(&uobj->uo_pages, idx, + UVM_PAGE_DIRTY_TAG); + } else { + radix_tree_set_tag(&uobj->uo_pages, idx, + UVM_PAGE_DIRTY_TAG); + } + } + /* + * oldpg's PG_STAT is stable. newpg is not reachable by others yet. + */ + newpg->flags |= + (newpg->flags & ~PG_STAT) | (oldpg->flags & PG_STAT); uvm_pageinsert_object(uobj, newpg); uvm_pageremove_object(uobj, oldpg); } @@ -1502,7 +1555,7 @@ uvm_pagefree(struct vm_page *pg) locked = true; if (pg->uobject != NULL) { uvm_pageremove_object(pg->uobject, pg); - pg->flags &= ~PG_CLEAN; + pg->flags &= ~(PG_FILE|PG_AOBJ); } else if (pg->uanon != NULL) { if ((pg->flags & PG_ANON) == 0) { pg->loan_count--; @@ -1520,6 +1573,7 @@ uvm_pagefree(struct vm_page *pg) #ifdef UVM_PAGE_TRKOWN pg->owner_tag = NULL; #endif + KASSERT((pg->flags & PG_STAT) == 0); if (pg->loan_count) { KASSERT(pg->uobject == NULL); if (pg->uanon == NULL) { @@ -1542,9 +1596,13 @@ uvm_pagefree(struct vm_page *pg) if (pg->uobject != NULL) { uvm_pageremove_object(pg->uobject, pg); } else if (pg->uanon != NULL) { + const unsigned int status = uvm_pagegetdirty(pg); pg->uanon->an_page = NULL; pg->uanon = NULL; - cpu_count(CPU_COUNT_ANONPAGES, -1); + kpreempt_disable(); + CPU_COUNT(CPU_COUNT_ANONPAGES, -1); + CPU_COUNT(CPU_COUNT_ANONUNKNOWN + status, -1); + kpreempt_enable(); } /* @@ -1953,7 +2011,8 @@ uvm_pageunlock2(struct vm_page *pg1, str void uvm_pagezero(struct vm_page *pg) { - pg->flags &= ~PG_CLEAN; + + uvm_pagemarkdirty(pg, UVM_PAGE_STATUS_DIRTY); pmap_zero_page(VM_PAGE_TO_PHYS(pg)); } @@ -1968,7 +2027,7 @@ void uvm_pagecopy(struct vm_page *src, struct vm_page *dst) { - dst->flags &= ~PG_CLEAN; + uvm_pagemarkdirty(dst, UVM_PAGE_STATUS_DIRTY); pmap_copy_page(VM_PAGE_TO_PHYS(src), VM_PAGE_TO_PHYS(dst)); } @@ -2015,6 +2074,29 @@ uvm_page_owner_locked_p(struct vm_page * return true; } +/* + * uvm_pagereadonly_p: return if the page should be mapped read-only + */ + +bool +uvm_pagereadonly_p(struct vm_page *pg) +{ + struct uvm_object * const uobj = pg->uobject; + + KASSERT(uobj == NULL || mutex_owned(uobj->vmobjlock)); + KASSERT(uobj != NULL || mutex_owned(pg->uanon->an_lock)); + if ((pg->flags & PG_RDONLY) != 0) { + return true; + } + if (uvm_pagegetdirty(pg) == 
UVM_PAGE_STATUS_CLEAN) { + return true; + } + if (uobj == NULL) { + return false; + } + return UVM_OBJ_NEEDS_WRITEFAULT(uobj); +} + #ifdef PMAP_DIRECT /* * Call pmap to translate physical address into a virtual and to run a callback @@ -2080,7 +2162,7 @@ uvm_page_printit(struct vm_page *pg, boo (*pr)("PAGE %p:\n", pg); snprintb(pgbuf, sizeof(pgbuf), page_flagbits, pg->flags); - (*pr)(" flags=%s, pqflags=%x, wire_count=%d, pa=0x%lx\n", + (*pr)(" flags=%s\n pqflags=%x, wire_count=%d, pa=0x%lx\n", pgbuf, pg->pqflags, pg->wire_count, (long)VM_PAGE_TO_PHYS(pg)); (*pr)(" uobject=%p, uanon=%p, offset=0x%llx loan_count=%d\n", pg->uobject, pg->uanon, (long long)pg->offset, pg->loan_count); Index: src/sys/uvm/uvm_page.h diff -u src/sys/uvm/uvm_page.h:1.95 src/sys/uvm/uvm_page.h:1.96 --- src/sys/uvm/uvm_page.h:1.95 Fri Jan 10 21:32:17 2020 +++ src/sys/uvm/uvm_page.h Wed Jan 15 17:55:45 2020 @@ -1,4 +1,4 @@ -/* $NetBSD: uvm_page.h,v 1.95 2020/01/10 21:32:17 ad Exp $ */ +/* $NetBSD: uvm_page.h,v 1.96 2020/01/15 17:55:45 ad Exp $ */ /* * Copyright (c) 1997 Charles D. Cranor and Washington University. @@ -160,8 +160,7 @@ struct vm_page { TAILQ_ENTRY(vm_page) pdqueue; /* p: pagedaemon queue */ kmutex_t interlock; /* s: lock on identity */ uint32_t pqflags; /* i: pagedaemon flags */ - uint16_t flags; /* o: object flags */ - uint16_t spare; /* : spare for now */ + uint32_t flags; /* o: object flags */ paddr_t phys_addr; /* o: physical address of pg */ uint32_t loan_count; /* o,i: num. active loans */ uint32_t wire_count; /* o,i: wired down map refs */ @@ -194,6 +193,15 @@ struct vm_page { * * Flag descriptions: * + * PG_CLEAN: + * Page is known clean. + * The contents of the page is consistent with its backing store. + * + * PG_DIRTY: + * Page is known dirty. + * To avoid losing data, the contents of the page should be written + * back to the backing store before freeing the page. + * * PG_BUSY: * Page is long-term locked, usually because of I/O (transfer from the * page memory to the backing store) is in progress. LWP attempting @@ -205,31 +213,20 @@ struct vm_page { * responsible to clear both flags and wake up any waiters once it has * released the long-term lock (PG_BUSY). * + * PG_PAGEOUT: + * Indicates that the page is being paged-out in preparation for + * being freed. + * * PG_RELEASED: * Indicates that the page, which is currently PG_BUSY, should be freed * after the release of long-term lock. It is responsibility of the * owning LWP (i.e. which set PG_BUSY) to do it. * - * PG_CLEAN: - * Page has not been modified since it was loaded from the backing - * store. If this flag is not set, page is considered "dirty". - * XXX: Currently it means that the page *might* be clean; will be - * fixed with yamt-pagecache merge. - * * PG_FAKE: * Page has been allocated, but not yet initialised. The flag is used * to avoid overwriting of valid data, e.g. to prevent read from the * backing store when in-core data is newer. * - * PG_TABLED: - * Indicates that the page is currently in the object's offset queue, - * and that it should be removed from it once the page is freed. Used - * diagnostic purposes. - * - * PG_PAGEOUT: - * Indicates that the page is being paged-out in preparation for - * being freed. - * * PG_RDONLY: * Indicates that the page must be mapped read-only. * @@ -239,31 +236,43 @@ struct vm_page { * page is placed on the free list. * * PG_MARKER: - * Dummy marker page. + * Dummy marker page, generally used for list traversal. 
+ */ + +/* + * if you want to renumber PG_CLEAN and PG_DIRTY, check __CTASSERTs in + * uvm_page_status.c first. */ -#define PG_BUSY 0x0001 -#define PG_WANTED 0x0002 -#define PG_TABLED 0x0004 -#define PG_CLEAN 0x0008 -#define PG_PAGEOUT 0x0010 -#define PG_RELEASED 0x0020 -#define PG_FAKE 0x0040 -#define PG_RDONLY 0x0080 -#define PG_AOBJ 0x0100 /* page is part of an anonymous +#define PG_CLEAN 0x00000001 /* page is known clean */ +#define PG_DIRTY 0x00000002 /* page is known dirty */ +#define PG_BUSY 0x00000004 /* page is locked */ +#define PG_WANTED 0x00000008 /* someone is waiting for page */ +#define PG_PAGEOUT 0x00000010 /* page to be freed for pagedaemon */ +#define PG_RELEASED 0x00000020 /* page to be freed when unbusied */ +#define PG_FAKE 0x00000040 /* page is not yet initialized */ +#define PG_RDONLY 0x00000080 /* page must be mapped read-only */ +#define PG_ZERO 0x00000100 /* page is pre-zero'd */ +#define PG_TABLED 0x00000200 /* page is tabled in object */ +#define PG_AOBJ 0x00000400 /* page is part of an anonymous uvm_object */ -#define PG_ANON 0x0200 /* page is part of an anon, rather +#define PG_ANON 0x00000800 /* page is part of an anon, rather than an uvm_object */ -#define PG_SWAPBACKED (PG_ANON|PG_AOBJ) -#define PG_READAHEAD 0x0400 /* read-ahead but not "hit" yet */ -#define PG_FREE 0x0800 /* page is on free list */ -#define PG_MARKER 0x1000 -#define PG_PAGER1 0x2000 /* pager-specific flag */ -#define PG_ZERO 0x4000 +#define PG_FILE 0x00001000 /* file backed (non-anonymous) */ +#define PG_READAHEAD 0x00002000 /* read-ahead but not "hit" yet */ +#define PG_FREE 0x00004000 /* page is on free list */ +#define PG_MARKER 0x00008000 /* dummy marker page */ +#define PG_PAGER1 0x00010000 /* pager-specific flag */ + +#define PG_STAT (PG_ANON|PG_AOBJ|PG_FILE) +#define PG_SWAPBACKED (PG_ANON|PG_AOBJ) #define UVM_PGFLAGBITS \ - "\20\1BUSY\2WANTED\3TABLED\4CLEAN\5PAGEOUT\6RELEASED\7FAKE\10RDONLY" \ - "\11AOBJ\12AOBJ\13READAHEAD\14FREE\15MARKER\16PAGER1\17ZERO" + "\20\1CLEAN\2DIRTY\3BUSY\4WANTED" \ + "\5PAGEOUT\6RELEASED\7FAKE\10RDONLY" \ + "\11ZERO\12TABLED\13AOBJ\14ANON" \ + "\15FILE\16READAHEAD\17FREE\20MARKER" \ + "\21PAGER1" /* * uvmpdpol state flags. @@ -343,6 +352,11 @@ bool uvm_pageismanaged(paddr_t); bool uvm_page_owner_locked_p(struct vm_page *); void uvm_pgfl_lock(void); void uvm_pgfl_unlock(void); +unsigned int uvm_pagegetdirty(struct vm_page *); +void uvm_pagemarkdirty(struct vm_page *, unsigned int); +bool uvm_pagecheckdirty(struct vm_page *, bool); +bool uvm_pagereadonly_p(struct vm_page *); +bool uvm_page_locked_p(struct vm_page *); int uvm_page_lookup_freelist(struct vm_page *); @@ -356,6 +370,23 @@ int uvm_direct_process(struct vm_page ** #endif /* + * page dirtiness status for uvm_pagegetdirty and uvm_pagemarkdirty + * + * UNKNOWN means that we need to consult pmap to know if the page is + * dirty or not. + * basically, UVM_PAGE_STATUS_CLEAN implies that the page has no writable + * mapping. + * + * if you want to renumber these, check __CTASSERTs in + * uvm_page_status.c first. 
+ */ + +#define UVM_PAGE_STATUS_UNKNOWN 0 +#define UVM_PAGE_STATUS_CLEAN 1 +#define UVM_PAGE_STATUS_DIRTY 2 +#define UVM_PAGE_NUM_STATUS 3 + +/* * macros */ Index: src/sys/uvm/uvm_page_array.c diff -u src/sys/uvm/uvm_page_array.c:1.2 src/sys/uvm/uvm_page_array.c:1.3 --- src/sys/uvm/uvm_page_array.c:1.2 Sun Dec 15 21:11:35 2019 +++ src/sys/uvm/uvm_page_array.c Wed Jan 15 17:55:45 2020 @@ -1,4 +1,4 @@ -/* $NetBSD: uvm_page_array.c,v 1.2 2019/12/15 21:11:35 ad Exp $ */ +/* $NetBSD: uvm_page_array.c,v 1.3 2020/01/15 17:55:45 ad Exp $ */ /*- * Copyright (c)2011 YAMAMOTO Takashi, @@ -27,7 +27,7 @@ */ #include <sys/cdefs.h> -__KERNEL_RCSID(0, "$NetBSD: uvm_page_array.c,v 1.2 2019/12/15 21:11:35 ad Exp $"); +__KERNEL_RCSID(0, "$NetBSD: uvm_page_array.c,v 1.3 2020/01/15 17:55:45 ad Exp $"); #include <sys/param.h> #include <sys/systm.h> @@ -142,7 +142,6 @@ uvm_page_array_fill(struct uvm_page_arra KASSERT(mutex_owned(uobj->vmobjlock)); #endif KASSERT(uvm_page_array_peek(ar) == NULL); -#if 0 /* not merged from yamt-pagecache yet */ if ((flags & UVM_PAGE_ARRAY_FILL_DIRTY) != 0) { unsigned int tagmask = UVM_PAGE_DIRTY_TAG; @@ -154,9 +153,7 @@ uvm_page_array_fill(struct uvm_page_arra radix_tree_gang_lookup_tagged_node)( &uobj->uo_pages, off >> PAGE_SHIFT, (void **)ar->ar_pages, maxpages, dense, tagmask); - } else -#endif - { + } else { npages = (backward ? radix_tree_gang_lookup_node_reverse : radix_tree_gang_lookup_node)( Index: src/sys/uvm/uvm_pager.c diff -u src/sys/uvm/uvm_pager.c:1.119 src/sys/uvm/uvm_pager.c:1.120 --- src/sys/uvm/uvm_pager.c:1.119 Tue Dec 31 22:42:51 2019 +++ src/sys/uvm/uvm_pager.c Wed Jan 15 17:55:45 2020 @@ -1,4 +1,4 @@ -/* $NetBSD: uvm_pager.c,v 1.119 2019/12/31 22:42:51 ad Exp $ */ +/* $NetBSD: uvm_pager.c,v 1.120 2020/01/15 17:55:45 ad Exp $ */ /* * Copyright (c) 1997 Charles D. Cranor and Washington University. @@ -32,7 +32,7 @@ */ #include <sys/cdefs.h> -__KERNEL_RCSID(0, "$NetBSD: uvm_pager.c,v 1.119 2019/12/31 22:42:51 ad Exp $"); +__KERNEL_RCSID(0, "$NetBSD: uvm_pager.c,v 1.120 2020/01/15 17:55:45 ad Exp $"); #include "opt_uvmhist.h" #include "opt_readahead.h" @@ -367,6 +367,13 @@ uvm_aio_aiodone_pages(struct vm_page **p } #endif /* defined(VMSWAP) */ + if (write && uobj != NULL) { + KASSERT(radix_tree_get_tag(&uobj->uo_pages, + pg->offset >> PAGE_SHIFT, UVM_PAGE_WRITEBACK_TAG)); + radix_tree_clear_tag(&uobj->uo_pages, + pg->offset >> PAGE_SHIFT, UVM_PAGE_WRITEBACK_TAG); + } + /* * process errors. for reads, just mark the page to be freed. * for writes, if the error was ENOMEM, we assume this was @@ -386,7 +393,7 @@ uvm_aio_aiodone_pages(struct vm_page **p pg->flags &= ~PG_PAGEOUT; pageout_done++; } - pg->flags &= ~PG_CLEAN; + uvm_pagemarkdirty(pg, UVM_PAGE_STATUS_DIRTY); uvm_pagelock(pg); uvm_pageactivate(pg); uvm_pageunlock(pg); @@ -413,8 +420,6 @@ uvm_aio_aiodone_pages(struct vm_page **p /* * if the page is PG_FAKE, this must have been a read to * initialize the page. clear PG_FAKE and activate the page. - * we must also clear the pmap "modified" flag since it may - * still be set from the page's previous identity. 
*/ if (pg->flags & PG_FAKE) { @@ -424,11 +429,10 @@ uvm_aio_aiodone_pages(struct vm_page **p pg->flags |= PG_READAHEAD; uvm_ra_total.ev_count++; #endif /* defined(READAHEAD_STATS) */ - KASSERT((pg->flags & PG_CLEAN) != 0); + KASSERT(uvm_pagegetdirty(pg) == UVM_PAGE_STATUS_CLEAN); uvm_pagelock(pg); uvm_pageenqueue(pg); uvm_pageunlock(pg); - pmap_clear_modify(pg); } /* Index: src/sys/uvm/uvm_pdaemon.c diff -u src/sys/uvm/uvm_pdaemon.c:1.122 src/sys/uvm/uvm_pdaemon.c:1.123 --- src/sys/uvm/uvm_pdaemon.c:1.122 Tue Dec 31 22:42:51 2019 +++ src/sys/uvm/uvm_pdaemon.c Wed Jan 15 17:55:45 2020 @@ -1,4 +1,4 @@ -/* $NetBSD: uvm_pdaemon.c,v 1.122 2019/12/31 22:42:51 ad Exp $ */ +/* $NetBSD: uvm_pdaemon.c,v 1.123 2020/01/15 17:55:45 ad Exp $ */ /* * Copyright (c) 1997 Charles D. Cranor and Washington University. @@ -66,7 +66,7 @@ */ #include <sys/cdefs.h> -__KERNEL_RCSID(0, "$NetBSD: uvm_pdaemon.c,v 1.122 2019/12/31 22:42:51 ad Exp $"); +__KERNEL_RCSID(0, "$NetBSD: uvm_pdaemon.c,v 1.123 2020/01/15 17:55:45 ad Exp $"); #include "opt_uvmhist.h" #include "opt_readahead.h" @@ -609,14 +609,14 @@ uvmpd_dropswap(struct vm_page *pg) if ((pg->flags & PG_ANON) && anon->an_swslot) { uvm_swap_free(anon->an_swslot, 1); anon->an_swslot = 0; - pg->flags &= ~PG_CLEAN; + uvm_pagemarkdirty(pg, UVM_PAGE_STATUS_DIRTY); result = true; } else if (pg->flags & PG_AOBJ) { int slot = uao_set_swslot(pg->uobject, pg->offset >> PAGE_SHIFT, 0); if (slot) { uvm_swap_free(slot, 1); - pg->flags &= ~PG_CLEAN; + uvm_pagemarkdirty(pg, UVM_PAGE_STATUS_DIRTY); result = true; } } @@ -757,10 +757,14 @@ uvmpd_scan_queue(void) */ pmap_page_protect(p, VM_PROT_NONE); - if ((p->flags & PG_CLEAN) && pmap_clear_modify(p)) { - p->flags &= ~(PG_CLEAN); + if (uvm_pagegetdirty(p) == UVM_PAGE_STATUS_UNKNOWN) { + if (pmap_clear_modify(p)) { + uvm_pagemarkdirty(p, UVM_PAGE_STATUS_DIRTY); + } else { + uvm_pagemarkdirty(p, UVM_PAGE_STATUS_CLEAN); + } } - if (p->flags & PG_CLEAN) { + if (uvm_pagegetdirty(p) != UVM_PAGE_STATUS_DIRTY) { int slot; int pageidx; Index: src/sys/uvm/uvm_vnode.c diff -u src/sys/uvm/uvm_vnode.c:1.104 src/sys/uvm/uvm_vnode.c:1.105 --- src/sys/uvm/uvm_vnode.c:1.104 Sat Dec 21 14:41:44 2019 +++ src/sys/uvm/uvm_vnode.c Wed Jan 15 17:55:45 2020 @@ -1,4 +1,4 @@ -/* $NetBSD: uvm_vnode.c,v 1.104 2019/12/21 14:41:44 ad Exp $ */ +/* $NetBSD: uvm_vnode.c,v 1.105 2020/01/15 17:55:45 ad Exp $ */ /* * Copyright (c) 1997 Charles D. Cranor and Washington University. @@ -45,7 +45,7 @@ */ #include <sys/cdefs.h> -__KERNEL_RCSID(0, "$NetBSD: uvm_vnode.c,v 1.104 2019/12/21 14:41:44 ad Exp $"); +__KERNEL_RCSID(0, "$NetBSD: uvm_vnode.c,v 1.105 2020/01/15 17:55:45 ad Exp $"); #ifdef _KERNEL_OPT #include "opt_uvmhist.h" @@ -66,6 +66,7 @@ __KERNEL_RCSID(0, "$NetBSD: uvm_vnode.c, #include <uvm/uvm.h> #include <uvm/uvm_readahead.h> +#include <uvm/uvm_page_array.h> #ifdef UVMHIST UVMHIST_DEFINE(ubchist); @@ -82,7 +83,8 @@ static int uvn_put(struct uvm_object *, static void uvn_reference(struct uvm_object *); static int uvn_findpage(struct uvm_object *, voff_t, struct vm_page **, - int); + unsigned int, struct uvm_page_array *a, + unsigned int); /* * master pager structure @@ -136,7 +138,6 @@ uvn_detach(struct uvm_object *uobj) * * => object must be locked on entry! VOP_PUTPAGES must unlock it. * => flags: PGO_SYNCIO -- use sync. 
I/O - * => note: caller must set PG_CLEAN and pmap_clear_modify (if needed) */ static int @@ -201,16 +202,23 @@ uvn_get(struct uvm_object *uobj, voff_t */ int -uvn_findpages(struct uvm_object *uobj, voff_t offset, int *npagesp, - struct vm_page **pgs, int flags) +uvn_findpages(struct uvm_object *uobj, voff_t offset, unsigned int *npagesp, + struct vm_page **pgs, struct uvm_page_array *a, unsigned int flags) { - int i, count, found, npages, rv; - + unsigned int count, found, npages; + int i, rv; + struct uvm_page_array a_store; + + if (a == NULL) { + a = &a_store; + uvm_page_array_init(a); + } count = found = 0; npages = *npagesp; if (flags & UFP_BACKWARD) { for (i = npages - 1; i >= 0; i--, offset -= PAGE_SIZE) { - rv = uvn_findpage(uobj, offset, &pgs[i], flags); + rv = uvn_findpage(uobj, offset, &pgs[i], flags, a, + i + 1); if (rv == 0) { if (flags & UFP_DIRTYONLY) break; @@ -220,7 +228,8 @@ uvn_findpages(struct uvm_object *uobj, v } } else { for (i = 0; i < npages; i++, offset += PAGE_SIZE) { - rv = uvn_findpage(uobj, offset, &pgs[i], flags); + rv = uvn_findpage(uobj, offset, &pgs[i], flags, a, + npages - i); if (rv == 0) { if (flags & UFP_DIRTYONLY) break; @@ -229,16 +238,29 @@ uvn_findpages(struct uvm_object *uobj, v count++; } } + if (a == &a_store) { + uvm_page_array_fini(a); + } *npagesp = count; return (found); } +/* + * uvn_findpage: find a single page + * + * if a suitable page was found, put it in *pgp and return 1. + * otherwise return 0. + */ + static int uvn_findpage(struct uvm_object *uobj, voff_t offset, struct vm_page **pgp, - int flags) + unsigned int flags, struct uvm_page_array *a, unsigned int nleft) { struct vm_page *pg; - bool dirty; + const unsigned int fillflags = + ((flags & UFP_BACKWARD) ? UVM_PAGE_ARRAY_FILL_BACKWARD : 0) | + ((flags & UFP_DIRTYONLY) ? + (UVM_PAGE_ARRAY_FILL_DIRTY|UVM_PAGE_ARRAY_FILL_DENSE) : 0); UVMHIST_FUNC("uvn_findpage"); UVMHIST_CALLED(ubchist); UVMHIST_LOG(ubchist, "vp %#jx off 0x%jx", (uintptr_t)uobj, offset, 0, 0); @@ -247,11 +269,35 @@ uvn_findpage(struct uvm_object *uobj, vo if (*pgp != NULL) { UVMHIST_LOG(ubchist, "dontcare", 0,0,0,0); - return 0; + goto skip_offset; } for (;;) { - /* look for an existing page */ - pg = uvm_pagelookup(uobj, offset); + /* + * look for an existing page. + * + * XXX fragile API + * note that the array can be the one supplied by the caller of + * uvn_findpages. in that case, fillflags used by the caller + * might not match strictly with ours. + * in particular, the caller might have filled the array + * without DENSE but passed us UFP_DIRTYONLY (thus DENSE). + */ + pg = uvm_page_array_fill_and_peek(a, uobj, offset, nleft, + fillflags); + if (pg != NULL && pg->offset != offset) { + KASSERT( + ((fillflags & UVM_PAGE_ARRAY_FILL_BACKWARD) != 0) + == (pg->offset < offset)); + KASSERT(uvm_pagelookup(uobj, offset) == NULL + || ((fillflags & UVM_PAGE_ARRAY_FILL_DIRTY) != 0 && + radix_tree_get_tag(&uobj->uo_pages, + offset >> PAGE_SHIFT, UVM_PAGE_DIRTY_TAG) == 0)); + pg = NULL; + if ((fillflags & UVM_PAGE_ARRAY_FILL_DENSE) != 0) { + UVMHIST_LOG(ubchist, "dense", 0,0,0,0); + return 0; + } + } /* nope? 
allocate one now */ if (pg == NULL) { @@ -268,28 +314,32 @@ uvn_findpage(struct uvm_object *uobj, vo } mutex_exit(uobj->vmobjlock); uvm_wait("uvn_fp1"); + uvm_page_array_clear(a); mutex_enter(uobj->vmobjlock); continue; } UVMHIST_LOG(ubchist, "alloced %#jx (color %ju)", (uintptr_t)pg, VM_PGCOLOR(pg), 0, 0); + KASSERTMSG(uvm_pagegetdirty(pg) == + UVM_PAGE_STATUS_CLEAN, "page %p not clean", pg); break; } else if (flags & UFP_NOCACHE) { UVMHIST_LOG(ubchist, "nocache",0,0,0,0); - return 0; + goto skip; } /* page is there, see if we need to wait on it */ if ((pg->flags & PG_BUSY) != 0) { if (flags & UFP_NOWAIT) { UVMHIST_LOG(ubchist, "nowait",0,0,0,0); - return 0; + goto skip; } pg->flags |= PG_WANTED; UVMHIST_LOG(ubchist, "wait %#jx (color %ju)", (uintptr_t)pg, VM_PGCOLOR(pg), 0, 0); UVM_UNLOCK_AND_WAIT(pg, uobj->vmobjlock, 0, "uvn_fp2", 0); + uvm_page_array_clear(a); mutex_enter(uobj->vmobjlock); continue; } @@ -297,14 +347,12 @@ uvn_findpage(struct uvm_object *uobj, vo /* skip PG_RDONLY pages if requested */ if ((flags & UFP_NORDONLY) && (pg->flags & PG_RDONLY)) { UVMHIST_LOG(ubchist, "nordonly",0,0,0,0); - return 0; + goto skip; } /* stop on clean pages if requested */ if (flags & UFP_DIRTYONLY) { - dirty = pmap_clear_modify(pg) || - (pg->flags & PG_CLEAN) == 0; - pg->flags |= PG_CLEAN; + const bool dirty = uvm_pagecheckdirty(pg, false); if (!dirty) { UVMHIST_LOG(ubchist, "dirtonly", 0,0,0,0); return 0; @@ -316,10 +364,33 @@ uvn_findpage(struct uvm_object *uobj, vo UVM_PAGE_OWN(pg, "uvn_findpage"); UVMHIST_LOG(ubchist, "found %#jx (color %ju)", (uintptr_t)pg, VM_PGCOLOR(pg), 0, 0); + uvm_page_array_advance(a); break; } *pgp = pg; return 1; + + skip_offset: + /* + * skip this offset + */ + pg = uvm_page_array_peek(a); + if (pg != NULL) { + if (pg->offset == offset) { + uvm_page_array_advance(a); + } else { + KASSERT((fillflags & UVM_PAGE_ARRAY_FILL_DENSE) == 0); + } + } + return 0; + + skip: + /* + * skip this page + */ + KASSERT(pg != NULL); + uvm_page_array_advance(a); + return 0; } /* Added files: Index: src/sys/uvm/uvm_page_status.c diff -u /dev/null src/sys/uvm/uvm_page_status.c:1.2 --- /dev/null Wed Jan 15 17:55:45 2020 +++ src/sys/uvm/uvm_page_status.c Wed Jan 15 17:55:45 2020 @@ -0,0 +1,194 @@ +/* $NetBSD: uvm_page_status.c,v 1.2 2020/01/15 17:55:45 ad Exp $ */ + +/*- + * Copyright (c)2011 YAMAMOTO Takashi, + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. 
IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +#include <sys/cdefs.h> +__KERNEL_RCSID(0, "$NetBSD: uvm_page_status.c,v 1.2 2020/01/15 17:55:45 ad Exp $"); + +#include <sys/param.h> +#include <sys/systm.h> + +#include <uvm/uvm.h> + +/* + * page dirtiness status tracking + * + * separated from uvm_page.c mainly for rump + */ + +/* + * these constants are chosen to match so that we can convert between + * them quickly. + */ + +__CTASSERT(UVM_PAGE_STATUS_UNKNOWN == 0); +__CTASSERT(UVM_PAGE_STATUS_DIRTY == PG_DIRTY); +__CTASSERT(UVM_PAGE_STATUS_CLEAN == PG_CLEAN); + +/* + * uvm_pagegetdirty: return the dirtiness status (one of UVM_PAGE_STATUS_ + * values) of the page. + * + * called with the owner locked. + */ + +unsigned int +uvm_pagegetdirty(struct vm_page *pg) +{ + struct uvm_object * const uobj __diagused = pg->uobject; + const uint64_t idx __diagused = pg->offset >> PAGE_SHIFT; + + KASSERT((~pg->flags & (PG_CLEAN|PG_DIRTY)) != 0); + KASSERT(uvm_page_owner_locked_p(pg)); + KASSERT(uobj == NULL || ((pg->flags & PG_CLEAN) == 0) == + !!radix_tree_get_tag(&uobj->uo_pages, idx, UVM_PAGE_DIRTY_TAG)); + return pg->flags & (PG_CLEAN|PG_DIRTY); +} + +/* + * uvm_pagemarkdirty: set the dirtiness status (one of UVM_PAGE_STATUS_ values) + * of the page. + * + * called with the owner locked. + * + * update the radix tree tag for object-owned page. + * + * if new status is UVM_PAGE_STATUS_UNKNOWN, clear pmap-level dirty bit + * so that later uvm_pagecheckdirty() can notice modifications on the page. + */ + +void +uvm_pagemarkdirty(struct vm_page *pg, unsigned int newstatus) +{ + struct uvm_object * const uobj = pg->uobject; + const uint64_t idx = pg->offset >> PAGE_SHIFT; + const unsigned int oldstatus = uvm_pagegetdirty(pg); + enum cpu_count base; + + KASSERT((~newstatus & (PG_CLEAN|PG_DIRTY)) != 0); + KASSERT((newstatus & ~(PG_CLEAN|PG_DIRTY)) == 0); + KASSERT(uvm_page_owner_locked_p(pg)); + KASSERT(uobj == NULL || ((pg->flags & PG_CLEAN) == 0) == + !!radix_tree_get_tag(&uobj->uo_pages, idx, UVM_PAGE_DIRTY_TAG)); + + if (oldstatus == newstatus) { + return; + } + + /* + * set UVM_PAGE_DIRTY_TAG tag unless known CLEAN so that putpages can + * find possibly-dirty pages quickly. + */ + + if (uobj != NULL) { + if (newstatus == UVM_PAGE_STATUS_CLEAN) { + radix_tree_clear_tag(&uobj->uo_pages, idx, + UVM_PAGE_DIRTY_TAG); + } else { + radix_tree_set_tag(&uobj->uo_pages, idx, + UVM_PAGE_DIRTY_TAG); + } + } + if (newstatus == UVM_PAGE_STATUS_UNKNOWN) { + /* + * start relying on pmap-level dirtiness tracking. 
+ */ + pmap_clear_modify(pg); + } + pg->flags &= ~(PG_CLEAN|PG_DIRTY); + pg->flags |= newstatus; + KASSERT(uobj == NULL || ((pg->flags & PG_CLEAN) == 0) == + !!radix_tree_get_tag(&uobj->uo_pages, idx, UVM_PAGE_DIRTY_TAG)); + if ((pg->flags & PG_STAT) != 0) { + if ((pg->flags & PG_SWAPBACKED) != 0) { + base = CPU_COUNT_ANONUNKNOWN; + } else { + base = CPU_COUNT_FILEUNKNOWN; + } + kpreempt_disable(); + CPU_COUNT(base + oldstatus, -1); + CPU_COUNT(base + newstatus, +1); + kpreempt_enable(); + } +} + +/* + * uvm_pagecheckdirty: check if page is dirty, and remove its dirty bit. + * + * called with the owner locked. + * + * returns if the page was dirty. + * + * if protected is true, mark the page CLEAN. otherwise, mark the page UNKNOWN. + * ("mark" in the sense of uvm_pagemarkdirty().) + */ + +bool +uvm_pagecheckdirty(struct vm_page *pg, bool pgprotected) +{ + const unsigned int oldstatus = uvm_pagegetdirty(pg); + bool modified; + + KASSERT(uvm_page_owner_locked_p(pg)); + + /* + * if pgprotected is true, mark the page CLEAN. + * otherwise mark the page UNKNOWN unless it's CLEAN. + * + * possible transitions: + * + * CLEAN -> CLEAN , modified = false + * UNKNOWN -> UNKNOWN, modified = true + * UNKNOWN -> UNKNOWN, modified = false + * UNKNOWN -> CLEAN , modified = true + * UNKNOWN -> CLEAN , modified = false + * DIRTY -> UNKNOWN, modified = true + * DIRTY -> CLEAN , modified = true + * + * pmap_clear_modify is necessary if either of + * oldstatus or newstatus is UVM_PAGE_STATUS_UNKNOWN. + */ + + if (oldstatus == UVM_PAGE_STATUS_CLEAN) { + modified = false; + } else { + const unsigned int newstatus = pgprotected ? + UVM_PAGE_STATUS_CLEAN : UVM_PAGE_STATUS_UNKNOWN; + + if (oldstatus == UVM_PAGE_STATUS_DIRTY) { + modified = true; + if (newstatus == UVM_PAGE_STATUS_UNKNOWN) { + pmap_clear_modify(pg); + } + } else { + KASSERT(oldstatus == UVM_PAGE_STATUS_UNKNOWN); + modified = pmap_clear_modify(pg); + } + uvm_pagemarkdirty(pg, newstatus); + } + return modified; +}
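
[Editor's note, not part of the commit: the sketch below is a minimal user-space model of the three-state dirtiness tracking (UNKNOWN/CLEAN/DIRTY) that uvm_page_status.c introduces above. All names here — page, markdirty, checkdirty, pmap_clear_modify_sim — are hypothetical stand-ins, not the kernel API; the pmap modified bit is simulated with a plain flag, and the radix-tree dirty tag and per-CPU counters are omitted. It only illustrates the state transitions documented in the uvm_pagecheckdirty() comment.]

/*
 * Simplified model of the uvm page dirtiness states.  Assumed/illustrative
 * code only; compiles and runs stand-alone with a C99 compiler.
 */
#include <assert.h>
#include <stdbool.h>
#include <stdio.h>

enum page_status { STATUS_UNKNOWN = 0, STATUS_CLEAN = 1, STATUS_DIRTY = 2 };

struct page {
	enum page_status status;	/* uvm-level dirtiness state */
	bool pmap_modified;		/* stands in for the MMU "modified" bit */
};

/* stand-in for pmap_clear_modify(9): return and clear the simulated bit */
static bool
pmap_clear_modify_sim(struct page *pg)
{
	bool was = pg->pmap_modified;

	pg->pmap_modified = false;
	return was;
}

/* counterpart of uvm_pagemarkdirty(): entering UNKNOWN re-arms pmap tracking */
static void
markdirty(struct page *pg, enum page_status newstatus)
{
	if (pg->status == newstatus)
		return;
	if (newstatus == STATUS_UNKNOWN)
		(void)pmap_clear_modify_sim(pg);	/* rely on pmap from now on */
	pg->status = newstatus;
}

/* counterpart of uvm_pagecheckdirty(): report and consume dirtiness */
static bool
checkdirty(struct page *pg, bool pgprotected)
{
	bool modified;

	if (pg->status == STATUS_CLEAN)
		return false;			/* CLEAN -> CLEAN, not modified */
	if (pg->status == STATUS_DIRTY)
		modified = true;		/* known dirty regardless of pmap */
	else
		modified = pmap_clear_modify_sim(pg);	/* UNKNOWN: ask pmap */
	/* if the caller write-protected the page, it is now known CLEAN */
	markdirty(pg, pgprotected ? STATUS_CLEAN : STATUS_UNKNOWN);
	return modified;
}

int
main(void)
{
	struct page pg = { .status = STATUS_CLEAN, .pmap_modified = false };

	/* a write fault would mark the page DIRTY up front */
	markdirty(&pg, STATUS_DIRTY);
	assert(checkdirty(&pg, false) == true);	/* a flush sees it dirty... */
	assert(pg.status == STATUS_UNKNOWN);	/* ...and demotes it to UNKNOWN */

	/* nothing wrote to it since, so the next check finds it clean */
	assert(checkdirty(&pg, true) == false);
	assert(pg.status == STATUS_CLEAN);
	printf("dirtiness model behaves as described\n");
	return 0;
}

The point of the extra UNKNOWN state, as exercised above, is that putpages and the pagedaemon only have to fall back to pmap_clear_modify() for pages whose status is genuinely unknown; pages tracked as CLEAN or DIRTY at the uvm layer need no pmap traversal at all, which is what reduces the pmap operation count mentioned in the log message.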