svn commit: r362459 - head/sys/kern

2020-06-20 Thread Jeff Roberson
Author: jeff
Date: Sun Jun 21 04:59:02 2020
New Revision: 362459
URL: https://svnweb.freebsd.org/changeset/base/362459

Log:
  Use zone nomenclature that is consistent with UMA.
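
For illustration (not part of this commit), a minimal sketch of the
allocation paths the updated comment describes: plain mbufs come from the
Mbuf Primary Zone via m_get()/m_gethdr(), while combined mbuf-plus-cluster
allocations go through the Packet Secondary Zone via m_getcl().  Error
handling is abbreviated.

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/mbuf.h>

static void
mbuf_zone_example(void)
{
	struct mbuf *m, *p;

	/* A header mbuf, allocated from the Mbuf (Primary) Zone. */
	m = m_gethdr(M_NOWAIT, MT_DATA);

	/* An mbuf and cluster together, from the Packet (Secondary) Zone. */
	p = m_getcl(M_NOWAIT, MT_DATA, M_PKTHDR);

	if (m != NULL)
		m_freem(m);
	if (p != NULL)
		m_freem(p);
}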

Modified:
  head/sys/kern/kern_mbuf.c

Modified: head/sys/kern/kern_mbuf.c
==
--- head/sys/kern/kern_mbuf.c   Sun Jun 21 03:39:26 2020(r362458)
+++ head/sys/kern/kern_mbuf.c   Sun Jun 21 04:59:02 2020(r362459)
@@ -73,12 +73,12 @@ __FBSDID("$FreeBSD$");
  * Zone.  The Zone can be capped at kern.ipc.nmbclusters, if the
  * administrator so desires.
  *
- * Mbufs are allocated from a UMA Master Zone called the Mbuf
+ * Mbufs are allocated from a UMA Primary Zone called the Mbuf
  * Zone.
  *
  * Additionally, FreeBSD provides a Packet Zone, which it
- * configures as a Secondary Zone to the Mbuf Master Zone,
- * thus sharing backend Slab kegs with the Mbuf Master Zone.
+ * configures as a Secondary Zone to the Mbuf Primary Zone,
+ * thus sharing backend Slab kegs with the Mbuf Primary Zone.
  *
  * Thus common-case allocations and locking are simplified:
  *
@@ -87,7 +87,7 @@ __FBSDID("$FreeBSD$");
  *|   .>[(Packet Cache)]m_get(), m_gethdr()
  *|   | [ Packet   ]|
  *  [(Cluster Cache)]   [Secondary ]   [ (Mbuf Cache) ]
- *  [ Cluster Zone  ]   [ Zone ]   [ Mbuf Master Zone ]
+ *  [ Cluster Zone  ]   [ Zone ]   [ Mbuf Primary Zone ]
  *|   \ |
  *  [ Cluster Keg   ]  \   /
  *| [ Mbuf Keg   ]
@@ -101,7 +101,7 @@ __FBSDID("$FreeBSD$");
  * for any deallocation through uma_zfree() the _dtor_ function
  * is executed.
  *
- * Caches are per-CPU and are filled from the Master Zone.
+ * Caches are per-CPU and are filled from the Primary Zone.
  *
  * Whenever an object is allocated from the underlying global
  * memory pool it gets pre-initialized with the _zinit_ functions.
@@ -611,7 +611,7 @@ debugnet_mbuf_reinit(int nmbuf, int nclust, int clsize
 #endif /* DEBUGNET */
 
 /*
- * Constructor for Mbuf master zone.
+ * Constructor for Mbuf primary zone.
  *
  * The 'arg' pointer points to a mb_args structure which
  * contains call-specific information required to support the
@@ -646,7 +646,7 @@ mb_ctor_mbuf(void *mem, int size, void *arg, int how)
 }
 
 /*
- * The Mbuf master zone destructor.
+ * The Mbuf primary zone destructor.
  */
 static void
 mb_dtor_mbuf(void *mem, int size, void *arg)


svn commit: r362449 - head/sys/vm

2020-06-20 Thread Jeff Roberson
Author: jeff
Date: Sat Jun 20 20:21:04 2020
New Revision: 362449
URL: https://svnweb.freebsd.org/changeset/base/362449

Log:
  Clarify some language.  Favor primary where both master and primary were
  used in conjunction with secondary.
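
For illustration (not part of this commit), a minimal sketch of how a
secondary zone is layered over a primary zone's keg with
uma_zsecond_create(); the zone names, item type, and null ctor/dtor are
made up for the example.

#include <sys/param.h>
#include <vm/uma.h>

struct widget {
	int	w_id;
};

static uma_zone_t widget_zone;		/* Primary zone; owns the keg. */
static uma_zone_t widget_big_zone;	/* Secondary zone; shares the keg. */

static void
widget_zones_init(void)
{
	widget_zone = uma_zcreate("widget", sizeof(struct widget),
	    NULL, NULL, NULL, NULL, UMA_ALIGN_PTR, 0);

	/*
	 * The zinit/zfini passed here run on the keg<->zone transition,
	 * as the uma.h comment in the diff explains; none are needed for
	 * this sketch.
	 */
	widget_big_zone = uma_zsecond_create("widget big",
	    NULL, NULL, NULL, NULL, widget_zone);
}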

Modified:
  head/sys/vm/uma.h
  head/sys/vm/uma_core.c
  head/sys/vm/uma_int.h

Modified: head/sys/vm/uma.h
==
--- head/sys/vm/uma.h   Sat Jun 20 20:20:16 2020(r362448)
+++ head/sys/vm/uma.h   Sat Jun 20 20:21:04 2020(r362449)
@@ -197,24 +197,23 @@ uma_zone_t uma_zcreate(const char *name, size_t size, 
  * ctor/dtor/zinit/zfini may all be null, see notes above.
  * Note that the zinit and zfini specified here are NOT
  * exactly the same as the init/fini specified to uma_zcreate()
- * when creating a master zone.  These zinit/zfini are called
+ * when creating a primary zone.  These zinit/zfini are called
  * on the TRANSITION from keg to zone (and vice-versa). Once
  * these are set, the primary zone may alter its init/fini
  * (which are called when the object passes from VM to keg)
  * using uma_zone_set_init/fini()) as well as its own
- * zinit/zfini (unset by default for master zone) with
+ * zinit/zfini (unset by default for primary zone) with
  * uma_zone_set_zinit/zfini() (note subtle 'z' prefix).
  *
- * master  A reference to this zone's Master Zone (Primary Zone),
- * which contains the backing Keg for the Secondary Zone
- * being added.
+ * primary A reference to this zone's Primary Zone which contains the
+ * backing Keg for the Secondary Zone being added.
  *
  * Returns:
  * A pointer to a structure which is intended to be opaque to users of
  * the interface.  The value may be null if the wait flag is not set.
  */
 uma_zone_t uma_zsecond_create(const char *name, uma_ctor ctor, uma_dtor dtor,
-uma_init zinit, uma_fini zfini, uma_zone_t master);
+uma_init zinit, uma_fini zfini, uma_zone_t primary);
 
 /*
  * Create cache-only zones.

Modified: head/sys/vm/uma_core.c
==
--- head/sys/vm/uma_core.c  Sat Jun 20 20:20:16 2020(r362448)
+++ head/sys/vm/uma_core.c  Sat Jun 20 20:21:04 2020(r362449)
@@ -2197,7 +2197,7 @@ keg_ctor(void *mem, int size, void *udata, int flags)
keg->uk_dr.dr_iter = 0;
 
/*
-* The master zone is passed to us at keg-creation time.
+* The primary zone is passed to us at keg-creation time.
 */
zone = arg->zone;
keg->uk_name = zone->uz_name;
@@ -2808,7 +2808,7 @@ uma_startup1(vm_offset_t virtual_avail)
 {
struct uma_zctor_args args;
size_t ksize, zsize, size;
-   uma_keg_t masterkeg;
+   uma_keg_t primarykeg;
uintptr_t m;
int domain;
uint8_t pflag;
@@ -2838,7 +2838,7 @@ uma_startup1(vm_offset_t virtual_avail)
m += zsize;
kegs = (uma_zone_t)m;
m += zsize;
-   masterkeg = (uma_keg_t)m;
+   primarykeg = (uma_keg_t)m;
 
/* "manually" create the initial zone */
memset(, 0, sizeof(args));
@@ -2848,7 +2848,7 @@ uma_startup1(vm_offset_t virtual_avail)
args.dtor = keg_dtor;
args.uminit = zero_init;
args.fini = NULL;
-   args.keg = masterkeg;
+   args.keg = primarykeg;
args.align = UMA_SUPER_ALIGN - 1;
args.flags = UMA_ZFLAG_INTERNAL;
zone_ctor(kegs, zsize, , M_WAITOK);
@@ -3024,13 +3024,13 @@ uma_zcreate(const char *name, size_t size, uma_ctor ct
 /* See uma.h */
 uma_zone_t
 uma_zsecond_create(const char *name, uma_ctor ctor, uma_dtor dtor,
-uma_init zinit, uma_fini zfini, uma_zone_t master)
+uma_init zinit, uma_fini zfini, uma_zone_t primary)
 {
struct uma_zctor_args args;
uma_keg_t keg;
uma_zone_t res;
 
-   keg = master->uz_keg;
+   keg = primary->uz_keg;
memset(, 0, sizeof(args));
args.name = name;
args.size = keg->uk_size;

Modified: head/sys/vm/uma_int.h
==
--- head/sys/vm/uma_int.h   Sat Jun 20 20:20:16 2020(r362448)
+++ head/sys/vm/uma_int.h   Sat Jun 20 20:21:04 2020(r362449)
@@ -97,8 +97,8 @@
  * safely only from their associated CPU, while the Zones backed by the same
  * Keg all share a common Keg lock (to coalesce contention on the backing
  * slabs).  The backing Keg typically only serves one Zone but in the case of
- * multiple Zones, one of the Zones is considered the Master Zone and all
- * Zone-related stats from the Keg are done in the Master Zone.  For an
+ * multiple Zones, one of the Zones is considered the Primary Zone and all
+ * Zone-related stats from the Keg are done 

svn commit: r358901 - head/sys/vm

2020-03-11 Thread Jeff Roberson
Author: jeff
Date: Wed Mar 11 22:25:45 2020
New Revision: 358901
URL: https://svnweb.freebsd.org/changeset/base/358901

Log:
  Check for busy or wired in vm_page_relookup().  Some callers will only keep
  a page wired and expect it to still be present.
  
  Reported by:  delp...@freebsd.org
  Reviewed by:  kib
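
For illustration (not part of this commit), a hedged sketch of the caller
pattern the log describes: a page that is kept only wired, with its busy
lock released, is later looked up again with vm_page_relookup().  The
object, index, and helper name are illustrative.

#include <sys/param.h>
#include <vm/vm.h>
#include <vm/vm_object.h>
#include <vm/vm_page.h>
#include <vm/vm_pager.h>

static vm_page_t
wired_relookup(vm_object_t obj, vm_pindex_t pindex)
{
	vm_page_t m;

	if (vm_page_grab_valid_unlocked(&m, obj, pindex,
	    VM_ALLOC_NORMAL | VM_ALLOC_WIRED) != VM_PAGER_OK)
		return (NULL);
	/* Drop the busy lock; the wiring keeps the page from being freed. */
	vm_page_xunbusy(m);

	/* ... work that holds the page wired but not busied ... */

	/* Permitted by the relaxed assertion: wired but not busied. */
	return (vm_page_relookup(obj, pindex));
}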

Modified:
  head/sys/vm/vm_page.c

Modified: head/sys/vm/vm_page.c
==
--- head/sys/vm/vm_page.c   Wed Mar 11 22:05:41 2020(r358900)
+++ head/sys/vm/vm_page.c   Wed Mar 11 22:25:45 2020(r358901)
@@ -1672,7 +1672,7 @@ vm_page_relookup(vm_object_t object, vm_pindex_t pinde
vm_page_t m;
 
m = vm_radix_lookup_unlocked(>rtree, pindex);
-   KASSERT(m != NULL && vm_page_busied(m) &&
+   KASSERT(m != NULL && (vm_page_busied(m) || vm_page_wired(m)) &&
m->object == object && m->pindex == pindex,
("vm_page_relookup: Invalid page %p", m));
return (m);


svn commit: r358451 - in head/sys: kern vm

2020-02-28 Thread Jeff Roberson
Author: jeff
Date: Fri Feb 28 21:42:48 2020
New Revision: 358451
URL: https://svnweb.freebsd.org/changeset/base/358451

Log:
  Provide a lock-free alternative to resolve bogus pages.  This is not likely
  to be much of a perf win, just a nice code simplification.
  
  Reviewed by:  markj, kib
  Differential Revision:https://reviews.freebsd.org/D23866

Modified:
  head/sys/kern/kern_sendfile.c
  head/sys/kern/vfs_bio.c
  head/sys/vm/vm_page.c
  head/sys/vm/vm_page.h

Modified: head/sys/kern/kern_sendfile.c
==
--- head/sys/kern/kern_sendfile.c   Fri Feb 28 21:31:40 2020
(r358450)
+++ head/sys/kern/kern_sendfile.c   Fri Feb 28 21:42:48 2020
(r358451)
@@ -350,7 +350,6 @@ sendfile_swapin(vm_object_t obj, struct sf_io *sfio, i
 {
vm_page_t *pa = sfio->pa;
int grabbed;
-   bool locked;
 
*nios = 0;
flags = (flags & SF_NODISKIO) ? VM_ALLOC_NOWAIT : 0;
@@ -359,8 +358,6 @@ sendfile_swapin(vm_object_t obj, struct sf_io *sfio, i
 * First grab all the pages and wire them.  Note that we grab
 * only required pages.  Readahead pages are dealt with later.
 */
-   locked = false;
-
grabbed = vm_page_grab_pages_unlocked(obj, OFF_TO_IDX(off),
VM_ALLOC_NORMAL | VM_ALLOC_WIRED | flags, pa, npages);
if (grabbed < npages) {
@@ -381,10 +378,6 @@ sendfile_swapin(vm_object_t obj, struct sf_io *sfio, i
i++;
continue;
}
-   if (!locked) {
-   VM_OBJECT_WLOCK(obj);
-   locked = true;
-   }
 
/*
 * Next page is invalid.  Check if it belongs to pager.  It
@@ -396,8 +389,10 @@ sendfile_swapin(vm_object_t obj, struct sf_io *sfio, i
 * stored in 'a', about how many pages we can pagein after
 * this page in a single I/O.
 */
+   VM_OBJECT_RLOCK(obj);
if (!vm_pager_has_page(obj, OFF_TO_IDX(vmoff(i, off)), NULL,
)) {
+   VM_OBJECT_RUNLOCK(obj);
pmap_zero_page(pa[i]);
vm_page_valid(pa[i]);
MPASS(pa[i]->dirty == 0);
@@ -405,6 +400,7 @@ sendfile_swapin(vm_object_t obj, struct sf_io *sfio, i
i++;
continue;
}
+   VM_OBJECT_RUNLOCK(obj);
 
/*
 * We want to pagein as many pages as possible, limited only
@@ -435,11 +431,9 @@ sendfile_swapin(vm_object_t obj, struct sf_io *sfio, i
}
 
refcount_acquire(>nios);
-   VM_OBJECT_WUNLOCK(obj);
rv = vm_pager_get_pages_async(obj, pa + i, count, NULL,
i + count == npages ?  : NULL,
_iodone, sfio);
-   VM_OBJECT_WLOCK(obj);
if (__predict_false(rv != VM_PAGER_OK)) {
/*
 * Perform full pages recovery before returning EIO.
@@ -451,7 +445,7 @@ sendfile_swapin(vm_object_t obj, struct sf_io *sfio, i
for (j = 0; j < npages; j++) {
if (j > i && j < i + count - 1 &&
pa[j] == bogus_page)
-   pa[j] = vm_page_lookup(obj,
+   pa[j] = vm_page_relookup(obj,
OFF_TO_IDX(vmoff(j, off)));
else if (j >= i)
vm_page_xunbusy(pa[j]);
@@ -460,7 +454,6 @@ sendfile_swapin(vm_object_t obj, struct sf_io *sfio, i
__func__, pa, j));
vm_page_unwire(pa[j], PQ_INACTIVE);
}
-   VM_OBJECT_WUNLOCK(obj);
return (EIO);
}
 
@@ -475,7 +468,7 @@ sendfile_swapin(vm_object_t obj, struct sf_io *sfio, i
 */
for (j = i + 1; j < i + count - 1; j++)
if (pa[j] == bogus_page) {
-   pa[j] = vm_page_lookup(obj,
+   pa[j] = vm_page_relookup(obj,
OFF_TO_IDX(vmoff(j, off)));
KASSERT(pa[j], ("%s: page %p[%d] disappeared",
__func__, pa, j));
@@ -484,9 +477,6 @@ sendfile_swapin(vm_object_t obj, struct sf_io *sfio, i
i += count;
(*nios)++;
}
-
-   if (locked)
-   VM_OBJECT_WUNLOCK(obj);
 
if (*nios == 0 && npages != 0)
SFSTAT_INC(sf_noiocnt);

Modified: head/sys/kern/vfs_bio.c

svn commit: r358447 - in head/sys: dev/drm2/ttm dev/md kern vm

2020-02-28 Thread Jeff Roberson
Author: jeff
Date: Fri Feb 28 20:34:30 2020
New Revision: 358447
URL: https://svnweb.freebsd.org/changeset/base/358447

Log:
  Convert a few trivial consumers to the new unlocked grab API.
  
  Reviewed by:  kib, markj
  Differential Revision:https://reviews.freebsd.org/D23847
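
For illustration (not part of this commit), a hedged before/after sketch
of the conversion these consumers receive; the object and index are
placeholders.

#include <sys/param.h>
#include <vm/vm.h>
#include <vm/vm_object.h>
#include <vm/vm_page.h>

static vm_page_t
grab_one(vm_object_t obj, vm_pindex_t idx)
{
	/*
	 * Before: the grab required the object lock.
	 *
	 *	VM_OBJECT_WLOCK(obj);
	 *	m = vm_page_grab(obj, idx, VM_ALLOC_NORMAL);
	 *	VM_OBJECT_WUNLOCK(obj);
	 *
	 * After: the unlocked variant uses the lockless radix lookup and
	 * falls back to the locked path only if the page is not present.
	 */
	return (vm_page_grab_unlocked(obj, idx, VM_ALLOC_NORMAL));
}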

Modified:
  head/sys/dev/drm2/ttm/ttm_tt.c
  head/sys/dev/md/md.c
  head/sys/kern/kern_exec.c
  head/sys/kern/kern_sendfile.c
  head/sys/kern/vfs_bio.c
  head/sys/vm/vm_glue.c

Modified: head/sys/dev/drm2/ttm/ttm_tt.c
==
--- head/sys/dev/drm2/ttm/ttm_tt.c  Fri Feb 28 20:33:28 2020
(r358446)
+++ head/sys/dev/drm2/ttm/ttm_tt.c  Fri Feb 28 20:34:30 2020
(r358447)
@@ -285,24 +285,24 @@ int ttm_tt_swapin(struct ttm_tt *ttm)
 
obj = ttm->swap_storage;
 
-   VM_OBJECT_WLOCK(obj);
vm_object_pip_add(obj, 1);
for (i = 0; i < ttm->num_pages; ++i) {
-   rv = vm_page_grab_valid(_page, obj, i,
-   VM_ALLOC_NORMAL | VM_ALLOC_NOBUSY);
+   rv = vm_page_grab_valid_unlocked(_page, obj, i,
+   VM_ALLOC_NORMAL | VM_ALLOC_SBUSY | VM_ALLOC_IGN_SBUSY);
if (rv != VM_PAGER_OK) {
ret = -EIO;
goto err_ret;
}
to_page = ttm->pages[i];
if (unlikely(to_page == NULL)) {
+   vm_page_sunbusy(from_page);
ret = -ENOMEM;
goto err_ret;
}
pmap_copy_page(from_page, to_page);
+   vm_page_sunbusy(from_page);
}
vm_object_pip_wakeup(obj);
-   VM_OBJECT_WUNLOCK(obj);
 
if (!(ttm->page_flags & TTM_PAGE_FLAG_PERSISTENT_SWAP))
vm_object_deallocate(obj);
@@ -312,7 +312,6 @@ int ttm_tt_swapin(struct ttm_tt *ttm)
 
 err_ret:
vm_object_pip_wakeup(obj);
-   VM_OBJECT_WUNLOCK(obj);
return (ret);
 }
 

Modified: head/sys/dev/md/md.c
==
--- head/sys/dev/md/md.cFri Feb 28 20:33:28 2020(r358446)
+++ head/sys/dev/md/md.cFri Feb 28 20:34:30 2020(r358447)
@@ -1060,9 +1060,7 @@ mdstart_swap(struct md_s *sc, struct bio *bp)
vm_object_pip_add(sc->object, 1);
for (i = bp->bio_offset / PAGE_SIZE; i <= lastp; i++) {
len = ((i == lastp) ? lastend : PAGE_SIZE) - offs;
-   VM_OBJECT_WLOCK(sc->object);
-   m = vm_page_grab(sc->object, i, VM_ALLOC_SYSTEM);
-   VM_OBJECT_WUNLOCK(sc->object);
+   m = vm_page_grab_unlocked(sc->object, i, VM_ALLOC_SYSTEM);
if (bp->bio_cmd == BIO_READ) {
if (vm_page_all_valid(m))
rv = VM_PAGER_OK;

Modified: head/sys/kern/kern_exec.c
==
--- head/sys/kern/kern_exec.c   Fri Feb 28 20:33:28 2020(r358446)
+++ head/sys/kern/kern_exec.c   Fri Feb 28 20:34:30 2020(r358447)
@@ -984,14 +984,16 @@ exec_map_first_page(struct image_params *imgp)
object = imgp->vp->v_object;
if (object == NULL)
return (EACCES);
-   VM_OBJECT_WLOCK(object);
 #if VM_NRESERVLEVEL > 0
-   vm_object_color(object, 0);
+   if ((object->flags & OBJ_COLORED) == 0) {
+   VM_OBJECT_WLOCK(object);
+   vm_object_color(object, 0);
+   VM_OBJECT_WUNLOCK(object);
+   }
 #endif
-   error = vm_page_grab_valid(, object, 0,
+   error = vm_page_grab_valid_unlocked(, object, 0,
VM_ALLOC_COUNT(VM_INITIAL_PAGEIN) |
 VM_ALLOC_NORMAL | VM_ALLOC_NOBUSY | VM_ALLOC_WIRED);
-   VM_OBJECT_WUNLOCK(object);
 
if (error != VM_PAGER_OK)
return (EIO);

Modified: head/sys/kern/kern_sendfile.c
==
--- head/sys/kern/kern_sendfile.c   Fri Feb 28 20:33:28 2020
(r358446)
+++ head/sys/kern/kern_sendfile.c   Fri Feb 28 20:34:30 2020
(r358447)
@@ -350,6 +350,7 @@ sendfile_swapin(vm_object_t obj, struct sf_io *sfio, i
 {
vm_page_t *pa = sfio->pa;
int grabbed;
+   bool locked;
 
*nios = 0;
flags = (flags & SF_NODISKIO) ? VM_ALLOC_NOWAIT : 0;
@@ -358,9 +359,9 @@ sendfile_swapin(vm_object_t obj, struct sf_io *sfio, i
 * First grab all the pages and wire them.  Note that we grab
 * only required pages.  Readahead pages are dealt with later.
 */
-   VM_OBJECT_WLOCK(obj);
+   locked = false;
 
-   grabbed = vm_page_grab_pages(obj, OFF_TO_IDX(off),
+   grabbed = vm_page_grab_pages_unlocked(obj, OFF_TO_IDX(off),
VM_ALLOC_NORMAL | VM_ALLOC_WIRED | flags, pa, npages);
if (grabbed < npages) {

svn commit: r358446 - head/sys/kern

2020-02-28 Thread Jeff Roberson
Author: jeff
Date: Fri Feb 28 20:33:28 2020
New Revision: 358446
URL: https://svnweb.freebsd.org/changeset/base/358446

Log:
  Use unlocked grab for uipc_shm/tmpfs.
  
  Reviewed by:  markj
  Differential Revision:https://reviews.freebsd.org/D23865

Modified:
  head/sys/kern/uipc_shm.c

Modified: head/sys/kern/uipc_shm.c
==
--- head/sys/kern/uipc_shm.cFri Feb 28 20:32:35 2020(r358445)
+++ head/sys/kern/uipc_shm.cFri Feb 28 20:33:28 2020(r358446)
@@ -176,23 +176,25 @@ uiomove_object_page(vm_object_t obj, size_t len, struc
offset = uio->uio_offset & PAGE_MASK;
tlen = MIN(PAGE_SIZE - offset, len);
 
-   VM_OBJECT_WLOCK(obj);
+   rv = vm_page_grab_valid_unlocked(, obj, idx,
+   VM_ALLOC_SBUSY | VM_ALLOC_IGN_SBUSY | VM_ALLOC_NOCREAT);
+   if (rv == VM_PAGER_OK)
+   goto found;
 
/*
 * Read I/O without either a corresponding resident page or swap
 * page: use zero_region.  This is intended to avoid instantiating
 * pages on read from a sparse region.
 */
-   if (uio->uio_rw == UIO_READ && vm_page_lookup(obj, idx) == NULL &&
+   VM_OBJECT_WLOCK(obj);
+   m = vm_page_lookup(obj, idx);
+   if (uio->uio_rw == UIO_READ && m == NULL &&
!vm_pager_has_page(obj, idx, NULL, NULL)) {
VM_OBJECT_WUNLOCK(obj);
return (uiomove(__DECONST(void *, zero_region), tlen, uio));
}
 
/*
-* Parallel reads of the page content from disk are prevented
-* by exclusive busy.
-*
 * Although the tmpfs vnode lock is held here, it is
 * nonetheless safe to sleep waiting for a free page.  The
 * pageout daemon does not need to acquire the tmpfs vnode
@@ -208,6 +210,8 @@ uiomove_object_page(vm_object_t obj, size_t len, struc
return (EIO);
}
VM_OBJECT_WUNLOCK(obj);
+
+found:
error = uiomove_fromphys(, offset, tlen, uio);
if (uio->uio_rw == UIO_WRITE && error == 0)
vm_page_set_dirty(m);


svn commit: r358445 - head/sys/vm

2020-02-28 Thread Jeff Roberson
Author: jeff
Date: Fri Feb 28 20:32:35 2020
New Revision: 358445
URL: https://svnweb.freebsd.org/changeset/base/358445

Log:
  Support the NOCREAT flag for grab_valid_unlocked.
  
  Reviewed by:  markj
  Differential Revision:https://reviews.freebsd.org/D23865
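
For illustration (not part of this commit), a hedged sketch of what the
flag enables: a lock-free lookup that returns an existing valid page or
fails with VM_PAGER_FAIL rather than instantiating a new page.  The flag
combination mirrors the uipc_shm usage; the names are illustrative.

#include <sys/param.h>
#include <vm/vm.h>
#include <vm/vm_object.h>
#include <vm/vm_page.h>
#include <vm/vm_pager.h>

/* Return an sbusied, already-valid page, or NULL without creating one. */
static vm_page_t
lookup_valid_only(vm_object_t obj, vm_pindex_t idx)
{
	vm_page_t m;

	if (vm_page_grab_valid_unlocked(&m, obj, idx, VM_ALLOC_NOCREAT |
	    VM_ALLOC_SBUSY | VM_ALLOC_IGN_SBUSY) != VM_PAGER_OK)
		return (NULL);
	return (m);
}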

Modified:
  head/sys/vm/vm_page.c

Modified: head/sys/vm/vm_page.c
==
--- head/sys/vm/vm_page.c   Fri Feb 28 20:30:53 2020(r358444)
+++ head/sys/vm/vm_page.c   Fri Feb 28 20:32:35 2020(r358445)
@@ -4639,6 +4639,10 @@ vm_page_grab_valid_unlocked(vm_page_t *mp, vm_object_t
}
vm_page_busy_release(m);
}
+   if ((allocflags & VM_ALLOC_NOCREAT) != 0) {
+   *mp = NULL;
+   return (VM_PAGER_FAIL);
+   }
VM_OBJECT_WLOCK(object);
error = vm_page_grab_valid(mp, object, pindex, allocflags);
VM_OBJECT_WUNLOCK(object);


svn commit: r358444 - head/sys/vm

2020-02-28 Thread Jeff Roberson
Author: jeff
Date: Fri Feb 28 20:30:53 2020
New Revision: 358444
URL: https://svnweb.freebsd.org/changeset/base/358444

Log:
  Simplify vref() code in object_reference.  The local temporary is no longer
  necessary.  Fix formatting errors.
  
  Reported by:  mjg
  Discussed with:   kib

Modified:
  head/sys/vm/vm_object.c

Modified: head/sys/vm/vm_object.c
==
--- head/sys/vm/vm_object.c Fri Feb 28 20:29:53 2020(r358443)
+++ head/sys/vm/vm_object.c Fri Feb 28 20:30:53 2020(r358444)
@@ -484,7 +484,6 @@ vm_object_allocate_anon(vm_pindex_t size, vm_object_t 
 static void
 vm_object_reference_vnode(vm_object_t object)
 {
-   struct vnode *vp;
u_int old;
 
/*
@@ -494,10 +493,8 @@ vm_object_reference_vnode(vm_object_t object)
if (!refcount_acquire_if_gt(>ref_count, 0)) {
VM_OBJECT_RLOCK(object);
old = refcount_acquire(>ref_count);
-   if (object->type == OBJT_VNODE && old == 0) {
-   vp = object->handle;
-   vref(vp);
-   }
+   if (object->type == OBJT_VNODE && old == 0)
+   vref(object->handle);
VM_OBJECT_RUNLOCK(object);
}
 }
@@ -532,13 +529,12 @@ vm_object_reference(vm_object_t object)
 void
 vm_object_reference_locked(vm_object_t object)
 {
-   struct vnode *vp;
u_int old;
 
VM_OBJECT_ASSERT_LOCKED(object);
old = refcount_acquire(>ref_count);
-   if (object->type == OBJT_VNODE && old == 0) {
-   vp = object->handle; vref(vp); }
+   if (object->type == OBJT_VNODE && old == 0)
+   vref(object->handle);
KASSERT((object->flags & OBJ_DEAD) == 0,
("vm_object_reference: Referenced dead object."));
 }


svn commit: r358443 - head/sys/cddl/contrib/opensolaris/uts/common/fs/zfs

2020-02-28 Thread Jeff Roberson
Author: jeff
Date: Fri Feb 28 20:29:53 2020
New Revision: 358443
URL: https://svnweb.freebsd.org/changeset/base/358443

Log:
  Eliminate object locking in zfs where possible with the new lockless grab
  APIs.
  
  Reviewed by:  kib, markj, mmacy
  Differential Revision:https://reviews.freebsd.org/D23848

Modified:
  head/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dmu.c
  head/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zfs_vnops.c

Modified: head/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dmu.c
==
--- head/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dmu.c   Fri Feb 28 
18:35:36 2020(r358442)
+++ head/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dmu.c   Fri Feb 28 
20:29:53 2020(r358443)
@@ -1738,11 +1738,10 @@ dmu_read_pages(objset_t *os, uint64_t object, vm_page_
 #endif
 
vmobj = ma[0]->object;
-   zfs_vmobject_wlock(vmobj);
 
db = dbp[0];
for (i = 0; i < *rbehind; i++) {
-   m = vm_page_grab(vmobj, ma[0]->pindex - 1 - i,
+   m = vm_page_grab_unlocked(vmobj, ma[0]->pindex - 1 - i,
VM_ALLOC_NORMAL | VM_ALLOC_NOWAIT |
VM_ALLOC_SBUSY | VM_ALLOC_IGN_SBUSY);
if (m == NULL)
@@ -1857,7 +1856,7 @@ dmu_read_pages(objset_t *os, uint64_t object, vm_page_
}
 
for (i = 0; i < *rahead; i++) {
-   m = vm_page_grab(vmobj, ma[count - 1]->pindex + 1 + i,
+   m = vm_page_grab_unlocked(vmobj, ma[count - 1]->pindex + 1 + i,
VM_ALLOC_NORMAL | VM_ALLOC_NOWAIT |
VM_ALLOC_SBUSY | VM_ALLOC_IGN_SBUSY);
if (m == NULL)
@@ -1889,7 +1888,6 @@ dmu_read_pages(objset_t *os, uint64_t object, vm_page_
vm_page_sunbusy(m);
}
*rahead = i;
-   zfs_vmobject_wunlock(vmobj);
 
dmu_buf_rele_array(dbp, numbufs, FTAG);
return (0);

Modified: head/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zfs_vnops.c
==
--- head/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zfs_vnops.c Fri Feb 
28 18:35:36 2020(r358442)
+++ head/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zfs_vnops.c Fri Feb 
28 20:29:53 2020(r358443)
@@ -410,10 +410,10 @@ page_busy(vnode_t *vp, int64_t start, int64_t off, int
nbytes = end - off;
 
obj = vp->v_object;
-   zfs_vmobject_assert_wlocked(obj);
 
-   vm_page_grab_valid(, obj, OFF_TO_IDX(start), VM_ALLOC_NOCREAT |
-   VM_ALLOC_SBUSY | VM_ALLOC_NORMAL | VM_ALLOC_IGN_SBUSY);
+   vm_page_grab_valid_unlocked(, obj, OFF_TO_IDX(start),
+   VM_ALLOC_NOCREAT | VM_ALLOC_SBUSY | VM_ALLOC_NORMAL |
+   VM_ALLOC_IGN_SBUSY);
if (pp != NULL) {
ASSERT3U(pp->valid, ==, VM_PAGE_BITS_ALL);
vm_object_pip_add(obj, 1);
@@ -439,10 +439,9 @@ page_wire(vnode_t *vp, int64_t start)
vm_page_t m;
 
obj = vp->v_object;
-   zfs_vmobject_assert_wlocked(obj);
-
-   vm_page_grab_valid(, obj, OFF_TO_IDX(start), VM_ALLOC_NOCREAT |
-   VM_ALLOC_WIRED | VM_ALLOC_IGN_SBUSY | VM_ALLOC_NOBUSY);
+   vm_page_grab_valid_unlocked(, obj, OFF_TO_IDX(start),
+   VM_ALLOC_NOCREAT | VM_ALLOC_WIRED | VM_ALLOC_IGN_SBUSY |
+   VM_ALLOC_NOBUSY);
return (m);
 }
 
@@ -475,28 +474,22 @@ update_pages(vnode_t *vp, int64_t start, int len, objs
ASSERT(obj != NULL);
 
off = start & PAGEOFFSET;
-   zfs_vmobject_wlock(obj);
vm_object_pip_add(obj, 1);
for (start &= PAGEMASK; len > 0; start += PAGESIZE) {
vm_page_t pp;
int nbytes = imin(PAGESIZE - off, len);
 
if ((pp = page_busy(vp, start, off, nbytes)) != NULL) {
-   zfs_vmobject_wunlock(obj);
-
va = zfs_map_page(pp, );
(void) dmu_read(os, oid, start+off, nbytes,
va+off, DMU_READ_PREFETCH);;
zfs_unmap_page(sf);
-
-   zfs_vmobject_wlock(obj);
page_unbusy(pp);
}
len -= nbytes;
off = 0;
}
vm_object_pip_wakeup(obj);
-   zfs_vmobject_wunlock(obj);
 }
 
 /*
@@ -528,29 +521,31 @@ mappedread_sf(vnode_t *vp, int nbytes, uio_t *uio)
ASSERT(obj != NULL);
ASSERT((uio->uio_loffset & PAGEOFFSET) == 0);
 
-   zfs_vmobject_wlock(obj);
for (start = uio->uio_loffset; len > 0; start += PAGESIZE) {
int bytes = MIN(PAGESIZE, len);
 
-   pp = vm_page_grab(obj, OFF_TO_IDX(start), VM_ALLOC_SBUSY |
-   VM_ALLOC_NORMAL | VM_ALLOC_IGN_SBUSY);
+   pp = vm_page_grab_unlocked(obj, OFF_TO_IDX(start),
+   VM_ALLOC_SBUSY | VM_ALLOC_NORMAL | 

svn commit: r358400 - in head/sys: kern sys

2020-02-27 Thread Jeff Roberson
Author: jeff
Date: Thu Feb 27 19:05:26 2020
New Revision: 358400
URL: https://svnweb.freebsd.org/changeset/base/358400

Log:
  Simplify lazy advance with a 64-bit atomic cmpset.
  
  This provides the potential to force a lazy (tick based) SMR to advance
  when there are blocking waiters by decoupling the wr_seq value from the
  ticks value.
  
  Add some missing compiler barriers.
  
  Reviewed by:  rlibby
  Differential Revision:https://reviews.freebsd.org/D23825
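
For illustration (not part of this commit), a generic sketch of the
technique the diff applies: the tick stamp and the sequence number share
one 64-bit word, so a single compare-and-set publishes both consistently.
The union layout and names here are illustrative, not the actual smr.h
definition.

#include <sys/types.h>
#include <machine/atomic.h>

union tickseq {
	uint64_t	_pair;
	struct {
		uint32_t	ticks;	/* Last update time. */
		uint32_t	seq;	/* Sequence number. */
	};
};

static void
tickseq_advance(volatile union tickseq *p, uint32_t now, uint32_t incr)
{
	union tickseq old, upd;

	old._pair = upd._pair = atomic_load_acq_64(&p->_pair);
	if (upd.ticks == now)
		return;			/* Already advanced this tick. */
	upd.ticks = now;
	upd.seq += incr;
	/* A failed cmpset means another CPU advanced the pair; that is fine. */
	atomic_cmpset_64(&p->_pair, old._pair, upd._pair);
}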

Modified:
  head/sys/kern/subr_smr.c
  head/sys/sys/smr.h

Modified: head/sys/kern/subr_smr.c
==
--- head/sys/kern/subr_smr.cThu Feb 27 19:04:39 2020(r358399)
+++ head/sys/kern/subr_smr.cThu Feb 27 19:05:26 2020(r358400)
@@ -184,12 +184,9 @@ static uma_zone_t smr_zone;
  * that will flush the store buffer or prevent access to the section protected
  * data.  For example, an idle processor, or an system management interrupt,
  * or a vm exit.
- *
- * We must wait one additional tick if we are around the wrap condition
- * because the write seq will move forward by two with one interrupt.
  */
 #defineSMR_LAZY_GRACE  2
-#defineSMR_LAZY_GRACE_MAX  (SMR_LAZY_GRACE + 1)
+#defineSMR_LAZY_INCR   (SMR_LAZY_GRACE * SMR_SEQ_INCR)
 
 /*
  * The maximum sequence number ahead of wr_seq that may still be valid.  The
@@ -197,7 +194,7 @@ static uma_zone_t smr_zone;
  * case poll needs to attempt to forward the sequence number if the goal is
  * within wr_seq + SMR_SEQ_ADVANCE.
  */
-#defineSMR_SEQ_ADVANCE MAX(SMR_SEQ_INCR, SMR_LAZY_GRACE_MAX)
+#defineSMR_SEQ_ADVANCE SMR_LAZY_INCR
 
 static SYSCTL_NODE(_debug, OID_AUTO, smr, CTLFLAG_RW | CTLFLAG_MPSAFE, NULL,
 "SMR Stats");
@@ -214,66 +211,45 @@ SYSCTL_COUNTER_U64(_debug_smr, OID_AUTO, poll_fail, CT
 
 /*
  * Advance a lazy write sequence number.  These move forward at the rate of
- * ticks.  Grace is two ticks in the future.  lazy write sequence numbers can
- * be even but not SMR_SEQ_INVALID so we pause time for a tick when we wrap.
+ * ticks.  Grace is SMR_LAZY_INCR (2 ticks) in the future.
  *
- * This returns the _current_ write sequence number.  The lazy goal sequence
- * number is SMR_LAZY_GRACE ticks ahead.
+ * This returns the goal write sequence number.
  */
 static smr_seq_t
 smr_lazy_advance(smr_t smr, smr_shared_t s)
 {
-   smr_seq_t s_rd_seq, s_wr_seq, goal;
-   int t;
+   union s_wr s_wr, old;
+   int t, d;
 
CRITICAL_ASSERT(curthread);
 
/*
-* Load s_wr_seq prior to ticks to ensure that the thread that
-* observes the largest value wins.
+* Load the stored ticks value before the current one.  This way the
+* current value can only be the same or larger.
 */
-   s_wr_seq = atomic_load_acq_int(>s_wr_seq);
-
-   /*
-* We must not allow a zero tick value.  We go back in time one tick
-* and advance the grace period forward one tick around zero.
-*/
+   old._pair = s_wr._pair = atomic_load_acq_64(>s_wr._pair);
t = ticks;
-   if (t == SMR_SEQ_INVALID)
-   t--;
 
/*
 * The most probable condition that the update already took place.
 */
-   if (__predict_true(t == s_wr_seq))
+   d = t - s_wr.ticks;
+   if (__predict_true(d == 0))
goto out;
+   /* Cap the rate of advancement and handle long idle periods. */
+   if (d > SMR_LAZY_GRACE || d < 0)
+   d = SMR_LAZY_GRACE;
+   s_wr.ticks = t;
+   s_wr.seq += d * SMR_SEQ_INCR;
 
/*
-* After long idle periods the read sequence may fall too far
-* behind write.  Prevent poll from ever seeing this condition
-* by updating the stale rd_seq.  This assumes that there can
-* be no valid section 2bn ticks old.  The rd_seq update must
-* be visible before wr_seq to avoid races with other advance
-* callers.
+* This can only fail if another thread races to call advance().
+* Strong cmpset semantics mean we are guaranteed that the update
+* happened.
 */
-   s_rd_seq = atomic_load_int(>s_rd_seq);
-   if (SMR_SEQ_GT(s_rd_seq, t))
-   atomic_cmpset_rel_int(>s_rd_seq, s_rd_seq, t);
-
-   /*
-* Release to synchronize with the wr_seq load above.  Ignore
-* cmpset failures from simultaneous updates.
-*/
-   atomic_cmpset_rel_int(>s_wr_seq, s_wr_seq, t);
-   counter_u64_add(advance, 1);
-   /* If we lost either update race another thread did it. */
-   s_wr_seq = t;
+   atomic_cmpset_64(>s_wr._pair, old._pair, s_wr._pair);
 out:
-   goal = s_wr_seq + SMR_LAZY_GRACE;
-   /* Skip over the SMR_SEQ_INVALID tick. */
-   if (goal < SMR_LAZY_GRACE)
-   goal++;
-   return (goal);
+   return (s_wr.seq + 

svn commit: r358377 - head/sys/vm

2020-02-27 Thread Jeff Roberson
Author: jeff
Date: Thu Feb 27 08:23:10 2020
New Revision: 358377
URL: https://svnweb.freebsd.org/changeset/base/358377

Log:
  A pair of performance improvements.
  
  Swap buckets on free as well as alloc so that alloc is always the most
  cache-hot data.
  
  When selecting a zone domain for the round-robin bucket cache use the
  local domain unless there is a severe imbalance.  This does not affinitize
  memory, only locks and queues.
  
  Reviewed by:  markj, rlibby
  Differential Revision:https://reviews.freebsd.org/D23824
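
For illustration (not part of this commit), a simplified restatement of
the new selection rule described in the log, detached from the UMA data
structures: stay on the preferred (local) domain unless its cache holds at
least twice as many items as the emptiest domain (the 2x threshold comes
from the diff below).  For example, 150 items locally versus 100 in the
emptiest domain stays local, while 250 versus 100 switches.  This captures
the intent; it is not a line-for-line copy of the committed function.

#include <sys/limits.h>

static int
pick_domain(const long *nitems, int ndomains, int pref)
{
	long least, prefitems;
	int domain, i;

	prefitems = least = LONG_MAX;
	domain = 0;
	for (i = 0; i < ndomains; i++) {
		if (nitems[i] < least) {
			least = nitems[i];
			domain = i;
		}
		if (i == pref)
			prefitems = nitems[i];
	}
	/* Only leave the local domain for a severe imbalance. */
	if (prefitems < least * 2)
		return (pref);
	return (domain);
}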

Modified:
  head/sys/vm/uma_core.c

Modified: head/sys/vm/uma_core.c
==
--- head/sys/vm/uma_core.c  Thu Feb 27 07:02:33 2020(r358376)
+++ head/sys/vm/uma_core.c  Thu Feb 27 08:23:10 2020(r358377)
@@ -564,26 +564,29 @@ zone_domain_lock(uma_zone_t zone, int domain)
 }
 
 /*
- * Search for the domain with the least cached items and return it, breaking
- * ties with a preferred domain by returning it.
+ * Search for the domain with the least cached items and return it if it
+ * is out of balance with the preferred domain.
  */
 static __noinline int
 zone_domain_lowest(uma_zone_t zone, int pref)
 {
-   long least, nitems;
+   long least, nitems, prefitems;
int domain;
int i;
 
-   least = LONG_MAX;
+   prefitems = least = LONG_MAX;
domain = 0;
for (i = 0; i < vm_ndomains; i++) {
nitems = ZDOM_GET(zone, i)->uzd_nitems;
if (nitems < least) {
domain = i;
least = nitems;
-   } else if (nitems == least && (i == pref || domain == pref))
-   domain = pref;
+   }
+   if (domain == pref)
+   prefitems = nitems;
}
+   if (prefitems < least * 2)
+   return (pref);
 
return (domain);
 }
@@ -4102,8 +4105,11 @@ uma_zfree_arg(uma_zone_t zone, void *item, void *udata
bucket = >uc_crossbucket;
} else
 #endif
-   if (bucket->ucb_cnt >= bucket->ucb_entries)
-   bucket = >uc_freebucket;
+   if (bucket->ucb_cnt == bucket->ucb_entries &&
+  cache->uc_freebucket.ucb_cnt <
+  cache->uc_freebucket.ucb_entries)
+   cache_bucket_swap(>uc_freebucket,
+   >uc_allocbucket);
if (__predict_true(bucket->ucb_cnt < bucket->ucb_entries)) {
cache_bucket_push(cache, bucket, item);
critical_exit();


svn commit: r358363 - head/sys/vm

2020-02-26 Thread Jeff Roberson
Author: jeff
Date: Thu Feb 27 02:37:27 2020
New Revision: 358363
URL: https://svnweb.freebsd.org/changeset/base/358363

Log:
  Add unlocked grab* function variants that use lockless radix code to
  look up pages.  These variants will fall back to their locked counterparts
  if the page is not present.
  
  Discussed with:   kib, markj
  Differential Revision:https://reviews.freebsd.org/D23449

Modified:
  head/sys/vm/vm_page.c
  head/sys/vm/vm_page.h

Modified: head/sys/vm/vm_page.c
==
--- head/sys/vm/vm_page.c   Thu Feb 27 00:57:36 2020(r358362)
+++ head/sys/vm/vm_page.c   Thu Feb 27 02:37:27 2020(r358363)
@@ -830,47 +830,39 @@ vm_page_reference(vm_page_t m)
vm_page_aflag_set(m, PGA_REFERENCED);
 }
 
+/*
+ * vm_page_trybusy
+ *
+ * Helper routine for grab functions to trylock busy.
+ *
+ * Returns true on success and false on failure.
+ */
 static bool
-vm_page_acquire_flags(vm_page_t m, int allocflags)
+vm_page_trybusy(vm_page_t m, int allocflags)
 {
-   bool locked;
 
if ((allocflags & (VM_ALLOC_SBUSY | VM_ALLOC_IGN_SBUSY)) != 0)
-   locked = vm_page_trysbusy(m);
+   return (vm_page_trysbusy(m));
else
-   locked = vm_page_tryxbusy(m);
-   if (locked && (allocflags & VM_ALLOC_WIRED) != 0)
-   vm_page_wire(m);
-   return (locked);
+   return (vm_page_tryxbusy(m));
 }
 
 /*
- * vm_page_busy_sleep_flags
+ * vm_page_tryacquire
  *
- * Sleep for busy according to VM_ALLOC_ parameters.  Returns true
- * if the caller should retry and false otherwise.
+ * Helper routine for grab functions to trylock busy and wire.
+ *
+ * Returns true on success and false on failure.
  */
-static bool
-vm_page_busy_sleep_flags(vm_object_t object, vm_page_t m, const char *wmesg,
-int allocflags)
+static inline bool
+vm_page_tryacquire(vm_page_t m, int allocflags)
 {
+   bool locked;
 
-   if ((allocflags & VM_ALLOC_NOWAIT) != 0)
-   return (false);
-
-   /*
-* Reference the page before unlocking and sleeping so that
-* the page daemon is less likely to reclaim it.
-*/
-   if ((allocflags & VM_ALLOC_NOCREAT) == 0)
-   vm_page_reference(m);
-
-   if (_vm_page_busy_sleep(object, m, m->pindex, wmesg, allocflags, true))
-   VM_OBJECT_WLOCK(object);
-   if ((allocflags & VM_ALLOC_WAITFAIL) != 0)
-   return (false);
-
-   return (true);
+   locked = vm_page_trybusy(m, allocflags);
+   if (locked && (allocflags & VM_ALLOC_WIRED) != 0)
+   vm_page_wire(m);
+   return (locked);
 }
 
 /*
@@ -894,7 +886,7 @@ vm_page_busy_acquire(vm_page_t m, int allocflags)
 */
obj = m->object;
for (;;) {
-   if (vm_page_acquire_flags(m, allocflags))
+   if (vm_page_tryacquire(m, allocflags))
return (true);
if ((allocflags & VM_ALLOC_NOWAIT) != 0)
return (false);
@@ -1604,6 +1596,7 @@ vm_page_object_remove(vm_page_t m)
if ((m->a.flags & PGA_SWAP_FREE) != 0)
vm_pager_page_unswapped(m);
 
+   m->object = NULL;
mrem = vm_radix_remove(>rtree, m->pindex);
KASSERT(mrem == m, ("removed page %p, expected page %p", mrem, m));
 
@@ -1658,7 +1651,6 @@ vm_page_remove_xbusy(vm_page_t m)
 {
 
vm_page_object_remove(m);
-   m->object = NULL;
return (vm_page_drop(m, VPRC_OBJREF) == VPRC_OBJREF);
 }
 
@@ -1679,6 +1671,38 @@ vm_page_lookup(vm_object_t object, vm_pindex_t pindex)
 }
 
 /*
+ * This should only be used by lockless functions for releasing transient
+ * incorrect acquires.  The page may have been freed after we acquired a
+ * busy lock.  In this case busy_lock == VPB_FREED and we have nothing
+ * further to do.
+ */
+static void
+vm_page_busy_release(vm_page_t m)
+{
+   u_int x;
+
+   x = atomic_load_int(>busy_lock);
+   for (;;) {
+   if (x == VPB_FREED)
+   break;
+   if ((x & VPB_BIT_SHARED) != 0 && VPB_SHARERS(x) > 1) {
+   if (atomic_fcmpset_int(>busy_lock, ,
+   x - VPB_ONE_SHARER))
+   break;
+   continue;
+   }
+   KASSERT((x & VPB_BIT_SHARED) != 0 ||
+   (x & ~VPB_BIT_WAITERS) == VPB_CURTHREAD_EXCLUSIVE,
+   ("vm_page_busy_release: %p xbusy not owned.", m));
+   if (!atomic_fcmpset_rel_int(>busy_lock, , VPB_UNBUSIED))
+   continue;
+   if ((x & VPB_BIT_WAITERS) != 0)
+   wakeup(m);
+   break;
+   }
+}
+
+/*
  * vm_page_find_least:
  *
  * Returns the page associated with the object with least pindex
@@ -3688,7 +3712,6 @@ 

svn commit: r358236 - in head/sys: kern sys vm

2020-02-21 Thread Jeff Roberson
Author: jeff
Date: Sat Feb 22 03:44:10 2020
New Revision: 358236
URL: https://svnweb.freebsd.org/changeset/base/358236

Log:
  Add an atomic-free, tick-moderated lazy update variant of SMR.
  
  This enables very cheap read sections with free-to-use latencies and memory
  overhead similar to epoch.  On a recent AMD platform a read section cost
  1ns vs 5ns for the default SMR.  On Xeon the numbers should be more like 1
  ns vs 11.  The memory consumption should be proportional to the product
  of the free rate and 2*1/hz while normal SMR consumption is proportional
  to the product of free rate and maximum read section time.
  
  While here refactor the code to make future additions more
  straightforward.
  
  Name the overall technique Global Unbounded Sequences (GUS) and adjust some
  comments accordingly.  This helps distinguish discussions of the general
  technique (SMR) vs this specific implementation (GUS).
  
  Discussed with:   rlibby, markj
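
For orientation (not part of this commit), a hedged sketch of the consumer
view: items come from an SMR-enabled UMA zone and readers bracket their
accesses with a cheap read section.  UMA_ZONE_SMR, uma_zone_get_smr(), and
the other calls are part of the existing SMR support; the type and names
are illustrative.

#include <sys/param.h>
#include <sys/smr.h>
#include <vm/uma.h>

struct node {
	int	n_val;
};

SMR_TYPE_DECLARE(smrnode_ptr_t, struct node *);

static uma_zone_t node_zone;	/* Created elsewhere with UMA_ZONE_SMR. */
static smrnode_ptr_t node_head;

static int
node_read(void)
{
	struct node *n;
	smr_t smr;
	int v;

	smr = uma_zone_get_smr(node_zone);
	smr_enter(smr);		/* Cheap read section; see the numbers above. */
	n = smr_entered_load(&node_head, smr);
	v = (n != NULL) ? n->n_val : -1;
	smr_exit(smr);		/* Freed nodes are not reused until readers drain. */
	return (v);
}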

Modified:
  head/sys/kern/subr_smr.c
  head/sys/sys/_smr.h
  head/sys/sys/smr.h
  head/sys/vm/uma_core.c

Modified: head/sys/kern/subr_smr.c
==
--- head/sys/kern/subr_smr.cSat Feb 22 03:14:05 2020(r358235)
+++ head/sys/kern/subr_smr.cSat Feb 22 03:44:10 2020(r358236)
@@ -41,6 +41,8 @@ __FBSDID("$FreeBSD$");
 #include 
 
 /*
+ * Global Unbounded Sequences (GUS)
+ *
  * This is a novel safe memory reclamation technique inspired by
  * epoch based reclamation from Samy Al Bahra's concurrency kit which
  * in turn was based on work described in:
@@ -53,7 +55,8 @@ __FBSDID("$FreeBSD$");
  * This is not an implementation of hazard pointers or related
  * techniques.  The term safe memory reclamation is used as a
  * generic descriptor for algorithms that defer frees to avoid
- * use-after-free errors with lockless datastructures.
+ * use-after-free errors with lockless datastructures or as
+ * a mechanism to detect quiescence for writer synchronization.
  *
  * The basic approach is to maintain a monotonic write sequence
  * number that is updated on some application defined granularity.
@@ -67,7 +70,7 @@ __FBSDID("$FreeBSD$");
  * a global write clock that is used to mark memory on free.
  *
  * The write and read sequence numbers can be thought of as a two
- * handed clock with readers always advancing towards writers.  SMR
+ * handed clock with readers always advancing towards writers.  GUS 
  * maintains the invariant that all readers can safely access memory
  * that was visible at the time they loaded their copy of the sequence
  * number.  Periodically the read sequence or hand is polled and
@@ -80,9 +83,12 @@ __FBSDID("$FreeBSD$");
  * A stored sequence number that falls outside of this range has expired
  * and needs no scan to reclaim.
  *
- * A notable distinction between this SMR and Epoch, qsbr, rcu, etc. is
+ * A notable distinction between GUS and Epoch, qsbr, rcu, etc. is
  * that advancing the sequence number is decoupled from detecting its
- * observation.  This results in a more granular assignment of sequence
+ * observation.  That is to say, the delta between read and write
+ * sequence numbers is not bound.  This can be thought of as a more
+ * generalized form of epoch which requires them at most one step
+ * apart.  This results in a more granular assignment of sequence
  * numbers even as read latencies prohibit all or some expiration.
  * It also allows writers to advance the sequence number and save the
  * poll for expiration until a later time when it is likely to
@@ -164,60 +170,145 @@ static uma_zone_t smr_zone;
 #defineSMR_SEQ_MAX_ADVANCE SMR_SEQ_MAX_DELTA / 2
 #endif
 
+/*
+ * The grace period for lazy (tick based) SMR.
+ *
+ * Hardclock is responsible for advancing ticks on a single CPU while every
+ * CPU receives a regular clock interrupt.  The clock interrupts are flushing
+ * the store buffers and any speculative loads that may violate our invariants.
+ * Because these interrupts are not synchronized we must wait one additional
+ * tick in the future to be certain that all processors have had their state
+ * synchronized by an interrupt.
+ *
+ * This assumes that the clock interrupt will only be delayed by other causes
+ * that will flush the store buffer or prevent access to the section protected
+ * data.  For example, an idle processor, or an system management interrupt,
+ * or a vm exit.
+ *
+ * We must wait one additional tick if we are around the wrap condition
+ * because the write seq will move forward by two with one interrupt.
+ */
+#defineSMR_LAZY_GRACE  2
+#defineSMR_LAZY_GRACE_MAX  (SMR_LAZY_GRACE + 1)
+
+/*
+ * The maximum sequence number ahead of wr_seq that may still be valid.  The
+ * sequence may not be advanced on write for lazy or deferred SMRs.  In this
+ * case poll needs to attempt to forward the sequence number if the goal is
+ * within wr_seq + SMR_SEQ_ADVANCE.
+ 

Re: svn commit: r358097 - in head/sys: kern vm

2020-02-20 Thread Jeff Roberson

On Fri, 21 Feb 2020, mmats...@cybernet.co.jp wrote:


Hi Jeff,

From: Jeff Roberson 
Date: Wed, 19 Feb 2020 08:17:27 + (UTC)

Author: jeff
Date: Wed Feb 19 08:17:27 2020
New Revision: 358097
URL: https://svnweb.freebsd.org/changeset/base/358097

Log:
 Eliminate some unnecessary uses of UMA_ZONE_VM.  Only zones involved in
 virtual address or physical page allocation need to be marked with this
 flag.

 Reviewed by:   markj
 Tested by: pho
 Differential Revision: https://reviews.freebsd.org/D23712

Modified:
 head/sys/kern/subr_vmem.c
 head/sys/kern/vfs_subr.c
 head/sys/vm/swap_pager.c
 head/sys/vm/vm_page.c
 head/sys/vm/vm_pager.c

Modified: head/sys/kern/subr_vmem.c
==
--- head/sys/kern/subr_vmem.c   Wed Feb 19 08:15:20 2020(r358096)
+++ head/sys/kern/subr_vmem.c   Wed Feb 19 08:17:27 2020(r358097)
@@ -561,8 +561,7 @@ qc_init(vmem_t *vm, vmem_size_t qcache_max)
qc->qc_vmem = vm;
qc->qc_size = size;
qc->qc_cache = uma_zcache_create(qc->qc_name, size,
-   NULL, NULL, NULL, NULL, qc_import, qc_release, qc,
-   UMA_ZONE_VM);
+   NULL, NULL, NULL, NULL, qc_import, qc_release, qc, 0);
MPASS(qc->qc_cache);
}
}
@@ -668,10 +667,10 @@ vmem_startup(void)
mtx_init(_list_lock, "vmem list lock", NULL, MTX_DEF);
vmem_zone = uma_zcreate("vmem",
sizeof(struct vmem), NULL, NULL, NULL, NULL,
-   UMA_ALIGN_PTR, UMA_ZONE_VM);
+   UMA_ALIGN_PTR, 0);
vmem_bt_zone = uma_zcreate("vmem btag",
sizeof(struct vmem_btag), NULL, NULL, NULL, NULL,
-   UMA_ALIGN_PTR, UMA_ZONE_VM | UMA_ZONE_NOFREE);
+   UMA_ALIGN_PTR, UMA_ZONE_VM);


If I'm reading the message correctly, shouldn't the above line be like the 
following?

+   UMA_ALIGN_PTR, UMA_ZONE_NOFREE);


In this case the commit message wasn't complete.  There is no reason to 
mark the vmem btags as NOFREE because we can allocate them on the fly. 
There is a reason to mark them ZONE_VM however, because we need them to 
allocate virtual address space.


Thanks,
Jeff



Regards,
Haro
=---
  _ _Munehiro (haro) Matsuda
-|- /_\  |_|_|   Cybernet Systems Co., Ltd.
/|\ |_|  |_|_|






svn commit: r358133 - head/sys/vm

2020-02-19 Thread Jeff Roberson
Author: jeff
Date: Wed Feb 19 22:34:22 2020
New Revision: 358133
URL: https://svnweb.freebsd.org/changeset/base/358133

Log:
  Silence a gcc warning about no return from a function that handles every
  possible enum in a switch statement.  I verified that this emits nothing
  as expected on clang.  The radix code relies on constant propagation to eliminate
  any branching from these access routines.
  
  Reported by:  lwhsu/tinderbox

Modified:
  head/sys/vm/vm_radix.c

Modified: head/sys/vm/vm_radix.c
==
--- head/sys/vm/vm_radix.c  Wed Feb 19 21:12:59 2020(r358132)
+++ head/sys/vm/vm_radix.c  Wed Feb 19 22:34:22 2020(r358133)
@@ -208,6 +208,8 @@ vm_radix_node_load(smrnode_t *p, enum vm_radix_access 
case SMR:
return (smr_entered_load(p, vm_radix_smr));
}
+   /* This is unreachable, silence gcc. */
+   panic("vm_radix_node_get: Unknown access type");
 }
 
 static __inline void


svn commit: r358130 - head/sys/vm

2020-02-19 Thread Jeff Roberson
Author: jeff
Date: Wed Feb 19 19:58:31 2020
New Revision: 358130
URL: https://svnweb.freebsd.org/changeset/base/358130

Log:
  Use SMR to provide a safe unlocked lookup for vm_radix.
  
  The tree is kept correct for readers with store barriers and careful
  ordering.  The existing object lock serializes writers.  Consumers
  will be introduced in later commits.
  
  Reviewed by:  markj, kib
  Differential Revision:https://reviews.freebsd.org/D23446
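
For illustration (not part of this commit), a hedged sketch of the
writer-side discipline this change adopts for SMR-protected pointers:
stores go through smr_serialized_store() while the serializing lock is
held (an illustrative mutex stands in for the object lock here), and
lock-holders may read with smr_serialized_load(); readers inside
smr_enter()/smr_exit() use smr_entered_load() as in the GUS sketch
earlier.

#include <sys/param.h>
#include <sys/lock.h>
#include <sys/mutex.h>
#include <sys/smr.h>

struct item {
	int	i_val;
};

SMR_TYPE_DECLARE(smritem_t, struct item *);

static struct mtx item_lock;	/* Stands in for the serializing writer lock. */
static smritem_t item_slot;

static void
item_publish(struct item *it)
{
	mtx_lock(&item_lock);
	/* The third argument is asserted when SMR debugging is enabled. */
	smr_serialized_store(&item_slot, it, mtx_owned(&item_lock));
	mtx_unlock(&item_lock);
}

static struct item *
item_get_locked(void)
{
	mtx_assert(&item_lock, MA_OWNED);
	return (smr_serialized_load(&item_slot, mtx_owned(&item_lock)));
}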

Modified:
  head/sys/vm/vm_radix.c
  head/sys/vm/vm_radix.h

Modified: head/sys/vm/vm_radix.c
==
--- head/sys/vm/vm_radix.c  Wed Feb 19 19:51:09 2020(r358129)
+++ head/sys/vm/vm_radix.c  Wed Feb 19 19:58:31 2020(r358130)
@@ -58,11 +58,14 @@ __FBSDID("$FreeBSD$");
 #include 
 #include 
 #include 
+#include 
 #include 
+#include 
 
 #include 
 #include 
 #include 
+#include 
 #include 
 #include 
 
@@ -95,26 +98,47 @@ __FBSDID("$FreeBSD$");
 #defineVM_RADIX_UNITLEVEL(lev) 
\
((vm_pindex_t)1 << ((lev) * VM_RADIX_WIDTH))
 
+enum vm_radix_access { SMR, LOCKED, UNSERIALIZED };
+
+struct vm_radix_node;
+SMR_TYPE_DECLARE(smrnode_t, struct vm_radix_node *);
+
 struct vm_radix_node {
-   vm_pindex_t  rn_owner;  /* Owner of record. */
-   uint16_t rn_count;  /* Valid children. */
-   uint16_t rn_clev;   /* Current level. */
-   void*rn_child[VM_RADIX_COUNT];  /* Child nodes. */
+   vm_pindex_t rn_owner;   /* Owner of record. */
+   uint16_trn_count;   /* Valid children. */
+   uint8_t rn_clev;/* Current level. */
+   int8_t  rn_last;/* zero last ptr. */
+   smrnode_t   rn_child[VM_RADIX_COUNT];   /* Child nodes. */
 };
 
 static uma_zone_t vm_radix_node_zone;
+static smr_t vm_radix_smr;
 
+static void vm_radix_node_store(smrnode_t *p, struct vm_radix_node *v,
+enum vm_radix_access access);
+
 /*
  * Allocate a radix node.
  */
-static __inline struct vm_radix_node *
+static struct vm_radix_node *
 vm_radix_node_get(vm_pindex_t owner, uint16_t count, uint16_t clevel)
 {
struct vm_radix_node *rnode;
 
-   rnode = uma_zalloc(vm_radix_node_zone, M_NOWAIT);
+   rnode = uma_zalloc_smr(vm_radix_node_zone, M_NOWAIT);
if (rnode == NULL)
return (NULL);
+
+   /*
+* We want to clear the last child pointer after the final section
+* has exited so lookup can not return false negatives.  It is done
+* here because it will be cache-cold in the dtor callback.
+*/
+   if (rnode->rn_last != 0) {
+   vm_radix_node_store(>rn_child[rnode->rn_last - 1],
+   NULL, UNSERIALIZED);
+   rnode->rn_last = 0;
+   }
rnode->rn_owner = owner;
rnode->rn_count = count;
rnode->rn_clev = clevel;
@@ -125,10 +149,24 @@ vm_radix_node_get(vm_pindex_t owner, uint16_t count, u
  * Free radix node.
  */
 static __inline void
-vm_radix_node_put(struct vm_radix_node *rnode)
+vm_radix_node_put(struct vm_radix_node *rnode, int8_t last)
 {
+#ifdef INVARIANTS
+   int slot;
 
-   uma_zfree(vm_radix_node_zone, rnode);
+   KASSERT(rnode->rn_count == 0,
+   ("vm_radix_node_put: rnode %p has %d children", rnode,
+   rnode->rn_count));
+   for (slot = 0; slot < VM_RADIX_COUNT; slot++) {
+   if (slot == last)
+   continue;
+   KASSERT(smr_unserialized_load(>rn_child[slot], true) ==
+   NULL, ("vm_radix_node_put: rnode %p has a child", rnode));
+   }
+#endif
+   /* Off by one so a freshly zero'd node is not assigned to. */
+   rnode->rn_last = last + 1;
+   uma_zfree_smr(vm_radix_node_zone, rnode);
 }
 
 /*
@@ -156,23 +194,59 @@ vm_radix_trimkey(vm_pindex_t index, uint16_t level)
 }
 
 /*
+ * Fetch a node pointer from a slot in another node.
+ */
+static __inline struct vm_radix_node *
+vm_radix_node_load(smrnode_t *p, enum vm_radix_access access)
+{
+
+   switch (access) {
+   case UNSERIALIZED:
+   return (smr_unserialized_load(p, true));
+   case LOCKED:
+   return (smr_serialized_load(p, true));
+   case SMR:
+   return (smr_entered_load(p, vm_radix_smr));
+   }
+}
+
+static __inline void
+vm_radix_node_store(smrnode_t *p, struct vm_radix_node *v,
+enum vm_radix_access access)
+{
+
+
+   switch (access) {
+   case UNSERIALIZED:
+   smr_unserialized_store(p, v, true);
+   break;
+   case LOCKED:
+   smr_serialized_store(p, v, true);
+   break;
+   case SMR:
+   panic("vm_radix_node_store: Not supported in smr section.");
+   

svn commit: r358129 - head/sys/sys

2020-02-19 Thread Jeff Roberson
Author: jeff
Date: Wed Feb 19 19:51:09 2020
New Revision: 358129
URL: https://svnweb.freebsd.org/changeset/base/358129

Log:
  Since r357940 it is no longer possible to use a single type cast for all
  atomic_*_ptr functions.

Modified:
  head/sys/sys/smr.h

Modified: head/sys/sys/smr.h
==
--- head/sys/sys/smr.h  Wed Feb 19 18:48:46 2020(r358128)
+++ head/sys/sys/smr.h  Wed Feb 19 19:51:09 2020(r358129)
@@ -120,7 +120,7 @@ typedef struct {
\
  */
 #definesmr_serialized_load(p, ex) ({   
\
SMR_ASSERT(ex, "smr_serialized_load");  \
-   (__typeof((p)->__ptr))atomic_load_ptr((uintptr_t *)&(p)->__ptr);\
+   (__typeof((p)->__ptr))atomic_load_ptr(&(p)->__ptr); \
 })
 
 /*
@@ -155,7 +155,7 @@ typedef struct {
\
  */
 #definesmr_unserialized_load(p, ex) ({ 
\
SMR_ASSERT(ex, "smr_unserialized_load");\
-   (__typeof((p)->__ptr))atomic_load_ptr((uintptr_t *)&(p)->__ptr);\
+   (__typeof((p)->__ptr))atomic_load_ptr(&(p)->__ptr); \
 })
 
 /*


svn commit: r358128 - in head: lib/libmemstat sys/vm

2020-02-19 Thread Jeff Roberson
Author: jeff
Date: Wed Feb 19 18:48:46 2020
New Revision: 358128
URL: https://svnweb.freebsd.org/changeset/base/358128

Log:
  Use per-domain locks for the bucket cache.
  
  This gives much better concurrency when there are a large number of
  cores per domain and multiple domains.  Avoid taking the lock entirely
  if it will not be productive.  ROUNDROBIN domains will have mixed
  memory in each domain and will load balance to all domains.
  
  While here refactor the zone/domain separation and bucket limits to
  simplify callers.
  
  Reviewed by:  markj
  Differential Revision:https://reviews.freebsd.org/D23673

Modified:
  head/lib/libmemstat/memstat_uma.c
  head/sys/vm/uma_core.c
  head/sys/vm/uma_int.h

Modified: head/lib/libmemstat/memstat_uma.c
==
--- head/lib/libmemstat/memstat_uma.c   Wed Feb 19 17:09:08 2020
(r358127)
+++ head/lib/libmemstat/memstat_uma.c   Wed Feb 19 18:48:46 2020
(r358128)
@@ -425,12 +425,13 @@ memstat_kvm_uma(struct memory_type_list *list, void *k
(unsigned long )uz.uz_frees);
mtp->mt_failures = kvm_counter_u64_fetch(kvm,
(unsigned long )uz.uz_fails);
+   mtp->mt_xdomain = kvm_counter_u64_fetch(kvm,
+   (unsigned long )uz.uz_xdomain);
mtp->mt_sleeps = uz.uz_sleeps;
/* See comment above in memstat_sysctl_uma(). */
if (mtp->mt_numallocs < mtp->mt_numfrees)
mtp->mt_numallocs = mtp->mt_numfrees;
 
-   mtp->mt_xdomain = uz.uz_xdomain;
if (kz.uk_flags & UMA_ZFLAG_INTERNAL)
goto skip_percpu;
for (i = 0; i < mp_maxid + 1; i++) {
@@ -454,8 +455,9 @@ skip_percpu:
mtp->mt_byteslimit = mtp->mt_countlimit * mtp->mt_size;
mtp->mt_count = mtp->mt_numallocs - mtp->mt_numfrees;
for (i = 0; i < ndomains; i++) {
-   ret = kread(kvm, _domain[i], ,
-  sizeof(uzd), 0);
+   ret = kread(kvm,
+   _cpu[mp_maxid + 1] + i * sizeof(uzd),
+   , sizeof(uzd), 0);
if (ret != 0)
continue;
for (ubp =

Modified: head/sys/vm/uma_core.c
==
--- head/sys/vm/uma_core.c  Wed Feb 19 17:09:08 2020(r358127)
+++ head/sys/vm/uma_core.c  Wed Feb 19 18:48:46 2020(r358128)
@@ -285,6 +285,8 @@ static void zone_dtor(void *, int, void *);
 static inline void item_dtor(uma_zone_t zone, void *item, int size,
 void *udata, enum zfreeskip skip);
 static int zero_init(void *, int, int);
+static void zone_free_bucket(uma_zone_t zone, uma_bucket_t bucket, void *udata,
+int itemdomain, bool ws);
 static void zone_foreach(void (*zfunc)(uma_zone_t, void *), void *);
 static void zone_foreach_unlocked(void (*zfunc)(uma_zone_t, void *), void *);
 static void zone_timeout(uma_zone_t zone, void *);
@@ -518,6 +520,9 @@ bucket_free(uma_zone_t zone, uma_bucket_t bucket, void
 {
struct uma_bucket_zone *ubz;
 
+   if (bucket->ub_cnt != 0)
+   bucket_drain(zone, bucket);
+
KASSERT(bucket->ub_cnt == 0,
("bucket_free: Freeing a non free bucket."));
KASSERT(bucket->ub_seq == SMR_SEQ_INVALID,
@@ -538,17 +543,122 @@ bucket_zone_drain(void)
 }
 
 /*
+ * Acquire the domain lock and record contention.
+ */
+static uma_zone_domain_t
+zone_domain_lock(uma_zone_t zone, int domain)
+{
+   uma_zone_domain_t zdom;
+   bool lockfail;
+
+   zdom = ZDOM_GET(zone, domain);
+   lockfail = false;
+   if (ZDOM_OWNED(zdom))
+   lockfail = true;
+   ZDOM_LOCK(zdom);
+   /* This is unsynchronized.  The counter does not need to be precise. */
+   if (lockfail && zone->uz_bucket_size < zone->uz_bucket_size_max)
+   zone->uz_bucket_size++;
+   return (zdom);
+}
+
+/*
+ * Search for the domain with the least cached items and return it, breaking
+ * ties with a preferred domain by returning it.
+ */
+static __noinline int
+zone_domain_lowest(uma_zone_t zone, int pref)
+{
+   long least, nitems;
+   int domain;
+   int i;
+
+   least = LONG_MAX;
+   domain = 0;
+   for (i = 0; i < vm_ndomains; i++) {
+   nitems = ZDOM_GET(zone, i)->uzd_nitems;
+   if (nitems < least) {
+   domain = i;
+   least = nitems;
+   } else if (nitems == least && (i == pref || domain == pref))
+   domain = pref;
+   }
+
+ 

svn commit: r358098 - head/sys/vm

2020-02-19 Thread Jeff Roberson
Author: jeff
Date: Wed Feb 19 09:10:11 2020
New Revision: 358098
URL: https://svnweb.freebsd.org/changeset/base/358098

Log:
  Don't release xbusy on kmem pages.  After lockless page lookup we will not
  be able to guarantee that they can be reacquired without blocking.
  
  Reviewed by:  kib
  Discussed with:   markj
  Differential Revision:https://reviews.freebsd.org/D23506

Modified:
  head/sys/vm/vm_glue.c
  head/sys/vm/vm_kern.c
  head/sys/vm/vm_page.h
  head/sys/vm/vm_swapout.c

Modified: head/sys/vm/vm_glue.c
==
--- head/sys/vm/vm_glue.c   Wed Feb 19 08:17:27 2020(r358097)
+++ head/sys/vm/vm_glue.c   Wed Feb 19 09:10:11 2020(r358098)
@@ -342,10 +342,8 @@ vm_thread_stack_create(struct domainset *ds, vm_object
VM_OBJECT_WLOCK(ksobj);
(void)vm_page_grab_pages(ksobj, 0, VM_ALLOC_NORMAL | VM_ALLOC_WIRED,
ma, pages);
-   for (i = 0; i < pages; i++) {
+   for (i = 0; i < pages; i++)
vm_page_valid(ma[i]);
-   vm_page_xunbusy(ma[i]);
-   }
VM_OBJECT_WUNLOCK(ksobj);
pmap_qenter(ks, ma, pages);
*ksobjp = ksobj;
@@ -365,7 +363,7 @@ vm_thread_stack_dispose(vm_object_t ksobj, vm_offset_t
m = vm_page_lookup(ksobj, i);
if (m == NULL)
panic("%s: kstack already missing?", __func__);
-   vm_page_busy_acquire(m, 0);
+   vm_page_xbusy_claim(m);
vm_page_unwire_noq(m);
vm_page_free(m);
}

Modified: head/sys/vm/vm_kern.c
==
--- head/sys/vm/vm_kern.c   Wed Feb 19 08:17:27 2020(r358097)
+++ head/sys/vm/vm_kern.c   Wed Feb 19 09:10:11 2020(r358098)
@@ -224,7 +224,6 @@ retry:
if ((flags & M_ZERO) && (m->flags & PG_ZERO) == 0)
pmap_zero_page(m);
vm_page_valid(m);
-   vm_page_xunbusy(m);
pmap_enter(kernel_pmap, addr + i, m, prot,
prot | PMAP_ENTER_WIRED, 0);
}
@@ -317,7 +316,6 @@ retry:
if ((flags & M_ZERO) && (m->flags & PG_ZERO) == 0)
pmap_zero_page(m);
vm_page_valid(m);
-   vm_page_xunbusy(m);
pmap_enter(kernel_pmap, tmp, m, VM_PROT_RW,
VM_PROT_RW | PMAP_ENTER_WIRED, 0);
tmp += PAGE_SIZE;
@@ -501,7 +499,6 @@ retry:
KASSERT((m->oflags & VPO_UNMANAGED) != 0,
("kmem_malloc: page %p is managed", m));
vm_page_valid(m);
-   vm_page_xunbusy(m);
pmap_enter(kernel_pmap, addr + i, m, prot,
prot | PMAP_ENTER_WIRED, 0);
 #if VM_NRESERVLEVEL > 0
@@ -591,7 +588,7 @@ _kmem_unback(vm_object_t object, vm_offset_t addr, vm_
 #endif
for (; offset < end; offset += PAGE_SIZE, m = next) {
next = vm_page_next(m);
-   vm_page_busy_acquire(m, 0);
+   vm_page_xbusy_claim(m);
vm_page_unwire_noq(m);
vm_page_free(m);
}

Modified: head/sys/vm/vm_page.h
==
--- head/sys/vm/vm_page.h   Wed Feb 19 08:17:27 2020(r358097)
+++ head/sys/vm/vm_page.h   Wed Feb 19 09:10:11 2020(r358098)
@@ -764,9 +764,14 @@ void vm_page_object_busy_assert(vm_page_t m);
 void vm_page_assert_pga_writeable(vm_page_t m, uint16_t bits);
 #defineVM_PAGE_ASSERT_PGA_WRITEABLE(m, bits)   
\
vm_page_assert_pga_writeable(m, bits)
+#definevm_page_xbusy_claim(m) do { 
\
+   vm_page_assert_xbusied_unchecked((m));  \
+   (m)->busy_lock = VPB_CURTHREAD_EXCLUSIVE;   \
+} while (0)
 #else
 #defineVM_PAGE_OBJECT_BUSY_ASSERT(m)   (void)0
 #defineVM_PAGE_ASSERT_PGA_WRITEABLE(m, bits)   (void)0
+#definevm_page_xbusy_claim(m)
 #endif
 
 #if BYTE_ORDER == BIG_ENDIAN

Modified: head/sys/vm/vm_swapout.c
==
--- head/sys/vm/vm_swapout.cWed Feb 19 08:17:27 2020(r358097)
+++ head/sys/vm/vm_swapout.cWed Feb 19 09:10:11 2020(r358098)
@@ -540,6 +540,7 @@ vm_thread_swapout(struct thread *td)
if (m == NULL)
panic("vm_thread_swapout: kstack already missing?");
vm_page_dirty(m);
+   vm_page_xunbusy_unchecked(m);
vm_page_unwire(m, PQ_LAUNDRY);
}
VM_OBJECT_WUNLOCK(ksobj);
@@ -564,7 +565,6 @@ vm_thread_swapin(struct thread *td, int oom_alloc)
for (i = 0; i < pages;) {
vm_page_assert_xbusied(ma[i]);
if 

svn commit: r358097 - in head/sys: kern vm

2020-02-19 Thread Jeff Roberson
Author: jeff
Date: Wed Feb 19 08:17:27 2020
New Revision: 358097
URL: https://svnweb.freebsd.org/changeset/base/358097

Log:
  Eliminate some unnecessary uses of UMA_ZONE_VM.  Only zones involved in
  virtual address or physical page allocation need to be marked with this
  flag.
  
  Reviewed by:  markj
  Tested by:pho
  Differential Revision:https://reviews.freebsd.org/D23712
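
  For illustration, a minimal sketch (not part of this commit; the foo names are
  hypothetical) of an ordinary zone that correctly omits UMA_ZONE_VM, which is
  now reserved for zones backing virtual address or physical page allocation:

    struct foo { int f_x; };
    static uma_zone_t foo_zone;

    static void
    foo_zone_init(void)
    {

        /* A normal consumer zone: no UMA_ZONE_VM. */
        foo_zone = uma_zcreate("foo", sizeof(struct foo),
            NULL, NULL, NULL, NULL, UMA_ALIGN_PTR, 0);
    }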

Modified:
  head/sys/kern/subr_vmem.c
  head/sys/kern/vfs_subr.c
  head/sys/vm/swap_pager.c
  head/sys/vm/vm_page.c
  head/sys/vm/vm_pager.c

Modified: head/sys/kern/subr_vmem.c
==
--- head/sys/kern/subr_vmem.c   Wed Feb 19 08:15:20 2020(r358096)
+++ head/sys/kern/subr_vmem.c   Wed Feb 19 08:17:27 2020(r358097)
@@ -561,8 +561,7 @@ qc_init(vmem_t *vm, vmem_size_t qcache_max)
qc->qc_vmem = vm;
qc->qc_size = size;
qc->qc_cache = uma_zcache_create(qc->qc_name, size,
-   NULL, NULL, NULL, NULL, qc_import, qc_release, qc,
-   UMA_ZONE_VM);
+   NULL, NULL, NULL, NULL, qc_import, qc_release, qc, 0);
MPASS(qc->qc_cache);
}
 }
@@ -668,10 +667,10 @@ vmem_startup(void)
mtx_init(&vmem_list_lock, "vmem list lock", NULL, MTX_DEF);
vmem_zone = uma_zcreate("vmem",
sizeof(struct vmem), NULL, NULL, NULL, NULL,
-   UMA_ALIGN_PTR, UMA_ZONE_VM);
+   UMA_ALIGN_PTR, 0);
vmem_bt_zone = uma_zcreate("vmem btag",
sizeof(struct vmem_btag), NULL, NULL, NULL, NULL,
-   UMA_ALIGN_PTR, UMA_ZONE_VM | UMA_ZONE_NOFREE);
+   UMA_ALIGN_PTR, UMA_ZONE_VM);
 #ifndef UMA_MD_SMALL_ALLOC
mtx_init(&vmem_bt_lock, "btag lock", NULL, MTX_DEF);
uma_prealloc(vmem_bt_zone, BT_MAXALLOC);

Modified: head/sys/kern/vfs_subr.c
==
--- head/sys/kern/vfs_subr.cWed Feb 19 08:15:20 2020(r358096)
+++ head/sys/kern/vfs_subr.cWed Feb 19 08:17:27 2020(r358097)
@@ -671,7 +671,7 @@ vntblinit(void *dummy __unused)
 */
buf_trie_zone = uma_zcreate("BUF TRIE", pctrie_node_size(),
NULL, NULL, pctrie_zone_init, NULL, UMA_ALIGN_PTR, 
-   UMA_ZONE_NOFREE | UMA_ZONE_VM);
+   UMA_ZONE_NOFREE);
uma_prealloc(buf_trie_zone, nbuf);
 
vnodes_created = counter_u64_alloc(M_WAITOK);

Modified: head/sys/vm/swap_pager.c
==
--- head/sys/vm/swap_pager.cWed Feb 19 08:15:20 2020(r358096)
+++ head/sys/vm/swap_pager.cWed Feb 19 08:17:27 2020(r358097)
@@ -585,11 +585,11 @@ swap_pager_swap_init(void)
n = maxswzone != 0 ? maxswzone / sizeof(struct swblk) :
vm_cnt.v_page_count / 2;
swpctrie_zone = uma_zcreate("swpctrie", pctrie_node_size(), NULL, NULL,
-   pctrie_zone_init, NULL, UMA_ALIGN_PTR, UMA_ZONE_VM);
+   pctrie_zone_init, NULL, UMA_ALIGN_PTR, 0);
if (swpctrie_zone == NULL)
panic("failed to create swap pctrie zone.");
swblk_zone = uma_zcreate("swblk", sizeof(struct swblk), NULL, NULL,
-   NULL, NULL, _Alignof(struct swblk) - 1, UMA_ZONE_VM);
+   NULL, NULL, _Alignof(struct swblk) - 1, 0);
if (swblk_zone == NULL)
panic("failed to create swap blk zone.");
n2 = n;

Modified: head/sys/vm/vm_page.c
==
--- head/sys/vm/vm_page.c   Wed Feb 19 08:15:20 2020(r358096)
+++ head/sys/vm/vm_page.c   Wed Feb 19 08:17:27 2020(r358097)
@@ -202,7 +202,7 @@ vm_page_init(void *dummy)
 {
 
fakepg_zone = uma_zcreate("fakepg", sizeof(struct vm_page), NULL, NULL,
-   NULL, NULL, UMA_ALIGN_PTR, UMA_ZONE_NOFREE | UMA_ZONE_VM);
+   NULL, NULL, UMA_ALIGN_PTR, UMA_ZONE_NOFREE);
bogus_page = vm_page_alloc(NULL, 0, VM_ALLOC_NOOBJ |
VM_ALLOC_NORMAL | VM_ALLOC_WIRED);
 }
@@ -2022,7 +2022,7 @@ again:
 #endif
vmd = VM_DOMAIN(domain);
if (vmd->vmd_pgcache[pool].zone != NULL) {
-   m = uma_zalloc(vmd->vmd_pgcache[pool].zone, M_NOWAIT);
+   m = uma_zalloc(vmd->vmd_pgcache[pool].zone, M_NOWAIT | M_NOVM);
if (m != NULL) {
flags |= PG_PCPU_CACHE;
goto found;

Modified: head/sys/vm/vm_pager.c
==
--- head/sys/vm/vm_pager.c  Wed Feb 19 08:15:20 2020(r358096)
+++ head/sys/vm/vm_pager.c  Wed Feb 19 08:17:27 2020(r358097)
@@ -185,7 +185,7 @@ vm_pager_bufferinit(void)
/* Main zone for paging bufs. */
pbuf_zone = uma_zcreate("pbuf", sizeof(struct buf),
pbuf_ctor, pbuf_dtor, pbuf_init, NULL, UMA_ALIGN_CACHE,
-   

svn commit: r358096 - head/sys/sys

2020-02-19 Thread Jeff Roberson
Author: jeff
Date: Wed Feb 19 08:15:20 2020
New Revision: 358096
URL: https://svnweb.freebsd.org/changeset/base/358096

Log:
  Type validating smr protected pointer accessors.
  
  This API is intended to provide some measure of safety with SMR
  protected pointers.  A struct wrapper provides type checking and
  a guarantee that all access is mediated by the API unless abused.  All
  modifying functions take an assert as an argument to guarantee that
  the required synchronization is present.
  
  Reviewed by:  kib, markj, mjg
  Differential Revision:https://reviews.freebsd.org/D23711
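
  As a hedged illustration of how these accessors compose (the foo_* names are
  hypothetical; the smr_* and SMR_* calls are the ones introduced here), a
  writer serialized by a mutex and a reader inside an SMR section might look
  like:

    SMR_TYPE_DECLARE(foo_smr_t, struct foo *);

    struct foo_table {
        foo_smr_t   ft_cur;     /* Only reachable via smr_*(). */
        struct mtx  ft_lock;    /* Serializes writers. */
        smr_t       ft_smr;
    };

    static void
    foo_read(struct foo_table *ft)
    {
        struct foo *p;

        smr_enter(ft->ft_smr);
        p = smr_entered_load(&ft->ft_cur, ft->ft_smr);
        /* ... p remains stable until smr_exit(). ... */
        smr_exit(ft->ft_smr);
    }

    static void
    foo_update(struct foo_table *ft, struct foo *newp)
    {

        mtx_lock(&ft->ft_lock);
        smr_serialized_store(&ft->ft_cur, newp, mtx_owned(&ft->ft_lock));
        mtx_unlock(&ft->ft_lock);
    }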

Modified:
  head/sys/sys/smr.h

Modified: head/sys/sys/smr.h
==
--- head/sys/sys/smr.h  Wed Feb 19 06:28:55 2020(r358095)
+++ head/sys/sys/smr.h  Wed Feb 19 08:15:20 2020(r358096)
@@ -77,6 +77,98 @@ struct smr {
 #defineSMR_ASSERT_NOT_ENTERED(smr) 
\
 KASSERT(!SMR_ENTERED(smr), ("In smr section."));
 
+#define SMR_ASSERT(ex, fn) \
+KASSERT((ex), (fn ": Assertion " #ex " failed at %s:%d", __FILE__, 
__LINE__))
+
+/*
+ * SMR Accessors are meant to provide safe access to SMR protected
+ * pointers and prevent misuse and accidental access.
+ *
+ * Accessors are grouped by type:
+ * entered - Use while in a read section (between smr_enter/smr_exit())
+ * serialized  - Use while holding a lock that serializes writers.   Updates
+ *   are synchronized with readers via included barriers.
+ * unserialized- Use after the memory is out of scope and not visible 
to
+ *   readers.
+ *
+ * All acceses include a parameter for an assert to verify the required
+ * synchronization.  For example, a writer might use:
+ *
+ * smr_serilized_store(pointer, value, mtx_owned());
+ *
+ * These are only enabled in INVARIANTS kernels.
+ */
+
+/* Type restricting pointer access to force smr accessors. */
+#defineSMR_TYPE_DECLARE(smrtype, type) 
\
+typedef struct {   \
+   type__ptr;  /* Do not access directly */\
+} smrtype
+
+/*
+ * Read from an SMR protected pointer while in a read section.
+ */
+#definesmr_entered_load(p, smr) ({ 
\
+   SMR_ASSERT(SMR_ENTERED((smr)), "smr_entered_load"); \
+   (__typeof((p)->__ptr))atomic_load_acq_ptr((uintptr_t *)&(p)->__ptr); \
+})
+
+/*
+ * Read from an SMR protected pointer while serialized by an
+ * external mechanism.  'ex' should contain an assert that the
+ * external mechanism is held.  i.e. mtx_owned()
+ */
+#definesmr_serialized_load(p, ex) ({   
\
+   SMR_ASSERT(ex, "smr_serialized_load");  \
+   (__typeof((p)->__ptr))atomic_load_ptr((uintptr_t *)&(p)->__ptr);\
+})
+
+/*
+ * Store 'v' to an SMR protected pointer while serialized by an
+ * external mechanism.  'ex' should contain an assert that the
+ * external mechanism is held.  i.e. mtx_owned()
+ */
+#definesmr_serialized_store(p, v, ex) do { 
\
+   SMR_ASSERT(ex, "smr_serialized_store"); \
+   __typeof((p)->__ptr) _v = (v);  \
+   atomic_store_rel_ptr((uintptr_t *)&(p)->__ptr, (uintptr_t)_v);  \
+} while (0)
+
+/*
+ * swap 'v' with an SMR protected pointer and return the old value
+ * while serialized by an external mechanism.  'ex' should contain
+ * an assert that the external mechanism is provided.  i.e. mtx_owned()
+ */
+#definesmr_serialized_swap(p, v, ex) ({
\
+   SMR_ASSERT(ex, "smr_serialized_swap");  \
+   __typeof((p)->__ptr) _v = (v);  \
+   /* Release barrier guarantees contents are visible to reader */ \
+   atomic_thread_fence_rel();  \
+   (__typeof((p)->__ptr))atomic_swap_ptr(  \
+   (uintptr_t *)&(p)->__ptr, (uintptr_t)_v);   \
+})
+
+/*
+ * Read from an SMR protected pointer when no serialization is required
+ * such as in the destructor callback or when the caller guarantees other
+ * synchronization.
+ */
+#definesmr_unserialized_load(p, ex) ({ 
\
+   SMR_ASSERT(ex, "smr_unserialized_load");\
+   (__typeof((p)->__ptr))atomic_load_ptr((uintptr_t *)&(p)->__ptr);\
+})
+
+/*
+ * Store to an SMR protected pointer when no serialiation is required
+ * such as in the destructor callback or when the caller guarantees other
+ * synchronization.
+ */
+#definesmr_unserialized_store(p, v, ex) do {   
\
+   SMR_ASSERT(ex, "smr_unserialized_store");

svn commit: r358012 - head/sys/vm

2020-02-16 Thread Jeff Roberson
Author: jeff
Date: Mon Feb 17 01:59:55 2020
New Revision: 358012
URL: https://svnweb.freebsd.org/changeset/base/358012

Log:
  Add a simple accessor that returns the bytes of memory consumed by a zone.
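
  A quick hedged sketch of a caller (the helper and its name are hypothetical,
  not part of the commit):

    static void
    report_zone_footprint(uma_zone_t zone, const char *name)
    {
        size_t bytes;

        bytes = uma_zone_memory(zone);
        printf("zone %s consumes %zu bytes\n", name, bytes);
    }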

Modified:
  head/sys/vm/uma.h
  head/sys/vm/uma_core.c

Modified: head/sys/vm/uma.h
==
--- head/sys/vm/uma.h   Mon Feb 17 01:08:00 2020(r358011)
+++ head/sys/vm/uma.h   Mon Feb 17 01:59:55 2020(r358012)
@@ -671,6 +671,11 @@ void uma_prealloc(uma_zone_t zone, int itemcnt);
 int uma_zone_exhausted(uma_zone_t zone);
 
 /*
+ * Returns the bytes of memory consumed by the zone.
+ */
+size_t uma_zone_memory(uma_zone_t zone);
+
+/*
  * Common UMA_ZONE_PCPU zones.
  */
 extern uma_zone_t pcpu_zone_int;

Modified: head/sys/vm/uma_core.c
==
--- head/sys/vm/uma_core.c  Mon Feb 17 01:08:00 2020(r358011)
+++ head/sys/vm/uma_core.c  Mon Feb 17 01:59:55 2020(r358012)
@@ -4681,6 +4681,27 @@ uma_prealloc(uma_zone_t zone, int items)
}
 }
 
+/*
+ * Returns a snapshot of memory consumption in bytes.
+ */
+size_t
+uma_zone_memory(uma_zone_t zone)
+{
+   size_t sz;
+   int i;
+
+   sz = 0;
+   if (zone->uz_flags & UMA_ZFLAG_CACHE) {
+   for (i = 0; i < vm_ndomains; i++)
+   sz += zone->uz_domain[i].uzd_nitems;
+   return (sz * zone->uz_size);
+   }
+   for (i = 0; i < vm_ndomains; i++)
+   sz += zone->uz_keg->uk_domain[i].ud_pages;
+
+   return (sz * PAGE_SIZE);
+}
+
 /* See uma.h */
 void
 uma_reclaim(int req)
___
svn-src-head@freebsd.org mailing list
https://lists.freebsd.org/mailman/listinfo/svn-src-head
To unsubscribe, send any mail to "svn-src-head-unsubscr...@freebsd.org"


svn commit: r358011 - head/sys/vm

2020-02-16 Thread Jeff Roberson
Author: jeff
Date: Mon Feb 17 01:08:00 2020
New Revision: 358011
URL: https://svnweb.freebsd.org/changeset/base/358011

Log:
  Refactor _vm_page_busy_sleep to reduce the delta between the various
  sleep routines and introduce a variant that supports lockless sleep.
  
  Reviewed by:  kib
  Differential Revision:https://reviews.freebsd.org/D23612
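
  A hedged sketch of how the new unlocked variant might be used on a lockless
  path (the surrounding retry structure is hypothetical):

    /*
     * Wait for a busy page without taking the object lock.  The page can
     * change identity while sleeping, so the caller must re-lookup.
     */
    VM_OBJECT_ASSERT_UNLOCKED(obj);
    if (vm_page_busied(m)) {
        vm_page_busy_sleep_unlocked(obj, m, m->pindex, "pbwait", false);
        goto retry;     /* Hypothetical re-lookup label. */
    }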

Modified:
  head/sys/vm/vm_page.c
  head/sys/vm/vm_page.h

Modified: head/sys/vm/vm_page.c
==
--- head/sys/vm/vm_page.c   Mon Feb 17 01:06:18 2020(r358010)
+++ head/sys/vm/vm_page.c   Mon Feb 17 01:08:00 2020(r358011)
@@ -174,7 +174,7 @@ static uma_zone_t fakepg_zone;
 
 static void vm_page_alloc_check(vm_page_t m);
 static bool _vm_page_busy_sleep(vm_object_t obj, vm_page_t m,
-const char *wmesg, bool nonshared, bool locked);
+vm_pindex_t pindex, const char *wmesg, int allocflags, bool locked);
 static void vm_page_clear_dirty_mask(vm_page_t m, vm_page_bits_t pagebits);
 static void vm_page_enqueue(vm_page_t m, uint8_t queue);
 static bool vm_page_free_prep(vm_page_t m);
@@ -846,7 +846,8 @@ vm_page_acquire_flags(vm_page_t m, int allocflags)
 /*
  * vm_page_busy_sleep_flags
  *
- * Sleep for busy according to VM_ALLOC_ parameters.
+ * Sleep for busy according to VM_ALLOC_ parameters.  Returns true
+ * if the caller should retry and false otherwise.
  */
 static bool
 vm_page_busy_sleep_flags(vm_object_t object, vm_page_t m, const char *wmesg,
@@ -855,18 +856,19 @@ vm_page_busy_sleep_flags(vm_object_t object, vm_page_t
 
if ((allocflags & VM_ALLOC_NOWAIT) != 0)
return (false);
+
/*
-* Reference the page before unlocking and
-* sleeping so that the page daemon is less
-* likely to reclaim it.
+* Reference the page before unlocking and sleeping so that
+* the page daemon is less likely to reclaim it.
 */
if ((allocflags & VM_ALLOC_NOCREAT) == 0)
-   vm_page_aflag_set(m, PGA_REFERENCED);
-   if (_vm_page_busy_sleep(object, m, wmesg, (allocflags &
-   VM_ALLOC_IGN_SBUSY) != 0, true))
+   vm_page_reference(m);
+
+   if (_vm_page_busy_sleep(object, m, m->pindex, wmesg, allocflags, true))
VM_OBJECT_WLOCK(object);
if ((allocflags & VM_ALLOC_WAITFAIL) != 0)
return (false);
+
return (true);
 }
 
@@ -900,8 +902,8 @@ vm_page_busy_acquire(vm_page_t m, int allocflags)
else
locked = false;
MPASS(locked || vm_page_wired(m));
-   if (_vm_page_busy_sleep(obj, m, "vmpba",
-   (allocflags & VM_ALLOC_SBUSY) != 0, locked))
+   if (_vm_page_busy_sleep(obj, m, m->pindex, "vmpba", allocflags,
+   locked) && locked)
VM_OBJECT_WLOCK(obj);
if ((allocflags & VM_ALLOC_WAITFAIL) != 0)
return (false);
@@ -1026,19 +1028,49 @@ vm_page_busy_sleep(vm_page_t m, const char *wmesg, boo
VM_OBJECT_ASSERT_LOCKED(obj);
vm_page_lock_assert(m, MA_NOTOWNED);
 
-   if (!_vm_page_busy_sleep(obj, m, wmesg, nonshared, true))
+   if (!_vm_page_busy_sleep(obj, m, m->pindex, wmesg,
+   nonshared ? VM_ALLOC_SBUSY : 0 , true))
VM_OBJECT_DROP(obj);
 }
 
 /*
+ * vm_page_busy_sleep_unlocked:
+ *
+ * Sleep if the page is busy, using the page pointer as wchan.
+ * This is used to implement the hard-path of busying mechanism.
+ *
+ * If nonshared is true, sleep only if the page is xbusy.
+ *
+ * The object lock must not be held on entry.  The operation will
+ * return if the page changes identity.
+ */
+void
+vm_page_busy_sleep_unlocked(vm_object_t obj, vm_page_t m, vm_pindex_t pindex,
+const char *wmesg, bool nonshared)
+{
+
+   VM_OBJECT_ASSERT_UNLOCKED(obj);
+   vm_page_lock_assert(m, MA_NOTOWNED);
+
+   _vm_page_busy_sleep(obj, m, pindex, wmesg,
+   nonshared ? VM_ALLOC_SBUSY : 0, false);
+}
+
+/*
  * _vm_page_busy_sleep:
  *
- * Internal busy sleep function.
+ * Internal busy sleep function.  Verifies the page identity and
+ * lockstate against parameters.  Returns true if it sleeps and
+ * false otherwise.
+ *
+ * If locked is true the lock will be dropped for any true returns
+ * and held for any false returns.
  */
 static bool
-_vm_page_busy_sleep(vm_object_t obj, vm_page_t m, const char *wmesg,
-bool nonshared, bool locked)
+_vm_page_busy_sleep(vm_object_t obj, vm_page_t m, vm_pindex_t pindex,
+const char *wmesg, int allocflags, bool locked)
 {
+   bool xsleep;
u_int x;
 
/*
@@ -1049,23 +1081,36 @@ _vm_page_busy_sleep(vm_object_t obj, vm_page_t m, cons
if (locked)
VM_OBJECT_DROP(obj);
vm_object_busy_wait(obj, wmesg);
-   

svn commit: r358010 - head/sys/vm

2020-02-16 Thread Jeff Roberson
Author: jeff
Date: Mon Feb 17 01:06:18 2020
New Revision: 358010
URL: https://svnweb.freebsd.org/changeset/base/358010

Log:
  UMA has become more particular about zone types.  Use the right allocator
  calls in uma_zwait().

Modified:
  head/sys/vm/uma_core.c

Modified: head/sys/vm/uma_core.c
==
--- head/sys/vm/uma_core.c  Sun Feb 16 23:10:59 2020(r358009)
+++ head/sys/vm/uma_core.c  Mon Feb 17 01:06:18 2020(r358010)
@@ -2944,10 +2944,13 @@ uma_zdestroy(uma_zone_t zone)
 void
 uma_zwait(uma_zone_t zone)
 {
-   void *item;
 
-   item = uma_zalloc_arg(zone, NULL, M_WAITOK);
-   uma_zfree(zone, item);
+   if ((zone->uz_flags & UMA_ZONE_SMR) != 0)
+   uma_zfree_smr(zone, uma_zalloc_smr(zone, M_WAITOK));
+   else if ((zone->uz_flags & UMA_ZONE_PCPU) != 0)
+   uma_zfree_pcpu(zone, uma_zalloc_pcpu(zone, M_WAITOK));
+   else
+   uma_zfree(zone, uma_zalloc(zone, M_WAITOK));
 }
 
 void *
___
svn-src-head@freebsd.org mailing list
https://lists.freebsd.org/mailman/listinfo/svn-src-head
To unsubscribe, send any mail to "svn-src-head-unsubscr...@freebsd.org"


svn commit: r357988 - head/sys/vm

2020-02-15 Thread Jeff Roberson
Author: jeff
Date: Sun Feb 16 01:07:19 2020
New Revision: 357988
URL: https://svnweb.freebsd.org/changeset/base/357988

Log:
  Slightly restructure uma_zalloc* to generate better code from clang and
  reduce duplication among zalloc functions.
  
  Reviewed by:  markj
  Discussed with:   mjg
  Differential Revision:https://reviews.freebsd.org/D23672

Modified:
  head/sys/vm/uma_core.c

Modified: head/sys/vm/uma_core.c
==
--- head/sys/vm/uma_core.c  Sun Feb 16 00:12:53 2020(r357987)
+++ head/sys/vm/uma_core.c  Sun Feb 16 01:07:19 2020(r357988)
@@ -3013,8 +3013,8 @@ item_ctor(uma_zone_t zone, int uz_flags, int size, voi
if (!skipdbg)
uma_dbg_alloc(zone, NULL, item);
 #endif
-   if (flags & M_ZERO)
-   bzero(item, size);
+   if (__predict_false(flags & M_ZERO))
+   return (memset(item, 0, size));
 
return (item);
 }
@@ -3117,11 +3117,35 @@ uma_zfree_debug(uma_zone_t zone, void *item, void *uda
 }
 #endif
 
+static inline void *
+cache_alloc_item(uma_zone_t zone, uma_cache_t cache, uma_cache_bucket_t bucket,
+void *udata, int flags)
+{
+   void *item;
+   int size, uz_flags;
+
+   item = cache_bucket_pop(cache, bucket);
+   size = cache_uz_size(cache);
+   uz_flags = cache_uz_flags(cache);
+   critical_exit();
+   return (item_ctor(zone, uz_flags, size, udata, flags, item));
+}
+
 static __noinline void *
-uma_zalloc_single(uma_zone_t zone, void *udata, int flags)
+cache_alloc_retry(uma_zone_t zone, uma_cache_t cache, void *udata, int flags)
 {
+   uma_cache_bucket_t bucket;
int domain;
 
+   while (cache_alloc(zone, cache, udata, flags)) {
+   cache = &zone->uz_cpu[curcpu];
+   bucket = &cache->uc_allocbucket;
+   if (__predict_false(bucket->ucb_cnt == 0))
+   continue;
+   return (cache_alloc_item(zone, cache, bucket, udata, flags));
+   }
+   critical_exit();
+
/*
 * We can not get a bucket so try to return a single item.
 */
@@ -3138,10 +3162,10 @@ uma_zalloc_smr(uma_zone_t zone, int flags)
 {
uma_cache_bucket_t bucket;
uma_cache_t cache;
-   void *item;
-   int size, uz_flags;
 
 #ifdef UMA_ZALLOC_DEBUG
+   void *item;
+
KASSERT((zone->uz_flags & UMA_ZONE_SMR) != 0,
("uma_zalloc_arg: called with non-SMR zone.\n"));
if (uma_zalloc_debug(zone, &item, NULL, flags) == EJUSTRETURN)
@@ -3149,21 +3173,11 @@ uma_zalloc_smr(uma_zone_t zone, int flags)
 #endif
 
critical_enter();
-   do {
-   cache = &zone->uz_cpu[curcpu];
-   bucket = &cache->uc_allocbucket;
-   size = cache_uz_size(cache);
-   uz_flags = cache_uz_flags(cache);
-   if (__predict_true(bucket->ucb_cnt != 0)) {
-   item = cache_bucket_pop(cache, bucket);
-   critical_exit();
-   return (item_ctor(zone, uz_flags, size, NULL, flags,
-   item));
-   }
-   } while (cache_alloc(zone, cache, NULL, flags));
-   critical_exit();
-
-   return (uma_zalloc_single(zone, NULL, flags));
+   cache = &zone->uz_cpu[curcpu];
+   bucket = &cache->uc_allocbucket;
+   if (__predict_false(bucket->ucb_cnt == 0))
+   return (cache_alloc_retry(zone, cache, NULL, flags));
+   return (cache_alloc_item(zone, cache, bucket, NULL, flags));
 }
 
 /* See uma.h */
@@ -3172,8 +3186,6 @@ uma_zalloc_arg(uma_zone_t zone, void *udata, int flags
 {
uma_cache_bucket_t bucket;
uma_cache_t cache;
-   void *item;
-   int size, uz_flags;
 
/* Enable entropy collection for RANDOM_ENABLE_UMA kernel option */
random_harvest_fast_uma(&zone, sizeof(zone), RANDOM_UMA);
@@ -3183,6 +3195,8 @@ uma_zalloc_arg(uma_zone_t zone, void *udata, int flags
zone, flags);
 
 #ifdef UMA_ZALLOC_DEBUG
+   void *item;
+
KASSERT((zone->uz_flags & UMA_ZONE_SMR) == 0,
("uma_zalloc_arg: called with SMR zone.\n"));
if (uma_zalloc_debug(zone, &item, udata, flags) == EJUSTRETURN)
@@ -3201,21 +3215,11 @@ uma_zalloc_arg(uma_zone_t zone, void *udata, int flags
 * must detect and handle migration if it has occurred.
 */
critical_enter();
-   do {
-   cache = &zone->uz_cpu[curcpu];
-   bucket = &cache->uc_allocbucket;
-   size = cache_uz_size(cache);
-   uz_flags = cache_uz_flags(cache);
-   if (__predict_true(bucket->ucb_cnt != 0)) {
-   item = cache_bucket_pop(cache, bucket);
-   critical_exit();
-   return (item_ctor(zone, uz_flags, size, udata, flags,
-   item));
-   }
-   } while (cache_alloc(zone, cache, udata, flags));
-   critical_exit();

svn commit: r357884 - head/sys/kern

2020-02-13 Thread Jeff Roberson
Author: jeff
Date: Thu Feb 13 21:10:17 2020
New Revision: 357884
URL: https://svnweb.freebsd.org/changeset/base/357884

Log:
  Since r357804, pcpu zones have been required to use uma_zalloc_pcpu().  Prior
  to this it was only required when zeroing.  Switch to these interfaces.
  
  Reviewed by:  mjg
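
  The rule, sketched with a hypothetical zone (not part of the commit): a zone
  created with UMA_ZONE_PCPU must pair uma_zalloc_pcpu() with uma_zfree_pcpu().

    static uma_zone_t foo_pcpu_zone;    /* Created with UMA_ZONE_PCPU. */

    static uint64_t *
    foo_counters_alloc(void)
    {

        /* The pcpu variant zeroes every CPU's copy for M_ZERO. */
        return (uma_zalloc_pcpu(foo_pcpu_zone, M_WAITOK | M_ZERO));
    }

    static void
    foo_counters_free(uint64_t *c)
    {

        uma_zfree_pcpu(foo_pcpu_zone, c);
    }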

Modified:
  head/sys/kern/subr_smr.c

Modified: head/sys/kern/subr_smr.c
==
--- head/sys/kern/subr_smr.cThu Feb 13 20:58:51 2020(r357883)
+++ head/sys/kern/subr_smr.cThu Feb 13 21:10:17 2020(r357884)
@@ -414,7 +414,7 @@ smr_create(const char *name)
int i;
 
s = uma_zalloc(smr_shared_zone, M_WAITOK);
-   smr = uma_zalloc(smr_zone, M_WAITOK);
+   smr = uma_zalloc_pcpu(smr_zone, M_WAITOK);
 
s->s_name = name;
s->s_rd_seq = s->s_wr_seq = SMR_SEQ_INIT;
@@ -436,7 +436,7 @@ smr_destroy(smr_t smr)
 
smr_synchronize(smr);
uma_zfree(smr_shared_zone, smr->c_shared);
-   uma_zfree(smr_zone, smr);
+   uma_zfree_pcpu(smr_zone, smr);
 }
 
 /*
___
svn-src-head@freebsd.org mailing list
https://lists.freebsd.org/mailman/listinfo/svn-src-head
To unsubscribe, send any mail to "svn-src-head-unsubscr...@freebsd.org"


svn commit: r357883 - head/sys/vm

2020-02-13 Thread Jeff Roberson
Author: jeff
Date: Thu Feb 13 20:58:51 2020
New Revision: 357883
URL: https://svnweb.freebsd.org/changeset/base/357883

Log:
  Fix a case where ub_seq would fail to be set if the cross bucket was
  flushed due to memory pressure.
  
  Reviewed by:  markj
  Differential Revision:http://reviews.freebsd.org/D23614

Modified:
  head/sys/vm/uma_core.c

Modified: head/sys/vm/uma_core.c
==
--- head/sys/vm/uma_core.c  Thu Feb 13 20:50:21 2020(r357882)
+++ head/sys/vm/uma_core.c  Thu Feb 13 20:58:51 2020(r357883)
@@ -553,12 +553,13 @@ zone_fetch_bucket(uma_zone_t zone, uma_zone_domain_t z
if ((bucket = STAILQ_FIRST(&zdom->uzd_buckets)) == NULL)
return (NULL);
 
+   /* SMR Buckets can not be re-used until readers expire. */
if ((zone->uz_flags & UMA_ZONE_SMR) != 0 &&
bucket->ub_seq != SMR_SEQ_INVALID) {
if (!smr_poll(zone->uz_smr, bucket->ub_seq, false))
return (NULL);
bucket->ub_seq = SMR_SEQ_INVALID;
-   dtor = (zone->uz_dtor != NULL) | UMA_ALWAYS_CTORDTOR;
+   dtor = (zone->uz_dtor != NULL) || UMA_ALWAYS_CTORDTOR;
}
MPASS(zdom->uzd_nitems >= bucket->ub_cnt);
STAILQ_REMOVE_HEAD(&zdom->uzd_buckets, ub_link);
@@ -678,6 +679,7 @@ cache_bucket_load(uma_cache_bucket_t bucket, uma_bucke
 
CRITICAL_ASSERT(curthread);
MPASS(bucket->ucb_bucket == NULL);
+   MPASS(b->ub_seq == SMR_SEQ_INVALID);
 
bucket->ucb_bucket = b;
bucket->ucb_cnt = b->ub_cnt;
@@ -979,10 +981,10 @@ bucket_drain(uma_zone_t zone, uma_bucket_t bucket)
if ((zone->uz_flags & UMA_ZONE_SMR) != 0 &&
bucket->ub_seq != SMR_SEQ_INVALID) {
smr_wait(zone->uz_smr, bucket->ub_seq);
+   bucket->ub_seq = SMR_SEQ_INVALID;
for (i = 0; i < bucket->ub_cnt; i++)
item_dtor(zone, bucket->ub_bucket[i],
zone->uz_size, NULL, SKIP_NONE);
-   bucket->ub_seq = SMR_SEQ_INVALID;
}
if (zone->uz_fini)
for (i = 0; i < bucket->ub_cnt; i++) 
@@ -1014,6 +1016,7 @@ cache_drain(uma_zone_t zone)
 {
uma_cache_t cache;
uma_bucket_t bucket;
+   smr_seq_t seq;
int cpu;
 
/*
@@ -1024,6 +1027,9 @@ cache_drain(uma_zone_t zone)
 * XXX: It would good to be able to assert that the zone is being
 * torn down to prevent improper use of cache_drain().
 */
+   seq = SMR_SEQ_INVALID;
+   if ((zone->uz_flags & UMA_ZONE_SMR) != 0)
+   seq = smr_current(zone->uz_smr);
CPU_FOREACH(cpu) {
cache = &zone->uz_cpu[cpu];
bucket = cache_bucket_unload_alloc(cache);
@@ -1033,11 +1039,13 @@ cache_drain(uma_zone_t zone)
}
bucket = cache_bucket_unload_free(cache);
if (bucket != NULL) {
+   bucket->ub_seq = seq;
bucket_drain(zone, bucket);
bucket_free(zone, bucket, NULL);
}
bucket = cache_bucket_unload_cross(cache);
if (bucket != NULL) {
+   bucket->ub_seq = seq;
bucket_drain(zone, bucket);
bucket_free(zone, bucket, NULL);
}
@@ -1069,7 +1077,6 @@ cache_drain_safe_cpu(uma_zone_t zone, void *unused)
return;
 
b1 = b2 = b3 = NULL;
-   ZONE_LOCK(zone);
critical_enter();
if (zone->uz_flags & UMA_ZONE_FIRSTTOUCH)
domain = PCPU_GET(domain);
@@ -1077,32 +1084,33 @@ cache_drain_safe_cpu(uma_zone_t zone, void *unused)
domain = 0;
cache = &zone->uz_cpu[curcpu];
b1 = cache_bucket_unload_alloc(cache);
-   if (b1 != NULL && b1->ub_cnt != 0) {
-   zone_put_bucket(zone, &zone->uz_domain[domain], b1, false);
-   b1 = NULL;
-   }
 
/*
 * Don't flush SMR zone buckets.  This leaves the zone without a
 * bucket and forces every free to synchronize().
 */
-   if ((zone->uz_flags & UMA_ZONE_SMR) != 0)
-   goto out;
-   b2 = cache_bucket_unload_free(cache);
+   if ((zone->uz_flags & UMA_ZONE_SMR) == 0) {
+   b2 = cache_bucket_unload_free(cache);
+   b3 = cache_bucket_unload_cross(cache);
+   }
+   critical_exit();
+
+   ZONE_LOCK(zone);
+   if (b1 != NULL && b1->ub_cnt != 0) {
+   zone_put_bucket(zone, &zone->uz_domain[domain], b1, false);
+   b1 = NULL;
+   }
if (b2 != NULL && b2->ub_cnt != 0) {
zone_put_bucket(zone, &zone->uz_domain[domain], b2, false);
b2 = NULL;
}
-   b3 = cache_bucket_unload_cross(cache);
-
-out:
-   critical_exit();
ZONE_UNLOCK(zone);
-   if (b1)

svn commit: r357882 - in head/sys: kern sys

2020-02-13 Thread Jeff Roberson
Author: jeff
Date: Thu Feb 13 20:50:21 2020
New Revision: 357882
URL: https://svnweb.freebsd.org/changeset/base/357882

Log:
  Add more precise SMR entry asserts.
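
  A hedged example of the intended use (the lookup routine and its types are
  hypothetical):

    static struct foo *
    foo_lookup(smr_t smr, struct foo_list *head, int key)
    {
        struct foo *p;

        /* The caller must be between smr_enter() and smr_exit(). */
        SMR_ASSERT_ENTERED(smr);
        LIST_FOREACH(p, head, f_link)
            if (p->f_key == key)
                return (p);
        return (NULL);
    }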

Modified:
  head/sys/kern/subr_smr.c
  head/sys/sys/smr.h

Modified: head/sys/kern/subr_smr.c
==
--- head/sys/kern/subr_smr.cThu Feb 13 20:49:45 2020(r357881)
+++ head/sys/kern/subr_smr.cThu Feb 13 20:50:21 2020(r357882)
@@ -193,8 +193,7 @@ smr_advance(smr_t smr)
/*
 * It is illegal to enter while in an smr section.
 */
-   KASSERT(curthread->td_critnest == 0,
-   ("smr_advance: Not allowed in a critical section."));
+   SMR_ASSERT_NOT_ENTERED(smr);
 
/*
 * Modifications not done in a smr section need to be visible
@@ -237,6 +236,8 @@ smr_advance_deferred(smr_t smr, int limit)
smr_seq_t goal;
smr_t csmr;
 
+   SMR_ASSERT_NOT_ENTERED(smr);
+
critical_enter();
csmr = zpcpu_get(smr);
if (++csmr->c_deferred >= limit) {
@@ -275,8 +276,8 @@ smr_poll(smr_t smr, smr_seq_t goal, bool wait)
/*
 * It is illegal to enter while in an smr section.
 */
-   KASSERT(!wait || curthread->td_critnest == 0,
-   ("smr_poll: Blocking not allowed in a critical section."));
+   KASSERT(!wait || !SMR_ENTERED(smr),
+   ("smr_poll: Blocking not allowed in a SMR section."));
 
/*
 * Use a critical section so that we can avoid ABA races

Modified: head/sys/sys/smr.h
==
--- head/sys/sys/smr.h  Thu Feb 13 20:49:45 2020(r357881)
+++ head/sys/sys/smr.h  Thu Feb 13 20:50:21 2020(r357882)
@@ -68,6 +68,15 @@ struct smr {
int c_deferred; /* Deferred advance counter. */
 };
 
+#defineSMR_ENTERED(smr)
\
+(curthread->td_critnest != 0 && zpcpu_get((smr))->c_seq != SMR_SEQ_INVALID)
+
+#defineSMR_ASSERT_ENTERED(smr) 
\
+KASSERT(SMR_ENTERED(smr), ("Not in smr section"))
+
+#defineSMR_ASSERT_NOT_ENTERED(smr) 
\
+KASSERT(!SMR_ENTERED(smr), ("In smr section."));
+
 /*
  * Return the current write sequence number.
  */
___
svn-src-head@freebsd.org mailing list
https://lists.freebsd.org/mailman/listinfo/svn-src-head
To unsubscribe, send any mail to "svn-src-head-unsubscr...@freebsd.org"


svn commit: r357641 - head/sys/kern

2020-02-06 Thread Jeff Roberson
Author: jeff
Date: Thu Feb  6 20:51:46 2020
New Revision: 357641
URL: https://svnweb.freebsd.org/changeset/base/357641

Log:
  Fix a race in smr_advance() that could result in unnecessary poll calls.
  
  This was relatively harmless but surprising to see in counters.  The
  race occurred when rd_seq was read after the goal was updated and we
  incorrectly calculated the delta between them.
  
  Reviewed by:  rlibby
  Differential Revision:https://reviews.freebsd.org/D23464

Modified:
  head/sys/kern/subr_smr.c

Modified: head/sys/kern/subr_smr.c
==
--- head/sys/kern/subr_smr.cThu Feb  6 20:47:50 2020(r357640)
+++ head/sys/kern/subr_smr.cThu Feb  6 20:51:46 2020(r357641)
@@ -160,7 +160,7 @@ static uma_zone_t smr_zone;
 #defineSMR_SEQ_INCR(UINT_MAX / 1)
 #defineSMR_SEQ_INIT(UINT_MAX - 10)
 /* Force extra polls to test the integer overflow detection. */
-#defineSMR_SEQ_MAX_DELTA   (1000)
+#defineSMR_SEQ_MAX_DELTA   (SMR_SEQ_INCR * 32)
 #defineSMR_SEQ_MAX_ADVANCE SMR_SEQ_MAX_DELTA / 2
 #endif
 
@@ -188,7 +188,7 @@ smr_seq_t
 smr_advance(smr_t smr)
 {
smr_shared_t s;
-   smr_seq_t goal;
+   smr_seq_t goal, s_rd_seq;
 
/*
 * It is illegal to enter while in an smr section.
@@ -203,12 +203,18 @@ smr_advance(smr_t smr)
atomic_thread_fence_rel();
 
/*
+* Load the current read seq before incrementing the goal so
+* we are guaranteed it is always < goal.
+*/
+   s = zpcpu_get(smr)->c_shared;
+   s_rd_seq = atomic_load_acq_int(&s->s_rd_seq);
+
+   /*
 * Increment the shared write sequence by 2.  Since it is
 * initialized to 1 this means the only valid values are
 * odd and an observed value of 0 in a particular CPU means
 * it is not currently in a read section.
 */
-   s = zpcpu_get(smr)->c_shared;
goal = atomic_fetchadd_int(&s->s_wr_seq, SMR_SEQ_INCR) + SMR_SEQ_INCR;
counter_u64_add(advance, 1);
 
@@ -217,7 +223,7 @@ smr_advance(smr_t smr)
 * far ahead of the read sequence number.  This keeps the
 * wrap detecting arithmetic working in pathological cases.
 */
-   if (goal - atomic_load_int(&s->s_rd_seq) >= SMR_SEQ_MAX_DELTA) {
+   if (SMR_SEQ_DELTA(goal, s_rd_seq) >= SMR_SEQ_MAX_DELTA) {
counter_u64_add(advance_wait, 1);
smr_wait(smr, goal - SMR_SEQ_MAX_ADVANCE);
}
___
svn-src-head@freebsd.org mailing list
https://lists.freebsd.org/mailman/listinfo/svn-src-head
To unsubscribe, send any mail to "svn-src-head-unsubscr...@freebsd.org"


svn commit: r357640 - head/sys/net

2020-02-06 Thread Jeff Roberson
Author: jeff
Date: Thu Feb  6 20:47:50 2020
New Revision: 357640
URL: https://svnweb.freebsd.org/changeset/base/357640

Log:
  Temporarily force IFF_NEEDSEPOCH until drivers have been resolved.
  
  Recent network epoch changes have left some drivers unexpectedly broken
  and there is not yet a consensus on the correct fix.  This patch has
  a minor performance impact until we can agree on the correct path
  forward.
  
  Reviewed by:  core, network, imp, glebius, hselasky
  Differential Revision:https://reviews.freebsd.org/D23515

Modified:
  head/sys/net/if.c

Modified: head/sys/net/if.c
==
--- head/sys/net/if.c   Thu Feb  6 20:32:53 2020(r357639)
+++ head/sys/net/if.c   Thu Feb  6 20:47:50 2020(r357640)
@@ -546,6 +546,8 @@ if_alloc_domain(u_char type, int numa_domain)
 #ifdef VIMAGE
ifp->if_vnet = curvnet;
 #endif
+   /* XXX */
+   ifp->if_flags |= IFF_NEEDSEPOCH;
if (if_com_alloc[type] != NULL) {
ifp->if_l2com = if_com_alloc[type](type, ifp);
if (ifp->if_l2com == NULL) {
@@ -4152,7 +4154,8 @@ if_setdrvflags(if_t ifp, int flags)
 int
 if_setflags(if_t ifp, int flags)
 {
-   ((struct ifnet *)ifp)->if_flags = flags;
+   /* XXX Temporary */
+   ((struct ifnet *)ifp)->if_flags = flags | IFF_NEEDSEPOCH;
return (0);
 }
 
___
svn-src-head@freebsd.org mailing list
https://lists.freebsd.org/mailman/listinfo/svn-src-head
To unsubscribe, send any mail to "svn-src-head-unsubscr...@freebsd.org"


svn commit: r357637 - head/sys/kern

2020-02-06 Thread Jeff Roberson
Author: jeff
Date: Thu Feb  6 20:10:21 2020
New Revision: 357637
URL: https://svnweb.freebsd.org/changeset/base/357637

Log:
  Add some global counters for SMR.  These may eventually become per-smr
  counters.  In my stress test there is only one poll for every 15,000
  frees.  This means we are effectively amortizing the cache coherency
  overhead even with very high write rates (3M/s/core).
  
  Reviewed by:  markj, rlibby
  Differential Revision:https://reviews.freebsd.org/D23463
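
  A hedged userland sketch for reading the new counters; the sysctl names
  follow from the node declarations in the diff below, and error handling is
  omitted:

    #include <sys/types.h>
    #include <sys/sysctl.h>
    #include <stdint.h>
    #include <stdio.h>

    static void
    print_smr_polls(void)
    {
        uint64_t polls;
        size_t len = sizeof(polls);

        if (sysctlbyname("debug.smr.poll", &polls, &len, NULL, 0) == 0)
            printf("SMR polls: %ju\n", (uintmax_t)polls);
    }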

Modified:
  head/sys/kern/subr_smr.c

Modified: head/sys/kern/subr_smr.c
==
--- head/sys/kern/subr_smr.cThu Feb  6 18:51:36 2020(r357636)
+++ head/sys/kern/subr_smr.cThu Feb  6 20:10:21 2020(r357637)
@@ -30,11 +30,13 @@ __FBSDID("$FreeBSD$");
 
 #include 
 #include 
-#include 
+#include 
 #include 
+#include 
 #include 
 #include 
 #include 
+#include 
 
 #include 
 
@@ -162,6 +164,17 @@ static uma_zone_t smr_zone;
 #defineSMR_SEQ_MAX_ADVANCE SMR_SEQ_MAX_DELTA / 2
 #endif
 
+static SYSCTL_NODE(_debug, OID_AUTO, smr, CTLFLAG_RW, NULL, "SMR Stats");
+static counter_u64_t advance = EARLY_COUNTER;
+SYSCTL_COUNTER_U64(_debug_smr, OID_AUTO, advance, CTLFLAG_RD, &advance, "");
+static counter_u64_t advance_wait = EARLY_COUNTER;
+SYSCTL_COUNTER_U64(_debug_smr, OID_AUTO, advance_wait, CTLFLAG_RD, 
_wait, "");
+static counter_u64_t poll = EARLY_COUNTER;
+SYSCTL_COUNTER_U64(_debug_smr, OID_AUTO, poll, CTLFLAG_RD, &poll, "");
+static counter_u64_t poll_scan = EARLY_COUNTER;
+SYSCTL_COUNTER_U64(_debug_smr, OID_AUTO, poll_scan, CTLFLAG_RD, &poll_scan, 
"");
+
+
 /*
  * Advance the write sequence and return the new value for use as the
  * wait goal.  This guarantees that any changes made by the calling
@@ -197,14 +210,17 @@ smr_advance(smr_t smr)
 */
s = zpcpu_get(smr)->c_shared;
goal = atomic_fetchadd_int(&s->s_wr_seq, SMR_SEQ_INCR) + SMR_SEQ_INCR;
+   counter_u64_add(advance, 1);
 
/*
 * Force a synchronization here if the goal is getting too
 * far ahead of the read sequence number.  This keeps the
 * wrap detecting arithmetic working in pathological cases.
 */
-   if (goal - atomic_load_int(&s->s_rd_seq) >= SMR_SEQ_MAX_DELTA)
+   if (goal - atomic_load_int(&s->s_rd_seq) >= SMR_SEQ_MAX_DELTA) {
+   counter_u64_add(advance_wait, 1);
smr_wait(smr, goal - SMR_SEQ_MAX_ADVANCE);
+   }
 
return (goal);
 }
@@ -263,6 +279,7 @@ smr_poll(smr_t smr, smr_seq_t goal, bool wait)
success = true;
critical_enter();
s = zpcpu_get(smr)->c_shared;
+   counter_u64_add_protected(poll, 1);
 
/*
 * Acquire barrier loads s_wr_seq after s_rd_seq so that we can not
@@ -306,6 +323,7 @@ smr_poll(smr_t smr, smr_seq_t goal, bool wait)
 * gone inactive.  Keep track of the oldest sequence currently
 * active as rd_seq.
 */
+   counter_u64_add_protected(poll_scan, 1);
rd_seq = s_wr_seq;
CPU_FOREACH(i) {
c = zpcpu_get_cpu(smr, i);
@@ -366,7 +384,7 @@ smr_poll(smr_t smr, smr_seq_t goal, bool wait)
s_rd_seq = atomic_load_int(&s->s_rd_seq);
do {
if (SMR_SEQ_LEQ(rd_seq, s_rd_seq))
-   break;
+   goto out;
} while (atomic_fcmpset_int(&s->s_rd_seq, &s_rd_seq, rd_seq) == 0);
 
 out:
@@ -426,3 +444,14 @@ smr_init(void)
smr_zone = uma_zcreate("SMR CPU", sizeof(struct smr),
NULL, NULL, NULL, NULL, (CACHE_LINE_SIZE * 2) - 1, UMA_ZONE_PCPU);
 }
+
+static void
+smr_init_counters(void *unused)
+{
+
+   advance = counter_u64_alloc(M_WAITOK);
+   advance_wait = counter_u64_alloc(M_WAITOK);
+   poll = counter_u64_alloc(M_WAITOK);
+   poll_scan = counter_u64_alloc(M_WAITOK);
+}
+SYSINIT(smr_counters, SI_SUB_CPU, SI_ORDER_ANY, smr_init_counters, NULL);
___
svn-src-head@freebsd.org mailing list
https://lists.freebsd.org/mailman/listinfo/svn-src-head
To unsubscribe, send any mail to "svn-src-head-unsubscr...@freebsd.org"


svn commit: r357528 - head/sys/vm

2020-02-04 Thread Jeff Roberson
Author: jeff
Date: Tue Feb  4 20:33:01 2020
New Revision: 357528
URL: https://svnweb.freebsd.org/changeset/base/357528

Log:
  Add an explicit busy state for free pages.  This improves behavior in the
  presence of bugs that access freed pages and provides a path towards
  lockless page lookup.
  
  Reviewed by:  kib
  Differential Revision:https://reviews.freebsd.org/D23444
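
  A hedged sketch of the kind of check this enables (illustrative only; the
  assertion site is hypothetical):

    /* A straggling reference to a freed page now trips an assertion. */
    KASSERT(m->busy_lock != VPB_FREED,
        ("foo_touch_page: page %p was already freed", m));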

Modified:
  head/sys/vm/vm_page.c
  head/sys/vm/vm_page.h

Modified: head/sys/vm/vm_page.c
==
--- head/sys/vm/vm_page.c   Tue Feb  4 20:28:06 2020(r357527)
+++ head/sys/vm/vm_page.c   Tue Feb  4 20:33:01 2020(r357528)
@@ -508,7 +508,7 @@ vm_page_init_page(vm_page_t m, vm_paddr_t pa, int segi
 
m->object = NULL;
m->ref_count = 0;
-   m->busy_lock = VPB_UNBUSIED;
+   m->busy_lock = VPB_FREED;
m->flags = m->a.flags = 0;
m->phys_addr = pa;
m->a.queue = PQ_NONE;
@@ -988,6 +988,8 @@ vm_page_sunbusy(vm_page_t m)
 
x = m->busy_lock;
for (;;) {
+   KASSERT(x != VPB_FREED,
+   ("vm_page_sunbusy: Unlocking freed page."));
if (VPB_SHARERS(x) > 1) {
if (atomic_fcmpset_int(>busy_lock, ,
x - VPB_ONE_SHARER))
@@ -1155,6 +1157,17 @@ vm_page_xunbusy_hard_unchecked(vm_page_t m)
vm_page_xunbusy_hard_tail(m);
 }
 
+static void
+vm_page_busy_free(vm_page_t m)
+{
+   u_int x;
+
+   atomic_thread_fence_rel();
+   x = atomic_swap_int(>busy_lock, VPB_FREED);
+   if ((x & VPB_BIT_WAITERS) != 0)
+   wakeup(m);
+}
+
 /*
  * vm_page_unhold_pages:
  *
@@ -1249,7 +1262,8 @@ vm_page_putfake(vm_page_t m)
KASSERT((m->oflags & VPO_UNMANAGED) != 0, ("managed %p", m));
KASSERT((m->flags & PG_FICTITIOUS) != 0,
("vm_page_putfake: bad page %p", m));
-   vm_page_xunbusy(m);
+   vm_page_assert_xbusied(m);
+   vm_page_busy_free(m);
uma_zfree(fakepg_zone, m);
 }
 
@@ -2012,11 +2026,12 @@ found:
m->a.flags = 0;
m->oflags = object == NULL || (object->flags & OBJ_UNMANAGED) != 0 ?
VPO_UNMANAGED : 0;
-   m->busy_lock = VPB_UNBUSIED;
if ((req & (VM_ALLOC_NOBUSY | VM_ALLOC_NOOBJ | VM_ALLOC_SBUSY)) == 0)
m->busy_lock = VPB_CURTHREAD_EXCLUSIVE;
-   if ((req & VM_ALLOC_SBUSY) != 0)
+   else if ((req & VM_ALLOC_SBUSY) != 0)
m->busy_lock = VPB_SHARERS_WORD(1);
+   else
+   m->busy_lock = VPB_UNBUSIED;
if (req & VM_ALLOC_WIRED) {
vm_wire_add(1);
m->ref_count = 1;
@@ -2202,11 +2217,12 @@ found:
flags |= PG_NODUMP;
oflags = object == NULL || (object->flags & OBJ_UNMANAGED) != 0 ?
VPO_UNMANAGED : 0;
-   busy_lock = VPB_UNBUSIED;
if ((req & (VM_ALLOC_NOBUSY | VM_ALLOC_NOOBJ | VM_ALLOC_SBUSY)) == 0)
busy_lock = VPB_CURTHREAD_EXCLUSIVE;
-   if ((req & VM_ALLOC_SBUSY) != 0)
+   else if ((req & VM_ALLOC_SBUSY) != 0)
busy_lock = VPB_SHARERS_WORD(1);
+   else
+   busy_lock = VPB_UNBUSIED;
if ((req & VM_ALLOC_WIRED) != 0)
vm_wire_add(npages);
if (object != NULL) {
@@ -2268,7 +2284,7 @@ vm_page_alloc_check(vm_page_t m)
("page %p has unexpected queue %d, flags %#x",
m, m->a.queue, (m->a.flags & PGA_QUEUE_STATE_MASK)));
KASSERT(m->ref_count == 0, ("page %p has references", m));
-   KASSERT(!vm_page_busied(m), ("page %p is busy", m));
+   KASSERT(vm_page_busy_freed(m), ("page %p is not freed", m));
KASSERT(m->dirty == 0, ("page %p is dirty", m));
KASSERT(pmap_page_get_memattr(m) == VM_MEMATTR_DEFAULT,
("page %p has unexpected memattr %d",
@@ -3592,9 +3608,6 @@ vm_page_free_prep(vm_page_t m)
 */
atomic_thread_fence_acq();
 
-   if (vm_page_sbusied(m))
-   panic("vm_page_free_prep: freeing shared busy page %p", m);
-
 #if defined(DIAGNOSTIC) && defined(PHYS_TO_DMAP)
if (PMAP_HAS_DMAP && (m->flags & PG_ZERO) != 0) {
uint64_t *p;
@@ -3621,7 +3634,7 @@ vm_page_free_prep(vm_page_t m)
((m->object->flags & OBJ_UNMANAGED) != 0),
("vm_page_free_prep: managed flag mismatch for page %p",
m));
-   vm_page_object_remove(m);
+   vm_page_assert_xbusied(m);
 
/*
 * The object reference can be released without an atomic
@@ -3631,13 +3644,13 @@ vm_page_free_prep(vm_page_t m)
m->ref_count == VPRC_OBJREF,
("vm_page_free_prep: page %p has unexpected ref_count %u",
m, m->ref_count));
+   vm_page_object_remove(m);
m->object = NULL;
m->ref_count -= VPRC_OBJREF;
-  

svn commit: r357527 - head/sys/vm

2020-02-04 Thread Jeff Roberson
Author: jeff
Date: Tue Feb  4 20:28:06 2020
New Revision: 357527
URL: https://svnweb.freebsd.org/changeset/base/357527

Log:
  Use literal bucket sizes for smaller buckets rather than the rounding
  system.  Small bucket sizes already pack well even if they are an odd
  number of words.  This prevents any potential new instances of the
  problem fixed in r357463 as well as making the system easier to
  understand.
  
  Reviewed by:  markj
  Differential Revision:https://reviews.freebsd.org/D23494

Modified:
  head/sys/vm/uma_core.c

Modified: head/sys/vm/uma_core.c
==
--- head/sys/vm/uma_core.c  Tue Feb  4 20:09:25 2020(r357526)
+++ head/sys/vm/uma_core.c  Tue Feb  4 20:28:06 2020(r357527)
@@ -236,16 +236,15 @@ struct uma_bucket_zone {
 (((sizeof(void *) * (n)) - sizeof(struct uma_bucket)) / sizeof(void *))
 
 #defineBUCKET_MAX  BUCKET_SIZE(256)
-#defineBUCKET_MIN  BUCKET_SIZE(4)
+#defineBUCKET_MIN  2
 
 struct uma_bucket_zone bucket_zones[] = {
-#ifndef __ILP32__
-   { NULL, "4 Bucket", BUCKET_SIZE(4), 4096 },
-#endif
-   { NULL, "6 Bucket", BUCKET_SIZE(6), 3072 },
-   { NULL, "8 Bucket", BUCKET_SIZE(8), 2048 },
-   { NULL, "12 Bucket", BUCKET_SIZE(12), 1536 },
-   { NULL, "16 Bucket", BUCKET_SIZE(16), 1024 },
+   /* Literal bucket sizes. */
+   { NULL, "2 Bucket", 2, 4096 },
+   { NULL, "4 Bucket", 4, 3072 },
+   { NULL, "8 Bucket", 8, 2048 },
+   { NULL, "16 Bucket", 16, 1024 },
+   /* Rounded down power of 2 sizes for efficiency. */
{ NULL, "32 Bucket", BUCKET_SIZE(32), 512 },
{ NULL, "64 Bucket", BUCKET_SIZE(64), 256 },
{ NULL, "128 Bucket", BUCKET_SIZE(128), 128 },
___
svn-src-head@freebsd.org mailing list
https://lists.freebsd.org/mailman/listinfo/svn-src-head
To unsubscribe, send any mail to "svn-src-head-unsubscr...@freebsd.org"


svn commit: r357494 - head/lib/libmemstat

2020-02-03 Thread Jeff Roberson
Author: jeff
Date: Tue Feb  4 05:27:45 2020
New Revision: 357494
URL: https://svnweb.freebsd.org/changeset/base/357494

Log:
  Fix libmemstat_uma build after r357485.
  
  Submitted by: cy

Modified:
  head/lib/libmemstat/memstat_uma.c

Modified: head/lib/libmemstat/memstat_uma.c
==
--- head/lib/libmemstat/memstat_uma.c   Tue Feb  4 05:27:05 2020
(r357493)
+++ head/lib/libmemstat/memstat_uma.c   Tue Feb  4 05:27:45 2020
(r357494)
@@ -459,9 +459,9 @@ skip_percpu:
if (ret != 0)
continue;
for (ubp =
-   TAILQ_FIRST(&uzd.uzd_buckets);
+   STAILQ_FIRST(&uzd.uzd_buckets);
ubp != NULL;
-   ubp = TAILQ_NEXT(&ub, ub_link)) {
+   ubp = STAILQ_NEXT(&ub, ub_link)) {
ret = kread(kvm, ubp, &ub,
   sizeof(ub), 0);
if (ret != 0)
___
svn-src-head@freebsd.org mailing list
https://lists.freebsd.org/mailman/listinfo/svn-src-head
To unsubscribe, send any mail to "svn-src-head-unsubscr...@freebsd.org"


svn commit: r357487 - in head/sys: kern sys

2020-02-03 Thread Jeff Roberson
Author: jeff
Date: Tue Feb  4 02:44:52 2020
New Revision: 357487
URL: https://svnweb.freebsd.org/changeset/base/357487

Log:
  Implement a deferred write advancement feature that can be used to further
  amortize shared cacheline writes.
  
  Discussed with: rlibby
  Differential Revision:https://reviews.freebsd.org/D23462
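
  A hedged illustration of the intended write-side pattern (the structure and
  field names are hypothetical):

    /*
     * Only every Nth retire pays for bumping the shared write sequence;
     * the returned goal is still valid to pass to smr_poll()/smr_wait().
     */
    static void
    foo_retire(struct foo_cache *fc, struct foo *p)
    {

        p->f_goal = smr_advance_deferred(fc->fc_smr, 8);
        /* Reclaim p only after smr_poll(fc->fc_smr, p->f_goal, true). */
    }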

Modified:
  head/sys/kern/subr_smr.c
  head/sys/sys/smr.h

Modified: head/sys/kern/subr_smr.c
==
--- head/sys/kern/subr_smr.cTue Feb  4 02:42:54 2020(r357486)
+++ head/sys/kern/subr_smr.cTue Feb  4 02:44:52 2020(r357487)
@@ -209,6 +209,26 @@ smr_advance(smr_t smr)
return (goal);
 }
 
+smr_seq_t
+smr_advance_deferred(smr_t smr, int limit)
+{
+   smr_seq_t goal;
+   smr_t csmr;
+
+   critical_enter();
+   csmr = zpcpu_get(smr);
+   if (++csmr->c_deferred >= limit) {
+   goal = SMR_SEQ_INVALID;
+   csmr->c_deferred = 0;
+   } else
+   goal = smr_shared_current(csmr->c_shared) + SMR_SEQ_INCR;
+   critical_exit();
+   if (goal != SMR_SEQ_INVALID)
+   return (goal);
+
+   return (smr_advance(smr));
+}
+
 /*
  * Poll to determine whether all readers have observed the 'goal' write
  * sequence number.
@@ -255,6 +275,17 @@ smr_poll(smr_t smr, smr_seq_t goal, bool wait)
 * c_seq can only reference time after this wr_seq.
 */
s_wr_seq = atomic_load_acq_int(&s->s_wr_seq);
+
+   /*
+* This may have come from a deferred advance.  Consider one
+* increment past the current wr_seq valid and make sure we
+* have advanced far enough to succeed.  We simply add to avoid
+* an additional fence.
+*/
+   if (goal == s_wr_seq + SMR_SEQ_INCR) {
+   atomic_add_int(&s->s_wr_seq, SMR_SEQ_INCR);
+   s_wr_seq = goal;
+   }
 
/*
 * Detect whether the goal is valid and has already been observed.

Modified: head/sys/sys/smr.h
==
--- head/sys/sys/smr.h  Tue Feb  4 02:42:54 2020(r357486)
+++ head/sys/sys/smr.h  Tue Feb  4 02:44:52 2020(r357487)
@@ -64,6 +64,7 @@ typedef struct smr_shared *smr_shared_t;
 struct smr {
smr_seq_t   c_seq;  /* Current observed sequence. */
smr_shared_tc_shared;   /* Shared SMR state. */
+   int c_deferred; /* Deferred advance counter. */
 };
 
 /*
@@ -144,6 +145,13 @@ smr_exit(smr_t smr)
  * required to ensure that all modifications are visible to readers.
  */
 smr_seq_t smr_advance(smr_t smr);
+
+/*
+ * Advances the write sequence number only after N calls.  Returns
+ * the correct goal for a wr_seq that has not yet occurred.  Used to
+ * minimize shared cacheline invalidations for frequent writers.
+ */
+smr_seq_t smr_advance_deferred(smr_t smr, int limit);
 
 /*
  * Returns true if a goal sequence has been reached.  If
___
svn-src-head@freebsd.org mailing list
https://lists.freebsd.org/mailman/listinfo/svn-src-head
To unsubscribe, send any mail to "svn-src-head-unsubscr...@freebsd.org"


svn commit: r357486 - head/sys/kern

2020-02-03 Thread Jeff Roberson
Author: jeff
Date: Tue Feb  4 02:42:54 2020
New Revision: 357486
URL: https://svnweb.freebsd.org/changeset/base/357486

Log:
  Fix a recursion on the thread lock by acquiring it after calling rtp_to_pri().
  
  Reported by:  swills
  Reviewed by:  kib, markj
  Differential Revision:https://reviews.freebsd.org/D23495

Modified:
  head/sys/kern/kern_thr.c

Modified: head/sys/kern/kern_thr.c
==
--- head/sys/kern/kern_thr.cTue Feb  4 02:41:24 2020(r357485)
+++ head/sys/kern/kern_thr.cTue Feb  4 02:42:54 2020(r357486)
@@ -271,14 +271,12 @@ thread_create(struct thread *td, struct rtprio *rtp,
 
tidhash_add(newtd);
 
+   /* ignore timesharing class */
+   if (rtp != NULL && !(td->td_pri_class == PRI_TIMESHARE &&
+   rtp->type == RTP_PRIO_NORMAL))
+   rtp_to_pri(rtp, newtd);
+
thread_lock(newtd);
-   if (rtp != NULL) {
-   if (!(td->td_pri_class == PRI_TIMESHARE &&
- rtp->type == RTP_PRIO_NORMAL)) {
-   rtp_to_pri(rtp, newtd);
-   sched_prio(newtd, newtd->td_user_pri);
-   } /* ignore timesharing class */
-   }
TD_SET_CAN_RUN(newtd);
sched_add(newtd, SRQ_BORING);
 
___
svn-src-head@freebsd.org mailing list
https://lists.freebsd.org/mailman/listinfo/svn-src-head
To unsubscribe, send any mail to "svn-src-head-unsubscr...@freebsd.org"


svn commit: r357485 - head/sys/vm

2020-02-03 Thread Jeff Roberson
Author: jeff
Date: Tue Feb  4 02:41:24 2020
New Revision: 357485
URL: https://svnweb.freebsd.org/changeset/base/357485

Log:
  Use STAILQ instead of TAILQ for bucket lists.  We only need FIFO behavior
  and this is more space efficient.
  
  Stop queueing recently used buckets to the head of the list.  If the bucket
  goes to a different processor the cache coherency will be more expensive.
  We already try to encourage cache-hot behavior in the per-cpu layer.
  
  Reviewed by:  rlibby
  Differential Revision:https://reviews.freebsd.org/D23493

Modified:
  head/sys/vm/uma_core.c
  head/sys/vm/uma_int.h

Modified: head/sys/vm/uma_core.c
==
--- head/sys/vm/uma_core.c  Tue Feb  4 02:06:21 2020(r357484)
+++ head/sys/vm/uma_core.c  Tue Feb  4 02:41:24 2020(r357485)
@@ -544,7 +544,7 @@ zone_fetch_bucket(uma_zone_t zone, uma_zone_domain_t z
 
ZONE_LOCK_ASSERT(zone);
 
-   if ((bucket = TAILQ_FIRST(&zdom->uzd_buckets)) == NULL)
+   if ((bucket = STAILQ_FIRST(&zdom->uzd_buckets)) == NULL)
return (NULL);
 
if ((zone->uz_flags & UMA_ZONE_SMR) != 0 &&
@@ -555,7 +555,7 @@ zone_fetch_bucket(uma_zone_t zone, uma_zone_domain_t z
dtor = (zone->uz_dtor != NULL) | UMA_ALWAYS_CTORDTOR;
}
MPASS(zdom->uzd_nitems >= bucket->ub_cnt);
-   TAILQ_REMOVE(&zdom->uzd_buckets, bucket, ub_link);
+   STAILQ_REMOVE_HEAD(&zdom->uzd_buckets, ub_link);
zdom->uzd_nitems -= bucket->ub_cnt;
if (zdom->uzd_imin > zdom->uzd_nitems)
zdom->uzd_imin = zdom->uzd_nitems;
@@ -583,10 +583,7 @@ zone_put_bucket(uma_zone_t zone, uma_zone_domain_t zdo
KASSERT(!ws || zone->uz_bkt_count < zone->uz_bkt_max,
("%s: zone %p overflow", __func__, zone));
 
-   if (ws && bucket->ub_seq == SMR_SEQ_INVALID)
-   TAILQ_INSERT_HEAD(&zdom->uzd_buckets, bucket, ub_link);
-   else
-   TAILQ_INSERT_TAIL(&zdom->uzd_buckets, bucket, ub_link);
+   STAILQ_INSERT_TAIL(&zdom->uzd_buckets, bucket, ub_link);
zdom->uzd_nitems += bucket->ub_cnt;
if (ws && zdom->uzd_imax < zdom->uzd_nitems)
zdom->uzd_imax = zdom->uzd_nitems;
@@ -1187,11 +1184,11 @@ bucket_cache_reclaim(uma_zone_t zone, bool drain)
target = drain ? 0 : lmax(zdom->uzd_wss, zdom->uzd_nitems -
zdom->uzd_imin);
while (zdom->uzd_nitems > target) {
-   bucket = TAILQ_FIRST(&zdom->uzd_buckets);
+   bucket = STAILQ_FIRST(&zdom->uzd_buckets);
if (bucket == NULL)
break;
tofree = bucket->ub_cnt;
-   TAILQ_REMOVE(&zdom->uzd_buckets, bucket, ub_link);
+   STAILQ_REMOVE_HEAD(&zdom->uzd_buckets, ub_link);
zdom->uzd_nitems -= tofree;
 
/*
@@ -2365,7 +2362,7 @@ zone_ctor(void *mem, int size, void *udata, int flags)
ZONE_CROSS_LOCK_INIT(zone);
 
for (i = 0; i < vm_ndomains; i++)
-   TAILQ_INIT(&zone->uz_domain[i].uzd_buckets);
+   STAILQ_INIT(&zone->uz_domain[i].uzd_buckets);
 
 #ifdef INVARIANTS
if (arg->uminit == trash_init && arg->fini == trash_fini)
@@ -3930,7 +3927,7 @@ zone_free_cross(uma_zone_t zone, uma_bucket_t bucket, 
"uma_zfree: zone %s(%p) draining cross bucket %p",
zone->uz_name, zone, bucket);
 
-   TAILQ_INIT(&fullbuckets);
+   STAILQ_INIT(&fullbuckets);
 
/*
 * To avoid having ndomain * ndomain buckets for sorting we have a
@@ -3949,19 +3946,19 @@ zone_free_cross(uma_zone_t zone, uma_bucket_t bucket, 
}
zdom->uzd_cross->ub_bucket[zdom->uzd_cross->ub_cnt++] = item;
if (zdom->uzd_cross->ub_cnt == zdom->uzd_cross->ub_entries) {
-   TAILQ_INSERT_HEAD(&fullbuckets, zdom->uzd_cross,
+   STAILQ_INSERT_HEAD(&fullbuckets, zdom->uzd_cross,
ub_link);
zdom->uzd_cross = NULL;
}
bucket->ub_cnt--;
}
ZONE_CROSS_UNLOCK(zone);
-   if (!TAILQ_EMPTY(&fullbuckets)) {
+   if (!STAILQ_EMPTY(&fullbuckets)) {
ZONE_LOCK(zone);
-   while ((b = TAILQ_FIRST(&fullbuckets)) != NULL) {
+   while ((b = STAILQ_FIRST(&fullbuckets)) != NULL) {
if ((zone->uz_flags & UMA_ZONE_SMR) != 0)
bucket->ub_seq = smr_current(zone->uz_smr);
-   TAILQ_REMOVE(&fullbuckets, b, ub_link);
+   STAILQ_REMOVE_HEAD(&fullbuckets, ub_link);
if (zone->uz_bkt_count >= zone->uz_bkt_max) {
ZONE_UNLOCK(zone);
bucket_drain(zone, b);

Modified: head/sys/vm/uma_int.h
==
--- head/sys/vm/uma_int.h   Tue Feb  4 02:06:21 2020(r357484)

svn commit: r357392 - head/sys/vm

2020-02-01 Thread Jeff Roberson
Author: jeff
Date: Sat Feb  1 23:46:30 2020
New Revision: 357392
URL: https://svnweb.freebsd.org/changeset/base/357392

Log:
  Fix a bug in r356776 where the page allocator was not properly restored to
  the percpu page allocator after it had been temporarily overridden by
  startup_alloc.
  
  Reported by:  pho, bdragon

Modified:
  head/sys/vm/uma_core.c

Modified: head/sys/vm/uma_core.c
==
--- head/sys/vm/uma_core.c  Sat Feb  1 23:16:30 2020(r357391)
+++ head/sys/vm/uma_core.c  Sat Feb  1 23:46:30 2020(r357392)
@@ -2101,7 +2101,9 @@ zone_kva_available(uma_zone_t zone, void *unused)
if ((zone->uz_flags & UMA_ZFLAG_CACHE) != 0)
return;
KEG_GET(zone, keg);
-   if (keg->uk_allocf == startup_alloc)
+   if (keg->uk_flags & UMA_ZONE_PCPU)
+   keg->uk_allocf = pcpu_page_alloc;
+   else if (keg->uk_allocf == startup_alloc)
keg->uk_allocf = page_alloc;
 }
 
___
svn-src-head@freebsd.org mailing list
https://lists.freebsd.org/mailman/listinfo/svn-src-head
To unsubscribe, send any mail to "svn-src-head-unsubscr...@freebsd.org"


svn commit: r357355 - in head/sys: kern sys

2020-01-31 Thread Jeff Roberson
Author: jeff
Date: Fri Jan 31 22:21:15 2020
New Revision: 357355
URL: https://svnweb.freebsd.org/changeset/base/357355

Log:
  Add two missing fences with comments describing them.  These were found by
  inspection and after a lengthy discussion with jhb and kib.  They have not
  produced test failures.
  
  Don't pointer chase through cpu0's smr.  Use the CPU-correct smr even when
  not in a critical section to reduce the likelihood of false sharing.

Modified:
  head/sys/kern/subr_smr.c
  head/sys/sys/smr.h

Modified: head/sys/kern/subr_smr.c
==
--- head/sys/kern/subr_smr.cFri Jan 31 21:20:22 2020(r357354)
+++ head/sys/kern/subr_smr.cFri Jan 31 22:21:15 2020(r357355)
@@ -195,7 +195,7 @@ smr_advance(smr_t smr)
 * odd and an observed value of 0 in a particular CPU means
 * it is not currently in a read section.
 */
-   s = smr->c_shared;
+   s = zpcpu_get(smr)->c_shared;
goal = atomic_fetchadd_int(&s->s_wr_seq, SMR_SEQ_INCR) + SMR_SEQ_INCR;
 
/*
@@ -242,16 +242,21 @@ smr_poll(smr_t smr, smr_seq_t goal, bool wait)
 */
success = true;
critical_enter();
-   s = smr->c_shared;
+   s = zpcpu_get(smr)->c_shared;
 
/*
 * Acquire barrier loads s_wr_seq after s_rd_seq so that we can not
 * observe an updated read sequence that is larger than write.
 */
s_rd_seq = atomic_load_acq_int(&s->s_rd_seq);
-   s_wr_seq = smr_current(smr);
 
/*
+* wr_seq must be loaded prior to any c_seq value so that a stale
+* c_seq can only reference time after this wr_seq.
+*/
+   s_wr_seq = atomic_load_acq_int(&s->s_wr_seq);
+
+   /*
 * Detect whether the goal is valid and has already been observed.
 *
 * The goal must be in the range of s_wr_seq >= goal >= s_rd_seq for
@@ -335,6 +340,12 @@ smr_poll(smr_t smr, smr_seq_t goal, bool wait)
 
 out:
critical_exit();
+
+   /*
+* Serialize with smr_advance()/smr_exit().  The caller is now free
+* to modify memory as expected.
+*/
+   atomic_thread_fence_acq();
 
return (success);
 }

Modified: head/sys/sys/smr.h
==
--- head/sys/sys/smr.h  Fri Jan 31 21:20:22 2020(r357354)
+++ head/sys/sys/smr.h  Fri Jan 31 22:21:15 2020(r357355)
@@ -70,10 +70,17 @@ struct smr {
  * Return the current write sequence number.
  */
 static inline smr_seq_t
+smr_shared_current(smr_shared_t s)
+{
+
+   return (atomic_load_int(&s->s_wr_seq));
+}
+
+static inline smr_seq_t
 smr_current(smr_t smr)
 {
 
-   return (atomic_load_int(&smr->c_shared->s_wr_seq));
+   return (smr_shared_current(zpcpu_get(smr)->c_shared));
 }
 
 /*
@@ -106,7 +113,7 @@ smr_enter(smr_t smr)
 * is detected and handled there.
 */
/* This is an add because we do not have atomic_store_acq_int */
-   atomic_add_acq_int(&smr->c_seq, smr_current(smr));
+   atomic_add_acq_int(&smr->c_seq, smr_shared_current(smr->c_shared));
 }
 
 /*
___
svn-src-head@freebsd.org mailing list
https://lists.freebsd.org/mailman/listinfo/svn-src-head
To unsubscribe, send any mail to "svn-src-head-unsubscr...@freebsd.org"


svn commit: r357317 - in head/tools/uma: . smrstress

2020-01-30 Thread Jeff Roberson
Author: jeff
Date: Fri Jan 31 02:18:56 2020
New Revision: 357317
URL: https://svnweb.freebsd.org/changeset/base/357317

Log:
  Implement a simple UMA SMR stress testing tool.

Added:
  head/tools/uma/
  head/tools/uma/smrstress/
  head/tools/uma/smrstress/Makefile   (contents, props changed)
  head/tools/uma/smrstress/smrstress.c   (contents, props changed)

Added: head/tools/uma/smrstress/Makefile
==
--- /dev/null   00:00:00 1970   (empty, because file is newly added)
+++ head/tools/uma/smrstress/Makefile   Fri Jan 31 02:18:56 2020
(r357317)
@@ -0,0 +1,8 @@
+#
+# $FreeBSD$
+#
+
+KMOD= smrstress
+SRCS= smrstress.c
+
+.include <bsd.kmod.mk>

Added: head/tools/uma/smrstress/smrstress.c
==
--- /dev/null   00:00:00 1970   (empty, because file is newly added)
+++ head/tools/uma/smrstress/smrstress.cFri Jan 31 02:18:56 2020
(r357317)
@@ -0,0 +1,227 @@
+/*-
+ * SPDX-License-Identifier: BSD-2-Clause-FreeBSD
+ *
+ * Copyright (c) 2020 Jeffrey Roberson 
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *notice unmodified, this list of conditions, and the following
+ *disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *notice, this list of conditions and the following disclaimer in the
+ *documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
+ * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+ * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
+ * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
+ * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
+ * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
+ * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ * $FreeBSD$
+ *
+ */
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+
+#include 
+
+#include 
+
+static uma_zone_t smrs_zone;
+static smr_t smrs_smr;
+
+static int smrs_cpus;
+static int smrs_writers;
+static int smrs_started;
+static int smrs_iterations = 1000;
+static int smrs_failures = 0;
+static volatile int smrs_completed;
+
+struct smrs {
+   int generation;
+   volatile u_int  count;
+};
+
+uintptr_t smrs_current;
+
+static void
+smrs_error(struct smrs *smrs, const char *fmt, ...)
+{
+   va_list ap;
+
+   atomic_add_int(&smrs_failures, 1);
+   printf("SMR ERROR: wr_seq %d, rd_seq %d, c_seq %d, generation %d, count 
%d ",
+   smrs_smr->c_shared->s_wr_seq, smrs_smr->c_shared->s_rd_seq,
+   zpcpu_get(smrs_smr)->c_seq, smrs->generation, smrs->count);
+   va_start(ap, fmt);
+   (void)vprintf(fmt, ap);
+   va_end(ap);
+}
+
+static void
+smrs_read(void)
+{
+   struct smrs *cur;
+   int cnt;
+
+   /* Wait for the writer to exit. */
+   while (smrs_completed == 0) {
+   smr_enter(smrs_smr);
+   cur = (void *)atomic_load_ptr(&smrs_current);
+   if (cur->generation == -1)
+   smrs_error(cur, "read early: Use after free!\n");
+   atomic_add_int(&cur->count, 1);
+   DELAY(100);
+   cnt = atomic_fetchadd_int(&cur->count, -1);
+   if (cur->generation == -1)
+   smrs_error(cur, "read late: Use after free!\n");
+   else if (cnt <= 0)
+   smrs_error(cur, "Invalid ref\n");
+   smr_exit(smrs_smr);
+   maybe_yield();
+   }
+}
+
+static void
+smrs_write(void)
+{
+   struct smrs *cur;
+   int i;
+
+   for (i = 0; i < smrs_iterations; i++) {
+   cur = uma_zalloc_smr(smrs_zone, M_WAITOK);
+   cur = (void *)atomic_swap_ptr(&smrs_current, (uintptr_t)cur);
+   uma_zfree_smr(smrs_zone, cur);
+   }
+}
+
+static void
+smrs_thread(void *arg)
+{
+   int rw = (intptr_t)arg;
+
+   if (rw < smrs_writers)
+   smrs_write();
+   else
+   smrs_read();
+   atomic_add_int(&smrs_completed, 1);
+}
+
+static void
+smrs_start(void)
+{
+   struct smrs *cur;
+   int i;
+
+   smrs_cpus = mp_ncpus;
+   if (mp_ncpus > 3)
+   smrs_writers = 2;
+   else
+   smrs_writers = 1;
+   smrs_started = smrs_cpus;
+   smrs_completed = 
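
As a quick distillation of the smrs_read() and smrs_write() loops above, the
access pattern the stress test exercises looks roughly like the sketch below:
readers bracket their dereference of the shared pointer with
smr_enter()/smr_exit(), and writers publish a replacement object before handing
the old one to the SMR-aware zone free, which defers reclamation until existing
readers have drained.  The identifiers are the ones declared in smrstress.c
above; the function itself is illustrative only and is not part of the
committed code.

static void
smrs_pattern_sketch(void)
{
	struct smrs *cur, *old, *new;

	/* Writer side: publish a new object, then defer-free the old one. */
	new = uma_zalloc_smr(smrs_zone, M_WAITOK);
	old = (void *)atomic_swap_ptr(&smrs_current, (uintptr_t)new);
	uma_zfree_smr(smrs_zone, old);	/* reclaimed only after readers drain */

	/* Reader side: dereference the shared pointer inside an SMR section. */
	smr_enter(smrs_smr);
	cur = (void *)atomic_load_ptr(&smrs_current);
	/* ... '*cur' is safe to use here; it cannot be freed underneath us ... */
	smr_exit(smrs_smr);
}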

svn commit: r357316 - in head/sys: kern sys

2020-01-30 Thread Jeff Roberson
Author: jeff
Date: Fri Jan 31 02:08:09 2020
New Revision: 357316
URL: https://svnweb.freebsd.org/changeset/base/357316

Log:
  Don't use "All rights reserved" in new copyrights.
  
  Requested by: rgrimes

Modified:
  head/sys/kern/subr_smr.c
  head/sys/sys/_smr.h
  head/sys/sys/smr.h

Modified: head/sys/kern/subr_smr.c
==
--- head/sys/kern/subr_smr.cFri Jan 31 02:03:22 2020(r357315)
+++ head/sys/kern/subr_smr.cFri Jan 31 02:08:09 2020(r357316)
@@ -1,8 +1,7 @@
 /*-
  * SPDX-License-Identifier: BSD-2-Clause-FreeBSD
  *
- * Copyright (c) 2019 Jeffrey Roberson 
- * All rights reserved.
+ * Copyright (c) 2019,2020 Jeffrey Roberson 
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions

Modified: head/sys/sys/_smr.h
==
--- head/sys/sys/_smr.h Fri Jan 31 02:03:22 2020(r357315)
+++ head/sys/sys/_smr.h Fri Jan 31 02:08:09 2020(r357316)
@@ -1,8 +1,7 @@
 /*-
  * SPDX-License-Identifier: BSD-2-Clause-FreeBSD
  *
- * Copyright (c) 2019,2020 Jeffrey Roberson 
- * All rights reserved.
+ * Copyright (c) 2019, 2020 Jeffrey Roberson 
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions

Modified: head/sys/sys/smr.h
==
--- head/sys/sys/smr.h  Fri Jan 31 02:03:22 2020(r357315)
+++ head/sys/sys/smr.h  Fri Jan 31 02:08:09 2020(r357316)
@@ -1,8 +1,7 @@
 /*-
  * SPDX-License-Identifier: BSD-2-Clause-FreeBSD
  *
- * Copyright (c) 2019,2020 Jeffrey Roberson 
- * All rights reserved.
+ * Copyright (c) 2019, 2020 Jeffrey Roberson 
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
___
svn-src-head@freebsd.org mailing list
https://lists.freebsd.org/mailman/listinfo/svn-src-head
To unsubscribe, send any mail to "svn-src-head-unsubscr...@freebsd.org"


svn commit: r357315 - head/sys/vm

2020-01-30 Thread Jeff Roberson
Author: jeff
Date: Fri Jan 31 02:03:22 2020
New Revision: 357315
URL: https://svnweb.freebsd.org/changeset/base/357315

Log:
  Fix LINT build with MEMGUARD.

Modified:
  head/sys/vm/uma_core.c

Modified: head/sys/vm/uma_core.c
==
--- head/sys/vm/uma_core.c  Fri Jan 31 00:49:51 2020(r357314)
+++ head/sys/vm/uma_core.c  Fri Jan 31 02:03:22 2020(r357315)
@@ -2980,7 +2980,7 @@ uma_zalloc_debug(uma_zone_t zone, void **itemp, void *
 #endif
 
 #ifdef DEBUG_MEMGUARD
-   if ((zone->uz_flags & UMA_ZONE_SMR == 0) && memguard_cmp_zone(zone)) {
+   if ((zone->uz_flags & UMA_ZONE_SMR) == 0 && memguard_cmp_zone(zone)) {
void *item;
item = memguard_alloc(zone->uz_size, flags);
if (item != NULL) {
@@ -3014,7 +3014,7 @@ uma_zfree_debug(uma_zone_t zone, void *item, void *uda
("uma_zfree_debug: called with spinlock or critical section held"));
 
 #ifdef DEBUG_MEMGUARD
-   if ((zone->uz_flags & UMA_ZONE_SMR == 0) && is_memguard_addr(item)) {
+   if ((zone->uz_flags & UMA_ZONE_SMR) == 0 && is_memguard_addr(item)) {
if (zone->uz_dtor != NULL)
zone->uz_dtor(item, zone->uz_size, udata);
if (zone->uz_fini != NULL)
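
For reference, the precedence pitfall being fixed here: in C, == binds more
tightly than &, so the unparenthesized test masked uz_flags with the result of
the comparison instead of comparing the masked value.  A tiny stand-alone
illustration (the constants are invented for the example and are not UMA's
real flag values):

#include <stdio.h>

#define UZ_FLAGS	0x1	/* some unrelated flag set; the SMR flag is not */
#define ZONE_SMR	0x4	/* stand-in for UMA_ZONE_SMR */

int
main(void)
{
	/* Parsed as UZ_FLAGS & (ZONE_SMR == 0), i.e. 0x1 & 0, which is false. */
	printf("%d\n", UZ_FLAGS & ZONE_SMR == 0);
	/* The intended test: (0x1 & 0x4) == 0, which is true. */
	printf("%d\n", (UZ_FLAGS & ZONE_SMR) == 0);
	return (0);
}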
___
svn-src-head@freebsd.org mailing list
https://lists.freebsd.org/mailman/listinfo/svn-src-head
To unsubscribe, send any mail to "svn-src-head-unsubscr...@freebsd.org"


Re: svn commit: r357293 - head/sys/net

2020-01-30 Thread Jeff Roberson

On Thu, 30 Jan 2020, Hans Petter Selasky wrote:


On 2020-01-30 21:56, Gleb Smirnoff wrote:

On Thu, Jan 30, 2020 at 12:04:03PM +, Hans Petter Selasky wrote:
H> Author: hselasky
H> Date: Thu Jan 30 12:04:02 2020
H> New Revision: 357293
H> URL: https://svnweb.freebsd.org/changeset/base/357293
H>
H> Log:
H>   Widen EPOCH(9) usage in netisr.
H>
H>   Software interrupt handlers are allowed to sleep. In swi_net() there
H>   is a read lock behind NETISR_RLOCK() which in turn ends up calling
H>   msleep() which means the whole of swi_net() cannot be protected by an
H>   EPOCH(9) section. By default the NETISR_LOCKING feature is disabled.
H>
H>   This issue was introduced by r357004. This is a preparation step for
H>   replacing the functionality provided by r357004.
H>
H>   Found by:   kib@
H>   Sponsored by:   Mellanox Technologies

What?! NETISR_RLOCK() which in turn ends up calling msleep()? Can you please
explain this nonsense?



See rms_rlock_fallback() in kern_rmlock.c.


The network stack uses rm_, not rms_.
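
To illustrate the constraint from the quoted log message with a hedged sketch
(the handler name and arguments here are invented, and this is not the actual
swi_net()): anything that can end up sleeping - a sleepable lock's slow path,
msleep(), an M_WAITOK allocation - has to stay outside the epoch section, so
only the non-sleeping packet processing is bracketed by the NET_EPOCH macros
used elsewhere in these commits.

static void
netisr_handler_sketch(struct ifnet *ifp, struct mbuf *m)
{
	struct epoch_tracker et;

	/*
	 * Work that may sleep must happen out here, before the epoch
	 * section is entered.
	 */

	NET_EPOCH_ENTER(et);
	(*ifp->if_input)(ifp, m);	/* non-sleeping packet processing only */
	NET_EPOCH_EXIT(et);
}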

Jeff



--HPS


___
svn-src-head@freebsd.org mailing list
https://lists.freebsd.org/mailman/listinfo/svn-src-head
To unsubscribe, send any mail to "svn-src-head-unsubscr...@freebsd.org"


Re: svn commit: r357293 - head/sys/net

2020-01-30 Thread Jeff Roberson

On Thu, 30 Jan 2020, Gleb Smirnoff wrote:


On Thu, Jan 30, 2020 at 12:04:03PM +, Hans Petter Selasky wrote:
H> Author: hselasky
H> Date: Thu Jan 30 12:04:02 2020
H> New Revision: 357293
H> URL: https://svnweb.freebsd.org/changeset/base/357293
H>
H> Log:
H>   Widen EPOCH(9) usage in netisr.
H>
H>   Software interrupt handlers are allowed to sleep. In swi_net() there
H>   is a read lock behind NETISR_RLOCK() which in turn ends up calling
H>   msleep() which means the whole of swi_net() cannot be protected by an
H>   EPOCH(9) section. By default the NETISR_LOCKING feature is disabled.
H>
H>   This issue was introduced by r357004. This is a preparation step for
H>   replacing the functionality provided by r357004.
H>
H>   Found by:   kib@
H>   Sponsored by:   Mellanox Technologies

What?! NETISR_RLOCK() which in turn ends up calling msleep()? Can you please
explain this nonsense?


It does not look like NETISR_RLOCK is configured as sleepable, so it just
uses a mtx.


Jeff



--
Gleb Smirnoff


___
svn-src-head@freebsd.org mailing list
https://lists.freebsd.org/mailman/listinfo/svn-src-head
To unsubscribe, send any mail to "svn-src-head-unsubscr...@freebsd.org"


Re: svn commit: r357291 - in head/sys/dev: bwi bwn ipw iwi iwm iwn malo mwl ral rtwn/pci wi wpi wtap

2020-01-30 Thread Jeff Roberson

On Thu, 30 Jan 2020, Gleb Smirnoff wrote:


On Thu, Jan 30, 2020 at 10:28:01AM +, Hans Petter Selasky wrote:
H> Author: hselasky
H> Date: Thu Jan 30 10:28:01 2020
H> New Revision: 357291
H> URL: https://svnweb.freebsd.org/changeset/base/357291
H>
H> Log:
H>   Widen EPOCH(9) usage in PCI WLAN drivers.
H>
H>   Make sure all occurrences of ieee80211_input_xxx() in sys/dev are
H>   covered by a network epoch section. Do not depend on the interrupt
H>   handler nor any taskqueues being in a network epoch section.
H>
H>   This patch should unbreak the PCI WLAN drivers after r357004.
H>
H>   Pointy hat: glebius@
H>   Sponsored by:   Mellanox Technologies

Hey, I have reviewed all of them.

The following drivers were not broken, and your change does 100%
recursive epoch_enter:

bwi, ipw, iwi, iwm, iwn, ral, rtwn, wi, wpi

The following drivers use taskq and would be fixed by D23408:

bwn, malo, mwl, wtap

P.S. A funny note about wtap. You even modified a function that
is dead code - wtap_rx_deliver(). That gives some clue about the quality
of your sweep over all the drivers.


I would strongly suggest that we not make more changes to this area
without a discussion on a review to make sure we're all in agreement.
There are some fine technical details which would benefit from multiple
eyes, and failing to act together is creating more conflict than is
necessary.  I volunteer to be on the reviews as an impartial third party.


Thanks,
Jeff



--
Gleb Smirnoff


___
svn-src-head@freebsd.org mailing list
https://lists.freebsd.org/mailman/listinfo/svn-src-head
To unsubscribe, send any mail to "svn-src-head-unsubscr...@freebsd.org"


Re: svn commit: r357291 - in head/sys/dev: bwi bwn ipw iwi iwm iwn malo mwl ral rtwn/pci wi wpi wtap

2020-01-30 Thread Jeff Roberson

On Thu, 30 Jan 2020, Hans Petter Selasky wrote:


Author: hselasky
Date: Thu Jan 30 10:28:01 2020
New Revision: 357291
URL: https://svnweb.freebsd.org/changeset/base/357291

Log:
 Widen EPOCH(9) usage in PCI WLAN drivers.

 Make sure all occurrences of ieee80211_input_xxx() in sys/dev are
 covered by a network epoch section. Do not depend on the interrupt
 handler nor any taskqueues being in a network epoch section.

 This patch should unbreak the PCI WLAN drivers after r357004.

 Pointy hat:glebius@


I understand your frustration at the bugs introduced by this change, but
the passive-aggressive "pointy hat" is not appropriate.  It is sufficient
to note the revision.


Jeff



 Sponsored by:  Mellanox Technologies

Modified:
 head/sys/dev/bwi/if_bwi.c
 head/sys/dev/bwn/if_bwn.c
 head/sys/dev/ipw/if_ipw.c
 head/sys/dev/iwi/if_iwi.c
 head/sys/dev/iwm/if_iwm.c
 head/sys/dev/iwn/if_iwn.c
 head/sys/dev/malo/if_malo.c
 head/sys/dev/mwl/if_mwl.c
 head/sys/dev/ral/rt2560.c
 head/sys/dev/ral/rt2661.c
 head/sys/dev/ral/rt2860.c
 head/sys/dev/rtwn/pci/rtwn_pci_rx.c
 head/sys/dev/wi/if_wi.c
 head/sys/dev/wpi/if_wpi.c
 head/sys/dev/wtap/if_wtap.c

Modified: head/sys/dev/bwi/if_bwi.c
==
--- head/sys/dev/bwi/if_bwi.c   Thu Jan 30 09:56:57 2020(r357290)
+++ head/sys/dev/bwi/if_bwi.c   Thu Jan 30 10:28:01 2020(r357291)
@@ -1506,6 +1506,7 @@ bwi_stop_locked(struct bwi_softc *sc, int statechg)
void
bwi_intr(void *xsc)
{
+   struct epoch_tracker et;
struct bwi_softc *sc = xsc;
struct bwi_mac *mac;
uint32_t intr_status;
@@ -1625,7 +1626,9 @@ bwi_intr(void *xsc)
device_printf(sc->sc_dev, "intr noise\n");

if (txrx_intr_status[0] & BWI_TXRX_INTR_RX) {
+   NET_EPOCH_ENTER(et);
rx_data = sc->sc_rxeof(sc);
+   NET_EPOCH_EXIT(et);
if (sc->sc_flags & BWI_F_STOP) {
BWI_UNLOCK(sc);
return;

Modified: head/sys/dev/bwn/if_bwn.c
==
--- head/sys/dev/bwn/if_bwn.c   Thu Jan 30 09:56:57 2020(r357290)
+++ head/sys/dev/bwn/if_bwn.c   Thu Jan 30 10:28:01 2020(r357291)
@@ -5072,6 +5072,7 @@ bwn_intr(void *arg)
static void
bwn_intrtask(void *arg, int npending)
{
+   struct epoch_tracker et;
struct bwn_mac *mac = arg;
struct bwn_softc *sc = mac->mac_sc;
uint32_t merged = 0;
@@ -5132,6 +5133,7 @@ bwn_intrtask(void *arg, int npending)
if (mac->mac_reason_intr & BWN_INTR_NOISESAMPLE_OK)
bwn_intr_noise(mac);

+   NET_EPOCH_ENTER(et);
if (mac->mac_flags & BWN_MAC_FLAG_DMA) {
if (mac->mac_reason[0] & BWN_DMAINTR_RX_DONE) {
bwn_dma_rx(mac->mac_method.dma.rx);
@@ -5139,6 +5141,7 @@ bwn_intrtask(void *arg, int npending)
}
} else
rx = bwn_pio_rx(&mac->mac_method.pio.rx);
+   NET_EPOCH_EXIT(et);

KASSERT(!(mac->mac_reason[1] & BWN_DMAINTR_RX_DONE), ("%s", __func__));
KASSERT(!(mac->mac_reason[2] & BWN_DMAINTR_RX_DONE), ("%s", __func__));

Modified: head/sys/dev/ipw/if_ipw.c
==
--- head/sys/dev/ipw/if_ipw.c   Thu Jan 30 09:56:57 2020(r357290)
+++ head/sys/dev/ipw/if_ipw.c   Thu Jan 30 10:28:01 2020(r357291)
@@ -1159,6 +1159,7 @@ static void
ipw_rx_data_intr(struct ipw_softc *sc, struct ipw_status *status,
struct ipw_soft_bd *sbd, struct ipw_soft_buf *sbuf)
{
+   struct epoch_tracker et;
+   struct ieee80211com *ic = &sc->sc_ic;
struct mbuf *mnew, *m;
struct ieee80211_node *ni;
@@ -1230,11 +1231,13 @@ ipw_rx_data_intr(struct ipw_softc *sc, struct ipw_stat

IPW_UNLOCK(sc);
ni = ieee80211_find_rxnode(ic, mtod(m, struct ieee80211_frame_min *));
+   NET_EPOCH_ENTER(et);
if (ni != NULL) {
(void) ieee80211_input(ni, m, rssi - nf, nf);
ieee80211_free_node(ni);
} else
(void) ieee80211_input_all(ic, m, rssi - nf, nf);
+   NET_EPOCH_EXIT(et);
IPW_LOCK(sc);

bus_dmamap_sync(sc->rbd_dmat, sc->rbd_map, BUS_DMASYNC_PREWRITE);

Modified: head/sys/dev/iwi/if_iwi.c
==
--- head/sys/dev/iwi/if_iwi.c   Thu Jan 30 09:56:57 2020(r357290)
+++ head/sys/dev/iwi/if_iwi.c   Thu Jan 30 10:28:01 2020(r357291)
@@ -1181,6 +1181,7 @@ static void
iwi_frame_intr(struct iwi_softc *sc, struct iwi_rx_data *data, int i,
struct iwi_frame *frame)
{
+   struct epoch_tracker et;
+   struct ieee80211com *ic = &sc->sc_ic;
struct mbuf *mnew, *m;
struct ieee80211_node *ni;
@@ -1270,11 +1271,13 @@ iwi_frame_intr(struct iwi_softc *sc, struct iwi_rx_dat
IWI_UNLOCK(sc);

 

Re: svn commit: r357253 - head/sys/vm

2020-01-29 Thread Jeff Roberson

Thank you,
Jeff

On Wed, 29 Jan 2020, Konstantin Belousov wrote:


Author: kib
Date: Wed Jan 29 12:02:47 2020
New Revision: 357253
URL: https://svnweb.freebsd.org/changeset/base/357253

Log:
 Restore OOM logic on page fault after r357026.

 Right now OOM is initiated unconditionally on the page allocation
 failure, after the wait.

 Reported by:   Mark Millard 
 Reviewed by:   cy, markj
 Sponsored by:  The FreeBSD Foundation
 Differential revision: https://reviews.freebsd.org/D23409

Modified:
 head/sys/vm/vm_fault.c

Modified: head/sys/vm/vm_fault.c
==
--- head/sys/vm/vm_fault.c  Wed Jan 29 09:36:59 2020(r357252)
+++ head/sys/vm/vm_fault.c  Wed Jan 29 12:02:47 2020(r357253)
@@ -1073,12 +1073,14 @@ vm_fault_allocate(struct faultstate *fs)
fs->oom < vm_pfault_oom_attempts) {
fs->oom++;
vm_waitpfault(dset, vm_pfault_oom_wait * hz);
+   } else  {
+   if (bootverbose)
+   printf(
+   "proc %d (%s) failed to alloc page on fault, starting OOM\n",
+   curproc->p_pid, curproc->p_comm);
+   vm_pageout_oom(VM_OOM_MEM_PF);
+   fs->oom = 0;
}
-   if (bootverbose)
-   printf(
-"proc %d (%s) failed to alloc page on fault, starting OOM\n",
-   curproc->p_pid, curproc->p_comm);
-   vm_pageout_oom(VM_OOM_MEM_PF);
return (KERN_RESOURCE_SHORTAGE);
}
fs->oom = 0;


___
svn-src-head@freebsd.org mailing list
https://lists.freebsd.org/mailman/listinfo/svn-src-head
To unsubscribe, send any mail to "svn-src-head-unsubscr...@freebsd.org"


Re: svn commit: r357051 - head/sys/dev/bge

2020-01-26 Thread Jeff Roberson

On Sun, 26 Jan 2020, John Baldwin wrote:


On 1/23/20 7:32 PM, Gleb Smirnoff wrote:

On Thu, Jan 23, 2020 at 05:09:14PM -1000, Jeff Roberson wrote:
J> While we don't have a policy strictly requiring reviews it is the norm to
J> have substantial changes socialized and reviewed.  I appreciate the work
J> that you are doing but it likely should've been discussed somewhere
J> more publicly.  I apologized if I missed it but I don't see reference to
J> anything.

That was https://reviews.freebsd.org/D23242


A review alone isn't sufficient for large, sweeping changes in my mind.
For major changes, a thread on arch@ or net@ or the like is probably more
appropriate.  You can include a link to a review or git branch, etc. in
that e-mail, but Phabricator isn't as well suited to higher-level
design-review discussion; it is better suited to implementation review.

J> Architecturally I am more concerned with the coarseness of net_epoch and
J> the duration of hold becoming a resource utilization problem in high
J> turn-over workloads.  Like short connection tcp.  Has anyone done
J> substantial testing here?  epoch as it is today will hold every free
J> callback for a minimum of several clock ticks and a maximum of 2x the
J> duration of the longest epoch section time.  With preemption, etc. this
J> could be 100s of ms of PCBs held.

We also are concerned about that theoretically. Haven't yet seen effect
in practice, but our sessions are mostly longer living. First we have the
tunable to limit batching. Second, there are some ideas on how to improve
the garbage collector performance if it becomes an issue.


There are other workloads than Netflix. ;)  Verisign has incredibly short-lived
connections with very high turnover.  I think, though, that they have already
abandoned the in-tree network stack for a userland stack built on netmap.  Still,
there are probably other FreeBSD users somewhere in the middle who shouldn't
be ignored.

Packet batching would not be impossible: simply use m_nextpkt chains in
mbufs passed up to ether_input and have ether_input pass them in a loop to
the next higher layer (as a first step).  That would reduce unlock/lock
operations in drivers (for those still using locks on receive) as well as
permit ether_input to process batches under a single epoch invocation.


This is actually the approach that I took for Nokia.  You could prefetch
m->m_nextpkt at the top of the loop iteration.  It was very effective there.
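
As a rough sketch of what that batched, prefetching input loop could look
like on the driver side (illustrative only - no such helper exists in the
tree, and the function name is invented): the whole m_nextpkt chain is walked
under a single epoch section, and the next packet is lifted and prefetched
before the current one is pushed up the stack.

static void
driver_rx_batch_sketch(struct ifnet *ifp, struct mbuf *m)
{
	struct epoch_tracker et;
	struct mbuf *next;

	NET_EPOCH_ENTER(et);		/* one epoch section for the whole chain */
	while (m != NULL) {
		next = m->m_nextpkt;	/* lift the next packet early */
		if (next != NULL)
			__builtin_prefetch(next);
		m->m_nextpkt = NULL;
		(*ifp->if_input)(ifp, m);
		m = next;
	}
	NET_EPOCH_EXIT(et);
}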


Seeing how many drivers and unexpected entry points had to have the
NET_EPOCH sections added, I would want to review again once it's stable and
see if there is a way to simplify through API changes.  It seems like more
than expected slipped through the cracks, and I worry about long-term
maintenance.


Thanks,
Jeff



--
John Baldwin


___
svn-src-head@freebsd.org mailing list
https://lists.freebsd.org/mailman/listinfo/svn-src-head
To unsubscribe, send any mail to "svn-src-head-unsubscr...@freebsd.org"


Re: svn commit: r357051 - head/sys/dev/bge

2020-01-23 Thread Jeff Roberson

On Thu, 23 Jan 2020, Gleb Smirnoff wrote:


On Thu, Jan 23, 2020 at 05:09:14PM -1000, Jeff Roberson wrote:
J> While we don't have a policy strictly requiring reviews it is the norm to
J> have substantial changes socialized and reviewed.  I appreciate the work
J> that you are doing but it likely should've been discussed somewhere
J> more publicly.  I apologized if I missed it but I don't see reference to
J> anything.

That was https://reviews.freebsd.org/D23242


Ok thank you.  Can you tag commits so people can see the discussion?  Was 
it in one I missed?  When I'm committing a long patch series I include the 
link in all of them.


Ryan, are you subscribed to @networking?



J> Architecturally I am more concerned with the coarseness of net_epoch and
J> the duration of hold becoming a resource utilization problem in high
J> turn-over workloads.  Like short connection tcp.  Has anyone done
J> substantial testing here?  epoch as it is today will hold every free
J> callback for a minimum of several clock ticks and a maximum of 2x the
J> duration of the longest epoch section time.  With preemption, etc. this
J> could be 100s of ms of PCBs held.

We also are concerned about that theoretically. Haven't yet seen effect
in practice, but our sessions are mostly longer living. First we have the
tunable to limit batching. Second, there are some ideas on how to improve
the garbage collector performance if it becomes an issue.


I am often surprised at how much CPU time I see on Linux spent in RCU free
processing.  Many of these things are written in such a way that everything
is cache cold by the time you swing back around, so the callout walk is a
giant cold linked list.
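
As a sketch of the mitigation being hinted at (the types and names below are
invented for illustration and are not epoch(9) or RCU internals): when
draining a deferred-free list whose nodes are almost certainly cache cold,
prefetch the next node before processing the current one so the miss overlaps
with useful work.

struct deferred {			/* illustrative callback node */
	struct deferred	*next;
	void		(*func)(void *);
	void		*arg;
};

static void
drain_deferred_sketch(struct deferred *head)
{
	struct deferred *d, *next;

	for (d = head; d != NULL; d = next) {
		next = d->next;
		if (next != NULL)
			__builtin_prefetch(next);	/* warm the next cold node */
		d->func(d->arg);
	}
}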


Thanks,
Jeff



--
Gleb Smirnoff


___
svn-src-head@freebsd.org mailing list
https://lists.freebsd.org/mailman/listinfo/svn-src-head
To unsubscribe, send any mail to "svn-src-head-unsubscr...@freebsd.org"


Re: svn commit: r357051 - head/sys/dev/bge

2020-01-23 Thread Jeff Roberson

On Thu, 23 Jan 2020, Gleb Smirnoff wrote:


On Thu, Jan 23, 2020 at 08:33:58PM -0500, Ryan Stone wrote:
R> > Because at interrupt level we can batch multiple packets in a single epoch.
R> > This speeds up unfiltered packet forwarding performance by 5%.
R> >
R> > With driver level pfil hooks I would claim even more improvement, because before
R> > the change we needed to enter epoch twice - once for filtering, then for ether_input.
R> >
R> > Epoch isn't a layer, it is a synchronisation primitive, so I disagree about
R> > statement on layering violation.
R>
R> Epoch is a synchronization primitive, but the net_epoch is absolutely
R> a part of the networking layer.  If we need better batching then the
R> correct solution is to introduce a batched interface for drivers to
R> push packets up the stack, not to mess around at the interrupt layer.

Such interface of course would be valuable but will not cover case
when an interrupt arrives during processing of previous one. So its
batching possiblities are limited compared to interrupt level batching.

And I already noted that ether_input() isn't the only way to enter
the network stack.

R> Note that the only reason why this works for mlx4/mlx5 is that
R> linuxkpi *always* requests a INTR_TYPE_NET no matter what driver is
R> running.  This means that all drm graphics driver interrupts are now
R> running under the net_epoch:
R>
R> 
https://svnweb.freebsd.org/base/head/sys/compat/linuxkpi/common/include/linux/interrupt.h?revision=352205=markup#l103


The historical reason is that linuxkpi was originally made to support OFED,
and there was no real way to get this information from the driver.




Well, it is not my fault that a video driver requests an INTR_TYPE_NET
interrupt. I mean, you can't put this forward as a rationale against using
the network epoch for all interrupts that declare themselves as network
interrupts. However, this is harmless.


While we don't have a policy strictly requiring reviews, it is the norm to
have substantial changes socialized and reviewed.  I appreciate the work
that you are doing, but it likely should've been discussed somewhere more
publicly.  I apologize if I missed it, but I don't see a reference to
anything.


Architecturally I am more concerned with the coarseness of net_epoch and
the duration of the hold becoming a resource utilization problem in
high-turnover workloads, like short-connection TCP.  Has anyone done
substantial testing here?  Epoch as it is today will hold every free
callback for a minimum of several clock ticks and a maximum of 2x the
duration of the longest epoch section.  With preemption, etc., this could
be 100s of ms of PCBs held.
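
To put rough, purely illustrative numbers on that: at 100,000 connection
turnovers per second with frees held for 100 ms, about 100,000/s x 0.1 s =
10,000 freed PCBs would be pending reclamation at any instant - memory that
an immediate free would have returned right away.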


Thanks,
Jeff



--
Gleb Smirnoff


___
svn-src-head@freebsd.org mailing list
https://lists.freebsd.org/mailman/listinfo/svn-src-head
To unsubscribe, send any mail to "svn-src-head-unsubscr...@freebsd.org"


Re: svn commit: r357055 - head/sys/sparc64/sparc64

2020-01-23 Thread Jeff Roberson

On Thu, 23 Jan 2020, Mark Johnston wrote:


Author: markj
Date: Thu Jan 23 17:18:58 2020
New Revision: 357055
URL: https://svnweb.freebsd.org/changeset/base/357055

Log:
 sparc64: Busy the TSB page before freeing it in pmap_release().

 This is now required by vm_page_free().

 PR:243534
 Reported and tested by:Michael Reim 

Modified:
 head/sys/sparc64/sparc64/pmap.c

Modified: head/sys/sparc64/sparc64/pmap.c
==
--- head/sys/sparc64/sparc64/pmap.c Thu Jan 23 17:08:33 2020
(r357054)
+++ head/sys/sparc64/sparc64/pmap.c Thu Jan 23 17:18:58 2020
(r357055)
@@ -1302,6 +1302,7 @@ pmap_release(pmap_t pm)
m = TAILQ_FIRST(&obj->memq);
m->md.pmap = NULL;
vm_page_unwire_noq(m);
+   vm_page_xbusy(m);


vm_page_xbusy() is unsafe long-term, and I will be removing it as soon as I
get patches into drm.  It technically 'works' now, but not for great
reasons.


Thanks,
Jeff


vm_page_free_zero(m);
}
VM_OBJECT_WUNLOCK(obj);


___
svn-src-head@freebsd.org mailing list
https://lists.freebsd.org/mailman/listinfo/svn-src-head
To unsubscribe, send any mail to "svn-src-head-unsubscr...@freebsd.org"


Re: svn commit: r357028 - head/sys/vm

2020-01-22 Thread Jeff Roberson
You may be asking yourself why jeff just squished a bunch of code around
with little functional change.  I could not justify adding more complexity
to a 900-line function with a handful of gotos.  It is ~300 lines now.  I
tested the whole set together.  I apologize if there are bugs.


Other than comment cleanup, I will probably not commit again for a little
while.  The commits that follow will enable lockless vm object lookup,
which has given a 10x improvement in massively parallel builds on large
machines (100+ cores).


Thanks,
Jeff

On Thu, 23 Jan 2020, Jeff Roberson wrote:


Author: jeff
Date: Thu Jan 23 05:23:37 2020
New Revision: 357028
URL: https://svnweb.freebsd.org/changeset/base/357028

Log:
 (fault 9/9) Move zero fill into a dedicated function to make the object lock
 state more clear.

 Reviewed by:   kib
 Differential Revision: https://reviews.freebsd.org/D23326

Modified:
 head/sys/vm/vm_fault.c

Modified: head/sys/vm/vm_fault.c
==
--- head/sys/vm/vm_fault.c  Thu Jan 23 05:22:02 2020(r357027)
+++ head/sys/vm/vm_fault.c  Thu Jan 23 05:23:37 2020(r357028)
@@ -960,35 +960,8 @@ vm_fault_next(struct faultstate *fs)
 */
VM_OBJECT_ASSERT_WLOCKED(fs->object);
next_object = fs->object->backing_object;
-   if (next_object == NULL) {
-   /*
-* If there's no object left, fill the page in the top
-* object with zeros.
-*/
-   VM_OBJECT_WUNLOCK(fs->object);
-   if (fs->object != fs->first_object) {
-   vm_object_pip_wakeup(fs->object);
-   fs->object = fs->first_object;
-   fs->pindex = fs->first_pindex;
-   }
-   MPASS(fs->first_m != NULL);
-   MPASS(fs->m == NULL);
-   fs->m = fs->first_m;
-   fs->first_m = NULL;
-
-   /*
-* Zero the page if necessary and mark it valid.
-*/
-   if ((fs->m->flags & PG_ZERO) == 0) {
-   pmap_zero_page(fs->m);
-   } else {
-   VM_CNT_INC(v_ozfod);
-   }
-   VM_CNT_INC(v_zfod);
-   vm_page_valid(fs->m);
-
+   if (next_object == NULL)
return (false);
-   }
MPASS(fs->first_m != NULL);
KASSERT(fs->object != next_object, ("object loop %p", next_object));
VM_OBJECT_WLOCK(next_object);
@@ -1002,6 +975,36 @@ vm_fault_next(struct faultstate *fs)
return (true);
}

+static void
+vm_fault_zerofill(struct faultstate *fs)
+{
+
+   /*
+* If there's no object left, fill the page in the top
+* object with zeros.
+*/
+   if (fs->object != fs->first_object) {
+   vm_object_pip_wakeup(fs->object);
+   fs->object = fs->first_object;
+   fs->pindex = fs->first_pindex;
+   }
+   MPASS(fs->first_m != NULL);
+   MPASS(fs->m == NULL);
+   fs->m = fs->first_m;
+   fs->first_m = NULL;
+
+   /*
+* Zero the page if necessary and mark it valid.
+*/
+   if ((fs->m->flags & PG_ZERO) == 0) {
+   pmap_zero_page(fs->m);
+   } else {
+   VM_CNT_INC(v_ozfod);
+   }
+   VM_CNT_INC(v_zfod);
+   vm_page_valid(fs->m);
+}
+
/*
 * Allocate a page directly or via the object populate method.
 */
@@ -1407,11 +1410,13 @@ RetryFault:
 * traverse into a backing object or zero fill if none is
 * found.
 */
-   if (!vm_fault_next(&fs)) {
-   /* Don't try to prefault neighboring pages. */
-   faultcount = 1;
-   break;  /* break to PAGE HAS BEEN FOUND. */
-   }
+   if (vm_fault_next(&fs))
+   continue;
+   VM_OBJECT_WUNLOCK(fs.object);
+   vm_fault_zerofill(&fs);
+   /* Don't try to prefault neighboring pages. */
+   faultcount = 1;
+   break;  /* break to PAGE HAS BEEN FOUND. */
}

/*


___
svn-src-head@freebsd.org mailing list
https://lists.freebsd.org/mailman/listinfo/svn-src-head
To unsubscribe, send any mail to "svn-src-head-unsubscr...@freebsd.org"


svn commit: r357028 - head/sys/vm

2020-01-22 Thread Jeff Roberson
Author: jeff
Date: Thu Jan 23 05:23:37 2020
New Revision: 357028
URL: https://svnweb.freebsd.org/changeset/base/357028

Log:
  (fault 9/9) Move zero fill into a dedicated function to make the object lock
  state more clear.
  
  Reviewed by:  kib
  Differential Revision:https://reviews.freebsd.org/D23326

Modified:
  head/sys/vm/vm_fault.c

Modified: head/sys/vm/vm_fault.c
==
--- head/sys/vm/vm_fault.c  Thu Jan 23 05:22:02 2020(r357027)
+++ head/sys/vm/vm_fault.c  Thu Jan 23 05:23:37 2020(r357028)
@@ -960,35 +960,8 @@ vm_fault_next(struct faultstate *fs)
 */
VM_OBJECT_ASSERT_WLOCKED(fs->object);
next_object = fs->object->backing_object;
-   if (next_object == NULL) {
-   /*
-* If there's no object left, fill the page in the top
-* object with zeros.
-*/
-   VM_OBJECT_WUNLOCK(fs->object);
-   if (fs->object != fs->first_object) {
-   vm_object_pip_wakeup(fs->object);
-   fs->object = fs->first_object;
-   fs->pindex = fs->first_pindex;
-   }
-   MPASS(fs->first_m != NULL);
-   MPASS(fs->m == NULL);
-   fs->m = fs->first_m;
-   fs->first_m = NULL;
-
-   /*
-* Zero the page if necessary and mark it valid.
-*/
-   if ((fs->m->flags & PG_ZERO) == 0) {
-   pmap_zero_page(fs->m);
-   } else {
-   VM_CNT_INC(v_ozfod);
-   }
-   VM_CNT_INC(v_zfod);
-   vm_page_valid(fs->m);
-
+   if (next_object == NULL)
return (false);
-   }
MPASS(fs->first_m != NULL);
KASSERT(fs->object != next_object, ("object loop %p", next_object));
VM_OBJECT_WLOCK(next_object);
@@ -1002,6 +975,36 @@ vm_fault_next(struct faultstate *fs)
return (true);
 }
 
+static void
+vm_fault_zerofill(struct faultstate *fs)
+{
+
+   /*
+* If there's no object left, fill the page in the top
+* object with zeros.
+*/
+   if (fs->object != fs->first_object) {
+   vm_object_pip_wakeup(fs->object);
+   fs->object = fs->first_object;
+   fs->pindex = fs->first_pindex;
+   }
+   MPASS(fs->first_m != NULL);
+   MPASS(fs->m == NULL);
+   fs->m = fs->first_m;
+   fs->first_m = NULL;
+
+   /*
+* Zero the page if necessary and mark it valid.
+*/
+   if ((fs->m->flags & PG_ZERO) == 0) {
+   pmap_zero_page(fs->m);
+   } else {
+   VM_CNT_INC(v_ozfod);
+   }
+   VM_CNT_INC(v_zfod);
+   vm_page_valid(fs->m);
+}
+
 /*
  * Allocate a page directly or via the object populate method.
  */
@@ -1407,11 +1410,13 @@ RetryFault:
 * traverse into a backing object or zero fill if none is
 * found.
 */
-   if (!vm_fault_next(&fs)) {
-   /* Don't try to prefault neighboring pages. */
-   faultcount = 1;
-   break;  /* break to PAGE HAS BEEN FOUND. */
-   }
+   if (vm_fault_next(&fs))
+   continue;
+   VM_OBJECT_WUNLOCK(fs.object);
+   vm_fault_zerofill(&fs);
+   /* Don't try to prefault neighboring pages. */
+   faultcount = 1;
+   break;  /* break to PAGE HAS BEEN FOUND. */
}
 
/*
___
svn-src-head@freebsd.org mailing list
https://lists.freebsd.org/mailman/listinfo/svn-src-head
To unsubscribe, send any mail to "svn-src-head-unsubscr...@freebsd.org"


svn commit: r357027 - head/sys/vm

2020-01-22 Thread Jeff Roberson
Author: jeff
Date: Thu Jan 23 05:22:02 2020
New Revision: 357027
URL: https://svnweb.freebsd.org/changeset/base/357027

Log:
  (fault 8/9) Restructure some code to reduce duplication and simplify flow
  control.
  
  Reviewed by:  dougm, kib, markj
  Differential Revision:https://reviews.freebsd.org/D23321

Modified:
  head/sys/vm/vm_fault.c

Modified: head/sys/vm/vm_fault.c
==
--- head/sys/vm/vm_fault.c  Thu Jan 23 05:19:39 2020(r357026)
+++ head/sys/vm/vm_fault.c  Thu Jan 23 05:22:02 2020(r357027)
@@ -1359,49 +1359,47 @@ RetryFault:
 * object without dropping the lock to preserve atomicity of
 * shadow faults.
 */
-   if (fs.object->type == OBJT_DEFAULT) {
-   if (vm_fault_next())
-   continue;
-   /* Don't try to prefault neighboring pages. */
-   faultcount = 1;
-   break;
-   }
+   if (fs.object->type != OBJT_DEFAULT) {
+   /*
+* At this point, we have either allocated a new page
+* or found an existing page that is only partially
+* valid.
+*
+* We hold a reference on the current object and the
+* page is exclusive busied.  The exclusive busy
+* prevents simultaneous faults and collapses while
+* the object lock is dropped.
+*/
+   VM_OBJECT_WUNLOCK(fs.object);
 
-   /*
-* At this point, we have either allocated a new page or found
-* an existing page that is only partially valid.
-*
-* We hold a reference on the current object and the page is
-* exclusive busied.  The exclusive busy prevents simultaneous
-* faults and collapses while the object lock is dropped.
-*/
-   VM_OBJECT_WUNLOCK(fs.object);
+   /*
+* If the pager for the current object might have
+* the page, then determine the number of additional
+* pages to read and potentially reprioritize
+* previously read pages for earlier reclamation.
+* These operations should only be performed once per
+* page fault.  Even if the current pager doesn't
+* have the page, the number of additional pages to
+* read will apply to subsequent objects in the
+* shadow chain.
+*/
+   if (nera == -1 && !P_KILLED(curproc))
+   nera = vm_fault_readahead();
 
-   /*
-* If the pager for the current object might have the page,
-* then determine the number of additional pages to read and
-* potentially reprioritize previously read pages for earlier
-* reclamation.  These operations should only be performed
-* once per page fault.  Even if the current pager doesn't
-* have the page, the number of additional pages to read will
-* apply to subsequent objects in the shadow chain.
-*/
-   if (nera == -1 && !P_KILLED(curproc))
-   nera = vm_fault_readahead();
-
-   rv = vm_fault_getpages(&fs, nera, &behind, &ahead);
-   if (rv == KERN_SUCCESS) {
-   faultcount = behind + 1 + ahead;
-   hardfault = true;
-   break; /* break to PAGE HAS BEEN FOUND. */
-   }
-   if (rv == KERN_RESOURCE_SHORTAGE)
-   goto RetryFault;
-   VM_OBJECT_WLOCK(fs.object);
-   if (rv == KERN_OUT_OF_BOUNDS) {
-   fault_page_free(&fs.m);
-   unlock_and_deallocate(&fs);
-   return (rv);
+   rv = vm_fault_getpages(&fs, nera, &behind, &ahead);
+   if (rv == KERN_SUCCESS) {
+   faultcount = behind + 1 + ahead;
+   hardfault = true;
+   break; /* break to PAGE HAS BEEN FOUND. */
+   }
+   if (rv == KERN_RESOURCE_SHORTAGE)
+   goto RetryFault;
+   VM_OBJECT_WLOCK(fs.object);
+   if (rv == KERN_OUT_OF_BOUNDS) {
+   fault_page_free(&fs.m);
+   unlock_and_deallocate(&fs);
+   return (rv);

svn commit: r357026 - head/sys/vm

2020-01-22 Thread Jeff Roberson
Author: jeff
Date: Thu Jan 23 05:19:39 2020
New Revision: 357026
URL: https://svnweb.freebsd.org/changeset/base/357026

Log:
  (fault 7/9) Move fault population and allocation into a dedicated function
  
  Reviewed by:  kib
  Differential Revision:https://reviews.freebsd.org/D23320

Modified:
  head/sys/vm/vm_fault.c

Modified: head/sys/vm/vm_fault.c
==
--- head/sys/vm/vm_fault.c  Thu Jan 23 05:18:00 2020(r357025)
+++ head/sys/vm/vm_fault.c  Thu Jan 23 05:19:39 2020(r357026)
@@ -126,6 +126,7 @@ struct faultstate {
vm_prot_t   fault_type;
vm_prot_t   prot;
int fault_flags;
+   int oom;
boolean_t   wired;
 
/* Page reference for cow. */
@@ -455,7 +456,7 @@ vm_fault_populate(struct faultstate *fs)
 */
vm_fault_restore_map_lock(fs);
if (fs->map->timestamp != fs->map_generation)
-   return (KERN_RESOURCE_SHORTAGE); /* RetryFault */
+   return (KERN_RESTART);
return (KERN_NOT_RECEIVER);
}
if (rv != VM_PAGER_OK)
@@ -471,7 +472,7 @@ vm_fault_populate(struct faultstate *fs)
if (fs->map->timestamp != fs->map_generation) {
vm_fault_populate_cleanup(fs->first_object, pager_first,
pager_last);
-   return (KERN_RESOURCE_SHORTAGE); /* RetryFault */
+   return (KERN_RESTART);
}
 
/*
@@ -1001,7 +1002,87 @@ vm_fault_next(struct faultstate *fs)
return (true);
 }
 
+/*
+ * Allocate a page directly or via the object populate method.
+ */
+static int
+vm_fault_allocate(struct faultstate *fs)
+{
+   struct domainset *dset;
+   int alloc_req;
+   int rv;
 
+
+   if ((fs->object->flags & OBJ_SIZEVNLOCK) != 0) {
+   rv = vm_fault_lock_vnode(fs, true);
+   MPASS(rv == KERN_SUCCESS || rv == KERN_RESOURCE_SHORTAGE);
+   if (rv == KERN_RESOURCE_SHORTAGE)
+   return (rv);
+   }
+
+   if (fs->pindex >= fs->object->size)
+   return (KERN_OUT_OF_BOUNDS);
+
+   if (fs->object == fs->first_object &&
+   (fs->first_object->flags & OBJ_POPULATE) != 0 &&
+   fs->first_object->shadow_count == 0) {
+   rv = vm_fault_populate(fs);
+   switch (rv) {
+   case KERN_SUCCESS:
+   case KERN_FAILURE:
+   case KERN_RESTART:
+   return (rv);
+   case KERN_NOT_RECEIVER:
+   /*
+* Pager's populate() method
+* returned VM_PAGER_BAD.
+*/
+   break;
+   default:
+   panic("inconsistent return codes");
+   }
+   }
+
+   /*
+* Allocate a new page for this object/offset pair.
+*
+* Unlocked read of the p_flag is harmless. At worst, the P_KILLED
+* might be not observed there, and allocation can fail, causing
+* restart and new reading of the p_flag.
+*/
+   dset = fs->object->domain.dr_policy;
+   if (dset == NULL)
+   dset = curthread->td_domain.dr_policy;
+   if (!vm_page_count_severe_set(>ds_mask) || P_KILLED(curproc)) {
+#if VM_NRESERVLEVEL > 0
+   vm_object_color(fs->object, atop(fs->vaddr) - fs->pindex);
+#endif
+   alloc_req = P_KILLED(curproc) ?
+   VM_ALLOC_SYSTEM : VM_ALLOC_NORMAL;
+   if (fs->object->type != OBJT_VNODE &&
+   fs->object->backing_object == NULL)
+   alloc_req |= VM_ALLOC_ZERO;
+   fs->m = vm_page_alloc(fs->object, fs->pindex, alloc_req);
+   }
+   if (fs->m == NULL) {
+   unlock_and_deallocate(fs);
+   if (vm_pfault_oom_attempts < 0 ||
+   fs->oom < vm_pfault_oom_attempts) {
+   fs->oom++;
+   vm_waitpfault(dset, vm_pfault_oom_wait * hz);
+   }
+   if (bootverbose)
+   printf(
+"proc %d (%s) failed to alloc page on fault, starting OOM\n",
+   curproc->p_pid, curproc->p_comm);
+   vm_pageout_oom(VM_OOM_MEM_PF);
+   return (KERN_RESOURCE_SHORTAGE);
+   }
+   fs->oom = 0;
+
+   return (KERN_NOT_RECEIVER);
+}
+
 /*
  * Call the pager to retrieve the page if there is a chance
  * that the pager has it, and potentially retrieve additional
@@ -1131,9 +1212,8 @@ vm_fault(vm_map_t map, vm_offset_t vaddr, vm_prot_t fa
 int fault_flags, vm_page_t *m_hold)
 {
struct faultstate fs;
-   struct domainset *dset;
-   int ahead, alloc_req, behind, faultcount;
-   int nera, oom, result, rv;
+   int ahead, behind, 

svn commit: r357025 - head/sys/vm

2020-01-22 Thread Jeff Roberson
Author: jeff
Date: Thu Jan 23 05:18:00 2020
New Revision: 357025
URL: https://svnweb.freebsd.org/changeset/base/357025

Log:
  (fault 6/9) Move getpages and associated logic into a dedicated function.
  
  Reviewed by:  kib
  Differential Revision:https://reviews.freebsd.org/D23311

Modified:
  head/sys/vm/vm_fault.c

Modified: head/sys/vm/vm_fault.c
==
--- head/sys/vm/vm_fault.c  Thu Jan 23 05:14:41 2020(r357024)
+++ head/sys/vm/vm_fault.c  Thu Jan 23 05:18:00 2020(r357025)
@@ -1001,7 +1001,96 @@ vm_fault_next(struct faultstate *fs)
return (true);
 }
 
+
 /*
+ * Call the pager to retrieve the page if there is a chance
+ * that the pager has it, and potentially retrieve additional
+ * pages at the same time.
+ */
+static int
+vm_fault_getpages(struct faultstate *fs, int nera, int *behindp, int *aheadp)
+{
+   vm_offset_t e_end, e_start;
+   int ahead, behind, cluster_offset, rv;
+   u_char behavior;
+
+   /*
+* Prepare for unlocking the map.  Save the map
+* entry's start and end addresses, which are used to
+* optimize the size of the pager operation below.
+* Even if the map entry's addresses change after
+* unlocking the map, using the saved addresses is
+* safe.
+*/
+   e_start = fs->entry->start;
+   e_end = fs->entry->end;
+   behavior = vm_map_entry_behavior(fs->entry);
+
+   /*
+* Release the map lock before locking the vnode or
+* sleeping in the pager.  (If the current object has
+* a shadow, then an earlier iteration of this loop
+* may have already unlocked the map.)
+*/
+   unlock_map(fs);
+
+   rv = vm_fault_lock_vnode(fs, false);
+   MPASS(rv == KERN_SUCCESS || rv == KERN_RESOURCE_SHORTAGE);
+   if (rv == KERN_RESOURCE_SHORTAGE)
+   return (rv);
+   KASSERT(fs->vp == NULL || !fs->map->system_map,
+   ("vm_fault: vnode-backed object mapped by system map"));
+
+   /*
+* Page in the requested page and hint the pager,
+* that it may bring up surrounding pages.
+*/
+   if (nera == -1 || behavior == MAP_ENTRY_BEHAV_RANDOM ||
+   P_KILLED(curproc)) {
+   behind = 0;
+   ahead = 0;
+   } else {
+   /* Is this a sequential fault? */
+   if (nera > 0) {
+   behind = 0;
+   ahead = nera;
+   } else {
+   /*
+* Request a cluster of pages that is
+* aligned to a VM_FAULT_READ_DEFAULT
+* page offset boundary within the
+* object.  Alignment to a page offset
+* boundary is more likely to coincide
+* with the underlying file system
+* block than alignment to a virtual
+* address boundary.
+*/
+   cluster_offset = fs->pindex % VM_FAULT_READ_DEFAULT;
+   behind = ulmin(cluster_offset,
+   atop(fs->vaddr - e_start));
+   ahead = VM_FAULT_READ_DEFAULT - 1 - cluster_offset;
+   }
+   ahead = ulmin(ahead, atop(e_end - fs->vaddr) - 1);
+   }
+   *behindp = behind;
+   *aheadp = ahead;
+   rv = vm_pager_get_pages(fs->object, &fs->m, 1, behindp, aheadp);
+   if (rv == VM_PAGER_OK)
+   return (KERN_SUCCESS);
+   if (rv == VM_PAGER_ERROR)
+   printf("vm_fault: pager read error, pid %d (%s)\n",
+   curproc->p_pid, curproc->p_comm);
+   /*
+* If an I/O error occurred or the requested page was
+* outside the range of the pager, clean up and return
+* an error.
+*/
+   if (rv == VM_PAGER_ERROR || rv == VM_PAGER_BAD)
+   return (KERN_OUT_OF_BOUNDS);
+   return (KERN_NOT_RECEIVER);
+}
+
+/*
  * Wait/Retry if the page is busy.  We have to do this if the page is
  * either exclusive or shared busy because the vm_pager may be using
  * read busy for pageouts (and even pageins if it is the vnode pager),
@@ -1043,10 +1132,8 @@ vm_fault(vm_map_t map, vm_offset_t vaddr, vm_prot_t fa
 {
struct faultstate fs;
struct domainset *dset;
-   vm_offset_t e_end, e_start;
-   int ahead, alloc_req, behind, cluster_offset, faultcount;
+   int ahead, alloc_req, behind, faultcount;
int nera, oom, result, rv;
-   u_char behavior;
bool dead, hardfault;
 
VM_CNT_INC(v_vm_faults);
@@ -1282,104 +1369,28 @@ readrest:
 * have the page, the number of additional pages to read will
 * apply to subsequent objects in the shadow chain.
 */
-   if (nera == -1 && 

svn commit: r357024 - head/sys/vm

2020-01-22 Thread Jeff Roberson
Author: jeff
Date: Thu Jan 23 05:14:41 2020
New Revision: 357024
URL: https://svnweb.freebsd.org/changeset/base/357024

Log:
  (fault 5/9)  Move the backing_object traversal into a dedicated function.
  
  Reviewed by:  dougm, kib, markj
  Differential Revision:https://reviews.freebsd.org/D23310

Modified:
  head/sys/vm/vm_fault.c

Modified: head/sys/vm/vm_fault.c
==
--- head/sys/vm/vm_fault.c  Thu Jan 23 05:11:01 2020(r357023)
+++ head/sys/vm/vm_fault.c  Thu Jan 23 05:14:41 2020(r357024)
@@ -932,6 +932,75 @@ vm_fault_cow(struct faultstate *fs)
curthread->td_cow++;
 }
 
+static bool
+vm_fault_next(struct faultstate *fs)
+{
+   vm_object_t next_object;
+
+   /*
+* The requested page does not exist at this object/
+* offset.  Remove the invalid page from the object,
+* waking up anyone waiting for it, and continue on to
+* the next object.  However, if this is the top-level
+* object, we must leave the busy page in place to
+* prevent another process from rushing past us, and
+* inserting the page in that object at the same time
+* that we are.
+*/
+   if (fs->object == fs->first_object) {
+   fs->first_m = fs->m;
+   fs->m = NULL;
+   } else
+   fault_page_free(&fs->m);
+
+   /*
+* Move on to the next object.  Lock the next object before
+* unlocking the current one.
+*/
+   VM_OBJECT_ASSERT_WLOCKED(fs->object);
+   next_object = fs->object->backing_object;
+   if (next_object == NULL) {
+   /*
+* If there's no object left, fill the page in the top
+* object with zeros.
+*/
+   VM_OBJECT_WUNLOCK(fs->object);
+   if (fs->object != fs->first_object) {
+   vm_object_pip_wakeup(fs->object);
+   fs->object = fs->first_object;
+   fs->pindex = fs->first_pindex;
+   }
+   MPASS(fs->first_m != NULL);
+   MPASS(fs->m == NULL);
+   fs->m = fs->first_m;
+   fs->first_m = NULL;
+
+   /*
+* Zero the page if necessary and mark it valid.
+*/
+   if ((fs->m->flags & PG_ZERO) == 0) {
+   pmap_zero_page(fs->m);
+   } else {
+   VM_CNT_INC(v_ozfod);
+   }
+   VM_CNT_INC(v_zfod);
+   vm_page_valid(fs->m);
+
+   return (false);
+   }
+   MPASS(fs->first_m != NULL);
+   KASSERT(fs->object != next_object, ("object loop %p", next_object));
+   VM_OBJECT_WLOCK(next_object);
+   vm_object_pip_add(next_object, 1);
+   if (fs->object != fs->first_object)
+   vm_object_pip_wakeup(fs->object);
+   fs->pindex += OFF_TO_IDX(fs->object->backing_object_offset);
+   VM_OBJECT_WUNLOCK(fs->object);
+   fs->object = next_object;
+
+   return (true);
+}
+
 /*
  * Wait/Retry if the page is busy.  We have to do this if the page is
  * either exclusive or shared busy because the vm_pager may be using
@@ -974,7 +1043,6 @@ vm_fault(vm_map_t map, vm_offset_t vaddr, vm_prot_t fa
 {
struct faultstate fs;
struct domainset *dset;
-   vm_object_t next_object;
vm_offset_t e_end, e_start;
int ahead, alloc_req, behind, cluster_offset, faultcount;
int nera, oom, result, rv;
@@ -1187,8 +1255,13 @@ readrest:
 * object without dropping the lock to preserve atomicity of
 * shadow faults.
 */
-   if (fs.object->type == OBJT_DEFAULT)
-   goto next;
+   if (fs.object->type == OBJT_DEFAULT) {
+   if (vm_fault_next(&fs))
+   continue;
+   /* Don't try to prefault neighboring pages. */
+   faultcount = 1;
+   break;
+   }
 
/*
 * At this point, we have either allocated a new page or found
@@ -1304,70 +1377,14 @@ readrest:
 
}
 
-next:
/*
-* The requested page does not exist at this object/
-* offset.  Remove the invalid page from the object,
-* waking up anyone waiting for it, and continue on to
-* the next object.  However, if this is the top-level
-* object, we must leave the busy page in place to
-* prevent another process from rushing past us, and
-* inserting the page in that object at the same time
-* that we are.
+* The page was not found in the current object.  Try to traverse
+* into a backing object or zero fill if 

svn commit: r357023 - head/sys/vm

2020-01-22 Thread Jeff Roberson
Author: jeff
Date: Thu Jan 23 05:11:01 2020
New Revision: 357023
URL: https://svnweb.freebsd.org/changeset/base/357023

Log:
  (fault 4/9) Move copy-on-write into a dedicated function.
  
  Reviewed by:  kib, markj
  Differential Revision:https://reviews.freebsd.org/D23304

Modified:
  head/sys/vm/vm_fault.c

Modified: head/sys/vm/vm_fault.c
==
--- head/sys/vm/vm_fault.c  Thu Jan 23 05:07:01 2020(r357022)
+++ head/sys/vm/vm_fault.c  Thu Jan 23 05:11:01 2020(r357023)
@@ -846,6 +846,92 @@ vm_fault_relookup(struct faultstate *fs)
return (KERN_SUCCESS);
 }
 
+static void
+vm_fault_cow(struct faultstate *fs)
+{
+   bool is_first_object_locked;
+
+   /*
+* This allows pages to be virtually copied from a backing_object
+* into the first_object, where the backing object has no other
+* refs to it, and cannot gain any more refs.  Instead of a bcopy,
+* we just move the page from the backing object to the first
+* object.  Note that we must mark the page dirty in the first
+* object so that it will go out to swap when needed.
+*/
+   is_first_object_locked = false;
+   if (
+   /*
+* Only one shadow object and no other refs.
+*/
+   fs->object->shadow_count == 1 && fs->object->ref_count == 1 &&
+   /*
+* No other ways to look the object up
+*/
+   fs->object->handle == NULL && (fs->object->flags & OBJ_ANON) != 0 &&
+   /*
+* We don't chase down the shadow chain and we can acquire locks.
+*/
+   (is_first_object_locked = VM_OBJECT_TRYWLOCK(fs->first_object)) &&
+   fs->object == fs->first_object->backing_object &&
+   VM_OBJECT_TRYWLOCK(fs->object)) {
+
+   /*
+* Remove but keep xbusy for replace.  fs->m is moved into
+* fs->first_object and left busy while fs->first_m is
+* conditionally freed.
+*/
+   vm_page_remove_xbusy(fs->m);
+   vm_page_replace(fs->m, fs->first_object, fs->first_pindex,
+   fs->first_m);
+   vm_page_dirty(fs->m);
+#if VM_NRESERVLEVEL > 0
+   /*
+* Rename the reservation.
+*/
+   vm_reserv_rename(fs->m, fs->first_object, fs->object,
+   OFF_TO_IDX(fs->first_object->backing_object_offset));
+#endif
+   VM_OBJECT_WUNLOCK(fs->object);
+   VM_OBJECT_WUNLOCK(fs->first_object);
+   fs->first_m = fs->m;
+   fs->m = NULL;
+   VM_CNT_INC(v_cow_optim);
+   } else {
+   if (is_first_object_locked)
+   VM_OBJECT_WUNLOCK(fs->first_object);
+   /*
+* Oh, well, lets copy it.
+*/
+   pmap_copy_page(fs->m, fs->first_m);
+   vm_page_valid(fs->first_m);
+   if (fs->wired && (fs->fault_flags & VM_FAULT_WIRE) == 0) {
+   vm_page_wire(fs->first_m);
+   vm_page_unwire(fs->m, PQ_INACTIVE);
+   }
+   /*
+* Save the cow page to be released after
+* pmap_enter is complete.
+*/
+   fs->m_cow = fs->m;
+   fs->m = NULL;
+   }
+   /*
+* fs->object != fs->first_object due to above 
+* conditional
+*/
+   vm_object_pip_wakeup(fs->object);
+
+   /*
+* Only use the new page below...
+*/
+   fs->object = fs->first_object;
+   fs->pindex = fs->first_pindex;
+   fs->m = fs->first_m;
+   VM_CNT_INC(v_cow_faults);
+   curthread->td_cow++;
+}
+
 /*
  * Wait/Retry if the page is busy.  We have to do this if the page is
  * either exclusive or shared busy because the vm_pager may be using
@@ -893,7 +979,7 @@ vm_fault(vm_map_t map, vm_offset_t vaddr, vm_prot_t fa
int ahead, alloc_req, behind, cluster_offset, faultcount;
int nera, oom, result, rv;
u_char behavior;
-   bool dead, hardfault, is_first_object_locked;
+   bool dead, hardfault;
 
VM_CNT_INC(v_vm_faults);
 
@@ -1302,90 +1388,8 @@ next:
 * We only really need to copy if we want to write it.
 */
if ((fs.fault_type & (VM_PROT_COPY | VM_PROT_WRITE)) != 0) {
+   vm_fault_cow(&fs);
/*
-* This allows pages to be virtually copied from a 
-* backing_object into the first_object, where the 
-* backing object has no other refs to it, and cannot
-* gain any more refs.  Instead of a bcopy, we just 
-* move the page from the backing object to the 
- 

svn commit: r357022 - head/sys/vm

2020-01-22 Thread Jeff Roberson
Author: jeff
Date: Thu Jan 23 05:07:01 2020
New Revision: 357022
URL: https://svnweb.freebsd.org/changeset/base/357022

Log:
  (fault 3/9) Move map relookup into a dedicated function.
  
  Add a new VM return code KERN_RESTART which means, deallocate and restart in
  fault.
  
  Reviewed by:  kib, markj
  Differential Revision:https://reviews.freebsd.org/D23303

Modified:
  head/sys/vm/vm_fault.c
  head/sys/vm/vm_param.h

Modified: head/sys/vm/vm_fault.c
==
--- head/sys/vm/vm_fault.c  Thu Jan 23 05:05:39 2020(r357021)
+++ head/sys/vm/vm_fault.c  Thu Jan 23 05:07:01 2020(r357022)
@@ -795,6 +795,57 @@ vm_fault_lookup(struct faultstate *fs)
return (KERN_SUCCESS);
 }
 
+static int
+vm_fault_relookup(struct faultstate *fs)
+{
+   vm_object_t retry_object;
+   vm_pindex_t retry_pindex;
+   vm_prot_t retry_prot;
+   int result;
+
+   if (!vm_map_trylock_read(fs->map))
+   return (KERN_RESTART);
+
+   fs->lookup_still_valid = true;
+   if (fs->map->timestamp == fs->map_generation)
+   return (KERN_SUCCESS);
+
+   result = vm_map_lookup_locked(&fs->map, fs->vaddr, fs->fault_type,
+   &fs->entry, &retry_object, &retry_pindex, &retry_prot,
+   &fs->wired);
+   if (result != KERN_SUCCESS) {
+   /*
+* If retry of map lookup would have blocked then
+* retry fault from start.
+*/
+   if (result == KERN_FAILURE)
+   return (KERN_RESTART);
+   return (result);
+   }
+   if (retry_object != fs->first_object ||
+   retry_pindex != fs->first_pindex)
+   return (KERN_RESTART);
+
+   /*
+* Check whether the protection has changed or the object has
+* been copied while we left the map unlocked. Changing from
+* read to write permission is OK - we leave the page
+* write-protected, and catch the write fault. Changing from
+* write to read permission means that we can't mark the page
+* write-enabled after all.
+*/
+   fs->prot &= retry_prot;
+   fs->fault_type &= retry_prot;
+   if (fs->prot == 0)
+   return (KERN_RESTART);
+
+   /* Reassert because wired may have changed. */
+   KASSERT(fs->wired || (fs->fault_flags & VM_FAULT_WIRE) == 0,
+   ("!wired && VM_FAULT_WIRE"));
+
+   return (KERN_SUCCESS);
+}
+
 /*
  * Wait/Retry if the page is busy.  We have to do this if the page is
  * either exclusive or shared busy because the vm_pager may be using
@@ -837,10 +888,8 @@ vm_fault(vm_map_t map, vm_offset_t vaddr, vm_prot_t fa
 {
struct faultstate fs;
struct domainset *dset;
-   vm_object_t next_object, retry_object;
+   vm_object_t next_object;
vm_offset_t e_end, e_start;
-   vm_pindex_t retry_pindex;
-   vm_prot_t retry_prot;
int ahead, alloc_req, behind, cluster_offset, faultcount;
int nera, oom, result, rv;
u_char behavior;
@@ -1363,56 +1412,12 @@ next:
 * lookup.
 */
if (!fs.lookup_still_valid) {
-   if (!vm_map_trylock_read(fs.map)) {
+   result = vm_fault_relookup(&fs);
+   if (result != KERN_SUCCESS) {
fault_deallocate(&fs);
-   goto RetryFault;
-   }
-   fs.lookup_still_valid = true;
-   if (fs.map->timestamp != fs.map_generation) {
-   result = vm_map_lookup_locked(&fs.map, vaddr, fs.fault_type,
-   &fs.entry, &retry_object, &retry_pindex, &retry_prot,
-   &fs.wired);
-
-   /*
-* If we don't need the page any longer, put it on the inactive
-* list (the easiest thing to do here).  If no one needs it,
-* pageout will grab it eventually.
-*/
-   if (result != KERN_SUCCESS) {
-   fault_deallocate(&fs);
-
-   /*
-* If retry of map lookup would have blocked then
-* retry fault from start.
-*/
-   if (result == KERN_FAILURE)
-   goto RetryFault;
-   return (result);
-   }
-   if ((retry_object != fs.first_object) ||
-   (retry_pindex != fs.first_pindex)) {
-   fault_deallocate(&fs);
+   if (result == KERN_RESTART)
goto RetryFault;
-   }
-
-   /*
-* Check whether the protection has changed or the 
object has
-* been copied 

svn commit: r357021 - head/sys/vm

2020-01-22 Thread Jeff Roberson
Author: jeff
Date: Thu Jan 23 05:05:39 2020
New Revision: 357021
URL: https://svnweb.freebsd.org/changeset/base/357021

Log:
  (fault 2/9) Move map lookup into a dedicated function.
  
  Reviewed by:  kib, markj
  Differential Revision:https://reviews.freebsd.org/D23302

Modified:
  head/sys/vm/vm_fault.c

Modified: head/sys/vm/vm_fault.c
==
--- head/sys/vm/vm_fault.c  Thu Jan 23 05:03:34 2020(r357020)
+++ head/sys/vm/vm_fault.c  Thu Jan 23 05:05:39 2020(r357021)
@@ -747,6 +747,54 @@ vm_fault_readahead(struct faultstate *fs)
return (nera);
 }
 
+static int
+vm_fault_lookup(struct faultstate *fs)
+{
+   int result;
+
+   KASSERT(!fs->lookup_still_valid,
+  ("vm_fault_lookup: Map already locked."));
+   result = vm_map_lookup(&fs->map, fs->vaddr, fs->fault_type |
+   VM_PROT_FAULT_LOOKUP, &fs->entry, &fs->first_object,
+   &fs->first_pindex, &fs->prot, &fs->wired);
+   if (result != KERN_SUCCESS) {
+   unlock_vp(fs);
+   return (result);
+   }
+
+   fs->map_generation = fs->map->timestamp;
+
+   if (fs->entry->eflags & MAP_ENTRY_NOFAULT) {
+   panic("%s: fault on nofault entry, addr: %#lx",
+   __func__, (u_long)fs->vaddr);
+   }
+
+   if (fs->entry->eflags & MAP_ENTRY_IN_TRANSITION &&
+   fs->entry->wiring_thread != curthread) {
+   vm_map_unlock_read(fs->map);
+   vm_map_lock(fs->map);
+   if (vm_map_lookup_entry(fs->map, fs->vaddr, &fs->entry) &&
+   (fs->entry->eflags & MAP_ENTRY_IN_TRANSITION)) {
+   unlock_vp(fs);
+   fs->entry->eflags |= MAP_ENTRY_NEEDS_WAKEUP;
+   vm_map_unlock_and_wait(fs->map, 0);
+   } else
+   vm_map_unlock(fs->map);
+   return (KERN_RESOURCE_SHORTAGE);
+   }
+
+   MPASS((fs->entry->eflags & MAP_ENTRY_GUARD) == 0);
+
+   if (fs->wired)
+   fs->fault_type = fs->prot | (fs->fault_type & VM_PROT_COPY);
+   else
+   KASSERT((fs->fault_flags & VM_FAULT_WIRE) == 0,
+   ("!fs->wired && VM_FAULT_WIRE"));
+   fs->lookup_still_valid = true;
+
+   return (KERN_SUCCESS);
+}
+
 /*
  * Wait/Retry if the page is busy.  We have to do this if the page is
  * either exclusive or shared busy because the vm_pager may be using
@@ -807,6 +855,8 @@ vm_fault(vm_map_t map, vm_offset_t vaddr, vm_prot_t fa
fs.vaddr = vaddr;
fs.m_hold = m_hold;
fs.fault_flags = fault_flags;
+   fs.map = map;
+   fs.lookup_still_valid = false;
faultcount = 0;
nera = -1;
hardfault = false;
@@ -820,44 +870,13 @@ RetryFault_oom:
 * Find the backing store object and offset into it to begin the
 * search.
 */
-   fs.map = map;
-   result = vm_map_lookup(&fs.map, fs.vaddr, fs.fault_type |
-   VM_PROT_FAULT_LOOKUP, &fs.entry, &fs.first_object,
-   &fs.first_pindex, &fs.prot, &fs.wired);
+   result = vm_fault_lookup(&fs);
if (result != KERN_SUCCESS) {
-   unlock_vp(&fs);
+   if (result == KERN_RESOURCE_SHORTAGE)
+   goto RetryFault;
return (result);
}
 
-   fs.map_generation = fs.map->timestamp;
-
-   if (fs.entry->eflags & MAP_ENTRY_NOFAULT) {
-   panic("%s: fault on nofault entry, addr: %#lx",
-   __func__, (u_long)fs.vaddr);
-   }
-
-   if (fs.entry->eflags & MAP_ENTRY_IN_TRANSITION &&
-   fs.entry->wiring_thread != curthread) {
-   vm_map_unlock_read(fs.map);
-   vm_map_lock(fs.map);
-   if (vm_map_lookup_entry(fs.map, fs.vaddr, &fs.entry) &&
-   (fs.entry->eflags & MAP_ENTRY_IN_TRANSITION)) {
-   unlock_vp(&fs);
-   fs.entry->eflags |= MAP_ENTRY_NEEDS_WAKEUP;
-   vm_map_unlock_and_wait(fs.map, 0);
-   } else
-   vm_map_unlock(fs.map);
-   goto RetryFault;
-   }
-
-   MPASS((fs.entry->eflags & MAP_ENTRY_GUARD) == 0);
-
-   if (fs.wired)
-   fs.fault_type = fs.prot | (fs.fault_type & VM_PROT_COPY);
-   else
-   KASSERT((fs.fault_flags & VM_FAULT_WIRE) == 0,
-   ("!fs.wired && VM_FAULT_WIRE"));
-
/*
 * Try to avoid lock contention on the top-level object through
 * special-case handling of some types of page faults, specifically,
@@ -890,8 +909,6 @@ RetryFault_oom:
 */
vm_object_reference_locked(fs.first_object);
vm_object_pip_add(fs.first_object, 1);
-
-   fs.lookup_still_valid = true;
 
fs.m_cow = fs.m = fs.first_m = NULL;
 
___
svn-src-head@freebsd.org mailing list
https://lists.freebsd.org/mailman/listinfo/svn-src-head
To 

svn commit: r357020 - head/sys/vm

2020-01-22 Thread Jeff Roberson
Author: jeff
Date: Thu Jan 23 05:03:34 2020
New Revision: 357020
URL: https://svnweb.freebsd.org/changeset/base/357020

Log:
  (fault 1/9) Move a handful of stack variables into the faultstate.
  
  This additionally fixes a potential bug/pessimization where we could fail to
  reload the original fault_type on restart.
  
  Reviewed by:  kib, markj
  Differential Revision:https://reviews.freebsd.org/D23301

Modified:
  head/sys/vm/vm_fault.c

Modified: head/sys/vm/vm_fault.c
==
--- head/sys/vm/vm_fault.c  Thu Jan 23 04:56:38 2020(r357019)
+++ head/sys/vm/vm_fault.c  Thu Jan 23 05:03:34 2020(r357020)
@@ -120,19 +120,35 @@ __FBSDID("$FreeBSD$");
 #defineVM_FAULT_DONTNEED_MIN   1048576
 
 struct faultstate {
-   vm_offset_t vaddr;
-   vm_page_t m;
+   /* Fault parameters. */
+   vm_offset_t vaddr;
+   vm_page_t   *m_hold;
+   vm_prot_t   fault_type;
+   vm_prot_t   prot;
+   int fault_flags;
+   boolean_t   wired;
+
+   /* Page reference for cow. */
vm_page_t m_cow;
-   vm_object_t object;
-   vm_pindex_t pindex;
-   vm_page_t first_m;
+
+   /* Current object. */
+   vm_object_t object;
+   vm_pindex_t pindex;
+   vm_page_t   m;
+
+   /* Top-level map object. */
vm_object_t first_object;
-   vm_pindex_t first_pindex;
-   vm_map_t map;
-   vm_map_entry_t entry;
-   int map_generation;
-   bool lookup_still_valid;
-   struct vnode *vp;
+   vm_pindex_t first_pindex;
+   vm_page_t   first_m;
+
+   /* Map state. */
+   vm_map_tmap;
+   vm_map_entry_t  entry;
+   int map_generation;
+   boollookup_still_valid;
+
+   /* Vnode if locked. */
+   struct vnode*vp;
 };
 
 static void vm_fault_dontneed(const struct faultstate *fs, vm_offset_t vaddr,
@@ -233,21 +249,20 @@ unlock_and_deallocate(struct faultstate *fs)
 }
 
 static void
-vm_fault_dirty(vm_map_entry_t entry, vm_page_t m, vm_prot_t prot,
-vm_prot_t fault_type, int fault_flags)
+vm_fault_dirty(struct faultstate *fs, vm_page_t m)
 {
bool need_dirty;
 
-   if (((prot & VM_PROT_WRITE) == 0 &&
-   (fault_flags & VM_FAULT_DIRTY) == 0) ||
+   if (((fs->prot & VM_PROT_WRITE) == 0 &&
+   (fs->fault_flags & VM_FAULT_DIRTY) == 0) ||
(m->oflags & VPO_UNMANAGED) != 0)
return;
 
VM_PAGE_OBJECT_BUSY_ASSERT(m);
 
-   need_dirty = ((fault_type & VM_PROT_WRITE) != 0 &&
-   (fault_flags & VM_FAULT_WIRE) == 0) ||
-   (fault_flags & VM_FAULT_DIRTY) != 0;
+   need_dirty = ((fs->fault_type & VM_PROT_WRITE) != 0 &&
+   (fs->fault_flags & VM_FAULT_WIRE) == 0) ||
+   (fs->fault_flags & VM_FAULT_DIRTY) != 0;
 
vm_object_set_writeable_dirty(m->object);
 
@@ -268,7 +283,7 @@ vm_fault_dirty(vm_map_entry_t entry, vm_page_t m, vm_p
 * sure the page isn't marked NOSYNC.  Applications sharing
 * data should use the same flags to avoid ping ponging.
 */
-   if ((entry->eflags & MAP_ENTRY_NOSYNC) != 0)
+   if ((fs->entry->eflags & MAP_ENTRY_NOSYNC) != 0)
vm_page_aflag_set(m, PGA_NOSYNC);
else
vm_page_aflag_clear(m, PGA_NOSYNC);
@@ -280,8 +295,7 @@ vm_fault_dirty(vm_map_entry_t entry, vm_page_t m, vm_p
  * Unlocks fs.first_object and fs.map on success.
  */
 static int
-vm_fault_soft_fast(struct faultstate *fs, vm_offset_t vaddr, vm_prot_t prot,
-int fault_type, int fault_flags, boolean_t wired, vm_page_t *m_hold)
+vm_fault_soft_fast(struct faultstate *fs)
 {
vm_page_t m, m_map;
 #if (defined(__aarch64__) || defined(__amd64__) || (defined(__arm__) && \
@@ -291,12 +305,14 @@ vm_fault_soft_fast(struct faultstate *fs, vm_offset_t 
int flags;
 #endif
int psind, rv;
+   vm_offset_t vaddr;
 
MPASS(fs->vp == NULL);
+   vaddr = fs->vaddr;
vm_object_busy(fs->first_object);
m = vm_page_lookup(fs->first_object, fs->first_pindex);
/* A busy page can be mapped for read|execute access. */
-   if (m == NULL || ((prot & VM_PROT_WRITE) != 0 &&
+   if (m == NULL || ((fs->prot & VM_PROT_WRITE) != 0 &&
vm_page_busied(m)) || !vm_page_all_valid(m)) {
rv = KERN_FAILURE;
goto out;
@@ -311,10 +327,10 @@ vm_fault_soft_fast(struct faultstate *fs, vm_offset_t 
rounddown2(vaddr, pagesizes[m_super->psind]) >= fs->entry->start &&
roundup2(vaddr + 1, pagesizes[m_super->psind]) <= fs->entry->end &&
(vaddr & (pagesizes[m_super->psind] - 1)) == (VM_PAGE_TO_PHYS(m) &
-   (pagesizes[m_super->psind] - 1)) && !wired &&
+   (pagesizes[m_super->psind] - 1)) && 

svn commit: r357017 - in head/sys: dev/spibus kern vm

2020-01-22 Thread Jeff Roberson
Author: jeff
Date: Thu Jan 23 04:54:49 2020
New Revision: 357017
URL: https://svnweb.freebsd.org/changeset/base/357017

Log:
  Consistently use busy and vm_page_valid() rather than touching page bits
  directly.  This improves API compliance, asserts, etc.
  
  Reviewed by:  kib, markj
  Differential Revision:https://reviews.freebsd.org/D23283
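
  For illustration only, a minimal sketch of the allocation pattern this change
  standardizes on (paraphrased from the hunks below; "obj" and "npages" are
  placeholder names, not identifiers from the commit):

	vm_page_t m;
	int n;

	VM_OBJECT_WLOCK(obj);
	for (n = 0; n < npages; n++) {
		/* The page comes back wired and exclusively busied. */
		m = vm_page_grab(obj, n, VM_ALLOC_ZERO | VM_ALLOC_WIRED);
		/* Mark it valid through the API ... */
		vm_page_valid(m);
		/* ... and drop the busy lock instead of writing m->valid. */
		vm_page_xunbusy(m);
	}
	VM_OBJECT_WUNLOCK(obj);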

Modified:
  head/sys/dev/spibus/spigen.c
  head/sys/kern/kern_kcov.c
  head/sys/kern/kern_sendfile.c
  head/sys/vm/vm_glue.c
  head/sys/vm/vm_kern.c

Modified: head/sys/dev/spibus/spigen.c
==
--- head/sys/dev/spibus/spigen.c        Thu Jan 23 03:38:41 2020        (r357016)
+++ head/sys/dev/spibus/spigen.c        Thu Jan 23 04:54:49 2020        (r357017)
@@ -325,8 +325,9 @@ spigen_mmap_single(struct cdev *cdev, vm_ooffset_t *of
vm_object_reference_locked(mmap->bufobj); // kernel and userland both
for (n = 0; n < pages; n++) {
m[n] = vm_page_grab(mmap->bufobj, n,
-   VM_ALLOC_NOBUSY | VM_ALLOC_ZERO | VM_ALLOC_WIRED);
-   m[n]->valid = VM_PAGE_BITS_ALL;
+   VM_ALLOC_ZERO | VM_ALLOC_WIRED);
+   vm_page_valid(m[n]);
+   vm_page_xunbusy(m[n]);
}
VM_OBJECT_WUNLOCK(mmap->bufobj);
pmap_qenter(mmap->kvaddr, m, pages);

Modified: head/sys/kern/kern_kcov.c
==
--- head/sys/kern/kern_kcov.c   Thu Jan 23 03:38:41 2020(r357016)
+++ head/sys/kern/kern_kcov.c   Thu Jan 23 04:54:49 2020(r357017)
@@ -383,8 +383,9 @@ kcov_alloc(struct kcov_info *info, size_t entries)
VM_OBJECT_WLOCK(info->bufobj);
for (n = 0; n < pages; n++) {
m = vm_page_grab(info->bufobj, n,
-   VM_ALLOC_NOBUSY | VM_ALLOC_ZERO | VM_ALLOC_WIRED);
-   m->valid = VM_PAGE_BITS_ALL;
+   VM_ALLOC_ZERO | VM_ALLOC_WIRED);
+   vm_page_valid(m);
+   vm_page_xunbusy(m);
pmap_qenter(info->kvaddr + n * PAGE_SIZE, &m, 1);
}
VM_OBJECT_WUNLOCK(info->bufobj);

Modified: head/sys/kern/kern_sendfile.c
==
--- head/sys/kern/kern_sendfile.c   Thu Jan 23 03:38:41 2020        (r357016)
+++ head/sys/kern/kern_sendfile.c   Thu Jan 23 04:54:49 2020        (r357017)
@@ -388,7 +388,7 @@ sendfile_swapin(vm_object_t obj, struct sf_io *sfio, i
if (!vm_pager_has_page(obj, OFF_TO_IDX(vmoff(i, off)), NULL,
)) {
pmap_zero_page(pa[i]);
-   pa[i]->valid = VM_PAGE_BITS_ALL;
+   vm_page_valid(pa[i]);
MPASS(pa[i]->dirty == 0);
vm_page_xunbusy(pa[i]);
i++;

Modified: head/sys/vm/vm_glue.c
==
--- head/sys/vm/vm_glue.c   Thu Jan 23 03:38:41 2020(r357016)
+++ head/sys/vm/vm_glue.c   Thu Jan 23 04:54:49 2020(r357017)
@@ -340,10 +340,12 @@ vm_thread_stack_create(struct domainset *ds, vm_object
 * page of stack.
 */
VM_OBJECT_WLOCK(ksobj);
-   (void)vm_page_grab_pages(ksobj, 0, VM_ALLOC_NORMAL | VM_ALLOC_NOBUSY |
-   VM_ALLOC_WIRED, ma, pages);
-   for (i = 0; i < pages; i++)
-   ma[i]->valid = VM_PAGE_BITS_ALL;
+   (void)vm_page_grab_pages(ksobj, 0, VM_ALLOC_NORMAL | VM_ALLOC_WIRED,
+   ma, pages);
+   for (i = 0; i < pages; i++) {
+   vm_page_valid(ma[i]);
+   vm_page_xunbusy(ma[i]);
+   }
VM_OBJECT_WUNLOCK(ksobj);
pmap_qenter(ks, ma, pages);
*ksobjp = ksobj;

Modified: head/sys/vm/vm_kern.c
==
--- head/sys/vm/vm_kern.c   Thu Jan 23 03:38:41 2020(r357016)
+++ head/sys/vm/vm_kern.c   Thu Jan 23 04:54:49 2020(r357017)
@@ -193,7 +193,7 @@ kmem_alloc_attr_domain(int domain, vm_size_t size, int
if (vmem_alloc(vmem, size, M_BESTFIT | flags, &addr))
return (0);
offset = addr - VM_MIN_KERNEL_ADDRESS;
-   pflags = malloc2vm_flags(flags) | VM_ALLOC_NOBUSY | VM_ALLOC_WIRED;
+   pflags = malloc2vm_flags(flags) | VM_ALLOC_WIRED;
pflags &= ~(VM_ALLOC_NOWAIT | VM_ALLOC_WAITOK | VM_ALLOC_WAITFAIL);
pflags |= VM_ALLOC_NOWAIT;
prot = (flags & M_EXEC) != 0 ? VM_PROT_ALL : VM_PROT_RW;
@@ -223,7 +223,8 @@ retry:
vm_phys_domain(m), domain));
if ((flags & M_ZERO) && (m->flags & PG_ZERO) == 0)
pmap_zero_page(m);
-   m->valid = VM_PAGE_BITS_ALL;
+   vm_page_valid(m);
+   vm_page_xunbusy(m);
pmap_enter(kernel_pmap, addr + i, m, 

svn commit: r357015 - head/sys/vm

2020-01-22 Thread Jeff Roberson
Author: jeff
Date: Thu Jan 23 03:37:35 2020
New Revision: 357015
URL: https://svnweb.freebsd.org/changeset/base/357015

Log:
  Some architectures with DMAP still consume boot kva.  Simplify the test for
  claiming kva in uma_startup2() to handle this.
  
  Reported by:  bdragon

Modified:
  head/sys/vm/uma_core.c

Modified: head/sys/vm/uma_core.c
==
--- head/sys/vm/uma_core.c  Thu Jan 23 03:36:50 2020(r357014)
+++ head/sys/vm/uma_core.c  Thu Jan 23 03:37:35 2020(r357015)
@@ -2614,7 +2614,7 @@ void
 uma_startup2(void)
 {
 
-   if (!PMAP_HAS_DMAP) {
+   if (bootstart != bootmem) {
vm_map_lock(kernel_map);
(void)vm_map_insert(kernel_map, NULL, 0, bootstart, bootmem,
VM_PROT_RW, VM_PROT_RW, MAP_NOFAULT);
___
svn-src-head@freebsd.org mailing list
https://lists.freebsd.org/mailman/listinfo/svn-src-head
To unsubscribe, send any mail to "svn-src-head-unsubscr...@freebsd.org"


svn commit: r357014 - head/sys/kern

2020-01-22 Thread Jeff Roberson
Author: jeff
Date: Thu Jan 23 03:36:50 2020
New Revision: 357014
URL: https://svnweb.freebsd.org/changeset/base/357014

Log:
  Block the thread lock in sched_throw() and use cpu_switch() to unblock
  it.  The introduction of lockless switch in r355784 created a race to
  re-use the exiting thread that was only possible to hit on a hypervisor.
  
  Reported/Tested by:   rlibby
  Discussed with:   rlibby, jhb

Modified:
  head/sys/kern/sched_ule.c

Modified: head/sys/kern/sched_ule.c
==
--- head/sys/kern/sched_ule.c   Thu Jan 23 01:49:22 2020(r357013)
+++ head/sys/kern/sched_ule.c   Thu Jan 23 03:36:50 2020(r357014)
@@ -2894,7 +2894,7 @@ sched_throw(struct thread *td)
struct thread *newtd;
struct tdq *tdq;
 
-   if (td == NULL) {
+   if (__predict_false(td == NULL)) {
 #ifdef SMP
PCPU_SET(sched, DPCPU_PTR(tdq));
 #endif
@@ -2912,13 +2912,18 @@ sched_throw(struct thread *td)
tdq_load_rem(tdq, td);
td->td_lastcpu = td->td_oncpu;
td->td_oncpu = NOCPU;
+   thread_lock_block(td);
}
newtd = choosethread();
spinlock_enter();
TDQ_UNLOCK(tdq);
KASSERT(curthread->td_md.md_spinlock_count == 1,
("invalid count %d", curthread->td_md.md_spinlock_count));
-   cpu_throw(td, newtd);   /* doesn't return */
+   /* doesn't return */
+   if (__predict_false(td == NULL))
+   cpu_throw(td, newtd);   /* doesn't return */
+   else
+   cpu_switch(td, newtd, TDQ_LOCKPTR(tdq));
 }
 
 /*
___
svn-src-head@freebsd.org mailing list
https://lists.freebsd.org/mailman/listinfo/svn-src-head
To unsubscribe, send any mail to "svn-src-head-unsubscr...@freebsd.org"


svn commit: r356936 - head/sys/vm

2020-01-20 Thread Jeff Roberson
Author: jeff
Date: Tue Jan 21 00:12:57 2020
New Revision: 356936
URL: https://svnweb.freebsd.org/changeset/base/356936

Log:
  Move readahead and dropbehind fault functionality into a helper routine for
  clarity.
  
  Reviewed by:  dougm, kib, markj
  Differential Revision:https://reviews.freebsd.org/D23282
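
  As a worked example of the growth rule quoted in the hunk below ("# of
  sequential faults x (read ahead min + 1) + read ahead min"): assuming, purely
  for illustration, VM_FAULT_READ_AHEAD_MIN = 7 and VM_FAULT_READ_AHEAD_MAX = 64
  (see vm_fault.c for the real values), a run of sequential faults grows the
  window 7 -> 15 -> 23 -> 31 -> ... because each step computes min + (previous
  era + 1), until the result is clamped to the maximum.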

Modified:
  head/sys/vm/vm_fault.c

Modified: head/sys/vm/vm_fault.c
==
--- head/sys/vm/vm_fault.c  Mon Jan 20 23:44:10 2020(r356935)
+++ head/sys/vm/vm_fault.c  Tue Jan 21 00:12:57 2020(r356936)
@@ -120,6 +120,7 @@ __FBSDID("$FreeBSD$");
 #defineVM_FAULT_DONTNEED_MIN   1048576
 
 struct faultstate {
+   vm_offset_t vaddr;
vm_page_t m;
vm_page_t m_cow;
vm_object_t object;
@@ -680,6 +681,59 @@ vm_fault_lock_vnode(struct faultstate *fs, bool objloc
 }
 
 /*
+ * Calculate the desired readahead.  Handle drop-behind.
+ *
+ * Returns the number of readahead blocks to pass to the pager.
+ */
+static int
+vm_fault_readahead(struct faultstate *fs)
+{
+   int era, nera;
+   u_char behavior;
+
+   KASSERT(fs->lookup_still_valid, ("map unlocked"));
+   era = fs->entry->read_ahead;
+   behavior = vm_map_entry_behavior(fs->entry);
+   if (behavior == MAP_ENTRY_BEHAV_RANDOM) {
+   nera = 0;
+   } else if (behavior == MAP_ENTRY_BEHAV_SEQUENTIAL) {
+   nera = VM_FAULT_READ_AHEAD_MAX;
+   if (fs->vaddr == fs->entry->next_read)
+   vm_fault_dontneed(fs, fs->vaddr, nera);
+   } else if (fs->vaddr == fs->entry->next_read) {
+   /*
+* This is a sequential fault.  Arithmetically
+* increase the requested number of pages in
+* the read-ahead window.  The requested
+* number of pages is "# of sequential faults
+* x (read ahead min + 1) + read ahead min"
+*/
+   nera = VM_FAULT_READ_AHEAD_MIN;
+   if (era > 0) {
+   nera += era + 1;
+   if (nera > VM_FAULT_READ_AHEAD_MAX)
+   nera = VM_FAULT_READ_AHEAD_MAX;
+   }
+   if (era == VM_FAULT_READ_AHEAD_MAX)
+   vm_fault_dontneed(fs, fs->vaddr, nera);
+   } else {
+   /*
+* This is a non-sequential fault.
+*/
+   nera = 0;
+   }
+   if (era != nera) {
+   /*
+* A read lock on the map suffices to update
+* the read ahead count safely.
+*/
+   fs->entry->read_ahead = nera;
+   }
+
+   return (nera);
+}
+
+/*
  * Wait/Retry if the page is busy.  We have to do this if the page is
  * either exclusive or shared busy because the vm_pager may be using
  * read busy for pageouts (and even pageins if it is the vnode pager),
@@ -725,7 +779,7 @@ vm_fault(vm_map_t map, vm_offset_t vaddr, vm_prot_t fa
vm_offset_t e_end, e_start;
vm_pindex_t retry_pindex;
vm_prot_t prot, retry_prot;
-   int ahead, alloc_req, behind, cluster_offset, era, faultcount;
+   int ahead, alloc_req, behind, cluster_offset, faultcount;
int nera, oom, result, rv;
u_char behavior;
boolean_t wired;/* Passed by reference. */
@@ -737,6 +791,7 @@ vm_fault(vm_map_t map, vm_offset_t vaddr, vm_prot_t fa
return (KERN_PROTECTION_FAILURE);
 
fs.vp = NULL;
+   fs.vaddr = vaddr;
faultcount = 0;
nera = -1;
hardfault = false;
@@ -989,45 +1044,7 @@ readrest:
 * apply to subsequent objects in the shadow chain.
 */
if (nera == -1 && !P_KILLED(curproc)) {
-   KASSERT(fs.lookup_still_valid, ("map unlocked"));
-   era = fs.entry->read_ahead;
-   behavior = vm_map_entry_behavior(fs.entry);
-   if (behavior == MAP_ENTRY_BEHAV_RANDOM) {
-   nera = 0;
-   } else if (behavior == MAP_ENTRY_BEHAV_SEQUENTIAL) {
-   nera = VM_FAULT_READ_AHEAD_MAX;
-   if (vaddr == fs.entry->next_read)
-   vm_fault_dontneed(&fs, vaddr, nera);
-   } else if (vaddr == fs.entry->next_read) {
-   /*
-* This is a sequential fault.  Arithmetically
-* increase the requested number of pages in
-* the read-ahead window.  The requested
-* number of pages is "# of sequential faults
-* x (read ahead min + 1) + read ahead min"
-*/
-   

svn commit: r356933 - head/sys/vm

2020-01-20 Thread Jeff Roberson
Author: jeff
Date: Mon Jan 20 22:49:52 2020
New Revision: 356933
URL: https://svnweb.freebsd.org/changeset/base/356933

Log:
  Reduce object locking in vm_fault.  Once we have an exclusively busied page we
  no longer need an object lock.  This reduces the longest hold times and
  eliminates some trylock code blocks.
  
  Reviewed by:  kib, markj
  Differential Revision:https://reviews.freebsd.org/D23034

Modified:
  head/sys/vm/vm_fault.c

Modified: head/sys/vm/vm_fault.c
==
--- head/sys/vm/vm_fault.c  Mon Jan 20 22:15:33 2020(r356932)
+++ head/sys/vm/vm_fault.c  Mon Jan 20 22:49:52 2020(r356933)
@@ -342,10 +342,10 @@ vm_fault_soft_fast(struct faultstate *fs, vm_offset_t 
*m_hold = m;
vm_page_wire(m);
}
-   vm_fault_dirty(fs->entry, m, prot, fault_type, fault_flags);
if (psind == 0 && !wired)
vm_fault_prefault(fs, vaddr, PFBAK, PFFOR, true);
VM_OBJECT_RUNLOCK(fs->first_object);
+   vm_fault_dirty(fs->entry, m, prot, fault_type, fault_flags);
vm_map_lookup_done(fs->map, fs->entry);
curthread->td_ru.ru_minflt++;
 
@@ -632,7 +632,7 @@ vm_fault_trap(vm_map_t map, vm_offset_t vaddr, vm_prot
 }
 
 static int
-vm_fault_lock_vnode(struct faultstate *fs)
+vm_fault_lock_vnode(struct faultstate *fs, bool objlocked)
 {
struct vnode *vp;
int error, locked;
@@ -668,7 +668,10 @@ vm_fault_lock_vnode(struct faultstate *fs)
}
 
vhold(vp);
-   unlock_and_deallocate(fs);
+   if (objlocked)
+   unlock_and_deallocate(fs);
+   else
+   fault_deallocate(fs);
error = vget(vp, locked | LK_RETRY | LK_CANRECURSE, curthread);
vdrop(vp);
fs->vp = vp;
@@ -863,9 +866,11 @@ RetryFault_oom:
 */
if (!vm_page_all_valid(fs.m))
goto readrest;
-   break; /* break to PAGE HAS BEEN FOUND */
+   VM_OBJECT_WUNLOCK(fs.object);
+   break; /* break to PAGE HAS BEEN FOUND. */
}
KASSERT(fs.m == NULL, ("fs.m should be NULL, not %p", fs.m));
+   VM_OBJECT_ASSERT_WLOCKED(fs.object);
 
/*
 * Page is not resident.  If the pager might contain the page
@@ -876,7 +881,7 @@ RetryFault_oom:
if (fs.object->type != OBJT_DEFAULT ||
fs.object == fs.first_object) {
if ((fs.object->flags & OBJ_SIZEVNLOCK) != 0) {
-   rv = vm_fault_lock_vnode(&fs);
+   rv = vm_fault_lock_vnode(&fs, true);
MPASS(rv == KERN_SUCCESS ||
rv == KERN_RESOURCE_SHORTAGE);
if (rv == KERN_RESOURCE_SHORTAGE)
@@ -956,12 +961,23 @@ RetryFault_oom:
 
 readrest:
/*
+* Default objects have no pager so no exclusive busy exists
+* to protect this page in the chain.  Skip to the next
+* object without dropping the lock to preserve atomicity of
+* shadow faults.
+*/
+   if (fs.object->type == OBJT_DEFAULT)
+   goto next;
+
+   /*
 * At this point, we have either allocated a new page or found
 * an existing page that is only partially valid.
 *
 * We hold a reference on the current object and the page is
-* exclusive busied.
+* exclusive busied.  The exclusive busy prevents simultaneous
+* faults and collapses while the object lock is dropped.
 */
+   VM_OBJECT_WUNLOCK(fs.object);
 
/*
 * If the pager for the current object might have the page,
@@ -972,8 +988,7 @@ readrest:
 * have the page, the number of additional pages to read will
 * apply to subsequent objects in the shadow chain.
 */
-   if (fs.object->type != OBJT_DEFAULT && nera == -1 &&
-   !P_KILLED(curproc)) {
+   if (nera == -1 && !P_KILLED(curproc)) {
KASSERT(fs.lookup_still_valid, ("map unlocked"));
era = fs.entry->read_ahead;
behavior = vm_map_entry_behavior(fs.entry);
@@ -1039,7 +1054,7 @@ readrest:
 */
unlock_map(&fs);
 
-   rv = vm_fault_lock_vnode(&fs);
+   rv = vm_fault_lock_vnode(&fs, false);
MPASS(rv == KERN_SUCCESS ||
rv == KERN_RESOURCE_SHORTAGE);
if (rv == KERN_RESOURCE_SHORTAGE)
@@ -1080,15 +1095,14 @@ 

svn commit: r356902 - in head/sys: dev/md fs/tmpfs kern vm

2020-01-19 Thread Jeff Roberson
Author: jeff
Date: Sun Jan 19 23:47:32 2020
New Revision: 356902
URL: https://svnweb.freebsd.org/changeset/base/356902

Log:
  Don't hold the object lock while calling getpages.
  
  The vnode pager does not want the object lock held.  Moving this out allows
  further object lock scope reduction in callers.  While here add some missing
  paging in progress calls and an assert.  The object handle is now protected
  explicitly with pip.
  
  Reviewed by:  kib, markj
  Differential Revision:https://reviews.freebsd.org/D23033
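
  A rough sketch of the calling convention this moves the tree toward (adapted
  from the tmpfs hunk below; error handling omitted):

	vm_object_pip_add(obj, 1);	/* keep the object/handle alive */
	VM_OBJECT_WUNLOCK(obj);		/* the pager is called unlocked */
	rv = vm_pager_get_pages(obj, &m, 1, NULL, NULL);
	VM_OBJECT_WLOCK(obj);
	vm_object_pip_wakeup(obj);	/* drop the paging-in-progress ref */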

Modified:
  head/sys/dev/md/md.c
  head/sys/fs/tmpfs/tmpfs_subr.c
  head/sys/kern/kern_sendfile.c
  head/sys/kern/uipc_shm.c
  head/sys/vm/device_pager.c
  head/sys/vm/phys_pager.c
  head/sys/vm/sg_pager.c
  head/sys/vm/swap_pager.c
  head/sys/vm/vm_fault.c
  head/sys/vm/vm_object.h
  head/sys/vm/vm_page.c
  head/sys/vm/vm_pager.c
  head/sys/vm/vm_swapout.c
  head/sys/vm/vnode_pager.c

Modified: head/sys/dev/md/md.c
==
--- head/sys/dev/md/md.cSun Jan 19 22:52:36 2020(r356901)
+++ head/sys/dev/md/md.cSun Jan 19 23:47:32 2020(r356902)
@@ -1057,11 +1057,12 @@ mdstart_swap(struct md_s *sc, struct bio *bp)
lastend = (bp->bio_offset + bp->bio_length - 1) % PAGE_SIZE + 1;
 
rv = VM_PAGER_OK;
-   VM_OBJECT_WLOCK(sc->object);
vm_object_pip_add(sc->object, 1);
for (i = bp->bio_offset / PAGE_SIZE; i <= lastp; i++) {
len = ((i == lastp) ? lastend : PAGE_SIZE) - offs;
+   VM_OBJECT_WLOCK(sc->object);
m = vm_page_grab(sc->object, i, VM_ALLOC_SYSTEM);
+   VM_OBJECT_WUNLOCK(sc->object);
if (bp->bio_cmd == BIO_READ) {
if (vm_page_all_valid(m))
rv = VM_PAGER_OK;
@@ -1069,7 +1070,9 @@ mdstart_swap(struct md_s *sc, struct bio *bp)
rv = vm_pager_get_pages(sc->object, &m, 1,
NULL, NULL);
if (rv == VM_PAGER_ERROR) {
+   VM_OBJECT_WLOCK(sc->object);
vm_page_free(m);
+   VM_OBJECT_WUNLOCK(sc->object);
break;
} else if (rv == VM_PAGER_FAIL) {
/*
@@ -1099,7 +1102,9 @@ mdstart_swap(struct md_s *sc, struct bio *bp)
rv = vm_pager_get_pages(sc->object, &m, 1,
NULL, NULL);
if (rv == VM_PAGER_ERROR) {
+   VM_OBJECT_WLOCK(sc->object);
vm_page_free(m);
+   VM_OBJECT_WUNLOCK(sc->object);
break;
} else if (rv == VM_PAGER_FAIL)
pmap_zero_page(m);
@@ -1122,8 +1127,10 @@ mdstart_swap(struct md_s *sc, struct bio *bp)
else
rv = vm_pager_get_pages(sc->object, &m, 1,
NULL, NULL);
+   VM_OBJECT_WLOCK(sc->object);
if (rv == VM_PAGER_ERROR) {
vm_page_free(m);
+   VM_OBJECT_WUNLOCK(sc->object);
break;
} else if (rv == VM_PAGER_FAIL) {
vm_page_free(m);
@@ -1139,6 +1146,7 @@ mdstart_swap(struct md_s *sc, struct bio *bp)
m = NULL;
}
}
+   VM_OBJECT_WUNLOCK(sc->object);
}
if (m != NULL) {
vm_page_xunbusy(m);
@@ -1160,7 +1168,6 @@ mdstart_swap(struct md_s *sc, struct bio *bp)
ma_offs += len;
}
vm_object_pip_wakeup(sc->object);
-   VM_OBJECT_WUNLOCK(sc->object);
return (rv != VM_PAGER_ERROR ? 0 : ENOSPC);
 }
 

Modified: head/sys/fs/tmpfs/tmpfs_subr.c
==
--- head/sys/fs/tmpfs/tmpfs_subr.c  Sun Jan 19 22:52:36 2020        (r356901)
+++ head/sys/fs/tmpfs/tmpfs_subr.c  Sun Jan 19 23:47:32 2020        (r356902)
@@ -1480,8 +1480,12 @@ retry:
VM_ALLOC_WAITFAIL);
if (m == NULL)
goto retry;
+   vm_object_pip_add(uobj, 1);
+   VM_OBJECT_WUNLOCK(uobj);
rv = vm_pager_get_pages(uobj, &m, 1, NULL,
NULL);
+   VM_OBJECT_WLOCK(uobj);
+   vm_object_pip_wakeup(uobj);
if (rv == 

svn commit: r356887 - head/sys/vm

2020-01-19 Thread Jeff Roberson
Author: jeff
Date: Sun Jan 19 18:36:03 2020
New Revision: 356887
URL: https://svnweb.freebsd.org/changeset/base/356887

Log:
  It has not been possible to recursively terminate a vnode object for some time
  now.  Eliminate the dead code that supports it.
  
  Approved by:  kib, markj
  Differential Revision:https://reviews.freebsd.org/D22908

Modified:
  head/sys/vm/vnode_pager.c

Modified: head/sys/vm/vnode_pager.c
==
--- head/sys/vm/vnode_pager.c   Sun Jan 19 18:30:23 2020(r356886)
+++ head/sys/vm/vnode_pager.c   Sun Jan 19 18:36:03 2020(r356887)
@@ -200,36 +200,24 @@ vnode_destroy_vobject(struct vnode *vp)
MPASS(obj->type == OBJT_VNODE);
umtx_shm_object_terminated(obj);
if (obj->ref_count == 0) {
+   KASSERT((obj->flags & OBJ_DEAD) == 0,
+  ("vnode_destroy_vobject: Terminating dead object"));
+   vm_object_set_flag(obj, OBJ_DEAD);
+
/*
-* don't double-terminate the object
+* Clean pages and flush buffers.
 */
-   if ((obj->flags & OBJ_DEAD) == 0) {
-   vm_object_set_flag(obj, OBJ_DEAD);
+   vm_object_page_clean(obj, 0, 0, OBJPC_SYNC);
+   VM_OBJECT_WUNLOCK(obj);
 
-   /*
-* Clean pages and flush buffers.
-*/
-   vm_object_page_clean(obj, 0, 0, OBJPC_SYNC);
-   VM_OBJECT_WUNLOCK(obj);
+   vinvalbuf(vp, V_SAVE, 0, 0);
 
-   vinvalbuf(vp, V_SAVE, 0, 0);
+   BO_LOCK(&vp->v_bufobj);
+   vp->v_bufobj.bo_flag |= BO_DEAD;
+   BO_UNLOCK(&vp->v_bufobj);
 
-   BO_LOCK(&vp->v_bufobj);
-   vp->v_bufobj.bo_flag |= BO_DEAD;
-   BO_UNLOCK(&vp->v_bufobj);
-
-   VM_OBJECT_WLOCK(obj);
-   vm_object_terminate(obj);
-   } else {
-   /*
-* Waiters were already handled during object
-* termination.  The exclusive vnode lock hopefully
-* prevented new waiters from referencing the dying
-* object.
-*/
-   vp->v_object = NULL;
-   VM_OBJECT_WUNLOCK(obj);
-   }
+   VM_OBJECT_WLOCK(obj);
+   vm_object_terminate(obj);
} else {
/*
 * Woe to the process that tries to page now :-).
___
svn-src-head@freebsd.org mailing list
https://lists.freebsd.org/mailman/listinfo/svn-src-head
To unsubscribe, send any mail to "svn-src-head-unsubscr...@freebsd.org"


svn commit: r356886 - head/sys/vm

2020-01-19 Thread Jeff Roberson
Author: jeff
Date: Sun Jan 19 18:30:23 2020
New Revision: 356886
URL: https://svnweb.freebsd.org/changeset/base/356886

Log:
  Make collapse synchronization more explicit and allow it to complete during
  paging.
  
  Shadow objects are marked with a COLLAPSING flag while they are collapsing
  with their backing object.  This gives us an explicit test rather than
  overloading paging-in-progress.  While split is on-going we mark an object
  with SPLIT.  These two operations will modify the swap tree so they must be
  serialized and swap_pager_getpages() can now directly detect these conditions
  and page more conservatively.
  
  Callers to vm_object_collapse() now will reliably wait for a collapse to
  finish so that the backing chain is as short as possible before other
  decisions are made that may inflate the object chain.  For example, split,
  coalesce, etc.  It is now safe to run fault concurrently with collapse.  It
  is safe to increase or decrease paging in progress with no lock so long as
  there is another valid ref on increase.
  
  This change makes collapse more reliable as a secondary benefit.  The primary
  benefit is making it safe to drop the object lock much earlier in fault or
  never acquire it at all.
  
  This was tested with a new shadow chain test script that uncovered long
  standing bugs and will be integrated with stress2.
  
  Reviewed by:  kib, markj
  Differential Revision:https://reviews.freebsd.org/D22908

Modified:
  head/sys/vm/swap_pager.c
  head/sys/vm/vm_object.c
  head/sys/vm/vm_object.h

Modified: head/sys/vm/swap_pager.c
==
--- head/sys/vm/swap_pager.cSun Jan 19 18:18:17 2020(r356885)
+++ head/sys/vm/swap_pager.cSun Jan 19 18:30:23 2020(r356886)
@@ -974,15 +974,12 @@ swp_pager_xfer_source(vm_object_t srcobject, vm_object
 * Destination has no swapblk and is not resident, transfer source.
 * swp_pager_meta_build() can sleep.
 */
-   vm_object_pip_add(srcobject, 1);
VM_OBJECT_WUNLOCK(srcobject);
-   vm_object_pip_add(dstobject, 1);
dstaddr = swp_pager_meta_build(dstobject, pindex, addr);
KASSERT(dstaddr == SWAPBLK_NONE,
("Unexpected destination swapblk"));
-   vm_object_pip_wakeup(dstobject);
VM_OBJECT_WLOCK(srcobject);
-   vm_object_pip_wakeup(srcobject);
+
return (true);
 }
 
@@ -995,8 +992,7 @@ swp_pager_xfer_source(vm_object_t srcobject, vm_object
  * we keep the destination's.
  *
  * This routine is allowed to sleep.  It may sleep allocating metadata
- * indirectly through swp_pager_meta_build() or if paging is still in
- * progress on the source.
+ * indirectly through swp_pager_meta_build().
  *
  * The source object contains no vm_page_t's (which is just as well)
  *
@@ -1019,18 +1015,14 @@ swap_pager_copy(vm_object_t srcobject, vm_object_t dst
 */
if (destroysource && (srcobject->flags & OBJ_ANON) == 0 &&
srcobject->handle != NULL) {
-   vm_object_pip_add(srcobject, 1);
VM_OBJECT_WUNLOCK(srcobject);
-   vm_object_pip_add(dstobject, 1);
VM_OBJECT_WUNLOCK(dstobject);
sx_xlock(_alloc_sx);
TAILQ_REMOVE(NOBJLIST(srcobject->handle), srcobject,
pager_object_list);
sx_xunlock(_alloc_sx);
VM_OBJECT_WLOCK(dstobject);
-   vm_object_pip_wakeup(dstobject);
VM_OBJECT_WLOCK(srcobject);
-   vm_object_pip_wakeup(srcobject);
}
 
/*
@@ -1207,26 +1199,29 @@ swap_pager_getpages(vm_object_t object, vm_page_t *ma,
 
reqcount = count;
 
-   /*
-* Determine the final number of read-behind pages and
-* allocate them BEFORE releasing the object lock.  Otherwise,
-* there can be a problematic race with vm_object_split().
-* Specifically, vm_object_split() might first transfer pages
-* that precede ma[0] in the current object to a new object,
-* and then this function incorrectly recreates those pages as
-* read-behind pages in the current object.
-*/
KASSERT(object->type == OBJT_SWAP,
("%s: object not swappable", __func__));
if (!swap_pager_haspage(object, ma[0]->pindex, &maxbehind, &maxahead))
return (VM_PAGER_FAIL);
 
+   KASSERT(reqcount - 1 <= maxahead,
+   ("page count %d extends beyond swap block", reqcount));
+
/*
+* Do not transfer any pages other than those that are xbusied
+* when running during a split or collapse operation.  This
+* prevents clustering from re-creating pages which are being
+* moved into another object.
+*/
+   if ((object->flags & (OBJ_SPLIT | OBJ_DEAD)) != 0) {
+   maxahead = reqcount - 1;
+   maxbehind = 0;
+   }
+
+   

svn commit: r356885 - in head/sys: kern sys

2020-01-19 Thread Jeff Roberson
Author: jeff
Date: Sun Jan 19 18:18:17 2020
New Revision: 356885
URL: https://svnweb.freebsd.org/changeset/base/356885

Log:
  Provide an API for interlocked refcount sleeps.
  
  Reviewed by:  kib, markj
  Differential Revision:https://reviews.freebsd.org/D22908
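
  A minimal usage sketch of the new interface (hypothetical caller; "obj", its
  mutex, the "refdrain" wmesg and the PVM priority are illustrative choices,
  not taken from this commit):

	mtx_lock(&obj->lock);
	/* ... decide that we must wait for the count to drain ... */
	/*
	 * The sleepqueue chain lock is taken before obj->lock is dropped,
	 * so a release() and wakeup in that window cannot be lost.  Returns
	 * with obj->lock released after at most one sleep; callers that need
	 * the count to reach zero re-lock and re-check, as refcount_wait()
	 * does for the uninterlocked variant.
	 */
	refcount_sleep_interlock(&obj->refs, &obj->lock, "refdrain", PVM);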

Modified:
  head/sys/kern/kern_synch.c
  head/sys/sys/refcount.h

Modified: head/sys/kern/kern_synch.c
==
--- head/sys/kern/kern_synch.c  Sun Jan 19 17:47:04 2020(r356884)
+++ head/sys/kern/kern_synch.c  Sun Jan 19 18:18:17 2020(r356885)
@@ -381,15 +381,21 @@ refcount_release_last(volatile u_int *count, u_int n, 
  * a precise answer should use refcount_wait().
  */
 void
-refcount_sleep(volatile u_int *count, const char *wmesg, int pri)
+_refcount_sleep(volatile u_int *count, struct lock_object *lock,
+const char *wmesg, int pri)
 {
void *wchan;
u_int old;
 
-   if (REFCOUNT_COUNT(*count) == 0)
+   if (REFCOUNT_COUNT(*count) == 0) {
+   if (lock != NULL)
+   LOCK_CLASS(lock)->lc_unlock(lock);
return;
+   }
wchan = __DEVOLATILE(void *, count);
sleepq_lock(wchan);
+   if (lock != NULL)
+   LOCK_CLASS(lock)->lc_unlock(lock);
old = *count;
for (;;) {
if (REFCOUNT_COUNT(old) == 0) {

Modified: head/sys/sys/refcount.h
==
--- head/sys/sys/refcount.h Sun Jan 19 17:47:04 2020(r356884)
+++ head/sys/sys/refcount.h Sun Jan 19 18:18:17 2020(r356885)
@@ -46,7 +46,6 @@
 #defineREFCOUNT_COUNT(x)   ((x) & ~REFCOUNT_WAITER)
 
 bool refcount_release_last(volatile u_int *count, u_int n, u_int old);
-void refcount_sleep(volatile u_int *count, const char *wmesg, int prio);
 
 /*
  * Attempt to handle reference count overflow and underflow.  Force the counter
@@ -135,13 +134,29 @@ refcount_release(volatile u_int *count)
return (refcount_releasen(count, 1));
 }
 
+#ifdef _KERNEL
+struct lock_object;
+void _refcount_sleep(volatile u_int *count, struct lock_object *,
+const char *wmesg, int prio);
+
 static __inline void
+refcount_sleep(volatile u_int *count, const char *wmesg, int prio)
+{
+
+   _refcount_sleep(count, NULL, wmesg, prio);
+}
+
+#definerefcount_sleep_interlock(count, lock, wmesg, prio)  
\
+   _refcount_sleep((count), (struct lock_object *)(lock), (wmesg), (prio))
+
+static __inline void
 refcount_wait(volatile u_int *count, const char *wmesg, int prio)
 {
 
while (*count != 0)
refcount_sleep(count, wmesg, prio);
 }
+#endif
 
 /*
  * This functions returns non-zero if the refcount was
___
svn-src-head@freebsd.org mailing list
https://lists.freebsd.org/mailman/listinfo/svn-src-head
To unsubscribe, send any mail to "svn-src-head-unsubscr...@freebsd.org"


svn commit: r356822 - head/sys/vm

2020-01-16 Thread Jeff Roberson
Author: jeff
Date: Fri Jan 17 03:44:04 2020
New Revision: 356822
URL: https://svnweb.freebsd.org/changeset/base/356822

Log:
  Fix a long standing bug that was made worse in r355765.  When we are cowing a
  page that was previously mapped read-only it exists in pmap until pmap_enter()
  returns.  However, we held no reference to the original page after the copy
  was complete.  This allowed vm_object_scan_all_shadowed() to collapse an
  object that still had pages mapped.  To resolve this, add another page pointer
  to the faultstate so we can keep the page xbusy until we're done with
  pmap_enter().  Handle busy pages in scan_all_shadowed.  This is already done
  in vm_object_collapse_scan().
  
  Reviewed by:  kib, markj
  Differential Revision:https://reviews.freebsd.org/D23155

Modified:
  head/sys/vm/vm_fault.c
  head/sys/vm/vm_object.c

Modified: head/sys/vm/vm_fault.c
==
--- head/sys/vm/vm_fault.c  Fri Jan 17 01:20:48 2020(r356821)
+++ head/sys/vm/vm_fault.c  Fri Jan 17 03:44:04 2020(r356822)
@@ -121,6 +121,7 @@ __FBSDID("$FreeBSD$");
 
 struct faultstate {
vm_page_t m;
+   vm_page_t m_cow;
vm_object_t object;
vm_pindex_t pindex;
vm_page_t first_m;
@@ -208,6 +209,7 @@ static void
 fault_deallocate(struct faultstate *fs)
 {
 
+   fault_page_release(&fs->m_cow);
fault_page_release(&fs->m);
vm_object_pip_wakeup(fs->object);
if (fs->object != fs->first_object) {
@@ -818,7 +820,7 @@ RetryFault_oom:
 
fs.lookup_still_valid = true;
 
-   fs.m = fs.first_m = NULL;
+   fs.m_cow = fs.m = fs.first_m = NULL;
 
/*
 * Search for the page at object/offset.
@@ -1254,9 +1256,11 @@ readrest:
vm_page_unwire(fs.m, PQ_INACTIVE);
}
/*
-* We no longer need the old page or object.
+* Save the cow page to be released after
+* pmap_enter is complete.
 */
-   fault_page_release(&fs.m);
+   fs.m_cow = fs.m;
+   fs.m = NULL;
}
/*
 * fs.object != fs.first_object due to above 

Modified: head/sys/vm/vm_object.c
==
--- head/sys/vm/vm_object.c Fri Jan 17 01:20:48 2020(r356821)
+++ head/sys/vm/vm_object.c Fri Jan 17 03:44:04 2020(r356822)
@@ -1605,6 +1605,14 @@ vm_object_scan_all_shadowed(vm_object_t object)
break;
 
/*
+* If the backing object page is busy a grandparent or older
+* page may still be undergoing CoW.  It is not safe to
+* collapse the backing object until it is quiesced.
+*/
+   if (p != NULL && vm_page_busied(p))
+   return (false);
+
+   /*
 * See if the parent has the page or if the parent's object
 * pager has the page.  If the parent has the page but the page
 * is not valid, the parent's object pager must have the page.
@@ -1907,8 +1915,7 @@ vm_object_collapse(vm_object_t object)
 * If we do not entirely shadow the backing object,
 * there is nothing we can do so we give up.
 */
-   if (object->resident_page_count != object->size &&
-   !vm_object_scan_all_shadowed(object)) {
+   if (!vm_object_scan_all_shadowed(object)) {
VM_OBJECT_WUNLOCK(backing_object);
break;
}
___
svn-src-head@freebsd.org mailing list
https://lists.freebsd.org/mailman/listinfo/svn-src-head
To unsubscribe, send any mail to "svn-src-head-unsubscr...@freebsd.org"


svn commit: r356776 - in head/sys: kern vm

2020-01-15 Thread Jeff Roberson
Author: jeff
Date: Thu Jan 16 05:01:21 2020
New Revision: 356776
URL: https://svnweb.freebsd.org/changeset/base/356776

Log:
  Simplify VM and UMA startup by eliminating boot pages.  Instead use careful
  ordering to allocate early pages in the same way boot pages were but only
  as needed.  After the KVA allocator has started up we allocate the KVA that
  we consumed during boot.  This also makes the boot pages freeable since they
  have vm_page structures allocated with the rest of memory.
  
  Parts of this patch were written and tested by markj.
  
  Reviewed by:  glebius, markj
  Differential Revision:https://reviews.freebsd.org/D23102

Modified:
  head/sys/kern/subr_vmem.c
  head/sys/vm/uma_core.c
  head/sys/vm/vm_init.c
  head/sys/vm/vm_kern.c
  head/sys/vm/vm_page.c

Modified: head/sys/kern/subr_vmem.c
==
--- head/sys/kern/subr_vmem.c   Thu Jan 16 03:38:06 2020(r356775)
+++ head/sys/kern/subr_vmem.c   Thu Jan 16 05:01:21 2020(r356776)
@@ -77,8 +77,6 @@ __FBSDID("$FreeBSD$");
 #include 
 #include 
 
-intvmem_startup_count(void);
-
 #defineVMEM_OPTORDER   5
 #defineVMEM_OPTVALUE   (1 << VMEM_OPTORDER)
 #defineVMEM_MAXORDER   \
@@ -661,17 +659,6 @@ vmem_bt_alloc(uma_zone_t zone, vm_size_t bytes, int do
pause("btalloc", 1);
 
return (NULL);
-}
-
-/*
- * How many pages do we need to startup_alloc.
- */
-int
-vmem_startup_count(void)
-{
-
-   return (howmany(BT_MAXALLOC, slab_ipers(sizeof(struct vmem_btag),
-   UMA_ALIGN_PTR)));
 }
 #endif
 

Modified: head/sys/vm/uma_core.c
==
--- head/sys/vm/uma_core.c  Thu Jan 16 03:38:06 2020(r356775)
+++ head/sys/vm/uma_core.c  Thu Jan 16 05:01:21 2020(r356776)
@@ -101,6 +101,8 @@ __FBSDID("$FreeBSD$");
 #include 
 #endif
 
+#include 
+
 /*
  * This is the zone and keg from which all zones are spawned.
  */
@@ -151,11 +153,10 @@ static LIST_HEAD(,uma_zone) uma_cachezones =
 static struct rwlock_padalign __exclusive_cache_line uma_rwlock;
 
 /*
- * Pointer and counter to pool of pages, that is preallocated at
- * startup to bootstrap UMA.
+ * First available virtual address for boot time allocations.
  */
-static char *bootmem;
-static int boot_pages;
+static vm_offset_t bootstart;
+static vm_offset_t bootmem;
 
 static struct sx uma_reclaim_lock;
 
@@ -173,9 +174,7 @@ SYSCTL_ULONG(_vm, OID_AUTO, uma_kmem_total, CTLFLAG_RD
 /* Is the VM done starting up? */
 static enum {
BOOT_COLD,
-   BOOT_STRAPPED,
-   BOOT_PAGEALLOC,
-   BOOT_BUCKETS,
+   BOOT_KVA,
BOOT_RUNNING,
BOOT_SHUTDOWN,
 } booted = BOOT_COLD;
@@ -257,9 +256,7 @@ enum zfreeskip {
 
 /* Prototypes.. */
 
-intuma_startup_count(int);
-void   uma_startup(void *, int);
-void   uma_startup1(void);
+void   uma_startup1(vm_offset_t);
 void   uma_startup2(void);
 
 static void *noobj_alloc(uma_zone_t, vm_size_t, int, uint8_t *, int);
@@ -278,6 +275,7 @@ static int zone_ctor(void *, int, void *, int);
 static void zone_dtor(void *, int, void *);
 static int zero_init(void *, int, int);
 static void zone_foreach(void (*zfunc)(uma_zone_t, void *), void *);
+static void zone_foreach_unlocked(void (*zfunc)(uma_zone_t, void *), void *);
 static void zone_timeout(uma_zone_t zone, void *);
 static int hash_alloc(struct uma_hash *, u_int);
 static int hash_expand(struct uma_hash *, struct uma_hash *);
@@ -370,7 +368,7 @@ static void
 bucket_enable(void)
 {
 
-   KASSERT(booted >= BOOT_BUCKETS, ("Bucket enable before init"));
+   KASSERT(booted >= BOOT_KVA, ("Bucket enable before init"));
bucketdisable = vm_page_count_min();
 }
 
@@ -456,13 +454,11 @@ bucket_alloc(uma_zone_t zone, void *udata, int flags)
uma_bucket_t bucket;
 
/*
-* This is to stop us from allocating per cpu buckets while we're
-* running out of vm.boot_pages.  Otherwise, we would exhaust the
-* boot pages.  This also prevents us from allocating buckets in
-* low memory situations.
+* Don't allocate buckets in low memory situations.
 */
if (bucketdisable)
return (NULL);
+
/*
 * To limit bucket recursion we store the original zone flags
 * in a cookie passed via zalloc_arg/zfree_arg.  This allows the
@@ -1226,9 +1222,6 @@ keg_drain(uma_keg_t keg)
dom = &keg->uk_domain[i];
KEG_LOCK(keg, i);
LIST_FOREACH_SAFE(slab, &dom->ud_free_slab, us_link, tmp) {
-   /* We have nowhere to free these to. */
-   if (slab->us_flags & UMA_SLAB_BOOT)
-   continue;
if (keg->uk_flags & UMA_ZFLAG_HASH)
UMA_HASH_REMOVE(&keg->uk_hash, slab);
   

Re: svn commit: r356755 - in head/sys: net netinet netinet6 netpfil/ipfw/nat64 sys

2020-01-15 Thread Jeff Roberson

On Wed, 15 Jan 2020, Gleb Smirnoff wrote:


Author: glebius
Date: Wed Jan 15 06:05:20 2020
New Revision: 356755
URL: https://svnweb.freebsd.org/changeset/base/356755

Log:
 Introduce NET_EPOCH_CALL() macro and use it everywhere where we free
 data based on the network epoch.   The macro reverses the argument
 order of epoch_call(9) - first function, then its argument. NFC


Is there some practical impact of changing the argument order or does it 
just seem more natural to you?


Jeff



Modified:
 head/sys/net/bpf.c
 head/sys/net/if.c
 head/sys/net/if_gre.c
 head/sys/net/if_lagg.c
 head/sys/net/if_vlan.c
 head/sys/netinet/in.c
 head/sys/netinet/in_pcb.c
 head/sys/netinet/ip_gre.c
 head/sys/netinet/tcp_ratelimit.c
 head/sys/netinet6/in6.c
 head/sys/netinet6/ip6_gre.c
 head/sys/netpfil/ipfw/nat64/nat64lsn.c
 head/sys/sys/epoch.h

Modified: head/sys/net/bpf.c
==
--- head/sys/net/bpf.c  Wed Jan 15 05:48:36 2020(r356754)
+++ head/sys/net/bpf.c  Wed Jan 15 06:05:20 2020(r356755)
@@ -274,10 +274,10 @@ static struct filterops bpfread_filtops = {
 *
 * 2. An userland application uses ioctl() call to bpf_d descriptor.
 * All such call are serialized with global lock. BPF filters can be
- * changed, but pointer to old filter will be freed using epoch_call().
+ * changed, but pointer to old filter will be freed using NET_EPOCH_CALL().
 * Thus it should be safe for bpf_tap/bpf_mtap* code to do access to
 * filter pointers, even if change will happen during bpf_tap execution.
- * Destroying of bpf_d descriptor also is doing using epoch_call().
+ * Destroying of bpf_d descriptor also is doing using NET_EPOCH_CALL().
 *
 * 3. An userland application can write packets into bpf_d descriptor.
 * There we need to be sure, that ifnet won't disappear during bpfwrite().
@@ -288,7 +288,7 @@ static struct filterops bpfread_filtops = {
 *
 * 5. The kernel invokes bpfdetach() on interface destroying. All lists
 * are modified with global lock held and actual free() is done using
- * epoch_call().
+ * NET_EPOCH_CALL().
 */

static void
@@ -314,7 +314,7 @@ bpfif_rele(struct bpf_if *bp)

if (!refcount_release(&bp->bif_refcnt))
return;
-   epoch_call(net_epoch_preempt, &bp->epoch_ctx, bpfif_free);
+   NET_EPOCH_CALL(bpfif_free, &bp->epoch_ctx);
}

static void
@@ -330,7 +330,7 @@ bpfd_rele(struct bpf_d *d)

if (!refcount_release(&d->bd_refcnt))
return;
-   epoch_call(net_epoch_preempt, &d->epoch_ctx, bpfd_free);
+   NET_EPOCH_CALL(bpfd_free, &d->epoch_ctx);
}

static struct bpf_program_buffer*
@@ -2036,8 +2036,7 @@ bpf_setf(struct bpf_d *d, struct bpf_program *fp, u_lo
BPFD_UNLOCK(d);

if (fcode != NULL)
-   epoch_call(net_epoch_preempt, &d->epoch_ctx,
-   bpf_program_buffer_free);
+   NET_EPOCH_CALL(bpf_program_buffer_free, &d->epoch_ctx);

if (track_event)
EVENTHANDLER_INVOKE(bpf_track,

Modified: head/sys/net/if.c
==
--- head/sys/net/if.c   Wed Jan 15 05:48:36 2020(r356754)
+++ head/sys/net/if.c   Wed Jan 15 06:05:20 2020(r356755)
@@ -654,7 +654,7 @@ if_free(struct ifnet *ifp)
IFNET_WUNLOCK();

if (refcount_release(&ifp->if_refcount))
-   epoch_call(net_epoch_preempt, &ifp->if_epoch_ctx, if_destroy);
+   NET_EPOCH_CALL(if_destroy, &ifp->if_epoch_ctx);
CURVNET_RESTORE();
}

@@ -677,7 +677,7 @@ if_rele(struct ifnet *ifp)

if (!refcount_release(&ifp->if_refcount))
return;
-   epoch_call(net_epoch_preempt, &ifp->if_epoch_ctx, if_destroy);
+   NET_EPOCH_CALL(if_destroy, &ifp->if_epoch_ctx);
}

void
@@ -1826,7 +1826,7 @@ ifa_free(struct ifaddr *ifa)
{

if (refcount_release(&ifa->ifa_refcnt))
-   epoch_call(net_epoch_preempt, &ifa->ifa_epoch_ctx, ifa_destroy);
+   NET_EPOCH_CALL(ifa_destroy, &ifa->ifa_epoch_ctx);
}


@@ -3410,7 +3410,7 @@ if_freemulti(struct ifmultiaddr *ifma)
KASSERT(ifma->ifma_refcount == 0, ("if_freemulti_epoch: refcount %d",
ifma->ifma_refcount));

-   epoch_call(net_epoch_preempt, &ifma->ifma_epoch_ctx, if_destroymulti);
+   NET_EPOCH_CALL(if_destroymulti, &ifma->ifma_epoch_ctx);
}



Modified: head/sys/net/if_gre.c
==
--- head/sys/net/if_gre.c   Wed Jan 15 05:48:36 2020(r356754)
+++ head/sys/net/if_gre.c   Wed Jan 15 06:05:20 2020(r356755)
@@ -392,7 +392,7 @@ gre_delete_tunnel(struct gre_softc *sc)
if ((gs = sc->gre_so) != NULL && CK_LIST_EMPTY(&gs->list)) {
CK_LIST_REMOVE(gs, chain);
soclose(gs->so);
-   epoch_call(net_epoch_preempt, &gs->epoch_ctx, gre_sofree);
+   NET_EPOCH_CALL(gre_sofree, &gs->epoch_ctx);
sc->gre_so = NULL;
}
GRE2IFP(sc)->if_drv_flags &= 

svn commit: r356714 - head/sys/ufs/ffs

2020-01-13 Thread Jeff Roberson
Author: jeff
Date: Tue Jan 14 02:00:24 2020
New Revision: 356714
URL: https://svnweb.freebsd.org/changeset/base/356714

Log:
  Fix a long standing bug in journaled soft-updates.  The dirrem structure
  needs to handle file removal, directory removal, file move, directory move,
  etc.  The code in handle_workitem_remove() needs to propagate any completed
  journal entries to the write that will render the change stable.  In the
  case of a moved directory this means the new parent.  However, for an
  overwrite that frees a directory (DIRCHG) we must move the jsegdep to the
  removed inode to be released when it is stable in the cg bitmap or the
  unlinked inode list.  This case was previously unhandled and caused a
  panic.
  
  Reported by:  mckusick, pho
  Reviewed by:  mckusick
  Tested by:pho

Modified:
  head/sys/ufs/ffs/ffs_softdep.c

Modified: head/sys/ufs/ffs/ffs_softdep.c
==
--- head/sys/ufs/ffs/ffs_softdep.c  Tue Jan 14 01:43:04 2020        (r356713)
+++ head/sys/ufs/ffs/ffs_softdep.c  Tue Jan 14 02:00:24 2020        (r356714)
@@ -9849,14 +9849,20 @@ handle_workitem_remove(dirrem, flags)
/*
 * Move all dependencies waiting on the remove to complete
 * from the dirrem to the inode inowait list to be completed
-* after the inode has been updated and written to disk.  Any
-* marked MKDIR_PARENT are saved to be completed when the .. ref
-* is removed.
+* after the inode has been updated and written to disk.
+*
+* Any marked MKDIR_PARENT are saved to be completed when the 
+* dotdot ref is removed unless DIRCHG is specified.  For
+* directory change operations there will be no further
+* directory writes and the jsegdeps need to be moved along
+* with the rest to be completed when the inode is free or
+* stable in the inode free list.
 */
LIST_INIT();
while ((wk = LIST_FIRST(&dirrem->dm_jwork)) != NULL) {
WORKLIST_REMOVE(wk);
-   if (wk->wk_state & MKDIR_PARENT) {
+   if ((dirrem->dm_state & DIRCHG) == 0 &&
+   wk->wk_state & MKDIR_PARENT) {
wk->wk_state &= ~MKDIR_PARENT;
WORKLIST_INSERT(, wk);
continue;
___
svn-src-head@freebsd.org mailing list
https://lists.freebsd.org/mailman/listinfo/svn-src-head
To unsubscribe, send any mail to "svn-src-head-unsubscr...@freebsd.org"


Re: svn commit: r356389 - head/sys/vm

2020-01-06 Thread Jeff Roberson

On Mon, 6 Jan 2020, Gleb Smirnoff wrote:


 Jeff,

On Sun, Jan 05, 2020 at 10:54:26PM +, Jeff Roberson wrote:
J> Author: jeff
J> Date: Sun Jan  5 22:54:25 2020
J> New Revision: 356389
J> URL: https://svnweb.freebsd.org/changeset/base/356389
J>
J> Log:
J>   The fix in r356353 was insufficient.  Not every architecture returns 0 for
J>   EARLY_COUNTER.  Only amd64 seems to.
J>
J>   Suggested by:   markj
J>   Reported by:lwhsu
J>   Reviewed by:markj
J>   PR: 243117
J>
J> Modified:
J>   head/sys/vm/uma_core.c
J>
J> Modified: head/sys/vm/uma_core.c
J> 
==
J> --- head/sys/vm/uma_core.cSun Jan  5 21:35:02 2020(r356388)
J> +++ head/sys/vm/uma_core.cSun Jan  5 22:54:25 2020(r356389)
J> @@ -4153,8 +4153,10 @@ uma_zone_get_cur(uma_zone_t zone)
J>   int64_t nitems;
J>   u_int i;
J>
J> - nitems = counter_u64_fetch(zone->uz_allocs) -
J> - counter_u64_fetch(zone->uz_frees);
J> + nitems = 0;
J> + if (zone->uz_allocs != EARLY_COUNTER && zone->uz_frees != EARLY_COUNTER)
J> + nitems = counter_u64_fetch(zone->uz_allocs) -
J> + counter_u64_fetch(zone->uz_frees);
J>   CPU_FOREACH(i)
J>   nitems += atomic_load_64(&zone->uz_cpu[i].uc_allocs) -
J>   atomic_load_64(&zone->uz_cpu[i].uc_frees);
J> @@ -4168,7 +4170,9 @@ uma_zone_get_allocs(uma_zone_t zone)
J>   uint64_t nitems;
J>   u_int i;
J>
J> - nitems = counter_u64_fetch(zone->uz_allocs);
J> + nitems = 0;
J> + if (zone->uz_allocs != EARLY_COUNTER)
J> + nitems = counter_u64_fetch(zone->uz_allocs);
J>   CPU_FOREACH(i)
J>   nitems += atomic_load_64(&zone->uz_cpu[i].uc_allocs);
J>
J> @@ -4181,7 +4185,9 @@ uma_zone_get_frees(uma_zone_t zone)
J>   uint64_t nitems;
J>   u_int i;
J>
J> - nitems = counter_u64_fetch(zone->uz_frees);
J> + nitems = 0;
J> + if (zone->uz_frees != EARLY_COUNTER)
J> + nitems = counter_u64_fetch(zone->uz_frees);
J>   CPU_FOREACH(i)
J>   nitems += atomic_load_64(&zone->uz_cpu[i].uc_frees);

IMHO, tidier code would be not to check the pointers, but check UMA booted 
status:

if (__predict_true(booted == BOOT_RUNNING))
nitems = counter_u64_fetch(zone->uz_frees);


That seems fine.  I have drained my UMA patch queue for now but if I come 
back around I will include this.


Thanks,
Jeff



--
Gleb Smirnoff


___
svn-src-head@freebsd.org mailing list
https://lists.freebsd.org/mailman/listinfo/svn-src-head
To unsubscribe, send any mail to "svn-src-head-unsubscr...@freebsd.org"


svn commit: r356393 - head/sys/vm

2020-01-05 Thread Jeff Roberson
Author: jeff
Date: Mon Jan  6 02:51:19 2020
New Revision: 356393
URL: https://svnweb.freebsd.org/changeset/base/356393

Log:
  Fix uma boot pages calculations on NUMA machines that also don't have
  UMA_MD_SMALL_ALLOC.  This is unusual but not impossible.  Fix the alignment
  of zones while here.  This was already correct because uz_cpu strongly
  aligned the zone structure but the specified alignment did not match
  reality and involved redundant defines.
  
  Reviewed by:  markj, rlibby
  Differential Revision:https://reviews.freebsd.org/D23046

Modified:
  head/sys/vm/uma_core.c
  head/sys/vm/uma_int.h
  head/sys/vm/vm_page.c

Modified: head/sys/vm/uma_core.c
==
--- head/sys/vm/uma_core.c  Mon Jan  6 01:51:23 2020(r356392)
+++ head/sys/vm/uma_core.c  Mon Jan  6 02:51:19 2020(r356393)
@@ -2508,27 +2508,28 @@ zone_foreach(void (*zfunc)(uma_zone_t, void *arg), voi
  * zone of zones and zone of kegs are accounted separately.
  */
 #defineUMA_BOOT_ZONES  11
-/* Zone of zones and zone of kegs have arbitrary alignment. */
-#defineUMA_BOOT_ALIGN  32
 static int zsize, ksize;
 int
 uma_startup_count(int vm_zones)
 {
int zones, pages;
+   u_int zppera, zipers;
+   u_int kppera, kipers;
size_t space, size;
 
ksize = sizeof(struct uma_keg) +
(sizeof(struct uma_domain) * vm_ndomains);
+   ksize = roundup(ksize, UMA_SUPER_ALIGN);
zsize = sizeof(struct uma_zone) +
(sizeof(struct uma_cache) * (mp_maxid + 1)) +
(sizeof(struct uma_zone_domain) * vm_ndomains);
+   zsize = roundup(zsize, UMA_SUPER_ALIGN);
 
/*
-* Memory for the zone of kegs and its keg,
-* and for zone of zones.
+* Memory for the zone of kegs and its keg, and for zone
+* of zones.  Allocated directly in uma_startup().
 */
-   pages = howmany(roundup(zsize, CACHE_LINE_SIZE) * 2 +
-   roundup(ksize, CACHE_LINE_SIZE), PAGE_SIZE);
+   pages = howmany(zsize * 2 + ksize, PAGE_SIZE);
 
 #ifdef UMA_MD_SMALL_ALLOC
zones = UMA_BOOT_ZONES;
@@ -2542,23 +2543,33 @@ uma_startup_count(int vm_zones)
/* Memory for the rest of startup zones, UMA and VM, ... */
if (zsize > space) {
/* See keg_large_init(). */
-   u_int ppera;
+   zppera = howmany(zsize + slab_sizeof(1), PAGE_SIZE);
+   zipers = 1;
+   zones += vm_zones;
+   } else {
+   zppera = 1;
+   zipers = space / zsize;
+   }
+   pages += howmany(zones, zipers) * zppera;
 
-   ppera = howmany(roundup2(zsize, UMA_BOOT_ALIGN), PAGE_SIZE);
-   if (PAGE_SIZE * ppera - roundup2(zsize, UMA_BOOT_ALIGN) < size)
-   ppera++;
-   pages += (zones + vm_zones) * ppera;
-   } else if (roundup2(zsize, UMA_BOOT_ALIGN) > space)
-   /* See keg_small_init() special case for uk_ppera = 1. */
-   pages += zones;
-   else
-   pages += howmany(zones,
-   space / roundup2(zsize, UMA_BOOT_ALIGN));
-
/* ... and their kegs. Note that zone of zones allocates a keg! */
-   pages += howmany(zones + 1,
-   space / roundup2(ksize, UMA_BOOT_ALIGN));
+   if (ksize > space) {
+   /* See keg_large_init(). */
+   kppera = howmany(ksize + slab_sizeof(1), PAGE_SIZE);
+   kipers = 1;
+   } else {
+   kppera = 1;
+   kipers = space / ksize;
+   }
+   pages += howmany(zones + 1, kipers) * kppera;
 
+   /*
+* Allocate an additional slab for zones and kegs on NUMA
+* systems.  The round-robin allocation policy will populate at
+* least one slab per-domain.
+*/
+   pages += (vm_ndomains - 1) * (zppera + kppera);
+
return (pages);
 }
 
@@ -2578,11 +2589,11 @@ uma_startup(void *mem, int npages)
/* Use bootpages memory for the zone of zones and zone of kegs. */
m = (uintptr_t)mem;
zones = (uma_zone_t)m;
-   m += roundup(zsize, CACHE_LINE_SIZE);
+   m += zsize;
kegs = (uma_zone_t)m;
-   m += roundup(zsize, CACHE_LINE_SIZE);
+   m += zsize;
masterkeg = (uma_keg_t)m;
-   m += roundup(ksize, CACHE_LINE_SIZE);
+   m += ksize;
m = roundup(m, PAGE_SIZE);
npages -= (m - (uintptr_t)mem) / PAGE_SIZE;
mem = (void *)m;
@@ -2596,7 +2607,7 @@ uma_startup(void *mem, int npages)
args.uminit = zero_init;
args.fini = NULL;
args.keg = masterkeg;
-   args.align = UMA_BOOT_ALIGN - 1;
+   args.align = UMA_SUPER_ALIGN - 1;
args.flags = UMA_ZFLAG_INTERNAL;
zone_ctor(kegs, zsize, , M_WAITOK);
 
@@ -2610,7 +2621,7 @@ uma_startup(void *mem, int npages)
args.uminit = zero_init;
args.fini = NULL;
 

svn commit: r356389 - head/sys/vm

2020-01-05 Thread Jeff Roberson
Author: jeff
Date: Sun Jan  5 22:54:25 2020
New Revision: 356389
URL: https://svnweb.freebsd.org/changeset/base/356389

Log:
  The fix in r356353 was insufficient.  Not every architecture returns 0 for
  EARLY_COUNTER.  Only amd64 seems to.
  
  Suggested by: markj
  Reported by:  lwhsu
  Reviewed by:  markj
  PR:   243117

Modified:
  head/sys/vm/uma_core.c

Modified: head/sys/vm/uma_core.c
==
--- head/sys/vm/uma_core.c  Sun Jan  5 21:35:02 2020(r356388)
+++ head/sys/vm/uma_core.c  Sun Jan  5 22:54:25 2020(r356389)
@@ -4153,8 +4153,10 @@ uma_zone_get_cur(uma_zone_t zone)
int64_t nitems;
u_int i;
 
-   nitems = counter_u64_fetch(zone->uz_allocs) -
-   counter_u64_fetch(zone->uz_frees);
+   nitems = 0;
+   if (zone->uz_allocs != EARLY_COUNTER && zone->uz_frees != EARLY_COUNTER)
+   nitems = counter_u64_fetch(zone->uz_allocs) -
+   counter_u64_fetch(zone->uz_frees);
CPU_FOREACH(i)
nitems += atomic_load_64(&zone->uz_cpu[i].uc_allocs) -
atomic_load_64(&zone->uz_cpu[i].uc_frees);
@@ -4168,7 +4170,9 @@ uma_zone_get_allocs(uma_zone_t zone)
uint64_t nitems;
u_int i;
 
-   nitems = counter_u64_fetch(zone->uz_allocs);
+   nitems = 0;
+   if (zone->uz_allocs != EARLY_COUNTER)
+   nitems = counter_u64_fetch(zone->uz_allocs);
CPU_FOREACH(i)
nitems += atomic_load_64(&zone->uz_cpu[i].uc_allocs);
 
@@ -4181,7 +4185,9 @@ uma_zone_get_frees(uma_zone_t zone)
uint64_t nitems;
u_int i;
 
-   nitems = counter_u64_fetch(zone->uz_frees);
+   nitems = 0;
+   if (zone->uz_frees != EARLY_COUNTER)
+   nitems = counter_u64_fetch(zone->uz_frees);
CPU_FOREACH(i)
nitems += atomic_load_64(&zone->uz_cpu[i].uc_frees);
 
___
svn-src-head@freebsd.org mailing list
https://lists.freebsd.org/mailman/listinfo/svn-src-head
To unsubscribe, send any mail to "svn-src-head-unsubscr...@freebsd.org"


Re: svn commit: r356348 - in head/sys: kern vm

2020-01-05 Thread Jeff Roberson

On Sun, 5 Jan 2020, Mark Linimon wrote:


On Sat, Jan 04, 2020 at 03:15:34AM +, Jeff Roberson wrote:

  Use a separate lock for the zone and keg.


Out of curiosity, will there be measurable real-world speedups from
this an similar work, or will this mostly apply to edge cases, or ... ?


It depends on which real world.  A lot of workloads don't really show much 
allocator activity.  For very high speed networking, and especially very 
high speed networking on big NUMA machines, the speedup is considerable. 
Netflix reported the earlier round of work cut the time spent in uma by 
about 30%.  For non-numa machines the last ~6 patches cut another 30% off 
of that in my tests.  Even for Netflix, uma was not in the top 5 of their 
profile before this work.


The major perf upshot was somewhere around an 8x improvement when freeing 
on a different NUMA domain than you allocated from when the allocation 
policy is first-touch.  This is called a cross-domain or 'xdomain' free in 
the code.  This made it possible to enable first-touch for UMA by default 
on all NUMA machines.


I wrote a simple allocator perf test that loops allocating 2k mbufs and 
appending them to a random remote core's queue after which it drains its 
local queue.  10 million iterations across 32 cores in two numa domains 
gives 320,000,000 packets allocated and freed.  The time within the same 
domain was about 4 seconds, before this patch series going to a different 
domain was around 40 seconds and after it was around 5 seconds.  So only 
a ~25% penalty when doing 2 million packets-per-second-per-core.
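
(For the record, the arithmetic behind those figures: 320,000,000 alloc/free
pairs in ~5 seconds is ~64M per second across 32 cores, i.e. ~2M per core per
second, and 5 s against the 4 s same-domain baseline is the ~25% penalty.)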


Many of the recent changes were really as much about code organization and 
readability as performance.  After 18 years of features coming and going, 
reorganizations, etc. it was getting a bit crufty.


Jeff



mcl


___
svn-src-head@freebsd.org mailing list
https://lists.freebsd.org/mailman/listinfo/svn-src-head
To unsubscribe, send any mail to "svn-src-head-unsubscr...@freebsd.org"


svn commit: r356353 - head/sys/vm

2020-01-04 Thread Jeff Roberson
Author: jeff
Date: Sat Jan  4 19:29:25 2020
New Revision: 356353
URL: https://svnweb.freebsd.org/changeset/base/356353

Log:
  Fix an assertion introduced in r356348.  On architectures without
  UMA_MD_SMALL_ALLOC vmem has a more complicated startup sequence that
  violated the new assert.  Resolve this by rewriting the COLD asserts to
  look at the per-cpu allocation counts for evidence of api activity.
  
  Discussed with:   rlibby
  Reviewed by:  markj
  Reported by:  lwhsu

Modified:
  head/sys/vm/uma_core.c
  head/sys/vm/uma_int.h

Modified: head/sys/vm/uma_core.c
==
--- head/sys/vm/uma_core.c  Sat Jan  4 18:59:46 2020(r356352)
+++ head/sys/vm/uma_core.c  Sat Jan  4 19:29:25 2020(r356353)
@@ -294,7 +294,10 @@ static int sysctl_handle_uma_zone_flags(SYSCTL_HANDLER
 static int sysctl_handle_uma_slab_efficiency(SYSCTL_HANDLER_ARGS);
 static int sysctl_handle_uma_zone_items(SYSCTL_HANDLER_ARGS);
 
+static uint64_t uma_zone_get_allocs(uma_zone_t zone);
+
 #ifdef INVARIANTS
+static uint64_t uma_keg_get_allocs(uma_keg_t zone);
 static inline struct noslabbits *slab_dbg_bits(uma_slab_t slab, uma_keg_t keg);
 
 static bool uma_dbg_kskip(uma_keg_t keg, void *mem);
@@ -4184,6 +4187,22 @@ uma_zone_get_frees(uma_zone_t zone)
 
return (nitems);
 }
+
+#ifdef INVARIANTS
+/* Used only for KEG_ASSERT_COLD(). */
+static uint64_t
+uma_keg_get_allocs(uma_keg_t keg)
+{
+   uma_zone_t z;
+   uint64_t nitems;
+
+   nitems = 0;
+   LIST_FOREACH(z, &keg->uk_zones, uz_link)
+   nitems += uma_zone_get_allocs(z);
+
+   return (nitems);
+}
+#endif
 
 /* See uma.h */
 void

Modified: head/sys/vm/uma_int.h
==
--- head/sys/vm/uma_int.h   Sat Jan  4 18:59:46 2020(r356352)
+++ head/sys/vm/uma_int.h   Sat Jan  4 19:29:25 2020(r356353)
@@ -305,7 +305,7 @@ typedef struct uma_keg  * uma_keg_t;
 
 #ifdef _KERNEL
 #defineKEG_ASSERT_COLD(k)  
\
-   KASSERT((k)->uk_domain[0].ud_pages == 0,\
+   KASSERT(uma_keg_get_allocs((k)) == 0,   \
("keg %s initialization after use.", (k)->uk_name))
 
 /*
@@ -529,7 +529,7 @@ struct uma_zone {
 #defineUZ_ITEMS_SLEEPER(1LL << UZ_ITEMS_SLEEPER_SHIFT)
 
 #defineZONE_ASSERT_COLD(z) 
\
-   KASSERT((z)->uz_bkt_count == 0, \
+   KASSERT(uma_zone_get_allocs((z)) == 0,  \
("zone %s initialization after use.", (z)->uz_name))
 
 #undef UMA_ALIGN
___
svn-src-head@freebsd.org mailing list
https://lists.freebsd.org/mailman/listinfo/svn-src-head
To unsubscribe, send any mail to "svn-src-head-unsubscr...@freebsd.org"


svn commit: r356351 - in head/sys: conf vm

2020-01-04 Thread Jeff Roberson
Author: jeff
Date: Sat Jan  4 18:48:13 2020
New Revision: 356351
URL: https://svnweb.freebsd.org/changeset/base/356351

Log:
  UMA NUMA flag day.  UMA_ZONE_NUMA was a source of confusion.  Make the names
  more consistent with other NUMA features as UMA_ZONE_FIRSTTOUCH and
  UMA_ZONE_ROUNDROBIN.  The system will now select a default depending
  on kernel configuration.  API users need only specify one if they want to
  override the default.
  
  Remove the UMA_XDOMAIN and UMA_FIRSTTOUCH kernel options and key only off
  of NUMA.  XDOMAIN is now fast enough in all cases to enable whenever NUMA
  is.
  
  Reviewed by:  markj
  Discussed with:   rlibby
  Differential Revision:https://reviews.freebsd.org/D22831

Modified:
  head/sys/conf/options
  head/sys/vm/uma.h
  head/sys/vm/uma_core.c
  head/sys/vm/uma_int.h
  head/sys/vm/vm_glue.c

Modified: head/sys/conf/options
==
--- head/sys/conf/options   Sat Jan  4 07:56:28 2020(r356350)
+++ head/sys/conf/options   Sat Jan  4 18:48:13 2020(r356351)
@@ -621,8 +621,6 @@ NO_SWAPPING opt_vm.h
 MALLOC_MAKE_FAILURES   opt_vm.h
 MALLOC_PROFILE opt_vm.h
 MALLOC_DEBUG_MAXZONES  opt_vm.h
-UMA_XDOMAINopt_vm.h
-UMA_FIRSTTOUCH opt_vm.h
 
 # The MemGuard replacement allocator used for tamper-after-free detection
 DEBUG_MEMGUARD opt_vm.h

Modified: head/sys/vm/uma.h
==
--- head/sys/vm/uma.h   Sat Jan  4 07:56:28 2020(r356350)
+++ head/sys/vm/uma.h   Sat Jan  4 18:48:13 2020(r356351)
@@ -268,11 +268,9 @@ uma_zone_t uma_zcache_create(char *name, int size, uma
 #defineUMA_ZONE_PCPU   0x8000  /*
 * Allocates mp_maxid + 1 slabs of 
PAGE_SIZE
 */
-#defineUMA_ZONE_NUMA   0x1 /*
-* NUMA aware Zone.  Implements a best
-* effort first-touch policy.
-*/
-#defineUMA_ZONE_MINBUCKET  0x2 /* Use smallest buckets. */
+#defineUMA_ZONE_MINBUCKET  0x1 /* Use smallest buckets. */
+#defineUMA_ZONE_FIRSTTOUCH 0x2 /* First touch NUMA policy */
+#defineUMA_ZONE_ROUNDROBIN 0x4 /* Round-robin NUMA policy. */
 
 /*
  * These flags are shared between the keg and zone.  In zones wishing to add
@@ -281,7 +279,8 @@ uma_zone_t uma_zcache_create(char *name, int size, uma
  */
 #defineUMA_ZONE_INHERIT
\
 (UMA_ZONE_OFFPAGE | UMA_ZONE_MALLOC | UMA_ZONE_NOFREE |\
-UMA_ZONE_HASH | UMA_ZONE_VTOSLAB | UMA_ZONE_PCPU | UMA_ZONE_NUMA)
+UMA_ZONE_HASH | UMA_ZONE_VTOSLAB | UMA_ZONE_PCPU | \
+UMA_ZONE_FIRSTTOUCH | UMA_ZONE_ROUNDROBIN)
 
 /* Definitions for align */
 #define UMA_ALIGN_PTR  (sizeof(void *) - 1)/* Alignment fit for ptr */

Modified: head/sys/vm/uma_core.c
==
--- head/sys/vm/uma_core.c  Sat Jan  4 07:56:28 2020(r356350)
+++ head/sys/vm/uma_core.c  Sat Jan  4 18:48:13 2020(r356351)
@@ -360,7 +360,8 @@ bucket_init(void)
size += sizeof(void *) * ubz->ubz_entries;
ubz->ubz_zone = uma_zcreate(ubz->ubz_name, size,
NULL, NULL, NULL, NULL, UMA_ALIGN_PTR,
-   UMA_ZONE_MTXCLASS | UMA_ZFLAG_BUCKET | UMA_ZONE_NUMA);
+   UMA_ZONE_MTXCLASS | UMA_ZFLAG_BUCKET |
+   UMA_ZONE_FIRSTTOUCH);
}
 }
 
@@ -387,11 +388,9 @@ bucket_zone_max(uma_zone_t zone, int nitems)
int bpcpu;
 
bpcpu = 2;
-#ifdef UMA_XDOMAIN
-   if ((zone->uz_flags & UMA_ZONE_NUMA) != 0)
+   if ((zone->uz_flags & UMA_ZONE_FIRSTTOUCH) != 0)
/* Count the cross-domain bucket. */
bpcpu++;
-#endif
 
	for (ubz = &bucket_zones[0]; ubz->ubz_entries != 0; ubz++)
if (ubz->ubz_entries * bpcpu * mp_ncpus > nitems)
@@ -637,7 +636,7 @@ cache_bucket_load_free(uma_cache_t cache, uma_bucket_t
	cache_bucket_load(&cache->uc_freebucket, b);
 }
 
-#ifdef UMA_XDOMAIN
+#ifdef NUMA
 static inline void 
 cache_bucket_load_cross(uma_cache_t cache, uma_bucket_t b)
 {
@@ -999,7 +998,7 @@ cache_drain_safe_cpu(uma_zone_t zone, void *unused)
b1 = b2 = b3 = NULL;
ZONE_LOCK(zone);
critical_enter();
-   if (zone->uz_flags & UMA_ZONE_NUMA)
+   if (zone->uz_flags & UMA_ZONE_FIRSTTOUCH)
domain = PCPU_GET(domain);
else
domain = 0;
@@ -1905,8 +1904,8 @@ keg_ctor(void *mem, int size, void *udata, int flags)
 
/*
 * We use a global round-robin policy by default.  Zones with
-* UMA_ZONE_NUMA 
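
As a hedged usage note (not part of the commit): with the renamed flags, a
zone that wants a specific placement policy rather than the kernel-configured
default would pass one of the new flags at creation time.  The struct foo and
foo_zone names below are hypothetical:

#include <sys/param.h>
#include <vm/uma.h>

struct foo {				/* hypothetical item type */
	int	f_val;
};

static uma_zone_t foo_zone;

static void
foo_zone_setup(void)
{
	/* Explicitly request first-touch NUMA placement for this zone. */
	foo_zone = uma_zcreate("foo", sizeof(struct foo),
	    NULL, NULL, NULL, NULL, UMA_ALIGN_PTR, UMA_ZONE_FIRSTTOUCH);
}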

svn commit: r356350 - head/sys/vm

2020-01-03 Thread Jeff Roberson
Author: jeff
Date: Sat Jan  4 07:56:28 2020
New Revision: 356350
URL: https://svnweb.freebsd.org/changeset/base/356350

Log:
  Sort cross-domain frees into per-domain buckets before inserting these
  onto their respective bucket lists.  This is a several-orders-of-magnitude
  improvement in contention on the keg lock under heavy free traffic while
  requiring only an additional bucket per-domain worth of memory.
  
  Discussed with:   markj, rlibby
  Differential Revision:https://reviews.freebsd.org/D22830

Modified:
  head/sys/vm/uma_core.c
  head/sys/vm/uma_int.h

Modified: head/sys/vm/uma_core.c
==
--- head/sys/vm/uma_core.c  Sat Jan  4 03:30:08 2020(r356349)
+++ head/sys/vm/uma_core.c  Sat Jan  4 07:56:28 2020(r356350)
@@ -951,10 +951,6 @@ cache_drain(uma_zone_t zone)
 *
 * XXX: It would good to be able to assert that the zone is being
 * torn down to prevent improper use of cache_drain().
-*
-* XXX: We lock the zone before passing into bucket_cache_reclaim() as
-* it is used elsewhere.  Should the tear-down path be made special
-* there in some form?
 */
CPU_FOREACH(cpu) {
	cache = &zone->uz_cpu[cpu];
@@ -974,9 +970,7 @@ cache_drain(uma_zone_t zone)
bucket_free(zone, bucket, NULL);
}
}
-   ZONE_LOCK(zone);
bucket_cache_reclaim(zone, true);
-   ZONE_UNLOCK(zone);
 }
 
 static void
@@ -1082,9 +1076,29 @@ bucket_cache_reclaim(uma_zone_t zone, bool drain)
int i;
 
for (i = 0; i < vm_ndomains; i++) {
+   /*
+* The cross bucket is partially filled and not part of
+* the item count.  Reclaim it individually here.
+*/
		zdom = &zone->uz_domain[i];
+   ZONE_CROSS_LOCK(zone);
+   bucket = zdom->uzd_cross;
+   zdom->uzd_cross = NULL;
+   ZONE_CROSS_UNLOCK(zone);
+   if (bucket != NULL) {
+   bucket_drain(zone, bucket);
+   bucket_free(zone, bucket, NULL);
+   }
 
/*
+* Shrink the zone bucket size to ensure that the per-CPU caches
+* don't grow too large.
+*/
+   ZONE_LOCK(zone);
+   if (i == 0 && zone->uz_bucket_size > zone->uz_bucket_size_min)
+   zone->uz_bucket_size--;
+
+   /*
 * If we were asked to drain the zone, we are done only once
 * this bucket cache is empty.  Otherwise, we reclaim items in
 * excess of the zone's estimated working set size.  If the
@@ -1114,14 +1128,8 @@ bucket_cache_reclaim(uma_zone_t zone, bool drain)
bucket_free(zone, bucket, NULL);
ZONE_LOCK(zone);
}
+   ZONE_UNLOCK(zone);
}
-
-   /*
-* Shrink the zone bucket size to ensure that the per-CPU caches
-* don't grow too large.
-*/
-   if (zone->uz_bucket_size > zone->uz_bucket_size_min)
-   zone->uz_bucket_size--;
 }
 
 static void
@@ -1224,8 +1232,8 @@ zone_reclaim(uma_zone_t zone, int waitok, bool drain)
		msleep(zone, &zone->uz_lock, PVM, "zonedrain", 1);
}
zone->uz_flags |= UMA_ZFLAG_RECLAIMING;
-   bucket_cache_reclaim(zone, drain);
ZONE_UNLOCK(zone);
+   bucket_cache_reclaim(zone, drain);
 
/*
 * The DRAINING flag protects us from being freed while
@@ -2263,6 +2271,7 @@ zone_ctor(void *mem, int size, void *udata, int flags)
	zone_foreach(zone_count, &cnt);
zone->uz_namecnt = cnt.count;
ZONE_LOCK_INIT(zone, (arg->flags & UMA_ZONE_MTXCLASS));
+   ZONE_CROSS_LOCK_INIT(zone);
 
for (i = 0; i < vm_ndomains; i++)
		TAILQ_INIT(&zone->uz_domain[i].uzd_buckets);
@@ -2448,6 +2457,7 @@ zone_dtor(void *arg, int size, void *udata)
counter_u64_free(zone->uz_fails);
free(zone->uz_ctlname, M_UMA);
ZONE_LOCK_FINI(zone);
+   ZONE_CROSS_LOCK_FINI(zone);
 }
 
 /*
@@ -3724,7 +3734,76 @@ zfree_item:
zone_free_item(zone, item, udata, SKIP_DTOR);
 }
 
+#ifdef UMA_XDOMAIN
+/*
+ * sort crossdomain free buckets to domain correct buckets and cache
+ * them.
+ */
 static void
+zone_free_cross(uma_zone_t zone, uma_bucket_t bucket, void *udata)
+{
+   struct uma_bucketlist fullbuckets;
+   uma_zone_domain_t zdom;
+   uma_bucket_t b;
+   void *item;
+   int domain;
+
+   CTR3(KTR_UMA,
+   "uma_zfree: zone %s(%p) draining cross bucket %p",
+   zone->uz_name, zone, bucket);
+
+   TAILQ_INIT(&fullbuckets);
+
+   /*
+* To avoid having ndomain * ndomain buckets for sorting we have a
+* lock on the current crossfree bucket.  A full matrix with
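
The function body is cut off above.  As a hedged illustration of the sorting
step the log describes (a userland analogue, not the committed
zone_free_cross()), the idea is simply to bin each freed item by its home
domain before handing it back; domain_of() is a hypothetical stand-in for the
kernel's physical-address-to-domain lookup:

#include <stddef.h>
#include <stdint.h>

#define NDOMAINS	2
#define BUCKET_SIZE	64

struct bucket {
	void	*items[BUCKET_SIZE];
	int	cnt;
};

/* Hypothetical: the kernel derives the domain from the physical address. */
static int
domain_of(void *item)
{
	return ((int)(((uintptr_t)item >> 21) % NDOMAINS));
}

/* Move every item in the mixed cross-free bucket to its home domain. */
static void
sort_cross_bucket(struct bucket *cross, struct bucket perdom[NDOMAINS])
{
	while (cross->cnt > 0) {
		void *item = cross->items[--cross->cnt];
		struct bucket *b = &perdom[domain_of(item)];

		if (b->cnt < BUCKET_SIZE)
			b->items[b->cnt++] = item;
		/* else: the real code hands off the full bucket and starts a new one */
	}
}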

svn commit: r356349 - in head: lib/libmemstat sys/vm

2020-01-03 Thread Jeff Roberson
Author: jeff
Date: Sat Jan  4 03:30:08 2020
New Revision: 356349
URL: https://svnweb.freebsd.org/changeset/base/356349

Log:
  Use per-domain keg locks.  This provides both a lock and separate space
  accounting for each NUMA domain.  Independent keg domain locks are important
  with cross-domain frees.  Hashed zones are non-numa and use a single keg
  lock to protect the hash table.
  
  Reviewed by:  markj, rlibby
  Differential Revision:https://reviews.freebsd.org/D22829

Modified:
  head/lib/libmemstat/memstat_uma.c
  head/sys/vm/uma_core.c
  head/sys/vm/uma_int.h

Modified: head/lib/libmemstat/memstat_uma.c
==
--- head/lib/libmemstat/memstat_uma.c   Sat Jan  4 03:15:34 2020
(r356348)
+++ head/lib/libmemstat/memstat_uma.c   Sat Jan  4 03:30:08 2020
(r356349)
@@ -311,10 +311,12 @@ memstat_kvm_uma(struct memory_type_list *list, void *k
LIST_HEAD(, uma_keg) uma_kegs;
struct memory_type *mtp;
struct uma_zone_domain uzd;
+   struct uma_domain ukd;
struct uma_bucket *ubp, ub;
struct uma_cache *ucp, *ucp_array;
struct uma_zone *uzp, uz;
struct uma_keg *kzp, kz;
+   uint64_t kegfree;
int hint_dontsearch, i, mp_maxid, ndomains, ret;
char name[MEMTYPE_MAXNAME];
cpuset_t all_cpus;
@@ -454,18 +456,29 @@ skip_percpu:
for (i = 0; i < ndomains; i++) {
	ret = kread(kvm, &uz.uz_domain[i], &uzd,
	   sizeof(uzd), 0);
+   if (ret != 0)
+   continue;
	for (ubp =
	    TAILQ_FIRST(&uzd.uzd_buckets);
	    ubp != NULL;
	    ubp = TAILQ_NEXT(&ub, ub_link)) {
		ret = kread(kvm, ubp, &ub,
		   sizeof(ub), 0);
+   if (ret != 0)
+   continue;
mtp->mt_zonefree += ub.ub_cnt;
}
}
if (!((kz.uk_flags & UMA_ZONE_SECONDARY) &&
LIST_FIRST(_zones) != uzp)) {
-   mtp->mt_kegfree = kz.uk_free;
+   kegfree = 0;
+   for (i = 0; i < ndomains; i++) {
+   ret = kread(kvm, &kzp->uk_domain[i],
+   &ukd, sizeof(ukd), 0);
+   if (ret != 0)
+   kegfree += ukd.ud_free;
+   }
+   mtp->mt_kegfree = kegfree;
mtp->mt_free += mtp->mt_kegfree;
}
mtp->mt_free += mtp->mt_zonefree;

Modified: head/sys/vm/uma_core.c
==
--- head/sys/vm/uma_core.c  Sat Jan  4 03:15:34 2020(r356348)
+++ head/sys/vm/uma_core.c  Sat Jan  4 03:30:08 2020(r356349)
@@ -740,23 +740,28 @@ static void
 zone_timeout(uma_zone_t zone, void *unused)
 {
uma_keg_t keg;
-   u_int slabs;
+   u_int slabs, pages;
 
if ((zone->uz_flags & UMA_ZONE_HASH) == 0)
goto update_wss;
 
keg = zone->uz_keg;
-   KEG_LOCK(keg);
+
/*
+* Hash zones are non-numa by definition so the first domain
+* is the only one present.
+*/
+   KEG_LOCK(keg, 0);
+   pages = keg->uk_domain[0].ud_pages;
+
+   /*
 * Expand the keg hash table.
 *
 * This is done if the number of slabs is larger than the hash size.
 * What I'm trying to do here is completely reduce collisions.  This
 * may be a little aggressive.  Should I allow for two collisions max?
 */
-   if (keg->uk_flags & UMA_ZONE_HASH &&
-   (slabs = keg->uk_pages / keg->uk_ppera) >
-keg->uk_hash.uh_hashsize) {
+   if ((slabs = pages / keg->uk_ppera) > keg->uk_hash.uh_hashsize) {
struct uma_hash newhash;
struct uma_hash oldhash;
int ret;
@@ -767,9 +772,9 @@ zone_timeout(uma_zone_t zone, void *unused)
 * I have to do everything in stages and check for
 * races.
 */
-   KEG_UNLOCK(keg);
+   KEG_UNLOCK(keg, 0);
	ret = hash_alloc(&newhash, 1 << fls(slabs));
-   KEG_LOCK(keg);
+   KEG_LOCK(keg, 0);
if (ret) {
		if (hash_expand(&keg->uk_hash, &newhash)) {
oldhash = keg->uk_hash;
@@ -777,12 +782,12 @@ zone_timeout(uma_zone_t 

svn commit: r356348 - in head/sys: kern vm

2020-01-03 Thread Jeff Roberson
Author: jeff
Date: Sat Jan  4 03:15:34 2020
New Revision: 356348
URL: https://svnweb.freebsd.org/changeset/base/356348

Log:
  Use a separate lock for the zone and keg.  This provides concurrency
  between populating buckets from the slab layer and fetching full buckets
  from the zone layer.  Eliminate some nonsense locking patterns where
  we lock to fetch a single variable.
  
  Reviewed by:  markj
  Differential Revision:https://reviews.freebsd.org/D22828

Modified:
  head/sys/kern/kern_mbuf.c
  head/sys/vm/uma.h
  head/sys/vm/uma_core.c
  head/sys/vm/uma_int.h

Modified: head/sys/kern/kern_mbuf.c
==
--- head/sys/kern/kern_mbuf.c   Sat Jan  4 03:04:46 2020(r356347)
+++ head/sys/kern/kern_mbuf.c   Sat Jan  4 03:15:34 2020(r356348)
@@ -715,7 +715,7 @@ mb_dtor_pack(void *mem, int size, void *arg)
 * is deliberate. We don't want to acquire the zone lock for every
 * mbuf free.
 */
-   if (uma_zone_exhausted_nolock(zone_clust))
+   if (uma_zone_exhausted(zone_clust))
uma_zone_reclaim(zone_pack, UMA_RECLAIM_DRAIN);
 }
 

Modified: head/sys/vm/uma.h
==
--- head/sys/vm/uma.h   Sat Jan  4 03:04:46 2020(r356347)
+++ head/sys/vm/uma.h   Sat Jan  4 03:15:34 2020(r356348)
@@ -641,7 +641,6 @@ void uma_prealloc(uma_zone_t zone, int itemcnt);
  * Non-zero if zone is exhausted.
  */
 int uma_zone_exhausted(uma_zone_t zone);
-int uma_zone_exhausted_nolock(uma_zone_t zone);
 
 /*
  * Common UMA_ZONE_PCPU zones.

Modified: head/sys/vm/uma_core.c
==
--- head/sys/vm/uma_core.c  Sat Jan  4 03:04:46 2020(r356347)
+++ head/sys/vm/uma_core.c  Sat Jan  4 03:15:34 2020(r356348)
@@ -922,7 +922,7 @@ bucket_drain(uma_zone_t zone, uma_bucket_t bucket)
 /*
  * Drains the per cpu caches for a zone.
  *
- * NOTE: This may only be called while the zone is being turn down, and not
+ * NOTE: This may only be called while the zone is being torn down, and not
  * during normal operation.  This is necessary in order that we do not have
  * to migrate CPUs to drain the per-CPU caches.
  *
@@ -1041,7 +1041,7 @@ pcpu_cache_drain_safe(uma_zone_t zone)
int cpu;
 
/*
-* Polite bucket sizes shrinking was not enouth, shrink aggressively.
+* Polite bucket sizes shrinking was not enough, shrink aggressively.
 */
if (zone)
cache_shrink(zone, NULL);
@@ -1222,7 +1222,7 @@ zone_reclaim(uma_zone_t zone, int waitok, bool drain)
while (zone->uz_flags & UMA_ZFLAG_RECLAIMING) {
if (waitok == M_NOWAIT)
goto out;
-   msleep(zone, zone->uz_lockptr, PVM, "zonedrain", 1);
+   msleep(zone, &zone->uz_lock, PVM, "zonedrain", 1);
}
zone->uz_flags |= UMA_ZFLAG_RECLAIMING;
bucket_cache_reclaim(zone, drain);
@@ -1258,8 +1258,8 @@ zone_trim(uma_zone_t zone, void *unused)
 
 /*
  * Allocate a new slab for a keg.  This does not insert the slab onto a list.
- * If the allocation was successful, the keg lock will be held upon return,
- * otherwise the keg will be left unlocked.
+ * The keg should be locked on entry and will be dropped and reacquired on
+ * return.
  *
  * Arguments:
  * flags   Wait flags for the item initialization routine
@@ -1283,8 +1283,6 @@ keg_alloc_slab(uma_keg_t keg, uma_zone_t zone, int dom
KASSERT(domain >= 0 && domain < vm_ndomains,
("keg_alloc_slab: domain %d out of range", domain));
KEG_LOCK_ASSERT(keg);
-   MPASS(zone->uz_lockptr == &keg->uk_lock);
-
allocf = keg->uk_allocf;
KEG_UNLOCK(keg);
 
@@ -1293,7 +1291,7 @@ keg_alloc_slab(uma_keg_t keg, uma_zone_t zone, int dom
if (keg->uk_flags & UMA_ZONE_OFFPAGE) {
slab = zone_alloc_item(keg->uk_slabzone, NULL, domain, aflags);
if (slab == NULL)
-   goto out;
+   goto fail;
}
 
/*
@@ -1317,8 +1315,7 @@ keg_alloc_slab(uma_keg_t keg, uma_zone_t zone, int dom
if (mem == NULL) {
if (keg->uk_flags & UMA_ZONE_OFFPAGE)
zone_free_item(keg->uk_slabzone, slab, NULL, SKIP_NONE);
-   slab = NULL;
-   goto out;
+   goto fail;
}
uma_total_inc(size);
 
@@ -1348,8 +1345,7 @@ keg_alloc_slab(uma_keg_t keg, uma_zone_t zone, int dom
break;
if (i != keg->uk_ipers) {
keg_free_slab(keg, slab, i);
-   slab = NULL;
-   goto out;
+   goto fail;
}
}
KEG_LOCK(keg);
@@ -1363,8 +1359,11 @@ keg_alloc_slab(uma_keg_t keg, uma_zone_t zone, int dom
  

svn commit: r356347 - head/sys/vm

2020-01-03 Thread Jeff Roberson
Author: jeff
Date: Sat Jan  4 03:04:46 2020
New Revision: 356347
URL: https://svnweb.freebsd.org/changeset/base/356347

Log:
  Use atomics for the zone limit and sleeper count.  This relies on the
  sleepq to serialize sleepers.  This patch retains the existing sleep/wakeup
  paradigm to limit 'thundering herd' wakeups.  It resolves a missing wakeup
  in one case but otherwise should be bug for bug compatible.  In particular,
  there are still various races surrounding adjusting the limit via sysctl
  that are now documented.
  
  Discussed with:   markj
  Reviewed by:  rlibby
  Differential Revision:https://reviews.freebsd.org/D22827

Modified:
  head/sys/vm/uma_core.c
  head/sys/vm/uma_int.h

Modified: head/sys/vm/uma_core.c
==
--- head/sys/vm/uma_core.c  Sat Jan  4 01:13:00 2020(r356346)
+++ head/sys/vm/uma_core.c  Sat Jan  4 03:04:46 2020(r356347)
@@ -75,6 +75,7 @@ __FBSDID("$FreeBSD$");
 #include 
 #include 
 #include 
+#include 
 #include 
 #include 
 #include 
@@ -267,8 +268,9 @@ static void hash_free(struct uma_hash *hash);
 static void uma_timeout(void *);
 static void uma_startup3(void);
 static void *zone_alloc_item(uma_zone_t, void *, int, int);
-static void *zone_alloc_item_locked(uma_zone_t, void *, int, int);
 static void zone_free_item(uma_zone_t, void *, void *, enum zfreeskip);
+static int zone_alloc_limit(uma_zone_t zone, int count, int flags);
+static void zone_free_limit(uma_zone_t zone, int count);
 static void bucket_enable(void);
 static void bucket_init(void);
 static uma_bucket_t bucket_alloc(uma_zone_t zone, void *, int);
@@ -290,6 +292,7 @@ static int sysctl_handle_uma_zone_allocs(SYSCTL_HANDLE
 static int sysctl_handle_uma_zone_frees(SYSCTL_HANDLER_ARGS);
 static int sysctl_handle_uma_zone_flags(SYSCTL_HANDLER_ARGS);
 static int sysctl_handle_uma_slab_efficiency(SYSCTL_HANDLER_ARGS);
+static int sysctl_handle_uma_zone_items(SYSCTL_HANDLER_ARGS);
 
 #ifdef INVARIANTS
 static inline struct noslabbits *slab_dbg_bits(uma_slab_t slab, uma_keg_t keg);
@@ -893,7 +896,7 @@ hash_free(struct uma_hash *hash)
  *
  * Arguments:
  * zone   The zone to free to, must be unlocked.
- * bucket The free/alloc bucket with items, cpu queue must be locked.
+ * bucket The free/alloc bucket with items.
  *
  * Returns:
  * Nothing
@@ -904,20 +907,15 @@ bucket_drain(uma_zone_t zone, uma_bucket_t bucket)
 {
int i;
 
-   if (bucket == NULL)
+   if (bucket == NULL || bucket->ub_cnt == 0)
return;
 
if (zone->uz_fini)
for (i = 0; i < bucket->ub_cnt; i++) 
zone->uz_fini(bucket->ub_bucket[i], zone->uz_size);
zone->uz_release(zone->uz_arg, bucket->ub_bucket, bucket->ub_cnt);
-   if (zone->uz_max_items > 0) {
-   ZONE_LOCK(zone);
-   zone->uz_items -= bucket->ub_cnt;
-   if (zone->uz_sleepers && zone->uz_items < zone->uz_max_items)
-   wakeup_one(zone);
-   ZONE_UNLOCK(zone);
-   }
+   if (zone->uz_max_items > 0)
+   zone_free_limit(zone, bucket->ub_cnt);
bucket->ub_cnt = 0;
 }
 
@@ -2096,10 +2094,11 @@ zone_alloc_sysctl(uma_zone_t zone, void *unused)
 */
oid = SYSCTL_ADD_NODE(NULL, SYSCTL_CHILDREN(zone->uz_oid), OID_AUTO,
"limit", CTLFLAG_RD, NULL, "");
+   SYSCTL_ADD_PROC(NULL, SYSCTL_CHILDREN(oid), OID_AUTO,
+   "items", CTLFLAG_RD | CTLTYPE_U64 | CTLFLAG_MPSAFE,
+   zone, 0, sysctl_handle_uma_zone_items, "QU",
+   "current number of allocated items if limit is set");
SYSCTL_ADD_U64(NULL, SYSCTL_CHILDREN(oid), OID_AUTO,
-   "items", CTLFLAG_RD, >uz_items, 0,
-   "current number of cached items");
-   SYSCTL_ADD_U64(NULL, SYSCTL_CHILDREN(oid), OID_AUTO,
"max_items", CTLFLAG_RD, >uz_max_items, 0,
"Maximum number of cached items");
SYSCTL_ADD_U32(NULL, SYSCTL_CHILDREN(oid), OID_AUTO,
@@ -2108,6 +2107,12 @@ zone_alloc_sysctl(uma_zone_t zone, void *unused)
SYSCTL_ADD_U64(NULL, SYSCTL_CHILDREN(oid), OID_AUTO,
"sleeps", CTLFLAG_RD, >uz_sleeps, 0,
"Total zone limit sleeps");
+   SYSCTL_ADD_U64(NULL, SYSCTL_CHILDREN(oid), OID_AUTO,
+   "bucket_max", CTLFLAG_RD, >uz_bkt_max, 0,
+   "Maximum number of items in the bucket cache");
+   SYSCTL_ADD_U64(NULL, SYSCTL_CHILDREN(oid), OID_AUTO,
+   "bucket_cnt", CTLFLAG_RD, >uz_bkt_count, 0,
+   "Number of items in the bucket cache");
 
/*
 * Per-domain information.
@@ -2961,15 +2966,15 @@ uma_zalloc_arg(uma_zone_t zone, void *udata, int flags
domain = PCPU_GET(domain);
else
domain = UMA_ANYDOMAIN;
-   return (zone_alloc_item_locked(zone, udata, domain, flags));
+   return (zone_alloc_item(zone, udata, domain, flags));
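
A hedged sketch of the limit-accounting idea the log describes, written as a
C11 userland analogue rather than the committed zone_alloc_limit() and
zone_free_limit(): add optimistically, back the addition out on overshoot,
and leave the sleep/wakeup handling to the caller.

#include <stdatomic.h>
#include <stdbool.h>
#include <stdint.h>

struct limit {
	_Atomic uint64_t	items;		/* currently allocated */
	uint64_t		max_items;	/* administrative cap */
};

/* Reserve 'count' items; returns false if the cap would be exceeded. */
static bool
limit_alloc(struct limit *l, uint64_t count)
{
	uint64_t old;

	old = atomic_fetch_add(&l->items, count);
	if (old + count <= l->max_items)
		return (true);
	atomic_fetch_sub(&l->items, count);	/* overshot: undo the reservation */
	return (false);				/* caller may sleep and retry */
}

static void
limit_free(struct limit *l, uint64_t count)
{
	atomic_fetch_sub(&l->items, count);	/* a real zone wakes sleepers here */
}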
 

Re: svn commit: r356308 - in head/sys: cddl/compat/opensolaris/kern cddl/compat/opensolaris/sys conf kern mips/include powerpc/include sys

2020-01-02 Thread Jeff Roberson
Thank you to everyone involved.  This will make a lot of MI code much 
simpler.


Jeff

On Thu, 2 Jan 2020, Brandon Bergren wrote:


Author: bdragon
Date: Thu Jan  2 23:20:37 2020
New Revision: 356308
URL: https://svnweb.freebsd.org/changeset/base/356308

Log:
 [PowerPC] [MIPS] Implement 32-bit kernel emulation of atomic64 operations

 This is a lock-based emulation of 64-bit atomics for kernel use, split off
 from an earlier patch by jhibbits.

 This is needed to unblock future improvements that reduce the need for
 locking on 64-bit platforms by using atomic updates.

 The implementation allows for future integration with userland atomic64,
 but as that implies going through sysarch for every use, the current
 status quo of userland doing its own locking may be for the best.

 Submitted by:  jhibbits (original patch), kevans (mips bits)
 Reviewed by:   jhibbits, jeff, kevans
 Differential Revision: https://reviews.freebsd.org/D22976

Added:
 head/sys/kern/subr_atomic64.c   (contents, props changed)
 head/sys/sys/_atomic64e.h   (contents, props changed)
Modified:
 head/sys/cddl/compat/opensolaris/kern/opensolaris_atomic.c
 head/sys/cddl/compat/opensolaris/sys/atomic.h
 head/sys/conf/files.mips
 head/sys/conf/files.powerpc
 head/sys/mips/include/atomic.h
 head/sys/powerpc/include/atomic.h

Modified: head/sys/cddl/compat/opensolaris/kern/opensolaris_atomic.c
==
--- head/sys/cddl/compat/opensolaris/kern/opensolaris_atomic.c  Thu Jan  2 
23:18:43 2020(r356307)
+++ head/sys/cddl/compat/opensolaris/kern/opensolaris_atomic.c  Thu Jan  2 
23:20:37 2020(r356308)
@@ -33,7 +33,8 @@ __FBSDID("$FreeBSD$");
#include 

#if !defined(__LP64__) && !defined(__mips_n32) && \
-!defined(ARM_HAVE_ATOMIC64) && !defined(I386_HAVE_ATOMIC64)
+!defined(ARM_HAVE_ATOMIC64) && !defined(I386_HAVE_ATOMIC64) && \
+!defined(HAS_EMULATED_ATOMIC64)

#ifdef _KERNEL
#include 

Modified: head/sys/cddl/compat/opensolaris/sys/atomic.h
==
--- head/sys/cddl/compat/opensolaris/sys/atomic.h   Thu Jan  2 23:18:43 
2020(r356307)
+++ head/sys/cddl/compat/opensolaris/sys/atomic.h   Thu Jan  2 23:20:37 
2020(r356308)
@@ -42,7 +42,8 @@
#endif

#if !defined(__LP64__) && !defined(__mips_n32) && \
-!defined(ARM_HAVE_ATOMIC64) && !defined(I386_HAVE_ATOMIC64)
+!defined(ARM_HAVE_ATOMIC64) && !defined(I386_HAVE_ATOMIC64) && \
+!defined(HAS_EMULATED_ATOMIC64)
extern void atomic_add_64(volatile uint64_t *target, int64_t delta);
extern void atomic_dec_64(volatile uint64_t *target);
extern uint64_t atomic_swap_64(volatile uint64_t *a, uint64_t value);
@@ -109,7 +110,8 @@ atomic_cas_32(volatile uint32_t *target, uint32_t cmp,
#endif

#if defined(__LP64__) || defined(__mips_n32) || \
-defined(ARM_HAVE_ATOMIC64) || defined(I386_HAVE_ATOMIC64)
+defined(ARM_HAVE_ATOMIC64) || defined(I386_HAVE_ATOMIC64) || \
+defined(HAS_EMULATED_ATOMIC64)
static __inline void
atomic_dec_64(volatile uint64_t *target)
{

Modified: head/sys/conf/files.mips
==
--- head/sys/conf/files.mipsThu Jan  2 23:18:43 2020(r356307)
+++ head/sys/conf/files.mipsThu Jan  2 23:20:37 2020(r356308)
@@ -50,6 +50,7 @@ mips/mips/vm_machdep.cstandard
# misc opt-in bits
kern/kern_clocksource.c standard
kern/link_elf_obj.c standard
+kern/subr_atomic64.c	optional	mips | mipsel | mipshf | mipselhf
kern/subr_busdma_bufalloc.c standard
kern/subr_dummy_vdso_tc.c   standard
kern/subr_sfbuf.c   optionalmips | mipsel | mipsn32

Modified: head/sys/conf/files.powerpc
==
--- head/sys/conf/files.powerpc Thu Jan  2 23:18:43 2020(r356307)
+++ head/sys/conf/files.powerpc Thu Jan  2 23:20:37 2020(r356308)
@@ -76,6 +76,7 @@ dev/uart/uart_cpu_powerpc.c   optionaluart
dev/usb/controller/ehci_fsl.c   optionalehci mpc85xx
dev/vt/hw/ofwfb/ofwfb.c optionalvt aim
kern/kern_clocksource.c standard
+kern/subr_atomic64.c	optional	powerpc | powerpcspe
kern/subr_dummy_vdso_tc.c   standard
kern/syscalls.c optionalktr
kern/subr_sfbuf.c   standard

Added: head/sys/kern/subr_atomic64.c
==
--- /dev/null   00:00:00 1970   (empty, because file is newly added)
+++ head/sys/kern/subr_atomic64.c   Thu Jan  2 23:20:37 2020
(r356308)
@@ -0,0 +1,140 @@
+/*-
+ * SPDX-License-Identifier: BSD-2-Clause-FreeBSD
+ *
+ * Copyright (c) 2019 Justin Hibbits
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * 
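
The added file is truncated above.  As a hedged userland sketch of the
lock-based emulation idea (not the committed subr_atomic64.c), the whole
64-bit read-modify-write is simply serialized under a lock.  A real
implementation would typically hash the target address into a small pool of
locks to reduce contention; a single mutex keeps the sketch short:

#include <pthread.h>
#include <stdint.h>

static pthread_mutex_t a64_mtx = PTHREAD_MUTEX_INITIALIZER;

/* Emulated atomic fetch-and-add on a 64-bit value. */
uint64_t
emul_atomic_fetchadd_64(volatile uint64_t *p, int64_t v)
{
	uint64_t old;

	pthread_mutex_lock(&a64_mtx);
	old = *p;
	*p = old + (uint64_t)v;		/* RMW happens entirely under the lock */
	pthread_mutex_unlock(&a64_mtx);
	return (old);
}

/* Emulated compare-and-set; returns non-zero on success. */
int
emul_atomic_cmpset_64(volatile uint64_t *p, uint64_t cmp, uint64_t new)
{
	int ret;

	pthread_mutex_lock(&a64_mtx);
	ret = (*p == cmp);
	if (ret)
		*p = new;
	pthread_mutex_unlock(&a64_mtx);
	return (ret);
}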

Re: svn commit: r356185 - in head: lib/geom lib/geom/sched sys/geom sys/geom/sched sys/modules/geom sys/modules/geom/geom_sched sys/sys

2019-12-30 Thread Jeff Roberson

On Mon, 30 Dec 2019, Warner Losh wrote:




On Mon, Dec 30, 2019 at 12:55 PM Alexander Motin  wrote:
  On 30.12.2019 12:02, Alexey Dokuchaev wrote:
  > On Mon, Dec 30, 2019 at 08:55:14AM -0700, Warner Losh wrote:
  >> On Mon, Dec 30, 2019, 5:32 AM Alexey Dokuchaev wrote:
  >>> On Sun, Dec 29, 2019 at 09:16:04PM +, Alexander Motin
  wrote:
   New Revision: 356185
   URL: https://svnweb.freebsd.org/changeset/base/356185
  
   Log:
     Remove GEOM_SCHED class and gsched tool.
     [...]
  >>>
  >>> Wow, that was unexpected, I use it on all my machines' HDD
  drives.
   >>> Is there a planned replacement, or should I create a port for the
   >>> GEOM_SCHED class and gsched(8) tool?
  >>
  >> How much of a performance improvement do you see with it?
  >>
   >> There have been no tweaks to this geom in years and years.  It was
   >> tuned to 10 year old hard drives and never retuned for anything newer.
  >
   > Well, hard drives essentially didn't change since then, still being
   > the same rotating media. :)

   At least some papers about gsched I read mention adX devices, which
   means the old ATA stack and no NCQ.  It can be quite a significant
   change to let the HDD do its own scheduling.  Also, about a year ago in
   r335066 Warner added the sysctl debug.bioq_batchsize, which, if set to
   a non-zero value, may, I think, improve fairness between several
   processes; I am just not sure why it was never enabled.


I never enabled it because I never had a good size as the default.  I'm
guessing it's somewhere on the order of 2 times the queue size in hardware,
but with modern drives I think phk might be right and that disabling
disksort entirely might be optimal, or close to optimal.

  >> And when I played with it a few years ago, I saw no
  improvements...
  >
   > Admittedly, I only did some tests back in the 8.4 days, when I first
   > started using it.  Fair point, though, I should redo them again.

   I'm sorry to create a regression for you, if there really is one.  As I
   have written, I don't have so much against the scheduler part itself as
   against the accumulated technical debt and the way the integration is
   done, such as the mechanism of live insertion, etc.  Without unmapped
   I/O and direct dispatch I bet it must be quite slow on bigger systems,
   which is why I doubted anybody really used it.

   > Is there a planned replacement, or should I create a port for the
   > GEOM_SCHED class and gsched(8) tool?

   I wasn't planning a replacement.  And moving it to ports would be a
   problem, since in the process I removed a few capabilities critical to
   it: nstart/nend for live insertion and BIO classification for
   scheduling.  The last I don't mind returning if there appears to be a
   need; it is only the first I am strongly against.  But if somebody
   would like to reimplement it, maybe it would be better to consider
   merging it with the CAM I/O scheduler by Warner?  That one at least
   knows about the device queue depth, etc.  We could return the BIO
   classification to be used by the CAM scheduler instead, if needed.


I'd be keen on helping anybody that wants to experiment with hard disk
drive optimizations in iosched. My doodles to make it better showed no early
improvements, so I've not tried to bring them into the tree. However, our
workload is basically 'large block random', which isn't the same as others',
and others might have a workload that could benefit. I've found a marginal
improvement from the read-over-writes bias in our workload, and
another marginal improvement from favoring metadata reads over normal reads
(because for us, sendfile blocks on some of these reads, but others may see
no improvement). I'm working to clean up the metadata read stuff to get it
into the tree. I've not tested it on ZFS, though, so there will be no ZFS
metadata labeling in the initial commit.

So I like the idea, and would love to work with someone that needs it
and/or whose workloads can be improved by it.


The biggest issue I have found with drive sorting and traditional elevator 
algorithms is that they are not latency limiting.  We have other problems at 
higher layers, where we schedule too many writes simultaneously, which 
contributes substantially to I/O latency.  Also, read-after-writes are 
blocked in the buffer cache while a senseless number of buffers are queued 
and locked.


An algorithm I have found effective and implemented at least twice is to 
estimate I/O time and then give a maximum sort latency.  For many drives 
you have to go further and starve them for I/O until they complete a 
particularly long running operation or they can continue to decide to 
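
The message is cut off above, but the approach it sketches, keeping an
elevator sort for throughput while bounding how long any request can
languish in the sort, can be illustrated with a minimal dispatcher.  This is
a hedged sketch, not gsched or the CAM I/O scheduler:

#include <sys/queue.h>
#include <stdint.h>
#include <time.h>

struct req {
	TAILQ_ENTRY(req) fifo_link;	/* arrival order */
	TAILQ_ENTRY(req) sort_link;	/* LBA (elevator) order */
	uint64_t	lba;
	struct timespec	deadline;	/* arrival time + max sort latency */
};

TAILQ_HEAD(reqlist, req);

static int
past_deadline(const struct timespec *dl)
{
	struct timespec now;

	clock_gettime(CLOCK_MONOTONIC, &now);
	return (now.tv_sec > dl->tv_sec ||
	    (now.tv_sec == dl->tv_sec && now.tv_nsec >= dl->tv_nsec));
}

/*
 * Pick the next request to dispatch: normally the best positional
 * (elevator) choice, but if the oldest queued request has exceeded its
 * latency bound, dispatch it instead so sorting never starves anyone.
 */
static struct req *
next_request(struct reqlist *fifo, struct reqlist *sorted)
{
	struct req *oldest = TAILQ_FIRST(fifo);

	if (oldest != NULL && past_deadline(&oldest->deadline))
		return (oldest);
	return (TAILQ_FIRST(sorted));
}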

Re: svn commit: r356142 - in head/sys: dev/ofw sys

2019-12-30 Thread Jeff Roberson

On Sun, 29 Dec 2019, Kevin P. Neal wrote:


On Sat, Dec 28, 2019 at 10:11:48AM -1000, Jeff Roberson wrote:

It seems to be the prevailing theory that headers are not even really
copyrightable.  This has even been tested in court a few times (bsd, java).

http://lkml.iu.edu/hypermail/linux/kernel/0301.1/0362.html

The original definitions from this file were part of posix.1b and so it's
hard to argue they are anything but public.  Coincidentally I know Greg and


Wow is Google v Oracle going to screw this up. I fully expect the US Supreme
Court to make a total hash of that case and cause havoc for the whole
software industry.

The right thing for FreeBSD to do is decrease the size of the attack surface
by getting the licenses as straight as possible. IMHO. But IANAL.


In my opinion, this has already wasted everyone's time with an irrelevant
nit-picking argument.  The onus is not on Pedro to chase this down just so


Lawyers make a living nit-picking. That's why one has to be very careful
to do a preemptive nit-pick before they get involved.


I personally participated when a Fortune < 100 company sent a team of 
lawyers to audit the licensing terms of FreeBSD to ensure compliance in a 
multi-billion-dollar-a-year product.  None of them batted an eye at this. 
It is my understanding that this has taken place multiple times and once 
even resulted in phk receiving an official beer from a similarly sized 
company per the terms of his beerware license.  Do we actually believe 
that someone somewhere is going to sue the project or its users on behalf 
of myself or Greg because the user is in compliance with one BSD license 
and not the other?  I trust the lawyers' opinions over yours.


The problem here isn't the license.  It's that everyone who allegedly 
thinks this is of such dire importance that they must continue spamming the 
list and arguing about it hasn't thought it important enough to send a 
single email to Greg Ansley so they personally can commit a 
simplification.  It literally would've taken less time to simply copy 
Greg, ask approval, and commit a change, and no one would've argued.  I 
would argue that we don't even need Greg's approval to copy 8 
standards-defined function prototypes.  Richard Stallman seems to argue the 
same, and I guarantee he's spent more time with copyright lawyers than any of us.


I assert that the point here is not the license at all, or it would've been 
resolved already.  I see a lot of low-effort sniping on the list off and 
on since my return to open source.  In most cases it is not even from 
particularly active contributors.  It is very discouraging to donate time 
and energy to the project only to be rewarded with criticism delivered 
with an air of superiority.  It drives people away at a time when 
operating systems are all facing declining and aging developer 
populations.


Jeff



--
Kevin P. Nealhttp://www.pobox.com/~kpn/
  On the community of supercomputer fans:
"But what we lack in size we make up for in eccentricity."
 from Steve Gombosi, comp.sys.super, 31 Jul 2000 11:22:43 -0600


___
svn-src-head@freebsd.org mailing list
https://lists.freebsd.org/mailman/listinfo/svn-src-head
To unsubscribe, send any mail to "svn-src-head-unsubscr...@freebsd.org"


Re: svn commit: r356142 - in head/sys: dev/ofw sys

2019-12-28 Thread Jeff Roberson



On Fri, 27 Dec 2019, Rodney W. Grimes wrote:


[ Charset UTF-8 unsupported, converting... ]


On 2019-12-27 23:24, Rodney W. Grimes wrote:

[ Charset UTF-8 unsupported, converting... ]

On 2019-12-27 22:16, Rodney W. Grimes wrote:

Author: pfg
Date: Sat Dec 28 02:58:30 2019
New Revision: 356142
URL: https://svnweb.freebsd.org/changeset/base/356142

Log:
  SPDX: update some tags with two licenses.

Modified:
  head/sys/dev/ofw/openfirm.h
  head/sys/sys/sched.h

Modified: head/sys/dev/ofw/openfirm.h
==
--- head/sys/dev/ofw/openfirm.h Sat Dec 28 02:11:41 2019(r356141)
+++ head/sys/dev/ofw/openfirm.h Sat Dec 28 02:58:30 2019(r356142)
@@ -1,7 +1,7 @@
 /* $NetBSD: openfirm.h,v 1.1 1998/05/15 10:16:00 tsubai Exp $  */

 /*-
- * SPDX-License-Identifier: BSD-4-Clause
+ * SPDX-License-Identifier: (BSD-4-Clause AND BSD-2-Clause-FreeBSD)
  *
  * Copyright (C) 1995, 1996 Wolfgang Solfrank.
  * Copyright (C) 1995, 1996 TooLs GmbH.

Modified: head/sys/sys/sched.h
==
--- head/sys/sys/sched.hSat Dec 28 02:11:41 2019(r356141)
+++ head/sys/sys/sched.hSat Dec 28 02:58:30 2019(r356142)
@@ -1,5 +1,5 @@
 /*-
- * SPDX-License-Identifier: BSD-4-Clause
+ * SPDX-License-Identifier: (BSD-4-Clause AND BSD-2-Clause-FreeBSD)
  *
  * Copyright (c) 1996, 1997
  *  HD Associates, Inc.  All rights reserved.


This situation should not have occurred, and leads to an ambiguous license state.

It actually happens a lot (I mean two or more licenses in the same
file): SPDX explicitly uses AND (not OR) for cases like this.


What code is under the 2-clause license and what under the 4-clause one?

Anyone redistributing the file has to respect both licenses. If you are
lucky enough to have access to version control you may be able to
discern the author and the corresponding license, otherwise you are
trapped with both.

So the 2 clause add is null, so why have it there?


So that eventually, when the project gets to a point where a sufficient
part of the code is rewritten, they can opt to change the license to the
simpler form. There are ways to relicense projects gradually, and it's
nothing new; in fact it is very much in the BSD spirit to gradually
replace more restricted UNIX code.


The only changing of BSD licenses we have done was in those cases
where the Regents requested/granted the right to change to lesser
clauses.  Until you get HD & Associates (in this one case) to
grant that right, you're walking on a grey edge I would rather not
walk on.

The reference to BSD spirit and replacing more restricted UNIX (tm)
code is way off base in this context.  This is not an AT & T
license we are talking about here.  And again you can not just
modify the existing 4 clause licensed file by slapping a 2 clause
license into it, or the project would have done that everyplace
ages ago.

What is done here in this file is a mistake, and should be corrected.
Can you point me to other files that actually have multiple BSD
licenses in them?


It seems to be the prevailing theory that headers are not even 
really copyrightable.  This has even been tested in court a few times 
(bsd, java).


http://lkml.iu.edu/hypermail/linux/kernel/0301.1/0362.html

The original definitions from this file were part of posix.1b and so it's 
hard to argue they are anything but public.  Coincidentally I know Greg 
and I'm sure he would not object to reducing the whole file to a two 
clause license.


However, I'm not so certain as you are that it is not possible to have two 
copyrights in the same file so long as they are compatible.  In many cases 
we have multiple authors attributed to an individual file.  There are 
cases where software is purposefully licensed under multiple licenses.


https://en.wikipedia.org/wiki/Multi-licensing

This is not an identical situation but it is a common one.  I called my 
brother who is an IP lawyer and spoke with him about it today.  He 
believes this is sufficiently nuanced that we would need a proper legal 
opinion to determine that.


I wrote the original file 17 years ago and placed a two clause copyright 
in it.  trhodes combined sys/posix4/sched.h with sys/sched.h 13 years ago 
in the following commit: 
https://svnweb.freebsd.org/base/head/sys/sys/sched.h?revision=164185=markup


So the original license was in fact two clause.

If a mistake was made, it was made 13 years ago and it is almost 
guaranteed to be legally harmless.  It has nothing to do with what Pedro 
committed today.  I don't trust the armchair lawyering of software 
engineers and so to resolve this we would need to ask the foundation to 
pay their lawyers to pursue it.


In my opinion, this has already wasted everyone's time with an irrelevant 
nit-picking argument.  The onus is not on Pedro to chase this down just so 
he can add SPDX tags.  If this is important to you then you are 

Re: svn commit: r356159 - head/sys/vm

2019-12-28 Thread Jeff Roberson

Fantastic!

On Sat, 28 Dec 2019, Mark Johnston wrote:


Author: markj
Date: Sat Dec 28 19:04:29 2019
New Revision: 356159
URL: https://svnweb.freebsd.org/changeset/base/356159

Log:
 Remove some unused functions.

 The previous series of patches orphaned some vm_page functions, so
 remove them.

 Reviewed by:   dougm, kib
 Sponsored by:  Netflix, Intel
 Differential Revision: https://reviews.freebsd.org/D22886

Modified:
 head/sys/vm/vm_page.c
 head/sys/vm/vm_page.h

Modified: head/sys/vm/vm_page.c
==
--- head/sys/vm/vm_page.c   Sat Dec 28 19:04:15 2019(r356158)
+++ head/sys/vm/vm_page.c   Sat Dec 28 19:04:29 2019(r356159)
@@ -3662,52 +3662,6 @@ vm_page_enqueue(vm_page_t m, uint8_t queue)
}

/*
- * vm_page_requeue:[ internal use only ]
- *
- * Schedule a requeue of the given page.
- *
- * The page must be locked.
- */
-void
-vm_page_requeue(vm_page_t m)
-{
-
-   vm_page_assert_locked(m);
-   KASSERT(vm_page_queue(m) != PQ_NONE,
-   ("%s: page %p is not logically enqueued", __func__, m));
-   KASSERT(m->ref_count > 0,
-   ("%s: page %p does not carry any references", __func__, m));
-
-   if ((m->a.flags & PGA_REQUEUE) == 0)
-   vm_page_aflag_set(m, PGA_REQUEUE);
-   vm_page_pqbatch_submit(m, atomic_load_8(&m->a.queue));
-}
-
-/*
- * vm_page_swapqueue:  [ internal use only ]
- *
- * Move the page from one queue to another, or to the tail of its
- * current queue, in the face of a possible concurrent free of the
- * page.
- */
-void
-vm_page_swapqueue(vm_page_t m, uint8_t oldq, uint8_t newq)
-{
-   vm_page_astate_t new, old;
-
-   old = vm_page_astate_load(m);
-   do {
-   if (old.queue != oldq || (old.flags & PGA_DEQUEUE) != 0)
-   return;
-   new = old;
-   new.flags |= PGA_REQUEUE;
-   new.queue = newq;
-   } while (!vm_page_pqstate_commit_dequeue(m, &old, new));
-
-   vm_page_pqbatch_submit(m, newq);
-}
-
-/*
 *  vm_page_free_prep:
 *
 *  Prepares the given page to be put on the free list,

Modified: head/sys/vm/vm_page.h
==
--- head/sys/vm/vm_page.h   Sat Dec 28 19:04:15 2019(r356158)
+++ head/sys/vm/vm_page.h   Sat Dec 28 19:04:29 2019(r356159)
@@ -649,7 +649,6 @@ bool vm_page_remove_xbusy(vm_page_t);
int vm_page_rename(vm_page_t, vm_object_t, vm_pindex_t);
void vm_page_replace(vm_page_t mnew, vm_object_t object,
vm_pindex_t pindex, vm_page_t mold);
-void vm_page_requeue(vm_page_t m);
int vm_page_sbusied(vm_page_t m);
vm_page_t vm_page_scan_contig(u_long npages, vm_page_t m_start,
vm_page_t m_end, u_long alignment, vm_paddr_t boundary, int options);
@@ -659,7 +658,6 @@ int vm_page_sleep_if_busy(vm_page_t m, const char *msg
int vm_page_sleep_if_xbusy(vm_page_t m, const char *msg);
vm_offset_t vm_page_startup(vm_offset_t vaddr);
void vm_page_sunbusy(vm_page_t m);
-void vm_page_swapqueue(vm_page_t m, uint8_t oldq, uint8_t newq);
bool vm_page_try_remove_all(vm_page_t m);
bool vm_page_try_remove_write(vm_page_t m);
int vm_page_trysbusy(vm_page_t m);
@@ -833,31 +831,6 @@ vm_page_aflag_set(vm_page_t m, uint16_t bits)
	addr = (void *)&m->a;
val = bits << VM_PAGE_AFLAG_SHIFT;
atomic_set_32(addr, val);
-}
-
-/*
- * Atomically update the queue state of the page.  The operation fails if
- * any of the queue flags in "fflags" are set or if the "queue" field of
- * the page does not match the expected value; if the operation is
- * successful, the flags in "nflags" are set and all other queue state
- * flags are cleared.
- */
-static inline bool
-vm_page_pqstate_cmpset(vm_page_t m, uint32_t oldq, uint32_t newq,
-uint32_t fflags, uint32_t nflags)
-{
-   vm_page_astate_t new, old;
-
-   old = vm_page_astate_load(m);
-   do {
-   if ((old.flags & fflags) != 0 || old.queue != oldq)
-   return (false);
-   new = old;
-   new.flags = (new.flags & ~PGA_QUEUE_OP_MASK) | nflags;
-   new.queue = newq;
-   } while (!vm_page_astate_fcmpset(m, &old, new));
-
-   return (true);
}

/*


___
svn-src-head@freebsd.org mailing list
https://lists.freebsd.org/mailman/listinfo/svn-src-head
To unsubscribe, send any mail to "svn-src-head-unsubscr...@freebsd.org"


svn commit: r356109 - head/sys/vm

2019-12-26 Thread Jeff Roberson
Author: jeff
Date: Fri Dec 27 01:50:16 2019
New Revision: 356109
URL: https://svnweb.freebsd.org/changeset/base/356109

Log:
  Fix a pair of bugs introduced in r356002.  When we reclaim physical pages we
  allocate them with VM_ALLOC_NOOBJ which means they are not busy.  For now
  move the busy assert for the new page in vm_page_replace into the public
  api and out of the private api used by contig reclaim.  Fix another issue
  where we would leak busy if the page could not be removed from pmap.
  
  Reported by:  pho
  Discussed with:   markj

Modified:
  head/sys/vm/vm_page.c

Modified: head/sys/vm/vm_page.c
==
--- head/sys/vm/vm_page.c   Fri Dec 27 01:36:53 2019(r356108)
+++ head/sys/vm/vm_page.c   Fri Dec 27 01:50:16 2019(r356109)
@@ -1751,7 +1751,6 @@ vm_page_replace_hold(vm_page_t mnew, vm_object_t objec
bool dropped;
 
VM_OBJECT_ASSERT_WLOCKED(object);
-   vm_page_assert_xbusied(mnew);
vm_page_assert_xbusied(mold);
KASSERT(mnew->object == NULL && (mnew->ref_count & VPRC_OBJREF) == 0,
("vm_page_replace: page %p already in object", mnew));
@@ -1795,6 +1794,8 @@ vm_page_replace(vm_page_t mnew, vm_object_t object, vm
 vm_page_t mold)
 {
 
+   vm_page_assert_xbusied(mnew);
+
if (vm_page_replace_hold(mnew, object, pindex, mold))
vm_page_free(mold);
 }
@@ -2793,6 +2794,7 @@ retry:
 */
if (object->ref_count != 0 &&
!vm_page_try_remove_all(m)) {
+   vm_page_xunbusy(m);
vm_page_free(m_new);
error = EBUSY;
goto unlock;
___
svn-src-head@freebsd.org mailing list
https://lists.freebsd.org/mailman/listinfo/svn-src-head
To unsubscribe, send any mail to "svn-src-head-unsubscr...@freebsd.org"


svn commit: r356081 - head/sys/vm

2019-12-25 Thread Jeff Roberson
Author: jeff
Date: Wed Dec 25 20:57:24 2019
New Revision: 356081
URL: https://svnweb.freebsd.org/changeset/base/356081

Log:
  Further reduce the cacheline footprint of fast allocations by duplicating
  the zone size and flags fields in the per-cpu caches.  This allows fast
  allocations to proceed only touching the single per-cpu cacheline and
  simplifies the common case when no ctor/dtor is specified.
  
  Reviewed by:  markj, rlibby
  Differential Revision:https://reviews.freebsd.org/D22826

Modified:
  head/sys/vm/uma_core.c
  head/sys/vm/uma_int.h

Modified: head/sys/vm/uma_core.c
==
--- head/sys/vm/uma_core.c  Wed Dec 25 20:50:53 2019(r356080)
+++ head/sys/vm/uma_core.c  Wed Dec 25 20:57:24 2019(r356081)
@@ -281,7 +281,6 @@ static uma_keg_t uma_kcreate(uma_zone_t zone, size_t s
 uma_fini fini, int align, uint32_t flags);
 static int zone_import(void *, void **, int, int, int);
 static void zone_release(void *, void **, int);
-static void uma_zero_item(void *, uma_zone_t);
 static bool cache_alloc(uma_zone_t, uma_cache_t, void *, int);
 static bool cache_free(uma_zone_t, uma_cache_t, void *, void *, int);
 
@@ -2183,6 +2182,17 @@ zone_count(uma_zone_t zone, void *arg)
zone->uz_namecnt + 1);
 }
 
+static void
+zone_update_caches(uma_zone_t zone)
+{
+   int i;
+
+   for (i = 0; i <= mp_maxid; i++) {
+   cache_set_uz_size(&zone->uz_cpu[i], zone->uz_size);
+   cache_set_uz_flags(&zone->uz_cpu[i], zone->uz_flags);
+   }
+}
+
 /*
  * Zone header ctor.  This initializes all fields, locks, etc.
  *
@@ -2228,7 +2238,7 @@ zone_ctor(void *mem, int size, void *udata, int flags)
 
 #ifdef INVARIANTS
if (arg->uminit == trash_init && arg->fini == trash_fini)
-   zone->uz_flags |= UMA_ZFLAG_TRASH;
+   zone->uz_flags |= UMA_ZFLAG_TRASH | UMA_ZFLAG_CTORDTOR;
 #endif
 
/*
@@ -2327,6 +2337,9 @@ out:
else
zone->uz_bucket_size = bucket_select(zone->uz_size);
zone->uz_bucket_size_min = zone->uz_bucket_size;
+   if (zone->uz_dtor != NULL || zone->uz_ctor != NULL)
+   zone->uz_flags |= UMA_ZFLAG_CTORDTOR;
+   zone_update_caches(zone);
 
return (0);
 }
@@ -2801,8 +2814,14 @@ uma_zfree_pcpu_arg(uma_zone_t zone, void *item, void *
uma_zfree_arg(zone, item, udata);
 }
 
+#ifdef INVARIANTS
+#defineUMA_ALWAYS_CTORDTOR 1
+#else
+#defineUMA_ALWAYS_CTORDTOR 0
+#endif
+
 static void *
-item_ctor(uma_zone_t zone, void *udata, int flags, void *item)
+item_ctor(uma_zone_t zone, int size, void *udata, int flags, void *item)
 {
 #ifdef INVARIANTS
bool skipdbg;
@@ -2810,10 +2829,10 @@ item_ctor(uma_zone_t zone, void *udata, int flags, voi
skipdbg = uma_dbg_zskip(zone, item);
if (!skipdbg && (zone->uz_flags & UMA_ZFLAG_TRASH) != 0 &&
zone->uz_ctor != trash_ctor)
-   trash_ctor(item, zone->uz_size, udata, flags);
+   trash_ctor(item, size, udata, flags);
 #endif
if (__predict_false(zone->uz_ctor != NULL) &&
-   zone->uz_ctor(item, zone->uz_size, udata, flags) != 0) {
+   zone->uz_ctor(item, size, udata, flags) != 0) {
counter_u64_add(zone->uz_fails, 1);
zone_free_item(zone, item, udata, SKIP_DTOR | SKIP_CNT);
return (NULL);
@@ -2823,13 +2842,14 @@ item_ctor(uma_zone_t zone, void *udata, int flags, voi
uma_dbg_alloc(zone, NULL, item);
 #endif
if (flags & M_ZERO)
-   uma_zero_item(item, zone);
+   bzero(item, size);
 
return (item);
 }
 
 static inline void
-item_dtor(uma_zone_t zone, void *item, void *udata, enum zfreeskip skip)
+item_dtor(uma_zone_t zone, void *item, int size, void *udata,
+enum zfreeskip skip)
 {
 #ifdef INVARIANTS
bool skipdbg;
@@ -2842,13 +2862,13 @@ item_dtor(uma_zone_t zone, void *item, void *udata, en
uma_dbg_free(zone, NULL, item);
}
 #endif
-   if (skip < SKIP_DTOR) {
+   if (__predict_true(skip < SKIP_DTOR)) {
if (zone->uz_dtor != NULL)
-   zone->uz_dtor(item, zone->uz_size, udata);
+   zone->uz_dtor(item, size, udata);
 #ifdef INVARIANTS
if (!skipdbg && (zone->uz_flags & UMA_ZFLAG_TRASH) != 0 &&
zone->uz_dtor != trash_dtor)
-   trash_dtor(item, zone->uz_size, udata);
+   trash_dtor(item, size, udata);
 #endif
}
 }
@@ -2860,7 +2880,7 @@ uma_zalloc_arg(uma_zone_t zone, void *udata, int flags
uma_cache_bucket_t bucket;
uma_cache_t cache;
void *item;
-   int cpu, domain;
+   int domain, size, uz_flags;
 
/* Enable entropy collection for RANDOM_ENABLE_UMA kernel option */
	random_harvest_fast_uma(&zone, sizeof(zone), 

svn commit: r356080 - in head: lib/libmemstat sys/vm

2019-12-25 Thread Jeff Roberson
Author: jeff
Date: Wed Dec 25 20:50:53 2019
New Revision: 356080
URL: https://svnweb.freebsd.org/changeset/base/356080

Log:
  Optimize fast path allocations by storing bucket headers in the per-cpu
  cache area.  This allows us to check on bucket space for all per-cpu
  buckets with a single cacheline access and fewer branches.
  
  Reviewed by:  markj, rlibby
  Differential Revision:https://reviews.freebsd.org/D22825

Modified:
  head/lib/libmemstat/memstat_uma.c
  head/sys/vm/uma_core.c
  head/sys/vm/uma_int.h

Modified: head/lib/libmemstat/memstat_uma.c
==
--- head/lib/libmemstat/memstat_uma.c   Wed Dec 25 19:26:35 2019
(r356079)
+++ head/lib/libmemstat/memstat_uma.c   Wed Dec 25 20:50:53 2019
(r356080)
@@ -438,28 +438,9 @@ memstat_kvm_uma(struct memory_type_list *list, void *k
mtp->mt_numallocs += ucp->uc_allocs;
mtp->mt_numfrees += ucp->uc_frees;
 
-   if (ucp->uc_allocbucket != NULL) {
-   ret = kread(kvm, ucp->uc_allocbucket,
-   &ub, sizeof(ub), 0);
-   if (ret != 0) {
-   free(ucp_array);
-   _memstat_mtl_empty(list);
-   list->mtl_error = ret;
-   return (-1);
-   }
-   mtp->mt_free += ub.ub_cnt;
-   }
-   if (ucp->uc_freebucket != NULL) {
-   ret = kread(kvm, ucp->uc_freebucket,
-   &ub, sizeof(ub), 0);
-   if (ret != 0) {
-   free(ucp_array);
-   _memstat_mtl_empty(list);
-   list->mtl_error = ret;
-   return (-1);
-   }
-   mtp->mt_free += ub.ub_cnt;
-   }
+   mtp->mt_free += ucp->uc_allocbucket.ucb_cnt;
+   mtp->mt_free += ucp->uc_freebucket.ucb_cnt;
+   mtp->mt_free += ucp->uc_crossbucket.ucb_cnt;
}
 skip_percpu:
mtp->mt_size = kz.uk_size;

Modified: head/sys/vm/uma_core.c
==
--- head/sys/vm/uma_core.c  Wed Dec 25 19:26:35 2019(r356079)
+++ head/sys/vm/uma_core.c  Wed Dec 25 20:50:53 2019(r356080)
@@ -533,6 +533,144 @@ zone_put_bucket(uma_zone_t zone, uma_zone_domain_t zdo
zone->uz_bkt_count += bucket->ub_cnt;
 }
 
+/* Pops an item out of a per-cpu cache bucket. */
+static inline void *
+cache_bucket_pop(uma_cache_t cache, uma_cache_bucket_t bucket)
+{
+   void *item;
+
+   CRITICAL_ASSERT(curthread);
+
+   bucket->ucb_cnt--;
+   item = bucket->ucb_bucket->ub_bucket[bucket->ucb_cnt];
+#ifdef INVARIANTS
+   bucket->ucb_bucket->ub_bucket[bucket->ucb_cnt] = NULL;
+   KASSERT(item != NULL, ("uma_zalloc: Bucket pointer mangled."));
+#endif
+   cache->uc_allocs++;
+
+   return (item);
+}
+
+/* Pushes an item into a per-cpu cache bucket. */
+static inline void
+cache_bucket_push(uma_cache_t cache, uma_cache_bucket_t bucket, void *item)
+{
+
+   CRITICAL_ASSERT(curthread);
+   KASSERT(bucket->ucb_bucket->ub_bucket[bucket->ucb_cnt] == NULL,
+   ("uma_zfree: Freeing to non free bucket index."));
+
+   bucket->ucb_bucket->ub_bucket[bucket->ucb_cnt] = item;
+   bucket->ucb_cnt++;
+   cache->uc_frees++;
+}
+
+/*
+ * Unload a UMA bucket from a per-cpu cache.
+ */
+static inline uma_bucket_t
+cache_bucket_unload(uma_cache_bucket_t bucket)
+{
+   uma_bucket_t b;
+
+   b = bucket->ucb_bucket;
+   if (b != NULL) {
+   MPASS(b->ub_entries == bucket->ucb_entries);
+   b->ub_cnt = bucket->ucb_cnt;
+   bucket->ucb_bucket = NULL;
+   bucket->ucb_entries = bucket->ucb_cnt = 0;
+   }
+
+   return (b);
+}
+
+static inline uma_bucket_t
+cache_bucket_unload_alloc(uma_cache_t cache)
+{
+
	return (cache_bucket_unload(&cache->uc_allocbucket));
+}
+
+static inline uma_bucket_t
+cache_bucket_unload_free(uma_cache_t cache)
+{
+
	return (cache_bucket_unload(&cache->uc_freebucket));
+}
+
+static inline uma_bucket_t
+cache_bucket_unload_cross(uma_cache_t cache)
+{
+
	return (cache_bucket_unload(&cache->uc_crossbucket));
+}
+
+/*
+ * Load a bucket into a per-cpu cache bucket.
+ */
+static inline 

svn commit: r356079 - head/sys/vm

2019-12-25 Thread Jeff Roberson
Author: jeff
Date: Wed Dec 25 19:26:35 2019
New Revision: 356079
URL: https://svnweb.freebsd.org/changeset/base/356079

Log:
  Fix a bug with _NUMA domains introduced in r339686.  When M_NOWAIT is
  specified there was no loop termination condition in keg_fetch_slab().
  
  Reported by:  pho
  Reviewed by:  markj

Modified:
  head/sys/vm/uma_core.c

Modified: head/sys/vm/uma_core.c
==
--- head/sys/vm/uma_core.c  Wed Dec 25 18:24:38 2019(r356078)
+++ head/sys/vm/uma_core.c  Wed Dec 25 19:26:35 2019(r356079)
@@ -3084,6 +3084,8 @@ restart:
return (slab);
}
KEG_LOCK(keg);
+   if (!rr && (flags & M_WAITOK) == 0)
+   break;
	if (rr && vm_domainset_iter_policy(&di, &domain) != 0) {
if ((flags & M_WAITOK) != 0) {
KEG_UNLOCK(keg);
___
svn-src-head@freebsd.org mailing list
https://lists.freebsd.org/mailman/listinfo/svn-src-head
To unsubscribe, send any mail to "svn-src-head-unsubscr...@freebsd.org"


svn commit: r356059 - head/sys/vm

2019-12-24 Thread Jeff Roberson
Author: jeff
Date: Tue Dec 24 18:38:06 2019
New Revision: 356059
URL: https://svnweb.freebsd.org/changeset/base/356059

Log:
  Don't unnecessarily relock the vm object after sleeps.  This results in a
  surprising amount of object contention on loop restarts in fault.
  
  Reviewed by:  kib, markj
  Differential Revision:https://reviews.freebsd.org/D22821

Modified:
  head/sys/vm/vm_fault.c
  head/sys/vm/vm_page.c

Modified: head/sys/vm/vm_fault.c
==
--- head/sys/vm/vm_fault.c  Tue Dec 24 16:52:10 2019(r356058)
+++ head/sys/vm/vm_fault.c  Tue Dec 24 18:38:06 2019(r356059)
@@ -713,8 +713,9 @@ vm_fault_busy_sleep(struct faultstate *fs)
vm_object_pip_wakeup(fs->object);
unlock_map(fs);
if (fs->m == vm_page_lookup(fs->object, fs->pindex))
-   vm_page_sleep_if_busy(fs->m, "vmpfw");
-   VM_OBJECT_WUNLOCK(fs->object);
+   vm_page_busy_sleep(fs->m, "vmpfw", false);
+   else
+   VM_OBJECT_WUNLOCK(fs->object);
VM_CNT_INC(v_intrans);
vm_object_deallocate(fs->first_object);
 }

Modified: head/sys/vm/vm_page.c
==
--- head/sys/vm/vm_page.c   Tue Dec 24 16:52:10 2019(r356058)
+++ head/sys/vm/vm_page.c   Tue Dec 24 18:38:06 2019(r356059)
@@ -176,7 +176,7 @@ SYSCTL_PROC(_vm, OID_AUTO, page_blacklist, CTLTYPE_STR
 static uma_zone_t fakepg_zone;
 
 static void vm_page_alloc_check(vm_page_t m);
-static void _vm_page_busy_sleep(vm_object_t obj, vm_page_t m,
+static bool _vm_page_busy_sleep(vm_object_t obj, vm_page_t m,
 const char *wmesg, bool nonshared, bool locked);
 static void vm_page_clear_dirty_mask(vm_page_t m, vm_page_bits_t pagebits);
 static void vm_page_dequeue_complete(vm_page_t m);
@@ -878,8 +878,13 @@ vm_page_acquire_flags(vm_page_t m, int allocflags)
return (locked);
 }
 
+/*
+ * vm_page_busy_sleep_flags
+ *
+ * Sleep for busy according to VM_ALLOC_ parameters.
+ */
 static bool
-vm_page_busy_sleep_flags(vm_object_t object, vm_page_t m, const char *wchan,
+vm_page_busy_sleep_flags(vm_object_t object, vm_page_t m, const char *wmesg,
 int allocflags)
 {
 
@@ -892,9 +897,9 @@ vm_page_busy_sleep_flags(vm_object_t object, vm_page_t
 */
if ((allocflags & VM_ALLOC_NOCREAT) == 0)
vm_page_aflag_set(m, PGA_REFERENCED);
-   vm_page_busy_sleep(m, wchan, (allocflags &
-   VM_ALLOC_IGN_SBUSY) != 0);
-   VM_OBJECT_WLOCK(object);
+   if (_vm_page_busy_sleep(object, m, wmesg, (allocflags &
+   VM_ALLOC_IGN_SBUSY) != 0, true))
+   VM_OBJECT_WLOCK(object);
if ((allocflags & VM_ALLOC_WAITFAIL) != 0)
return (false);
return (true);
@@ -930,9 +935,8 @@ vm_page_busy_acquire(vm_page_t m, int allocflags)
else
locked = false;
MPASS(locked || vm_page_wired(m));
-   _vm_page_busy_sleep(obj, m, "vmpba",
-   (allocflags & VM_ALLOC_SBUSY) != 0, locked);
-   if (locked)
+   if (_vm_page_busy_sleep(obj, m, "vmpba",
+   (allocflags & VM_ALLOC_SBUSY) != 0, locked))
VM_OBJECT_WLOCK(obj);
if ((allocflags & VM_ALLOC_WAITFAIL) != 0)
return (false);
@@ -1055,10 +1059,16 @@ vm_page_busy_sleep(vm_page_t m, const char *wmesg, boo
VM_OBJECT_ASSERT_LOCKED(obj);
vm_page_lock_assert(m, MA_NOTOWNED);
 
-   _vm_page_busy_sleep(obj, m, wmesg, nonshared, true);
+   if (!_vm_page_busy_sleep(obj, m, wmesg, nonshared, true))
+   VM_OBJECT_DROP(obj);
 }
 
-static void
+/*
+ * _vm_page_busy_sleep:
+ *
+ * Internal busy sleep function.
+ */
+static bool
 _vm_page_busy_sleep(vm_object_t obj, vm_page_t m, const char *wmesg,
 bool nonshared, bool locked)
 {
@@ -1072,17 +1082,15 @@ _vm_page_busy_sleep(vm_object_t obj, vm_page_t m, cons
if (locked)
VM_OBJECT_DROP(obj);
vm_object_busy_wait(obj, wmesg);
-   return;
+   return (locked);
}
sleepq_lock(m);
x = m->busy_lock;
if (x == VPB_UNBUSIED || (nonshared && (x & VPB_BIT_SHARED) != 0) ||
((x & VPB_BIT_WAITERS) == 0 &&
    !atomic_cmpset_int(&m->busy_lock, x, x | VPB_BIT_WAITERS))) {
-   if (locked)
-   VM_OBJECT_DROP(obj);
sleepq_release(m);
-   return;
+   return (false);
}
if (locked)
VM_OBJECT_DROP(obj);
@@ -1090,6 +1098,7 @@ _vm_page_busy_sleep(vm_object_t obj, vm_page_t m, cons
sleepq_add(m, NULL, wmesg, 0, 0);
sleepq_wait(m, PVM);
PICKUP_GIANT();
+   return (locked);
 }
 
 /*

svn commit: r356026 - head/sys/vm

2019-12-22 Thread Jeff Roberson
Author: jeff
Date: Sun Dec 22 20:35:50 2019
New Revision: 356026
URL: https://svnweb.freebsd.org/changeset/base/356026

Log:
  Fix a bug introduced in r356002.  Prior versions of this patchset used
  vm_page_remove() rather than !vm_page_wired() as the condition for freeing
  the page.  When this was changed back to the wired check, the busy lock was
  leaked on the path that keeps the page.
  
  Reported by:  pho
  Reviewed by:  markj
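
  A minimal restatement of the rule the fix restores (sketch, not the
  committed code): since r356002 the free path consumes the caller's
  exclusive busy, so any branch that decides to keep the page must drop that
  busy itself.

/*
 * Sketch: vm_page_free() disposes of the xbusied page, but a page we keep
 * (here, because it is still wired) must be unbusied explicitly or the
 * busy lock is leaked.
 */
static void
example_free_or_keep(vm_page_t m)
{

        if (!vm_page_wired(m))
                vm_page_free(m);        /* busy goes away with the page */
        else
                vm_page_xunbusy(m);     /* keep the page; release busy here */
}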

Modified:
  head/sys/vm/vm_fault.c

Modified: head/sys/vm/vm_fault.c
==
--- head/sys/vm/vm_fault.c  Sun Dec 22 20:34:15 2019(r356025)
+++ head/sys/vm/vm_fault.c  Sun Dec 22 20:35:50 2019(r356026)
@@ -180,6 +180,8 @@ fault_page_free(vm_page_t *mp)
VM_OBJECT_ASSERT_WLOCKED(m->object);
if (!vm_page_wired(m))
vm_page_free(m);
+   else
+   vm_page_xunbusy(m);
*mp = NULL;
}
 }


svn commit: r356002 - in head/sys: compat/linuxkpi/common/src dev/drm2/ttm dev/netmap dev/xen/gntdev dev/xen/privcmd vm

2019-12-21 Thread Jeff Roberson
Author: jeff
Date: Sun Dec 22 06:56:44 2019
New Revision: 356002
URL: https://svnweb.freebsd.org/changeset/base/356002

Log:
  Make page busy state deterministic on free.  Pages must be xbusy when
  removed from objects, including via calls to free.  Pages must not be
  xbusy when freed while not on an object.  Strengthen assertions to match
  these expectations.  In practice very little code had to change its busy
  handling to meet these rules, but we can now make stronger guarantees to
  busy holders and avoid conditionally dropping busy in free.
  
  Refine vm_page_remove() and vm_page_replace() semantics now that we have
  stronger guarantees about busy state.  This removes redundant and
  potentially problematic code that has proliferated.
  
  Discussed with:   markj
  Reviewed by:  kib
  Differential Revision:https://reviews.freebsd.org/D22822
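
  The two rules above, restated as a sketch (illustrative assertions; the
  real checks live in the free and remove paths, not in this commit
  verbatim):

/*
 * Busy-on-free protocol: a page still in an object is freed only while
 * xbusied; a page with no object must not be xbusy when freed.
 */
static void
example_free_rules(vm_page_t m)
{

        if (m->object != NULL) {
                KASSERT(vm_page_xbusied(m),
                    ("freeing unbusied object page %p", m));
                vm_page_free(m);
        } else {
                KASSERT(!vm_page_xbusied(m),
                    ("freeing busy orphan page %p", m));
                vm_page_free(m);
        }
}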

Modified:
  head/sys/compat/linuxkpi/common/src/linux_compat.c
  head/sys/dev/drm2/ttm/ttm_bo_vm.c
  head/sys/dev/netmap/netmap_freebsd.c
  head/sys/dev/xen/gntdev/gntdev.c
  head/sys/dev/xen/privcmd/privcmd.c
  head/sys/vm/device_pager.c
  head/sys/vm/sg_pager.c
  head/sys/vm/vm_fault.c
  head/sys/vm/vm_kern.c
  head/sys/vm/vm_object.c
  head/sys/vm/vm_page.c
  head/sys/vm/vm_page.h

Modified: head/sys/compat/linuxkpi/common/src/linux_compat.c
==
--- head/sys/compat/linuxkpi/common/src/linux_compat.c  Sun Dec 22 06:25:20 2019(r356001)
+++ head/sys/compat/linuxkpi/common/src/linux_compat.c  Sun Dec 22 06:56:44 2019(r356002)
@@ -508,10 +508,7 @@ linux_cdev_pager_fault(vm_object_t vm_obj, vm_ooffset_
page = vm_page_getfake(paddr, vm_obj->memattr);
VM_OBJECT_WLOCK(vm_obj);
 
-   vm_page_replace_checked(page, vm_obj,
-   (*mres)->pindex, *mres);
-
-   vm_page_free(*mres);
+   vm_page_replace(page, vm_obj, (*mres)->pindex, *mres);
*mres = page;
}
vm_page_valid(page);

Modified: head/sys/dev/drm2/ttm/ttm_bo_vm.c
==
--- head/sys/dev/drm2/ttm/ttm_bo_vm.c   Sun Dec 22 06:25:20 2019(r356001)
+++ head/sys/dev/drm2/ttm/ttm_bo_vm.c   Sun Dec 22 06:56:44 2019(r356002)
@@ -237,6 +237,7 @@ reserve:
goto retry;
}
m1 = vm_page_lookup(vm_obj, OFF_TO_IDX(offset));
+   /* XXX This looks like it should just be vm_page_replace? */
if (m1 == NULL) {
if (vm_page_insert(m, vm_obj, OFF_TO_IDX(offset))) {
vm_page_xunbusy(m);
@@ -255,6 +256,7 @@ reserve:
vm_page_valid(m);
if (*mres != NULL) {
KASSERT(*mres != m, ("losing %p %p", *mres, m));
+   vm_page_xunbusy(*mres);
vm_page_free(*mres);
}
*mres = m;

Modified: head/sys/dev/netmap/netmap_freebsd.c
==
--- head/sys/dev/netmap/netmap_freebsd.c    Sun Dec 22 06:25:20 2019(r356001)
+++ head/sys/dev/netmap/netmap_freebsd.c    Sun Dec 22 06:56:44 2019(r356002)
@@ -1022,12 +1022,10 @@ netmap_dev_pager_fault(vm_object_t object, vm_ooffset_
vm_paddr_t paddr;
vm_page_t page;
vm_memattr_t memattr;
-   vm_pindex_t pidx;
 
nm_prdis("object %p offset %jd prot %d mres %p",
object, (intmax_t)offset, prot, mres);
memattr = object->memattr;
-   pidx = OFF_TO_IDX(offset);
paddr = netmap_mem_ofstophys(na->nm_mem, offset);
if (paddr == 0)
return VM_PAGER_FAIL;
@@ -1052,9 +1050,8 @@ netmap_dev_pager_fault(vm_object_t object, vm_ooffset_
VM_OBJECT_WUNLOCK(object);
page = vm_page_getfake(paddr, memattr);
VM_OBJECT_WLOCK(object);
-   vm_page_free(*mres);
+   vm_page_replace(page, object, (*mres)->pindex, *mres);
*mres = page;
-   vm_page_insert(page, object, pidx);
}
vm_page_valid(page);
return (VM_PAGER_OK);

Modified: head/sys/dev/xen/gntdev/gntdev.c
==
--- head/sys/dev/xen/gntdev/gntdev.c    Sun Dec 22 06:25:20 2019(r356001)
+++ head/sys/dev/xen/gntdev/gntdev.c    Sun Dec 22 06:56:44 2019(r356002)
@@ -806,7 +806,7 @@ gntdev_gmap_pg_fault(vm_object_t object, vm_ooffset_t 
 {
struct gntdev_gmap *gmap = object->handle;
vm_pindex_t pidx, ridx;
-   vm_page_t page, oldm;
+   vm_page_t page;
vm_ooffset_t relative_offset;
 
if (gmap->map == NULL)
@@ -829,15 +829,12 @@ gntdev_gmap_pg_fault(vm_object_t object, vm_ooffset_t 
KASSERT(vm_page_wired(page), ("page %p is not wired", 

svn commit: r355997 - head/sys/vm

2019-12-21 Thread Jeff Roberson
Author: jeff
Date: Sun Dec 22 04:21:16 2019
New Revision: 355997
URL: https://svnweb.freebsd.org/changeset/base/355997

Log:
  Move vm_fault busy logic into its own function for clarity and re-use by
  later changes.
  
  Reviewed by:  kib, markj
  Differential Revision:https://reviews.freebsd.org/D22820

Modified:
  head/sys/vm/vm_fault.c

Modified: head/sys/vm/vm_fault.c
==
--- head/sys/vm/vm_fault.c  Sun Dec 22 03:19:17 2019(r355996)
+++ head/sys/vm/vm_fault.c  Sun Dec 22 04:21:16 2019(r355997)
@@ -684,6 +684,41 @@ vm_fault_lock_vnode(struct faultstate *fs)
return (KERN_RESOURCE_SHORTAGE);
 }
 
+/*
+ * Wait/Retry if the page is busy.  We have to do this if the page is
+ * either exclusive or shared busy because the vm_pager may be using
+ * read busy for pageouts (and even pageins if it is the vnode pager),
+ * and we could end up trying to pagein and pageout the same page
+ * simultaneously.
+ *
+ * We can theoretically allow the busy case on a read fault if the page
+ * is marked valid, but since such pages are typically already pmap'd,
+ * putting that special case in might be more effort then it is worth.
+ * We cannot under any circumstances mess around with a shared busied
+ * page except, perhaps, to pmap it.
+ */
+static void
+vm_fault_busy_sleep(struct faultstate *fs)
+{
+   /*
+* Reference the page before unlocking and
+* sleeping so that the page daemon is less
+* likely to reclaim it.
+*/
+   vm_page_aflag_set(fs->m, PGA_REFERENCED);
+   if (fs->object != fs->first_object) {
+   fault_page_release(&fs->first_m);
+   vm_object_pip_wakeup(fs->first_object);
+   }
+   vm_object_pip_wakeup(fs->object);
+   unlock_map(fs);
+   if (fs->m == vm_page_lookup(fs->object, fs->pindex))
+   vm_page_sleep_if_busy(fs->m, "vmpfw");
+   VM_OBJECT_WUNLOCK(fs->object);
+   VM_CNT_INC(v_intrans);
+   vm_object_deallocate(fs->first_object);
+}
+
 int
 vm_fault(vm_map_t map, vm_offset_t vaddr, vm_prot_t fault_type,
 int fault_flags, vm_page_t *m_hold)
@@ -822,42 +857,8 @@ RetryFault_oom:
 */
fs.m = vm_page_lookup(fs.object, fs.pindex);
if (fs.m != NULL) {
-   /*
-* Wait/Retry if the page is busy.  We have to do this
-* if the page is either exclusive or shared busy
-* because the vm_pager may be using read busy for
-* pageouts (and even pageins if it is the vnode
-* pager), and we could end up trying to pagein and
-* pageout the same page simultaneously.
-*
-* We can theoretically allow the busy case on a read
-* fault if the page is marked valid, but since such
-* pages are typically already pmap'd, putting that
-* special case in might be more effort then it is 
-* worth.  We cannot under any circumstances mess
-* around with a shared busied page except, perhaps,
-* to pmap it.
-*/
if (vm_page_tryxbusy(fs.m) == 0) {
-   /*
-* Reference the page before unlocking and
-* sleeping so that the page daemon is less
-* likely to reclaim it.
-*/
-   vm_page_aflag_set(fs.m, PGA_REFERENCED);
-   if (fs.object != fs.first_object) {
-   fault_page_release(&fs.first_m);
-   vm_object_pip_wakeup(fs.first_object);
-   }
-   unlock_map(&fs);
-   vm_object_pip_wakeup(fs.object);
-   if (fs.m == vm_page_lookup(fs.object,
-   fs.pindex)) {
-   vm_page_sleep_if_busy(fs.m, "vmpfw");
-   }
-   VM_OBJECT_WUNLOCK(fs.object);
-   VM_CNT_INC(v_intrans);
-   vm_object_deallocate(fs.first_object);
+   vm_fault_busy_sleep(&fs);
goto RetryFault;
}
 


svn commit: r355915 - head/sys/kern

2019-12-19 Thread Jeff Roberson
Author: jeff
Date: Thu Dec 19 18:22:11 2019
New Revision: 355915
URL: https://svnweb.freebsd.org/changeset/base/355915

Log:
  Fix a bug in r355784.  I missed a sched_add() call that needed to reacquire
  the thread lock.
  
  Reported by:  mjg

Modified:
  head/sys/kern/sched_ule.c

Modified: head/sys/kern/sched_ule.c
==
--- head/sys/kern/sched_ule.c   Thu Dec 19 17:01:25 2019(r355914)
+++ head/sys/kern/sched_ule.c   Thu Dec 19 18:22:11 2019(r355915)
@@ -2689,7 +2689,7 @@ sched_affinity(struct thread *td)
return;
if (TD_ON_RUNQ(td)) {
sched_rem(td);
-   sched_add(td, SRQ_BORING);
+   sched_add(td, SRQ_BORING | SRQ_HOLDTD);
return;
}
if (!TD_IS_RUNNING(td))


svn commit: r355819 - in head/sys: arm/arm arm64/arm64 i386/i386 mips/mips powerpc/powerpc riscv/riscv

2019-12-16 Thread Jeff Roberson
Author: jeff
Date: Mon Dec 16 20:15:04 2019
New Revision: 355819
URL: https://svnweb.freebsd.org/changeset/base/355819

Log:
  Repeat the spinlock_enter/exit pattern from amd64 on other architectures to
  fix an assert violation introduced in r355784.  Without this, spinlock_exit()
  may see owepreempt set and switch before reducing the spinlock count.  amd64
  had been optimized to do a single critical enter/exit regardless of the
  number of nested spinlocks, which avoided the problem; that optimization had
  not been applied elsewhere.
  
  Reported by:  emaste
  Suggested by: rlibby
  Discussed with:   jhb, rlibby
  Tested by:manu (arm64)
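
  The motif being copied, reduced to one generic sketch (field names follow
  the i386/amd64 MD code; other architectures keep the saved interrupt state
  in their own field): the nesting count reaches zero before critical_exit()
  runs, so a preemption triggered there never observes a stale count.

void
spinlock_enter_sketch(void)
{
        struct thread *td;
        register_t flags;

        td = curthread;
        if (td->td_md.md_spinlock_count == 0) {
                flags = intr_disable();
                td->td_md.md_spinlock_count = 1;
                td->td_md.md_saved_flags = flags;
                critical_enter();       /* only for the outermost spinlock */
        } else
                td->td_md.md_spinlock_count++;
}

void
spinlock_exit_sketch(void)
{
        struct thread *td;
        register_t flags;

        td = curthread;
        flags = td->td_md.md_saved_flags;
        td->td_md.md_spinlock_count--;  /* count hits zero first ... */
        if (td->td_md.md_spinlock_count == 0) {
                critical_exit();        /* ... so a switch here sees a sane count */
                intr_restore(flags);
        }
}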

Modified:
  head/sys/arm/arm/machdep.c
  head/sys/arm64/arm64/machdep.c
  head/sys/i386/i386/machdep.c
  head/sys/mips/mips/machdep.c
  head/sys/powerpc/powerpc/machdep.c
  head/sys/riscv/riscv/machdep.c

Modified: head/sys/arm/arm/machdep.c
==
--- head/sys/arm/arm/machdep.c  Mon Dec 16 20:07:04 2019(r355818)
+++ head/sys/arm/arm/machdep.c  Mon Dec 16 20:15:04 2019(r355819)
@@ -389,9 +389,9 @@ spinlock_enter(void)
cspr = disable_interrupts(PSR_I | PSR_F);
td->td_md.md_spinlock_count = 1;
td->td_md.md_saved_cspr = cspr;
+   critical_enter();
} else
td->td_md.md_spinlock_count++;
-   critical_enter();
 }
 
 void
@@ -401,11 +401,12 @@ spinlock_exit(void)
register_t cspr;
 
td = curthread;
-   critical_exit();
cspr = td->td_md.md_saved_cspr;
td->td_md.md_spinlock_count--;
-   if (td->td_md.md_spinlock_count == 0)
+   if (td->td_md.md_spinlock_count == 0) {
+   critical_exit();
restore_interrupts(cspr);
+   }
 }
 
 /*

Modified: head/sys/arm64/arm64/machdep.c
==
--- head/sys/arm64/arm64/machdep.c  Mon Dec 16 20:07:04 2019(r355818)
+++ head/sys/arm64/arm64/machdep.c  Mon Dec 16 20:15:04 2019(r355819)
@@ -635,9 +635,9 @@ spinlock_enter(void)
daif = intr_disable();
td->td_md.md_spinlock_count = 1;
td->td_md.md_saved_daif = daif;
+   critical_enter();
} else
td->td_md.md_spinlock_count++;
-   critical_enter();
 }
 
 void
@@ -647,11 +647,12 @@ spinlock_exit(void)
register_t daif;
 
td = curthread;
-   critical_exit();
daif = td->td_md.md_saved_daif;
td->td_md.md_spinlock_count--;
-   if (td->td_md.md_spinlock_count == 0)
+   if (td->td_md.md_spinlock_count == 0) {
+   critical_exit();
intr_restore(daif);
+   }
 }
 
 #ifndef_SYS_SYSPROTO_H_

Modified: head/sys/i386/i386/machdep.c
==
--- head/sys/i386/i386/machdep.c    Mon Dec 16 20:07:04 2019(r355818)
+++ head/sys/i386/i386/machdep.c    Mon Dec 16 20:15:04 2019(r355819)
@@ -2679,9 +2679,9 @@ spinlock_enter(void)
flags = intr_disable();
td->td_md.md_spinlock_count = 1;
td->td_md.md_saved_flags = flags;
+   critical_enter();
} else
td->td_md.md_spinlock_count++;
-   critical_enter();
 }
 
 void
@@ -2691,11 +2691,12 @@ spinlock_exit(void)
register_t flags;
 
td = curthread;
-   critical_exit();
flags = td->td_md.md_saved_flags;
td->td_md.md_spinlock_count--;
-   if (td->td_md.md_spinlock_count == 0)
+   if (td->td_md.md_spinlock_count == 0) {
+   critical_exit();
intr_restore(flags);
+   }
 }
 
 #if defined(I586_CPU) && !defined(NO_F00F_HACK)

Modified: head/sys/mips/mips/machdep.c
==
--- head/sys/mips/mips/machdep.c    Mon Dec 16 20:07:04 2019(r355818)
+++ head/sys/mips/mips/machdep.c    Mon Dec 16 20:15:04 2019(r355819)
@@ -516,9 +516,9 @@ spinlock_enter(void)
intr = intr_disable();
td->td_md.md_spinlock_count = 1;
td->td_md.md_saved_intr = intr;
+   critical_enter();
} else
td->td_md.md_spinlock_count++;
-   critical_enter();
 }
 
 void
@@ -528,11 +528,12 @@ spinlock_exit(void)
register_t intr;
 
td = curthread;
-   critical_exit();
intr = td->td_md.md_saved_intr;
td->td_md.md_spinlock_count--;
-   if (td->td_md.md_spinlock_count == 0)
+   if (td->td_md.md_spinlock_count == 0) {
+   critical_exit();
intr_restore(intr);
+   }
 }
 
 /*

Modified: head/sys/powerpc/powerpc/machdep.c
==
--- 

Re: svn commit: r355784 - in head/sys: compat/linuxkpi/common/src dev/dpaa kern mips/nlm sys

2019-12-16 Thread Jeff Roberson

On Mon, 16 Dec 2019, Ryan Libby wrote:


On Mon, Dec 16, 2019 at 7:30 AM Ed Maste wrote:


On Sun, 15 Dec 2019 at 16:27, Jeff Roberson wrote:


Author: jeff
Date: Sun Dec 15 21:26:50 2019
New Revision: 355784
URL: https://svnweb.freebsd.org/changeset/base/355784

Log:
  schedlock 4/4


FYI i386, arm, arm64, riscv fail to boot now, with "panic: invalid count 2"

Boot logs:
i386: https://ci.freebsd.org/job/FreeBSD-head-i386-test/7797/console
arm: 
https://ci.freebsd.org/hwlab/job/FreeBSD-device-head-beaglebone-test/1317/artifact/device_tests/beaglebone.boot.log
arm64: 
https://ci.freebsd.org/hwlab/job/FreeBSD-device-head-pinea64-test/1194/artifact/device_tests/pinea64.boot.log
riscv: 
https://ci.freebsd.org/hwlab/job/FreeBSD-device-head-pinea64-test/1194/artifact/device_tests/pinea64.boot.log

arm64 is:

panic: invalid count 2
cpuid = 0
time = 1
KDB: stack backtrace:
db_trace_self() at db_trace_self_wrapper+0x28
pc = 0x007359ec  lr = 0x00106744
sp = 0x56b063c0  fp = 0x56b065d0

db_trace_self_wrapper() at vpanic+0x18c
pc = 0x00106744  lr = 0x00408128
sp = 0x56b065e0  fp = 0x56b06690

vpanic() at panic+0x44
pc = 0x00408128  lr = 0x00407ed8
sp = 0x56b066a0  fp = 0x56b06720

panic() at sched_switch+0x81c
pc = 0x00407ed8  lr = 0x00434264
sp = 0x56b06730  fp = 0x56b06810

sched_switch() at mi_switch+0x170
pc = 0x00434264  lr = 0x00413690
sp = 0x56b06820  fp = 0x56b06840

mi_switch() at cpu_idle+0xc8
pc = 0x00413690  lr = 0x007400a0
sp = 0x56b06850  fp = 0x56b06860

cpu_idle() at sched_idletd+0x380
pc = 0x007400a0  lr = 0x00436a90
sp = 0x56b06870  fp = 0x56b06940

sched_idletd() at fork_exit+0x7c
pc = 0x00436a90  lr = 0x003c7ba4
sp = 0x56b06950  fp = 0x56b06980

fork_exit() at fork_trampoline+0x10
pc = 0x003c7ba4  lr = 0x007521ac
sp = 0x56b06990  fp = 0x

KDB: enter: panic
[ thread pid 11 tid 13 ]
Stopped at  0
db>


It looks like amd64 vs i386, riscv, etc are using different motifs in
spinlock_exit().  Perhaps we just need to rearrange them to drop the
spinlock count before critical_exit(), like in amd64.


It took me a moment to see why, but I believe you are right.  Interrupts
being disabled would prevent a local preemption with the flags out of sync,
but critical_exit() might have owepreempt set, so we will switch before
updating the count.


Jeff



Ryan



