Author: jeff
Date: Sat Jan  4 07:56:28 2020
New Revision: 356350
URL: https://svnweb.freebsd.org/changeset/base/356350

Log:
  Sort cross-domain frees into per-domain buckets before inserting them
  onto their respective bucket lists.  This reduces contention on the keg
  lock under heavy free traffic by several orders of magnitude, at the
  cost of only one additional bucket's worth of memory per domain.
  
  Discussed with:               markj, rlibby
  Differential Revision:        https://reviews.freebsd.org/D22830
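
A minimal userland sketch of the scheme described in the log, with
simplified types: items drawn from a mixed cross-domain bucket are staged
into one partial bucket per domain, and only full buckets are published to
the matching domain's list.  item_domain() is a hypothetical stand-in for
the _vm_phys_domain(pmap_kextract(...)) lookup in the real code; NDOMAINS
and BUCKET_ENTRIES are illustrative constants, not the kernel's values.

/*
 * Sketch only: sort a mixed bucket of freed items into per-domain
 * buckets.  The partial[] slots play the role of uzd_cross.
 */
#include <stdint.h>
#include <stdlib.h>

#define NDOMAINS        4       /* illustrative; vm_ndomains in the kernel */
#define BUCKET_ENTRIES  8

struct bucket {
        void            *ub_bucket[BUCKET_ENTRIES];
        int             ub_cnt;
        struct bucket   *ub_link;       /* full-bucket list linkage */
};

static struct bucket *partial[NDOMAINS];  /* per-domain staging buckets */
static struct bucket *full[NDOMAINS];     /* per-domain full-bucket lists */

static int
item_domain(void *item)
{
        /* Stand-in for _vm_phys_domain(pmap_kextract((vm_offset_t)item)). */
        return ((int)(((uintptr_t)item >> 12) % NDOMAINS));
}

static void
sort_cross_bucket(struct bucket *b)
{
        struct bucket *p;
        void *item;
        int domain;

        while (b->ub_cnt > 0) {
                item = b->ub_bucket[b->ub_cnt - 1];
                domain = item_domain(item);
                p = partial[domain];
                if (p == NULL) {
                        p = calloc(1, sizeof(*p));
                        if (p == NULL)
                                break;  /* leftovers stay in b for the caller */
                        partial[domain] = p;
                }
                p->ub_bucket[p->ub_cnt++] = item;
                if (p->ub_cnt == BUCKET_ENTRIES) {
                        /* Full: publish to the matching domain's list. */
                        p->ub_link = full[domain];
                        full[domain] = p;
                        partial[domain] = NULL;
                }
                b->ub_cnt--;
        }
}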

Modified:
  head/sys/vm/uma_core.c
  head/sys/vm/uma_int.h

Modified: head/sys/vm/uma_core.c
==============================================================================
--- head/sys/vm/uma_core.c      Sat Jan  4 03:30:08 2020        (r356349)
+++ head/sys/vm/uma_core.c      Sat Jan  4 07:56:28 2020        (r356350)
@@ -951,10 +951,6 @@ cache_drain(uma_zone_t zone)
         *
         * XXX: It would good to be able to assert that the zone is being
         * torn down to prevent improper use of cache_drain().
-        *
-        * XXX: We lock the zone before passing into bucket_cache_reclaim() as
-        * it is used elsewhere.  Should the tear-down path be made special
-        * there in some form?
         */
        CPU_FOREACH(cpu) {
                cache = &zone->uz_cpu[cpu];
@@ -974,9 +970,7 @@ cache_drain(uma_zone_t zone)
                        bucket_free(zone, bucket, NULL);
                }
        }
-       ZONE_LOCK(zone);
        bucket_cache_reclaim(zone, true);
-       ZONE_UNLOCK(zone);
 }
 
 static void
@@ -1082,9 +1076,29 @@ bucket_cache_reclaim(uma_zone_t zone, bool drain)
        int i;
 
        for (i = 0; i < vm_ndomains; i++) {
+               /*
+                * The cross bucket is partially filled and not part of
+                * the item count.  Reclaim it individually here.
+                */
                zdom = &zone->uz_domain[i];
+               ZONE_CROSS_LOCK(zone);
+               bucket = zdom->uzd_cross;
+               zdom->uzd_cross = NULL;
+               ZONE_CROSS_UNLOCK(zone);
+               if (bucket != NULL) {
+                       bucket_drain(zone, bucket);
+                       bucket_free(zone, bucket, NULL);
+               }
 
                /*
+                * Shrink the zone bucket size to ensure that the per-CPU caches
+                * don't grow too large.
+                */
+               ZONE_LOCK(zone);
+               if (i == 0 && zone->uz_bucket_size > zone->uz_bucket_size_min)
+                       zone->uz_bucket_size--;
+
+               /*
                 * If we were asked to drain the zone, we are done only once
                 * this bucket cache is empty.  Otherwise, we reclaim items in
                 * excess of the zone's estimated working set size.  If the
@@ -1114,14 +1128,8 @@ bucket_cache_reclaim(uma_zone_t zone, bool drain)
                        bucket_free(zone, bucket, NULL);
                        ZONE_LOCK(zone);
                }
+               ZONE_UNLOCK(zone);
        }
-
-       /*
-        * Shrink the zone bucket size to ensure that the per-CPU caches
-        * don't grow too large.
-        */
-       if (zone->uz_bucket_size > zone->uz_bucket_size_min)
-               zone->uz_bucket_size--;
 }
 
 static void
@@ -1224,8 +1232,8 @@ zone_reclaim(uma_zone_t zone, int waitok, bool drain)
                msleep(zone, &zone->uz_lock, PVM, "zonedrain", 1);
        }
        zone->uz_flags |= UMA_ZFLAG_RECLAIMING;
-       bucket_cache_reclaim(zone, drain);
        ZONE_UNLOCK(zone);
+       bucket_cache_reclaim(zone, drain);
 
        /*
         * The DRAINING flag protects us from being freed while
@@ -2263,6 +2271,7 @@ zone_ctor(void *mem, int size, void *udata, int flags)
        zone_foreach(zone_count, &cnt);
        zone->uz_namecnt = cnt.count;
        ZONE_LOCK_INIT(zone, (arg->flags & UMA_ZONE_MTXCLASS));
+       ZONE_CROSS_LOCK_INIT(zone);
 
        for (i = 0; i < vm_ndomains; i++)
                TAILQ_INIT(&zone->uz_domain[i].uzd_buckets);
@@ -2448,6 +2457,7 @@ zone_dtor(void *arg, int size, void *udata)
        counter_u64_free(zone->uz_fails);
        free(zone->uz_ctlname, M_UMA);
        ZONE_LOCK_FINI(zone);
+       ZONE_CROSS_LOCK_FINI(zone);
 }
 
 /*
@@ -3724,7 +3734,76 @@ zfree_item:
        zone_free_item(zone, item, udata, SKIP_DTOR);
 }
 
+#ifdef UMA_XDOMAIN
+/*
+ * Sort cross-domain free buckets into per-domain buckets and cache
+ * them.
+ */
 static void
+zone_free_cross(uma_zone_t zone, uma_bucket_t bucket, void *udata)
+{
+       struct uma_bucketlist fullbuckets;
+       uma_zone_domain_t zdom;
+       uma_bucket_t b;
+       void *item;
+       int domain;
+
+       CTR3(KTR_UMA,
+           "uma_zfree: zone %s(%p) draining cross bucket %p",
+           zone->uz_name, zone, bucket);
+
+       TAILQ_INIT(&fullbuckets);
+
+       /*
+        * To avoid having ndomain * ndomain buckets for sorting, we take
+        * a single lock protecting the per-domain cross buckets.  A full
+        * matrix with per-domain locking could be used if necessary.
+        */
+       ZONE_CROSS_LOCK(zone);
+       while (bucket->ub_cnt > 0) {
+               item = bucket->ub_bucket[bucket->ub_cnt - 1];
+               domain = _vm_phys_domain(pmap_kextract((vm_offset_t)item));
+               zdom = &zone->uz_domain[domain];
+               if (zdom->uzd_cross == NULL) {
+                       zdom->uzd_cross = bucket_alloc(zone, udata, M_NOWAIT);
+                       if (zdom->uzd_cross == NULL)
+                               break;
+               }
+               zdom->uzd_cross->ub_bucket[zdom->uzd_cross->ub_cnt++] = item;
+               if (zdom->uzd_cross->ub_cnt == zdom->uzd_cross->ub_entries) {
+                       TAILQ_INSERT_HEAD(&fullbuckets, zdom->uzd_cross,
+                           ub_link);
+                       zdom->uzd_cross = NULL;
+               }
+               bucket->ub_cnt--;
+       }
+       ZONE_CROSS_UNLOCK(zone);
+       if (!TAILQ_EMPTY(&fullbuckets)) {
+               ZONE_LOCK(zone);
+               while ((b = TAILQ_FIRST(&fullbuckets)) != NULL) {
+                       TAILQ_REMOVE(&fullbuckets, b, ub_link);
+                       if (zone->uz_bkt_count >= zone->uz_bkt_max) {
+                               ZONE_UNLOCK(zone);
+                               bucket_drain(zone, b);
+                               bucket_free(zone, b, udata);
+                               ZONE_LOCK(zone);
+                       } else {
+                               domain = _vm_phys_domain(
+                                   pmap_kextract(
+                                   (vm_offset_t)b->ub_bucket[0]));
+                               zdom = &zone->uz_domain[domain];
+                               zone_put_bucket(zone, zdom, b, true);
+                       }
+               }
+               ZONE_UNLOCK(zone);
+       }
+       if (bucket->ub_cnt != 0)
+               bucket_drain(zone, bucket);
+       bucket_free(zone, bucket, udata);
+}
+#endif
+
+static void
 zone_free_bucket(uma_zone_t zone, uma_bucket_t bucket, void *udata,
     int domain, int itemdomain)
 {
@@ -3735,17 +3814,14 @@ zone_free_bucket(uma_zone_t zone, uma_bucket_t bucket,
         * Buckets coming from the wrong domain will be entirely for the
         * only other domain on two domain systems.  In this case we can
         * simply cache them.  Otherwise we need to sort them back to
-        * correct domains by freeing the contents to the slab layer.
+        * correct domains.
         */
        if (domain != itemdomain && vm_ndomains > 2) {
-               CTR3(KTR_UMA,
-                   "uma_zfree: zone %s(%p) draining cross bucket %p",
-                   zone->uz_name, zone, bucket);
-               bucket_drain(zone, bucket);
-               bucket_free(zone, bucket, udata);
+               zone_free_cross(zone, bucket, udata);
                return;
        }
 #endif
+
        /*
         * Attempt to save the bucket in the zone's domain bucket cache.
         *

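The reclaim path above uses a detach-then-free pattern: uzd_cross is
unhooked while ZONE_CROSS_LOCK is held, and the expensive drain runs with
no lock held.  The same discipline keeps the cross lock and the zone lock
disjoint in zone_free_cross(): full buckets are sorted first, and only
then inserted under ZONE_LOCK.  A minimal sketch of the detach-then-free
step, assuming pthreads in place of the kernel mutexes:

#include <pthread.h>
#include <stdlib.h>

struct bucket {
        int     ub_cnt;
};

static pthread_mutex_t cross_lock = PTHREAD_MUTEX_INITIALIZER;
static struct bucket *cross_bucket;     /* plays the role of zdom->uzd_cross */

static void
reclaim_cross(void)
{
        struct bucket *b;

        /* Unhook the shared bucket; the lock is held only briefly. */
        pthread_mutex_lock(&cross_lock);
        b = cross_bucket;
        cross_bucket = NULL;
        pthread_mutex_unlock(&cross_lock);

        /* b is now private to this thread; drain and free it unlocked. */
        if (b != NULL) {
                b->ub_cnt = 0;  /* stand-in for bucket_drain()/bucket_free() */
                free(b);
        }
}
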
Modified: head/sys/vm/uma_int.h
==============================================================================
--- head/sys/vm/uma_int.h       Sat Jan  4 03:30:08 2020        (r356349)
+++ head/sys/vm/uma_int.h       Sat Jan  4 07:56:28 2020        (r356350)
@@ -399,6 +399,7 @@ TAILQ_HEAD(uma_bucketlist, uma_bucket);
 
 struct uma_zone_domain {
        struct uma_bucketlist uzd_buckets; /* full buckets */
+       uma_bucket_t    uzd_cross;      /* Fills from cross buckets. */
        long            uzd_nitems;     /* total item count */
        long            uzd_imax;       /* maximum item count this period */
        long            uzd_imin;       /* minimum item count this period */
@@ -449,6 +450,8 @@ struct uma_zone {
        struct task     uz_maxaction;   /* Task to run when at limit */
        uint16_t        uz_bucket_size_min; /* Min number of items in bucket */
 
+       struct mtx_padalign     uz_cross_lock;  /* Cross domain free lock */
+
        /* Offset 256+, stats and misc. */
        counter_u64_t   uz_allocs;      /* Total number of allocations */
        counter_u64_t   uz_frees;       /* Total number of frees */
@@ -574,6 +577,12 @@ static __inline uma_slab_t hash_sfind(struct uma_hash 
 #define        ZONE_UNLOCK(z)  mtx_unlock(&(z)->uz_lock)
 #define        ZONE_LOCK_FINI(z)       mtx_destroy(&(z)->uz_lock)
 #define        ZONE_LOCK_ASSERT(z)     mtx_assert(&(z)->uz_lock, MA_OWNED)
+
+#define        ZONE_CROSS_LOCK_INIT(z)                                 \
+       mtx_init(&(z)->uz_cross_lock, "UMA Cross", NULL, MTX_DEF)
+#define        ZONE_CROSS_LOCK(z)      mtx_lock(&(z)->uz_cross_lock)
+#define        ZONE_CROSS_UNLOCK(z)    mtx_unlock(&(z)->uz_cross_lock)
+#define        ZONE_CROSS_LOCK_FINI(z) mtx_destroy(&(z)->uz_cross_lock)
 
 /*
  * Find a slab within a hash table.  This is used for OFFPAGE zones to lookup
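
uz_cross_lock is declared as struct mtx_padalign, the cache-line-padded
mutex variant, so contended cross-free traffic does not false-share with
the neighboring zone fields.  A rough portable-C equivalent, assuming a
64-byte cache line (the names below are illustrative only):

#include <pthread.h>

#define CACHE_LINE_SIZE 64      /* assumption; common on x86 */

/* A lock padded and aligned to a full cache line, like mtx_padalign. */
struct lock_padalign {
        pthread_mutex_t lp_lock;
} __attribute__((aligned(CACHE_LINE_SIZE)));

struct zone_like {
        /* ... hot allocation-path fields ... */
        struct lock_padalign zl_cross_lock;     /* gets its own cache line */
        /* ... statistics counters ... */
};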