Module Name:    src
Committed By:   ad
Date:           Sun May 17 15:11:57 UTC 2020

Modified Files:
        src/sys/uvm: uvm.h uvm_page.c

Log Message:
- If the hardware provides NUMA info, use it to decide how to set up the
  allocator's buckets, instead of doing round-robin distribution.  There
  are open questions here, but this is better than doing nothing.  (A
  simplified sketch of the bucket-selection logic follows below.)

- Kernel reserve pages are for the kernel, not realtime threads.  (See the
  second sketch below.)

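For illustration only, here is a minimal self-contained model of the
bucket-selection decision that uvm_page_rebucket() now makes.  The type,
function name and constant value below (struct fake_cpu, choose_buckets(),
FAKE_PGFL_MAX_BUCKETS) are stand-ins invented for this sketch; the real code
walks the cpu_info list with CPU_INFO_FOREACH and redimensions the free
lists with uvm_page_redim().

#include <stdio.h>

#define	FAKE_PGFL_MAX_BUCKETS	8	/* stand-in, not the kernel constant */

struct fake_cpu {
	unsigned int	numa_id;	/* NUMA node this CPU belongs to */
	unsigned int	package_id;	/* physical package, for the fallback */
	unsigned int	bucket;		/* free-page bucket chosen for this CPU */
};

static void
choose_buckets(struct fake_cpu *cpus, int ncpu)
{
	unsigned int min_numa = (unsigned int)-1, max_numa = 0;
	int i;

	/* Find the range of NUMA node IDs reported by the hardware. */
	for (i = 0; i < ncpu; i++) {
		if (cpus[i].numa_id < min_numa)
			min_numa = cpus[i].numa_id;
		if (cpus[i].numa_id > max_numa)
			max_numa = cpus[i].numa_id;
	}

	if (min_numa != max_numa && max_numa < FAKE_PGFL_MAX_BUCKETS) {
		/* NUMA scheme: one free-page bucket per NUMA node. */
		for (i = 0; i < ncpu; i++)
			cpus[i].bucket = cpus[i].numa_id;
		printf("using NUMA allocation scheme, %u buckets\n",
		    max_numa + 1);
		return;
	}

	/* Fallback: distribute buckets per CPU package for cache locality. */
	for (i = 0; i < ncpu; i++)
		cpus[i].bucket = cpus[i].package_id % FAKE_PGFL_MAX_BUCKETS;
	printf("using per-package allocation scheme\n");
}

int
main(void)
{
	/* Two fake NUMA nodes with two CPUs each. */
	struct fake_cpu cpus[] = {
		{ 0, 0, 0 }, { 0, 0, 0 }, { 1, 1, 0 }, { 1, 1, 0 },
	};

	choose_buckets(cpus, (int)(sizeof(cpus) / sizeof(cpus[0])));
	return 0;
}

The fallback mirrors what the committed code keeps doing when there is only
one node, or when the node IDs would not fit in the available buckets:
buckets stay distributed across CPU packages to maximise L2/L3 cache
locality.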

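And a similarly stand-alone sketch of the reserve-page policy change in
uvm_pagealloc_strat().  The struct, flag and priority values here are
made-up stand-ins; the committed code simply tests curlwp's l_flag for
LW_SYSTEM instead of comparing lwp_eprio() against PRI_KTHREAD, so user
threads that merely run at a real-time priority no longer dip into the
kernel reserve.

#include <stdbool.h>
#include <stdio.h>

#define	FAKE_LW_SYSTEM		0x01	/* stand-in: LWP is a kernel thread */
#define	FAKE_PRI_KTHREAD	96	/* stand-in kernel-thread base priority */

struct fake_lwp {
	int	l_flag;
	int	l_eprio;	/* effective priority */
};

/* Old policy: priority based, so real-time user threads also qualified. */
static bool
may_use_reserve_old(const struct fake_lwp *l)
{
	return l->l_eprio >= FAKE_PRI_KTHREAD;
}

/* New policy: only genuine kernel threads may use the kernel reserve. */
static bool
may_use_reserve_new(const struct fake_lwp *l)
{
	return (l->l_flag & FAKE_LW_SYSTEM) != 0;
}

int
main(void)
{
	struct fake_lwp rt_user = { 0, 150 };			/* RT user thread */
	struct fake_lwp kthread = { FAKE_LW_SYSTEM, 96 };	/* kernel thread */

	printf("rt user thread: old=%d new=%d\n",
	    may_use_reserve_old(&rt_user), may_use_reserve_new(&rt_user));
	printf("kernel thread:  old=%d new=%d\n",
	    may_use_reserve_old(&kthread), may_use_reserve_new(&kthread));
	return 0;
}
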
To generate a diff of this commit:
cvs rdiff -u -r1.76 -r1.77 src/sys/uvm/uvm.h
cvs rdiff -u -r1.234 -r1.235 src/sys/uvm/uvm_page.c

Please note that diffs are not public domain; they are subject to the
copyright notices on the relevant files.

Modified files:

Index: src/sys/uvm/uvm.h
diff -u src/sys/uvm/uvm.h:1.76 src/sys/uvm/uvm.h:1.77
--- src/sys/uvm/uvm.h:1.76	Sat Mar 14 20:23:51 2020
+++ src/sys/uvm/uvm.h	Sun May 17 15:11:57 2020
@@ -1,4 +1,4 @@
-/*	$NetBSD: uvm.h,v 1.76 2020/03/14 20:23:51 ad Exp $	*/
+/*	$NetBSD: uvm.h,v 1.77 2020/05/17 15:11:57 ad Exp $	*/
 
 /*
  * Copyright (c) 1997 Charles D. Cranor and Washington University.
@@ -107,7 +107,6 @@ struct uvm {
 	struct pgfreelist page_free[VM_NFREELIST]; /* unallocated pages */
 	u_int	bucketcount;
 	bool	page_init_done;		/* true if uvm_page_init() finished */
-	bool	numa_alloc;		/* use NUMA page allocation strategy */
 
 		/* page daemon trigger */
 	int pagedaemon;			/* daemon sleeps on this */

Index: src/sys/uvm/uvm_page.c
diff -u src/sys/uvm/uvm_page.c:1.234 src/sys/uvm/uvm_page.c:1.235
--- src/sys/uvm/uvm_page.c:1.234	Tue Mar 17 18:31:39 2020
+++ src/sys/uvm/uvm_page.c	Sun May 17 15:11:57 2020
@@ -1,4 +1,4 @@
-/*	$NetBSD: uvm_page.c,v 1.234 2020/03/17 18:31:39 ad Exp $	*/
+/*	$NetBSD: uvm_page.c,v 1.235 2020/05/17 15:11:57 ad Exp $	*/
 
 /*-
  * Copyright (c) 2019, 2020 The NetBSD Foundation, Inc.
@@ -95,7 +95,7 @@
  */
 
 #include <sys/cdefs.h>
-__KERNEL_RCSID(0, "$NetBSD: uvm_page.c,v 1.234 2020/03/17 18:31:39 ad Exp $");
+__KERNEL_RCSID(0, "$NetBSD: uvm_page.c,v 1.235 2020/05/17 15:11:57 ad Exp $");
 
 #include "opt_ddb.h"
 #include "opt_uvm.h"
@@ -749,7 +749,6 @@ uvm_page_numa_lookup(struct vm_page *pg)
 	static bool warned;
 	paddr_t pa;
 
-	KASSERT(uvm.numa_alloc);
 	KASSERT(uvm_page_numa_region != NULL);
 
 	pa = VM_PAGE_TO_PHYS(pg);
@@ -852,7 +851,7 @@ uvm_page_redim(int newncolors, int newnb
 					    uvm_page_get_bucket(pg) == ob);
 					KASSERT(fl ==
 					    uvm_page_get_freelist(pg));
-					if (uvm.numa_alloc) {
+					if (uvm_page_numa_region != NULL) {
 						nb = uvm_page_numa_lookup(pg);
 					} else {
 						nb = atop(VM_PAGE_TO_PHYS(pg))
@@ -919,8 +918,7 @@ uvm_page_rebucket(void)
 	/*
 	 * If we have more than one NUMA node, and the maximum NUMA node ID
 	 * is less than PGFL_MAX_BUCKETS, then we'll use NUMA distribution
-	 * for free pages.  uvm_pagefree() will not reassign pages to a
-	 * different bucket on free.
+	 * for free pages.
 	 */
 	min_numa = (u_int)-1;
 	max_numa = 0;
@@ -933,28 +931,19 @@ uvm_page_rebucket(void)
 		}
 	}
 	if (min_numa != max_numa && max_numa < PGFL_MAX_BUCKETS) {
-#ifdef NUMA
-		/*
-		 * We can do this, and it seems to work well, but until
-		 * further experiments are done we'll stick with the cache
-		 * locality strategy.
-		 */
 		aprint_debug("UVM: using NUMA allocation scheme\n");
 		for (CPU_INFO_FOREACH(cii, ci)) {
 			ci->ci_data.cpu_uvm->pgflbucket = ci->ci_numa_id;
 		}
-		uvm.numa_alloc = true;
 	 	uvm_page_redim(uvmexp.ncolors, max_numa + 1);
 	 	return;
-#endif
 	}
 
 	/*
 	 * Otherwise we'll go with a scheme to maximise L2/L3 cache locality
 	 * and minimise lock contention.  Count the total number of CPU
 	 * packages, and then try to distribute the buckets among CPU
-	 * packages evenly.  uvm_pagefree() will reassign pages to the
-	 * freeing CPU's preferred bucket on free.
+	 * packages evenly.
 	 */
 	npackage = curcpu()->ci_nsibling[CPUREL_PACKAGE1ST];
 
@@ -1220,18 +1209,13 @@ uvm_pagealloc_strat(struct uvm_object *o
 	 * [3]  only pagedaemon "reserved" pages remain and
 	 *        the requestor isn't the pagedaemon.
 	 * we make kernel reserve pages available if called by a
-	 * kernel thread or a realtime thread.
+	 * kernel thread.
 	 */
 	l = curlwp;
-	if (__predict_true(l != NULL) && lwp_eprio(l) >= PRI_KTHREAD) {
+	if (__predict_true(l != NULL) && (l->l_flag & LW_SYSTEM) != 0) {
 		flags |= UVM_PGA_USERESERVE;
 	}
 
-	/* If the allocator's running in NUMA mode, go with NUMA strategy. */
-	if (uvm.numa_alloc && strat == UVM_PGA_STRAT_NORMAL) {
-		strat = UVM_PGA_STRAT_NUMA;
-	}
-
  again:
 	switch (strat) {
 	case UVM_PGA_STRAT_NORMAL:
@@ -1270,10 +1254,11 @@ uvm_pagealloc_strat(struct uvm_object *o
 
 	case UVM_PGA_STRAT_NUMA:
 		/*
-		 * NUMA strategy: allocating from the correct bucket is more
-		 * important than observing freelist priority.  Look only to
-		 * the current NUMA node; if that fails, we need to look to
-		 * other NUMA nodes, so retry with the normal strategy.
+		 * NUMA strategy (experimental): allocating from the correct
+		 * bucket is more important than observing freelist
+		 * priority.  Look only to the current NUMA node; if that
+		 * fails, we need to look to other NUMA nodes, so retry with
+		 * the normal strategy.
 		 */
 		for (lcv = 0; lcv < VM_NFREELIST; lcv++) {
 			pg = uvm_pgflcache_alloc(ucpu, lcv, color);
