Author: markj
Date: Thu May 28 19:41:00 2020
New Revision: 361595
URL: https://svnweb.freebsd.org/changeset/base/361595

Log:
  Fix boot on systems where NUMA domain 0 is unpopulated.
  
  - Add vm_phys_early_add_seg(), complementing vm_phys_early_alloc(), to
    ensure that segments registered during hammer_time() are placed in the
    right domain.  Otherwise, since the SRAT has not yet been parsed at
    that point, they would all be assigned to domain 0, which may be
    incorrect and can leave that domain with only a few MB of memory.
    (A sketch of this deferral pattern follows the list.)
  - Fix uma_startup1() to try allocating memory for the initial zones from
    each domain in turn, as sketched below.  Previously, if domain 0 was
    unpopulated, the allocation would simply fail, resulting in a page
    fault slightly later during boot.
  - Change _vm_phys_domain() to return -1 for addresses not covered by the
    affinity table, and change vm_phys_early_alloc() to handle wildcard
    domains (sketched below).  This is necessary on amd64, where the page
    array is dense and pmap_page_array_startup() may allocate page table
    pages for non-existent page frames.
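  
  The first item uses a small deferral pattern.  Below is a minimal,
  self-contained userspace sketch of that pattern, not the kernel code
  itself: segments registered before the SRAT is parsed are queued in a
  static table and replayed once domain affinity is known.  The names
  early_add_seg(), place_seg(), and replay_early_segs() are illustrative
  stand-ins, not FreeBSD interfaces.
  
      #include <assert.h>
      #include <stdint.h>
      #include <stdio.h>
      
      typedef uint64_t paddr_t;
      
      struct early_seg {
              paddr_t start, end;
      };
      
      static struct early_seg early_segs[8];
      static int early_nsegs;
      
      /* Queue a segment; domain affinity is not yet known. */
      static void
      early_add_seg(paddr_t start, paddr_t end)
      {
      
              assert(early_nsegs != -1);      /* no calls after replay */
              assert(early_nsegs < 8);        /* table not full */
              early_segs[early_nsegs].start = start;
              early_segs[early_nsegs].end = end;
              early_nsegs++;
      }
      
      /* Stand-in for vm_phys_add_seg(): places a segment for real. */
      static void
      place_seg(paddr_t start, paddr_t end)
      {
      
              printf("seg [%#jx, %#jx) placed in its real domain\n",
                  (uintmax_t)start, (uintmax_t)end);
      }
      
      /* Replay the queued segments once the SRAT has been parsed. */
      static void
      replay_early_segs(void)
      {
              int i;
      
              for (i = 0; i < early_nsegs; i++)
                      place_seg(early_segs[i].start, early_segs[i].end);
              early_nsegs = -1;               /* catch late registrations */
      }
      
      int
      main(void)
      {
      
              early_add_seg(0x200000, 0x800000);      /* e.g. kernel image */
              replay_early_segs();
              return (0);
      }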
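  
  The second item is a retry-across-domains idiom.  A hedged sketch,
  assuming a startup_alloc()-like allocator that returns 0 on failure;
  startup_alloc_stub() is invented for illustration and simply pretends
  domain 0 is unpopulated:
  
      #include <stddef.h>
      #include <stdint.h>
      
      static int vm_ndomains = 2;
      
      /* Illustrative allocator stub: domain 0 has no memory. */
      static uintptr_t
      startup_alloc_stub(size_t size, int domain)
      {
      
              (void)size;             /* ignored by this stub */
              return (domain == 0 ? 0 : (uintptr_t)0x1000);
      }
      
      /* Try each domain in turn instead of hard-coding domain 0. */
      static uintptr_t
      alloc_any_domain(size_t size)
      {
              uintptr_t m;
              int domain;
      
              m = 0;
              for (domain = 0; domain < vm_ndomains; domain++) {
                      m = startup_alloc_stub(size, domain);
                      if (m != 0)
                              break;
              }
              return (m);
      }
      
      int
      main(void)
      {
      
              return (alloc_any_domain(4096) != 0 ? 0 : 1);
      }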
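  
  The third item introduces a wildcard convention: -1 means "no affinity
  entry covers this address" on lookup, and "any domain will do" when
  passed to vm_phys_early_alloc().  A sketch under those assumptions; the
  table layout mirrors struct mem_affinity, but phys_domain(),
  domain_matches(), and the sample address ranges are illustrative:
  
      #include <stdint.h>
      #include <stdio.h>
      
      typedef uint64_t paddr_t;
      
      struct affinity {
              paddr_t start, end;     /* inclusive range */
              int domain;
      };
      
      /* Zero-terminated, like the kernel's mem_affinity array. */
      static const struct affinity affinity_table[] = {
              { 0x00000000, 0x3fffffff, 0 },
              { 0x40000000, 0x7fffffff, 1 },
              { 0, 0, 0 },
      };
      
      /* Return the domain covering pa, or -1 if no entry covers it. */
      static int
      phys_domain(paddr_t pa)
      {
              int i;
      
              for (i = 0; affinity_table[i].end != 0; i++)
                      if (affinity_table[i].start <= pa &&
                          affinity_table[i].end >= pa)
                              return (affinity_table[i].domain);
              return (-1);
      }
      
      /* A consumer honoring the wildcard, as vm_phys_early_alloc() now does. */
      static int
      domain_matches(int want, int have)
      {
      
              return (want == -1 || want == have);
      }
      
      int
      main(void)
      {
      
              printf("0x50000000 -> domain %d\n", phys_domain(0x50000000));
              printf("0x80000000 -> domain %d\n", phys_domain(0x80000000));
              printf("wildcard matches domain 1: %d\n", domain_matches(-1, 1));
              return (0);
      }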
  
  Reported and tested by:       Rafael Kitover <rkito...@gmail.com>
  Reviewed by:  cem (earlier version), kib
  Sponsored by: The FreeBSD Foundation
  Differential Revision:        https://reviews.freebsd.org/D25001

Modified:
  head/sys/amd64/amd64/machdep.c
  head/sys/amd64/amd64/pmap.c
  head/sys/i386/i386/machdep.c
  head/sys/i386/i386/pmap.c
  head/sys/vm/uma_core.c
  head/sys/vm/vm_phys.c
  head/sys/vm/vm_phys.h

Modified: head/sys/amd64/amd64/machdep.c
==============================================================================
--- head/sys/amd64/amd64/machdep.c      Thu May 28 19:14:44 2020        (r361594)
+++ head/sys/amd64/amd64/machdep.c      Thu May 28 19:41:00 2020        (r361595)
@@ -1223,7 +1223,7 @@ getmemsize(caddr_t kmdp, u_int64_t first)
         * Tell the physical memory allocator about pages used to store
         * the kernel and preloaded data.  See kmem_bootstrap_free().
         */
-       vm_phys_add_seg((vm_paddr_t)kernphys, trunc_page(first));
+       vm_phys_early_add_seg((vm_paddr_t)kernphys, trunc_page(first));
 
        bzero(physmap, sizeof(physmap));
        physmap_idx = 0;

Modified: head/sys/amd64/amd64/pmap.c
==============================================================================
--- head/sys/amd64/amd64/pmap.c Thu May 28 19:14:44 2020        (r361594)
+++ head/sys/amd64/amd64/pmap.c Thu May 28 19:41:00 2020        (r361595)
@@ -1700,7 +1700,7 @@ pmap_bootstrap(vm_paddr_t *firstaddr)
         * are required for promotion of the corresponding kernel virtual
         * addresses to superpage mappings.
         */
-       vm_phys_add_seg(KPTphys, KPTphys + ptoa(nkpt));
+       vm_phys_early_add_seg(KPTphys, KPTphys + ptoa(nkpt));
 
        /*
         * Account for the virtual addresses mapped by create_pagetables().

Modified: head/sys/i386/i386/machdep.c
==============================================================================
--- head/sys/i386/i386/machdep.c        Thu May 28 19:14:44 2020        (r361594)
+++ head/sys/i386/i386/machdep.c        Thu May 28 19:41:00 2020        (r361595)
@@ -1828,7 +1828,7 @@ getmemsize(int first)
         * Tell the physical memory allocator about pages used to store
         * the kernel and preloaded data.  See kmem_bootstrap_free().
         */
-       vm_phys_add_seg((vm_paddr_t)KERNLOAD, trunc_page(first));
+       vm_phys_early_add_seg((vm_paddr_t)KERNLOAD, trunc_page(first));
 
        TUNABLE_INT_FETCH("hw.above4g_allow", &above4g_allow);
        TUNABLE_INT_FETCH("hw.above24g_allow", &above24g_allow);

Modified: head/sys/i386/i386/pmap.c
==============================================================================
--- head/sys/i386/i386/pmap.c   Thu May 28 19:14:44 2020        (r361594)
+++ head/sys/i386/i386/pmap.c   Thu May 28 19:41:00 2020        (r361595)
@@ -633,7 +633,7 @@ __CONCAT(PMTYPE, bootstrap)(vm_paddr_t firstaddr)
         * are required for promotion of the corresponding kernel virtual
         * addresses to superpage mappings.
         */
-       vm_phys_add_seg(KPTphys, KPTphys + ptoa(nkpt));
+       vm_phys_early_add_seg(KPTphys, KPTphys + ptoa(nkpt));
 
        /*
         * Initialize the first available kernel virtual address.

Modified: head/sys/vm/uma_core.c
==============================================================================
--- head/sys/vm/uma_core.c      Thu May 28 19:14:44 2020        (r361594)
+++ head/sys/vm/uma_core.c      Thu May 28 19:41:00 2020        (r361595)
@@ -2810,6 +2810,7 @@ uma_startup1(vm_offset_t virtual_avail)
        size_t ksize, zsize, size;
        uma_keg_t masterkeg;
        uintptr_t m;
+       int domain;
        uint8_t pflag;
 
        bootstart = bootmem = virtual_avail;
@@ -2827,7 +2828,12 @@ uma_startup1(vm_offset_t virtual_avail)
 
        /* Allocate the zone of zones, zone of kegs, and zone of zones keg. */
        size = (zsize * 2) + ksize;
-       m = (uintptr_t)startup_alloc(NULL, size, 0, &pflag, M_NOWAIT | M_ZERO);
+       for (domain = 0; domain < vm_ndomains; domain++) {
+               m = (uintptr_t)startup_alloc(NULL, size, domain, &pflag,
+                   M_NOWAIT | M_ZERO);
+               if (m != 0)
+                       break;
+       }
        zones = (uma_zone_t)m;
        m += zsize;
        kegs = (uma_zone_t)m;
@@ -3191,6 +3197,17 @@ item_dtor(uma_zone_t zone, void *item, int size, void 
        }
 }
 
+static int
+item_domain(void *item)
+{
+       int domain;
+
+       domain = _vm_phys_domain(vtophys(item));
+       KASSERT(domain >= 0 && domain < vm_ndomains,
+           ("%s: unknown domain for item %p", __func__, item));
+       return (domain);
+}
+
 #if defined(INVARIANTS) || defined(DEBUG_MEMGUARD) || defined(WITNESS)
 #define        UMA_ZALLOC_DEBUG
 static int
@@ -4001,7 +4018,7 @@ uma_zfree_smr(uma_zone_t zone, void *item)
        itemdomain = 0;
 #ifdef NUMA
        if ((uz_flags & UMA_ZONE_FIRSTTOUCH) != 0)
-               itemdomain = _vm_phys_domain(pmap_kextract((vm_offset_t)item));
+               itemdomain = item_domain(item);
 #endif
        critical_enter();
        do {
@@ -4085,7 +4102,7 @@ uma_zfree_arg(uma_zone_t zone, void *item, void *udata
        itemdomain = 0;
 #ifdef NUMA
        if ((uz_flags & UMA_ZONE_FIRSTTOUCH) != 0)
-               itemdomain = _vm_phys_domain(pmap_kextract((vm_offset_t)item));
+               itemdomain = item_domain(item);
 #endif
        critical_enter();
        do {
@@ -4159,7 +4176,7 @@ zone_free_cross(uma_zone_t zone, uma_bucket_t bucket, 
        ZONE_CROSS_LOCK(zone);
        while (bucket->ub_cnt > 0) {
                item = bucket->ub_bucket[bucket->ub_cnt - 1];
-               domain = _vm_phys_domain(pmap_kextract((vm_offset_t)item));
+               domain = item_domain(item);
                zdom = ZDOM_GET(zone, domain);
                if (zdom->uzd_cross == NULL) {
                        zdom->uzd_cross = bucket_alloc(zone, udata, M_NOWAIT);
@@ -4182,8 +4199,7 @@ zone_free_cross(uma_zone_t zone, uma_bucket_t bucket, 
 
        while ((b = STAILQ_FIRST(&fullbuckets)) != NULL) {
                STAILQ_REMOVE_HEAD(&fullbuckets, ub_link);
-               domain = _vm_phys_domain(pmap_kextract(
-                   (vm_offset_t)b->ub_bucket[0]));
+               domain = item_domain(b->ub_bucket[0]);
                zone_put_bucket(zone, domain, b, udata, true);
        }
 }

Modified: head/sys/vm/vm_phys.c
==============================================================================
--- head/sys/vm/vm_phys.c       Thu May 28 19:14:44 2020        (r361594)
+++ head/sys/vm/vm_phys.c       Thu May 28 19:41:00 2020        (r361595)
@@ -82,6 +82,8 @@ domainset_t __read_mostly all_domains = DOMAINSET_T_IN
 
 struct vm_phys_seg __read_mostly vm_phys_segs[VM_PHYSSEG_MAX];
 int __read_mostly vm_phys_nsegs;
+static struct vm_phys_seg vm_phys_early_segs[8];
+static int vm_phys_early_nsegs;
 
 struct vm_phys_fictitious_seg;
 static int vm_phys_fictitious_cmp(struct vm_phys_fictitious_seg *,
@@ -653,18 +655,16 @@ _vm_phys_domain(vm_paddr_t pa)
 #ifdef NUMA
        int i;
 
-       if (vm_ndomains == 1 || mem_affinity == NULL)
+       if (vm_ndomains == 1)
                return (0);
-
-       /*
-        * Check for any memory that overlaps.
-        */
        for (i = 0; mem_affinity[i].end != 0; i++)
                if (mem_affinity[i].start <= pa &&
                    mem_affinity[i].end >= pa)
                        return (mem_affinity[i].domain);
-#endif
+       return (-1);
+#else
        return (0);
+#endif
 }
 
 /*
@@ -1611,6 +1611,21 @@ vm_phys_avail_split(vm_paddr_t pa, int i)
        return (0);
 }
 
+void
+vm_phys_early_add_seg(vm_paddr_t start, vm_paddr_t end)
+{
+       struct vm_phys_seg *seg;
+
+       if (vm_phys_early_nsegs == -1)
+               panic("%s: called after initialization", __func__);
+       if (vm_phys_early_nsegs == nitems(vm_phys_early_segs))
+               panic("%s: ran out of early segments", __func__);
+
+       seg = &vm_phys_early_segs[vm_phys_early_nsegs++];
+       seg->start = start;
+       seg->end = end;
+}
+
 /*
  * This routine allocates NUMA node specific memory before the page
  * allocator is bootstrapped.
@@ -1621,6 +1636,8 @@ vm_phys_early_alloc(int domain, size_t alloc_size)
        int i, mem_index, biggestone;
        vm_paddr_t pa, mem_start, mem_end, size, biggestsize, align;
 
+       KASSERT(domain == -1 || (domain >= 0 && domain < vm_ndomains),
+           ("%s: invalid domain index %d", __func__, domain));
 
        /*
         * Search the mem_affinity array for the biggest address
@@ -1633,11 +1650,11 @@ vm_phys_early_alloc(int domain, size_t alloc_size)
        mem_end = -1;
 #ifdef NUMA
        if (mem_affinity != NULL) {
-               for (i = 0; ; i++) {
+               for (i = 0;; i++) {
                        size = mem_affinity[i].end - mem_affinity[i].start;
                        if (size == 0)
                                break;
-                       if (mem_affinity[i].domain != domain)
+                       if (domain != -1 && mem_affinity[i].domain != domain)
                                continue;
                        if (size > biggestsize) {
                                mem_index = i;
@@ -1699,12 +1716,19 @@ vm_phys_early_alloc(int domain, size_t alloc_size)
 void
 vm_phys_early_startup(void)
 {
+       struct vm_phys_seg *seg;
        int i;
 
        for (i = 0; phys_avail[i + 1] != 0; i += 2) {
                phys_avail[i] = round_page(phys_avail[i]);
                phys_avail[i + 1] = trunc_page(phys_avail[i + 1]);
        }
+
+       for (i = 0; i < vm_phys_early_nsegs; i++) {
+               seg = &vm_phys_early_segs[i];
+               vm_phys_add_seg(seg->start, seg->end);
+       }
+       vm_phys_early_nsegs = -1;
 
 #ifdef NUMA
        /* Force phys_avail to be split by domain. */

Modified: head/sys/vm/vm_phys.h
==============================================================================
--- head/sys/vm/vm_phys.h       Thu May 28 19:14:44 2020        (r361594)
+++ head/sys/vm/vm_phys.h       Thu May 28 19:41:00 2020        (r361595)
@@ -103,6 +103,7 @@ vm_page_t vm_phys_scan_contig(int domain, u_long npage
 void vm_phys_set_pool(int pool, vm_page_t m, int order);
 boolean_t vm_phys_unfree_page(vm_page_t m);
 int vm_phys_mem_affinity(int f, int t);
+void vm_phys_early_add_seg(vm_paddr_t start, vm_paddr_t end);
 vm_paddr_t vm_phys_early_alloc(int domain, size_t alloc_size);
 void vm_phys_early_startup(void);
 int vm_phys_avail_largest(void);