Hi,

I am experiencing an early UMA failure on an ARM64 platform with 48
cores enabled: the kernel panics during VM initialization. Here is
the boot log (lines prefixed with 'MST:' are my own debug printfs).

Copyright (c) 1992-2015 The FreeBSD Project.
Copyright (c) 1979, 1980, 1983, 1986, 1988, 1989, 1991, 1992, 1993, 1994
    The Regents of the University of California. All rights reserved.
FreeBSD is a registered trademark of The FreeBSD Foundation.
FreeBSD 11.0-CURRENT #333 52fd91e(smp_48)-dirty: Fri May 15 18:26:56 CEST
2015
    
mst@arm64-prime:/usr/home/mst/freebsd_v8/obj_kernel/arm64.aarch64/usr/home/mst/freebsd_v8/kernel/sys/THUNDER-88XX
arm64
FreeBSD clang version 3.6.0 (tags/RELEASE_360/final 230434) 20150225
MST: in vm_mem_init()
MST: in vmem_init() with param *vm == kernel_arena
MST: in vmem_xalloc() with param *vm == kernel_arena
MST: in vmem_xalloc() with param *vm == kmem_arena
panic: mtx_lock() of spin mutex (null) @
/usr/home/mst/freebsd_v8/kernel/sys/kern/subr_vmem.c:1165
cpuid = 0
KDB: enter: panic
[ thread pid 0 tid 0 ]
Stopped at      0xffffff80001f4f80:

The kernel boots fine when MAXCPU is set to 30 or lower, but the error
above always appears when it is set to a higher value.

The panic is triggered by a KASSERT in __mtx_lock_flags(), which is
reached through the VMEM_LOCK(vm) macro in vmem_xalloc(). This is line
1143 in subr_vmem.c (the log shows a different line number because of my
added printfs). It looks like the lock belongs to 'kmem_arena', which is
still uninitialized at this point (kmeminit() has not been called yet).

While debugging, I tried modifying the VM code as a quick workaround. I
hard-coded the CPU count to a single core wherever mp_ncpus, mp_maxid or
MAXCPU (and others) are read. This, I believe, limits the UMA per-CPU
caches to just one, while the rest of the OS (scheduler, etc.) still sees
all 48 cores.
In addition, I changed UMA_BOOT_PAGES in sys/vm/uma_int.h to 512 (default
was 64).
With these tweaks, I got a successful (but not really stable) boot with 48
cores. Of course these are dirty hacks and a proper solution is needed.

I am a bit surprised that the kernel fails with MAXCPU==48 as the amd64
arch has this value set to '256' and I have read posts that other platforms
with even more cores have worked fine. Perhaps I need to tweak some other
VM parameters, apart from UMA_BOOT_PAGES (AKA vm.boot_pages), but I am not
sure how.

I have attached a full stack trace and a more verbose log (with the
UMA_DEBUG macros enabled). There is also a diff of the hacks I used while
debugging.

Best regards,
Michal Stanek
diff --git a/sys/kern/kern_malloc.c b/sys/kern/kern_malloc.c
index aef1e4e..be225fb 100644
--- a/sys/kern/kern_malloc.c
+++ b/sys/kern/kern_malloc.c
@@ -874,7 +874,7 @@ malloc_uninit(void *data)
         * Look for memory leaks.
         */
        temp_allocs = temp_bytes = 0;
-       for (i = 0; i < MAXCPU; i++) {
+       for (i = 0; i < 1; i++) {
                mtsp = &mtip->mti_stats[i];
                temp_allocs += mtsp->mts_numallocs;
                temp_allocs -= mtsp->mts_numfrees;
diff --git a/sys/kern/subr_vmem.c b/sys/kern/subr_vmem.c
index 80940be..89d62ed 100644
--- a/sys/kern/subr_vmem.c
+++ b/sys/kern/subr_vmem.c
@@ -665,7 +665,8 @@ vmem_startup(void)
         * CPUs to attempt to allocate new tags concurrently to limit
         * false restarts in UMA.
         */
-       uma_zone_reserve(vmem_bt_zone, BT_MAXALLOC * (mp_ncpus + 1) / 2);
+       //mst look here
+       uma_zone_reserve(vmem_bt_zone, BT_MAXALLOC * (1 + 1) / 2);
        uma_zone_set_allocf(vmem_bt_zone, vmem_bt_alloc);
 #endif
 }
diff --git a/sys/vm/uma_core.c b/sys/vm/uma_core.c
index b96c421..6382437 100644
--- a/sys/vm/uma_core.c
+++ b/sys/vm/uma_core.c
@@ -98,6 +98,14 @@ __FBSDID("$FreeBSD$");
 #include <vm/memguard.h>
 #endif
 
+//mst: override some defines
+#undef curcpu
+#define        curcpu  0
+#undef CPU_FOREACH
+#define        CPU_FOREACH(i)                                          \
+       for ((i) = 0; (i) <= 0; (i)++)                          \
+               if (!CPU_ABSENT((i)))
+
 /*
  * This is the zone and keg from which all zones are spawned.  The idea is that
  * even the zone & keg heads are allocated from the allocator, so we use the
@@ -1228,6 +1236,7 @@ keg_small_init(uma_keg_t keg)
 
        if (keg->uk_flags & UMA_ZONE_PCPU) {
                u_int ncpus = mp_ncpus ? mp_ncpus : MAXCPU;
+               ncpus = 1;
 
                keg->uk_slabsize = sizeof(struct pcpu);
                keg->uk_ppera = howmany(ncpus * sizeof(struct pcpu),
@@ -1822,7 +1831,7 @@ uma_startup(void *bootmem, int boot_pages)
 #endif
        args.name = "UMA Zones";
        args.size = sizeof(struct uma_zone) +
-           (sizeof(struct uma_cache) * (mp_maxid + 1));
+           (sizeof(struct uma_cache) * (0 + 1));
        args.ctor = zone_ctor;
        args.dtor = zone_dtor;
        args.uminit = zero_init;
@@ -3301,7 +3310,7 @@ uma_zero_item(void *item, uma_zone_t zone)
 {
 
        if (zone->uz_flags & UMA_ZONE_PCPU) {
-               for (int i = 0; i < mp_ncpus; i++)
+               for (int i = 0; i < 1; i++)
                        bzero(zpcpu_get_cpu(item, i), zone->uz_size);
        } else
                bzero(item, zone->uz_size);
@@ -3465,7 +3474,7 @@ sysctl_vm_zone_stats(SYSCTL_HANDLER_ARGS)
         */
        bzero(&ush, sizeof(ush));
        ush.ush_version = UMA_STREAM_VERSION;
-       ush.ush_maxcpus = (mp_maxid + 1);
+       ush.ush_maxcpus = (0 + 1);
        ush.ush_count = count;
        (void)sbuf_bcat(&sbuf, &ush, sizeof(ush));
 
@@ -3509,7 +3518,7 @@ sysctl_vm_zone_stats(SYSCTL_HANDLER_ARGS)
                         * accept the possible race associated with bucket
                         * exchange during monitoring.
                         */
-                       for (i = 0; i < (mp_maxid + 1); i++) {
+                       for (i = 0; i < (0 + 1); i++) {
                                bzero(&ups, sizeof(ups));
                                if (kz->uk_flags & UMA_ZFLAG_INTERNAL)
                                        goto skip;
diff --git a/sys/vm/uma_int.h b/sys/vm/uma_int.h
index 11ab24f..b5b5a05 100644
--- a/sys/vm/uma_int.h
+++ b/sys/vm/uma_int.h
@@ -107,7 +107,7 @@
 #define UMA_SLAB_MASK  (PAGE_SIZE - 1) /* Mask to get back to the page */
 #define UMA_SLAB_SHIFT PAGE_SHIFT      /* Number of bits PAGE_MASK */
 
-#define UMA_BOOT_PAGES         64      /* Pages allocated for startup */
+#define UMA_BOOT_PAGES         512     /* Pages allocated for startup */
 
 /* Max waste percentage before going to off page slab management */
 #define UMA_MAX_WASTE  10
_______________________________________________
freebsd-current@freebsd.org mailing list
http://lists.freebsd.org/mailman/listinfo/freebsd-current
To unsubscribe, send any mail to "freebsd-current-unsubscr...@freebsd.org"

Reply via email to