On Friday, June 19, 2020 7:42 PM, Klemens Nanni <[email protected]> wrote:

> This seems to happen on T4-2 machines only, T200, T5220, T5240 and T4-1
> are known to work. I've briefly looked into this before but shelved it
> since no one else reported it; diff below works around the issue such
> that `init-system' works as expected, but it does not fix the bug:

I've tried your fix on tech@; it seems to solve the problem - at least up until
writing it to SP storage and changing the configuration. When I rebooted,
though, it panicked with:

panic: rw_enter: vmmaplk locking against myself

Going back to factory-default config seems to make it not panic anymore, though.
I've attached the ddb log and relevant objdump parts too.

>>      2. ldomd always crashes 10-30 minutes after starting. Running it
>>      manually with "ldomd -d" gives me this error:
>>
>>      ldomd: Cannot allocate memory
> I have never seen or reproduced this before.

Uh, I am sorry, I forgot to include the ldomd backtrace.
Recompiling it with DEBUG="-g3 -O0" and running it under gdb gave me
this backtrace:

#0  *_libc_exit (status=1) at /usr/src/lib/libc/stdlib/exit.c:54
#1  0x000000a2c4acdb24 in *_libc_verr (eval=1, fmt=0x0, ap=0xfffffffffffbd0d0) at /usr/src/lib/libc/gen/verr.c:50
#2  0x000000a2c4b23050 in *_libc_err (eval=1, fmt=0x0) at /usr/src/lib/libc/gen/err.c:40
#3  0x000000a008607450 in xmalloc (size=24) at /usr/src/usr.sbin/ldomd/../ldomctl/util.c:36
#4  0x000000a0086018e8 in add_frag (base=44040192) at /usr/src/usr.sbin/ldomd/ldomd.c:348
#5  0x000000a0086017c0 in add_frag_mblock (node=0xa21182d7c0) at /usr/src/usr.sbin/ldomd/ldomd.c:333
#6  0x000000a0086016e8 in frag_init () at /usr/src/usr.sbin/ldomd/ldomd.c:320
#7  0x000000a008600f04 in main (argc=0, argv=0xfffffffffffbda78) at /usr/src/usr.sbin/ldomd/ldomd.c:192

Is there anything else that I could do to help?
panic: rw_enter: vmmaplk locking against myself
Stopped at      db_enter+0x8:   nop
    TID    PID    UID     PRFLAGS     PFLAGS  CPU  COMMAND
 367053  81919      0    0x100003          0    0  ksh
 363210  51162      0         0x2          0    3  smtpd
 121609  31157      0         0x2          0    2  smtpd
* 43470  98177      0         0x2          0    1K smtpd
rw_enter(4000864fba8, 2, 0, 0, 1caf000, 194da80) at rw_enter+0x284
uvmfault_lookup(400c2c51508, 0, 2f83058228, 2f833c012c, 0, 1) at uvmfault_looku
p+0xec
uvm_fault(4000864fba0, 542eaeacf7f0e000, 0, 1, 8, 6) at uvm_fault+0xd0
data_access_fault(400c2c516f0, 30, 1494de4, 542eaeacf7f0e091, 542eaeacf7f0e090,
 1) at data_access_fault+0x144
sun4v_datatrap(1c1ae10, 2, 8, 0, 0, 40008689a00) at sun4v_datatrap+0x210
malloc(10, 62, 2, 194da80, 1c00, 2) at malloc+0x14c
amap_alloc(4000861caf0, 2, 0, 400c2c51cd0, 0, 1) at amap_alloc+0x1ac
amap_copy(4000864fba0, 400085262e0, 2, 1, 2f833c0000, 2f833c0001) at amap_copy+
0x558
uvm_fault(e, 1, 0, 1, 0, 6) at uvm_fault+0x6bc
data_access_fault(400c2c51ed0, 31, 2f83058228, 2f833c012c, 2f833c0000, 1) at da
ta_access_fault+0x144
sun4v_datatrap(2f83058200, 2f831901f0, 2d7, a4, 8, 2f) at sun4v_datatrap+0x210
https://www.openbsd.org/ddb.html describes the minimum info required in bug
reports.  Insufficient info makes it difficult to find and fix bugs.

ddb{1}> ps
   PID     TID   PPID    UID  S       FLAGS  WAIT          COMMAND
 81919  367053  68868      0  7    0x100003                ksh
 91499  261698  16614      0  2         0x2                smtpd
 51162  363210  16614      0  7         0x2                smtpd
 31157  121609  16614      0  7         0x2                smtpd
*98177   43470  16614      0  7         0x2                smtpd
 24976   82320  16614      0  2         0x2                smtpd
   445   80702  16614      0  2         0x2                smtpd
 16614  349638      1      0  3        0x80  poll          smtpd
 20139  251821  10207      0  3    0x100083  nanosleep     sleep
 10207  339749      1      0  3    0x100089  pause         ksh
 97778  199465      1      0  3        0x80  select        sshd
 80123  393338  68599      0  3    0x100083  nanosleep     sleep
 68599   62972      1      0  3    0x100089  pause         ksh
 78454  373485      1      0  3        0x80  poll          ldomd
 96521  292390  20296      0  3    0x100083  nanosleep     sleep
 20296   60129      1      0  3    0x100089  pause         ksh
  5473   38677      1      0  3    0x100080  poll          ntpd
 31985  347921  55071     83  3    0x100092  poll          ntpd
 55071  473514      1     83  3    0x100092  poll          ntpd
 90914  187174  93619     74  3    0x100092  bpf           pflogd
 93619  430006      1      0  3        0x80  netio         pflogd
 10737   25867  83755     73  3    0x100010  biowait       syslogd
 83755  387012      1      0  3    0x100082  netio         syslogd
 42841  473403      1     77  3    0x100090  poll          dhclient
 99729  157445      1      0  3        0x80  poll          dhclient
 92163  459619  53421    115  3    0x100092  kqread        slaacd
 11565  450088  53421    115  3    0x100092  kqread        slaacd
 53421  386377      1      0  3    0x100080  kqread        slaacd
 68868  514129      1      0  3    0x10008b  pause         sh
 96026   32488      0      0  3     0x14200  bored         smr
 64847   66547      0      0  2     0x14200                zerothread
 81680  318756      0      0  3     0x14200  aiodoned      aiodoned
  2628  330580      0      0  3     0x14200  syncer        update
 48519  427508      0      0  3     0x14200  cleaner       cleaner
 12724  181409      0      0  3     0x14200  reaper        reaper
 47119  220161      0      0  3     0x14200  pgdaemon      pagedaemon
  2069   75241      0      0  3     0x14200  bored         crynlk
  9163   74130      0      0  3     0x14200  bored         crypto
 84907  155574      0      0  3     0x14200  bored         sensors
 12258  187715      0      0  3     0x14200  usbtsk        usbtask
 77986  111101      0      0  3     0x14200  usbatsk       usbatsk
  5235  122877      0      0  3  0x40014200                idle3
 46427  182277      0      0  3  0x40014200                idle2
 89072  395200      0      0  3  0x40014200                idle1
 38002   95011      0      0  3     0x14200  bored         softnet
 88404   40192      0      0  3     0x14200  bored         systqmp
 36258  397531      0      0  3     0x14200  bored         systq
 20479  393690      0      0  3  0x40014200  bored         softclock
 66465   49226      0      0  3  0x40014200                idle0
 82720  280448      0      0  3     0x14200  kmalloc       kmthread
     1  121755      0      0  3        0x82  wait          init
     0       0     -1      0  3     0x10200  scheduler     swapper

ddb{1}> trace
rw_enter(4000864fba8, 2, 0, 0, 1caf000, 194da80) at rw_enter+0x284
uvmfault_lookup(400c2c51508, 0, 2f83058228, 2f833c012c, 0, 1) at uvmfault_looku
p+0xec
uvm_fault(4000864fba0, 542eaeacf7f0e000, 0, 1, 8, 6) at uvm_fault+0xd0
data_access_fault(400c2c516f0, 30, 1494de4, 542eaeacf7f0e091, 542eaeacf7f0e090,
 1) at data_access_fault+0x144
sun4v_datatrap(1c1ae10, 2, 8, 0, 0, 40008689a00) at sun4v_datatrap+0x210
malloc(10, 62, 2, 194da80, 1c00, 2) at malloc+0x14c
amap_alloc(4000861caf0, 2, 0, 400c2c51cd0, 0, 1) at amap_alloc+0x1ac
amap_copy(4000864fba0, 400085262e0, 2, 1, 2f833c0000, 2f833c0001) at amap_copy+
0x558
uvm_fault(e, 1, 0, 1, 0, 6) at uvm_fault+0x6bc
data_access_fault(400c2c51ed0, 31, 2f83058228, 2f833c012c, 2f833c0000, 1) at da
ta_access_fault+0x144
sun4v_datatrap(2f83058200, 2f831901f0, 2d7, a4, 8, 2f) at sun4v_datatrap+0x210

(Running "machine ddbcpu X" seems to hang ddb itself, so this is all I can give.)
> Stopped at      db_enter+0x8:   nop
0000000000000040 <db_enter>:
...
/usr/src/sys/arch/sparc64/sparc64/db_interface.c:571
      44:       91 d0 20 01     ta  1
      48:       01 00 00 00     nop

/usr/src/sys/arch/sparc64/sparc64/db_interface.c:571 is
   571          asm("ta 1; nop");

> rw_enter(4000864fba8, 2, 0, 0, 1caf000, 194da80) at rw_enter+0x284
0000000000000500 <rw_enter>:
...
/usr/src/sys/kern/kern_rwlock.c:173
 778:   d2 5e 20 08     ldx  [ %i0 + 8 ], %o1
 77c:   90 12 20 00     mov  %o0, %o0
                        77c: R_SPARC_M44        .rodata.str1.8+0xd8
 780:   91 2a 30 0c     sllx  %o0, 0xc, %o0
 784:   40 00 00 00     call  784 <rw_enter+0x284>
                        784: R_SPARC_WDISP30    panic
 788:   90 12 20 00     mov  %o0, %o0
                        788: R_SPARC_L44        .rodata.str1.8+0xd8

/usr/src/sys/kern/kern_rwlock.c:173 is
   173                          panic("rw_enter: %s locking against myself",

Reply via email to