On 7.12.2017. 13:07, Martin Pieuchot wrote:
> Same thing for the em(4) panic you reported before. I made an educated
> guess about which race can occur. But having the trace of all active
> processes helps a lot!
Hi,
I had to trigger the same panic before applying your em diff, and here it is:
panic: kernel diagnostic assertion "(ifp->if_flags & IFF_RUNNING) == 0"
failed: file "/usr/src/sys/dev/pci/if_em.c", line 1534
Stopped at db_enter+0x5: popq %rbp
TID PID UID PRFLAGS PFLAGS CPU COMMAND
413493 38051 0 0x3 0 2 ifconfig
424155 37800 73 0x100010 0 3 syslogd
*438295 18225 0 0x14000 0x200 1K softnet
db_enter() at db_enter+0x5
panic() at panic+0x141
__assert(ffffffff810ecd54,ffff800023bc3e10,0,ffff800000504000) at
__assert+0x24
em_stop(ffff800000504000,7) at em_stop+0x118
em_init(2) at em_init+0x25
em_watchdog(0) at em_watchdog+0xc1
if_watchdog_task(ffffffff81668600) at if_watchdog_task+0xa4
taskq_thread(0) at taskq_thread+0x67
end trace frame: 0x0, count: 7
https://www.openbsd.org/ddb.html describes the minimum info required in
bug reports. Insufficient info makes it difficult to find and fix bugs.
ddb{1}>
ddb{1}> ps /o
TID PID UID PRFLAGS PFLAGS CPU COMMAND
413493 38051 0 0x3 0 2 ifconfig
424155 37800 73 0x100010 0 3 syslogd
*438295 18225 0 0x14000 0x200 1K softnet
ddb{1}>
ddb{1}> tr /up 0t413493
ffff800023c63ee0(ffffffff81b61278,0,0,0,44400080000,1) at 0xffff800023c63ee0
end(uvm_fault(0xffffffff81afedf8, 0xfffffffffffffff7, 0, 1) -> e
kernel: page fault trap, code=0
Faulted in DDB; continuing...
ddb{1}>
ddb{1}> tr /up 0t424155
w_locklistdata(0,0,0,0,0,1000000000612) at w_locklistdata+0x42460
mi_child_return.child_retval(6816fd006816fa,0,0,0,0,0) at
mi_child_return.child
_retval+0x38c
Bad frame pointer: 0xffffffff81a91ec8
end trace frame: 0xffffffff81a91ec8, count: -2
ddb{1}>
ddb{1}> tr /p 0t438295
em_devices(632e7361635f66,692f6963702f7665,642f7379732f6372,732f7273752f000a,65
63697665642074,6573657220746f6e) at em_devices+0x31c
end of kernel
end trace frame: 0x66693e2d70666928, count: -1
ddb{1}> ps
PID TID PPID UID S FLAGS WAIT COMMAND
38051 413493 98220 0 7 0x3 ifconfig
98220 510053 42401 0 3 0x10008b pause sh
42401 505610 29809 0 3 0x10008b pause sh
29809 61372 1 0 3 0x10008b pause ksh
84408 518968 1 0 3 0x100083 ttyin getty
75374 407043 1 0 3 0x100083 ttyin getty
90775 272436 1 0 3 0x100083 ttyin getty
2696 405567 1 0 3 0x100083 ttyin getty
40449 47349 1 0 3 0x100083 ttyin getty
43210 337136 1 0 3 0x100098 poll cron
5666 433796 48654 95 3 0x100092 kqread smtpd
46164 34892 48654 103 3 0x100092 kqread smtpd
54175 260135 48654 95 3 0x100092 kqread smtpd
93781 236681 48654 95 3 0x100092 kqread smtpd
19987 267311 48654 95 3 0x100092 kqread smtpd
94891 422493 48654 95 3 0x100092 kqread smtpd
48654 385403 1 0 3 0x100080 kqread smtpd
3471 358479 1 0 3 0x80 select sshd
28005 289816 85351 83 3 0x100092 poll ntpd
85351 307880 53191 83 3 0x100092 poll ntpd
53191 361514 1 0 3 0x100080 poll ntpd
37800 424155 63169 73 7 0x100010 syslogd
63169 409917 1 0 3 0x100082 netio syslogd
5807 224569 0 0 3 0x14200 pgzero zerothread
49859 23 0 0 3 0x14200 aiodoned aiodoned
94546 207693 0 0 3 0x14200 syncer update
81519 201465 0 0 3 0x14200 cleaner cleaner
46403 79488 0 0 3 0x14200 reaper reaper
58020 281411 0 0 3 0x14200 pgdaemon pagedaemon
74507 148042 0 0 3 0x14200 bored crynlk
17591 224078 0 0 3 0x14200 bored crypto
38014 194793 0 0 3 0x14200 usbtsk usbtask
10734 170157 0 0 3 0x14200 usbatsk usbatsk
41674 59869 0 0 3 0x40014200 acpi0 acpi0
27059 66073 0 0 7 0x40014200 idle11
74635 434973 0 0 7 0x40014200 idle10
36027 231269 0 0 7 0x40014200 idle9
347 433094 0 0 7 0x40014200 idle8
73086 477530 0 0 7 0x40014200 idle7
67977 58218 0 0 7 0x40014200 idle6
82891 185471 0 0 7 0x40014200 idle5
84493 342954 0 0 7 0x40014200 idle4
66503 89365 0 0 3 0x40014200 idle3
3776 407212 0 0 3 0x40014200 idle2
93247 71210 0 0 3 0x40014200 idle1
84971 160128 0 0 3 0x14200 bored sensors
*18225 438295 0 0 7 0x14200 softnet
87145 102330 0 0 3 0x14200 bored systqmp
5600 399537 0 0 3 0x14200 bored systq
27906 252867 0 0 2 0x40014200 softclock
71944 475702 0 0 7 0x40014200 idle0
17443 68217 0 0 3 0x14200 bored sbar
1 72998 0 0 3 0x82 wait init
0 0 -1 0 3 0x10200 scheduler swapper
ddb{1}>
ddb{1}> show all pools
Name Size Requests Fail Releases Pgreq Pgrel Npage Hiwat Minpg
Maxpg Idle
arp 56 412 0 398 1 0 1 1 0
8 0
inpcbpl 280 1332 0 1324 1 0 1 1 0
8 0
plimitpl 152 31 0 14 1 0 1 1 0
8 0
myxmcl 12288 2 0 0 1 0 1 1 0
8 0
plcache 128 132 0 0 5 0 5 5 0
8 0
rtentry 112 1930 0 1858 3 0 3 3 0
8 0
syncache 264 1 0 1 1 1 0 1 0
8 0
tcpqe 32 1 0 1 1 1 0 1 0
8 0
tcpcb 544 17 0 12 1 0 1 1 0
8 0
nd6 48 140 0 134 1 0 1 1 0
8 0
art_heap8 4096 1 0 0 1 0 1 1 0
8 0
art_heap4 256 3025 0 2795 19 4 15 16 0
8 0
art_table 32 3026 0 2795 3 0 3 3 0
8 0
art_node 16 1494 0 1423 1 0 1 1 0
8 0
dirhash 1024 72 0 34 5 0 5 5 0
8 0
dino1pl 128 4555 0 1592 101 5 96 96 0
8 0
ffsino 272 4555 0 1592 208 10 198 198 0
8 0
nchpl 144 5750 0 1654 158 6 152 152 0
8 0
uvmvnodes 72 4571 0 0 84 0 84 84 0
8 0
vnodes 192 4571 0 0 229 0 229 229 0
8 0
namei 1024 39222 0 39222 360 359 1 2 0
8 1
percpumem 96 30 0 0 1 0 1 1 0
8 0
ehcixfer 264 181 0 174 1 0 1 1 0
8 0
scxspl 192 24103 0 24103 330 330 0 6 0
8 0
sigapl 432 3958 0 3934 4 0 4 4 0
8 0
knotepl 112 61 0 17 2 0 2 2 0
8 0
kqueuepl 56 8 0 0 1 0 1 1 0
8 0
pipepl 112 2360 0 2360 238 237 1 1 0
8 1
fdescpl 472 3959 0 3934 5 1 4 5 0
8 0
filepl 112 21601 0 21513 4 1 3 4 0
8 0
lockfpl 88 6 0 6 2 2 0 1 0
8 0
sessionpl 112 17 0 5 1 0 1 1 0
8 0
pgrppl 40 25 0 12 1 0 1 1 0
8 0
ucredpl 96 91 0 74 1 0 1 1 0
8 0
zombiepl 144 3934 0 3934 281 280 1 1 0
8 1
processpl 736 3987 0 3934 7 1 6 6 0
8 0
procpl 568 3987 0 3934 5 0 5 5 0
8 0
srpgc 64 598 0 598 162 161 1 1 0
8 1
sockpl 376 2158 0 2106 6 0 6 6 0
8 0
mcl9k 9216 34 0 0 3 0 3 3 0
8 0
mcl2k2 2112 61698 0 0 1302 0 1302 1302 0
8 0
mcl2k 2048 52 0 0 7 0 7 7 0
8 0
mbufpl 256 64115 0 0 1218 2 1216 1216 0
8 0
bufpl 256 23059 0 4686 1149 0 1149 1149 0
8 0
anonpl 16 302789 0 299099 162 142 20 162 0
2023 0
amapchunkpl 152 3916 0 3740 69 60 9 69 0
158 0
amappl16 192 3093 0 3078 13 12 1 8 0
8 0
amappl15 184 312 0 310 3 2 1 2 0
8 0
amappl14 176 731 0 728 4 3 1 3 0
8 0
amappl13 168 1021 0 1018 3 2 1 3 0
8 0
amappl12 160 466 0 465 4 3 1 4 0
8 0
amappl11 152 5159 0 5124 5 3 2 5 0
8 0
amappl10 144 986 0 985 6 5 1 5 0
8 0
amappl9 136 876 0 876 186 185 1 6 0
8 1
amappl8 128 5008 0 5004 55 53 2 33 0
8 0
amappl7 120 616 0 602 7 6 1 6 0
8 0
amappl6 112 992 0 969 7 6 1 6 0
8 0
amappl5 104 2746 0 2708 9 7 2 8 0
8 0
amappl4 96 2396 0 2340 9 6 3 9 0
8 0
amappl3 88 1316 0 1302 13 12 1 13 0
8 0
amappl2 80 24067 0 23860 22 15 7 22 0
8 0
amappl1 72 221288 0 220295 199 179 20 102 0
8 0
amappl 72 1506 0 1426 12 10 2 12 0
75 0
dma16384 16384 1 0 1 1 1 0 1 0
8 0
dma4096 4096 1 0 1 1 1 0 1 0
8 0
dma2048 2048 1 0 1 1 1 0 1 0
8 0
dma512 512 256 0 256 1 1 0 1 0
8 0
dma256 256 8 0 8 1 1 0 1 0
8 0
dma64 64 65 0 65 1 1 0 1 0
8 0
dma32 32 8 0 8 1 1 0 1 0
8 0
dma16 16 1 0 1 1 1 0 1 0
8 0
aobjpl 64 1 0 0 1 0 1 1 0
8 0
uaddrrnd 24 3959 0 3934 1 0 1 1 0
8 0
uaddrbest 32 2 0 0 1 0 1 1 0
8 0
uaddr 24 3959 0 3934 1 0 1 1 0
8 0
vmmpekpl 168 31498 0 31475 2 0 2 2 0
8 0
vmmpepl 168 456735 0 454392 1096 987 109 488 0
357 2
vmsppl 352 3958 0 3934 4 1 3 4 0
8 0
pdppl 4096 3958 0 3934 5 1 4 5 0
8 0
pvpl 32 1197438 0 1186618 394 298 96 386 0
265 0
pmappl 208 3958 0 3934 2 0 2 2 0
8 0
extentpl 40 289 0 190 1 0 1 1 0
8 0
phpool 112 2985 0 382 76 1 75 75 0
8 0
ddb{1}>
Maybe irrelevant for this story, but I also tried:
ddb{1}> mach ddbcpu 2
Stopped at x86_ipi_db+0x5: popq %rbp
x86_ipi_db(ffffffff81648ab5) at x86_ipi_db+0x5
x86_ipi_handler() at x86_ipi_handler+0x6a
Xresume_lapic_ipi() at Xresume_lapic_ipi+0x1f
--- interrupt ---
end of kernel
end trace frame: 0x5a152405c75250cc, count: 12
0x41cb8c419c524153:
ddb{2}> tr /up 0t413493
ffff800023c63ee0(ffffffff81b61278,0,0,0,44400080000,1) at 0xffff800023c63ee0
end(
and then ddb freezes; I can only reboot the box.