http://defect.opensolaris.org/bz/show_bug.cgi?id=14344
Summary: nwamd hung, possibly deadlocked, waiting to allocate events
Classification: Development
Product: nwam
Version: nwam1_133
Platform: ANY/Generic
OS/Version: All
Status: ACCEPTED
Severity: blocker
Priority: P1
Component: ON daemon
AssignedTo: nwam-dev at opensolaris.org
ReportedBy: alan.maguire at sun.com
QAContact: nwam-dev at opensolaris.org
--- Comment #0 from amaguire <alan.maguire at sun.com> 2010-02-03 09:21:34 UTC ---
The code that implements the handling of preallocated events appears to be
deadlocking nwamd on multiple systems, both x86 and sparc. Stack traces show
that when multiple threads attempt to allocate events, they all block in
pthread_cond_wait() called from nwamd_event_init(). Example x86 and sparc
stacks are below, and gcore-generated cores for x86 and sparc are on
zhadum in /export/ws/amaguire/core.nwamd.hung.[i386|sparc].
This appears to be highly reproducible on these test systems. I haven't
root-caused this yet, but given the severity, if we can't find a fix soon, my
suggestion is that we revert to allocating events as required.
x86 pstack:
----------------- lwp# 1 / thread# 1 --------------------
c2ae3bf9 lwp_park (0, 0, 0)
c2adbffd cond_wait_queue (8085888, 80858a8, 0, c2adc526) + 60
c2adc59e __cond_wait (8085888, 80858a8, 8047b98, c2adc5e3) + 86
c2adc5f1 cond_wait (8085888, 80858a8, 8047bc8, c2adc624) + 24
c2adc639 pthread_cond_wait (8085888, 80858a8, 8047be8, 806a14b) + 21
0805d667 nwamd_event_init (13, 0, 0, 0, 80aa608, 1) + 2f
0805dc57 nwamd_event_init_ncu_check (80aa608, 2, 8047cc8, 8064716, 0, 0) + 13
08061f1f nwamd_create_ncu_check_event (0, 0, 80, c2ad7ae1) + b
08064716 nwamd_ncu_handle_state_event (8147978, 8147978, 8047d08, 805e181) + 1be
0805e26d nwamd_event_run_method (8147978, 0, 8047d48, 805e406) + 79
0805e4a5 nwamd_event_handler (8047d80, c2bd45fa, 1, 0, 8047d80, c2bd44d1) + 135
080611a3 main (1, 8047e94, 8047e9c, 805a23f) + 18f
0805a29d _start (1, 8047f24, 0, 8047f34, 8047f4c, 8047f70) + 7d
----------------- lwp# 2 / thread# 2 --------------------
c2ae7c75 sigtimedwait (80861a0, c283eed0, 0)
c2ad2dda sigwait (80861a0) + 22
08060b3d sighandler (0, c2b78000, c283efe8, c2ae38be) + 2d
c2ae3913 _thrp_setup (c29d0200) + 9b
c2ae3ba0 _lwp_start (c29d0200, 0, 0, 0, 0, 0)
----------------- lwp# 3 / thread# 3 --------------------
c2ae972f door (c26083e4, 6a1c, 0, c260ee00, f5f00, a)
0805bf24 nwamd_door_switch (0, c26083e4, 6a1c, 0, 0, 805be18) + 10c
c2ae974c __door_return () + 4c
----------------- lwp# 4 / thread# 4 --------------------
c2ae3bf9 lwp_park (0, 0, 0)
c2adbffd cond_wait_queue (8085888, 80858a8, 0, c2adc526) + 60
c2adc59e __cond_wait (8085888, 80858a8, c24ce5c8, c2adc5e3) + 86
c2adc5f1 cond_wait (8085888, 80858a8, a, c2adc624) + 24
c2adc639 pthread_cond_wait (8085888, 80858a8, c24ce628, 0) + 21
0805d667 nwamd_event_init (c) + 2f
0805da5a nwamd_event_init_if_state (c24ce6bc, 1004843, 0, 8, 0) + 2e
0806a62b routing_events_v4 (0, c2b78000, c24cefe8, c2ae38be) + 253
c2ae3913 _thrp_setup (c29d1a00) + 9b
c2ae3ba0 _lwp_start (c29d1a00, 0, 0, 0, 0, 0)
----------------- lwp# 5 / thread# 5 --------------------
c2ae3bf9 lwp_park (0, 0, 0)
c2adbffd cond_wait_queue (8085888, 80858a8, 0, c2adc526) + 60
c2adc59e __cond_wait (8085888, 80858a8, c23cf5c8, c2adc5e3) + 86
c2adc5f1 cond_wait (8085888, 80858a8, 0, c2adc624) + 24
c2adc639 pthread_cond_wait (8085888, 80858a8, c23cf628, 0) + 21
0805d667 nwamd_event_init (c) + 2f
0805da5a nwamd_event_init_if_state (c23cf6bc, 2000840, 0, 8, 0) + 2e
0806a94c routing_events_v6 (0, c2b78000, c23cffe8, c2ae38be) + 25c
c2ae3913 _thrp_setup (c29d2200) + 9b
c2ae3ba0 _lwp_start (c29d2200, 0, 0, 0, 0, 0)
----------------- lwp# 9 / thread# 9 --------------------
c2ae88c5 pollsys (c22cce28, 1, 0, 0)
c2a889e4 poll (c22cce28, 1, ffffffff, c22cce24) + 4c
c1f935f6 i_dlpi_strgetmsg (81cbf48, ffffffff, c22d0f78, 8, 8, 18) + 196
c1f929b2 dlpi_recv (81cbf48, 0, 0, 0, 0, ffffffff) + b6
0805b1c4 nwamd_dlpi_thread (81e2570, c2b78000, c22d0fe8, c2ae38be) + 20
c2ae3913 _thrp_setup (c29d2a00) + 9b
c2ae3ba0 _lwp_start (c29d2a00, 0, 0, 0, 0, 0)
----------------- lwp# 7 / thread# 7 --------------------
c2ae3bf9 lwp_park (0, 0, 0)
c2adbffd cond_wait_queue (81ccf10, 81ccf20, 0, c2adc526) + 60
c2adc59e __cond_wait (81ccf10, 81ccf20, c21aef88, c2adc5e3) + 86
c2adc5f1 cond_wait (81ccf10, 81ccf20, 200, c21b3fa2) + 24
c21b3fe2 subscriber_event_handler (80b0b08, c2b78000, c21aefe8, c2ae38be) + 4e
c2ae3913 _thrp_setup (c29d3200) + 9b
c2ae3ba0 _lwp_start (c29d3200, 0, 0, 0, 0, 0)
----------------- lwp# 8 / thread# 8 --------------------
c2ae3bf9 lwp_park (0, c20aff38, 0)
c2adbffd cond_wait_queue (c29427e0, c2942800, c20aff38, c2adc246) + 60
c2adc423 cond_wait_common (c29427e0, c2942800, c20aff38, c2adc666) + 1eb
c2adc71c __cond_timedwait (c29427e0, c2942800, c20affa8, c2adc750) + c4
c2adc761 cond_timedwait (c29427e0, c2942800) + 27
c2918165 umem_update_thread (0, c2b78000, c20affe8, c2ae38be) + 191
c2ae3913 _thrp_setup (c29d3a00) + 9b
c2ae3ba0 _lwp_start (c29d3a00, 0, 0, 0, 0, 0)
----------------- lwp# 10 / thread# 10 --------------------
c2ae88c5 pollsys (c1f3ae28, 1, 0, 0)
c2a889e4 poll (c1f3ae28, 1, ffffffff, c1f3ae24) + 4c
c1f935f6 i_dlpi_strgetmsg (81cbdc8, ffffffff, c1f3ef78, 8, 8, 18) + 196
c1f929b2 dlpi_recv (81cbdc8, 0, 0, 0, 0, ffffffff) + b6
0805b1c4 nwamd_dlpi_thread (81ff570, c2b78000, c1f3efe8, c2ae38be) + 20
c2ae3913 _thrp_setup (c29d4200) + 9b
c2ae3ba0 _lwp_start (c29d4200, 0, 0, 0, 0, 0)
----------------- lwp# 11 / thread# 11 --------------------
c2ae972f door (0, 0, 0, c1e1ee00, f5f00, a)
c2acc815 door_create_func (0, c2b78000, c1e1efe8, c2ae38be) + 2f
c2ae3913 _thrp_setup (c29d4a00) + 9b
c2ae3ba0 _lwp_start (c29d4a00, 0, 0, 0, 0, 0)
sparc pstack:
106930: /lib/inet/nwamd
----------------- lwp# 1 / thread# 1 --------------------
ff2c8348 lwp_park (0, 0, 0)
ff2c1130 cond_wait_queue (45250, 45238, 0, 0, 0, 0) + 4c
ff2c176c cond_wait (45250, 45238, 0, ff000000, ffffff, fffc00) + 10
ff2c17a8 pthread_cond_wait (45250, 45238, 0, 0, 45238, 0) + 8
0001c990 nwamd_event_init (4, 1, 0, b67a8, 0, 4) + 28
0001cb70 nwamd_event_init_object_state (1, b67a8, 4, d, 3a10c000, 1) + 14
00029108 nwamd_object_set_state_timed (1, b67a8, 4, d, 0, 31c00) + 14
000214b8 nwamd_ncu_state_machine (228098, 87508, b67a8, a4, ff1f2a00, 228008) + 184
00023a50 nwamd_ncu_handle_state_event (4, 87508, 1, 1, b67a8, 2f800) + 218
0001d82c nwamd_event_handler (1a8008, 1d620, 0, 0, 5f5e100, 5f5e100) + 1ac
00020418 main (45c00, 45000, ffbffe8c, 10, 45000, 45000) + 208
00019358 _start (0, 0, 0, 0, 0, 0) + 108
----------------- lwp# 2 / thread# 2 --------------------
ff2cc190 sigtimedwait (45b1c, feffbeb8, 0)
0001fd44 sighandler (45800, 0, feffbf98, 2de18, 45000, 4528c) + 38
ff2c82c4 _lwp_start (0, 0, 0, 0, 0, 0)
----------------- lwp# 3 / thread# 3 --------------------
ff2c8348 lwp_park (0, 0, 0)
ff2c1130 cond_wait_queue (45250, 45238, 0, 0, 0, 0) + 4c
ff2c176c cond_wait (45250, 45238, 0, de, e4, 1) + 10
ff2c17a8 pthread_cond_wait (45250, 45238, 0, 0, 45238, 0) + 8
0001c990 nwamd_event_init (3, 2, 0, fed75418, 0, 3) + 28
0001caf8 nwamd_event_init_object_action (2, fed75418, 0, 0, 100, 100) + 10
0001f700 nwamd_loc_action (fed75418, 0, fed75418, fed75694, 1010101, 80808080) + 10
0001ad44 nwamd_door_req_action (fed75580, 1efe48, 1c3954, 0, fed75418, 2b7b0) + 2bc
0001b3ac nwamd_door_switch (45178, fed75580, 45000, 451bc, 48, 1c3954) + 174
ff2cd520 __door_return (0, 0, 0, 0, 0, 0) + 40
----------------- lwp# 4 / thread# 4 --------------------
ff2c8348 lwp_park (0, 0, 0)
ff2c1130 cond_wait_queue (45250, 45238, 0, 0, 0, 0) + 4c
ff2c176c cond_wait (45250, 45238, 0, 0, 0, 67e1d) + 10
ff2c17a8 pthread_cond_wait (45250, 45238, 0, 0, 45238, 0) + 8
0001c990 nwamd_event_init (c, 1, 0, 67e10, 0, c) + 28
0001cd48 nwamd_event_init_if_state (fec7b6a4, 1004842, 0, 9, 0, 10) + 4c
0002956c routing_events_v4 (2, 150, 7, fec7b648, 4556c, 32224) + 2b4
ff2c82c4 _lwp_start (0, 0, 0, 0, 0, 0)
----------------- lwp# 5 / thread# 5 --------------------
ff2c8348 lwp_park (0, 0, 0)
ff2c1130 cond_wait_queue (45250, 45238, 0, 0, 0, 0) + 4c
ff2c176c cond_wait (45250, 45238, 0, 0, 0, 67ee5) + 10
ff2c17a8 pthread_cond_wait (45250, 45238, 0, 0, 45238, 0) + 8
0001c990 nwamd_event_init (c, 1, 0, 67ed8, 0, c) + 28
0001cd48 nwamd_event_init_if_state (feb7b6a4, 2004841, 0, 9, 0, 10) + 4c
00029878 routing_events_v6 (2, 150, feb7b648, 7, 45570, 32448) + 2c0
ff2c82c4 _lwp_start (0, 0, 0, 0, 0, 0)
----------------- lwp# 8 / thread# 8 --------------------
ff2c8348 lwp_park (0, 0, 0)
00028e8c nwamd_object_find (1, fea75418, fea75425, fea75614, 1010101, 87508) + 84
0001b130 nwamd_door_req_state (fea75580, 1efcc8, 1c2654, fea714ac, fea75418, 1) + fc
0001b3ac nwamd_door_switch (45178, fea75580, 45000, 451c8, 54, 1c2654) + 174
ff2cd520 __door_return (0, 0, 0, 0, 0, 0) + 40
----------------- lwp# 7 / thread# 7 --------------------
ff2c8348 lwp_park (0, 0, 0)
ff2c1130 cond_wait_queue (1e9f90, 1e9fa0, 0, 0, 0, 0) + 4c
ff2c176c cond_wait (1e9f90, 1e9fa0, 0, 0, 1e9fa0, 0) + 10
fe963ad8 subscriber_event_handler (8fb08, 0, 0, 1e9fa0, 1e9f90, 0) + 34
ff2c82c4 _lwp_start (0, 0, 0, 0, 0, 0)
----------------- lwp# 9 / thread# 9 --------------------
ff2c8348 lwp_park (0, fe7fbec0, 0)
ff2c1130 cond_wait_queue (ff135fe0, ff136000, fe7fbec0, 2, 0, 0) + 4c
ff2c1648 cond_wait_common (ff135fe0, ff136000, fe7fbec0, 1c00, ff1b3a00, 0) + 2d8
ff2c181c __cond_timedwait (ff135fe0, ff136000, fe7fbf90, 0, 0, 0) + 58
ff2c18dc cond_timedwait (ff135fe0, ff136000, fe7fbf90, 0, 9, 0) + 14
ff10afc4 umem_update_thread (4b6939e7, ff136018, ff12c000, 0, ff12f280, 4b6939dd) + 238
ff2c82c4 _lwp_start (0, 0, 0, 0, 0, 0)
----------------- lwp# 10 / thread# 10 --------------------
ff2ccae8 pollsys (fe6fbe20, 1, 0, 0)
ff2724b8 poll (fe6fbe20, 1, ffffffff, 1, 0, 2400) + 80
fe943628 i_dlpi_strgetmsg (fe6f7dfc, ffffffff, fe6fbf34, fe6f7e04, fe6f7df8, fe6fbf3c) + 1fc
fe942890 dlpi_recv (a0, 0, 0, 0, 0, ffffffff) + e4
0001a670 nwamd_dlpi_thread (205578, 0, 0, ff1b4200, 2710, 2400) + 24
ff2c82c4 _lwp_start (0, 0, 0, 0, 0, 0)
----------------- lwp# 11 / thread# 11 --------------------
ff2ccae8 pollsys (fe57be20, 1, 0, 0)
ff2724b8 poll (fe57be20, 1, ffffffff, 1, 0, 2400) + 80
fe943628 i_dlpi_strgetmsg (fe577dfc, ffffffff, fe57bf34, fe577e04, fe577df8, fe57bf3c) + 1fc
fe942890 dlpi_recv (a0, 0, 0, 0, 0, ffffffff) + e4
0001a670 nwamd_dlpi_thread (227578, 0, 0, ff1b4a00, 2710, 2400) + 24
ff2c82c4 _lwp_start (0, 0, 0, 0, 0, 0)
----------------- lwp# 13 / thread# 13 --------------------
ff2cd4e8 door (fe475580, 6a20, 0, fe47bfa0, f41a0, a)
ff2cd520 __door_return (0, 0, 0, 0, 0, 0) + 40
----------------- lwp# 14 / thread# 14 --------------------
ff2cd4e8 door (0, 0, 0, fe37bfa0, f41a0, a)
ff2c82c4 _lwp_start (0, 0, 0, 0, 0, 0)
--
Configure bugmail: http://defect.opensolaris.org/bz/userprefs.cgi?tab=email
------- You are receiving this mail because: -------
You are the QA contact for the bug.
You are the assignee for the bug.