Attach backtrace here:
root@scm1:/coredumps/# gdb /usr/lib64/opensaf/osafamfd
core.image\=26115.proc\=osafamfd.pid\=4277.signal\=6.time\=1493639577
GNU gdb (Wind River Linux Sourcery CodeBench 4.8-28) 7.6
Copyright (C) 2013 Free Software Foundation, Inc.
License GPLv3+: GNU GPL version 3 or later <http://gnu.org/licenses/gpl.html>
This is free software: you are free to change and redistribute it.
There is NO WARRANTY, to the extent permitted by law. Type "show copying"
and "show warranty" for details.
This GDB was configured as "x86_64-wrs-linux-gnu".
For bug reporting instructions, please see:
<[email protected]>...
Reading symbols from /usr/lib64/opensaf/osafamfd...Reading symbols from
/usr/lib64/opensaf/.debug/osafamfd...done.
done.
[New LWP 4277]
[New LWP 4279]
[New LWP 4280]
[New LWP 4282]
warning: Could not load shared library symbols for linux-vdso.so.1.
Do you need "set solib-search-path" or "set sysroot"?
[Thread debugging using libthread_db enabled]
Using host libthread_db library "/lib64/libthread_db.so.1".
Core was generated by `/usr/lib64/opensaf/osafamfd osafamfd'.
Program terminated with signal 6, Aborted.
#0 0x0000003d84a353e9 in __GI_raise (sig=sig@entry=6) at
../nptl/sysdeps/unix/sysv/linux/raise.c:56
56 ../nptl/sysdeps/unix/sysv/linux/raise.c: No such file or directory.
(gdb) bt full
#0 0x0000003d84a353e9 in __GI_raise (sig=sig@entry=6) at
../nptl/sysdeps/unix/sysv/linux/raise.c:56
resultvar = 0
pid = 4277
selftid = 4277
#1 0x0000003d84a38508 in __GI_abort () at abort.c:89
save_stage = 2
act = {__sigaction_handler = {sa_handler = 0x51560d, sa_sigaction =
0x51560d}, sa_mask = {__val = {2006, 5336880, 5335460, 2130303778826, 5320117,
9977552, 264237561592, 140737298378800, 264235064979, 17179869185,
18442240615826079272, 4294967296, 5873756416, 5321392, 14559416,
140737298378864}}, sa_flags = -2052873586, sa_restorer = 0x0}
sigs = {__val = {32, 0 <repeats 15 times>}}
#2 0x0000003d85a2110a in __osafassert_fail (__file=0x51560d "su.cc",
__line=2006, __func=0x516f30 <AVD_SU::dec_curr_act_si()::__FUNCTION__>
"dec_curr_act_si", __assertion=0x5169a4 "saAmfSUNumCurrActiveSIs > 0") at
sysf_def.c:281
No locals.
#3 0x00000000004d907d in AVD_SU::dec_curr_act_si (this=0xde8390) at su.cc:2006
__FUNCTION__ = "dec_curr_act_si"
#4 0x00000000004c0301 in avd_susi_delete (cb=0x75a2e0 <_control_block>,
susi=0xd38320, ckpt=false) at siass.cc:554
i_su_si = 0xd38320
su = 0xde8390
__FUNCTION__ = "avd_susi_delete"
p_su_si = 0x0
p_si_su = 0x0
#5 0x00000000004964e1 in SG_NORED::node_fail (this=0xd7e9a0, cb=0x75a2e0
<_control_block>, su=0xde8390) at sg_nored_fsm.cc:781
l_si = 0x7ffff4ad31a0
old_state = SA_AMF_HA_QUIESCED
su_node_ptr = 0x0
__FUNCTION__ = "node_fail"
#6 0x00000000004b8c78 in avd_node_down_mw_susi_failover (cb=0x75a2e0
<_control_block>, avnd=0x9e3bf0) at sgproc.cc:1983
i_su = @0xde84a0: 0xde8390
__for_range = @0x9e3eb8: {<std::_Vector_base<AVD_SU*,
std::allocator<AVD_SU*> >> = {_M_impl = {<std::allocator<AVD_SU*>> =
{<__gnu_cxx::new_allocator<AVD_SU*>> = {<No data fields>}, <No data fields>},
_M_start = 0xde84a0,
_M_finish = 0xde84a8, _M_end_of_storage = 0xde84a8}}, <No data
fields>}
__for_begin = {_M_current = 0xde84a0}
__for_end = {_M_current = 0xde84a8}
__FUNCTION__ = "avd_node_down_mw_susi_failover"
#7 0x000000000045eb75 in avd_node_failover (node=0x9e3bf0) at ndproc.cc:1142
__FUNCTION__ = "avd_node_failover"
#8 0x0000000000456fea in avd_mds_avnd_down_evh (cb=0x75a2e0 <_control_block>,
evt=0x7f5f78000ec0) at ndfsm.cc:684
node = 0x9e3bf0
__FUNCTION__ = "avd_mds_avnd_down_evh"
#9 0x00000000004514f5 in process_event (cb_now=0x75a2e0 <_control_block>,
evt=0x7f5f78000ec0) at main.cc:775
__FUNCTION__ = "process_event"
#10 0x0000000000451211 in main_loop () at main.cc:696
pollretval = 1
evt = 0x7f5f78000ec0
mbx_fd = {raise_obj = 10, rmv_obj = 11}
polltmo = -1
term_fd = 22
__FUNCTION__ = "main_loop"
cb = 0x75a2e0 <_control_block>
error = SA_AIS_OK
#11 0x000000000045178f in main (argc=2, argv=0x7ffff4ad33e8) at main.cc:848
No locals.
(gdb)
---
** [tickets:#2468] amf: amfd asserts while decrementing opensaf NoRed SI
assignment counter during fail-over.**
**Status:** unassigned
**Milestone:** 5.17.06
**Created:** Thu May 25, 2017 08:46 AM UTC by Praveen
**Last Updated:** Fri May 26, 2017 08:20 AM UTC
**Owner:** nobody
This ticket is based on an issue reported via a users mailing list mail dated
22-May-17, subject "[users] osafamfd coredump issue".
Here is the syslog from when the issue occurred:
2017-05-01T07:52:57.714906-04:00 scm2 kernel: tipc: Resetting link
<1.1.16:eth2-1.1.5:bond0>, peer not responding
2017-05-01T07:52:57.714935-04:00 scm2 kernel: tipc: Lost link
<1.1.16:eth2-1.1.5:bond0> on network plane A
2017-05-01T07:52:57.714939-04:00 scm2 kernel: tipc: Lost contact with <1.1.5>
2017-05-01T07:52:57.716788-04:00 scm2 osafimmd[3009]: NO MDS event from svc_id
25 (change:4, dest:287038266327043)
2017-05-01T07:52:57.717304-04:00 scm2 osafclmd[4259]: NO Node 66831 went down.
Not sending track callback for agents on that node
2017-05-01T07:52:57.719178-04:00 scm2 osafimmnd[3020]: NO Global discard node
received for nodeId:1050f pid:15395
2017-05-01T07:52:57.719233-04:00 scm2 osafimmnd[3020]: NO Implementer
disconnected 104 <0, 1050f(down)> (MsgQueueService66831)
2017-05-01T07:52:57.721345-04:00 scm2 osafamfd[4277]: NO Node 'PLD0105' left
the cluster
2017-05-01T07:52:57.722778-04:00 scm2 log_demo[6160]: [0.I.Proc]: FYI state
change notification from NTF, entity PLD0105 now has new state DISABLED (Oper
state safAmfNode=PLD0105,safAmfCluster=myAmfCluster changed)
2017-05-01T07:52:57.732796-04:00 scm2 osafamfd[4277]: su.cc:2006:
dec_curr_act_si: Assertion 'saAmfSUNumCurrActiveSIs > 0' failed.
2017-05-01T07:52:57.778777-04:00 scm2 kernel: tipc: Resetting link
<1.1.16:eth2-1.1.6:bond0>, peer not responding
2017-05-01T07:52:57.778827-04:00 scm2 kernel: tipc: Lost link
<1.1.16:eth2-1.1.6:bond0> on network plane A
2017-05-01T07:52:57.778833-04:00 scm2 kernel: tipc: Lost contact with <1.1.6>
2017-05-01T07:52:57.777979-04:00 scm2 osafimmd[3009]: NO MDS event from svc_id
25 (change:4, dest:288139774320643)
2017-05-01T07:52:57.717343-04:00 scm2 osafclmd[4259]: NO Node 66831 went down.
Not sending track callback for agents on that node
2017-05-01T07:52:57.779373-04:00 scm2 osafclmd[4259]: NO Node 67087 went down.
Not sending track callback for agents on that node
2017-05-01T07:52:57.780552-04:00 scm2 osafimmnd[3020]: NO Global discard node
received for nodeId:1060f pid:17439
2017-05-01T07:52:57.780607-04:00 scm2 osafimmnd[3020]: NO Implementer
disconnected 106 <0, 1060f(down)> (MsgQueueService67087)
2017-05-01T07:52:57.810785-04:00 scm2 osafamfnd[5281]: WA AMF director
unexpectedly crashed
2017-05-01T07:52:57.810839-04:00 scm2 osafamfnd[5281]: Rebooting OpenSAF NodeId
= 69647 EE Name = , Reason: local AVD down(Adest) or both AVD down(Vdest)
received, OwnNodeId = 69647, SupervisionTime = 0
2017-05-01T07:52:57.810978-04:00 scm2 osafimmnd[3020]: NO Implementer locally
disconnected. Marking it as doomed 105 <29, 1100f> (safAmfService)
2017-05-01T07:52:57.812582-04:00 scm2 osafimmnd[3020]: NO Implementer
disconnected 105 <29, 1100f> (safAmfService)
2017-05-01T07:52:57.950567-04:00 scm2 opensaf_reboot: Rebooting local node;
timeout=0
2017-05-01T07:52:58.084968-04:00 scm2 atwdog[28335]: rebooting (-f) local node
---
Sent from sourceforge.net because [email protected] is
subscribed to https://sourceforge.net/p/opensaf/tickets/
To unsubscribe from further messages, a project admin can change settings at
https://sourceforge.net/p/opensaf/admin/tickets/options. Or, if this is a
mailing list, you can unsubscribe from the mailing list.
------------------------------------------------------------------------------
Check out the vibrant tech community on one of the world's most
engaging tech sites, Slashdot.org! http://sdm.link/slashdot
_______________________________________________
Opensaf-tickets mailing list
[email protected]
https://lists.sourceforge.net/lists/listinfo/opensaf-tickets