On 2009-03-22T00:49:08, Lars Marowsky-Bree <[email protected]> wrote:

> I'll let the test case run over night (until it crashes the test master
> ;-), that might throw up a couple of coredumps by morning.

1. Crash in openais_response_send:

(gdb) bt
#0  0x00007fa9f048ae11 in memcpy () from /lib64/libc.so.6
#1  0x0000000000423104 in openais_response_send (conn=0x7640d0, 
msg=0x7ffff8d9ca80, mlen=32) at ipc.c:911
#2  0x00007fa9ec61e3e0 in message_handler_req_exec_ckpt_checkpointopen 
(message=0x7ffff8d9cb30, nodeid=1)
    at ckpt.c:1406
#3  0x000000000041e42e in deliver_fn (nodeid=1, iovec=0x7ffff8d9cec0, 
iov_len=1, endian_conversion_required=0)
    at main.c:415
#4  0x00000000004182ca in app_deliver_fn (nodeid=1, iovec=0x7ffff8d9ceb0, 
iov_len=1, endian_conversion_required=0)
    at totempg.c:443
#5  0x0000000000417f5e in totempg_deliver_fn (nodeid=1, iovec=0x7fa9e800cf98, 
iov_len=1, 
    endian_conversion_required=0) at totempg.c:587
#6  0x0000000000417232 in totemmrp_deliver_fn (nodeid=1, iovec=0x7fa9e800cf98, 
iov_len=3, 
    endian_conversion_required=0) at totemmrp.c:82
#7  0x0000000000415107 in messages_deliver_to_app (instance=0x7fa9f0c6f010, 
skip=0, end_point=35) at totemsrp.c:3553
#8  0x0000000000414cdd in message_handler_orf_token (instance=0x7fa9f0c6f010, 
msg=0x7fa9e8018924, msg_len=70, 
    endian_conversion_needed=0) at totemsrp.c:3425
#9  0x000000000041706d in main_deliver_fn (context=0x7fa9f0c6f010, 
msg=0x7fa9e8018924, msg_len=70) at totemsrp.c:4136
#10 0x000000000040af4c in none_token_recv (rrp_instance=0x7fa9e8017d80, 
iface_no=0, context=0x7fa9f0c6f010, 
    msg=0x7fa9e8018924, msg_len=70, token_seq=29) at totemrrp.c:506
#11 0x000000000040c91e in rrp_deliver_fn (context=0x7fa9e8018260, 
msg=0x7fa9e8018924, msg_len=70) at totemrrp.c:1308
#12 0x0000000000409025 in net_deliver_fn (handle=0, fd=3, revents=1, 
data=0x7fa9e80182a0) at totemnet.c:676
#13 0x000000000040738a in poll_run (handle=0) at aispoll.c:402
#14 0x000000000041ed7d in main (argc=1, argv=0x7ffff8da0508) at main.c:634

(gdb) print conn_info->mem->res_buffer
Cannot access memory at address 0xf42a1
(gdb) print conn_info->mem
$2 = (struct shared_memory *) 0x61
(gdb) print conn_info
$3 = (struct conn_info *) 0x7640d0
(gdb) print *conn_info
$4 = {fd = -14548479, thread = 12159719002490273798, thread_attr = {

(Corrupted conn_info)


2. The very crash Chrissie's patch is supposed to have fixed:

Thread 1 (Thread 6988):
#0  0x00007fc07fdfc667 in do_proc_join (name=0x7fff91153bf0, pid=19546, 
nodeid=7, reason=1) at cpg.c:726
726                     if (pi->pid == pid && pi->nodeid == nodeid) {
(gdb) bt
#0  0x00007fc07fdfc667 in do_proc_join (name=0x7fff91153bf0, pid=19546, 
nodeid=7, reason=1) at cpg.c:726
#1  0x00007fc07fdfca0d in message_handler_req_exec_cpg_procjoin 
(message=0x7fff91153be0, nodeid=7)
    at cpg.c:804
#2  0x000000000041e42e in deliver_fn (nodeid=7, iovec=0x7fff91153da0, 
iov_len=1, endian_conversion_required=0)
    at main.c:415
#3  0x00000000004182ca in app_deliver_fn (nodeid=7, iovec=0x7fff91153d90, 
iov_len=1, 
    endian_conversion_required=0) at totempg.c:443
#4  0x0000000000417f5e in totempg_deliver_fn (nodeid=7, iovec=0x754170, 
iov_len=1, 
    endian_conversion_required=0) at totempg.c:587
#5  0x0000000000417232 in totemmrp_deliver_fn (nodeid=7, iovec=0x754170, 
iov_len=1, 
    endian_conversion_required=0) at totemmrp.c:82
#6  0x0000000000415166 in messages_deliver_to_app (instance=0x7fc089024010, 
skip=0, end_point=12824)
    at totemsrp.c:3562
#7  0x00000000004155ca in message_handler_mcast (instance=0x7fc089024010, 
msg=0x75efd4, msg_len=281, 
    endian_conversion_needed=0) at totemsrp.c:3693
#8  0x000000000041706d in main_deliver_fn (context=0x7fc089024010, 
msg=0x75efd4, msg_len=281)
    at totemsrp.c:4136
#9  0x000000000040aec0 in none_mcast_recv (rrp_instance=0x746920, iface_no=0, 
context=0x7fc089024010, 
    msg=0x75efd4, msg_len=281) at totemrrp.c:476
#10 0x000000000040c95c in rrp_deliver_fn (context=0x747770, msg=0x75efd4, 
msg_len=281) at totemrrp.c:1319
#11 0x0000000000409025 in net_deliver_fn (handle=0, fd=1, revents=1, 
data=0x75e950) at totemnet.c:676
#12 0x000000000040738a in poll_run (handle=0) at aispoll.c:402
#13 0x000000000041ed7d in main (argc=1, argv=0x7fff911568b8) at main.c:634

(gdb) print pi
$1 = (struct process_info *) 0xffffffffffffffd8

3. Another instance of the crash I reported in the last mail:

Core was generated by `aisexec'.
Program terminated with signal 11, Segmentation fault.
#0  0x00007fc69f1b813c in notify_lib_joinlist (gi=0x786ea0, conn=0x0, 
joined_list_entries=1, 
    joined_list=0x7fffb04482f0, left_list_entries=0, left_list=0x0, id=4) at 
cpg.c:386
386                     if (pi->pid)
(gdb) bt
#0  0x00007fc69f1b813c in notify_lib_joinlist (gi=0x786ea0, conn=0x0, 
joined_list_entries=1, 
    joined_list=0x7fffb04482f0, left_list_entries=0, left_list=0x0, id=4) at 
cpg.c:386
#1  0x00007fc69f1b97c4 in do_proc_join (name=0x7fffb04483c0, pid=30551, 
nodeid=3, reason=1) at cpg.c:757
#2  0x00007fc69f1b9a0d in message_handler_req_exec_cpg_procjoin 
(message=0x7fffb04483b0, nodeid=3)
    at cpg.c:804
#3  0x000000000041e42e in deliver_fn (nodeid=3, iovec=0x7fffb0448570, 
iov_len=1, endian_conversion_required=0)
    at main.c:415
#4  0x00000000004182ca in app_deliver_fn (nodeid=3, iovec=0x7fffb0448560, 
iov_len=1, 
    endian_conversion_required=0) at totempg.c:443
#5  0x0000000000417f5e in totempg_deliver_fn (nodeid=3, iovec=0x7564e8, 
iov_len=1, 
    endian_conversion_required=0) at totempg.c:587
#6  0x0000000000417232 in totemmrp_deliver_fn (nodeid=3, iovec=0x7564e8, 
iov_len=3, 
    endian_conversion_required=0) at totemmrp.c:82
#7  0x0000000000415107 in messages_deliver_to_app (instance=0x7fc6a831b010, 
skip=0, end_point=11647)
    at totemsrp.c:3553
#8  0x0000000000414cdd in message_handler_orf_token (instance=0x7fc6a831b010, 
msg=0x75efd4, msg_len=70, 
    endian_conversion_needed=0) at totemsrp.c:3425
#9  0x000000000041706d in main_deliver_fn (context=0x7fc6a831b010, 
msg=0x75efd4, msg_len=70)
    at totemsrp.c:4136
#10 0x000000000040af4c in none_token_recv (rrp_instance=0x746920, iface_no=0, 
context=0x7fc6a831b010, 
    msg=0x75efd4, msg_len=70, token_seq=409129) at totemrrp.c:506
#11 0x000000000040c91e in rrp_deliver_fn (context=0x747770, msg=0x75efd4, 
msg_len=70) at totemrrp.c:1308
#12 0x0000000000409025 in net_deliver_fn (handle=0, fd=3, revents=1, 
data=0x75e950) at totemnet.c:676
#13 0x000000000040738a in poll_run (handle=0) at aispoll.c:402
#14 0x000000000041ed7d in main (argc=1, argv=0x7fffb044bbb8) at main.c:634

(gdb) print pi
$1 = (struct process_info *) 0xffffffffffffffd8


Alas, then a crash took down my test master.


Regards,
    Lars

-- 
Teamlead Kernel, SuSE Labs, Research and Development
SUSE LINUX Products GmbH, GF: Markus Rex, HRB 16746 (AG Nürnberg)
"Experience is the name everyone gives to their mistakes." -- Oscar Wilde

_______________________________________________
Openais mailing list
[email protected]
https://lists.linux-foundation.org/mailman/listinfo/openais

Reply via email to