Hello all.
I tried to start 47 nodes (corosync + pacemaker) with unicast
transport (while waiting for the network crew to fix multicast); half of them
came online and half did not. Then the online half started to go offline, and I
found a core dump.
Before that, I started about 7 nodes with unicast without any problems.
When all 47 corosync instances are up, they generate 30-40 Mbit/s on each node.
All information below.
My system is:
CentOS release 5.3 (Final)
Linux mysender22.mail.ru 2.6.18-164.el5PAE #1 SMP Thu Sep 3 04:10:44 EDT
2009 i686 i686 i386 GNU/Linux
rpms:
openais-1.1.3-1.6.el5
openaislib-1.1.3-1.6.el5
corosync-1.4.0-1
corosynclib-1.4.0-1
corosynclib-devel-1.4.0-1
pacemaker-1.1.5-2
pacemaker-libs-1.1.5-2
cluster-glue-libs-1.0.6-1.6
cluster-glue-1.0.6-1.6
resource-agents-1.0.4-1.1.el5
I attached my config.
Backtrace
(gdb) bt
#0 0x005cb402 in __kernel_vsyscall ()
#1 0x009e7d80 in raise () from /lib/libc.so.6
#2 0x009e9691 in abort () from /lib/libc.so.6
#3 0x00f99675 in send_cluster_id () from /usr/libexec/lcrso/pacemaker.lcrso
#4 0x00f94ea8 in pcmk_peer_update () from
/usr/libexec/lcrso/pacemaker.lcrso
#5 0x0804f6d6 in confchg_fn ()
#6 0x007afb9c in totempg_confchg_fn
(configuration_type=TOTEM_CONFIGURATION_REGULAR, member_list=0xbfe12e84,
member_list_entries=18, left_list=0x0, left_list_entries=0,
joined_list=0xbfe13a84, joined_list_entries=13, ring_id=0xb6334668)
at totempg.c:358
#7 0x007ae2c1 in totemmrp_confchg_fn
(configuration_type=TOTEM_CONFIGURATION_REGULAR, member_list=0xbfe12e84,
member_list_entries=18, left_list=0x0, left_list_entries=0,
joined_list=0xbfe13a84, joined_list_entries=13, ring_id=0xb6334668)
at totemmrp.c:108
#8 0x007a867e in memb_state_operational_enter (instance=0xb6313008) at
totemsrp.c:1771
#9 0x007adfb0 in message_handler_orf_token (instance=0xb6313008,
msg=0x8cd22c0, msg_len=70, endian_conversion_needed=0) at totemsrp.c:3615
#10 0x007a3437 in main_deliver_fn (context=0xb6313008, msg=0x41f7,
msg_len=70) at totemsrp.c:4395
#11 0x007a0e90 in none_token_recv (rrp_instance=0x8c68018, iface_no=0,
context=0xb6313008, msg=0x8cd22c0, msg_len=70, token_seq=40) at
totemrrp.c:668
#12 0x007a2e0b in rrp_deliver_fn (context=0x8c6b480, msg=0x8cd22c0,
msg_len=70) at totemrrp.c:1677
#13 0x007a05b5 in net_deliver_fn (handle=7012056932793974784, fd=8,
revents=1, data=0x8cd1c90) at totemudpu.c:1132
#14 0x00797cc9 in poll_run (handle=7012056932793974784) at coropoll.c:513
#15 0x0804eddf in main ()
Core:
http://dl.dropbox.com/u/1932700/corosync.16887.0.core.gz
Binfile:
http://dl.dropbox.com/u/1932700/corosync.gz
--
Best regards,
Proskurin Kirill
totem {
version: 2
# How long before declaring a token lost (ms)
token: 2500
# How many token retransmits before forming a new configuration
token_retransmits_before_loss_const: 10
# How long to wait for join messages in the membership protocol (ms)
join: 200
# How long to wait for consensus to be achieved before starting a new
# round of membership configuration (ms)
consensus: 3000
# Turn off the virtual synchrony filter
vsftype: none
# Number of messages that may be sent by one processor on receipt of
# the token
max_messages: 20
# Stagger sending the node join messages by 1..send_join ms
send_join: 50
# Disable encryption
secauth: off
# Receive timeout
fail_recv_const: 5000
interface {
### Unicast madness start
member {
memberaddr: 10.3.1.201
}
member {
memberaddr: 10.3.1.202
}
member {
memberaddr: 10.3.1.203
}
member {
memberaddr: 10.3.1.110
}
member {
memberaddr: 10.3.1.70
}
member {
memberaddr: 10.6.1.169
}
member {
memberaddr: 10.6.1.170
}
member {
memberaddr: 10.3.1.95
}
member {
memberaddr: 10.3.1.94
}
member {
memberaddr: 10.3.1.71
}
member {
memberaddr: 10.3.1.64
}
member {
memberaddr: 10.6.1.173
}
member {
memberaddr: 10.3.1.112
}
member {
memberaddr: 10.6.1.175
}
member {
memberaddr: 10.6.1.186
}
member {
memberaddr: 10.3.1.100
}
member {
memberaddr: 10.6.1.9
}
member {
memberaddr: 10.6.1.3
}
member {
memberaddr: 10.6.1.4
}
member {
memberaddr: 10.6.1.253
}
member {
memberaddr: 10.6.1.6
}
member {
memberaddr: 10.6.1.89
}
member {
memberaddr: 10.6.1.78
}
member {
memberaddr: 10.3.1.171
}
member {
memberaddr: 10.6.1.7
}
member {
memberaddr: 10.6.1.187
}
member {
memberaddr: 10.6.1.188
}
member {
memberaddr: 10.6.1.254
}
member {
memberaddr: 10.6.1.58
}
member {
memberaddr: 10.6.1.59
}
member {
memberaddr: 10.6.1.60
}
member {
memberaddr: 10.6.1.61
}
member {
memberaddr: 10.6.1.62
}
member {
memberaddr: 10.6.1.63
}
member {
memberaddr: 10.6.1.64
}
member {
memberaddr: 10.6.1.65
}
member {
memberaddr: 10.6.1.66
}
member {
memberaddr: 10.6.1.67
}
member {
memberaddr: 10.3.1.155
}
member {
memberaddr: 10.3.1.170
}
member {
memberaddr: 10.6.1.95
}
member {
memberaddr: 10.3.1.58
}
member {
memberaddr: 10.6.1.183
}
member {
memberaddr: 10.6.1.105
}
member {
memberaddr: 10.6.2.48
}
member {
memberaddr: 10.6.2.49
}
member {
memberaddr: 10.6.2.50
}
ringnumber: 0
bindnetaddr: 10.6.0.0
mcastport: 5405
}
transport: udpu
# interface {
# ringnumber: 1
# bindnetaddr: 10.6.0.0
# mcastaddr: 239.255.1.1
# mcastport: 5405
# ttl: 31
# }
}
logging {
fileline: off
to_syslog: no
to_stderr: no
to_logfile: yes
logfile: /var/log/corosync.log
debug: off
timestamp: on
}
amf {
mode: disabled
}
_______________________________________________
Openais mailing list
[email protected]
https://lists.linux-foundation.org/mailman/listinfo/openais