Ack, Tested / Srikanth
----- [email protected] wrote: > 00-README.conf | 8 +- > osaf/services/infrastructure/nid/scripts/configure_tipc.in | 83 > +++++++++++++- > 2 files changed, 87 insertions(+), 4 deletions(-) > > > Issue : > --------- > Currently in Opensaf, when a duplicate tipc-id is identified on the network, > eventually both controllers crash > and the Cluster goes for reboot. > > steps to reproduce: > ---------------------------- > 1. Both controllers SC-1,SC-2 are up and running with SC-1 active and > SC-2 standby. > 2. Wrongly configure the slot_id of PL-3 with value 3 (same value as > SC-2) > 3. Start the opensaf on PL-3. > > Fix : > ------ > Added a check to verify duplicate TIPC node_id before configuring > TIPC/starting Opensaf > > This fix is done using tipc tools Usage: > tipc-config command [command ...] > > valid commands: > -addr [=<addr>] Get/set node address > -b [=<bearerpat>] Get bearers > -bd =<bearerpat> Disable bearer > -be =<bearer>[/<domain>[/<priority>]]] Enable bearer > -dest =<addr> Command destination node > -help This usage list > -i Interactive set > operations > -l [=<domain>|<linkpat>] Get links to domain > -log [=<size>] Dump/resize log > -lp =<linkpat>|<bearer>|<media>/<value> Set link priority > -ls [=<linkpat>] Get link statistics > -lsr =<linkpat> Reset link statistics > -lt =<linkpat>|<bearer>|<media>/<value> Set link tolerance > -lw =<linkpat>|<bearer>|<media>/<value> Set link window > -m Get media > -max_clusters [=<value>] Get/set max clusters in > own zone > -max_nodes [=<value>] Get/set max nodes in own > cluster > -max_ports [=<value>] Get/set max number of > ports > -max_publ [=<value>] Get/set max publications > -max_remotes [=<value>] Get/set max non-cluster > neighbors > -max_subscr [=<value>] Get/set max > subscriptions > -max_zones [=<value>] Get/set max zones in own > network > -mng [=enable|disable] Get/set remote > management > -n [=<domain>] Get nodes in domain > -netid[=<value>] Get/set network id > -nt [=[<depth>,]<type>[,<low>[,<up>]]] 
Get name table > where <depth> = types|names|ports|all > -p Get port info > -r [=<domain>] Get routes to domain > -s Get TIPC status info > -v Verbose output > -V Get tipc-config version > info (tipc-config -nt option ) and > within the scope of Opensaf startup scripts. > Note : This bug can also be fixed in TIPC code; once that fix is available we can > remove this code. > > We wish to check whether duplicate nodes are present in the > cluster > before adding the newly configured Opensaf node; this is accomplished > by > adding a dummy node to the TIPC topology, and this node has a very short > time limit but will not match > any other Opensaf Node configuration. > > The use of a dummy Node having the name sequence {1,1,2000} > allows > Opensaf to find the information at start-up by getting the existing name > table of the cluster > by using the tipc-config -nt command. > > diff --git a/00-README.conf b/00-README.conf > --- a/00-README.conf > +++ b/00-README.conf > @@ -128,13 +128,17 @@ Notes: > as: $ configure_tipc start <interface name> <TIPC netid> > For eg:- $ configure_tipc start eth0 9859 > > -(f) Setting MDS_TIPC_MCAST_ENABLED to 1 or 0, allows OpenSAF to > enable or > +(h) Setting MDS_TIPC_MCAST_ENABLED to 1 or 0, allows OpenSAF to > enable or > disable TIPC Multicast Messaging and this configuration is valid > when > MDS_TRANSPORT is set to TIPC. By Default TIPC Multicast Messaging > is Enabled. > > Note: In case of TIPC Multicast Messaging disabled (0), the > performance > of OpenSAF will be considerably lower compared to Enabled (1). > - > + > +(i) To use TIPC duplicate node address detection in cluster, while > starting Opensaf > + we needs to enabled TIPC_DUPLICATE_NODE_DETECT=YES in > + `/usr/lib(64)/opensaf/configure_tipc` script. 
> + > > ******************************************************************************* > nodeinit.conf.<node_type> > > diff --git > a/osaf/services/infrastructure/nid/scripts/configure_tipc.in > b/osaf/services/infrastructure/nid/scripts/configure_tipc.in > --- a/osaf/services/infrastructure/nid/scripts/configure_tipc.in > +++ b/osaf/services/infrastructure/nid/scripts/configure_tipc.in > @@ -35,6 +35,16 @@ SUBSLOT_ID_FILE=$pkgsysconfdir/subslot_i > # of the address and the slot ID is shifted up 4 bits. > USE_SUBSLOT_ID=${TIPC_USE_SUBSLOT_ID:-"NO"} > > +# Currently supported max nodes for Opensaf > +TIPC_MAX_NODES=2000 > + > +# Support for enable disable duplicate node verification. > +# When TIPC_DUPLICATE_NODE_DETECT is set to "YES" (the default is > "YES"), > +# at Opensaf start-up , it is per-verified whether any duplicate > node > +# exist in the cluster, set TIPC_DUPLICATE_NODE_DETECT set to NO to > disable > +# per-verification of duplicate node. > +DUPLICATE_NODE_DETECT=${TIPC_DUPLICATE_NODE_DETECT:-"YES"} > + > # constants > SHIFT4=4 > > @@ -112,6 +122,72 @@ else > TIPC_NODEID=$SLOT_ID > fi > > +function tipc_duplicate_node_detect () > +{ > + logger -t opensaf -s "Checking for duplicate Node: $TIPC_NODEID > in Cluster..." > + if ! test -f "$TIPC_MODULE" ; then > + modprobe tipc > + else > + insmod "$TIPC_MODULE" > + fi > + > + ret_val=$? > + if [ $ret_val -ne 0 ] ; then > + logger -p user.err " TIPC Module could not be loaded " > + exit 1 > + fi > + > + # max_nodes is not supported in TIPC 2.0 > + if ${tipc_config} -max_nodes 2> /dev/null; then > + ${tipc_config} -max_nodes=$TIPC_MAX_NODES > + ret_val=$? > + if [ $ret_val -ne 0 ] ; then > + modprobe -r tipc > + exit 1 > + fi > + fi > + > + if [ $# -eq 1 ] ; then > + ################ Address config and check ######### > + ${tipc_config} -a=1.1.$TIPC_MAX_NODES > + ret_z1=$? 
> + if [ $ret_z1 -ne 0 ] ; then > + echo "Unable to Configure TIPC address, Please try > again, exiting" > + modprobe -r tipc > + exit 1 > + fi > + else > + ${tipc_config} -netid=$TIPC_NETID -a=1.1.$TIPC_MAX_NODES > + ret_z2=$? > + if [ $ret_z2 -ne 0 ] ; then > + echo "Unable to Configure TIPC address, Please try again, > exiting" > + modprobe -r tipc > + exit 1 > + fi > + ${tipc_config} -be=$(echo $ETH_NAME | sed > 's/^/eth:/;s/,/,eth:/g') > + ret_z3=$? > + if [ $ret_z3 -ne 0 ] ; then > + echo "Unable to Configure TIPC bearer interface, Please > try again, exiting" > + modprobe -r tipc > + exit 1 > + else > + ${tipc_config} -nt | grep cluster | grep > "1.1.$TIPC_NODEID:" > /dev/null > + ret_z4=$? > + if [ $ret_z4 -eq 0 ] ; then > + logger -t opensaf -s "Unable to Configure TIPC Node, > Duplicate Node $TIPC_NODEID exist in cluster, exiting..." > + modprobe -r tipc > + exit 1 > + else > + if ! test -f "$TIPC_MODULE" ; then > + modprobe -r tipc > + else > + rmmod "$TIPC_MODULE" > + fi > + fi > + fi > + fi > +} > + > function tipc_configure () > { > echo "Inserting TIPC mdoule..." > @@ -130,10 +206,10 @@ function tipc_configure () > > # max_nodes is not supported in TIPC 2.0 > if ${tipc_config} -max_nodes 2> /dev/null; then > - ${tipc_config} -max_nodes=2000 > + ${tipc_config} -max_nodes=$TIPC_MAX_NODES > ret_val=$? > if [ $ret_val -ne 0 ] ; then > - echo "Unable to set the Max_nodes to 2000, exiting > ....." > + echo "Unable to set the Max_nodes to $TIPC_MAX_NODES, > exiting ....." > modprobe -r tipc > exit 1 > fi > @@ -177,6 +253,9 @@ function tipc_configure () > > # Consider that TIPC could be statically linked > if ! grep TIPC /proc/net/protocols >& /dev/null; then > + if [ "$DUPLICATE_NODE_DETECT" = "YES" ]; then > + tipc_duplicate_node_detect > + fi > tipc_configure > else > # TIPC is already present, is it configured properly? 
------------------------------------------------------------------------------ _______________________________________________ Opensaf-devel mailing list [email protected] https://lists.sourceforge.net/lists/listinfo/opensaf-devel
