Ack, Tested 

/ Srikanth 

----- [email protected] wrote:

> 00-README.conf                                             |   8 +-
>  osaf/services/infrastructure/nid/scripts/configure_tipc.in |  83
> +++++++++++++-
>  2 files changed, 87 insertions(+), 4 deletions(-)
> 
> 
> Issue :
> ---------
> Currently in Opensaf, when a duplicate tipc-id is detected on the
> network, both controllers eventually crash and the cluster goes for
> a reboot.
> 
> steps to reproduce:
> ----------------------------
>  1. Both controllers SC-1,SC-2 are up and running with SC-1 active and
> SC-2 standby.
>  2. Wrongly configure the slot_id of PL-3 with value 3 (same value as
> SC-2)
>  3. Start the opensaf on PL-3.
> 
> Fix :
> ------
> Added a check for a duplicate TIPC node_id before configuring
> TIPC/starting Opensaf
> 
> This fix is done using the tipc tools. Usage:
>        tipc-config command [command ...]
> 
>   valid commands:
>   -addr [=<addr>]                            Get/set node address
>   -b    [=<bearerpat>]                       Get bearers
>   -bd    =<bearerpat>                        Disable bearer
>   -be    =<bearer>[/<domain>[/<priority>]]]  Enable bearer
>   -dest  =<addr>                             Command destination node
>   -help                                      This usage list
>   -i                                         Interactive set
> operations
>   -l    [=<domain>|<linkpat>]                Get links to domain
>   -log  [=<size>]                            Dump/resize log
>   -lp    =<linkpat>|<bearer>|<media>/<value> Set link priority
>   -ls   [=<linkpat>]                         Get link statistics
>   -lsr   =<linkpat>                          Reset link statistics
>   -lt    =<linkpat>|<bearer>|<media>/<value> Set link tolerance
>   -lw    =<linkpat>|<bearer>|<media>/<value> Set link window
>   -m                                         Get media
>   -max_clusters [=<value>]                   Get/set max clusters in
> own zone
>   -max_nodes    [=<value>]                   Get/set max nodes in own
> cluster
>   -max_ports    [=<value>]                   Get/set max number of
> ports
>   -max_publ     [=<value>]                   Get/set max publications
>   -max_remotes  [=<value>]                   Get/set max non-cluster
> neighbors
>   -max_subscr   [=<value>]                   Get/set max
> subscriptions
>   -max_zones    [=<value>]                   Get/set max zones in own
> network
>   -mng  [=enable|disable]                    Get/set remote
> management
>   -n    [=<domain>]                          Get nodes in domain
>   -netid[=<value>]                           Get/set network id
>   -nt   [=[<depth>,]<type>[,<low>[,<up>]]]   Get name table
>         where <depth> = types|names|ports|all
>   -p                                         Get port info
>   -r    [=<domain>]                          Get routes to domain
>   -s                                         Get TIPC status info
>   -v                                         Verbose output
>   -V                                         Get tipc-config version
> info
> (using the tipc-config -nt option), and within the scope of the
> Opensaf startup scripts.
> Note: This bug can also be fixed in the TIPC code; once that fix is
> available we can remove this code.
> 
> We wish to check whether a duplicate node is present in the cluster
> before adding the newly configured Opensaf node. This is accomplished
> by adding a dummy node to the TIPC topology; this node exists only for
> a very short time and will not match any other Opensaf node
> configuration.
> 
> The dummy node uses the name sequence {1,1,2000}; this allows
> Opensaf to find the information at start-up by getting the existing
> name table of the cluster using the `tipc-config -nt` command.
> 
> diff --git a/00-README.conf b/00-README.conf
> --- a/00-README.conf
> +++ b/00-README.conf
> @@ -128,13 +128,17 @@ Notes:
>    as: $ configure_tipc start <interface name> <TIPC netid>
>    For eg:- $ configure_tipc start eth0 9859
>  
> -(f) Setting MDS_TIPC_MCAST_ENABLED to 1 or 0, allows OpenSAF to
> enable or
> +(h) Setting MDS_TIPC_MCAST_ENABLED to 1 or 0, allows OpenSAF to
> enable or
>    disable TIPC Multicast Messaging and this configuration is valid
> when
>    MDS_TRANSPORT is set to TIPC. By Default TIPC  Multicast Messaging
> is Enabled.
>  
>    Note: In case of TIPC Multicast Messaging disabled (0), the
> performance
>    of OpenSAF will be considerably lower compared to Enabled (1).
> -  
> +
> +(i) To use TIPC duplicate node address detection in cluster, while
> starting Opensaf
> +    we needs to enabled TIPC_DUPLICATE_NODE_DETECT=YES in
> +    `/usr/lib(64)/opensaf/configure_tipc`  script.  
> +
> 
> *******************************************************************************
>  nodeinit.conf.<node_type>
>  
> diff --git
> a/osaf/services/infrastructure/nid/scripts/configure_tipc.in
> b/osaf/services/infrastructure/nid/scripts/configure_tipc.in
> --- a/osaf/services/infrastructure/nid/scripts/configure_tipc.in
> +++ b/osaf/services/infrastructure/nid/scripts/configure_tipc.in
> @@ -35,6 +35,16 @@ SUBSLOT_ID_FILE=$pkgsysconfdir/subslot_i
>  # of the address and the slot ID is shifted up 4 bits. 
>  USE_SUBSLOT_ID=${TIPC_USE_SUBSLOT_ID:-"NO"}
>  
> +# Currently supported max nodes for Opensaf
> +TIPC_MAX_NODES=2000
> +
> +# Support for enable disable duplicate node verification.
> +# When TIPC_DUPLICATE_NODE_DETECT is set to "YES" (the default is
> "YES"),
> +# at Opensaf start-up , it is per-verified whether any duplicate
> node
> +# exist in the cluster, set TIPC_DUPLICATE_NODE_DETECT set to NO to
> disable
> +# per-verification of duplicate node.
> +DUPLICATE_NODE_DETECT=${TIPC_DUPLICATE_NODE_DETECT:-"YES"}
> +
>  # constants
>  SHIFT4=4
>  
> @@ -112,6 +122,72 @@ else
>      TIPC_NODEID=$SLOT_ID    
>  fi
>  
> +function tipc_duplicate_node_detect ()
> +{
> +    logger -t opensaf -s "Checking for duplicate Node: $TIPC_NODEID
> in Cluster..."
> +    if ! test -f "$TIPC_MODULE"  ; then
> +      modprobe tipc
> +    else
> +      insmod "$TIPC_MODULE"
> +    fi
> + 
> +    ret_val=$?
> +    if [ $ret_val -ne 0 ] ; then
> +        logger -p user.err " TIPC Module could not be loaded "
> +        exit 1
> +    fi
> + 
> +    # max_nodes is not supported in TIPC 2.0
> +    if ${tipc_config} -max_nodes 2> /dev/null; then
> +        ${tipc_config} -max_nodes=$TIPC_MAX_NODES
> +        ret_val=$?
> +        if [ $ret_val -ne 0 ] ; then
> +            modprobe -r tipc
> +            exit 1
> +        fi
> +    fi
> + 
> +    if [ $# -eq 1 ] ; then
> +            ################ Address config and check #########
> +            ${tipc_config} -a=1.1.$TIPC_MAX_NODES
> +            ret_z1=$?
> +            if [ $ret_z1 -ne 0 ] ; then
> +                echo "Unable to Configure TIPC address, Please try
> again, exiting" 
> +                modprobe -r tipc
> +                exit 1
> +            fi
> +    else
> +        ${tipc_config} -netid=$TIPC_NETID -a=1.1.$TIPC_MAX_NODES
> +        ret_z2=$?
> +        if [ $ret_z2 -ne 0 ] ; then
> +            echo "Unable to Configure TIPC address, Please try again,
> exiting" 
> +            modprobe -r tipc
> +            exit 1
> +        fi
> +        ${tipc_config} -be=$(echo $ETH_NAME | sed
> 's/^/eth:/;s/,/,eth:/g')
> +        ret_z3=$?
> +        if [ $ret_z3 -ne 0 ] ; then
> +            echo "Unable to Configure TIPC bearer interface, Please
> try again, exiting" 
> +            modprobe -r tipc
> +            exit 1
> +        else
> +          ${tipc_config} -nt | grep cluster | grep
> "1.1.$TIPC_NODEID:" > /dev/null
> +          ret_z4=$?
> +          if [ $ret_z4 -eq 0 ] ; then
> +            logger -t opensaf -s "Unable to Configure TIPC Node,
> Duplicate Node $TIPC_NODEID exist in cluster, exiting..."
> +            modprobe -r tipc
> +            exit 1
> +          else
> +            if ! test -f "$TIPC_MODULE"  ; then
> +              modprobe -r tipc
> +            else
> +              rmmod "$TIPC_MODULE"
> +            fi
> +          fi
> +        fi
> +    fi
> +}
> +
>  function tipc_configure ()
>  {
>      echo "Inserting TIPC mdoule..."
> @@ -130,10 +206,10 @@ function tipc_configure ()
>  
>      # max_nodes is not supported in TIPC 2.0
>      if ${tipc_config} -max_nodes 2> /dev/null; then
> -        ${tipc_config} -max_nodes=2000 
> +        ${tipc_config} -max_nodes=$TIPC_MAX_NODES 
>          ret_val=$?
>          if [ $ret_val -ne 0 ] ; then 
> -            echo "Unable to set the Max_nodes to 2000, exiting
> ....."
> +            echo "Unable to set the Max_nodes to $TIPC_MAX_NODES,
> exiting ....."
>              modprobe -r tipc
>              exit 1
>          fi
> @@ -177,6 +253,9 @@ function tipc_configure ()
>  
>  # Consider that TIPC could be statically linked
>  if ! grep TIPC /proc/net/protocols >& /dev/null; then
> +    if [ "$DUPLICATE_NODE_DETECT" = "YES" ]; then
> +       tipc_duplicate_node_detect 
> +    fi
>      tipc_configure
>  else
>      # TIPC is already present, is it configured properly?

------------------------------------------------------------------------------
_______________________________________________
Opensaf-devel mailing list
[email protected]
https://lists.sourceforge.net/lists/listinfo/opensaf-devel

Reply via email to