Hi!

See my replies inline, marked [AndersW].

regards,
Anders Widell

On 03/17/2016 11:32 AM, praveen malviya wrote:
> Hi Anders,
>
> Please find some comments and queries inline with [Praveen]
>
> Thanks,
> Praveen
>
>
> On 29-Feb-16 8:44 PM, Anders Widell wrote:
>> osaf/services/saf/amf/amfd/clm.cc         |  21 +++++-
>>   osaf/services/saf/amf/amfd/include/amfd.h |   2 +
>>   osaf/services/saf/amf/amfd/include/cb.h   |   1 +
>>   osaf/services/saf/amf/amfd/include/role.h |   2 +
>>   osaf/services/saf/amf/amfd/main.cc        |  78 
>> ++++++---------------------
>>   osaf/services/saf/amf/amfd/ndfsm.cc       |   9 ++-
>>   osaf/services/saf/amf/amfd/node.cc        |   7 +--
>>   osaf/services/saf/amf/amfd/role.cc        |  86 
>> ++++++++++++++++++++++++++++++-
>>   osaf/services/saf/amf/amfd/sgproc.cc      |  11 +++-
>>   osaf/services/saf/amf/amfnd/clm.cc        |  27 ++++++--
>>   10 files changed, 160 insertions(+), 84 deletions(-)
>>
>>
>> Add support for configuring the system with more than two OpenSAF 2N 
>> SUs. In
>> particular, this means that all OpenSAF directors must support 
>> starting up
>> and running without (initially) getting any assignment from AMF. 
>> Locking of
>> an OpenSAF 2N SU is currently not supported on a system configured 
>> with more
>> than two OpenSAF 2N SUs.
> [Praveen] This patch does not contain any change for any restricton on 
> locking of OpenSAF 2N SU as mentioned above.
[AndersW] No. This restriction will be documented. Do you think we need 
to add checks for this case in the admin op as well?
>>
>> diff --git a/osaf/services/saf/amf/amfd/clm.cc 
>> b/osaf/services/saf/amf/amfd/clm.cc
>> --- a/osaf/services/saf/amf/amfd/clm.cc
>> +++ b/osaf/services/saf/amf/amfd/clm.cc
>> @@ -21,8 +21,7 @@
>>   #include <amfd.h>
>>   #include <clm.h>
>>   #include <node.h>
>> -
>> -static SaVersionT clmVersion = { 'B', 4, 1 };
>> +#include "osaf_time.h"
>>
>>   static void clm_node_join_complete(AVD_AVND *node)
>>   {
>> @@ -392,9 +391,21 @@ SaAisErrorT avd_clm_init(void)
>>           SaAisErrorT error = SA_AIS_OK;
>>
>>       TRACE_ENTER();
>> -    error = saClmInitialize_4(&avd_cb->clmHandle, &clm_callbacks, 
>> &clmVersion);
>> -    if (SA_AIS_OK != error) {
>> -        LOG_ER("Failed to initialize with CLM %u", error);
>> +    for (;;) {
>> +        SaVersionT Version = { 'B', 4, 1 };
>> +        error = saClmInitialize_4(&avd_cb->clmHandle, 
>> &clm_callbacks, &Version);
>> +        if (error == SA_AIS_ERR_TRY_AGAIN ||
>> +            error == SA_AIS_ERR_TIMEOUT ||
>> +                    error == SA_AIS_ERR_UNAVAILABLE) {
>> +            if (error != SA_AIS_ERR_TRY_AGAIN) {
>> +                LOG_WA("saClmInitialize_4 returned %u",
>> +                       (unsigned) error);
>> +            }
>> +            osaf_nanosleep(&kHundredMilliseconds);
>> +            continue;
>> +        }
>> +        if (error == SA_AIS_OK) break;
>> +        LOG_ER("Failed to Initialize with CLM: %u", error);
>>           goto done;
>>       }
>>       error = saClmSelectionObjectGet(avd_cb->clmHandle, 
>> &avd_cb->clm_sel_obj);
>> diff --git a/osaf/services/saf/amf/amfd/include/amfd.h 
>> b/osaf/services/saf/amf/amfd/include/amfd.h
>> --- a/osaf/services/saf/amf/amfd/include/amfd.h
>> +++ b/osaf/services/saf/amf/amfd/include/amfd.h
>> @@ -33,6 +33,7 @@
>>   #ifndef AVD_H
>>   #define AVD_H
>>
>> +#include <stdint.h>
>>   #include "logtrace.h"
>>
>>   #include "amf.h"
>> @@ -65,5 +66,6 @@
>>   #include "ckpt_msg.h"
>>   #include "ckpt_edu.h"
>>   #include "ckpt_updt.h"
>> +#include "saAmf.h"
>>
>>   #endif
>> diff --git a/osaf/services/saf/amf/amfd/include/cb.h 
>> b/osaf/services/saf/amf/amfd/include/cb.h
>> --- a/osaf/services/saf/amf/amfd/include/cb.h
>> +++ b/osaf/services/saf/amf/amfd/include/cb.h
>> @@ -207,6 +207,7 @@ typedef struct cl_cb_tag {
>>       SaClmHandleT clmHandle;
>>       SaSelectionObjectT clm_sel_obj;
>>
>> +    bool fully_initialized;
>>       bool swap_switch; /* true - In middle of role switch. */
>>
>>       /** true when active services (IMM, LOG, NTF, etc.) exist
>> diff --git a/osaf/services/saf/amf/amfd/include/role.h 
>> b/osaf/services/saf/amf/amfd/include/role.h
>> --- a/osaf/services/saf/amf/amfd/include/role.h
>> +++ b/osaf/services/saf/amf/amfd/include/role.h
>> @@ -34,6 +34,8 @@ extern uint32_t amfd_switch_qsd_stdby(AV
>>   extern uint32_t amfd_switch_stdby_actv(AVD_CL_CB *cb);
>>   extern uint32_t amfd_switch_qsd_actv(AVD_CL_CB *cb);
>>   extern uint32_t amfd_switch_actv_qsd(AVD_CL_CB *cb);
>> +extern uint32_t initialize_for_assignment(cl_cb_tag* cb,
>> +                                          SaAmfHAStateT ha_state);
>>
>>   #endif /* ROLE_H */
>>
>> diff --git a/osaf/services/saf/amf/amfd/main.cc 
>> b/osaf/services/saf/amf/amfd/main.cc
>> --- a/osaf/services/saf/amf/amfd/main.cc
>> +++ b/osaf/services/saf/amf/amfd/main.cc
>> @@ -56,6 +56,7 @@
>>   #include <sutcomptype.h>
>>   #include <sutype.h>
>>   #include <su.h>
>> +#include "osaf_utility.h"
>>
>>   static const char* internal_version_id_  __attribute__ ((used)) = 
>> "@(#) $Id: " INTERNAL_VERSION_ID " $";
>>
>> @@ -445,7 +446,8 @@ static void rda_cb(uint32_t notused, PCS
>>
>>       if (((avd_cb->avail_state_avd == SA_AMF_HA_STANDBY) ||
>>            (avd_cb->avail_state_avd == SA_AMF_HA_QUIESCED)) &&
>> -        (cb_info->info.io_role == PCS_RDA_ACTIVE)) {
>> +        (cb_info->info.io_role == PCS_RDA_ACTIVE ||
>> +        cb_info->info.io_role == PCS_RDA_STANDBY)) {
>>
>>           uint32_t rc;
>>           AVD_EVT *evt;
>> @@ -474,7 +476,6 @@ static uint32_t initialize(void)
>>   {
>>       AVD_CL_CB *cb = avd_cb;
>>       int rc = NCSCC_RC_FAILURE;
>> -    SaVersionT ntfVersion = { 'A', 0x01, 0x01 };
>>       SaAmfHAStateT role;
>>       char *val;
>>
>> @@ -524,8 +525,13 @@ static uint32_t initialize(void)
>>       }
>>
>>       cb->init_state = AVD_INIT_BGN;
>> +    cb->mbcsv_sel_obj = -1;
>> +    cb->imm_sel_obj = -1;
>> +    cb->clm_sel_obj = -1;
>> +    cb->fully_initialized = false;
>>       cb->swap_switch = false;
>>       cb->active_services_exist = true;
>> +    cb->mbcsv_sel_obj = -1;
> [Praveen] Duplicate initialization, already done above.
[Anders W] Will remove.
>
>>       cb->stby_sync_state = AVD_STBY_IN_SYNC;
>>       cb->sync_required = true;
>>
>> @@ -544,67 +550,20 @@ static uint32_t initialize(void)
>>       /* get the node id of the node on which the AVD is running. */
>>       cb->node_id_avd = m_NCS_GET_NODE_ID;
>>
>> -    if (avd_mds_init(cb) != NCSCC_RC_SUCCESS) {
>> -        LOG_ER("avd_mds_init FAILED");
>> -        goto done;
>> -    }
>> -
>> -    if (NCSCC_RC_FAILURE == avsv_mbcsv_register(cb)) {
>> -        LOG_ER("avsv_mbcsv_register FAILED");
>> -        goto done;
>> -    }
>> -
>> -    if (avd_clm_init() != SA_AIS_OK) {
>> -        LOG_EM("avd_clm_init FAILED");
>> -        goto done;
>> -    }
>> -
>> -    if (avd_imm_init(cb) != SA_AIS_OK) {
>> -        LOG_ER("avd_imm_init FAILED");
>> -        goto done;
>> -    }
>> -
>> -    if ((rc = saNtfInitialize(&cb->ntfHandle, nullptr, &ntfVersion)) 
>> != SA_AIS_OK) {
>> -        LOG_ER("saNtfInitialize Failed (%u)", rc);
>> -        rc = NCSCC_RC_FAILURE;
>> -        goto done;
>> -    }
>> -
>>       if ((rc = rda_get_role(&role)) != NCSCC_RC_SUCCESS) {
>>           LOG_ER("rda_get_role FAILED");
>>           goto done;
>>       }
>>
>> -    cb->avail_state_avd = role;
>> -
>> -    if (NCSCC_RC_SUCCESS != avd_mds_set_vdest_role(cb, role)) {
>> -        LOG_ER("avd_mds_set_vdest_role FAILED");
>> -        goto done;
>> -    }
>> -
>> -    if (NCSCC_RC_SUCCESS != avsv_set_ckpt_role(cb, role)) {
>> -        LOG_ER("avsv_set_ckpt_role FAILED");
>> -        goto done;
>> -    }
>> -
>>       if ((rc = rda_register_callback(0, rda_cb)) != NCSCC_RC_SUCCESS) {
>>           LOG_ER("rda_register_callback FAILED %u", rc);
>>           goto done;
>>       }
>>
>> -    if (role == SA_AMF_HA_ACTIVE) {
>> -        rc = avd_active_role_initialization(cb, role);
>> -        if (rc != NCSCC_RC_SUCCESS) {
>> -            LOG_ER("avd_active_role_initialization FAILED");
>> -            goto done;
>> -        }
>> -    }
>> -    else {
>> -        rc = avd_standby_role_initialization(cb);
>> -        if (rc != NCSCC_RC_SUCCESS) {
>> -            LOG_ER("avd_standby_role_initialization FAILED");
>> -            goto done;
>> -        }
>> +    if ((rc = initialize_for_assignment(cb, role))
>> +        != NCSCC_RC_SUCCESS) {
>> +        LOG_ER("initialize_for_assignment FAILED %u", (unsigned) rc);
>> +        goto done;
>>       }
>>
>>       rc = NCSCC_RC_SUCCESS;
>> @@ -647,14 +606,13 @@ static void main_loop(void)
>>       fds[FD_TERM].events = POLLIN;
>>       fds[FD_MBX].fd = mbx_fd.rmv_obj;
>>       fds[FD_MBX].events = POLLIN;
>> -    fds[FD_MBCSV].fd = cb->mbcsv_sel_obj;
>> -    fds[FD_MBCSV].events = POLLIN;
>> -    fds[FD_CLM].fd = cb->clm_sel_obj;
>> -    fds[FD_CLM].events = POLLIN;
>> -    fds[FD_IMM].fd = cb->imm_sel_obj; // IMM fd must be last in array
>> -    fds[FD_IMM].events = POLLIN;
>> -
>>       while (1) {
>> +        fds[FD_MBCSV].fd = cb->mbcsv_sel_obj;
>> +        fds[FD_MBCSV].events = POLLIN;
>> +        fds[FD_CLM].fd = cb->clm_sel_obj;
>> +        fds[FD_CLM].events = POLLIN;
>> +        fds[FD_IMM].fd = cb->imm_sel_obj; // IMM fd must be last in 
>> array
>> +        fds[FD_IMM].events = POLLIN;
>>
>>           if (cb->immOiHandle != 0) {
>>               fds[FD_IMM].fd = cb->imm_sel_obj;
>> diff --git a/osaf/services/saf/amf/amfd/ndfsm.cc 
>> b/osaf/services/saf/amf/amfd/ndfsm.cc
>> --- a/osaf/services/saf/amf/amfd/ndfsm.cc
>> +++ b/osaf/services/saf/amf/amfd/ndfsm.cc
>> @@ -190,8 +190,9 @@ void avd_nd_ncs_su_assigned(AVD_CL_CB *c
>>       TRACE_ENTER();
>>
>>       for (const auto& ncs_su : avnd->list_of_ncs_su) {
>> -        if ((ncs_su->list_of_susi == AVD_SU_SI_REL_NULL) ||
>> -            (ncs_su->list_of_susi->fsm != AVD_SU_SI_STATE_ASGND)) {
>> +        if ((ncs_su->sg_of_su->curr_assigned_sus() < 2) &&
>> +            ((ncs_su->list_of_susi == AVD_SU_SI_REL_NULL) ||
>> +            (ncs_su->list_of_susi->fsm != AVD_SU_SI_STATE_ASGND))) {
>>               TRACE_LEAVE();
>>               /* this is an unassigned SU so no need to scan further 
>> return here. */
>>               return;
>> @@ -328,6 +329,10 @@ void avd_mds_avnd_down_evh(AVD_CL_CB *cb
>>
>>           if (avd_cb->avail_state_avd == SA_AMF_HA_ACTIVE) {
>>               avd_node_failover(node);
>> +            // Update standby out of sync if standby sc goes down
>> +            if (eravd_cb->node_id_avd_oth == node->node_info.nodeId) {
>> +                cb->stby_sync_state = AVD_STBY_OUT_OF_SYNC;
>> +            }
> [Praveen] Deep down in a function call in  avd_node_failover(), AMF 
> marks standby status IN_SYNC in some special case. AMF marks out of 
> sync status whenever it gets sync request in chkop.cc so it is not 
> needed here.
[AndersW] I don't remember why this was added. Will check and remove if 
it doesn't cause any problems. Isn't it a bit strange though, so say 
that the standby is in sync when in fact it is down?
>
>>           } else {
>>               /* Remove dynamic info for node but keep in nodeid tree.
>>                * Possibly used at the end of controller failover to
>> diff --git a/osaf/services/saf/amf/amfd/node.cc 
>> b/osaf/services/saf/amf/amfd/node.cc
>> --- a/osaf/services/saf/amf/amfd/node.cc
>> +++ b/osaf/services/saf/amf/amfd/node.cc
>> @@ -120,7 +120,7 @@ void AVD_AVND::initialize() {
>>     pg_csi_list = {};
>>     pg_csi_list.order = NCS_DBLIST_ANY_ORDER;
>>     pg_csi_list.cmp_cookie = avsv_dblist_uns32_cmp;
>> -  type = AVSV_AVND_CARD_PAYLOAD;
>> +  type = AVSV_AVND_CARD_SYS_CON;
> [Praveen] Initially keeping as PAYLOAD, AMFD later changes node type 
> of controller AMFNDs to SYS_CON after evaluating some parameters. 
> Above default is set to SYS_CON, there is no change in this patch 
> which sets payload as payload. In this way node type of a payload will 
> also become SYS_CON.
[AndersW] I think actually the "type" member variable ought to be 
removed, since it is not needed. I can update the patch to remove this 
variable completely.
>>     rcv_msg_id = {};
>>     snd_msg_id = {};
>>     cluster_list_node_next = {};
>> @@ -486,11 +486,6 @@ static SaAisErrorT node_ccb_completed_de
>>           return SA_AIS_ERR_BAD_OPERATION;
>>       }
>>
>> -    if (node->type == AVSV_AVND_CARD_SYS_CON) {
>> -        report_ccb_validation_error(opdata, "Cannot remove 
>> controller node");
>> -        return SA_AIS_ERR_BAD_OPERATION;
>> -    }
>> -
> [Praveen] Based on some earliar discussion, I remember deletion of sys 
> controller is restricted. So check should modified to reject the 
> operation if there are only two system controlers in the system.
[AndersW] Now you can create a cluster with many system controller nodes 
(all nodes could be controllers), so we can't have this restriction any 
longer. I don't see any reason to treat the two-node case in a special 
way. You can create a cluster consisting of only one single node, and 
then scale it out to a two-node cluster. Why shouldn't it be possible to 
scale it back to one single node again?
>>       /* Check to see that the node is in admin locked state before 
>> delete */
>>       if (node->saAmfNodeAdminState != 
>> SA_AMF_ADMIN_LOCKED_INSTANTIATION) {
>>           report_ccb_validation_error(opdata, "Node '%s' is not 
>> locked instantiation", opdata->objectName.value);
>> diff --git a/osaf/services/saf/amf/amfd/role.cc 
>> b/osaf/services/saf/amf/amfd/role.cc
>> --- a/osaf/services/saf/amf/amfd/role.cc
>> +++ b/osaf/services/saf/amf/amfd/role.cc
>> @@ -46,6 +46,7 @@
>>   #include <si_dep.h>
>>   #include "osaf_utility.h"
>>   #include "role.h"
>> +#include "nid_api.h"
>>
>>   extern pthread_mutex_t imm_reinit_mutex;
>>
>> @@ -73,7 +74,15 @@ void avd_role_change_evh(AVD_CL_CB *cb,
>>       AVD_ROLE_CHG_CAUSE_T cause = msg->msg_info.d2d_chg_role_req.cause;
>>       SaAmfHAStateT role = msg->msg_info.d2d_chg_role_req.role;
>>
>> -    TRACE_ENTER2("cause=%u, role=%u", cause, role);
>> +    TRACE_ENTER2("cause=%u, role=%u, current_role=%u", cause, role,
>> +        cb->avail_state_avd);
>> +
>> +    if ((status = initialize_for_assignment(cb, role))
>> +        != NCSCC_RC_SUCCESS) {
>> +        LOG_ER("initialize_for_assignment FAILED %u",
>> +            (unsigned) status);
>> +        _exit(EXIT_FAILURE);
>> +    }
>>
>>       if (cb->avail_state_avd == role) {
>>           goto done;
>> @@ -128,6 +137,13 @@ void avd_role_change_evh(AVD_CL_CB *cb,
>>       }
>>
>>       if ((cause == AVD_FAIL_OVER) &&
>> +        (cb->avail_state_avd == SA_AMF_HA_QUIESCED) && (role == 
>> SA_AMF_HA_STANDBY)) {
>> +        /* Fail-over Quiesced to Active */
>> +        status = NCSCC_RC_SUCCESS;
>> +        goto done;
>> +    }
> [Praveen] A quiesced controller never goes from quiesced to stanby 
> only in swichover and not in failover.
> So comment must be /*Fail-over Quiesced to standby (spare controller 
> role change)*/
[AndersW] Will update the comment.
>
>> +
>> +    if ((cause == AVD_FAIL_OVER) &&
>>           (cb->avail_state_avd == SA_AMF_HA_QUIESCED) && (role == 
>> SA_AMF_HA_ACTIVE)) {
>>           /* Fail-over Quiesced to Active */
>>           status = avd_role_failover_qsd_actv(cb, role);
>> @@ -155,7 +171,73 @@ void avd_role_change_evh(AVD_CL_CB *cb,
>>       return;
>>   }
>>
>> -/****************************************************************************\
>>  
>>
>> +uint32_t initialize_for_assignment(cl_cb_tag* cb, SaAmfHAStateT 
>> ha_state)
>> +{
>> +    TRACE_ENTER2("ha_state = %d", static_cast<int>(ha_state));
>> +    SaVersionT ntfVersion = {'A', 0x01, 0x01};
>> +    uint32_t rc = NCSCC_RC_SUCCESS;
>> +    SaAisErrorT error;
>> +    if (cb->fully_initialized) goto done;
>> +    cb->avail_state_avd = ha_state;
>> +    if (ha_state == SA_AMF_HA_QUIESCED) {
>> +        if ((rc = nid_notify(const_cast<char*>("AMFD"),
>> +                     NCSCC_RC_SUCCESS, nullptr)) != NCSCC_RC_SUCCESS) {
>> +            LOG_ER("nid_notify failed");
>> +        }
>> +        goto done;
>> +    }
>> +    if ((rc = avd_mds_init(cb)) != NCSCC_RC_SUCCESS) {
>> +        LOG_ER("avd_mds_init FAILED");
>> +        goto done;
>> +    }
>> +    if ((rc = avsv_mbcsv_register(cb)) != NCSCC_RC_SUCCESS) {
>> +        LOG_ER("avsv_mbcsv_register FAILED");
>> +        goto done;
>> +    }
>> +    if (avd_clm_init() != SA_AIS_OK) {
>> +        LOG_EM("avd_clm_init FAILED");
>> +        rc = NCSCC_RC_FAILURE;
>> +        goto done;
>> +    }
>> +    if (avd_imm_init(cb) != SA_AIS_OK) {
>> +        LOG_ER("avd_imm_init FAILED");
>> +        rc = NCSCC_RC_FAILURE;
>> +        goto done;
>> +    }
>> +    if ((error = saNtfInitialize(&cb->ntfHandle, nullptr, 
>> &ntfVersion)) !=
>> +        SA_AIS_OK) {
>> +        LOG_ER("saNtfInitialize Failed (%u)", error);
>> +        rc = NCSCC_RC_FAILURE;
>> +        goto done;
>> +    }
>> +    if ((rc = avd_mds_set_vdest_role(cb, ha_state)) != 
>> NCSCC_RC_SUCCESS) {
>> +        LOG_ER("avd_mds_set_vdest_role FAILED");
>> +        goto done;
>> +    }
>> +    if ((rc = avsv_set_ckpt_role(cb, ha_state)) != NCSCC_RC_SUCCESS) {
>> +        LOG_ER("avsv_set_ckpt_role FAILED");
>> +        goto done;
>> +    }
>> +    if (ha_state == SA_AMF_HA_ACTIVE) {
>> +        rc = avd_active_role_initialization(cb, ha_state);
>> +        if (rc != NCSCC_RC_SUCCESS) {
>> +            LOG_ER("avd_active_role_initialization FAILED");
>> +            goto done;
>> +        }
>> +    } else if (ha_state == SA_AMF_HA_STANDBY) {
>> +        rc = avd_standby_role_initialization(cb);
>> +        if (rc != NCSCC_RC_SUCCESS) {
>> +            LOG_ER("avd_standby_role_initialization FAILED");
>> +            goto done;
>> +        }
>> +    }
>> +    cb->fully_initialized = true;
>> +done:
>> +    TRACE_LEAVE2("rc = %u", rc);
>> +     return rc;
>> +}
>> +
>> +/****************************************************************************
>>  
>> \
>>    * Function: avd_init_role_set
>>    *
>>    * Purpose:  AVSV function to handle AVD's initial role setting.
>> diff --git a/osaf/services/saf/amf/amfd/sgproc.cc 
>> b/osaf/services/saf/amf/amfd/sgproc.cc
>> --- a/osaf/services/saf/amf/amfd/sgproc.cc
>> +++ b/osaf/services/saf/amf/amfd/sgproc.cc
>> @@ -1390,7 +1390,16 @@ void avd_su_si_assign_evh(AVD_CL_CB *cb,
>>                       /* Since a NCS SU has been assigned trigger the 
>> node FSM. */
>>                       /* For (ncs_spec == SA_TRUE), su will not be 
>> external, so su
>>                              will have node attached. */
>> -                    avd_nd_ncs_su_assigned(cb, susi->su->su_on_node);
>> +                    for (AmfDb<uint32_t, AVD_AVND>::const_iterator 
>> it = node_id_db->begin();
>> +                        it != node_id_db->end(); it++) {
>> +                        AVD_AVND *node = 
>> const_cast<AVD_AVND*>((*it).second);
>> +
>> +                        if (node->node_state == 
>> AVD_AVND_STATE_NCS_INIT && node->adest != 0) {
>> +                            avd_nd_ncs_su_assigned(cb, node);
>> +                        } else {
>> +                            TRACE("Node_state: %u adest: %" PRIx64 " 
>> node not ready for assignments", node->node_state, node->adest);
>> +                        }
>> +                    }
> [Praveen] Could not understand this change? Since spare controllers 
> are also payloads, in which case adest can be 0. Is this for headless 
> case, there comp and su assignment information comes before node up 
> message.
[AndersW] This loop is needed to ensure set_leds is performed. It is 
sufficient that we have an active and a standby assignment for the 
OpenSAF 2N SU, so once we have that we need to loop over all nodes and 
perform set_leds if possible.

I think the check that adest is non-zero was added by Hans, to fix some 
problem that might be related to headless if I remember correctly. 
@Hans, do you remember why this check was needed?
>
>>                   }
>>               }
>>           } else {
>> diff --git a/osaf/services/saf/amf/amfnd/clm.cc 
>> b/osaf/services/saf/amf/amfnd/clm.cc
>> --- a/osaf/services/saf/amf/amfnd/clm.cc
>> +++ b/osaf/services/saf/amf/amfnd/clm.cc
>> @@ -37,6 +37,7 @@
>>   #include "avnd.h"
>>   #include "mds_pvt.h"
>>   #include "nid_api.h"
>> +#include "osaf_time.h"
>>
>>   static void clm_node_left(SaClmNodeIdT node_id)
>>   {
>> @@ -166,7 +167,6 @@ uint32_t avnd_evt_avd_node_up_evh(AVND_C
>>       info = &evt->info.avd->msg_info.d2n_node_up;
>>
>>       /*** update this node with the supplied parameters ***/
>> -    cb->type = info->node_type;
>>       cb->su_failover_max = info->su_failover_max;
>>       cb->su_failover_prob = info->su_failover_prob;
>>
>> @@ -249,8 +249,6 @@ done:
>>       return;
>>   }
>>
>> -static SaVersionT Version = { 'B', 4, 1 };
>> -
>>   static const SaClmCallbacksT_4 callbacks = {
>>           0,
>>           /*.saClmClusterTrackCallback =*/ clm_track_cb
>> @@ -263,11 +261,24 @@ SaAisErrorT avnd_clm_init(void)
>>
>>       TRACE_ENTER();
>>       avnd_cb->first_time_up = true;
> [Praveen] Did not get the reason for its removal?
> Not being updated any where else in the patch.
[AndersW] Nothing is removed here. The CLM initialization loop has been 
enhanced to handle more error codes (TIMEOUT & UNAVAILABLE).
>> -    error = saClmInitialize_4(&avnd_cb->clmHandle, &callbacks, 
>> &Version);
>> -        if (SA_AIS_OK != error) {
>> -                LOG_ER("Failed to Initialize with CLM: %u", error);
>> -                goto done;
>> -        }
>> +    for (;;) {
>> +        SaVersionT Version = { 'B', 4, 1 };
>> +        error = saClmInitialize_4(&avnd_cb->clmHandle, &callbacks,
>> +                      &Version);
>> +        if (error == SA_AIS_ERR_TRY_AGAIN ||
>> +            error == SA_AIS_ERR_TIMEOUT ||
>> +                    error == SA_AIS_ERR_UNAVAILABLE) {
>> +            if (error != SA_AIS_ERR_TRY_AGAIN) {
>> +                LOG_WA("saClmInitialize_4 returned %u",
>> +                       (unsigned) error);
>> +            }
>> +            osaf_nanosleep(&kHundredMilliseconds);
>> +            continue;
>> +        }
>> +        if (error == SA_AIS_OK) break;
>> +        LOG_ER("Failed to Initialize with CLM: %u", error);
>> +        goto done;
>> +    }
>>       error = saClmSelectionObjectGet(avnd_cb->clmHandle, 
>> &avnd_cb->clm_sel_obj);
>>           if (SA_AIS_OK != error) {
>>                   LOG_ER("Failed to get CLM selectionObject: %u", 
>> error);
>>


------------------------------------------------------------------------------
Transform Data into Opportunity.
Accelerate data analysis in your applications with
Intel Data Analytics Acceleration Library.
Click to learn more.
http://pubads.g.doubleclick.net/gampad/clk?id=278785471&iu=/4140
_______________________________________________
Opensaf-devel mailing list
[email protected]
https://lists.sourceforge.net/lists/listinfo/opensaf-devel

Reply via email to