Hi Hung,
Reviewed and tested the patch.
Ack.
/Neel.
On 2016/09/20 11:07 AM, Hung Nguyen wrote:
> osaf/services/saf/immsv/immd/immd_amf.c | 1 +
> osaf/services/saf/immsv/immd/immd_evt.c | 2 +-
> osaf/services/saf/immsv/immd/immd_proc.c | 49
> ++++++++++++++-----------------
> osaf/services/saf/immsv/immd/immd_proc.h | 2 +-
> 4 files changed, 25 insertions(+), 29 deletions(-)
>
>
> During node shutdown, if active IMMD is down before IMMND,
> the IMMD will fail to broadcast discard node message to other IMMNDs.
>
> In this case, standby IMMD will help broadcast the discard messages.
> But it should only do that when the active IMMD is really down
> (immd_remote_up is false)
> or when it becomes active (invoking immd_pending_discards in rda/amf
> callbacks).
>
> This is to prevent the active IMMD and standby IMMD from sending the discard
> messages with different number
> which cause fevs message loss.
>
> diff --git a/osaf/services/saf/immsv/immd/immd_amf.c
> b/osaf/services/saf/immsv/immd/immd_amf.c
> --- a/osaf/services/saf/immsv/immd/immd_amf.c
> +++ b/osaf/services/saf/immsv/immd/immd_amf.c
> @@ -265,6 +265,7 @@ static void immd_saf_csi_set_cb(SaInvoca
> immd_proc_elect_coord(cb, true);
> }
> immd_db_purge_fevs(cb);
> + immd_pending_discards(cb);
> }
> }
>
> diff --git a/osaf/services/saf/immsv/immd/immd_evt.c
> b/osaf/services/saf/immsv/immd/immd_evt.c
> --- a/osaf/services/saf/immsv/immd/immd_evt.c
> +++ b/osaf/services/saf/immsv/immd/immd_evt.c
> @@ -2582,7 +2582,7 @@ static uint32_t immd_evt_proc_rda_callba
> immd_proc_elect_coord(cb, true);
> }
> immd_db_purge_fevs(cb);
> - immd_pending_payload_discards(cb); /*Ensure node down for
> payloads.*/
> + immd_pending_discards(cb);
> }
> done:
> TRACE_LEAVE();
> diff --git a/osaf/services/saf/immsv/immd/immd_proc.c
> b/osaf/services/saf/immsv/immd/immd_proc.c
> --- a/osaf/services/saf/immsv/immd/immd_proc.c
> +++ b/osaf/services/saf/immsv/immd/immd_proc.c
> @@ -615,7 +615,6 @@ uint32_t immd_process_immnd_down(IMMD_CB
> NCS_UBAID uba;
> char *tmpData = NULL;
> bool coord_exists = true; /* Assumption at this point.*/
> - bool possible_fo = false;
> TRACE_ENTER();
>
> TRACE_5("immd_process_immnd_down pid:%u on-active:%u "
> @@ -648,29 +647,26 @@ uint32_t immd_process_immnd_down(IMMD_CB
> "detected at standby immd!! %x. "
> "Possible failover",
>
> immd_get_slot_and_subslot_id_from_node_id(immnd_info->immnd_key),
> cb->immd_self_id);
> - possible_fo = true;
> +
> if (immnd_info->isCoord && immnd_info->syncStarted) {
> immd_proc_abort_sync(cb, immnd_info);
> }
> +
> + if (coord_exists) {
> + immd_proc_rebroadcast_fevs(cb, 2);
> + }
> }
> }
>
> - if (active || possible_fo || !cb->immd_remote_up) {
> + if (active || !cb->immd_remote_up) {
> /*
> ** HAFE - Let IMMND subscribe for IMMND up/down events instead?
> ** ABT - Not for now. IMMND up/down are only subscribed by
> IMMD.
> - ** This is the cleanest solution with respect to FEVS and
> works fine
> - ** for all cases except IMMND down at the active controller.
> - ** That case has to be handled in a special way. Currently we
> do it
> - ** by having both the standby and active IMMD broadcast the
> same
> - ** DISCARD_NODE message. IMMNDs have to cope with the possibly
> - ** redundant message, which they do in fevs_receive discarding
> - ** any duplicate (same sequence no).
> + ** Active IMMD will inform other IMMNDs about the down IMMND.
> + ** If the active is down, the standby (has not become active
> yet)
> + ** will do the job.
> */
> if (coord_exists) {
> - if (possible_fo) {
> - immd_proc_rebroadcast_fevs(cb, 2);
> - }
>
> TRACE_5("Notify all remaining IMMNDs of the departed
> IMMND");
> memset(&send_evt, 0, sizeof(IMMSV_EVT));
> @@ -700,14 +696,13 @@ uint32_t immd_process_immnd_down(IMMD_CB
>
> free(tmpData);
> }
> - } else if(!(immnd_info->isOnController)) {
> - /* Standby NOT immediately sending redundant D2ND_DISCARD_NODE
> in this case.
> - But will record any payload down event in case the active SC
> is included
> - in a burst of node downs. See ticket #563. The active IMMD
> may be going
> - down together with many payload nodes, such that the active
> IMMD never has
> - time to generate the discard node message for all payloads.
> This will be
> - detected if this (standby) IMMD becomes active in close time
> proximity.
> - See immd_pending_payload_discards() below.
> + } else {
> + /* Standby NOT immediately sending D2ND_DISCARD_NODE. But will
> record any
> + * IMMND down event. The active IMMD may never have time to
> generate the
> + * discard node message for the IMMND if the active IMMD goes
> down at the
> + * same time with the IMMND. This will be detected if this
> (standby) IMMD
> + * becomes active in close time proximity.
> + * See immd_pending_payload_discards() below.
> */
>
> LOG_IN("Standby IMMD recording IMMND DOWN for node %x",
> immnd_info->immnd_key);
> @@ -730,16 +725,16 @@ uint32_t immd_process_immnd_down(IMMD_CB
>
>
>
> /****************************************************************************
> - * Name : immd_pending_payload_discards
> + * Name : immd_pending_discards
> *
> * Description : Send possibly redundant discard-node message to IMMNDs
> for
> - * payload nodes (IMMNDs) that have departed and not
> returned.
> + * IMMNDs that have departed and not returned.
> * This is needed to plug the small hole that exists in the
> * handling of IMMND node down, when the current active IMMD
> - * is being taken down concurrently with several payloads.
> + * is being taken down.
> *
> * The active IMMD may then be pulled down after having
> - * received the IMMND MDS down event for the payloads, but
> + * received the IMMND MDS down event for the IMMNDs, but
> * before having created or sent the fevs message
> broadcasting
> * each node down to the IMMND cluster.
> *
> @@ -747,7 +742,7 @@ uint32_t immd_process_immnd_down(IMMD_CB
> * in the current epoch. This is an extra guard against the
> new
> * active shooting down a recently restarted payload. The
> list
> * is also pruned in immd_sbevt.c when it receives info
> about
> - * a payload having re-joined.
> + * a IMMND having re-joined.
> *
> * This function should only be invoked by the just recently
> * newly active IMMD. It is only relevant for fail-over, not
> @@ -758,7 +753,7 @@ uint32_t immd_process_immnd_down(IMMD_CB
> *
> * Notes : None.
>
> *****************************************************************************/
> -void immd_pending_payload_discards(IMMD_CB *cb)
> +void immd_pending_discards(IMMD_CB *cb)
> {
> IMMSV_EVT send_evt;
> char *tmpData = NULL;
> diff --git a/osaf/services/saf/immsv/immd/immd_proc.h
> b/osaf/services/saf/immsv/immd/immd_proc.h
> --- a/osaf/services/saf/immsv/immd/immd_proc.h
> +++ b/osaf/services/saf/immsv/immd/immd_proc.h
> @@ -34,7 +34,7 @@ void immd_proc_rebroadcast_fevs(IMMD_CB
>
> uint32_t immd_process_immnd_down(IMMD_CB *cb, IMMD_IMMND_INFO_NODE *node,
> bool active);
>
> -void immd_pending_payload_discards(IMMD_CB *cb);
> +void immd_pending_discards(IMMD_CB *cb);
>
> void immd_cb_dump(void);
>
------------------------------------------------------------------------------
_______________________________________________
Opensaf-devel mailing list
[email protected]
https://lists.sourceforge.net/lists/listinfo/opensaf-devel