osaf/libs/agents/saf/amfa/ava_api.c         |  23 ++++++++++++++++++++++-
 osaf/libs/common/amf/include/amf_amfparam.h |   1 +
 osaf/services/saf/amf/amfnd/cbq.cc          |   6 ++++++
 osaf/services/saf/amf/amfnd/err.cc          |  17 ++++++++++++++---
 samples/amf/sa_aware/amf_demo_script        |  17 +++++++++--------
 5 files changed, 52 insertions(+), 12 deletions(-)


If a component crash or exit in context of the terminate callback, AMF will not
use the "ava down" event to trigger cleanup and finish component termination.
Instead the CallbackTimeout is awaited which can be very long.

This is a big problem if this happens during an upgrade, it will cause the
upgrade to fail potentially leading to system restore.

By making the saAmfResponse call synchronous for the TerminateCallback, the
"ava down" event can be used to trigger cleanup of the component.

diff --git a/osaf/libs/agents/saf/amfa/ava_api.c 
b/osaf/libs/agents/saf/amfa/ava_api.c
--- a/osaf/libs/agents/saf/amfa/ava_api.c
+++ b/osaf/libs/agents/saf/amfa/ava_api.c
@@ -1830,6 +1830,8 @@ SaAisErrorT saAmfResponse(SaAmfHandleT h
        AVA_PEND_RESP *list_resp = 0;
        AVA_PEND_CBK_REC *rec = 0;
        SaAisErrorT rc = SA_AIS_OK;
+       AVSV_NDA_AVA_MSG *msg_rsp = NULL;
+
        TRACE_ENTER2("SaAmfHandleT passed is %llx", hdl);
 
        if ((!m_AVA_AMF_RESP_ERR_CODE_IS_VALID(error)) || (!inv)) {
@@ -1875,10 +1877,29 @@ SaAisErrorT saAmfResponse(SaAmfHandleT h
 
        /* populate & send the 'AMF response' message */
        m_AVA_AMF_RESP_MSG_FILL(msg, cb->ava_dest, hdl, inv, error, 
cb->comp_name);
-       rc = ava_mds_send(cb, &msg, 0);
+
+       if (rec->cbk_info->type == AVSV_AMF_COMP_TERM)
+               rc = ava_mds_send(cb, &msg, &msg_rsp);
+       else
+               rc = ava_mds_send(cb, &msg, NULL);
+
        if (NCSCC_RC_SUCCESS != rc)
                rc = SA_AIS_ERR_TRY_AGAIN;
 
+       if (rec->cbk_info->type == AVSV_AMF_COMP_TERM) {
+               if (msg_rsp->type != AVSV_AVND_AMF_API_RESP_MSG) {
+                       TRACE_2("ERR_LIBRARY: wrong type");
+                       rc = SA_AIS_ERR_LIBRARY;
+                       goto done;
+               }
+               if (msg_rsp->info.api_resp_info.type != AVSV_AMF_COMP_TERM_RSP) 
{
+                       TRACE_2("ERR_LIBRARY: wrong msg type");
+                       rc = SA_AIS_ERR_LIBRARY;
+                       goto done;
+               }
+               rc = msg_rsp->info.api_resp_info.rc;
+       }
+
        /* if we are done with this rec, free it */
        if ((rec->cbk_info->type != AVSV_AMF_PG_TRACK) && 
!m_AVA_HDL_IS_CBK_REC_IN_DISPATCH(rec)) {
                /* In case of Quiescing callback dont free the rec,
diff --git a/osaf/libs/common/amf/include/amf_amfparam.h 
b/osaf/libs/common/amf/include/amf_amfparam.h
--- a/osaf/libs/common/amf/include/amf_amfparam.h
+++ b/osaf/libs/common/amf/include/amf_amfparam.h
@@ -51,6 +51,7 @@ typedef enum avsv_amf_api_type {
        AVSV_AMF_SEL_OBJ_GET,
        AVSV_AMF_DISPATCH,
        AVSV_AMF_COMP_NAME_GET,
+       AVSV_AMF_COMP_TERM_RSP,
        AVSV_AMF_API_MAX
 } AVSV_AMF_API_TYPE;
 
diff --git a/osaf/services/saf/amf/amfnd/cbq.cc 
b/osaf/services/saf/amf/amfnd/cbq.cc
--- a/osaf/services/saf/amf/amfnd/cbq.cc
+++ b/osaf/services/saf/amf/amfnd/cbq.cc
@@ -380,6 +380,12 @@ uint32_t avnd_evt_ava_resp_evh(AVND_CB *
                rc = avnd_comp_clc_fsm_run(cb, comp, (SA_AIS_OK == resp->err) ?
                                           AVND_COMP_CLC_PRES_FSM_EV_TERM_SUCC 
: AVND_COMP_CLC_PRES_FSM_EV_CLEANUP);
                avnd_comp_cbq_rec_pop_and_del(cb, comp, cbk_rec, false);
+
+               // if all OK send a response to the client
+               if ((rc == NCSCC_RC_SUCCESS) && (resp->err == SA_AIS_OK)) {
+                       rc = avnd_amf_resp_send(cb, AVSV_AMF_COMP_TERM_RSP, 
SA_AIS_OK, NULL,
+                                       &api_info->dest, &evt->mds_ctxt, comp, 
false);
+               }
                break;
 
        case AVSV_AMF_CSI_SET:
diff --git a/osaf/services/saf/amf/amfnd/err.cc 
b/osaf/services/saf/amf/amfnd/err.cc
--- a/osaf/services/saf/amf/amfnd/err.cc
+++ b/osaf/services/saf/amf/amfnd/err.cc
@@ -332,13 +332,24 @@ uint32_t avnd_err_process(AVND_CB *cb, A
                esc_rcvr = SA_AMF_NODE_FAILOVER;
        }
 
-       /* We need not entertain errors when comp is not in shape. 
-          In case of terminating, ava down may come before clc response */
+       /* We need not entertain errors when comp is not in shape. */
        if (comp && (m_AVND_COMP_PRES_STATE_IS_UNINSTANTIATED(comp) ||
                     m_AVND_COMP_PRES_STATE_IS_INSTANTIATIONFAILED(comp) ||
-                    m_AVND_COMP_PRES_STATE_IS_TERMINATIONFAILED(comp) || 
m_AVND_COMP_PRES_STATE_IS_TERMINATING(comp)))
+                    m_AVND_COMP_PRES_STATE_IS_TERMINATIONFAILED(comp)))
                goto done;
 
+       if  (m_AVND_COMP_PRES_STATE_IS_TERMINATING(comp)) {
+               // special treat failures while terminating, no escalation just 
cleanup
+               LOG_NO("'%s' faulted due to '%s' : Recovery is 'cleanup'",
+                       comp->name.value, g_comp_err[err_info->src]);
+               rc = avnd_comp_clc_fsm_run(cb, comp, 
AVND_COMP_CLC_PRES_FSM_EV_CLEANUP);
+               if (NCSCC_RC_SUCCESS != rc) {
+                       // this is bad situation, what can we do?
+                       LOG_ER("%s: '%s' termination failed", __FUNCTION__, 
comp->name.value);
+               }
+               goto done;
+       }
+
        /* update the err params */
        comp->err_info.src = err_info->src;
 
diff --git a/samples/amf/sa_aware/amf_demo_script 
b/samples/amf/sa_aware/amf_demo_script
--- a/samples/amf/sa_aware/amf_demo_script
+++ b/samples/amf/sa_aware/amf_demo_script
@@ -39,10 +39,10 @@ pidfile="$piddir/${SA_AMF_COMPONENT_NAME
 
 RETVAL=0
 
-start()
+instantiate()
 {
        args="$*"
-       echo -n "Starting $prog: "
+       logger -st $name "instantiate $binary"
        start_daemon -p $pidfile $binary $args
        RETVAL=$?
        if [ $RETVAL -ne 0 ]; then
@@ -51,9 +51,10 @@ start()
        return $RETVAL
 }
 
-stop()
+cleanup()
 {
-       echo -n "Stopping $prog: "
+    pid=$(cat $pidfile)
+       logger -st $name "cleanup $binary pid:$pid"
        killproc -p $pidfile $binary
        RETVAL=$?
        if [ $RETVAL -ne 0 ]; then
@@ -71,13 +72,13 @@ status()
 
 CMD=$1
 case $CMD in
-       start|instantiate)
+       instantiate)
                shift
-               start $*
+               instantiate $*
                RETVAL=$?
                ;;
-       stop|cleanup)
-               stop $*
+       cleanup)
+               cleanup $*
                RETVAL=$?
                ;;
        status)

------------------------------------------------------------------------------
October Webinars: Code for Performance
Free Intel webinars can help you accelerate application performance.
Explore tips for MPI, OpenMP, advanced profiling, and more. Get the most from 
the latest Intel processors and coprocessors. See abstracts and register >
http://pubads.g.doubleclick.net/gampad/clk?id=60135031&iu=/4140/ostg.clktrk
_______________________________________________
Opensaf-devel mailing list
Opensaf-devel@lists.sourceforge.net
https://lists.sourceforge.net/lists/listinfo/opensaf-devel

Reply via email to