Hi Lennart,

  I'm not sure I understand this. Can you explain why you think this is
not backwards compatible?

  This new functionality will put the campaign into "suspended by error
detected" if any new/upgraded SU (or component in the SU) fails during
the campaign (after it has been upgraded).

Alex

On 11/14/2016 09:35 AM, Lennart Lund wrote:
> ------------------------------------------------------------------------
> NOTICE: This email was received from an EXTERNAL sender
> ------------------------------------------------------------------------
> 
> Hi Alex
> 
> After I posted this ack I and Rafael found a possible Non backwards
> compatibility (NBC) problem. If so this must be solved before pushing.
> 
> I believe this change introduce a new behavior when campaigns are
> executed if a component is failing. If I am right this could be solved
> by introducing a new attribute in the SMF configuration class that makes
> it possible to switch on/off this new behavior. The default must be that
> it is switched off (legacy behavior) also the absence of the new
> attribute must be interpreted as off (legacy)
> 
> Thanks
> Lennart
> 
>> -----Original Message-----
>> From: Lennart Lund [mailto:lennart.l...@ericsson.com]
>> Sent: den 14 november 2016 13:56
>> To: Alex Jones <ajo...@genband.com>; reddy.neelaka...@oracle.com;
>> Rafael Odzakow <rafael.odza...@ericsson.com>
>> Cc: opensaf-devel@lists.sourceforge.net
> <mailto:opensaf-devel@lists.sourceforge.net>
>> Subject: Re: [devel] [PATCH 1 of 1] smfd: add support for asynchronous
>> detection of failed AMF entities [#2145]
>>
>> Hi Alex
>>
>> Ack with comment
>> See my comments/questions inline [Lennart]
>>
>> Thanks
>> Lennart
>>
>> > -----Original Message-----
>> > From: Alex Jones [mailto:ajo...@genband.com]
>> > Sent: den 11 november 2016 20:50
>> > To: reddy.neelaka...@oracle.com; Lennart Lund
>> > <lennart.l...@ericsson.com>; Rafael Odzakow
>> > <rafael.odza...@ericsson.com>
>> > Cc: opensaf-devel@lists.sourceforge.net
> <mailto:opensaf-devel@lists.sourceforge.net>
>> > Subject: [PATCH 1 of 1] smfd: add support for asynchronous detection of
>> > failed AMF entities [#2145]
>> >
>> > osaf/services/saf/smfsv/smfd/SmfCampaignThread.cc | 235
>> > +++++++++++++++++++++-
>> > osaf/services/saf/smfsv/smfd/SmfCampaignThread.hh | 15 +
>> > osaf/services/saf/smfsv/smfd/SmfStepTypes.cc | 32 +-
>> > 3 files changed, 263 insertions(+), 19 deletions(-)
>> >
>> >
>> > This patch adds support for section 4.2.1.3 of SMF A.01.02 spec.
>> >
>> > diff --git a/osaf/services/saf/smfsv/smfd/SmfCampaignThread.cc
>> > b/osaf/services/saf/smfsv/smfd/SmfCampaignThread.cc
>> > --- a/osaf/services/saf/smfsv/smfd/SmfCampaignThread.cc
>> > +++ b/osaf/services/saf/smfsv/smfd/SmfCampaignThread.cc
>> > @@ -39,6 +39,7 @@
>> > #include "SmfUtils.hh"
>> >
>> > SmfCampaignThread *SmfCampaignThread::s_instance = NULL;
>> > +SaNtfSubscriptionIdT SmfCampaignThread::operStateSubId(1);
>> >
>> > #define SMF_RDA_RETRY_COUNT 25 /* This is taken as the csi's are
>> assigned
>> > in parallel */
>> >
>> > @@ -140,7 +141,12 @@ void SmfCampaignThread::main(NCSCONTEXT
>> > SmfCampaignThread::~SmfCampaignThread()
>> > {
>> > TRACE_ENTER();
>> > - SaAisErrorT rc = saNtfFinalize(m_ntfHandle);
>> > + SaAisErrorT rc(saNtfNotificationUnsubscribe(operStateSubId));
>> > + if (rc != SA_AIS_OK) {
>> > + LOG_ER("Failed to unsubscribe to oper state notifications
>> > %u", rc);
>> > + }
>> [Lennart] Why is this needed? Does not saNtfFinalize remove SMF as a NTF
>> client including all subscriptions?
>>
>> > +
>> > + rc = saNtfFinalize(m_ntfHandle);
>> > if (rc != SA_AIS_OK) {
>> > LOG_ER("Failed to finalize NTF handle %u", rc);
>> > }
>> > @@ -303,10 +309,14 @@ int SmfCampaignThread::initNtf(void)
>> > {
>> > SaAisErrorT rc = SA_AIS_ERR_TRY_AGAIN;
>> > SaVersionT ntfVersion = { 'A', 1, 1 };
>> > + SaNtfCallbacksT callbacks = {
>> > + ntfNotificationCallback,
>> > + 0
>> > + };
>> > unsigned int numOfTries = 50;
>> >
>> > while (rc == SA_AIS_ERR_TRY_AGAIN && numOfTries > 0) {
>> > - rc = saNtfInitialize(&m_ntfHandle, NULL, &ntfVersion);
>> > + rc = saNtfInitialize(&m_ntfHandle, &callbacks, &ntfVersion);
>> > if (rc != SA_AIS_ERR_TRY_AGAIN) {
>> > break;
>> > }
>> > @@ -319,9 +329,202 @@ int SmfCampaignThread::initNtf(void)
>> > return -1;
>> > }
>> >
>> > + // subscribe to operational state change notifications generated
> by AMF
>> > + rc = initNtfSubscriptions();
>> > + if (rc != SA_AIS_OK) {
>> > + LOG_ER("initNtfSubscriptions FAILED rc=%s", saf_error(rc));
>> > + return -1;
>> > + }
>> > +
>> > return 0;
>> > }
>> >
>> > +SaAisErrorT SmfCampaignThread::initNtfSubscriptions(void) {
>> > + TRACE_ENTER();
>> > + SaAisErrorT rc(SA_AIS_OK);
>> > +
>> > + do {
>> > + SaNtfStateChangeNotificationFilterT stateChangeFilter;
>> > +
>> > + SaAisErrorT rc(saNtfStateChangeNotificationFilterAllocate(
>> > + m_ntfHandle,
>> > + &stateChangeFilter,
>> > + 0,
>> > + 0,
>> > + 0,
>> > + 0,
>> > + 0,
>> > + 0));
>> > +
>> > + if (rc != SA_AIS_OK) {
>> > + LOG_ER("saNtfAttributeChangeNotificationFilterAllocate FAILED
>> rc=%s",
>> > + saf_error(rc));
>> > + break;
>> > + }
>> > +
>> > + SaNtfNotificationTypeFilterHandlesT notificationFilterHandles = {
>> > + 0,
>> > + 0,
>> > + stateChangeFilter.notificationFilterHandle,
>> > + 0,
>> > + 0
>> > + };
>> > +
>> > + rc = saNtfNotificationSubscribe(&notificationFilterHandles,
>> > operStateSubId);
>> > +
>> > + if (rc != SA_AIS_OK) {
>> > + LOG_ER("saNtfNotificationSubscribe FAILED rc=%s", saf_error(rc));
>> > + break;
>> > + }
>> > +
>> > + rc =
>> > saNtfNotificationFilterFree(stateChangeFilter.notificationFilterHandle);
>> > + if (rc != SA_AIS_OK) {
>> > + LOG_ER("saNtfNotificationFilterFree FAILED rc=%s", saf_error(rc));
>> > + break;
>> > + }
>> > + } while (false);
>> > +
>> > + TRACE_LEAVE();
>> > + return rc;
>> > +}
>> > +
>> > +bool SmfCampaignThread::isAMFOperState(const SaNtfClassIdT& classId)
>> > const {
>> > + TRACE_ENTER();
>> > + bool status(false);
>> > +
>> > + if (classId.vendorId == SA_NTF_VENDOR_ID_SAF &&
>> > + classId.majorId == SA_SVC_AMF &&
>> > + classId.minorId == SA_AMF_NTFID_SU_OP_STATE) {
>> > + status = true;
>> > + }
>> > +
>> > + TRACE_LEAVE2("%i", status);
>> > + return status;
>> > +}
>> > +
>> > +void SmfCampaignThread::handleStateChangeNotification(
>> > + const SaNtfStateChangeNotificationT& stateChangeNotification) {
>> > + TRACE_ENTER();
>> > + if (stateChangeNotification.notificationHeader.eventType) {
>> > + if (*stateChangeNotification.notificationHeader.eventType ==
>> > + SA_NTF_OBJECT_STATE_CHANGE) {
>> > + handleObjectStateChangeNotification(stateChangeNotification);
>> > + } else {
>> > + TRACE("ignoring state change notification with event type: %i",
>> > + *stateChangeNotification.notificationHeader.eventType);
>> > + }
>> > + }
>> > + TRACE_LEAVE();
>> > +}
>> > +
>> > +void SmfCampaignThread::handleAmfObjectStateChangeNotification(
>> > + const SaNtfStateChangeNotificationT& stateChangeNotification) {
>> > + TRACE_ENTER();
>> > +
>> > + do {
>> > + bool disabled(false);
>> > +
>> > + // see if this is a failure of an upgraded SU
>> > + for (SaUint16T i(0); i < stateChangeNotification.numStateChanges;
> i++) {
>> > + if (stateChangeNotification.changedStates[i].newState ==
>> > + SA_AMF_OPERATIONAL_DISABLED) {
>> > + disabled = true;
>> > + break;
>> > + }
>> > + }
>> > +
>> > + if (!disabled) {
>> > + TRACE("state change is not to DISABLED -- ignoring");
>> > + break;
>> > + }
>> > +
>> > + for (SaUint16T i(0);
>> > + i < stateChangeNotification.notificationHeader.numAdditionalInfo;
>> > + i++) {
>> > + if
> (stateChangeNotification.notificationHeader.additionalInfo[i].infoId
>> !=
>> > + SA_AMF_MAINTENANCE_CAMPAIGN_DN ||
>> > +
>> stateChangeNotification.notificationHeader.additionalInfo[i].infoType
>> > + != SA_NTF_VALUE_LDAP_NAME) {
>> > + continue;
>> > + }
>> > +
>> > + SaNameT *name(0);
>> > + SaUint16T dataSize(0);
>> > +
>> > + SaAisErrorT rc(saNtfPtrValGet(
>> > + stateChangeNotification.notificationHandle,
>> > +
>> > &stateChangeNotification.notificationHeader.additionalInfo[i].infoValue,
>> > + reinterpret_cast<void **>(&name),
>> > + &dataSize));
>> > +
>> > + if (rc == SA_AIS_OK) {
>> > + SaConstStringT maintenanceCampaign(saAisNameBorrow(name));
>> > +
>> > + if (!strcmp(maintenanceCampaign,
>> > + s_instance->m_campaign->getDn().c_str())) {
>> > + LOG_ER("SU: %s failed after upgrade in campaign",
>> > +
>> >
>> saAisNameBorrow(stateChangeNotification.notificationHeader.notificationO
>> > bject));
>> > + s_instance->m_campaign->getUpgradeCampaign()->suspend();
>> > + s_instance->m_campaign->getUpgradeCampaign()->setCampState(
>> > + SA_SMF_CMPG_ERROR_DETECTED);
>> > +
>> > + std::string error(saAisNameBorrow(
>> > + stateChangeNotification.notificationHeader.notificationObject));
>> > +
>> > + error += " failed after upgrade";
>> > + s_instance->m_campaign->setError(error);
>> > + } else {
>> > + LOG_ER("maintenance campaign received from AMF (%s) is not the "
>> > + "same as ours (%s)",
>> > + maintenanceCampaign,
>> > + s_instance->m_campaign->getDn().c_str());
>> > + }
>> > + } else {
>> > + LOG_ER("saNtfPtrValGet failed: %d", rc);
>> > + }
>> > + }
>> > + } while (false);
>> > +
>> > + TRACE_LEAVE();
>> > +}
>> > +
>> > +void SmfCampaignThread::handleObjectStateChangeNotification(
>> > + const SaNtfStateChangeNotificationT& stateChangeNotification) {
>> > + TRACE_ENTER();
>> > +
>> > + if (stateChangeNotification.notificationHeader.notificationClassId &&
>> > +
>> >
>> isAMFOperState(*stateChangeNotification.notificationHeader.notificationCl
>> > assId)) {
>> > + handleAmfObjectStateChangeNotification(stateChangeNotification);
>> > + } else {
>> > + TRACE("ignoring non-AMF state change notification");
>> > + }
>> > +
>> > + TRACE_LEAVE();
>> > +}
>> > +
>> > +void SmfCampaignThread::ntfNotificationCallback(
>> > + SaNtfSubscriptionIdT subId,
>> > + const SaNtfNotificationsT *notification) {
>> > + TRACE_ENTER();
>> > +
>> > + do {
>> > + if (subId != operStateSubId) {
>> > + TRACE("unknown subscription id received in ntfNotificationCallback:
>> > %d",
>> > + subId);
>> > + break;
>> > + }
>> > +
>> > + if (notification->notificationType == SA_NTF_TYPE_STATE_CHANGE) {
>> > + s_instance->handleStateChangeNotification(
>> > + notification->notification.stateChangeNotification);
>> > + } else {
>> > + TRACE("ignoring NTF notification of type: %i",
>> > + notification->notificationType);
>> > + }
>> > + } while (false);
>> > +
>> > + TRACE_LEAVE();
>> > +}
>> > /**
>> > * SmfCampaignThread::send
>> > * send event to the thread.
>> > @@ -672,18 +875,31 @@ int SmfCampaignThread::handleEvents(void
>> > {
>> > TRACE_ENTER();
>> > NCS_SEL_OBJ mbx_fd = ncs_ipc_get_sel_obj(&m_mbx);
>> > - struct pollfd fds[1];
>> > +
>> > + SaSelectionObjectT ntfSelObj(0);
>> > +
>> > + SaAisErrorT rc(saNtfSelectionObjectGet(m_ntfHandle, &ntfSelObj));
>> > + if (rc != SA_AIS_OK) {
>> > + LOG_ER("saNtfSelectionObjectGet FAILED - %s", saf_error(rc));
>> > + return 1;
>> > + }
>> > +
>> > + struct pollfd fds[2];
>> >
>> > /* Set up all file descriptors to listen to */
>> > fds[0].fd = mbx_fd.rmv_obj;
>> > fds[0].events = POLLIN;
>> > fds[0].revents = 0;
>> >
>> > + fds[1].fd = ntfSelObj;
>> > + fds[1].events = POLLIN;
>> > + fds[1].revents = 0;
>> > +
>> > TRACE("Campaign thread %s waiting for events", m_campaign-
>> > >getDn().c_str());
>> >
>> > while (m_running) {
>> >
>> > - int ret = poll(fds, 1, SMF_UPDATE_ELAPSED_TIME_INTERVAL);
>> > + int ret = poll(fds, sizeof(fds) / sizeof(pollfd),
>> > SMF_UPDATE_ELAPSED_TIME_INTERVAL);
>> >
>> > if (ret == -1) {
>> > if (errno == EINTR)
>> > @@ -699,7 +915,16 @@ int SmfCampaignThread::handleEvents(void
>> > processEvt();
>> > }
>> >
>> > - m_campaign->updateElapsedTime();
>> > + if (fds[1].revents & POLLIN) {
>> > + // dispatch NTF events
>> > + rc = saNtfDispatch(m_ntfHandle, SA_DISPATCH_ALL);
>> > +
>> > + if (rc != SA_AIS_OK) {
>> > + LOG_ER("saNtfDispatch FAILED - %s", saf_error(rc));
>> > + }
>> > + }
>> > +
>> > + m_campaign->updateElapsedTime();
>> > }
>> > TRACE_LEAVE();
>> > return 0;
>> > diff --git a/osaf/services/saf/smfsv/smfd/SmfCampaignThread.hh
>> > b/osaf/services/saf/smfsv/smfd/SmfCampaignThread.hh
>> > --- a/osaf/services/saf/smfsv/smfd/SmfCampaignThread.hh
>> > +++ b/osaf/services/saf/smfsv/smfd/SmfCampaignThread.hh
>> > @@ -190,9 +190,24 @@ class SmfCampaignThread {
>> >
>> > SaAisErrorT createImmHandle(SmfCampaign * i_campaign);
>> > SaAisErrorT deleteImmHandle();
>> > + SaAisErrorT initNtfSubscriptions(void);
>> > +
>> > + void handleStateChangeNotification(const
>> > SaNtfStateChangeNotificationT&);
>> > +
>> > + void handleObjectStateChangeNotification(
>> > + const SaNtfStateChangeNotificationT&);
>> > +
>> > + void handleAmfObjectStateChangeNotification(
>> > + const SaNtfStateChangeNotificationT&);
>> > +
>> > + bool isAMFOperState(const SaNtfClassIdT&) const;
>> > +
>> > + static void ntfNotificationCallback(SaNtfSubscriptionIdT,
>> > + const SaNtfNotificationsT *);
>> >
>> > static void main(NCSCONTEXT info);
>> > static SmfCampaignThread *s_instance;
>> > + static SaNtfSubscriptionIdT operStateSubId;
>> >
>> > NCSCONTEXT m_task_hdl;
>> > SYSF_MBX m_mbx; /* mailbox */
>> > diff --git a/osaf/services/saf/smfsv/smfd/SmfStepTypes.cc
>> > b/osaf/services/saf/smfsv/smfd/SmfStepTypes.cc
>> > --- a/osaf/services/saf/smfsv/smfd/SmfStepTypes.cc
>> > +++ b/osaf/services/saf/smfsv/smfd/SmfStepTypes.cc
>> > @@ -1247,8 +1247,8 @@ SmfStepTypeNodeReboot::execute()
>> > return false;
>> > }
>> >
>> > - /* Modify information model and set maintenance status */
>> > - LOG_NO("STEP: Modify information model and set maintenance
>> > status");
>> > + /* Modify information model */
>> > + LOG_NO("STEP: Modify information model");
>> > if (m_step->modifyInformationModel() != SA_AIS_OK) {
>> > LOG_ER("Failed to Modify information model in step=%s",m_step-
>> > >getRdn().c_str());
>> > return false;
>> > @@ -1261,11 +1261,6 @@ SmfStepTypeNodeReboot::execute()
>> > return false;
>> > }
>> >
>> > - if (m_step->setMaintenanceStateActUnits() == false) {
>> > - LOG_ER("Failed to set maintenance state in step=%s",m_step-
>> > >getRdn().c_str());
>> > - return false;
>> > - }
>> > -
>> > /* The action below is an add on to
> SMF.-------------------------------------
>> --
>> > */
>> > /* See if any of the software bundles installed at online
> installation has
>> > the */
>> > /* saSmfBundleInstallOfflineScope attribute set to
>> > SA_SMF_CMD_SCOPE_PLM_EE */
>> > @@ -1372,6 +1367,13 @@ SmfStepTypeNodeReboot::execute()
>> > /* the units in the same state as before locking */
>> > m_step->copyDuInitStateToAu();
>> >
>> > + /* Now that the node is up, set the maintenance status */
>> > + LOG_NO("STEP: Set Maintenance Status");
>> > + if (m_step->setMaintenanceStateActUnits() == false) {
>> > + LOG_ER("Failed to set maintenance state in step=%s",m_step-
>> > >getRdn().c_str());
>> > + return false;
>> > + }
>> > +
>> > /* Instantiate activation units */
>> > LOG_NO("STEP: Instantiate activation units");
>> > if (m_step->instantiateActivationUnits() == false) {
>> > @@ -1685,8 +1687,8 @@ SmfStepTypeNodeRebootAct::execute()
>> > return false;
>> > }
>> >
>> > - /* Modify information model and set maintenance status */
>> > - LOG_NO("STEP: Modify information model and set maintenance
>> > status");
>> > + /* Modify information model */
>> > + LOG_NO("STEP: Modify information model");
>> > if (m_step->modifyInformationModel() != SA_AIS_OK) {
>> > LOG_ER("Failed to Modify information model in step=%s",m_step-
>> > >getRdn().c_str());
>> > return false;
>> > @@ -1699,11 +1701,6 @@ SmfStepTypeNodeRebootAct::execute()
>> > return false;
>> > }
>> >
>> > - if (m_step->setMaintenanceStateActUnits() == false) {
>> > - LOG_ER("Failed to set maintenance state in step=%s",m_step-
>> > >getRdn().c_str());
>> > - return false;
>> > - }
>> > -
>> > /* Offline installation of new software */
>> > LOG_NO("STEP: Offline installation of new software");
>> > if (m_step->offlineInstallNewBundles() == false) {
>> > @@ -1746,6 +1743,13 @@ SmfStepTypeNodeRebootAct::execute()
>> > /* the units in the same state as before locking */
>> > m_step->copyDuInitStateToAu();
>> >
>> > + /* Now that the node is up, set the maintenance status */
>> > + LOG_NO("STEP: Set Maintenance Status");
>> > + if (m_step->setMaintenanceStateActUnits() == false) {
>> > + LOG_ER("Failed to set maintenance state in step=%s",m_step-
>> > >getRdn().c_str());
>> > + return false;
>> > + }
>> > +
>> > /* Instantiate activation units */
>> > LOG_NO("STEP: Instantiate activation units");
>> > if (m_step->instantiateActivationUnits() == false) {
>>
>>
>>
> ------------------------------------------------------------------------------
>> Developer Access Program for Intel Xeon Phi Processors
>> Access to Intel Xeon Phi processor-based developer platforms.
>> With one year of Intel Parallel Studio XE.
>> Training and support from Colfax.
>> Order your platform today. http://sdm.link/xeonphi
>> _______________________________________________
>> Opensaf-devel mailing list
>> Opensaf-devel@lists.sourceforge.net
> <mailto:Opensaf-devel@lists.sourceforge.net>
>> https://lists.sourceforge.net/lists/listinfo/opensaf-devel

Attachment: signature.asc
Description: OpenPGP digital signature

------------------------------------------------------------------------------
Developer Access Program for Intel Xeon Phi Processors
Access to Intel Xeon Phi processor-based developer platforms.
With one year of Intel Parallel Studio XE.
Training and support from Colfax.
Order your platform today. http://sdm.link/xeonphi
_______________________________________________
Opensaf-devel mailing list
Opensaf-devel@lists.sourceforge.net
https://lists.sourceforge.net/lists/listinfo/opensaf-devel

Reply via email to