Re: [devel] [PATCH 1 of 1] imm: Remove coordinator role when SC absence happens [#1625]

2016-03-07 Thread Neelakanta Reddy
Hi Hung,

Comments inline.

On Monday 07 March 2016 12:37 PM, Hung Nguyen wrote:
> Hi Neel,
>
> Please find my answers inline.
>
>
> BR,
> Hung Nguyen - DEK Technologies
>
> 
> From: Neelakanta reddyreddy.neelaka...@oracle.com
> Sent: Friday, February 26, 2016 12:00PM
> To: Hung Nguyen, Zoran Milinkovic
>  hung.d.ngu...@dektech.com.au,zoran.milinko...@ericsson.com
> Cc: Opensaf-devel
>  opensaf-devel@lists.sourceforge.net
> Subject: Re: [PATCH 1 of 1] imm: Remove coordinator role when SC absence 
> happens [#1625]
>
>
> Hi Hung,
>
> Reviewed and tested the patch.
> Following are the comments:
>
> 1. This patch has to go in a separate defect,After #1625 is pushed.
> 2. re-publish the patch with the comments when the defect is opened
> 3. Comments inline.
>
> Thanks,
> Neel.
>
> On Thursday 25 February 2016 05:29 PM, Hung Nguyen wrote:
>> osaf/services/saf/immsv/immnd/immnd_evt.c  |  13 +--
>>   osaf/services/saf/immsv/immnd/immnd_init.h |   1 +
>>   osaf/services/saf/immsv/immnd/immnd_proc.c |  55 
>> -
>>   3 files changed, 55 insertions(+), 14 deletions(-)
>>
>>
>> Set 'mIsCoord' to false when headless to avoid coordinator from 
>> restarting.
>> Also handle the cases when headless occurs before/during sync.
>>
>> diff --git a/osaf/services/saf/immsv/immnd/immnd_evt.c 
>> b/osaf/services/saf/immsv/immnd/immnd_evt.c
>> --- a/osaf/services/saf/immsv/immnd/immnd_evt.c
>> +++ b/osaf/services/saf/immsv/immnd/immnd_evt.c
>> @@ -10196,18 +10196,7 @@ static uint32_t immnd_evt_proc_mds_evt(I
>>   } else { /* SC ABSENCE ALLOWED */
>>   LOG_WA("SC Absence IS allowed:%u IMMD service is DOWN", 
>> cb->mScAbsenceAllowed);
>>   if(cb->mIsCoord) {
>> -/* Note that normally the coord will reside at SCs 
>> so this branch will
>> -   only be relevant if REPEATED toal scAbsence 
>> occurs. After SC absence
>> -   and subsequent return of SC, the coord will be 
>> elected at a payload.
>> -   That coord will be active untill restart of that 
>> payload..
>> -   unless we add functionality for the payload coord 
>> to restart after
>> -   a few minutes .. ?
>> -*/
>> -LOG_WA("This IMMND coord has to exit allowing 
>> restarted IMMD to select new coord");
>> -if(cb->mState < IMM_SERVER_SYNC_SERVER) {
>> -immnd_ackToNid(NCSCC_RC_FAILURE);
>> -}
>> -exit(1);
>> +immnd_proc_remove_coordinator_role(cb);
> No, need of new function, add the code here.
>>   } else if(cb->mState <= IMM_SERVER_LOADING_PENDING) {
>>   /* Reset state in payloads that had not joined. No 
>> need to restart. */
>>   LOG_IN("Resetting IMMND state from %u to 
>> IMM_SERVER_ANONYMOUS", cb->mState);
>> diff --git a/osaf/services/saf/immsv/immnd/immnd_init.h 
>> b/osaf/services/saf/immsv/immnd/immnd_init.h
>> --- a/osaf/services/saf/immsv/immnd/immnd_init.h
>> +++ b/osaf/services/saf/immsv/immnd/immnd_init.h
>> @@ -40,6 +40,7 @@ extern IMMND_CB *immnd_cb;
>>   /* file : -  immnd_proc.c */
>> void immnd_proc_discard_other_nodes(IMMND_CB *cb);
>> +void immnd_proc_remove_coordinator_role(IMMND_CB *cb);
>> void immnd_proc_imma_down(IMMND_CB *cb, MDS_DEST dest, 
>> NCSMDS_SVC_ID sv_id);
>>   uint32_t immnd_proc_imma_discard_connection(IMMND_CB *cb, 
>> IMMND_IMM_CLIENT_NODE *cl_node, bool scAbsenceAllowed);
>> diff --git a/osaf/services/saf/immsv/immnd/immnd_proc.c 
>> b/osaf/services/saf/immsv/immnd/immnd_proc.c
>> --- a/osaf/services/saf/immsv/immnd/immnd_proc.c
>> +++ b/osaf/services/saf/immsv/immnd/immnd_proc.c
>> @@ -872,7 +872,7 @@ void immnd_abortSync(IMMND_CB *cb)
>>   memset(_evt, '\0', sizeof(IMMSV_EVT));
>>   TRACE_ENTER();
>>   TRACE("ME:%u RE:%u", cb->mMyEpoch, cb->mRulingEpoch);
>> -osafassert(cb->mIsCoord);
>> +osafassert(cb->mIsCoord || cb->mIntroduced == 2);
> osafassert(cb->mIsCoord || (cb->mScAbsenceAllowed && cb->mIntroduced 
> == 2 );
>>   cb->mPendSync = 0;
>>   if(cb->mSyncFinalizing) {
>>   cb->mSyncFinalizing = 0x0;
>> @@ -898,6 +898,12 @@ void immnd_abortSync(IMMND_CB *cb)
>>   LOG_ER("immnd_abortSync not clean on epoch: RE:%u ME:%u", 
>> cb->mRulingEpoch, cb->mMyEpoch);
>>   }
>>   +/* Skip broadcasting sync abort msg when SC are absent */
>> +if (cb->mIntroduced == 2) {
> (cb->mScAbsenceAllowed && cb->mIntroduced == 2 )
>> +TRACE_LEAVE();
>> +return;
>> +}
>> +
>>   while (!immnd_is_immd_up(cb) && (retryCount++ < 20)) {
>>   LOG_WA("Coord blocked in sending ABORT_SYNC because IMMD is 
>> DOWN %u", retryCount);
>>   sleep(1);
>> @@ -1319,6 +1325,10 @@ void immnd_proc_global_abort_ccb(IMMND_C
>> static SaBoolT 

Re: [devel] [PATCH 1 of 1] imm: Remove coordinator role when SC absence happens [#1625]

2016-03-06 Thread Hung Nguyen
Hi Neel,

Please find my answers inline.


BR,

Hung Nguyen - DEK Technologies



From: Neelakanta Reddy reddy.neelaka...@oracle.com
Sent: Friday, February 26, 2016 12:00PM
To: Hung Nguyen, Zoran Milinkovic
 hung.d.ngu...@dektech.com.au, zoran.milinko...@ericsson.com
Cc: Opensaf-devel
 opensaf-devel@lists.sourceforge.net
Subject: Re: [PATCH 1 of 1] imm: Remove coordinator role when SC absence 
happens [#1625]


Hi Hung,

Reviewed and tested the patch.
Following are the comments:

1. This patch has to go in a separate defect,After #1625 is pushed.
2. re-publish the patch with the comments when the defect is opened
3. Comments inline.

Thanks,
Neel.

On Thursday 25 February 2016 05:29 PM, Hung Nguyen wrote:
>   osaf/services/saf/immsv/immnd/immnd_evt.c |  13 +--
>   osaf/services/saf/immsv/immnd/immnd_init.h |   1 +
>   osaf/services/saf/immsv/immnd/immnd_proc.c |  55 
> -
>   3 files changed, 55 insertions(+), 14 deletions(-)
>
>
> Set 'mIsCoord' to false when headless to avoid coordinator from 
> restarting.
> Also handle the cases when headless occurs before/during sync.
>
> diff --git a/osaf/services/saf/immsv/immnd/immnd_evt.c 
> b/osaf/services/saf/immsv/immnd/immnd_evt.c
> --- a/osaf/services/saf/immsv/immnd/immnd_evt.c
> +++ b/osaf/services/saf/immsv/immnd/immnd_evt.c
> @@ -10196,18 +10196,7 @@ static uint32_t immnd_evt_proc_mds_evt(I
>   } else { /* SC ABSENCE ALLOWED */
>   LOG_WA("SC Absence IS allowed:%u IMMD service is DOWN", 
> cb->mScAbsenceAllowed);
>   if(cb->mIsCoord) {
> -/* Note that normally the coord will reside at SCs so 
> this branch will
> -   only be relevant if REPEATED toal scAbsence 
> occurs. After SC absence
> -   and subsequent return of SC, the coord will be 
> elected at a payload.
> -   That coord will be active untill restart of that 
> payload..
> -   unless we add functionality for the payload coord 
> to restart after
> -   a few minutes .. ?
> -*/
> -LOG_WA("This IMMND coord has to exit allowing 
> restarted IMMD to select new coord");
> -if(cb->mState < IMM_SERVER_SYNC_SERVER) {
> -immnd_ackToNid(NCSCC_RC_FAILURE);
> -}
> -exit(1);
> +immnd_proc_remove_coordinator_role(cb);
No, need of new function, add the code here.
>   } else if(cb->mState <= IMM_SERVER_LOADING_PENDING) {
>   /* Reset state in payloads that had not joined. No 
> need to restart. */
>   LOG_IN("Resetting IMMND state from %u to 
> IMM_SERVER_ANONYMOUS", cb->mState);
> diff --git a/osaf/services/saf/immsv/immnd/immnd_init.h 
> b/osaf/services/saf/immsv/immnd/immnd_init.h
> --- a/osaf/services/saf/immsv/immnd/immnd_init.h
> +++ b/osaf/services/saf/immsv/immnd/immnd_init.h
> @@ -40,6 +40,7 @@ extern IMMND_CB *immnd_cb;
>   /* file : -  immnd_proc.c */
> void immnd_proc_discard_other_nodes(IMMND_CB *cb);
> +void immnd_proc_remove_coordinator_role(IMMND_CB *cb);
> void immnd_proc_imma_down(IMMND_CB *cb, MDS_DEST dest, 
> NCSMDS_SVC_ID sv_id);
>   uint32_t immnd_proc_imma_discard_connection(IMMND_CB *cb, 
> IMMND_IMM_CLIENT_NODE *cl_node, bool scAbsenceAllowed);
> diff --git a/osaf/services/saf/immsv/immnd/immnd_proc.c 
> b/osaf/services/saf/immsv/immnd/immnd_proc.c
> --- a/osaf/services/saf/immsv/immnd/immnd_proc.c
> +++ b/osaf/services/saf/immsv/immnd/immnd_proc.c
> @@ -872,7 +872,7 @@ void immnd_abortSync(IMMND_CB *cb)
>   memset(_evt, '\0', sizeof(IMMSV_EVT));
>   TRACE_ENTER();
>   TRACE("ME:%u RE:%u", cb->mMyEpoch, cb->mRulingEpoch);
> -osafassert(cb->mIsCoord);
> +osafassert(cb->mIsCoord || cb->mIntroduced == 2);
osafassert(cb->mIsCoord || (cb->mScAbsenceAllowed && cb->mIntroduced == 
2 );
>   cb->mPendSync = 0;
>   if(cb->mSyncFinalizing) {
>   cb->mSyncFinalizing = 0x0;
> @@ -898,6 +898,12 @@ void immnd_abortSync(IMMND_CB *cb)
>   LOG_ER("immnd_abortSync not clean on epoch: RE:%u ME:%u", 
> cb->mRulingEpoch, cb->mMyEpoch);
>   }
>   +/* Skip broadcasting sync abort msg when SC are absent */
> +if (cb->mIntroduced == 2) {
(cb->mScAbsenceAllowed && cb->mIntroduced == 2 )
> +TRACE_LEAVE();
> +return;
> +}
> +
>   while (!immnd_is_immd_up(cb) && (retryCount++ < 20)) {
>   LOG_WA("Coord blocked in sending ABORT_SYNC because IMMD is 
> DOWN %u", retryCount);
>   sleep(1);
> @@ -1319,6 +1325,10 @@ void immnd_proc_global_abort_ccb(IMMND_C
> static SaBoolT immnd_ccbsTerminated(IMMND_CB *cb, SaUint32T 
> duration, SaBoolT* pbeImmndDeadlock)
>   {
> +if (cb->mIntroduced == 2) {
> +/* Return true to enter phase 2 or phase 3 of SYNC_SERVER */
> +return SA_TRUE;
> +}
>   

Re: [devel] [PATCH 1 of 1] imm: Remove coordinator role when SC absence happens [#1625]

2016-02-25 Thread Neelakanta Reddy
Hi Hung,

Reviewed and tested the patch.
Following are the comments:

1. This patch has to go in a separate defect,After #1625 is pushed.
2. re-publish the patch with the comments when the defect is opened
3. Comments inline.

Thanks,
Neel.

On Thursday 25 February 2016 05:29 PM, Hung Nguyen wrote:
>   osaf/services/saf/immsv/immnd/immnd_evt.c  |  13 +--
>   osaf/services/saf/immsv/immnd/immnd_init.h |   1 +
>   osaf/services/saf/immsv/immnd/immnd_proc.c |  55 
> -
>   3 files changed, 55 insertions(+), 14 deletions(-)
>
>
> Set 'mIsCoord' to false when headless to avoid coordinator from restarting.
> Also handle the cases when headless occurs before/during sync.
>
> diff --git a/osaf/services/saf/immsv/immnd/immnd_evt.c 
> b/osaf/services/saf/immsv/immnd/immnd_evt.c
> --- a/osaf/services/saf/immsv/immnd/immnd_evt.c
> +++ b/osaf/services/saf/immsv/immnd/immnd_evt.c
> @@ -10196,18 +10196,7 @@ static uint32_t immnd_evt_proc_mds_evt(I
>   } else { /* SC ABSENCE ALLOWED */
>   LOG_WA("SC Absence IS allowed:%u IMMD service is DOWN", 
> cb->mScAbsenceAllowed);
>   if(cb->mIsCoord) {
> - /* Note that normally the coord will reside at 
> SCs so this branch will
> -only be relevant if REPEATED toal scAbsence 
> occurs. After SC absence
> -and subsequent return of SC, the coord will 
> be elected at a payload.
> -That coord will be active untill restart of 
> that payload..
> -unless we add functionality for the payload 
> coord to restart after
> -a few minutes .. ?
> - */
> - LOG_WA("This IMMND coord has to exit allowing 
> restarted IMMD to select new coord");
> - if(cb->mState < IMM_SERVER_SYNC_SERVER) {
> - immnd_ackToNid(NCSCC_RC_FAILURE);
> - }
> - exit(1);
> + immnd_proc_remove_coordinator_role(cb);
No, need of new function, add the code here.
>   } else if(cb->mState <= IMM_SERVER_LOADING_PENDING) {
>   /* Reset state in payloads that had not joined. 
> No need to restart. */
>   LOG_IN("Resetting IMMND state from %u to 
> IMM_SERVER_ANONYMOUS", cb->mState);
> diff --git a/osaf/services/saf/immsv/immnd/immnd_init.h 
> b/osaf/services/saf/immsv/immnd/immnd_init.h
> --- a/osaf/services/saf/immsv/immnd/immnd_init.h
> +++ b/osaf/services/saf/immsv/immnd/immnd_init.h
> @@ -40,6 +40,7 @@ extern IMMND_CB *immnd_cb;
>   /* file : -  immnd_proc.c */
>   
>   void immnd_proc_discard_other_nodes(IMMND_CB *cb);
> +void immnd_proc_remove_coordinator_role(IMMND_CB *cb);
>   
>   void immnd_proc_imma_down(IMMND_CB *cb, MDS_DEST dest, NCSMDS_SVC_ID sv_id);
>   uint32_t immnd_proc_imma_discard_connection(IMMND_CB *cb, 
> IMMND_IMM_CLIENT_NODE *cl_node, bool scAbsenceAllowed);
> diff --git a/osaf/services/saf/immsv/immnd/immnd_proc.c 
> b/osaf/services/saf/immsv/immnd/immnd_proc.c
> --- a/osaf/services/saf/immsv/immnd/immnd_proc.c
> +++ b/osaf/services/saf/immsv/immnd/immnd_proc.c
> @@ -872,7 +872,7 @@ void immnd_abortSync(IMMND_CB *cb)
>   memset(_evt, '\0', sizeof(IMMSV_EVT));
>   TRACE_ENTER();
>   TRACE("ME:%u RE:%u", cb->mMyEpoch, cb->mRulingEpoch);
> - osafassert(cb->mIsCoord);
> + osafassert(cb->mIsCoord || cb->mIntroduced == 2);
osafassert(cb->mIsCoord || (cb->mScAbsenceAllowed && cb->mIntroduced == 2 );
>   cb->mPendSync = 0;
>   if(cb->mSyncFinalizing) {
>   cb->mSyncFinalizing = 0x0;
> @@ -898,6 +898,12 @@ void immnd_abortSync(IMMND_CB *cb)
>   LOG_ER("immnd_abortSync not clean on epoch: RE:%u ME:%u", 
> cb->mRulingEpoch, cb->mMyEpoch);
>   }
>   
> + /* Skip broadcasting sync abort msg when SC are absent */
> + if (cb->mIntroduced == 2) {
(cb->mScAbsenceAllowed && cb->mIntroduced == 2 )
> + TRACE_LEAVE();
> + return;
> + }
> +
>   while (!immnd_is_immd_up(cb) && (retryCount++ < 20)) {
>   LOG_WA("Coord blocked in sending ABORT_SYNC because IMMD is 
> DOWN %u", retryCount);
>   sleep(1);
> @@ -1319,6 +1325,10 @@ void immnd_proc_global_abort_ccb(IMMND_C
>   
>   static SaBoolT immnd_ccbsTerminated(IMMND_CB *cb, SaUint32T duration, 
> SaBoolT* pbeImmndDeadlock)
>   {
> + if (cb->mIntroduced == 2) {
> + /* Return true to enter phase 2 or phase 3 of SYNC_SERVER */
> + return SA_TRUE;
> + }
>   osafassert(cb->mIsCoord);
>   osafassert(pbeImmndDeadlock);
>   (*pbeImmndDeadlock) = SA_FALSE;
> @@ -1999,7 +2009,10 @@ uint32_t immnd_proc_server(uint32_t *tim
>   /*Phase 2 */
>   if