Re: [devel] [PATCH 1 of 1] cpnd: use shared memory based on ckpt name length [#2108] V2

2016-10-27 Thread A V Mahesh
Hi Hoang,

ACK from me.

The functional change looks small, but the code reorganization looks large.
To make review easier, from next time onwards please split the functional change
and the code reorganization into different patches (1 of 2 and 2 of 2).

Not tested by me: in-service upgrade and long DN cases.

-AVM

On 10/27/2016 12:48 PM, Vo Minh Hoang wrote:
> Dear Mahesh,
>
> I tested with cases:
> - Old active with new standby
> - Old standby with new active
>
> Each case, create checkpoint, create section, write and read section, close
> and unlink.
>
> Sincerely,
> Hoang
>
> -Original Message-
> From: A V Mahesh [mailto:mahesh.va...@oracle.com]
> Sent: Thursday, October 27, 2016 1:58 PM
> To: Hoang Vo ; anders.wid...@ericsson.com
> Cc: opensaf-devel@lists.sourceforge.net
> Subject: Re: [PATCH 1 of 1] cpnd: use shared memory based on ckpt name
> length [#2108] V2
>
> Hi Hoang,
>
> Have you tested in-service upgrade case ?
>
> -AVM
>
>
> On 10/26/2016 2:33 PM, Hoang Vo wrote:
>>osaf/libs/common/cpsv/include/cpsv_shm.h |   28 +-
>>osaf/services/saf/cpsv/cpnd/cpnd_res.c   |  868
> --
>>2 files changed, 355 insertions(+), 541 deletions(-)
>>
>>
>> problem: In the case of CKPT osafckptnd increased 3,5Mb - 240 percent
>> on all nodes CKPT_INFO size inscrease when support longDN lead to total
> size increase.
>> solution:
>> - From start, cpnd use old format shm.
>> - Run time cpnd keep using old format shm until first longDN checkpoint is
> created.
>> After that cpnd create extended format shm for longDN use.
>> - Fix init size for shm.
>>
>> diff --git a/osaf/libs/common/cpsv/include/cpsv_shm.h
>> b/osaf/libs/common/cpsv/include/cpsv_shm.h
>> --- a/osaf/libs/common/cpsv/include/cpsv_shm.h
>> +++ b/osaf/libs/common/cpsv/include/cpsv_shm.h
>> @@ -27,7 +27,9 @@
>>#define SHM_NEXT -3
>>#define SHM_INIT -1
>>
>> -#define CPSV_CPND_SHM_VERSION1
>> +#define CPSV_CPND_SHM_VERSION   1
>> +#define CPSV_CPND_SHM_VERSION_DEPRECATE 2
>> +#define CPSV_CPND_SHM_VERSION_EXTENDED  3
>>
>>typedef struct cpsv_ckpt_hdr {
>>  SaCkptCheckpointHandleT ckpt_id;/* Index for identifying the
> checkpoint */
>> @@ -57,7 +59,7 @@ typedef struct cpsv_sect_hdr {
>>} CPSV_SECT_HDR;
>>
>>typedef struct ckpt_info {
>> -char ckpt_name[kOsafMaxDnLength];
>> +SaNameT ckpt_name;
>>  SaCkptCheckpointHandleT ckpt_id;
>>  uint32_t maxSections;
>>  SaSizeT maxSecSize;
>> @@ -74,23 +76,10 @@ typedef struct ckpt_info {
>>  int32_t next;
>>} CKPT_INFO;
>>
>> -typedef struct ckpt_info_v0 {
>> -SaNameT ckpt_name;
>> -SaCkptCheckpointHandleT ckpt_id;
>> -uint32_t maxSections;
>> -SaSizeT maxSecSize;
>> -NODE_ID node_id;
>> -int32_t offset;
>> -uint32_t client_bitmap;
>> -int32_t is_valid;
>> -uint32_t bm_offset;
>> -bool is_unlink;
>> -bool is_close;
>> -bool cpnd_rep_create;
>> -bool is_first;
>> -SaTimeT close_time;
>> -int32_t next;
>> -} CKPT_INFO_V0;
>> +typedef struct ckpt_extend_info {
>> +char ckpt_name[kOsafMaxDnLength + 1];
>> +uint32_t is_valid;
>> +} CKPT_EXTENDED_INFO;
>>
>>typedef struct client_info {
>>  SaCkptHandleT ckpt_app_hdl;
>> @@ -109,6 +98,7 @@ typedef struct gbl_shm_ptr {
>>  void *base_addr;
>>  void *cli_addr;
>>  void *ckpt_addr;
>> +void *extended_addr;/* Added in CPSV_CPND_SHM_VERSION_EXTENDED
> */
>>  int32_t n_clients;
>>  int32_t n_ckpts;
>>} GBL_SHM_PTR;
>> diff --git a/osaf/services/saf/cpsv/cpnd/cpnd_res.c
>> b/osaf/services/saf/cpsv/cpnd/cpnd_res.c
>> --- a/osaf/services/saf/cpsv/cpnd/cpnd_res.c
>> +++ b/osaf/services/saf/cpsv/cpnd/cpnd_res.c
>> @@ -40,8 +40,6 @@
>>
>>#define m_CPND_CKPTINFO_READ(ckpt_info,addr,offset)
>> memcpy(_info,addr+offset,sizeof(CKPT_INFO))
>>
>> -#define m_CPND_CKPTINFO_V0_READ(ckpt_info,addr,offset)
>> memcpy(_info,addr+offset,sizeof(CKPT_INFO_V0))
>> -
>>#define m_CPND_CKPTINFO_UPDATE(addr,ckpt_info,offset)
>> memcpy(addr+offset,_info,sizeof(CKPT_INFO))
>>
>>#define m_CPND_CKPTHDR_UPDATE(ckpt_hdr,offset)
>> memcpy(offset,_hdr,sizeof(CKPT_HDR))
>> @@ -50,13 +48,11 @@ static uint32_t cpnd_res_ckpt_sec_add(CP
>>static bool cpnd_find_exact_ckptinfo(CPND_CB *cb, CKPT_INFO *ckpt_info,
> uint32_t bitmap_offset,
>>   uint32_t *offset, uint32_t
> *prev_offset);
>>static void cpnd_clear_ckpt_info(CPND_CB *cb, CPND_CKPT_NODE
>> *cp_node, uint32_t curr_offset, uint32_t prev_offset); -static
>> uint32_t cpnd_restore_client_info(CPND_CB *cb, uint8_t *cli_addr);
>> -static uint32_t cpnd_restore_ckpt_info_v1(CPND_CB *cb, uint8_t
>> *ckpt_addr, SaClmNodeIdT nodeid); -static uint32_t
>> cpnd_restore_ckpt_info_v0(CPND_CB *cb, uint8_t *ckpt_addr,
>> SaClmNodeIdT nodeid); -static void
>> cpnd_destroy_shm_cpnd_cp_info(NCS_OS_POSIX_SHM_REQ_OPEN_INFO
>> *open_req); -static void
>> 

Re: [devel] [PATCH 1 of 1] cpnd: use shared memory based on ckpt name length [#2108] V2

2016-10-27 Thread Vo Minh Hoang
Dear Mahesh,

I tested with cases:
- Old active with new standby
- Old standby with new active

In each case I created a checkpoint, created a section, wrote and read the
section, then closed and unlinked the checkpoint.

Sincerely,
Hoang

-----Original Message-----
From: A V Mahesh [mailto:mahesh.va...@oracle.com] 
Sent: Thursday, October 27, 2016 1:58 PM
To: Hoang Vo ; anders.wid...@ericsson.com
Cc: opensaf-devel@lists.sourceforge.net
Subject: Re: [PATCH 1 of 1] cpnd: use shared memory based on ckpt name
length [#2108] V2

Hi Hoang,

Have you tested in-service upgrade case ?

-AVM


On 10/26/2016 2:33 PM, Hoang Vo wrote:
>   osaf/libs/common/cpsv/include/cpsv_shm.h |   28 +-
>   osaf/services/saf/cpsv/cpnd/cpnd_res.c   |  868
--
>   2 files changed, 355 insertions(+), 541 deletions(-)
>
>
> problem: In the case of CKPT osafckptnd increased 3,5Mb - 240 percent 
> on all nodes CKPT_INFO size inscrease when support longDN lead to total
size increase.
>
> solution:
> - From start, cpnd use old format shm.
> - Run time cpnd keep using old format shm until first longDN checkpoint is
created.
> After that cpnd create extended format shm for longDN use.
> - Fix init size for shm.
>
> diff --git a/osaf/libs/common/cpsv/include/cpsv_shm.h 
> b/osaf/libs/common/cpsv/include/cpsv_shm.h
> --- a/osaf/libs/common/cpsv/include/cpsv_shm.h
> +++ b/osaf/libs/common/cpsv/include/cpsv_shm.h
> @@ -27,7 +27,9 @@
>   #define SHM_NEXT -3
>   #define SHM_INIT -1
>   
> -#define CPSV_CPND_SHM_VERSION1
> +#define CPSV_CPND_SHM_VERSION1
> +#define CPSV_CPND_SHM_VERSION_DEPRECATE  2
> +#define CPSV_CPND_SHM_VERSION_EXTENDED   3
>   
>   typedef struct cpsv_ckpt_hdr {
>   SaCkptCheckpointHandleT ckpt_id;/* Index for identifying the
checkpoint */
> @@ -57,7 +59,7 @@ typedef struct cpsv_sect_hdr {
>   } CPSV_SECT_HDR;
>   
>   typedef struct ckpt_info {
> - char ckpt_name[kOsafMaxDnLength];
> + SaNameT ckpt_name;
>   SaCkptCheckpointHandleT ckpt_id;
>   uint32_t maxSections;
>   SaSizeT maxSecSize;
> @@ -74,23 +76,10 @@ typedef struct ckpt_info {
>   int32_t next;
>   } CKPT_INFO;
>   
> -typedef struct ckpt_info_v0 {
> - SaNameT ckpt_name;
> - SaCkptCheckpointHandleT ckpt_id;
> - uint32_t maxSections;
> - SaSizeT maxSecSize;
> - NODE_ID node_id;
> - int32_t offset;
> - uint32_t client_bitmap;
> - int32_t is_valid;
> - uint32_t bm_offset;
> - bool is_unlink;
> - bool is_close;
> - bool cpnd_rep_create;
> - bool is_first;
> - SaTimeT close_time;
> - int32_t next;
> -} CKPT_INFO_V0;
> +typedef struct ckpt_extend_info {
> + char ckpt_name[kOsafMaxDnLength + 1];
> + uint32_t is_valid;
> +} CKPT_EXTENDED_INFO;
>   
>   typedef struct client_info {
>   SaCkptHandleT ckpt_app_hdl;
> @@ -109,6 +98,7 @@ typedef struct gbl_shm_ptr {
>   void *base_addr;
>   void *cli_addr;
>   void *ckpt_addr;
> + void *extended_addr;/* Added in CPSV_CPND_SHM_VERSION_EXTENDED
*/
>   int32_t n_clients;
>   int32_t n_ckpts;
>   } GBL_SHM_PTR;
> diff --git a/osaf/services/saf/cpsv/cpnd/cpnd_res.c 
> b/osaf/services/saf/cpsv/cpnd/cpnd_res.c
> --- a/osaf/services/saf/cpsv/cpnd/cpnd_res.c
> +++ b/osaf/services/saf/cpsv/cpnd/cpnd_res.c
> @@ -40,8 +40,6 @@
>   
>   #define m_CPND_CKPTINFO_READ(ckpt_info,addr,offset) 
> memcpy(_info,addr+offset,sizeof(CKPT_INFO))
>   
> -#define m_CPND_CKPTINFO_V0_READ(ckpt_info,addr,offset) 
> memcpy(_info,addr+offset,sizeof(CKPT_INFO_V0))
> -
>   #define m_CPND_CKPTINFO_UPDATE(addr,ckpt_info,offset) 
> memcpy(addr+offset,_info,sizeof(CKPT_INFO))
>   
>   #define m_CPND_CKPTHDR_UPDATE(ckpt_hdr,offset)  
> memcpy(offset,_hdr,sizeof(CKPT_HDR))
> @@ -50,13 +48,11 @@ static uint32_t cpnd_res_ckpt_sec_add(CP
>   static bool cpnd_find_exact_ckptinfo(CPND_CB *cb, CKPT_INFO *ckpt_info,
uint32_t bitmap_offset,
>uint32_t *offset, uint32_t
*prev_offset);
>   static void cpnd_clear_ckpt_info(CPND_CB *cb, CPND_CKPT_NODE 
> *cp_node, uint32_t curr_offset, uint32_t prev_offset); -static 
> uint32_t cpnd_restore_client_info(CPND_CB *cb, uint8_t *cli_addr); 
> -static uint32_t cpnd_restore_ckpt_info_v1(CPND_CB *cb, uint8_t 
> *ckpt_addr, SaClmNodeIdT nodeid); -static uint32_t 
> cpnd_restore_ckpt_info_v0(CPND_CB *cb, uint8_t *ckpt_addr, 
> SaClmNodeIdT nodeid); -static void 
> cpnd_destroy_shm_cpnd_cp_info(NCS_OS_POSIX_SHM_REQ_OPEN_INFO 
> *open_req); -static void 
> *cpnd_create_shm_cpnd_cp_info(NCS_OS_POSIX_SHM_REQ_INFO *req_info); 
> -static void cpnd_update_shm_cpnd_cp_info(CPND_CB *cb); -static void 
> cpnd_convert_cp_info_v0(CKPT_INFO_V0 *cp_info_v0, CKPT_INFO *cp_info);
> +static void cpnd_destroy_shm(NCS_OS_POSIX_SHM_REQ_OPEN_INFO 
> +*open_req); static uint32_t cpnd_shm_extended_open(CPND_CB *cb, 
> +uint32_t flag); static uint32_t 
> +cpnd_extended_name_lend(SaConstStringT value, SaNameT* name); static 
> 

Re: [devel] [PATCH 1 of 1] cpnd: use shared memory based on ckpt name length [#2108] V2

2016-10-27 Thread A V Mahesh
Hi Hoang,

Have you tested the in-service upgrade case?

-AVM


On 10/26/2016 2:33 PM, Hoang Vo wrote:
>   osaf/libs/common/cpsv/include/cpsv_shm.h |   28 +-
>   osaf/services/saf/cpsv/cpnd/cpnd_res.c   |  868 
> --
>   2 files changed, 355 insertions(+), 541 deletions(-)
>
>
> problem: In the case of CKPT osafckptnd increased 3,5Mb - 240 percent on all 
> nodes
> CKPT_INFO size inscrease when support longDN lead to total size increase.
>
> solution:
> - From start, cpnd use old format shm.
> - Run time cpnd keep using old format shm until first longDN checkpoint is 
> created.
> After that cpnd create extended format shm for longDN use.
> - Fix init size for shm.
>
> diff --git a/osaf/libs/common/cpsv/include/cpsv_shm.h 
> b/osaf/libs/common/cpsv/include/cpsv_shm.h
> --- a/osaf/libs/common/cpsv/include/cpsv_shm.h
> +++ b/osaf/libs/common/cpsv/include/cpsv_shm.h
> @@ -27,7 +27,9 @@
>   #define SHM_NEXT -3
>   #define SHM_INIT -1
>   
> -#define CPSV_CPND_SHM_VERSION1
> +#define CPSV_CPND_SHM_VERSION1
> +#define CPSV_CPND_SHM_VERSION_DEPRECATE  2
> +#define CPSV_CPND_SHM_VERSION_EXTENDED   3
>   
>   typedef struct cpsv_ckpt_hdr {
>   SaCkptCheckpointHandleT ckpt_id;/* Index for identifying the 
> checkpoint */
> @@ -57,7 +59,7 @@ typedef struct cpsv_sect_hdr {
>   } CPSV_SECT_HDR;
>   
>   typedef struct ckpt_info {
> - char ckpt_name[kOsafMaxDnLength];
> + SaNameT ckpt_name;
>   SaCkptCheckpointHandleT ckpt_id;
>   uint32_t maxSections;
>   SaSizeT maxSecSize;
> @@ -74,23 +76,10 @@ typedef struct ckpt_info {
>   int32_t next;
>   } CKPT_INFO;
>   
> -typedef struct ckpt_info_v0 {
> - SaNameT ckpt_name;
> - SaCkptCheckpointHandleT ckpt_id;
> - uint32_t maxSections;
> - SaSizeT maxSecSize;
> - NODE_ID node_id;
> - int32_t offset;
> - uint32_t client_bitmap;
> - int32_t is_valid;
> - uint32_t bm_offset;
> - bool is_unlink;
> - bool is_close;
> - bool cpnd_rep_create;
> - bool is_first;
> - SaTimeT close_time;
> - int32_t next;
> -} CKPT_INFO_V0;
> +typedef struct ckpt_extend_info {
> + char ckpt_name[kOsafMaxDnLength + 1];
> + uint32_t is_valid;
> +} CKPT_EXTENDED_INFO;
>   
>   typedef struct client_info {
>   SaCkptHandleT ckpt_app_hdl;
> @@ -109,6 +98,7 @@ typedef struct gbl_shm_ptr {
>   void *base_addr;
>   void *cli_addr;
>   void *ckpt_addr;
> + void *extended_addr;/* Added in CPSV_CPND_SHM_VERSION_EXTENDED */
>   int32_t n_clients;
>   int32_t n_ckpts;
>   } GBL_SHM_PTR;
> diff --git a/osaf/services/saf/cpsv/cpnd/cpnd_res.c 
> b/osaf/services/saf/cpsv/cpnd/cpnd_res.c
> --- a/osaf/services/saf/cpsv/cpnd/cpnd_res.c
> +++ b/osaf/services/saf/cpsv/cpnd/cpnd_res.c
> @@ -40,8 +40,6 @@
>   
>   #define m_CPND_CKPTINFO_READ(ckpt_info,addr,offset) 
> memcpy(&ckpt_info,addr+offset,sizeof(CKPT_INFO))
>   
> -#define m_CPND_CKPTINFO_V0_READ(ckpt_info,addr,offset) 
> memcpy(&ckpt_info,addr+offset,sizeof(CKPT_INFO_V0))
> -
>   #define m_CPND_CKPTINFO_UPDATE(addr,ckpt_info,offset) 
> memcpy(addr+offset,&ckpt_info,sizeof(CKPT_INFO))
>   
>   #define m_CPND_CKPTHDR_UPDATE(ckpt_hdr,offset)  
> memcpy(offset,&ckpt_hdr,sizeof(CKPT_HDR))
> @@ -50,13 +48,11 @@ static uint32_t cpnd_res_ckpt_sec_add(CP
>   static bool cpnd_find_exact_ckptinfo(CPND_CB *cb, CKPT_INFO *ckpt_info, 
> uint32_t bitmap_offset,
>uint32_t *offset, uint32_t 
> *prev_offset);
>   static void cpnd_clear_ckpt_info(CPND_CB *cb, CPND_CKPT_NODE *cp_node, 
> uint32_t curr_offset, uint32_t prev_offset);
> -static uint32_t cpnd_restore_client_info(CPND_CB *cb, uint8_t *cli_addr);
> -static uint32_t cpnd_restore_ckpt_info_v1(CPND_CB *cb, uint8_t *ckpt_addr, 
> SaClmNodeIdT nodeid);
> -static uint32_t cpnd_restore_ckpt_info_v0(CPND_CB *cb, uint8_t *ckpt_addr, 
> SaClmNodeIdT nodeid);
> -static void cpnd_destroy_shm_cpnd_cp_info(NCS_OS_POSIX_SHM_REQ_OPEN_INFO 
> *open_req);
> -static void *cpnd_create_shm_cpnd_cp_info(NCS_OS_POSIX_SHM_REQ_INFO 
> *req_info);
> -static void cpnd_update_shm_cpnd_cp_info(CPND_CB *cb);
> -static void cpnd_convert_cp_info_v0(CKPT_INFO_V0 *cp_info_v0, CKPT_INFO 
> *cp_info);
> +static void cpnd_destroy_shm(NCS_OS_POSIX_SHM_REQ_OPEN_INFO *open_req);
> +static uint32_t cpnd_shm_extended_open(CPND_CB *cb, uint32_t flag);
> +static uint32_t cpnd_extended_name_lend(SaConstStringT value, SaNameT* name);
> +static SaConstStringT cpnd_extended_name_borrow(const SaNameT* name);
> +static void cpnd_extended_name_free(const SaNameT* name);
>   
>   
> /***
>  *
>* Name   : cpnd_client_extract_bits
> @@ -324,10 +320,24 @@ void cpnd_restart_update_timer(CPND_CB *
>   
>   void *cpnd_restart_shm_create(NCS_OS_POSIX_SHM_REQ_INFO *cpnd_open_req, 
> CPND_CB *cb, SaClmNodeIdT nodeid)
>   {
> - uint32_t rc = NCSCC_RC_SUCCESS;
> + uint32_t 

Re: [devel] [PATCH 1 of 1] cpnd: use shared memory based on ckpt name length [#2108] V2

2016-10-25 Thread A V Mahesh
Hi Hoang,

What I tested is only the simple default cpsv `small format shm` (Short-DN)
functionality.
This `big format shm` (LONG-DN) fix has side effects/implications on
existing functionality,
and such issues will make bugs more complex to handle, so please test
all of the following cases
with the new patch and then publish it:

1)  `small format shm`  ( Short-DN),
2)  `big format shm` ( LONG-DN)
3)  combination of both  `small format shm`  ( Short-DN) &  `big format 
shm` ( LONG-DN)

-AVM

On 10/26/2016 10:51 AM, Vo Minh Hoang wrote:
> Dear Mahesh,
>
> Thank you very much for your help.
> Compared to your test app I found my test stop too soon.
> After reboot I just check shm existence, did not check to open again.
>
> I will send fix patch soon after carefully test it again.
>
> Thank you and best regards,
> Hoang
>
> -Original Message-
> From: A V Mahesh [mailto:mahesh.va...@oracle.com]
> Sent: Wednesday, October 26, 2016 11:02 AM
> To: Vo Minh Hoang ; anders.wid...@ericsson.com
> Cc: opensaf-devel@lists.sourceforge.net
> Subject: Re: [PATCH 1 of 1] cpnd: use shared memory based on ckpt name
> length [#2108] V2
>
> Hi Hoang,
>
> The attached  `test_#2108_app.c` application will generate cpnd shm open
> request is getting failed case
>
> #gcc test_#2108_app.c -o checkpoint -lSaCkpt
>
> -AVM
>
> On 10/25/2016 12:23 PM, A V Mahesh wrote:
>> Hi Hoang,
>>
>> On 10/25/2016 12:10 PM, Vo Minh Hoang wrote:
>>> Would you please tell me the process to reproduce this error?
>> I will write standalone application and will share with you .
>>
>> -AVM
>>
>> On 10/25/2016 12:10 PM, Vo Minh Hoang wrote:
>>> Dear Mahesh,
>>>
>>> Thank you very much for your checking.
>>> It is very strangle that I tested with 2 following case:
>>> - restart nd by kill -9 
>>> - restart node by kill -9 
>>> Both cases executed well in my local machine.
>>>
>>> Would you please tell me the process to reproduce this error?
>>> It is very strangle that ER is cannot open replica's shm that is not in
>>> touch of this patch.
>>>
>>> Thank you and best regards,
>>> Hoang
>>>
>>> -Original Message-
>>> From: A V Mahesh [mailto:mahesh.va...@oracle.com]
>>> Sent: Tuesday, October 25, 2016 12:53 PM
>>> To: Hoang Vo ; anders.wid...@ericsson.com
>>> Cc: opensaf-devel@lists.sourceforge.net
>>> Subject: Re: [PATCH 1 of 1] cpnd: use shared memory based on ckpt name
>>> length [#2108] V2
>>>
>>> Hi Hoang,
>>>
>>> With the patch after CPND restart cpnd shm open request is getting
>>> failed
>>>
>>> please test CPND restart cases.
>>>
>>>
> 
>
>>> 
>>>
>>>  saCkptCheckpointOpen  returned checkpointHandle 626040
>>> 222 saCkptCheckpointOpen  returned checkpointHandle 6261f0
>>> Before pkill osafckptnd  saCkptCheckpointOpen
>>> root 23946 1  0 11:14 ?00:00:00
>>> /usr/lib64/opensaf/osafckptnd
>>> root 24041 24038  0 11:15 pts/000:00:00 sh -c ps -ef | grep
>>> osafckptnd
>>> root 24043 24041  0 11:15 pts/000:00:00 grep osafckptnd
>>> Oct 25 11:15:07 SC-1 osafckptnd[23946]: exiting for shutdown
>>> Oct 25 11:15:07 SC-1 osafamfnd[23844]: NO
>>> 'safSu=SC-1,safSg=NoRed,safApp=OpenSAF' component restart probation
>>> timer started (timeout: 600 ns)
>>> Oct 25 11:15:07 SC-1 osafamfnd[23844]: NO Restarting a component of
>>> 'safSu=SC-1,safSg=NoRed,safApp=OpenSAF' (comp restart count: 1)
>>> Oct 25 11:15:07 SC-1 osafamfnd[23844]: NO
>>> 'safComp=CPND,safSu=SC-1,safSg=NoRed,safApp=OpenSAF' faulted due to
>>> 'avaDown' : Recovery is 'componentRestart'
>>> Oct 25 11:15:07 SC-1 osafckptd[23989]: NO cpnd_down_process:: Start
>>> CPND_RETENTION timer id = 0x663f10, arg=0x664020
>>> Oct 25 11:15:07 SC-1 osafckptnd[24058]: Started
>>> VV saCkptCheckpointOpen 3rd may hit try again returned 18.
>>> 333 saCkptCheckpointOpen  returned checkpointHandle 7f29fbdc7588
>>> VV saCkptCheckpointOpen 4th returned may hit try again
>>> returned 12.
>>> 444 saCkptCheckpointOpen  returned checkpointHandle 7fffb4a097d8
>>> saCkptCheckpointOpen 5th returned 12.
>>>  saCkptCheckpointOpen  returned checkpointHandle 7f29fbdf61a8
>>> Before pkill osafckptnd & saCkptCheckpointClose
>>> root 24058 1  0 11:15 ?00:00:00
>>> /usr/lib64/opensaf/osafckptnd
>>> root 24063 24038  0 11:15 pts/000:00:00 sh -c ps -ef | grep
>>> osafckptnd
>>> root 24065 24063  0 11:15 pts/000:00:00 grep osafckptnd
>>> Oct 25 11:15:19 SC-1 osafckptnd[24058]: exiting for shutdown
>>> Oct 25 11:15:19 SC-1 osafamfnd[23844]: NO Restarting a component of
>>> 'safSu=SC-1,safSg=NoRed,safApp=OpenSAF' (comp restart count: 2)
>>> Oct 25 11:15:19 SC-1 osafamfnd[23844]: NO
>>> 'safComp=CPND,safSu=SC-1,safSg=NoRed,safApp=OpenSAF' faulted due to
>>> 'avaDown' : Recovery is 'componentRestart'
>>> Oct 25 11:15:19 SC-1 osafckptnd[24080]: Started
>>> Oct 25 

Re: [devel] [PATCH 1 of 1] cpnd: use shared memory based on ckpt name length [#2108] V2

2016-10-25 Thread Vo Minh Hoang
Dear Mahesh,

Thank you very much for your help.
Compared to your test app, I found that my test stopped too soon.
After the reboot I only checked that the shm existed; I did not check that it could be opened again.

I will send a fixed patch soon, after carefully testing it again.

Thank you and best regards,
Hoang

-----Original Message-----
From: A V Mahesh [mailto:mahesh.va...@oracle.com] 
Sent: Wednesday, October 26, 2016 11:02 AM
To: Vo Minh Hoang ; anders.wid...@ericsson.com
Cc: opensaf-devel@lists.sourceforge.net
Subject: Re: [PATCH 1 of 1] cpnd: use shared memory based on ckpt name
length [#2108] V2

Hi Hoang,

The attached `test_#2108_app.c` application reproduces the case where the
cpnd shm open request fails.

#gcc test_#2108_app.c -o checkpoint -lSaCkpt

-AVM

On 10/25/2016 12:23 PM, A V Mahesh wrote:
> Hi Hoang,
>
> On 10/25/2016 12:10 PM, Vo Minh Hoang wrote:
>> Would you please tell me the process to reproduce this error?
> I will write standalone application and will share with you .
>
> -AVM
>
> On 10/25/2016 12:10 PM, Vo Minh Hoang wrote:
>> Dear Mahesh,
>>
>> Thank you very much for your checking.
>> It is very strangle that I tested with 2 following case:
>> - restart nd by kill -9 
>> - restart node by kill -9 
>> Both cases executed well in my local machine.
>>
>> Would you please tell me the process to reproduce this error?
>> It is very strangle that ER is cannot open replica's shm that is not in
>> touch of this patch.
>>
>> Thank you and best regards,
>> Hoang
>>
>> -Original Message-
>> From: A V Mahesh [mailto:mahesh.va...@oracle.com]
>> Sent: Tuesday, October 25, 2016 12:53 PM
>> To: Hoang Vo ; anders.wid...@ericsson.com
>> Cc: opensaf-devel@lists.sourceforge.net
>> Subject: Re: [PATCH 1 of 1] cpnd: use shared memory based on ckpt name
>> length [#2108] V2
>>
>> Hi Hoang,
>>
>> With the patch after CPND restart cpnd shm open request is getting 
>> failed
>>
>> please test CPND restart cases.
>>
>>


>>
>> 
>>
>>  saCkptCheckpointOpen  returned checkpointHandle 626040
>> 222 saCkptCheckpointOpen  returned checkpointHandle 6261f0
>>Before pkill osafckptnd  saCkptCheckpointOpen
>> root 23946 1  0 11:14 ?00:00:00
>> /usr/lib64/opensaf/osafckptnd
>> root 24041 24038  0 11:15 pts/000:00:00 sh -c ps -ef | grep
>> osafckptnd
>> root 24043 24041  0 11:15 pts/000:00:00 grep osafckptnd
>> Oct 25 11:15:07 SC-1 osafckptnd[23946]: exiting for shutdown
>> Oct 25 11:15:07 SC-1 osafamfnd[23844]: NO
>> 'safSu=SC-1,safSg=NoRed,safApp=OpenSAF' component restart probation
>> timer started (timeout: 600 ns)
>> Oct 25 11:15:07 SC-1 osafamfnd[23844]: NO Restarting a component of
>> 'safSu=SC-1,safSg=NoRed,safApp=OpenSAF' (comp restart count: 1)
>> Oct 25 11:15:07 SC-1 osafamfnd[23844]: NO
>> 'safComp=CPND,safSu=SC-1,safSg=NoRed,safApp=OpenSAF' faulted due to
>> 'avaDown' : Recovery is 'componentRestart'
>> Oct 25 11:15:07 SC-1 osafckptd[23989]: NO cpnd_down_process:: Start
>> CPND_RETENTION timer id = 0x663f10, arg=0x664020
>> Oct 25 11:15:07 SC-1 osafckptnd[24058]: Started
>>VV saCkptCheckpointOpen 3rd may hit try again returned 18.
>> 333 saCkptCheckpointOpen  returned checkpointHandle 7f29fbdc7588
>>VV saCkptCheckpointOpen 4th returned may hit try again
>> returned 12.
>> 444 saCkptCheckpointOpen  returned checkpointHandle 7fffb4a097d8
>>saCkptCheckpointOpen 5th returned 12.
>>  saCkptCheckpointOpen  returned checkpointHandle 7f29fbdf61a8
>>Before pkill osafckptnd & saCkptCheckpointClose
>> root 24058 1  0 11:15 ?00:00:00
>> /usr/lib64/opensaf/osafckptnd
>> root 24063 24038  0 11:15 pts/000:00:00 sh -c ps -ef | grep
>> osafckptnd
>> root 24065 24063  0 11:15 pts/000:00:00 grep osafckptnd
>> Oct 25 11:15:19 SC-1 osafckptnd[24058]: exiting for shutdown
>> Oct 25 11:15:19 SC-1 osafamfnd[23844]: NO Restarting a component of
>> 'safSu=SC-1,safSg=NoRed,safApp=OpenSAF' (comp restart count: 2)
>> Oct 25 11:15:19 SC-1 osafamfnd[23844]: NO
>> 'safComp=CPND,safSu=SC-1,safSg=NoRed,safApp=OpenSAF' faulted due to
>> 'avaDown' : Recovery is 'componentRestart'
>> Oct 25 11:15:19 SC-1 osafckptnd[24080]: Started
>> Oct 25 11:15:19 SC-1 osafckptnd[24080]: ER cpnd shm open request failed
>> safCkpt=checkpoint_tes_131343_1
>> Oct 25 11:15:19 SC-1 osafckptnd[24080]: ER cpnd shm open request failed
>> safCkpt=checkpoint_tes_131343_1
>>  saCkptCheckpointClose  checkpointHandle 626040
>> Attempt 0-0:  saCkptCheckpointClose returned 12.
>> 222 saCkptCheckpointClose  checkpointHandle 6261f0
>> Attempt 0-0:  saCkptCheckpointClose returned 12.
>> 333 saCkptCheckpointClose  checkpointHandle 7f29fbdc7588
>> Attempt 0-0:  saCkptCheckpointClose returned 9.
>>  saCkptCheckpointClose  checkpointHandle 7fffb4a097d8
>> Attempt 0-0:  saCkptCheckpointClose returned 9.
>> 555 

Re: [devel] [PATCH 1 of 1] cpnd: use shared memory based on ckpt name length [#2108] V2

2016-10-25 Thread A V Mahesh

Hi Hoang,

The attached  `test_#2108_app.c` application will generate cpnd shm open 
request is getting failed case


#gcc test_#2108_app.c -o checkpoint -lSaCkpt

-AVM

On 10/25/2016 12:23 PM, A V Mahesh wrote:

Hi Hoang,

On 10/25/2016 12:10 PM, Vo Minh Hoang wrote:

Would you please tell me the process to reproduce this error?

I will write standalone application and will share with you .

-AVM

On 10/25/2016 12:10 PM, Vo Minh Hoang wrote:

Dear Mahesh,

Thank you very much for your checking.
It is very strange, because I tested the following 2 cases:
- restart nd by kill -9 
- restart node by kill -9 
Both cases executed well in my local machine.

Would you please tell me the process to reproduce this error?
It is very strange that the ER is a failure to open the replica's shm, which is not
touched by this patch.

Thank you and best regards,
Hoang

-----Original Message-----
From: A V Mahesh [mailto:mahesh.va...@oracle.com]
Sent: Tuesday, October 25, 2016 12:53 PM
To: Hoang Vo ; anders.wid...@ericsson.com
Cc: opensaf-devel@lists.sourceforge.net
Subject: Re: [PATCH 1 of 1] cpnd: use shared memory based on ckpt name
length [#2108] V2

Hi Hoang,

With the patch, the cpnd shm open request fails after a CPND
restart.


please test CPND restart cases.

 




 saCkptCheckpointOpen  returned checkpointHandle 626040
222 saCkptCheckpointOpen  returned checkpointHandle 6261f0
   Before pkill osafckptnd  saCkptCheckpointOpen
root 23946 1  0 11:14 ?00:00:00
/usr/lib64/opensaf/osafckptnd
root 24041 24038  0 11:15 pts/000:00:00 sh -c ps -ef | grep
osafckptnd
root 24043 24041  0 11:15 pts/000:00:00 grep osafckptnd
Oct 25 11:15:07 SC-1 osafckptnd[23946]: exiting for shutdown
Oct 25 11:15:07 SC-1 osafamfnd[23844]: NO
'safSu=SC-1,safSg=NoRed,safApp=OpenSAF' component restart probation
timer started (timeout: 600 ns)
Oct 25 11:15:07 SC-1 osafamfnd[23844]: NO Restarting a component of
'safSu=SC-1,safSg=NoRed,safApp=OpenSAF' (comp restart count: 1)
Oct 25 11:15:07 SC-1 osafamfnd[23844]: NO
'safComp=CPND,safSu=SC-1,safSg=NoRed,safApp=OpenSAF' faulted due to
'avaDown' : Recovery is 'componentRestart'
Oct 25 11:15:07 SC-1 osafckptd[23989]: NO cpnd_down_process:: Start
CPND_RETENTION timer id = 0x663f10, arg=0x664020
Oct 25 11:15:07 SC-1 osafckptnd[24058]: Started
   VV saCkptCheckpointOpen 3rd may hit try again returned 18.
333 saCkptCheckpointOpen  returned checkpointHandle 7f29fbdc7588
   VV saCkptCheckpointOpen 4th returned may hit try again
returned 12.
444 saCkptCheckpointOpen  returned checkpointHandle 7fffb4a097d8
   saCkptCheckpointOpen 5th returned 12.
 saCkptCheckpointOpen  returned checkpointHandle 7f29fbdf61a8
   Before pkill osafckptnd & saCkptCheckpointClose
root 24058 1  0 11:15 ?00:00:00
/usr/lib64/opensaf/osafckptnd
root 24063 24038  0 11:15 pts/000:00:00 sh -c ps -ef | grep
osafckptnd
root 24065 24063  0 11:15 pts/000:00:00 grep osafckptnd
Oct 25 11:15:19 SC-1 osafckptnd[24058]: exiting for shutdown
Oct 25 11:15:19 SC-1 osafamfnd[23844]: NO Restarting a component of
'safSu=SC-1,safSg=NoRed,safApp=OpenSAF' (comp restart count: 2)
Oct 25 11:15:19 SC-1 osafamfnd[23844]: NO
'safComp=CPND,safSu=SC-1,safSg=NoRed,safApp=OpenSAF' faulted due to
'avaDown' : Recovery is 'componentRestart'
Oct 25 11:15:19 SC-1 osafckptnd[24080]: Started
Oct 25 11:15:19 SC-1 osafckptnd[24080]: ER cpnd shm open request failed
safCkpt=checkpoint_tes_131343_1
Oct 25 11:15:19 SC-1 osafckptnd[24080]: ER cpnd shm open request failed
safCkpt=checkpoint_tes_131343_1
 saCkptCheckpointClose  checkpointHandle 626040
Attempt 0-0:  saCkptCheckpointClose returned 12.
222 saCkptCheckpointClose  checkpointHandle 6261f0
Attempt 0-0:  saCkptCheckpointClose returned 12.
333 saCkptCheckpointClose  checkpointHandle 7f29fbdc7588
Attempt 0-0:  saCkptCheckpointClose returned 9.
 saCkptCheckpointClose  checkpointHandle 7fffb4a097d8
Attempt 0-0:  saCkptCheckpointClose returned 9.
555 saCkptCheckpointClose  checkpointHandle 7f29fbdf61a8
Attempt 0-0:  saCkptCheckpointClose returned 9.
 saCkptCheckpointOpen  returned checkpointHandle 626040
222 saCkptCheckpointOpen  returned checkpointHandle 628b40
   Before pkill osafckptnd  saCkptCheckpointOpen
root 24080 1  0 11:15 ?00:00:00
/usr/lib64/opensaf/osafckptnd
root 24085 24038  0 11:15 pts/000:00:00 sh -c ps -ef | grep
osafckptnd
root 24087 24085  0 11:15 pts/000:00:00 grep osafckptnd
Oct 25 11:15:26 SC-1 osafckptnd[24080]: exiting for shutdown
Oct 25 11:15:26 SC-1 osafamfnd[23844]: NO Restarting a component of
'safSu=SC-1,safSg=NoRed,safApp=OpenSAF' (comp restart count: 3)
Oct 25 11:15:26 SC-1 osafamfnd[23844]: NO
'safComp=CPND,safSu=SC-1,safSg=NoRed,safApp=OpenSAF' faulted due to
'avaDown' : Recovery is 'componentRestart'
Oct 25 11:15:26 SC-1 

Re: [devel] [PATCH 1 of 1] cpnd: use shared memory based on ckpt name length [#2108] V2

2016-10-25 Thread A V Mahesh
Hi Hoang,

On 10/25/2016 12:10 PM, Vo Minh Hoang wrote:
> Would you please tell me the process to reproduce this error?
I will write a standalone application and share it with you.

-AVM

On 10/25/2016 12:10 PM, Vo Minh Hoang wrote:
> Dear Mahesh,
>
> Thank you very much for your checking.
> It is very strangle that I tested with 2 following case:
> - restart nd by kill -9 
> - restart node by kill -9 
> Both cases executed well in my local machine.
>
> Would you please tell me the process to reproduce this error?
> It is very strangle that ER is cannot open replica's shm that is not in
> touch of this patch.
>
> Thank you and best regards,
> Hoang
>
> -Original Message-
> From: A V Mahesh [mailto:mahesh.va...@oracle.com]
> Sent: Tuesday, October 25, 2016 12:53 PM
> To: Hoang Vo ; anders.wid...@ericsson.com
> Cc: opensaf-devel@lists.sourceforge.net
> Subject: Re: [PATCH 1 of 1] cpnd: use shared memory based on ckpt name
> length [#2108] V2
>
> Hi Hoang,
>
> With the patch after CPND restart cpnd shm open request is getting failed
>
> please test CPND restart cases.
>
> 
> 
>
>  saCkptCheckpointOpen  returned checkpointHandle 626040
> 222 saCkptCheckpointOpen  returned checkpointHandle 6261f0
>Before pkill osafckptnd  saCkptCheckpointOpen
> root 23946 1  0 11:14 ?00:00:00
> /usr/lib64/opensaf/osafckptnd
> root 24041 24038  0 11:15 pts/000:00:00 sh -c ps -ef | grep
> osafckptnd
> root 24043 24041  0 11:15 pts/000:00:00 grep osafckptnd
> Oct 25 11:15:07 SC-1 osafckptnd[23946]: exiting for shutdown
> Oct 25 11:15:07 SC-1 osafamfnd[23844]: NO
> 'safSu=SC-1,safSg=NoRed,safApp=OpenSAF' component restart probation
> timer started (timeout: 600 ns)
> Oct 25 11:15:07 SC-1 osafamfnd[23844]: NO Restarting a component of
> 'safSu=SC-1,safSg=NoRed,safApp=OpenSAF' (comp restart count: 1)
> Oct 25 11:15:07 SC-1 osafamfnd[23844]: NO
> 'safComp=CPND,safSu=SC-1,safSg=NoRed,safApp=OpenSAF' faulted due to
> 'avaDown' : Recovery is 'componentRestart'
> Oct 25 11:15:07 SC-1 osafckptd[23989]: NO cpnd_down_process:: Start
> CPND_RETENTION timer id = 0x663f10, arg=0x664020
> Oct 25 11:15:07 SC-1 osafckptnd[24058]: Started
>VV saCkptCheckpointOpen 3rd may hit try again returned 18.
> 333 saCkptCheckpointOpen  returned checkpointHandle 7f29fbdc7588
>VV saCkptCheckpointOpen 4th returned may hit try again
> returned 12.
> 444 saCkptCheckpointOpen  returned checkpointHandle 7fffb4a097d8
>saCkptCheckpointOpen 5th returned 12.
>  saCkptCheckpointOpen  returned checkpointHandle 7f29fbdf61a8
>Before pkill osafckptnd & saCkptCheckpointClose
> root 24058 1  0 11:15 ?00:00:00
> /usr/lib64/opensaf/osafckptnd
> root 24063 24038  0 11:15 pts/000:00:00 sh -c ps -ef | grep
> osafckptnd
> root 24065 24063  0 11:15 pts/000:00:00 grep osafckptnd
> Oct 25 11:15:19 SC-1 osafckptnd[24058]: exiting for shutdown
> Oct 25 11:15:19 SC-1 osafamfnd[23844]: NO Restarting a component of
> 'safSu=SC-1,safSg=NoRed,safApp=OpenSAF' (comp restart count: 2)
> Oct 25 11:15:19 SC-1 osafamfnd[23844]: NO
> 'safComp=CPND,safSu=SC-1,safSg=NoRed,safApp=OpenSAF' faulted due to
> 'avaDown' : Recovery is 'componentRestart'
> Oct 25 11:15:19 SC-1 osafckptnd[24080]: Started
> Oct 25 11:15:19 SC-1 osafckptnd[24080]: ER cpnd shm open request failed
> safCkpt=checkpoint_tes_131343_1
> Oct 25 11:15:19 SC-1 osafckptnd[24080]: ER cpnd shm open request failed
> safCkpt=checkpoint_tes_131343_1
>  saCkptCheckpointClose  checkpointHandle 626040
> Attempt 0-0:  saCkptCheckpointClose returned 12.
> 222 saCkptCheckpointClose  checkpointHandle 6261f0
> Attempt 0-0:  saCkptCheckpointClose returned 12.
> 333 saCkptCheckpointClose  checkpointHandle 7f29fbdc7588
> Attempt 0-0:  saCkptCheckpointClose returned 9.
>  saCkptCheckpointClose  checkpointHandle 7fffb4a097d8
> Attempt 0-0:  saCkptCheckpointClose returned 9.
> 555 saCkptCheckpointClose  checkpointHandle 7f29fbdf61a8
> Attempt 0-0:  saCkptCheckpointClose returned 9.
>  saCkptCheckpointOpen  returned checkpointHandle 626040
> 222 saCkptCheckpointOpen  returned checkpointHandle 628b40
>Before pkill osafckptnd  saCkptCheckpointOpen
> root 24080 1  0 11:15 ?00:00:00
> /usr/lib64/opensaf/osafckptnd
> root 24085 24038  0 11:15 pts/000:00:00 sh -c ps -ef | grep
> osafckptnd
> root 24087 24085  0 11:15 pts/000:00:00 grep osafckptnd
> Oct 25 11:15:26 SC-1 osafckptnd[24080]: exiting for shutdown
> Oct 25 11:15:26 SC-1 osafamfnd[23844]: NO Restarting a component of
> 'safSu=SC-1,safSg=NoRed,safApp=OpenSAF' (comp restart count: 3)
> Oct 25 11:15:26 SC-1 osafamfnd[23844]: NO
> 'safComp=CPND,safSu=SC-1,safSg=NoRed,safApp=OpenSAF' faulted due to
> 'avaDown' : Recovery is 'componentRestart'
> Oct 25 11:15:26 SC-1 osafckptd[23989]: NO 

Re: [devel] [PATCH 1 of 1] cpnd: use shared memory based on ckpt name length [#2108] V2

2016-10-25 Thread Vo Minh Hoang
Dear Mahesh,

Thank you very much for your checking.
It is very strange, because I tested the following 2 cases:
- restart nd by kill -9 
- restart node by kill -9 
Both cases executed well in my local machine.

Would you please tell me the process to reproduce this error?
It is very strange that the ER is a failure to open the replica's shm, which
is not touched by this patch.

Thank you and best regards,
Hoang

-Original Message-
From: A V Mahesh [mailto:mahesh.va...@oracle.com] 
Sent: Tuesday, October 25, 2016 12:53 PM
To: Hoang Vo ; anders.wid...@ericsson.com
Cc: opensaf-devel@lists.sourceforge.net
Subject: Re: [PATCH 1 of 1] cpnd: use shared memory based on ckpt name
length [#2108] V2

Hi Hoang,

With the patch, after a CPND restart the cpnd shm open request fails.

Please test the CPND restart cases.




 saCkptCheckpointOpen  returned checkpointHandle 626040
222 saCkptCheckpointOpen  returned checkpointHandle 6261f0
  Before pkill osafckptnd  saCkptCheckpointOpen
root 23946 1  0 11:14 ?00:00:00 
/usr/lib64/opensaf/osafckptnd
root 24041 24038  0 11:15 pts/000:00:00 sh -c ps -ef | grep 
osafckptnd
root 24043 24041  0 11:15 pts/000:00:00 grep osafckptnd
Oct 25 11:15:07 SC-1 osafckptnd[23946]: exiting for shutdown
Oct 25 11:15:07 SC-1 osafamfnd[23844]: NO 
'safSu=SC-1,safSg=NoRed,safApp=OpenSAF' component restart probation 
timer started (timeout: 600 ns)
Oct 25 11:15:07 SC-1 osafamfnd[23844]: NO Restarting a component of 
'safSu=SC-1,safSg=NoRed,safApp=OpenSAF' (comp restart count: 1)
Oct 25 11:15:07 SC-1 osafamfnd[23844]: NO 
'safComp=CPND,safSu=SC-1,safSg=NoRed,safApp=OpenSAF' faulted due to 
'avaDown' : Recovery is 'componentRestart'
Oct 25 11:15:07 SC-1 osafckptd[23989]: NO cpnd_down_process:: Start 
CPND_RETENTION timer id = 0x663f10, arg=0x664020
Oct 25 11:15:07 SC-1 osafckptnd[24058]: Started
  VV saCkptCheckpointOpen 3rd may hit try again returned 18.
333 saCkptCheckpointOpen  returned checkpointHandle 7f29fbdc7588
  VV saCkptCheckpointOpen 4th returned may hit try again 
returned 12.
444 saCkptCheckpointOpen  returned checkpointHandle 7fffb4a097d8
  saCkptCheckpointOpen 5th returned 12.
 saCkptCheckpointOpen  returned checkpointHandle 7f29fbdf61a8
  Before pkill osafckptnd & saCkptCheckpointClose
root 24058 1  0 11:15 ?00:00:00 
/usr/lib64/opensaf/osafckptnd
root 24063 24038  0 11:15 pts/000:00:00 sh -c ps -ef | grep 
osafckptnd
root 24065 24063  0 11:15 pts/000:00:00 grep osafckptnd
Oct 25 11:15:19 SC-1 osafckptnd[24058]: exiting for shutdown
Oct 25 11:15:19 SC-1 osafamfnd[23844]: NO Restarting a component of 
'safSu=SC-1,safSg=NoRed,safApp=OpenSAF' (comp restart count: 2)
Oct 25 11:15:19 SC-1 osafamfnd[23844]: NO 
'safComp=CPND,safSu=SC-1,safSg=NoRed,safApp=OpenSAF' faulted due to 
'avaDown' : Recovery is 'componentRestart'
Oct 25 11:15:19 SC-1 osafckptnd[24080]: Started
Oct 25 11:15:19 SC-1 osafckptnd[24080]: ER cpnd shm open request failed 
safCkpt=checkpoint_tes_131343_1
Oct 25 11:15:19 SC-1 osafckptnd[24080]: ER cpnd shm open request failed 
safCkpt=checkpoint_tes_131343_1
 saCkptCheckpointClose  checkpointHandle 626040
Attempt 0-0:  saCkptCheckpointClose returned 12.
222 saCkptCheckpointClose  checkpointHandle 6261f0
Attempt 0-0:  saCkptCheckpointClose returned 12.
333 saCkptCheckpointClose  checkpointHandle 7f29fbdc7588
Attempt 0-0:  saCkptCheckpointClose returned 9.
 saCkptCheckpointClose  checkpointHandle 7fffb4a097d8
Attempt 0-0:  saCkptCheckpointClose returned 9.
555 saCkptCheckpointClose  checkpointHandle 7f29fbdf61a8
Attempt 0-0:  saCkptCheckpointClose returned 9.
 saCkptCheckpointOpen  returned checkpointHandle 626040
222 saCkptCheckpointOpen  returned checkpointHandle 628b40
  Before pkill osafckptnd  saCkptCheckpointOpen
root 24080 1  0 11:15 ?00:00:00 
/usr/lib64/opensaf/osafckptnd
root 24085 24038  0 11:15 pts/000:00:00 sh -c ps -ef | grep 
osafckptnd
root 24087 24085  0 11:15 pts/000:00:00 grep osafckptnd
Oct 25 11:15:26 SC-1 osafckptnd[24080]: exiting for shutdown
Oct 25 11:15:26 SC-1 osafamfnd[23844]: NO Restarting a component of 
'safSu=SC-1,safSg=NoRed,safApp=OpenSAF' (comp restart count: 3)
Oct 25 11:15:26 SC-1 osafamfnd[23844]: NO 
'safComp=CPND,safSu=SC-1,safSg=NoRed,safApp=OpenSAF' faulted due to 
'avaDown' : Recovery is 'componentRestart'
Oct 25 11:15:26 SC-1 osafckptd[23989]: NO cpnd_down_process:: Start 
CPND_RETENTION timer id = 0x663f10, arg=0x664020
Oct 25 11:15:26 SC-1 osafckptnd[24102]: Started
Oct 25 11:15:26 SC-1 osafckptnd[24102]: ER cpnd shm open request failed 
safCkpt=checkpoint_tes_131343_1
Oct 25 11:15:26 SC-1 osafckptnd[24102]: ER cpnd shm open request failed 
safCkpt=checkpoint_tes_131343_1
  VV saCkptCheckpointOpen 3rd may hit try again returned 18.
333 

Re: [devel] [PATCH 1 of 1] cpnd: use shared memory based on ckpt name length [#2108] V2

2016-10-24 Thread A V Mahesh
Hi Hoang,

With the patch, after a CPND restart the cpnd shm open request fails.

Please test the CPND restart cases.



 saCkptCheckpointOpen  returned checkpointHandle 626040
222 saCkptCheckpointOpen  returned checkpointHandle 6261f0
  Before pkill osafckptnd  saCkptCheckpointOpen
root 23946 1  0 11:14 ?00:00:00 
/usr/lib64/opensaf/osafckptnd
root 24041 24038  0 11:15 pts/000:00:00 sh -c ps -ef | grep 
osafckptnd
root 24043 24041  0 11:15 pts/000:00:00 grep osafckptnd
Oct 25 11:15:07 SC-1 osafckptnd[23946]: exiting for shutdown
Oct 25 11:15:07 SC-1 osafamfnd[23844]: NO 
'safSu=SC-1,safSg=NoRed,safApp=OpenSAF' component restart probation 
timer started (timeout: 600 ns)
Oct 25 11:15:07 SC-1 osafamfnd[23844]: NO Restarting a component of 
'safSu=SC-1,safSg=NoRed,safApp=OpenSAF' (comp restart count: 1)
Oct 25 11:15:07 SC-1 osafamfnd[23844]: NO 
'safComp=CPND,safSu=SC-1,safSg=NoRed,safApp=OpenSAF' faulted due to 
'avaDown' : Recovery is 'componentRestart'
Oct 25 11:15:07 SC-1 osafckptd[23989]: NO cpnd_down_process:: Start 
CPND_RETENTION timer id = 0x663f10, arg=0x664020
Oct 25 11:15:07 SC-1 osafckptnd[24058]: Started
  VV saCkptCheckpointOpen 3rd may hit try again returned 18.
333 saCkptCheckpointOpen  returned checkpointHandle 7f29fbdc7588
  VV saCkptCheckpointOpen 4th returned may hit try again 
returned 12.
444 saCkptCheckpointOpen  returned checkpointHandle 7fffb4a097d8
  saCkptCheckpointOpen 5th returned 12.
 saCkptCheckpointOpen  returned checkpointHandle 7f29fbdf61a8
  Before pkill osafckptnd & saCkptCheckpointClose
root 24058 1  0 11:15 ?00:00:00 
/usr/lib64/opensaf/osafckptnd
root 24063 24038  0 11:15 pts/000:00:00 sh -c ps -ef | grep 
osafckptnd
root 24065 24063  0 11:15 pts/000:00:00 grep osafckptnd
Oct 25 11:15:19 SC-1 osafckptnd[24058]: exiting for shutdown
Oct 25 11:15:19 SC-1 osafamfnd[23844]: NO Restarting a component of 
'safSu=SC-1,safSg=NoRed,safApp=OpenSAF' (comp restart count: 2)
Oct 25 11:15:19 SC-1 osafamfnd[23844]: NO 
'safComp=CPND,safSu=SC-1,safSg=NoRed,safApp=OpenSAF' faulted due to 
'avaDown' : Recovery is 'componentRestart'
Oct 25 11:15:19 SC-1 osafckptnd[24080]: Started
Oct 25 11:15:19 SC-1 osafckptnd[24080]: ER cpnd shm open request failed 
safCkpt=checkpoint_tes_131343_1
Oct 25 11:15:19 SC-1 osafckptnd[24080]: ER cpnd shm open request failed 
safCkpt=checkpoint_tes_131343_1
 saCkptCheckpointClose  checkpointHandle 626040
Attempt 0-0:  saCkptCheckpointClose returned 12.
222 saCkptCheckpointClose  checkpointHandle 6261f0
Attempt 0-0:  saCkptCheckpointClose returned 12.
333 saCkptCheckpointClose  checkpointHandle 7f29fbdc7588
Attempt 0-0:  saCkptCheckpointClose returned 9.
 saCkptCheckpointClose  checkpointHandle 7fffb4a097d8
Attempt 0-0:  saCkptCheckpointClose returned 9.
555 saCkptCheckpointClose  checkpointHandle 7f29fbdf61a8
Attempt 0-0:  saCkptCheckpointClose returned 9.
 saCkptCheckpointOpen  returned checkpointHandle 626040
222 saCkptCheckpointOpen  returned checkpointHandle 628b40
  Before pkill osafckptnd  saCkptCheckpointOpen
root 24080 1  0 11:15 ?00:00:00 
/usr/lib64/opensaf/osafckptnd
root 24085 24038  0 11:15 pts/000:00:00 sh -c ps -ef | grep 
osafckptnd
root 24087 24085  0 11:15 pts/000:00:00 grep osafckptnd
Oct 25 11:15:26 SC-1 osafckptnd[24080]: exiting for shutdown
Oct 25 11:15:26 SC-1 osafamfnd[23844]: NO Restarting a component of 
'safSu=SC-1,safSg=NoRed,safApp=OpenSAF' (comp restart count: 3)
Oct 25 11:15:26 SC-1 osafamfnd[23844]: NO 
'safComp=CPND,safSu=SC-1,safSg=NoRed,safApp=OpenSAF' faulted due to 
'avaDown' : Recovery is 'componentRestart'
Oct 25 11:15:26 SC-1 osafckptd[23989]: NO cpnd_down_process:: Start 
CPND_RETENTION timer id = 0x663f10, arg=0x664020
Oct 25 11:15:26 SC-1 osafckptnd[24102]: Started
Oct 25 11:15:26 SC-1 osafckptnd[24102]: ER cpnd shm open request failed 
safCkpt=checkpoint_tes_131343_1
Oct 25 11:15:26 SC-1 osafckptnd[24102]: ER cpnd shm open request failed 
safCkpt=checkpoint_tes_131343_1
  VV saCkptCheckpointOpen 3rd may hit try again returned 18.
333 saCkptCheckpointOpen  returned checkpointHandle 0
  VV saCkptCheckpointOpen 4th returned may hit try again 
returned 12.
444 saCkptCheckpointOpen  returned checkpointHandle 0
  saCkptCheckpointOpen 5th returned 12.
 saCkptCheckpointOpen  returned checkpointHandle 0
  Before pkill osafckptnd & saCkptCheckpointClose
root 24102 1  0 11:15 ?00:00:00 
/usr/lib64/opensaf/osafckptnd
root 24107 24038  0 11:15 pts/000:00:00 sh -c ps -ef | grep 
osafckptnd
root 24109 24107  0 11:15 pts/000:00:00 grep osafckptnd
Oct 25 11:15:38 SC-1 osafckptnd[24102]: exiting for shutdown
Oct 25 11:15:38 SC-1 osafamfnd[23844]: NO Restarting a component of 
'safSu=SC-1,safSg=NoRed,safApp=OpenSAF' (comp restart 

Re: [devel] [PATCH 1 of 1] cpnd: use shared memory based on ckpt name length [#2108]

2016-10-19 Thread Anders Widell
Although it is technically possible to upgrade OpenSAF without rebooting 
the node, in practice this is neither tested nor recommended anyway. So 
I don't think you need to support "version 2" in the new code. We do 
support downgrade of OpenSAF, and I guess it can't be worse than 
downgrading to a previous version anyhow. You could verify that it 
doesn't crash if it encounters a shared memory with an unknown version.

regards,
Anders Widell

On 10/18/2016 11:17 AM, Vo Minh Hoang wrote:
> Dear Anders and Mahesh,
>
> Thank you very much for your comments.
>
> I would like to clarify the backward compatible satisfaction in this case.
> We have `small format shm` that is used from beginning (version 1).
> We also have `big format shm` that is introduced with Long DN feature
> (changeset 7949:815c56c74d18) (version 2).
> The question is:
> Do we need to make modification that compatible with both 2 kind of shm or
> just the original small one and complete remove the big one?
>
> As you might know, the 2 SHMs approach comes before this SHM swapping ideal
> but I postpone implementing it because of the complicated logic for working
> with both 2 old versions.
>
> If we don't need to care about version 2, I might introduce cleaner code
> solution.
>
> Thank you and best regards,
> Hoang
>
> -Original Message-
> From: A V Mahesh [mailto:mahesh.va...@oracle.com]
> Sent: Tuesday, October 18, 2016 3:51 PM
> To: Anders Widell ; Vo Minh Hoang
> 
> Cc: opensaf-devel@lists.sourceforge.net
> Subject: Re: [PATCH 1 of 1] cpnd: use shared memory based on ckpt name
> length [#2108]
>
> Hi Hoang,
>
> Swamping SHM looks more complex logic  to me while accessing old & new
> shm in transit ,
> and it will create more issue and will take some time to stabilize.
>
> Let us explore other options like Anders Widell suggested or any other,
> which can be simple and avoids the SHM swap.
>
> I assessed these issues and I was reluctant to have  Long DN for Ckpt
> service ,
> where their is no much piratical use case.
>
> -AVM
>
> On 10/18/2016 1:57 PM, Anders Widell wrote:
>> Maybe it is better to create a second shm containing a fixed-size
>> record of the remaining 2048-256 bytes of the DN? Then you will not
>> have to convert the shm format and creating a new shm segment should
>> be very quick.
>>
>> regards,
>>
>> Anders Widell
>>
>>
>> On 10/18/2016 09:29 AM, Vo Minh Hoang wrote:
>>> Dear Mahesh,
>>>
>>> Sorry I miss-sending incomplete email.
>>> This is full version.
>>> --
>>> I would like to send my answer to 2 of your concerning points in
>>> compound.
>>>
>>> Based on my understand, a client command affects shared mem by following
>>> behavior:
>>>
>>> Client --> CPA ==> CPND (1) ==> CPD (active) ==> CPND (has replica) (2)
>>> > update shm (3)
>>>
>>> When:
>>> --> Synchronous
>>> ==> Asynchronous
>>> (1) and (2) has same behavior to update shm and store pointer to shm
>>> (3) The modification only take place here include swapping shm and
>>> update
>>> pointers
>>>
>>> So even there are multiple call from multiple client at a time, CPND
>>> update
>>> shm in sequence. So just after the first request swaps shm, the second
>>> request could access shm. There is not case that 2 requests access
>>> shm at
>>> the same time.
>>> When shm already storing data, in swapping, CPND will update pointer
>>> so the
>>> next request that accesses old data can still work with updated
>>> pointer with
>>> same behavior.
>>>
>>> Thank you and best regards,
>>> Hoang
>>>
>>> -Original Message-
>>> From: Vo Minh Hoang [mailto:hoang.m...@dektech.com.au]
>>> Sent: Tuesday, October 18, 2016 2:15 PM
>>> To: 'A V Mahesh' 
>>> Cc: 'anders.wid...@ericsson.com' ;
>>> 'opensaf-devel@lists.sourceforge.net'
>>> 
>>> Subject: RE: [PATCH 1 of 1] cpnd: use shared memory based on ckpt name
>>> length [#2108]
>>>
>>> Dear Mahesh,
>>>
>>> I would like to send my answer to 2 of your concerning points in
>>> compound.
>>>
>>> Based on my understand, a client command affects shared mem by following
>>> behavior:
>>>
>>> Client --> CPA ==> CPND (1) ==> CPD (active) ==> CPND (has replica) (2)
>>> > update shm (3)
>>>
>>> When:
>>> -->
>>>
>>>
>>>
>>> -Original Message-
>>> From: A V Mahesh [mailto:mahesh.va...@oracle.com]
>>> Sent: Tuesday, October 18, 2016 1:10 PM
>>> To: Vo Minh Hoang 
>>> Cc: anders.wid...@ericsson.com; opensaf-devel@lists.sourceforge.net
>>> Subject: Re: [PATCH 1 of 1] cpnd: use shared memory based on ckpt name
>>> length [#2108]
>>>
>>> Hi Hoang,
>>>
>>>
>>> On 10/18/2016 11:24 AM, Vo Minh Hoang wrote:
 Dear Mahesh,

>> [AVM] A non-collated Ckpt will have two replicas on both Active and
 standby.
 Each node will receive one CPND_EVT_D2ND_CKPT_CREATE message so it
 handles
 swapping itself and does not affect 

Re: [devel] [PATCH 1 of 1] cpnd: use shared memory based on ckpt name length [#2108]

2016-10-18 Thread A V Mahesh
Hi Hoang,

Swapping SHM looks like more complex logic to me while accessing old & new
shm in transit,
and it will create more issues and will take some time to stabilize.

Let us explore other options, like the one Anders Widell suggested or any other,
which can be simple and avoid the SHM swap.

I assessed these issues and I was reluctant to have Long DN for the Ckpt
service,
where there is not much practical use case.

-AVM

On 10/18/2016 1:57 PM, Anders Widell wrote:
> Maybe it is better to create a second shm containing a fixed-size 
> record of the remaining 2048-256 bytes of the DN? Then you will not 
> have to convert the shm format and creating a new shm segment should 
> be very quick.
>
> regards,
>
> Anders Widell
>
>
> On 10/18/2016 09:29 AM, Vo Minh Hoang wrote:
>> Dear Mahesh,
>>
>> Sorry I miss-sending incomplete email.
>> This is full version.
>> --
>> I would like to send my answer to 2 of your concerning points in 
>> compound.
>>
>> Based on my understand, a client command affects shared mem by following
>> behavior:
>>
>> Client --> CPA ==> CPND (1) ==> CPD (active) ==> CPND (has replica) (2)
>> > update shm (3)
>>
>> When:
>> --> Synchronous
>> ==> Asynchronous
>> (1) and (2) has same behavior to update shm and store pointer to shm
>> (3) The modification only take place here include swapping shm and 
>> update
>> pointers
>>
>> So even there are multiple call from multiple client at a time, CPND 
>> update
>> shm in sequence. So just after the first request swaps shm, the second
>> request could access shm. There is not case that 2 requests access 
>> shm at
>> the same time.
>> When shm already storing data, in swapping, CPND will update pointer 
>> so the
>> next request that accesses old data can still work with updated 
>> pointer with
>> same behavior.
>>
>> Thank you and best regards,
>> Hoang
>>
>> -Original Message-
>> From: Vo Minh Hoang [mailto:hoang.m...@dektech.com.au]
>> Sent: Tuesday, October 18, 2016 2:15 PM
>> To: 'A V Mahesh' 
>> Cc: 'anders.wid...@ericsson.com' ;
>> 'opensaf-devel@lists.sourceforge.net' 
>> 
>> Subject: RE: [PATCH 1 of 1] cpnd: use shared memory based on ckpt name
>> length [#2108]
>>
>> Dear Mahesh,
>>
>> I would like to send my answer to 2 of your concerning points in 
>> compound.
>>
>> Based on my understand, a client command affects shared mem by following
>> behavior:
>>
>> Client --> CPA ==> CPND (1) ==> CPD (active) ==> CPND (has replica) (2)
>> > update shm (3)
>>
>> When:
>> -->
>>
>>
>>
>> -Original Message-
>> From: A V Mahesh [mailto:mahesh.va...@oracle.com]
>> Sent: Tuesday, October 18, 2016 1:10 PM
>> To: Vo Minh Hoang 
>> Cc: anders.wid...@ericsson.com; opensaf-devel@lists.sourceforge.net
>> Subject: Re: [PATCH 1 of 1] cpnd: use shared memory based on ckpt name
>> length [#2108]
>>
>> Hi Hoang,
>>
>>
>> On 10/18/2016 11:24 AM, Vo Minh Hoang wrote:
>>> Dear Mahesh,
>>>
> [AVM] A non-collated Ckpt will have two replicas on both Active and
>>> standby.
>>> Each node will receive one CPND_EVT_D2ND_CKPT_CREATE message so it 
>>> handles
>>> swapping itself and does not affect each other nor another.
>> [AVM]   I was taking about existing, swapping of existing `small 
>> format shm`
>>not  not new create request , where the ckpt is already opened
>> multiple nodes with ALL option.
> [AVM] piratically  we can have large size data & transit time, if ckt
>> pat
>>> has  large data  sham is file I/O operation
>   not middle-ware controlled activity , swap time will 
> vary
>>> depending on system.
>>> I am agree that this modification affects performance of create/open
>>> function so it need performance acceptance verification.
>>> Fortunately, shared mem is on memory so it is not heavily depend on 
>>> OS or
>>> file system (unless on swap memory area).
>>> Maybe I am not understand your ideal here but I have not found a clear
>>> reason of handling try-again.
>> [AVM] say for example an application is writing in a loop to old `small
>> format shm`,
>> at that moment you started conversation of old `small format shm` to
>> new  `big format`
>>
>> -AVM
>>
>>> Thank you and best regards,
>>> Hoang
>>>
>>> -Original Message-
>>> From: A V Mahesh [mailto:mahesh.va...@oracle.com]
>>> Sent: Tuesday, October 18, 2016 12:14 PM
>>> To: Vo Minh Hoang 
>>> Cc: anders.wid...@ericsson.com; opensaf-devel@lists.sourceforge.net
>>> Subject: Re: [PATCH 1 of 1] cpnd: use shared memory based on ckpt name
>>> length [#2108]
>>>
>>> Hi Hoan,
>>>
>>>
>>> On 10/18/2016 9:59 AM, Vo Minh Hoang wrote:
 Dear Mahesh,

 Thank you very much for your comments.

 I would like to explain my understanding and reason for this solution.
 Please correct me if I am wrong.

 - This memory swapping works on single node alone, it will occur
 maximum 

Re: [devel] [PATCH 1 of 1] cpnd: use shared memory based on ckpt name length [#2108]

2016-10-18 Thread Anders Widell
Maybe it is better to create a second shm containing a fixed-size record 
of the remaining 2048-256 bytes of the DN? Then you will not have to 
convert the shm format and creating a new shm segment should be very quick.

regards,

Anders Widell


On 10/18/2016 09:29 AM, Vo Minh Hoang wrote:
> Dear Mahesh,
>
> Sorry I miss-sending incomplete email.
> This is full version.
> --
> I would like to send my answer to 2 of your concerning points in compound.
>
> Based on my understand, a client command affects shared mem by following
> behavior:
>
> Client --> CPA ==> CPND (1) ==> CPD (active) ==> CPND (has replica) (2)
> > update shm (3)
>
> When:
> --> Synchronous
> ==> Asynchronous
> (1) and (2) has same behavior to update shm and store pointer to shm
> (3) The modification only take place here include swapping shm and update
> pointers
>
> So even there are multiple call from multiple client at a time, CPND update
> shm in sequence. So just after the first request swaps shm, the second
> request could access shm. There is not case that 2 requests access shm at
> the same time.
> When shm already storing data, in swapping, CPND will update pointer so the
> next request that accesses old data can still work with updated pointer with
> same behavior.
>
> Thank you and best regards,
> Hoang
>
> -Original Message-
> From: Vo Minh Hoang [mailto:hoang.m...@dektech.com.au]
> Sent: Tuesday, October 18, 2016 2:15 PM
> To: 'A V Mahesh' 
> Cc: 'anders.wid...@ericsson.com' ;
> 'opensaf-devel@lists.sourceforge.net' 
> Subject: RE: [PATCH 1 of 1] cpnd: use shared memory based on ckpt name
> length [#2108]
>
> Dear Mahesh,
>
> I would like to send my answer to 2 of your concerning points in compound.
>
> Based on my understand, a client command affects shared mem by following
> behavior:
>
> Client --> CPA ==> CPND (1) ==> CPD (active) ==> CPND (has replica) (2)
> > update shm (3)
>
> When:
> -->
>
>
>
> -Original Message-
> From: A V Mahesh [mailto:mahesh.va...@oracle.com]
> Sent: Tuesday, October 18, 2016 1:10 PM
> To: Vo Minh Hoang 
> Cc: anders.wid...@ericsson.com; opensaf-devel@lists.sourceforge.net
> Subject: Re: [PATCH 1 of 1] cpnd: use shared memory based on ckpt name
> length [#2108]
>
> Hi Hoang,
>
>
> On 10/18/2016 11:24 AM, Vo Minh Hoang wrote:
>> Dear Mahesh,
>>
 [AVM] A non-collated Ckpt will have two replicas on both Active and
>> standby.
>> Each node will receive one CPND_EVT_D2ND_CKPT_CREATE message so it handles
>> swapping itself and does not affect each other nor another.
> [AVM]   I was taking about existing, swapping of existing `small format shm`
>not  not new create request , where the ckpt is already opened
> multiple nodes with ALL option.
 [AVM] piratically  we can have large size data & transit time, if ckt
> pat
>> has  large data  sham is file I/O operation
   not middle-ware controlled activity , swap time will vary
>> depending on system.
>> I am agree that this modification affects performance of create/open
>> function so it need performance acceptance verification.
>> Fortunately, shared mem is on memory so it is not heavily depend on OS or
>> file system (unless on swap memory area).
>> Maybe I am not understand your ideal here but I have not found a clear
>> reason of handling try-again.
> [AVM] say for example an application is writing in a loop to old `small
> format shm`,
> at that moment you started conversation of old `small format shm` to
> new  `big format`
>
> -AVM
>
>> Thank you and best regards,
>> Hoang
>>
>> -Original Message-
>> From: A V Mahesh [mailto:mahesh.va...@oracle.com]
>> Sent: Tuesday, October 18, 2016 12:14 PM
>> To: Vo Minh Hoang 
>> Cc: anders.wid...@ericsson.com; opensaf-devel@lists.sourceforge.net
>> Subject: Re: [PATCH 1 of 1] cpnd: use shared memory based on ckpt name
>> length [#2108]
>>
>> Hi Hoan,
>>
>>
>> On 10/18/2016 9:59 AM, Vo Minh Hoang wrote:
>>> Dear Mahesh,
>>>
>>> Thank you very much for your comments.
>>>
>>> I would like to explain my understanding and reason for this solution.
>>> Please correct me if I am wrong.
>>>
>>> - This memory swapping works on single node alone, it will occur
>>> maximum once per node in open/create checkpoint process.
>>> - This swapping action just takes place in nodes that meet condition
>>> and does not affect other node.
>> [AVM] A non-collated Ckpt will have two replicas on both Active and
> standby
>> .
>>> - CPND handles open/create processes atomically in sequence in one
>>> thread only.
>>>
>>> Because of that I think it is unnecessary to implement thread
>>> synchronizing or `try-again` handling.
>> [AVM] piratically  we can have large size data & transit time, if ckt pat
>> has  large data  sham is file I/O operation
>>not middle-ware controlled activity , swap time will vary
>> 

Re: [devel] [PATCH 1 of 1] cpnd: use shared memory based on ckpt name length [#2108]

2016-10-18 Thread Vo Minh Hoang
Dear Mahesh,

Sorry, I mistakenly sent an incomplete email.
This is the full version.
--
I would like to answer your 2 points of concern together.

Based on my understanding, a client command affects shared memory through the
following behavior:

Client --> CPA ==> CPND (1) ==> CPD (active) ==> CPND (has replica) (2)
> update shm (3)

When:
--> Synchronous
==> Asynchronous
(1) and (2) have the same behavior for updating shm and storing the pointer to shm
(3) The modification only takes place here, including swapping shm and updating
pointers

So even if there are multiple calls from multiple clients at a time, CPND updates
shm in sequence. So just after the first request swaps shm, the second
request can access shm. There is no case where 2 requests access shm at
the same time.
When shm is already storing data, during swapping CPND will update the pointers
so the next request that accesses old data can still work with the updated
pointers with the same behavior.

Thank you and best regards,
Hoang

-Original Message-
From: Vo Minh Hoang [mailto:hoang.m...@dektech.com.au] 
Sent: Tuesday, October 18, 2016 2:15 PM
To: 'A V Mahesh' 
Cc: 'anders.wid...@ericsson.com' ;
'opensaf-devel@lists.sourceforge.net' 
Subject: RE: [PATCH 1 of 1] cpnd: use shared memory based on ckpt name
length [#2108]

Dear Mahesh,

I would like to send my answer to 2 of your concerning points in compound.

Based on my understand, a client command affects shared mem by following
behavior:

Client --> CPA ==> CPND (1) ==> CPD (active) ==> CPND (has replica) (2)
> update shm (3)

When:
--> 



-Original Message-
From: A V Mahesh [mailto:mahesh.va...@oracle.com] 
Sent: Tuesday, October 18, 2016 1:10 PM
To: Vo Minh Hoang 
Cc: anders.wid...@ericsson.com; opensaf-devel@lists.sourceforge.net
Subject: Re: [PATCH 1 of 1] cpnd: use shared memory based on ckpt name
length [#2108]

Hi Hoang,


On 10/18/2016 11:24 AM, Vo Minh Hoang wrote:
> Dear Mahesh,
>
>>> [AVM] A non-collated Ckpt will have two replicas on both Active and
> standby.
> Each node will receive one CPND_EVT_D2ND_CKPT_CREATE message so it handles
> swapping itself and does not affect each other nor another.
[AVM]   I was taking about existing, swapping of existing `small format shm`
  not  not new create request , where the ckpt is already opened 
multiple nodes with ALL option.
>
>>> [AVM] piratically  we can have large size data & transit time, if ckt
pat
> has  large data  sham is file I/O operation
>>>  not middle-ware controlled activity , swap time will vary
> depending on system.
> I am agree that this modification affects performance of create/open
> function so it need performance acceptance verification.
> Fortunately, shared mem is on memory so it is not heavily depend on OS or
> file system (unless on swap memory area).
> Maybe I am not understand your ideal here but I have not found a clear
> reason of handling try-again.
[AVM] Say, for example, an application is writing in a loop to the old `small
format shm`,
and at that moment you start the conversion of the old `small format shm` to
the new `big format`.

-AVM

>
> Thank you and best regards,
> Hoang
>
> -Original Message-
> From: A V Mahesh [mailto:mahesh.va...@oracle.com]
> Sent: Tuesday, October 18, 2016 12:14 PM
> To: Vo Minh Hoang 
> Cc: anders.wid...@ericsson.com; opensaf-devel@lists.sourceforge.net
> Subject: Re: [PATCH 1 of 1] cpnd: use shared memory based on ckpt name
> length [#2108]
>
> Hi Hoan,
>
>
> On 10/18/2016 9:59 AM, Vo Minh Hoang wrote:
>> Dear Mahesh,
>>
>> Thank you very much for your comments.
>>
>> I would like to explain my understanding and reason for this solution.
>> Please correct me if I am wrong.
>>
>> - This memory swapping works on single node alone, it will occur
>> maximum once per node in open/create checkpoint process.
>> - This swapping action just takes place in nodes that meet condition
>> and does not affect other node.
> [AVM] A non-collated Ckpt will have two replicas on both Active and
standby
> .
>> - CPND handles open/create processes atomically in sequence in one
>> thread only.
>>
>> Because of that I think it is unnecessary to implement thread
>> synchronizing or `try-again` handling.
> [AVM] piratically  we can have large size data & transit time, if ckt pat
> has  large data  sham is file I/O operation
>   not middle-ware controlled activity , swap time will vary
> depending on system.
>> Sincerely,
>> Hoang
>>
>> -Original Message-
>> From: A V Mahesh [mailto:mahesh.va...@oracle.com]
>> Sent: Tuesday, October 18, 2016 10:48 AM
>> To: Vo Minh Hoang 
>> Cc: anders.wid...@ericsson.com; opensaf-devel@lists.sourceforge.net
>> Subject: Re: [PATCH 1 of 1] cpnd: use shared memory based on ckpt name
>> length [#2108]
>>
>> Hi Hoang,
>>
>> On 10/13/2016 12:44 PM, Vo Minh Hoang wrote:
>>> No, old checkpoint data 

Re: [devel] [PATCH 1 of 1] cpnd: use shared memory based on ckpt name length [#2108]

2016-10-18 Thread Vo Minh Hoang
Dear Mahesh,

I would like to answer your 2 points of concern together.

Based on my understanding, a client command affects shared memory through the
following behavior:

Client --> CPA ==> CPND (1) ==> CPD (active) ==> CPND (has replica) (2)
> update shm (3)

When:
--> 



-Original Message-
From: A V Mahesh [mailto:mahesh.va...@oracle.com] 
Sent: Tuesday, October 18, 2016 1:10 PM
To: Vo Minh Hoang 
Cc: anders.wid...@ericsson.com; opensaf-devel@lists.sourceforge.net
Subject: Re: [PATCH 1 of 1] cpnd: use shared memory based on ckpt name
length [#2108]

Hi Hoang,


On 10/18/2016 11:24 AM, Vo Minh Hoang wrote:
> Dear Mahesh,
>
>>> [AVM] A non-collated Ckpt will have two replicas on both Active and
> standby.
> Each node will receive one CPND_EVT_D2ND_CKPT_CREATE message so it handles
> swapping itself and does not affect each other nor another.
[AVM]   I was taking about existing, swapping of existing `small format shm`
  not  not new create request , where the ckpt is already opened 
multiple nodes with ALL option.
>
>>> [AVM] piratically  we can have large size data & transit time, if ckt
pat
> has  large data  sham is file I/O operation
>>>  not middle-ware controlled activity , swap time will vary
> depending on system.
> I am agree that this modification affects performance of create/open
> function so it need performance acceptance verification.
> Fortunately, shared mem is on memory so it is not heavily depend on OS or
> file system (unless on swap memory area).
> Maybe I am not understand your ideal here but I have not found a clear
> reason of handling try-again.
[AVM] Say, for example, an application is writing in a loop to the old `small
format shm`,
and at that moment you start the conversion of the old `small format shm` to
the new `big format`.

-AVM

>
> Thank you and best regards,
> Hoang
>
> -Original Message-
> From: A V Mahesh [mailto:mahesh.va...@oracle.com]
> Sent: Tuesday, October 18, 2016 12:14 PM
> To: Vo Minh Hoang 
> Cc: anders.wid...@ericsson.com; opensaf-devel@lists.sourceforge.net
> Subject: Re: [PATCH 1 of 1] cpnd: use shared memory based on ckpt name
> length [#2108]
>
> Hi Hoan,
>
>
> On 10/18/2016 9:59 AM, Vo Minh Hoang wrote:
>> Dear Mahesh,
>>
>> Thank you very much for your comments.
>>
>> I would like to explain my understanding and reason for this solution.
>> Please correct me if I am wrong.
>>
>> - This memory swapping works on single node alone, it will occur
>> maximum once per node in open/create checkpoint process.
>> - This swapping action just takes place in nodes that meet condition
>> and does not affect other node.
> [AVM] A non-collated Ckpt will have two replicas on both Active and
standby
> .
>> - CPND handles open/create processes atomically in sequence in one
>> thread only.
>>
>> Because of that I think it is unnecessary to implement thread
>> synchronizing or `try-again` handling.
> [AVM] piratically  we can have large size data & transit time, if ckt pat
> has  large data  sham is file I/O operation
>   not middle-ware controlled activity , swap time will vary
> depending on system.
>> Sincerely,
>> Hoang
>>
>> -Original Message-
>> From: A V Mahesh [mailto:mahesh.va...@oracle.com]
>> Sent: Tuesday, October 18, 2016 10:48 AM
>> To: Vo Minh Hoang 
>> Cc: anders.wid...@ericsson.com; opensaf-devel@lists.sourceforge.net
>> Subject: Re: [PATCH 1 of 1] cpnd: use shared memory based on ckpt name
>> length [#2108]
>>
>> Hi Hoang,
>>
>> On 10/13/2016 12:44 PM, Vo Minh Hoang wrote:
>>> No, old checkpoint data is converted to `big format`.
>>> So all of them will be stored in `big format`.
>> [AVM] This approach is introducing NEW transit ,  so far application
>> are aware of  switch-over & fail-over transit and TRY-AGAIN is
>> expected only in those case , now this solution is introducing  a new
>> transit  for the application which are accessioning the old  (by the
>> way this patch didn't implemented TRY-AGAIN when shared memory
>> swapping action occurring)
>>
>> `small format shm`, up on some application creating  `big format` (
>> application impacting the HA behavior )
>> not sure about the solution approach need to discussed !
>>
>> -AVM
>>
>> On 10/13/2016 12:44 PM, Vo Minh Hoang wrote:
>>> Dear Mahesh,
>>>
>>> Because of keeping the consistent working behavior of existing
>>> function, only 1 shared memory at a time. If shared memory swapping
>>> action occurs, a new shared memory will replace old one.
>>>
>>> Here is the detailed answers to your questions:
> -The  existing  `small format shm`  will continue to be small , is
> that
>>> right ?
> -Only newly created longDN checkpoint will be in `big format shm`,
> is
>>> that right ?
>>> No, old checkpoint data is converted to `big format`.
>>> So all of them will be stored in `big format`.
>>>
> - what will be the format of newly joined the PL-5 opens  an
> 

Re: [devel] [PATCH 1 of 1] cpnd: use shared memory based on ckpt name length [#2108]

2016-10-18 Thread A V Mahesh
Hi Hoang,


On 10/18/2016 11:24 AM, Vo Minh Hoang wrote:
> Dear Mahesh,
>
>>> [AVM] A non-collated Ckpt will have two replicas on both Active and
> standby.
> Each node will receive one CPND_EVT_D2ND_CKPT_CREATE message so it handles
> swapping itself and does not affect each other nor another.
[AVM]   I was talking about swapping of the existing `small format shm`,
  not a new create request, where the ckpt is already opened on
multiple nodes with the ALL option.
>
>>> [AVM] piratically  we can have large size data & transit time, if ckt pat
> has  large data  sham is file I/O operation
>>>  not middle-ware controlled activity , swap time will vary
> depending on system.
> I am agree that this modification affects performance of create/open
> function so it need performance acceptance verification.
> Fortunately, shared mem is on memory so it is not heavily depend on OS or
> file system (unless on swap memory area).
> Maybe I am not understand your ideal here but I have not found a clear
> reason of handling try-again.
[AVM] Say, for example, an application is writing in a loop to the old `small
format shm`,
and at that moment you start the conversion of the old `small format shm` to
the new `big format`.

-AVM

>
> Thank you and best regards,
> Hoang
>
> -Original Message-
> From: A V Mahesh [mailto:mahesh.va...@oracle.com]
> Sent: Tuesday, October 18, 2016 12:14 PM
> To: Vo Minh Hoang 
> Cc: anders.wid...@ericsson.com; opensaf-devel@lists.sourceforge.net
> Subject: Re: [PATCH 1 of 1] cpnd: use shared memory based on ckpt name
> length [#2108]
>
> Hi Hoan,
>
>
> On 10/18/2016 9:59 AM, Vo Minh Hoang wrote:
>> Dear Mahesh,
>>
>> Thank you very much for your comments.
>>
>> I would like to explain my understanding and reason for this solution.
>> Please correct me if I am wrong.
>>
>> - This memory swapping works on single node alone, it will occur
>> maximum once per node in open/create checkpoint process.
>> - This swapping action just takes place in nodes that meet condition
>> and does not affect other node.
> [AVM] A non-collated Ckpt will have two replicas on both Active and standby
> .
>> - CPND handles open/create processes atomically in sequence in one
>> thread only.
>>
>> Because of that I think it is unnecessary to implement thread
>> synchronizing or `try-again` handling.
> [AVM] piratically  we can have large size data & transit time, if ckt pat
> has  large data  sham is file I/O operation
>   not middle-ware controlled activity , swap time will vary
> depending on system.
>> Sincerely,
>> Hoang
>>
>> -Original Message-
>> From: A V Mahesh [mailto:mahesh.va...@oracle.com]
>> Sent: Tuesday, October 18, 2016 10:48 AM
>> To: Vo Minh Hoang 
>> Cc: anders.wid...@ericsson.com; opensaf-devel@lists.sourceforge.net
>> Subject: Re: [PATCH 1 of 1] cpnd: use shared memory based on ckpt name
>> length [#2108]
>>
>> Hi Hoang,
>>
>> On 10/13/2016 12:44 PM, Vo Minh Hoang wrote:
>>> No, old checkpoint data is converted to `big format`.
>>> So all of them will be stored in `big format`.
>> [AVM] This approach is introducing NEW transit ,  so far application
>> are aware of  switch-over & fail-over transit and TRY-AGAIN is
>> expected only in those case , now this solution is introducing  a new
>> transit  for the application which are accessioning the old  (by the
>> way this patch didn't implemented TRY-AGAIN when shared memory
>> swapping action occurring)
>>
>> `small format shm`, up on some application creating  `big format` (
>> application impacting the HA behavior )
>> not sure about the solution approach need to discussed !
>>
>> -AVM
>>
>> On 10/13/2016 12:44 PM, Vo Minh Hoang wrote:
>>> Dear Mahesh,
>>>
>>> Because of keeping the consistent working behavior of existing
>>> function, only 1 shared memory at a time. If shared memory swapping
>>> action occurs, a new shared memory will replace old one.
>>>
>>> Here is the detailed answers to your questions:
> -The  existing  `small format shm`  will continue to be small , is
> that
>>> right ?
> -Only newly created longDN checkpoint will be in `big format shm`,
> is
>>> that right ?
>>> No, old checkpoint data is converted to `big format`.
>>> So all of them will be stored in `big format`.
>>>
> - what will be the format of newly joined the PL-5 opens  an
> existing
>>> `small format shm`
>>> PL-5 still use `small format`.
>>> Only when a long DN replica is added in this node, the shared memory
>>> is converted to `big format`.
> the what will be the new replica  on new node `small format shm`
> or `big
>>> format shm` ?
>>> This implementation only affect the `header` shared memory
>>> (opensaf_CPND_CHECKPOINT_INFO_nodeid). It do not change replica
>>> shared memory (opensaf_ckptname_nodeid_n).
>>>
>>> About testing, because of above specification, I tested:
>>> - start new node
>>> - restart ckptnd with existing small shm
>>> - 

Re: [devel] [PATCH 1 of 1] cpnd: use shared memory based on ckpt name length [#2108]

2016-10-17 Thread Vo Minh Hoang
Dear Mahesh,

>> [AVM] A non-collated Ckpt will have two replicas on both Active and
standby.
Each node will receive one CPND_EVT_D2ND_CKPT_CREATE message, so each node handles
the swapping itself and does not affect any other node.

>> [AVM] piratically  we can have large size data & transit time, if ckt pat
has  large data  sham is file I/O operation
>> not middle-ware controlled activity , swap time will vary
depending on system.
I agree that this modification affects the performance of the create/open
function, so it needs performance acceptance verification.
Fortunately, shared memory resides in memory, so it does not depend heavily on the OS or
file system (unless it is in the swap area).
Maybe I do not understand your idea here, but I have not found a clear
reason for handling try-again.

Thank you and best regards,
Hoang

-Original Message-
From: A V Mahesh [mailto:mahesh.va...@oracle.com] 
Sent: Tuesday, October 18, 2016 12:14 PM
To: Vo Minh Hoang 
Cc: anders.wid...@ericsson.com; opensaf-devel@lists.sourceforge.net
Subject: Re: [PATCH 1 of 1] cpnd: use shared memory based on ckpt name
length [#2108]

Hi Hoan,


On 10/18/2016 9:59 AM, Vo Minh Hoang wrote:
> Dear Mahesh,
>
> Thank you very much for your comments.
>
> I would like to explain my understanding and reason for this solution.
> Please correct me if I am wrong.
>
> - This memory swapping works on single node alone, it will occur 
> maximum once per node in open/create checkpoint process.
> - This swapping action just takes place in nodes that meet condition 
> and does not affect other node.
[AVM] A non-collated Ckpt will have two replicas on both Active and standby
.
> - CPND handles open/create processes atomically in sequence in one 
> thread only.
>
> Because of that I think it is unnecessary to implement thread 
> synchronizing or `try-again` handling.
[AVM] Practically, we can have large data size & transit time; if the ckpt
has large data, shm is a file I/O operation,
 not a middleware-controlled activity, so swap time will vary
depending on the system.
>
> Sincerely,
> Hoang
>
> -Original Message-
> From: A V Mahesh [mailto:mahesh.va...@oracle.com]
> Sent: Tuesday, October 18, 2016 10:48 AM
> To: Vo Minh Hoang 
> Cc: anders.wid...@ericsson.com; opensaf-devel@lists.sourceforge.net
> Subject: Re: [PATCH 1 of 1] cpnd: use shared memory based on ckpt name 
> length [#2108]
>
> Hi Hoang,
>
> On 10/13/2016 12:44 PM, Vo Minh Hoang wrote:
>> No, old checkpoint data is converted to `big format`.
>> So all of them will be stored in `big format`.
> [AVM] This approach is introducing NEW transit ,  so far application 
> are aware of  switch-over & fail-over transit and TRY-AGAIN is 
> expected only in those case , now this solution is introducing  a new 
> transit  for the application which are accessioning the old  (by the 
> way this patch didn't implemented TRY-AGAIN when shared memory 
> swapping action occurring)
>
> `small format shm`, up on some application creating  `big format` ( 
> application impacting the HA behavior )
>not sure about the solution approach need to discussed !
>
> -AVM
>
> On 10/13/2016 12:44 PM, Vo Minh Hoang wrote:
>> Dear Mahesh,
>>
>> Because of keeping the consistent working behavior of existing 
>> function, only 1 shared memory at a time. If shared memory swapping 
>> action occurs, a new shared memory will replace old one.
>>
>> Here is the detailed answers to your questions:
 -The  existing  `small format shm`  will continue to be small , is 
 that
>> right ?
 -Only newly created longDN checkpoint will be in `big format shm`, 
 is
>> that right ?
>> No, old checkpoint data is converted to `big format`.
>> So all of them will be stored in `big format`.
>>
 - what will be the format of newly joined the PL-5 opens  an 
 existing
>> `small format shm`
>> PL-5 still use `small format`.
>> Only when a long DN replica is added in this node, the shared memory 
>> is converted to `big format`.
the what will be the new replica  on new node `small format shm` 
 or `big
>> format shm` ?
>> This implementation only affect the `header` shared memory 
>> (opensaf_CPND_CHECKPOINT_INFO_nodeid). It do not change replica 
>> shared memory (opensaf_ckptname_nodeid_n).
>>
>> About testing, because of above specification, I tested:
>> - start new node
>> - restart ckptnd with existing small shm
>> - restart ckptnd with existing big shm
>> - create first long dn (check all node)
>>
>> Thank you and best regards,
>> Hoang
>>
>> -Original Message-
>> From: A V Mahesh [mailto:mahesh.va...@oracle.com]
>> Sent: Thursday, October 13, 2016 1:33 PM
>> To: Hoang Vo ; anders.wid...@ericsson.com
>> Cc: opensaf-devel@lists.sourceforge.net
>> Subject: Re: [PATCH 1 of 1] cpnd: use shared memory based on ckpt 
>> name length [#2108]
>>
>> Hi Hoang,
>>
>>>> - Run time cpnd keep using small format shm until first 

Re: [devel] [PATCH 1 of 1] cpnd: use shared memory based on ckpt name length [#2108]

2016-10-17 Thread A V Mahesh
Hi Hoan,


On 10/18/2016 9:59 AM, Vo Minh Hoang wrote:
> Dear Mahesh,
>
> Thank you very much for your comments.
>
> I would like to explain my understanding and reason for this solution.
> Please correct me if I am wrong.
>
> - This memory swapping works on single node alone, it will occur maximum
> once per node in open/create checkpoint process.
> - This swapping action just takes place in nodes that meet condition and
> does not affect other node.
[AVM] A non-collocated ckpt will have two replicas, on both Active and
standby.
> - CPND handles open/create processes atomically in sequence in one thread
> only.
>
> Because of that I think it is unnecessary to implement thread synchronizing
> or `try-again` handling.
[AVM] Practically, we can have large data size & transit time; if the
ckpt has large data, shm is a file I/O operation,
 not a middleware-controlled activity, so swap time will vary
depending on the system.
>
> Sincerely,
> Hoang
>
> -Original Message-
> From: A V Mahesh [mailto:mahesh.va...@oracle.com]
> Sent: Tuesday, October 18, 2016 10:48 AM
> To: Vo Minh Hoang 
> Cc: anders.wid...@ericsson.com; opensaf-devel@lists.sourceforge.net
> Subject: Re: [PATCH 1 of 1] cpnd: use shared memory based on ckpt name
> length [#2108]
>
> Hi Hoang,
>
> On 10/13/2016 12:44 PM, Vo Minh Hoang wrote:
>> No, old checkpoint data is converted to `big format`.
>> So all of them will be stored in `big format`.
> [AVM] This approach is introducing NEW transit ,  so far application are
> aware of  switch-over & fail-over transit and TRY-AGAIN is expected only in
> those case , now this solution is introducing  a new transit  for the
> application which are accessioning the old  (by the way this patch didn't
> implemented TRY-AGAIN when shared memory swapping action occurring)
>
> `small format shm`, up on some application creating  `big format` (
> application impacting the HA behavior )
>not sure about the solution approach need to discussed !
>
> -AVM
>
> On 10/13/2016 12:44 PM, Vo Minh Hoang wrote:
>> Dear Mahesh,
>>
>> Because of keeping the consistent working behavior of existing
>> function, only 1 shared memory at a time. If shared memory swapping
>> action occurs, a new shared memory will replace old one.
>>
>> Here is the detailed answers to your questions:
 -The  existing  `small format shm`  will continue to be small , is
 that
>> right ?
 -Only newly created longDN checkpoint will be in `big format shm`,
 is
>> that right ?
>> No, old checkpoint data is converted to `big format`.
>> So all of them will be stored in `big format`.
>>
 - what will be the format of newly joined the PL-5 opens  an
 existing
>> `small format shm`
>> PL-5 still use `small format`.
>> Only when a long DN replica is added in this node, the shared memory
>> is converted to `big format`.
the what will be the new replica  on new node `small format shm`
 or `big
>> format shm` ?
>> This implementation only affect the `header` shared memory
>> (opensaf_CPND_CHECKPOINT_INFO_nodeid). It do not change replica shared
>> memory (opensaf_ckptname_nodeid_n).
>>
>> About testing, because of above specification, I tested:
>> - start new node
>> - restart ckptnd with existing small shm
>> - restart ckptnd with existing big shm
>> - create first long dn (check all node)
>>
>> Thank you and best regards,
>> Hoang
>>
>> -Original Message-
>> From: A V Mahesh [mailto:mahesh.va...@oracle.com]
>> Sent: Thursday, October 13, 2016 1:33 PM
>> To: Hoang Vo ; anders.wid...@ericsson.com
>> Cc: opensaf-devel@lists.sourceforge.net
>> Subject: Re: [PATCH 1 of 1] cpnd: use shared memory based on ckpt name
>> length [#2108]
>>
>> Hi Hoang,
>>
>>>> - Run time cpnd keep using small format shm until first longDN
>> checkpoint is created.
>>>> After that cpnd use big format shm.
>>
>> While reviewing I am assuming following please confirm  :
>>
>> -The  existing  `small format shm`  will continue to be small , is
>> that right ?
>> -Only newly created longDN checkpoint will be in `big format shm`, is
>> that right ?
>> - what will be the format of newly joined the PL-5 opens  an existing
>> `small format shm`
>>  the what will be the new replica  on new node `small format shm`
>> or `big format shm` ?
>>
>>
>> I hope you  tested following :
>> ==
>> - combination of some `small format shm`  and some  `big format shm`
>> ckpts
>> - Joined a New node ( say PL-5)  and then opened the existing `small
>> format shm` ckpt from the new Node
>> - Restating controller which has combination of  `small format shm`
>> and `big format shm` and how the restored non-collocated ckpt`s
>>
>> -AVM
>>
>> On 10/11/2016 1:15 PM, Hoang Vo wrote:
>>> osaf/libs/common/cpsv/include/cpsv_shm.h |9 +-
>>> osaf/services/saf/cpsv/cpnd/cpnd_res.c   |  565
>> --
>>> 2 files changed, 536 insertions(+), 38 

Re: [devel] [PATCH 1 of 1] cpnd: use shared memory based on ckpt name length [#2108]

2016-10-17 Thread Vo Minh Hoang
Dear Mahesh,

Thank you very much for your comments.

I would like to explain my understanding and reason for this solution.
Please correct me if I am wrong.

- This memory swapping works on a single node alone; it will occur at most
once per node, in the open/create checkpoint process.
- This swapping action only takes place on nodes that meet the condition and
does not affect other nodes.
- CPND handles open/create processes atomically, in sequence, in one thread
only.

Because of that I think it is unnecessary to implement thread synchronizing
or `try-again` handling.

Sincerely,
Hoang

-Original Message-
From: A V Mahesh [mailto:mahesh.va...@oracle.com] 
Sent: Tuesday, October 18, 2016 10:48 AM
To: Vo Minh Hoang 
Cc: anders.wid...@ericsson.com; opensaf-devel@lists.sourceforge.net
Subject: Re: [PATCH 1 of 1] cpnd: use shared memory based on ckpt name
length [#2108]

Hi Hoang,

On 10/13/2016 12:44 PM, Vo Minh Hoang wrote:
> No, old checkpoint data is converted to `big format`.
> So all of them will be stored in `big format`.
[AVM] This approach is introducing NEW transit ,  so far application are
aware of  switch-over & fail-over transit and TRY-AGAIN is expected only in
those case , now this solution is introducing  a new transit  for the
application which are accessioning the old  (by the way this patch didn't
implemented TRY-AGAIN when shared memory swapping action occurring)

`small format shm`, up on some application creating  `big format` (
application impacting the HA behavior )
  not sure about the solution approach need to discussed !

-AVM

On 10/13/2016 12:44 PM, Vo Minh Hoang wrote:
> Dear Mahesh,
>
> Because of keeping the consistent working behavior of existing 
> function, only 1 shared memory at a time. If shared memory swapping 
> action occurs, a new shared memory will replace old one.
>
> Here is the detailed answers to your questions:
>>> -The  existing  `small format shm`  will continue to be small , is 
>>> that
> right ?
>>> -Only newly created longDN checkpoint will be in `big format shm`, 
>>> is
> that right ?
> No, old checkpoint data is converted to `big format`.
> So all of them will be stored in `big format`.
>
>>> - what will be the format of newly joined the PL-5 opens  an 
>>> existing
> `small format shm`
> PL-5 still use `small format`.
> Only when a long DN replica is added in this node, the shared memory 
> is converted to `big format`.
>>>   the what will be the new replica  on new node `small format shm` 
>>> or `big
> format shm` ?
> This implementation only affect the `header` shared memory 
> (opensaf_CPND_CHECKPOINT_INFO_nodeid). It do not change replica shared 
> memory (opensaf_ckptname_nodeid_n).
>
> About testing, because of above specification, I tested:
> - start new node
> - restart ckptnd with existing small shm
> - restart ckptnd with existing big shm
> - create first long dn (check all node)
>
> Thank you and best regards,
> Hoang
>
> -Original Message-
> From: A V Mahesh [mailto:mahesh.va...@oracle.com]
> Sent: Thursday, October 13, 2016 1:33 PM
> To: Hoang Vo ; anders.wid...@ericsson.com
> Cc: opensaf-devel@lists.sourceforge.net
> Subject: Re: [PATCH 1 of 1] cpnd: use shared memory based on ckpt name 
> length [#2108]
>
> Hi Hoang,
>
>   >> - Run time cpnd keep using small format shm until first longDN 
> checkpoint is created.
>   >> After that cpnd use big format shm.
>
> While reviewing I am assuming following please confirm  :
>
> -The  existing  `small format shm`  will continue to be small , is 
> that right ?
> -Only newly created longDN checkpoint will be in `big format shm`, is 
> that right ?
> - what will be the format of newly joined the PL-5 opens  an existing 
> `small format shm`
> the what will be the new replica  on new node `small format shm` 
> or `big format shm` ?
>
>
> I hope you  tested following :
> ==
> - combination of some `small format shm`  and some  `big format shm`  
> ckpts
> - Joined a New node ( say PL-5)  and then opened the existing `small 
> format shm` ckpt from the new Node
> - Restating controller which has combination of  `small format shm` 
> and `big format shm` and how the restored non-collocated ckpt`s
>
> -AVM
>
> On 10/11/2016 1:15 PM, Hoang Vo wrote:
>>osaf/libs/common/cpsv/include/cpsv_shm.h |9 +-
>>osaf/services/saf/cpsv/cpnd/cpnd_res.c   |  565
> --
>>2 files changed, 536 insertions(+), 38 deletions(-)
>>
>>
>> problem: In the case of CKPT osafckptnd increased 3,5Mb - 240 percent 
>> on all nodes CKPT_INFO size inscrease when support longDN lead to 
>> total
> size increase.
>> solution:
>> - From start, cpnd use small format shm.
>> - Run time cpnd keep using small format shm until first longDN 
>> checkpoint
> is created.
>> After that cpnd use big format shm.
>>
>> diff --git a/osaf/libs/common/cpsv/include/cpsv_shm.h
>> b/osaf/libs/common/cpsv/include/cpsv_shm.h
>> --- 

Re: [devel] [PATCH 1 of 1] cpnd: use shared memory based on ckpt name length [#2108]

2016-10-17 Thread A V Mahesh
Hi Hoang,

On 10/13/2016 12:44 PM, Vo Minh Hoang wrote:
> No, old checkpoint data is converted to `big format`.
> So all of them will be stored in `big format`.
[AVM] This approach is introducing a NEW transit. So far, applications are
aware of switch-over & fail-over transit,
and TRY-AGAIN is expected only in those cases. Now this solution is
introducing a new transit for the applications
which are accessing the old (by the way, this patch didn't
implement TRY-AGAIN while the shared memory swapping action is occurring)

`small format shm`, upon some application creating `big format` (
an application impacting the HA behavior).
  Not sure about the solution approach; it needs to be discussed!

-AVM

On 10/13/2016 12:44 PM, Vo Minh Hoang wrote:
> Dear Mahesh,
>
> Because of keeping the consistent working behavior of existing function,
> only 1 shared memory at a time. If shared memory swapping action occurs, a
> new shared memory will replace old one.
>
> Here is the detailed answers to your questions:
>>> -The  existing  `small format shm`  will continue to be small , is that
> right ?
>>> -Only newly created longDN checkpoint will be in `big format shm`, is
> that right ?
> No, old checkpoint data is converted to `big format`.
> So all of them will be stored in `big format`.
>
>>> - what will be the format of newly joined the PL-5 opens  an existing
> `small format shm`
> PL-5 still use `small format`.
> Only when a long DN replica is added in this node, the shared memory is
> converted to `big format`.
>>>   the what will be the new replica  on new node `small format shm` or `big
> format shm` ?
> This implementation only affect the `header` shared memory
> (opensaf_CPND_CHECKPOINT_INFO_nodeid). It do not change replica shared
> memory (opensaf_ckptname_nodeid_n).
>
> About testing, because of above specification, I tested:
> - start new node
> - restart ckptnd with existing small shm
> - restart ckptnd with existing big shm
> - create first long dn (check all node)
>
> Thank you and best regards,
> Hoang
>
> -Original Message-
> From: A V Mahesh [mailto:mahesh.va...@oracle.com]
> Sent: Thursday, October 13, 2016 1:33 PM
> To: Hoang Vo ; anders.wid...@ericsson.com
> Cc: opensaf-devel@lists.sourceforge.net
> Subject: Re: [PATCH 1 of 1] cpnd: use shared memory based on ckpt name
> length [#2108]
>
> Hi Hoang,
>
>   >> - Run time cpnd keep using small format shm until first longDN
> checkpoint is created.
>   >> After that cpnd use big format shm.
>
> While reviewing I am assuming following please confirm  :
>
> -The  existing  `small format shm`  will continue to be small , is that
> right ?
> -Only newly created longDN checkpoint will be in `big format shm`, is that
> right ?
> - what will be the format of newly joined the PL-5 opens  an existing `small
> format shm`
> the what will be the new replica  on new node `small format shm` or `big
> format shm` ?
>
>
> I hope you  tested following :
> ==
> - combination of some `small format shm`  and some  `big format shm`  ckpts
> - Joined a New node ( say PL-5)  and then opened the existing `small format
> shm` ckpt from the new Node
> - Restating controller which has combination of  `small format shm` and `big
> format shm` and how the restored non-collocated ckpt`s
>
> -AVM
>
> On 10/11/2016 1:15 PM, Hoang Vo wrote:
>>osaf/libs/common/cpsv/include/cpsv_shm.h |9 +-
>>osaf/services/saf/cpsv/cpnd/cpnd_res.c   |  565
> --
>>2 files changed, 536 insertions(+), 38 deletions(-)
>>
>>
>> problem: In the case of CKPT osafckptnd increased 3,5Mb - 240 percent
>> on all nodes CKPT_INFO size inscrease when support longDN lead to total
> size increase.
>> solution:
>> - From start, cpnd use small format shm.
>> - Run time cpnd keep using small format shm until first longDN checkpoint
> is created.
>> After that cpnd use big format shm.
>>
>> diff --git a/osaf/libs/common/cpsv/include/cpsv_shm.h
>> b/osaf/libs/common/cpsv/include/cpsv_shm.h
>> --- a/osaf/libs/common/cpsv/include/cpsv_shm.h
>> +++ b/osaf/libs/common/cpsv/include/cpsv_shm.h
>> @@ -27,7 +27,8 @@
>>#define SHM_NEXT -3
>>#define SHM_INIT -1
>>
>> -#define CPSV_CPND_SHM_VERSION1
>> +#define CPSV_CPND_SHM_VERSION_SHORT_DN  0
>> +#define CPSV_CPND_SHM_VERSION_LONG_DN   1
>>
>>typedef struct cpsv_ckpt_hdr {
>>  SaCkptCheckpointHandleT ckpt_id;/* Index for identifying the
> checkpoint */
>> @@ -134,4 +135,10 @@ typedef enum cpnd_type_info {
>>  CPND_CKPT_INFO
>>} CPND_TYPE_INFO;
>>
>> +#define cpsv_cpnd_shm_size(x) x == CPSV_CPND_SHM_VERSION_LONG_DN ?  \
>> +sizeof(CLIENT_HDR) + (MAX_CLIENTS * sizeof(CLIENT_INFO)) +
> \
>> +sizeof(CKPT_HDR) + (MAX_CKPTS * sizeof(CKPT_INFO)) :
> \
>> +sizeof(CLIENT_HDR) + (MAX_CLIENTS * sizeof(CLIENT_INFO)) +
> \
>> +sizeof(CKPT_HDR) + (MAX_CKPTS * sizeof(CKPT_INFO_V0))
> \
>> +
>>#endif
>> diff --git 

Re: [devel] [PATCH 1 of 1] cpnd: use shared memory based on ckpt name length [#2108]

2016-10-13 Thread Vo Minh Hoang
Dear Mahesh,

To keep the working behavior of the existing function consistent, there is
only one shared memory at a time. If a shared memory swapping action occurs, a
new shared memory will replace the old one.

Here are the detailed answers to your questions:
>> -The  existing  `small format shm`  will continue to be small , is that
right ?
>> -Only newly created longDN checkpoint will be in `big format shm`, is
that right ? 
No, old checkpoint data is converted to `big format`.
So all of them will be stored in `big format`.

>> - what will be the format of newly joined the PL-5 opens  an existing
`small format shm`
PL-5 still uses `small format`.
Only when a long DN replica is added in this node, the shared memory is
converted to `big format`.
>>  the what will be the new replica  on new node `small format shm` or `big
format shm` ?
This implementation only affects the `header` shared memory
(opensaf_CPND_CHECKPOINT_INFO_nodeid). It does not change the replica shared
memory (opensaf_ckptname_nodeid_n).

About testing, because of above specification, I tested:
- start new node
- restart ckptnd with existing small shm
- restart ckptnd with existing big shm
- create first long dn (check all node)

Thank you and best regards,
Hoang

-Original Message-
From: A V Mahesh [mailto:mahesh.va...@oracle.com] 
Sent: Thursday, October 13, 2016 1:33 PM
To: Hoang Vo ; anders.wid...@ericsson.com
Cc: opensaf-devel@lists.sourceforge.net
Subject: Re: [PATCH 1 of 1] cpnd: use shared memory based on ckpt name
length [#2108]

Hi Hoang,

 >> - Run time cpnd keep using small format shm until first longDN
checkpoint is created.
 >> After that cpnd use big format shm.

While reviewing I am assuming following please confirm  :

-The  existing  `small format shm`  will continue to be small , is that
right ?
-Only newly created longDN checkpoint will be in `big format shm`, is that
right ?
- what will be the format of newly joined the PL-5 opens  an existing `small
format shm`
   the what will be the new replica  on new node `small format shm` or `big
format shm` ?


I hope you  tested following :
==
- combination of some `small format shm`  and some  `big format shm`  ckpts
- Joined a New node ( say PL-5)  and then opened the existing `small format
shm` ckpt from the new Node
- Restarting a controller which has a combination of `small format shm` and `big
format shm`, and how the non-collocated ckpts are restored

-AVM

On 10/11/2016 1:15 PM, Hoang Vo wrote:
>   osaf/libs/common/cpsv/include/cpsv_shm.h |9 +-
>   osaf/services/saf/cpsv/cpnd/cpnd_res.c   |  565
--
>   2 files changed, 536 insertions(+), 38 deletions(-)
>
>
> problem: In the case of CKPT osafckptnd increased 3,5Mb - 240 percent 
> on all nodes CKPT_INFO size inscrease when support longDN lead to total
size increase.
>
> solution:
> - From start, cpnd use small format shm.
> - Run time cpnd keep using small format shm until first longDN checkpoint
is created.
> After that cpnd use big format shm.
>
> diff --git a/osaf/libs/common/cpsv/include/cpsv_shm.h 
> b/osaf/libs/common/cpsv/include/cpsv_shm.h
> --- a/osaf/libs/common/cpsv/include/cpsv_shm.h
> +++ b/osaf/libs/common/cpsv/include/cpsv_shm.h
> @@ -27,7 +27,8 @@
>   #define SHM_NEXT -3
>   #define SHM_INIT -1
>   
> -#define CPSV_CPND_SHM_VERSION1
> +#define CPSV_CPND_SHM_VERSION_SHORT_DN   0
> +#define CPSV_CPND_SHM_VERSION_LONG_DN1
>   
>   typedef struct cpsv_ckpt_hdr {
>   SaCkptCheckpointHandleT ckpt_id;/* Index for identifying the
checkpoint */
> @@ -134,4 +135,10 @@ typedef enum cpnd_type_info {
>   CPND_CKPT_INFO
>   } CPND_TYPE_INFO;
>   
> +#define cpsv_cpnd_shm_size(x) x == CPSV_CPND_SHM_VERSION_LONG_DN ?   \
> + sizeof(CLIENT_HDR) + (MAX_CLIENTS * sizeof(CLIENT_INFO)) +
\
> + sizeof(CKPT_HDR) + (MAX_CKPTS * sizeof(CKPT_INFO)) :
\
> + sizeof(CLIENT_HDR) + (MAX_CLIENTS * sizeof(CLIENT_INFO)) +
\
> + sizeof(CKPT_HDR) + (MAX_CKPTS * sizeof(CKPT_INFO_V0))
\
> +
>   #endif
> diff --git a/osaf/services/saf/cpsv/cpnd/cpnd_res.c 
> b/osaf/services/saf/cpsv/cpnd/cpnd_res.c
> --- a/osaf/services/saf/cpsv/cpnd/cpnd_res.c
> +++ b/osaf/services/saf/cpsv/cpnd/cpnd_res.c
> @@ -44,20 +44,34 @@
>   
>   #define m_CPND_CKPTINFO_UPDATE(addr,ckpt_info,offset) 
> memcpy(addr+offset,_info,sizeof(CKPT_INFO))
>   
> +#define m_CPND_CKPTINFO_V0_UPDATE(addr,ckpt_info,offset) 
> +memcpy(addr+offset,_info,sizeof(CKPT_INFO_V0))
> +
>   #define m_CPND_CKPTHDR_UPDATE(ckpt_hdr,offset)  
> memcpy(offset,_hdr,sizeof(CKPT_HDR))
>   
> +void *cpnd_restart_shm(NCS_OS_POSIX_SHM_REQ_INFO *cpnd_open_req, 
> +CPND_CB *cb, SaClmNodeIdT nodeid); uint32_t 
> +cpnd_update_ckpt_with_clienthdl_v1(CPND_CB *cb, CPND_CKPT_NODE 
> +*cp_node, SaCkptHandleT client_hdl); uint32_t 
> +cpnd_update_ckpt_with_clienthdl_v0(CPND_CB *cb, CPND_CKPT_NODE 
> +*cp_node, SaCkptHandleT client_hdl); uint32_t 
> +cpnd_write_ckpt_info_v1(CPND_CB *cb, 

Re: [devel] [PATCH 1 of 1] cpnd: use shared memory based on ckpt name length [#2108]

2016-10-13 Thread A V Mahesh
Hi Hoang,

 >> - Run time cpnd keep using small format shm until first longDN 
checkpoint is created.
 >> After that cpnd use big format shm.

While reviewing, I am assuming the following; please confirm:

- The existing `small format shm` will continue to be small, is that
right?
- Only newly created longDN checkpoints will be in `big format shm`, is
that right?
- When a newly joined node (say PL-5) opens an existing `small format shm`
checkpoint, what will be the format of the new replica on the new node:
`small format shm` or `big format shm`?


I hope you tested the following:
==
- A combination of some `small format shm` and some `big format shm` ckpts
- Joining a new node (say PL-5) and then opening an existing `small
format shm` ckpt from the new node
- Restarting a controller which has a combination of `small format shm` and
`big format shm` ckpts, and how the non-collocated ckpts are restored

-AVM

On 10/11/2016 1:15 PM, Hoang Vo wrote:
>   osaf/libs/common/cpsv/include/cpsv_shm.h |9 +-
>   osaf/services/saf/cpsv/cpnd/cpnd_res.c   |  565 
> --
>   2 files changed, 536 insertions(+), 38 deletions(-)
>
>
> problem: In the case of CKPT, osafckptnd increased 3,5Mb - 240 percent -
> on all nodes.
> The CKPT_INFO size increase when supporting longDN leads to a total size
> increase.
>
> solution:
> - From start, cpnd uses small format shm.
> - At run time, cpnd keeps using small format shm until the first longDN
> checkpoint is created.
> After that, cpnd uses big format shm.
>
> diff --git a/osaf/libs/common/cpsv/include/cpsv_shm.h 
> b/osaf/libs/common/cpsv/include/cpsv_shm.h
> --- a/osaf/libs/common/cpsv/include/cpsv_shm.h
> +++ b/osaf/libs/common/cpsv/include/cpsv_shm.h
> @@ -27,7 +27,8 @@
>   #define SHM_NEXT -3
>   #define SHM_INIT -1
>   
> -#define CPSV_CPND_SHM_VERSION1
> +#define CPSV_CPND_SHM_VERSION_SHORT_DN   0
> +#define CPSV_CPND_SHM_VERSION_LONG_DN1
>   
>   typedef struct cpsv_ckpt_hdr {
>   SaCkptCheckpointHandleT ckpt_id;/* Index for identifying the 
> checkpoint */
> @@ -134,4 +135,10 @@ typedef enum cpnd_type_info {
>   CPND_CKPT_INFO
>   } CPND_TYPE_INFO;
>   
> +#define cpsv_cpnd_shm_size(x) x == CPSV_CPND_SHM_VERSION_LONG_DN ?   \
> + sizeof(CLIENT_HDR) + (MAX_CLIENTS * sizeof(CLIENT_INFO)) +  
> \
> + sizeof(CKPT_HDR) + (MAX_CKPTS * sizeof(CKPT_INFO)) :
> \
> + sizeof(CLIENT_HDR) + (MAX_CLIENTS * sizeof(CLIENT_INFO)) +  
> \
> + sizeof(CKPT_HDR) + (MAX_CKPTS * sizeof(CKPT_INFO_V0))   
> \
> +
>   #endif
> diff --git a/osaf/services/saf/cpsv/cpnd/cpnd_res.c 
> b/osaf/services/saf/cpsv/cpnd/cpnd_res.c
> --- a/osaf/services/saf/cpsv/cpnd/cpnd_res.c
> +++ b/osaf/services/saf/cpsv/cpnd/cpnd_res.c
> @@ -44,20 +44,34 @@
>   
>   #define m_CPND_CKPTINFO_UPDATE(addr,ckpt_info,offset) 
> memcpy(addr+offset,_info,sizeof(CKPT_INFO))
>   
> +#define m_CPND_CKPTINFO_V0_UPDATE(addr,ckpt_info,offset) 
> memcpy(addr+offset,_info,sizeof(CKPT_INFO_V0))
> +
>   #define m_CPND_CKPTHDR_UPDATE(ckpt_hdr,offset)  
> memcpy(offset,_hdr,sizeof(CKPT_HDR))
>   
> +void *cpnd_restart_shm(NCS_OS_POSIX_SHM_REQ_INFO *cpnd_open_req, CPND_CB 
> *cb, SaClmNodeIdT nodeid);
> +uint32_t cpnd_update_ckpt_with_clienthdl_v1(CPND_CB *cb, CPND_CKPT_NODE 
> *cp_node, SaCkptHandleT client_hdl);
> +uint32_t cpnd_update_ckpt_with_clienthdl_v0(CPND_CB *cb, CPND_CKPT_NODE 
> *cp_node, SaCkptHandleT client_hdl);
> +uint32_t cpnd_write_ckpt_info_v1(CPND_CB *cb, CPND_CKPT_NODE *cp_node, 
> int32_t offset, SaCkptHandleT client_hdl);
> +uint32_t cpnd_write_ckpt_info_v0(CPND_CB *cb, CPND_CKPT_NODE *cp_node, 
> int32_t offset, SaCkptHandleT client_hdl);
> +
>   static uint32_t cpnd_res_ckpt_sec_add(CPND_CKPT_SECTION_INFO *pSecPtr, 
> CPND_CKPT_NODE *cp_node);
>   static bool cpnd_find_exact_ckptinfo(CPND_CB *cb, CKPT_INFO *ckpt_info, 
> uint32_t bitmap_offset,
>uint32_t *offset, uint32_t 
> *prev_offset);
> +static bool cpnd_find_exact_ckptinfo_v0(CPND_CB *cb, CKPT_INFO_V0 
> *ckpt_info, uint32_t bitmap_offset,
> +  uint32_t *offset, uint32_t 
> *prev_offset);
>   static void cpnd_clear_ckpt_info(CPND_CB *cb, CPND_CKPT_NODE *cp_node, 
> uint32_t curr_offset, uint32_t prev_offset);
>   static uint32_t cpnd_restore_client_info(CPND_CB *cb, uint8_t *cli_addr);
>   static uint32_t cpnd_restore_ckpt_info_v1(CPND_CB *cb, uint8_t *ckpt_addr, 
> SaClmNodeIdT nodeid);
>   static uint32_t cpnd_restore_ckpt_info_v0(CPND_CB *cb, uint8_t *ckpt_addr, 
> SaClmNodeIdT nodeid);
> +static void cpnd_restart_client_reset_v1(CPND_CB *cb, CPND_CKPT_NODE 
> *cp_node, CPND_CKPT_CLIENT_NODE *cl_node);
> +static void cpnd_restart_client_reset_v0(CPND_CB *cb, CPND_CKPT_NODE 
> *cp_node, CPND_CKPT_CLIENT_NODE *cl_node);
>   static void cpnd_destroy_shm_cpnd_cp_info(NCS_OS_POSIX_SHM_REQ_OPEN_INFO 
> *open_req);
>   static void