Re: [devel] [PATCH 1 of 1] cpsv: on cpnd down fist remove child safReplica object then parent safCkpt object [#2189]

2016-11-17 Thread Vo Minh Hoang
Dear Mahesh,

Reviewed and tested with collocated and non-collocated case, saw problem
fixed and could not find any occurrence.

So ACK from me, tested.

Sincerely,
Hoang

-Original Message-
From: mahesh.va...@oracle.com [mailto:mahesh.va...@oracle.com] 
Sent: Wednesday, November 16, 2016 3:58 PM
To: hoang.m...@dektech.com.au
Cc: opensaf-devel@lists.sourceforge.net
Subject: [PATCH 1 of 1] cpsv: on cpnd down fist remove child safReplica
object then parent safCkpt object [#2189]

 osaf/services/saf/cpsv/cpd/cpd_imm.c  |  20 
 osaf/services/saf/cpsv/cpd/cpd_proc.c |   9 -
 2 files changed, 20 insertions(+), 9 deletions(-)


Bug :
While cpd processing cpnd down for  COLLOCATED  cktp  and that checkpoint
only exist on the went down cpnd ( no others Node opened this checkpoint in
cluster) , then cpd  removes  that checkpoint and replica completely.

In such case the current logic has as bug,  fist it removes ckpt node and
then replica, this is causing deletion of parent object safCkpt=...,*  first
, then child object safReplica=...,safCkpt=...,* next.

as we know IMM removes child if parent is removed ,so this is causing the
issue out of sequence remove of safReplica object and ERR_NOT_EXIST  is
returned.

Fix :
While cpd removing  that checkpoint and replica completely , follow the
sequence of  child object safReplica=...,safCkpt=...,*  fist then  parent
object safCkpt=...,* next.

This is focused fix , my be we need to review complete code for such
occurrences , if found will be addressed in new ticket.

diff --git a/osaf/services/saf/cpsv/cpd/cpd_imm.c
b/osaf/services/saf/cpsv/cpd/cpd_imm.c
--- a/osaf/services/saf/cpsv/cpd/cpd_imm.c
+++ b/osaf/services/saf/cpsv/cpd/cpd_imm.c
@@ -400,7 +400,9 @@ SaAisErrorT delete_runtime_replica_objec
osaf_extended_name_lend(replica_dn, _name);
rc = immutil_saImmOiRtObjectDelete(immOiHandle, _name); 
if (rc != SA_AIS_OK) {
-   LOG_ER("Deleting run time object %s Failed - rc =
%d",replica_dn, rc);
+   LOG_ER("Deleting run time object %s Failed-1 - rc =
%d",replica_dn, rc);
+   } else {
+   TRACE("Deleting run time object %s Success-1 - rc =
%d",replica_dn, 
+rc);
}
 
free(replica_dn);
@@ -522,9 +524,11 @@ SaAisErrorT delete_runtime_ckpt_object(C
osaf_extended_name_lend(ckpt_node->ckpt_name, _name);
 
rc =  immutil_saImmOiRtObjectDelete(immOiHandle, _name);
-   if (rc != SA_AIS_OK)
+   if (rc != SA_AIS_OK) {
LOG_ER("Deleting run time object %s failed - rc = %d",
ckpt_node->ckpt_name, rc);
-
+   } else {
+   TRACE("Deleting run time object %s success - rc = %d",
ckpt_node->ckpt_name, rc);
+   }
return rc;
 }
 
@@ -917,11 +921,11 @@ SaAisErrorT cpd_clean_checkpoint_objects
/* Delete the runtime object and its children. */
rc = immutil_saImmOiRtObjectDelete(cb->immOiHandle,
_name);
if (rc == SA_AIS_OK) {
-   TRACE("Object \"%s\" deleted", (char *)
osaf_extended_name_borrow(_name));
-   } else {
-   LOG_ER("%s saImmOiRtObjectDelete for \"%s\" FAILED
%d",
-   __FUNCTION__, (char *)
osaf_extended_name_borrow(_name), rc);
-   }
+  TRACE("saImmOiRtObjectDelete \"%s\" deleted
Successfully", (char *) osaf_extended_name_borrow(_name));
+  } else {
+  LOG_ER("%s saImmOiRtObjectDelete for \"%s\" FAILED
%d",
+  __FUNCTION__, (char *)
osaf_extended_name_borrow(_name), rc);
+  }
}
 
if (rc != SA_AIS_ERR_NOT_EXIST) { diff --git
a/osaf/services/saf/cpsv/cpd/cpd_proc.c
b/osaf/services/saf/cpsv/cpd/cpd_proc.c
--- a/osaf/services/saf/cpsv/cpd/cpd_proc.c
+++ b/osaf/services/saf/cpsv/cpd/cpd_proc.c
@@ -809,6 +809,11 @@ uint32_t cpd_process_cpnd_down(CPD_CB *c
send_evt.info.cpnd.info.ckpt_del.mds_dest =
*cpnd_dest;
if (ckpt_node->dest_cnt == 0) {
TRACE_1("cpd ckpt del success for
ckpt_id:%llx",ckpt_node->ckpt_id);
+   /* Delete reploc fist*/
+   cpd_ckpt_reploc_get(>ckpt_reploc_tree,
_info, _info);
+   if (rep_info) {
+   cpd_ckpt_reploc_node_delete(cb,
rep_info, ckpt_node->is_unlink_set);
+   }
cpd_ckpt_map_node_get(>ckpt_map_tree,
ckpt_node->ckpt_name, _info);
 
/* Remove the ckpt_node */
@@ -875,7 +880,7 @@ uint32_t cpd_process_cpnd_down(CPD_CB *c
/* Send it to CPD(s), by sending ckpt_id = 0 */
/* This is to delete the node from reploc_tree */
cpd_ckpt_reploc_get(>ckpt_reploc_tree, _info,
_info);
-   if 

Re: [devel] [PATCH 1 of 1] cpsv: on cpnd down fist remove child safReplica object then parent safCkpt object [#2189]

2016-11-16 Thread A V Mahesh
Hi Hoang,

Additional TRACE will be removed after  testing.

-AVM

On 11/16/2016 2:28 PM, mahesh.va...@oracle.com wrote:
>   osaf/services/saf/cpsv/cpd/cpd_imm.c  |  20 
>   osaf/services/saf/cpsv/cpd/cpd_proc.c |   9 -
>   2 files changed, 20 insertions(+), 9 deletions(-)
>
>
> Bug :
> While cpd processing cpnd down for  COLLOCATED  cktp  and that checkpoint
> only exist on the went down cpnd ( no others Node opened this checkpoint in 
> cluster) ,
> then cpd  removes  that checkpoint and replica completely.
>
> In such case the current logic has as bug,  fist it removes ckpt node and 
> then replica,
> this is causing deletion of parent object safCkpt=...,*  first , then child 
> object safReplica=...,safCkpt=...,* next.
>
> as we know IMM removes child if parent is removed ,so this is causing the 
> issue out of
> sequence remove of safReplica object and ERR_NOT_EXIST  is returned.
>
> Fix :
> While cpd removing  that checkpoint and replica completely ,
> follow the sequence of  child object safReplica=...,safCkpt=...,*  fist then  
> parent object safCkpt=...,* next.
>
> This is focused fix , my be we need to review complete code for such 
> occurrences , if found
> will be addressed in new ticket.
>
> diff --git a/osaf/services/saf/cpsv/cpd/cpd_imm.c 
> b/osaf/services/saf/cpsv/cpd/cpd_imm.c
> --- a/osaf/services/saf/cpsv/cpd/cpd_imm.c
> +++ b/osaf/services/saf/cpsv/cpd/cpd_imm.c
> @@ -400,7 +400,9 @@ SaAisErrorT delete_runtime_replica_objec
>   osaf_extended_name_lend(replica_dn, _name);
>   rc = immutil_saImmOiRtObjectDelete(immOiHandle, _name);
>   if (rc != SA_AIS_OK) {
> - LOG_ER("Deleting run time object %s Failed - rc = 
> %d",replica_dn, rc);
> + LOG_ER("Deleting run time object %s Failed-1 - rc = 
> %d",replica_dn, rc);
> + } else {
> + TRACE("Deleting run time object %s Success-1 - rc = 
> %d",replica_dn, rc);
>   }
>   
>   free(replica_dn);
> @@ -522,9 +524,11 @@ SaAisErrorT delete_runtime_ckpt_object(C
>   osaf_extended_name_lend(ckpt_node->ckpt_name, _name);
>   
>   rc =  immutil_saImmOiRtObjectDelete(immOiHandle, _name);
> - if (rc != SA_AIS_OK)
> + if (rc != SA_AIS_OK) {
>   LOG_ER("Deleting run time object %s failed - rc = %d", 
> ckpt_node->ckpt_name, rc);
> -
> + } else {
> + TRACE("Deleting run time object %s success - rc = %d", 
> ckpt_node->ckpt_name, rc);
> + }
>   return rc;
>   }
>   
> @@ -917,11 +921,11 @@ SaAisErrorT cpd_clean_checkpoint_objects
>  /* Delete the runtime object and its children. */
>  rc = immutil_saImmOiRtObjectDelete(cb->immOiHandle, 
> _name);
>  if (rc == SA_AIS_OK) {
> -   TRACE("Object \"%s\" deleted", (char *) 
> osaf_extended_name_borrow(_name));
> -   } else {
> -   LOG_ER("%s saImmOiRtObjectDelete for \"%s\" FAILED 
> %d",
> -   __FUNCTION__, (char *) 
> osaf_extended_name_borrow(_name), rc);
> -   }
> +TRACE("saImmOiRtObjectDelete \"%s\" deleted 
> Successfully", (char *) osaf_extended_name_borrow(_name));
> +} else {
> +LOG_ER("%s saImmOiRtObjectDelete for \"%s\" FAILED %d",
> +__FUNCTION__, (char *) 
> osaf_extended_name_borrow(_name), rc);
> +}
>  }
>   
>  if (rc != SA_AIS_ERR_NOT_EXIST) {
> diff --git a/osaf/services/saf/cpsv/cpd/cpd_proc.c 
> b/osaf/services/saf/cpsv/cpd/cpd_proc.c
> --- a/osaf/services/saf/cpsv/cpd/cpd_proc.c
> +++ b/osaf/services/saf/cpsv/cpd/cpd_proc.c
> @@ -809,6 +809,11 @@ uint32_t cpd_process_cpnd_down(CPD_CB *c
>   send_evt.info.cpnd.info.ckpt_del.mds_dest = *cpnd_dest;
>   if (ckpt_node->dest_cnt == 0) {
>   TRACE_1("cpd ckpt del success for 
> ckpt_id:%llx",ckpt_node->ckpt_id);
> + /* Delete reploc fist*/
> + cpd_ckpt_reploc_get(>ckpt_reploc_tree, 
> _info, _info);
> + if (rep_info) {
> + cpd_ckpt_reploc_node_delete(cb, 
> rep_info, ckpt_node->is_unlink_set);
> + }
>   cpd_ckpt_map_node_get(>ckpt_map_tree, 
> ckpt_node->ckpt_name, _info);
>   
>   /* Remove the ckpt_node */
> @@ -875,7 +880,7 @@ uint32_t cpd_process_cpnd_down(CPD_CB *c
>   /* Send it to CPD(s), by sending ckpt_id = 0 */
>   /* This is to delete the node from reploc_tree */
>   cpd_ckpt_reploc_get(>ckpt_reploc_tree, _info, 
> _info);
> - if (rep_info) {
> + if ((rep_info) && (ckpt_node)) {
>   cpd_ckpt_reploc_node_delete(cb, rep_info, 
> ckpt_node->is_unlink_set);
>   }
>   
> @@ -1238,6 +1243,8 

[devel] [PATCH 1 of 1] cpsv: on cpnd down fist remove child safReplica object then parent safCkpt object [#2189]

2016-11-16 Thread mahesh . valla
 osaf/services/saf/cpsv/cpd/cpd_imm.c  |  20 
 osaf/services/saf/cpsv/cpd/cpd_proc.c |   9 -
 2 files changed, 20 insertions(+), 9 deletions(-)


Bug :
While cpd processing cpnd down for  COLLOCATED  cktp  and that checkpoint
only exist on the went down cpnd ( no others Node opened this checkpoint in 
cluster) ,
then cpd  removes  that checkpoint and replica completely.

In such case the current logic has as bug,  fist it removes ckpt node and then 
replica,
this is causing deletion of parent object safCkpt=...,*  first , then child 
object safReplica=...,safCkpt=...,* next.

as we know IMM removes child if parent is removed ,so this is causing the issue 
out of
sequence remove of safReplica object and ERR_NOT_EXIST  is returned.

Fix :
While cpd removing  that checkpoint and replica completely ,
follow the sequence of  child object safReplica=...,safCkpt=...,*  fist then  
parent object safCkpt=...,* next.

This is focused fix , my be we need to review complete code for such 
occurrences , if found
will be addressed in new ticket.

diff --git a/osaf/services/saf/cpsv/cpd/cpd_imm.c 
b/osaf/services/saf/cpsv/cpd/cpd_imm.c
--- a/osaf/services/saf/cpsv/cpd/cpd_imm.c
+++ b/osaf/services/saf/cpsv/cpd/cpd_imm.c
@@ -400,7 +400,9 @@ SaAisErrorT delete_runtime_replica_objec
osaf_extended_name_lend(replica_dn, _name);
rc = immutil_saImmOiRtObjectDelete(immOiHandle, _name); 
if (rc != SA_AIS_OK) {
-   LOG_ER("Deleting run time object %s Failed - rc = 
%d",replica_dn, rc);
+   LOG_ER("Deleting run time object %s Failed-1 - rc = 
%d",replica_dn, rc);
+   } else {
+   TRACE("Deleting run time object %s Success-1 - rc = 
%d",replica_dn, rc);
}
 
free(replica_dn);
@@ -522,9 +524,11 @@ SaAisErrorT delete_runtime_ckpt_object(C
osaf_extended_name_lend(ckpt_node->ckpt_name, _name);
 
rc =  immutil_saImmOiRtObjectDelete(immOiHandle, _name);
-   if (rc != SA_AIS_OK)
+   if (rc != SA_AIS_OK) {
LOG_ER("Deleting run time object %s failed - rc = %d", 
ckpt_node->ckpt_name, rc);
-
+   } else {
+   TRACE("Deleting run time object %s success - rc = %d", 
ckpt_node->ckpt_name, rc);
+   }
return rc;
 }
 
@@ -917,11 +921,11 @@ SaAisErrorT cpd_clean_checkpoint_objects
/* Delete the runtime object and its children. */
rc = immutil_saImmOiRtObjectDelete(cb->immOiHandle, 
_name);
if (rc == SA_AIS_OK) {
-   TRACE("Object \"%s\" deleted", (char *) 
osaf_extended_name_borrow(_name));
-   } else {
-   LOG_ER("%s saImmOiRtObjectDelete for \"%s\" FAILED %d",
-   __FUNCTION__, (char *) 
osaf_extended_name_borrow(_name), rc);
-   }
+  TRACE("saImmOiRtObjectDelete \"%s\" deleted 
Successfully", (char *) osaf_extended_name_borrow(_name));
+  } else {
+  LOG_ER("%s saImmOiRtObjectDelete for \"%s\" FAILED %d",
+  __FUNCTION__, (char *) 
osaf_extended_name_borrow(_name), rc);
+  }
}
 
if (rc != SA_AIS_ERR_NOT_EXIST) {
diff --git a/osaf/services/saf/cpsv/cpd/cpd_proc.c 
b/osaf/services/saf/cpsv/cpd/cpd_proc.c
--- a/osaf/services/saf/cpsv/cpd/cpd_proc.c
+++ b/osaf/services/saf/cpsv/cpd/cpd_proc.c
@@ -809,6 +809,11 @@ uint32_t cpd_process_cpnd_down(CPD_CB *c
send_evt.info.cpnd.info.ckpt_del.mds_dest = *cpnd_dest;
if (ckpt_node->dest_cnt == 0) {
TRACE_1("cpd ckpt del success for 
ckpt_id:%llx",ckpt_node->ckpt_id);
+   /* Delete reploc fist*/
+   cpd_ckpt_reploc_get(>ckpt_reploc_tree, 
_info, _info);
+   if (rep_info) {
+   cpd_ckpt_reploc_node_delete(cb, 
rep_info, ckpt_node->is_unlink_set);
+   }
cpd_ckpt_map_node_get(>ckpt_map_tree, 
ckpt_node->ckpt_name, _info);
 
/* Remove the ckpt_node */
@@ -875,7 +880,7 @@ uint32_t cpd_process_cpnd_down(CPD_CB *c
/* Send it to CPD(s), by sending ckpt_id = 0 */
/* This is to delete the node from reploc_tree */
cpd_ckpt_reploc_get(>ckpt_reploc_tree, _info, 
_info);
-   if (rep_info) {
+   if ((rep_info) && (ckpt_node)) {
cpd_ckpt_reploc_node_delete(cb, rep_info, 
ckpt_node->is_unlink_set);
}
 
@@ -1238,6 +1243,8 @@ uint32_t cpd_ckpt_reploc_imm_object_dele
LOG_ER("Deleting run time object %s FAILED", 
replica_dn);
free(replica_dn);
return NCSCC_RC_FAILURE;
+   } else {
+   TRACE("Deleting