Hi Hoang Vo,
I am not sure why there would be an unfinished unlink operation: the unlink
will land either on the old active CPD or, after fail-over, on the new active CPD,
and during the transition the CPND will get try-again. Can you please elaborate
on the case?
-AVM
On 10/13/2016 2:02 PM, Hoang Vo wrote:
> osaf/services/saf/cpsv/cpd/cpd_db.c | 15 +++++++++++++++
> osaf/services/saf/cpsv/cpd/cpd_proc.c | 18 ++++++++++++++++--
> 2 files changed, 31 insertions(+), 2 deletions(-)
>
>
> problem:
> If a failover happens while a checkpoint is being unlinked, it might
> cause an unfinished
> unlink operation (i.e. the checkpoint IMM object is not deleted). Later on,
> when the checkpoint is
> created again, the creation will not succeed because the CPD detects that the
> checkpoint IMM object already exists.
>
> Fix:
> - When this error occurs, delete the existing checkpoint IMM object and re-create
> a new one.
> - Stop timer of removed node.
> - Update data in patricia trees.
>
> diff --git a/osaf/services/saf/cpsv/cpd/cpd_db.c
> b/osaf/services/saf/cpsv/cpd/cpd_db.c
> --- a/osaf/services/saf/cpsv/cpd/cpd_db.c
> +++ b/osaf/services/saf/cpsv/cpd/cpd_db.c
> @@ -104,6 +104,21 @@ uint32_t cpd_ckpt_node_add(NCS_PATRICIA_
> /*create the imm runtime object */
> if (ha_state == SA_AMF_HA_ACTIVE) {
> err = create_runtime_ckpt_object(ckpt_node, immOiHandle);
> +
> + /* The Checkpoint IMM object exist due to unfinished previous
> opernation (e.g unlink)
> + * The action is to delete the old object and create a new one
> */
> +
> + if (err == SA_AIS_ERR_EXIST) {
> + LOG_WA("cpd ckpt node add - the IMM object exits %s",
> ckpt_node->ckpt_name);
> +
> + if (delete_runtime_ckpt_object(ckpt_node, immOiHandle)
> != SA_AIS_OK) {
> + LOG_ER("Deleting run time object %s FAILED",
> ckpt_node->ckpt_name);
> + return NCSCC_RC_FAILURE;
> + }
> +
> + err = create_runtime_ckpt_object(ckpt_node,
> immOiHandle);
> + }
> +
> if (err != SA_AIS_OK) {
> LOG_ER("create runtime ckpt object failed with error:
> %u",err);
> if (err == SA_AIS_ERR_INVALID_PARAM) {
> diff --git a/osaf/services/saf/cpsv/cpd/cpd_proc.c
> b/osaf/services/saf/cpsv/cpd/cpd_proc.c
> --- a/osaf/services/saf/cpsv/cpd/cpd_proc.c
> +++ b/osaf/services/saf/cpsv/cpd/cpd_proc.c
> @@ -348,7 +348,8 @@ uint32_t cpd_ckpt_db_entry_update(CPD_CB
> proc_rc =
> cpd_ckpt_reploc_node_add(&cb->ckpt_reploc_tree, reploc_info, cb->ha_state,
> cb->immOiHandle);
> if (proc_rc != NCSCC_RC_SUCCESS) {
> /* goto reploc_node_add_fail; */
> - TRACE_4("cpd db add failed ");
> + LOG_ER("cpd db replica add failed ");
> + goto replica_node_add_fail;
> }
> }
>
> @@ -367,6 +368,10 @@ uint32_t cpd_ckpt_db_entry_update(CPD_CB
> TRACE_LEAVE();
> return NCSCC_RC_SUCCESS;
>
> + replica_node_add_fail:
> + cpd_ckpt_node_delete(cb, ckpt_node);
> + ckpt_node = NULL;
> +
> ckpt_node_add_fail:
> cpd_ckpt_map_node_delete(cb, map_info);
> map_info = NULL;
> @@ -679,7 +684,8 @@ uint32_t cpd_process_cpnd_down(CPD_CB *c
> cpd_cpnd_info_node_find_add(&cb->cpnd_tree, cpnd_dest, &cpnd_info,
> &add_flag);
> if (!cpnd_info)
> return NCSCC_RC_SUCCESS;
> -
> + /* Stop timer before processing down */
> + cpd_tmr_stop(&cpnd_info->cpnd_ret_timer);
> cref_info = cpnd_info->ckpt_ref_list;
>
> while (cref_info) {
> @@ -984,6 +990,14 @@ uint32_t cpd_proc_retention_set(CPD_CB *
>
> /* Update the retention Time */
> (*ckpt_node)->ret_time = reten_time;
> + (*ckpt_node)->attributes.retentionDuration = reten_time;
> +
> + /* Update the related patricia tree */
> + CPD_CKPT_MAP_INFO *map_info = NULL;
> + cpd_ckpt_map_node_get(&cb->ckpt_map_tree, (*ckpt_node)->ckpt_name,
> &map_info);
> + if (map_info) {
> + map_info->attributes.retentionDuration = reten_time;
> + }
> return rc;
> }
>
------------------------------------------------------------------------------
Check out the vibrant tech community on one of the world's most
engaging tech sites, SlashDot.org! http://sdm.link/slashdot
_______________________________________________
Opensaf-devel mailing list
[email protected]
https://lists.sourceforge.net/lists/listinfo/opensaf-devel