osaf/services/saf/cpsv/cpnd/cpnd_evt.c |  10 ++++++++++
 1 files changed, 10 insertions(+), 0 deletions(-)


ckptnd core dumps with many different stack traces

When a collocated checkpoint replica is opened and the active replica has a
large number of sections (~200k), the sync from the active replica to the new
replica can time out. If the MDS sync succeeds but the error code in out_evt is
not SA_AIS_OK, the current code jumps to the ckpt_shm_node_free_error label. The
code under this label assumes that the node was never added to the database, so
it does not remove it. In this case, however, the node had already been added.
The node memory is freed, but the node is not removed from the database, so the
next time this checkpoint is accessed, cpnd accesses freed memory and crashes.

Fix: set a flag after the node has been added to the database, and under the
ckpt_node_free_error label remove the node from the database, if the flag is
set, before freeing its memory.

diff --git a/osaf/services/saf/cpsv/cpnd/cpnd_evt.c b/osaf/services/saf/cpsv/cpnd/cpnd_evt.c
--- a/osaf/services/saf/cpsv/cpnd/cpnd_evt.c
+++ b/osaf/services/saf/cpsv/cpnd/cpnd_evt.c
@@ -594,6 +594,7 @@ static uint32_t cpnd_evt_proc_ckpt_open(
        CPSV_EVT send_evt, *out_evt = NULL;
        SaNameT ckpt_name;
        uint32_t rc = NCSCC_RC_SUCCESS;
+       bool node_added = false;
        CPND_CPD_DEFERRED_REQ_NODE *node = NULL;
        CPND_CKPT_CLIENT_NODE *cl_node = NULL;
        CPND_CKPT_NODE *cp_node = NULL;
@@ -856,6 +857,8 @@ static uint32_t cpnd_evt_proc_ckpt_open(
                        goto ckpt_shm_node_free_error;
                }
 
+               node_added = true;
+
                if (out_evt->info.cpnd.info.ckpt_info.ckpt_rep_create == true &&
                    cp_node->create_attrib.maxSections == 1) {
 
@@ -983,6 +986,13 @@ static uint32_t cpnd_evt_proc_ckpt_open(
        if (cp_node->ret_tmr.is_active)
                cpnd_tmr_stop(&cp_node->ret_tmr);
        cpnd_ckpt_sec_map_destroy(&cp_node->replica_info);
+
+       if (node_added) {
+               rc = cpnd_ckpt_node_del(cb, cp_node);
+               if (rc == NCSCC_RC_FAILURE)
+                       LOG_ER("cpnd client tree del failed");
+       }
+
        m_MMGR_FREE_CPND_CKPT_NODE(cp_node);
 
  agent_rsp:


------------------------------------------------------------------------------
_______________________________________________
Opensaf-devel mailing list
[email protected]
https://lists.sourceforge.net/lists/listinfo/opensaf-devel

Reply via email to