possible that the checkpointHandle is being passed as |    0 
 osaf/services/saf/cpsv/cpnd/cpnd_evt.c                |   18 +++++++++++++++---
 osaf/services/saf/cpsv/cpnd/cpnd_proc.c               |    3 ++-
 3 files changed, 17 insertions(+), 4 deletions(-)


Bug Analysis :

checkpointHandle - is A pointer to the checkpoint handle, allocated in the
address space of the invoking process (CPA/application) . CPA stores into the
memory area of CPA/application/process uses to access the checkpoint in
subsequent invocations of the functions of the Checkpoint Service API.
In the case of saCkptCheckpointOpenAsync() , saCkptCheckpointWrite() ,
this handle is returned in the corresponding response message.

Eevn though saCkptCheckpointWrite() is Sync request checkpointHandle is used by 
CPND for tracking
CPND<--->CPND messaging invoking activity on request of saCkptCheckpointWrite() 
.

If the ckpoint is SA_CKPT_CHECKPOINT_COLLOCATED & SA_CKPT_WR_ALL_REPLICAS 
checkpoint ,
and the checkpoint is opened on multiple nodes , and saCkptCheckpointWrite() 
are beeing requested by
multiple CPAs from same Node , then the local CPND to update the all other 
CPNDS ,
whic has opened the same Ckpt, to track the pending invocations/event a
teperory cpnd_evt entry added with key checkpointHandle and after a successful 
write responserevived
from all the CPND's , using this cpnd_evt entry the local CPND will response 
back to CPA/application with result,
and then deleted cpnd_evt entry from local CPND ( temporary only for tracking 
response message of peer CPND )

In current code CPA is using malloc() return value as checkpointHandle for
unique reference key where malloc() returns virtual memory specific to 
processes,
so malloc() can return the same pointer value in separate CPA processes .

If we are running two ckpt app (2 processes) try to write data into two
difference/same checkpoints , it is possible that the checkpointHandle is being 
passed as
same from both checkpoint application processes , as same checkpointHandle is 
being shared to CPND by both CPA
as key reference, the CPND can miss behave because of ambiguous same reference 
key.
Solution :

As malloc() is standred call and the Checkpoint Service API Specification says
checkpointHandle should be pointer allocated in the address space of the 
invoking
process/CPA , CPND will return try again before the saCkptCheckpointWrite() 
call ,
if multiple saCkptCheckpointWrite() request come to CPND with SAME
checkpointHandle ( same virtual memory address) from different CPA's at the 
same time.

diff --git a/osaf/services/saf/cpsv/cpnd/cpnd_evt.c 
b/osaf/services/saf/cpsv/cpnd/cpnd_evt.c
--- a/osaf/services/saf/cpsv/cpnd/cpnd_evt.c
+++ b/osaf/services/saf/cpsv/cpnd/cpnd_evt.c
@@ -639,8 +639,11 @@ static uint32_t cpnd_evt_proc_ckpt_open(
                }
 
                /* found locally,incr ckpt_locl ref count and add client list 
info */
-               if (true == cp_node->is_restart) {
+               if ((true == cp_node->is_restart) || 
+                               ((cp_node->open_active_sync_tmr.is_active) &&
+                                (cp_node->open_active_sync_tmr.lcl_ckpt_hdl == 
evt->info.openReq.lcl_ckpt_hdl))){
                        send_evt.info.cpa.info.openRsp.error = 
SA_AIS_ERR_TRY_AGAIN;
+                       LOG_ER("cpnd Open try again sync_tmr exist or ndrestart 
for lcl_ckpt_hdl:%llx ckpt:%llx",evt->info.openReq.lcl_ckpt_hdl, client_hdl);
                        goto agent_rsp;
                }
 
@@ -2889,6 +2892,7 @@ static uint32_t cpnd_evt_proc_ckpt_write
        CPSV_EVT send_evt;
        uint32_t err_flag = 0;
        uint32_t errflag = 0;
+       CPSV_CPND_ALL_REPL_EVT_NODE *evt_node = NULL;
        TRACE_ENTER();
 
        memset(&send_evt, '\0', sizeof(CPSV_EVT));
@@ -2918,7 +2922,15 @@ static uint32_t cpnd_evt_proc_ckpt_write
                                evt->info.ckpt_write.ckpt_id, 
SA_AIS_ERR_NOT_EXIST);
                goto agent_rsp;
        }
-       if ((true == cp_node->is_restart) || 
(m_CPND_IS_LOCAL_NODE(&cp_node->active_mds_dest, &cb->cpnd_mdest_id) != 0)) {
+       
+       cpnd_evt_node_get(cb, evt->info.ckpt_write.lcl_ckpt_id, &evt_node);
+       if (evt_node) {
+               LOG_ER("cpnd cpnd_evt_node pending with lcl_ckpt_id:%llx write 
failed for ckpt_id:%llx",
+                               evt->info.ckpt_write.lcl_ckpt_id, 
evt->info.ckpt_write.ckpt_id);
+       }
+
+       if ((true == cp_node->is_restart) || 
(m_CPND_IS_LOCAL_NODE(&cp_node->active_mds_dest, &cb->cpnd_mdest_id) != 0) ||
+                       (evt_node != NULL)) {
                send_evt.type = CPSV_EVT_TYPE_CPA;
                send_evt.info.cpa.type = CPA_EVT_ND2A_CKPT_DATA_RSP;
                switch (evt->info.ckpt_write.type) {
@@ -2938,7 +2950,7 @@ static uint32_t cpnd_evt_proc_ckpt_write
                        break;
 
                }
-               TRACE_4("cpnd ckpt sect write failed 
ckpt_id:%llx,is_restart:%d",
+               TRACE_4("cpnd ckpt sect write failed ckpt_id:%llx, evt_node 
pending/is_restart:%d",
                                evt->info.ckpt_write.ckpt_id, 
cp_node->is_restart);
 
                /*send_evt.info.cpa.info.sec_data_rsp.num_of_elmts=-1;
diff --git a/osaf/services/saf/cpsv/cpnd/cpnd_proc.c 
b/osaf/services/saf/cpsv/cpnd/cpnd_proc.c
--- a/osaf/services/saf/cpsv/cpnd/cpnd_proc.c
+++ b/osaf/services/saf/cpsv/cpnd/cpnd_proc.c
@@ -1322,7 +1322,8 @@ cpnd_proc_update_remote(CPND_CB *cb, CPN
 
                                if (cpnd_evt_node_add(cb, all_repl_evt) == 
NCSCC_RC_FAILURE) {
                                        /*log this error */
-                                       /*free the memory allocated */
+                                       LOG_ER("cpnd a multi event_add request 
for lcl_ckpt_id: %llx, evt is in progress for ckpt_id : %llx",
+                                                       
in_evt->info.ckpt_write.lcl_ckpt_id, cp_node->ckpt_id);
                                }
 
                                /*Start the timer before send the async req to 
remote node */

------------------------------------------------------------------------------
_______________________________________________
Opensaf-devel mailing list
[email protected]
https://lists.sourceforge.net/lists/listinfo/opensaf-devel

Reply via email to