osaf/services/saf/clmsv/clms/clms_cb.h | 6 +++++ osaf/services/saf/clmsv/clms/clms_evt.c | 35 ++++++++++++++++++++++++++++++++- 2 files changed, 40 insertions(+), 1 deletions(-)
There is a possiblity that the checkpointing message for a NODE_DOWN reaches the STANDBY first, i.e. before the MDS delivers the NODE_DOWN event to the standby. This can result in stale node_down record getting stored in the node_down list which is a designated list for processing of node downs that occur during role change from standby to active. The patch introduces a variable that checks whether the checkpoint event for node_down has arrived first, followed by a check during role change to ignore such stale events. diff --git a/osaf/services/saf/clmsv/clms/clms_cb.h b/osaf/services/saf/clmsv/clms/clms_cb.h --- a/osaf/services/saf/clmsv/clms/clms_cb.h +++ b/osaf/services/saf/clmsv/clms/clms_cb.h @@ -37,6 +37,11 @@ typedef enum { IMM_RECONFIGURED = 5 } ADMIN_OP; +typedef enum { + CHECKPOINT_PROCESSED = 1, + MDS_DOWN_PROCESSED +} NODE_DOWN_STATUS; + /* Cluster Properties */ typedef struct cluster_db_t { SaNameT name; @@ -124,6 +129,7 @@ typedef struct clma_down_list_tag { typedef struct node_down_list_tag { SaClmNodeIdT node_id; + NODE_DOWN_STATUS ndown_status; struct node_down_list_tag *next; } NODE_DOWN_LIST; diff --git a/osaf/services/saf/clmsv/clms/clms_evt.c b/osaf/services/saf/clmsv/clms/clms_evt.c --- a/osaf/services/saf/clmsv/clms/clms_evt.c +++ b/osaf/services/saf/clmsv/clms/clms_evt.c @@ -592,6 +592,7 @@ static uint32_t proc_mds_node_evt(CLMSV_ clms_cb->node_down_list_tail->next = node_down_rec; } clms_cb->node_down_list_tail = node_down_rec; + node_down_rec->ndown_status = MDS_DOWN_PROCESSED; } done: @@ -1613,6 +1614,7 @@ void clms_remove_node_down_rec(SaClmNode { NODE_DOWN_LIST *node_down_rec = clms_cb->node_down_list_head; NODE_DOWN_LIST *prev_rec = NULL; + bool record_found = false; while (node_down_rec) { if (node_down_rec->node_id == node_id) { @@ -1638,11 +1640,36 @@ void clms_remove_node_down_rec(SaClmNode /* Free the NODE_DOWN_REC */ free(node_down_rec); node_down_rec = NULL; + record_found = true; break; } prev_rec = node_down_rec; /* Remember address of this entry */ node_down_rec = node_down_rec->next; /* Go to next entry */ } + + if (!record_found) { + /* MDS node_down has not yet reached the STANDBY, + * Just add this checkupdate record to the list. MDS_DOWN processing will delete it. + * If role change happens before MDS_DOWN is recieved, + * then role change processing just ignores the record and removes it + * from the list. + */ + node_down_rec = NULL; + if ((node_down_rec = (NODE_DOWN_LIST *) malloc(sizeof(NODE_DOWN_LIST))) == NULL) { + LOG_CR("Memory Allocation for NODE_DOWN_LIST failed"); + return; + } + memset(node_down_rec, 0, sizeof(NODE_DOWN_LIST)); + node_down_rec->node_id = node_id; + if (clms_cb->node_down_list_head == NULL) { + clms_cb->node_down_list_head = node_down_rec; + } else { + if (clms_cb->node_down_list_tail) + clms_cb->node_down_list_tail->next = node_down_rec; + } + clms_cb->node_down_list_tail = node_down_rec; + node_down_rec->ndown_status = CHECKPOINT_PROCESSED; + } } /** @@ -1696,7 +1723,13 @@ void proc_downs_during_rolechange (void) /*Remove NODE_DOWN_REC from the NODE_DOWN_LIST */ node = clms_node_get_by_id(node_down_rec->node_id); temp_node_down_rec = node_down_rec; - if (node != NULL) + /* If nodedown status is CHECKPOINT_PROCESSED, it means that + * a checkpoint update was received when this node was STANDBY, but + * the MDS node_down did not reach the STANDBY. An extremely rare chance, + * but good to have protection for it, by ignoring the record + * if the record is in CHECKPOINT_PROCESSED state. + */ + if ((node != NULL) && (temp_node_down_rec->ndown_status != CHECKPOINT_PROCESSED)) clms_track_send_node_down(node); node_down_rec = node_down_rec->next; /*Free the NODE_DOWN_REC */ ------------------------------------------------------------------------------ Meet PCI DSS 3.0 Compliance Requirements with EventLog Analyzer Achieve PCI DSS 3.0 Compliant Status with Out-of-the-box PCI DSS Reports Are you Audit-Ready for PCI DSS 3.0 Compliance? Download White paper Comply to PCI DSS 3.0 Requirement 10 and 11.5 with EventLog Analyzer http://pubads.g.doubleclick.net/gampad/clk?id=154622311&iu=/4140/ostg.clktrk _______________________________________________ Opensaf-devel mailing list Opensaf-devel@lists.sourceforge.net https://lists.sourceforge.net/lists/listinfo/opensaf-devel