osaf/services/saf/clmsv/clms/clms_cb.h  |   6 +++++
 osaf/services/saf/clmsv/clms/clms_evt.c |  35 ++++++++++++++++++++++++++++++++-
 2 files changed, 40 insertions(+), 1 deletions(-)


There is a possibility that the checkpoint message for a NODE_DOWN reaches
the STANDBY first, i.e. before MDS delivers the NODE_DOWN event to the
standby. This can result in a stale node_down record being stored in the
node_down list, which is the designated list for processing node downs that
occur during a role change from standby to active. The patch introduces a
status field that records whether the checkpoint event for node_down arrived
first, and a check during role change that ignores such stale records.

diff --git a/osaf/services/saf/clmsv/clms/clms_cb.h 
b/osaf/services/saf/clmsv/clms/clms_cb.h
--- a/osaf/services/saf/clmsv/clms/clms_cb.h
+++ b/osaf/services/saf/clmsv/clms/clms_cb.h
@@ -37,6 +37,11 @@ typedef enum {
        IMM_RECONFIGURED = 5
 } ADMIN_OP;
 
+typedef enum {
+       CHECKPOINT_PROCESSED = 1,
+       MDS_DOWN_PROCESSED
+} NODE_DOWN_STATUS;
+
 /* Cluster Properties */
 typedef struct cluster_db_t {
        SaNameT name;
@@ -124,6 +129,7 @@ typedef struct clma_down_list_tag {
 
 typedef struct node_down_list_tag {
        SaClmNodeIdT node_id;
+       NODE_DOWN_STATUS ndown_status;
        struct node_down_list_tag *next;
 } NODE_DOWN_LIST;
 
diff --git a/osaf/services/saf/clmsv/clms/clms_evt.c 
b/osaf/services/saf/clmsv/clms/clms_evt.c
--- a/osaf/services/saf/clmsv/clms/clms_evt.c
+++ b/osaf/services/saf/clmsv/clms/clms_evt.c
@@ -592,6 +592,7 @@ static uint32_t proc_mds_node_evt(CLMSV_
                                clms_cb->node_down_list_tail->next = 
node_down_rec;
                }
                clms_cb->node_down_list_tail = node_down_rec;
+               node_down_rec->ndown_status = MDS_DOWN_PROCESSED;
        }
 
  done:
@@ -1613,6 +1614,7 @@ void clms_remove_node_down_rec(SaClmNode
 {
        NODE_DOWN_LIST *node_down_rec = clms_cb->node_down_list_head;
        NODE_DOWN_LIST *prev_rec = NULL;
+       bool record_found = false;
 
        while (node_down_rec) {
                if (node_down_rec->node_id == node_id) {
@@ -1638,11 +1640,36 @@ void clms_remove_node_down_rec(SaClmNode
                        /* Free the NODE_DOWN_REC */
                        free(node_down_rec);
                        node_down_rec = NULL;
+                       record_found = true;
                        break;
                }
                prev_rec = node_down_rec;       /* Remember address of this 
entry */
                node_down_rec = node_down_rec->next;    /* Go to next entry */
        }
+
+       if (!record_found) {
+               /* MDS node_down has not yet reached the STANDBY,
+                * Just add this checkpoint update record to the list. MDS_DOWN 
processing will delete it.
+                * If role change happens before MDS_DOWN is received,
+                * then role change processing just ignores the record and 
removes it 
+                * from the list.
+                */
+               node_down_rec = NULL;
+               if ((node_down_rec = (NODE_DOWN_LIST *) 
malloc(sizeof(NODE_DOWN_LIST))) == NULL) {
+                       LOG_CR("Memory Allocation for NODE_DOWN_LIST failed");
+                       return;
+               }
+               memset(node_down_rec, 0, sizeof(NODE_DOWN_LIST));
+               node_down_rec->node_id = node_id;
+               if (clms_cb->node_down_list_head == NULL) {
+                       clms_cb->node_down_list_head = node_down_rec;
+               } else {
+                       if (clms_cb->node_down_list_tail)
+                               clms_cb->node_down_list_tail->next = 
node_down_rec;
+               }
+               clms_cb->node_down_list_tail = node_down_rec;
+               node_down_rec->ndown_status = CHECKPOINT_PROCESSED;
+       }
 }
 
 /**
@@ -1696,7 +1723,13 @@ void proc_downs_during_rolechange (void)
                /*Remove NODE_DOWN_REC from the NODE_DOWN_LIST */
                node = clms_node_get_by_id(node_down_rec->node_id);
                temp_node_down_rec = node_down_rec;
-               if (node != NULL)
+               /* If nodedown status is CHECKPOINT_PROCESSED, it means that
+                * a checkpoint update was received when this node was STANDBY, 
but
+                * the MDS node_down did not reach the STANDBY. An extremely 
rare chance,
+                * but good to have protection for it, by ignoring the record
+                * if the record is in CHECKPOINT_PROCESSED state.
+                */
+               if ((node != NULL) && (temp_node_down_rec->ndown_status != 
CHECKPOINT_PROCESSED))
                        clms_track_send_node_down(node);
                node_down_rec = node_down_rec->next;
                /*Free the NODE_DOWN_REC */

------------------------------------------------------------------------------
Meet PCI DSS 3.0 Compliance Requirements with EventLog Analyzer
Achieve PCI DSS 3.0 Compliant Status with Out-of-the-box PCI DSS Reports
Are you Audit-Ready for PCI DSS 3.0 Compliance? Download White paper
Comply to PCI DSS 3.0 Requirement 10 and 11.5 with EventLog Analyzer
http://pubads.g.doubleclick.net/gampad/clk?id=154622311&iu=/4140/ostg.clktrk
_______________________________________________
Opensaf-devel mailing list
Opensaf-devel@lists.sourceforge.net
https://lists.sourceforge.net/lists/listinfo/opensaf-devel

Reply via email to