Hi Thien,

I have some comments below.

I see this enhancement does not bring much value to NTF as it deals with a very rare case - process is terminated before saNtfInitialize() returns. In reality, if NTF server is getting overloaded
by such process, there must be an error in that process.

@Minh: how about your opinion? is this ticket valid?

Anyway, here are my comments:
1) Only C source files, ntfs_mds.c & ntfs_evt.c, access the new added list `ntfa_down_list_head`, why put new added methods in the C++ file and add C wrapper functions for them? It should be more clean if you move these functions into a new files e.g: ntfs_client_down.{h,c}.

2) C++ method name should start with a capital letter (refer to C++ google coding rule)

3) Naming methods that represent adding a down client to list, and removing from the list should pair/opposite with each other e.g. Open vs Close, Add vs Remove, not mark vs remove

4) The list is accessing from 02 different threads, mds and main thread, therefore must use mutex
to prevent race conditions.

5) Should have a check to ensure *not* adding the down client into the list if that client has successfully initialized.

Regards, Vu

On 10/9/19 9:36 AM, thien.m.huynh wrote:
Ntfd will not send response to a client when client already down.
This will avoid timeout when ntfd send via mds.
---
  src/ntf/ntfd/NtfAdmin.cc | 93 ++++++++++++++++++++++++++++++++++++++++++++++++
  src/ntf/ntfd/NtfAdmin.h  |  3 ++
  src/ntf/ntfd/ntfs_cb.h   |  6 ++++
  src/ntf/ntfd/ntfs_com.h  |  3 ++
  src/ntf/ntfd/ntfs_evt.c  |  1 +
  src/ntf/ntfd/ntfs_mds.c  |  9 ++++-
  6 files changed, 114 insertions(+), 1 deletion(-)

diff --git a/src/ntf/ntfd/NtfAdmin.cc b/src/ntf/ntfd/NtfAdmin.cc
index 8bbee69..641171b 100644
--- a/src/ntf/ntfd/NtfAdmin.cc
+++ b/src/ntf/ntfd/NtfAdmin.cc
@@ -560,6 +560,85 @@ void NtfAdmin::SearchAndSetClientsDownFlag(MDS_DEST 
mds_dest) {
  }
/**
+ * @brief Add mds_dest tag into ntfa down list
+ * @param mds_dest
+ */
+void NtfAdmin::markAgentDown(MDS_DEST mds_dest) {
+  TRACE_ENTER();
+  NTFA_DOWN_LIST *ntfa_down_rec = NULL;
+  if ((ntfa_down_rec = reinterpret_cast<NTFA_DOWN_LIST *>(
+           malloc(sizeof(NTFA_DOWN_LIST)))) == NULL) {
+    LOG_ER("memory allocation for the NTFA_DOWN_LIST failed");
+    return;
+  }
+  memset(ntfa_down_rec, 0, sizeof(NTFA_DOWN_LIST));
+  ntfa_down_rec->mds_dest = mds_dest;
+  ntfa_down_rec->next = NULL;
+
+  if (ntfs_cb->ntfa_down_list_head == NULL) {
+    ntfs_cb->ntfa_down_list_head = ntfa_down_rec;
+  } else {
+    NTFA_DOWN_LIST *p = ntfs_cb->ntfa_down_list_head;
+    while (p->next != NULL) {
+      p = p->next;
+    }
+    p->next = ntfa_down_rec;
+  }
+  TRACE_1("Added MDS dest: %" PRIx64, ntfa_down_rec->mds_dest);
+  TRACE_LEAVE();
+}
+
+/**
+ * @brief Find and remove agent from ntfa down list
+ * @param mds_dest
+ */
+void NtfAdmin::removeAgentFromDownList(MDS_DEST mds_dest) {
+  NTFA_DOWN_LIST *ntfa_down_rec = ntfs_cb->ntfa_down_list_head;
+  NTFA_DOWN_LIST *prev = NULL;
+  TRACE_ENTER();
+  while (ntfa_down_rec != NULL) {
+    if (mds_dest == ntfa_down_rec->mds_dest) {
+      if (ntfa_down_rec == ntfs_cb->ntfa_down_list_head) {
+        if (ntfa_down_rec->next == NULL) {
+          ntfs_cb->ntfa_down_list_head = NULL;
+        } else {
+          ntfs_cb->ntfa_down_list_head = ntfa_down_rec->next;
+        }
+      } else if (prev) {
+        prev->next = ntfa_down_rec->next;
+      }
+      TRACE("Deleted MDS dest: %" PRIx64, ntfa_down_rec->mds_dest);
+      free(ntfa_down_rec);
+      ntfa_down_rec = NULL;
+      break;
+    }
+    prev = ntfa_down_rec;
+    ntfa_down_rec = ntfa_down_rec->next;
+  }
+  TRACE_LEAVE();
+}
+
+/**
+ * @brief  Check if agent exists in down list
+ * @param  mds_dest
+ * @return true/false
+ */
+bool NtfAdmin::isInNtfaDownList(MDS_DEST mds_dest) {
+  bool found = false;
+  NTFA_DOWN_LIST *ntfa_down_rec = ntfs_cb->ntfa_down_list_head;
+  TRACE_ENTER();
+  while (ntfa_down_rec != NULL) {
+    if (mds_dest == ntfa_down_rec->mds_dest) {
+      found = true;
+      break;
+    }
+    ntfa_down_rec = ntfa_down_rec->next;
+  }
+  TRACE_LEAVE();
+  return found;
+}
+
+/**
   * The node object where the client who had the subscription is notified
   * so it can delete the appropriate subscription and filter object.
   *
@@ -1300,6 +1379,20 @@ uint32_t 
send_clm_node_status_change(SaClmClusterChangesT cluster_change,
        cluster_change, node_id));
  }
+void removeAgentFromDownList(MDS_DEST mds_dest) {
+  osafassert(NtfAdmin::theNtfAdmin != NULL);
+  NtfAdmin::theNtfAdmin->removeAgentFromDownList(mds_dest);
+}
+
+bool isInNtfaDownList(MDS_DEST mds_dest) {
+  return (NtfAdmin::theNtfAdmin->isInNtfaDownList(mds_dest));
+}
+
+void markAgentDown(MDS_DEST mds_dest) {
+  osafassert(NtfAdmin::theNtfAdmin != NULL);
+  NtfAdmin::theNtfAdmin->markAgentDown(mds_dest);
+}
+
  /**
   * @brief  Checks CLM membership status of a client.
   *         A0101 clients are always CLM member.
diff --git a/src/ntf/ntfd/NtfAdmin.h b/src/ntf/ntfd/NtfAdmin.h
index 4808ca9..eab0016 100644
--- a/src/ntf/ntfd/NtfAdmin.h
+++ b/src/ntf/ntfd/NtfAdmin.h
@@ -109,6 +109,9 @@ class NtfAdmin {
    uint32_t send_cluster_membership_msg_to_clients(
        SaClmClusterChangesT cluster_change, NODE_ID node_id);
    bool is_stale_client(unsigned int clientId);
+  void markAgentDown(MDS_DEST mds_dest);
+  void removeAgentFromDownList(MDS_DEST mds_dest);
+  bool isInNtfaDownList(MDS_DEST mds_dest);
private:
    void processNotification(unsigned int clientId,
diff --git a/src/ntf/ntfd/ntfs_cb.h b/src/ntf/ntfd/ntfs_cb.h
index 96eedc1..518b1b9 100644
--- a/src/ntf/ntfd/ntfs_cb.h
+++ b/src/ntf/ntfd/ntfs_cb.h
@@ -38,6 +38,11 @@ typedef struct {
    MDS_DEST mds_dest;
  } ntf_client_t;
+typedef struct ntfa_down_list_tag {
+  MDS_DEST mds_dest;
+  struct ntfa_down_list_tag *next;
+} NTFA_DOWN_LIST;
+
  typedef struct ntfs_cb {
    SYSF_MBX mbx;           /* NTFS's mailbox                             */
    MDS_HDL mds_hdl;        /* PWE Handle for interacting with NTFAs      */
@@ -71,6 +76,7 @@ typedef struct ntfs_cb {
    NCS_SEL_OBJ usr2_sel_obj; /* Selection object for CLM initialization.*/
    uint16_t peer_mbcsv_version; /*Remeber peer NTFS MBCSV version.*/
    bool clm_initialized;        // For CLM init status;
+  NTFA_DOWN_LIST *ntfa_down_list_head; /* NTFA down reccords */
  } ntfs_cb_t;
extern uint32_t ntfs_cb_init(ntfs_cb_t *);
diff --git a/src/ntf/ntfd/ntfs_com.h b/src/ntf/ntfd/ntfs_com.h
index b9e37da..d13e9c9 100644
--- a/src/ntf/ntfd/ntfs_com.h
+++ b/src/ntf/ntfd/ntfs_com.h
@@ -112,6 +112,9 @@ void storeMatchingSubscription(SaNtfIdentifierT 
notificationId,
  void discardedAdd(unsigned int clientId, SaNtfSubscriptionIdT subscriptionId,
                    SaNtfIdentifierT notificationId);
  void discardedClear(unsigned int clientId, SaNtfSubscriptionIdT 
subscriptionId);
+void markAgentDown(MDS_DEST mds_dest);
+void removeAgentFromDownList(MDS_DEST mds_dest);
+bool isInNtfaDownList(MDS_DEST mds_dest);
/* Calls from Admin --> communication layer */ diff --git a/src/ntf/ntfd/ntfs_evt.c b/src/ntf/ntfd/ntfs_evt.c
index 19b2f60..cd7e035 100644
--- a/src/ntf/ntfd/ntfs_evt.c
+++ b/src/ntf/ntfd/ntfs_evt.c
@@ -110,6 +110,7 @@ static uint32_t proc_ntfa_updn_mds_msg(ntfsv_ntfs_evt_t 
*evt)
                } else {
                        clientRemoveMDS(evt->fr_dest);
                }
+               removeAgentFromDownList(evt->fr_dest);
                break;
        default:
                TRACE("Unknown evt type!!!");
diff --git a/src/ntf/ntfd/ntfs_mds.c b/src/ntf/ntfd/ntfs_mds.c
index 5f40c6e..75161ef 100644
--- a/src/ntf/ntfd/ntfs_mds.c
+++ b/src/ntf/ntfd/ntfs_mds.c
@@ -950,10 +950,11 @@ static uint32_t mds_svc_event(struct ncsmds_callback_info 
*info)
                        // checkpoint of initializing new client in standby or
                        // checking if client has already downed in active.
                        SearchAndSetClientsDownFlag(evt->fr_dest);
+                       markAgentDown(evt->fr_dest);
/* Push the event and we are done */
                        if (m_NCS_IPC_SEND(&ntfs_cb->mbx, evt,
-                                          NCS_IPC_PRIORITY_HIGH) !=
+                                          NCS_IPC_PRIORITY_LOW) !=
                            NCSCC_RC_SUCCESS) {
                                TRACE("ipc send failed");
                                ntfs_evt_destroy(evt);
@@ -974,6 +975,8 @@ static uint32_t mds_svc_event(struct ncsmds_callback_info 
*info)
                                TRACE_8("AVD ADEST UP");
                                ncs_sel_obj_ind(&ntfs_cb->usr2_sel_obj);
                        }
+
+                       removeAgentFromDownList(info->info.svc_evt.i_dest);
                }
        }
@@ -1280,6 +1283,10 @@ uint32_t ntfs_mds_msg_send(ntfs_cb_t *cb, ntfsv_msg_t *msg, MDS_DEST *dest,
        MDS_SEND_INFO *send_info = &mds_info.info.svc_send;
        uint32_t rc = NCSCC_RC_SUCCESS;
+ /* skip sending message if agent is already down */
+       if (isInNtfaDownList(*dest))
+               return rc;
+
        /* populate the mds params */
        memset(&mds_info, '\0', sizeof(NCSMDS_INFO));



_______________________________________________
Opensaf-devel mailing list
Opensaf-devel@lists.sourceforge.net
https://lists.sourceforge.net/lists/listinfo/opensaf-devel

Reply via email to