Hi Thuan,

I have few comments below.

Regards, Vu

On 10/22/19 7:14 AM, thuan.tran wrote:
- When sending response message which Adest not exist (already down)
current MDS try to wait for 1.5 seconds before conclude no route to
send response message.

- There are 2 scenarios may have:
UP -> DOWN -> receive SNDRSP -> response timeout after 1.5s
UP -> receive SNDRSP -> DOWN -> response timeout after 1.5s

- With this change, MDS will not waste for 1.5s which can cause trouble
for higher layer services, e.g: ntf, imm, etc...
---
  src/mds/mds_c_api.c     | 70 ++++++++++++++++++++++++++++++++++++++++-
  src/mds/mds_c_sndrcv.c  | 52 ++++++++++++++++++++++++++++--
  src/mds/mds_core.h      | 25 +++++++++++++--
  src/mds/mds_dt2c.h      |  2 +-
  src/mds/mds_dt_common.c | 22 ++++++++++++-
  5 files changed, 162 insertions(+), 9 deletions(-)

diff --git a/src/mds/mds_c_api.c b/src/mds/mds_c_api.c
index 132555b8e..5dd30c536 100644
--- a/src/mds/mds_c_api.c
+++ b/src/mds/mds_c_api.c
@@ -1900,6 +1900,32 @@ uint32_t mds_mcm_svc_up(PW_ENV_ID pwe_id, MDS_SVC_ID 
svc_id, V_DEST_RL role,
/*************** Validation for SCOPE **********************/ + if (adest != m_MDS_GET_ADEST) {
+               MDS_ADEST_INFO *adest_info =
+                       (MDS_ADEST_INFO *)ncs_patricia_tree_get(
+                               &gl_mds_mcm_cb->adest_list,
+                               (uint8_t *)&adest);
+               if (!adest_info) {
+                       /* Add adest to adest list */
+                       adest_info = m_MMGR_ALLOC_ADEST_INFO;
+                       memset(adest_info, 0, sizeof(MDS_ADEST_INFO));
+                       adest_info->adest = adest;
+                       adest_info->node.key_info =
+                               (uint8_t *)&adest_info->adest;
+                       adest_info->svc_cnt = 1;
+                       adest_info->tmr_start = false;
+                       ncs_patricia_tree_add(
+                           &gl_mds_mcm_cb->adest_list,
+                           (NCS_PATRICIA_NODE *)adest_info);
+                       m_MDS_LOG_DBG("MCM:API: Adest=%" PRIu64
+                               " svc_cnt=%u", adest, adest_info->svc_cnt);
+               } else {
+                       adest_info->svc_cnt++;
+                       m_MDS_LOG_DBG("MCM:API: Adest=%" PRIu64
+                               " svc_cnt=%u", adest, adest_info->svc_cnt);
+               }
+       }
[Vu] This new database, adest_list, is shared b/w internal osaf_mds thread and mds's user thread, hence should be protected by mutex.
+
        status = mds_get_subtn_res_tbl_by_adest(local_svc_hdl, svc_id, vdest_id,
                                                adest, &log_subtn_result_info);
@@ -3571,6 +3597,24 @@ uint32_t mds_mcm_svc_down(PW_ENV_ID pwe_id, MDS_SVC_ID svc_id, V_DEST_RL role,
                /* Discard : Getting down before getting up */
        } else { /* Entry exist in subscription result table */
+ /* If adest exist and no sndrsp, start a timer */
+               MDS_ADEST_INFO *adest_info =
+                       (MDS_ADEST_INFO *)ncs_patricia_tree_get(
+                               &gl_mds_mcm_cb->adest_list,
+                               (uint8_t *)&adest);
+               if (adest_info) {
+                       adest_info->svc_cnt--;
+                       if (adest_info->svc_cnt == 0 &&
+                           adest_info->sndrsp_cnt == 0) {
+                               m_MDS_LOG_INFO("MCM:API: Adest=%" PRIu64
+                                       " down timer start", adest);
+                               if (adest_info->tmr_start == false) {
+                                       adest_info->tmr_start = true;
+                                       start_mds_down_tmr(adest, svc_id);
[Vu] Seems mds_down tmr is started twice. The first start is at the beginning of the function. But what is the reason to start the mds down timer here? What if we won't start the tmr?

    /* potentially clean up the process info database */
    MDS_PROCESS_INFO *info = mds_process_info_get(adest, svc_id);
    if (info != NULL) {
        /* Process info exist, delay the cleanup with a timeout to avoid
         * race */
        start_mds_down_tmr(adest, svc_id);
    }
+                               }
+                       }
+               }
+
                if (vdest_id == m_VDEST_ID_FOR_ADEST_ENTRY) {
                        status = mds_subtn_res_tbl_del(
                            local_svc_hdl, svc_id, vdest_id, adest,
@@ -4956,6 +5000,17 @@ uint32_t mds_mcm_init(void)
                return NCSCC_RC_FAILURE;
        }
+ /* ADEST TREE */
+       memset(&pat_tree_params, 0, sizeof(NCS_PATRICIA_PARAMS));
+       pat_tree_params.key_size = sizeof(MDS_DEST);
+       if (NCSCC_RC_SUCCESS !=
+           ncs_patricia_tree_init(&gl_mds_mcm_cb->adest_list,
+                                  &pat_tree_params)) {
+               m_MDS_LOG_ERR(
+                   "MCM:API: patricia_tree_init: adest :failure, L 
mds_mcm_init");
+               return NCSCC_RC_FAILURE;
+       }
+
        /* SERVICE TREE */
        memset(&pat_tree_params, 0, sizeof(NCS_PATRICIA_PARAMS));
        pat_tree_params.key_size = sizeof(MDS_SVC_HDL);
@@ -4966,7 +5021,12 @@ uint32_t mds_mcm_init(void)
                if (NCSCC_RC_SUCCESS !=
                    ncs_patricia_tree_destroy(&gl_mds_mcm_cb->vdest_list)) {
                        m_MDS_LOG_ERR(
-                           "MCM:API: patricia_tree_destroy: service :failure, L 
mds_mcm_init");
+                           "MCM:API: patricia_tree_destroy: vdest :failure, L 
mds_mcm_init");
+               }
+               if (NCSCC_RC_SUCCESS !=
+                   ncs_patricia_tree_destroy(&gl_mds_mcm_cb->adest_list)) {
+                       m_MDS_LOG_ERR(
+                           "MCM:API: patricia_tree_destroy: adest :failure, L 
mds_mcm_init");
                }
                return NCSCC_RC_FAILURE;
        }
@@ -4989,6 +5049,11 @@ uint32_t mds_mcm_init(void)
                        m_MDS_LOG_ERR(
                            "MCM:API: patricia_tree_destroy: vdest :failure, L 
mds_mcm_init");
                }
+               if (NCSCC_RC_SUCCESS !=
+                   ncs_patricia_tree_destroy(&gl_mds_mcm_cb->adest_list)) {
+                       m_MDS_LOG_ERR(
+                           "MCM:API: patricia_tree_destroy: adest :failure, L 
mds_mcm_init");
+               }
                return NCSCC_RC_FAILURE;
        }
@@ -5036,6 +5101,9 @@ uint32_t mds_mcm_destroy(void)
        /* VDEST TREE */
        ncs_patricia_tree_destroy(&gl_mds_mcm_cb->vdest_list);
+ /* ADEST TREE */
+       ncs_patricia_tree_destroy(&gl_mds_mcm_cb->adest_list);
+
        /* Free MCM control block */
        m_MMGR_FREE_MCM_CB(gl_mds_mcm_cb);
diff --git a/src/mds/mds_c_sndrcv.c b/src/mds/mds_c_sndrcv.c
index 7850ac714..4d8865b3a 100644
--- a/src/mds/mds_c_sndrcv.c
+++ b/src/mds/mds_c_sndrcv.c
@@ -2650,6 +2650,22 @@ static uint32_t mcm_pvt_red_snd_process_common(
                }
        }
+ /* Update adest sndrsp counter */
+       if ((MDS_SENDTYPE_RSP == req->i_sendtype) ||
+           (MDS_SENDTYPE_RRSP == req->i_sendtype)) {
[Vu] How about other send types? In current code, it also wait up to 1.5 seconds before returning timeout to caller.
    case MDS_SENDTYPE_SND:
    case MDS_SENDTYPE_RSP:
    case MDS_SENDTYPE_RED:
    case MDS_SENDTYPE_RRSP:
    case MDS_SENDTYPE_BCAST:
    case MDS_SENDTYPE_RBCAST:

+               MDS_ADEST_INFO *adest_info =
+                       (MDS_ADEST_INFO *)ncs_patricia_tree_get(
+                               &gl_mds_mcm_cb->adest_list, (uint8_t *)&dest);
+               if (adest_info) {
+                       adest_info->sndrsp_cnt--;
+                       if (adest_info->sndrsp_cnt == 0 &&
+                           adest_info->svc_cnt == 0)
+                               ncs_patricia_tree_del(
+                                       &gl_mds_mcm_cb->adest_list,
+                                       (NCS_PATRICIA_NODE *)adest_info);
+               }
+       }
+
        if (NCSCC_RC_SUCCESS != mds_subtn_res_tbl_get_by_adest(
                                    svc_cb->svc_hdl, to_svc_id, dest_vdest_id,
                                    dest, &role_ret, &subs_result_hdl)) {
@@ -2716,9 +2732,27 @@ static uint32_t 
mds_mcm_process_disc_queue_checks_redundant(
        } else if (sub_info->tmr_flag != true) {
                if ((MDS_SENDTYPE_RSP == req->i_sendtype) ||
                    (MDS_SENDTYPE_RRSP == req->i_sendtype)) {
-                       time_wait = true;
-                       m_MDS_LOG_INFO(
-                           "MDS_SND_RCV:Disc queue red: Subscr exists no timer 
running: Waiting for some time\n");
+                       MDS_ADEST_INFO *adest_info =
+                               (MDS_ADEST_INFO *)ncs_patricia_tree_get(
+                                       &gl_mds_mcm_cb->adest_list,
+                                       (uint8_t *)&anchor);
+                       if (adest_info && adest_info->svc_cnt == 0) {
+                               adest_info->sndrsp_cnt--;
+                               m_MDS_LOG_NOTIFY(
+                                       "MDS_SND_RCV: Adest already down,"
+                                       " skip response, remain sndrsp_cnt=%d",
+                                       adest_info->sndrsp_cnt);
+                               if (adest_info->sndrsp_cnt == 0)
+                                       ncs_patricia_tree_del(
+                                           &gl_mds_mcm_cb->adest_list,
+                                           (NCS_PATRICIA_NODE *)adest_info);
+                               return NCSCC_RC_FAILURE;
[Vu] since we know the destination is down, the error code should not be a failure.
+                       } else {
+                               time_wait = true;
+                               m_MDS_LOG_INFO(
+                                   "MDS_SND_RCV:Disc queue red: Subscr exists"
+                                   " no timer running: Waiting for some time");
+                       }
                } else {
                        m_MDS_LOG_INFO(
                            "MDS_SND_RCV: Subscription exists but Timer has 
expired\n");
@@ -5024,6 +5058,18 @@ static uint32_t 
mds_mcm_process_recv_snd_msg_common(MDS_SVC_INFO *svccb,
                        mds_mcm_free_msg_memory(recv->msg);
                        return NCSCC_RC_FAILURE;
                }
+
+               /* Update adest sndrsp counter */
+               if (recv->snd_type == MDS_SENDTYPE_SNDRSP ||
+                   recv->snd_type == MDS_SENDTYPE_REDRSP) {
+                       MDS_ADEST_INFO *adest_info =
+                               (MDS_ADEST_INFO *)ncs_patricia_tree_get(
+                                       &gl_mds_mcm_cb->adest_list,
+                                       (uint8_t *)&recv->src_adest);
+                       if (adest_info)
+                               adest_info->sndrsp_cnt++;
+               }
[Vu] If adest_info does not exist from the adest database, mds should not fwd the message to upper layer
as the rsp will be dropped later on.
+
                cbinfo.info.receive.pid = recv->pid;
                cbinfo.info.receive.uid = recv->uid;
                cbinfo.info.receive.gid = recv->gid;
diff --git a/src/mds/mds_core.h b/src/mds/mds_core.h
index c09b428ba..f5d4d22a1 100644
--- a/src/mds/mds_core.h
+++ b/src/mds/mds_core.h
@@ -204,12 +204,12 @@ typedef struct mds_subscription_info {
  #define m_GET_HDL_FROM_MDS_SVC_INFO(info) \
    ((info->svc-id)<<32 ||                        \
     (info->parent_pwe->pwe_id)<<16 ||            \
-   (info->parent_pwe->parent_vdest->vdest-id)))
+   (info->parent_pwe->parent_vdest->vdest-id))
#define m_GET_HDL_FROM_MDS_SVC_PWE_VDEST(s, p, v) \
    ((s)<<32 ||                                   \
     (p)<<16 ||                                   \
-   (v)))
+   (v))
/**************************************\
      MDS PWE related declarations
@@ -251,6 +251,16 @@ typedef struct mds_vdest_info {
} MDS_VDEST_INFO; +typedef struct mds_adest_info {
+  /* Indexing info */
+  NCS_PATRICIA_NODE node;
+  /* Adest info */
+  MDS_DEST adest; /* Key for Patricia node */
+  uint16_t svc_cnt; /* Adest SVC counter */
+  uint32_t sndrsp_cnt; /* Counter for adest send sndrsp */
+  bool tmr_start; /* Adest down timer start flag */
[Vu] What is the purpose of tmr_start flag?
+} MDS_ADEST_INFO;
+
  typedef struct mds_svc_info {
    /* Indexing info */
    NCS_PATRICIA_NODE svc_list_node;
@@ -301,6 +311,7 @@ typedef struct mds_mcm_cb {
    NCS_PATRICIA_TREE subtn_results;
    NCS_PATRICIA_TREE svc_list;   /* Tree of MDS_SVC_INFO information */
    NCS_PATRICIA_TREE vdest_list; /* Tree of MDS_VDEST_INFO information */
+  NCS_PATRICIA_TREE adest_list; /* Tree of MDS_ADEST_INFO information */
  } MDS_MCM_CB;
/* Global MDSCB */
@@ -681,6 +692,15 @@ uint32_t (*mds_mdtm_node_unsubscribe)(MDS_SUBTN_REF_VAL 
subtn_ref_val);
    m_NCS_MEM_FREE(p, NCS_MEM_REGION_TRANSIENT, NCS_SERVICE_ID_MDS, \
                   MDS_MEM_VDEST_INFO)
+#define m_MMGR_ALLOC_ADEST_INFO \
+  (MDS_ADEST_INFO *)m_NCS_MEM_ALLOC(sizeof(MDS_ADEST_INFO),   \
+                                    NCS_MEM_REGION_TRANSIENT, \
+                                    NCS_SERVICE_ID_MDS, MDS_MEM_ADEST_INFO)
+
+#define m_MMGR_FREE_ADEST_INFO(p)                                 \
+  m_NCS_MEM_FREE(p, NCS_MEM_REGION_TRANSIENT, NCS_SERVICE_ID_MDS, \
+                 MDS_MEM_ADEST_INFO)
+
  #define m_MMGR_ALLOC_PWE_INFO                               \
    (MDS_PWE_INFO *)m_NCS_MEM_ALLOC(sizeof(MDS_PWE_INFO),     \
                                    NCS_MEM_REGION_TRANSIENT, \
@@ -768,7 +788,6 @@ typedef struct mds_mcm_msg_elem {
        NCSMDS_CALLBACK_INFO cbinfo;
      } event;
    } info;
-
  } MDS_MCM_MSG_ELEM;
/* ******************************************** */
diff --git a/src/mds/mds_dt2c.h b/src/mds/mds_dt2c.h
index c92fecbba..49c9b09c4 100644
--- a/src/mds/mds_dt2c.h
+++ b/src/mds/mds_dt2c.h
@@ -223,7 +223,7 @@ typedef enum {
    MDS_MEM_DIRECT_BUFF,
    MDS_MEM_AWAIT_ACTIVE,
    MDS_MEM_MSGELEM,
-  MDS_MEM_ADEST_LIST,
+  MDS_MEM_ADEST_INFO,
    MDS_MEM_REASSEM_QUEUE,
    MDS_MEM_HDL_LIST,
    MDS_MEM_CACHED_EVENTS_LIST,
diff --git a/src/mds/mds_dt_common.c b/src/mds/mds_dt_common.c
index de1388367..19000b387 100644
--- a/src/mds/mds_dt_common.c
+++ b/src/mds/mds_dt_common.c
@@ -998,6 +998,26 @@ uint32_t mds_tmr_mailbox_processing(void)
                                            info->mds_dest, info->pid);
                                        (void)mds_process_info_del(info);
                                        free(info);
+                               } else {
+                                       MDS_DEST adest =
+                                               tmr_req_info->info
+                                               .down_event_tmr_info.adest;
+                                       m_MDS_LOG_INFO(
+                                           "TIMEOUT Adest=%" PRIu64
+                                           " down", adest);
+                                       MDS_ADEST_INFO *adest_info =
+                                           (MDS_ADEST_INFO *)
+                                           ncs_patricia_tree_get(
+                                           &gl_mds_mcm_cb->adest_list,
+                                           (uint8_t *)&adest);
+                                       if (adest_info) {
+                                               adest_info->tmr_start = false;
+                                               if (adest_info->svc_cnt == 0 &&
+                                                   adest_info->sndrsp_cnt == 0)
+                                                   ncs_patricia_tree_del(
+                                                       
&gl_mds_mcm_cb->adest_list,
+                                                       (NCS_PATRICIA_NODE 
*)adest_info);
+                                       }
                                }
if (tmr_req_info->info.down_event_tmr_info
@@ -1700,7 +1720,7 @@ uint16_t mds_checksum(uint32_t length, uint8_t buff[])
        /* Take the one's complement of sum */
        sum = ~sum;
- return ((uint16_t)sum);
+       return (uint16_t)sum;
[Vu] Any reason to change the format of this code line?
  }
/****************************************************************************



_______________________________________________
Opensaf-devel mailing list
Opensaf-devel@lists.sourceforge.net
https://lists.sourceforge.net/lists/listinfo/opensaf-devel

Reply via email to