Re: [devel] [PATCH 1/1] clm: fix memleak detected by valgrind [#3238]
Hi Surbhi, ACK with minor comment. Short commit msg should be "ntf: ..." Best Regards, ThuanTr -Original Message- From: Surbhi Tripathi Sent: Tuesday, November 24, 2020 10:11 AM To: Thuan Tran Cc: opensaf-devel@lists.sourceforge.net; Surbhi Tripathi Subject: [PATCH 1/1] clm: fix memleak detected by valgrind [#3238] --- src/ntf/ntfd/ntfs_main.c | 1 + 1 file changed, 1 insertion(+) diff --git a/src/ntf/ntfd/ntfs_main.c b/src/ntf/ntfd/ntfs_main.c index 2054205e2..d37d7f8b3 100644 --- a/src/ntf/ntfd/ntfs_main.c +++ b/src/ntf/ntfd/ntfs_main.c @@ -343,6 +343,7 @@ int main(int argc, char *argv[]) if (fds[FD_TERM].revents & POLLIN) { TRACE("Exit via FD_TERM calling stop_ntfimcn()"); (void)stop_ntfimcn(); +saClmFinalize(ntfs_cb->clm_hdl); daemon_exit(); } -- 2.17.1 ___ Opensaf-devel mailing list Opensaf-devel@lists.sourceforge.net https://lists.sourceforge.net/lists/listinfo/opensaf-devel
Re: [devel] [PATCH 1/1] imm: prevent override delimiter value when using with option -a [#3235]
Hi Thien, ACK from me. Best Regards, ThuanTr -Original Message- From: Thien Minh Huynh Sent: Thursday, November 12, 2020 5:16 PM To: Thuan Tran ; Thang Duc Nguyen Cc: opensaf-devel@lists.sourceforge.net; Thien Minh Huynh Subject: [PATCH 1/1] imm: prevent override delimiter value when using with option -a [#3235] when using option -d to set delimiter for multiple value combine with option -a. Delimiter was override with default value. The fix is checking delimiter option before assign default value. --- src/imm/tools/imm_list.c | 7 +-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/src/imm/tools/imm_list.c b/src/imm/tools/imm_list.c index b5694bb42..680bac91f 100644 --- a/src/imm/tools/imm_list.c +++ b/src/imm/tools/imm_list.c @@ -83,7 +83,7 @@ static void usage(const char *progname) printf("\timmlist -a saAmfApplicationAdminState safApp=OpenSAF\n"); printf("\timmlist safApp=myApp1 safApp=myApp2\n"); printf("\timmlist --pretty-print=no -a saAmfAppType safApp=OpenSAF\n"); - printf("\timmlist -d '|' -a safAmfNodeGroup safAmfNodeGroup=AllNodes,safAmfCluster=myAmfCluster\n"); + printf("\timmlist -d '|' -a saAmfNGNodeList safAmfNodeGroup=AllNodes,safAmfCluster=myAmfCluster\n"); } static void print_attr_value_raw(SaImmValueTypeT attrValueType, @@ -492,6 +492,7 @@ int main(int argc, char *argv[]) int rc = EXIT_SUCCESS; unsigned long timeoutVal = 60; char delimiter = ' '; + bool is_existed_delimiter = false; /* Support for long DN */ setenv("SA_ENABLE_EXTENDED_NAMES", "1", 1); @@ -513,7 +514,8 @@ int main(int argc, char *argv[]) attributeNames, ++len * sizeof(SaImmAttrNameT)); attributeNames[len - 2] = strdup(optarg); attributeNames[len - 1] = NULL; - delimiter = ':'; + if (!is_existed_delimiter) + delimiter = ':'; pretty_print = 0; break; case 'c': @@ -537,6 +539,7 @@ int main(int argc, char *argv[]) "delimiter must be a single character\n"); exit(EXIT_FAILURE); } + is_existed_delimiter = true; delimiter = optarg[0]; break; default: -- 2.17.1 ___ Opensaf-devel mailing list Opensaf-devel@lists.sourceforge.net https://lists.sourceforge.net/lists/listinfo/opensaf-devel
Re: [devel] [PATCH 1/1] ntf: fix coredump while creating object having string value, SA_NOTIFY [#3232]
Hi Quang, ACK from me. Best Regards, ThuanTr -Original Message- From: Quang Xuan Nhat Nghiem Sent: Wednesday, November 11, 2020 3:50 PM To: Minh Hon Chau ; Thuan Tran Cc: opensaf-devel@lists.sourceforge.net; Quang Xuan Nhat Nghiem Subject: [PATCH 1/1] ntf: fix coredump while creating object having string value, SA_NOTIFY [#3232] When create or modify an object having size of attribute value over 65535, this actual size will be truncated because dataSize of saNtfPtrValAllocate is SaUint16T (from 0 to 65535). Thus, after saNtfPtrValAllocate's invoked, the attribute value is assigned to the memory allocated with the actual size over 65535 and cause a memory corruption. Solution is prevent the size of data and log a warning if is's over 65535. --- src/ntf/ntfimcnd/ntfimcn_notifier.c | 7 +++ 1 file changed, 7 insertions(+) diff --git a/src/ntf/ntfimcnd/ntfimcn_notifier.c b/src/ntf/ntfimcnd/ntfimcn_notifier.c index c63b4393f..05cbb6a67 100644 --- a/src/ntf/ntfimcnd/ntfimcn_notifier.c +++ b/src/ntf/ntfimcnd/ntfimcn_notifier.c @@ -233,6 +233,13 @@ static int fill_value_array(SaNtfNotificationHandleT notificationHandle, TRACE_ENTER(); + if (value_in_size > USHRT_MAX) { + LOG_WA("Failed to prepare notification as attr value size " + "(%llu) > MAX(%u)", + value_in_size, USHRT_MAX); + internal_rc = (-1); + goto done; + } rc = saNtfPtrValAllocate(notificationHandle, value_in_size, (void **)_ptr, value_out); if (rc != SA_AIS_OK) { -- 2.17.1 ___ Opensaf-devel mailing list Opensaf-devel@lists.sourceforge.net https://lists.sourceforge.net/lists/listinfo/opensaf-devel
Re: [devel] [PATCH 1/1] ntf: fix coredump while creating object having string value, SA_NOTIFY [#3232]
Hi Quang, One comment inline. Best Regards, ThuanTr -Original Message- From: Quang Xuan Nhat Nghiem Sent: Wednesday, November 11, 2020 12:56 PM To: Minh Hon Chau ; Thuan Tran Cc: opensaf-devel@lists.sourceforge.net; Quang Xuan Nhat Nghiem Subject: [PATCH 1/1] ntf: fix coredump while creating object having string value, SA_NOTIFY [#3232] When create or modify an object having size of attribute value over 65535, this actual size will be truncated because dataSize of saNtfPtrValAllocate is SaUint16T (from 0 to 65535). Thus, after saNtfPtrValAllocate's invoked, the attribute value is assigned to the memory allocated with the actual size over 65535 and cause a memory corruption. Solution is prevent the size of data and log a warning if is's over 65535. --- src/ntf/ntfimcnd/ntfimcn_notifier.c | 12 ++-- 1 file changed, 10 insertions(+), 2 deletions(-) diff --git a/src/ntf/ntfimcnd/ntfimcn_notifier.c b/src/ntf/ntfimcnd/ntfimcn_notifier.c index c63b4393f..148e5abae 100644 --- a/src/ntf/ntfimcnd/ntfimcn_notifier.c +++ b/src/ntf/ntfimcnd/ntfimcn_notifier.c @@ -233,8 +233,16 @@ static int fill_value_array(SaNtfNotificationHandleT notificationHandle, TRACE_ENTER(); - rc = saNtfPtrValAllocate(notificationHandle, value_in_size, -(void **)_ptr, value_out); + if (value_in_size > USHRT_MAX) { + LOG_WA("Failed to prepare notification as attr value size " + "(%llu) > MAX(%u)", + value_in_size, USHRT_MAX); + internal_rc = (-1); + goto done; + } else { + rc = saNtfPtrValAllocate(notificationHandle, value_in_size, +(void **)_ptr, value_out); [Thuan] No need put it in ELSE as IF block has goto done. Keep this out of ELSE make smaller changes and consistent code. Otherwise, below IF block inside this ELSE looks more consistent. + } if (rc != SA_AIS_OK) { LOG_ER("%s: saNtfPtrValAllocate failed %s", __FUNCTION__, saf_error(rc)); -- 2.17.1 ___ Opensaf-devel mailing list Opensaf-devel@lists.sourceforge.net https://lists.sourceforge.net/lists/listinfo/opensaf-devel
Re: [devel] [PATCH 1/1] base: Use non-blocking socketpair in sysf_exc module V3 [#3222]
Hi Minh, ACK from me. Best Regards, ThuanTr -Original Message- From: Minh Hon Chau Sent: Wednesday, October 28, 2020 5:43 AM To: Thuan Tran ; Thang Duc Nguyen Cc: opensaf-devel@lists.sourceforge.net; Minh Hon Chau Subject: [PATCH 1/1] base: Use non-blocking socketpair in sysf_exc module V3 [#3222] In the scenario that amfnd terminates a huge number of components at once (around 800 components), amfnd catches the sigchild signal from components' processes in signal handler and calls write() to notify amfnd's threads to proceed the component termination. As of this result, multiple blocking write() calls are observed being blocked because the thread calls read() being busy with waitpid despite that waitpid is nohang. The slowness of read() thread is due to scanning through all pids and there are so many child processes being terminated at the same time. This patch changes the socketpair as non-blocking to avoid write() being blocked. It also uses poll event to avoid hogging cpu in the read() thread. --- src/base/sysf_exc_scr.c | 121 1 file changed, 60 insertions(+), 61 deletions(-) diff --git a/src/base/sysf_exc_scr.c b/src/base/sysf_exc_scr.c index 378b1eeab..119f72478 100644 --- a/src/base/sysf_exc_scr.c +++ b/src/base/sysf_exc_scr.c @@ -33,10 +33,11 @@ #include "base/sysf_exc_scr.h" #include "base/ncssysf_def.h" +#include #include SYSF_EXECUTE_MODULE_CB module_cb; - +static struct pollfd fds[1]; /* PROCEDURE: ncs_exc_mdl_start_timer @@ -108,8 +109,19 @@ void ncs_exec_module_signal_hdlr(int signal) /* printf("\n In SIGCHLD Handler \n"); */ - if (-1 == write(module_cb.write_fd, (const void *), + while (-1 == write(module_cb.write_fd, (const void *), sizeof(EXEC_MOD_INFO))) { + /* Only continue if the error is EINTR which may be +* caused by the signal interupt, and do not try again +* with EAGAIN and EWOULDBLOCK since that will become +* the reason to cause the threads hanging with +* BLOCKING socketpair and the ncs_exec_mod_hdlr scans +* all child pid for each read() +*/ + if (errno == EINTR) + continue; + + break; perror("ncs_exec_module_signal_hdlr: write"); } } @@ -137,11 +149,7 @@ void ncs_exec_module_timer_hdlr(void *uarg) EXEC_MOD_INFO info = {.pid = NCS_PTR_TO_INT32_CAST(uarg), .status = 0, .type = SYSF_EXEC_INFO_TIME_OUT}; - - if (-1 == write(module_cb.write_fd, (const void *), - sizeof(EXEC_MOD_INFO))) { - perror("ncs_exec_module_timer_hdlr: write"); - } + give_exec_mod_cb(info.pid, info.status, info.type); } /**\ @@ -169,8 +177,25 @@ void ncs_exec_mod_hdlr(void) SYSF_PID_LIST *exec_pid = NULL; int status = -1; int pid = -1; + int polltmo = -1; + + fds[0].fd = module_cb.read_fd; + fds[0].events = POLLIN; while (1) { + int pollretval = poll(fds, 1, polltmo); + + if (pollretval == -1) { + if (errno == EINTR) + continue; + + LOG_ER("ncs_exec_mod_hdlr: poll FAILED - %s", + strerror(errno)); + break; + } + if ((fds[0].revents & POLLIN) == false) + continue; + while ((ret_val = read( module_cb.read_fd, (((uint8_t *)) + count), (maxsize - count))) != (maxsize - count)) { @@ -178,66 +203,40 @@ void ncs_exec_mod_hdlr(void) if (errno == EBADF) return; - perror("ncs_exec_mod_hdlr: read fail:"); continue; } count += ret_val; } /* while */ - if (info.type == SYSF_EXEC_INFO_TIME_OUT) { - /* printf("Time out signal \n"); */ - pid = info.pid; - give_exec_mod_cb(info.pid, info.status, info.type); - - } /* if */ - else { repeat_srch_from_beginning: - m_NCS_LOCK(_cb.tree_lock, NCS_LOCK_WRITE); - - for (ex
Re: [devel] [PATCH 1/1] amf: fix coredump in start up [#3186]
Hi Thang, ACK from me. Best Regards, ThuanTr -Original Message- From: Thang Duc Nguyen Sent: Thursday, October 22, 2020 2:34 PM To: Thuan Tran ; Minh Hon Chau Cc: opensaf-devel@lists.sourceforge.net; Thang Duc Nguyen Subject: [PATCH 1/1] amf: fix coredump in start up [#3186] During node start up, it loses connection with Active SC. At that time CLM join cluster and AMFND tries to convert CLM node to AMF node. But IMMND at that time is down (e.i, unregister then register with MDS). The IMM OM API return asap and cause the coredump. Need to retry in this case for more robustness. --- src/amf/amfnd/clm.cc | 16 1 file changed, 12 insertions(+), 4 deletions(-) diff --git a/src/amf/amfnd/clm.cc b/src/amf/amfnd/clm.cc index 317674f10..27b6e171d 100644 --- a/src/amf/amfnd/clm.cc +++ b/src/amf/amfnd/clm.cc @@ -138,7 +138,7 @@ done: TRACE_LEAVE(); } -static void clm_to_amf_node(void) { +static SaAisErrorT clm_to_amf_node(void) { SaAisErrorT error; SaImmSearchHandleT searchHandle; SaNameT amfdn, clmdn; @@ -157,8 +157,7 @@ static void clm_to_amf_node(void) { error = saImmOmInitialize_cond(, nullptr, ); if (SA_AIS_OK != error) { -LOG_WA("saImmOmInitialize failed. Use previous value of nodeName."); -osafassert(avnd_cb->amf_nodeName.empty() == false); +LOG_WA("saImmOmInitialize failed: %u", error); goto done1; } @@ -192,6 +191,7 @@ done: immutil_saImmOmFinalize(immOmHandle); done1: TRACE_LEAVE2("%u", error); + return error; } / @@ -279,7 +279,15 @@ static void clm_track_cb( memcpy(&(avnd_cb->node_info), &(notifItem->clusterNode), sizeof(SaClmClusterNodeT_4)); /*get the amf node from clm node name */ - if (avnd_cb->amf_nodeName.empty()) clm_to_amf_node(); + if (avnd_cb->amf_nodeName.empty()) { +if (clm_to_amf_node() == SA_AIS_ERR_TRY_AGAIN) { + // Take one more try + if (clm_to_amf_node() != SA_AIS_OK) { +LOG_ER("clm_to_amf_node() failed"); +exit(EXIT_FAILURE); + } +} + } avnd_send_node_up_msg(); avnd_cb->first_time_up = false; } else { -- 2.17.1 ___ Opensaf-devel mailing list Opensaf-devel@lists.sourceforge.net https://lists.sourceforge.net/lists/listinfo/opensaf-devel
Re: [devel] [PATCH 1/1] amf: Increase sync node size equal with clusters size [#3225]
Hi Thien, ACK from me. Best Regards, ThuanTr -Original Message- From: Thien Minh Huynh Sent: Tuesday, October 20, 2020 11:16 AM To: Thuan Tran ; Thang Duc Nguyen Cc: opensaf-devel@lists.sourceforge.net; Thien Minh Huynh Subject: [PATCH 1/1] amf: Increase sync node size equal with clusters size [#3225] --- src/amf/amfd/ndfsm.cc | 11 --- src/amf/amfd/proc.h | 2 +- 2 files changed, 5 insertions(+), 8 deletions(-) diff --git a/src/amf/amfd/ndfsm.cc b/src/amf/amfd/ndfsm.cc index 434c30ce4..be7db7a3e 100644 --- a/src/amf/amfd/ndfsm.cc +++ b/src/amf/amfd/ndfsm.cc @@ -169,7 +169,7 @@ uint32_t avd_count_sync_node_size(AVD_CL_CB *cb) { uint32_t count = 0; TRACE_ENTER(); - count = node_name_db->size() - 1; + count = node_name_db->size(); TRACE("sync node size:%d", count); return count; @@ -180,15 +180,13 @@ uint32_t avd_count_sync_node_size(AVD_CL_CB *cb) { * Purpose: Helper function count number of nodes that sent node_up msg to * director * - * Input: cb - the AVD control block - * * Returns: Number of node * * NOTES: * * **/ -uint32_t avd_count_node_up(AVD_CL_CB *cb) { +uint32_t avd_count_node_up() { uint32_t received_count = 0; AVD_AVND *node = nullptr; @@ -196,8 +194,7 @@ uint32_t avd_count_node_up(AVD_CL_CB *cb) { for (const auto : *node_name_db) { node = value.second; -if (node->node_up_msg_count > 0 && -node->node_info.nodeId != cb->node_id_avd) +if (node->node_up_msg_count > 0) ++received_count; } TRACE("Number of node director(s) that director received node_up msg:%u", @@ -329,7 +326,7 @@ void avd_node_up_evh(AVD_CL_CB *cb, AVD_EVT *evt) { } uint32_t rc_node_up; avnd->node_up_msg_count++; -rc_node_up = avd_count_node_up(cb); +rc_node_up = avd_count_node_up(); if (rc_node_up == sync_nd_size) { if (cb->node_sync_tmr.is_active) { avd_stop_tmr(cb, >node_sync_tmr); diff --git a/src/amf/amfd/proc.h b/src/amf/amfd/proc.h index 1c3cce9ca..bf8b476ca 100644 --- a/src/amf/amfd/proc.h +++ b/src/amf/amfd/proc.h @@ -64,7 +64,7 @@ uint32_t avd_sg_nway_si_assign(AVD_CL_CB *, AVD_SG *); /* The following are for N-way Active redundancy model */ AVD_SU *avd_sg_nacvred_su_chose_asgn(AVD_CL_CB *cb, AVD_SG *sg); -uint32_t avd_count_node_up(AVD_CL_CB *cb); +uint32_t avd_count_node_up(); uint32_t avd_evt_queue_count(AVD_CL_CB *cb); uint32_t avd_count_sync_node_size(AVD_CL_CB *cb); void avd_process_state_info_queue(AVD_CL_CB *cb); -- 2.17.1 ___ Opensaf-devel mailing list Opensaf-devel@lists.sourceforge.net https://lists.sourceforge.net/lists/listinfo/opensaf-devel
Re: [devel] [PATCH 1/1] amf: Increase sync node size equal with clusters size [#3225]
Hi Thien, ACK from me. Best Regards, ThuanTr -Original Message- From: Thien Minh Huynh Sent: Monday, October 19, 2020 9:58 AM To: Thuan Tran ; Thang Duc Nguyen Cc: opensaf-devel@lists.sourceforge.net; Thien Minh Huynh Subject: [PATCH 1/1] amf: Increase sync node size equal with clusters size [#3225] --- src/amf/amfd/ndfsm.cc | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/src/amf/amfd/ndfsm.cc b/src/amf/amfd/ndfsm.cc index 434c30ce4..6baf68fdb 100644 --- a/src/amf/amfd/ndfsm.cc +++ b/src/amf/amfd/ndfsm.cc @@ -169,7 +169,7 @@ uint32_t avd_count_sync_node_size(AVD_CL_CB *cb) { uint32_t count = 0; TRACE_ENTER(); - count = node_name_db->size() - 1; + count = node_name_db->size(); TRACE("sync node size:%d", count); return count; @@ -196,8 +196,7 @@ uint32_t avd_count_node_up(AVD_CL_CB *cb) { for (const auto : *node_name_db) { node = value.second; -if (node->node_up_msg_count > 0 && -node->node_info.nodeId != cb->node_id_avd) +if (node->node_up_msg_count > 0) ++received_count; } TRACE("Number of node director(s) that director received node_up msg:%u", -- 2.17.1 ___ Opensaf-devel mailing list Opensaf-devel@lists.sourceforge.net https://lists.sourceforge.net/lists/listinfo/opensaf-devel
Re: [devel] [PATCH 1/1] imm: fix immnd crash in multi partitioned clusters rejoin [#3219]
Hi Minh, Before IMMND get re-intro rsp from IMMD, it get IMMND_EVT_D2ND_PBE_PRTO_PURGE_MUTATIONS from IMMD broadcast then crash because it's used on different partition. As IMMND crash then it cannot reboot node as expected. The patch to help not incident get broadcast event before getting re-intro rsp. Best Regards, ThuanTr -Original Message- From: Minh Hon Chau Sent: Friday, September 18, 2020 9:11 AM To: Thuan Tran ; Thang Duc Nguyen ; Thanh Nguyen ; Thien Minh Huynh Cc: opensaf-devel@lists.sourceforge.net Subject: Re: [PATCH 1/1] imm: fix immnd crash in multi partitioned clusters rejoin [#3219] Hi Thuan, Can you elaborate a bit more: - how the crash is happened, what's the cause. - how the patch would work. Thanks Minh On 17/9/20 1:35 pm, thuan.tran wrote: > - immnd prioritize re-introduce rsp from immd. > - immnd ignore broadcast events from IMMD if re-introduce on-going. > --- > src/imm/immnd/immnd_evt.c | 21 +++-- > src/imm/immnd/immnd_mds.c | 5 - > 2 files changed, 19 insertions(+), 7 deletions(-) > > diff --git a/src/imm/immnd/immnd_evt.c b/src/imm/immnd/immnd_evt.c > index afc2106a0..714a75ca2 100644 > --- a/src/imm/immnd/immnd_evt.c > +++ b/src/imm/immnd/immnd_evt.c > @@ -625,6 +625,21 @@ void immnd_process_evt(void) > return; > } > > + if ((cb->mIntroduced == 2) && > + ((evt->info.immnd.type == IMMND_EVT_D2ND_SYNC_START) || > + (evt->info.immnd.type == IMMND_EVT_D2ND_SYNC_ABORT) || > + (evt->info.immnd.type == IMMND_EVT_D2ND_PBE_PRTO_PURGE_MUTATIONS) > || > + (evt->info.immnd.type == IMMND_EVT_D2ND_DUMP_OK) || > + (evt->info.immnd.type == IMMND_EVT_D2ND_LOADING_OK) || > + (evt->info.immnd.type == IMMND_EVT_D2ND_GLOB_FEVS_REQ) || > + (evt->info.immnd.type == IMMND_EVT_D2ND_GLOB_FEVS_REQ_2))) { > + LOG_WA("DISCARD message %s from IMMD %x as re-intro on-going", > + immsv_get_immnd_evt_name(evt->info.immnd.type), > + evt->sinfo.node_id); > + immnd_evt_destroy(evt, true, __LINE__); > + return; > + } > + > if ((evt->info.immnd.type != IMMND_EVT_D2ND_GLOB_FEVS_REQ) && > (evt->info.immnd.type != IMMND_EVT_D2ND_GLOB_FEVS_REQ_2)) > immsv_msg_trace_rec(evt->sinfo.dest, evt); > @@ -10779,12 +10794,6 @@ static uint32_t immnd_evt_proc_fevs_rcv(IMMND_CB > *cb, IMMND_EVT *evt, >: false; > TRACE_ENTER(); > > - if (cb->mIntroduced == 2) { > - LOG_WA("DISCARD FEVS message:%llu from %x", msgNo, > sinfo->node_id); > - dequeue_outgoing(cb); > - return NCSCC_RC_FAILURE; > - } > - > if (cb->highestProcessed >= msgNo) { > /*We have already received this message, discard it. */ > LOG_WA( > diff --git a/src/imm/immnd/immnd_mds.c b/src/imm/immnd/immnd_mds.c > index 02cb4b552..d9cccd5d9 100644 > --- a/src/imm/immnd/immnd_mds.c > +++ b/src/imm/immnd/immnd_mds.c > @@ -552,7 +552,10 @@ static uint32_t immnd_mds_rcv(IMMND_CB *cb, > MDS_CALLBACK_RECEIVE_INFO *rcv_info) > } > > /* Put it in IMMND's Event Queue */ > - if (pEvt->info.immnd.type == IMMND_EVT_A2ND_IMM_INIT) > + if (pEvt->info.immnd.type == IMMND_EVT_D2ND_INTRO_RSP) > + rc = m_NCS_IPC_SEND(>immnd_mbx, (NCSCONTEXT)pEvt, > + NCS_IPC_PRIORITY_VERY_HIGH); > + else if (pEvt->info.immnd.type == IMMND_EVT_A2ND_IMM_INIT) > rc = m_NCS_IPC_SEND(>immnd_mbx, (NCSCONTEXT)pEvt, > NCS_IPC_PRIORITY_HIGH); > else ___ Opensaf-devel mailing list Opensaf-devel@lists.sourceforge.net https://lists.sourceforge.net/lists/listinfo/opensaf-devel
Re: [devel] [PATCH 1/1] imm: Enhance deep clone of Imm Attr values in immutil [#3215]
Hi Thanh, ACK with one minor comment. I think commit msg should be "osaf: ...", not "imm: ..." P/s: You don't need send out new version for minor comments. Best Regards, ThuanTr -Original Message- From: Thanh Nguyen Sent: Friday, September 4, 2020 6:43 AM To: Thang Duc Nguyen ; Minh Hon Chau ; Thuan Tran Cc: opensaf-devel@lists.sourceforge.net; Thanh Nguyen Subject: [PATCH 1/1] imm: Enhance deep clone of Imm Attr values in immutil [#3215] 1) Reuse existing functionality for deep clone of Imm Attr values. 2) Publish immutil internal memory management for external use --- src/amf/amfd/imm.cc| 15 ++- src/amf/amfd/imm.h | 8 +- src/ntf/ntfimcnd/ntfimcn_imm.c | 52 + src/ntf/ntfimcnd/ntfimcn_imm.h | 18 src/osaf/immutil/immutil.c | 190 - src/osaf/immutil/immutil.h | 46 ++-- 6 files changed, 128 insertions(+), 201 deletions(-) diff --git a/src/amf/amfd/imm.cc b/src/amf/amfd/imm.cc index 826e90d41..b2a17a10c 100644 --- a/src/amf/amfd/imm.cc +++ b/src/amf/amfd/imm.cc @@ -206,9 +206,19 @@ done: return res; } +const size_t ImmObjCreate::INIT_MEMREF_SIZE; + +ImmObjCreate::ImmObjCreate() : +className_(NULL), +parentName_(), +attrValues_(NULL), +memRef(NULL) +{ + memRef = immutil_getMem(INIT_MEMREF_SIZE); +} // ImmObjCreate::~ImmObjCreate() { - immutil_freeSaImmAttrValuesT(attrValues_); + immutil_freeMem(memRef); delete[] className_; } @@ -1862,7 +1872,8 @@ void avd_saImmOiRtObjectCreate(const std::string , ajob->className_ = StrDup(className.c_str()); osafassert(ajob->className_ != nullptr); ajob->parentName_ = parentName; - ajob->attrValues_ = immutil_dupSaImmAttrValuesT(attrValues); + ajob->attrValues_ = immutil_dupSaImmAttrValuesT_array(ajob->memRef, + attrValues); Fifo::queue(ajob); TRACE_LEAVE(); diff --git a/src/amf/amfd/imm.h b/src/amf/amfd/imm.h index 99d47d313..797d7ac82 100644 --- a/src/amf/amfd/imm.h +++ b/src/amf/amfd/imm.h @@ -84,12 +84,18 @@ class ImmObjCreate : public ImmJob { SaImmClassNameT className_; std::string parentName_; SaImmAttrValuesT_2 **attrValues_; + // Memory used to deep clone SaImmAttrValuesT_2** + // memRef must be allocated by immutil_getMem() and free by immutil_freeMem() + void* memRef; - ImmObjCreate() : ImmJob(){}; + ImmObjCreate(); bool immobj_update_required(); AvdJobDequeueResultT exec(AVD_CL_CB *cb); ~ImmObjCreate(); + + private: + static const size_t INIT_MEMREF_SIZE = 512; }; // diff --git a/src/ntf/ntfimcnd/ntfimcn_imm.c b/src/ntf/ntfimcnd/ntfimcn_imm.c index 2e515e22b..deb75a072 100644 --- a/src/ntf/ntfimcnd/ntfimcn_imm.c +++ b/src/ntf/ntfimcnd/ntfimcn_imm.c @@ -37,9 +37,8 @@ #include #include -#include "osaf/immutil/immutil.h" -#include "ntfimcn_main.h" +#include "ntfimcn_imm.h" #include "ntfimcn_notifier.h" /* @@ -288,14 +287,6 @@ static void free_ccb_data(CcbUtilCcbData_t *ccb_data) { if (ccb_data != NULL) { if (ccb_data->userData != NULL) { osaf_extended_name_free(ccb_data->userData); - free(ccb_data->userData); - } - // Free userData in CcbUtilOperationData - struct CcbUtilOperationData* oper_data = - ccb_data->operationListHead; - for (; oper_data!= NULL; oper_data = oper_data->next) { - immutil_freeSaImmAttrValuesT((SaImmAttrValuesT_2**) - oper_data->userData); } ccbutil_deleteCcbData(ccb_data); } @@ -578,7 +569,8 @@ saImmOiCcbObjectModifyCallback(SaImmOiHandleT immOiHandle, SaImmOiCcbIdT ccbId, struct CcbUtilOperationData *ccbOperData; ccbOperData = ccbUtilCcbData->operationListTail; SaImmAttrValuesT_2 **curAttr; - rc = get_current_attrs(objectName, attrMods, ); + rc = immutil_getCurrentAttrs(ccbUtilCcbData->memref, +objectName, attrMods, ); if (SA_AIS_OK == rc) { ccbOperData->userData = curAttr; } else { @@ -999,41 +991,3 @@ static void finalizeImmOmHandle(SaImmHandleT immOmHandle) { saf_error(ais_rc)); } } - -SaAisErrorT get_current_attrs(const SaNameT *objectName, -const SaImmAttrModificationT_2 **attrMods, -SaImmAttrValuesT_2 ***curAttr) { - TRACE_ENTER(); - SaAisErrorT rc = SA_AIS_OK; - // There is no new attribute modifications - if (attrMods == NULL) { - *curAttr = NULL; - return SA_AIS_ERR_INVALID_PARAM; - } - int len; - for (len = 0; attrMods[len] != NULL; ++len) ; - SaImmAttrNameT *attrNames = calloc((len + 1), sizeof(S
Re: [devel] [PATCH 1/1] imm: Enhance deep clone of Imm Attr values in immutil [#3215]
Hi Thanh, ACK with minor comments inline. Best Regards, ThuanTr -Original Message- From: Thanh Nguyen Sent: Wednesday, September 2, 2020 10:49 AM To: Thang Duc Nguyen ; Minh Hon Chau ; Thuan Tran Cc: opensaf-devel@lists.sourceforge.net; Thanh Nguyen Subject: [PATCH 1/1] imm: Enhance deep clone of Imm Attr values in immutil [#3215] 1) Reuse existing functionality for deep clone of Imm Attr values. 2) Publish immutil internal memory management for external use --- src/amf/amfd/imm.cc| 15 ++- src/amf/amfd/imm.h | 8 +- src/ntf/ntfimcnd/ntfimcn_imm.c | 50 + src/ntf/ntfimcnd/ntfimcn_imm.h | 18 src/osaf/immutil/immutil.c | 190 - src/osaf/immutil/immutil.h | 48 ++--- 6 files changed, 127 insertions(+), 202 deletions(-) diff --git a/src/amf/amfd/imm.cc b/src/amf/amfd/imm.cc index 826e90d41..b2a17a10c 100644 --- a/src/amf/amfd/imm.cc +++ b/src/amf/amfd/imm.cc @@ -206,9 +206,19 @@ done: return res; } +const size_t ImmObjCreate::INIT_MEMREF_SIZE; + +ImmObjCreate::ImmObjCreate() : +className_(NULL), +parentName_(), +attrValues_(NULL), +memRef(NULL) +{ + memRef = immutil_getMem(INIT_MEMREF_SIZE); +} // ImmObjCreate::~ImmObjCreate() { - immutil_freeSaImmAttrValuesT(attrValues_); + immutil_freeMem(memRef); delete[] className_; } @@ -1862,7 +1872,8 @@ void avd_saImmOiRtObjectCreate(const std::string , ajob->className_ = StrDup(className.c_str()); osafassert(ajob->className_ != nullptr); ajob->parentName_ = parentName; - ajob->attrValues_ = immutil_dupSaImmAttrValuesT(attrValues); + ajob->attrValues_ = immutil_dupSaImmAttrValuesT_array(ajob->memRef, + attrValues); Fifo::queue(ajob); TRACE_LEAVE(); diff --git a/src/amf/amfd/imm.h b/src/amf/amfd/imm.h index 99d47d313..797d7ac82 100644 --- a/src/amf/amfd/imm.h +++ b/src/amf/amfd/imm.h @@ -84,12 +84,18 @@ class ImmObjCreate : public ImmJob { SaImmClassNameT className_; std::string parentName_; SaImmAttrValuesT_2 **attrValues_; + // Memory used to deep clone SaImmAttrValuesT_2** + // memRef must be allocated by immutil_getMem() and free by immutil_freeMem() + void* memRef; - ImmObjCreate() : ImmJob(){}; + ImmObjCreate(); bool immobj_update_required(); AvdJobDequeueResultT exec(AVD_CL_CB *cb); ~ImmObjCreate(); + + private: + static const size_t INIT_MEMREF_SIZE = 512; }; // diff --git a/src/ntf/ntfimcnd/ntfimcn_imm.c b/src/ntf/ntfimcnd/ntfimcn_imm.c index 2e515e22b..2d04a8e2b 100644 --- a/src/ntf/ntfimcnd/ntfimcn_imm.c +++ b/src/ntf/ntfimcnd/ntfimcn_imm.c @@ -37,7 +37,6 @@ #include #include -#include "osaf/immutil/immutil.h" #include "ntfimcn_main.h" #include "ntfimcn_notifier.h" @@ -288,14 +287,6 @@ static void free_ccb_data(CcbUtilCcbData_t *ccb_data) { if (ccb_data != NULL) { if (ccb_data->userData != NULL) { osaf_extended_name_free(ccb_data->userData); - free(ccb_data->userData); - } - // Free userData in CcbUtilOperationData - struct CcbUtilOperationData* oper_data = - ccb_data->operationListHead; - for (; oper_data!= NULL; oper_data = oper_data->next) { - immutil_freeSaImmAttrValuesT((SaImmAttrValuesT_2**) - oper_data->userData); } ccbutil_deleteCcbData(ccb_data); } @@ -578,7 +569,8 @@ saImmOiCcbObjectModifyCallback(SaImmOiHandleT immOiHandle, SaImmOiCcbIdT ccbId, struct CcbUtilOperationData *ccbOperData; ccbOperData = ccbUtilCcbData->operationListTail; SaImmAttrValuesT_2 **curAttr; - rc = get_current_attrs(objectName, attrMods, ); + rc = immutil_get_currentAttributes(ccbUtilCcbData->memref, +objectName, attrMods, ); if (SA_AIS_OK == rc) { ccbOperData->userData = curAttr; } else { @@ -999,41 +991,3 @@ static void finalizeImmOmHandle(SaImmHandleT immOmHandle) { saf_error(ais_rc)); } } - -SaAisErrorT get_current_attrs(const SaNameT *objectName, -const SaImmAttrModificationT_2 **attrMods, -SaImmAttrValuesT_2 ***curAttr) { - TRACE_ENTER(); - SaAisErrorT rc = SA_AIS_OK; - // There is no new attribute modifications - if (attrMods == NULL) { - *curAttr = NULL; - return SA_AIS_ERR_INVALID_PARAM; - } - int len; - for (len = 0; attrMods[len] != NULL; ++len) ; - SaImmAttrNameT *attrNames = calloc((len + 1), sizeof(SaImmAttrNameT)); - if (attrNames == NULL) { - *curAttr = NULL; - return SA_AIS_ERR_NO_MEMORY; - } - for (int i = 0; i
Re: [devel] [PATCH 1/2] mds: fix receiving old msg under flow control enabled [#3216]
Hi Minh, Thanks for comments. See my replies inline. Regards, Thuan From: Minh Hon Chau Sent: Friday, August 28, 2020, 06:16 To: Thuan Tran; Thang Duc Nguyen Cc: opensaf-devel@lists.sourceforge.net Subject: Re: [PATCH 1/2] mds: fix receiving old msg under flow control enabled [#3216] Hi Thuan, Some comments inline. Thanks Minh On 27/8/20 11:53 pm, thuan.tran wrote: > - Revert apart of #3151 solution, not decide PortId reset base on > fseq=1 but reset rcvwnd when getting Intro msg from known PortId. > - Check to skip invalid Nack to avoid sender mistake move to > overflow and queue all messages later but receiver don't get any > further message to send ChunkAck. > - Not return error if PortId not found in checking send queue > capable to avoid agent crash after fix #3208 if agent enable mds > flow control. > --- > src/mds/mds_tipc_fctrl_intf.cc | 23 +-- > src/mds/mds_tipc_fctrl_portid.cc | 38 +++- > 2 files changed, 29 insertions(+), 32 deletions(-) > > diff --git a/src/mds/mds_tipc_fctrl_intf.cc b/src/mds/mds_tipc_fctrl_intf.cc > index 93bfce51c..348605c67 100644 > --- a/src/mds/mds_tipc_fctrl_intf.cc > +++ b/src/mds/mds_tipc_fctrl_intf.cc > @@ -351,26 +351,17 @@ uint32_t mds_tipc_fctrl_sndqueue_capable(struct > tipc_portid id, > uint16_t* next_seq) { > if (is_fctrl_enabled == false) return NCSCC_RC_SUCCESS; > > - uint32_t rc = NCSCC_RC_SUCCESS; > - > portid_map_mutex.lock(); > > TipcPortId *portid = portid_lookup(id); > - if (portid == nullptr) { > -m_MDS_LOG_ERR("FCTRL: [me] --> [node:%x, ref:%u], " > -"[line:%u], Error[PortId not found]", > -id.node, id.ref, __LINE__); > -rc = NCSCC_RC_FAILURE; > - } else { > -if (portid->state_ != TipcPortId::State::kDisabled) { > + if (portid && portid->state_ != TipcPortId::State::kDisabled) { > // assign the sequence number of the outgoing message > *next_seq = portid->GetCurrentSeq(); > -} > } > > portid_map_mutex.unlock(); > > - return rc; > + return NCSCC_RC_SUCCESS; > } > [M]: I'm not quite sure how this change relates to #3208. If fctrl is enabled, we should have a corresponding portid (?), otherwise the bidirectional flows with their sequence will be incorrect afterwards. [T] AMFD try to checkpoint but standby during standby down. And #3208 check mds red send fail to free ditto buffer. With FCTRL disable, mds red send return success. With FCTRL enable, mds red send return failure. Then duplicate free() cause crash. This change is just to skip portid check here, but later it is still check in mds_tipc_fctrl_trysend() But mds_tipc_fctrl_trysend() failure is ignored in mdtm_sendto(). There is no impact to current bidirectional flows. > uint32_t mds_tipc_fctrl_trysend(struct tipc_portid id, const uint8_t > *buffer, > @@ -564,12 +555,10 @@ uint32_t mds_tipc_fctrl_rcv_data(uint8_t *buffer, > uint16_t len, > // no need to decode intro message > // the decoding intro message type is done in header decoding > // send to the event thread > - pevt = new Event(Event::Type::kEvtRcvIntro, id, 0, 0, 0, 0); > - if (m_NCS_IPC_SEND(_events, pevt, > - NCS_IPC_PRIORITY_HIGH) != NCSCC_RC_SUCCESS) { > -m_MDS_LOG_ERR("FCTRL: Failed to send msg to mbx_events, Error[%s]", > -strerror(errno)); > - } > + portid_map_mutex.lock(); > + Event evt(Event::Type::kEvtRcvIntro, id, 0, 0, 0, 0); > + process_flow_event(evt); > + portid_map_mutex.unlock(); [M]: It seems the call portid->ReceiveIntro() in process_flow_event() is redundant now? [T] No, It is still used. > } else { > m_MDS_LOG_ERR("FCTRL: [me] <-- [node:%x, ref:%u], " > "[msg_type:%u], Error[not supported message type]", > diff --git a/src/mds/mds_tipc_fctrl_portid.cc > b/src/mds/mds_tipc_fctrl_portid.cc > index 41fce3df8..f569e1f99 100644 > --- a/src/mds/mds_tipc_fctrl_portid.cc > +++ b/src/mds/mds_tipc_fctrl_portid.cc > @@ -373,10 +373,10 @@ uint32_t TipcPortId::ReceiveData(uint32_t mseq, > uint16_t mfrag, > if (rcvwnd_.rcv_ + 1 < Seq16(fseq)) { > if (rcvwnd_.rcv_ == 0 && rcvwnd_.acked_ == 0) { > // peer does not realize that this portid reset > -m_MDS_LOG_NOTIFY("FCTRL: [me] <-- [node:%x, ref:%u], " > +m_MDS_LOG_DBG("FCTRL: [me] <-- [node:%x, ref:%u], " > "RcvData[mseq:%u, mfrag:%u, fseq:%u], " > "rcvwnd[acked:%u, rcv:%u, nacked:%" PRIu64 "], " > -"Warning[portid
Re: [devel] [PATCH 1/1] mbc: fix agent crash inside ncs_mbcsv_null_func() [#3214]
Hi Thang, NULL is already checked inside m_MMGR_FREE_BUFR_LIST() Best Regards, ThuanTr -Original Message- From: Thang Duc Nguyen Sent: Tuesday, August 18, 2020 8:40 AM To: Thuan Tran ; Minh Hon Chau ; Thanh Nguyen Cc: opensaf-devel@lists.sourceforge.net Subject: RE: [PATCH 1/1] mbc: fix agent crash inside ncs_mbcsv_null_func() [#3214] Hi Thuan, I think in ncs_mbcsv_null_func() just add a check before freeing. e.g, if (evt->info.peer_msg.info.client_msg.uba.ub != NULL) m_MMGR_FREE_BUFR_LIST( evt->info.peer_msg.info.client_msg.uba.ub); B.R/Thang -Original Message- From: Thuan Tran Sent: Friday, August 14, 2020 1:33 PM To: Minh Hon Chau ; Thang Duc Nguyen ; Thanh Nguyen Cc: opensaf-devel@lists.sourceforge.net; Thuan Tran Subject: [PATCH 1/1] mbc: fix agent crash inside ncs_mbcsv_null_func() [#3214] --- src/mbc/mbcsv_peer.c | 8 +++- src/mbc/mbcsv_util.c | 5 ++--- 2 files changed, 5 insertions(+), 8 deletions(-) diff --git a/src/mbc/mbcsv_peer.c b/src/mbc/mbcsv_peer.c index 1a9eeb125..e81352105 100644 --- a/src/mbc/mbcsv_peer.c +++ b/src/mbc/mbcsv_peer.c @@ -457,7 +457,6 @@ FSM. * Then depending on its current role it will start FSM. void mbcsv_clear_multiple_active_state(CKPT_INST *ckpt) { PEER_INST *peer; - MBCSV_EVT rcvd_evt; TRACE_ENTER(); /* @@ -470,8 +469,7 @@ void mbcsv_clear_multiple_active_state(CKPT_INST *ckpt) peer = ckpt->peer_list; TRACE("multiple ACTIVE peers"); - m_NCS_MBCSV_FSM_DISPATCH(peer, NCSMBCSV_EVENT_MULTIPLE_ACTIVE, -_evt); + m_NCS_MBCSV_FSM_DISPATCH(peer, NCSMBCSV_EVENT_MULTIPLE_ACTIVE, NULL); TRACE_LEAVE(); return; @@ -491,12 +489,12 @@ void mbcsv_clear_multiple_active_state(CKPT_INST *ckpt) m_NCS_MBCSV_FSM_DISPATCH( peer, NCSMBCSV_EVENT_STATE_TO_KEEP_STBY_SYNC, - _evt); + NULL); else m_NCS_MBCSV_FSM_DISPATCH( peer, NCSMBCSV_EVENT_STATE_TO_WAIT_FOR_CW_SYNC, - _evt); + NULL); } peer = peer->next; diff --git a/src/mbc/mbcsv_util.c b/src/mbc/mbcsv_util.c index dafa268ba..9ce79243f 100644 --- a/src/mbc/mbcsv_util.c +++ b/src/mbc/mbcsv_util.c @@ -409,8 +409,7 @@ uint32_t mbcsv_send_ckpt_data_to_all_peers(NCS_MBCSV_SEND_CKPT *msg_to_send, continue; } TRACE("dispatching FSM for NCSMBCSV_SEND_ASYNC_UPDATE"); - m_NCS_MBCSV_FSM_DISPATCH(peer_ptr, NCSMBCSV_SEND_ASYNC_UPDATE, -_msg); + m_NCS_MBCSV_FSM_DISPATCH(peer_ptr, NCSMBCSV_SEND_ASYNC_UPDATE, NULL); if (false == peer_ptr->okay_to_async_updt) { peer_ptr->ckpt_msg_sent = true; @@ -471,7 +470,7 @@ uint32_t mbcsv_send_ckpt_data_to_all_peers(NCS_MBCSV_SEND_CKPT *msg_to_send, while (NULL != tmp_ptr) { TRACE("dispatching FSM for NCSMBCSV_SEND_ASYNC_UPDATE"); m_NCS_MBCSV_FSM_DISPATCH( - tmp_ptr, NCSMBCSV_SEND_ASYNC_UPDATE, _msg); + tmp_ptr, NCSMBCSV_SEND_ASYNC_UPDATE, NULL); if (false == tmp_ptr->okay_to_async_updt) { tmp_ptr->ckpt_msg_sent = true; -- 2.17.1 ___ Opensaf-devel mailing list Opensaf-devel@lists.sourceforge.net https://lists.sourceforge.net/lists/listinfo/opensaf-devel
Re: [devel] [PATCH 1/1] pyosaf: Support python3 [#3210]
Hi Thien, ACK from me. Best Regards, ThuanTr -Original Message- From: Thien Minh Huynh Sent: Thursday, August 13, 2020 10:26 AM To: Thuan Tran ; Minh Hon Chau ; Thang Duc Nguyen Cc: opensaf-devel@lists.sourceforge.net; Thien Minh Huynh Subject: [PATCH 1/1] pyosaf: Support python3 [#3210] Adapt pyosaf to support python3, and be compatible with python2 --- python/pyosaf/saAis.py| 51 +++ python/pyosaf/saAmf.py| 8 +++-- python/pyosaf/saImm.py| 4 ++- python/pyosaf/saLog.py| 4 ++- python/pyosaf/utils/immoi/agent.py| 2 +- python/pyosaf/utils/immom/accessor.py | 10 +++--- python/pyosaf/utils/immom/agent.py| 11 +++--- python/pyosaf/utils/immom/ccb.py | 19 +- python/pyosaf/utils/immom/iterator.py | 9 ++--- python/pyosaf/utils/immom/object.py | 2 +- python/pyosaf/utils/ntf/producer.py | 24 +++-- src/imm/tools/immxml-merge| 32 - 12 files changed, 123 insertions(+), 53 deletions(-) diff --git a/python/pyosaf/saAis.py b/python/pyosaf/saAis.py index 178ea554c..912523e69 100644 --- a/python/pyosaf/saAis.py +++ b/python/pyosaf/saAis.py @@ -18,13 +18,15 @@ ''' Common types and prototypes used by all modules in pyosaf ''' +import sys from os import environ - import ctypes from ctypes import pointer, POINTER, cast, byref, c_void_p, Structure, \ Union, CDLL from pyosaf.saEnumConst import Enumeration, Const +PY3 = True if sys.version_info[0] == 3 else False + SaInt8T = ctypes.c_char SaInt16T = ctypes.c_short SaInt32T = ctypes.c_int @@ -39,7 +41,21 @@ SaVoidPtr = c_void_p # Types used by the NTF/IMMS service SaFloatT = ctypes.c_float SaDoubleT = ctypes.c_double -SaStringT = ctypes.c_char_p +if PY3: + class SaStringT(ctypes.c_char_p): + def __init__(self, value=''): + if value is not None: + value = value.encode('utf-8') + super(SaStringT, self).__init__(value) + + def __str__(self): + if self.value is not None: + return self.value.decode('utf-8') + else: + return self.__repr__() +else: + SaStringT = ctypes.c_char_p + SaTimeT = SaInt64T SaInvocationT = SaUint64T @@ -177,6 +193,12 @@ if environ.get('SA_ENABLE_EXTENDED_NAMES') == '1': immdll.saAisNameLend.argtypes = [SaConstStringT, POINTER(SaNameT)] immdll.saAisNameLend.restype = None + if PY3: + # Add name_in_bytes attribute to this object as a workaround + # To keep the `name` in bytes not to be freed by garbarge + # collector when it's still refered in `_opaque` of SaNameT + self.name_in_bytes = name.encode('utf-8') + name = self.name_in_bytes immdll.saAisNameLend(name, BYREF(self)) @@ -185,7 +207,11 @@ if environ.get('SA_ENABLE_EXTENDED_NAMES') == '1': """ immdll.saAisNameBorrow.argtypes = [POINTER(SaNameT)] immdll.saAisNameBorrow.restype = SaConstStringT - return immdll.saAisNameBorrow(BYREF(self)) + name = immdll.saAisNameBorrow(BYREF(self)) + if PY3: + return name.decode('utf-8') + + return name else: class SaNameT(Structure): """Contain names. @@ -196,12 +222,17 @@ else: def __init__(self, name=''): """Construct instance with contents of 'name'. """ + if PY3: + name = name.encode('utf-8') super(SaNameT, self).__init__(len(name), name) def __str__(self): """Returns the content of SaNameT """ - return self.value + if PY3: + return self.value.decode('utf-8') + else: + return self.value class SaVersionT(Structure): @@ -211,6 +242,14 @@ class SaVersionT(Structure): ('majorVersion', SaUint8T), ('minorVersion', SaUint8T)] + def __init__(self, release='_', major=0, minor=0): + """Construct instance with contents of 'name'. + """ + if PY3: + release = release[0].
Re: [devel] [PATCH 1/1] osaf: move common functions into immutil [#3211]
Hi Thanh, Thank you. Yes, please continue with next ticket. Best Regards, ThuanTr -Original Message- From: Thanh Nguyen Sent: Monday, August 17, 2020 10:43 AM To: Thuan Tran ; Thang Duc Nguyen ; Minh Hon Chau Cc: opensaf-devel@lists.sourceforge.net Subject: RE: [PATCH 1/1] osaf: move common functions into immutil [#3211] Hi Thuan, ACK from me. As we discussed, this is followed up with a ticket to use internal memory management. Best Regards, Thanh -Original Message- From: Thuan Tran Sent: Monday, 17 August 2020 1:40 PM To: Thanh Nguyen ; Thang Duc Nguyen ; Minh Hon Chau Cc: opensaf-devel@lists.sourceforge.net Subject: RE: [PATCH 1/1] osaf: move common functions into immutil [#3211] Hi Thanh, Here is V2 patch. Best Regards, ThuanTr -Original Message- From: Thuan Tran Sent: Tuesday, August 11, 2020 1:08 PM To: Thanh Nguyen ; Thang Duc Nguyen ; Minh Hon Chau Cc: opensaf-devel@lists.sourceforge.net; Thuan Tran Subject: [PATCH 1/1] osaf: move common functions into immutil [#3211] --- src/amf/amfd/imm.cc | 187 ++-- src/amf/amfd/imm.h | 2 +- src/ntf/ntfimcnd/ntfimcn_imm.c | 181 +-- src/ntf/ntfimcnd/ntfimcn_imm.h | 30 +--- src/ntf/ntfimcnd/ntfimcn_notifier.c | 2 +- src/osaf/immutil/immutil.c | 215 +++- src/osaf/immutil/immutil.h | 36 + 7 files changed, 263 insertions(+), 390 deletions(-) diff --git a/src/amf/amfd/imm.cc b/src/amf/amfd/imm.cc index d917b0d8b..826e90d41 100644 --- a/src/amf/amfd/imm.cc +++ b/src/amf/amfd/imm.cc @@ -178,7 +178,7 @@ AvdJobDequeueResultT ImmObjCreate::exec(AVD_CL_CB *cb) { goto done; } rc = saImmOiRtObjectCreate_2(immOiHandle, className_, parent_name, - attrValues_); + (const + SaImmAttrValuesT_2**)attrValues_); cb->avd_imm_status = AVD_IMM_INIT_DONE; if ((rc == SA_AIS_OK) || (rc == SA_AIS_ERR_EXIST)) { @@ -208,31 +208,8 @@ done: // ImmObjCreate::~ImmObjCreate() { - unsigned int i, j; - - for (i = 0; attrValues_[i] != nullptr; i++) { -SaImmAttrValuesT_2 *attrValue = (SaImmAttrValuesT_2 *)attrValues_[i]; - -if (attrValue->attrValueType == SA_IMM_ATTR_SASTRINGT) { - for (j = 0; j < attrValue->attrValuesNumber; j++) { -char *p = *((char **)attrValue->attrValues[j]); -delete[] p; - } -} else if (attrValue->attrValueType == SA_IMM_ATTR_SANAMET) { - for (j = 0; j < attrValue->attrValuesNumber; j++) { -SaNameT *name = reinterpret_cast(attrValue->attrValues[i]); -osaf_extended_name_free(name); - } -} -delete[] attrValue->attrName; -delete[] static_cast( -attrValue->attrValues[0]); // free blob shared by all values -delete[] attrValue->attrValues; -delete attrValue; - } - + immutil_freeSaImmAttrValuesT(attrValues_); delete[] className_; - delete[] attrValues_; } // @@ -630,110 +607,18 @@ typedef struct avd_ccb_apply_ordered_list { static AvdCcbApplyOrderedListT *ccb_apply_list; -/* - * FUNCTION PROTOTYPES - * - */ - -static size_t value_size(SaImmValueTypeT attrValueType) { - size_t valueSize = 0; - - switch (attrValueType) { -case SA_IMM_ATTR_SAINT32T: - valueSize = sizeof(SaInt32T); - break; -case SA_IMM_ATTR_SAUINT32T: - valueSize = sizeof(SaUint32T); - break; -case SA_IMM_ATTR_SAINT64T: - valueSize = sizeof(SaInt64T); - break; -case SA_IMM_ATTR_SAUINT64T: - valueSize = sizeof(SaUint64T); - break; -case SA_IMM_ATTR_SATIMET: - valueSize = sizeof(SaTimeT); - break; -case SA_IMM_ATTR_SANAMET: - valueSize = sizeof(SaNameT); - break; -case SA_IMM_ATTR_SAFLOATT: - valueSize = sizeof(SaFloatT); - break; -case SA_IMM_ATTR_SADOUBLET: - valueSize = sizeof(SaDoubleT); - break; -case SA_IMM_ATTR_SASTRINGT: - valueSize = sizeof(SaStringT); - break; -case SA_IMM_ATTR_SAANYT: - osafassert(0); - break; - } - - return valueSize; -} - -static void copySaImmAttrValuesT(SaImmAttrValuesT_2 *copy, - const SaImmAttrValuesT_2 *original) { - size_t valueSize = 0; - unsigned int i, valueCount = original->attrValuesNumber; - char *databuffer; - - copy->attrName = StrDup(original->attrName); - - copy->attrValuesNumber = valueCount; - copy->attrValueType = original->attrValueType; - if (valueCount == 0) return; /* (just in case...) */ - - copy->attrValues = new SaImmAttrValueT[valueCount]; - - valueSize = value_size(original->attrValueType); - - // alloc blob shared by all values - databuffer = new char[valueCount * valueSize]; - - for (i = 0;
Re: [devel] [PATCH 1/1] osaf: move common functions into immutil [#3211]
Hi Thanh, Here is V2 patch. Best Regards, ThuanTr -Original Message- From: Thuan Tran Sent: Tuesday, August 11, 2020 1:08 PM To: Thanh Nguyen ; Thang Duc Nguyen ; Minh Hon Chau Cc: opensaf-devel@lists.sourceforge.net; Thuan Tran Subject: [PATCH 1/1] osaf: move common functions into immutil [#3211] --- src/amf/amfd/imm.cc | 187 ++-- src/amf/amfd/imm.h | 2 +- src/ntf/ntfimcnd/ntfimcn_imm.c | 181 +-- src/ntf/ntfimcnd/ntfimcn_imm.h | 30 +--- src/ntf/ntfimcnd/ntfimcn_notifier.c | 2 +- src/osaf/immutil/immutil.c | 215 +++- src/osaf/immutil/immutil.h | 36 + 7 files changed, 263 insertions(+), 390 deletions(-) diff --git a/src/amf/amfd/imm.cc b/src/amf/amfd/imm.cc index d917b0d8b..826e90d41 100644 --- a/src/amf/amfd/imm.cc +++ b/src/amf/amfd/imm.cc @@ -178,7 +178,7 @@ AvdJobDequeueResultT ImmObjCreate::exec(AVD_CL_CB *cb) { goto done; } rc = saImmOiRtObjectCreate_2(immOiHandle, className_, parent_name, - attrValues_); + (const SaImmAttrValuesT_2**)attrValues_); cb->avd_imm_status = AVD_IMM_INIT_DONE; if ((rc == SA_AIS_OK) || (rc == SA_AIS_ERR_EXIST)) { @@ -208,31 +208,8 @@ done: // ImmObjCreate::~ImmObjCreate() { - unsigned int i, j; - - for (i = 0; attrValues_[i] != nullptr; i++) { -SaImmAttrValuesT_2 *attrValue = (SaImmAttrValuesT_2 *)attrValues_[i]; - -if (attrValue->attrValueType == SA_IMM_ATTR_SASTRINGT) { - for (j = 0; j < attrValue->attrValuesNumber; j++) { -char *p = *((char **)attrValue->attrValues[j]); -delete[] p; - } -} else if (attrValue->attrValueType == SA_IMM_ATTR_SANAMET) { - for (j = 0; j < attrValue->attrValuesNumber; j++) { -SaNameT *name = reinterpret_cast(attrValue->attrValues[i]); -osaf_extended_name_free(name); - } -} -delete[] attrValue->attrName; -delete[] static_cast( -attrValue->attrValues[0]); // free blob shared by all values -delete[] attrValue->attrValues; -delete attrValue; - } - + immutil_freeSaImmAttrValuesT(attrValues_); delete[] className_; - delete[] attrValues_; } // @@ -630,110 +607,18 @@ typedef struct avd_ccb_apply_ordered_list { static AvdCcbApplyOrderedListT *ccb_apply_list; -/* - * FUNCTION PROTOTYPES - * - */ - -static size_t value_size(SaImmValueTypeT attrValueType) { - size_t valueSize = 0; - - switch (attrValueType) { -case SA_IMM_ATTR_SAINT32T: - valueSize = sizeof(SaInt32T); - break; -case SA_IMM_ATTR_SAUINT32T: - valueSize = sizeof(SaUint32T); - break; -case SA_IMM_ATTR_SAINT64T: - valueSize = sizeof(SaInt64T); - break; -case SA_IMM_ATTR_SAUINT64T: - valueSize = sizeof(SaUint64T); - break; -case SA_IMM_ATTR_SATIMET: - valueSize = sizeof(SaTimeT); - break; -case SA_IMM_ATTR_SANAMET: - valueSize = sizeof(SaNameT); - break; -case SA_IMM_ATTR_SAFLOATT: - valueSize = sizeof(SaFloatT); - break; -case SA_IMM_ATTR_SADOUBLET: - valueSize = sizeof(SaDoubleT); - break; -case SA_IMM_ATTR_SASTRINGT: - valueSize = sizeof(SaStringT); - break; -case SA_IMM_ATTR_SAANYT: - osafassert(0); - break; - } - - return valueSize; -} - -static void copySaImmAttrValuesT(SaImmAttrValuesT_2 *copy, - const SaImmAttrValuesT_2 *original) { - size_t valueSize = 0; - unsigned int i, valueCount = original->attrValuesNumber; - char *databuffer; - - copy->attrName = StrDup(original->attrName); - - copy->attrValuesNumber = valueCount; - copy->attrValueType = original->attrValueType; - if (valueCount == 0) return; /* (just in case...) */ - - copy->attrValues = new SaImmAttrValueT[valueCount]; - - valueSize = value_size(original->attrValueType); - - // alloc blob shared by all values - databuffer = new char[valueCount * valueSize]; - - for (i = 0; i < valueCount; i++) { -copy->attrValues[i] = databuffer; -if (original->attrValueType == SA_IMM_ATTR_SASTRINGT) { - char *cporig = *((char **)original->attrValues[i]); - char **cpp = (char **)databuffer; - *cpp = StrDup(cporig); -} else if (original->attrValueType == SA_IMM_ATTR_SANAMET) { - SaNameT *orig = reinterpret_cast(original->attrValues[i]); - SaNameT *dest = reinterpret_cast(databuffer); - osaf_extended_name_alloc(osaf_extended_name_borrow(orig), dest); -} else { - memcpy(databuffer, original->attrValues[i], valueSize); -} -databuffer += valueSize; - } -} - -static const SaImmAttrValuesT_2 *dupSaImmAttrValuesT( -const SaI
Re: [devel] [PATCH 1/1] rde: allow node start first to be promoted in relax mode [#3191]
Best Regards, ThuanTr From: Thang Duc Nguyen Sent: Thursday, August 13, 2020 3:45 PM To: Thuan Tran ; Minh Hon Chau Cc: opensaf-devel@lists.sourceforge.net Subject: RE: [PATCH 1/1] rde: allow node start first to be promoted in relax mode [#3191] Hi Thuan, One comment in code review. From: Thuan Tran mailto:thuan.t...@dektech.com.au>> Sent: Thursday, August 13, 2020 8:44 AM To: Thang Duc Nguyen mailto:thang.d.ngu...@dektech.com.au>>; Minh Hon Chau mailto:minh.c...@dektech.com.au>> Cc: opensaf-devel@lists.sourceforge.net<mailto:opensaf-devel@lists.sourceforge.net> Subject: RE: [PATCH 1/1] rde: allow node start first to be promoted in relax mode [#3191] Hi Thang, See my reply inline. Best Regards, ThuanTr From: Thang Duc Nguyen mailto:thang.d.ngu...@dektech.com.au>> Sent: Thursday, August 13, 2020 8:07 AM To: Thuan Tran mailto:thuan.t...@dektech.com.au>>; Minh Hon Chau mailto:minh.c...@dektech.com.au>> Cc: opensaf-devel@lists.sourceforge.net<mailto:opensaf-devel@lists.sourceforge.net> Subject: RE: [PATCH 1/1] rde: allow node start first to be promoted in relax mode [#3191] Hi Thuan, See my comment inline. From: Thuan Tran mailto:thuan.t...@dektech.com.au>> Sent: Wednesday, August 12, 2020 10:25 PM To: Thang Duc Nguyen mailto:thang.d.ngu...@dektech.com.au>>; Minh Hon Chau mailto:minh.c...@dektech.com.au>> Cc: opensaf-devel@lists.sourceforge.net<mailto:opensaf-devel@lists.sourceforge.net> Subject: Re: [PATCH 1/1] rde: allow node start first to be promoted in relax mode [#3191] Hi Thang, It is for the case as following. For example: Peer B see peer A up, calculate B pending time and SendPeerInfoResp() to peer A. If peer A get peer B info before see peer B up, A pending time is still zero now. [Thang]: Maybe it is not a real case. B/c rde will always receive the peer up then get info respond. [Thuan]: We don't know for sure as UP is discovery SVC event but PeerInfoRsp is a message. Then A need calculate time (in SetPeerState if pending time is zero) to decide give up or not. This calculated pending time is later sent to peer B as peer A info. By this way, to make decisions are made by two peers have no confliction. Best Regards, Thuan From: Thang Duc Nguyen mailto:thang.d.ngu...@dektech.com.au>> Sent: Wednesday, August 12, 2020 3:50:24 PM To: Thuan Tran mailto:thuan.t...@dektech.com.au>>; Minh Hon Chau mailto:minh.c...@dektech.com.au>> Cc: opensaf-devel@lists.sourceforge.net<mailto:opensaf-devel@lists.sourceforge.net> mailto:opensaf-devel@lists.sourceforge.net>> Subject: RE: [PATCH 1/1] rde: allow node start first to be promoted in relax mode [#3191] Hi Thuan, I think the pending time only need updating/calculating in SendPeerInfoResp(), when it receives the peer up event. No need to update again in SetPeerState(). B.R/Thang -Original Message- From: Thuan Tran mailto:thuan.t...@dektech.com.au>> Sent: Monday, May 25, 2020 11:27 AM To: Minh Hon Chau mailto:minh.c...@dektech.com.au>>; Thang Duc Nguyen mailto:thang.d.ngu...@dektech.com.au>> Cc: opensaf-devel@lists.sourceforge.net<mailto:opensaf-devel@lists.sourceforge.net>; Thuan Tran mailto:thuan.t...@dektech.com.au>> Subject: [PATCH 1/1] rde: allow node start first to be promoted in relax mode [#3191] - In relax mode and unavailable consensus, sometimes SC-2 cannot become active even start long time before SC-1 because current promotion strategy only base on node id (lower is chosen). - Change the way to get promotion by comparing promotion pending duration, node with promotion pending longer will get promotion and another node will give up. This help node start first become active. If promotion pending duration is same, lower node id will promote. --- src/rde/rded/rde_cb.h| 3 +++ src/rde/rded/rde_main.cc | 10 +- src/rde/rded/rde_mds.cc | 12 src/rde/rded/role.cc | 20 ++-- src/rde/rded/role.h | 3 ++- 5 files changed, 44 insertions(+), 4 deletions(-) diff --git a/src/rde/rded/rde_cb.h b/src/rde/rded/rde_cb.h index e35fdab2b..50a0a0d26 100644 --- a/src/rde/rded/rde_cb.h +++ b/src/rde/rded/rde_cb.h @@ -54,6 +54,8 @@ struct RDE_CONTROL_BLOCK { State state{State::kNotActive}; std::atomic consensus_service_state{ConsensusState::kUnknown}; std::atomic state_refresh_thread_started{false}; // consensus service + struct timespec promote_start{0}; + uint64_t promote_pending{0}; }; enum RDE_MSG_TYPE { @@ -72,6 +74,7 @@ enum RDE_MSG_TYPE { struct rde_peer_info { PCS_RDA_ROLE ha_role; + uint64_t promote_pending; }; struct rde_msg { diff --git a/src/rde/rded/rde_main.cc b/src/rde/rded/rde_main.cc index 6594b3d49..e6bd759ec 100644 --- a/src/rde/rded/rde_main.cc +++ b/src/rde/rded/rde_main.cc @@ -30,6 +30,7 @@ #include #include #include "base/conf.h"
Re: [devel] [PATCH 1/1] rde: allow node start first to be promoted in relax mode [#3191]
Hi Thang, See my reply inline. Best Regards, ThuanTr From: Thang Duc Nguyen Sent: Thursday, August 13, 2020 8:07 AM To: Thuan Tran ; Minh Hon Chau Cc: opensaf-devel@lists.sourceforge.net Subject: RE: [PATCH 1/1] rde: allow node start first to be promoted in relax mode [#3191] Hi Thuan, See my comment inline. From: Thuan Tran mailto:thuan.t...@dektech.com.au>> Sent: Wednesday, August 12, 2020 10:25 PM To: Thang Duc Nguyen mailto:thang.d.ngu...@dektech.com.au>>; Minh Hon Chau mailto:minh.c...@dektech.com.au>> Cc: opensaf-devel@lists.sourceforge.net<mailto:opensaf-devel@lists.sourceforge.net> Subject: Re: [PATCH 1/1] rde: allow node start first to be promoted in relax mode [#3191] Hi Thang, It is for the case as following. For example: Peer B see peer A up, calculate B pending time and SendPeerInfoResp() to peer A. If peer A get peer B info before see peer B up, A pending time is still zero now. [Thang]: Maybe it is not a real case. B/c rde will always receive the peer up then get info respond. [Thuan]: We don't know for sure as UP is discovery SVC event but PeerInfoRsp is a message. Then A need calculate time (in SetPeerState if pending time is zero) to decide give up or not. This calculated pending time is later sent to peer B as peer A info. By this way, to make decisions are made by two peers have no confliction. Best Regards, Thuan From: Thang Duc Nguyen mailto:thang.d.ngu...@dektech.com.au>> Sent: Wednesday, August 12, 2020 3:50:24 PM To: Thuan Tran mailto:thuan.t...@dektech.com.au>>; Minh Hon Chau mailto:minh.c...@dektech.com.au>> Cc: opensaf-devel@lists.sourceforge.net<mailto:opensaf-devel@lists.sourceforge.net> mailto:opensaf-devel@lists.sourceforge.net>> Subject: RE: [PATCH 1/1] rde: allow node start first to be promoted in relax mode [#3191] Hi Thuan, I think the pending time only need updating/calculating in SendPeerInfoResp(), when it receives the peer up event. No need to update again in SetPeerState(). B.R/Thang -Original Message- From: Thuan Tran mailto:thuan.t...@dektech.com.au>> Sent: Monday, May 25, 2020 11:27 AM To: Minh Hon Chau mailto:minh.c...@dektech.com.au>>; Thang Duc Nguyen mailto:thang.d.ngu...@dektech.com.au>> Cc: opensaf-devel@lists.sourceforge.net<mailto:opensaf-devel@lists.sourceforge.net>; Thuan Tran mailto:thuan.t...@dektech.com.au>> Subject: [PATCH 1/1] rde: allow node start first to be promoted in relax mode [#3191] - In relax mode and unavailable consensus, sometimes SC-2 cannot become active even start long time before SC-1 because current promotion strategy only base on node id (lower is chosen). - Change the way to get promotion by comparing promotion pending duration, node with promotion pending longer will get promotion and another node will give up. This help node start first become active. If promotion pending duration is same, lower node id will promote. --- src/rde/rded/rde_cb.h| 3 +++ src/rde/rded/rde_main.cc | 10 +- src/rde/rded/rde_mds.cc | 12 src/rde/rded/role.cc | 20 ++-- src/rde/rded/role.h | 3 ++- 5 files changed, 44 insertions(+), 4 deletions(-) diff --git a/src/rde/rded/rde_cb.h b/src/rde/rded/rde_cb.h index e35fdab2b..50a0a0d26 100644 --- a/src/rde/rded/rde_cb.h +++ b/src/rde/rded/rde_cb.h @@ -54,6 +54,8 @@ struct RDE_CONTROL_BLOCK { State state{State::kNotActive}; std::atomic consensus_service_state{ConsensusState::kUnknown}; std::atomic state_refresh_thread_started{false}; // consensus service + struct timespec promote_start{0}; + uint64_t promote_pending{0}; }; enum RDE_MSG_TYPE { @@ -72,6 +74,7 @@ enum RDE_MSG_TYPE { struct rde_peer_info { PCS_RDA_ROLE ha_role; + uint64_t promote_pending; }; struct rde_msg { diff --git a/src/rde/rded/rde_main.cc b/src/rde/rded/rde_main.cc index 6594b3d49..e6bd759ec 100644 --- a/src/rde/rded/rde_main.cc +++ b/src/rde/rded/rde_main.cc @@ -30,6 +30,7 @@ #include #include #include "base/conf.h" +#include "base/time.h" #include "base/daemon.h" #include "base/logtrace.h" #include "base/ncs_main_papi.h" @@ -108,7 +109,8 @@ static void handle_mbx_event() { msg->type == RDE_MSG_PEER_INFO_RESP ? "response" : "request", msg->fr_node_id, Role::to_string(msg->info.peer_info.ha_role)); CheckForSplitBrain(msg); - role->SetPeerState(msg->info.peer_info.ha_role, msg->fr_node_id); + role->SetPeerState(msg->info.peer_info.ha_role, msg->fr_node_id, + msg->info.peer_info.promote_pending); break; } case RDE_MSG_PEER_UP: { @@ -283,9 +285,15 @@ static void CheckForSplitBrain(const rde_msg *msg) { } static void SendPeerInfoResp(MDS_DEST mds_dest) { + RDE_CONTROL_BLOCK *cb = rde_get_control_b
Re: [devel] [PATCH 1/1] rde: allow node start first to be promoted in relax mode [#3191]
Hi Thang, It is for the case as following. For example: Peer B see peer A up, calculate B pending time and SendPeerInfoResp() to peer A. If peer A get peer B info before see peer B up, A pending time is still zero now. Then A need calculate time (in SetPeerState if pending time is zero) to decide give up or not. This calculated pending time is later sent to peer B as peer A info. By this way, to make decisions are made by two peers have no confliction. Best Regards, Thuan From: Thang Duc Nguyen Sent: Wednesday, August 12, 2020 3:50:24 PM To: Thuan Tran ; Minh Hon Chau Cc: opensaf-devel@lists.sourceforge.net Subject: RE: [PATCH 1/1] rde: allow node start first to be promoted in relax mode [#3191] Hi Thuan, I think the pending time only need updating/calculating in SendPeerInfoResp(), when it receives the peer up event. No need to update again in SetPeerState(). B.R/Thang -Original Message- From: Thuan Tran Sent: Monday, May 25, 2020 11:27 AM To: Minh Hon Chau ; Thang Duc Nguyen Cc: opensaf-devel@lists.sourceforge.net; Thuan Tran Subject: [PATCH 1/1] rde: allow node start first to be promoted in relax mode [#3191] - In relax mode and unavailable consensus, sometimes SC-2 cannot become active even start long time before SC-1 because current promotion strategy only base on node id (lower is chosen). - Change the way to get promotion by comparing promotion pending duration, node with promotion pending longer will get promotion and another node will give up. This help node start first become active. If promotion pending duration is same, lower node id will promote. --- src/rde/rded/rde_cb.h| 3 +++ src/rde/rded/rde_main.cc | 10 +- src/rde/rded/rde_mds.cc | 12 src/rde/rded/role.cc | 20 ++-- src/rde/rded/role.h | 3 ++- 5 files changed, 44 insertions(+), 4 deletions(-) diff --git a/src/rde/rded/rde_cb.h b/src/rde/rded/rde_cb.h index e35fdab2b..50a0a0d26 100644 --- a/src/rde/rded/rde_cb.h +++ b/src/rde/rded/rde_cb.h @@ -54,6 +54,8 @@ struct RDE_CONTROL_BLOCK { State state{State::kNotActive}; std::atomic consensus_service_state{ConsensusState::kUnknown}; std::atomic state_refresh_thread_started{false}; // consensus service + struct timespec promote_start{0}; + uint64_t promote_pending{0}; }; enum RDE_MSG_TYPE { @@ -72,6 +74,7 @@ enum RDE_MSG_TYPE { struct rde_peer_info { PCS_RDA_ROLE ha_role; + uint64_t promote_pending; }; struct rde_msg { diff --git a/src/rde/rded/rde_main.cc b/src/rde/rded/rde_main.cc index 6594b3d49..e6bd759ec 100644 --- a/src/rde/rded/rde_main.cc +++ b/src/rde/rded/rde_main.cc @@ -30,6 +30,7 @@ #include #include #include "base/conf.h" +#include "base/time.h" #include "base/daemon.h" #include "base/logtrace.h" #include "base/ncs_main_papi.h" @@ -108,7 +109,8 @@ static void handle_mbx_event() { msg->type == RDE_MSG_PEER_INFO_RESP ? "response" : "request", msg->fr_node_id, Role::to_string(msg->info.peer_info.ha_role)); CheckForSplitBrain(msg); - role->SetPeerState(msg->info.peer_info.ha_role, msg->fr_node_id); + role->SetPeerState(msg->info.peer_info.ha_role, msg->fr_node_id, + msg->info.peer_info.promote_pending); break; } case RDE_MSG_PEER_UP: { @@ -283,9 +285,15 @@ static void CheckForSplitBrain(const rde_msg *msg) { } static void SendPeerInfoResp(MDS_DEST mds_dest) { + RDE_CONTROL_BLOCK *cb = rde_get_control_block(); rde_msg peer_info_req; peer_info_req.type = RDE_MSG_PEER_INFO_RESP; peer_info_req.info.peer_info.ha_role = role->role(); + if (role->role() == PCS_RDA_UNDEFINED && cb->promote_pending == 0) { +struct timespec now = base::ReadMonotonicClock(); +cb->promote_pending = base::TimespecToMillis(now - + cb->promote_start); } peer_info_req.info.peer_info.promote_pending = + cb->promote_pending; rde_mds_send(_info_req, mds_dest); } diff --git a/src/rde/rded/rde_mds.cc b/src/rde/rded/rde_mds.cc index bc335f090..a32f54082 100644 --- a/src/rde/rded/rde_mds.cc +++ b/src/rde/rded/rde_mds.cc @@ -48,6 +48,10 @@ static uint32_t msg_encode(MDS_CALLBACK_ENC_INFO *enc_info) { assert(data); ncs_encode_32bit(, msg->info.peer_info.ha_role); ncs_enc_claim_space(uba, sizeof(uint32_t)); + data = ncs_enc_reserve_space(uba, sizeof(uint64_t)); + assert(data); + ncs_encode_64bit(, msg->info.peer_info.promote_pending); + ncs_enc_claim_space(uba, sizeof(uint64_t)); break; default: @@ -94,6 +98,14 @@ static uint32_t msg_decode(MDS_CALLBACK_DEC_INFO *dec_info) { msg->info.peer_info.ha_role = static_cast(ncs_decode_32bit()); ncs_dec_skip_space(uba, sizeof(uint32_t)); + msg->info.peer_info.promote_pending = 0; + if (msg->
Re: [devel] [PATCH 1/1] nid: fix opensafd fail to start under gcov enabled [#3209]
Hi Minh, Thanks. I will include it and push. Best Regards, ThuanTr -Original Message- From: Minh Hon Chau Sent: Sunday, August 9, 2020 9:15 AM To: Thuan Tran ; Thang Duc Nguyen Cc: opensaf-devel@lists.sourceforge.net Subject: Re: [PATCH 1/1] nid: fix opensafd fail to start under gcov enabled [#3209] Hi Thuan, ack from me. Just find another compilation issue with gcc 9.2.1 make[2]: Entering directory '/home/minhchau/dek/osaftest/opensaf-code' CXX src/base/lib_libopensaf_core_la-logtrace.lo src/base/logtrace.cc:61:14: error: 'pid_t gettid()' was declared 'extern' and later 'static' [-fpermissive] 61 | static pid_t gettid() { return syscall(SYS_gettid); } Can we add the below patch within this commit? Otherwise I can raise another ticket. diff --git a/src/base/logtrace.cc b/src/base/logtrace.cc index 9822879ab..31c9294e2 100644 --- a/src/base/logtrace.cc +++ b/src/base/logtrace.cc @@ -58,7 +58,7 @@ std::once_flag init_flag; thread_local LogTraceBuffer gl_thread_buffer{gl_local_thread_trace, global::thread_trace_buffer_size}; -static pid_t gettid() { return syscall(SYS_gettid); } +static pid_t get_tid() { return syscall(SYS_gettid); } /** * USR2 signal handler to enable/disable trace (toggle) @@ -83,7 +83,7 @@ void trace_output(const char *file, unsigned line, unsigned priority, assert(priority <= LOG_DEBUG && category < CAT_MAX); if (strncmp(file, "src/", 4) == 0) file += 4; - snprintf(preamble, sizeof(preamble), "%d:%s:%u %s %s", gettid(), file, line, + snprintf(preamble, sizeof(preamble), "%d:%s:%u %s %s", get_tid(), file, line, global::prefix_name[priority + category], format); // legacy trace if (is_logtrace_enabled(category)) { @@ -110,7 +110,7 @@ void log_output(const char *file, unsigned line, unsigned priority, assert(priority <= LOG_DEBUG && category < CAT_MAX); if (strncmp(file, "src/", 4) == 0) file += 4; - snprintf(preamble, sizeof(preamble), "%d:%s:%u %s %s", gettid(), file, line, + snprintf(preamble, sizeof(preamble), "%d:%s:%u %s %s", get_tid(), file, line, global::prefix_name[priority + category], format); LogTraceClient::Log(gl_remote_osaflog, static_cast(priority), preamble, ap); Thanks Minh On 4/8/20 6:00 pm, thuan.tran wrote: > - Fix amfd/sgproc.cc cause compile failed when configure enable gcov. > - Waiting svc_monitor_thread ready in create_svc_monitor_thread to avoid > lost svc_mon_thr_fd value which later cause opensafd fail to start. > --- > src/amf/amfd/sgproc.cc | 3 +-- > src/nid/nodeinit.cc| 19 +-- > 2 files changed, 10 insertions(+), 12 deletions(-) > > diff --git a/src/amf/amfd/sgproc.cc b/src/amf/amfd/sgproc.cc > index 78ccb31f9..405e2c45d 100644 > --- a/src/amf/amfd/sgproc.cc > +++ b/src/amf/amfd/sgproc.cc > @@ -2624,9 +2624,8 @@ static uint32_t shutdown_contained_sus(AVD_CL_CB *cb, > AVD_SU *container_su, > } > > done: > - return rc; > - > TRACE_LEAVE(); > + return rc; > } > > > /* > diff --git a/src/nid/nodeinit.cc b/src/nid/nodeinit.cc > index d5b4eb20a..548c7fb46 100644 > --- a/src/nid/nodeinit.cc > +++ b/src/nid/nodeinit.cc > @@ -1612,6 +1612,15 @@ uint32_t create_svc_monitor_thread(void) { > return NCSCC_RC_FAILURE; > } > > + // Waiting until svc_monitor_thread is up and in ready state. > + unsigned no_repeat = 0; > + while (svc_monitor_thread_ready == false && no_repeat < 100) { > +osaf_nanosleep(); > +no_repeat++; > + } > + osafassert(svc_monitor_thread_ready); > + LOG_NO("svc_monitor_thread is up and in ready state"); > + > TRACE_LEAVE(); > return NCSCC_RC_SUCCESS; > } > @@ -1662,16 +1671,6 @@ int main(int argc, char *argv[]) { > exit(EXIT_FAILURE); > } > > - // Waiting until svc_monitor_thread is up and in ready state. > - unsigned no_repeat = 0; > - while (svc_monitor_thread_ready == false && no_repeat < 100) { > -osaf_nanosleep(); > -no_repeat++; > - } > - > - osafassert(svc_monitor_thread_ready); > - LOG_NO("svc_monitor_thread is up and in ready state"); > - > if (parse_nodeinit_conf(sbuf) != NCSCC_RC_SUCCESS) { > LOG_ER("Failed to parse file %s. Exiting", sbuf); > exit(EXIT_FAILURE); ___ Opensaf-devel mailing list Opensaf-devel@lists.sourceforge.net https://lists.sourceforge.net/lists/listinfo/opensaf-devel
Re: [devel] [PATCH 1/1] clm: fix memory leak reported by valgrind [#3206]
Hi Thien, ACK from me. Best Regards, ThuanTr -Original Message- From: Thien Minh Huynh Sent: Wednesday, July 29, 2020 12:15 PM To: Thuan Tran ; Thang Duc Nguyen Cc: opensaf-devel@lists.sourceforge.net; Thien Minh Huynh Subject: [PATCH 1/1] clm: fix memory leak reported by valgrind [#3206] Fix definitely lost reported by valgrind. --- src/clm/clmd/clms_evt.cc | 5 + src/clm/clmd/clms_mbcsv.cc | 7 +++ src/clm/clmd/clms_mds.cc | 2 ++ src/mbc/mbcsv_mds.c| 2 ++ 4 files changed, 12 insertions(+), 4 deletions(-) diff --git a/src/clm/clmd/clms_evt.cc b/src/clm/clmd/clms_evt.cc index 28f5596ec..e8577060e 100644 --- a/src/clm/clmd/clms_evt.cc +++ b/src/clm/clmd/clms_evt.cc @@ -2136,10 +2136,7 @@ void clms_remove_node_down_rec(SaClmNodeIdT node_id) { */ void clms_evt_destroy(CLMSV_CLMS_EVT *evt) { osafassert(evt != nullptr); - if (evt->info.msg.info.api_info.type == CLMSV_CLUSTER_JOIN_REQ) { -TRACE("not calloced in server code,don't free it here"); - } else -free(evt); + free(evt); } /*clms ack to response msg from clma diff --git a/src/clm/clmd/clms_mbcsv.cc b/src/clm/clmd/clms_mbcsv.cc index 049b32c41..cc726fa77 100644 --- a/src/clm/clmd/clms_mbcsv.cc +++ b/src/clm/clmd/clms_mbcsv.cc @@ -2092,6 +2092,7 @@ static uint32_t ckpt_decode_cold_sync(CLMS_CB *cb, NCS_MBCSV_CB_ARG *cbk_arg) { CLMS_CKPT_REC msg; CLMS_CKPT_REC *data = nullptr; uint32_t num_rec = 0; + uint8_t *ptr; TRACE_ENTER(); /* @@ -2188,6 +2189,12 @@ static uint32_t ckpt_decode_cold_sync(CLMS_CB *cb, NCS_MBCSV_CB_ARG *cbk_arg) { --num_rec; } + /* Get the async update count */ + ptr = ncs_dec_flatten_space(_arg->info.decode.i_uba, + (uint8_t *)>async_upd_cnt, sizeof(uint32_t)); + cb->async_upd_cnt = ncs_decode_32bit(); + ncs_dec_skip_space(_arg->info.decode.i_uba, sizeof(uint32_t)); + done: if (rc != NCSCC_RC_SUCCESS) { /* Do not allow standby to get out of sync */ diff --git a/src/clm/clmd/clms_mds.cc b/src/clm/clmd/clms_mds.cc index 5a77885ac..45d1453e4 100644 --- a/src/clm/clmd/clms_mds.cc +++ b/src/clm/clmd/clms_mds.cc @@ -1247,6 +1247,8 @@ static uint32_t clms_mds_svc_event(struct ncsmds_callback_info *info) { rc = NCSCC_RC_FAILURE; goto done; } +} else { + free(evt); } } done: diff --git a/src/mbc/mbcsv_mds.c b/src/mbc/mbcsv_mds.c index f3370f8e2..964e33542 100644 --- a/src/mbc/mbcsv_mds.c +++ b/src/mbc/mbcsv_mds.c @@ -391,6 +391,8 @@ uint32_t mbcsv_mds_rcv(NCSMDS_CALLBACK_INFO *cbinfo) TRACE_LEAVE2("ipc send failed"); return NCSCC_RC_FAILURE; } + } else { + m_MMGR_FREE_MBCSV_EVT(msg); } return NCSCC_RC_SUCCESS; -- 2.17.1 ___ Opensaf-devel mailing list Opensaf-devel@lists.sourceforge.net https://lists.sourceforge.net/lists/listinfo/opensaf-devel
Re: [devel] [PATCH 1/1] clm: fix memory leak reeported by valgrind [#3206]
Hi Thien, Good work! Some comments inline. Also, commit message typo: "reeported" -> "reported" Best Regards, ThuanTr -Original Message- From: thien.m.huynh Sent: Tuesday, July 28, 2020 9:27 AM To: Thang Duc Nguyen ; Thuan Tran Cc: opensaf-devel@lists.sourceforge.net Subject: [devel] [PATCH 1/1] clm: fix memory leak reeported by valgrind [#3206] Fix definitely lost reported by valgrind. --- src/clm/clmd/clms_evt.cc | 5 + src/clm/clmd/clms_mbcsv.cc | 10 ++ src/clm/clmd/clms_mds.cc | 2 ++ src/mbc/mbcsv_mds.c| 3 +++ 4 files changed, 16 insertions(+), 4 deletions(-) diff --git a/src/clm/clmd/clms_evt.cc b/src/clm/clmd/clms_evt.cc index 28f5596ec..e8577060e 100644 --- a/src/clm/clmd/clms_evt.cc +++ b/src/clm/clmd/clms_evt.cc @@ -2136,10 +2136,7 @@ void clms_remove_node_down_rec(SaClmNodeIdT node_id) { */ void clms_evt_destroy(CLMSV_CLMS_EVT *evt) { osafassert(evt != nullptr); - if (evt->info.msg.info.api_info.type == CLMSV_CLUSTER_JOIN_REQ) { -TRACE("not calloced in server code,don't free it here"); - } else -free(evt); + free(evt); } /*clms ack to response msg from clma diff --git a/src/clm/clmd/clms_mbcsv.cc b/src/clm/clmd/clms_mbcsv.cc index 049b32c41..aa86b46c5 100644 --- a/src/clm/clmd/clms_mbcsv.cc +++ b/src/clm/clmd/clms_mbcsv.cc @@ -2092,6 +2092,9 @@ static uint32_t ckpt_decode_cold_sync(CLMS_CB *cb, NCS_MBCSV_CB_ARG *cbk_arg) { CLMS_CKPT_REC msg; CLMS_CKPT_REC *data = nullptr; uint32_t num_rec = 0; + uint8_t data_cnt[16]; + uint32_t num_of_async_upd; + uint8_t *ptr; TRACE_ENTER(); /* @@ -2188,6 +2191,13 @@ static uint32_t ckpt_decode_cold_sync(CLMS_CB *cb, NCS_MBCSV_CB_ARG *cbk_arg) { --num_rec; } + /* Get the async update count */ + ptr = ncs_dec_flatten_space(_arg->info.decode.i_uba, data_cnt, + sizeof(uint32_t)); + num_of_async_upd = ncs_decode_32bit(); + cb->async_upd_cnt = num_of_async_upd; + ncs_dec_skip_space(_arg->info.decode.i_uba, sizeof(uint32_t)); + [Thuan] Can be shorten like: --- ptr = ncs_dec_flatten_space(_arg->info.decode.i_uba, (uint8_t*)>async_upd_cnt, sizeof(uint32_t)); cb->async_upd_cnt = ncs_decode_32bit(); --- done: if (rc != NCSCC_RC_SUCCESS) { /* Do not allow standby to get out of sync */ diff --git a/src/clm/clmd/clms_mds.cc b/src/clm/clmd/clms_mds.cc index 5a77885ac..45d1453e4 100644 --- a/src/clm/clmd/clms_mds.cc +++ b/src/clm/clmd/clms_mds.cc @@ -1247,6 +1247,8 @@ static uint32_t clms_mds_svc_event(struct ncsmds_callback_info *info) { rc = NCSCC_RC_FAILURE; goto done; } +} else { + free(evt); } } done: diff --git a/src/mbc/mbcsv_mds.c b/src/mbc/mbcsv_mds.c index f3370f8e2..efb86c42a 100644 --- a/src/mbc/mbcsv_mds.c +++ b/src/mbc/mbcsv_mds.c @@ -391,6 +391,9 @@ uint32_t mbcsv_mds_rcv(NCSMDS_CALLBACK_INFO *cbinfo) TRACE_LEAVE2("ipc send failed"); return NCSCC_RC_FAILURE; } + } else { + if (msg) + m_MMGR_FREE_MBCSV_EVT(msg); [Thuan] Can be shorten like: --- } else { m_MMGR_FREE_MBCSV_EVT(msg); } --- } return NCSCC_RC_SUCCESS; -- 2.17.1 ___ Opensaf-devel mailing list Opensaf-devel@lists.sourceforge.net https://lists.sourceforge.net/lists/listinfo/opensaf-devel ___ Opensaf-devel mailing list Opensaf-devel@lists.sourceforge.net https://lists.sourceforge.net/lists/listinfo/opensaf-devel
Re: [devel] [PATCH 1/1] saflogger: avoid saflogger loop forever in resilience [#3198]
Hi Thien, ACK from me. Best Regards, ThuanTr -Original Message- From: Thien Minh Huynh Sent: Tuesday, July 14, 2020 4:50 PM To: Vu Minh Nguyen ; Thuan Tran ; Thang Duc Nguyen Cc: opensaf-devel@lists.sourceforge.net; Thien Minh Huynh Subject: [PATCH 1/1] saflogger: avoid saflogger loop forever in resilience [#3198] When log resilient mode is enabled and timeout of saflogger is configured larger than logResilienceTimeout , the saflogger will be blocked forever until the underlying filesystem is responsive. The fix is adding a timer to avoid command loop too long. --- src/log/tools/saf_logger.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/log/tools/saf_logger.c b/src/log/tools/saf_logger.c index e9f7e9b36..7a7e192b2 100644 --- a/src/log/tools/saf_logger.c +++ b/src/log/tools/saf_logger.c @@ -148,6 +148,7 @@ static SaAisErrorT write_log_record(SaLogHandleT logHandle, int i = 0; struct pollfd fds[1]; int ret; + int64_t start_time_us = get_current_SaTime() / 1000; unsigned int wait_time = 0; i++; @@ -206,6 +207,7 @@ poll_retry: return SA_AIS_ERR_BAD_OPERATION; } + wait_time = (get_current_SaTime() / 1000) - start_time_us; if (cb_error == SA_AIS_ERR_TRY_AGAIN && wait_time < g_timeout*ONE_SECOND_TO_NS) { usleep(HUNDRED_MS); -- 2.17.1 ___ Opensaf-devel mailing list Opensaf-devel@lists.sourceforge.net https://lists.sourceforge.net/lists/listinfo/opensaf-devel
Re: [devel] [PATCH 1/1] saflogger: avoid saflogger loop forever in resilience [#3198]
Hi Thien, Check my inline comments. Best Regards, ThuanTr -Original Message- From: Thien Minh Huynh Sent: Tuesday, July 14, 2020 12:03 PM To: Vu Minh Nguyen ; Thuan Tran ; Thang Duc Nguyen Cc: opensaf-devel@lists.sourceforge.net; Thien Minh Huynh Subject: [PATCH 1/1] saflogger: avoid saflogger loop forever in resilience [#3198] When log resilient mode is enabled and timeout of saflogger is configured larger than logResilienceTimeout , the saflogger will be blocked forever until the underlying filesystem is responsive. The fix is adding a timer to avoid command loop too long. --- src/log/tools/saf_logger.c | 10 ++ 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/src/log/tools/saf_logger.c b/src/log/tools/saf_logger.c index e9f7e9b36..550e3c58a 100644 --- a/src/log/tools/saf_logger.c +++ b/src/log/tools/saf_logger.c @@ -148,7 +148,8 @@ static SaAisErrorT write_log_record(SaLogHandleT logHandle, int i = 0; struct pollfd fds[1]; int ret; - unsigned int wait_time = 0; + int64_t start_time_us = get_current_SaTime() / 1000; + int64_t wait_time = 0; [Thuan] Can we keep "unsigned int" for wait_time? Then we can reduce changes. i++; @@ -166,7 +167,7 @@ retry: if (errorCode != SA_AIS_OK) { if (wait_time) - fprintf(stderr, "Waited for %u seconds.\n", + fprintf(stderr, "Waited for %ld seconds.\n", wait_time / 100); fprintf(stderr, "saLogWriteLogAsync FAILED: %s\n", saf_error(errorCode)); @@ -206,8 +207,9 @@ poll_retry: return SA_AIS_ERR_BAD_OPERATION; } + wait_time = (get_current_SaTime() / 1000) - start_time_us; if (cb_error == SA_AIS_ERR_TRY_AGAIN && - wait_time < g_timeout*ONE_SECOND_TO_NS) { + wait_time < g_timeout * ONE_SECOND_TO_NS) { [Thuan] Can we keep it unchanged? usleep(HUNDRED_MS); wait_time += HUNDRED_MS; goto retry; @@ -221,7 +223,7 @@ poll_retry: if (cb_error != SA_AIS_OK) { if (wait_time) - fprintf(stderr, "Waited for %u seconds.\n", + fprintf(stderr, "Waited for %ld seconds.\n", wait_time / 100); fprintf(stderr, "logWriteLogCallbackT FAILED: %s\n", saf_error(cb_error)); -- 2.17.1 ___ Opensaf-devel mailing list Opensaf-devel@lists.sourceforge.net https://lists.sourceforge.net/lists/listinfo/opensaf-devel
Re: [devel] [PATCH 1/1] ntf: Enhance attribute change notification [#3196]
Hi Thanh, ACK from me. Best Regards, ThuanTr -Original Message- From: Thanh Nguyen Sent: Friday, July 10, 2020 3:27 PM To: Thang Duc Nguyen ; Minh Hon Chau ; Thuan Tran Cc: opensaf-devel@lists.sourceforge.net; Thanh Nguyen Subject: [PATCH 1/1] ntf: Enhance attribute change notification [#3196] 1) When IMM changes an attribute with NOTIFY flag, ntfimcn currently sends the changed attribute values in notification data. With the enhancement, ntfimcn fetches the old attribute values, corresponding to the changed attribute values, if exist in IMM, and include the old attribute values in notification data. 2) Test suite ntftest is updated. --- src/ntf/apitest/test_ntf_imcn.cc| 1100 --- src/ntf/apitest/tet_ntf_common.cc | 83 ++ src/ntf/apitest/tet_ntf_common.h|3 + src/ntf/ntfimcnd/ntfimcn_imm.c | 244 +- src/ntf/ntfimcnd/ntfimcn_imm.h | 44 ++ src/ntf/ntfimcnd/ntfimcn_main.h |3 + src/ntf/ntfimcnd/ntfimcn_notifier.c | 44 +- 7 files changed, 1060 insertions(+), 461 deletions(-) diff --git a/src/ntf/apitest/test_ntf_imcn.cc b/src/ntf/apitest/test_ntf_imcn.cc index 6fc5fc831..0443491a9 100644 --- a/src/ntf/apitest/test_ntf_imcn.cc +++ b/src/ntf/apitest/test_ntf_imcn.cc @@ -1,6 +1,7 @@ /* -*- OpenSAF -*- * * (C) Copyright 2013 The OpenSAF Foundation + * Copyright Ericsson AB 2020 - All Rights Reserved. * * This program is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY @@ -24,10 +25,13 @@ #include #include #include +#include +#include #include #include #include #include +#include #include "base/time.h" #include "osaf/immutil/immutil.h" #include "osaf/apitest/util.h" @@ -55,6 +59,8 @@ static char BUF1_STR[sizeof(BUF1) + 1] = { '\0' }; static char BUF2_STR[sizeof(BUF2) + 1] = { '\0' }; static char BUF3_STR[sizeof(BUF3) + 1] = { '\0' }; +#define OLD_ATTR_PRESENT_DEFAULT SA_FALSE + /** * Callback routine, called when subscribed notification arrives. */ @@ -168,6 +174,123 @@ static SaBoolT bufs_equal1(const SaUint8T *recbuf, return rc; } +static void print_notification_attr(SaNtfNotificationHandleT nHandle, +SaNtfElementIdT attributeId, +SaNtfValueTypeT attributeType, +SaNtfValueT* value, +int idx) { + SaUint16T numElem = 0; + char *str = NULL; + SaAisErrorT rc = SA_AIS_OK; + + switch (attributeType) { + case SA_NTF_VALUE_STRING: { +rc = saNtfPtrValGet( +nHandle, +value, +reinterpret_cast(), ); +if (rc == SA_AIS_OK) { + printf("[%d]: Id:%d Type:%d Value:(%d)%s\n", idx, +attributeId, attributeType, numElem, str); +} else { + printf("[%d]: Id:%d Type:%d Value:(%d)%s\n", idx, +attributeId, attributeType, numElem, ""); +} +break; + } + case SA_NTF_VALUE_UINT32: { +printf("[%d]: Id:%d Type:%d Value:%u\n", idx, +attributeId, attributeType, value->uint32Val); +break; + } + case SA_NTF_VALUE_INT32: { +printf("[%d]: Id:%d Type:%d Value:%d\n", idx, +attributeId, attributeType, value->int32Val); +break; + } + case SA_NTF_VALUE_UINT64: { +printf("[%d]: Id:%d Type:%d Value:%llu\n", idx, +attributeId, attributeType, value->uint64Val); +break; + } + case SA_NTF_VALUE_INT64: { +printf("[%d]: Id:%d Type:%d Value:%lld\n", idx, +attributeId, attributeType, value->int64Val); +break; + } + case SA_NTF_VALUE_FLOAT: { +printf("[%d]: Id:%d Type:%d Value:%f\n", idx, +attributeId, attributeType, value->floatVal); +break; + } + case SA_NTF_VALUE_DOUBLE: { +printf("[%d]: Id:%d Type:%d Value:%f\n", idx, +attributeId, attributeType, value->doubleVal); +break; + } + case SA_NTF_VALUE_LDAP_NAME: + case SA_NTF_VALUE_BINARY: { +SaUint8T *binptr = NULL; +rc = saNtfPtrValGet( +nHandle, +value, +reinterpret_cast(), ); +if (rc == SA_AIS_OK) { + printf("[%d]: Id:%d Type:%d NumBytes:%u\n", idx, + attributeId, attributeType, + numElem); + print_mem((const unsigned char*) binptr, numElem); +} else { + printf("[%d]: Id:%d Type:%d NumBytes:(%u)%s\n", idx, +attributeId, attributeType, numElem, ""); +} +break; + } + default: +printf("Unsupported attribute type: %d\n", +attributeType); +break; + } +} + +static void print_old_attrs(SaNtfNotificationHandleT nHandle, +SaNtfAttributeChangeT* attrChange, SaUint16T numAttributes) { + // Skip of none of the attribute exist + int count = -1; + for (int i = 0; i < numAttributes; ++i) { +// There is at least one old attribute value +if (attrChange[i].oldAttributePresent == SA_TRUE) { + co
Re: [devel] [PATCH 1/2] imm: reboot nodes used to be different partition with coord [#2936]
Hi Minh, Thanks, I will add comment for ex_immd_node_id and check spelling in README. I will send V2 with AMF patch as our verbal discussion. Best Regards, ThuanTr -Original Message- From: Minh Hon Chau Sent: Friday, July 10, 2020 9:56 AM To: Thuan Tran ; Thang Duc Nguyen ; Vu Minh Nguyen Cc: opensaf-devel@lists.sourceforge.net Subject: Re: [PATCH 1/2] imm: reboot nodes used to be different partition with coord [#2936] Hi Thuan, A very minor comment marked with [M] and some spelling errors in README you may find them. I don't think I have understood IMM well enough to make sure that @ex_immd_node_id has been fully covered in all cases, that may need Vu having a look if he has time. Thanks Minh On 3/7/20 7:33 pm, thuan.tran wrote: > - immnd send re-introduce refresh=3 with ex-immd (active) node id. > - immd set very high priority for re-introduce msg of local immnd > and choose coord if re-introduce refresh=3 from local immnd. > - immd reply re-intro to reboot if ex-immd is not same as ex-immd > of selected coord. > - immd use new INTRO_RSP_2 to checkpoint ex-immd to standby. > - immnd use MDS_RED_SUBSCRIBE for immd to know active/standby immd > and help detect headless in multi partition clusters rejoin. > - immnd discard FEVS from unknown immd or during re-introduce to > avoid immnd OUT OF ORDER restart and lost ex-immd info. > - Update README.SC_ABSENCE for this new feature. > - Allow to configure disable/enable this new feature. > - immd standby will reboot if see two actives immd to avoid sync > with wrong active. > --- > scripts/opensaf_reboot | 1 + > src/imm/README.SC_ABSENCE | 21 ++ > src/imm/common/immsv_evt.c | 17 +++- > src/imm/common/immsv_evt.h | 4 ++ > src/imm/immd/immd.conf | 7 > src/imm/immd/immd_cb.h | 3 ++ > src/imm/immd/immd_evt.c| 86 -- > src/imm/immd/immd_main.c | 9 > src/imm/immd/immd_mbcsv.c | 24 +-- > src/imm/immd/immd_mds.c| 17 +--- > src/imm/immd/immd_proc.c | 15 --- > src/imm/immd/immd_red.h| 1 + > src/imm/immd/immd_sbevt.c | 9 +++- > src/imm/immnd/immnd_cb.h | 3 ++ > src/imm/immnd/immnd_evt.c | 84 + > src/imm/immnd/immnd_main.c | 2 + > src/imm/immnd/immnd_mds.c | 35 > src/imm/immnd/immnd_proc.c | 19 - > 18 files changed, 290 insertions(+), 67 deletions(-) > > diff --git a/scripts/opensaf_reboot b/scripts/opensaf_reboot > index bcbc689f0..bb3cee5a1 100644 > --- a/scripts/opensaf_reboot > +++ b/scripts/opensaf_reboot > @@ -143,6 +143,7 @@ unset tipc > # argument 3 is set to 1, "safe reboot" request. > if [ "$#" = 0 ]; then > $icmd pkill -STOP osafamfd > + $icmd pkill -STOP osafimmd > quick_local_node_reboot > elif [ "$safe_reboot" = 1 ]; then > opensaf_safe_reboot > diff --git a/src/imm/README.SC_ABSENCE b/src/imm/README.SC_ABSENCE > index 9cae5d519..644cbb546 100644 > --- a/src/imm/README.SC_ABSENCE > +++ b/src/imm/README.SC_ABSENCE > @@ -76,3 +76,24 @@ Support for absent IMMD is incompatible with 2PBE. If both > are configured then > 2PBE will win and the absence of IMMD feature will be ignored. An error > message > is printed in this case to the syslog at startup. > > + > +SC ABSENCE and ROAMING SC > += > +Under SC absence enable and Roaming SC cluster, multiple partitioned clusters > +can occur due to network split. If PBE database is configured on local node > +then many diverted IMM databases can occur. If rejoin these clusters into one > +cluster, any undefined behavior may happen. To avoid this, IMM implements > +mechanism to reboot nodes used to be on different partition with selected > +coordinator [ticket #2936] > + > +- IMMND send re-introduce use refresh id 3 with ex-immd node id. > +- When payload become controller, the IMMD will select coordinator > +(prioritize local IMMND) and send reply to reboot nodes which have ex-immd > +node id different with ex-immd of selected coordinator. > +- Active IMMD use new IMMD_A2S_MSG_INTRO_RSP_2 to checkpoint node info with > +ex-immd to standby IMMD. > +- IMMND use MDS_RED_SUBSCRIBE to know Active/Standby. Discard FEVS from > +unknown IMMD or during waiting accept of re-introduce to avoid IMMND restart > +due to OUT OR ORDER. This also detect headless in multi partitions rejoin. > + > +To enable this mechanism, please export IMMSV_COORD_SELECT_NODE=1 in > immd.conf > diff --git a/src/imm/common/immsv_evt.c b/src/imm/common/immsv_evt.c > index c93f82a0f..1c43ec719 100644 > --- a/src/imm/common/immsv_evt.c > +++ b/src/imm/common/immsv_evt.c >
Re: [devel] [PATCH 1/1] imm: fix memory leak reported by valgrind [#3199]
Hi Phuc, ACK from me. Best Regards, ThuanTr -Original Message- From: Phuc Hoang Chau Sent: Friday, July 10, 2020 11:04 AM To: Thuan Tran ; Thang Duc Nguyen Cc: opensaf-devel@lists.sourceforge.net; Phuc Hoang Chau Subject: [PATCH 1/1] imm: fix memory leak reported by valgrind [#3199] Fix definitely lost reported by valgrind. --- src/imm/immd/immd_proc.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/imm/immd/immd_proc.c b/src/imm/immd/immd_proc.c index 69e23f2..c1c2700 100644 --- a/src/imm/immd/immd_proc.c +++ b/src/imm/immd/immd_proc.c @@ -824,6 +824,7 @@ uint32_t immd_process_immnd_down(IMMD_CB *cb, IMMD_IMMND_INFO_NODE *immnd_info, } free(tmpData); + m_MMGR_FREE_BUFR_LIST(uba.start); } } else { /* Standby NOT immediately sending D2ND_DISCARD_NODE. But will @@ -935,6 +936,7 @@ void immd_pending_discards(IMMD_CB *cb) } free(tmpData); + m_MMGR_FREE_BUFR_LIST(uba.start); } LOG_IN("Removing pending discard for node:%x epoch:%u", -- 2.7.4 ___ Opensaf-devel mailing list Opensaf-devel@lists.sourceforge.net https://lists.sourceforge.net/lists/listinfo/opensaf-devel
Re: [devel] [PATCH 1/1] amf: enhance to work in roaming SC and headless [#3185]
Hi Minh, See my responses in line with [T2] I will send out V2 under #2936 umbrella. Best Regards, ThuanTr -Original Message- From: Minh Hon Chau Sent: Friday, July 10, 2020 9:50 AM To: Thuan Tran ; Thang Duc Nguyen Cc: opensaf-devel@lists.sourceforge.net Subject: Re: [PATCH 1/1] amf: enhance to work in roaming SC and headless [#3185] Hi Thuan, Please see comments inline marked with [M2], other than that no more comments from me. Thanks Minh On 10/7/20 12:32 pm, Thuan Tran wrote: > Hi Minh, > > Yes, #3185 is only help for multi partition cluster rejoin. > As verbal discussion, I will move this patch under #2936 umbrella. > > See my replies for inline comments. > > Best Regards, > ThuanTr > > -Original Message- > From: Minh Hon Chau > Sent: Thursday, July 9, 2020 8:39 PM > To: Thuan Tran ; Thang Duc Nguyen > > Cc: opensaf-devel@lists.sourceforge.net > Subject: Re: [PATCH 1/1] amf: enhance to work in roaming SC and headless > [#3185] > > Hi Thuan, > > Is this changed only needed if roamingSC is enabled? I remember last > time I tested #2936 Vu's prototype without roamingSC (headless > partitions rejoin) I didn't need any change from amf. > > And some comments inline. > > Thanks > > Minh > > > On 3/7/20 7:23 pm, thuan.tran wrote: >> - amfd reset msg id counter for node that ignore amfnd down >> event to avoid nodes reboot once more due to mismatch msg id after >> reboot up from reboot order for sending node_up after sync window. >> >> - amfd active order reboot its standby if it detect another >> active amfd (multi partition cluster rejoin). > [M] how's about the "another active", is it going to reboot too? > [T] Two active will be handled by RDE detect split-brain. > If AMF active reboot, other active may not reboot (as not yet see this active) > then it break legacy split-brain detected behavior of RDE. [M2]: I see, it's good to mention the reboot's from RDE here, so reader can understand immediately there will not be 2 active co-existed. [T2]: OK, will update commit message with this info. >> - amfd standby should reboot itself if see two active peers to >> avoid standby do cold-sync or be updated with wrong active. > [M] how's about one of "two active peers", is it going to reboot too? > [T] As above explain, it will reboot but by RDE. [M2]: As above. [T2]: OK as above >> - amfd just become standby (out of sync) but see active down >> should reboot itself. >> --- >>src/amf/amfd/dmsg.cc | 8 >>src/amf/amfd/evt.h | 1 + >>src/amf/amfd/main.cc | 3 +++ >>src/amf/amfd/mds.cc| 36 ++-- >>src/amf/amfd/msg.h | 1 + >>src/amf/amfd/ndfsm.cc | 2 ++ >>src/amf/amfd/proc.h| 1 + >>src/amf/amfd/role.cc | 27 +++ >>src/amf/amfd/util.cc | 2 +- >>src/amf/amfnd/amfnd.cc | 2 +- >>10 files changed, 79 insertions(+), 4 deletions(-) >> >> diff --git a/src/amf/amfd/dmsg.cc b/src/amf/amfd/dmsg.cc >> index cf4019d8a..5273f358c 100644 >> --- a/src/amf/amfd/dmsg.cc >> +++ b/src/amf/amfd/dmsg.cc >> @@ -75,6 +75,8 @@ void avd_mds_d_enc(MDS_CALLBACK_ENC_INFO *enc_info) { >> ncs_encode_32bit(, msg->msg_info.d2d_chg_role_rsp.role); >> ncs_encode_32bit(, msg->msg_info.d2d_chg_role_rsp.status); >> break; >> +case AVD_D2D_ROAMING_SC_SPLITBRAIN: > [M] We can name AVD_D2D_REBOOT, the message name should be reflect the > purpose of the message. > [T] OK, will rename it. >> + break; >>default: >> LOG_ER("%s: unknown msg %u", __FUNCTION__, msg->msg_type); >> break; >> @@ -120,6 +122,8 @@ void avd_mds_d_dec(MDS_CALLBACK_DEC_INFO *dec_info) { >> static_cast(ncs_decode_32bit()); >> d2d_msg->msg_info.d2d_chg_role_rsp.status = ncs_decode_32bit(); >> break; >> +case AVD_D2D_ROAMING_SC_SPLITBRAIN: >> + break; >>default: >> LOG_ER("%s: unknown msg %u", __FUNCTION__, d2d_msg->msg_type); >> break; >> @@ -210,6 +214,10 @@ uint32_t avd_d2d_msg_rcv(AVD_D2D_MSG *rcv_msg) { >>osafassert(0); >> } >> break; >> +case AVD_D2D_ROAMING_SC_SPLITBRAIN: >> + LOG_ER("Reboot order from Active as roaming SC split-brain detected"); >> + opensaf_quick_reboot("Split-brain detected"); >> + break; >>default: >> LOG_ER("%s: unknown msg %u", __FUNCTION__, rcv_msg->msg_type); >>
Re: [devel] [PATCH 1/1] ntf: Enhance attribute change notification V2 [#3196]
Hi Thanh, ACK with some minor comments inline. Best Regards, ThuanTr -Original Message- From: Thanh Nguyen Sent: Wednesday, July 8, 2020 2:52 PM To: Thang Duc Nguyen ; Minh Hon Chau ; Thuan Tran Cc: opensaf-devel@lists.sourceforge.net; Thanh Nguyen Subject: [PATCH 1/1] ntf: Enhance attribute change notification V2 [#3196] 1) When IMM changes an attribute with NOTIFY flag, ntfimcn currently sends the changed attribute values in notification data. With the enhancement, ntfimcn fetches the old attribute values, corresponding to the changed attribute values, if exist in IMM, and include the old attribute values in notification data. 2) Test suite ntftest is updated. --- src/ntf/apitest/test_ntf_imcn.cc| 1100 --- src/ntf/apitest/tet_ntf_common.cc | 83 ++ src/ntf/apitest/tet_ntf_common.h|3 + src/ntf/ntfimcnd/ntfimcn_imm.c | 245 +- src/ntf/ntfimcnd/ntfimcn_imm.h | 44 ++ src/ntf/ntfimcnd/ntfimcn_main.h |3 + src/ntf/ntfimcnd/ntfimcn_notifier.c | 44 +- 7 files changed, 1060 insertions(+), 462 deletions(-) diff --git a/src/ntf/apitest/test_ntf_imcn.cc b/src/ntf/apitest/test_ntf_imcn.cc index 6fc5fc831..0443491a9 100644 --- a/src/ntf/apitest/test_ntf_imcn.cc +++ b/src/ntf/apitest/test_ntf_imcn.cc @@ -1,6 +1,7 @@ /* -*- OpenSAF -*- * * (C) Copyright 2013 The OpenSAF Foundation + * Copyright Ericsson AB 2020 - All Rights Reserved. * * This program is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY @@ -24,10 +25,13 @@ #include #include #include +#include +#include #include #include #include #include +#include #include "base/time.h" #include "osaf/immutil/immutil.h" #include "osaf/apitest/util.h" @@ -55,6 +59,8 @@ static char BUF1_STR[sizeof(BUF1) + 1] = { '\0' }; static char BUF2_STR[sizeof(BUF2) + 1] = { '\0' }; static char BUF3_STR[sizeof(BUF3) + 1] = { '\0' }; +#define OLD_ATTR_PRESENT_DEFAULT SA_FALSE + /** * Callback routine, called when subscribed notification arrives. */ @@ -168,6 +174,123 @@ static SaBoolT bufs_equal1(const SaUint8T *recbuf, return rc; } +static void print_notification_attr(SaNtfNotificationHandleT nHandle, +SaNtfElementIdT attributeId, +SaNtfValueTypeT attributeType, +SaNtfValueT* value, +int idx) { + SaUint16T numElem = 0; + char *str = NULL; + SaAisErrorT rc = SA_AIS_OK; + + switch (attributeType) { + case SA_NTF_VALUE_STRING: { +rc = saNtfPtrValGet( +nHandle, +value, +reinterpret_cast(), ); +if (rc == SA_AIS_OK) { + printf("[%d]: Id:%d Type:%d Value:(%d)%s\n", idx, +attributeId, attributeType, numElem, str); +} else { + printf("[%d]: Id:%d Type:%d Value:(%d)%s\n", idx, +attributeId, attributeType, numElem, ""); +} +break; + } + case SA_NTF_VALUE_UINT32: { +printf("[%d]: Id:%d Type:%d Value:%u\n", idx, +attributeId, attributeType, value->uint32Val); +break; + } + case SA_NTF_VALUE_INT32: { +printf("[%d]: Id:%d Type:%d Value:%d\n", idx, +attributeId, attributeType, value->int32Val); +break; + } + case SA_NTF_VALUE_UINT64: { +printf("[%d]: Id:%d Type:%d Value:%llu\n", idx, +attributeId, attributeType, value->uint64Val); +break; + } + case SA_NTF_VALUE_INT64: { +printf("[%d]: Id:%d Type:%d Value:%lld\n", idx, +attributeId, attributeType, value->int64Val); +break; + } + case SA_NTF_VALUE_FLOAT: { +printf("[%d]: Id:%d Type:%d Value:%f\n", idx, +attributeId, attributeType, value->floatVal); +break; + } + case SA_NTF_VALUE_DOUBLE: { +printf("[%d]: Id:%d Type:%d Value:%f\n", idx, +attributeId, attributeType, value->doubleVal); +break; + } + case SA_NTF_VALUE_LDAP_NAME: + case SA_NTF_VALUE_BINARY: { +SaUint8T *binptr = NULL; +rc = saNtfPtrValGet( +nHandle, +value, +reinterpret_cast(), ); +if (rc == SA_AIS_OK) { + printf("[%d]: Id:%d Type:%d NumBytes:%u\n", idx, + attributeId, attributeType, + numElem); + print_mem((const unsigned char*) binptr, numElem); +} else { + printf("[%d]: Id:%d Type:%d NumBytes:(%u)%s\n", idx, +attributeId, attributeType, numElem, ""); +} +break; + } + default: +printf("Unsupported attribute type: %d\n", +attributeType); +break; + } +} + +static void print_old_attrs(SaNtfNotificationHandleT nHandle, +SaNtfAttributeChangeT* attrChange, SaUint16T numAttributes) { + // Skip of none of the attribute exist + int count = -1; + for (int i = 0; i < numAttributes; ++i) { +// There is at least one old attribute value +if (attrChange[i].oldAttributePresent
Re: [devel] [PATCH 1/1] amf: enhance to work in roaming SC and headless [#3185]
Hi Minh, Yes, #3185 is only help for multi partition cluster rejoin. As verbal discussion, I will move this patch under #2936 umbrella. See my replies for inline comments. Best Regards, ThuanTr -Original Message- From: Minh Hon Chau Sent: Thursday, July 9, 2020 8:39 PM To: Thuan Tran ; Thang Duc Nguyen Cc: opensaf-devel@lists.sourceforge.net Subject: Re: [PATCH 1/1] amf: enhance to work in roaming SC and headless [#3185] Hi Thuan, Is this changed only needed if roamingSC is enabled? I remember last time I tested #2936 Vu's prototype without roamingSC (headless partitions rejoin) I didn't need any change from amf. And some comments inline. Thanks Minh On 3/7/20 7:23 pm, thuan.tran wrote: > - amfd reset msg id counter for node that ignore amfnd down > event to avoid nodes reboot once more due to mismatch msg id after > reboot up from reboot order for sending node_up after sync window. > > - amfd active order reboot its standby if it detect another > active amfd (multi partition cluster rejoin). [M] how's about the "another active", is it going to reboot too? [T] Two active will be handled by RDE detect split-brain. If AMF active reboot, other active may not reboot (as not yet see this active) then it break legacy split-brain detected behavior of RDE. > > - amfd standby should reboot itself if see two active peers to > avoid standby do cold-sync or be updated with wrong active. [M] how's about one of "two active peers", is it going to reboot too? [T] As above explain, it will reboot but by RDE. > > - amfd just become standby (out of sync) but see active down > should reboot itself. > --- > src/amf/amfd/dmsg.cc | 8 > src/amf/amfd/evt.h | 1 + > src/amf/amfd/main.cc | 3 +++ > src/amf/amfd/mds.cc| 36 ++-- > src/amf/amfd/msg.h | 1 + > src/amf/amfd/ndfsm.cc | 2 ++ > src/amf/amfd/proc.h| 1 + > src/amf/amfd/role.cc | 27 +++ > src/amf/amfd/util.cc | 2 +- > src/amf/amfnd/amfnd.cc | 2 +- > 10 files changed, 79 insertions(+), 4 deletions(-) > > diff --git a/src/amf/amfd/dmsg.cc b/src/amf/amfd/dmsg.cc > index cf4019d8a..5273f358c 100644 > --- a/src/amf/amfd/dmsg.cc > +++ b/src/amf/amfd/dmsg.cc > @@ -75,6 +75,8 @@ void avd_mds_d_enc(MDS_CALLBACK_ENC_INFO *enc_info) { > ncs_encode_32bit(, msg->msg_info.d2d_chg_role_rsp.role); > ncs_encode_32bit(, msg->msg_info.d2d_chg_role_rsp.status); > break; > +case AVD_D2D_ROAMING_SC_SPLITBRAIN: [M] We can name AVD_D2D_REBOOT, the message name should be reflect the purpose of the message. [T] OK, will rename it. > + break; > default: > LOG_ER("%s: unknown msg %u", __FUNCTION__, msg->msg_type); > break; > @@ -120,6 +122,8 @@ void avd_mds_d_dec(MDS_CALLBACK_DEC_INFO *dec_info) { > static_cast(ncs_decode_32bit()); > d2d_msg->msg_info.d2d_chg_role_rsp.status = ncs_decode_32bit(); > break; > +case AVD_D2D_ROAMING_SC_SPLITBRAIN: > + break; > default: > LOG_ER("%s: unknown msg %u", __FUNCTION__, d2d_msg->msg_type); > break; > @@ -210,6 +214,10 @@ uint32_t avd_d2d_msg_rcv(AVD_D2D_MSG *rcv_msg) { > osafassert(0); > } > break; > +case AVD_D2D_ROAMING_SC_SPLITBRAIN: > + LOG_ER("Reboot order from Active as roaming SC split-brain detected"); > + opensaf_quick_reboot("Split-brain detected"); > + break; > default: > LOG_ER("%s: unknown msg %u", __FUNCTION__, rcv_msg->msg_type); > break; > diff --git a/src/amf/amfd/evt.h b/src/amf/amfd/evt.h > index a9028cde3..a08dccebb 100644 > --- a/src/amf/amfd/evt.h > +++ b/src/amf/amfd/evt.h > @@ -72,6 +72,7 @@ typedef enum avd_evt_type { > AVD_IMM_REINITIALIZED, > AVD_EVT_UNASSIGN_SI_DEP_STATE, > AVD_EVT_ND_MDS_VER_INFO, > + AVD_EVT_ROAMING_SC_SPLITBRAIN, > AVD_EVT_MAX > } AVD_EVT_TYPE; > > diff --git a/src/amf/amfd/main.cc b/src/amf/amfd/main.cc > index 3b1536721..3cc0d9741 100644 > --- a/src/amf/amfd/main.cc > +++ b/src/amf/amfd/main.cc > @@ -132,6 +132,9 @@ static const AVD_EVT_HDLR g_actv_list[AVD_EVT_MAX] = { > invalid_evh,/* AVD_EVT_INVALID */ > avd_sidep_unassign_evh, /* AVD_EVT_UNASSIGN_SI_DEP_STATE */ > avd_avnd_mds_info_evh, /* AVD_EVT_ND_MDS_VER_INFO*/ > + > +/* Roaming SC split-brain processing */ > +avd_roaming_sc_split_brain_evh, /* AVD_EVT_ROAMING_SC_SPLITBRAIN */ > }; > > /* list of all the function pointers related to handling the events > diff --git a/src/amf/amfd/mds.cc b/src/amf/amfd/mds.cc >
Re: [devel] [PATCH 1/1] imm: fix memory leak reeported by valgrind [#3199]
Hi Phuc, For consistent code, could you please use? m_MMGR_FREE_BUFR_LIST(uba.start); Also, I think need also fix in function immd_pending_discards(). Best Regards, ThuanTr -Original Message- From: Phuc Hoang Chau Sent: Thursday, July 9, 2020 2:34 PM To: Thang Duc Nguyen ; Thuan Tran Cc: opensaf-devel@lists.sourceforge.net; Phuc Hoang Chau Subject: [PATCH 1/1] imm: fix memory leak reeported by valgrind [#3199] Fix definitely lost reported by valgrind. --- src/imm/immd/immd_proc.c | 1 + 1 file changed, 1 insertion(+) diff --git a/src/imm/immd/immd_proc.c b/src/imm/immd/immd_proc.c index 69e23f2..648f8b7 100644 --- a/src/imm/immd/immd_proc.c +++ b/src/imm/immd/immd_proc.c @@ -824,6 +824,7 @@ uint32_t immd_process_immnd_down(IMMD_CB *cb, IMMD_IMMND_INFO_NODE *immnd_info, } free(tmpData); + ncs_reset_uba(); [Thuan] use " m_MMGR_FREE_BUFR_LIST(uba.start);" for consistent code. } } else { /* Standby NOT immediately sending D2ND_DISCARD_NODE. But will -- 2.7.4 ___ Opensaf-devel mailing list Opensaf-devel@lists.sourceforge.net https://lists.sourceforge.net/lists/listinfo/opensaf-devel
Re: [devel] [PATCH 1/1] lgs: correct inform failure to AMF [#3197]
Hi Thang, ACK from me. P/s: maybe also need another ticket for NTF since ntfs_exit() do similar this one. ntfs_exit("Cold sync failed", SA_AMF_COMPONENT_RESTART); Best Regards, ThuanTr -Original Message- From: Thang Duc Nguyen Sent: Thursday, July 2, 2020 2:26 PM To: Thuan Tran ; Thien Minh Huynh ; Minh Hon Chau Cc: opensaf-devel@lists.sourceforge.net; Thang Duc Nguyen Subject: [PATCH 1/1] lgs: correct inform failure to AMF [#3197] Should not invoke saAmfComponentErrorReport() to AMF before exit with failure. In case invoking, AMF don't know how to handle it. And logd does not start again. --- src/log/logd/lgs_mbcsv.cc| 16 src/log/logd/lgs_oi_admin.cc | 7 +++ src/log/logd/lgs_util.cc | 4 +--- src/log/logd/lgs_util.h | 2 +- 4 files changed, 13 insertions(+), 16 deletions(-) diff --git a/src/log/logd/lgs_mbcsv.cc b/src/log/logd/lgs_mbcsv.cc index 2d1271c1c..a38f7a5d1 100644 --- a/src/log/logd/lgs_mbcsv.cc +++ b/src/log/logd/lgs_mbcsv.cc @@ -1825,7 +1825,7 @@ static uint32_t ckpt_decode_cold_sync(lgs_cb_t *cb, NCS_MBCSV_CB_ARG *cbk_arg) { done: if (rc != NCSCC_RC_SUCCESS) { /* Do not allow standby to get out of sync */ -lgs_exit("Cold sync failed", SA_AMF_COMPONENT_RESTART); +lgs_exit("Cold sync failed"); } TRACE_LEAVE(); return rc; @@ -1934,7 +1934,7 @@ static uint32_t ckpt_proc_initialize_client(lgs_cb_t *cb, void *data) { if ((client = lgs_client_new(param->mds_dest, param->client_id, param->stream_list)) == NULL) { /* Do not allow standby to get out of sync */ -lgs_exit("Could not create new client", SA_AMF_COMPONENT_RESTART); +lgs_exit("Could not create new client"); } else { client->client_ver = param->client_ver; } @@ -1942,7 +1942,7 @@ static uint32_t ckpt_proc_initialize_client(lgs_cb_t *cb, void *data) { /* Client with ID already exist, check other attributes */ if (client->mds_dest != param->mds_dest) { /* Do not allow standby to get out of sync */ -lgs_exit("Client attributes differ", SA_AMF_COMPONENT_RESTART); +lgs_exit("Client attributes differ"); } } } else if (lgs_is_peer_v6()) { @@ -1957,7 +1957,7 @@ static uint32_t ckpt_proc_initialize_client(lgs_cb_t *cb, void *data) { if ((client = lgs_client_new(param->mds_dest, param->client_id, param->stream_list)) == NULL) { /* Do not allow standby to get out of sync */ -lgs_exit("Could not create new client", SA_AMF_COMPONENT_RESTART); +lgs_exit("Could not create new client"); } else { client->client_ver = param->client_ver; } @@ -1965,7 +1965,7 @@ static uint32_t ckpt_proc_initialize_client(lgs_cb_t *cb, void *data) { /* Client with ID already exist, check other attributes */ if (client->mds_dest != param->mds_dest) { /* Do not allow standby to get out of sync */ -lgs_exit("Client attributes differ", SA_AMF_COMPONENT_RESTART); +lgs_exit("Client attributes differ"); } } } else { @@ -1980,13 +1980,13 @@ static uint32_t ckpt_proc_initialize_client(lgs_cb_t *cb, void *data) { if ((client = lgs_client_new(param->mds_dest, param->client_id, param->stream_list)) == NULL) { /* Do not allow standby to get out of sync */ -lgs_exit("Could not create new client", SA_AMF_COMPONENT_RESTART); +lgs_exit("Could not create new client"); } } else { /* Client with ID already exist, check other attributes */ if (client->mds_dest != param->mds_dest) { /* Do not allow standby to get out of sync */ -lgs_exit("Client attributes differ", SA_AMF_COMPONENT_RESTART); +lgs_exit("Client attributes differ"); } } } @@ -2488,7 +2488,7 @@ uint32_t ckpt_proc_open_stream(lgs_cb_t *cb, void *data) { /* Do not allow standby to get out of sync */ LOG_ER("%s - Failed to add stream '%s' to client %u", __FUNCTION__, param->logStreamName, param->clientId); -lgs_exit("Could not add stream to client", SA_AMF_COMPONENT_RESTART); +lgs_exit("Could not add stream to client"); } /* Stream is opened on standby. Remove from rtobj list if exist */ diff --git a/src/log/logd/lgs_oi_admin.cc b/src/log/logd/lgs_oi_admin.cc index 8b899219e..afbf3c5eb 100644 --- a/src/log/logd/lgs_oi_admin.cc +++ b/src/log/logd/lgs_oi_admin.cc @@ -343,11 +343,11 @@ static void createLogServerOi() { (ais_rc != SA_AIS_OK)) { LOG_WA("%s: Fail, OI creation timeout", __FUNCTION__); // The legacy
Re: [devel] [PATCH 1/1] lgs: fix memory leak reeported by valgrind [#3195]
Hi Thang, Some comments inline. One concern, LOG ticket but change src/base/daemon.c Is it acceptable? Or need create different ticket? Best Regards, ThuanTr -Original Message- From: Thang Duc Nguyen Sent: Tuesday, June 2, 2020 5:53 PM To: Vu Minh Nguyen ; Thien Minh Huynh ; Thuan Tran ; Minh Hon Chau Cc: opensaf-devel@lists.sourceforge.net; Thang Duc Nguyen Subject: [PATCH 1/1] lgs: fix memory leak reeported by valgrind [#3195] Fix definitely lost reported by valgrind. --- src/base/daemon.c | 6 -- src/log/logd/lgs_imm.cc | 8 src/log/logd/lgs_mbcsv.cc | 2 ++ 3 files changed, 14 insertions(+), 2 deletions(-) diff --git a/src/base/daemon.c b/src/base/daemon.c index 48a0665f2..2b23b43c3 100644 --- a/src/base/daemon.c +++ b/src/base/daemon.c @@ -57,7 +57,7 @@ #define DEFAULT_RUNAS_USERNAME "opensaf" -static const char *internal_version_id_; +static char internal_version_id_[53]; static char fifo_file[NAME_MAX]; static char __pidfile[NAME_MAX]; @@ -294,7 +294,9 @@ void daemonize(int argc, char *argv[]) char buf1[256 + sizeof("_SCHED_PRIORITY")] = {0}; char buf2[256 + sizeof("_SCHED_POLICY")] = {0}; - internal_version_id_ = strdup("@(#) $Id: " INTERNAL_VERSION_ID " $"); + strcpy(internal_version_id_, "@(#) $Id: "); + strcat(internal_version_id_, INTERNAL_VERSION_ID); + strcat(internal_version_id_, " $"); [Thuan] Can we use one line of code by snprintf(...)? if (argc > 0 && argv != NULL) { __parse_options(argc, argv); diff --git a/src/log/logd/lgs_imm.cc b/src/log/logd/lgs_imm.cc index 9094be5f3..1889a57b3 100644 --- a/src/log/logd/lgs_imm.cc +++ b/src/log/logd/lgs_imm.cc @@ -3027,6 +3027,14 @@ SaAisErrorT lgs_imm_init_configStreams(lgs_cb_t *cb) { } done: + /* Free memory allocated for attribute descriptions */ + om_rc = saImmOmClassDescriptionMemoryFree_2(omHandle, attr_definitions); + if (om_rc != SA_AIS_OK) { +LOG_NO("saImmOmClassDescriptionMemoryFree_2() Fail %s", + saf_error(om_rc)); +goto done; [Thuan] Remove "goto done;" + } + /* Do not abort if error when finalizing */ om_rc = immutil_saImmOmSearchFinalize(immSearchHandle); if (om_rc != SA_AIS_OK) { diff --git a/src/log/logd/lgs_mbcsv.cc b/src/log/logd/lgs_mbcsv.cc index 6ec004f0a..2d1271c1c 100644 --- a/src/log/logd/lgs_mbcsv.cc +++ b/src/log/logd/lgs_mbcsv.cc @@ -2502,6 +2502,7 @@ done: lgs_free_edu_mem(param->fileFmt); lgs_free_edu_mem(param->logFileCurrent); lgs_free_edu_mem(param->logStreamName); + lgs_free_edu_mem(param->dest_names); TRACE_LEAVE(); return NCSCC_RC_SUCCESS; @@ -2813,6 +2814,7 @@ done: lgs_free_edu_mem(logFileFormat); lgs_free_edu_mem(logFileCurrent); lgs_free_edu_mem(name); + lgs_free_edu_mem(dest_names); TRACE_LEAVE(); return NCSCC_RC_SUCCESS; -- 2.17.1 ___ Opensaf-devel mailing list Opensaf-devel@lists.sourceforge.net https://lists.sourceforge.net/lists/listinfo/opensaf-devel
Re: [devel] [PATCH 1/1] imm: add checking of return parameters [#3194]
Hi Peter, ACK. Best Regards, ThuanTr -Original Message- From: Peter McIntyre Sent: Monday, June 1, 2020 1:23 PM To: Minh Hon Chau ; Vu Minh Nguyen ; Thang Duc Nguyen ; Thuan Tran Cc: opensaf-devel@lists.sourceforge.net; Peter McIntyre Subject: [PATCH 1/1] imm: add checking of return parameters [#3194] --- src/imm/agent/imma_proc.cc | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/imm/agent/imma_proc.cc b/src/imm/agent/imma_proc.cc index 1060a62c2..dafd42391 100644 --- a/src/imm/agent/imma_proc.cc +++ b/src/imm/agent/imma_proc.cc @@ -2149,7 +2149,8 @@ static bool imma_process_callback_info(IMMA_CB *cb, IMMA_CLIENT_NODE *cl_node, case IMMA_CALLBACK_OM_ADMIN_OP_RSP: /*Async reply via OM. */ /* ABT decide if it is A.2.1 or A.2.11 callback. */ if (cl_node->isImmA2bCbk) { -if (!osaf_is_extended_names_enabled()) { +if (!osaf_is_extended_names_enabled() && +callback->params) { int i = 0; while (callback->params[i]) { if (callback->params[i]->paramType == SA_IMM_ATTR_SANAMET && -- 2.17.1 ___ Opensaf-devel mailing list Opensaf-devel@lists.sourceforge.net https://lists.sourceforge.net/lists/listinfo/opensaf-devel
Re: [devel] [PATCH 1/1] rde: avoid dual active controllers in relax promotion mode [#3188]
Hi David, Please ignore this mail. I mistake it with #3192. Best Regards, ThuanTr -Original Message- From: Thuan Tran Sent: Thursday, May 28, 2020 9:53 AM To: Minh Hon Chau ; Thang Duc Nguyen ; Gary Lee ; Hoyt, David Cc: opensaf-devel@lists.sourceforge.net Subject: Re: [devel] [PATCH 1/1] rde: avoid dual active controllers in relax promotion mode [#3188] Hi David, Please help ACK the review as you have verified the patch. Thank you. P/S: you can give comment if any. Best Regards, ThuanTr -Original Message- From: Thuan Tran Sent: Monday, May 25, 2020 5:40 PM To: Minh Hon Chau ; Thang Duc Nguyen ; Gary Lee Cc: opensaf-devel@lists.sourceforge.net; Thuan Tran Subject: [PATCH 1/1] rde: avoid dual active controllers in relax promotion mode [#3188] - Node already give up promotion has set role to QUIESCED should not promote active anyway, it will cause dual active controllers. - Node fail promote active with consensus with error exist should set role as QUIESCED if current role is UNDEFINED. --- src/rde/rded/role.cc | 5 + 1 file changed, 5 insertions(+) diff --git a/src/rde/rded/role.cc b/src/rde/rded/role.cc index 06c346ced..208ae2364 100644 --- a/src/rde/rded/role.cc +++ b/src/rde/rded/role.cc @@ -107,9 +107,14 @@ void Role::PromoteNode(const uint64_t cluster_size, rc = consensus_service.PromoteThisNode(true, cluster_size); if (rc == SA_AIS_ERR_EXIST) { LOG_WA("Another controller is already active"); +if (role() == PCS_RDA_UNDEFINED) SetRole(PCS_RDA_QUIESCED); return; } else if (rc != SA_AIS_OK && relaxed_mode == true) { LOG_WA("Unable to set active controller in consensus service"); +if (role() == PCS_RDA_QUIESCED) { + LOG_WA("Another controller is already promoted"); + return; +} LOG_WA("Will become active anyway"); promotion_pending = true; } else if (rc != SA_AIS_OK) { -- 2.17.1 ___ Opensaf-devel mailing list Opensaf-devel@lists.sourceforge.net https://lists.sourceforge.net/lists/listinfo/opensaf-devel ___ Opensaf-devel mailing list Opensaf-devel@lists.sourceforge.net https://lists.sourceforge.net/lists/listinfo/opensaf-devel
Re: [devel] [PATCH 1/1] dtm: bind configured node ip for socket to setup new connection [#3192]
Hi David, Please help ACK the review as you have verified the patch. Thank you. P/S: you can give comment if any. Best Regards, ThuanTr -Original Message- From: Thuan Tran Sent: Tuesday, May 26, 2020 1:31 PM To: Minh Hon Chau ; Gary Lee Cc: opensaf-devel@lists.sourceforge.net; Thuan Tran Subject: [PATCH 1/1] dtm: bind configured node ip for socket to setup new connection [#3192] --- src/dtm/dtmnd/dtm_node_sockets.cc | 28 1 file changed, 28 insertions(+) diff --git a/src/dtm/dtmnd/dtm_node_sockets.cc b/src/dtm/dtmnd/dtm_node_sockets.cc index 41e16206b..7cb461810 100644 --- a/src/dtm/dtmnd/dtm_node_sockets.cc +++ b/src/dtm/dtmnd/dtm_node_sockets.cc @@ -241,6 +241,10 @@ int comm_socket_setup_new(DTM_INTERNODE_CB *dtms_cb, struct addrinfo addr_criteria, *p; /* Criteria for address match */ char foreign_address_eth[INET6_ADDRSTRLEN + IFNAMSIZ]; int flag; + struct in_addr addr_ipv4; + struct sockaddr_in sockaddr; + struct in6_addr addr_ipv6; + struct sockaddr_in6 sockaddr6; TRACE_ENTER(); /* Construct the serv address structure */ @@ -330,6 +334,30 @@ int comm_socket_setup_new(DTM_INTERNODE_CB *dtms_cb, goto done; } + if (dtms_cb->i_addr_family == AF_INET) { +if (inet_pton(AF_INET, dtms_cb->ip_addr.c_str(), _ipv4) == 1) { + sockaddr.sin_family = AF_INET; + sockaddr.sin_port = 0; + sockaddr.sin_addr = addr_ipv4; + if (osaf_bind(sock_desc, (struct sockaddr *), +sizeof(sockaddr)) != 0) +LOG_WA("DTM:osaf_bind() ipv4 failed with errno %d", errno); +} else { + LOG_WA("DTM:inet_pton(%s) ipv4 failed", dtms_cb->ip_addr.c_str()); +} + } else { +if (inet_pton(AF_INET6, dtms_cb->ip_addr.c_str(), _ipv6) == 1) { + sockaddr6.sin6_family = AF_INET6; + sockaddr6.sin6_port = 0; + sockaddr6.sin6_addr = addr_ipv6; + if (osaf_bind(sock_desc, (struct sockaddr *), +sizeof(sockaddr6)) != 0) +LOG_WA("DTM:osaf_bind() ipv6 failed with errno %d", errno); +} else { + LOG_WA("DTM:inet_pton(%s) ipv6 failed", dtms_cb->ip_addr.c_str()); +} + } + /* Try to connect to the given port */ if (connect(sock_desc, addr_list->ai_addr, addr_list->ai_addrlen) < 0) { err = errno; -- 2.17.1 ___ Opensaf-devel mailing list Opensaf-devel@lists.sourceforge.net https://lists.sourceforge.net/lists/listinfo/opensaf-devel
Re: [devel] [PATCH 1/1] rde: avoid dual active controllers in relax promotion mode [#3188]
Hi David, Please help ACK the review as you have verified the patch. Thank you. P/S: you can give comment if any. Best Regards, ThuanTr -Original Message- From: Thuan Tran Sent: Monday, May 25, 2020 5:40 PM To: Minh Hon Chau ; Thang Duc Nguyen ; Gary Lee Cc: opensaf-devel@lists.sourceforge.net; Thuan Tran Subject: [PATCH 1/1] rde: avoid dual active controllers in relax promotion mode [#3188] - Node already give up promotion has set role to QUIESCED should not promote active anyway, it will cause dual active controllers. - Node fail promote active with consensus with error exist should set role as QUIESCED if current role is UNDEFINED. --- src/rde/rded/role.cc | 5 + 1 file changed, 5 insertions(+) diff --git a/src/rde/rded/role.cc b/src/rde/rded/role.cc index 06c346ced..208ae2364 100644 --- a/src/rde/rded/role.cc +++ b/src/rde/rded/role.cc @@ -107,9 +107,14 @@ void Role::PromoteNode(const uint64_t cluster_size, rc = consensus_service.PromoteThisNode(true, cluster_size); if (rc == SA_AIS_ERR_EXIST) { LOG_WA("Another controller is already active"); +if (role() == PCS_RDA_UNDEFINED) SetRole(PCS_RDA_QUIESCED); return; } else if (rc != SA_AIS_OK && relaxed_mode == true) { LOG_WA("Unable to set active controller in consensus service"); +if (role() == PCS_RDA_QUIESCED) { + LOG_WA("Another controller is already promoted"); + return; +} LOG_WA("Will become active anyway"); promotion_pending = true; } else if (rc != SA_AIS_OK) { -- 2.17.1 ___ Opensaf-devel mailing list Opensaf-devel@lists.sourceforge.net https://lists.sourceforge.net/lists/listinfo/opensaf-devel
Re: [devel] [PATCH 2/2] rde: avoid dual active controllers in relax promotion mode [#3188]
Hi Minh, Thread PromoteNode need read and take action base on latest value of role_. There is only one node give up (set role_ QUIESCED), another node will promote. Role_ could be UNDEFINED at begin of thread PromoteNode() then change to QUIESCED before PromoteNode reach "...active anyway". There is a very rare case that if peer info response come late somehow cause no give up on both nodes then dual active can happen. But at least, the current patches help reduce very much chance dual active happen. Best Regards, ThuanTr -Original Message- From: Minh Hon Chau Sent: Friday, May 22, 2020 7:12 AM To: Thuan Tran ; Thang Duc Nguyen ; Gary Lee Cc: opensaf-devel@lists.sourceforge.net Subject: Re: [PATCH 2/2] rde: avoid dual active controllers in relax promotion mode [#3188] Hi Thuan, the role_ being checked in this PromoteNode thread can be changed to QUIESCED from SetPeerState() in mainthead, it could happen due to a race of thread, timing of mds msg, thus rde will not promote the node. I think we can read role_ in mainthread, where the thread PromoteNode is started, and pass it to PromoteNode, to avoid the role_ being changed. Would it still work? Thanks Minh On 21/5/20 2:52 pm, thuan.tran wrote: > Node already give up promotion has set role to QUIESCED should not > promote active anyway, it will cause dual active controllers. > --- > src/rde/rded/role.cc | 4 > 1 file changed, 4 insertions(+) > > diff --git a/src/rde/rded/role.cc b/src/rde/rded/role.cc > index a3a969b66..8c941b55d 100644 > --- a/src/rde/rded/role.cc > +++ b/src/rde/rded/role.cc > @@ -110,6 +110,10 @@ void Role::PromoteNode(const uint64_t cluster_size, > return; > } else if (rc != SA_AIS_OK && relaxed_mode == true) { > LOG_WA("Unable to set active controller in consensus service"); > +if (role_ == PCS_RDA_QUIESCED) { > + LOG_WA("Another controller is already promoted"); > + return; > +} > LOG_WA("Will become active anyway"); > promotion_pending = true; > } else if (rc != SA_AIS_OK) { ___ Opensaf-devel mailing list Opensaf-devel@lists.sourceforge.net https://lists.sourceforge.net/lists/listinfo/opensaf-devel
Re: [devel] [PATCH 1/1] osaf: fix coding issue identified by codechecker [#3189]
Hi Thang, OK, will move it up before push. Best Regards, ThuanTr -Original Message- From: Thang Duc Nguyen Sent: Monday, May 18, 2020 11:34 AM To: Thuan Tran ; Minh Hon Chau Cc: opensaf-devel@lists.sourceforge.net Subject: RE: [PATCH 1/1] osaf: fix coding issue identified by codechecker [#3189] Hi Thuan, Just a minor comment. Instead of adding asset() below size = (size + 3) & ~3; It's better to move it to above. B.R/Thang -Original Message- From: Thuan Tran Sent: Monday, May 18, 2020 11:15 AM To: Thang Duc Nguyen ; Minh Hon Chau Cc: opensaf-devel@lists.sourceforge.net; Thuan Tran Subject: [PATCH 1/1] osaf: fix coding issue identified by codechecker [#3189] --- src/osaf/immutil/immutil.c | 1 + 1 file changed, 1 insertion(+) diff --git a/src/osaf/immutil/immutil.c b/src/osaf/immutil/immutil.c index e10a8ffdf..4ad7a8fe8 100644 --- a/src/osaf/immutil/immutil.c +++ b/src/osaf/immutil/immutil.c @@ -1048,6 +1048,7 @@ static void *clistMalloc(struct Chunk *clist, size_t size) struct Chunk *chunk; size = (size + 3) & ~3; + osafassert(clist); if (size > CHUNK) { chunk = newChunk(clist->next, size); -- 2.17.1 ___ Opensaf-devel mailing list Opensaf-devel@lists.sourceforge.net https://lists.sourceforge.net/lists/listinfo/opensaf-devel
Re: [devel] [PATCH 1/1] log: increase sleep 2s to make sure all directory are created successfully [#3187]
Hi Thien, ACK with minor comment that maybe change short message to have quick catchup what the fix is For example: log: improve robustness for apitest suite 5 test case 16 [#3187] Sleep 1s after change logRootDirectory not enough time to logsv processing of directories creation. So cannot access '/srv/shared/saflog/croot/testRoot'. Increase sleep 2s to make sure all directory are created successfully. Best Regards, ThuanTr -Original Message- From: Thien Minh Huynh Sent: Wednesday, May 13, 2020 2:35 PM To: Thuan Tran ; Vu Minh Nguyen Cc: opensaf-devel@lists.sourceforge.net; Thien Minh Huynh Subject: [PATCH 1/1] log: increase sleep 2s to make sure all directory are created successfully [#3187] Sleep 1s after change logRootDirectory not enough time to logsv processing of directories creation. So cannot access '/srv/shared/saflog/croot/testRoot' The fix is increase sleep time to 2s to make sure all directory are created successfully. --- src/log/apitest/tet_LogOiOps.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/log/apitest/tet_LogOiOps.c b/src/log/apitest/tet_LogOiOps.c index aae0d5c57..2fde9b5d4 100644 --- a/src/log/apitest/tet_LogOiOps.c +++ b/src/log/apitest/tet_LogOiOps.c @@ -1729,7 +1729,7 @@ void change_root_path(void) } // Verify if the directory and subdirectly are created successfully - sleep(1); // to make sure logsv done processing of directories creation + sleep(2); // to make sure logsv done processing of directories creation sprintf(command, "ls %s/testRoot 1>/dev/null", tstdir); rc = systemCall(command); if (rc != 0) { -- 2.17.1 ___ Opensaf-devel mailing list Opensaf-devel@lists.sourceforge.net https://lists.sourceforge.net/lists/listinfo/opensaf-devel
Re: [devel] [PATCH 1/1] lgs: map the CkptPushAsync to the right memory [#3183]
Hi Thien, ACK from me. Best Regards, ThuanTr -Original Message- From: Thien Minh Huynh Sent: Tuesday, May 12, 2020 1:33 PM To: Thuan Tran ; Vu Minh Nguyen Cc: opensaf-devel@lists.sourceforge.net; Thien Minh Huynh Subject: [PATCH 1/1] lgs: map the CkptPushAsync to the right memory [#3183] The standby logsv is crashed during cold sync if having pending write requests in the queue.That happens because the CkptPushAsync data for decoding is referring to wrong data. The fix is to map the CkptPushAsync to the right memory. --- src/log/logd/lgs_cache.cc | 6 +- 1 file changed, 1 insertion(+), 5 deletions(-) diff --git a/src/log/logd/lgs_cache.cc b/src/log/logd/lgs_cache.cc index e3583e97c..27e33702d 100644 --- a/src/log/logd/lgs_cache.cc +++ b/src/log/logd/lgs_cache.cc @@ -344,14 +344,10 @@ int Cache::DecodeColdSync(NCS_UBAID* uba, lgsv_ckpt_header_t* header, uint32_t num_rec = header->num_ckpt_records; int rc = NCSCC_RC_SUCCESS; EDU_ERR ederror; - lgsv_ckpt_msg_v8_t msg_v8; - auto data = _v8.ckpt_rec.push_async; - CkptPushAsync* cache_data; while (num_rec) { -cache_data = data; rc = m_NCS_EDU_EXEC(_cb->edu_hdl, EncodeDecodePushAsync, uba, EDP_OP_TYPE_DEC, -_data, ); +vckpt_rec, ); if (rc != NCSCC_RC_SUCCESS) { m_NCS_EDU_PRINT_ERROR_STRING(ederror); return rc; -- 2.17.1 ___ Opensaf-devel mailing list Opensaf-devel@lists.sourceforge.net https://lists.sourceforge.net/lists/listinfo/opensaf-devel
Re: [devel] [PATCH 1/1] lgs: fix data CkptPushAsync equals with encode cold sync [#3183]
Hi Thien, In my understanding, push_async is vckpt_rec. I wonder if below idea can solve the issue? diff --git a/src/log/logd/lgs_cache.cc b/src/log/logd/lgs_cache.cc index e3583e97c..27e33702d 100644 --- a/src/log/logd/lgs_cache.cc +++ b/src/log/logd/lgs_cache.cc @@ -344,14 +344,10 @@ int Cache::DecodeColdSync(NCS_UBAID* uba, lgsv_ckpt_header_t* header, uint32_t num_rec = header->num_ckpt_records; int rc = NCSCC_RC_SUCCESS; EDU_ERR ederror; - lgsv_ckpt_msg_v8_t msg_v8; - auto data = _v8.ckpt_rec.push_async; - CkptPushAsync* cache_data; while (num_rec) { -cache_data = data; rc = m_NCS_EDU_EXEC(_cb->edu_hdl, EncodeDecodePushAsync, uba, EDP_OP_TYPE_DEC, -_data, ); +vckpt_rec, ); if (rc != NCSCC_RC_SUCCESS) { m_NCS_EDU_PRINT_ERROR_STRING(ederror); return rc; Best Regards, Thuan From: Thien Minh Huynh Sent: Monday, May 11, 2020 2:51 PM To: Vu Minh Nguyen ; Thuan Tran Cc: opensaf-devel@lists.sourceforge.net Subject: RE: [PATCH 1/1] lgs: fix data CkptPushAsync equals with encode cold sync [#3183] Hi Vu, Thanks for your comments. Best Regards, ThienHuynh -Original Message- From: Vu Minh Nguyen Sent: Monday, May 11, 2020 2:30 PM To: Thien Minh Huynh ; Thuan Tran Cc: opensaf-devel@lists.sourceforge.net Subject: Re: [PATCH 1/1] lgs: fix data CkptPushAsync equals with encode cold sync [#3183] Ack with minor comments. On 5/11/20 2:11 PM, thien.m.huynh wrote: > When NFS is unavailable, client try to write to log. Lgs server will > put it into the queue with the time. At this time, standby node > startup and cold sync. Cause of coredump due to duplicate data > (CkptPushAsync) to put queue is NULL. > The fix is adding a parametar CkptPushAsync into DecodeColdSync to get > data for ckpt_proc_push_async more correctly. [Vu] The description is unclear to me. Here is my suggestion: The standby logsv is crashed during cold sync if having pending write requests in the queue. That happens because the CkptPushAsync data for decoding is referring to wrong data. The fix is to map the CkptPushAsync to the right memory. [Vu] The slogan should be updated too. > --- > src/log/logd/lgs_cache.cc | 10 +++--- > src/log/logd/lgs_cache.h | 4 ++-- > src/log/logd/lgs_mbcsv.cc | 6 +- > 3 files changed, 10 insertions(+), 10 deletions(-) > > diff --git a/src/log/logd/lgs_cache.cc b/src/log/logd/lgs_cache.cc > index e3583e97c..ca25e681c 100644 > --- a/src/log/logd/lgs_cache.cc > +++ b/src/log/logd/lgs_cache.cc > @@ -324,8 +324,8 @@ int Cache::EncodeColdSync(NCS_UBAID* uba) const { > } > > int Cache::DecodeColdSync(NCS_UBAID* uba, lgsv_ckpt_header_t* header, > - void* vdata, void** vckpt_rec, > - size_t ckpt_rec_size) const { > + CkptPushAsync* pasync, void* vdata, > + void** vckpt_rec, size_t ckpt_rec_size) > + const { > TRACE_ENTER(); > assert(is_active() == false && "This instance does not run with standby > role"); > if (lgs_is_peer_v8() == false) return NCSCC_RC_SUCCESS; @@ -344,14 > +344,10 @@ int Cache::DecodeColdSync(NCS_UBAID* uba, lgsv_ckpt_header_t* > header, > uint32_t num_rec = header->num_ckpt_records; > int rc = NCSCC_RC_SUCCESS; > EDU_ERR ederror; > - lgsv_ckpt_msg_v8_t msg_v8; > - auto data = _v8.ckpt_rec.push_async; > - CkptPushAsync* cache_data; > while (num_rec) { > -cache_data = data; > rc = m_NCS_EDU_EXEC(_cb->edu_hdl, EncodeDecodePushAsync, > uba, EDP_OP_TYPE_DEC, > -_data, ); > +, ); > if (rc != NCSCC_RC_SUCCESS) { > m_NCS_EDU_PRINT_ERROR_STRING(ederror); > return rc; > diff --git a/src/log/logd/lgs_cache.h b/src/log/logd/lgs_cache.h index > a5d6181fb..98ea6791b 100644 > --- a/src/log/logd/lgs_cache.h > +++ b/src/log/logd/lgs_cache.h > @@ -251,8 +251,8 @@ class Cache { > int EncodeColdSync(NCS_UBAID* uba) const; > // Decode the queue on stanby side. > int DecodeColdSync(NCS_UBAID* uba, lgsv_ckpt_header_t* header, > - void* vdata, void** vckpt_rec, > - size_t ckpt_rec_size) const; > + CkptPushAsync* pasync, void* vdata, > + void** vckpt_rec, size_t ckpt_rec_size) const; > >private: > // Private constructor to not allow to instantiate this object > directly, diff --git a/src/log/logd/lgs_mbcsv.cc > b/src/log/logd/lgs_mbcsv.cc index 6ec004f0a..7d097fc28 100644 > --- a/src/log/logd/lgs_mbcsv.cc > +++ b/src/log/logd/lgs_mbcsv.cc > @@ -1677,6 +1677,
Re: [devel] [PATCH 1/1] ntf: fix ntfimcn fail to send notification with no space error [#3181]
Hi Minh, Regarding to check max unit32, I think it's not necessary. Because the atoi() returns the converted integral number as an int value. It cannot bigger than max of uint32. Best Regards, Thuan From: Minh Hon Chau Sent: Monday, May 4, 2020 12:37 PM To: Thuan Tran ; Thang Duc Nguyen Cc: opensaf-devel@lists.sourceforge.net Subject: Re: [PATCH 1/1] ntf: fix ntfimcn fail to send notification with no space error [#3181] Hi Thuan Ack with comment. I think we need to check the max value of unit32t for ntf_var_data_limit when we source from the env var. Thanks Minh On 27/4/20 9:05 pm, thuan.tran wrote: > - Support NTFA_VARIABLE_DATA_LIMIT configuration for NTF Agent. > Default value is SHRT_MAX(32767). > - In system that object creation may have many info attributes/values, > it should configure this env variable to suitable value for ntfimcn > able send notification. > --- > src/ntf/agent/ntfa_util.c | 13 - > src/ntf/ntfd/ntfd.conf | 4 > src/ntf/ntfimcnd/ntfimcn_imm.c | 18 -- > 3 files changed, 28 insertions(+), 7 deletions(-) > > diff --git a/src/ntf/agent/ntfa_util.c b/src/ntf/agent/ntfa_util.c > index 5bc859259..379348ab5 100644 > --- a/src/ntf/agent/ntfa_util.c > +++ b/src/ntf/agent/ntfa_util.c > @@ -60,8 +60,19 @@ static unsigned int ntfa_create(void) >/* No longer needed */ >m_NCS_SEL_OBJ_DESTROY(_cb.ntfs_sync_sel); > > - /* TODO: fix env variable */ > + char *ptr = NULL; > + int optval = 0; >ntfa_cb.ntf_var_data_limit = NTFA_VARIABLE_DATA_LIMIT; > + if ((ptr = getenv("NTFA_VARIABLE_DATA_LIMIT")) != NULL) { > + optval = atoi(ptr); > + if (optval > 0) { > + ntfa_cb.ntf_var_data_limit = optval; > + LOG_NO("NTFA_VARIABLE_DATA_LIMIT=%d", optval); > + } else { > + LOG_WA("Invalid NTFA_VARIABLE_DATA_LIMIT, using default > %d", > +NTFA_VARIABLE_DATA_LIMIT); > + } > + } >return rc; > > error: > diff --git a/src/ntf/ntfd/ntfd.conf b/src/ntf/ntfd/ntfd.conf > index 91bfcd2e2..f2f67496f 100644 > --- a/src/ntf/ntfd/ntfd.conf > +++ b/src/ntf/ntfd/ntfd.conf > @@ -24,6 +24,10 @@ export NTFSV_ENV_HEALTHCHECK_KEY="Default" > # directory and the directory component of the path name (if any) is > ignored. > #export NTFSCN_TRACE_PATHNAME=osafntfcn > > +# Uncomment the next line to configure max allowed variable data size for the > +# osafntfcn (configuration notifier). Default value is 32767 bytes > +#export NTFA_VARIABLE_DATA_LIMIT=32767 > + > # Only log priority LOG_WARNING and higher to the system log file. > # All logging will be recorded in a new node local log file > $PKGLOGDIR/osaf.log. > # Uncomment the next line to enable this service to log to OpenSAF node > local log file. > diff --git a/src/ntf/ntfimcnd/ntfimcn_imm.c b/src/ntf/ntfimcnd/ntfimcn_imm.c > index c58e8a268..3f2c1a873 100644 > --- a/src/ntf/ntfimcnd/ntfimcn_imm.c > +++ b/src/ntf/ntfimcnd/ntfimcn_imm.c > @@ -680,8 +680,10 @@ static void saImmOiCcbApplyCallback(SaImmOiHandleT > immOiHandle, >ccbUtilOperationData, rdn_attr_name, ccbLast); >if (internal_rc != 0) { >LOG_ER( > - "%s send_object_create_notification fail", > - __FUNCTION__); > + "%s send_object_create_notification %s > fail", > + __FUNCTION__, > + osaf_extended_name_borrow( > + >objectName)); >goto done; >} >break; > @@ -706,8 +708,10 @@ static void saImmOiCcbApplyCallback(SaImmOiHandleT > immOiHandle, >ccbUtilOperationData, invoke_name_ptr, ccbLast); >if (internal_rc != 0) { >LOG_ER( > - "%s send_object_delete_notification fail", > - __FUNCTION__); > + "%s send_object_delete_notification %s > fail", > + __FUNCTION__, > + osaf_extended_name_borrow( > + >objectName)); >goto done; >} >break; > @@ -720,8 +724,10 @@ sta
Re: [devel] [PATCH 1/1] ntf: set operation invoke name to unknown if failed to get it [#3178]
Hi Thang, ACK Best Regards, ThuanTr -Original Message- From: Thang Duc Nguyen Sent: Wednesday, April 22, 2020 11:47 AM To: Minh Hon Chau ; Thuan Tran Cc: opensaf-devel@lists.sourceforge.net; Thang Duc Nguyen Subject: [PATCH 1/1] ntf: set operation invoke name to unknown if failed to get it [#3178] If ntfimcnd is restarted during ccb modify, the modify callback will not contain the invoke name( implementer name or admin owner name). In this case, the invoke name will be set to "unknown" and the ntfimcnd can continue with sending notification. The notification will contain only op that receive by the ntfimcnd. It means notification will lost the op before ntfimcnd restarted. --- src/ntf/ntfimcnd/ntfimcn_imm.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/src/ntf/ntfimcnd/ntfimcn_imm.c b/src/ntf/ntfimcnd/ntfimcn_imm.c index 3c0a8c02a..c58e8a268 100644 --- a/src/ntf/ntfimcnd/ntfimcn_imm.c +++ b/src/ntf/ntfimcnd/ntfimcn_imm.c @@ -376,9 +376,8 @@ get_operation_invoke_name_modify(SaImmOiCcbIdT ccbId, goto done; } } - /* If we get here no name is found! */ - LOG_ER("%s no name was found", __FUNCTION__); - osafassert(0); + /* ntfimcnd was restarted durinng ccb modification */ + osaf_extended_name_alloc("unknown", operation_invoke_name); done: TRACE_LEAVE(); -- 2.17.1 ___ Opensaf-devel mailing list Opensaf-devel@lists.sourceforge.net https://lists.sourceforge.net/lists/listinfo/opensaf-devel
Re: [devel] [PATCH 1/1] ntf: set operation invoke name to unknown if failed to get it [#3178]
Hi Thang, Some comments inline. Best Regards, ThuanTr -Original Message- From: Thang Duc Nguyen Sent: Tuesday, April 21, 2020 5:49 PM To: Thuan Tran ; Minh Hon Chau Cc: opensaf-devel@lists.sourceforge.net; Thang Duc Nguyen Subject: [PATCH 1/1] ntf: set operation invoke name to unknown if failed to get it [#3178] If ntfimcnd is restarted during ccb modify, the modify callback will not contain the invoke name( implementer name or admin owner name). In this case, the invoke name will be set to "unknown" and the ntfimcnd can continue with sending notification. The notification will contain only op that receive by the ntfimcnd. It means notification will lost the op before ntfimcnd restarted. --- src/ntf/ntfimcnd/ntfimcn_imm.c | 11 --- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/src/ntf/ntfimcnd/ntfimcn_imm.c b/src/ntf/ntfimcnd/ntfimcn_imm.c index 3c0a8c02a..2709b3337 100644 --- a/src/ntf/ntfimcnd/ntfimcn_imm.c +++ b/src/ntf/ntfimcnd/ntfimcn_imm.c @@ -376,9 +376,8 @@ get_operation_invoke_name_modify(SaImmOiCcbIdT ccbId, goto done; } } - /* If we get here no name is found! */ - LOG_ER("%s no name was found", __FUNCTION__); - osafassert(0); + osaf_extended_name_free(operation_invoke_name); [Thuan] just free(operation_invoke_name) because osaf_extended_name_alloc() is not executed. [Thuan] But instead of free() then check NULL outside this function to set "unknown", we can set inside this function. osaf_extended_name_alloc("unknown", operation_invoke_name); [Thuan] Also do same change for get_operation_invoke_name_create() + operation_invoke_name = NULL; done: TRACE_LEAVE(); @@ -558,6 +557,12 @@ saImmOiCcbObjectModifyCallback(SaImmOiHandleT immOiHandle, SaImmOiCcbIdT ccbId, } invoke_name_ptr = get_operation_invoke_name_modify(ccbId, attrMods); + // when ccb ntfimcnd restarted during ccb modification + if (invoke_name_ptr == NULL) { + invoke_name_ptr = malloc(sizeof(SaNameT)); + osaf_extended_name_lend("unknown", invoke_name_ptr); + } + ccbUtilCcbData->userData = invoke_name_ptr; } -- 2.17.1 ___ Opensaf-devel mailing list Opensaf-devel@lists.sourceforge.net https://lists.sourceforge.net/lists/listinfo/opensaf-devel
Re: [devel] [PATCH 1/1] ntf: restart ntfimcnd if it fails to get operation invoke name [#3178]
Hi, If there is no way to get admin owner or object implementer in middle of one CCB many operations. Then a "unknown" invoker is better than keep restarting by each operation of that CCB. Best Regards, ThuanTr -Original Message- From: Thang Duc Nguyen Sent: Tuesday, April 21, 2020 8:39 AM To: Thang Duc Nguyen ; Minh Hon Chau ; Thuan Tran Cc: opensaf-devel@lists.sourceforge.net Subject: RE: [PATCH 1/1] ntf: restart ntfimcnd if it fails to get operation invoke name [#3178] Update. If we accept to avoid coredump, there is @operation_invoke_name that needs to be freed before exit? [Thang]: as above can fill invoke_name as unknown in this case to avoid the coredump. And free in applyccbcb. -Original Message- From: Thang Duc Nguyen Sent: Tuesday, April 21, 2020 8:29 AM To: Minh Hon Chau ; Thuan Tran Cc: opensaf-devel@lists.sourceforge.net Subject: Re: [devel] [PATCH 1/1] ntf: restart ntfimcnd if it fails to get operation invoke name [#3178] Hi Minh, See my command inline. -Original Message- From: Minh Hon Chau Sent: Monday, April 20, 2020 5:24 PM To: Thang Duc Nguyen ; Thuan Tran Cc: opensaf-devel@lists.sourceforge.net Subject: Re: [PATCH 1/1] ntf: restart ntfimcnd if it fails to get operation invoke name [#3178] Hi Thang, I understand the invoke_name is only present in the first callback, thus ntfimcn must memorize it in the userdata. My question is, is it ok that this userdata being lost because ntfimcn restart? I think it is, since the ccb has not committed. [Thang]: can accept it and fill invoke_name as unknown instead of do nothing. If we accept the userdata being lost, then we can look at to avoid the coredump, otherwise Thuan can give an idea if it is imm issue that causes the lost userdata. If we accept to avoid coredump, there is @operation_invoke_name that needs to be freed before exit? [Thang]: as above can fill invoke_name as unknown in this case to avoid the coredump. thanks Minh On 20/4/20 6:30 pm, Thang Duc Nguyen wrote: > Hi Minh, > > See my comment inline. > > -Original Message- > From: Minh Hon Chau > Sent: Monday, April 20, 2020 11:51 AM > To: Thuan Tran ; Thang Duc Nguyen > > Cc: opensaf-devel@lists.sourceforge.net > Subject: Re: [PATCH 1/1] ntf: restart ntfimcnd if it fails to get > operation invoke name [#3178] > > Hi, > > One similarity to #2859 is that the invoke_name is only present in the first > callback, so ntfimcn must memorize it in ccb userdata. > > But after ntfimcn calls ccbutil_ccbAddModifyOperation, this userdata is not > written to immnd and sync across the other immnd(s)? > Meanings the userdata is only stored in imm agent? So after switchover, the > next ccb callback does not have the invoke_name, and ntfimcn has lost its > user data since restart. > > [Thang]: with a ccb with multi ops. The invoke_name, in this case only the > first op contain the adminOwnername. And after ntfimcnd restarts, it received > the seond or larger op modify. And this modify callback does not contain any > more about this invoke_name. > Maybe we can retrieve the invoke_name from imm db but we can not got all info > about all ops in that ccb. > > Thanks > > Minh > > On 16/4/20 3:32 pm, Thuan Tran wrote: >> Hi, >> >> I think this is just enhancement, not an urgent fix. >> Then we should make it better if possible. >> >> About #2859, I am not reviewer at that time. >> But I would not agree that solution as we can see service keep >> restart if service still start in middle of one CCB many operations. >> >> Best Regards, >> ThuanTr >> >> -Original Message- >> From: Thang Duc Nguyen >> Sent: Thursday, April 16, 2020 10:51 AM >> To: Thuan Tran ; Minh Hon Chau >> >> Cc: opensaf-devel@lists.sourceforge.net >> Subject: RE: [PATCH 1/1] ntf: restart ntfimcnd if it fails to get >> operation invoke name [#3178] >> >> Hi Thuan, >> >> Thanks for your comment. >> First this issue happen only in specific situation. And I think restart it >> is no cause big issue. >> And the ccb is internal data based mange by ntf/ntfimcnd. After >> ntfimcnd restart, it reinitialize CcbUtilCcbData and operation invoke name >> is empty. >> >> Moreover, in current code in ntfimcn_imm.c, there are many place use >> imcn_exit(EXIT_FAILURE) when detect the error. Example for this is #2859. >> We consider to open a new ticket to consider your suggestion by >> refactor/change current behavior of ntfimcnd. >> >> B.R/Thang >> >> -Original Message- >> From: Thuan Tran >> Sent: Thursday, April 16, 2020 10:16 AM >> To: Thang Duc Nguyen ; Minh Hon Chau >> >> C
Re: [devel] [PATCH 1/1] ntf: restart ntfimcnd if it fails to get operation invoke name [#3178]
Hi, I think this is just enhancement, not an urgent fix. Then we should make it better if possible. About #2859, I am not reviewer at that time. But I would not agree that solution as we can see service keep restart if service still start in middle of one CCB many operations. Best Regards, ThuanTr -Original Message- From: Thang Duc Nguyen Sent: Thursday, April 16, 2020 10:51 AM To: Thuan Tran ; Minh Hon Chau Cc: opensaf-devel@lists.sourceforge.net Subject: RE: [PATCH 1/1] ntf: restart ntfimcnd if it fails to get operation invoke name [#3178] Hi Thuan, Thanks for your comment. First this issue happen only in specific situation. And I think restart it is no cause big issue. And the ccb is internal data based mange by ntf/ntfimcnd. After ntfimcnd restart, it reinitialize CcbUtilCcbData and operation invoke name is empty. Moreover, in current code in ntfimcn_imm.c, there are many place use imcn_exit(EXIT_FAILURE) when detect the error. Example for this is #2859. We consider to open a new ticket to consider your suggestion by refactor/change current behavior of ntfimcnd. B.R/Thang -Original Message- From: Thuan Tran Sent: Thursday, April 16, 2020 10:16 AM To: Thang Duc Nguyen ; Minh Hon Chau Cc: opensaf-devel@lists.sourceforge.net Subject: RE: [PATCH 1/1] ntf: restart ntfimcnd if it fails to get operation invoke name [#3178] Hi Thang, >From reproduce method, with solution after exit (instead of crash), user >continue input another operation then service exit again. The point is why we cannot get admin owner or object implementer via 2nd imm modify callback in this scenario? Is it an IMM limit that don't include admin owner or object implementer from 2nd modify callback? If limit, can we use another way to get admin owner or object implementer base on object name? By this, we can avoid continuous exit if user keep going on operations by same CCB. Best Regards, ThuanTr -Original Message- From: Thang Duc Nguyen Sent: Wednesday, April 15, 2020 3:43 PM To: Minh Hon Chau ; Thuan Tran Cc: opensaf-devel@lists.sourceforge.net; Thang Duc Nguyen Subject: [PATCH 1/1] ntf: restart ntfimcnd if it fails to get operation invoke name [#3178] If ntfimcnd is restarted during ccb modify, it will initialize ccbUtilCcbData that not contain operation invoke name. This causes ntfimcnd crashed due to operation invoke name not existed. The fix is to restart ntfimcnd instead of raising the coredump. --- src/ntf/ntfimcnd/ntfimcn_imm.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/ntf/ntfimcnd/ntfimcn_imm.c b/src/ntf/ntfimcnd/ntfimcn_imm.c index 3c0a8c02a..3563a2264 100644 --- a/src/ntf/ntfimcnd/ntfimcn_imm.c +++ b/src/ntf/ntfimcnd/ntfimcn_imm.c @@ -376,9 +376,9 @@ get_operation_invoke_name_modify(SaImmOiCcbIdT ccbId, goto done; } } - /* If we get here no name is found! */ + /* ntfimcnd was restarted during ccb midify */ LOG_ER("%s no name was found", __FUNCTION__); - osafassert(0); + imcn_exit(EXIT_FAILURE); done: TRACE_LEAVE(); -- 2.17.1 ___ Opensaf-devel mailing list Opensaf-devel@lists.sourceforge.net https://lists.sourceforge.net/lists/listinfo/opensaf-devel
Re: [devel] [PATCH 1/1] ntf: restart ntfimcnd if it fails to get operation invoke name [#3178]
Hi Thang, >From reproduce method, with solution after exit (instead of crash), user >continue input another operation then service exit again. The point is why we cannot get admin owner or object implementer via 2nd imm modify callback in this scenario? Is it an IMM limit that don't include admin owner or object implementer from 2nd modify callback? If limit, can we use another way to get admin owner or object implementer base on object name? By this, we can avoid continuous exit if user keep going on operations by same CCB. Best Regards, ThuanTr -Original Message- From: Thang Duc Nguyen Sent: Wednesday, April 15, 2020 3:43 PM To: Minh Hon Chau ; Thuan Tran Cc: opensaf-devel@lists.sourceforge.net; Thang Duc Nguyen Subject: [PATCH 1/1] ntf: restart ntfimcnd if it fails to get operation invoke name [#3178] If ntfimcnd is restarted during ccb modify, it will initialize ccbUtilCcbData that not contain operation invoke name. This causes ntfimcnd crashed due to operation invoke name not existed. The fix is to restart ntfimcnd instead of raising the coredump. --- src/ntf/ntfimcnd/ntfimcn_imm.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/ntf/ntfimcnd/ntfimcn_imm.c b/src/ntf/ntfimcnd/ntfimcn_imm.c index 3c0a8c02a..3563a2264 100644 --- a/src/ntf/ntfimcnd/ntfimcn_imm.c +++ b/src/ntf/ntfimcnd/ntfimcn_imm.c @@ -376,9 +376,9 @@ get_operation_invoke_name_modify(SaImmOiCcbIdT ccbId, goto done; } } - /* If we get here no name is found! */ + /* ntfimcnd was restarted during ccb midify */ LOG_ER("%s no name was found", __FUNCTION__); - osafassert(0); + imcn_exit(EXIT_FAILURE); done: TRACE_LEAVE(); -- 2.17.1 ___ Opensaf-devel mailing list Opensaf-devel@lists.sourceforge.net https://lists.sourceforge.net/lists/listinfo/opensaf-devel
Re: [devel] [PATCH 1/1] amfnd: correct checking su assignement pending flag [#3176]
Hi Thang, ACK. Best Regards, ThuanTr -Original Message- From: Thang Duc Nguyen Sent: Tuesday, April 14, 2020 9:16 PM To: Thuan Tran ; Minh Hon Chau Cc: opensaf-devel@lists.sourceforge.net; Thang Duc Nguyen Subject: [PATCH 1/1] amfnd: correct checking su assignement pending flag [#3176] Su-si assigment flag only is set when amfnd receives request from amfd. In SU restart escalation, the re-assignment is handled by amfnd internally. And this flag is not set in this situation. When SU is in assigning after SU restart due to escalation. The component failed and amfnd escalate it to component failover. The Amfnd will try to mark su-si as assigned temporaryly to remove assignment later. But amfnd crashes due to fail to check su-si assignment flag. In su-si assignment/removal response to AvD, the pending flag is only checked if su_oper_state is enable. --- src/amf/amfnd/di.cc | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/src/amf/amfnd/di.cc b/src/amf/amfnd/di.cc index 1f310b949..7b018682b 100644 --- a/src/amf/amfnd/di.cc +++ b/src/amf/amfnd/di.cc @@ -882,8 +882,9 @@ uint32_t avnd_di_susi_resp_send(AVND_CB *cb, AVND_SU *su, AVND_SU_SI_REC *si) { if (cb->term_state == AVND_TERM_STATE_OPENSAF_SHUTDOWN_STARTED) return rc; - // should be in assignment pending state to be here - osafassert(m_AVND_SU_IS_ASSIGN_PEND(su)); + if (m_AVND_SU_OPER_STATE_IS_ENABLED(su)) +// should be in assignment pending state to be here +osafassert(m_AVND_SU_IS_ASSIGN_PEND(su)); /* get the curr-si */ curr_si = (si) ? si : (AVND_SU_SI_REC *)m_NCS_DBLIST_FIND_FIRST(>si_list); -- 2.17.1 ___ Opensaf-devel mailing list Opensaf-devel@lists.sourceforge.net https://lists.sourceforge.net/lists/listinfo/opensaf-devel
Re: [devel] [PATCH 1/1] log: log content is placed in a file of another stream [#3175]
Hi Thien, I have same opinion with Thang, it looks redundant for these variables. Just need check and return directly, or you can write comments for clear. // Return 0 if file name is not started with specific file name if (strncmp(finfo->d_name, file_name_find_g.c_str(), name_len) != 0) return 0; // Return 0 if next part of file name is not underscored character If (finfo->d_name[day_offset - 1] != '_') return 0; // Return 0 if next part of file name is not mmdd format // Return 0 if next part of file name is not underscored character // Return 0 if next part of file name is not hhmmss format // Return 0 if last part of file name is not ".log" // Return 1 when file name is completely matching the format. return 1 Best Regards, ThuanTr -Original Message- From: Thien Minh Huynh Sent: Monday, April 13, 2020 11:29 AM To: Thang Duc Nguyen ; Thuan Tran ; Vu Minh Nguyen Cc: opensaf-devel@lists.sourceforge.net Subject: RE: [devel] [PATCH 1/1] log: log content is placed in a file of another stream [#3175] Hi Thang, Thanks for your comment. The purpose check through variable, it is clearer. Because the variable name tells me what I want to check and easy to view code. In the same function all_digits(), One then check mmdd and one then check hhmmss. Best Regards, ThienHuynh -Original Message- From: Thang Duc Nguyen Sent: Monday, April 13, 2020 11:00 AM To: Thien Minh Huynh ; Thuan Tran ; Vu Minh Nguyen Cc: opensaf-devel@lists.sourceforge.net Subject: RE: [devel] [PATCH 1/1] log: log content is placed in a file of another stream [#3175] Hi Thien, Just a minor comment. Instead of using many variables then just use them to compare with 0/1 or true/false. I think it's better to use if directly. E.g, + bool hhmmss_format = all_digits(finfo->d_name + time_offset, + time_length); if (hhmmss_format == false) return 0; -> // time format + if (!all_digits(finfo->d_name + time_offset, time_length) ) + return 0; -Original Message- From: thien.m.huynh Sent: Monday, April 13, 2020 9:49 AM To: Thuan Tran ; Vu Minh Nguyen Cc: opensaf-devel@lists.sourceforge.net Subject: [devel] [PATCH 1/1] log: log content is placed in a file of another stream [#3175] Replace a previous patch of this ticket due to regex is not yet fully supported by gcc 4.8. --- src/log/logd/lgs_filehdl.cc | 53 +++-- 1 file changed, 45 insertions(+), 8 deletions(-) diff --git a/src/log/logd/lgs_filehdl.cc b/src/log/logd/lgs_filehdl.cc index 9f4e27979..1743c53fd 100644 --- a/src/log/logd/lgs_filehdl.cc +++ b/src/log/logd/lgs_filehdl.cc @@ -26,7 +26,6 @@ #include #include #include -#include #include "base/logtrace.h" #include "base/osaf_time.h" @@ -955,6 +954,19 @@ static int chr_cnt_b(char *str, char c, int lim) { return cnt; } +/** + * Check if all character is decimal digit + * @param data string to be checked + * @param size number of characters to check + * @return: true if all character is decimal digit */ static bool +all_digits(const char *data, int size) { + for (int i = 0; i < size; i++) { +if (!isdigit(data[i])) return false; + } + return true; +} + /** * Filter function used by scandir. * Find a current log file if it exist @@ -965,13 +977,38 @@ static int chr_cnt_b(char *str, char c, int lim) { /* Filename prefix (no timestamps or extension */ static std::string file_name_find_g; static int filter_logfile_name(const struct dirent *finfo) { - const std::string pattern = - "^" + file_name_find_g + "_[0-9]{8}_[0-9]{6}.log$"; - const std::regex e{pattern}; - if (std::regex_match(finfo->d_name, e)) { -return 1; - } - return 0; + size_t name_len = strlen(file_name_find_g.c_str()); size_t + fixed_length = name_len + strlen("_mmdd_hhmmss.log"); + + if (strlen(finfo->d_name) != fixed_length) return 0; + + size_t day_length = strlen("mmdd"); size_t time_length = + strlen("hhmmss"); int day_offset = 1 + name_len; int time_offset = 1 + + day_offset + day_length; int extension_offset = time_offset + + time_length; + + bool start_with_filename = + strncmp(finfo->d_name, file_name_find_g.c_str(), name_len) == 0; + if (start_with_filename == false) return 0; + + bool underscore_1 = finfo->d_name[day_offset - 1] == '_'; if + (underscore_1 == false) return 0; + + bool mmdd_format = all_digits(finfo->d_name + day_offset, + day_length); if (mmdd_format == false) return 0; + + bool underscore_2 = finfo->d_name[time_offset - 1] == '_'; if + (underscore_2 == false) return 0; + + bool hhmmss_format = all_digits(finfo->d_name + time_offset, + time_length); if (hhmmss_format == false) return 0; + + bool log_extension = + strncmp(finfo->d_name + extension_offset, ".log", strlen(".log")) + == 0; if (log_extension == f
Re: [devel] [PATCH 1/1] log: log content is placed in a file of another stream [#3175]
Hi Thien, ACK with minor comment inline. Best Regards, ThuanTr -Original Message- From: Vu Minh Nguyen Sent: Tuesday, April 7, 2020 5:27 PM To: Thien Minh Huynh ; Thuan Tran Cc: opensaf-devel@lists.sourceforge.net Subject: Re: [PATCH 1/1] log: log content is placed in a file of another stream [#3175] Ack. Regards, Vu On 4/7/20 4:33 PM, thien.m.huynh wrote: > --- > src/log/logd/lgs_filehdl.cc | 15 --- > 1 file changed, 8 insertions(+), 7 deletions(-) > > diff --git a/src/log/logd/lgs_filehdl.cc b/src/log/logd/lgs_filehdl.cc > index 0d7fb2b74..362df4c27 100644 > --- a/src/log/logd/lgs_filehdl.cc > +++ b/src/log/logd/lgs_filehdl.cc > @@ -26,6 +26,7 @@ > #include > #include > #include > +#include > > #include "base/logtrace.h" > #include "base/osaf_time.h" > @@ -964,13 +965,13 @@ static int chr_cnt_b(char *str, char c, int lim) { > /* Filename prefix (no timestamps or extension */ > static std::string file_name_find_g; > static int filter_logfile_name(const struct dirent *finfo) { > - int found = 0; > - > - if ((strstr(finfo->d_name, file_name_find_g.c_str()) != nullptr) && > - (strstr(finfo->d_name, ".log") != nullptr)) > -found = 1; > - > - return found; > + const std::string pattern = > + "^" + file_name_find_g + "_[0-9]{8}_[0-9]{6}.log$"; > + const std::regex e{pattern}; > + if (std::regex_match(finfo->d_name, e)) { > +return true; > + } > + return false; [Thuan] use return value 0/1 as before (match function prototype). > } > > /** ___ Opensaf-devel mailing list Opensaf-devel@lists.sourceforge.net https://lists.sourceforge.net/lists/listinfo/opensaf-devel
Re: [devel] [PATCH 1/1] log: log content is placed in a file of another stream [#3175]
Hi Thien, If d_name = "abcd1_123xxx" and file_name_find_g = "abc", then your code still match. Can you try to use regex which is available in c++11? For example: #include ... std::regex pattern1(file_name_find_g + "_[0-9]+_[0-9]+.log"); std::regex pattern2(file_name_find_g + "_[0-9]+_[0-9]+_[0-9]+_[0-9]+.log"); if (std::regex_match(finfo->d_name, pattern1) || std::regex_match(finfo->d_name, pattern2)) { ... } Not yet verify, just the idea. Best Regards, ThuanTr -Original Message- From: Thien Minh Huynh Sent: Tuesday, April 7, 2020 1:21 PM To: Thuan Tran ; Vu Minh Nguyen Cc: opensaf-devel@lists.sourceforge.net; Thien Minh Huynh Subject: [PATCH 1/1] log: log content is placed in a file of another stream [#3175] --- src/log/logd/lgs_filehdl.cc | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/src/log/logd/lgs_filehdl.cc b/src/log/logd/lgs_filehdl.cc index 0d7fb2b74..238259454 100644 --- a/src/log/logd/lgs_filehdl.cc +++ b/src/log/logd/lgs_filehdl.cc @@ -965,8 +965,10 @@ static int chr_cnt_b(char *str, char c, int lim) { static std::string file_name_find_g; static int filter_logfile_name(const struct dirent *finfo) { int found = 0; + int len = file_name_find_g.length(); - if ((strstr(finfo->d_name, file_name_find_g.c_str()) != nullptr) && + if ((strncmp(finfo->d_name, file_name_find_g.c_str(), len) == 0) && + isdigit(finfo->d_name[len + 1]) && (strstr(finfo->d_name, ".log") != nullptr)) found = 1; -- 2.17.1 ___ Opensaf-devel mailing list Opensaf-devel@lists.sourceforge.net https://lists.sourceforge.net/lists/listinfo/opensaf-devel
Re: [devel] [PATCH 1/1] mds: fix mds_tipc_fctrl_shutdown() stuck forever [#3169]
Hi Thanh, Thanks for reviewing. Yes, it is possible use that global variable instead of running, but I prefer running. Because I need a local variable to control while loop, not mess up with is_fctrl_enabled (global var). Best Regards, ThuanTr -Original Message- From: Thanh Nguyen Sent: Tuesday, March 31, 2020 10:01 AM To: Thuan Tran ; Minh Hon Chau ; Thang Duc Nguyen Cc: opensaf-devel@lists.sourceforge.net Subject: RE: [devel] [PATCH 1/1] mds: fix mds_tipc_fctrl_shutdown() stuck forever [#3169] Hi Thuan, I have a minor comment and also an ACK. Dealing with event interactions, we try not to introduce new variable such as running. We tend to use existing variable as much as we can. This will make further interactions update simpler. The existing variable is_fctrl_enabled can be used without introducing the new variable running. >From my observation of this code, on the logical side, is_fctrl_enabled == true ==> running == true is_fctrl_enabled == false ==> running == false and running == true ==> is_fctrl_enabled == true running == false ==> is_fctrl_enabled == false Thus is_fctrl_enabled is identical to running. Here is the practical side 1) is_fctrl_enabled == true; the process_all_events() loop runs. is_fctrl_enable == false; the loop stops or does not start. Instead of using while (running), it can just be while (is_fctrl_enabled). Surrounding code needs minor adjustment. 2) Initially is_fctrl_enable == false. When the app call mds_tipc_fctrl_initialize(), it will call create_ncs_task() which set is_fctrl_enable to true and start the loop. Best Regards, Thanh -Original Message- From: thuan.tran Sent: Monday, 23 March 2020 8:59 PM To: Minh Hon Chau ; Thang Duc Nguyen Cc: opensaf-devel@lists.sourceforge.net Subject: [devel] [PATCH 1/1] mds: fix mds_tipc_fctrl_shutdown() stuck forever [#3169] - Deadlock of portid_map_mutex locking: mds_tipc_fctrl_shutdown() take lock then wait for thread process_all_events() to be canceled. But that thread also want get lock then it is keep waiting for lock. - Create safe method to cancel process_all_events() thread similar as the way MDS destroy legacy receiving thread. And getting portid_map_mutex lock only after that thread released. --- src/mds/mds_tipc_fctrl_intf.cc | 24 ++-- src/mds/mds_tipc_fctrl_msg.h | 8 2 files changed, 30 insertions(+), 2 deletions(-) diff --git a/src/mds/mds_tipc_fctrl_intf.cc b/src/mds/mds_tipc_fctrl_intf.cc index 6ce00782e..93bfce51c 100644 --- a/src/mds/mds_tipc_fctrl_intf.cc +++ b/src/mds/mds_tipc_fctrl_intf.cc @@ -27,6 +27,8 @@ #include "base/ncssysf_def.h" #include "base/ncssysf_tsk.h" +#include "base/ncs_osprm.h" +#include "base/osaf_poll.h" #include "mds/mds_log.h" #include "mds/mds_tipc_fctrl_portid.h" @@ -194,6 +196,7 @@ bool mds_fctrl_mbx_cleanup(NCSCONTEXT arg, NCSCONTEXT msg) { } uint32_t process_all_events(void) { + bool running = true; enum { FD_FCTRL = 0, NUM_FDS }; int poll_tmo = chunk_ack_timeout; @@ -203,7 +206,7 @@ uint32_t process_all_events(void) { ncs_ipc_get_sel_obj(_events).rmv_obj; pfd[FD_FCTRL].events = POLLIN; - while (true) { + while (running) { int pollres; pollres = poll(pfd, NUM_FDS, poll_tmo); @@ -231,9 +234,13 @@ uint32_t process_all_events(void) { if (evt->IsFlowEvent()) { process_flow_event(*evt); } +if (evt->IsShutDownEvent()) { + running = false; +} delete evt; portid_map_mutex.unlock(); +if (!running) m_NCS_SEL_OBJ_IND(>destroy_ack_obj_); } } // timeout, scan all portid and send ack msgs @@ -243,6 +250,7 @@ uint32_t process_all_events(void) { portid_map_mutex.unlock(); } } /* while */ + m_MDS_LOG_DBG("FCTRL: process_all_events() thread end"); return NCSCC_RC_SUCCESS; } @@ -305,7 +313,18 @@ uint32_t mds_tipc_fctrl_initialize(int dgramsock, struct tipc_portid id, uint32_t mds_tipc_fctrl_shutdown(void) { if (is_fctrl_enabled == false) return NCSCC_RC_SUCCESS; - portid_map_mutex.lock(); + NCS_SEL_OBJ destroy_ack_obj; + m_NCS_SEL_OBJ_CREATE(_ack_obj); + Event* pevt = new Event(Event::Type::kEvtShutDown, destroy_ack_obj); + if (m_NCS_IPC_SEND(_events, pevt, + NCS_IPC_PRIORITY_HIGH) != NCSCC_RC_SUCCESS) { +m_MDS_LOG_ERR("FCTRL: Failed to send shutdown, Error[%s]", +strerror(errno)); +abort(); + } + osaf_poll_one_fd(m_GET_FD_FROM_SEL_OBJ(destroy_ack_obj), 1); + m_NCS_SEL_OBJ_DESTROY(_ack_obj); + memset(_ack_obj, 0, sizeof(destroy_ack_obj)); if (ncs_task_release(p_task_hdl) != NCSCC_RC_SUCCESS) { m_MDS_LOG_ERR("FCTRL: Stop of the Created Task-failed, Error[%s]", @@ -315,6 +334,7 @@ uint32_t mds_tipc_fctrl_shutdown(void) { m_NCS_IPC_DETACH(_events, mds_fctrl_mbx_cleanup, nullptr);
Re: [devel] [PATCH 1/1] mds: fix mds_tipc_fctrl_shutdown() stuck forever [#3169]
Hi Minh, See my replies inline. Best Regards, ThuanTr -Original Message- From: Minh Hon Chau Sent: Tuesday, March 31, 2020 8:05 AM To: Thuan Tran ; Thang Duc Nguyen Cc: opensaf-devel@lists.sourceforge.net Subject: Re: [PATCH 1/1] mds: fix mds_tipc_fctrl_shutdown() stuck forever [#3169] Hi Thuan, More comments inline. Thanks Minh On 30/3/20 9:19 pm, Thuan Tran wrote: > Hi Minh, > > Thanks for reviewing. See my replies inline. > > Best Regards, > ThuanTr > > -Original Message- > From: Minh Hon Chau > Sent: Monday, March 30, 2020 5:00 PM > To: Thuan Tran ; Thang Duc Nguyen > > Cc: opensaf-devel@lists.sourceforge.net > Subject: Re: [PATCH 1/1] mds: fix mds_tipc_fctrl_shutdown() stuck forever > [#3169] > > Hi Thuan, > > Please find my comments inline [M] > > Thanks > > Minh > > On 23/3/20 8:59 pm, thuan.tran wrote: >> - Deadlock of portid_map_mutex locking: mds_tipc_fctrl_shutdown() >> take lock then wait for thread process_all_events() to be canceled. >> But that thread also want get lock then it is keep waiting for lock. >> - Create safe method to cancel process_all_events() thread similar >> as the way MDS destroy legacy receiving thread. > [M] I could miss it but I don't find where MDS destroys the legacy > receiving thread by sending event to the legacy receiving thread to > cancel the thread. Is it mdtm_tipc_destroy() you mean? > [T] Yes, before calling that function, check mds_main.c/ line: status = > mds_mdtm_destroy(); [M]: OK I see it, thanks > >> And getting portid_map_mutex lock only after that thread released. >> --- >>src/mds/mds_tipc_fctrl_intf.cc | 24 ++-- >>src/mds/mds_tipc_fctrl_msg.h | 8 >>2 files changed, 30 insertions(+), 2 deletions(-) >> >> diff --git a/src/mds/mds_tipc_fctrl_intf.cc b/src/mds/mds_tipc_fctrl_intf.cc >> index 6ce00782e..93bfce51c 100644 >> --- a/src/mds/mds_tipc_fctrl_intf.cc >> +++ b/src/mds/mds_tipc_fctrl_intf.cc >> @@ -27,6 +27,8 @@ >> >>#include "base/ncssysf_def.h" >>#include "base/ncssysf_tsk.h" >> +#include "base/ncs_osprm.h" >> +#include "base/osaf_poll.h" >> >>#include "mds/mds_log.h" >>#include "mds/mds_tipc_fctrl_portid.h" >> @@ -194,6 +196,7 @@ bool mds_fctrl_mbx_cleanup(NCSCONTEXT arg, NCSCONTEXT >> msg) { >>} >> >>uint32_t process_all_events(void) { >> + bool running = true; >> enum { FD_FCTRL = 0, NUM_FDS }; >> >> int poll_tmo = chunk_ack_timeout; >> @@ -203,7 +206,7 @@ uint32_t process_all_events(void) { >> ncs_ipc_get_sel_obj(_events).rmv_obj; >> pfd[FD_FCTRL].events = POLLIN; >> >> - while (true) { >> + while (running) { >>int pollres; >> >>pollres = poll(pfd, NUM_FDS, poll_tmo); >> @@ -231,9 +234,13 @@ uint32_t process_all_events(void) { >>if (evt->IsFlowEvent()) { >> process_flow_event(*evt); >>} >> +if (evt->IsShutDownEvent()) { >> + running = false; >> +} >> >>delete evt; >>portid_map_mutex.unlock(); >> +if (!running) m_NCS_SEL_OBJ_IND(>destroy_ack_obj_); >> } >>} >>// timeout, scan all portid and send ack msgs >> @@ -243,6 +250,7 @@ uint32_t process_all_events(void) { >> portid_map_mutex.unlock(); >>} >> } /* while */ >> + m_MDS_LOG_DBG("FCTRL: process_all_events() thread end"); >> return NCSCC_RC_SUCCESS; >>} >> >> @@ -305,7 +313,18 @@ uint32_t mds_tipc_fctrl_initialize(int dgramsock, >> struct tipc_portid id, >>uint32_t mds_tipc_fctrl_shutdown(void) { >> if (is_fctrl_enabled == false) return NCSCC_RC_SUCCESS; >> >> - portid_map_mutex.lock(); >> + NCS_SEL_OBJ destroy_ack_obj; >> + m_NCS_SEL_OBJ_CREATE(_ack_obj); >> + Event* pevt = new Event(Event::Type::kEvtShutDown, destroy_ack_obj); >> + if (m_NCS_IPC_SEND(_events, pevt, >> + NCS_IPC_PRIORITY_HIGH) != NCSCC_RC_SUCCESS) { >> +m_MDS_LOG_ERR("FCTRL: Failed to send shutdown, Error[%s]", >> +strerror(errno)); >> +abort(); >> + } >> + osaf_poll_one_fd(m_GET_FD_FROM_SEL_OBJ(destroy_ack_obj), 1); >> + m_NCS_SEL_OBJ_DESTROY(_ack_obj); >> + memset(_ack_obj, 0, sizeof(destroy_ack_obj)); [M]: Don't you need to memset 0? [T]: Just clone it as legacy code. >> >&
Re: [devel] [PATCH 1/1] mds: fix mds_tipc_fctrl_shutdown() stuck forever [#3169]
Hi Minh, Thanks for reviewing. See my replies inline. Best Regards, ThuanTr -Original Message- From: Minh Hon Chau Sent: Monday, March 30, 2020 5:00 PM To: Thuan Tran ; Thang Duc Nguyen Cc: opensaf-devel@lists.sourceforge.net Subject: Re: [PATCH 1/1] mds: fix mds_tipc_fctrl_shutdown() stuck forever [#3169] Hi Thuan, Please find my comments inline [M] Thanks Minh On 23/3/20 8:59 pm, thuan.tran wrote: > - Deadlock of portid_map_mutex locking: mds_tipc_fctrl_shutdown() > take lock then wait for thread process_all_events() to be canceled. > But that thread also want get lock then it is keep waiting for lock. > - Create safe method to cancel process_all_events() thread similar > as the way MDS destroy legacy receiving thread. [M] I could miss it but I don't find where MDS destroys the legacy receiving thread by sending event to the legacy receiving thread to cancel the thread. Is it mdtm_tipc_destroy() you mean? [T] Yes, before calling that function, check mds_main.c/ line: status = mds_mdtm_destroy(); > And getting portid_map_mutex lock only after that thread released. > --- > src/mds/mds_tipc_fctrl_intf.cc | 24 ++-- > src/mds/mds_tipc_fctrl_msg.h | 8 > 2 files changed, 30 insertions(+), 2 deletions(-) > > diff --git a/src/mds/mds_tipc_fctrl_intf.cc b/src/mds/mds_tipc_fctrl_intf.cc > index 6ce00782e..93bfce51c 100644 > --- a/src/mds/mds_tipc_fctrl_intf.cc > +++ b/src/mds/mds_tipc_fctrl_intf.cc > @@ -27,6 +27,8 @@ > > #include "base/ncssysf_def.h" > #include "base/ncssysf_tsk.h" > +#include "base/ncs_osprm.h" > +#include "base/osaf_poll.h" > > #include "mds/mds_log.h" > #include "mds/mds_tipc_fctrl_portid.h" > @@ -194,6 +196,7 @@ bool mds_fctrl_mbx_cleanup(NCSCONTEXT arg, NCSCONTEXT > msg) { > } > > uint32_t process_all_events(void) { > + bool running = true; > enum { FD_FCTRL = 0, NUM_FDS }; > > int poll_tmo = chunk_ack_timeout; > @@ -203,7 +206,7 @@ uint32_t process_all_events(void) { > ncs_ipc_get_sel_obj(_events).rmv_obj; > pfd[FD_FCTRL].events = POLLIN; > > - while (true) { > + while (running) { > int pollres; > > pollres = poll(pfd, NUM_FDS, poll_tmo); > @@ -231,9 +234,13 @@ uint32_t process_all_events(void) { > if (evt->IsFlowEvent()) { > process_flow_event(*evt); > } > +if (evt->IsShutDownEvent()) { > + running = false; > +} > > delete evt; > portid_map_mutex.unlock(); > +if (!running) m_NCS_SEL_OBJ_IND(>destroy_ack_obj_); > } > } > // timeout, scan all portid and send ack msgs > @@ -243,6 +250,7 @@ uint32_t process_all_events(void) { > portid_map_mutex.unlock(); > } > } /* while */ > + m_MDS_LOG_DBG("FCTRL: process_all_events() thread end"); > return NCSCC_RC_SUCCESS; > } > > @@ -305,7 +313,18 @@ uint32_t mds_tipc_fctrl_initialize(int dgramsock, struct > tipc_portid id, > uint32_t mds_tipc_fctrl_shutdown(void) { > if (is_fctrl_enabled == false) return NCSCC_RC_SUCCESS; > > - portid_map_mutex.lock(); > + NCS_SEL_OBJ destroy_ack_obj; > + m_NCS_SEL_OBJ_CREATE(_ack_obj); > + Event* pevt = new Event(Event::Type::kEvtShutDown, destroy_ack_obj); > + if (m_NCS_IPC_SEND(_events, pevt, > + NCS_IPC_PRIORITY_HIGH) != NCSCC_RC_SUCCESS) { > +m_MDS_LOG_ERR("FCTRL: Failed to send shutdown, Error[%s]", > +strerror(errno)); > +abort(); > + } > + osaf_poll_one_fd(m_GET_FD_FROM_SEL_OBJ(destroy_ack_obj), 1); > + m_NCS_SEL_OBJ_DESTROY(_ack_obj); > + memset(_ack_obj, 0, sizeof(destroy_ack_obj)); > > if (ncs_task_release(p_task_hdl) != NCSCC_RC_SUCCESS) { > m_MDS_LOG_ERR("FCTRL: Stop of the Created Task-failed, Error[%s]", > @@ -315,6 +334,7 @@ uint32_t mds_tipc_fctrl_shutdown(void) { > m_NCS_IPC_DETACH(_events, mds_fctrl_mbx_cleanup, nullptr); > m_NCS_IPC_RELEASE(_events, nullptr); > > + portid_map_mutex.lock(); > for (auto i : portid_map) delete i.second; > portid_map.clear(); [M] Moving the lock() down here is not enough? [T] It's not enough, it will continue stuck due to lock inside mailbox handling, m_NCS_IPC_DETACH() can stuck forever. > > diff --git a/src/mds/mds_tipc_fctrl_msg.h b/src/mds/mds_tipc_fctrl_msg.h > index c4641ed4e..1ba625650 100644 > --- a/src/mds/mds_tipc_fctrl_msg.h > +++ b/src/mds/mds_tipc_fctrl_msg.h > @@ -49,10 +49,13 @@ class Event { > kEvtTmrAll, > kEvtTmrTxProb,// event that tx probation timer expired for once > kEvtTmrChunk
Re: [devel] [PATCH 1/1] osaf: enhance vm frozen detection in tcp.plugin [#3164]
Hi Minh, Thanks for late comments. See my reply inline. Best Regards, ThuanTr -Original Message- From: Minh Hon Chau Sent: Friday, March 20, 2020 9:27 AM To: Thuan Tran ; Thang Duc Nguyen ; Gary Lee Cc: opensaf-devel@lists.sourceforge.net; Thanh Nguyen Subject: Re: [PATCH 1/1] osaf: enhance vm frozen detection in tcp.plugin [#3164] Hi Thuan, I'm adding Thanh since he's looking at the patch as well. I see you pushed the patch, here some late comments. Thanks Minh On 9/3/20 4:49 pm, thuan.tran wrote: > - Active SC will reboot if arb time somehow has big gap b/w heartbeats > in watch takeover request. Active SC may still OK but be rebooted > unexpectedly. > - Enhance VM was frozen detection base on arb time and local time counter. [M]: The patch has a general solution for both vm and container, and running a counter thread stead of reading time.time(), we need to explain it with a bit more details. [T]: Sorry that commit is merged, I cannot update commit message but I have explained in function time_counting(), hope it is still enough info. > --- > src/osaf/consensus/plugins/tcp/tcp.plugin | 43 ++- > 1 file changed, 35 insertions(+), 8 deletions(-) > > diff --git a/src/osaf/consensus/plugins/tcp/tcp.plugin > b/src/osaf/consensus/plugins/tcp/tcp.plugin > index 0be20fcee..aaa1c1c3f 100755 > --- a/src/osaf/consensus/plugins/tcp/tcp.plugin > +++ b/src/osaf/consensus/plugins/tcp/tcp.plugin > @@ -23,8 +23,24 @@ import sys > import time > import xmlrpc.client > import syslog > +import threading > > > +counter_run = False > +counter_time = 0.0 > + > +def time_counting(hb_interval): > +''' > +When node is frozen, if it is VM, clock time not jump > +but if it is container, clock time still jump. > +This function to help know node is frozen or arbitrator server issue > +''' > +global counter_run, counter_time > +counter_time = 0.0 > +while (counter_run): > +time.sleep(hb_interval) > +counter_time += hb_interval > + > class ArbitratorPlugin(object): > """ This class represents a TCP Plugin """ > > @@ -478,6 +494,8 @@ class ArbitratorPlugin(object): > return ret > > last_arb_timestamp = 0 > +global counter_run, counter_time > +counter = None > while True: > if key == self.takeover_request: > if self.is_active() is False: > @@ -486,15 +504,24 @@ class ArbitratorPlugin(object): > while True: > try: > time_at_arb = self.proxy.heartbeat(self.hostname) > -if last_arb_timestamp == 0: > -last_arb_timestamp = time_at_arb > -break > -elif (time_at_arb - last_arb_timestamp) > > self.timeout: > -# VM was frozen? > -syslog.syslog('VM was frozen!') > -ret['code'] = 126 > -return ret > +if counter is not None: > +counter_run = False > +counter.join() > +if (last_arb_timestamp != 0) and \ > + (time_at_arb - last_arb_timestamp > self.timeout): > +if counter_time < self.timeout: > +syslog.syslog('VM was frozen!') > +ret['code'] = 126 > +return ret > +syslog.syslog('Arb server issue?') > +raise socket.error('Arb server issue?') > else: > +counter = threading.Thread( > +target=time_counting, > +args=(self.heartbeat_interval,)) > +counter_run = True > +counter.setDaemon(True) > +counter.start() [M] What it means to we are going to start the thread, and wait for it join() back multiple times in this while loop. [T] Yes, it's true. If you has any idea for better, I will create another ticket to update since this ticket commit is merged. > last_arb_timestamp = time_at_arb > break > except socket.error: ___ Opensaf-devel mailing list Opensaf-devel@lists.sourceforge.net https://lists.sourceforge.net/lists/listinfo/opensaf-devel
Re: [devel] [PATCH 1/1] imm: enhance "immlist --help" output more appropriate [#3165]
Hi Phuc, ACK. Best Regards, ThuanTr -Original Message- From: Phuc Hoang Chau Sent: Monday, March 9, 2020 5:42 PM To: Thuan Tran ; Vu Minh Nguyen Cc: opensaf-devel@lists.sourceforge.net; Phuc Hoang Chau Subject: [PATCH 1/1] imm: enhance "immlist --help" output more appropriate [#3165] After a caption down the line for information more clearly --- src/imm/tools/imm_list.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/imm/tools/imm_list.c b/src/imm/tools/imm_list.c index fd2537a..b5694bb 100644 --- a/src/imm/tools/imm_list.c +++ b/src/imm/tools/imm_list.c @@ -75,7 +75,7 @@ static void usage(const char *progname) printf( "\t\t--pretty-print does not work with the options -a and -c\n"); printf( - "\t-d, --delimiter= - separate multiple attribute values by "); + "\t-d, --delimiter= - separate multiple attribute values by \n"); printf("\t-t, --timeout \n"); printf("\t\tutility timeout in seconds\n"); @@ -83,7 +83,7 @@ static void usage(const char *progname) printf("\timmlist -a saAmfApplicationAdminState safApp=OpenSAF\n"); printf("\timmlist safApp=myApp1 safApp=myApp2\n"); printf("\timmlist --pretty-print=no -a saAmfAppType safApp=OpenSAF\n"); - printf("\timmlist -d '|' -a safAmfNodeGroup safAmfNodeGroup=AllNodes,safAmfCluster=myAmfCluster"); + printf("\timmlist -d '|' -a safAmfNodeGroup safAmfNodeGroup=AllNodes,safAmfCluster=myAmfCluster\n"); } static void print_attr_value_raw(SaImmValueTypeT attrValueType, -- 2.7.4 ___ Opensaf-devel mailing list Opensaf-devel@lists.sourceforge.net https://lists.sourceforge.net/lists/listinfo/opensaf-devel
Re: [devel] [PATCH 1/1] fm: ignore unexpected event on standby node [#3017]
Hi Thang, But in roaming SC with many quiesced SCs, it will become spam. Best Regards, ThuanTr -Original Message- From: Thang Duc Nguyen Sent: Friday, March 6, 2020 8:36 AM To: Thuan Tran ; Gary Lee ; Minh Hon Chau Cc: opensaf-devel@lists.sourceforge.net Subject: RE: [PATCH 1/1] fm: ignore unexpected event on standby node [#3017] Hi Thuan, In normal runing/condition the message not printed. So it can not be considered as spam. B.R/Thnag -Original Message- From: Thuan Tran Sent: Thursday, March 5, 2020 5:25 PM To: Thang Duc Nguyen ; Gary Lee ; Minh Hon Chau Cc: opensaf-devel@lists.sourceforge.net Subject: RE: [PATCH 1/1] fm: ignore unexpected event on standby node [#3017] Hi Thang, ACK with minor comment. Best Regards, ThuanTr -Original Message- From: Thang Duc Nguyen Sent: Thursday, March 5, 2020 3:48 PM To: Gary Lee ; Minh Hon Chau ; Thuan Tran Cc: opensaf-devel@lists.sourceforge.net; Thang Duc Nguyen Subject: [PATCH 1/1] fm: ignore unexpected event on standby node [#3017] In roaming sc, when the standby SC down, the new standby is selected. But the amfd on the old standby is stuck. And the new standby receives the amfd down event of old standby. In this case, FM should ignore events from old standby SC. --- src/fm/fmd/fm_mds.cc | 8 1 file changed, 8 insertions(+) diff --git a/src/fm/fmd/fm_mds.cc b/src/fm/fmd/fm_mds.cc index c5b3581ee..755856e2d 100644 --- a/src/fm/fmd/fm_mds.cc +++ b/src/fm/fmd/fm_mds.cc @@ -434,6 +434,14 @@ static uint32_t fm_mds_svc_evt(FM_CB *cb, return NCSCC_RC_FAILURE; } + if ((cb->role == PCS_RDA_STANDBY) && cb->peer_sc_up) { +if (svc_evt->i_node_id != cb->peer_node_id) { + LOG_NO("Ignore event of node %x. Peer node is %x", + (unsigned)svc_evt->i_node_id, (unsigned)cb->peer_node_id); [Thuan] Will it spam syslog? I think it should be TRACE() + return NCSCC_RC_SUCCESS; +} + } + switch (svc_evt->i_change) { case NCSMDS_DOWN: switch (svc_evt->i_svc_id) { -- 2.17.1 ___ Opensaf-devel mailing list Opensaf-devel@lists.sourceforge.net https://lists.sourceforge.net/lists/listinfo/opensaf-devel
Re: [devel] [PATCH 1/1] fm: ignore unexpected event on standby node [#3017]
Hi Thang, ACK with minor comment. Best Regards, ThuanTr -Original Message- From: Thang Duc Nguyen Sent: Thursday, March 5, 2020 3:48 PM To: Gary Lee ; Minh Hon Chau ; Thuan Tran Cc: opensaf-devel@lists.sourceforge.net; Thang Duc Nguyen Subject: [PATCH 1/1] fm: ignore unexpected event on standby node [#3017] In roaming sc, when the standby SC down, the new standby is selected. But the amfd on the old standby is stuck. And the new standby receives the amfd down event of old standby. In this case, FM should ignore events from old standby SC. --- src/fm/fmd/fm_mds.cc | 8 1 file changed, 8 insertions(+) diff --git a/src/fm/fmd/fm_mds.cc b/src/fm/fmd/fm_mds.cc index c5b3581ee..755856e2d 100644 --- a/src/fm/fmd/fm_mds.cc +++ b/src/fm/fmd/fm_mds.cc @@ -434,6 +434,14 @@ static uint32_t fm_mds_svc_evt(FM_CB *cb, return NCSCC_RC_FAILURE; } + if ((cb->role == PCS_RDA_STANDBY) && cb->peer_sc_up) { +if (svc_evt->i_node_id != cb->peer_node_id) { + LOG_NO("Ignore event of node %x. Peer node is %x", + (unsigned)svc_evt->i_node_id, (unsigned)cb->peer_node_id); [Thuan] Will it spam syslog? I think it should be TRACE() + return NCSCC_RC_SUCCESS; +} + } + switch (svc_evt->i_change) { case NCSMDS_DOWN: switch (svc_evt->i_svc_id) { -- 2.17.1 ___ Opensaf-devel mailing list Opensaf-devel@lists.sourceforge.net https://lists.sourceforge.net/lists/listinfo/opensaf-devel
Re: [devel] [PATCH 1/1] amfnd: reset component register once it is terminated [#3160]
Hi Thang, ACK with minor comment. Best Regards, ThuanTr -Original Message- From: Thang Duc Nguyen Sent: Wednesday, March 4, 2020 9:03 AM To: Minh Hon Chau ; Thuan Tran Cc: opensaf-devel@lists.sourceforge.net; Thang Duc Nguyen Subject: [PATCH 1/1] amfnd: reset component register once it is terminated [#3160] Reset component register once it is terminated to avoid process instantiate event in late. --- src/amf/amfnd/err.cc | 4 1 file changed, 4 insertions(+) diff --git a/src/amf/amfnd/err.cc b/src/amf/amfnd/err.cc index 65cc3a5c3..3c612b65a 100644 --- a/src/amf/amfnd/err.cc +++ b/src/amf/amfnd/err.cc @@ -487,6 +487,10 @@ uint32_t avnd_err_process(AVND_CB *cb, AVND_COMP *comp, LOG_NO("'%s' faulted due to '%s' : Recovery is '%s'", comp->name.c_str(), g_comp_err[err_info->src], g_comp_rcvr[esc_rcvr - 1]); + if (comp->err_info.src == AVND_ERR_SRC_AVA_DN) { +// reset comp-reg [Thuan] No need comment since function has its meaning already +m_AVND_COMP_REG_PARAM_RESET(cb, comp); + } /* execute the recovery */ rc = avnd_err_recover(cb, comp->su, comp, esc_rcvr); -- 2.17.1 ___ Opensaf-devel mailing list Opensaf-devel@lists.sourceforge.net https://lists.sourceforge.net/lists/listinfo/opensaf-devel
Re: [devel] [PATCH 1/1] imm: imm_list tool add new option --delimiter [#3155]
Hi Phuc, ACK. Best Regards, ThuanTr -Original Message- From: Phuc Hoang Chau Sent: Monday, March 2, 2020 2:49 PM To: Vu Minh Nguyen ; Thuan Tran ; Thang Duc Nguyen Cc: opensaf-devel@lists.sourceforge.net; Phuc Hoang Chau Subject: [PATCH 1/1] imm: imm_list tool add new option --delimiter [#3155] Make delimiter of multiple attribute value from immlist configurable Update usage --pretty-print does not work with option -a and -c --- src/imm/tools/imm_list.c | 49 +++- 1 file changed, 36 insertions(+), 13 deletions(-) diff --git a/src/imm/tools/imm_list.c b/src/imm/tools/imm_list.c index d1dc422..c2e84e9 100644 --- a/src/imm/tools/imm_list.c +++ b/src/imm/tools/imm_list.c @@ -72,6 +72,11 @@ static void usage(const char *progname) printf("\t-h, --help - display this help and exit\n"); printf( "\t-p, --pretty-print= - select pretty print, default yes\n"); + printf( + "\t\t--pretty-print does not work with the options -a and -c\n"); + printf( + "\t-d,--delimiter= - separate multiple attribute "); + printf("values by \n"); printf("\t-t, --timeout \n"); printf("\t\tutility timeout in seconds\n"); @@ -79,6 +84,8 @@ static void usage(const char *progname) printf("\timmlist -a saAmfApplicationAdminState safApp=OpenSAF\n"); printf("\timmlist safApp=myApp1 safApp=myApp2\n"); printf("\timmlist --pretty-print=no -a saAmfAppType safApp=OpenSAF\n"); + printf("\timmlist -d '|' -a safAmfNodeGroup "); + printf("safAmfNodeGroup=AllNodes,safAmfCluster=myAmfCluster\n"); } static void print_attr_value_raw(SaImmValueTypeT attrValueType, @@ -141,19 +148,19 @@ static void print_attr_value(SaImmValueTypeT attrValueType, { switch (attrValueType) { case SA_IMM_ATTR_SAINT32T: - printf("%d (0x%x) ", *((SaInt32T *)attrValue), + printf("%d (0x%x)", *((SaInt32T *)attrValue), *((SaInt32T *)attrValue)); break; case SA_IMM_ATTR_SAUINT32T: - printf("%u (0x%x) ", *((SaUint32T *)attrValue), + printf("%u (0x%x)", *((SaUint32T *)attrValue), *((SaUint32T *)attrValue)); break; case SA_IMM_ATTR_SAINT64T: - printf("%lld (0x%llx) ", *((SaInt64T *)attrValue), + printf("%lld (0x%llx)", *((SaInt64T *)attrValue), *((SaInt64T *)attrValue)); break; case SA_IMM_ATTR_SAUINT64T: - printf("%llu (0x%llx) ", *((SaUint64T *)attrValue), + printf("%llu (0x%llx)", *((SaUint64T *)attrValue), *((SaUint64T *)attrValue)); break; case SA_IMM_ATTR_SATIMET: { @@ -163,24 +170,24 @@ static void print_attr_value(SaImmValueTypeT attrValueType, ctime_r(, buf); buf[strlen(buf) - 1] = '\0'; /* Remove new line */ - printf("%llu (0x%llx, %s) ", *((SaTimeT *)attrValue), + printf("%llu (0x%llx, %s)", *((SaTimeT *)attrValue), *((SaTimeT *)attrValue), buf); break; } case SA_IMM_ATTR_SAFLOATT: - printf("%.8g ", *((SaFloatT *)attrValue)); + printf("%.8g", *((SaFloatT *)attrValue)); break; case SA_IMM_ATTR_SADOUBLET: - printf("%.17g ", *((SaDoubleT *)attrValue)); + printf("%.17g", *((SaDoubleT *)attrValue)); break; case SA_IMM_ATTR_SANAMET: { SaNameT *myNameT = (SaNameT *)attrValue; - printf("%s (%zu) ", saAisNameBorrow(myNameT), + printf("%s (%zu)", saAisNameBorrow(myNameT), strlen(saAisNameBorrow(myNameT))); break; } case SA_IMM_ATTR_SASTRINGT: - printf("%s ", *((char **)attrValue)); + printf("%s", *((char **)attrValue)); break; case SA_IMM_ATTR_SAANYT: { SaAnyT *anyp = (SaAnyT *)attrValue; @@ -404,6 +411,7 @@ static void display_class_definition(const SaImmClassNameT className, static void display_object(const char *name, SaImmAccessorHandleT accessorHandle, int pretty_print, + char delimiter, const SaImmAttrNameT *attributeNames) { int i = 0, j; @@ -436,9 +444,12 @@ static void display_object(const char *name, printf("\n%-50s %
Re: [devel] [PATCH 1/1] amfnd: handle comp instantiate event with SU in terminating presence state [#3160]
Hi Thang, I have 2 comments inline. Best Regards, ThuanTr -Original Message- From: Thang Duc Nguyen Sent: Thursday, February 27, 2020 1:19 PM To: Minh Hon Chau ; Thuan Tran ; Gary Lee Cc: opensaf-devel@lists.sourceforge.net; Thang Duc Nguyen Subject: [PATCH 1/1] amfnd: handle comp instantiate event with SU in terminating presence state [#3160] - When component is instantiated with SU in terminating presence state, need trigger SU fsm with instantiate event if all components are either uninstantiated or instantiated. - Need informing error recovery when amfnd fails to send callback parameters to component when component already exited. --- src/amf/amfnd/avnd_su.h | 19 +++ src/amf/amfnd/comp.cc | 23 ++- src/amf/amfnd/susm.cc | 6 -- 3 files changed, 45 insertions(+), 3 deletions(-) diff --git a/src/amf/amfnd/avnd_su.h b/src/amf/amfnd/avnd_su.h index 45effd6c9..eb708dce0 100644 --- a/src/amf/amfnd/avnd_su.h +++ b/src/amf/amfnd/avnd_su.h @@ -256,6 +256,25 @@ typedef struct avnd_su_tag { } \ } \ } +/* macro to determine if all the pi comps in an su + are instantiated or uninstantiated +*/ +#define m_AVND_SU_IS_INSTANTIATED_UNINSTANTIATED(su, is) \ + { \ +AVND_COMP *curr = 0; \ +(is) = true; \ +for (curr = m_AVND_COMP_FROM_SU_DLL_NODE_GET( \ + m_NCS_DBLIST_FIND_FIRST(>comp_list));\ + curr; curr = m_AVND_COMP_FROM_SU_DLL_NODE_GET( \ + m_NCS_DBLIST_FIND_NEXT(>su_dll_node))) { \ + if (m_AVND_COMP_TYPE_IS_PREINSTANTIABLE(curr) &&\ + (!m_AVND_COMP_PRES_STATE_IS_INSTANTIATED(curr) && \ + !m_AVND_COMP_PRES_STATE_IS_UNINSTANTIATED(curr))) { \ +(is) = false; \ +break;\ + } \ +} \ + } /* macros to manage the presence state */ #define m_AVND_SU_PRES_STATE_IS_INSTANTIATED(x) \ diff --git a/src/amf/amfnd/comp.cc b/src/amf/amfnd/comp.cc index 8a11d75fb..cef94a0d4 100644 --- a/src/amf/amfnd/comp.cc +++ b/src/amf/amfnd/comp.cc @@ -2031,6 +2031,7 @@ uint32_t avnd_comp_cbk_send(AVND_CB *cb, AVND_COMP *comp, AVND_COMP_CSI_REC *csi_rec) { SaAmfCSIDescriptorT csi_desc; SaAmfCSIFlagsT csi_flag; + AVND_ERR_INFO err_info; std::string csi_name; AVSV_AMF_CBK_INFO *cbk_info = 0; AVND_COMP_CSI_REC *curr_csi = 0; @@ -2039,6 +2040,7 @@ uint32_t avnd_comp_cbk_send(AVND_CB *cb, AVND_COMP *comp, SaAmfHandleT hdl = 0; SaTimeT per = 0; uint32_t rc = NCSCC_RC_SUCCESS; + uint32_t ret = NCSCC_RC_SUCCESS; TRACE_ENTER2("'%s' %u", comp->name.c_str(), type); /* @@ -2201,7 +2203,26 @@ uint32_t avnd_comp_cbk_send(AVND_CB *cb, AVND_COMP *comp, rc = avnd_comp_cbq_send(cb, comp, dest, hdl, cbk_info, per); done: - if ((NCSCC_RC_SUCCESS != rc) && cbk_info) avsv_amf_cbk_free(cbk_info); + if ((NCSCC_RC_SUCCESS != rc) && cbk_info) { +avsv_amf_cbk_free(cbk_info); +LOG_ER("avnd_comp_cbk_send failed for comp: %s, type: %d", + comp->name.c_str(), type); +if (type == AVSV_AMF_CSI_SET) { + err_info.src = AVND_ERR_SRC_CBK_CSI_SET_FAILED; +} else if (type == AVSV_AMF_COMP_TERM) { + err_info.src = AVND_ERR_SRC_CMD_FAILED; +} else { + // For another callback types + err_info.src = AVND_ERR_SRC_MAX; +} +err_info.rec_rcvr.avsv_ext = +static_cast(comp->err_info.def_rec); +ret = avnd_err_process(cb, comp, _info); +if (NCSCC_RC_SUCCESS != ret) { + LOG_ER("process error failed for comp: %s", + comp->name.c_str()); +} + } [Thuan] Will AMF handle later as "ava down" detect? Why still need new code to handle send callback fail? TRACE_LEAVE2("%u", rc); return rc; diff --git a/src/amf/amfnd/susm.cc b/src/amf/amfnd/susm.cc index 86811f1e4..162f65625 100644 --- a/src/amf/amfnd/susm.cc +++ b/src/amf/amfnd/susm.cc @@ -2897,7 +2897,7 @@ uint32_t avnd_su_pres_terming_compinst_hdler(AVND_CB *cb, AVND_SU *su, su->name.c_str(), compname.c_str()); if (m_AVND_SU_IS_PREINSTANTIABLE(su)) { -bool is; +bool is = false; osafassert(comp != nullptr); if (m_AVND_COMP_IS_FAILED(comp)) { m_AVND_COMP_FAILED_RESET(comp); @@ -2908,8 +2908,10 @@ uint32_t avnd_su_pres_terming_compinst_hdler(AVND_CB *cb, AVND_SU *su, if (true == is) { avnd_su_pres_state_set(cb, su,
Re: [devel] [PATCH 1/1] imm: imm_list tool add new option --delimeter [#3155]
Hi Phuc, In general, please do not change what is not need, e.g: add new line, etc... Some typo: "delimeter" -> "delimiter", and other comments inline. One question, why not support just one character as delimiter? Why a string? Best Regards, ThuanTr -Original Message- From: Phuc Hoang Chau Sent: Wednesday, February 26, 2020 10:53 AM To: Vu Minh Nguyen ; Thuan Tran Cc: opensaf-devel@lists.sourceforge.net Subject: RE: [PATCH 1/1] imm: imm_list tool add new option --delimeter [#3155] Hi , Due to I changed mode of the file , I have updated the old mode Thanks Phuc Chau -Original Message- From: Phuc Hoang Chau Sent: Wednesday, February 26, 2020 10:43 AM To: Vu Minh Nguyen ; Thuan Tran Cc: opensaf-devel@lists.sourceforge.net; Phuc Hoang Chau Subject: [PATCH 1/1] imm: imm_list tool add new option --delimeter [#3155] Make delimiter of multiple attribute value from immlist configurable Update option --pretty-print will not support print with option --atribute --- src/imm/tools/imm_list.c | 113 +++ 1 file changed, 74 insertions(+), 39 deletions(-) diff --git a/src/imm/tools/imm_list.c b/src/imm/tools/imm_list.c index d1dc422..4326396 100644 --- a/src/imm/tools/imm_list.c +++ b/src/imm/tools/imm_list.c @@ -72,13 +72,18 @@ static void usage(const char *progname) printf("\t-h, --help - display this help and exit\n"); printf( "\t-p, --pretty-print= - select pretty print, default yes\n"); + printf( + "\tdo not support pretty print with option -a, --attribute=NAME\n"); + printf( + "\t-d, --delimiter= - format multiple attribute value\n"); printf("\t-t, --timeout \n"); printf("\t\tutility timeout in seconds\n"); printf("\nEXAMPLE\n"); printf("\timmlist -a saAmfApplicationAdminState safApp=OpenSAF\n"); printf("\timmlist safApp=myApp1 safApp=myApp2\n"); - printf("\timmlist --pretty-print=no -a saAmfAppType safApp=OpenSAF\n"); + printf("\timmlist --pretty-print=no saAmfAppType safApp=OpenSAF\n"); + printf("\timmlist -d 'abc' safApp=OpenSAF\n"); } static void print_attr_value_raw(SaImmValueTypeT attrValueType, @@ -108,6 +113,7 @@ static void print_attr_value_raw(SaImmValueTypeT attrValueType, break; case SA_IMM_ATTR_SANAMET: { SaNameT *myNameT = (SaNameT *)attrValue; + printf("%s", saAisNameBorrow(myNameT)); break; } @@ -117,14 +123,14 @@ static void print_attr_value_raw(SaImmValueTypeT attrValueType, case SA_IMM_ATTR_SAANYT: { SaAnyT *anyp = (SaAnyT *)attrValue; unsigned int i = 0; + if (anyp->bufferSize == 0) { printf("-empty-"); } else { printf("0x"); for (; i < anyp->bufferSize; i++) { - if (((int)anyp->bufferAddr[i]) < 0x10) { + if (((int)anyp->bufferAddr[i]) < 0x10) printf("0"); - } printf("%x", (int)anyp->bufferAddr[i]); } } @@ -141,19 +147,19 @@ static void print_attr_value(SaImmValueTypeT attrValueType, { switch (attrValueType) { case SA_IMM_ATTR_SAINT32T: - printf("%d (0x%x) ", *((SaInt32T *)attrValue), + printf("%d (0x%x)", *((SaInt32T *)attrValue), *((SaInt32T *)attrValue)); break; case SA_IMM_ATTR_SAUINT32T: - printf("%u (0x%x) ", *((SaUint32T *)attrValue), + printf("%u (0x%x)", *((SaUint32T *)attrValue), *((SaUint32T *)attrValue)); break; case SA_IMM_ATTR_SAINT64T: - printf("%lld (0x%llx) ", *((SaInt64T *)attrValue), + printf("%lld (0x%llx)", *((SaInt64T *)attrValue), *((SaInt64T *)attrValue)); break; case SA_IMM_ATTR_SAUINT64T: - printf("%llu (0x%llx) ", *((SaUint64T *)attrValue), + printf("%llu (0x%llx)", *((SaUint64T *)attrValue), *((SaUint64T *)attrValue)); break; case SA_IMM_ATTR_SATIMET: { @@ -163,34 +169,35 @@ static void print_attr_value(SaImmValueTypeT attrValueType, ctime_r(, buf); buf[strlen(buf) - 1] = '\0'; /* Remove new line */ - printf("%llu (0x%llx, %s) ", *((SaTimeT *)attrValue), + printf
Re: [devel] [PATCH 1/1] amfnd: correct handling "terminate success" evt in terminating state [#3157]
Hi Thang, 2 comments (code review, not test) - The function name should be: "avnd_clean_and_exit" - Conditions for call the function sometimes only "all_comps_terminated()" sometimes with SHUTDOWN_STARTED. Can you check if we can make conditions consistent (only one or always two)? Best Regards, ThuanTr -Original Message- From: Thang Duc Nguyen Sent: Thursday, February 20, 2020 1:28 PM To: Minh Hon Chau ; Thuan Tran ; Gary Lee Cc: opensaf-devel@lists.sourceforge.net; Thang Duc Nguyen Subject: [PATCH 1/1] amfnd: correct handling "terminate success" evt in terminating state [#3157] Amfnd need to exist in node in shutdown state and all components terminated. --- src/amf/amfnd/clc.cc | 40 1 file changed, 28 insertions(+), 12 deletions(-) diff --git a/src/amf/amfnd/clc.cc b/src/amf/amfnd/clc.cc index de57838c9..f78e1a707 100644 --- a/src/amf/amfnd/clc.cc +++ b/src/amf/amfnd/clc.cc @@ -80,6 +80,8 @@ uint32_t avnd_comp_clc_st_chng_prc(AVND_CB *, AVND_COMP *, SaAmfPresenceStateT, static uint32_t avnd_instfail_su_failover(AVND_CB *, AVND_SU *, AVND_COMP *); +static void amfnd_clean_before_exit(AVND_CB *); + /*** ** C O M P O N E N T C L C F S M M A T R I X D E F I N I T I O N ** ***/ @@ -297,6 +299,23 @@ static void log_failed_exec(NCS_OS_PROC_EXEC_STATUS_INFO *exec_stat, comp->clc_info.cmds[exec_cmd - 1].cmd); } +/ + Name : amfnd_clean_before_exit + + Description : Clean database before exit + + Arguments : cb - ptr to the AvND control block + + Return Values : None + +**/ +void amfnd_clean_before_exit(AVND_CB *cb) { [Thuan] + LOG_NO("Shutdown completed, exiting"); + cb->nodeid_mdsdest_db.deleteAll(); + cb->hctypedb.deleteAll(); + daemon_exit(); +} + / Name : avnd_evt_clc_resp @@ -810,10 +829,7 @@ uint32_t avnd_comp_clc_fsm_run(AVND_CB *cb, AVND_COMP *comp, avnd_comp_pres_state_set(cb, comp, SA_AMF_PRESENCE_UNINSTANTIATED); if (all_comps_terminated()) { LOG_NO("Terminated all AMF components"); - LOG_NO("Shutdown completed, exiting"); - cb->nodeid_mdsdest_db.deleteAll(); - cb->hctypedb.deleteAll(); - daemon_exit(); + amfnd_clean_before_exit(cb); } else { TRACE("Do nothing"); goto done; @@ -2401,6 +2417,12 @@ uint32_t avnd_comp_clc_terming_termsucc_hdler(AVND_CB *cb, AVND_COMP *comp) { avnd_comp_curr_info_del(cb, comp); } + if ((cb->term_state == AVND_TERM_STATE_OPENSAF_SHUTDOWN_STARTED) && + all_comps_terminated()) { +LOG_NO("Terminated all AMF components"); +amfnd_clean_before_exit(cb); + } + TRACE_LEAVE(); return rc; } @@ -2520,10 +2542,7 @@ uint32_t avnd_comp_clc_terming_cleansucc_hdler(AVND_CB *cb, AVND_COMP *comp) { } if (all_comps_terminated()) { LOG_NO("Terminated all AMF components"); - LOG_NO("Shutdown completed, exiting"); - cb->nodeid_mdsdest_db.deleteAll(); - cb->hctypedb.deleteAll(); - daemon_exit(); + amfnd_clean_before_exit(cb); } } /* @@ -2584,10 +2603,7 @@ uint32_t avnd_comp_clc_terming_cleanfail_hdler(AVND_CB *cb, AVND_COMP *comp) { if ((cb->term_state == AVND_TERM_STATE_OPENSAF_SHUTDOWN_STARTED) && all_comps_terminated()) { LOG_WA("Terminated all AMF components (with failures)"); -LOG_NO("Shutdown completed, exiting"); -cb->nodeid_mdsdest_db.deleteAll(); -cb->hctypedb.deleteAll(); -daemon_exit(); +amfnd_clean_before_exit(cb); } TRACE_LEAVE(); -- 2.17.1 ___ Opensaf-devel mailing list Opensaf-devel@lists.sourceforge.net https://lists.sourceforge.net/lists/listinfo/opensaf-devel
Re: [devel] [PATCH 1/1] clmd: retry once to send message to clmna [#3156]
Hi Gary, Thanks for comment. I think it's same. + do { +rc = clms_mds_msg_send(cb, _msg, >fr_dest, >mds_ctxt, + MDS_SEND_PRIORITY_HIGH, NCSMDS_SVC_ID_CLMNA); +if (rc != NCSCC_RC_SUCCESS && retry < 1) { + osaf_nanosleep(); +} + } while (rc != NCSCC_RC_SUCCESS && retry++ < 1); Best Regards, ThuanTr From: Gary Lee Sent: Tuesday, February 18, 2020 2:18 PM To: Thuan Tran ; Vu Minh Nguyen ; Minh Hon Chau ; Thang Duc Nguyen Cc: opensaf-devel@lists.sourceforge.net Subject: Re: [PATCH 1/1] clmd: retry once to send message to clmna [#3156] Hi Thuan Would this be simpler? + while (retry < 1) { +rc = clms_mds_msg_send(cb, _msg, >fr_dest, >mds_ctxt, + MDS_SEND_PRIORITY_HIGH, NCSMDS_SVC_ID_CLMNA); +if (rc != NCSCC_RC_SUCCESS) { + ... + osaf_nanosleep(); + ++retry; +} else { + break; +} + } Thanks Gary ____ From: Thuan Tran mailto:thuan.t...@dektech.com.au>> Sent: 18 February 2020 17:38 To: Vu Minh Nguyen mailto:vu.m.ngu...@dektech.com.au>>; Minh Hon Chau mailto:minh.c...@dektech.com.au>>; Thang Duc Nguyen mailto:thang.d.ngu...@dektech.com.au>>; Gary Lee mailto:gary@dektech.com.au>> Cc: opensaf-devel@lists.sourceforge.net<mailto:opensaf-devel@lists.sourceforge.net> mailto:opensaf-devel@lists.sourceforge.net>>; Thuan Tran mailto:thuan.t...@dektech.com.au>> Subject: [PATCH 1/1] clmd: retry once to send message to clmna [#3156] - If a node reboot up, clmna svc_up is not yet come but clmd get message join request then send message back clmna failed. It leads to amfnd timeout init clm agent and delay send node up. This may cause amfd order reboot that node if node up delay (osafAmfDelayNodeFailoverNodeWaitTimeout) is set smaller than total time amfnd retry until init clm agent successfully. - One retry to send messsage to clmna help avoid this scenario. --- src/clm/clmd/clms_evt.cc | 18 -- 1 file changed, 16 insertions(+), 2 deletions(-) diff --git a/src/clm/clmd/clms_evt.cc b/src/clm/clmd/clms_evt.cc index 1059c6cfa..59e9c4156 100644 --- a/src/clm/clmd/clms_evt.cc +++ b/src/clm/clmd/clms_evt.cc @@ -34,6 +34,7 @@ #include "base/logtrace.h" #include "base/ncsgl_defs.h" #include "base/osaf_utility.h" +#include "base/osaf_time.h" #include "clm/clmd/clms.h" static uint32_t process_api_evt(CLMSV_CLMS_EVT *evt); @@ -535,6 +536,7 @@ uint32_t proc_node_up_msg(CLMS_CB *cb, CLMSV_CLMS_EVT *evt) { SaNameT node_name = {0}; CLMSV_MSG clm_msg; SaBoolT check_member; + int retry = 0; TRACE_ENTER2("Node up mesg for nodename length %d %s", nodeup_info->node_name.length, nodeup_info->node_name.value); @@ -636,8 +638,20 @@ uint32_t proc_node_up_msg(CLMS_CB *cb, CLMSV_CLMS_EVT *evt) { clm_msg.info.api_resp_info.type = CLMSV_CLUSTER_JOIN_RESP; clm_msg.info.api_resp_info.param.node_name = node_name; /*rc will be updated down in the positive flow */ - rc = clms_mds_msg_send(cb, _msg, >fr_dest, >mds_ctxt, - MDS_SEND_PRIORITY_HIGH, NCSMDS_SVC_ID_CLMNA); + do { +rc = clms_mds_msg_send(cb, _msg, >fr_dest, >mds_ctxt, + MDS_SEND_PRIORITY_HIGH, NCSMDS_SVC_ID_CLMNA); +if (rc != NCSCC_RC_SUCCESS && retry < 1) { + /* If a node reboot up, clmna svc_up is not yet come but clmd + * get message join request then send message back clmna failed. + * It leads to amfnd timeout init clm agent and delay send node up. + * This may cause amfd order reboot that node if node up delay + * (osafAmfDelayNodeFailoverNodeWaitTimeout) is set smaller than + * total time amfnd retry until init clm agent successfully. + * If retry here, it would help avoid this scenario */ + osaf_nanosleep(); +} + } while (rc != NCSCC_RC_SUCCESS && retry++ < 1); /*if mds send failed, we need to report failure */ if (rc != NCSCC_RC_SUCCESS) { LOG_NO("%s: send failed. dest:%" PRIx64, __FUNCTION__, evt->fr_dest); -- 2.17.1 ___ Opensaf-devel mailing list Opensaf-devel@lists.sourceforge.net https://lists.sourceforge.net/lists/listinfo/opensaf-devel
Re: [devel] [PATCH 1/1] base: add serial number arithmetic (RFC1982) [#3074]
Hi Vu, Thanks, I will update and send out new version. See my replies inline. Best Regards, ThuanTr From: Nguyen Minh Vu Sent: Monday, February 17, 2020 12:29 PM To: Thuan Tran ; Minh Hon Chau ; Thang Duc Nguyen ; Gary Lee Cc: opensaf-devel@lists.sourceforge.net Subject: Re: [PATCH 1/1] base: add serial number arithmetic (RFC1982) [#3074] Hi Thuan, Thanks. See my responses inline. Regards, Vu On 2/14/20 11:48 AM, Thuan Tran wrote: Hi Vu, Thanks for comments. Please check my replies inline. Best Regards, ThuanTr From: Nguyen Minh Vu <mailto:vu.m.ngu...@dektech.com.au> Sent: Thursday, February 13, 2020 5:50 PM To: Thuan Tran <mailto:thuan.t...@dektech.com.au>; Minh Hon Chau <mailto:minh.c...@dektech.com.au>; Thang Duc Nguyen <mailto:thang.d.ngu...@dektech.com.au>; Gary Lee <mailto:gary@dektech.com.au> Cc: opensaf-devel@lists.sourceforge.net<mailto:opensaf-devel@lists.sourceforge.net> Subject: Re: [PATCH 1/1] base: add serial number arithmetic (RFC1982) [#3074] Hi Thuan, Ack with comments inline. Regards, Vu On 2/12/20 5:36 PM, thuan.tran wrote: - Adapt MDS with this SNA implementation. --- src/base/Makefile.am | 6 +- src/base/sna.h | 136 +++ src/base/tests/sna_test.cc | 117 ++ src/mds/mds_tipc_fctrl_intf.cc | 2 +- src/mds/mds_tipc_fctrl_portid.cc | 17 ++-- src/mds/mds_tipc_fctrl_portid.h | 64 +-- 6 files changed, 267 insertions(+), 75 deletions(-) create mode 100644 src/base/sna.h create mode 100644 src/base/tests/sna_test.cc diff --git a/src/base/Makefile.am b/src/base/Makefile.am index 025fb86a2..5082175cf 100644 --- a/src/base/Makefile.am +++ b/src/base/Makefile.am @@ -173,7 +173,8 @@ noinst_HEADERS += \ src/base/unix_client_socket.h \ src/base/unix_server_socket.h \ src/base/unix_socket.h \ - src/base/usrbuf.h + src/base/usrbuf.h \ + src/base/sna.h TESTS += bin/testleap bin/libbase_test bin/core_common_test @@ -237,7 +238,8 @@ bin_libbase_test_SOURCES = \ src/base/tests/time_compare_test.cc \ src/base/tests/time_convert_test.cc \ src/base/tests/time_subtract_test.cc \ - src/base/tests/unix_socket_test.cc + src/base/tests/unix_socket_test.cc \ + src/base/tests/sna_test.cc bin_libbase_test_LDADD = \ $(GTEST_DIR)/lib/libgtest.la \ diff --git a/src/base/sna.h b/src/base/sna.h new file mode 100644 index 0..fee6627bb --- /dev/null +++ b/src/base/sna.h @@ -0,0 +1,136 @@ +/* -*- OpenSAF -*- + * + * Copyright Ericsson AB 2020 - All Rights Reserved. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY + * or FITNESS FOR A PARTICULAR PURPOSE. This file and program are licensed + * under the GNU Lesser General Public License Version 2.1, February 1999. + * The complete license can be accessed from the following location: + * http://opensource.org/licenses/lgpl-license.php + * See the Copying file included with the OpenSAF distribution for full + * licensing terms. + * + * Reference: Serial Number Arithmetic from RFC1982 + * + */ + +#ifndef BASE_SNA_H_ +#define BASE_SNA_H_ + +#include +#include + +#define SNA16_MAX 65536 // 2^16 +#define SNA16_SPACE 32768 // (2^16)/2 +#define SNA32_MAX 4294967296 // 2^32 +#define SNA32_SPACE 2147483648 // (2^32)/2 [Vu] can use: #define SNA16_MAX (1 << 16) #define SNA32_MAX (1 << 32) SPACE ones probably are not necessary. See my comment for space() method below. [Thuan] Is there any benefit with 1 << 16 or 32? I think define a clear value is better. [Vu] the benefit is you won't need to add the comment e.g. // 2^16 [Thuan] OK, change as your suggestion. Btw, I will change define macro SPACE (MAX/2). About space(), it is intended because I don’t want CPU calculate every time refer to it. + +template +class _sna { [Vu] How about `class SerialNumber {}` [Thuan] OK, I will change the class name. + private: [Vu] Declaration order should start with a public: section, followed by protected:, then private: [Thuan] OK, will change order. + T i; [Vu] Should use a descriptive name. e.g: T value_ {0}; [Thuan] OK, will update the name. + uint64_t max() { +if (typeid(T) == typeid(uint64_t)) { + return SNA32_MAX; +} +if (typeid(T) == typeid(uint32_t)) { + return SNA16_MAX; +} +throw std::out_of_range("Invalid type"); [Vu] OpenSAF code does not handle exception. Should use assertion instead. e.g: assert(0 && "Invalid data type"); [Thuan] I throw to do basic test, if assert() then cannot test the case. Btw, I think throw exception without try catch will also end with terminate? [Vu] I think so; and if you you do tests on Seq16, Seq32, you probably won't reach exceptions. [Thuan] OK, chang
Re: [devel] [PATCH 1/1] base: add serial number arithmetic (RFC1982) [#3074]
Hi Vu, Thanks for comments. Please check my replies inline. Best Regards, ThuanTr From: Nguyen Minh Vu Sent: Thursday, February 13, 2020 5:50 PM To: Thuan Tran ; Minh Hon Chau ; Thang Duc Nguyen ; Gary Lee Cc: opensaf-devel@lists.sourceforge.net Subject: Re: [PATCH 1/1] base: add serial number arithmetic (RFC1982) [#3074] Hi Thuan, Ack with comments inline. Regards, Vu On 2/12/20 5:36 PM, thuan.tran wrote: - Adapt MDS with this SNA implementation. --- src/base/Makefile.am | 6 +- src/base/sna.h | 136 +++ src/base/tests/sna_test.cc | 117 ++ src/mds/mds_tipc_fctrl_intf.cc | 2 +- src/mds/mds_tipc_fctrl_portid.cc | 17 ++-- src/mds/mds_tipc_fctrl_portid.h | 64 +-- 6 files changed, 267 insertions(+), 75 deletions(-) create mode 100644 src/base/sna.h create mode 100644 src/base/tests/sna_test.cc diff --git a/src/base/Makefile.am b/src/base/Makefile.am index 025fb86a2..5082175cf 100644 --- a/src/base/Makefile.am +++ b/src/base/Makefile.am @@ -173,7 +173,8 @@ noinst_HEADERS += \ src/base/unix_client_socket.h \ src/base/unix_server_socket.h \ src/base/unix_socket.h \ - src/base/usrbuf.h + src/base/usrbuf.h \ + src/base/sna.h TESTS += bin/testleap bin/libbase_test bin/core_common_test @@ -237,7 +238,8 @@ bin_libbase_test_SOURCES = \ src/base/tests/time_compare_test.cc \ src/base/tests/time_convert_test.cc \ src/base/tests/time_subtract_test.cc \ - src/base/tests/unix_socket_test.cc + src/base/tests/unix_socket_test.cc \ + src/base/tests/sna_test.cc bin_libbase_test_LDADD = \ $(GTEST_DIR)/lib/libgtest.la \ diff --git a/src/base/sna.h b/src/base/sna.h new file mode 100644 index 0..fee6627bb --- /dev/null +++ b/src/base/sna.h @@ -0,0 +1,136 @@ +/* -*- OpenSAF -*- + * + * Copyright Ericsson AB 2020 - All Rights Reserved. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY + * or FITNESS FOR A PARTICULAR PURPOSE. This file and program are licensed + * under the GNU Lesser General Public License Version 2.1, February 1999. + * The complete license can be accessed from the following location: + * http://opensource.org/licenses/lgpl-license.php + * See the Copying file included with the OpenSAF distribution for full + * licensing terms. + * + * Reference: Serial Number Arithmetic from RFC1982 + * + */ + +#ifndef BASE_SNA_H_ +#define BASE_SNA_H_ + +#include +#include + +#define SNA16_MAX 65536 // 2^16 +#define SNA16_SPACE 32768 // (2^16)/2 +#define SNA32_MAX 4294967296 // 2^32 +#define SNA32_SPACE 2147483648 // (2^32)/2 [Vu] can use: #define SNA16_MAX (1 << 16) #define SNA32_MAX (1 << 32) SPACE ones probably are not necessary. See my comment for space() method below. [Thuan] Is there any benefit with 1 << 16 or 32? I think define a clear value is better. About space(), it is intended because I don’t want CPU calculate every time refer to it. + +template +class _sna { [Vu] How about `class SerialNumber {}` [Thuan] OK, I will change the class name. + private: [Vu] Declaration order should start with a public: section, followed by protected:, then private: [Thuan] OK, will change order. + T i; [Vu] Should use a descriptive name. e.g: T value_ {0}; [Thuan] OK, will update the name. + uint64_t max() { +if (typeid(T) == typeid(uint64_t)) { + return SNA32_MAX; +} +if (typeid(T) == typeid(uint32_t)) { + return SNA16_MAX; +} +throw std::out_of_range("Invalid type"); [Vu] OpenSAF code does not handle exception. Should use assertion instead. e.g: assert(0 && "Invalid data type"); [Thuan] I throw to do basic test, if assert() then cannot test the case. Btw, I think throw exception without try catch will also end with terminate? + } + uint64_t space() { +if (typeid(T) == typeid(uint64_t)) { + return SNA32_SPACE; +} +if (typeid(T) == typeid(uint32_t)) { + return SNA16_SPACE; +} +throw std::out_of_range("Invalid type"); [Vu] uint64_t space() { return max()/2;} [Thuan] As explain above, I don’t want calculate every time refer it. + } + + public: + _sna(): i(0) {} + _sna(const _sna ) { +i = t.i; + } + explicit _sna(const uint64_t ) { +if ((n < 0) || (n > (max()-1))) + throw std::out_of_range("Invalid initial value"); [Vu] Use assert() instead of throwing exception. [Thuan] Same above. +i = n; + } + _sna& operator=(const _sna ) { +// check for self-assignment +if ( == this) + return *this; +i = t.i; +return *this; + } + T v() const { +return i; + } + _sna& operator+=(const uint64_t& n) { +if ((n < 0) || (n > (space() - 1))) +
Re: [devel] [PATCH 1/1] amfd: fix calculating standby rank for SIrankedSU with non-unique rank [#3149]
Hi Alex, See my comment in line with [Thuan]. Best Regards, ThuanTr From: Alex Jones Sent: Saturday, February 8, 2020 4:10 AM To: Gary Lee ; Thuan Tran Cc: opensaf-devel@lists.sourceforge.net; Alex Jones Subject: [PATCH 1/1] amfd: fix calculating standby rank for SIrankedSU with non-unique rank [#3149] Standby rank which is passed to CSI set and protection group callbacks may not be accurate. If SIrankedSUs exist with non-unique ranks, AVD_SI::get_sisu_rank() is not traversing all the SUs at that rank to determine the standby rank. AVD_SI::get_sisu_rank() needs to traverse all the SUs at the particular rank. --- src/amf/amfd/si.cc | 24 1 file changed, 20 insertions(+), 4 deletions(-) diff --git a/src/amf/amfd/si.cc b/src/amf/amfd/si.cc index cd8be9479..df9d511f3 100644 --- a/src/amf/amfd/si.cc +++ b/src/amf/amfd/si.cc @@ -339,7 +339,7 @@ void AVD_SI::update_sisu_rank(const std::string& suname, uint32_t newRank) { } uint32_t AVD_SI::get_sisu_rank(const std::string& suname) const { - uint32_t rank{}; + uint32_t rank{}, currentRank{}; TRACE_ENTER2("%s", suname.c_str()); @@ -348,11 +348,27 @@ uint32_t AVD_SI::get_sisu_rank(const std::string& suname) const { susi->su->name.c_str(), susi->si->name.c_str(), susi->state); - if (susi->state == SA_AMF_HA_STANDBY) + if (susi->state == SA_AMF_HA_STANDBY) { + // if there are SUs with the same rank we need to go through all of them + if (currentRank) { + const AVD_SIRANKEDSU *sirankedsu{get_si_ranked_su(susi->su->name)}; + if (!sirankedsu || + (sirankedsu && sirankedsu->get_sa_amf_rank() != currentRank)) { [Thuan] '!A || (A && B)' is equivalent to '!A || B' + break; + } + } + rank++; + } - if (suname == susi->su->name) - break; + if (suname == susi->su->name) { + // see if there are any other SUs at this same rank + const AVD_SIRANKEDSU *sirankedsu{get_si_ranked_su(susi->su->name)}; + if (sirankedsu) + currentRank = sirankedsu->get_sa_amf_rank(); + else + break; + } } TRACE_LEAVE(); -- 2.21.1 Notice: This e-mail together with any attachments may contain information of Ribbon Communications Inc. that is confidential and/or proprietary for the sole use of the intended recipient. Any review, disclosure, reliance or distribution by others or forwarding without express permission is strictly prohibited. If you are not the intended recipient, please notify the sender immediately and then delete all copies, including any attachments. ___ Opensaf-devel mailing list Opensaf-devel@lists.sourceforge.net https://lists.sourceforge.net/lists/listinfo/opensaf-devel
Re: [devel] [PATCH 5/5] build: fix compile errors with gcc 9.x [#3134]
Hi Alex, OK, you can keep strncpy with len + 1. No more comment from me. From: Alex Jones Sent: Tuesday, February 4, 2020 9:40 PM To: Thuan Tran ; Vu Minh Nguyen Cc: opensaf-devel@lists.sourceforge.net Subject: Re: [PATCH 5/5] build: fix compile errors with gcc 9.x [#3134] Hi ThuanTr, I will add fclose(). Good catch. We can't leave the original code in SmfUtils.cc because it fails to compile in gcc 9.x. The compiler complains that you are only copying the length of the string, so the output is not null terminated (even though the next line null terminates it). We could change the code to use memcpy instead. That would make it clearer that we are not intending to null terminate with the function call, and are doing it ourselves in the next line. Alex On 2/3/20 9:28 PM, Tran Thuan wrote: NOTICE: This email was received from an EXTERNAL sender Hi Alex, About test_ntf_imcn.cc, please update following too Since you add “return” then static code check report leak “ f ”. @@ -6202,6 +6202,7 @@ __attribute__((constructor)) static void ntf_imcn_constructor(void) { snprintf(cp_cmd, sizeof(cp_cmd), "cp "); if ((strlen(line) - 1) > (sizeof(cp_cmd) - sizeof("cp "))) { printf("line: %s too long", line); + fclose(f); return; } About SmfUtils.cc: - strncpy(*((SaStringT *)*i_value), i_str, len - 1); + strncpy(*((SaStringT *)*i_value), i_str, len + 1); (*((SaStringT *)*i_value))[len] = '\0'; => strncpy with “len + 1” then later overwrite with ‘\0’. I suggest strncpy with “len” as original code to avoid redundant changes. Best Regards, ThuanTr From: Alex Jones <mailto:ajo...@rbbn.com> Sent: Monday, February 3, 2020 10:39 PM To: thuan.t...@dektech.com.au<mailto:thuan.t...@dektech.com.au>; vu.m.ngu...@dektech.com.au<mailto:vu.m.ngu...@dektech.com.au> Cc: opensaf-devel@lists.sourceforge.net<mailto:opensaf-devel@lists.sourceforge.net>; Alex Jones <mailto:ajo...@rbbn.com> Subject: [PATCH 5/5] build: fix compile errors with gcc 9.x [#3134] Rework fixes in NTF and SMF. --- src/ntf/apitest/test_ntf_imcn.cc | 2 +- src/smf/smfd/SmfUtils.cc | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/src/ntf/apitest/test_ntf_imcn.cc b/src/ntf/apitest/test_ntf_imcn.cc index 51b9076c6..04f155074 100644 --- a/src/ntf/apitest/test_ntf_imcn.cc +++ b/src/ntf/apitest/test_ntf_imcn.cc @@ -1140,7 +1140,7 @@ static SaAisErrorT set_add_info( >additionalInfo[idx].infoValue); if (error == SA_AIS_OK) { strcpy(reinterpret_cast(temp), infoValue); - temp[strlen(infoValue) - 1] = '\0'; + //temp[strlen(infoValue)] = '\0'; nHeader->additionalInfo[idx].infoId = infoId; nHeader->additionalInfo[idx].infoType = SA_NTF_VALUE_STRING; } diff --git a/src/smf/smfd/SmfUtils.cc b/src/smf/smfd/SmfUtils.cc index 2d539e7c2..f1593b4cf 100644 --- a/src/smf/smfd/SmfUtils.cc +++ b/src/smf/smfd/SmfUtils.cc @@ -993,7 +993,7 @@ bool smf_stringToValue(SaImmValueTypeT i_type, SaImmAttrValueT *i_value, len = strlen(i_str); *i_value = malloc(sizeof(SaStringT)); *((SaStringT *)*i_value) = (SaStringT)malloc(len + 1); - strncpy(*((SaStringT *)*i_value), i_str, len - 1); + strncpy(*((SaStringT *)*i_value), i_str, len + 1); (*((SaStringT *)*i_value))[len] = '\0'; break; case SA_IMM_ATTR_SAANYT: -- 2.21.1 Notice: This e-mail together with any attachments may contain information of Ribbon Communications Inc. that is confidential and/or proprietary for the sole use of the intended recipient. Any review, disclosure, reliance or distribution by others or forwarding without express permission is strictly prohibited. If you are not the intended recipient, please notify the sender immediately and then delete all copies, including any attachments. ___ Opensaf-devel mailing list Opensaf-devel@lists.sourceforge.net https://lists.sourceforge.net/lists/listinfo/opensaf-devel
Re: [devel] [PATCH 1/1] smf: campaign is executing forever until cluster reset [#1353]
Hi Lennart, Thanks for review. I will update regarding to 2nd comment about one line code of IF. But I still not catch up your 1st comment about change to 5 seconds. Current: timeout = 10s sleep(2s) timeout--; //reduce 1 My change: timeout = 10s sleep(2s) timeout -= 2; //reduce 2 Your comment: timeout = 5s //change this, right? sleep(1s) //change this, right? timeout--; Regards, Thuan (from home) Quoting Lennart Lund : Hi Thuan I have some comments, see below snippet from the code in function getNodeDestination() in SmfUtils.cc I have not tested // Try to get the node director for a while. If the nodes reboot very fast // after a cluster reboot, it could happend the rebooted nodes comes up before // the last one is rebooted. This could make the campaign to fail when the // campaign continue. if (strcmp(className, "SaClmNode") == 0) { int timeout = 10 * ONE_SECOND; while (smfnd_for_name(i_node.c_str(), o_nodeDest) == false) { if (timeout <= 0) { LOG_NO("Failed to get node dest for clm node %s", i_node.c_str()); return false; } struct timespec time = {2 * ONE_SECOND, 0}; osaf_nanosleep(); // [Lennart] This will in practice change the timeout to 5 seconds and // to do it like this is confusing and requires a reader to not miss this code line // It is likely that a reader will believe it is still 10 seconds // It is better to change the timeout setting above to 5 * ONE_SECOND // timeout -= 2; // [Lennart] Even if the following code line is not changed by you but is not // a good way to write code. First elapsedTime is not a boolean and // secondly no if statements should be written as one liner like this // Should be changed to: // if (elapsedTime != 0) { // *elapsedTime = *elapsedTime + 2 * ONE_SECOND; // } // It is good to fix things like this if it is found in a place where other // changes are done if (elapsedTime) *elapsedTime = *elapsedTime + 2 * ONE_SECOND; if (maxWaitTime != -1) { // [Lennart] The same problem as above with elapsedTime as above // Change to (elapsedTime != 0) if ((elapsedTime) && (*elapsedTime >= maxWaitTime)) { LOG_NO("Failed to get node dest for clm node %s", i_node.c_str()); return false; } } } LOG_NO("%s: className '%s'", __FUNCTION__, className); return true; Thanks Lennart -Original Message- From: thuan.tran Sent: den 25 september 2018 09:04 To: Lennart Lund ; Gary Lee Cc: opensaf-devel@lists.sourceforge.net; Thuan Tran Subject: [PATCH 1/1] smf: campaign is executing forever until cluster reset [#1353] The function getNodeDestination() reset elapsedTime to zero cause the node reboot timeout at waitForNodeDestination() never reach. If scenario that node reboot cannot come back then campaign is stuck in executing forever until cluster reset. --- src/smf/smfd/SmfUpgradeStep.cc | 1 + src/smf/smfd/SmfUtils.cc | 11 --- 2 files changed, 5 insertions(+), 7 deletions(-) diff --git a/src/smf/smfd/SmfUpgradeStep.cc b/src/smf/smfd/SmfUpgradeStep.cc index 4c0ddd192..80da668de 100644 --- a/src/smf/smfd/SmfUpgradeStep.cc +++ b/src/smf/smfd/SmfUpgradeStep.cc @@ -2399,6 +2399,7 @@ bool SmfUpgradeStep::nodeReboot() { "SmfUpgradeStep::nodeReboot: Waiting to get node destination with increased UP counter"); while (true) { +elapsedTime = 0; for (nodeIt = rebootedNodeList.begin(); nodeIt != rebootedNodeList.end();) { if (getNodeDestination((*nodeIt).node_name, , , -1)) { diff --git a/src/smf/smfd/SmfUtils.cc b/src/smf/smfd/SmfUtils.cc index 915c086a5..4ac5af163 100644 --- a/src/smf/smfd/SmfUtils.cc +++ b/src/smf/smfd/SmfUtils.cc @@ -95,9 +95,6 @@ bool getNodeDestination(const std::string _node, SmfndNodeDest *o_nodeDest, TRACE("Find destination for node '%s'", i_node.c_str()); - if (elapsedTime) // Initialize elapsedTime to zero. -*elapsedTime = 0; - /* It seems SaAmfNode objects can be stored, but the code * indicates that SaClmNode's are expected. Anyway an attempt * to go for it is probably faster that examining IMM classes @@ -133,10 +130,10 @@ bool getNodeDestination(const std::string _node, SmfndNodeDest *o_nodeDest, } struct timespec time = {2 * ONE_SECOND, 0}; osaf_nanosleep(); - timeout--; + timeout -= 2; if (elapsedTime) *elapsedTime = *elapsedTime + 2 * ONE_SECOND; if (maxWaitTime != -1) { -if (*elapsedTime >= maxWaitTime) { +if ((elapsedTime) && (*elapsedTime >= maxWaitTime)) { LOG_NO("Failed to get node dest for clm node %s", i_node.c_str()); return false; } @@ -165,11 +162,11 @@ bool getNodeDestination(const std::