Hi Mathi,
Please see my comments inline, marked with [HansN].
/Thanks HansN
On 12/19/2016 01:05 PM, Mathivanan Naickan Palanivelu wrote:
> Hi Hans,
>
> Quick comments
>
> (a) Remove the comma below:
> + {"CLMD", "osafclmd.fifo", -1},
[HansN] I have removed the comma.
>
> (b) NULL is not a part of c++11 right.
[HansN] Yes, I have changed NULL to nullptr.
>
> (c) I'm not sure there is an obvious reason for converting this file to c++
[HansN] One reason for changing to C++ was to make it easier to use
e.g. FileNotify and the STL.
>
> Thanks,
> Mathi.
>
>> -----Original Message-----
>> From: Hans Nordeback [mailto:[email protected]]
>> Sent: Tuesday, December 13, 2016 7:27 PM
>> To: Mathivanan Naickan Palanivelu; Praveen Malviya; Ramesh Babu Betham;
>> [email protected]
>> Cc: [email protected]
>> Subject: [PATCH 1 of 1] nid: Use the FIFO monitoring for started services V2
>> [#2204]
>>
>> osaf/services/infrastructure/nid/Makefile.am | 2 +-
>> osaf/services/infrastructure/nid/nodeinit.c | 285
>> ++++++++++++++++++++++++++-
>> 2 files changed, 278 insertions(+), 9 deletions(-)
>>
>>
>> diff --git a/osaf/services/infrastructure/nid/Makefile.am
>> b/osaf/services/infrastructure/nid/Makefile.am
>> --- a/osaf/services/infrastructure/nid/Makefile.am
>> +++ b/osaf/services/infrastructure/nid/Makefile.am
>> @@ -31,7 +31,7 @@ opensafd_CPPFLAGS = \
>> $(AM_CPPFLAGS)
>>
>> opensafd_SOURCES = \
>> - nodeinit.c
>> + nodeinit.cc
>>
>> opensafd_LDADD = \
>> $(top_builddir)/osaf/libs/core/libopensaf_core.la
>> diff --git a/osaf/services/infrastructure/nid/nodeinit.c
>> b/osaf/services/infrastructure/nid/nodeinit.cc
>> rename from osaf/services/infrastructure/nid/nodeinit.c
>> rename to osaf/services/infrastructure/nid/nodeinit.cc
>> --- a/osaf/services/infrastructure/nid/nodeinit.c
>> +++ b/osaf/services/infrastructure/nid/nodeinit.cc
>> @@ -63,10 +63,15 @@
>> #include <configmake.h>
>> #include <rda_papi.h>
>> #include <logtrace.h>
>> +
>> +#include <string>
>> +#include <vector>
>> +
>> #include "osaf_poll.h"
>> #include "osaf_time.h"
>>
>> #include "nodeinit.h"
>> +#include "osaf/libs/core/cplusplus/base/file_notify.h"
>>
>> #define SETSIG(sa, sig, fun, flags) \
>> do { \
>> @@ -111,11 +116,46 @@ static uint32_t recovery_action(NID_SPAW static
>> uint32_t spawn_services(char *); static void nid_sleep(uint32_t);
>>
>> +/* Functions used for service monitoring */ static uint32_t
>> +create_svc_monitor_thread(void); static void* svc_monitor_thread(void
>> +*fd); static int handle_data_request(struct pollfd *fds, const
>> +std::string &nid_name); static void handle_svc_exit(int fd); static
>> +std::string get_svc_name(int fd); static int start_monitor_svc(const
>> +char *svc);
>> +
>> +/* Data declarations for service monitoring */ static int svc_mon_fd =
>> +-1; static int next_svc_fds_slot = 0;
>> +
>> +struct SvcMap {
>> + std::string nid_name;
>> + std::string fifo_file;
>> + int fifo_fd;
>> +};
>> +
>> +static std::vector<SvcMap> svc_map = {
>> + {"AMFD", "osafamfd.fifo", -1},
>> + {"TRANSPORT", "osaftransportd.fifo", -1},
>> + {"CLMNA", "osafclmna.fifo", -1},
>> + {"RDED", "osafrded.fifo", -1},
>> + {"HLFM", "osaffmd.fifo", -1},
>> + {"IMMD", "osafimmd.fifo", -1},
>> + {"IMMND", "osafimmnd.fifo", -1},
>> + {"LOGD", "osaflogd.fifo", -1},
>> + {"NTFD", "osafntfd.fifo", -1},
>> + {"PLMD", "osafplmd.fifo", -1},
>> + {"CLMD", "osafclmd.fifo", -1},
>> +};
>> +static const std::string fifo_dir = PKGLOCALSTATEDIR; const int
>> +kMaxNumOfFds = 40; const int kTenSecondsInMilliseconds = 10000;
>> +
>> /* List of recovery strategies */
>> NID_FUNC recovery_funcs[] = { spawn_wait }; NID_FORK_FUNC
>> fork_funcs[] = { fork_process, fork_script, fork_daemon };
>>
>> -char *nid_recerr[NID_MAXREC][4] = {
>> +const char *nid_recerr[NID_MAXREC][4] = {
>> {"Trying To RESPAWN", "Could Not RESPAWN", "Succeeded To
>> RESPAWN", "FAILED TO RESPAWN"},
>> {"Trying To RESET", "Faild to RESET", "suceeded To RESET", "FAILED
>> AFTER RESTART"} }; @@ -167,10 +207,10 @@ char *gettoken(char **str,
>> uint32_t tok)
>> return (NULL);
>> }
>>
>> - while ((*p != tok) && (*p != '\n') && *p)
>> + while ((*p != static_cast<int>(tok)) && (*p != '\n') && *p)
>> p++;
>>
>> - if ((*p == tok) || (*p == '\n')) {
>> + if ((*p == static_cast<int>(tok)) || (*p == '\n')) {
>> *p++ = 0;
>> *str = p;
>> }
>> @@ -522,7 +562,7 @@ uint32_t parse_nodeinit_conf(char *strbu
>> NID_SPAWN_INFO *childinfo;
>> char buff[256], sbuf[200], *ch, *ch1, tmp[30], nidconf[256];
>> uint32_t lineno = 0, retry = 0;
>> - struct nid_resetinfo info = { {""}, -1 };
>> + struct nid_resetinfo info = { {""}, static_cast<uint32_t>(-1) };
>> FILE *file, *ntfile;
>>
>> TRACE_ENTER();
>> @@ -565,7 +605,7 @@ uint32_t parse_nodeinit_conf(char *strbu
>> }
>>
>> /* Allocate mem for new child info */
>> - while ((childinfo = malloc(sizeof(NID_SPAWN_INFO))) ==
>> NULL) {
>> + while ((childinfo =
>> +reinterpret_cast<NID_SPAWN_INFO*>(malloc(sizeof(NID_SPAWN_INFO))
>> )) ==
>> +NULL) {
>> if (retry++ == 5) {
>> sprintf(strbuf, "FAILURE: Out of memory\n");
>> return NCSCC_RC_FAILURE;
>> @@ -994,6 +1034,8 @@ uint32_t spawn_wait(NID_SPAWN_INFO *serv
>> break;
>> }
>>
>> + waitpid(pid, NULL, WNOHANG);
>> +
>> /* Read the message from FIFO and fill in structure. */
>> while ((n = read(select_fd, buff1, sizeof(buff1))) <= 0) {
>> if (errno == EINTR) {
>> @@ -1263,7 +1305,7 @@ uint32_t recovery_action(NID_SPAWN_INFO
>> if (service->recovery_matrix[opt].retry_count == 0) {
>> if (count != 0)
>> LOG_ER("%s", nid_recerr[opt][3]);
>> - opt++;
>> + opt =
>> static_cast<NID_RECOVERY_OPT>(static_cast<int>(opt) +1);
>> continue;
>> }
>> }
>> @@ -1285,8 +1327,7 @@ uint32_t recovery_action(NID_SPAWN_INFO
>> * Return Values : NCSCC_RC_SUCCESS/NCSCC_RC_FAILURE.
>> *
>> *
>> *
>>
>> **********************************************************
>> *****************/
>> -uint32_t spawn_services(char *strbuf)
>> -{
>> +uint32_t spawn_services(char *strbuf) {
>> NID_SPAWN_INFO *service;
>> NID_CHILD_LIST sp_list = spawn_list;
>> char sbuff[100];
>> @@ -1322,6 +1363,10 @@ uint32_t spawn_services(char *strbuf)
>> if (strlen(sbuff) > 0)
>> LOG_NO("%s", sbuff);
>>
>> + if (start_monitor_svc(service->serv_name) !=
>> NCSCC_RC_SUCCESS) {
>> + exit(EXIT_FAILURE);
>> + }
>> +
>> sp_list.head = sp_list.head->next;
>> }
>>
>> @@ -1330,6 +1375,225 @@ uint32_t spawn_services(char *strbuf)
>> return NCSCC_RC_SUCCESS;
>> }
>>
>> +int start_monitor_svc(const char *svc) {
>> + int rc = NCSCC_RC_SUCCESS;
>> + char svc_name[NID_MAXSNAME];
>> +
>> + TRACE_ENTER2("service: %s", svc);
>> +
>> + strncpy(svc_name, svc, sizeof(svc_name));
>> +
>> + while (true) {
>> + ssize_t write_rc = write(svc_mon_fd, svc_name, strlen(svc_name));
>> + if (write_rc == -1) {
>> + if (errno == EINTR) {
>> + continue;
>> + } else {
>> + LOG_ER("Failed to start sevice %s, error: %s",
>> + svc_name, strerror(errno));
>> + rc = NCSCC_RC_FAILURE;
>> + break;
>> + }
>> + }
>> + break;
>> + }
>> + TRACE_LEAVE();
>> + return rc;
>> +}
>> +
>> +int handle_data_request(struct pollfd *fds, const std::string
>> +&nid_name) {
>> + base::FileNotify file_notify;
>> + base::FileNotify::FileNotifyErrors notify_rc;
>> + int rc = NCSCC_RC_SUCCESS;
>> + int fifo_fd = -1;
>> +
>> + TRACE_ENTER2("service: %s", nid_name.c_str());
>> +
>> + for (auto &svc : svc_map) {
>> + if (nid_name == svc.nid_name) {
>> + std::string fifo_file = fifo_dir + "/" + svc.fifo_file;
>> + notify_rc = file_notify.WaitForFileCreation(fifo_file,
>> +
>> kTenSecondsInMilliseconds);
>> + if (notify_rc != base::FileNotify::FileNotifyErrors::kOK) {
>> + LOG_ER("fifo file %s does not exist, notify rc: %d",
>> + fifo_file.c_str(), notify_rc);
>> + rc = NCSCC_RC_FAILURE;
>> + break;
>> + }
>> + int retry_cnt = 0;
>> + do {
>> + if (retry_cnt > 0) {
>> + osaf_nanosleep(&kHundredMilliseconds);
>> + }
>> + fifo_fd = open(fifo_file.c_str(), O_WRONLY|O_NONBLOCK);
>> + } while ((fifo_fd == -1) &&
>> + (retry_cnt++ < 5 && (errno == EINTR || errno ==
>> + ENXIO)));
>> +
>> + if (fifo_fd == -1) {
>> + LOG_ER("Failed to open %s, error: %s", fifo_file.c_str(),
>> + strerror(errno));
>> + rc = NCSCC_RC_FAILURE;
>> + break;
>> + } else {
>> + svc.fifo_fd = fifo_fd;
>> + fds[next_svc_fds_slot].fd = fifo_fd;
>> + fds[next_svc_fds_slot].events = POLLIN;
>> + next_svc_fds_slot++;
>> + LOG_NO("Monitoring of %s started", nid_name.c_str());
>> + break;
>> + }
>> + }
>> + }
>> + TRACE_LEAVE();
>> + return rc;
>> +}
>> +
>> +std::string get_svc_name(int fd) {
>> + std::string svc_name;
>> +
>> + for (auto const& svc : svc_map) {
>> + if (fd == svc.fifo_fd) {
>> + svc_name = svc.nid_name;
>> + break;
>> + }
>> + }
>> + return svc_name;
>> +}
>> +
>> +void handle_svc_exit(int fd) {
>> + const std::string &svc_name = get_svc_name(fd);
>> +
>> + if (svc_name.size() != 0) {
>> + LOG_ER("Service %s has unexpectedly crashed. Unable to continue,
>> exiting",
>> + svc_name.c_str());
>> + exit(EXIT_FAILURE);
>> + } else {
>> + LOG_NO("fd %d was not found in service map", fd);
>> + }
>> +}
>> +
>> +/*********************************************************
>> *******************
>> + * Name : svc_monitor_thread
>> *
>> + *
>> *
>> + * Description : creates the service monitor thread
>> *
>> + *
>> *
>> + * Arguments : -
>> *
>> + *
>> *
>> + * Return Values : NCSCC_RC_SUCCESS/NCSCC_RC_FAILURE.
>> *
>> + *
>> *
>> +
>> +*********************************************************
>> **************
>> +****/
>> +void* svc_monitor_thread(void *fd) {
>> + char nid_name[NID_MAXSNAME];
>> + int svc_mon_thr_fd = *(reinterpret_cast<int*>(fd));
>> + enum {
>> + FD_SVC_MON_THR = 0,
>> + };
>> +
>> + struct pollfd *fds;
>> +
>> + fds = new pollfd[sizeof(pollfd) * kMaxNumOfFds]; osafassert(fds !=
>> + NULL); ssize_t read_rc = -1;
>> +
>> + fds[FD_SVC_MON_THR].fd = svc_mon_thr_fd;
>> fds[FD_SVC_MON_THR].events
>> + = POLLIN; next_svc_fds_slot++;
>> +
>> + while (true) {
>> + unsigned rc = osaf_poll(fds, next_svc_fds_slot, -1);
>> + if (rc > 0) {
>> + // check if any monitored service has exit
>> + for (int i = next_svc_fds_slot-1; i > 0; --i) {
>> + if ((fds[i].revents & POLLIN) ||
>> + (fds[i].revents & POLLHUP) ||
>> + (fds[i].revents & POLLERR)) {
>> + handle_svc_exit(fds[i].fd);
>> + }
>> + }
>> +
>> + if (fds[FD_SVC_MON_THR].revents & POLLIN) {
>> + while (true) {
>> + read_rc = read(svc_mon_thr_fd, nid_name, NID_MAXSNAME);
>> + if (read_rc == -1) {
>> + if (errno == EINTR) {
>> + continue;
>> + } else {
>> + LOG_ER("Failed to read on socketpair descriptor: %s",
>> + strerror(errno));
>> + exit(EXIT_FAILURE);
>> + }
>> + }
>> + osafassert(read_rc < NID_MAXSNAME);
>> + nid_name[read_rc] = '\0';
>> + break;
>> + }
>> + if (handle_data_request(fds, nid_name) != NCSCC_RC_SUCCESS) {
>> + LOG_ER("Failed to start monitoring for service %s, exiting",
>> + nid_name);
>> + exit(EXIT_FAILURE);
>> + }
>> + }
>> + } else {
>> + LOG_ER("osaf_poll timed out and no descriptors are ready, exiting");
>> + exit(EXIT_FAILURE);
>> + }
>> + }
>> + delete [] fds;
>> +}
>> +
>> +/*********************************************************
>> *******************
>> + * Name : create_svc_monitor_thread
>> *
>> + *
>> *
>> + * Description : creates the service monitor thread
>> *
>> + *
>> *
>> + * Arguments : -
>> *
>> + *
>> *
>> + * Return Values : NCSCC_RC_SUCCESS/NCSCC_RC_FAILURE.
>> *
>> + *
>> *
>> +
>> +*********************************************************
>> **************
>> +****/ uint32_t create_svc_monitor_thread(void) {
>> + int s_pair[2];
>> + int svc_mon_thr_fd = -1;
>> + pthread_t thread;
>> + pthread_attr_t attr;
>> +
>> + TRACE_ENTER();
>> +
>> + if (socketpair(AF_UNIX, SOCK_STREAM|SOCK_CLOEXEC, 0, s_pair) == -1) {
>> + LOG_ER("socketpair FAILED: %s", strerror(errno));
>> + return NCSCC_RC_FAILURE;
>> + }
>> +
>> + svc_mon_fd = s_pair[0];
>> + svc_mon_thr_fd = s_pair[1];
>> +
>> + TRACE("sd1: %d sd2: %d", svc_mon_fd, svc_mon_thr_fd);
>> +
>> + if (pthread_attr_init(&attr) != 0) {
>> + LOG_ER("pthread_attr_init FAILED: %s", strerror(errno));
>> + return NCSCC_RC_FAILURE;
>> + }
>> +
>> + if (pthread_attr_setdetachstate(&attr, PTHREAD_CREATE_DETACHED) !=
>> 0) {
>> + LOG_ER("pthread_setdetachstate FAILED: %s", strerror(errno));
>> + return NCSCC_RC_FAILURE;
>> + }
>> +
>> + if (pthread_create(&thread, &attr, svc_monitor_thread,
>> + reinterpret_cast<void*>(&svc_mon_thr_fd)) != 0) {
>> + LOG_ER("pthread_create FAILED: %s", strerror(errno));
>> + return NCSCC_RC_FAILURE;
>> + }
>> +
>> + if (pthread_attr_destroy(&attr) != 0) {
>> + LOG_ER("pthread_attr_destroy FAILED: %s", strerror(errno));
>> + return NCSCC_RC_FAILURE;
>> + }
>> +
>> + TRACE_LEAVE();
>> + return NCSCC_RC_SUCCESS;
>> +}
>> +
>>
>> /**********************************************************
>> ******************
>> * Name : main
>> *
>> *
>> *
>> @@ -1365,6 +1629,11 @@ int main(int argc, char *argv[])
>> exit(EXIT_FAILURE);
>> }
>>
>> + if (create_svc_monitor_thread() != NCSCC_RC_SUCCESS) {
>> + LOG_ER("Failed to create service monitor thread, exiting");
>> + exit(EXIT_FAILURE);
>> + }
>> +
>> if (parse_nodeinit_conf(sbuf) != NCSCC_RC_SUCCESS) {
>> LOG_ER("Failed to parse file %s. Exiting", sbuf);
>> exit(EXIT_FAILURE);
------------------------------------------------------------------------------
Check out the vibrant tech community on one of the world's most
engaging tech sites, SlashDot.org! http://sdm.link/slashdot
_______________________________________________
Opensaf-devel mailing list
[email protected]
https://lists.sourceforge.net/lists/listinfo/opensaf-devel