osaf/services/infrastructure/nid/Makefile.am | 2 +- osaf/services/infrastructure/nid/nodeinit.c | 285 ++++++++++++++++++++++++++- 2 files changed, 278 insertions(+), 9 deletions(-)
diff --git a/osaf/services/infrastructure/nid/Makefile.am b/osaf/services/infrastructure/nid/Makefile.am --- a/osaf/services/infrastructure/nid/Makefile.am +++ b/osaf/services/infrastructure/nid/Makefile.am @@ -31,7 +31,7 @@ opensafd_CPPFLAGS = \ $(AM_CPPFLAGS) opensafd_SOURCES = \ - nodeinit.c + nodeinit.cc opensafd_LDADD = \ $(top_builddir)/osaf/libs/core/libopensaf_core.la diff --git a/osaf/services/infrastructure/nid/nodeinit.c b/osaf/services/infrastructure/nid/nodeinit.cc rename from osaf/services/infrastructure/nid/nodeinit.c rename to osaf/services/infrastructure/nid/nodeinit.cc --- a/osaf/services/infrastructure/nid/nodeinit.c +++ b/osaf/services/infrastructure/nid/nodeinit.cc @@ -63,10 +63,15 @@ #include <configmake.h> #include <rda_papi.h> #include <logtrace.h> + +#include <string> +#include <vector> + #include "osaf_poll.h" #include "osaf_time.h" #include "nodeinit.h" +#include "osaf/libs/core/cplusplus/base/file_notify.h" #define SETSIG(sa, sig, fun, flags) \ do { \ @@ -111,11 +116,46 @@ static uint32_t recovery_action(NID_SPAW static uint32_t spawn_services(char *); static void nid_sleep(uint32_t); +/* Functions used for service monitoring */ +static uint32_t create_svc_monitor_thread(void); +static void* svc_monitor_thread(void *fd); +static int handle_data_request(struct pollfd *fds, const std::string &nid_name); +static void handle_svc_exit(int fd); +static std::string get_svc_name(int fd); +static int start_monitor_svc(const char *svc); + +/* Data declarations for service monitoring */ +static int svc_mon_fd = -1; +static int next_svc_fds_slot = 0; + +struct SvcMap { + std::string nid_name; + std::string fifo_file; + int fifo_fd; +}; + +static std::vector<SvcMap> svc_map = { + {"AMFD", "osafamfd.fifo", -1}, + {"TRANSPORT", "osaftransportd.fifo", -1}, + {"CLMNA", "osafclmna.fifo", -1}, + {"RDED", "osafrded.fifo", -1}, + {"HLFM", "osaffmd.fifo", -1}, + {"IMMD", "osafimmd.fifo", -1}, + {"IMMND", "osafimmnd.fifo", -1}, + {"LOGD", "osaflogd.fifo", -1}, 
+ {"NTFD", "osafntfd.fifo", -1}, + {"PLMD", "osafplmd.fifo", -1}, + {"CLMD", "osafclmd.fifo", -1}, +}; +static const std::string fifo_dir = PKGLOCALSTATEDIR; +const int kMaxNumOfFds = 40; +const int kTenSecondsInMilliseconds = 10000; + /* List of recovery strategies */ NID_FUNC recovery_funcs[] = { spawn_wait }; NID_FORK_FUNC fork_funcs[] = { fork_process, fork_script, fork_daemon }; -char *nid_recerr[NID_MAXREC][4] = { +const char *nid_recerr[NID_MAXREC][4] = { {"Trying To RESPAWN", "Could Not RESPAWN", "Succeeded To RESPAWN", "FAILED TO RESPAWN"}, {"Trying To RESET", "Faild to RESET", "suceeded To RESET", "FAILED AFTER RESTART"} }; @@ -167,10 +207,10 @@ char *gettoken(char **str, uint32_t tok) return (NULL); } - while ((*p != tok) && (*p != '\n') && *p) + while ((*p != static_cast<int>(tok)) && (*p != '\n') && *p) p++; - if ((*p == tok) || (*p == '\n')) { + if ((*p == static_cast<int>(tok)) || (*p == '\n')) { *p++ = 0; *str = p; } @@ -522,7 +562,7 @@ uint32_t parse_nodeinit_conf(char *strbu NID_SPAWN_INFO *childinfo; char buff[256], sbuf[200], *ch, *ch1, tmp[30], nidconf[256]; uint32_t lineno = 0, retry = 0; - struct nid_resetinfo info = { {""}, -1 }; + struct nid_resetinfo info = { {""}, static_cast<uint32_t>(-1) }; FILE *file, *ntfile; TRACE_ENTER(); @@ -565,7 +605,7 @@ uint32_t parse_nodeinit_conf(char *strbu } /* Allocate mem for new child info */ - while ((childinfo = malloc(sizeof(NID_SPAWN_INFO))) == NULL) { + while ((childinfo = reinterpret_cast<NID_SPAWN_INFO*>(malloc(sizeof(NID_SPAWN_INFO)))) == NULL) { if (retry++ == 5) { sprintf(strbuf, "FAILURE: Out of memory\n"); return NCSCC_RC_FAILURE; @@ -994,6 +1034,8 @@ uint32_t spawn_wait(NID_SPAWN_INFO *serv break; } + waitpid(pid, NULL, WNOHANG); + /* Read the message from FIFO and fill in structure. 
*/
 	while ((n = read(select_fd, buff1, sizeof(buff1))) <= 0) {
 		if (errno == EINTR) {
@@ -1263,7 +1305,7 @@ uint32_t recovery_action(NID_SPAWN_INFO
 			if (service->recovery_matrix[opt].retry_count == 0) {
 				if (count != 0)
 					LOG_ER("%s", nid_recerr[opt][3]);
-				opt++;
+				opt = static_cast<NID_RECOVERY_OPT>(static_cast<int>(opt) +1);
 				continue;
 			}
 		}
@@ -1285,8 +1327,7 @@ uint32_t recovery_action(NID_SPAWN_INFO
  * Return Values : NCSCC_RC_SUCCESS/NCSCC_RC_FAILURE.                       *
  *                                                                          *
  ***************************************************************************/
-uint32_t spawn_services(char *strbuf)
-{
+uint32_t spawn_services(char *strbuf) {
 	NID_SPAWN_INFO *service;
 	NID_CHILD_LIST sp_list = spawn_list;
 	char sbuff[100];
@@ -1322,6 +1363,10 @@ uint32_t spawn_services(char *strbuf)
 		if (strlen(sbuff) > 0)
 			LOG_NO("%s", sbuff);
 
+		if (start_monitor_svc(service->serv_name) != NCSCC_RC_SUCCESS) {
+			exit(EXIT_FAILURE);
+		}
+
 		sp_list.head = sp_list.head->next;
 	}
 
@@ -1330,6 +1375,225 @@ uint32_t spawn_services(char *strbuf)
 	return NCSCC_RC_SUCCESS;
 }
 
+int start_monitor_svc(const char *svc) {
+  int rc = NCSCC_RC_SUCCESS;
+  char svc_name[NID_MAXSNAME];
+
+  TRACE_ENTER2("service: %s", svc);
+
+  // snprintf() always NUL-terminates, so strlen() below stays in bounds
+  snprintf(svc_name, sizeof(svc_name), "%s", svc);
+  while (true) {
+    ssize_t write_rc = write(svc_mon_fd, svc_name, strlen(svc_name));
+    if (write_rc == -1) {
+      if (errno == EINTR) {
+        continue;
+      } else {
+        LOG_ER("Failed to start sevice %s, error: %s",
+               svc_name, strerror(errno));
+        rc = NCSCC_RC_FAILURE;
+        break;
+      }
+    }
+    break;
+  }
+  TRACE_LEAVE();
+  return rc;
+}
+
+int handle_data_request(struct pollfd *fds, const std::string &nid_name) {
+  base::FileNotify file_notify;
+  base::FileNotify::FileNotifyErrors notify_rc;
+  int rc = NCSCC_RC_SUCCESS;
+  int fifo_fd = -1;
+
+  TRACE_ENTER2("service: %s", nid_name.c_str());
+
+  for (auto &svc : svc_map) {  // a name missing from svc_map is ignored
+    if (nid_name == svc.nid_name) {
+      std::string fifo_file = fifo_dir + "/" + svc.fifo_file;
+      notify_rc = file_notify.WaitForFileCreation(fifo_file,
+                                                  kTenSecondsInMilliseconds);
+      if (notify_rc != base::FileNotify::FileNotifyErrors::kOK) {
+        LOG_ER("fifo file %s does not exist, notify rc: %d",
+               fifo_file.c_str(), static_cast<int>(notify_rc));
+        rc = NCSCC_RC_FAILURE;
+        break;
+      }
+      int retry_cnt = 0;
+      do {
+        if (retry_cnt > 0) {
+          osaf_nanosleep(&kHundredMilliseconds);
+        }
+        fifo_fd = open(fifo_file.c_str(), O_WRONLY|O_NONBLOCK);
+      } while ((fifo_fd == -1) &&
+               (retry_cnt++ < 5 && (errno == EINTR || errno == ENXIO)));
+
+      if (fifo_fd == -1) {
+        LOG_ER("Failed to open %s, error: %s", fifo_file.c_str(),
+               strerror(errno));
+        rc = NCSCC_RC_FAILURE;
+        break;
+      }
+      osafassert(next_svc_fds_slot < kMaxNumOfFds);  // fds is fixed-size
+      svc.fifo_fd = fifo_fd;
+      fds[next_svc_fds_slot].fd = fifo_fd;
+      fds[next_svc_fds_slot].events = POLLIN;
+      next_svc_fds_slot++;
+      LOG_NO("Monitoring of %s started", nid_name.c_str());
+      break;
+    }
+  }
+  TRACE_LEAVE();
+  return rc;
+}
+
+std::string get_svc_name(int fd) {
+  std::string svc_name;  // stays empty when fd is not found in svc_map
+
+  for (auto const& svc : svc_map) {
+    if (fd == svc.fifo_fd) {
+      svc_name = svc.nid_name;
+      break;
+    }
+  }
+  return svc_name;
+}
+
+void handle_svc_exit(int fd) {
+  const std::string &svc_name = get_svc_name(fd);
+
+  if (svc_name.size() != 0) {
+    LOG_ER("Service %s has unexpectedly crashed. Unable to continue, exiting",
+           svc_name.c_str());
+    exit(EXIT_FAILURE);
+  } else {
+    LOG_NO("fd %d was not found in service map", fd);
+  }
+}
+
+/****************************************************************************
+ * Name          : svc_monitor_thread                                       *
+ *                                                                          *
+ * Description   : thread body; starts monitoring of a service on request,  *
+ *                 ends the process if a monitored fifo reports an event    *
+ * Arguments     : fd - pointer to the thread's socketpair descriptor (int) *
+ *                                                                          *
+ * Return Values : none; the loop is only left through exit()
*
+ *                                                                          *
+ ***************************************************************************/
+void* svc_monitor_thread(void *fd) {
+  // fd points at this thread's end of the socketpair; every message read
+  // from it names a service whose fifo shall be added to the poll set.
+  enum { FD_SVC_MON_THR = 0 };  // slot of that socket in fds
+  char nid_name[NID_MAXSNAME];
+  int svc_mon_thr_fd = *(reinterpret_cast<int*>(fd));
+
+  // One pollfd per descriptor: size the array in elements, not in bytes
+  // (handle_data_request() appends an entry for each monitored service).
+  struct pollfd *fds = new pollfd[kMaxNumOfFds];
+  osafassert(fds != NULL);
+  ssize_t read_rc = -1;
+
+  fds[FD_SVC_MON_THR].fd = svc_mon_thr_fd;
+  fds[FD_SVC_MON_THR].events = POLLIN;
+  next_svc_fds_slot++;
+
+  while (true) {
+    unsigned rc = osaf_poll(fds, next_svc_fds_slot, -1);
+    if (rc > 0) {
+      // check if any monitored service has exited
+      for (int i = next_svc_fds_slot-1; i > 0; --i) {
+        if ((fds[i].revents & POLLIN) ||
+            (fds[i].revents & POLLHUP) ||
+            (fds[i].revents & POLLERR)) {
+          handle_svc_exit(fds[i].fd);
+        }
+      }
+
+      if (fds[FD_SVC_MON_THR].revents & POLLIN) {
+        while (true) {
+          // read one byte less than the buffer so the '\0' below fits
+          read_rc = read(svc_mon_thr_fd, nid_name, sizeof(nid_name) - 1);
+          if (read_rc == -1) {
+            if (errno == EINTR) {
+              continue;
+            }
+            LOG_ER("Failed to read on socketpair descriptor: %s",
+                   strerror(errno));
+            exit(EXIT_FAILURE);
+          }
+          osafassert(read_rc < NID_MAXSNAME);
+          nid_name[read_rc] = '\0';
+          break;
+        }
+        if (handle_data_request(fds, nid_name) != NCSCC_RC_SUCCESS) {
+          LOG_ER("Failed to start monitoring for service %s, exiting",
+                 nid_name);
+          exit(EXIT_FAILURE);
+        }
+      }
+    } else {
+      LOG_ER("osaf_poll timed out and no descriptors are ready, exiting");
+      exit(EXIT_FAILURE);
+    }
+  }
+  delete [] fds;  // not reached: the loop is only left through exit()
+}
+
+/****************************************************************************
+ * Name          : create_svc_monitor_thread                                *
+ *                                                                          *
+ * Description   : creates the service monitor thread                       *
+ *                                                                          *
+ * Arguments     : -                                                        *
+ *                                                                          *
+ * Return Values : NCSCC_RC_SUCCESS/NCSCC_RC_FAILURE.
*
+ *                                                                          *
+ ***************************************************************************/
+uint32_t create_svc_monitor_thread(void) {
+  int s_pair[2];
+  static int svc_mon_thr_fd = -1;  // the thread reads it after we return
+  pthread_t thread;
+  pthread_attr_t attr;
+
+  TRACE_ENTER();
+  // SOCK_SEQPACKET: each service name written arrives as one whole message
+  if (socketpair(AF_UNIX, SOCK_SEQPACKET|SOCK_CLOEXEC, 0, s_pair) == -1) {
+    LOG_ER("socketpair FAILED: %s", strerror(errno));
+    return NCSCC_RC_FAILURE;
+  }
+
+  svc_mon_fd = s_pair[0];
+  svc_mon_thr_fd = s_pair[1];
+
+  TRACE("sd1: %d sd2: %d", svc_mon_fd, svc_mon_thr_fd);
+  // pthread_*() return the error number rather than setting errno
+  if (int rc = pthread_attr_init(&attr)) {
+    LOG_ER("pthread_attr_init FAILED: %s", strerror(rc));
+    return NCSCC_RC_FAILURE;
+  }
+
+  if (int rc = pthread_attr_setdetachstate(&attr, PTHREAD_CREATE_DETACHED)) {
+    LOG_ER("pthread_setdetachstate FAILED: %s", strerror(rc));
+    return NCSCC_RC_FAILURE;
+  }
+
+  if (int rc = pthread_create(&thread, &attr, svc_monitor_thread,
+                              &svc_mon_thr_fd)) {
+    LOG_ER("pthread_create FAILED: %s", strerror(rc));
+    return NCSCC_RC_FAILURE;
+  }
+
+  if (int rc = pthread_attr_destroy(&attr)) {
+    LOG_ER("pthread_attr_destroy FAILED: %s", strerror(rc));
+    return NCSCC_RC_FAILURE;
+  }
+
+  TRACE_LEAVE();
+  return NCSCC_RC_SUCCESS;
+}
+
 /****************************************************************************
  * Name          : main                                                     *
  *                                                                          *
@@ -1365,6 +1629,11 @@ int main(int argc, char *argv[])
 		exit(EXIT_FAILURE);
 	}
 
+	if (create_svc_monitor_thread() != NCSCC_RC_SUCCESS) {
+		LOG_ER("Failed to create service monitor thread, exiting");
+		exit(EXIT_FAILURE);
+	}
+
 	if (parse_nodeinit_conf(sbuf) != NCSCC_RC_SUCCESS) {
 		LOG_ER("Failed to parse file %s. Exiting", sbuf);
 		exit(EXIT_FAILURE);
------------------------------------------------------------------------------
Check out the vibrant tech community on one of the world's most engaging tech sites, SlashDot.org!
http://sdm.link/slashdot _______________________________________________ Opensaf-devel mailing list Opensaf-devel@lists.sourceforge.net https://lists.sourceforge.net/lists/listinfo/opensaf-devel