osaf/services/infrastructure/nid/Makefile.am |    2 +-
 osaf/services/infrastructure/nid/nodeinit.c  |  285 ++++++++++++++++++++++++++-
 2 files changed, 278 insertions(+), 9 deletions(-)


diff --git a/osaf/services/infrastructure/nid/Makefile.am 
b/osaf/services/infrastructure/nid/Makefile.am
--- a/osaf/services/infrastructure/nid/Makefile.am
+++ b/osaf/services/infrastructure/nid/Makefile.am
@@ -31,7 +31,7 @@ opensafd_CPPFLAGS = \
        $(AM_CPPFLAGS)
 
 opensafd_SOURCES = \
-       nodeinit.c
+       nodeinit.cc
 
 opensafd_LDADD = \
        $(top_builddir)/osaf/libs/core/libopensaf_core.la
diff --git a/osaf/services/infrastructure/nid/nodeinit.c 
b/osaf/services/infrastructure/nid/nodeinit.cc
rename from osaf/services/infrastructure/nid/nodeinit.c
rename to osaf/services/infrastructure/nid/nodeinit.cc
--- a/osaf/services/infrastructure/nid/nodeinit.c
+++ b/osaf/services/infrastructure/nid/nodeinit.cc
@@ -63,10 +63,15 @@
 #include <configmake.h>
 #include <rda_papi.h>
 #include <logtrace.h>
+
+#include <string>
+#include <vector>
+
 #include "osaf_poll.h"
 #include "osaf_time.h"
 
 #include "nodeinit.h"
+#include "osaf/libs/core/cplusplus/base/file_notify.h"
 
 #define SETSIG(sa, sig, fun, flags) \
        do { \
@@ -111,11 +116,46 @@ static uint32_t recovery_action(NID_SPAW
 static uint32_t spawn_services(char *);
 static void nid_sleep(uint32_t);
 
+/* Functions used for service monitoring */
+static uint32_t create_svc_monitor_thread(void);
+static void* svc_monitor_thread(void *fd);
+static int handle_data_request(struct pollfd *fds, const std::string 
&nid_name);
+static void handle_svc_exit(int fd);
+static std::string get_svc_name(int fd);
+static int start_monitor_svc(const char *svc);
+
+/* Data declarations for service monitoring */
+/* Socketpair end that start_monitor_svc() writes service names to; it is
+ * assigned in create_svc_monitor_thread(). */
+static int svc_mon_fd = -1;
+/* Index of the next unused entry in the monitor thread's pollfd array. The
+ * same value is passed to osaf_poll() as the number of entries to poll. */
+static int next_svc_fds_slot = 0;
+
+/* Ties a service name to the fifo that is opened in order to monitor it. */
+struct SvcMap {
+  /* Name compared with the name received over the socketpair. */
+  std::string nid_name;
+  /* Fifo file name; handle_data_request() prefixes it with fifo_dir. */
+  std::string fifo_file;
+  /* Descriptor of the opened fifo; stays -1 until the fifo has been opened. */
+  int fifo_fd;
+};
+
+/* Services that can be monitored. A name that is not listed here makes
+ * handle_data_request() return success without registering anything. */
+static std::vector<SvcMap> svc_map = {
+  {"AMFD", "osafamfd.fifo", -1},
+  {"TRANSPORT", "osaftransportd.fifo", -1},
+  {"CLMNA", "osafclmna.fifo", -1},
+  {"RDED", "osafrded.fifo", -1},
+  {"HLFM", "osaffmd.fifo", -1},
+  {"IMMD", "osafimmd.fifo", -1},
+  {"IMMND", "osafimmnd.fifo", -1},
+  {"LOGD", "osaflogd.fifo", -1},
+  {"NTFD", "osafntfd.fifo", -1},
+  {"PLMD", "osafplmd.fifo", -1},
+  {"CLMD", "osafclmd.fifo", -1},
+};
+/* Directory in which the fifo files above are looked up. */
+static const std::string fifo_dir = PKGLOCALSTATEDIR;
+/* Used to size the pollfd array allocated in svc_monitor_thread(). */
+const int kMaxNumOfFds = 40;
+/* Second argument to FileNotify::WaitForFileCreation() in
+ * handle_data_request(); presumably the longest time to wait for the fifo to
+ * appear -- TODO confirm against file_notify.h. */
+const int kTenSecondsInMilliseconds = 10000;
+
 /* List of recovery strategies */
 NID_FUNC recovery_funcs[] = { spawn_wait  };
 NID_FORK_FUNC fork_funcs[] = { fork_process, fork_script, fork_daemon };
 
-char *nid_recerr[NID_MAXREC][4] = {
+const char *nid_recerr[NID_MAXREC][4] = {
        {"Trying To RESPAWN", "Could Not RESPAWN", "Succeeded To RESPAWN", 
"FAILED TO RESPAWN"},
        {"Trying To RESET", "Faild to RESET", "suceeded To RESET", "FAILED 
AFTER RESTART"}
 };
@@ -167,10 +207,10 @@ char *gettoken(char **str, uint32_t tok)
                return (NULL);
        }
 
-       while ((*p != tok) && (*p != '\n') && *p)
+       while ((*p != static_cast<int>(tok)) && (*p != '\n') && *p)
                p++;
 
-       if ((*p == tok) || (*p == '\n')) {
+       if ((*p == static_cast<int>(tok)) || (*p == '\n')) {
                *p++ = 0;
                *str = p;
        }
@@ -522,7 +562,7 @@ uint32_t parse_nodeinit_conf(char *strbu
        NID_SPAWN_INFO *childinfo;
        char buff[256], sbuf[200], *ch, *ch1, tmp[30], nidconf[256];
        uint32_t lineno = 0, retry = 0;
-       struct nid_resetinfo info = { {""}, -1 };
+       struct nid_resetinfo info = { {""}, static_cast<uint32_t>(-1) };
        FILE *file, *ntfile;
 
        TRACE_ENTER();
@@ -565,7 +605,7 @@ uint32_t parse_nodeinit_conf(char *strbu
                }
 
                /* Allocate mem for new child info */
-               while ((childinfo = malloc(sizeof(NID_SPAWN_INFO))) == NULL) {
+               while ((childinfo = 
reinterpret_cast<NID_SPAWN_INFO*>(malloc(sizeof(NID_SPAWN_INFO)))) == NULL) {
                        if (retry++ == 5) {
                                sprintf(strbuf, "FAILURE: Out of memory\n");
                                return NCSCC_RC_FAILURE;
@@ -994,6 +1034,8 @@ uint32_t spawn_wait(NID_SPAWN_INFO *serv
                break;
        }
 
+       waitpid(pid, NULL, WNOHANG);
+
        /* Read the message from FIFO and fill in structure. */
        while ((n = read(select_fd, buff1, sizeof(buff1))) <= 0) {
                if (errno == EINTR) {
@@ -1263,7 +1305,7 @@ uint32_t recovery_action(NID_SPAWN_INFO 
                if (service->recovery_matrix[opt].retry_count == 0) {
                        if (count != 0)
                                LOG_ER("%s", nid_recerr[opt][3]);
-                       opt++;
+                       opt = 
static_cast<NID_RECOVERY_OPT>(static_cast<int>(opt) +1);
                        continue;
                }
        }
@@ -1285,8 +1327,7 @@ uint32_t recovery_action(NID_SPAWN_INFO 
  * Return Values : NCSCC_RC_SUCCESS/NCSCC_RC_FAILURE.                       *
  *                                                                          *
  ***************************************************************************/
-uint32_t spawn_services(char *strbuf)
-{
+uint32_t spawn_services(char *strbuf) {
        NID_SPAWN_INFO *service;
        NID_CHILD_LIST sp_list = spawn_list;
        char sbuff[100];
@@ -1322,6 +1363,10 @@ uint32_t spawn_services(char *strbuf)
                if (strlen(sbuff) > 0)
                        LOG_NO("%s", sbuff);
 
+               if (start_monitor_svc(service->serv_name) != NCSCC_RC_SUCCESS) {
+                       exit(EXIT_FAILURE);
+               }
+
                sp_list.head = sp_list.head->next;
        }
 
@@ -1330,6 +1375,225 @@ uint32_t spawn_services(char *strbuf)
        return NCSCC_RC_SUCCESS;
 }
 
+/*
+ * Ask the service monitor thread to start monitoring a service.
+ *
+ * The service name is written, without its terminating NUL, to svc_mon_fd;
+ * the monitor thread reads it from the other end of the socketpair.
+ *
+ * svc     : NUL-terminated service name. Names longer than NID_MAXSNAME - 1
+ *           characters are truncated.
+ * returns : NCSCC_RC_SUCCESS, or NCSCC_RC_FAILURE if write() fails with
+ *           anything other than EINTR.
+ */
+int start_monitor_svc(const char *svc) {
+  int rc = NCSCC_RC_SUCCESS;
+  char svc_name[NID_MAXSNAME];
+
+  TRACE_ENTER2("service: %s", svc);
+
+  // strncpy() leaves the buffer unterminated when svc fills it completely,
+  // which would make the strlen() below read past the array. snprintf()
+  // always terminates.
+  snprintf(svc_name, sizeof(svc_name), "%s", svc);
+
+  while (true) {
+    ssize_t write_rc = write(svc_mon_fd, svc_name, strlen(svc_name));
+    if (write_rc == -1) {
+      if (errno == EINTR) {
+        // Interrupted before anything was written, try again.
+        continue;
+      } else {
+        LOG_ER("Failed to start sevice %s, error: %s",
+               svc_name, strerror(errno));
+        rc = NCSCC_RC_FAILURE;
+        break;
+      }
+    }
+    break;
+  }
+  TRACE_LEAVE();
+  return rc;
+}
+
+/*
+ * Start monitoring the service called nid_name.
+ *
+ * Looks the name up in svc_map, waits for the service's fifo to be created,
+ * opens it write-only and non-blocking, and registers the descriptor in the
+ * next free entry of fds.
+ *
+ * fds      : pollfd array owned by the monitor thread; the entry at index
+ *            next_svc_fds_slot is filled in and the index is advanced.
+ * nid_name : name of the service to monitor.
+ * returns  : NCSCC_RC_SUCCESS when monitoring was started, or when nid_name
+ *            is not present in svc_map (nothing is registered then).
+ *            NCSCC_RC_FAILURE when no poll entry is free, the fifo does not
+ *            show up, or it cannot be opened.
+ */
+int handle_data_request(struct pollfd *fds, const std::string &nid_name) {
+  base::FileNotify file_notify;
+  base::FileNotify::FileNotifyErrors notify_rc;
+  int rc = NCSCC_RC_SUCCESS;
+  int fifo_fd = -1;
+
+  TRACE_ENTER2("service: %s", nid_name.c_str());
+
+  for (auto &svc : svc_map) {
+    if (nid_name != svc.nid_name) {
+      continue;
+    }
+
+    // Refuse to write past the entries the poll array is meant to hold.
+    if (next_svc_fds_slot >= kMaxNumOfFds) {
+      LOG_ER("No free poll slot for service %s, max: %d",
+             nid_name.c_str(), kMaxNumOfFds);
+      rc = NCSCC_RC_FAILURE;
+      break;
+    }
+
+    std::string fifo_file = fifo_dir + "/" + svc.fifo_file;
+    notify_rc = file_notify.WaitForFileCreation(fifo_file,
+                                                kTenSecondsInMilliseconds);
+    if (notify_rc != base::FileNotify::FileNotifyErrors::kOK) {
+      // Convert explicitly: an enumeration must not be handed to a %d
+      // conversion as is.
+      LOG_ER("fifo file %s does not exist, notify rc: %d",
+             fifo_file.c_str(), static_cast<int>(notify_rc));
+      rc = NCSCC_RC_FAILURE;
+      break;
+    }
+
+    // open() is retried a few times, pausing between attempts, as long as
+    // it fails with EINTR or ENXIO. For a fifo opened O_WRONLY|O_NONBLOCK,
+    // ENXIO means that no process has it open for reading yet.
+    int retry_cnt = 0;
+    do {
+      if (retry_cnt > 0) {
+        osaf_nanosleep(&kHundredMilliseconds);
+      }
+      fifo_fd = open(fifo_file.c_str(), O_WRONLY|O_NONBLOCK);
+    } while ((fifo_fd == -1) &&
+             (retry_cnt++ < 5 && (errno == EINTR || errno == ENXIO)));
+
+    if (fifo_fd == -1) {
+      LOG_ER("Failed to open %s, error: %s", fifo_file.c_str(),
+             strerror(errno));
+      rc = NCSCC_RC_FAILURE;
+      break;
+    }
+
+    svc.fifo_fd = fifo_fd;
+    fds[next_svc_fds_slot].fd = fifo_fd;
+    fds[next_svc_fds_slot].events = POLLIN;
+    next_svc_fds_slot++;
+    LOG_NO("Monitoring of %s started", nid_name.c_str());
+    break;
+  }
+  TRACE_LEAVE();
+  return rc;
+}
+
+/*
+ * Return the nid_name of the first svc_map entry whose fifo_fd equals fd, or
+ * an empty string when no entry matches.
+ */
+std::string get_svc_name(int fd) {
+  for (std::vector<SvcMap>::const_iterator entry = svc_map.begin();
+       entry != svc_map.end(); ++entry) {
+    if (entry->fifo_fd == fd) {
+      return entry->nid_name;
+    }
+  }
+  return std::string();
+}
+
+/*
+ * React to a poll event on a monitored fifo descriptor.
+ *
+ * If fd belongs to a service in the service map, the error is logged and the
+ * process exits. Otherwise only a notice is logged.
+ */
+void handle_svc_exit(int fd) {
+  const std::string name = get_svc_name(fd);
+
+  if (name.empty()) {
+    LOG_NO("fd %d was not found in service map", fd);
+    return;
+  }
+
+  LOG_ER("Service %s has unexpectedly crashed. Unable to continue, exiting",
+         name.c_str());
+  exit(EXIT_FAILURE);
+}
+
+/****************************************************************************
+ * Name          : svc_monitor_thread                                       *
+ *                                                                          *
+ * Description   : thread entry point; polls the socketpair for names of    *
+ *                 services to monitor and polls the fifos of monitored     *
+ *                 services; a poll event on such a fifo is passed to       *
+ *                 handle_svc_exit()                                        *
+ *                                                                          *
+ * Arguments     : fd - pointer to an int holding the socketpair            *
+ *                      descriptor to read service names from               *
+ *                                                                          *
+ * Return Values : does not return; the thread loops until the process      *
+ *                 exits                                                    *
+ *                                                                          *
+ ***************************************************************************/
+void* svc_monitor_thread(void *fd) {
+  char nid_name[NID_MAXSNAME];
+  int svc_mon_thr_fd = *(reinterpret_cast<int*>(fd));
+  enum {
+    FD_SVC_MON_THR = 0,
+  };
+
+  // new[] takes an element count, not a byte count, so the array must not be
+  // scaled by sizeof(pollfd). It throws on failure instead of returning
+  // NULL, so no NULL check is needed either.
+  struct pollfd *fds = new pollfd[kMaxNumOfFds];
+  ssize_t read_rc = -1;
+
+  // Entry 0 is the socketpair; monitored fifos are appended after it.
+  fds[FD_SVC_MON_THR].fd = svc_mon_thr_fd;
+  fds[FD_SVC_MON_THR].events = POLLIN;
+  next_svc_fds_slot++;
+
+  while (true) {
+    unsigned rc = osaf_poll(fds, next_svc_fds_slot, -1);
+    if (rc > 0) {
+      // check if any monitored service has exited
+      for (int i = next_svc_fds_slot-1; i > 0; --i) {
+        if ((fds[i].revents & POLLIN) ||
+            (fds[i].revents & POLLHUP) ||
+            (fds[i].revents & POLLERR)) {
+          handle_svc_exit(fds[i].fd);
+        }
+      }
+
+      if (fds[FD_SVC_MON_THR].revents & POLLIN) {
+        while (true) {
+          // Read at most sizeof(nid_name) - 1 bytes so that there is always
+          // room for the terminating NUL added below.
+          read_rc = read(svc_mon_thr_fd, nid_name, sizeof(nid_name) - 1);
+          if (read_rc == -1) {
+            if (errno == EINTR) {
+              continue;
+            } else {
+              LOG_ER("Failed to read on socketpair descriptor: %s",
+                     strerror(errno));
+              exit(EXIT_FAILURE);
+            }
+          }
+          nid_name[read_rc] = '\0';
+          break;
+        }
+        if (handle_data_request(fds, nid_name) != NCSCC_RC_SUCCESS) {
+          LOG_ER("Failed to start monitoring for service %s, exiting",
+                 nid_name);
+          exit(EXIT_FAILURE);
+        }
+      }
+    } else {
+      LOG_ER("osaf_poll timed out and no descriptors are ready, exiting");
+      exit(EXIT_FAILURE);
+    }
+  }
+  // Not reached: the loop above only ends through exit().
+  delete [] fds;
+  return NULL;
+}
+
+/****************************************************************************
+ * Name          : create_svc_monitor_thread                                *
+ *                                                                          *
+ * Description   : creates the service monitor thread                       *
+ *                                                                          *
+ * Arguments     : -                                                        *
+ *                                                                          *
+ * Return Values : NCSCC_RC_SUCCESS/NCSCC_RC_FAILURE.                       *
+ *                                                                          *
+ ***************************************************************************/
+uint32_t create_svc_monitor_thread(void) {
+  int s_pair[2];
+  // Static storage: the detached thread receives a pointer to this variable
+  // and may dereference it after this function has returned, so it must not
+  // live on this function's stack.
+  static int svc_mon_thr_fd = -1;
+  pthread_t thread;
+  pthread_attr_t attr;
+  // pthread functions return the error number instead of setting errno.
+  int rc;
+
+  TRACE_ENTER();
+
+  if (socketpair(AF_UNIX, SOCK_STREAM|SOCK_CLOEXEC, 0, s_pair) == -1) {
+    LOG_ER("socketpair FAILED: %s", strerror(errno));
+    return NCSCC_RC_FAILURE;
+  }
+
+  // s_pair[0] is kept by the caller's side for sending service names, and
+  // s_pair[1] is handed to the monitor thread.
+  svc_mon_fd = s_pair[0];
+  svc_mon_thr_fd = s_pair[1];
+
+  TRACE("sd1: %d sd2: %d", svc_mon_fd, svc_mon_thr_fd);
+
+  rc = pthread_attr_init(&attr);
+  if (rc != 0) {
+    LOG_ER("pthread_attr_init FAILED: %s", strerror(rc));
+    return NCSCC_RC_FAILURE;
+  }
+
+  rc = pthread_attr_setdetachstate(&attr, PTHREAD_CREATE_DETACHED);
+  if (rc != 0) {
+    LOG_ER("pthread_setdetachstate FAILED: %s", strerror(rc));
+    return NCSCC_RC_FAILURE;
+  }
+
+  rc = pthread_create(&thread, &attr, svc_monitor_thread,
+                      reinterpret_cast<void*>(&svc_mon_thr_fd));
+  if (rc != 0) {
+    LOG_ER("pthread_create FAILED: %s", strerror(rc));
+    return NCSCC_RC_FAILURE;
+  }
+
+  rc = pthread_attr_destroy(&attr);
+  if (rc != 0) {
+    LOG_ER("pthread_attr_destroy FAILED: %s", strerror(rc));
+    return NCSCC_RC_FAILURE;
+  }
+
+  TRACE_LEAVE();
+  return NCSCC_RC_SUCCESS;
+}
+
 /****************************************************************************
  * Name          : main                                                     *
  *                                                                          *
@@ -1365,6 +1629,11 @@ int main(int argc, char *argv[])
                exit(EXIT_FAILURE);
        }
 
+       if (create_svc_monitor_thread() != NCSCC_RC_SUCCESS) {
+               LOG_ER("Failed to create service monitor thread, exiting");
+               exit(EXIT_FAILURE);
+       }
+
        if (parse_nodeinit_conf(sbuf) != NCSCC_RC_SUCCESS) {
                LOG_ER("Failed to parse file %s. Exiting", sbuf);
                exit(EXIT_FAILURE);

------------------------------------------------------------------------------
Check out the vibrant tech community on one of the world's most 
engaging tech sites, SlashDot.org! http://sdm.link/slashdot
_______________________________________________
Opensaf-devel mailing list
Opensaf-devel@lists.sourceforge.net
https://lists.sourceforge.net/lists/listinfo/opensaf-devel

Reply via email to