There is a dependency b/w svc_monitor_thread and spawn_services. The coredump happens when spawn_services is executed while the thread has not yet started. In this case, data is sent to the pipe but no one consumed it. When it comes to consume the data, will get unexpected data and crash the program.
This patch ensures the things will happen in the right order: svc_monitor_thread must be in ready state before spawn_services() is executed. --- src/nid/nodeinit.cc | 34 +++++++++++++++++++++++----------- 1 file changed, 23 insertions(+), 11 deletions(-) diff --git a/src/nid/nodeinit.cc b/src/nid/nodeinit.cc index 5f15916b4..2e6a5cd05 100644 --- a/src/nid/nodeinit.cc +++ b/src/nid/nodeinit.cc @@ -47,6 +47,8 @@ * any notification. * ************************************************************************/ +#include "nid/nodeinit.h" + #include <unistd.h> #include <fcntl.h> #include <syslog.h> @@ -61,20 +63,18 @@ #include <sys/wait.h> #include <stdint.h> -#include "osaf/configmake.h" -#include "rde/agent/rda_papi.h" -#include "base/logtrace.h" - +#include <atomic> #include <string> #include <vector> #include <cerrno> #include <cstdio> +#include "osaf/configmake.h" +#include "rde/agent/rda_papi.h" +#include "base/logtrace.h" #include "base/conf.h" #include "base/osaf_poll.h" #include "base/osaf_time.h" - -#include "nid/nodeinit.h" #include "base/file_notify.h" #define SETSIG(sa, sig, fun, flags) \ @@ -134,6 +134,7 @@ static int start_monitor_svc(const char *svc); /* Data declarations for service monitoring */ static int svc_mon_fd = -1; static int next_svc_fds_slot = 0; +static std::atomic<bool> svc_monitor_thread_ready{false}; struct SAFServices { const std::string fifo_dir = PKGLOCALSTATEDIR; @@ -712,9 +713,9 @@ int32_t fork_daemon(NID_SPAWN_INFO *service, char *app, char *args[], tmp_pid = getpid(); while (write(filedes[1], &tmp_pid, sizeof(int)) < 0) { - if (errno == EINTR) + if (errno == EINTR) { continue; - else if (errno == EPIPE) { + } else if (errno == EPIPE) { LOG_ER("Reader not available to return my PID"); } else { LOG_ER("Problem writing to pipe, err=%s", strerror(errno)); @@ -1517,6 +1518,7 @@ void *svc_monitor_thread(void *fd) { next_svc_fds_slot++; while (true) { + svc_monitor_thread_ready = true; unsigned rc = osaf_poll(fds, next_svc_fds_slot, -1); if (rc > 0) { // check if any monitored service has exit @@ -1529,9 +1531,9 @@ void *svc_monitor_thread(void *fd) { if (fds[FD_SVC_MON_THR].revents & POLLIN) { while (true) { - read_rc = read(svc_mon_thr_fd, nid_name, NID_MAXSNAME); + read_rc = recv(svc_mon_thr_fd, nid_name, NID_MAXSNAME, MSG_DONTWAIT); if (read_rc == -1) { - if (errno == EINTR) { + if (errno == EINTR || errno == EAGAIN || errno == EWOULDBLOCK) { continue; } else { LOG_ER("Failed to read on socketpair descriptor: %s", @@ -1574,7 +1576,7 @@ uint32_t create_svc_monitor_thread(void) { TRACE_ENTER(); - if (socketpair(AF_UNIX, SOCK_STREAM | SOCK_CLOEXEC, 0, s_pair) == -1) { + if (socketpair(AF_UNIX, SOCK_DGRAM | SOCK_CLOEXEC, 0, s_pair) == -1) { LOG_ER("socketpair FAILED: %s", strerror(errno)); return NCSCC_RC_FAILURE; } @@ -1655,6 +1657,16 @@ int main(int argc, char *argv[]) { exit(EXIT_FAILURE); } + // Waiting until svc_monitor_thread is up and in ready state. + unsigned no_repeat = 0; + while (svc_monitor_thread_ready == false && no_repeat < 100) { + osaf_nanosleep(&kHundredMilliseconds); + no_repeat++; + } + + osafassert(svc_monitor_thread_ready); + LOG_NO("svc_monitor_thread is up and in ready state"); + if (parse_nodeinit_conf(sbuf) != NCSCC_RC_SUCCESS) { LOG_ER("Failed to parse file %s. Exiting", sbuf); exit(EXIT_FAILURE); -- 2.19.2 _______________________________________________ Opensaf-devel mailing list Opensaf-devel@lists.sourceforge.net https://lists.sourceforge.net/lists/listinfo/opensaf-devel