Hi Vu,

ack, with one comment below. /BR HansN

On 3/1/19 10:07, Vu Minh Nguyen wrote:
> There is a dependency b/w svc_monitor_thread and spawn_services.
> The coredump happens when spawn_services is executed while
> the thread has not yet started. In this case, data is sent to the
> pipe but no one consumed it. When it comes to consume the data,
> will get unexpected data and crash the program.
>
> This patch ensures the things will happen in the right order:
> svc_monitor_thread must be in ready state before spawn_services()
> is executed.
> ---
>   src/nid/nodeinit.cc | 34 +++++++++++++++++++++++-----------
>   1 file changed, 23 insertions(+), 11 deletions(-)
>
> diff --git a/src/nid/nodeinit.cc b/src/nid/nodeinit.cc
> index 5f15916b4..2e6a5cd05 100644
> --- a/src/nid/nodeinit.cc
> +++ b/src/nid/nodeinit.cc
> @@ -47,6 +47,8 @@
>    *            any notification.                                          *
>    ************************************************************************/
>   
> +#include "nid/nodeinit.h"
> +
>   #include <unistd.h>
>   #include <fcntl.h>
>   #include <syslog.h>
> @@ -61,20 +63,18 @@
>   #include <sys/wait.h>
>   #include <stdint.h>
>   
> -#include "osaf/configmake.h"
> -#include "rde/agent/rda_papi.h"
> -#include "base/logtrace.h"
> -
> +#include <atomic>
>   #include <string>
>   #include <vector>
>   #include <cerrno>
>   #include <cstdio>
>   
> +#include "osaf/configmake.h"
> +#include "rde/agent/rda_papi.h"
> +#include "base/logtrace.h"
>   #include "base/conf.h"
>   #include "base/osaf_poll.h"
>   #include "base/osaf_time.h"
> -
> -#include "nid/nodeinit.h"
>   #include "base/file_notify.h"
>   
>   #define SETSIG(sa, sig, fun, flags) \
> @@ -134,6 +134,7 @@ static int start_monitor_svc(const char *svc);
>   /* Data declarations for service monitoring */
>   static int svc_mon_fd = -1;
>   static int next_svc_fds_slot = 0;
> +static std::atomic<bool> svc_monitor_thread_ready{false};
>   
>   struct SAFServices {
>     const std::string fifo_dir = PKGLOCALSTATEDIR;
> @@ -712,9 +713,9 @@ int32_t fork_daemon(NID_SPAWN_INFO *service, char *app, 
> char *args[],
>   
>       tmp_pid = getpid();
>       while (write(filedes[1], &tmp_pid, sizeof(int)) < 0) {
> -      if (errno == EINTR)
> +      if (errno == EINTR) {
>           continue;
> -      else if (errno == EPIPE) {
> +      } else if (errno == EPIPE) {
>           LOG_ER("Reader not available to return my PID");
>         } else {
>           LOG_ER("Problem writing to pipe, err=%s", strerror(errno));
> @@ -1517,6 +1518,7 @@ void *svc_monitor_thread(void *fd) {
>     next_svc_fds_slot++;
>   
>     while (true) {
> +    svc_monitor_thread_ready = true;
>       unsigned rc = osaf_poll(fds, next_svc_fds_slot, -1);
>       if (rc > 0) {
>         // check if any monitored service has exit
> @@ -1529,9 +1531,9 @@ void *svc_monitor_thread(void *fd) {
>   
>         if (fds[FD_SVC_MON_THR].revents & POLLIN) {
>           while (true) {
> -          read_rc = read(svc_mon_thr_fd, nid_name, NID_MAXSNAME);
> +          read_rc = recv(svc_mon_thr_fd, nid_name, NID_MAXSNAME, 
> MSG_DONTWAIT);
>             if (read_rc == -1) {
> -            if (errno == EINTR) {
> +            if (errno == EINTR || errno == EAGAIN || errno == EWOULDBLOCK) {

[HansN] should be

   if (errno == EINTR)

     continue;

   if (errno == EAGAIN || errno == EWOULDBLOCK)

     break;


>                 continue;
>               } else {
>                 LOG_ER("Failed to read on socketpair descriptor: %s",
> @@ -1574,7 +1576,7 @@ uint32_t create_svc_monitor_thread(void) {
>   
>     TRACE_ENTER();
>   
> -  if (socketpair(AF_UNIX, SOCK_STREAM | SOCK_CLOEXEC, 0, s_pair) == -1) {
> +  if (socketpair(AF_UNIX, SOCK_DGRAM | SOCK_CLOEXEC, 0, s_pair) == -1) {
>       LOG_ER("socketpair FAILED: %s", strerror(errno));
>       return NCSCC_RC_FAILURE;
>     }
> @@ -1655,6 +1657,16 @@ int main(int argc, char *argv[]) {
>       exit(EXIT_FAILURE);
>     }
>   
> +  // Waiting until svc_monitor_thread is up and in ready state.
> +  unsigned no_repeat = 0;
> +  while (svc_monitor_thread_ready == false && no_repeat < 100) {
> +    osaf_nanosleep(&kHundredMilliseconds);
> +    no_repeat++;
> +  }
> +
> +  osafassert(svc_monitor_thread_ready);
> +  LOG_NO("svc_monitor_thread is up and in ready state");
> +
>     if (parse_nodeinit_conf(sbuf) != NCSCC_RC_SUCCESS) {
>       LOG_ER("Failed to parse file %s. Exiting", sbuf);
>       exit(EXIT_FAILURE);

_______________________________________________
Opensaf-devel mailing list
Opensaf-devel@lists.sourceforge.net
https://lists.sourceforge.net/lists/listinfo/opensaf-devel

Reply via email to