Re: [devel] [PATCH 1/1] nid: fix opensafd crashed during start-up [#3013]

2019-03-04 Thread Hans Nordebäck
Hi Vu,

ack, with one comment below. /BR HansN

On 3/1/19 10:07, Vu Minh Nguyen wrote:
> There is a dependency b/w svc_monitor_thread and spawn_services.
> The coredump happens when spawn_services is executed while
> the thread has not yet started. In this case, data is sent to the
> pipe but no one consumed it. When it comes to consume the data,
> will get unexpected data and crash the program.
>
> This patch ensures the things will happen in the right order:
> svc_monitor_thread must be in ready state before spawn_services()
> is executed.
> ---
>   src/nid/nodeinit.cc | 34 +++---
>   1 file changed, 23 insertions(+), 11 deletions(-)
>
> diff --git a/src/nid/nodeinit.cc b/src/nid/nodeinit.cc
> index 5f15916b4..2e6a5cd05 100644
> --- a/src/nid/nodeinit.cc
> +++ b/src/nid/nodeinit.cc
> @@ -47,6 +47,8 @@
>*any notification.  *
>/
>   
> +#include "nid/nodeinit.h"
> +
>   #include 
>   #include 
>   #include 
> @@ -61,20 +63,18 @@
>   #include 
>   #include 
>   
> -#include "osaf/configmake.h"
> -#include "rde/agent/rda_papi.h"
> -#include "base/logtrace.h"
> -
> +#include 
>   #include 
>   #include 
>   #include 
>   #include 
>   
> +#include "osaf/configmake.h"
> +#include "rde/agent/rda_papi.h"
> +#include "base/logtrace.h"
>   #include "base/conf.h"
>   #include "base/osaf_poll.h"
>   #include "base/osaf_time.h"
> -
> -#include "nid/nodeinit.h"
>   #include "base/file_notify.h"
>   
>   #define SETSIG(sa, sig, fun, flags) \
> @@ -134,6 +134,7 @@ static int start_monitor_svc(const char *svc);
>   /* Data declarations for service monitoring */
>   static int svc_mon_fd = -1;
>   static int next_svc_fds_slot = 0;
> +static std::atomic svc_monitor_thread_ready{false};
>   
>   struct SAFServices {
> const std::string fifo_dir = PKGLOCALSTATEDIR;
> @@ -712,9 +713,9 @@ int32_t fork_daemon(NID_SPAWN_INFO *service, char *app, 
> char *args[],
>   
>   tmp_pid = getpid();
>   while (write(filedes[1], _pid, sizeof(int)) < 0) {
> -  if (errno == EINTR)
> +  if (errno == EINTR) {
>   continue;
> -  else if (errno == EPIPE) {
> +  } else if (errno == EPIPE) {
>   LOG_ER("Reader not available to return my PID");
> } else {
>   LOG_ER("Problem writing to pipe, err=%s", strerror(errno));
> @@ -1517,6 +1518,7 @@ void *svc_monitor_thread(void *fd) {
> next_svc_fds_slot++;
>   
> while (true) {
> +svc_monitor_thread_ready = true;
>   unsigned rc = osaf_poll(fds, next_svc_fds_slot, -1);
>   if (rc > 0) {
> // check if any monitored service has exit
> @@ -1529,9 +1531,9 @@ void *svc_monitor_thread(void *fd) {
>   
> if (fds[FD_SVC_MON_THR].revents & POLLIN) {
>   while (true) {
> -  read_rc = read(svc_mon_thr_fd, nid_name, NID_MAXSNAME);
> +  read_rc = recv(svc_mon_thr_fd, nid_name, NID_MAXSNAME, 
> MSG_DONTWAIT);
> if (read_rc == -1) {
> -if (errno == EINTR) {
> +if (errno == EINTR || errno == EAGAIN || errno == EWOULDBLOCK) {

[HansN] should be

   if (errno == EINTR)

     continue;

   if (errno == EAGAIN || errno == EWOULDBLOCK)

     break;


> continue;
>   } else {
> LOG_ER("Failed to read on socketpair descriptor: %s",
> @@ -1574,7 +1576,7 @@ uint32_t create_svc_monitor_thread(void) {
>   
> TRACE_ENTER();
>   
> -  if (socketpair(AF_UNIX, SOCK_STREAM | SOCK_CLOEXEC, 0, s_pair) == -1) {
> +  if (socketpair(AF_UNIX, SOCK_DGRAM | SOCK_CLOEXEC, 0, s_pair) == -1) {
>   LOG_ER("socketpair FAILED: %s", strerror(errno));
>   return NCSCC_RC_FAILURE;
> }
> @@ -1655,6 +1657,16 @@ int main(int argc, char *argv[]) {
>   exit(EXIT_FAILURE);
> }
>   
> +  // Waiting until svc_monitor_thread is up and in ready state.
> +  unsigned no_repeat = 0;
> +  while (svc_monitor_thread_ready == false && no_repeat < 100) {
> +osaf_nanosleep();
> +no_repeat++;
> +  }
> +
> +  osafassert(svc_monitor_thread_ready);
> +  LOG_NO("svc_monitor_thread is up and in ready state");
> +
> if (parse_nodeinit_conf(sbuf) != NCSCC_RC_SUCCESS) {
>   LOG_ER("Failed to parse file %s. Exiting", sbuf);
>   exit(EXIT_FAILURE);

___
Opensaf-devel mailing list
Opensaf-devel@lists.sourceforge.net
https://lists.sourceforge.net/lists/listinfo/opensaf-devel


[devel] [PATCH 1/1] nid: fix opensafd crashed during start-up [#3013]

2019-03-01 Thread Vu Minh Nguyen
There is a dependency b/w svc_monitor_thread and spawn_services.
The coredump happens when spawn_services is executed while
the thread has not yet started. In this case, data is sent to the
pipe but no one consumed it. When it comes to consume the data,
will get unexpected data and crash the program.

This patch ensures the things will happen in the right order:
svc_monitor_thread must be in ready state before spawn_services()
is executed.
---
 src/nid/nodeinit.cc | 34 +++---
 1 file changed, 23 insertions(+), 11 deletions(-)

diff --git a/src/nid/nodeinit.cc b/src/nid/nodeinit.cc
index 5f15916b4..2e6a5cd05 100644
--- a/src/nid/nodeinit.cc
+++ b/src/nid/nodeinit.cc
@@ -47,6 +47,8 @@
  *any notification.  *
  /
 
+#include "nid/nodeinit.h"
+
 #include 
 #include 
 #include 
@@ -61,20 +63,18 @@
 #include 
 #include 
 
-#include "osaf/configmake.h"
-#include "rde/agent/rda_papi.h"
-#include "base/logtrace.h"
-
+#include 
 #include 
 #include 
 #include 
 #include 
 
+#include "osaf/configmake.h"
+#include "rde/agent/rda_papi.h"
+#include "base/logtrace.h"
 #include "base/conf.h"
 #include "base/osaf_poll.h"
 #include "base/osaf_time.h"
-
-#include "nid/nodeinit.h"
 #include "base/file_notify.h"
 
 #define SETSIG(sa, sig, fun, flags) \
@@ -134,6 +134,7 @@ static int start_monitor_svc(const char *svc);
 /* Data declarations for service monitoring */
 static int svc_mon_fd = -1;
 static int next_svc_fds_slot = 0;
+static std::atomic svc_monitor_thread_ready{false};
 
 struct SAFServices {
   const std::string fifo_dir = PKGLOCALSTATEDIR;
@@ -712,9 +713,9 @@ int32_t fork_daemon(NID_SPAWN_INFO *service, char *app, 
char *args[],
 
 tmp_pid = getpid();
 while (write(filedes[1], _pid, sizeof(int)) < 0) {
-  if (errno == EINTR)
+  if (errno == EINTR) {
 continue;
-  else if (errno == EPIPE) {
+  } else if (errno == EPIPE) {
 LOG_ER("Reader not available to return my PID");
   } else {
 LOG_ER("Problem writing to pipe, err=%s", strerror(errno));
@@ -1517,6 +1518,7 @@ void *svc_monitor_thread(void *fd) {
   next_svc_fds_slot++;
 
   while (true) {
+svc_monitor_thread_ready = true;
 unsigned rc = osaf_poll(fds, next_svc_fds_slot, -1);
 if (rc > 0) {
   // check if any monitored service has exit
@@ -1529,9 +1531,9 @@ void *svc_monitor_thread(void *fd) {
 
   if (fds[FD_SVC_MON_THR].revents & POLLIN) {
 while (true) {
-  read_rc = read(svc_mon_thr_fd, nid_name, NID_MAXSNAME);
+  read_rc = recv(svc_mon_thr_fd, nid_name, NID_MAXSNAME, MSG_DONTWAIT);
   if (read_rc == -1) {
-if (errno == EINTR) {
+if (errno == EINTR || errno == EAGAIN || errno == EWOULDBLOCK) {
   continue;
 } else {
   LOG_ER("Failed to read on socketpair descriptor: %s",
@@ -1574,7 +1576,7 @@ uint32_t create_svc_monitor_thread(void) {
 
   TRACE_ENTER();
 
-  if (socketpair(AF_UNIX, SOCK_STREAM | SOCK_CLOEXEC, 0, s_pair) == -1) {
+  if (socketpair(AF_UNIX, SOCK_DGRAM | SOCK_CLOEXEC, 0, s_pair) == -1) {
 LOG_ER("socketpair FAILED: %s", strerror(errno));
 return NCSCC_RC_FAILURE;
   }
@@ -1655,6 +1657,16 @@ int main(int argc, char *argv[]) {
 exit(EXIT_FAILURE);
   }
 
+  // Waiting until svc_monitor_thread is up and in ready state.
+  unsigned no_repeat = 0;
+  while (svc_monitor_thread_ready == false && no_repeat < 100) {
+osaf_nanosleep();
+no_repeat++;
+  }
+
+  osafassert(svc_monitor_thread_ready);
+  LOG_NO("svc_monitor_thread is up and in ready state");
+
   if (parse_nodeinit_conf(sbuf) != NCSCC_RC_SUCCESS) {
 LOG_ER("Failed to parse file %s. Exiting", sbuf);
 exit(EXIT_FAILURE);
-- 
2.19.2



___
Opensaf-devel mailing list
Opensaf-devel@lists.sourceforge.net
https://lists.sourceforge.net/lists/listinfo/opensaf-devel


Re: [devel] [PATCH 1/1] nid: fix opensafd crashed during start-up [#3013]

2019-02-28 Thread Hans Nordebäck
Hi Vu,
fine,  perhaps also changing the  static bool svc_monitor_thread_running = 
false  to std::atomic?/BR Hans

From: Vu Minh Nguyen 
Sent: den 28 februari 2019 09:30
To: Hans Nordebäck ; Gary Lee 

Cc: opensaf-devel@lists.sourceforge.net
Subject: RE: [PATCH 1/1] nid: fix opensafd crashed during start-up [#3013]

Thanks Hans. I will send the V2 for these updates.

Regards, Vu

From: Hans Nordebäck 
mailto:hans.nordeb...@ericsson.com>>
Sent: Thursday, February 28, 2019 2:16 PM
To: Vu Minh Nguyen 
mailto:vu.m.ngu...@dektech.com.au>>; Gary Lee 
mailto:gary@dektech.com.au>>
Cc: 
opensaf-devel@lists.sourceforge.net
Subject: Re: [PATCH 1/1] nid: fix opensafd crashed during start-up [#3013]


Hi Vu,

you can keep your patch for the ready state, but also change SOCK_STREAM to 
SOCK_DGRAM and change

the read(svc_mon_thr_fd, nid_name, NID_MAXSNAME) in svc_monitor_thread to

recv(svc_mon_thr_fd, nid_name, NID_MAXSNAME, MSG_DONTWAIT) and also handle 
EAGAIN and

EWOULDBLOCK. Then only one nid_name per read/recv will be given instead of 
several nid_names

as in the SOCK_STREAM case.

/BR Hans


On 2/28/19 05:30, Vu Minh Nguyen wrote:

Hi Hans,



Thanks for your comment.



But I has a concern that the service-monitoring function may not fully work

if a service is crashed before the svc_monitor_thread goes to ready state?



Is it mandatory for monitoring thread to enter ready state before spawning

SAF services?



Regards, Vu



-Original Message-

From: Hans Nordebäck 


Sent: Wednesday, February 27, 2019 8:23 PM

To: Vu Minh Nguyen 
; Gary Lee



Cc: 
opensaf-devel@lists.sourceforge.net;
 Vu Minh Nguyen



Subject: RE: [PATCH 1/1] nid: fix opensafd crashed during start-up [#3013]



Hi Vu,

I discussed a bit with Anders, likely it should work if the socketpair is

changed

to socketpair(AF_UNIX, SOCK_DGRAM .. from SOCK_STREAM. /BR Hans



-Original Message-

From: Hans Nordebäck

Sent: den 27 februari 2019 11:55

To: 'Vu Minh Nguyen' 
; Gary Lee



Cc: 
opensaf-devel@lists.sourceforge.net;
 Vu Minh Nguyen



Subject: RE: [PATCH 1/1] nid: fix opensafd crashed during start-up [#3013]



Hi Vu,

ack, code review only/Thanks HansN



-Original Message-

From: Vu Minh Nguyen 


Sent: den 27 februari 2019 11:48

To: Hans Nordebäck 
; Gary Lee



Cc: 
opensaf-devel@lists.sourceforge.net;
 Vu Minh Nguyen



Subject: [PATCH 1/1] nid: fix opensafd crashed during start-up [#3013]



There is a dependency b/w svc_monitor_thread and spawn_services.

The coredump happens when spawn_services is executed while the thread

has not yet started. In this case, data is sent to the pipe but no one

consumed

it. Later on, reading data from the pipe, will get unexpected data and

crash

the program.



This patch ensures the order: svc_monitor_thread must be in ready state

before spawn_services() is executed.

---

 src/nid/nodeinit.cc | 11 +++

 1 file changed, 11 insertions(+)



diff --git a/src/nid/nodeinit.cc b/src/nid/nodeinit.cc index

5f15916b4..b4945b05c 100644

--- a/src/nid/nodeinit.cc

+++ b/src/nid/nodeinit.cc

@@ -134,6 +134,7 @@ static int start_monitor_svc(const char *svc);

 /* Data declarations for service monitoring */  static int svc_mon_fd =

-1;

static int next_svc_fds_slot = 0;

+static bool svc_monitor_thread_running = false;



 struct SAFServices {

   const std::string fifo_dir = PKGLOCALSTATEDIR; @@ -1517,6 +1518,7 @@

void *svc_monitor_thread(void *fd) {

   next_svc_fds_slot++;



   while (true) {

+svc_monitor_thread_running = true;

 unsigned rc = osaf_poll(fds, next_svc_fds_slot, -1);

 if (rc > 0) {

   // check if any monitored service has exit @@ -1655,6 +1657,15 @@

int

main(int argc, char *argv[]) {

 exit(EXIT_FAILURE);

   }



+  // Waiting until svc_monitor_thread is up and in ready state.

+  // If spawn_services runs before the thread is in ready state,  //

+ receive side of the pipe s_pair will get unexpected data and  // may

+ crash the process.

+  while (svc_monitor_thread_running == false) {

+usleep(100);

+  }

+

+  LOG_NO("svc_monitor_thread is up and in ready state");

   if (parse_nodeinit_conf(sbuf) != NCSCC_RC_SUCCESS) {

 LOG_ER("Failed to parse file %s. Exiting", sbuf);

 exit(EXIT_FAILURE);

--

2.19.2





___
Opensaf-devel mailing list
Opensaf-devel@lists.sourceforge.net

Re: [devel] [PATCH 1/1] nid: fix opensafd crashed during start-up [#3013]

2019-02-28 Thread Vu Minh Nguyen
Thanks Hans. I will send the V2 for these updates.

 

Regards, Vu

 

From: Hans Nordebäck  
Sent: Thursday, February 28, 2019 2:16 PM
To: Vu Minh Nguyen ; Gary Lee 

Cc: opensaf-devel@lists.sourceforge.net
Subject: Re: [PATCH 1/1] nid: fix opensafd crashed during start-up [#3013]

 

Hi Vu,

you can keep your patch for the ready state, but also change SOCK_STREAM to 
SOCK_DGRAM and change 

the read(svc_mon_thr_fd, nid_name, NID_MAXSNAME) in svc_monitor_thread to

recv(svc_mon_thr_fd, nid_name, NID_MAXSNAME, MSG_DONTWAIT) and also handle 
EAGAIN and

EWOULDBLOCK. Then only one nid_name per read/recv will be given instead of 
several nid_names

as in the SOCK_STREAM case.

/BR Hans

 

On 2/28/19 05:30, Vu Minh Nguyen wrote:

Hi Hans,
 
Thanks for your comment. 
 
But I has a concern that the service-monitoring function may not fully work 
if a service is crashed before the svc_monitor_thread goes to ready state?
 
Is it mandatory for monitoring thread to enter ready state before spawning
SAF services?
 
Regards, Vu
 

-Original Message-
From: Hans Nordebäck   

Sent: Wednesday, February 27, 2019 8:23 PM
To: Vu Minh Nguyen   
; Gary Lee
  
Cc: opensaf-devel@lists.sourceforge.net 
 ; Vu Minh Nguyen
  
Subject: RE: [PATCH 1/1] nid: fix opensafd crashed during start-up [#3013]
 
Hi Vu,
I discussed a bit with Anders, likely it should work if the socketpair is

changed

to socketpair(AF_UNIX, SOCK_DGRAM .. from SOCK_STREAM. /BR Hans
 
-Original Message-
From: Hans Nordebäck
Sent: den 27 februari 2019 11:55
To: 'Vu Minh Nguyen'   
; Gary Lee
  
Cc: opensaf-devel@lists.sourceforge.net 
 ; Vu Minh Nguyen
  
Subject: RE: [PATCH 1/1] nid: fix opensafd crashed during start-up [#3013]
 
Hi Vu,
ack, code review only/Thanks HansN
 
-Original Message-
From: Vu Minh Nguyen   

Sent: den 27 februari 2019 11:48
To: Hans Nordebäck   
; Gary Lee
  
Cc: opensaf-devel@lists.sourceforge.net 
 ; Vu Minh Nguyen
  
Subject: [PATCH 1/1] nid: fix opensafd crashed during start-up [#3013]
 
There is a dependency b/w svc_monitor_thread and spawn_services.
The coredump happens when spawn_services is executed while the thread
has not yet started. In this case, data is sent to the pipe but no one

consumed

it. Later on, reading data from the pipe, will get unexpected data and

crash

the program.
 
This patch ensures the order: svc_monitor_thread must be in ready state
before spawn_services() is executed.
---
 src/nid/nodeinit.cc | 11 +++
 1 file changed, 11 insertions(+)
 
diff --git a/src/nid/nodeinit.cc b/src/nid/nodeinit.cc index
5f15916b4..b4945b05c 100644
--- a/src/nid/nodeinit.cc
+++ b/src/nid/nodeinit.cc
@@ -134,6 +134,7 @@ static int start_monitor_svc(const char *svc);
 /* Data declarations for service monitoring */  static int svc_mon_fd =

-1;

static int next_svc_fds_slot = 0;
+static bool svc_monitor_thread_running = false;
 
 struct SAFServices {
   const std::string fifo_dir = PKGLOCALSTATEDIR; @@ -1517,6 +1518,7 @@
void *svc_monitor_thread(void *fd) {
   next_svc_fds_slot++;
 
   while (true) {
+svc_monitor_thread_running = true;
 unsigned rc = osaf_poll(fds, next_svc_fds_slot, -1);
 if (rc > 0) {
   // check if any monitored service has exit @@ -1655,6 +1657,15 @@

int

main(int argc, char *argv[]) {
 exit(EXIT_FAILURE);
   }
 
+  // Waiting until svc_monitor_thread is up and in ready state.
+  // If spawn_services runs before the thread is in ready state,  //
+ receive side of the pipe s_pair will get unexpected data and  // may
+ crash the process.
+  while (svc_monitor_thread_running == false) {
+usleep(100);
+  }
+
+  LOG_NO("svc_monitor_thread is up and in ready state");
   if (parse_nodeinit_conf(sbuf) != NCSCC_RC_SUCCESS) {
 LOG_ER("Failed to parse file %s. Exiting", sbuf);
 exit(EXIT_FAILURE);
--
2.19.2

 
 


___
Opensaf-devel mailing list
Opensaf-devel@lists.sourceforge.net
https://lists.sourceforge.net/lists/listinfo/opensaf-devel


Re: [devel] [PATCH 1/1] nid: fix opensafd crashed during start-up [#3013]

2019-02-27 Thread Hans Nordebäck
Hi Vu,

you can keep your patch for the ready state, but also change SOCK_STREAM to 
SOCK_DGRAM and change

the read(svc_mon_thr_fd, nid_name, NID_MAXSNAME) in svc_monitor_thread to

recv(svc_mon_thr_fd, nid_name, NID_MAXSNAME, MSG_DONTWAIT) and also handle 
EAGAIN and

EWOULDBLOCK. Then only one nid_name per read/recv will be given instead of 
several nid_names

as in the SOCK_STREAM case.

/BR Hans


On 2/28/19 05:30, Vu Minh Nguyen wrote:

Hi Hans,

Thanks for your comment.

But I has a concern that the service-monitoring function may not fully work
if a service is crashed before the svc_monitor_thread goes to ready state?

Is it mandatory for monitoring thread to enter ready state before spawning
SAF services?

Regards, Vu



-Original Message-
From: Hans Nordebäck 

Sent: Wednesday, February 27, 2019 8:23 PM
To: Vu Minh Nguyen 
; Gary Lee

Cc: 
opensaf-devel@lists.sourceforge.net;
 Vu Minh Nguyen

Subject: RE: [PATCH 1/1] nid: fix opensafd crashed during start-up [#3013]

Hi Vu,
I discussed a bit with Anders, likely it should work if the socketpair is


changed


to socketpair(AF_UNIX, SOCK_DGRAM .. from SOCK_STREAM. /BR Hans

-Original Message-
From: Hans Nordebäck
Sent: den 27 februari 2019 11:55
To: 'Vu Minh Nguyen' 
; Gary Lee

Cc: 
opensaf-devel@lists.sourceforge.net;
 Vu Minh Nguyen

Subject: RE: [PATCH 1/1] nid: fix opensafd crashed during start-up [#3013]

Hi Vu,
ack, code review only/Thanks HansN

-Original Message-
From: Vu Minh Nguyen 

Sent: den 27 februari 2019 11:48
To: Hans Nordebäck 
; Gary Lee

Cc: 
opensaf-devel@lists.sourceforge.net;
 Vu Minh Nguyen

Subject: [PATCH 1/1] nid: fix opensafd crashed during start-up [#3013]

There is a dependency b/w svc_monitor_thread and spawn_services.
The coredump happens when spawn_services is executed while the thread
has not yet started. In this case, data is sent to the pipe but no one


consumed


it. Later on, reading data from the pipe, will get unexpected data and


crash


the program.

This patch ensures the order: svc_monitor_thread must be in ready state
before spawn_services() is executed.
---
 src/nid/nodeinit.cc | 11 +++
 1 file changed, 11 insertions(+)

diff --git a/src/nid/nodeinit.cc b/src/nid/nodeinit.cc index
5f15916b4..b4945b05c 100644
--- a/src/nid/nodeinit.cc
+++ b/src/nid/nodeinit.cc
@@ -134,6 +134,7 @@ static int start_monitor_svc(const char *svc);
 /* Data declarations for service monitoring */  static int svc_mon_fd =


-1;


static int next_svc_fds_slot = 0;
+static bool svc_monitor_thread_running = false;

 struct SAFServices {
   const std::string fifo_dir = PKGLOCALSTATEDIR; @@ -1517,6 +1518,7 @@
void *svc_monitor_thread(void *fd) {
   next_svc_fds_slot++;

   while (true) {
+svc_monitor_thread_running = true;
 unsigned rc = osaf_poll(fds, next_svc_fds_slot, -1);
 if (rc > 0) {
   // check if any monitored service has exit @@ -1655,6 +1657,15 @@


int


main(int argc, char *argv[]) {
 exit(EXIT_FAILURE);
   }

+  // Waiting until svc_monitor_thread is up and in ready state.
+  // If spawn_services runs before the thread is in ready state,  //
+ receive side of the pipe s_pair will get unexpected data and  // may
+ crash the process.
+  while (svc_monitor_thread_running == false) {
+usleep(100);
+  }
+
+  LOG_NO("svc_monitor_thread is up and in ready state");
   if (parse_nodeinit_conf(sbuf) != NCSCC_RC_SUCCESS) {
 LOG_ER("Failed to parse file %s. Exiting", sbuf);
 exit(EXIT_FAILURE);
--
2.19.2






___
Opensaf-devel mailing list
Opensaf-devel@lists.sourceforge.net
https://lists.sourceforge.net/lists/listinfo/opensaf-devel


Re: [devel] [PATCH 1/1] nid: fix opensafd crashed during start-up [#3013]

2019-02-27 Thread Vu Minh Nguyen
Hi Hans,

Thanks for your comment. 

But I has a concern that the service-monitoring function may not fully work 
if a service is crashed before the svc_monitor_thread goes to ready state?

Is it mandatory for monitoring thread to enter ready state before spawning
SAF services?

Regards, Vu

> -Original Message-
> From: Hans Nordebäck 
> Sent: Wednesday, February 27, 2019 8:23 PM
> To: Vu Minh Nguyen ; Gary Lee
> 
> Cc: opensaf-devel@lists.sourceforge.net; Vu Minh Nguyen
> 
> Subject: RE: [PATCH 1/1] nid: fix opensafd crashed during start-up [#3013]
> 
> Hi Vu,
> I discussed a bit with Anders, likely it should work if the socketpair is
changed
> to socketpair(AF_UNIX, SOCK_DGRAM .. from SOCK_STREAM. /BR Hans
> 
> -Original Message-
> From: Hans Nordebäck
> Sent: den 27 februari 2019 11:55
> To: 'Vu Minh Nguyen' ; Gary Lee
> 
> Cc: opensaf-devel@lists.sourceforge.net; Vu Minh Nguyen
> 
> Subject: RE: [PATCH 1/1] nid: fix opensafd crashed during start-up [#3013]
> 
> Hi Vu,
> ack, code review only/Thanks HansN
> 
> -Original Message-
> From: Vu Minh Nguyen 
> Sent: den 27 februari 2019 11:48
> To: Hans Nordebäck ; Gary Lee
> 
> Cc: opensaf-devel@lists.sourceforge.net; Vu Minh Nguyen
> 
> Subject: [PATCH 1/1] nid: fix opensafd crashed during start-up [#3013]
> 
> There is a dependency b/w svc_monitor_thread and spawn_services.
> The coredump happens when spawn_services is executed while the thread
> has not yet started. In this case, data is sent to the pipe but no one
consumed
> it. Later on, reading data from the pipe, will get unexpected data and
crash
> the program.
> 
> This patch ensures the order: svc_monitor_thread must be in ready state
> before spawn_services() is executed.
> ---
>  src/nid/nodeinit.cc | 11 +++
>  1 file changed, 11 insertions(+)
> 
> diff --git a/src/nid/nodeinit.cc b/src/nid/nodeinit.cc index
> 5f15916b4..b4945b05c 100644
> --- a/src/nid/nodeinit.cc
> +++ b/src/nid/nodeinit.cc
> @@ -134,6 +134,7 @@ static int start_monitor_svc(const char *svc);
>  /* Data declarations for service monitoring */  static int svc_mon_fd =
-1;
> static int next_svc_fds_slot = 0;
> +static bool svc_monitor_thread_running = false;
> 
>  struct SAFServices {
>const std::string fifo_dir = PKGLOCALSTATEDIR; @@ -1517,6 +1518,7 @@
> void *svc_monitor_thread(void *fd) {
>next_svc_fds_slot++;
> 
>while (true) {
> +svc_monitor_thread_running = true;
>  unsigned rc = osaf_poll(fds, next_svc_fds_slot, -1);
>  if (rc > 0) {
>// check if any monitored service has exit @@ -1655,6 +1657,15 @@
int
> main(int argc, char *argv[]) {
>  exit(EXIT_FAILURE);
>}
> 
> +  // Waiting until svc_monitor_thread is up and in ready state.
> +  // If spawn_services runs before the thread is in ready state,  //
> + receive side of the pipe s_pair will get unexpected data and  // may
> + crash the process.
> +  while (svc_monitor_thread_running == false) {
> +usleep(100);
> +  }
> +
> +  LOG_NO("svc_monitor_thread is up and in ready state");
>if (parse_nodeinit_conf(sbuf) != NCSCC_RC_SUCCESS) {
>  LOG_ER("Failed to parse file %s. Exiting", sbuf);
>  exit(EXIT_FAILURE);
> --
> 2.19.2




___
Opensaf-devel mailing list
Opensaf-devel@lists.sourceforge.net
https://lists.sourceforge.net/lists/listinfo/opensaf-devel


Re: [devel] [PATCH 1/1] nid: fix opensafd crashed during start-up [#3013]

2019-02-27 Thread Hans Nordebäck
Hi Vu,
I discussed a bit with Anders, likely it should work if the socketpair is 
changed to socketpair(AF_UNIX, SOCK_DGRAM .. from SOCK_STREAM. /BR Hans

-Original Message-
From: Hans Nordebäck 
Sent: den 27 februari 2019 11:55
To: 'Vu Minh Nguyen' ; Gary Lee 

Cc: opensaf-devel@lists.sourceforge.net; Vu Minh Nguyen 

Subject: RE: [PATCH 1/1] nid: fix opensafd crashed during start-up [#3013]

Hi Vu,
ack, code review only/Thanks HansN

-Original Message-
From: Vu Minh Nguyen 
Sent: den 27 februari 2019 11:48
To: Hans Nordebäck ; Gary Lee 

Cc: opensaf-devel@lists.sourceforge.net; Vu Minh Nguyen 

Subject: [PATCH 1/1] nid: fix opensafd crashed during start-up [#3013]

There is a dependency b/w svc_monitor_thread and spawn_services.
The coredump happens when spawn_services is executed while the thread has not 
yet started. In this case, data is sent to the pipe but no one consumed it. 
Later on, reading data from the pipe, will get unexpected data and crash the 
program.

This patch ensures the order: svc_monitor_thread must be in ready state before 
spawn_services() is executed.
---
 src/nid/nodeinit.cc | 11 +++
 1 file changed, 11 insertions(+)

diff --git a/src/nid/nodeinit.cc b/src/nid/nodeinit.cc index 
5f15916b4..b4945b05c 100644
--- a/src/nid/nodeinit.cc
+++ b/src/nid/nodeinit.cc
@@ -134,6 +134,7 @@ static int start_monitor_svc(const char *svc);
 /* Data declarations for service monitoring */  static int svc_mon_fd = -1;  
static int next_svc_fds_slot = 0;
+static bool svc_monitor_thread_running = false;
 
 struct SAFServices {
   const std::string fifo_dir = PKGLOCALSTATEDIR; @@ -1517,6 +1518,7 @@ void 
*svc_monitor_thread(void *fd) {
   next_svc_fds_slot++;
 
   while (true) {
+svc_monitor_thread_running = true;
 unsigned rc = osaf_poll(fds, next_svc_fds_slot, -1);
 if (rc > 0) {
   // check if any monitored service has exit @@ -1655,6 +1657,15 @@ int 
main(int argc, char *argv[]) {
 exit(EXIT_FAILURE);
   }
 
+  // Waiting until svc_monitor_thread is up and in ready state.
+  // If spawn_services runs before the thread is in ready state,  // 
+ receive side of the pipe s_pair will get unexpected data and  // may 
+ crash the process.
+  while (svc_monitor_thread_running == false) {
+usleep(100);
+  }
+
+  LOG_NO("svc_monitor_thread is up and in ready state");
   if (parse_nodeinit_conf(sbuf) != NCSCC_RC_SUCCESS) {
 LOG_ER("Failed to parse file %s. Exiting", sbuf);
 exit(EXIT_FAILURE);
--
2.19.2



___
Opensaf-devel mailing list
Opensaf-devel@lists.sourceforge.net
https://lists.sourceforge.net/lists/listinfo/opensaf-devel


Re: [devel] [PATCH 1/1] nid: fix opensafd crashed during start-up [#3013]

2019-02-27 Thread Hans Nordebäck
Hi Vu,
ack, code review only/Thanks HansN

-Original Message-
From: Vu Minh Nguyen  
Sent: den 27 februari 2019 11:48
To: Hans Nordebäck ; Gary Lee 

Cc: opensaf-devel@lists.sourceforge.net; Vu Minh Nguyen 

Subject: [PATCH 1/1] nid: fix opensafd crashed during start-up [#3013]

There is a dependency b/w svc_monitor_thread and spawn_services.
The coredump happens when spawn_services is executed while the thread has not 
yet started. In this case, data is sent to the pipe but no one consumed it. 
Later on, reading data from the pipe, will get unexpected data and crash the 
program.

This patch ensures the order: svc_monitor_thread must be in ready state before 
spawn_services() is executed.
---
 src/nid/nodeinit.cc | 11 +++
 1 file changed, 11 insertions(+)

diff --git a/src/nid/nodeinit.cc b/src/nid/nodeinit.cc index 
5f15916b4..b4945b05c 100644
--- a/src/nid/nodeinit.cc
+++ b/src/nid/nodeinit.cc
@@ -134,6 +134,7 @@ static int start_monitor_svc(const char *svc);
 /* Data declarations for service monitoring */  static int svc_mon_fd = -1;  
static int next_svc_fds_slot = 0;
+static bool svc_monitor_thread_running = false;
 
 struct SAFServices {
   const std::string fifo_dir = PKGLOCALSTATEDIR; @@ -1517,6 +1518,7 @@ void 
*svc_monitor_thread(void *fd) {
   next_svc_fds_slot++;
 
   while (true) {
+svc_monitor_thread_running = true;
 unsigned rc = osaf_poll(fds, next_svc_fds_slot, -1);
 if (rc > 0) {
   // check if any monitored service has exit @@ -1655,6 +1657,15 @@ int 
main(int argc, char *argv[]) {
 exit(EXIT_FAILURE);
   }
 
+  // Waiting until svc_monitor_thread is up and in ready state.
+  // If spawn_services runs before the thread is in ready state,  // 
+ receive side of the pipe s_pair will get unexpected data and  // may 
+ crash the process.
+  while (svc_monitor_thread_running == false) {
+usleep(100);
+  }
+
+  LOG_NO("svc_monitor_thread is up and in ready state");
   if (parse_nodeinit_conf(sbuf) != NCSCC_RC_SUCCESS) {
 LOG_ER("Failed to parse file %s. Exiting", sbuf);
 exit(EXIT_FAILURE);
--
2.19.2



___
Opensaf-devel mailing list
Opensaf-devel@lists.sourceforge.net
https://lists.sourceforge.net/lists/listinfo/opensaf-devel


[devel] [PATCH 1/1] nid: fix opensafd crashed during start-up [#3013]

2019-02-27 Thread Vu Minh Nguyen
There is a dependency b/w svc_monitor_thread and spawn_services.
The coredump happens when spawn_services is executed while
the thread has not yet started. In this case, data is sent to the
pipe but no one consumed it. Later on, reading data from the pipe,
will get unexpected data and crash the program.

This patch ensures the order: svc_monitor_thread must be in ready state
before spawn_services() is executed.
---
 src/nid/nodeinit.cc | 11 +++
 1 file changed, 11 insertions(+)

diff --git a/src/nid/nodeinit.cc b/src/nid/nodeinit.cc
index 5f15916b4..b4945b05c 100644
--- a/src/nid/nodeinit.cc
+++ b/src/nid/nodeinit.cc
@@ -134,6 +134,7 @@ static int start_monitor_svc(const char *svc);
 /* Data declarations for service monitoring */
 static int svc_mon_fd = -1;
 static int next_svc_fds_slot = 0;
+static bool svc_monitor_thread_running = false;
 
 struct SAFServices {
   const std::string fifo_dir = PKGLOCALSTATEDIR;
@@ -1517,6 +1518,7 @@ void *svc_monitor_thread(void *fd) {
   next_svc_fds_slot++;
 
   while (true) {
+svc_monitor_thread_running = true;
 unsigned rc = osaf_poll(fds, next_svc_fds_slot, -1);
 if (rc > 0) {
   // check if any monitored service has exit
@@ -1655,6 +1657,15 @@ int main(int argc, char *argv[]) {
 exit(EXIT_FAILURE);
   }
 
+  // Waiting until svc_monitor_thread is up and in ready state.
+  // If spawn_services runs before the thread is in ready state,
+  // receive side of the pipe s_pair will get unexpected data and
+  // may crash the process.
+  while (svc_monitor_thread_running == false) {
+usleep(100);
+  }
+
+  LOG_NO("svc_monitor_thread is up and in ready state");
   if (parse_nodeinit_conf(sbuf) != NCSCC_RC_SUCCESS) {
 LOG_ER("Failed to parse file %s. Exiting", sbuf);
 exit(EXIT_FAILURE);
-- 
2.19.2



___
Opensaf-devel mailing list
Opensaf-devel@lists.sourceforge.net
https://lists.sourceforge.net/lists/listinfo/opensaf-devel