In the scenario where amfnd terminates a huge number of components
at once (around 800 components), amfnd catches the SIGCHLD signal
from the components' processes in its signal handler and calls
write() to notify amfnd's threads to proceed with the component
termination. As a result, multiple write() calls are observed to
block, because the thread that calls read() is busy with waitpid(),
even though waitpid() is called in no-hang (WNOHANG) mode.

The read() thread is slow because it scans through all pids, and
so many child processes are being terminated at the same time.

This patch makes the socketpair non-blocking so that write() is
never blocked. It also uses poll() to wait for input events, to
avoid hogging the CPU in the read() thread.
---
 src/base/sysf_exc_scr.c | 17 +++++++++++++++--
 1 file changed, 15 insertions(+), 2 deletions(-)

diff --git a/src/base/sysf_exc_scr.c b/src/base/sysf_exc_scr.c
index 378b1eeab..6348985cb 100644
--- a/src/base/sysf_exc_scr.c
+++ b/src/base/sysf_exc_scr.c
@@ -33,10 +33,11 @@
 #include "base/sysf_exc_scr.h"
 #include "base/ncssysf_def.h"
 
+#include <poll.h>
 #include <sched.h>
 
 SYSF_EXECUTE_MODULE_CB module_cb;
-
+static struct pollfd fds[1];
 /*****************************************************************************
 
   PROCEDURE        : ncs_exc_mdl_start_timer
@@ -169,8 +170,20 @@ void ncs_exec_mod_hdlr(void)
        SYSF_PID_LIST *exec_pid = NULL;
        int status = -1;
        int pid = -1;
+       int polltmo = -1;
+
+       fds[0].fd = module_cb.read_fd;
+       fds[0].events = POLLIN;
 
        while (1) {
+               int pollretval = poll(fds, 1, polltmo);
+               if (pollretval == -1) {
+                       if (errno == EINTR) continue;
+                       LOG_ER("ncs_exec_mod_hdlr: poll FAILED - %s",
+                           strerror(errno));
+                       break;
+               }
+               if ((fds[0].revents & POLLIN) == false) continue;
                while ((ret_val = read(
                            module_cb.read_fd, (((uint8_t *)&info) + count),
                            (maxsize - count))) != (maxsize - count)) {
@@ -430,7 +443,7 @@ uint32_t start_exec_mod_cb(void)
                return m_LEAP_DBG_SINK(NCSCC_RC_FAILURE);
        }
 
-       if (0 != socketpair(AF_UNIX, SOCK_DGRAM, 0, spair)) {
+       if (0 != socketpair(AF_UNIX, SOCK_DGRAM | SOCK_NONBLOCK, 0, spair)) {
                perror("init_exec_mod_cb: socketpair: ");
                return m_LEAP_DBG_SINK(NCSCC_RC_FAILURE);
        }
-- 
2.20.1



_______________________________________________
Opensaf-devel mailing list
Opensaf-devel@lists.sourceforge.net
https://lists.sourceforge.net/lists/listinfo/opensaf-devel

Reply via email to