At startup, osaftransportd waits for the osafdtmd.pid file to be created
and then reads the dtm pid from it. If osafdtmd.pid has not been completely
written when osaftransportd receives IN_CREATE, osaftransportd
will fail to read the pid of dtmd. That results in a node reboot with
the reason "osafdtmd failed to start".

The patch implements an approach suggested by Anders Widell: first
create a complete temporary pid file, then link the temporary
pid file to the real pid file.
---
 src/base/daemon.c   | 69 ++++++++++++++++++++++++++++++++++++-----------------
 src/nid/opensafd.in |  2 +-
 2 files changed, 48 insertions(+), 23 deletions(-)

diff --git a/src/base/daemon.c b/src/base/daemon.c
index 77a869561..5dc95fdc3 100644
--- a/src/base/daemon.c
+++ b/src/base/daemon.c
@@ -1,6 +1,7 @@
 /*      -*- OpenSAF  -*-
  *
  * (C) Copyright 2010 The OpenSAF Foundation
+ * (C) Copyright 2017 Ericsson AB - All Rights Reserved.
  *
  * This program is distributed in the hope that it will be useful, but
  * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
@@ -44,6 +45,7 @@
 #include "base/ncsgl_defs.h"
 #include "base/os_defs.h"
 #include "base/osaf_secutil.h"
+#include "base/osaf_time.h"
 
 #include <sys/types.h>
 #include <time.h>
@@ -87,42 +89,64 @@ static void __print_usage(const char *progname, FILE 
*stream, int exit_code)
 static int __create_pidfile(const char *pidfile)
 {
        FILE *file = NULL;
-       int fd, pid, rc = 0;
+       int fd, rc = 0;
+       char pidfiletmp[NAME_MAX] = {0};
+       pid_t pid;
 
-       /* open the file and associate a stream with it */
-       if (((fd = open(pidfile, O_RDWR | O_CREAT, 0644)) == -1) ||
-           ((file = fdopen(fd, "r+")) == NULL)) {
-               syslog(LOG_ERR, "open failed, pidfile=%s, errno=%s", pidfile,
-                      strerror(errno));
-               return -1;
-       }
+       pid = getpid();
+       snprintf(pidfiletmp, NAME_MAX, "%s.%u.tmp", pidfile, pid);
 
-       /* Lock the file */
-       if (flock(fd, LOCK_EX | LOCK_NB) == -1) {
-               syslog(LOG_ERR, "flock failed, pidfile=%s, errno=%s", pidfile,
-                      strerror(errno));
-               fclose(file);
+       /* open the file and associate a stream with it */
+       if (((fd = open(pidfiletmp, O_RDWR | O_CREAT, 0644)) == -1) ||
+                       ((file = fdopen(fd, "r+")) == NULL)) {
+               syslog(LOG_ERR, "open failed, pidfiletmp=%s, errno=%s",
+                       pidfiletmp, strerror(errno));
                return -1;
        }
 
-       pid = getpid();
        if (!fprintf(file, "%d\n", pid)) {
-               syslog(LOG_ERR, "fprintf failed, pidfile=%s, errno=%s", pidfile,
-                      strerror(errno));
+               syslog(LOG_ERR, "fprintf failed, pidfiletmp=%s, errno=%s",
+                       pidfiletmp, strerror(errno));
                fclose(file);
+
                return -1;
        }
        fflush(file);
+       fclose(file);
 
-       if (flock(fd, LOCK_UN) == -1) {
-               syslog(LOG_ERR, "flock failed, pidfile=%s, errno=%s", pidfile,
-                      strerror(errno));
-               fclose(file);
+       int retry_cnt = 0;
+       while (link(pidfiletmp, pidfile) != 0) {
+               /* don't expect old pid file pre-existed and being used because
+                * we removed all before starting OpenSAF (see opensafd script),
+                * but do 5 retries to unlink and link again.
+                */
+               if (errno == EEXIST && retry_cnt == 0) {
+                       do {
+                               rc = unlink(pidfile);
+                               if (retry_cnt > 0) {
+                                       osaf_nanosleep(&kHundredMilliseconds);
+                               }
+                       } while ((rc != 0) && (++retry_cnt < 5)
+                                       && (errno == EBUSY));
+                       if (rc != 0) {
+                               syslog(LOG_ERR, "unlink failed, pidfile=%s, "
+                                       "error:%s", pidfile, strerror(errno));
+                               return -1;
+                       }
+               } else {
+                       syslog(LOG_ERR, "link failed, old=%s new=%s, error:%s",
+                               pidfiletmp, pidfile, strerror(errno));
+                       return -1;
+               }
+       }
+
+       if (unlink(pidfiletmp) != 0) {
+               syslog(LOG_ERR, "unlink failed, pidfiletmp=%s, error:%s",
+                       pidfiletmp, strerror(errno));
                return -1;
        }
-       fclose(file);
 
-       return rc;
+       return 0;
 }
 
 static void create_fifofile(const char *fifofile)
@@ -509,6 +533,7 @@ void daemon_exit(void)
 
        /* Lets remove any such file if it already exists */
        unlink(fifo_file);
+       unlink(__pidfile);
 
        if (__gcov_flush) {
                __gcov_flush();
diff --git a/src/nid/opensafd.in b/src/nid/opensafd.in
index e7683bd7e..d316967c5 100644
--- a/src/nid/opensafd.in
+++ b/src/nid/opensafd.in
@@ -90,7 +90,7 @@ check_env() {
                        getent passwd $OPENSAF_USER > /dev/null && chown 
$OPENSAF_USER $directory
                fi
        done
-
+       rm -f $pkgpiddir/*
        rm -f $pkglogdir/nid.log
 }
 
-- 
2.11.0


------------------------------------------------------------------------------
Check out the vibrant tech community on one of the world's most
engaging tech sites, Slashdot.org! http://sdm.link/slashdot
_______________________________________________
Opensaf-devel mailing list
[email protected]
https://lists.sourceforge.net/lists/listinfo/opensaf-devel

Reply via email to