At startup, osaftransportd waits for osafdtmd.pid file creation
and then reads dtm pid. If osafdtmd.pid has not been completedly
created but osaftransportd still receives IN_CREATE, osaftransported
will fail to read pid of dtmd. That results in a node reboot with
a reason as "osafdtmd failed to start".
The patch implements an approach suggested by Anders Widell, which
creates a completed temporary pid file first, then renames it to
correct pid file name. Whenever osaftransportd is notified to read
dtmd's pid, the data in pid file should be always safe to read. In
addition to this, FileNotify needs to introduce IN_MOVED_TO event.
---
src/base/daemon.c | 27 ++++++++++++++++++---------
src/base/file_notify.cc | 10 +++++++++-
2 files changed, 27 insertions(+), 10 deletions(-)
diff --git a/src/base/daemon.c b/src/base/daemon.c
index 77a869561..2fd161ce0 100644
--- a/src/base/daemon.c
+++ b/src/base/daemon.c
@@ -88,40 +88,49 @@ static int __create_pidfile(const char *pidfile)
{
FILE *file = NULL;
int fd, pid, rc = 0;
+ char pidfiletmp[256] = {0};
+
+ sprintf(pidfiletmp, "%s.tmp", pidfile);
/* open the file and associate a stream with it */
- if (((fd = open(pidfile, O_RDWR | O_CREAT, 0644)) == -1) ||
+ if (((fd = open(pidfiletmp, O_RDWR | O_CREAT, 0644)) == -1) ||
((file = fdopen(fd, "r+")) == NULL)) {
- syslog(LOG_ERR, "open failed, pidfile=%s, errno=%s", pidfile,
- strerror(errno));
+ syslog(LOG_ERR, "open failed, pidfiletmp=%s, errno=%s",
+ pidfiletmp, strerror(errno));
return -1;
}
/* Lock the file */
if (flock(fd, LOCK_EX | LOCK_NB) == -1) {
- syslog(LOG_ERR, "flock failed, pidfile=%s, errno=%s", pidfile,
- strerror(errno));
+ syslog(LOG_ERR, "flock failed, pidfiletmp=%s, errno=%s",
+ pidfiletmp, strerror(errno));
fclose(file);
return -1;
}
pid = getpid();
if (!fprintf(file, "%d\n", pid)) {
- syslog(LOG_ERR, "fprintf failed, pidfile=%s, errno=%s", pidfile,
- strerror(errno));
+ syslog(LOG_ERR, "fprintf failed, pidfiletmp=%s, errno=%s",
+ pidfiletmp, strerror(errno));
fclose(file);
return -1;
}
fflush(file);
if (flock(fd, LOCK_UN) == -1) {
- syslog(LOG_ERR, "flock failed, pidfile=%s, errno=%s", pidfile,
- strerror(errno));
+ syslog(LOG_ERR, "flock failed, pidfiletmp=%s, errno=%s",
+ pidfiletmp, strerror(errno));
fclose(file);
return -1;
}
fclose(file);
+ if (rename(pidfiletmp, pidfile) != 0) {
+ syslog(LOG_ERR, "rename failed, old=%s new=%s, error:%s",
+ pidfiletmp, pidfile, strerror(errno));
+ return -1;
+ }
+
return rc;
}
diff --git a/src/base/file_notify.cc b/src/base/file_notify.cc
index e96be8b4a..80d1b7d54 100644
--- a/src/base/file_notify.cc
+++ b/src/base/file_notify.cc
@@ -50,7 +50,7 @@ FileNotify::FileNotifyErrors FileNotify::WaitForFileCreation(
SplitFileName(file_name);
if ((inotify_wd_ = inotify_add_watch(inotify_fd_, file_path_.c_str(),
- IN_CREATE)) == -1) {
+ IN_CREATE | IN_MOVED_TO)) == -1) {
LOG_NO("inotify_add_watch failed: %s", strerror(errno));
return FileNotifyErrors::kError;
}
@@ -144,6 +144,14 @@ FileNotify::FileNotifyErrors FileNotify::ProcessEvents(
return FileNotifyErrors::kOK;
}
}
+ if (event->mask & IN_MOVED_TO) {
+ if (file_name_ == event->name) {
+ TRACE("file name: %s moved to %s", file_name_.c_str(),
+ file_path_.c_str());
+ delete[] fds;
+ return FileNotifyErrors::kOK;
+ }
+ }
if (event->mask & IN_IGNORED) {
TRACE("IN_IGNORE received, (ignored)");
}
--
2.11.0
------------------------------------------------------------------------------
Check out the vibrant tech community on one of the world's most
engaging tech sites, Slashdot.org! http://sdm.link/slashdot
_______________________________________________
Opensaf-devel mailing list
[email protected]
https://lists.sourceforge.net/lists/listinfo/opensaf-devel