Ack with comments, marked AndersW below.
regards,
Anders Widell
On 04/28/2017 11:36 AM, Minh Chau wrote:
> At startup, osaftransportd waits for osafdtmd.pid file creation
> and then reads dtm pid. If osafdtmd.pid has not been completedly
> created but osaftransportd still receives IN_CREATE, osaftransported
> will fail to read pid of dtmd. That results in a node reboot with
> a reason as "osafdtmd failed to start".
>
> The patch implements an approach suggested by Anders Widell, which
> creates a completed temporary pid file first, then renames it to
> correct pid file name. Whenever osaftransportd is notified to read
> dtmd's pid, the data in pid file should be always safe to read. In
> addition to this, FileNotify needs to introduce IN_MOVED_TO event.
> ---
> src/base/daemon.c | 27 ++++++++++++++++++---------
> src/base/file_notify.cc | 10 +++++++++-
> 2 files changed, 27 insertions(+), 10 deletions(-)
>
> diff --git a/src/base/daemon.c b/src/base/daemon.c
> index 77a869561..2fd161ce0 100644
> --- a/src/base/daemon.c
> +++ b/src/base/daemon.c
> @@ -88,40 +88,49 @@ static int __create_pidfile(const char *pidfile)
> {
> FILE *file = NULL;
> int fd, pid, rc = 0;
> + char pidfiletmp[256] = {0};
> +
> + sprintf(pidfiletmp, "%s.tmp", pidfile);
AndersW> Use snprintf instead of sprintf.
AndersW> Must include PID in the name of the temporary file, e.g.
"%s.%u.tmp", pidfile, getpid().
>
> /* open the file and associate a stream with it */
> - if (((fd = open(pidfile, O_RDWR | O_CREAT, 0644)) == -1) ||
> + if (((fd = open(pidfiletmp, O_RDWR | O_CREAT, 0644)) == -1) ||
> ((file = fdopen(fd, "r+")) == NULL)) {
> - syslog(LOG_ERR, "open failed, pidfile=%s, errno=%s", pidfile,
> - strerror(errno));
> + syslog(LOG_ERR, "open failed, pidfiletmp=%s, errno=%s",
> + pidfiletmp, strerror(errno));
> return -1;
> }
>
> /* Lock the file */
> if (flock(fd, LOCK_EX | LOCK_NB) == -1) {
> - syslog(LOG_ERR, "flock failed, pidfile=%s, errno=%s", pidfile,
> - strerror(errno));
> + syslog(LOG_ERR, "flock failed, pidfiletmp=%s, errno=%s",
> + pidfiletmp, strerror(errno));
> fclose(file);
> return -1;
> }
>
AndersW> flock() is no longer necessary when we use a temporary file, so
remove the above lines for handling flock()
> pid = getpid();
> if (!fprintf(file, "%d\n", pid)) {
> - syslog(LOG_ERR, "fprintf failed, pidfile=%s, errno=%s", pidfile,
> - strerror(errno));
> + syslog(LOG_ERR, "fprintf failed, pidfiletmp=%s, errno=%s",
> + pidfiletmp, strerror(errno));
> fclose(file);
AndersW> unlink() the file
> return -1;
> }
> fflush(file);
>
> if (flock(fd, LOCK_UN) == -1) {
> - syslog(LOG_ERR, "flock failed, pidfile=%s, errno=%s", pidfile,
> - strerror(errno));
> + syslog(LOG_ERR, "flock failed, pidfiletmp=%s, errno=%s",
> + pidfiletmp, strerror(errno));
> fclose(file);
> return -1;
> }
AndersW> Remove flock()
> fclose(file);
>
> + if (rename(pidfiletmp, pidfile) != 0) {
> + syslog(LOG_ERR, "rename failed, old=%s new=%s, error:%s",
> + pidfiletmp, pidfile, strerror(errno));
AndersW> unlink(pidfiletmp)
> + return -1;
> + }
I think we can use link(pidfiletmp, pidfile); unlink(pidtmpfile) instead
of rename() here. link() will cause an IN_CREATE inotify event, so we
don't need to modify file_notify.cc. That will also be more compatible
with other potential readers accessing the pidfile, which might also
expect an IN_CREATE event.
> +
> return rc;
> }
>
> diff --git a/src/base/file_notify.cc b/src/base/file_notify.cc
> index e96be8b4a..80d1b7d54 100644
> --- a/src/base/file_notify.cc
> +++ b/src/base/file_notify.cc
> @@ -50,7 +50,7 @@ FileNotify::FileNotifyErrors
> FileNotify::WaitForFileCreation(
> SplitFileName(file_name);
>
> if ((inotify_wd_ = inotify_add_watch(inotify_fd_, file_path_.c_str(),
> - IN_CREATE)) == -1) {
> + IN_CREATE | IN_MOVED_TO)) == -1) {
> LOG_NO("inotify_add_watch failed: %s", strerror(errno));
> return FileNotifyErrors::kError;
> }
> @@ -144,6 +144,14 @@ FileNotify::FileNotifyErrors FileNotify::ProcessEvents(
> return FileNotifyErrors::kOK;
> }
> }
> + if (event->mask & IN_MOVED_TO) {
> + if (file_name_ == event->name) {
> + TRACE("file name: %s moved to %s", file_name_.c_str(),
> + file_path_.c_str());
> + delete[] fds;
> + return FileNotifyErrors::kOK;
> + }
> + }
AndersW> These changes are not needed if we use link() as suggested above.
> if (event->mask & IN_IGNORED) {
> TRACE("IN_IGNORE received, (ignored)");
> }
------------------------------------------------------------------------------
Check out the vibrant tech community on one of the world's most
engaging tech sites, Slashdot.org! http://sdm.link/slashdot
_______________________________________________
Opensaf-devel mailing list
[email protected]
https://lists.sourceforge.net/lists/listinfo/opensaf-devel