Ack with comments, marked AndersW below.

regards,
Anders Widell


On 04/28/2017 11:36 AM, Minh Chau wrote:
> At startup, osaftransportd waits for osafdtmd.pid file creation
> and then reads dtm pid. If osafdtmd.pid has not been completedly
> created but osaftransportd still receives IN_CREATE, osaftransported
> will fail to read pid of dtmd. That results in a node reboot with
> a reason as "osafdtmd failed to start".
>
> The patch implements an approach suggested by Anders Widell, which
> creates a completed temporary pid file first, then renames it to
> correct pid file name. Whenever osaftransportd is notified to read
> dtmd's pid, the data in pid file should be always safe to read. In
> addition to this, FileNotify needs to introduce IN_MOVED_TO event.
> ---
>   src/base/daemon.c       | 27 ++++++++++++++++++---------
>   src/base/file_notify.cc | 10 +++++++++-
>   2 files changed, 27 insertions(+), 10 deletions(-)
>
> diff --git a/src/base/daemon.c b/src/base/daemon.c
> index 77a869561..2fd161ce0 100644
> --- a/src/base/daemon.c
> +++ b/src/base/daemon.c
> @@ -88,40 +88,49 @@ static int __create_pidfile(const char *pidfile)
>   {
>       FILE *file = NULL;
>       int fd, pid, rc = 0;
> +     char pidfiletmp[256] = {0};
> +
> +     sprintf(pidfiletmp, "%s.tmp", pidfile);
AndersW> Use snprintf instead of sprintf.
AndersW> Must include PID in the name of the temporary file, e.g. 
"%s.%u.tmp", pidfile, getpid().
>   
>       /* open the file and associate a stream with it */
> -     if (((fd = open(pidfile, O_RDWR | O_CREAT, 0644)) == -1) ||
> +     if (((fd = open(pidfiletmp, O_RDWR | O_CREAT, 0644)) == -1) ||
>           ((file = fdopen(fd, "r+")) == NULL)) {
> -             syslog(LOG_ERR, "open failed, pidfile=%s, errno=%s", pidfile,
> -                    strerror(errno));
> +             syslog(LOG_ERR, "open failed, pidfiletmp=%s, errno=%s",
> +                     pidfiletmp, strerror(errno));
>               return -1;
>       }
>   
>       /* Lock the file */
>       if (flock(fd, LOCK_EX | LOCK_NB) == -1) {
> -             syslog(LOG_ERR, "flock failed, pidfile=%s, errno=%s", pidfile,
> -                    strerror(errno));
> +             syslog(LOG_ERR, "flock failed, pidfiletmp=%s, errno=%s",
> +                     pidfiletmp, strerror(errno));
>               fclose(file);
>               return -1;
>       }
>   
AndersW> flock() is no longer necessary when we use a temporary file, so 
remove the above lines for handling flock()
>       pid = getpid();
>       if (!fprintf(file, "%d\n", pid)) {
> -             syslog(LOG_ERR, "fprintf failed, pidfile=%s, errno=%s", pidfile,
> -                    strerror(errno));
> +             syslog(LOG_ERR, "fprintf failed, pidfiletmp=%s, errno=%s",
> +                      pidfiletmp, strerror(errno));
>               fclose(file);
AndersW> unlink() the file
>               return -1;
>       }
>       fflush(file);
>   
>       if (flock(fd, LOCK_UN) == -1) {
> -             syslog(LOG_ERR, "flock failed, pidfile=%s, errno=%s", pidfile,
> -                    strerror(errno));
> +             syslog(LOG_ERR, "flock failed, pidfiletmp=%s, errno=%s",
> +                     pidfiletmp, strerror(errno));
>               fclose(file);
>               return -1;
>       }
AndersW> Remove flock()
>       fclose(file);
>   
> +     if (rename(pidfiletmp, pidfile) != 0) {
> +             syslog(LOG_ERR, "rename failed, old=%s new=%s, error:%s",
> +                     pidfiletmp, pidfile, strerror(errno));
AndersW> unlink(pidfiletmp)
> +             return -1;
> +     }
I think we can use link(pidfiletmp, pidfile); unlink(pidtmpfile) instead 
of rename() here. link() will cause an IN_CREATE inotify event, so we 
don't need to modify file_notify.cc. That will also be more compatible 
with other potential readers accessing the pidfile, which might also 
expect an IN_CREATE event.

> +
>       return rc;
>   }
>   
> diff --git a/src/base/file_notify.cc b/src/base/file_notify.cc
> index e96be8b4a..80d1b7d54 100644
> --- a/src/base/file_notify.cc
> +++ b/src/base/file_notify.cc
> @@ -50,7 +50,7 @@ FileNotify::FileNotifyErrors 
> FileNotify::WaitForFileCreation(
>     SplitFileName(file_name);
>   
>     if ((inotify_wd_ = inotify_add_watch(inotify_fd_, file_path_.c_str(),
> -                                       IN_CREATE)) == -1) {
> +                                       IN_CREATE | IN_MOVED_TO)) == -1) {
>       LOG_NO("inotify_add_watch failed: %s", strerror(errno));
>       return FileNotifyErrors::kError;
>     }
> @@ -144,6 +144,14 @@ FileNotify::FileNotifyErrors FileNotify::ProcessEvents(
>                   return FileNotifyErrors::kOK;
>                 }
>               }
> +            if (event->mask & IN_MOVED_TO) {
> +              if (file_name_ == event->name) {
> +                TRACE("file name: %s moved to %s", file_name_.c_str(),
> +                    file_path_.c_str());
> +                delete[] fds;
> +                return FileNotifyErrors::kOK;
> +              }
> +            }
AndersW> These changes are not needed if we use link() as suggested above.
>               if (event->mask & IN_IGNORED) {
>                 TRACE("IN_IGNORE received, (ignored)");
>               }


------------------------------------------------------------------------------
Check out the vibrant tech community on one of the world's most
engaging tech sites, Slashdot.org! http://sdm.link/slashdot
_______________________________________________
Opensaf-devel mailing list
[email protected]
https://lists.sourceforge.net/lists/listinfo/opensaf-devel

Reply via email to