Hi Ramesh,
I'm going to revise the patch according to Anders and Hans' comments. Do
you have any comments?
Thanks,
Minh
On 12/05/17 20:00, Anders Widell wrote:
> Ack with comments, marked AndersW below.
>
> regards,
> Anders Widell
>
>
> On 04/28/2017 11:36 AM, Minh Chau wrote:
>> At startup, osaftransportd waits for osafdtmd.pid file creation
>> and then reads dtm pid. If osafdtmd.pid has not been completedly
>> created but osaftransportd still receives IN_CREATE, osaftransported
>> will fail to read pid of dtmd. That results in a node reboot with
>> a reason as "osafdtmd failed to start".
>>
>> The patch implements an approach suggested by Anders Widell, which
>> creates a completed temporary pid file first, then renames it to
>> correct pid file name. Whenever osaftransportd is notified to read
>> dtmd's pid, the data in pid file should be always safe to read. In
>> addition to this, FileNotify needs to introduce IN_MOVED_TO event.
>> ---
>> src/base/daemon.c | 27 ++++++++++++++++++---------
>> src/base/file_notify.cc | 10 +++++++++-
>> 2 files changed, 27 insertions(+), 10 deletions(-)
>>
>> diff --git a/src/base/daemon.c b/src/base/daemon.c
>> index 77a869561..2fd161ce0 100644
>> --- a/src/base/daemon.c
>> +++ b/src/base/daemon.c
>> @@ -88,40 +88,49 @@ static int __create_pidfile(const char *pidfile)
>> {
>> FILE *file = NULL;
>> int fd, pid, rc = 0;
>> + char pidfiletmp[256] = {0};
>> +
>> + sprintf(pidfiletmp, "%s.tmp", pidfile);
> AndersW> Use snprintf instead of sprintf.
> AndersW> Must include PID in the name of the temporary file, e.g.
> "%s.%u.tmp", pidfile, getpid().
>> /* open the file and associate a stream with it */
>> - if (((fd = open(pidfile, O_RDWR | O_CREAT, 0644)) == -1) ||
>> + if (((fd = open(pidfiletmp, O_RDWR | O_CREAT, 0644)) == -1) ||
>> ((file = fdopen(fd, "r+")) == NULL)) {
>> - syslog(LOG_ERR, "open failed, pidfile=%s, errno=%s", pidfile,
>> - strerror(errno));
>> + syslog(LOG_ERR, "open failed, pidfiletmp=%s, errno=%s",
>> + pidfiletmp, strerror(errno));
>> return -1;
>> }
>> /* Lock the file */
>> if (flock(fd, LOCK_EX | LOCK_NB) == -1) {
>> - syslog(LOG_ERR, "flock failed, pidfile=%s, errno=%s", pidfile,
>> - strerror(errno));
>> + syslog(LOG_ERR, "flock failed, pidfiletmp=%s, errno=%s",
>> + pidfiletmp, strerror(errno));
>> fclose(file);
>> return -1;
>> }
> AndersW> flock() is no longer necessary when we use a temporary file,
> so remove the above lines for handling flock()
>> pid = getpid();
>> if (!fprintf(file, "%d\n", pid)) {
>> - syslog(LOG_ERR, "fprintf failed, pidfile=%s, errno=%s",
>> pidfile,
>> - strerror(errno));
>> + syslog(LOG_ERR, "fprintf failed, pidfiletmp=%s, errno=%s",
>> + pidfiletmp, strerror(errno));
>> fclose(file);
> AndersW> unlink() the file
>> return -1;
>> }
>> fflush(file);
>> if (flock(fd, LOCK_UN) == -1) {
>> - syslog(LOG_ERR, "flock failed, pidfile=%s, errno=%s", pidfile,
>> - strerror(errno));
>> + syslog(LOG_ERR, "flock failed, pidfiletmp=%s, errno=%s",
>> + pidfiletmp, strerror(errno));
>> fclose(file);
>> return -1;
>> }
> AndersW> Remove flock()
>> fclose(file);
>> + if (rename(pidfiletmp, pidfile) != 0) {
>> + syslog(LOG_ERR, "rename failed, old=%s new=%s, error:%s",
>> + pidfiletmp, pidfile, strerror(errno));
> AndersW> unlink(pidfiletmp)
>> + return -1;
>> + }
> I think we can use link(pidfiletmp, pidfile); unlink(pidtmpfile)
> instead of rename() here. link() will cause an IN_CREATE inotify
> event, so we don't need to modify file_notify.cc. That will also be
> more compatible with other potential readers accessing the pidfile,
> which might also expect an IN_CREATE event.
>
>> +
>> return rc;
>> }
>> diff --git a/src/base/file_notify.cc b/src/base/file_notify.cc
>> index e96be8b4a..80d1b7d54 100644
>> --- a/src/base/file_notify.cc
>> +++ b/src/base/file_notify.cc
>> @@ -50,7 +50,7 @@ FileNotify::FileNotifyErrors
>> FileNotify::WaitForFileCreation(
>> SplitFileName(file_name);
>> if ((inotify_wd_ = inotify_add_watch(inotify_fd_,
>> file_path_.c_str(),
>> - IN_CREATE)) == -1) {
>> + IN_CREATE | IN_MOVED_TO)) ==
>> -1) {
>> LOG_NO("inotify_add_watch failed: %s", strerror(errno));
>> return FileNotifyErrors::kError;
>> }
>> @@ -144,6 +144,14 @@ FileNotify::FileNotifyErrors
>> FileNotify::ProcessEvents(
>> return FileNotifyErrors::kOK;
>> }
>> }
>> + if (event->mask & IN_MOVED_TO) {
>> + if (file_name_ == event->name) {
>> + TRACE("file name: %s moved to %s", file_name_.c_str(),
>> + file_path_.c_str());
>> + delete[] fds;
>> + return FileNotifyErrors::kOK;
>> + }
>> + }
> AndersW> These changes are not needed if we use link() as suggested
> above.
>> if (event->mask & IN_IGNORED) {
>> TRACE("IN_IGNORE received, (ignored)");
>> }
>
>
------------------------------------------------------------------------------
Check out the vibrant tech community on one of the world's most
engaging tech sites, Slashdot.org! http://sdm.link/slashdot
_______________________________________________
Opensaf-devel mailing list
[email protected]
https://lists.sourceforge.net/lists/listinfo/opensaf-devel