I have one more comment: I think you have to unlink() an existing 
pidfile before creating a new one, otherwise we may fail to start if 
there is an old pidfile that was never removed. We probably need to 
re-visit this and make it more safe, so that it checks if the old 
process already exists before removing an existing pidfile. But for now, 
I think it is enough to just remove it.

regards,

Anders Widell


On 05/16/2017 09:50 AM, minh chau wrote:
> Hi Ramesh,
>
> I'm going to revise the patch according to Anders and Hans' comments. 
> Do you have any comments?
>
> Thanks,
> Minh
>
> On 12/05/17 20:00, Anders Widell wrote:
>> Ack with comments, marked AndersW below.
>>
>> regards,
>> Anders Widell
>>
>>
>> On 04/28/2017 11:36 AM, Minh Chau wrote:
>>> At startup, osaftransportd waits for osafdtmd.pid file creation
>>> and then reads dtm pid. If osafdtmd.pid has not been completedly
>>> created but osaftransportd still receives IN_CREATE, osaftransported
>>> will fail to read pid of dtmd. That results in a node reboot with
>>> a reason as "osafdtmd failed to start".
>>>
>>> The patch implements an approach suggested by Anders Widell, which
>>> creates a completed temporary pid file first, then renames it to
>>> correct pid file name. Whenever osaftransportd is notified to read
>>> dtmd's pid, the data in pid file should be always safe to read. In
>>> addition to this, FileNotify needs to introduce IN_MOVED_TO event.
>>> ---
>>>   src/base/daemon.c       | 27 ++++++++++++++++++---------
>>>   src/base/file_notify.cc | 10 +++++++++-
>>>   2 files changed, 27 insertions(+), 10 deletions(-)
>>>
>>> diff --git a/src/base/daemon.c b/src/base/daemon.c
>>> index 77a869561..2fd161ce0 100644
>>> --- a/src/base/daemon.c
>>> +++ b/src/base/daemon.c
>>> @@ -88,40 +88,49 @@ static int __create_pidfile(const char *pidfile)
>>>   {
>>>       FILE *file = NULL;
>>>       int fd, pid, rc = 0;
>>> +    char pidfiletmp[256] = {0};
>>> +
>>> +    sprintf(pidfiletmp, "%s.tmp", pidfile);
>> AndersW> Use snprintf instead of sprintf.
>> AndersW> Must include PID in the name of the temporary file, e.g. 
>> "%s.%u.tmp", pidfile, getpid().
>>>         /* open the file and associate a stream with it */
>>> -    if (((fd = open(pidfile, O_RDWR | O_CREAT, 0644)) == -1) ||
>>> +    if (((fd = open(pidfiletmp, O_RDWR | O_CREAT, 0644)) == -1) ||
>>>           ((file = fdopen(fd, "r+")) == NULL)) {
>>> -        syslog(LOG_ERR, "open failed, pidfile=%s, errno=%s", pidfile,
>>> -               strerror(errno));
>>> +        syslog(LOG_ERR, "open failed, pidfiletmp=%s, errno=%s",
>>> +            pidfiletmp, strerror(errno));
>>>           return -1;
>>>       }
>>>         /* Lock the file */
>>>       if (flock(fd, LOCK_EX | LOCK_NB) == -1) {
>>> -        syslog(LOG_ERR, "flock failed, pidfile=%s, errno=%s", pidfile,
>>> -               strerror(errno));
>>> +        syslog(LOG_ERR, "flock failed, pidfiletmp=%s, errno=%s",
>>> +            pidfiletmp, strerror(errno));
>>>           fclose(file);
>>>           return -1;
>>>       }
>> AndersW> flock() is no longer necessary when we use a temporary file, 
>> so remove the above lines for handling flock()
>>>       pid = getpid();
>>>       if (!fprintf(file, "%d\n", pid)) {
>>> -        syslog(LOG_ERR, "fprintf failed, pidfile=%s, errno=%s", 
>>> pidfile,
>>> -               strerror(errno));
>>> +        syslog(LOG_ERR, "fprintf failed, pidfiletmp=%s, errno=%s",
>>> +             pidfiletmp, strerror(errno));
>>>           fclose(file);
>> AndersW> unlink() the file
>>>           return -1;
>>>       }
>>>       fflush(file);
>>>         if (flock(fd, LOCK_UN) == -1) {
>>> -        syslog(LOG_ERR, "flock failed, pidfile=%s, errno=%s", pidfile,
>>> -               strerror(errno));
>>> +        syslog(LOG_ERR, "flock failed, pidfiletmp=%s, errno=%s",
>>> +            pidfiletmp, strerror(errno));
>>>           fclose(file);
>>>           return -1;
>>>       }
>> AndersW> Remove flock()
>>>       fclose(file);
>>>   +    if (rename(pidfiletmp, pidfile) != 0) {
>>> +        syslog(LOG_ERR, "rename failed, old=%s new=%s, error:%s",
>>> +            pidfiletmp, pidfile, strerror(errno));
>> AndersW> unlink(pidfiletmp)
>>> +        return -1;
>>> +    }
>> I think we can use link(pidfiletmp, pidfile); unlink(pidtmpfile) 
>> instead of rename() here. link() will cause an IN_CREATE inotify 
>> event, so we don't need to modify file_notify.cc. That will also be 
>> more compatible with other potential readers accessing the pidfile, 
>> which might also expect an IN_CREATE event.
>>
>>> +
>>>       return rc;
>>>   }
>>>   diff --git a/src/base/file_notify.cc b/src/base/file_notify.cc
>>> index e96be8b4a..80d1b7d54 100644
>>> --- a/src/base/file_notify.cc
>>> +++ b/src/base/file_notify.cc
>>> @@ -50,7 +50,7 @@ FileNotify::FileNotifyErrors 
>>> FileNotify::WaitForFileCreation(
>>>     SplitFileName(file_name);
>>>       if ((inotify_wd_ = inotify_add_watch(inotify_fd_, 
>>> file_path_.c_str(),
>>> -                                       IN_CREATE)) == -1) {
>>> +                                       IN_CREATE | IN_MOVED_TO)) == 
>>> -1) {
>>>       LOG_NO("inotify_add_watch failed: %s", strerror(errno));
>>>       return FileNotifyErrors::kError;
>>>     }
>>> @@ -144,6 +144,14 @@ FileNotify::FileNotifyErrors 
>>> FileNotify::ProcessEvents(
>>>                   return FileNotifyErrors::kOK;
>>>                 }
>>>               }
>>> +            if (event->mask & IN_MOVED_TO) {
>>> +              if (file_name_ == event->name) {
>>> +                TRACE("file name: %s moved to %s", file_name_.c_str(),
>>> +                    file_path_.c_str());
>>> +                delete[] fds;
>>> +                return FileNotifyErrors::kOK;
>>> +              }
>>> +            }
>> AndersW> These changes are not needed if we use link() as suggested 
>> above.
>>>               if (event->mask & IN_IGNORED) {
>>>                 TRACE("IN_IGNORE received, (ignored)");
>>>               }
>>
>>
>


------------------------------------------------------------------------------
Check out the vibrant tech community on one of the world's most
engaging tech sites, Slashdot.org! http://sdm.link/slashdot
_______________________________________________
Opensaf-devel mailing list
[email protected]
https://lists.sourceforge.net/lists/listinfo/opensaf-devel

Reply via email to