Hi Mathi,

Isn't the problem that the programs nodeinit spawns are themselves
daemonized, i.e. now owned by the init process, while the pid from
nodeinit's initial fork, service->pid, may by now be a zombie process?
So the pid files under /var/run should be used in all three cases:
script, daemon and process.
/Thanks HansN

On 04/14/2015 03:02 PM, Mathivanan Naickan Palanivelu wrote:
> Hi Hans,
>
> I have clarified to your comment below. It's an ACK anyways.
>
>> Nodeinit sends SIGKILL to the parent pid returned from fork. When
>> sending SIGABRT, the child pid should be used instead.
> Yes that's true, and a good catch. Must have been caught in the first version!
>
> You have probably already guessed the reason. In case you have not:
> this behaviour is seen because in nodeinit.conf we marked the service for
> NID to spawn as a script, by specifying "S".
> For example:
> /usr/local/lib/opensaf/clc-cli/osaf-rded:RDE:S:/usr/local/lib/opensaf/clc-cli/osaf-rded:12000:-6:2:1:start:stop
>
> Therefore the pid is that of the script and not of our executable. Also,
> while spawning scripts, NID cancels all the signals for that process.
>
> If we had specified it as "D" for daemon or "E" for regular process then the 
> service->pid would have
> had the pid of that process itself.
>
> Thanks,
> Mathi.
>
> ----- [email protected] wrote:
>
>> osaf/services/infrastructure/nid/nodeinit.c |  88
>> +++++++++++++++++++++++++++++
>>   1 files changed, 88 insertions(+), 0 deletions(-)
>>
>>
>> Nodeinit sends SIGKILL to the parent pid returned from fork. When
>> sending SIGABRT, the child pid should be used instead.
>>
>> diff --git a/osaf/services/infrastructure/nid/nodeinit.c
>> b/osaf/services/infrastructure/nid/nodeinit.c
>> --- a/osaf/services/infrastructure/nid/nodeinit.c
>> +++ b/osaf/services/infrastructure/nid/nodeinit.c
>> @@ -56,6 +56,10 @@
>>   #include <sys/time.h>
>>   #include <sys/resource.h>
>>   
>> +#include <signal.h>
>> +#include <sys/wait.h>
>> +#include <stdint.h>
>> +
>>   #include <configmake.h>
>>   #include <rda_papi.h>
>>   #include <logtrace.h>
>> @@ -1084,6 +1088,58 @@ uint32_t check_process(NID_SPAWN_INFO *s
>>      TRACE_LEAVE();
>>   }
>>   
>> +
>> +/****************************************************************************
>> + * Name          : get_pid_from_file
>>        *
>> + *
>>        *
>> + * Description   : Retrieves the given service name pid.
>>        *
>> + *
>>        *
>> + * Arguments     : service name.
>>        *
>> + *
>>        *
>> + * Return Values : > 0 - process id of given service
>>        *
>> + *                 -1 - error, see syslog
>>        *
>> + *
>>        *
>> +
>> ***************************************************************************/
>> +static pid_t get_pid_from_file(const char* service_name)
>> +{
>> +    char pid_file[NAME_MAX];
>> +
>> +    char prog_name[40];
>> +    char *service, *tmp;
>> +    FILE *f;
>> +    pid_t pid;
>> +
>> +    service = (char*) malloc(strlen(service_name) +1);
>> +    strcpy(service, service_name);
>> +    tmp = service;
>> +    for ( ; *tmp; ++tmp) *tmp = tolower(*tmp);
>> +
>> +    strcpy(prog_name, "osaf");
>> +    strcat(prog_name, service);
>> +    free(service);
>> +
>> +    LOG_IN("XXXX %s", prog_name);
>> +
>> +    snprintf(pid_file, sizeof(pid_file), PKGPIDDIR "/%s.pid",
>> prog_name);
>> +
>> +    if ((f = fopen(pid_file, "r")) == 0) {
>> +            LOG_WA("Failed to open %s", pid_file);
>> +            return -1;
>> +    }
>> +
>> +    if (fscanf(f, "%d", &pid) == 0) {
>> +            LOG_WA("Could not read PID from file %s", pid_file);
>> +            return -1;
>> +    }
>> +
>> +    if (fclose(f) != 0) {
>> +            LOG_WA("Could not close file");
>> +            return -1;
>> +    }
>> +
>> +    return pid;
>> +}
>> +
>>
>> /****************************************************************************
>>    * Name          : cleanup
>>        *
>>    *
>>        *
>> @@ -1108,6 +1164,38 @@ void cleanup(NID_SPAWN_INFO *service)
>>      nid_close_ipc();
>>      select_fd = -1;
>>   
>> +    pid_t w_pid;
>> +    pid_t pid;
>> +    int status;
>> +    uint32_t no_of_retries = 0;
>> +    const uint32_t MAX_NO_RETRIES = 5;
>> +
>> +    // get pid of current service_name instead of the parent pid
>> +    pid = get_pid_from_file(service->serv_name);
>> +    if (pid > 0) {
>> +            if (check_process(service)) {
>> +                    // send abort signal to process to generate a core dump
>> +                    LOG_ER("Sending SIGABRT to %s, pid=%d, (parent pid=%d)",
>> service->serv_name, pid, service->pid);
>> +                    if (kill(pid, SIGABRT) >= 0) {
>> +                            // wait a short period for process to exit
>> +                            do {
>> +                                    w_pid = waitpid(service->pid, &status, 
>> WNOHANG);
>> +                                    if (w_pid < 0) {
>> +                                            if (errno == EINTR)
>> +                                                    continue;
>> +                                            else
>> +                                                    break;
>> +                                    } else if (w_pid > 0) {
>> +                                            if (WIFEXITED(status) || 
>> WIFSIGNALED(status)) {
>> +                                                    break;
>> +                                            }
>> +                                    }
>> +                                    sleep(1);
>> +                            } while (++no_of_retries < MAX_NO_RETRIES);
>> +                    }
>> +            }
>> +    }
>> +    // if sending abort signal did not succeed, fallback to sigkill
>>      if (check_process(service)) {
>>              LOG_ER("Sending SIGKILL to %s, pid=%d", service->serv_name,
>> service->pid);
>>              kill(service->pid, SIGKILL);


------------------------------------------------------------------------------
BPM Camp - Free Virtual Workshop May 6th at 10am PDT/1PM EDT
Develop your own process in accordance with the BPMN 2 standard
Learn Process modeling best practices with Bonita BPM through live exercises
http://www.bonitasoft.com/be-part-of-it/events/bpm-camp-virtual-event?utm_source=Sourceforge_BPM_Camp_5_6_15&utm_medium=email&utm_campaign=VA_SF
_______________________________________________
Opensaf-devel mailing list
[email protected]
https://lists.sourceforge.net/lists/listinfo/opensaf-devel

Reply via email to