Attached is a better implementation of the SUBJ patch. It also modifies the init script so that it does NOT create the pid file (corosync now writes it itself).
The second patch is for better integration with cman: if corosync was started by cman, the init script refuses to stop it. Regards, Honza Fabio M. Di Nitto wrote: > On 7/22/2010 7:56 PM, Steven Dake wrote: >> On 07/22/2010 10:25 AM, Steven Dake wrote: >>> On 07/22/2010 08:49 AM, Jan Friesse wrote: >>>> Patch uses flock to ensure that only one instance of corosync is running. >>>> >>>> Regards, >>>> Honza >>>> >>>> >>>> >>>> _______________________________________________ >>>> Openais mailing list >>>> [email protected] >>>> https://lists.linux-foundation.org/mailman/listinfo/openais >>> >>> great work! good for merge >>> >> Honza, >> >> After talking with Fabio, he mentioned that the proper place for this >> file is /var/run/corosync.pid, and the contents of this file should be >> the active process ID of the corosync child. >> >> Sorry for not getting more details earlier. > > My bad, it did pass under my radar. > > Honza, the implementation you did is ok, i also suggest to look into > dm_create_lockfile(const char*) implementation that's been recently > written to be "strong". > > Fabio
>From beb9f73e88ee76fefe0563ae41cea5e63a39eebe Mon Sep 17 00:00:00 2001 From: Jan Friesse <[email protected]> Date: Thu, 22 Jul 2010 17:42:14 +0200 Subject: [PATCH 1/2] Ability to run only one instance of corosync This patch uses flock to allow only one instance of corosync running. --- trunk/exec/main.c | 101 +++++++++++++++++++++++++++++++++++++++++++++++++ trunk/exec/util.h | 4 +- trunk/init/generic.in | 2 - 3 files changed, 104 insertions(+), 3 deletions(-) diff --git a/trunk/exec/main.c b/trunk/exec/main.c index 4ddb5f2..56d4316 100644 --- a/trunk/exec/main.c +++ b/trunk/exec/main.c @@ -38,6 +38,7 @@ #include <pthread.h> #include <assert.h> #include <sys/types.h> +#include <sys/file.h> #include <sys/poll.h> #include <sys/uio.h> #include <sys/mman.h> @@ -139,6 +140,8 @@ static pthread_t corosync_exit_thread; static sem_t corosync_exit_sem; +static const char *corosync_lock_file = LOCALSTATEDIR"/run/corosync.pid"; + static void serialize_unlock (void); hdb_handle_t corosync_poll_handle_get (void) @@ -170,6 +173,11 @@ static void unlink_all_completed (void) poll_stop (corosync_poll_handle); totempg_finalize (); + /* + * Remove pid lock file + */ + unlink (corosync_lock_file); + corosync_exit_error (AIS_DONE_EXIT); } @@ -1367,6 +1375,93 @@ static void main_service_ready (void) } +static enum e_ais_done corosync_flock (const char *lockfile, pid_t pid) +{ + struct flock lock; + enum e_ais_done err; + char pid_s[17]; + int fd_flag; + int lf; + + err = AIS_DONE_EXIT; + + lf = open (lockfile, O_WRONLY | O_CREAT, 0640); + if (lf == -1) { + log_printf (LOGSYS_LEVEL_ERROR, "Corosync Executive couldn't create lock file.\n"); + return (AIS_DONE_AQUIRE_LOCK); + } + +retry_fcntl: + lock.l_type = F_WRLCK; + lock.l_start = 0; + lock.l_whence = SEEK_SET; + lock.l_len = 0; + if (fcntl (lf, F_SETLK, &lock) == -1) { + switch (errno) { + case EINTR: + goto retry_fcntl; + break; + case EAGAIN: + case EACCES: + log_printf (LOGSYS_LEVEL_ERROR, "Another Corosync instance is already 
running.\n"); + err = AIS_DONE_ALREADY_RUNNING; + goto error_close; + break; + default: + log_printf (LOGSYS_LEVEL_ERROR, "Corosync Executive couldn't aquire lock. Error was %s\n", + strerror(errno)); + err = AIS_DONE_AQUIRE_LOCK; + goto error_close; + break; + } + } + + if (ftruncate (lf, 0) == -1) { + log_printf (LOGSYS_LEVEL_ERROR, "Corosync Executive couldn't truncate lock file. Error was %s\n", + strerror (errno)); + err = AIS_DONE_AQUIRE_LOCK; + goto error_close_unlink; + } + + memset (pid_s, 0, sizeof (pid_s)); + snprintf (pid_s, sizeof (pid_s) - 1, "%u\n", pid); + +retry_write: + if (write (lf, pid_s, strlen (pid_s)) != strlen (pid_s)) { + if (errno == EINTR) { + goto retry_write; + } else { + log_printf (LOGSYS_LEVEL_ERROR, "Corosync Executive couldn't write pid to lock file. " + "Error was %s\n", strerror (errno)); + err = AIS_DONE_AQUIRE_LOCK; + goto error_close_unlink; + } + } + + if ((fd_flag = fcntl (lf, F_GETFD, 0)) == -1) { + log_printf (LOGSYS_LEVEL_ERROR, "Corosync Executive couldn't get close-on-exec flag from lock file. " + "Error was %s\n", strerror (errno)); + err = AIS_DONE_AQUIRE_LOCK; + goto error_close_unlink; + } + fd_flag |= FD_CLOEXEC; + if (fcntl (lf, F_SETFD, fd_flag) == -1) { + log_printf (LOGSYS_LEVEL_ERROR, "Corosync Executive couldn't set close-on-exec flag to lock file. 
" + "Error was %s\n", strerror (errno)); + err = AIS_DONE_AQUIRE_LOCK; + goto error_close_unlink; + } + + return (err); + +error_close_unlink: + unlink (lockfile); +error_close: + close (lf); + + return (err); +} + int main (int argc, char **argv, char **envp) { const char *error_string; @@ -1386,6 +1481,7 @@ int main (int argc, char **argv, char **envp) struct stat stat_out; char corosync_lib_dir[PATH_MAX]; hdb_handle_t object_runtime_handle; + enum e_ais_done flock_err; #if defined(HAVE_PTHREAD_SPIN_LOCK) pthread_spin_init (&serialize_spin, 0); @@ -1436,6 +1532,7 @@ int main (int argc, char **argv, char **envp) log_printf (LOGSYS_LEVEL_NOTICE, "Corosync Cluster Engine ('%s'): started and ready to provide service.\n", VERSION); log_printf (LOGSYS_LEVEL_INFO, "Corosync built-in features:" PACKAGE_FEATURES "\n"); + (void)signal (SIGINT, sigintr_handler); (void)signal (SIGUSR2, sigusr2_handler); (void)signal (SIGSEGV, sigsegv_handler); @@ -1605,6 +1702,10 @@ int main (int argc, char **argv, char **envp) } logsys_fork_completed(); + if ((flock_err = corosync_flock (corosync_lock_file, getpid ())) != AIS_DONE_EXIT) { + corosync_exit_error (flock_err); + } + /* callthis after our fork() */ tsafe_init (envp); diff --git a/trunk/exec/util.h b/trunk/exec/util.h index 7b95536..ed3529c 100644 --- a/trunk/exec/util.h +++ b/trunk/exec/util.h @@ -60,7 +60,9 @@ enum e_ais_done { AIS_DONE_INIT_SERVICES = 13, AIS_DONE_OUT_OF_MEMORY = 14, AIS_DONE_FATAL_ERR = 15, - AIS_DONE_DIR_NOT_PRESENT = 16 + AIS_DONE_DIR_NOT_PRESENT = 16, + AIS_DONE_AQUIRE_LOCK = 17, + AIS_DONE_ALREADY_RUNNING = 18, }; /* diff --git a/trunk/init/generic.in b/trunk/init/generic.in index 261ceaf..37235b9 100755 --- a/trunk/init/generic.in +++ b/trunk/init/generic.in @@ -90,7 +90,6 @@ start() sleep 2 if status $prog > /dev/null 2>&1; then touch $LOCK_FILE - pidof $prog > @LOCALSTATEDIR@/run/$prog.pid success else failure @@ -116,7 +115,6 @@ stop() done rm -f $LOCK_FILE - rm -f @LOCALSTATEDIR@/run/$prog.pid 
success echo } -- 1.6.2.5
>From e0b10cecf5fe9756ca571489a54a618141a2c5fc Mon Sep 17 00:00:00 2001 From: Jan Friesse <[email protected]> Date: Wed, 28 Jul 2010 16:01:46 +0200 Subject: [PATCH 2/2] Check that corosync was not runned by cman --- trunk/init/generic.in | 17 +++++++++++++++++ 1 files changed, 17 insertions(+), 0 deletions(-) diff --git a/trunk/init/generic.in b/trunk/init/generic.in index 37235b9..578fa19 100755 --- a/trunk/init/generic.in +++ b/trunk/init/generic.in @@ -99,10 +99,27 @@ start() echo } +executed_by_cman() +{ + [ -f @LOCALSTATEDIR@/run/cman.pid ] || return 0 + + read cman_pid foo < @LOCALSTATEDIR@/run/cman.pid + if [ "$(pidof $prog)" == "$cman_pid" ];then + echo -n "$desc was executed by cman" + failure + echo + return 1 + fi + + return 0 +} + stop() { ! status $prog > /dev/null 2>&1 && return + ! executed_by_cman && return + echo -n "Signaling $desc ($prog) to terminate: " kill -TERM $(pidof $prog) > /dev/null 2>&1 success -- 1.6.2.5
_______________________________________________ Openais mailing list [email protected] https://lists.linux-foundation.org/mailman/listinfo/openais
