sheep currently employs lockf() as the mutex in lock_base_dir(). But lockf() is not suitable for sheep because sheep calls daemon(3) after the lockf(). daemon(3) forks internally and the parent process exits immediately. A lock taken with lockf() is owned by the process that took it and is not inherited by a child created with fork(). In the case of sheep, daemon() must be called after locking the base dir, so the lock owner, the parent process, exits and the lock is released even though the child process is still running. This is the reason the current lock_base_dir() doesn't work well. And it allows multiple sheeps to write logs to the same sheep.log. This phenomenon is very confusing and should be avoided.
This patch implements a custom daemonizing function, lock_and_daemon(), which also takes the lock on the base directory. lock_and_daemon() takes the lock between fork() and closing fds 0, 1, and 2, so it can report a failure to lock the base directory to stderr. Signed-off-by: Hitoshi Mitake <mitake.hito...@lab.ntt.co.jp> --- v4: also lock file when sheep runs in foreground v3: implement custom function for daemonizing and do mutex in it v2: call exit_handler() from crash_handler() too sheep/sheep.c | 61 +++++++++++++++++++++++++++++++++++++++++++++++++++- sheep/sheep_priv.h | 1 + sheep/store.c | 4 ++-- 3 files changed, 63 insertions(+), 3 deletions(-) diff --git a/sheep/sheep.c b/sheep/sheep.c index 95bfa9a..bbfe772 100644 --- a/sheep/sheep.c +++ b/sheep/sheep.c @@ -390,6 +390,65 @@ static void check_host_env(void) r.rlim_cur); } +static int lock_and_daemon(bool daemonize, const char *base_dir) +{ + int ret, devnull_fd; + + if (daemonize) { + switch (fork()) { + case 0: + break; + case -1: + panic("fork() failed during daemonize: %m"); + break; + default: + exit(0); + break; + } + + if (setsid() == -1) + panic("becoming a leader of a new session failed: %m"); + + switch (fork()) { + case 0: + break; + case -1: + panic("fork() failed during daemonize: %m"); + break; + default: + exit(0); + break; + } + + if (chdir("/")) + panic("chdir to / failed: %m"); + + devnull_fd = open("/dev/null", O_RDWR); + if (devnull_fd < 0) + panic("opening /dev/null failed: %m"); + } + + ret = lock_base_dir(base_dir); + if (ret < 0) { + sd_eprintf("locking directory: %s failed", base_dir); + return -1; + } + + if (daemonize) { + /* + * now we can use base_dir/sheep.log for logging error messages, + * we can close 0, 1, and 2 safely + */ + dup2(devnull_fd, 0); + dup2(devnull_fd, 1); + dup2(devnull_fd, 2); + + close(devnull_fd); + } + + return 0; +} + int main(int argc, char **argv) { int ch, longindex, ret, port = SD_LISTEN_PORT, io_port = SD_LISTEN_PORT; @@ -582,7 +641,7 @@ int main(int argc, 
char **argv) srandom(port); - if (is_daemon && daemon(0, 0)) + if (lock_and_daemon(is_daemon, dirp)) exit(1); ret = log_init(program_name, LOG_SPACE_SIZE, to_stdout, log_level, diff --git a/sheep/sheep_priv.h b/sheep/sheep_priv.h index b9d6f97..89247a9 100644 --- a/sheep/sheep_priv.h +++ b/sheep/sheep_priv.h @@ -250,6 +250,7 @@ int init_store_driver(bool is_gateway); int init_global_pathnames(const char *d, char *); int init_base_path(const char *dir); int init_disk_space(const char *d); +int lock_base_dir(const char *d); int fill_vdi_copy_list(void *data); int get_vdi_copy_number(uint32_t vid); diff --git a/sheep/store.c b/sheep/store.c index ec3451c..6590664 100644 --- a/sheep/store.c +++ b/sheep/store.c @@ -185,7 +185,7 @@ uint32_t get_latest_epoch(void) return epoch; } -static int lock_base_dir(const char *d) +int lock_base_dir(const char *d) { #define LOCK_PATH "/lock" char *lock_path; @@ -224,7 +224,7 @@ int init_base_path(const char *d) return -1; } - return lock_base_dir(d); + return 0; } /* -- 1.7.10.rc0.41.gfa678 -- sheepdog mailing list sheepdog@lists.wpkg.org http://lists.wpkg.org/mailman/listinfo/sheepdog