Hi, while taking a look at my local slurm repository, I found some pending commits that were never published but are used on our site (which explains the dates on the patches).
You will find enclosed a set of 3 patches that:
- ensure that "service slurm stop" exits with code 0 when the processes are stopped (this helps to manage slurm with puppet)
- add a new variable, SLURMD_NODENAME, to the prolog/epilog environment, set to the node or virtual node name of the calling slurmd
- improve cpuset support when running multiple slurmds per node by giving every slurmd a separate cpuset root directory

Do you think that these patches could be integrated into a future release?

Thanks in advance.

Best regards,
Matthieu
From 34ebe88f9fe2784f010cf32c263db42a67009e40 Mon Sep 17 00:00:00 2001
From: Matthieu Hautreux <matthieu.hautr...@cea.fr>
Date: Tue, 15 Nov 2011 15:49:21 +0100
Subject: [PATCH 1/3] task/affinity: add multiple_slurmd support in cpuset management

When cpuset is used as the task/affinity backend, a common root
directory is used to group every cpuset subdirectory created by the
plugin. With multiple slurmd support, this results in all the virtual
nodes executing on the same physical node sharing the same cpuset
directories. This patch makes sure that every virtual node has its own
root directory in the cpuset hierarchy.
---
 src/plugins/task/affinity/task_affinity.c |   40 +++++++++++++++++++++++++++++
 1 files changed, 40 insertions(+), 0 deletions(-)

diff --git a/src/plugins/task/affinity/task_affinity.c b/src/plugins/task/affinity/task_affinity.c
index 88d3db6..1e4ad92 100644
--- a/src/plugins/task/affinity/task_affinity.c
+++ b/src/plugins/task/affinity/task_affinity.c
@@ -244,11 +244,21 @@ extern int task_slurmd_release_resources (uint32_t job_id)
 	 * happening reliably. */
 	if (conf->task_plugin_param & CPU_BIND_CPUSETS) {
 		char base[PATH_MAX], path[PATH_MAX];
+#ifdef MULTIPLE_SLURMD
+		if (snprintf(base, PATH_MAX, "%s/slurm_%s_%u",
+			     CPUSET_DIR,
+			     (conf->node_name != NULL)?conf->node_name:"",
+			     job_id) > PATH_MAX) {
+			error("cpuset path too long");
+			return SLURM_ERROR;
+		}
+#else
 		if (snprintf(base, PATH_MAX, "%s/slurm%u",
 			     CPUSET_DIR, job_id) > PATH_MAX) {
 			error("cpuset path too long");
 			return SLURM_ERROR;
 		}
+#endif
 		if (rmdir(base) && (errno == ENOTEMPTY)) {
 			DIR *dirp;
 			struct dirent entry;
@@ -294,11 +304,21 @@ extern int task_pre_setuid (slurmd_job_t *job)
 	if (!(conf->task_plugin_param & CPU_BIND_CPUSETS))
 		return SLURM_SUCCESS;
 
+#ifdef MULTIPLE_SLURMD
+	if (snprintf(path, PATH_MAX, "%s/slurm_%s_%u",
+		     CPUSET_DIR,
+		     (conf->node_name != NULL)?conf->node_name:"",
+		     job->jobid) > PATH_MAX) {
+		error("cpuset path too long");
+		return SLURM_ERROR;
+	}
+#else
 	if (snprintf(path, PATH_MAX, "%s/slurm%u",
 		     CPUSET_DIR, job->jobid) > PATH_MAX) {
 		error("cpuset path too long");
 		return SLURM_ERROR;
 	}
+#endif
 
 	return slurm_build_cpuset(CPUSET_DIR, path, job->uid, job->gid);
 }
@@ -318,11 +338,21 @@ extern int task_pre_launch (slurmd_job_t *job)
 
 	if (conf->task_plugin_param & CPU_BIND_CPUSETS) {
 		info("Using cpuset affinity for tasks");
+#ifdef MULTIPLE_SLURMD
+		if (snprintf(base, PATH_MAX, "%s/slurm_%s_%u",
+			     CPUSET_DIR,
+			     (conf->node_name != NULL)?conf->node_name:"",
+			     job->jobid) > PATH_MAX) {
+			error("cpuset path too long");
+			return SLURM_ERROR;
+		}
+#else
 		if (snprintf(base, PATH_MAX, "%s/slurm%u",
 			     CPUSET_DIR, job->jobid) > PATH_MAX) {
 			error("cpuset path too long");
 			return SLURM_ERROR;
 		}
+#endif
 		if (snprintf(path, PATH_MAX, "%s/slurm%u.%u_%d",
 			     base, job->jobid, job->stepid,
 			     job->envtp->localid) > PATH_MAX) {
@@ -416,11 +446,21 @@ extern int task_post_term (slurmd_job_t *job)
 	 * happening reliably. */
 	if (conf->task_plugin_param & CPU_BIND_CPUSETS) {
 		char base[PATH_MAX], path[PATH_MAX];
+#ifdef MULTIPLE_SLURMD
+		if (snprintf(base, PATH_MAX, "%s/slurm_%s_%u",
+			     CPUSET_DIR,
+			     (conf->node_name != NULL)?conf->node_name:"",
+			     job->jobid) > PATH_MAX) {
+			error("cpuset path too long");
+			return SLURM_ERROR;
+		}
+#else
 		if (snprintf(base, PATH_MAX, "%s/slurm%u",
 			     CPUSET_DIR, job->jobid) > PATH_MAX) {
 			error("cpuset path too long");
 			return SLURM_ERROR;
 		}
+#endif
 		if (snprintf(path, PATH_MAX, "%s/slurm%u.%u_%d",
 			     base, job->jobid, job->stepid,
 			     job->envtp->localid) > PATH_MAX) {
-- 
1.7.6.2
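For illustration, this is the kind of cpuset layout the patch produces when two virtual nodes share a physical host. The mount point, node names and job id below are only examples (the actual value of CPUSET_DIR depends on the build; /dev/cpuset is just a common choice):

    # without the patch, every virtual slurmd of the host shares:
    /dev/cpuset/slurm1234
    # with the patch, each virtual slurmd gets its own root:
    /dev/cpuset/slurm_node1_1234
    /dev/cpuset/slurm_node2_1234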
From 673e30be6e623ed53006d8d317695639f08f349f Mon Sep 17 00:00:00 2001
From: Matthieu Hautreux <matthieu.hautr...@cea.fr>
Date: Tue, 15 Nov 2011 15:52:25 +0100
Subject: [PATCH 2/3] slurmd: add SLURMD_NODENAME to the slurmd prolog/epilog env

When multiple slurmd support is enabled, the prolog and epilog scripts
run by the slurmd daemons have no way to determine which node, virtual
or physical, is associated with the execution. SLURMD_NODENAME provides
that information: the name of the virtual or physical SLURM node at the
origin of the prolog/epilog execution.
---
 src/slurmd/slurmd/req.c |    5 +++++
 1 files changed, 5 insertions(+), 0 deletions(-)

diff --git a/src/slurmd/slurmd/req.c b/src/slurmd/slurmd/req.c
index add5e03..7172636 100644
--- a/src/slurmd/slurmd/req.c
+++ b/src/slurmd/slurmd/req.c
@@ -3722,6 +3722,11 @@ _build_env(uint32_t jobid, uid_t uid, char *resv_id,
 	xfree(name);
 	setenvf(&env, "SLURM_JOBID", "%u", jobid);
 	setenvf(&env, "SLURM_UID", "%u", uid);
+
+	slurm_mutex_lock(&conf->config_mutex);
+	setenvf(&env, "SLURMD_NODENAME", "%s", conf->node_name);
+	slurm_mutex_unlock(&conf->config_mutex);
+
 	if (resv_id) {
 #if defined(HAVE_BG)
 		setenvf(&env, "MPIRUN_PARTITION", "%s", resv_id);
-- 
1.7.6.2
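As a usage sketch (not part of the patch; the log path is made up), a prolog or epilog script can now tell which virtual or physical node triggered it:

    #!/bin/bash
    # hypothetical epilog: record the (virtual) node that ran it, using
    # the SLURM_JOBID and SLURMD_NODENAME variables exported by slurmd
    echo "epilog: job=${SLURM_JOBID} node=${SLURMD_NODENAME}" >> /tmp/slurm_epilog.log
    exit 0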
From 12a3343c26b162ae61dc61998dca2cf96bbe4efe Mon Sep 17 00:00:00 2001
From: Matthieu Hautreux <matthieu.hautr...@cea.fr>
Date: Tue, 15 Nov 2011 15:57:58 +0100
Subject: [PATCH 3/3] slurm init script: fix exit code handling of the stop target

In the current version of the slurm init script, the stop action
returns a non-zero exit code because the slurmstatus exit code is used
directly, even though the daemons are stopped. Ensure that when called
from slurmstop, the slurmstatus exit code is inverted so that it
matches the expected exit code of the stop stage.
---
 etc/init.d.slurm |    9 +++++++++
 1 files changed, 9 insertions(+), 0 deletions(-)

diff --git a/etc/init.d.slurm b/etc/init.d.slurm
index 74a52a9..419c794 100644
--- a/etc/init.d.slurm
+++ b/etc/init.d.slurm
@@ -223,6 +223,15 @@ slurmstop() {
 		fi
 	    done
 	done
+
+	# slurmstatus returns 1 in case of a stopped daemon,
+	# and that is what we are looking for here
+	if [[ ${RETVAL} == "1" ]]
+	then
+		RETVAL=0
+	else
+		RETVAL=1
+	fi
 }
 
 #
-- 
1.7.6.2
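With this change, a configuration management tool can rely directly on the exit status of the stop target, for example (sketch):

    # once all daemons are stopped, the stop target now reports success
    /etc/init.d/slurm stop
    echo $?    # expected to print 0, which is what puppet checks for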