Hello,

We want to introduce a new behavior in the way slurmd uses the HealthCheckProgram. The idea is to avoid a race condition between the first HealthCheckProgram run and the node accepting jobs. The slurmd daemon will initialize and then loop on HealthCheckProgram execution before registering with slurmctld. It will stay in this loop until the HealthCheckProgram returns successfully (the node is still DOWN).

On our clusters we are using NHC as a HealthCheckProgram. NHC drains the node if the check fails and removes the drain if it is successful, so this behavior fits well with our purpose. It permits us to start slurmd at boot without setting up a complex boot sequence in the init system: slurmd just waits for the node to be ready before registering.

The HealthCheckProgram is not run during slurmd startup if HealthCheckInterval is 0.

We are looking for comments and feedback on this proposed behavior and would like to know if something like this could be included in the next 15.08 release or 16.05.

Regards,

Thomas

Patch (on 15.08.8):

diff --git a/src/slurmd/slurmd/req.c b/src/slurmd/slurmd/req.c
index 309c91d..912b1fe 100644
--- a/src/slurmd/slurmd/req.c
+++ b/src/slurmd/slurmd/req.c
@@ -2534,10 +2534,8 @@ _rpc_health_check(slurm_msg_t *msg)
                send_registration_msg(SLURM_SUCCESS, false);
        }

-       if ((rc == SLURM_SUCCESS) && (conf->health_check_program)) {
-               char *env[1] = { NULL };
- rc = run_script("health_check", conf->health_check_program,
-                               0, 60, env, 0);
+       if (rc == SLURM_SUCCESS) {
+               rc = run_script_health_check();
        }

        /* Take this opportunity to enforce any job memory limits */
diff --git a/src/slurmd/slurmd/slurmd.c b/src/slurmd/slurmd/slurmd.c
index dfb416f..b9e15fc 100644
--- a/src/slurmd/slurmd/slurmd.c
+++ b/src/slurmd/slurmd/slurmd.c
@@ -104,6 +104,7 @@
 #include "src/slurmd/common/core_spec_plugin.h"
 #include "src/slurmd/common/job_container_plugin.h"
 #include "src/slurmd/common/proctrack.h"
+#include "src/slurmd/common/run_script.h"
 #include "src/slurmd/common/slurmd_cgroup.h"
 #include "src/slurmd/common/xcpuinfo.h"
 #include "src/slurmd/slurmd/get_mach_stat.h"
@@ -119,6 +120,8 @@

 #define MAX_THREADS            256

+#define HEALTH_RETRY_DELAY 10
+
 #define _free_and_set(__dst, __src) \
        xfree(__dst); __dst = __src

@@ -203,6 +206,7 @@ static void      _update_nice(void);
 static void      _usage(void);
 static int       _validate_and_convert_cpu_list(void);
 static void      _wait_for_all_threads(int secs);
+static void      _wait_health_check(void);


 int
@@ -363,6 +367,10 @@ main (int argc, char *argv[])
        if (slurmd_plugstack_init())
                fatal("failed to initialize slurmd_plugstack");

+       /* Wait for a successful health check if
+        * HealthCheckInterval != 0 */
+       _wait_health_check();
+
        _spawn_registration_engine();
        msg_aggr_sender_init(conf->hostname, conf->port,
                             conf->msg_aggr_window_time,
@@ -1002,6 +1010,8 @@ _read_config(void)

        conf->mem_limit_enforce = cf->mem_limit_enforce;

+       conf->health_check_interval = cf->health_check_interval;
+
        slurm_mutex_unlock(&conf->config_mutex);
        slurm_conf_unlock();
 }
@@ -2290,3 +2300,40 @@ static void _resource_spec_fini(void)
        FREE_NULL_BITMAP(res_core_bitmap);
        FREE_NULL_BITMAP(res_cpu_bitmap);
 }
+
+/*
+ * Wait for health check to execute successfully
+ *
+ * Return immediately if a shutdown has been requested or
+ * if the HealthCheckInterval is 0.
+ */
+static void _wait_health_check(void)
+{
+       while (!_shutdown &&
+           (conf->health_check_interval != 0 ) &&
+           (run_script_health_check() != SLURM_SUCCESS)) {
+               info ("Node Health Check failed, retrying in %ds...",
+                   HEALTH_RETRY_DELAY);
+               sleep(HEALTH_RETRY_DELAY);
+  }
+}
+
+/*
+ * Run the configured health check program
+ *
+ * Returns the run result. If the health check program
+ * is not defined, returns success immediately.
+ *
+ */
+extern int run_script_health_check(void)
+{
+       int rc = SLURM_SUCCESS;
+
+       if (conf->health_check_program) {
+               char *env[1] = { NULL };
+ rc = run_script("health_check", conf->health_check_program,
+                   0, 60, env, 0);
+       }
+
+  return rc;
+}
diff --git a/src/slurmd/slurmd/slurmd.h b/src/slurmd/slurmd/slurmd.h
index d253815..c58a54a 100644
--- a/src/slurmd/slurmd/slurmd.h
+++ b/src/slurmd/slurmd/slurmd.h
@@ -125,7 +125,8 @@ typedef struct slurmd_config {
        char      *logfile;              /* slurmd logfile, if any */
        char      *spooldir;             /* SlurmdSpoolDir */
        char      *pidfile;              /* PidFile location */
-       char      *health_check_program; /* run on RPC request */
+       char      *health_check_program; /* run on RPC request or at start */
+       uint64_t  health_check_interval; /* Interval between runs */
        char      *tmpfs;                /* directory of tmp FS */
        char      *pubkey;               /* location of job cred public key */
        char      *epilog;               /* Path to Epilog script */
@@ -194,5 +195,8 @@ int send_registration_msg(uint32_t status, bool startup);
  */
 int save_cred_state(slurm_cred_ctx_t vctx);

+/* Run the health check program if configured
+ */
+int run_script_health_check(void);

 #endif /* !_SLURMD_H */

--
Thomas HAMEL
github: hmlth

Reply via email to