When prolog and epilog scripts start to diverge across multiple clusters
within the same center, it becomes very painful to keep these
scripts up to date, and their implementation usually ends up being
quite convoluted.

One solution is to have prolog and epilog simply run a series of
scripts from a directory, e.g. the epilog might do

 for script in /etc/slurm/epilog.d/*; do
   $script || exit 1;
 done

Then an optional bit of prolog/epilog logic can be implemented
as a little scriptlet and only installed on the needed clusters.

However, it would be nice if this globbing functionality were standard in
SLURM, because it would then be readily available to sysadmins and
behave the same way at every site.

This is especially nice for plugins that use the job_control_env()
environment to affect behavior of the epilog and prolog. With

 Prolog=/etc/slurm/prolog.d/*
 Epilog=/etc/slurm/epilog.d/*

in slurm.conf, a plugin could be shipped as a self-contained package:
a setup script dropped into /etc/slurm/prolog.d/, a cleanup script
dropped into /etc/slurm/epilog.d/, and the actual plugin, which
implements the srun/sbatch/salloc option, installed into
/etc/slurm/lua.d/ or /etc/slurm/plugstack.conf.d/

This patch implements a glob syntax for the run_script logic
in slurmd. It should not change behavior of existing configs,
since they should not have any special glob characters in
the Prolog and Epilog config settings.

I'm just sending this out as a request for comments at this time.
---
 src/slurmd/common/run_script.c |   87 +++++++++++++++++++++++++++++++++++++++-
 1 files changed, 85 insertions(+), 2 deletions(-)

diff --git a/src/slurmd/common/run_script.c b/src/slurmd/common/run_script.c
index 6fc3309..aa6891c 100644
--- a/src/slurmd/common/run_script.c
+++ b/src/slurmd/common/run_script.c
@@ -44,10 +44,12 @@
 #include <sys/wait.h>
 #include <sys/errno.h>
 #include <string.h>
+#include <glob.h>
 
 #include "src/common/xmalloc.h"
 #include "src/common/xstring.h"
 #include "src/common/xassert.h"
+#include "src/common/list.h"
 
 #include "src/slurmd/common/run_script.h"
 
@@ -62,8 +64,8 @@
  *     if NULL
  * RET 0 on success, -1 on failure.
  */
-int
-run_script(const char *name, const char *path, uint32_t jobid,
+static int
+run_one_script(const char *name, const char *path, uint32_t jobid,
           int max_wait, char **env)
 {
        int status, rc, opt;
@@ -130,3 +132,120 @@ run_script(const char *name, const char *path, uint32_t jobid,
 
        /* NOTREACHED */
 }
+
+/*
+ * List destructor for script paths: releases one string that was
+ * duplicated with xstrdup().
+ */
+static void xfree_f (void *x)
+{
+       xfree (x);
+}
+
+/*
+ * glob(3) error callback: log the path that could not be read together
+ * with the error number glob() reported for it (errnum, not the global
+ * errno, which may have been overwritten by the time we are called).
+ * RET the value returned by error().
+ */
+static int ef (const char *p, int errnum)
+{
+       return error ("run_script: glob: %s: %s", p, strerror (errnum));
+}
+
+/*
+ * Expand a glob pattern into a list of matching paths.
+ * pattern IN: glob(3) pattern, may be NULL
+ * RET List of xstrdup()'d paths in the order glob() returned them, or
+ *     NULL if pattern is NULL, nothing matched, or an error occurred.
+ *     A non-NULL list is released by the caller with list_destroy().
+ */
+static List script_list_create (const char *pattern)
+{
+       glob_t gl;
+       size_t i;
+       int rc;
+       List l = NULL;
+
+       if (pattern == NULL)
+               return (NULL);
+
+       rc = glob (pattern, GLOB_ERR, ef, &gl);
+       switch (rc) {
+       case 0:
+               l = list_create ((ListDelF) xfree_f);
+               if (l == NULL) {
+                       error ("run_script: list_create: Out of memory");
+                       break;
+               }
+               /*
+                * Append (rather than push, which is a stack-style insert)
+                * so iteration visits scripts in the order glob() gave.
+                */
+               for (i = 0; i < gl.gl_pathc; i++)
+                       list_append (l, xstrdup (gl.gl_pathv[i]));
+               break;
+       case GLOB_NOMATCH:
+               break;
+       case GLOB_NOSPACE:
+               error ("run_script: glob(3): Out of memory");
+               break;
+       case GLOB_ABORTED:
+               error ("run_script: cannot read dir %s: %m", pattern);
+               break;
+       default:
+               error ("Unknown glob(3) return code = %d", rc);
+               break;
+       }
+
+       globfree (&gl);
+
+       return l;
+}
+
+/*
+ * Run every script matching a glob pattern, stopping at the first failure.
+ * name IN: label used in log messages and handed to each script run
+ * pattern IN: glob(3) pattern selecting the scripts to run; nothing is
+ *     run if NULL or empty
+ * jobid IN: passed through unchanged to each script run
+ * max_wait IN: passed through unchanged to each script run
+ * env IN: passed through unchanged to each script run
+ * RET 0 if pattern is NULL/empty or every script returned 0, otherwise
+ *     the non-zero result of the first failing script, or the value of
+ *     error() if the list of scripts could not be built or iterated.
+ */
+int run_script(const char *name, const char *pattern, uint32_t jobid,
+          int max_wait, char **env)
+{
+       int rc = 0;
+       List l;
+       ListIterator i;
+       char *s;
+
+       if (pattern == NULL || pattern[0] == '\0')
+               return 0;
+
+       l = script_list_create (pattern);
+       if (l == NULL)
+               return error ("Unable to run %s [%s]", name, pattern);
+
+       i = list_iterator_create (l);
+       if (i == NULL) {
+               list_destroy (l);
+               return error ("run_script: list_iterator_create: Out of memory");
+       }
+
+       while ((s = list_next (i))) {
+               rc = run_one_script (name, s, jobid, max_wait, env);
+               if (rc) {
+                       error ("%s: exited with status 0x%04x\n", s, rc);
+                       break;
+               }
+       }
+       list_iterator_destroy (i);
+       list_destroy (l);
+
+       return rc;
+}
+
-- 
1.7.1

Reply via email to