Hello there!
There is a small problem with SLURM: each node must have its own copy
of slurm.conf, and if you change it on the controller then you must update
it on all the other nodes as well. There is a simple solution for this: keep
it on only one node - the slurmdbd node, for example - and have each
slurmctld or slurmd load it by means of the accounting_storage plugin. See
the attachment for a simple API that does this: it extends the config file
name (option -f of slurmd or slurmctld, or the environment variable
SLURM_CONF) so that it can now contain a non-local name in the form
"plugin:host:port", for example "slurmdbd:sqlnode:7031". If it is used with
accounting_storage/slurmdbd, then slurmdbd.conf should contain both the
slurmdbd variables and the slurm.conf variables, or else the RPC
DBD_GET_CONFIG (and the acct_storage_g_get_config() function and the
corresponding acct_storage_p_get_config() as well) should be extended in a
future version of SLURM to request an exact config type.
This solution already works very well on our cluster, but we use our own
accounting_storage plugin, so to get accounting_storage/slurmdbd to work
that way you have to do something about the problem described above, as
otherwise either slurmdbd or scontrol will complain about unknown variables
in the config. So I've marked the proposal as RFD - if you want it, then
resolve the problem (in 2.3.0 probably?) and include this patch. :)
Andriy.
diff -udpr slurm-2.2.3.new4/src/common/parse_config.c slurm-2.2.3.plus/src/common/parse_config.c
--- slurm-2.2.3.new4/src/common/parse_config.c 2011-02-01 20:53:48.000000000 +0200
+++ slurm-2.2.3.plus/src/common/parse_config.c 2011-05-03 18:27:20.000000000 +0300
@@ -905,6 +905,44 @@ int s_p_parse_file(s_p_hashtbl_t *hashtb
return rc;
}
+/*
+ * s_p_parse_pair - parse one key/value pair whose key and value have
+ *	already been separated (rather than a "key=value" line).
+ *
+ * IN hashtbl - table of known keys; the entry found for "key" is handed
+ *	to _handle_keyvalue_match() together with the extracted value
+ * IN key - option name to look up in "hashtbl"
+ * IN value - raw value text: leading whitespace is skipped, then either a
+ *	double-quoted string or a single whitespace-delimited word is taken;
+ *	whatever follows it is passed on as the "leftover" text
+ *
+ * Returns 1 if the pair is parsed cleanly, and 0 otherwise (unrecognized
+ * key, NULL value, or a quoted value with no closing quote).
+ */
+int s_p_parse_pair(s_p_hashtbl_t *hashtbl, const char *key, const char *value)
+{
+	s_p_values_t *p;
+	const char *start;
+	char *leftover, *word;
+
+	if ((p = _conf_hashtbl_lookup(hashtbl, key)) == NULL) {
+		error("Parsing error at unrecognized key: %s", key);
+		return 0;
+	}
+	if (value == NULL) {	/* nothing to parse; do not dereference it */
+		error("Parse error in data for key %s: no value", key);
+		return 0;
+	}
+
+	/*
+	 * We have the value separated from the key here, so parse it a
+	 * different way.  The <ctype.h> functions are only defined for
+	 * unsigned char values (and EOF), so cast before calling isspace():
+	 * plain char may be signed and negative for non-ASCII bytes.
+	 */
+	while (isspace((unsigned char)*value))
+		value++;	/* skip spaces at start if any */
+
+	if (*value == '"') {	/* quoted value */
+		start = value + 1;
+		leftover = strchr(start, '"');
+		if (leftover == NULL) {
+			error("Parse error in data for key %s: %s", key, value);
+			return 0;
+		}
+	} else {		/* unquoted value */
+		start = value;
+		leftover = (char *)start;
+		while (*leftover != '\0' && !isspace((unsigned char)*leftover))
+			leftover++;
+	}
+
+	/* private copy of the value: "value" itself is const and not ours */
+	word = xstrndup(start, leftover - start);
+	if (*leftover != '\0')
+		leftover++;	/* step over the closing quote or delimiter */
+	while (isspace((unsigned char)*leftover))
+		leftover++;	/* skip trailing spaces */
+	_handle_keyvalue_match(p, word, leftover, &leftover);
+	xfree(word);
+
+	return 1;
+}
+
/*
* s_p_get_string
*
diff -udpr slurm-2.2.3.new4/src/common/parse_config.h slurm-2.2.3.plus/src/common/parse_config.h
--- slurm-2.2.3.new4/src/common/parse_config.h 2010-03-17 01:07:17.000000000 +0200
+++ slurm-2.2.3.plus/src/common/parse_config.h 2011-04-29 16:08:11.000000000 +0300
@@ -190,6 +190,11 @@ int s_p_parse_file(s_p_hashtbl_t *hashtb
*/
int s_p_parse_line(s_p_hashtbl_t *hashtbl, const char *line, char **leftover);
+/*
+ * Parse one key/value pair whose key and value are given separately
+ * (not as a "key=value" line): key is looked up in hashtbl and value is
+ * parsed for that entry.
+ * Returns 1 if the pair is parsed cleanly, and 0 otherwise.
+ */
+int s_p_parse_pair(s_p_hashtbl_t *hashtbl, const char *key, const char *value);
+
/*
* s_p_get_string
*
diff -udpr slurm-2.2.3.new4/src/common/read_config.c slurm-2.2.3.plus/src/common/read_config.c
--- slurm-2.2.3.new4/src/common/read_config.c 2011-03-17 18:14:39.000000000 +0200
+++ slurm-2.2.3.plus/src/common/read_config.c 2011-05-03 16:17:06.000000000 +0300
@@ -76,6 +76,7 @@
#include "src/common/util-net.h"
#include "src/common/uid.h"
#include "src/common/strlcpy.h"
+#include "src/common/slurm_accounting_storage.h"
/*
** Define slurm-specific aliases for use by plugins, see slurm_xlator.h
@@ -1682,6 +1685,64 @@ init_slurm_conf (slurm_ctl_conf_t *ctl_c
return;
}
+/*
+ * Handle a config name of the form "plugin:host:port", for example
+ * slurmdbd:10.0.0.254:6819.
+ *
+ * The name is split at its first and last ':' into an accounting storage
+ * plugin name, host and port.  These are stored temporarily in conf_ptr,
+ * a connection is opened with acct_storage_g_get_connection(), and every
+ * name/value pair in the list returned by acct_storage_g_get_config() is
+ * fed to s_p_parse_pair() to fill hashtbl.
+ *
+ * Expects conf_lock to be held on entry: once the name has been recognized
+ * the lock is released while the storage plugin is used and is taken again
+ * before returning.
+ *
+ * Returns 0 if a config list was retrieved and fed to the parser, or -1 if
+ * name does not contain two ':' separators, the connection could not be
+ * opened or no config list was returned.  The results of s_p_parse_pair()
+ * for the individual pairs are not checked.
+ */
+static int _config_is_storage(s_p_hashtbl_t *hashtbl, char *name)
+{
+ char *tst, *port;
+ void *db_conn;
+ config_key_pair_t *pair;
+ List config;
+ ListIterator iter;
+ int rc = -1;
+
+ tst = strchr(name, ':'); /* first ':' ends the plugin name */
+ if (tst == NULL)
+ return (-1);
+ port = strrchr(&tst[1], ':'); /* last ':' after it starts the port */
+ if (port == NULL)
+ return (-1);
+ conf_ptr->accounting_storage_type = xstrdup_printf("accounting_storage/%.*s",
+ (int)(tst - name), name); /* only the text before the first ':' */
+ tst++;
+ conf_ptr->accounting_storage_host = xstrndup(tst, port - tst);
+ port++;
+ debug3("trying retrieve config via %s from host %s on port %s",
+ conf_ptr->accounting_storage_type,
+ conf_ptr->accounting_storage_host, port);
+ conf_ptr->accounting_storage_port = atoi(port); /* NOTE(review): atoi() does not report malformed port text */
+ conf_ptr->plugindir = xstrdup(default_plugin_path); /* presumably so the plugin can be found before the real config is known - confirm */
+ /* unlock conf_lock and set as initialized before accessing it.
+  * NOTE(review): presumably the storage plugin reads the configuration
+  * through accessors that take conf_lock and check conf_initialized,
+  * which is why both are changed here - confirm. */
+ conf_initialized = true;
+ pthread_mutex_unlock(&conf_lock);
+ db_conn = acct_storage_g_get_connection(NULL, 0, false, NULL);
+ if (db_conn == NULL)
+ goto end; /* the plugin will report the error itself */
+ config = acct_storage_g_get_config(db_conn);
+ acct_storage_g_close_connection(&db_conn); /* ignore error code */
+ if (config == NULL) {
+ error("cannot retrieve config from storage");
+ goto end;
+ }
+ iter = list_iterator_create(config);
+ while ((pair = list_next(iter)) != NULL)
+ s_p_parse_pair(hashtbl, pair->name, pair->value); /* return value ignored; NOTE(review): pair->value is passed on unchecked */
+ list_iterator_destroy(iter);
+ list_destroy(config);
+ rc = 0; /* done */
+
+end:
+ /* restore status quo now: retake conf_lock, clear the initialized
+  * flag and free the temporary settings made above.
+  * accounting_storage_port is left as it was set above. */
+ pthread_mutex_lock(&conf_lock);
+ conf_initialized = false;
+ xfree(conf_ptr->accounting_storage_type);
+ xfree(conf_ptr->accounting_storage_host);
+ xfree(conf_ptr->plugindir);
+ conf_ptr->accounting_storage_type = NULL;
+ conf_ptr->accounting_storage_host = NULL;
+ conf_ptr->plugindir = NULL;
+ return (rc);
+}
+
/* caller must lock conf_lock */
static void _init_slurm_conf(const char *file_name)
{
@@ -1701,6 +1764,7 @@ static void _init_slurm_conf(const char
/* init hash to 0 */
conf_ptr->hash_val = 0;
- if(s_p_parse_file(conf_hashtbl, &conf_ptr->hash_val, name)
+ if (_config_is_storage(conf_hashtbl, name) < 0 &&
+ s_p_parse_file(conf_hashtbl, &conf_ptr->hash_val, name)
== SLURM_ERROR)
fatal("something wrong with opening/reading conf file");
diff -udpr slurm-2.2.3.new4/src/slurmctld/power_save.c slurm-2.2.3.plus/src/slurmctld/power_save.c
--- slurm-2.2.3.new4/src/slurmctld/power_save.c 2011-03-30 19:19:10.000000000 +0300
+++ slurm-2.2.3.plus/src/slurmctld/power_save.c 2011-05-04 15:45:25.000000000 +0300
@@ -298,6 +298,7 @@ static pid_t _run_prog(char *prog, char
int i;
char program[1024], arg0[1024], arg1[1024], *pname;
pid_t child;
+ slurm_ctl_conf_t *ctlconf;
if (prog == NULL) /* disabled, useful for testing */
return -1;
@@ -320,6 +323,9 @@ static pid_t _run_prog(char *prog, char
#else
setpgrp();
#endif
+ ctlconf = slurm_conf_lock();
+ setenv("SLURM_CONF", ctlconf->slurm_conf, 1);
+ slurm_conf_unlock();
execl(program, arg0, arg1, NULL);
exit(1);
} else if (child < 0) {