Hello!
Back in May, on Friday the 13th, at 2:16, I received this message:
>I would also add that you can include other files within a slurm.conf
>file, which could make management of SLURM's configuration file easier
>for you.
A new version of the patch follows.
I've added one parameter to the DBD_GET_CONFIG RPC call, the name of the
config to fetch, so it is now possible to do includes from the storage too.
This also resolves the problem I mentioned earlier, namely the difference
from the current DBD_GET_CONFIG support in slurmdbd: under the current RPC
scheme slurmdbd sends only its own config, but it may now send any other
config if the loaded accounting storage plugin can provide it. That assumes
the new protocol version is set to 9. I've changed this in slurmdbd and in
the accounting storage plugins with this patch; it will not affect anything
unless the accounting storage supports retrieving the 'slurm.conf' config.
It works well on our cluster, as we have such functionality. Unfortunately,
I still have no solution for your 'single point of failure' remark about
this configuration, but I hope to find something later. Unfortunately, I
also cannot test the new functionality with slurmdbd, because we do not use
slurmdbd as our accounting storage but other means.
See the attached patch. This slurm.conf distribution will not work for
now, as none of the standard plugins support it, but it may in the future
for advanced admins who know exactly how it works. No changes were made
that affect ordinary admins or users; it is harmless for them. :)
With best wishes.
Andriy.
diff -udpr slurm-2.2.6/src/common/parse_config.c slurm-2.2.6.loadconf/src/common/parse_config.c
--- slurm-2.2.6/src/common/parse_config.c 2011-05-27 21:25:05.000000000 +0300
+++ slurm-2.2.6.loadconf/src/common/parse_config.c 2011-07-06 12:05:57.000000000 +0300
@@ -905,6 +905,44 @@ int s_p_parse_file(s_p_hashtbl_t *hashtb
return rc;
}
+/*
+ * s_p_parse_pair - parse one key/value pair that is already split apart
+ * IN hashtbl - table in which "key" is looked up with _conf_hashtbl_lookup()
+ * IN key - option name; an unknown key is logged and rejected
+ * IN value - raw value text: leading whitespace is skipped, then either a
+ *	double-quoted string (quotes dropped) or the first run of
+ *	non-whitespace characters is taken as the value
+ *
+ * Returns 1 if the line is parsed cleanly, and 0 otherwise (unrecognized
+ * key, or a quoted value with no closing quote).
+ * NOTE: nothing returned by _handle_keyvalue_match() is examined here, so
+ * 1 is returned once the value has been delimited and handed over.
+ */
+int s_p_parse_pair(s_p_hashtbl_t *hashtbl, const char *key, const char *value)
+{
+ s_p_values_t *p;
+ char *leftover, *v;
+
+ if ((p = _conf_hashtbl_lookup(hashtbl, key)) == NULL) {
+ error("Parsing error at unrecognized key: %s", key);
+ return 0;
+ }
+ /* the value arrives already separated from its key, so it is
+  * delimited by hand here */
+ while (*value != '\0' && isspace(*value))
+ value++; /* skip spaces at start if any */
+ if (*value == '"') { /* quoted value: v is the first char after the quote */
+ v = (char *)value + 1;
+ leftover = strchr(v, '"'); /* closing quote, if present */
+ if (leftover == NULL) {
+ error("Parse error in data for key %s: %s", key, value);
+ return 0;
+ }
+ } else { /* unquoted value: ends at the first whitespace or at the end */
+ leftover = v = (char *)value;
+ while (*leftover != '\0' && !isspace(*leftover))
+ leftover++;
+ }
+ value = xstrndup(v, leftover - v); /* copy of the delimited text only */
+ if (*leftover != '\0')
+ leftover++; /* step over the closing quote or the delimiter */
+ while (*leftover != '\0' && isspace(*leftover))
+ leftover++; /* skip trailing spaces */
+ _handle_keyvalue_match(p, value, leftover, &leftover);
+ xfree(value); /* done with the copy made by xstrndup() above */
+
+ return 1;
+}
+
/*
* s_p_get_string
*
diff -udpr slurm-2.2.6/src/common/parse_config.h slurm-2.2.6.loadconf/src/common/parse_config.h
--- slurm-2.2.6/src/common/parse_config.h 2011-05-27 21:25:05.000000000 +0300
+++ slurm-2.2.6.loadconf/src/common/parse_config.h 2011-07-06 12:05:57.000000000 +0300
@@ -190,6 +190,10 @@ int s_p_parse_file(s_p_hashtbl_t *hashtb
*/
int s_p_parse_line(s_p_hashtbl_t *hashtbl, const char *line, char **leftover);
+/*
+ * Returns 1 if the line is parsed cleanly, and 0 otherwise.
+ */
+int s_p_parse_pair(s_p_hashtbl_t *hashtbl, const char *key, const char *value);
/*
* s_p_get_string
*
diff -udpr slurm-2.2.6/src/common/read_config.c slurm-2.2.6.loadconf/src/common/read_config.c
--- slurm-2.2.6/src/common/read_config.c 2011-05-27 21:25:05.000000000 +0300
+++ slurm-2.2.6.loadconf/src/common/read_config.c 2011-07-06 12:25:32.000000000 +0300
@@ -76,6 +76,7 @@
#include "src/common/util-net.h"
#include "src/common/uid.h"
#include "src/common/strlcpy.h"
+#include "src/common/slurm_accounting_storage.h"
/*
** Define slurm-specific aliases for use by plugins, see slurm_xlator.h
@@ -1682,6 +1685,71 @@ init_slurm_conf (slurm_ctl_conf_t *ctl_c
return;
}
+/* handle config name in form (example) slurmdbd:thecluster:10.0.0.254:6819
+ *
+ * The name is split as <plugin>:<cluster>:<host>:<port>: <plugin> ends at
+ * the first ':', <cluster> at the second, and <port> starts after the LAST
+ * ':' in the string, so at least three ':' are required.
+ * If the name has that form, conf_ptr is temporarily filled in to select
+ * the "accounting_storage/<plugin>" plugin on <host>:<port>, the
+ * "slurm.conf" config is requested from it, and every returned pair is fed
+ * to s_p_parse_pair() (whose result is not checked).
+ *
+ * IN hashtbl - table that receives the retrieved key/value pairs
+ * IN name - config name to examine
+ * RET 0 if a config list was retrieved from the storage, -1 if the name
+ *	does not have the form above or the connection/retrieval failed
+ *
+ * Expected to be entered with conf_lock held: the lock is released around
+ * the storage calls and taken again before returning.
+ * NOTE(review): accounting_storage_port is set below but is not reset in
+ * the cleanup at "end" -- confirm this is intended.
+ */
+static int _config_is_storage(s_p_hashtbl_t *hashtbl, char *name)
+{
+ char *cluster, *host, *port;
+ void *db_conn;
+ config_key_pair_t *pair;
+ List config;
+ ListIterator iter;
+ int rc = -1;
+
+ cluster = strchr(name, ':'); /* first ':' ends the plugin name */
+ if (cluster == NULL)
+ return (-1);
+ host = strchr(&cluster[1], ':'); /* second ':' ends the cluster name */
+ if (host == NULL)
+ return (-1);
+ port = strrchr(&host[1], ':'); /* last ':' precedes the port */
+ if (port == NULL)
+ return (-1);
+ conf_ptr->accounting_storage_type =
+ xstrdup_printf("accounting_storage/%.*s",
+ (int)(cluster - name), name); /* text before the first ':' */
+ cluster++;
+ cluster = xstrndup(cluster, host - cluster); /* allocated copy, freed at "end" */
+ host++;
+ conf_ptr->accounting_storage_host = xstrndup(host, port - host);
+ port++;
+ debug3("trying retrieve config via %s from host %s on port %s",
+ conf_ptr->accounting_storage_type,
+ conf_ptr->accounting_storage_host, port);
+ conf_ptr->accounting_storage_port = atoi(port);
+ conf_ptr->plugindir = xstrdup(default_plugin_path);
+ /* unlock conf_lock and set as initialized before accessing it
+  * (presumably so the storage plugin code can read the configuration
+  * while it runs -- TODO confirm) */
+ conf_initialized = true;
+ pthread_mutex_unlock(&conf_lock);
+ db_conn = acct_storage_g_get_connection(NULL, 0, false, cluster);
+ if (db_conn == NULL)
+ goto end; /* the plugin is expected to report the error itself */
+ config = acct_storage_g_get_config(db_conn, "slurm.conf");
+ acct_storage_g_close_connection(&db_conn); /* ignore error code */
+ if (config == NULL) {
+ error("cannot retrieve config from storage");
+ goto end;
+ }
+ iter = list_iterator_create(config);
+ while ((pair = list_next(iter)) != NULL)
+ s_p_parse_pair(hashtbl, pair->name, pair->value); /* result ignored */
+ list_iterator_destroy(iter);
+ list_destroy(config);
+ rc = 0; /* done */
+
+end:
+ /* restore status quo now: take the lock back, mark the config as
+  * uninitialized again and drop the temporary strings set above */
+ pthread_mutex_lock(&conf_lock);
+ conf_initialized = false;
+ xfree(cluster);
+ xfree(conf_ptr->accounting_storage_type);
+ xfree(conf_ptr->accounting_storage_host);
+ xfree(conf_ptr->plugindir);
+ conf_ptr->accounting_storage_type = NULL;
+ conf_ptr->accounting_storage_host = NULL;
+ conf_ptr->plugindir = NULL;
+ return (rc);
+}
+
/* caller must lock conf_lock */
static void _init_slurm_conf(const char *file_name)
{
@@ -1701,6 +1771,7 @@ static void _init_slurm_conf(const char
/* init hash to 0 */
conf_ptr->hash_val = 0;
+ if (_config_is_storage(conf_hashtbl, name) < 0)
if(s_p_parse_file(conf_hashtbl, &conf_ptr->hash_val, name)
== SLURM_ERROR)
fatal("something wrong with opening/reading conf file");
diff -udpr slurm-2.2.6/src/common/slurm_accounting_storage.c slurm-2.2.6.loadconf/src/common/slurm_accounting_storage.c
--- slurm-2.2.6/src/common/slurm_accounting_storage.c 2011-05-27 21:25:05.000000000 +0300
+++ slurm-2.2.6.loadconf/src/common/slurm_accounting_storage.c 2011-07-06 15:22:03.000000000 +0300
@@ -134,7 +134,7 @@ typedef struct slurm_acct_storage_ops {
slurmdb_account_cond_t *acct_cond);
List (*get_clusters) (void *db_conn, uint32_t uid,
slurmdb_cluster_cond_t *cluster_cond);
- List (*get_config) (void *db_conn);
+ List (*get_config) (void *db_conn, char *config_name);
List (*get_associations) (void *db_conn, uint32_t uid,
slurmdb_association_cond_t *assoc_cond);
List (*get_events) (void *db_conn, uint32_t uid,
@@ -708,11 +708,11 @@ extern List acct_storage_g_get_clusters(
(db_conn, uid, cluster_cond);
}
-extern List acct_storage_g_get_config(void *db_conn)
+extern List acct_storage_g_get_config(void *db_conn, char *config_name)
{
if (slurm_acct_storage_init(NULL) < 0)
return NULL;
- return (*(g_acct_storage_context->ops.get_config))(db_conn);
+ return (*(g_acct_storage_context->ops.get_config))(db_conn, config_name);
}
extern List acct_storage_g_get_associations(
diff -udpr slurm-2.2.6/src/common/slurm_accounting_storage.h slurm-2.2.6.loadconf/src/common/slurm_accounting_storage.h
--- slurm-2.2.6/src/common/slurm_accounting_storage.h 2011-05-27 21:25:05.000000000 +0300
+++ slurm-2.2.6.loadconf/src/common/slurm_accounting_storage.h 2011-07-06 13:23:33.000000000 +0300
@@ -327,7 +327,7 @@ extern List acct_storage_g_get_clusters(
* RET: List of config_key_pairs_t *
* note List needs to be freed when called
*/
-extern List acct_storage_g_get_config(void *db_conn);
+extern List acct_storage_g_get_config(void *db_conn, char *config_name);
/*
* get info from the storage
diff -udpr slurm-2.2.6/src/common/slurmdbd_defs.c slurm-2.2.6.loadconf/src/common/slurmdbd_defs.c
--- slurm-2.2.6/src/common/slurmdbd_defs.c 2011-05-27 21:25:05.000000000 +0300
+++ slurm-2.2.6.loadconf/src/common/slurmdbd_defs.c 2011-07-06 15:51:56.000000000 +0300
@@ -615,7 +615,8 @@ extern Buf pack_slurmdbd_msg(slurmdbd_ms
buffer);
break;
case DBD_GET_CONFIG:
- /* No message to pack */
+ if (rpc_version >= 9)
+ packstr((char *)req->data, buffer);
break;
default:
error("slurmdbd: Invalid message type pack %u(%s:%u)",
diff -udpr slurm-2.2.6/src/db_api/extra_get_functions.c slurm-2.2.6.loadconf/src/db_api/extra_get_functions.c
--- slurm-2.2.6/src/db_api/extra_get_functions.c 2011-05-27 21:25:05.000000000 +0300
+++ slurm-2.2.6.loadconf/src/db_api/extra_get_functions.c 2011-07-06 14:01:38.000000000 +0300
@@ -56,7 +56,7 @@
*/
extern List slurmdb_config_get(void *db_conn)
{
- return acct_storage_g_get_config(db_conn);
+ return acct_storage_g_get_config(db_conn, "slurmdbd.conf");
}
/*
diff -udpr slurm-2.2.6/src/plugins/accounting_storage/filetxt/accounting_storage_filetxt.c slurm-2.2.6.loadconf/src/plugins/accounting_storage/filetxt/accounting_storage_filetxt.c
--- slurm-2.2.6/src/plugins/accounting_storage/filetxt/accounting_storage_filetxt.c 2011-05-27 21:25:05.000000000 +0300
+++ slurm-2.2.6.loadconf/src/plugins/accounting_storage/filetxt/accounting_storage_filetxt.c 2011-07-06 14:04:03.000000000 +0300
@@ -449,7 +449,7 @@ extern List acct_storage_p_get_clusters(
return NULL;
}
-extern List acct_storage_p_get_config(void *db_conn)
+extern List acct_storage_p_get_config(void *db_conn, char *config_name)
{
return NULL;
}
diff -udpr slurm-2.2.6/src/plugins/accounting_storage/mysql/accounting_storage_mysql.c slurm-2.2.6.loadconf/src/plugins/accounting_storage/mysql/accounting_storage_mysql.c
--- slurm-2.2.6/src/plugins/accounting_storage/mysql/accounting_storage_mysql.c 2011-05-27 21:25:05.000000000 +0300
+++ slurm-2.2.6.loadconf/src/plugins/accounting_storage/mysql/accounting_storage_mysql.c 2011-07-06 14:03:42.000000000 +0300
@@ -2495,7 +2495,7 @@ end_it:
return ret_list;
}
-extern List acct_storage_p_get_config(void *db_conn)
+extern List acct_storage_p_get_config(void *db_conn, char *config_name)
{
return NULL;
}
diff -udpr slurm-2.2.6/src/plugins/accounting_storage/none/accounting_storage_none.c slurm-2.2.6.loadconf/src/plugins/accounting_storage/none/accounting_storage_none.c
--- slurm-2.2.6/src/plugins/accounting_storage/none/accounting_storage_none.c 2011-05-27 21:25:05.000000000 +0300
+++ slurm-2.2.6.loadconf/src/plugins/accounting_storage/none/accounting_storage_none.c 2011-07-06 14:04:09.000000000 +0300
@@ -275,7 +275,7 @@ extern List acct_storage_p_get_clusters(
return NULL;
}
-extern List acct_storage_p_get_config(void *db_conn)
+extern List acct_storage_p_get_config(void *db_conn, char *config_name)
{
return NULL;
}
diff -udpr slurm-2.2.6/src/plugins/accounting_storage/pgsql/accounting_storage_pgsql.c slurm-2.2.6.loadconf/src/plugins/accounting_storage/pgsql/accounting_storage_pgsql.c
--- slurm-2.2.6/src/plugins/accounting_storage/pgsql/accounting_storage_pgsql.c 2011-05-27 21:25:05.000000000 +0300
+++ slurm-2.2.6.loadconf/src/plugins/accounting_storage/pgsql/accounting_storage_pgsql.c 2011-07-06 14:03:58.000000000 +0300
@@ -498,7 +498,7 @@ extern List acct_storage_p_get_clusters(
return as_pg_get_clusters(pg_conn, uid, cluster_cond);
}
-extern List acct_storage_p_get_config(pgsql_conn_t *pg_conn)
+extern List acct_storage_p_get_config(pgsql_conn_t *pg_conn, char *config_name)
{
return NULL;
}
diff -udpr slurm-2.2.6/src/plugins/accounting_storage/pgsql/accounting_storage_pgsql.h slurm-2.2.6.loadconf/src/plugins/accounting_storage/pgsql/accounting_storage_pgsql.h
--- slurm-2.2.6/src/plugins/accounting_storage/pgsql/accounting_storage_pgsql.h 2011-05-27 21:24:40.000000000 +0300
+++ slurm-2.2.6.loadconf/src/plugins/accounting_storage/pgsql/accounting_storage_pgsql.h 2011-07-06 14:03:53.000000000 +0300
@@ -122,7 +122,7 @@ extern List acct_storage_p_get_accts(pgs
slurmdb_account_cond_t *acct_cond);
extern List acct_storage_p_get_clusters(pgsql_conn_t *pg_conn, uid_t uid,
slurmdb_cluster_cond_t *cluster_cond);
-extern List acct_storage_p_get_config(pgsql_conn_t *pg_conn);
+extern List acct_storage_p_get_config(pgsql_conn_t *pg_conn, char *config_name);
extern List acct_storage_p_get_associations(pgsql_conn_t *pg_conn, uid_t uid,
slurmdb_association_cond_t *assoc_cond);
extern List acct_storage_p_get_problems(pgsql_conn_t *pg_conn, uid_t uid,
diff -udpr slurm-2.2.6/src/plugins/accounting_storage/slurmdbd/accounting_storage_slurmdbd.c slurm-2.2.6.loadconf/src/plugins/accounting_storage/slurmdbd/accounting_storage_slurmdbd.c
--- slurm-2.2.6/src/plugins/accounting_storage/slurmdbd/accounting_storage_slurmdbd.c 2011-05-27 21:25:05.000000000 +0300
+++ slurm-2.2.6.loadconf/src/plugins/accounting_storage/slurmdbd/accounting_storage_slurmdbd.c 2011-07-06 15:53:14.000000000 +0300
@@ -1388,7 +1388,7 @@ extern List acct_storage_p_get_clusters(
return ret_list;
}
-extern List acct_storage_p_get_config(void *db_conn)
+extern List acct_storage_p_get_config(void *db_conn, char *config_name)
{
slurmdbd_msg_t req, resp;
dbd_list_msg_t *got_msg;
@@ -1396,7 +1396,7 @@ extern List acct_storage_p_get_config(vo
List ret_list = NULL;
req.msg_type = DBD_GET_CONFIG;
- req.data = NULL;
+ req.data = config_name;
rc = slurm_send_recv_slurmdbd_msg(SLURMDBD_VERSION, &req, &resp);
if (rc != SLURM_SUCCESS)
diff -udpr slurm-2.2.6/src/slurmctld/power_save.c slurm-2.2.6.loadconf/src/slurmctld/power_save.c
--- slurm-2.2.6/src/slurmctld/power_save.c 2011-05-27 21:24:42.000000000 +0300
+++ slurm-2.2.6.loadconf/src/slurmctld/power_save.c 2011-07-06 12:05:57.000000000 +0300
@@ -296,6 +296,7 @@ static pid_t _run_prog(char *prog, char
int i;
char program[1024], arg0[1024], arg1[1024], *pname;
pid_t child;
+ slurm_ctl_conf_t *ctlconf;
if (prog == NULL) /* disabled, useful for testing */
return -1;
@@ -318,6 +321,9 @@ static pid_t _run_prog(char *prog, char
#else
setpgrp();
#endif
+ ctlconf = slurm_conf_lock();
+ setenv("SLURM_CONF", ctlconf->slurm_conf, 1);
+ slurm_conf_unlock();
execl(program, arg0, arg1, NULL);
exit(1);
} else if (child < 0) {
diff -udpr slurm-2.2.6/src/slurmdbd/proc_req.c slurm-2.2.6.loadconf/src/slurmdbd/proc_req.c
--- slurm-2.2.6/src/slurmdbd/proc_req.c 2011-05-27 21:25:07.000000000 +0300
+++ slurm-2.2.6.loadconf/src/slurmdbd/proc_req.c 2011-07-06 15:40:09.000000000 +0300
@@ -1145,15 +1145,49 @@ static int _get_clusters(slurmdbd_conn_t
return rc;
}
+static int _unpack_config_name(char **object, Buf buffer)
+{ /* Unpack one string (the requested config name) from buffer.
+   * OUT object - set to the unpacked string on success, NULL on failure
+   * IN buffer - buffer the string is read from
+   * RET SLURM_SUCCESS, or SLURM_ERROR when the unpack_error path is taken
+   */
+ char *config_name;
+ uint32_t uint32_tmp;
+
+ /* presumably the macro jumps to unpack_error on failure; nothing else
+  * visible here reaches that label -- confirm against its definition */
+ safe_unpackstr_xmalloc(&config_name, &uint32_tmp, buffer);
+ *object = config_name;
+ return SLURM_SUCCESS;
+
+unpack_error:
+ *object = NULL;
+ return SLURM_ERROR;
+}
+
static int _get_config(slurmdbd_conn_t *slurmdbd_conn,
Buf in_buffer, Buf *out_buffer, uint32_t *uid)
{
+ char *config_name = NULL;
dbd_list_msg_t list_msg = { NULL };
debug2("DBD_GET_CONFIG: called");
- /* No message body to unpack */
+ if (slurmdbd_conn->rpc_version >= 9 &&
+ _unpack_config_name(&config_name, in_buffer) != SLURM_SUCCESS) {
+ char *comment = "Failed to unpack DBD_GET_CONFIG message";
+ error("CONN:%u %s", slurmdbd_conn->newsockfd, comment);
+ *out_buffer = make_dbd_rc_msg(slurmdbd_conn->rpc_version,
+ SLURM_ERROR, comment,
+ DBD_GET_CONFIG);
+ return SLURM_ERROR;
+ }
+
+ if (config_name == NULL ||
+ strcmp(config_name, "slurmdbd.conf") == 0)
+ list_msg.my_list = dump_config();
+ else if ((list_msg.my_list = acct_storage_g_get_config(
+ slurmdbd_conn->db_conn, config_name)) == NULL) {
+ *out_buffer = make_dbd_rc_msg(slurmdbd_conn->rpc_version,
+ errno, slurm_strerror(errno),
+ DBD_GET_CONFIG);
+ xfree(config_name);
+ return SLURM_ERROR;
+ }
- list_msg.my_list = dump_config();
*out_buffer = init_buf(1024);
pack16((uint16_t) DBD_GOT_CONFIG, *out_buffer);
slurmdbd_pack_list_msg(&list_msg, slurmdbd_conn->rpc_version,
@@ -1161,6 +1195,8 @@ static int _get_config(slurmdbd_conn_t *
if (list_msg.my_list)
list_destroy(list_msg.my_list);
+ xfree(config_name);
+
return SLURM_SUCCESS;
}