I am writing PMI2 support for SLURM. In the design, the PMI2 server
will be implemented in an MPI plugin, mpi/pmi2. To accomplish this I made
some changes to the source code of SLURM (2.4.0-0.pre1):

* An API function, slurm_forward_data(), is added. It transfers data
to the specified nodes and forwards it to the specified UNIX domain
socket on each node, using the tree communication mechanism of SLURM.
This is a generic function which exposes the highly efficient tree
communication mechanism of SLURM.
* An MPI plugin hook, mpi_hook_slurmstepd_prefork(), is added. It is
called by slurmstepd, before the tasks are forked, so that the plugin can
start a PMI server thread.
* Error handling during step launch is fixed for the case where the MPI
plugin's prelaunch setup fails.

I am not sure whether this is appropriate for SLURM. The patch is attached
for your review. The mpi/pmi2 code is still being tested and will be
published later.



diff -uNr -X DONTDIFF slurm-2.4.0-0.pre1/src/api/step_io.c /home/hjcao/work/slurm-2.4.0-0.pre1/src/api/step_io.c
--- slurm-2.4.0-0.pre1/src/api/step_io.c	2011-10-25 01:39:19.000000000 +0800
+++ /home/hjcao/work/slurm-2.4.0-0.pre1/src/api/step_io.c	2011-11-25 09:26:49.000000000 +0800
@@ -1170,6 +1170,8 @@
 int
 client_io_handler_finish(client_io_t *cio)
 {
+	if (cio == NULL)
+		return SLURM_SUCCESS;
 	eio_signal_shutdown(cio->eio);
 	if (pthread_join(cio->ioid, NULL) < 0) {
 		error("Waiting for client io pthread: %m");
@@ -1182,7 +1184,8 @@
 void
 client_io_handler_destroy(client_io_t *cio)
 {
-	xassert(cio);
+	if (cio == NULL)
+		return;
 
 	/* FIXME - perhaps should make certain that IO engine is shutdown
 	   (by calling client_io_handler_finish()) before freeing anything */
@@ -1239,6 +1242,8 @@
 	struct server_io_info *info;
 	int i;
 
+	if (cio == NULL)
+		return;
 	pthread_mutex_lock(&cio->ioservers_lock);
 	for (i = 0; i < cio->num_nodes; i++) {
 		if (!bit_test(cio->ioservers_ready_bits, i)) {
diff -uNr -X DONTDIFF slurm-2.4.0-0.pre1/src/api/step_launch.c /home/hjcao/work/slurm-2.4.0-0.pre1/src/api/step_launch.c
--- slurm-2.4.0-0.pre1/src/api/step_launch.c	2011-10-25 01:39:19.000000000 +0800
+++ /home/hjcao/work/slurm-2.4.0-0.pre1/src/api/step_launch.c	2011-11-25 09:27:10.000000000 +0800
@@ -490,11 +490,13 @@
 	eio_signal_shutdown(sls->msg_handle);
 
 	pthread_mutex_unlock(&sls->lock);
-	pthread_join(sls->msg_thread, NULL);
+	if (sls->msg_thread)
+		pthread_join(sls->msg_thread, NULL);
 	pthread_mutex_lock(&sls->lock);
 	pmi_kvs_free();
 
-	eio_handle_destroy(sls->msg_handle);
+	if (sls->msg_handle)
+		eio_handle_destroy(sls->msg_handle);
 
 	/* Shutdown the io timeout thread, if one exists */
 	if (sls->io_timeout_thread_created) {
diff -uNr -X DONTDIFF slurm-2.4.0-0.pre1/src/common/eio.c /home/hjcao/work/slurm-2.4.0-0.pre1/src/common/eio.c
--- slurm-2.4.0-0.pre1/src/common/eio.c	2011-10-25 01:39:19.000000000 +0800
+++ /home/hjcao/work/slurm-2.4.0-0.pre1/src/common/eio.c	2011-11-25 09:10:50.000000000 +0800
@@ -197,6 +197,8 @@
 int eio_signal_shutdown(eio_handle_t *eio)
 {
 	char c = 1;
+	if (eio == NULL)
+		return 0;
 	if (write(eio->fds[1], &c, sizeof(char)) != 1)
 		return error("eio_handle_signal_shutdown: write; %m");
 	return 0;
diff -uNr -X DONTDIFF slurm-2.4.0-0.pre1/src/common/mpi.c /home/hjcao/work/slurm-2.4.0-0.pre1/src/common/mpi.c
--- slurm-2.4.0-0.pre1/src/common/mpi.c	2011-10-25 01:39:19.000000000 +0800
+++ /home/hjcao/work/slurm-2.4.0-0.pre1/src/common/mpi.c	2011-11-25 09:50:45.000000000 +0800
@@ -50,7 +50,7 @@
 #include "src/common/mpi.h"
 #include "src/common/xmalloc.h"
 #include "src/common/xstring.h"
-
+#include "src/slurmd/slurmstepd/slurmstepd_job.h"
 
 /*
  * WARNING:  Do not change the order of these fields or add additional
@@ -59,6 +59,8 @@
  * at the end of the structure.
  */
 typedef struct slurm_mpi_ops {
+	int          (*slurmstepd_prefork)(const slurmd_job_t *job,
+					   char ***env);
 	int          (*slurmstepd_init)   (const mpi_plugin_task_info_t *job,
 					   char ***env);
 	mpi_plugin_client_state_t *
@@ -142,6 +144,7 @@
 	 * declared for slurm_mpi_ops_t.
 	 */
 	static const char *syms[] = {
+		"p_mpi_hook_slurmstepd_prefork",
 		"p_mpi_hook_slurmstepd_task",
 		"p_mpi_hook_client_prelaunch",
 		"p_mpi_hook_client_single_task_per_node",
@@ -265,6 +268,14 @@
 	return SLURM_SUCCESS;
 }
 
+int mpi_hook_slurmstepd_prefork (const slurmd_job_t *job, char ***env)
+{
+	if (mpi_hook_slurmstepd_init(env) == SLURM_ERROR)
+		return SLURM_ERROR;
+
+	return (*(g_context->ops.slurmstepd_prefork))(job, env);
+}
+
 int mpi_hook_slurmstepd_task (const mpi_plugin_task_info_t *job, char ***env)
 {
 	if (mpi_hook_slurmstepd_init(env) == SLURM_ERROR)
diff -uNr -X DONTDIFF slurm-2.4.0-0.pre1/src/common/pack.h /home/hjcao/work/slurm-2.4.0-0.pre1/src/common/pack.h
--- slurm-2.4.0-0.pre1/src/common/pack.h	2011-10-25 01:39:19.000000000 +0800
+++ /home/hjcao/work/slurm-2.4.0-0.pre1/src/common/pack.h	2011-11-10 09:47:14.000000000 +0800
@@ -269,7 +269,7 @@
 #define safe_packstr(str,max_len,buf) do {		\
 	uint32_t _size;					\
 	assert(buf->magic == BUF_MAGIC);		\
-	assert(sizeof(*max_len) === sizeof(uint32_t));	\
+	assert(sizeof(max_len) === sizeof(uint32_t));	\
 	_size = (str ? strlen(str)+1 : 0);		\
 	assert(_size == 0 || str != NULL);		\
 	if (_size <= max_len)				\
diff -uNr -X DONTDIFF slurm-2.4.0-0.pre1/src/common/slurm_protocol_api.c /home/hjcao/work/slurm-2.4.0-0.pre1/src/common/slurm_protocol_api.c
--- slurm-2.4.0-0.pre1/src/common/slurm_protocol_api.c	2011-10-25 01:39:19.000000000 +0800
+++ /home/hjcao/work/slurm-2.4.0-0.pre1/src/common/slurm_protocol_api.c	2011-11-14 09:20:09.000000000 +0800
@@ -3648,6 +3648,49 @@
 
 #endif
 
+/*
+ * slurm_forward_data - forward arbitrary data to unix domain sockets on nodes
+ * IN nodelist: nodes to forward data to
+ * IN address: address of unix domain socket
+ * IN len: length of data
+ * IN data: real data
+ * RET: error code
+ */
+extern int
+slurm_forward_data(char *nodelist, char *address, uint32_t len, char *data)
+{
+	List ret_list = NULL;
+	int temp_rc = 0, rc = 0;
+	ret_data_info_t *ret_data_info = NULL;
+	slurm_msg_t *msg = xmalloc(sizeof(slurm_msg_t));
+	forward_data_msg_t req;
+
+	slurm_msg_t_init(msg);
+
+	debug("slurm_forward_data: nodelist=%s, address=%s, len=%u", nodelist, address, len);
+	req.address = address;
+	req.len = len;
+	req.data = data;
+
+	msg->msg_type = REQUEST_FORWARD_DATA;
+	msg->data = &req;
+
+	if ((ret_list = slurm_send_recv_msgs(nodelist, msg, 0, false))) {
+		while ((ret_data_info = list_pop(ret_list))) {
+			temp_rc = slurm_get_return_code(ret_data_info->type,
+							ret_data_info->data);
+			if (temp_rc)
+				rc = temp_rc;
+		}
+	} else {
+		error("slurm_forward_data: no list was returned");
+		rc = SLURM_ERROR;
+	}
+
+	slurm_free_msg(msg);
+	return rc;
+}
+
 
 /*
  * vi: shiftwidth=8 tabstop=8 expandtab
diff -uNr -X DONTDIFF slurm-2.4.0-0.pre1/src/common/slurm_protocol_defs.c /home/hjcao/work/slurm-2.4.0-0.pre1/src/common/slurm_protocol_defs.c
--- slurm-2.4.0-0.pre1/src/common/slurm_protocol_defs.c	2011-10-25 01:39:19.000000000 +0800
+++ /home/hjcao/work/slurm-2.4.0-0.pre1/src/common/slurm_protocol_defs.c	2011-11-01 08:39:46.000000000 +0800
@@ -958,6 +958,15 @@
         }
 }
 
+inline void slurm_free_forward_data_msg(forward_data_msg_t *msg)
+{
+	if (msg) {
+		xfree(msg->address);
+		xfree(msg->data);
+		xfree(msg);
+	}
+}
+
 extern char *preempt_mode_string(uint16_t preempt_mode)
 {
 	char *gang_str;
diff -uNr -X DONTDIFF slurm-2.4.0-0.pre1/src/common/slurm_protocol_defs.h /home/hjcao/work/slurm-2.4.0-0.pre1/src/common/slurm_protocol_defs.h
--- slurm-2.4.0-0.pre1/src/common/slurm_protocol_defs.h	2011-10-25 01:39:19.000000000 +0800
+++ /home/hjcao/work/slurm-2.4.0-0.pre1/src/common/slurm_protocol_defs.h	2011-11-01 08:39:46.000000000 +0800
@@ -280,6 +280,7 @@
 	RESPONSE_SLURMCTLD_STATUS,
 	REQUEST_JOB_STEP_PIDS,
         RESPONSE_JOB_STEP_PIDS,
+	REQUEST_FORWARD_DATA,
 
 	REQUEST_LAUNCH_TASKS = 6001,
 	RESPONSE_LAUNCH_TASKS,
@@ -894,6 +895,12 @@
 	time_t start_time;	/* time when job will start */
 } will_run_response_msg_t;
 
+typedef struct forward_data_msg {
+	char *address;
+	uint32_t len;
+	char *data;
+} forward_data_msg_t;
+
 /*****************************************************************************\
  * Slurm API Message Types
 \*****************************************************************************/
diff -uNr -X DONTDIFF slurm-2.4.0-0.pre1/src/common/slurm_protocol_pack.c /home/hjcao/work/slurm-2.4.0-0.pre1/src/common/slurm_protocol_pack.c
--- slurm-2.4.0-0.pre1/src/common/slurm_protocol_pack.c	2011-10-25 01:39:19.000000000 +0800
+++ /home/hjcao/work/slurm-2.4.0-0.pre1/src/common/slurm_protocol_pack.c	2011-11-14 09:24:42.000000000 +0800
@@ -607,7 +607,10 @@
 					 Buf buffer, uint16_t protocol_version);
 static int _unpack_spank_env_responce_msg(spank_env_responce_msg_t ** msg_ptr,
 					  Buf buffer, uint16_t protocol_version);
-
+static void _pack_forward_data_msg(forward_data_msg_t *msg,
+				   Buf buffer, uint16_t protocol_version);
+static int _unpack_forward_data_msg(forward_data_msg_t **msg_ptr,
+				    Buf buffer, uint16_t protocol_version);
 /* pack_header
  * packs a slurm protocol header that precedes every slurm message
  * IN header - the header structure to pack
@@ -1176,6 +1179,10 @@
 			(spank_env_responce_msg_t *)msg->data, buffer,
 			msg->protocol_version);
 		break;
+	case REQUEST_FORWARD_DATA:
+		_pack_forward_data_msg((forward_data_msg_t *)msg->data,
+				       buffer, msg->protocol_version);
+		break;
 	default:
 		debug("No pack method for msg type %u", msg->msg_type);
 		return EINVAL;
@@ -1729,6 +1736,10 @@
 			(spank_env_responce_msg_t **)&msg->data, buffer,
 			msg->protocol_version);
 		break;
+	case REQUEST_FORWARD_DATA:
+		rc = _unpack_forward_data_msg((forward_data_msg_t **)&msg->data,
+					      buffer, msg->protocol_version);
+		break;
 	default:
 		debug("No unpack method for msg type %u", msg->msg_type);
 		return EINVAL;
@@ -10954,6 +10965,53 @@
 	return SLURM_ERROR;
 }
 
+/*
+ * Pack a forward_data_msg_t: the address string, then len, then the
+ * len bytes of data.
+ */
+static void _pack_forward_data_msg(forward_data_msg_t *msg,
+				   Buf buffer, uint16_t protocol_version)
+{
+	xassert (msg != NULL);
+	packstr(msg->address, buffer);
+	pack32(msg->len, buffer);
+	packmem(msg->data, msg->len, buffer);
+}
+
+/*
+ * Unpack a forward_data_msg_t in the order written by
+ * _pack_forward_data_msg().  On success *msg_ptr holds the new message and
+ * SLURM_SUCCESS is returned; on failure the message is freed, *msg_ptr is
+ * set to NULL and SLURM_ERROR is returned.
+ */
+static int _unpack_forward_data_msg(forward_data_msg_t **msg_ptr,
+				    Buf buffer, uint16_t protocol_version)
+{
+	forward_data_msg_t *msg;
+	uint32_t temp32;
+
+	xassert (msg_ptr != NULL);
+	msg = xmalloc(sizeof(forward_data_msg_t));
+	*msg_ptr = msg;
+	safe_unpackstr_xmalloc(&msg->address, &temp32, buffer);
+	safe_unpack32(&msg->len, buffer);
+	safe_unpackmem_xmalloc(&msg->data, &temp32, buffer);
+	/*
+	 * The sender packs len both explicitly and as the packmem size.
+	 * slurmd's _rpc_forward_data() writes msg->len bytes from msg->data,
+	 * so a message whose two lengths disagree must be rejected to avoid
+	 * reading past the data that was actually received.
+	 */
+	if (temp32 != msg->len)
+		goto unpack_error;
+
+	return SLURM_SUCCESS;
+
+unpack_error:
+	slurm_free_forward_data_msg(msg);
+	*msg_ptr = NULL;
+	return SLURM_ERROR;
+}
 
 /* template
    void pack_ ( * msg , Buf buffer )
diff -uNr -X DONTDIFF slurm-2.4.0-0.pre1/src/plugins/mpi/lam/mpi_lam.c /home/hjcao/work/slurm-2.4.0-0.pre1/src/plugins/mpi/lam/mpi_lam.c
--- slurm-2.4.0-0.pre1/src/plugins/mpi/lam/mpi_lam.c	2011-10-25 01:39:19.000000000 +0800
+++ /home/hjcao/work/slurm-2.4.0-0.pre1/src/plugins/mpi/lam/mpi_lam.c	2011-11-09 09:52:48.000000000 +0800
@@ -48,6 +48,7 @@
 #include "slurm/slurm_errno.h"
 #include "src/common/slurm_xlator.h"
 #include "src/plugins/mpi/lam/lam.h"
+#include "src/slurmd/slurmstepd/slurmstepd_job.h"
 
 /*
  * These variables are required by the generic plugin interface.  If they
@@ -80,6 +81,12 @@
 const char plugin_type[]        = "mpi/lam";
 const uint32_t plugin_version   = 100;
 
+int p_mpi_hook_slurmstepd_prefork(const slurmd_job_t *job, char ***env)
+{
+	debug("mpi/lam: slurmstepd prefork");
+	return SLURM_SUCCESS;
+}
+
 int p_mpi_hook_slurmstepd_task(const mpi_plugin_task_info_t *job,
 			       char ***env)
 {
diff -uNr -X DONTDIFF slurm-2.4.0-0.pre1/src/plugins/mpi/mpich1_p4/mpich1_p4.c /home/hjcao/work/slurm-2.4.0-0.pre1/src/plugins/mpi/mpich1_p4/mpich1_p4.c
--- slurm-2.4.0-0.pre1/src/plugins/mpi/mpich1_p4/mpich1_p4.c	2011-10-25 01:39:19.000000000 +0800
+++ /home/hjcao/work/slurm-2.4.0-0.pre1/src/plugins/mpi/mpich1_p4/mpich1_p4.c	2011-11-09 09:48:23.000000000 +0800
@@ -55,6 +55,7 @@
 #include "src/common/net.h"
 #include "src/common/xmalloc.h"
 #include "src/common/xstring.h"
+#include "src/slurmd/slurmstepd/slurmstepd_job.h"
 
 /*
  * These variables are required by the generic plugin interface.  If they
@@ -103,7 +104,13 @@
 static pthread_cond_t  shutdown_cond;
 
 
-int p_mpi_hook_slurmstepd_task (const mpi_plugin_client_info_t *job,
+int p_mpi_hook_slurmstepd_prefork(const slurmd_job_t *job, char ***env)
+{
+	debug("mpi/mpich1_p4: slurmstepd prefork");
+	return SLURM_SUCCESS;
+}
+
+int p_mpi_hook_slurmstepd_task (const mpi_plugin_task_info_t *job,
 				char ***env)
 {
 	char *nodelist, *task_cnt;
diff -uNr -X DONTDIFF slurm-2.4.0-0.pre1/src/plugins/mpi/mpich1_shmem/mpich1_shmem.c /home/hjcao/work/slurm-2.4.0-0.pre1/src/plugins/mpi/mpich1_shmem/mpich1_shmem.c
--- slurm-2.4.0-0.pre1/src/plugins/mpi/mpich1_shmem/mpich1_shmem.c	2011-10-25 01:39:19.000000000 +0800
+++ /home/hjcao/work/slurm-2.4.0-0.pre1/src/plugins/mpi/mpich1_shmem/mpich1_shmem.c	2011-11-09 09:54:20.000000000 +0800
@@ -48,6 +48,7 @@
 #include "slurm/slurm_errno.h"
 #include "src/common/slurm_xlator.h"
 #include "src/plugins/mpi/lam/lam.h"
+#include "src/slurmd/slurmstepd/slurmstepd_job.h"
 
 /*
  * These variables are required by the generic plugin interface.  If they
@@ -80,6 +81,12 @@
 const char plugin_type[]        = "mpi/mpich1_shmem";
 const uint32_t plugin_version   = 100;
 
+int p_mpi_hook_slurmstepd_prefork(const slurmd_job_t *job, char ***env)
+{
+	debug("mpi/mpich1_shmem: slurmstepd prefork");
+	return SLURM_SUCCESS;
+}
+
 int p_mpi_hook_slurmstepd_task(const mpi_plugin_task_info_t *job,
 			       char ***env)
 {
diff -uNr -X DONTDIFF slurm-2.4.0-0.pre1/src/plugins/mpi/mpichgm/mpi_mpichgm.c /home/hjcao/work/slurm-2.4.0-0.pre1/src/plugins/mpi/mpichgm/mpi_mpichgm.c
--- slurm-2.4.0-0.pre1/src/plugins/mpi/mpichgm/mpi_mpichgm.c	2011-10-25 01:39:19.000000000 +0800
+++ /home/hjcao/work/slurm-2.4.0-0.pre1/src/plugins/mpi/mpichgm/mpi_mpichgm.c	2011-11-09 09:50:59.000000000 +0800
@@ -49,6 +49,7 @@
 #include "slurm/slurm_errno.h"
 #include "src/common/slurm_xlator.h"
 #include "src/plugins/mpi/mpichgm/mpichgm.h"
+#include "src/slurmd/slurmstepd/slurmstepd_job.h"
 
 /*
  * These variables are required by the generic plugin interface.  If they
@@ -81,6 +82,12 @@
 const char plugin_type[]        = "mpi/mpichgm";
 const uint32_t plugin_version   = 100;
 
+int p_mpi_hook_slurmstepd_prefork(const slurmd_job_t *job, char ***env)
+{
+	debug("mpi/mpichgm: slurmstepd prefork");
+	return SLURM_SUCCESS;
+}
+
 int p_mpi_hook_slurmstepd_task(const mpi_plugin_task_info_t *job,
 			       char ***env)
 {
diff -uNr -X DONTDIFF slurm-2.4.0-0.pre1/src/plugins/mpi/mpichmx/mpi_mpichmx.c /home/hjcao/work/slurm-2.4.0-0.pre1/src/plugins/mpi/mpichmx/mpi_mpichmx.c
--- slurm-2.4.0-0.pre1/src/plugins/mpi/mpichmx/mpi_mpichmx.c	2011-10-25 01:39:19.000000000 +0800
+++ /home/hjcao/work/slurm-2.4.0-0.pre1/src/plugins/mpi/mpichmx/mpi_mpichmx.c	2011-11-09 09:51:21.000000000 +0800
@@ -48,6 +48,7 @@
 #include "slurm/slurm_errno.h"
 #include "src/common/slurm_xlator.h"
 #include "src/plugins/mpi/mpichmx/mpichmx.h"
+#include "src/slurmd/slurmstepd/slurmstepd_job.h"
 
 /*
  * These variables are required by the generic plugin interface.  If they
@@ -80,6 +81,12 @@
 const char plugin_type[]        = "mpi/mpichmx";
 const uint32_t plugin_version   = 100;
 
+int p_mpi_hook_slurmstepd_prefork(const slurmd_job_t *job, char ***env)
+{
+	debug("mpi/mpichmx: slurmstepd prefork");
+	return SLURM_SUCCESS;
+}
+
 int p_mpi_hook_slurmstepd_task(const mpi_plugin_task_info_t *job,
 			       char ***env)
 {
diff -uNr -X DONTDIFF slurm-2.4.0-0.pre1/src/plugins/mpi/mvapich/mpi_mvapich.c /home/hjcao/work/slurm-2.4.0-0.pre1/src/plugins/mpi/mvapich/mpi_mvapich.c
--- slurm-2.4.0-0.pre1/src/plugins/mpi/mvapich/mpi_mvapich.c	2011-10-25 01:39:19.000000000 +0800
+++ /home/hjcao/work/slurm-2.4.0-0.pre1/src/plugins/mpi/mvapich/mpi_mvapich.c	2011-11-09 09:52:03.000000000 +0800
@@ -49,6 +49,7 @@
 #include "slurm/slurm_errno.h"
 #include "src/common/slurm_xlator.h"
 #include "src/plugins/mpi/mvapich/mvapich.h"
+#include "src/slurmd/slurmstepd/slurmstepd_job.h"
 
 /*
  * These variables are required by the generic plugin interface.  If they
@@ -81,6 +82,12 @@
 const char plugin_type[]        = "mpi/mvapich";
 const uint32_t plugin_version   = 100;
 
+int p_mpi_hook_slurmstepd_prefork(const slurmd_job_t *job, char ***env)
+{
+	debug("mpi/mvapich: slurmstepd prefork");
+	return SLURM_SUCCESS;
+}
+
 int p_mpi_hook_slurmstepd_task (const mpi_plugin_task_info_t *job,
 				char ***env)
 {
diff -uNr -X DONTDIFF slurm-2.4.0-0.pre1/src/plugins/mpi/none/mpi_none.c /home/hjcao/work/slurm-2.4.0-0.pre1/src/plugins/mpi/none/mpi_none.c
--- slurm-2.4.0-0.pre1/src/plugins/mpi/none/mpi_none.c	2011-10-25 01:39:19.000000000 +0800
+++ /home/hjcao/work/slurm-2.4.0-0.pre1/src/plugins/mpi/none/mpi_none.c	2011-11-09 09:52:33.000000000 +0800
@@ -50,6 +50,7 @@
 #include "src/common/slurm_xlator.h"
 #include "src/common/mpi.h"
 #include "src/common/env.h"
+#include "src/slurmd/slurmstepd/slurmstepd_job.h"
 
 /*
  * These variables are required by the generic plugin interface.  If they
@@ -82,6 +83,12 @@
 const char plugin_type[]        = "mpi/none";
 const uint32_t plugin_version   = 100;
 
+int p_mpi_hook_slurmstepd_prefork(const slurmd_job_t *job, char ***env)
+{
+	debug("mpi/none: slurmstepd prefork");
+	return SLURM_SUCCESS;
+}
+
 int p_mpi_hook_slurmstepd_task(const mpi_plugin_task_info_t*job,
 			       char ***env)
 {
diff -uNr -X DONTDIFF slurm-2.4.0-0.pre1/src/plugins/mpi/openmpi/mpi_openmpi.c /home/hjcao/work/slurm-2.4.0-0.pre1/src/plugins/mpi/openmpi/mpi_openmpi.c
--- slurm-2.4.0-0.pre1/src/plugins/mpi/openmpi/mpi_openmpi.c	2011-10-25 01:39:19.000000000 +0800
+++ /home/hjcao/work/slurm-2.4.0-0.pre1/src/plugins/mpi/openmpi/mpi_openmpi.c	2011-11-09 09:53:07.000000000 +0800
@@ -50,6 +50,7 @@
 #include "src/common/slurm_xlator.h"
 #include "src/common/mpi.h"
 #include "src/common/env.h"
+#include "src/slurmd/slurmstepd/slurmstepd_job.h"
 
 /*
  * These variables are required by the generic plugin interface.  If they
@@ -82,6 +83,12 @@
 const char plugin_type[]        = "mpi/openmpi";
 const uint32_t plugin_version   = 100;
 
+int p_mpi_hook_slurmstepd_prefork(const slurmd_job_t *job, char ***env)
+{
+	debug("mpi/openmpi: slurmstepd prefork");
+	return SLURM_SUCCESS;
+}
+
 int p_mpi_hook_slurmstepd_task(const mpi_plugin_task_info_t *job,
 			       char ***env)
 {
diff -uNr -X DONTDIFF slurm-2.4.0-0.pre1/src/slurmd/slurmd/req.c /home/hjcao/work/slurm-2.4.0-0.pre1/src/slurmd/slurmd/req.c
--- slurm-2.4.0-0.pre1/src/slurmd/slurmd/req.c	2011-10-25 01:39:19.000000000 +0800
+++ /home/hjcao/work/slurm-2.4.0-0.pre1/src/slurmd/slurmd/req.c	2011-11-14 09:40:54.000000000 +0800
@@ -53,6 +53,7 @@
 #include <sys/stat.h>
 #include <sys/types.h>
 #include <sys/wait.h>
+#include <sys/un.h>
 #include <utime.h>
 #include <grp.h>
 
@@ -161,6 +162,8 @@
 			char **spank_job_env, uint32_t spank_job_env_size);
 static int  _run_epilog(uint32_t jobid, uid_t uid, char *resv_id,
 			char **spank_job_env, uint32_t spank_job_env_size);
+static void _rpc_forward_data(slurm_msg_t *msg);
+
 
 static bool _pause_for_job_completion(uint32_t jobid, char *nodes,
 		int maxtime);
@@ -366,6 +369,10 @@
 		_rpc_job_notify(msg);
 		slurm_free_job_notify_msg(msg->data);
 		break;
+	case REQUEST_FORWARD_DATA:
+		_rpc_forward_data(msg);
+		slurm_free_forward_data_msg(msg->data);
+		break;
 	default:
 		error("slurmd_req: invalid request msg type %d",
 		      msg->msg_type);
@@ -4379,3 +4386,75 @@
 	slurm_mutex_unlock(&conf->prolog_running_lock);
 	debug( "Finished wait for job %d's prolog to complete", job_id);
 }
+
+
+/*
+ * _rpc_forward_data - handle REQUEST_FORWARD_DATA
+ *
+ * Connects to the UNIX domain socket named by the request's address and
+ * writes the requester's uid, the data length (both as 32-bit values in
+ * network byte order) and then the data itself.  The result is sent back
+ * to the requester with slurm_send_rc_msg(): 0 on success, otherwise the
+ * error code of the step that failed.
+ *
+ * IN msg: the received message; msg->data is a forward_data_msg_t
+ */
+static void
+_rpc_forward_data(slurm_msg_t *msg)
+{
+	forward_data_msg_t *req = (forward_data_msg_t *)msg->data;
+	uint32_t req_uid, req_len;
+	struct sockaddr_un sa;
+	int fd = -1, rc = SLURM_SUCCESS;
+
+	/* sanity check: address must exist and fit in sun_path with its NUL */
+	if (req->address == NULL ||
+	    strlen(req->address) > sizeof(sa.sun_path) - 1) {
+		rc = EINVAL;
+		goto done;
+	}
+
+	debug3("Entering _rpc_forward_data, address: %s, len: %u",
+	       req->address, req->len);
+
+	/* connect to specified address */
+	fd = socket(AF_UNIX, SOCK_STREAM, 0);
+	if (fd < 0) {
+		rc = errno;	/* capture before any later call changes it */
+		error("failed creating UNIX domain socket: %m");
+		goto done;
+	}
+	memset(&sa, 0, sizeof(sa));
+	sa.sun_family = AF_UNIX;
+	strcpy(sa.sun_path, req->address);	/* length checked above */
+	/* retry connect() if it was interrupted by a signal */
+	do {
+		rc = connect(fd, (struct sockaddr *)&sa, SUN_LEN(&sa));
+	} while ((rc < 0) && (errno == EINTR));
+	if (rc < 0) {
+		rc = errno;
+		debug2("failed connecting to specified socket '%s': %m",
+		       req->address);
+		goto done;
+	}
+
+	/*
+	 * although always in localhost, we still convert it to network
+	 * byte order, to make it consistent with pack/unpack.
+	 */
+	req_uid = htonl((uint32_t)g_slurm_auth_get_uid(msg->auth_cred, NULL));
+	req_len = htonl(req->len);
+	safe_write(fd, &req_uid, sizeof(uint32_t));
+	safe_write(fd, &req_len, sizeof(uint32_t));
+	safe_write(fd, req->data, req->len);
+	rc = SLURM_SUCCESS;
+	goto done;
+
+rwfail:
+	/* safe_write() is expected to jump here when a write fails */
+	rc = errno ? errno : SLURM_ERROR;
+done:
+	if (fd >= 0)
+		close(fd);
+	slurm_send_rc_msg(msg, rc);
+}
diff -uNr -X DONTDIFF slurm-2.4.0-0.pre1/src/slurmd/slurmstepd/mgr.c /home/hjcao/work/slurm-2.4.0-0.pre1/src/slurmd/slurmstepd/mgr.c
--- slurm-2.4.0-0.pre1/src/slurmd/slurmstepd/mgr.c	2011-10-25 01:39:19.000000000 +0800
+++ /home/hjcao/work/slurm-2.4.0-0.pre1/src/slurmd/slurmstepd/mgr.c	2011-11-09 09:43:42.000000000 +0800
@@ -982,6 +982,14 @@
 		goto fail2;
 	}
 
+	/* fork necessary threads for MPI */
+	if (mpi_hook_slurmstepd_prefork(job, &job->env) != SLURM_SUCCESS) {
+		error("Failed mpi_hook_slurmstepd_prefork");
+		rc = SLURM_FAILURE;
+		io_close_task_fds(job);
+		goto fail2;
+	}
+	
 	/* calls pam_setup() and requires pam_finish() if successful */
 	if (_fork_all_tasks(job) < 0) {
 		debug("_fork_all_tasks failed");
diff -uNr -X DONTDIFF slurm-2.4.0-0.pre1/src/srun/multi_prog.c /home/hjcao/work/slurm-2.4.0-0.pre1/src/srun/multi_prog.c
--- slurm-2.4.0-0.pre1/src/srun/multi_prog.c	2011-10-25 01:39:19.000000000 +0800
+++ /home/hjcao/work/slurm-2.4.0-0.pre1/src/srun/multi_prog.c	2011-11-23 17:20:23.000000000 +0800
@@ -85,7 +85,7 @@
 	}
 
 	/* check if already absolute path */
-	if (file_name[0] == '/')
+	if (file_name[0] == '/' || file_name[0] == '.')
 		return file_name;
 
 	/* search for the file using PATH environment variable */
diff -uNr -X DONTDIFF slurm-2.4.0-0.pre1/src/srun/srun.c /home/hjcao/work/slurm-2.4.0-0.pre1/src/srun/srun.c
--- slurm-2.4.0-0.pre1/src/srun/srun.c	2011-10-25 01:39:19.000000000 +0800
+++ /home/hjcao/work/slurm-2.4.0-0.pre1/src/srun/srun.c	2011-11-24 11:11:54.000000000 +0800
@@ -516,6 +516,7 @@
 	    SLURM_SUCCESS) {
 		error("Application launch failed: %m");
 		global_rc = 1;
+		slurm_step_launch_abort(job->step_ctx);
 		slurm_step_launch_wait_finish(job->step_ctx);
 		goto cleanup;
 	}

Attachment: signature.asc
Description: This is a digitally signed message part

Reply via email to