Hello there!

    Since we are using a specific node selection plugin, we expect its
jobinfo data to be available on the nodes so that we can check it. We ran
into trouble, though: only the first node of a batch job sees that info
(SLURM 2.2.6 is used), and although srun gets the info, it doesn't forward
it to the nodes. To resolve this issue I've added one more field to the
RESPONSE_JOB_STEP_CREATE and REQUEST_LAUNCH_TASKS RPCs and updated the
corresponding places in slurmctld and slurmd, so now it works perfectly.
I hope to see it in the 2.3 protocol version.

    Andriy.
diff -udpr slurm-2.2.6/src/api/step_launch.c slurm-2.2.6.jobinfo/src/api/step_launch.c
--- slurm-2.2.6/src/api/step_launch.c	2011-05-27 21:25:05.000000000 +0300
+++ slurm-2.2.6.jobinfo/src/api/step_launch.c	2011-06-22 20:15:15.000000000 +0300
@@ -259,6 +259,8 @@ int slurm_step_launch (slurm_step_ctx_t
 	launch.cpus_allocated  = ctx->step_resp->step_layout->tasks;
 	launch.global_task_ids = ctx->step_resp->step_layout->tids;
 
+	launch.select_jobinfo  = ctx->step_resp->select_jobinfo;
+
 	launch.user_managed_io = params->user_managed_io ? 1 : 0;
 	ctx->launch_state->user_managed_io = params->user_managed_io;
 
diff -udpr slurm-2.2.6/src/common/slurm_protocol_defs.c slurm-2.2.6.jobinfo/src/common/slurm_protocol_defs.c
--- slurm-2.2.6/src/common/slurm_protocol_defs.c	2011-05-27 21:25:05.000000000 +0300
+++ slurm-2.2.6.jobinfo/src/common/slurm_protocol_defs.c	2011-06-22 20:18:57.000000000 +0300
@@ -673,6 +673,9 @@ void slurm_free_launch_tasks_request_msg
 	if (msg->options)
 		job_options_destroy(msg->options);
 
+	if (msg->select_jobinfo)
+		select_g_select_jobinfo_free(msg->select_jobinfo);
+
 	xfree(msg);
 }
 
@@ -1641,6 +1644,8 @@ void slurm_free_job_step_create_response
 		slurm_cred_destroy(msg->cred);
 		if (msg->switch_job)
 			switch_free_jobinfo(msg->switch_job);
+		if (msg->select_jobinfo)
+			select_g_select_jobinfo_free(msg->select_jobinfo);
 
 		xfree(msg);
 	}
diff -udpr slurm-2.2.6/src/common/slurm_protocol_defs.h slurm-2.2.6.jobinfo/src/common/slurm_protocol_defs.h
--- slurm-2.2.6/src/common/slurm_protocol_defs.h	2011-05-27 21:25:05.000000000 +0300
+++ slurm-2.2.6.jobinfo/src/common/slurm_protocol_defs.h	2011-06-22 19:23:13.000000000 +0300
@@ -602,6 +602,7 @@ typedef struct job_step_create_response_
 	slurm_cred_t *cred;    	  /* slurm job credential */
 	switch_jobinfo_t *switch_job;	/* switch context, opaque
                                          * data structure */
+	dynamic_plugin_data_t *select_jobinfo; /* select context, opaque data */
 } job_step_create_response_msg_t;
 
 typedef struct launch_tasks_request_msg {
@@ -667,6 +668,7 @@ typedef struct launch_tasks_request_msg
 	char *restart_dir;	/* restart from checkpoint if set */
 	char **spank_job_env;
 	uint32_t spank_job_env_size;
+	dynamic_plugin_data_t *select_jobinfo; /* opaque data */
 } launch_tasks_request_msg_t;
 
 typedef struct task_user_managed_io_msg {
diff -udpr slurm-2.2.6/src/common/slurm_protocol_pack.c slurm-2.2.6.jobinfo/src/common/slurm_protocol_pack.c
--- slurm-2.2.6/src/common/slurm_protocol_pack.c	2011-05-27 21:25:05.000000000 +0300
+++ slurm-2.2.6.jobinfo/src/common/slurm_protocol_pack.c	2011-06-23 13:18:13.000000000 +0300
@@ -3247,6 +3247,9 @@ _pack_job_step_create_response_msg(job_s
 	pack_slurm_step_layout(msg->step_layout, buffer, protocol_version);
 	slurm_cred_pack(msg->cred, buffer);
 	switch_pack_jobinfo(msg->switch_job, buffer);
+	/* FIXME: next one requires protocol version increased */
+	select_g_select_jobinfo_pack(msg->select_jobinfo, buffer,
+				     protocol_version);
 
 }
 
@@ -3278,6 +3281,11 @@ _unpack_job_step_create_response_msg(job
 		switch_free_jobinfo(tmp_ptr->switch_job);
 		goto unpack_error;
 	}
+
+	/* FIXME: next one requires protocol version increased */
+	if (select_g_select_jobinfo_unpack(&tmp_ptr->select_jobinfo, buffer,
+					   protocol_version))
+		goto unpack_error;
 	return SLURM_SUCCESS;
 
 unpack_error:
@@ -5735,6 +5743,9 @@ _pack_launch_tasks_request_msg(launch_ta
 		pack16(msg->acctg_freq, buffer);
 		packstr(msg->ckpt_dir, buffer);
 		packstr(msg->restart_dir, buffer);
+		/* FIXME: next one requires increased protocol version! */
+		select_g_select_jobinfo_pack(msg->select_jobinfo, buffer,
+					     protocol_version);
 	} else {
 		pack32(msg->job_id, buffer);
 		pack32(msg->job_step_id, buffer);
@@ -5898,6 +5909,9 @@ _unpack_launch_tasks_request_msg(launch_
 		safe_unpack16(&msg->acctg_freq, buffer);
 		safe_unpackstr_xmalloc(&msg->ckpt_dir, &uint32_tmp, buffer);
 		safe_unpackstr_xmalloc(&msg->restart_dir, &uint32_tmp, buffer);
+		/* FIXME: next one requires increased protocol version! */
+		select_g_select_jobinfo_unpack(&msg->select_jobinfo, buffer,
+					       protocol_version);
 	} else {
 		safe_unpack32(&msg->job_id, buffer);
 		safe_unpack32(&msg->job_step_id, buffer);
diff -udpr slurm-2.2.6/src/slurmctld/proc_req.c slurm-2.2.6.jobinfo/src/slurmctld/proc_req.c
--- slurm-2.2.6/src/slurmctld/proc_req.c	2011-05-27 21:25:06.000000000 +0300
+++ slurm-2.2.6.jobinfo/src/slurmctld/proc_req.c	2011-06-22 19:40:25.000000000 +0300
@@ -1562,6 +1562,8 @@ static void _slurm_rpc_job_step_create(s
 		job_step_resp.cred        = slurm_cred;
 		job_step_resp.switch_job  = switch_copy_jobinfo(
 			step_rec->switch_job);
+		job_step_resp.select_jobinfo = select_g_select_jobinfo_copy(
+			step_rec->job_ptr->select_jobinfo);
 
 		unlock_slurmctld(job_write_lock);
 		slurm_msg_t_init(&resp);
@@ -1576,6 +1578,7 @@ static void _slurm_rpc_job_step_create(s
 		slurm_step_layout_destroy(job_step_resp.step_layout);
 		slurm_cred_destroy(slurm_cred);
 		switch_free_jobinfo(job_step_resp.switch_job);
+		select_g_select_jobinfo_free(job_step_resp.select_jobinfo);
 		schedule_job_save();	/* Sets own locks */
 	}
 }
diff -udpr slurm-2.2.6/src/slurmd/slurmstepd/mgr.c slurm-2.2.6.jobinfo/src/slurmd/slurmstepd/mgr.c
--- slurm-2.2.6/src/slurmd/slurmstepd/mgr.c	2011-05-27 21:25:06.000000000 +0300
+++ slurm-2.2.6.jobinfo/src/slurmd/slurmstepd/mgr.c	2011-06-22 20:05:03.000000000 +0300
@@ -234,6 +234,7 @@ mgr_launch_tasks_setup(launch_tasks_requ
 
 	job->envtp->cli = cli;
 	job->envtp->self = self;
+	job->envtp->select_jobinfo = msg->select_jobinfo;
 
 	return job;
 }

Reply via email to