We have had requests to be able to suspend/resume MPI jobs within an SGE
environment. SGE sends a signal (which is configurable) to mpirun to
stop the job and another signal to resume it. To enable this, I
propose that we add code to ORTE to catch SIGTSTP/SIGCONT and
forward them to the a.outs. More precisely, SIGTSTP will be caught,
forwarded, then converted to SIGSTOP before being delivered to the
a.outs (unlike SIGTSTP, SIGSTOP cannot be caught or ignored, so the
a.outs are guaranteed to stop). The one disadvantage is that this
overrides the default SIGTSTP behavior, which is typically to stop
mpirun itself.
Does anyone else have a requirement like this or does anyone have issues
with these changes? FWIW, I know there is at least one other MPI that
supports this type of behavior.
One problem is that, with SIGTSTP no longer delivering a stop signal to
mpirun, users can no longer press CTRL-Z at their terminal to stop
mpirun. I am trying to figure out how big a problem that is.
Rolf
PS: Here are the possible code changes. Not too major.
burl-ct-v440-2 62 =>svn diff
Index: orte/tools/orterun/orterun.c
===================================================================
--- orte/tools/orterun/orterun.c (revision 20072)
+++ orte/tools/orterun/orterun.c (working copy)
@@ -99,6 +99,8 @@
#ifndef __WINDOWS__
static struct opal_event sigusr1_handler;
static struct opal_event sigusr2_handler;
+static struct opal_event sigtstp_handler;
+static struct opal_event sigcont_handler;
#endif /* __WINDOWS__ */
static orte_job_t *jdata;
static char *orterun_basename = NULL;
@@ -511,6 +513,12 @@
opal_signal_set(&sigusr2_handler, SIGUSR2,
signal_forward_callback, &sigusr2_handler);
opal_signal_add(&sigusr2_handler, NULL);
+ opal_signal_set(&sigtstp_handler, SIGTSTP,
+ signal_forward_callback, &sigtstp_handler);
+ opal_signal_add(&sigtstp_handler, NULL);
+ opal_signal_set(&sigcont_handler, SIGCONT,
+ signal_forward_callback, &sigcont_handler);
+ opal_signal_add(&sigcont_handler, NULL);
#endif /* __WINDOWS__ */
/* we are an hnp, so update the contact info field for later use */
@@ -763,6 +771,8 @@
/** Remove the USR signal handlers */
opal_signal_del(&sigusr1_handler);
opal_signal_del(&sigusr2_handler);
+ opal_signal_del(&sigtstp_handler);
+ opal_signal_del(&sigcont_handler);
#endif /* __WINDOWS__ */
/* get the daemon job object */
Index: orte/orted/orted_comm.c
===================================================================
--- orte/orted/orted_comm.c (revision 20072)
+++ orte/orted/orted_comm.c (working copy)
@@ -457,10 +457,6 @@
/**** SIGNAL_LOCAL_PROCS ****/
case ORTE_DAEMON_SIGNAL_LOCAL_PROCS:
- if (orte_debug_daemons_flag) {
- opal_output(0, "%s orted_cmd: received signal_local_procs",
- ORTE_NAME_PRINT(ORTE_PROC_MY_NAME));
- }
/* unpack the jobid */
n = 1;
if (ORTE_SUCCESS != (ret = opal_dss.unpack(buffer, &job,
&n, ORTE_JOBID))) {
@@ -474,7 +470,22 @@
ORTE_ERROR_LOG(ret);
goto CLEANUP;
}
-
+
+ /* Convert SIGTSTP to SIGSTOP so we can suspend a.out */
+ if (SIGTSTP == signal) {
+ if (orte_debug_daemons_flag) {
+ opal_output(0, "%s orted_cmd: converted SIGTSTP to SIGSTOP before delivering",
+ ORTE_NAME_PRINT(ORTE_PROC_MY_NAME));
+ }
+ signal = SIGSTOP;
+ }
+
+ if (orte_debug_daemons_flag) {
+ opal_output(0, "%s orted_cmd: received signal_local_procs, delivering signal %d",
+ ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
+ signal);
+ }
+
/* signal them */
if (ORTE_SUCCESS != (ret =
orte_odls.signal_local_procs(NULL, signal))) {
ORTE_ERROR_LOG(ret);
burl-ct-v440-2 63 =>
--
=========================
rolf.vandeva...@sun.com
781-442-3043
=========================