These changes will be in the next release of SLURM version 2.5, probably within a few days. Thank you!
Quoting Hongjia Cao <[email protected]>:

>
>
> 1. wrong parameters cause srun segfault:
>
> # srun --no-alloc -w cn8,cn9,cn10 -N 4 -n 4 hostname
> srun: do not allocate resources
> srun: error: Only allocated 3 nodes asked for 4
> segmentation fault (core dumped)
>
> --- slurm-2.5.0-0.pre2/src/srun/libsrun/srun_job.c 2012-07-20
> 23:27:28.000000000 +0800
> +++ /home/hjcao/work/slurm-2.5.0-0.pre2/src/srun/libsrun/srun_job.c
> 2012-09-11 15:14:35.000000000 +0800
> @@ -164,7 +164,8 @@
> */
> job = _job_create_structure(ai);
>
> - job_update_io_fnames(job);
> + if (job != NULL)
> + job_update_io_fnames(job);
>
> error:
> xfree(ai);
> @@ -482,6 +483,10 @@
> } else if (opt.no_alloc) {
> info("do not allocate resources");
> job = job_create_noalloc();
> + if (job == NULL) {
> + error("Job creation failure.");
> + exit(error_exit);
> + }
> if (create_job_step(job, false) < 0) {
> exit(error_exit);
> }
>
>
>
> 2. launching 0 tasks on node:
>
> # srun --no-alloc -w cn8,cn9,cn10 -N 3 -n 4 hostname
> srun: do not allocate resources
> cn9
> cn9
> cn8
> cn8
>
> --- slurm-2.5.0-0.pre2/src/common/slurm_step_layout.c 2012-07-20
> 23:27:28.000000000 +0800
> +++ /home/hjcao/work/slurm-2.5.0-0.pre2/src/common/slurm_step_layout.c
> 2012-09-11 16:05:54.000000000 +0800
> @@ -176,8 +176,8 @@
>
> hl = hostlist_create(tlist);
> /* make out how many cpus there are on each node */
> - if (task_cnt > 0)
> - cpn = (task_cnt + node_cnt - 1) / node_cnt;
> + /* if (task_cnt > 0) */
> + /* cpn = (task_cnt + node_cnt - 1) / node_cnt; */
>
> step_layout = xmalloc(sizeof(slurm_step_layout_t));
> step_layout->node_list = xstrdup(tlist);
> @@ -202,6 +202,8 @@
> cpu_cnt = 0;
> }
> } else {
> + cpn = ((task_cnt - step_layout->task_cnt) +
> + (node_cnt - i) - 1) / (node_cnt - i);
> if (step_layout->task_cnt >= task_cnt) {
> step_layout->tasks[i] = 0;
> step_layout->tids[i] = NULL;
>
>
