I just saw that sbatch seems a little buggy here. It works as intended
if you pass the parameters directly to srun:

#!/bin/bash

#SBATCH -N 4
#SBATCH -n 4
#SBATCH -t 5
#SBATCH --mem-per-cpu=1000
#SBATCH -o bar_%j.out

srun -o foo_%N_%j.out hostname

2017-06-15 10:08 GMT+02:00 TO_Webmaster <[email protected]>:
>
> I think you misunderstood the concept of array jobs. If you want one
> job to run on several nodes, do not define array jobs. Use array jobs
> to run the same job or similar jobs several times.
>
> 2017-06-15 7:36 GMT+02:00 Ariel Balter <[email protected]>:
>>
>> Here is what I get with just `srun hostname`. Why is it creating four jobs on node
>> 2-8 and then running four times on each? I just want `hostname` to run once
>> on four different nodes.
>>
>> balter@exahead1:~/slurm_tutorial$ cat nodes.sub
>> #!/bin/bash
>>
>>
>> #SBATCH --job-name=nodes
>> #SBATCH --array=0-3
>> #SBATCH --nodes=4
>> #SBATCH --tasks-per-node=1
>> ##SBATCH --ntasks=4
>> #SBATCH --output="nodes_%N_%A_%a_%j.out"
>> #SBATCH --error="nodes_%N_%A_%a_%j.err"
>>
>> srun hostname
>>
>> $ for i in *.out; do echo "*** $i ***"; cat $i; done
>> *** nodes_exanode-2-8_512_0_513.out ***
>> exanode-2-8
>> exanode-4-44
>> exanode-4-12
>> exanode-6-0
>> *** nodes_exanode-2-8_512_1_514.out ***
>> exanode-2-8
>> exanode-4-44
>> exanode-4-12
>> exanode-6-0
>> *** nodes_exanode-2-8_512_2_515.out ***
>> exanode-2-8
>> exanode-4-12
>> exanode-4-44
>> exanode-6-0
>> *** nodes_exanode-2-8_512_3_512.out ***
>> exanode-2-8
>> exanode-4-12
>> exanode-4-44
>> exanode-6-0
>>
>>
>> On 6/14/17 2:17 PM, TO_Webmaster wrote:
>>>
>>> I guess $(hostname) is evaluated too early (on the batch host). Please
>>> check the output of "srun hostname".
>>>
>>> 2017-06-14 21:23 GMT+02:00 Ariel Balter <[email protected]>:
>>>>
>>>> I want to create an array of jobs, but have them execute on different
>>>> nodes.
>>>> This is what I'm trying:
>>>>
>>>> ```
>>>>
>>>> balter@exahead1:~/slurm_tutorial$ cat nodes.sub
>>>> #!/bin/bash
>>>>
>>>>
>>>> #SBATCH --job-name=nodes
>>>> #SBATCH --array=0-3
>>>> #SBATCH --nodes=4
>>>> #SBATCH --tasks-per-node=1
>>>> ##SBATCH --ntasks=4
>>>> #SBATCH --output="hello_%N_%A_%a_%j.out"
>>>> #SBATCH --error="hello_%N_%A_%a_%j.err"
>>>>
>>>>
>>>> srun echo "SLURM_JOB_NAME: $SLURM_JOB_NAME   SLURM_JOB_ID: $SLURM_JOB_ID
>>>> SLURM_ARRAY_TASK_ID: $SLURM_ARRAY_TASK_ID SLURM_ARRAY_JOB_ID:
>>>> $SLURM_ARRAY_JOB_ID   SLURM_PROCID: $SLURM_PROCID  hostname: $(hostname)"
>>>>
>>>> balter@exahead1:~/slurm_tutorial$ sbatch nodes.sub
>>>> balter@exahead1:~/slurm_tutorial$ for i in *.out; do echo "*** $i ***";
>>>> cat
>>>> $i; done
>>>>
>>>> *** hello_exanode-6-3_408_0_409.out ***
>>>>
>>>> SLURM_JOB_NAME: nodes   SLURM_JOB_ID: 409  SLURM_ARRAY_TASK_ID: 0
>>>> SLURM_ARRAY_JOB_ID: 408   SLURM_PROCID: 0  hostname: exanode-6-3
>>>>
>>>> SLURM_JOB_NAME: nodes   SLURM_JOB_ID: 409  SLURM_ARRAY_TASK_ID: 0
>>>> SLURM_ARRAY_JOB_ID: 408   SLURM_PROCID: 0  hostname: exanode-6-3
>>>>
>>>> SLURM_JOB_NAME: nodes   SLURM_JOB_ID: 409  SLURM_ARRAY_TASK_ID: 0
>>>> SLURM_ARRAY_JOB_ID: 408   SLURM_PROCID: 0  hostname: exanode-6-3
>>>> SLURM_JOB_NAME: nodes   SLURM_JOB_ID: 409  SLURM_ARRAY_TASK_ID: 0
>>>> SLURM_ARRAY_JOB_ID: 408   SLURM_PROCID: 0  hostname: exanode-6-3
>>>> *** hello_exanode-6-3_408_1_410.out ***
>>>> SLURM_JOB_NAME: nodes   SLURM_JOB_ID: 410  SLURM_ARRAY_TASK_ID: 1
>>>> SLURM_ARRAY_JOB_ID: 408   SLURM_PROCID: 0  hostname: exanode-6-3
>>>> SLURM_JOB_NAME: nodes   SLURM_JOB_ID: 410  SLURM_ARRAY_TASK_ID: 1
>>>> SLURM_ARRAY_JOB_ID: 408   SLURM_PROCID: 0  hostname: exanode-6-3
>>>> SLURM_JOB_NAME: nodes   SLURM_JOB_ID: 410  SLURM_ARRAY_TASK_ID: 1
>>>> SLURM_ARRAY_JOB_ID: 408   SLURM_PROCID: 0  hostname: exanode-6-3
>>>> SLURM_JOB_NAME: nodes   SLURM_JOB_ID: 410  SLURM_ARRAY_TASK_ID: 1
>>>> SLURM_ARRAY_JOB_ID: 408   SLURM_PROCID: 0  hostname: exanode-6-3
>>>> *** hello_exanode-6-3_408_2_411.out ***
>>>> SLURM_JOB_NAME: nodes   SLURM_JOB_ID: 411  SLURM_ARRAY_TASK_ID: 2
>>>> SLURM_ARRAY_JOB_ID: 408   SLURM_PROCID: 0  hostname: exanode-6-3
>>>> SLURM_JOB_NAME: nodes   SLURM_JOB_ID: 411  SLURM_ARRAY_TASK_ID: 2
>>>> SLURM_ARRAY_JOB_ID: 408   SLURM_PROCID: 0  hostname: exanode-6-3
>>>> SLURM_JOB_NAME: nodes   SLURM_JOB_ID: 411  SLURM_ARRAY_TASK_ID: 2
>>>> SLURM_ARRAY_JOB_ID: 408   SLURM_PROCID: 0  hostname: exanode-6-3
>>>> SLURM_JOB_NAME: nodes   SLURM_JOB_ID: 411  SLURM_ARRAY_TASK_ID: 2
>>>> SLURM_ARRAY_JOB_ID: 408   SLURM_PROCID: 0  hostname: exanode-6-3
>>>> *** hello_exanode-6-3_408_3_408.out ***
>>>> SLURM_JOB_NAME: nodes   SLURM_JOB_ID: 408  SLURM_ARRAY_TASK_ID: 3
>>>> SLURM_ARRAY_JOB_ID: 408   SLURM_PROCID: 0  hostname: exanode-6-3
>>>> SLURM_JOB_NAME: nodes   SLURM_JOB_ID: 408  SLURM_ARRAY_TASK_ID: 3
>>>> SLURM_ARRAY_JOB_ID: 408   SLURM_PROCID: 0  hostname: exanode-6-3
>>>> SLURM_JOB_NAME: nodes   SLURM_JOB_ID: 408  SLURM_ARRAY_TASK_ID: 3
>>>> SLURM_ARRAY_JOB_ID: 408   SLURM_PROCID: 0  hostname: exanode-6-3
>>>> SLURM_JOB_NAME: nodes   SLURM_JOB_ID: 408  SLURM_ARRAY_TASK_ID: 3
>>>> SLURM_ARRAY_JOB_ID: 408   SLURM_PROCID: 0  hostname: exanode-6-3
>>>>
>>>> ```
>>
>>
>> --
>> Ariel Balter
>> "Don't believe everything you think."
>> 509-713-0087
>> ☮

Reply via email to