Good morning all,

This is my first time as a Slurm sysadmin, so please excuse me if the
problem has a trivial solution.  I have configured a small cluster based on
virtual machines (CentOS 7 minimal, NFS, no iptables). Everything is
working OK, but I keep having an error that I'm not able to solve.

The problem is that Slurm scripts do not write into stdout.

If I, for example, execute the following script:

---
---
$more myScript.sh
#!/bin/sh
set -x
echo "execute hostname"
hostname
echo "now create file with hostname as content"
hostname > /home/slurm/host

$sbatch myScript.sh

$more slurm-49.out
+ echo 'execute hostname'
execute hostname
+ hostname
+ echo 'now create file with hostname as content'
now create file with hostname as content
+ hostname
$more /home/slurm/host
(created but empty)
---
---

If I manually execute the script, either on the master or on the computing
node, the result is successful and /home/slurm/host contains the name of
the host.

I have noticed two things that don't make much sense:
-apparently, "stdout" is broken and nothing is written into it
-however, "print" (echo) commands work OK.

I have tried to submit with different sbatch options but all my attempts
have been unsuccessful.

Any hints on the problem?

Thanks for your help,

Manuel


PS: the output of scontrol show config
---
[root@slurm-master slurm]# scontrol show config
Configuration data as of 2014-10-20T12:36:47
AccountingStorageBackupHost = (null)
AccountingStorageEnforce = none
AccountingStorageHost   = localhost
AccountingStorageLoc    = /var/log/slurm_jobacct.log
AccountingStoragePort   = 0
AccountingStorageType   = accounting_storage/none
AccountingStorageUser   = root
AccountingStoreJobComment = YES
AcctGatherEnergyType    = acct_gather_energy/none
AcctGatherFilesystemType = acct_gather_filesystem/none
AcctGatherInfinibandType = acct_gather_infiniband/none
AcctGatherNodeFreq      = 0 sec
AcctGatherProfileType   = acct_gather_profile/none
AuthInfo                = (null)
AuthType                = auth/munge
BackupAddr              = (null)
BackupController        = (null)
BatchStartTimeout       = 10 sec
BOOT_TIME               = 2014-10-17T16:45:18
CacheGroups             = 0
CheckpointType          = checkpoint/none
ClusterName             = cluster
CompleteWait            = 0 sec
ControlAddr             = slurm-master
ControlMachine          = slurm-master
CoreSpecPlugin          = core_spec/none
CryptoType              = crypto/munge
DebugFlags              = (null)
DefMemPerNode           = UNLIMITED
DisableRootJobs         = NO
DynAllocPort            = 0
EnforcePartLimits       = NO
Epilog                  = (null)
EpilogMsgTime           = 2000 usec
EpilogSlurmctld         = (null)
ExtSensorsType          = ext_sensors/none
ExtSensorsFreq          = 0 sec
FastSchedule            = 1
FirstJobId              = 1
GetEnvTimeout           = 2 sec
GresTypes               = (null)
GroupUpdateForce        = 0
GroupUpdateTime         = 600 sec
HASH_VAL                = Match
HealthCheckInterval     = 0 sec
HealthCheckNodeState    = ANY
HealthCheckProgram      = (null)
InactiveLimit           = 0 sec
JobAcctGatherFrequency  = 30
JobAcctGatherType       = jobacct_gather/none
JobAcctGatherParams     = (null)
JobCheckpointDir        = /var/slurm/checkpoint
JobCompHost             = localhost
JobCompLoc              = /var/log/slurm_jobcomp.log
JobCompPort             = 0
JobCompType             = jobcomp/none
JobCompUser             = root
JobContainerType        = job_container/none
JobCredentialPrivateKey = (null)
JobCredentialPublicCertificate = (null)
JobFileAppend           = 0
JobRequeue              = 1
JobSubmitPlugins        = (null)
KeepAliveTime           = SYSTEM_DEFAULT
KillOnBadExit           = 0
KillWait                = 30 sec
LaunchType              = launch/slurm
Licenses                = (null)
LicensesUsed            = (null)
MailProg                = /bin/mail
MaxArraySize            = 1001
MaxJobCount             = 10000
MaxJobId                = 4294901760
MaxMemPerNode           = UNLIMITED
MaxStepCount            = 40000
MaxTasksPerNode         = 128
MessageTimeout          = 10 sec
MinJobAge               = 300 sec
MpiDefault              = none
MpiParams               = (null)
NEXT_JOB_ID             = 50
OverTimeLimit           = 0 min
PluginDir               = /usr/local/lib/slurm
PlugStackConfig         = /usr/local/etc/plugstack.conf
PreemptMode             = OFF
PreemptType             = preempt/none
PriorityType            = priority/basic
PrivateData             = none
ProctrackType           = proctrack/pgid
Prolog                  = (null)
PrologSlurmctld         = (null)
PrologFlags             = (null)
PropagatePrioProcess    = 0
PropagateResourceLimits = ALL
PropagateResourceLimitsExcept = (null)
RebootProgram           = (null)
ReconfigFlags           = (null)
ResumeProgram           = (null)
ResumeRate              = 300 nodes/min
ResumeTimeout           = 60 sec
ResvEpilog              = (null)
ResvOverRun             = 0 min
ResvProlog              = (null)
ReturnToService         = 1
SallocDefaultCommand    = (null)
SchedulerParameters     = (null)
SchedulerPort           = 7321
SchedulerRootFilter     = 1
SchedulerTimeSlice      = 30 sec
SchedulerType           = sched/backfill
SelectType              = select/linear
SlurmUser               = slurm(500)
SlurmctldDebug          = info
SlurmctldLogFile        = (null)
SlurmSchedLogFile       = (null)
SlurmctldPort           = 6817
SlurmctldTimeout        = 120 sec
SlurmdDebug             = info
SlurmdLogFile           = (null)
SlurmdPidFile           = /var/run/slurmd.pid
SlurmdPlugstack         = (null)
SlurmdPort              = 6818
SlurmdSpoolDir          = /var/spool/slurmd
SlurmdTimeout           = 300 sec
SlurmdUser              = root(0)
SlurmSchedLogLevel      = 0
SlurmctldPidFile        = /var/run/slurmctld.pid
SlurmctldPlugstack      = (null)
SLURM_CONF              = /usr/local/etc/slurm.conf
SLURM_VERSION           = 14.03.8
SrunEpilog              = (null)
SrunProlog              = (null)
StateSaveLocation       = /var/spool/slurmState
SuspendExcNodes         = (null)
SuspendExcParts         = (null)
SuspendProgram          = (null)
SuspendRate             = 60 nodes/min
SuspendTime             = NONE
SuspendTimeout          = 30 sec
SwitchType              = switch/none
TaskEpilog              = (null)
TaskPlugin              = task/none
TaskPluginParam         = (null type)
TaskProlog              = (null)
TmpFS                   = /tmp
TopologyPlugin          = topology/none
TrackWCKey              = 0
TreeWidth               = 50
UsePam                  = 0
UnkillableStepProgram   = (null)
UnkillableStepTimeout   = 60 sec
VSizeFactor             = 0 percent
WaitTime                = 0 sec
---
---



-- 
Dr. Manuel Rodríguez-Pascual
skype: manuel.rodriguez.pascual
phone: (+34) 913466173 // (+34) 679925108

CIEMAT-Moncloa
Edificio 22, desp. 1.25
Avenida Complutense, 40
28040- MADRID
SPAIN

Reply via email to