The reason is that I don't have a standard path and I don't want to install it on each machine, so I install it on a shared disk.
I used the slurm-lnll from the debian package and I changed it to fit to my configuration. I made it with some variables and change 1 or 2 hard-written path in the tests. it seems to work I'll copy it below so if the question come back again, they can have a easier start Le 29 mars 2012 00:53, Ralf Utermann <[email protected]> a écrit : > > On 28.03.2012 08:49, [email protected] wrote: >> >> Hi Xavier, >> >> On Tue, Mar 27, 2012 at 06:19:04PM -0600, Xavier Barthelemy wrote: >>> I am trying to set up slurm 2.3.4 on an small set of ubuntu 10.04. >> >> why don't you compile the debian sid [1] package on your server? >> It has everything you need in place. >> Best Regards >> >> [1] http://packages.debian.org/sid/slurm-llnl > thank you for maintaining the packages! > ###################################################### # init.d script for Ubuntu #!/bin/sh # # chkconfig: 345 90 10 # description: SLURM is a simple resource management system which \ # manages exclusive access o a set of compute \ # resources and distributes work to those resources. # # processname: /usr/sbin/slurmd # pidfile: /var/run/slurm-llnl/slurmd.pid # # processname: /usr/sbin/slurmctld # pidfile: /var/run/slurm-llnl/slurmctld.pid # # config: /etc/default/slurm-llnl # ### BEGIN INIT INFO # Provides: slurm-llnl # Required-Start: $remote_fs $syslog $network munge # Required-Stop: $remote_fs $syslog $network munge # Should-Start: $named # Should-Stop: $named # Default-Start: 2 3 4 5 # Default-Stop: 0 1 6 # Short-Description: slurm daemon management # Description: Start slurm to provide resource management ### END INIT INFO BASEDIR=/opt/SLURM-2.3.4 BINDIR=$BASEDIR/bin CONFDIR=$BASEDIR/etc LIBDIR=$BASEDIR/lib SBINDIR=$BASEDIR/sbin #BINDIR=/usr/bin #CONFDIR=/etc/slurm-llnl #LIBDIR=/usr/lib #SBINDIR=/usr/sbin user=MPIuser if [ ! -f $BINDIR/scontrol ] ; then export PATH=$PATH:$BINDIR:$SBINDIR fi # Source slurm specific configuration if [ -f /etc/default/slurm-llnl ] ; then . 
/etc/default/slurm-llnl else SLURMCTLD_OPTIONS="" SLURMD_OPTIONS="" fi # Checking for slurm.conf presence if [ ! -f $CONFDIR/slurm.conf ] ; then if [ -n "$(echo $1 | grep start)" ] ; then echo Not starting slurm-llnl fi echo slurm.conf was not found in $CONFDIR echo Please follow the instructions in \ /usr/share/doc/slurm-llnl/README.Debian.gz exit 0 fi test -f $BINDIR/scontrol || exit 0 DAEMONLIST=$($BINDIR/scontrol show daemons 2>/dev/null) if [ $? = 0 ] ; then for prog in $DAEMONLIST ; do test -f $SBINDIR/$prog || exit 0 done else if [ -n "$(echo $1 | grep start)" ] ; then echo "Not starting slurm-llnl for problems in the configuration file" else echo "Problems in the configuration file" fi echo "${CONFDIR}/slurm.conf" echo "If upgrading from version 1.2 it is recommended that you rebuild" echo "your configuration file. Please read instructions in" echo " /usr/share/doc/slurm-llnl/README.Debian" echo "Otherwise use \"scontrol show daemons\" for more information" exit 0 fi #Checking for lsb init function if [ -f /lib/lsb/init-functions ] ; then . 
/lib/lsb/init-functions else echo Can\'t find lsb init functions exit 1 fi # setup library paths for slurm and munge support export LD_LIBRARY_PATH="$LIBDIR:$LD_LIBRARY_PATH" #Function to check for cert and key presence and key vulnerabilty checkcertkey() { MISSING="" keyfile="" certfile="" if [ "$1" = "slurmd" ] ; then keyfile=$(grep JobCredentialPublicCertificate $CONFDIR/slurm.conf \ | grep -v "^ *#") keyfile=${keyfile##*=} keyfile=${keyfile%#*} [ -e $keyfile ] || MISSING="$keyfile" elif [ "$1" = "slurmctld" ] ; then keyfile=$(grep JobCredentialPrivateKey $CONFDIR/slurm.conf | grep -v "^ *#") keyfile=${keyfile##*=} keyfile=${keyfile%#*} [ -e $keyfile ] || MISSING="$keyfile" fi if [ "${MISSING}" != "" ] ; then echo Not starting slurm-llnl echo $MISSING not found echo Please follow the instructions in \ /usr/share/doc/slurm-llnl/README.cryptotype-openssl exit 0 fi if [ -f "$keyfile" ] && [ "$1" = "slurmctld" ] ; then keycheck=$(openssl-vulnkey $keyfile | cut -d : -f 1) if [ "$keycheck" = "COMPROMISED" ] ; then echo Your slurm key stored in the file $keyfile echo is vulnerable because has been created with a buggy openssl. echo Please rebuild it with openssl version \>= 0.9.8g-9 echo More information in /usr/share/doc/slurm-llnl/README.Debian exit 0 fi fi } get_daemon_description() { case $1 in slurmd) echo slurm compute node daemon ;; slurmctld) echo slurm central management daemon ;; *) echo slurm daemon ;; esac } start() { CRYPTOTYPE=$(grep CryptoType $CONFDIR/slurm.conf | grep -v "^ *#") CRYPTOTYPE=${CRYPTOTYPE##*=} CRYPTOTYPE=${CRYPTOTYPE%#*} if [ "$CRYPTOTYPE" = "crypto/openssl" ] ; then checkcertkey $1 fi # Create run-time variable data mkdir -p /var/run/slurm-llnl # chown slurm:slurm /var/run/slurm-llnl chown $user:$user /var/run/slurm-llnl if ! 
[ -e /var/log/slurm_jobcomp.log ] ; then touch /var/log/slurm_jobcomp.log chown $user:$user /var/log/slurm_jobcomp.log fi # Checking if SlurmdSpoolDir is under run if [ "$1" = "slurmd" ] ; then SDIRLOCATION=$(grep SlurmdSpoolDir $CONFDIR/slurm.conf \ | grep -v "^ *#") SDIRLOCATION=${SDIRLOCATION##*=} SDIRLOCATION=${SDIRLOCATION%#*} if [ "${SDIRLOCATION}" = "/var/run/slurm-llnl/slurmd" ] ; then if ! [ -e /var/run/slurm-llnl/slurmd ] ; then ln -s /var/lib/slurm-llnl/slurmd /var/run/slurm-llnl/slurmd fi fi fi # Checking if StateSaveLocation is under run if [ "$1" = "slurmctld" ] ; then SDIRLOCATION=$(grep StateSaveLocation $CONFDIR/slurm.conf \ | grep -v "^ *#") SDIRLOCATION=${SDIRLOCATION##*=} SDIRLOCATION=${SDIRLOCATION%#*} if [ "${SDIRLOCATION}" = "/var/run/slurm-llnl/slurmctld" ] ; then if ! [ -e /var/run/slurm-llnl/slurmctld ] ; then ln -s /var/lib/slurm-llnl/slurmctld /var/run/slurm-llnl/slurmctld fi fi fi desc="$(get_daemon_description $1)" log_daemon_msg "Starting $desc" "$1" unset HOME MAIL USER USERNAME #FIXME $STARTPROC $SBINDIR/$1 $2 STARTERRORMSG="$(start-stop-daemon --start --oknodo \ --exec "$SBINDIR/$1" -- $2 2>&1)" STATUS=$? log_end_msg $STATUS if [ "$STARTERRORMSG" != "" ] ; then echo $STARTERRORMSG fi touch /var/lock/slurm } stop() { desc="$(get_daemon_description $1)" log_daemon_msg "Stopping $desc" "$1" STOPERRORMSG="$(start-stop-daemon --oknodo --stop -s TERM \ --exec "$SBINDIR/$1" 2>&1)" STATUS=$? log_end_msg $STATUS if [ "$STOPERRORMSG" != "" ] ; then echo $STOPERRORMSG fi rm -f /var/lock/slurm } startall() { for PROG in $DAEMONLIST ; do case $PROG in slurmd) OPTVAR=$SLURMD_OPTIONS ;; slurmctld) OPTVAR=$SLURMCTLD_OPTIONS ;; *) ;; esac start $PROG $OPTVAR done } getpidfile() { dpidfile=`grep -i ${1}pid $CONFDIR/slurm.conf | grep -v '^ *#'` if [ $? 
= 0 ]; then dpidfile=${dpidfile##*=} dpidfile=${dpidfile%#*} else dpidfile=/var/run/${1}.pid fi echo $dpidfile } # # status() with slight modifications to take into account # instantiations of job manager slurmd's, which should not be # counted as "running" # slurmstatus() { base=${1##*/} pidfile=$(getpidfile $base) pid=`pidof -o $$ -o $$PPID -o %PPID -x $1 || \ pidof -o $$ -o $$PPID -o %PPID -x ${base}` if [ -f $pidfile ]; then read rpid < $pidfile if [ "$rpid" != "" -a "$pid" != "" ]; then for i in $pid ; do if [ "$i" = "$rpid" ]; then echo "${base} (pid $pid) is running..." return 0 fi done elif [ "$rpid" != "" -a "$pid" = "" ]; then # Due to change in user id, pid file may persist # after slurmctld terminates if [ "$base" != "slurmctld" ] ; then echo "${base} dead but pid file exists" fi return 1 fi fi if [ "$base" = "slurmctld" -a "$pid" != "" ] ; then echo "${base} (pid $pid) is running..." return 0 fi echo "${base} is stopped" return 3 } # # stop slurm daemons, # wait for termination to complete (up to 10 seconds) before returning # slurmstop() { for prog in $DAEMONLIST ; do stop $prog for i in 1 2 3 4 do sleep $i slurmstatus $prog if [ $? != 0 ]; then break fi done done } # # The pathname substitution in daemon command assumes prefix and # exec_prefix are same. This is the default, unless the user requests # otherwise. # # Any node can be a slurm controller and/or server. 
# case "$1" in start) startall ;; startclean) SLURMCTLD_OPTIONS="-c $SLURMCTLD_OPTIONS" SLURMD_OPTIONS="-c $SLURMD_OPTIONS" startall ;; stop) slurmstop ;; status) for prog in $DAEMONLIST ; do slurmstatus $prog done ;; restart) $0 stop $0 start ;; force-reload) $0 stop $0 start ;; condrestart) if [ -f /var/lock/subsys/slurm ]; then for prog in $DAEMONLIST ; do stop $prog start $prog done fi ;; reconfig) for prog in $DAEMONLIST ; do PIDFILE=$(getpidfile $prog) start-stop-daemon --stop --signal HUP --pidfile \ "$PIDFILE" --quiet $prog done ;; test) for prog in $DAEMONLIST ; do echo "$prog runs here" done ;; *) echo "Usage: $0 {start|startclean|stop|status|restart|reconfig|condrestart|test}" exit 1 ;; esac
