Recently I upgraded our cluster from RHEL5 to RHEL6. After the upgrade the
slurm PAM module no longer seems to work properly (I had built new rpms from
the 2.2.7 SRPM on the compute nodes and installed those rpms everywhere).
The problem is that it still allows users to log in whether or not they have
an allocation for that node. I believe it is partially working because it
does block a user from running sudo on a node they do not have allocated,
whereas if they do have it allocated they can run sudo. I've been doing a lot
of searching, but I haven't run across anyone else who has a similar issue.
Everything I have found basically says install the rpm, modify the
/etc/pam.d/system-auth file and that is it.

I'm not sure what else to do, so any help would be greatly appreciated.

Thanks,

Mark

Here is my system-auth file

#%PAM-1.0
# This file is auto-generated.
# User changes will be destroyed the next time authconfig is run.
auth        required      pam_env.so
auth        sufficient    pam_unix.so nullok try_first_pass
auth        requisite     pam_succeed_if.so uid >= 500 quiet
auth        required      pam_deny.so

account     required      pam_unix.so
account     sufficient    pam_succeed_if.so uid < 500 quiet
account     sufficient    pam_access.so
account     required      pam_permit.so
account     required      pam_slurm.so

password    requisite     pam_cracklib.so try_first_pass retry=3
password    sufficient    pam_unix.so md5 shadow nis nullok try_first_pass use_authtok
password    required      pam_deny.so

session     optional      pam_keyinit.so revoke
session     required      pam_limits.so
session     [success=1 default=ignore] pam_succeed_if.so service in crond quiet use_uid
session     required      pam_unix.so


[root@head ~]# scontrol show config
Configuration data as of 2011-09-06T13:32:36
AccountingStorageBackupHost = (null)
AccountingStorageEnforce = none
AccountingStorageHost   = localhost
AccountingStorageLoc    = /var/log/slurm_jobacct.log
AccountingStoragePort   = 0
AccountingStorageType   = accounting_storage/none
AccountingStorageUser   = root
AuthType                = auth/munge
BackupAddr              = (null)
BackupController        = (null)
BatchStartTimeout       = 10 sec
BOOT_TIME               = 2011-09-04T16:48:50
CacheGroups             = 1
CheckpointType          = checkpoint/none
ClusterName             = ri
CompleteWait            = 0 sec
ControlAddr             = head
ControlMachine          = head
CryptoType              = crypto/munge
DebugFlags              = (null)
DefMemPerCPU            = UNLIMITED
DisableRootJobs         = NO
EnforcePartLimits       = NO
Epilog                  = /usr/local/slurm/epilog
EpilogMsgTime           = 2000 usec
EpilogSlurmctld         = (null)
FastSchedule            = 1
FirstJobId              = 1
GetEnvTimeout           = 2 sec
GresTypes               = (null)
GroupUpdateForce        = 0
GroupUpdateTime         = 600 sec
HashVal                 = Match
HealthCheckInterval     = 0 sec
HealthCheckProgram      = (null)
InactiveLimit           = 600 sec
JobAcctGatherFrequency  = 30 sec
JobAcctGatherType       = jobacct_gather/none
JobCheckpointDir        = /var/slurm/checkpoint
JobCompHost             = localhost
JobCompLoc              = /var/log/slurm_jobcomp.log
JobCompPort             = 0
JobCompType             = jobcomp/none
JobCompUser             = root
JobCredentialPrivateKey = (null)
JobCredentialPublicCertificate = (null)
JobFileAppend           = 0
JobRequeue              = 1
JobSubmitPlugins        = (null)
KillOnBadExit           = 0
KillWait                = 30 sec
Licenses                = (null)
MailProg                = /bin/mail
MaxJobCount             = 10000
MaxMemPerCPU            = UNLIMITED
MaxTasksPerNode         = 128
MessageTimeout          = 30 sec
MinJobAge               = 300 sec
MpiDefault              = none
MpiParams               = (null)
NEXT_JOB_ID             = 2207
OverTimeLimit           = 0 min
PluginDir               = /usr/lib64/slurm
PlugStackConfig         = /etc/slurm/plugstack.conf
PreemptMode             = OFF
PreemptType             = preempt/none
PriorityType            = priority/basic
PrivateData             = none
ProctrackType           = proctrack/pgid
Prolog                  = (null)
PrologSlurmctld         = (null)
PropagatePrioProcess    = 0
PropagateResourceLimits = ALL
PropagateResourceLimitsExcept = (null)
ResumeProgram           = (null)
ResumeRate              = 300 nodes/min
ResumeTimeout           = 60 sec
ResvOverRun             = 0 min
ReturnToService         = 2
SallocDefaultCommand    = (null)
SchedulerParameters     = max_job_bf=15,interval=20
SchedulerPort           = 7321
SchedulerRootFilter     = 1
SchedulerTimeSlice      = 30 sec
SchedulerType           = sched/backfill
SelectType              = select/linear
SlurmUser               = slurm(497)
SlurmctldDebug          = 5
SlurmctldLogFile        = /var/log/slurm/slurmctld.log
SlurmSchedLogFile       = (null)
SlurmctldPort           = 6817
SlurmctldTimeout        = 120 sec
SlurmdDebug             = 3
SlurmdLogFile           = /var/log/slurm/slurmd.log
SlurmdPidFile           = /var/run/slurmd.pid
SlurmdPort              = 6818
SlurmdSpoolDir          = /tmp/slurmd
SlurmdTimeout           = 300 sec
SlurmdUser              = root(0)
SlurmSchedLogLevel      = 0
SlurmctldPidFile        = /var/run/slurmctld.pid
SLURM_CONF              = /etc/slurm/slurm.conf
SLURM_VERSION           = 2.2.7
SrunEpilog              = (null)
SrunProlog              = (null)
StateSaveLocation       = /tmp
SuspendExcNodes         = (null)
SuspendExcParts         = (null)
SuspendProgram          = (null)
SuspendRate             = 60 nodes/min
SuspendTime             = NONE
SuspendTimeout          = 30 sec
SwitchType              = switch/none
TaskEpilog              = (null)
TaskPlugin              = task/none
TaskPluginParam         = (null type)
TaskProlog              = (null)
TmpFS                   = /tmp
TopologyPlugin          = topology/none
TrackWCKey              = 0
TreeWidth               = 50
UsePam                  = 0
UnkillableStepProgram   = (null)
UnkillableStepTimeout   = 60 sec
VSizeFactor             = 0 percent
WaitTime                = 0 sec

Slurmctld(primary/backup) at head/(NULL) are UP/DOWN

Reply via email to