This patch fixes the problem in SLURM v2.3.0. It will probably work with v2.2.7 also.

Moe Jette
SchedMD LLC

Quoting Chris Harwell <[email protected]>:

Hi,

I really like the Licenses feature which slurm has and am hoping to
make use of it shortly.  I particularly like the ability to return a
license by using scontrol update to change the number of licenses for
a running job. I notice that is only permitted by the root or
SlurmUser when specifying a string. But I also notice I am able to
crash the slurm controller with the empty string as a user.


drdws0115:~$ scontrol show -o JobId=312  | awk ' { print $42 }'
Licenses=pitstop*4

drdws0115:~$ sudo -u slurmcl2 /opt/slurmcl2/bin/scontrol update
JobId=312 Licenses="pitstop*1"

drdws0115:~$ scontrol show -o JobId=312  | awk ' { print $42 }'
Licenses=pitstop*1

# Great worked as expected!

drdws0115:~$ sudo -u slurmcl2 /opt/slurmcl2/bin/scontrol update
JobId=312 Licenses="pitstop*0"
slurm_update error: Invalid license specification
# I need to use the emptry string because 0 is not allowed


drdws0115:~$ sudo -u slurmcl2 /opt/slurmcl2/bin/scontrol update
JobId=312 Licenses=""
drdws0115:~$ scontrol show -o JobId=312  | awk ' { print $42 }'
Licenses=
# Again great worked as expected

drdws0115:~$ /opt/slurmcl2/bin/scontrol update JobId=312 Licenses=""
scontrol: error: slurm_receive_msg: Zero Bytes were transmitted or received
slurm_update error: Zero Bytes were transmitted or received

Can other people see this problem too? What additional information
could I provide to help fix this?

drdws0115:~$ /opt/slurmcl2/bin/scontrol version
slurm 2.2.7
drdws0115:~$ uname -a
Linux drdws0115.nyc.desres.deshaw.com 2.6.18-238.12.1.el5 #1 SMP Tue
May 31 13:22:04 EDT 2011 x86_64 x86_64 x86_64 GNU/Linux
drdws0115:~$ cat /etc/redhat-release
CentOS release 5.6 (Final)

drdws0115:~$ /opt/slurmcl2/bin/scontrol show config | grep SlurmUser
SlurmUser               = slurmcl2(406)

Best,
Chris



diff --git a/NEWS b/NEWS
index 771fbc9..d561776 100644
--- a/NEWS
+++ b/NEWS
@@ -18,6 +18,8 @@ documents those changes that are of interest to users and admins.
     NodeAddr.
  -- If a job is deferred due to partition limits, then re-test those limits
     after a partition is modified. Patch from Don Lipari.
+ -- Fix bug which would crash slurmcld if job's owner (not root) tries to clear
+    a job's licenses by setting value to "".
 
 * Changes in SLURM 2.3.0.rc2
 ============================
diff --git a/src/slurmctld/job_mgr.c b/src/slurmctld/job_mgr.c
index 24233f3..396af9b 100644
--- a/src/slurmctld/job_mgr.c
+++ b/src/slurmctld/job_mgr.c
@@ -7652,23 +7652,24 @@ int update_job(job_desc_msg_t * job_specs, uid_t uid)
 			     job_specs->licenses);
 			error_code = ESLURM_INVALID_LICENSES;
 		} else if (IS_JOB_PENDING(job_ptr)) {
-			if (job_ptr->license_list)
-				list_destroy(job_ptr->license_list);
+			FREE_NULL_LIST(job_ptr->license_list);
 			job_ptr->license_list = license_list;
+			info("sched: update_job: changing licenses from '%s' "
+			     "to '%s' for pending job %u",
+			     job_ptr->licenses, job_specs->licenses,
+			     job_ptr->job_id);
 			xfree(job_ptr->licenses);
 			job_ptr->licenses = job_specs->licenses;
 			job_specs->licenses = NULL; /* nothing to free */
-			info("sched: update_job: setting licenses to %s for "
-			     "job %u", job_ptr->licenses, job_ptr->job_id);
-		} else if (IS_JOB_RUNNING(job_ptr) && authorized) {
+		} else if (IS_JOB_RUNNING(job_ptr) &&
+			   (authorized || (license_list == NULL))) {
 			/* NOTE: This can result in oversubscription of
 			 * licenses */
 			license_job_return(job_ptr);
-			if (job_ptr->license_list)
-				list_destroy(job_ptr->license_list);
+			FREE_NULL_LIST(job_ptr->license_list);
 			job_ptr->license_list = license_list;
-			info("sched: update_job: changing licenses from %s to "
-			     "%s for  running job %u",
+			info("sched: update_job: changing licenses from '%s' "
+			     "to '%s' for running job %u",
 			     job_ptr->licenses, job_specs->licenses,
 			     job_ptr->job_id);
 			xfree(job_ptr->licenses);
@@ -7681,7 +7682,7 @@ int update_job(job_desc_msg_t * job_specs, uid_t uid)
 			info("sched: update_job: could not change licenses "
 			     "for job %u", job_ptr->job_id);
 			error_code = ESLURM_DISABLED;
-			list_destroy(license_list);
+			FREE_NULL_LIST(license_list);
 		}
 	}
 	if (error_code != SLURM_SUCCESS)

Reply via email to