Hello there!

    There is a possible situation where the TMPDIR variable on the srun/sbatch
side points to a valid directory, but that directory does not exist on the
compute node. The attached patch fixes the issue by resetting TMPDIR to /tmp in
those cases. The user still gets an error message from each node involved,
though; would it be better to call info() instead of error() in that code?

    Andriy.
diff -udpr slurm-2.2.4/src/slurmd/slurmstepd/task.c slurm-2.2.4.tmp/src/slurmd/slurmstepd/task.c
--- slurm-2.2.4/src/slurmd/slurmstepd/task.c	2010-11-24 00:05:15.000000000 +0200
+++ slurm-2.2.4.tmp/src/slurmd/slurmstepd/task.c	2011-05-23 15:54:14.000000000 +0300
@@ -515,13 +515,24 @@ exec_task(slurmd_job_t *job, int i, int
 static void
 _make_tmpdir(slurmd_job_t *job)
 {
 	char *tmpdir;
 
 	if (!(tmpdir = getenvp(job->env, "TMPDIR")))
-		return;
+		setenvf(&job->env, "TMPDIR", "/tmp"); /* task may want it set */
 
-	if ((mkdir(tmpdir, 0700) < 0) && (errno != EEXIST))
+	else if (mkdir(tmpdir, 0700) < 0) {
+		if (errno == EEXIST) {
+			struct stat st;
+
+			if (stat(tmpdir, &st) == 0 && /* does user have access? */
+			    S_ISDIR(st.st_mode) && /* is it a directory? */
+			    ((st.st_mode & S_IWOTH) || /* can user write there? */
+			     (st.st_uid == job->uid && (st.st_mode & S_IWUSR))))
+				return;
+		}
 		error ("Unable to create TMPDIR [%s]: %m", tmpdir);
+		setenvf(&job->env, "TMPDIR", "/tmp");
+	}
 
 	return;
 }

Reply via email to