I found this on a cluster running Slurm 2.6.9 with select/linear. I
think the problem also exists in newer versions.

When there are completing nodes in a partition, the backfill loop may
end early: _try_sched() thinks the job can run immediately, but
select_nodes() cannot allocate nodes for it and returns
ESLURM_NODES_BUSY. No other jobs in the queue will be backfilled
until that job either can be started or fails to backfill.
From c470e36ace3c6ab9afcb4575b572c82096e2393f Mon Sep 17 00:00:00 2001
From: Hongjia Cao <[email protected]>
Date: Mon, 12 May 2014 20:18:05 +0800
Subject: [PATCH] fix of comp nodes causing backfill to end early

Remove completing nodes from the available node bitmap before calling
_try_sched() for a job, as is already the case in select_nodes().
Otherwise, if _try_sched() thinks the job can run now but
select_nodes() returns ESLURM_NODES_BUSY, the backfill loop is ended
early.
---
 src/plugins/sched/backfill/backfill.c | 13 ++++++++++++-
 1 file changed, 12 insertions(+), 1 deletion(-)

diff --git a/src/plugins/sched/backfill/backfill.c b/src/plugins/sched/backfill/backfill.c
index ff3e325..a1d04bf 100644
--- a/src/plugins/sched/backfill/backfill.c
+++ b/src/plugins/sched/backfill/backfill.c
@@ -653,7 +653,7 @@ static int _attempt_backfill(void)
 	uint32_t time_limit, comp_time_limit, orig_time_limit, part_time_limit;
 	uint32_t min_nodes, max_nodes, req_nodes;
 	bitstr_t *avail_bitmap = NULL, *resv_bitmap = NULL;
-	bitstr_t *exc_core_bitmap = NULL;
+	bitstr_t *exc_core_bitmap = NULL, *non_cg_bitmap = NULL;
 	time_t now, sched_start, later_start, start_res, resv_end;
 	node_space_map_t *node_space;
 	struct timeval bf_time1, bf_time2;
@@ -710,6 +710,9 @@ static int _attempt_backfill(void)
 
 	gettimeofday(&bf_time1, NULL);
 
+	non_cg_bitmap = bit_copy(cg_node_bitmap);
+	bit_not(non_cg_bitmap);
+
 	slurmctld_diag_stats.bf_queue_len = list_count(job_queue);
 	slurmctld_diag_stats.bf_queue_len_sum += slurmctld_diag_stats.
 						 bf_queue_len;
@@ -769,6 +772,9 @@ static int _attempt_backfill(void)
 				xfree(job_queue_rec);
 				break;
 			}
+			/* cg_node_bitmap may be changed */
+			bit_copybits(non_cg_bitmap, cg_node_bitmap);
+			bit_not(non_cg_bitmap);
 			/* Reset backfill scheduling timers, resume testing */
 			sched_start = time(NULL);
 			gettimeofday(&start_tv, NULL);
@@ -950,6 +956,9 @@ static int _attempt_backfill(void)
 				rc = 1;
 				break;
 			}
+			/* cg_node_bitmap may be changed */
+			bit_copybits(non_cg_bitmap, cg_node_bitmap);
+			bit_not(non_cg_bitmap);
 
 			/* With bf_continue configured, the original job could
 			 * have been scheduled or cancelled and purged.
@@ -988,6 +997,7 @@ static int _attempt_backfill(void)
 		/* Identify usable nodes for this job */
 		bit_and(avail_bitmap, part_ptr->node_bitmap);
 		bit_and(avail_bitmap, up_node_bitmap);
+		bit_and(avail_bitmap, non_cg_bitmap);
 		for (j=0; ; ) {
 			if ((node_space[j].end_time > start_res) &&
 			     node_space[j].next && (later_start == 0))
@@ -1176,6 +1186,7 @@ static int _attempt_backfill(void)
 	FREE_NULL_BITMAP(avail_bitmap);
 	FREE_NULL_BITMAP(exc_core_bitmap);
 	FREE_NULL_BITMAP(resv_bitmap);
+	FREE_NULL_BITMAP(non_cg_bitmap);
 
 	for (i=0; ; ) {
 		FREE_NULL_BITMAP(node_space[i].avail_bitmap);
-- 
2.0.0.rc0

Reply via email to