branch: main commit 9f63d3ae6f0a9c7185287a8c563cc72201cf7376 Author: Ludovic Courtès <l...@gnu.org> AuthorDate: Tue Sep 10 11:20:38 2024 +0200
database: Introduce ‘db-reschedule-stale-builds’ and call it periodically. Doing this task in ‘db-remove-workers’ was inappropriate because that procedure may be called very infrequently if no workers are ever removed. * src/cuirass/database.scm (db-reschedule-stale-builds): New procedure, with code moved from… (db-remove-workers): … here. * src/cuirass/scripts/remote-server.scm (spawn-periodic-updates-fiber): Call ‘db-reschedule-stale-builds’. --- src/cuirass/database.scm | 22 +++++++++++++--------- src/cuirass/scripts/remote-server.scm | 7 ++++--- 2 files changed, 17 insertions(+), 12 deletions(-) diff --git a/src/cuirass/database.scm b/src/cuirass/database.scm index 3cfdeec..70e965a 100644 --- a/src/cuirass/database.scm +++ b/src/cuirass/database.scm @@ -216,6 +216,7 @@ db-get-worker db-get-workers db-worker-current-builds + db-reschedule-stale-builds db-remove-workers db-clear-workers db-clear-build-queue @@ -2231,6 +2232,18 @@ Builds.starttime DESC, Builds.id DESC;")) ;; switched back to "scheduled". (* 30 60)) +(define (db-reschedule-stale-builds) + "Reschedule builds that have been in \"submitted\" state for too long." + (with-db-connection db + (let ((rescheduled (exec-query/bind db "UPDATE Builds +SET status = " (build-status scheduled) " +WHERE status = " (build-status submitted) " AND +(extract(epoch from now())::int - starttime) > " %build-submission-timeout +";"))) + (unless (zero? rescheduled) + (log-info "rescheduled ~a builds that were submitted more than ~as ago" + rescheduled %build-submission-timeout))))) + (define (db-remove-workers names) "Remove workers with any of the given NAMES. Also restart the builds that were started on those workers." @@ -2249,15 +2262,6 @@ WHERE status = " (build-status started) " (log-info "restarted ~a builds that were on unresponsive workers" restarted))) - (let ((rescheduled (exec-query/bind db "UPDATE Builds -SET status = " (build-status scheduled) " -WHERE status = " (build-status submitted) " AND -(extract(epoch from now())::int - starttime) > " %build-submission-timeout -";"))) - (unless (zero? rescheduled) - (log-info "rescheduled ~a builds that were submitted more than ~as ago" - rescheduled %build-submission-timeout))) - (let ((removed (exec-query/bind db "DELETE FROM Workers WHERE name = ANY(" name-array ");"))) (unless (zero? removed) diff --git a/src/cuirass/scripts/remote-server.scm b/src/cuirass/scripts/remote-server.scm index 99c2052..f8f19da 100644 --- a/src/cuirass/scripts/remote-server.scm +++ b/src/cuirass/scripts/remote-server.scm @@ -381,9 +381,10 @@ requested received on its channel." (lambda () (let loop () (let ((resumable (db-update-resumable-builds!)) - (failed (db-update-failed-builds!))) - (log-info "periodic update: ~a resumable, ~a failed builds" - resumable failed)) + (failed (db-update-failed-builds!)) + (stale (db-reschedule-stale-builds))) + (log-info "periodic update: ~a resumable, ~a failed builds, ~a stale builds" + resumable failed stale)) (sleep 30) (loop)))))