(Replying to myself) Sorry, I didn't copy the entire set of propose changes for jobs and jobs.makefile (they got cut off). So I tweaked it further, and here's the entire set of diffs. Suggestions welcome.
--- David A. Wheeler ==================================== diff --git a/scripts/jobs b/scripts/jobs index 045a57ca..3a0f9b8e 100755 --- a/scripts/jobs +++ b/scripts/jobs @@ -43,6 +43,10 @@ find_work_list () { done } +# If metamath is *already* on the PATH, that one will be used. +# If not, increase the likelihood of finding a working "metamath" command +PATH="$PATH:${PWD}/metamath:${HOME}/bin" + command -v metamath > /dev/null || die 'Metamath program not on path' [ "$#" -gt 0 ] || die 'Requires parameters, e.g., 112 118-140' @@ -58,13 +62,17 @@ master_log='metamathjobs/master.log' # Generate expanded list of space-separated job numbers work="$(find_work_list "$@" | tr '\r\n' ' ')" -echo 'Starting jobs. View job NUM with: tail -c +0 -f metamathjobs/jobNUM.log' -echo "View overall state with: tail -c +0 -f ${master_log}" +echo "Starting jobs at $(date)" +echo "View overall (master) state log with: tail -c +0 -f ${master_log}" +echo 'View job NUM log with: tail -c +0 -f metamathjobs/jobNUM.log' +echo 'Print number of completed jobs with: ls metamathjobs/*.done | wc -l' echo +# Use GNU make to run jobs in parallel. We use GNU make, not GNU parallel, +# to simplify skipping jobs we've already completed (which have .done files). # We use "nproc" to find the number of CPUs if NPROC is not set. -nohup make --jobs "${NPROC:-$(nproc)}" -r -f scripts/jobs.makefile \ +nohup nice make --jobs "${NPROC:-$(nproc)}" -r -f scripts/jobs.makefile \ work="$work" > "$master_log" 2>&1 & echo diff --git a/scripts/jobs.makefile b/scripts/jobs.makefile index c5d12fbc..dcf6a78c 100644 --- a/scripts/jobs.makefile +++ b/scripts/jobs.makefile @@ -2,23 +2,24 @@ # "work" is a list of simple numbers. all: alljobs + @echo 'Completion time: $(date)' + @echo @echo 'DONE.' -LOG_LIST := \ - $(foreach num, $(work), metamathjobs/job$(num).log) +DONE_LIST := \ + $(foreach num, $(work), metamathjobs/job$(num).done) -metamathjobs/job%.log: metamathjobs/job%.cmd - @echo "Running job $*" - @rm -f "metamathjobs/job$(*).log" +metamathjobs/job%.done: metamathjobs/job%.cmd + @echo 'Running job $(*)' + @rm -f 'metamathjobs/job$(*).log' time metamath 'read set.mm' \ "open log 'metamathjobs/job$(*).log'" \ "submit 'metamathjobs/job$(*).cmd' /silent" quit 2>&1 + @touch 'metamathjobs/job$(*).done' -alljobs: $(LOG_LIST) +alljobs: $(DONE_LIST) -# Delete what's generated if a program returns a nonzero (error) result. -# This can be annoying for debugging, but it means that we can simply re-run -# make if a process is stopped. -ifndef KEEP_ON_ERROR +# Delete what we're creating on error. +# NOTE: We do *NOT* delete partial logs, since they are not make targets. +# Keeping logs around simplifies debugging. .DELETE_ON_ERROR: -endif -- You received this message because you are subscribed to the Google Groups "Metamath" group. To unsubscribe from this group and stop receiving emails from it, send an email to metamath+unsubscr...@googlegroups.com. To view this discussion on the web visit https://groups.google.com/d/msgid/metamath/E1j2d13-0002E8-AC%40rmmprod07.runbox.