By sleeping for 100 ms after receiving a notification for a changed job file, the job is given some additional time to change again. This significantly reduces the number of LUXI calls for WaitForJobChanges (depending on the job; in my tests with “gnt-cluster verify --debug-simulate-errors” by about 80%), and improves performance (the same job went from around 7 seconds to around 3.5 seconds).
This method is not perfect. The algorithm could be made more complex, e.g. by increasing the delay on each change, etc., but for now this simple change provides a good improvement.

Signed-off-by: Michael Hanselmann <[email protected]>
---
 lib/jqueue.py |   12 ++++++++++--
 1 files changed, 10 insertions(+), 2 deletions(-)

diff --git a/lib/jqueue.py b/lib/jqueue.py
index ff67760..743152e 100644
--- a/lib/jqueue.py
+++ b/lib/jqueue.py
@@ -35,6 +35,7 @@ import re
 import time
 import weakref
 import threading
+import itertools
 
 try:
   # pylint: disable-msg=E0611
@@ -721,7 +722,13 @@ class _WaitForJobChangesHelper(object):
   """
 
   @staticmethod
-  def _CheckForChanges(job_load_fn, check_fn):
+  def _CheckForChanges(counter, job_load_fn, check_fn):
+    if counter.next() > 0:
+      # If this isn't the first check the job is given some more time to change
+      # again. This gives better performance for jobs generating many
+      # changes/messages.
+      time.sleep(0.1)
+
     job = job_load_fn()
     if not job:
       raise errors.JobLost()
@@ -750,12 +757,13 @@ class _WaitForJobChangesHelper(object):
     @param timeout: maximum time to wait in seconds
 
     """
+    counter = itertools.count()
     try:
       check_fn = _JobChangesChecker(fields, prev_job_info, prev_log_serial)
       waiter = _JobChangesWaiter(filename)
       try:
         return utils.Retry(compat.partial(self._CheckForChanges,
-                                          job_load_fn, check_fn),
+                                          counter, job_load_fn, check_fn),
                            utils.RETRY_REMAINING_TIME, timeout,
                            wait_fn=waiter.Wait)
       finally:
-- 
1.7.6
