jenkins-bot has submitted this change and it was merged. ( https://gerrit.wikimedia.org/r/349364 )
Change subject: Limit how much work the Lua chron script can do at once ...................................................................... Limit how much work the Lua chron script can do at once This limits the chance of tying up the redis server too long. Change-Id: I7729c53ab5bd2059fdd513cf9ede41c1aefe260a --- M redisJobChronService 1 file changed, 8 insertions(+), 5 deletions(-) Approvals: Krinkle: Looks good to me, approved jenkins-bot: Verified diff --git a/redisJobChronService b/redisJobChronService index c17cac8..9c7f9d4 100755 --- a/redisJobChronService +++ b/redisJobChronService @@ -10,7 +10,7 @@ RedisJobService::checkEnvironment(); class RedisJobChronService extends RedisJobService { - const LUA_WAIT_US = 5000; // time to wait between LUA scripts + const LUA_WAIT_US = 5000; // time to wait between Lua scripts const PERIOD_WAIT_US = 1e6; // time between task runs /** @@ -294,6 +294,8 @@ /** @var string[] JSON encoded queue name list */ private $queueIds; + const LUA_MAX_JOBS = 500; // limit on the number of jobs to change state in a Lua script + /** * @param RedisJobChronService $service * @param array $queueIds JSON encoded queue name list (type, domain) @@ -331,6 +333,7 @@ $this->service->getAttemptsForType( $type ), # ARGV[3] $now, # ARGV[4] $queueId, # ARGV[5] + self::LUA_MAX_JOBS # ARGV[6] ), 'keys' => 7 # number of first argument(s) that are keys ); @@ -355,7 +358,7 @@ static $script = <<<LUA local kClaimed, kAttempts, kUnclaimed, kData, kAbandoned, kDelayed, kQwJobs = unpack(KEYS) - local rClaimCutoff, rPruneCutoff, rAttempts, rTime, queueId = unpack(ARGV) + local rClaimCutoff, rPruneCutoff, rAttempts, rTime, queueId, rLimit = unpack(ARGV) local released,abandoned,pruned,undelayed,ready = 0,0,0,0,0 -- Short-circuit if there is nothing at all in the queue if redis.call('exists',kData) == 0 then @@ -364,7 +367,7 @@ end -- Get all non-dead jobs that have an expired claim on them. -- The score for each item is the last claim timestamp (UNIX). - local staleClaims = redis.call('zRangeByScore',kClaimed,0,rClaimCutoff) + local staleClaims = redis.call('zRangeByScore',kClaimed,0,rClaimCutoff,'limit',0,rLimit) for k,id in ipairs(staleClaims) do local timestamp = redis.call('zScore',kClaimed,id) local attempts = 1*redis.call('hGet',kAttempts,id) @@ -381,7 +384,7 @@ end -- Get all of the dead jobs that have been marked as dead for too long. -- The score for each item is the last claim timestamp (UNIX). - local deadClaims = redis.call('zRangeByScore',kAbandoned,0,rPruneCutoff) + local deadClaims = redis.call('zRangeByScore',kAbandoned,0,rPruneCutoff,'limit',0,rLimit) for k,id in ipairs(deadClaims) do -- Stale and out of attempts: remove any traces of the job redis.call('zRem',kAbandoned,id) @@ -390,7 +393,7 @@ pruned = pruned + 1 end -- Get the list of ready delayed jobs, sorted by readiness (UNIX timestamp) - local ids = redis.call('zRangeByScore',kDelayed,0,rTime) + local ids = redis.call('zRangeByScore',kDelayed,0,rTime,'limit',0,rLimit) -- Migrate the jobs from the "delayed" set to the "unclaimed" list for k,id in ipairs(ids) do redis.call('lPush',kUnclaimed,id) -- To view, visit https://gerrit.wikimedia.org/r/349364 To unsubscribe, visit https://gerrit.wikimedia.org/r/settings Gerrit-MessageType: merged Gerrit-Change-Id: I7729c53ab5bd2059fdd513cf9ede41c1aefe260a Gerrit-PatchSet: 3 Gerrit-Project: mediawiki/services/jobrunner Gerrit-Branch: master Gerrit-Owner: Aaron Schulz <asch...@wikimedia.org> Gerrit-Reviewer: Aaron Schulz <asch...@wikimedia.org> Gerrit-Reviewer: Elukey <ltosc...@wikimedia.org> Gerrit-Reviewer: Krinkle <krinklem...@gmail.com> Gerrit-Reviewer: jenkins-bot <> _______________________________________________ MediaWiki-commits mailing list MediaWiki-commits@lists.wikimedia.org https://lists.wikimedia.org/mailman/listinfo/mediawiki-commits