BBlack has submitted this change and it was merged. Change subject: cron_splay() with first use in cache_upload ......................................................................
cron_splay() with first use in cache_upload Change-Id: I995c8e55018bbd6544a55cae744658e972c72726 --- M modules/role/manifests/cache/upload.pp M modules/varnish/templates/varnish-backend-restart.cron.erb A modules/wmflib/lib/puppet/parser/functions/cron_splay.rb 3 files changed, 147 insertions(+), 10 deletions(-) Approvals: BBlack: Looks good to me, approved jenkins-bot: Verified diff --git a/modules/role/manifests/cache/upload.pp b/modules/role/manifests/cache/upload.pp index 908f7ad..ee8707a 100644 --- a/modules/role/manifests/cache/upload.pp +++ b/modules/role/manifests/cache/upload.pp @@ -131,15 +131,20 @@ } # XXX: temporary, we need this to mitigate T145661 - $rt_parts = split(inline_template("<%= require 'digest/md5'; x = Random.new(Digest::MD5.hexdigest(@fqdn).to_i(16)).rand(1440); hh = x / 60; mm = x % 60; hh.to_s() + ':' + mm.to_s(); %>"), ':') - $be_restart_h = $rt_parts[0] - $be_restart_m = $rt_parts[1] + if $::realm == 'production' { + $hnodes = hiera('cache::upload::nodes') + $all_nodes = array_concat($hnodes['eqiad'], $hnodes['esams'], $hnodes['ulsfo'], $hnodes['codfw']) + $times = cron_splay($all_nodes, 'daily', 'upload-backend-restarts') + $be_restart_h = $times['hour'] + $be_restart_m = $times['minute'] + $be_restart_d = $times['weekday'] - file { '/etc/cron.d/varnish-backend-restart': - mode => '0444', - owner => 'root', - group => 'root', - content => template('varnish/varnish-backend-restart.cron.erb'), - require => File['/usr/local/sbin/varnish-backend-restart'], + file { '/etc/cron.d/varnish-backend-restart': + mode => '0444', + owner => 'root', + group => 'root', + content => template('varnish/varnish-backend-restart.cron.erb'), + require => File['/usr/local/sbin/varnish-backend-restart'], + } } } diff --git a/modules/varnish/templates/varnish-backend-restart.cron.erb b/modules/varnish/templates/varnish-backend-restart.cron.erb index bc88893..2844280 100644 --- a/modules/varnish/templates/varnish-backend-restart.cron.erb +++ 
#
# cron_splay.rb
#
# Puppet parser function that assigns each host of a known set an evenly
# spaced crontab slot within an hourly, daily or weekly period.
#
# Introduced by Gerrit change https://gerrit.wikimedia.org/r/311239
# (Change-Id: I995c8e55018bbd6544a55cae744658e972c72726).
#

require 'digest/md5'

module Puppet::Parser::Functions
  # cron_splay(hosts, period, seed)
  #
  # hosts  - Array of fqdn strings the cron applies to; must contain the fqdn
  #          of the node being compiled (looked up as '::fqdn').
  # period - String, one of 'hourly', 'daily' or 'weekly'.
  # seed   - String mixed into the per-DC ordering and the global time offset.
  #
  # Returns a Hash with the keys 'minute', 'hour' and 'weekday'. Fields that
  # do not apply to the chosen period are the String '*'; the others are
  # Integers.
  #
  # Raises Puppet::ParseError on a wrong argument count or type, an unknown
  # period, more hosts than minutes in the period, or when the current fqdn is
  # not in hosts.
  newfunction(:cron_splay, :type => :rvalue, :doc => <<-EOS
Given an array of fqdn which a cron is applicable to, and a period arg which is
one of 'hourly', 'daily', or 'weekly', this sorts the fqdn set with
per-datacenter interleaving for DC-numbered hosts, splays them to fixed even
intervals within the total period, and then outputs a set of crontab time
fields for the fqdn currently being compiled-for.

The idea here is to ensure each host in the set executes the cron once per time
period, and also ensure the time between hosts is consistent (no edge cases
much closer than the average) by splaying them as evenly as possible with
rounding errors.  For the case of hosts with NNNN numbers indicating the
datacenter in the first digit, we also maximize the period between any two
hosts in a given datacenter by interleaving sorted per-DC lists of hosts before
splaying.

The third and final argument is a static seed which modulates the splayed
values in two different ways to minimize the effects of multiple cron_splay()
with the same hostlist and period.  It is used to select a deterministically
random "offset" for the splayed time values (so that the first host doesn't
always start at 00:00), and is also used to permute the order of the hosts
within each DC uniquely.

*Examples:*

    $times = cron_splay($hosts, 'weekly', 'foo-static-seed')
    cron { 'foo':
        minute   => $times['minute'],
        hour     => $times['hour'],
        weekday  => $times['weekday'],
    }

    EOS
  ) do |arguments|

    if arguments.size != 3
      raise(Puppet::ParseError, "cron_splay(): Wrong number of arguments " +
        "given (#{arguments.size} for 3)")
    end

    hosts, period, seed = arguments

    unless hosts.is_a?(Array)
      raise(Puppet::ParseError, 'cron_splay(): Argument 1 must be an array')
    end

    unless period.is_a?(String)
      raise(Puppet::ParseError, 'cron_splay(): Argument 2 must be an string')
    end

    unless seed.is_a?(String)
      raise(Puppet::ParseError, 'cron_splay(): Argument 3 must be an string')
    end

    # Total number of minutes in one period.
    mins = case period
           when 'hourly' then 60
           when 'daily'  then 1440
           when 'weekly' then 10080
           else
             raise(Puppet::ParseError, 'cron_splay(): invalid period')
           end

    # Avoid this edge case for now.  At sufficiently large host counts and
    # small period, randomization is probably better anyways.  This guard also
    # guarantees the per-host interval computed below is at least 1.
    if hosts.length > mins
      raise(Puppet::ParseError, 'cron_splay(): too many hosts for period')
    end

    # Split hosts into 10 lists keyed on the first digit of the first /NNNN/
    # run in the name (leading digit 1-9); hosts without such a run go to
    # list 0.
    sublists = Array.new(10) { [] }
    hosts.each do |h|
      match = /([1-9])[0-9]{3}/.match(h)
      sublists[match ? match[1].to_i : 0].push(h)
    end

    # Sort each sublist into a deterministic order based on seed.
    sublists.each do |s|
      s.sort_by! { |x| Digest::MD5.hexdigest(seed + x) }
    end

    # Interleave sublists into "ordered": zip the longest list with all the
    # others so nothing is truncated, then drop the nil padding that zip adds
    # for the shorter lists.
    longest = sublists.max_by(&:length)
    sublists -= [longest]
    ordered = longest.zip(*sublists).flatten.compact

    # Find the index of this host in ordered.
    this_idx = ordered.index(lookupvar('::fqdn'))
    if this_idx.nil?
      raise(Puppet::ParseError, 'cron_splay(): this host not in set')
    end

    # Truncated-integer splayed value (minutes into the period) of this host.
    tval = this_idx * mins / ordered.length

    # Use the seed (again) to add a time offset to the splayed values, the
    # time offset never being larger than the splayed interval, so the last
    # host still lands inside the period.
    interval = mins / ordered.length
    tval += Digest::MD5.hexdigest(seed).to_i(16) % interval

    # Generate the output: crontab fields for this host.
    output = {}
    output['minute'] = tval % 60
    output['hour'] = period == 'hourly' ? '*' : (tval / 60) % 24
    output['weekday'] = period == 'weekly' ? tval / 1440 : '*'

    output
  end
end

# vim: set ts=2 sw=2 et :