BBlack has submitted this change and it was merged.

Change subject: cron_splay() with first use in cache_upload
......................................................................


cron_splay() with first use in cache_upload

Change-Id: I995c8e55018bbd6544a55cae744658e972c72726
---
M modules/role/manifests/cache/upload.pp
M modules/varnish/templates/varnish-backend-restart.cron.erb
A modules/wmflib/lib/puppet/parser/functions/cron_splay.rb
3 files changed, 147 insertions(+), 10 deletions(-)

Approvals:
  BBlack: Looks good to me, approved
  jenkins-bot: Verified



diff --git a/modules/role/manifests/cache/upload.pp 
b/modules/role/manifests/cache/upload.pp
index 908f7ad..ee8707a 100644
--- a/modules/role/manifests/cache/upload.pp
+++ b/modules/role/manifests/cache/upload.pp
@@ -131,15 +131,20 @@
     }
 
     # XXX: temporary, we need this to mitigate T145661
-    $rt_parts = split(inline_template("<%= require 'digest/md5'; x = 
Random.new(Digest::MD5.hexdigest(@fqdn).to_i(16)).rand(1440); hh = x / 60; mm = 
x % 60; hh.to_s() + ':' + mm.to_s(); %>"), ':')
-    $be_restart_h = $rt_parts[0]
-    $be_restart_m = $rt_parts[1]
+    if $::realm == 'production' {
+        $hnodes = hiera('cache::upload::nodes')
+        $all_nodes = array_concat($hnodes['eqiad'], $hnodes['esams'], 
$hnodes['ulsfo'], $hnodes['codfw'])
+        $times = cron_splay($all_nodes, 'daily', 'upload-backend-restarts')
+        $be_restart_h = $times['hour']
+        $be_restart_m = $times['minute']
+        $be_restart_d = $times['weekday']
 
-    file { '/etc/cron.d/varnish-backend-restart':
-        mode    => '0444',
-        owner   => 'root',
-        group   => 'root',
-        content => template('varnish/varnish-backend-restart.cron.erb'),
-        require => File['/usr/local/sbin/varnish-backend-restart'],
+        file { '/etc/cron.d/varnish-backend-restart':
+            mode    => '0444',
+            owner   => 'root',
+            group   => 'root',
+            content => template('varnish/varnish-backend-restart.cron.erb'),
+            require => File['/usr/local/sbin/varnish-backend-restart'],
+        }
     }
 }
diff --git a/modules/varnish/templates/varnish-backend-restart.cron.erb 
b/modules/varnish/templates/varnish-backend-restart.cron.erb
index bc88893..2844280 100644
--- a/modules/varnish/templates/varnish-backend-restart.cron.erb
+++ b/modules/varnish/templates/varnish-backend-restart.cron.erb
@@ -1 +1 @@
-<%= @be_restart_m %> <%= @be_restart_h %> * * * root 
/usr/local/sbin/varnish-backend-restart > /dev/null
+<%= @be_restart_m %> <%= @be_restart_h %> * * <%= @be_restart_d %> root 
/usr/local/sbin/varnish-backend-restart > /dev/null
diff --git a/modules/wmflib/lib/puppet/parser/functions/cron_splay.rb 
b/modules/wmflib/lib/puppet/parser/functions/cron_splay.rb
new file mode 100644
index 0000000..08fa6ab
--- /dev/null
+++ b/modules/wmflib/lib/puppet/parser/functions/cron_splay.rb
@@ -0,0 +1,132 @@
+#
+# cron_splay.rb
+#
+
+require 'digest/md5'
+
+module Puppet::Parser::Functions
+  newfunction(:cron_splay, :type => :rvalue, :doc => <<-EOS
+Given an array of fqdn which a cron is applicable to, and a period arg which is
+one of 'hourly', 'daily', or 'weekly', this sorts the fqdn set with
+per-datacenter interleaving for DC-numbered hosts, splays them to fixed even
+intervals within the total period, and then outputs a set of crontab time
+fields for the fqdn currently being compiled-for.
+
+The idea here is to ensure each host in the set executes the cron once per time
+period, and also ensure the time between hosts is consistent (no edge cases
+much closer than the average) by splaying them as evenly as possible with
+rounding errors.  For the case of hosts with NNNN numbers indicating the
+datacenter in the first digit, we also maximize the period between any two
+hosts in a given datacenter by interleaving sorted per-DC lists of hosts before
+splaying.
+
+The third and final argument is a static seed which modulates the splayed
+values in two different ways to minimize the effects of multiple cron_splay()
+with the same hostlist and period.  It is used to select a determinstically
+random "offset" for the splayed time values (so that the first host doesn't
+always start at 00:00), and is also used to permute the order of the hosts
+within each DC uniquely.
+
+*Examples:*
+
+    $times = fqdn_splay($hosts, 'weekly', 'foo-static-seed')
+    cron { 'foo':
+        minute   => $times['minute'],
+        hour     => $times['hour'],
+        weekday  => $times['weekday'],
+    }
+
+    EOS
+  ) do |arguments|
+
+    raise(Puppet::ParseError, "cron_splay(): Wrong number of arguments " +
+      "given (#{arguments.size} for 3)") if arguments.size != 3
+
+    hosts = arguments[0]
+    period = arguments[1]
+    seed = arguments[2]
+
+    unless hosts.is_a?(Array)
+      raise(Puppet::ParseError, 'cron_splay(): Argument 1 must be an array')
+    end
+
+    unless period.is_a?(String)
+      raise(Puppet::ParseError, 'cron_splay(): Argument 2 must be an string')
+    end
+
+    unless seed.is_a?(String)
+      raise(Puppet::ParseError, 'cron_splay(): Argument 3 must be an string')
+    end
+
+    case period
+    when 'hourly'
+       mins = 60
+    when 'daily'
+       mins = 1440
+    when 'weekly'
+       mins = 10080
+    else
+      raise(Puppet::ParseError, 'cron_splay(): invalid period')
+    end
+
+    # Avoid this edge case for now.  At sufficiently large host counts and
+    # small period, randomization is probably better anyways.
+    if hosts.length > mins
+      raise(Puppet::ParseError, 'cron_splay(): too many hosts for period')
+    end
+
+    # split hosts into N lists based the first digit of /NNNN/, defaulting to 
zero
+    sublists = [ [], [], [], [], [], [], [], [], [], [] ]
+    for h in hosts
+      match = /([1-9])[0-9]{3}/.match(h)
+      if match
+        sublists[match[1].to_i].push(h)
+      else
+        sublists[0].push(h)
+      end
+    end
+
+    # sort each sublist into a determinstic order based on seed
+    for s in sublists
+      s.sort_by! { |x| Digest::MD5.hexdigest(seed + x) }
+    end
+
+    # interleave sublists into "ordered"
+    longest = sublists.max_by(&:length)
+    sublists -= [longest]
+    ordered = longest.zip(*sublists).flatten.compact
+
+    # find the index of this host in ordered
+    this_idx = ordered.index(lookupvar('::fqdn'))
+    if this_idx.nil?
+      raise(Puppet::ParseError, 'cron_splay(): this host not in set')
+    end
+
+    # find the truncated-integer splayed value of this host
+    tval = this_idx * mins / ordered.length
+
+    # use the seed (again) to add a time offset to the splayed values,
+    # the time offset never being larger than the splayed interval
+    tval += Digest::MD5.hexdigest(seed).to_i(16) % (mins / ordered.length)
+
+    # generate the output
+    output = {}
+    output['minute'] = tval % 60
+
+    if period == 'hourly'
+      outout['hour'] = '*'
+    else
+      output['hour'] = (tval / 60) % 24
+    end
+
+    if period == 'weekly'
+      output['weekday'] = tval / 1440
+    else
+      output['weekday'] = '*'
+    end
+
+    return output
+  end
+end
+
+# vim: set ts=2 sw=2 et :

-- 
To view, visit https://gerrit.wikimedia.org/r/311239
To unsubscribe, visit https://gerrit.wikimedia.org/r/settings

Gerrit-MessageType: merged
Gerrit-Change-Id: I995c8e55018bbd6544a55cae744658e972c72726
Gerrit-PatchSet: 5
Gerrit-Project: operations/puppet
Gerrit-Branch: production
Gerrit-Owner: BBlack <bbl...@wikimedia.org>
Gerrit-Reviewer: BBlack <bbl...@wikimedia.org>
Gerrit-Reviewer: Ema <e...@wikimedia.org>
Gerrit-Reviewer: jenkins-bot <>

_______________________________________________
MediaWiki-commits mailing list
MediaWiki-commits@lists.wikimedia.org
https://lists.wikimedia.org/mailman/listinfo/mediawiki-commits

Reply via email to