Giuseppe Lavagetto has uploaded a new change for review.

  https://gerrit.wikimedia.org/r/264055

Change subject: jobrunner: contain gwt jobs to run on two specific hosts
......................................................................

jobrunner: contain gwt jobs to run on two specific hosts

GWT jobs are leaking memory, which has caused instability on the
jobrunners. To contain the problem, this change does the following:

* Remove the hack introduced in I44990808 that confined GWT runners to
  odd-numbered jobrunners
* Confine GWT jobs to two specific servers (mw1161 and mw1162), which
  will run those jobs exclusively. We use two newer, high-memory machines.
* Modify the cronjob hack to run every hour on those machines, and to
  restart HHVM once it occupies more than 60% of the available memory

Bug: T122069
Change-Id: I3ff20b324ee98e93a926e0aa1ed568cc9343ae90
---
A hieradata/hosts/mw1161.yaml
A hieradata/hosts/mw1162.yaml
M hieradata/role/common/mediawiki/jobrunner.yaml
M modules/mediawiki/manifests/jobrunner.pp
M modules/mediawiki/templates/jobrunner/jobrunner.conf.erb
5 files changed, 20 insertions(+), 7 deletions(-)


  git pull ssh://gerrit.wikimedia.org:29418/operations/puppet refs/changes/55/264055/1

diff --git a/hieradata/hosts/mw1161.yaml b/hieradata/hosts/mw1161.yaml
new file mode 100644
index 0000000..48d3708
--- /dev/null
+++ b/hieradata/hosts/mw1161.yaml
@@ -0,0 +1,5 @@
+mediawiki::jobrunner::runners_basic: 0
+mediawiki::jobrunner::runners_gwt: 10
+mediawiki::jobrunner::runners_restbase: 0
+mediawiki::jobrunner::runners_translate: 0
+mediawiki::jobrunner::runners_upload: 0
diff --git a/hieradata/hosts/mw1162.yaml b/hieradata/hosts/mw1162.yaml
new file mode 100644
index 0000000..48d3708
--- /dev/null
+++ b/hieradata/hosts/mw1162.yaml
@@ -0,0 +1,5 @@
+mediawiki::jobrunner::runners_basic: 0
+mediawiki::jobrunner::runners_gwt: 10
+mediawiki::jobrunner::runners_restbase: 0
+mediawiki::jobrunner::runners_translate: 0
+mediawiki::jobrunner::runners_upload: 0
diff --git a/hieradata/role/common/mediawiki/jobrunner.yaml b/hieradata/role/common/mediawiki/jobrunner.yaml
index 84d964c..f5c4dea 100644
--- a/hieradata/role/common/mediawiki/jobrunner.yaml
+++ b/hieradata/role/common/mediawiki/jobrunner.yaml
@@ -3,7 +3,7 @@
   - deployment
   - perf-roots
 mediawiki::jobrunner::runners_basic: 20
-mediawiki::jobrunner::runners_gwt: 1
+mediawiki::jobrunner::runners_gwt: 0
 mediawiki::jobrunner::runners_restbase: 3
 mediawiki::jobrunner::runners_translate: 1
 mediawiki::jobrunner::runners_upload: 7
diff --git a/modules/mediawiki/manifests/jobrunner.pp b/modules/mediawiki/manifests/jobrunner.pp
index 3513bb5..dc156d5 100644
--- a/modules/mediawiki/manifests/jobrunner.pp
+++ b/modules/mediawiki/manifests/jobrunner.pp
@@ -107,11 +107,14 @@
             content  => template('mediawiki/jobrunner/site.conf.erb')
         }
 
-        # Hack for T122069: Once a day, check the uptime of HHVM. If HHVM has
-        # been running for more than a day, restart it.
-        cron { 'periodic_hhvm_restart':
-            command => '/bin/ps -C hhvm -o etime= | /bin/grep -q - && /sbin/initctl restart hhvm >/dev/null 2>/dev/null',
-            hour    => fqdn_rand(23, 'periodic_hhvm_restart'),
+        # Hack for T122069: on servers running GWT jobs, restart HHVM
+        # once it occupies more than 60% of the available memory
+        if ($runners_gwt > 0) {
+            cron { 'periodic_hhvm_restart':
+                command => '/bin/ps -C hhvm -o pmem= | awk \'{sum+=$1} END { if (sum > 60.0) print "restart"  }\' | grep -q restart && /usr/sbin/service hhvm restart >/dev/null 2>/dev/null',
+                minute  => 0,
+            }
+
         }
     }
 
diff --git a/modules/mediawiki/templates/jobrunner/jobrunner.conf.erb b/modules/mediawiki/templates/jobrunner/jobrunner.conf.erb
index 4f60685..365fd6b 100644
--- a/modules/mediawiki/templates/jobrunner/jobrunner.conf.erb
+++ b/modules/mediawiki/templates/jobrunner/jobrunner.conf.erb
@@ -26,7 +26,7 @@
             ]
         },
         "gwt": {
-            "runners": <%= @hostname.split('.').first.slice(-1).to_i % 2 == 1 ? @runners_gwt  : 0 %>,
+            "runners": <%=  @runners_gwt  %>,
             "include": [
                 "gwtoolsetUploadMetadataJob",
                 "gwtoolsetUploadMediafileJob",

-- 
To view, visit https://gerrit.wikimedia.org/r/264055
To unsubscribe, visit https://gerrit.wikimedia.org/r/settings

Gerrit-MessageType: newchange
Gerrit-Change-Id: I3ff20b324ee98e93a926e0aa1ed568cc9343ae90
Gerrit-PatchSet: 1
Gerrit-Project: operations/puppet
Gerrit-Branch: production
Gerrit-Owner: Giuseppe Lavagetto <[email protected]>

_______________________________________________
MediaWiki-commits mailing list
[email protected]
https://lists.wikimedia.org/mailman/listinfo/mediawiki-commits

Reply via email to