Giuseppe Lavagetto has submitted this change and it was merged.

Change subject: role::mediawiki::webserver: restart hhvm routinely
......................................................................


role::mediawiki::webserver: restart hhvm routinely

This cron will restart HHVM if it has been running for more than 3 days.

Bug: T147773
Change-Id: I204331607ba80169fafdd205ba9bffeeabf9a443
---
A modules/hhvm/files/hhvm-needs-restart.sh
M modules/hhvm/manifests/init.pp
M modules/role/manifests/mediawiki/webserver.pp
3 files changed, 111 insertions(+), 0 deletions(-)

Approvals:
  Giuseppe Lavagetto: Verified; Looks good to me, approved



diff --git a/modules/hhvm/files/hhvm-needs-restart.sh b/modules/hhvm/files/hhvm-needs-restart.sh
new file mode 100755
index 0000000..cc9f062
--- /dev/null
+++ b/modules/hhvm/files/hhvm-needs-restart.sh
@@ -0,0 +1,84 @@
+#!/bin/bash
+# Script used to determine if HHVM needs to be restarted.
+# Will return an exit code of 0 if HHVM needs to be restarted,
+# and 1 if it doesn't.
+
+# Maximum number of days HHVM should run without being restarted
+MAX_RUN_DAYS=3
+# Maximum memory occupation from HHVM before being restarted
+MAX_MEM=50
+# Maximum queue size with respect to the load before being restarted.
+# This is very dangerous and should only be defined after very careful 
consideration
+MAX_QUEUE_RATIO=
+
+function usage {
+    cat <<EOF
+hhvm-needs-restart [-m MAX_MEM] [-d DAYS] [-q QUEUE_RATIO]
+
+Can be used to conditionally verify if any of the conditions for restarting 
HHVM
+are met. Checks are set by command-line flags
+
+ -d sets the number of days the HHVM process might be running before being
+    restarted. Defaults to 3 days.
+ -m sets the maximum % of memory HHVM can use; if the threshold is exceeded,
+    a restart is needed. Defaults to 50%.
+ -q the ratio of queued requests compared to the ones being processed above
+    which HHVM is assumed to be in a bad state. This is experimental and should
+    only be used interactively. Disabled by default.
+EOF
+    exit 1
+}
+
+while getopts ":m:d:q:" opt; do
+    case "${opt}" in
+        m)
+            test $OPTARG && MAX_MEM=$OPTARG
+            ;;
+        d)
+            test $OPTARG && MAX_RUN_DAYS=$OPTARG
+            ;;
+        q)
+            if [ $OPTARG ]; then
+                echo "WARNING: setting up the queue check is experimental"
+                MAX_QUEUE_RATIO=$OPTARG
+            fi
+            ;;
+        *)
+            usage
+            ;;
+    esac
+done
+
+
+# Uptime threshold, converted from days to seconds.
+MAX_RUN=$(( 86400 * ${MAX_RUN_DAYS} ))
+
+# Elapsed time in seconds (etimes) of the first hhvm process listed by ps.
+RUN_TIME=$(ps -C hhvm -o etimes= | head -n 1 )
+
+# ps printed nothing: HHVM is not running, so there is nothing to restart.
+if ! test ${RUN_TIME}; then
+    exit 1
+fi
+
+# Uptime check: report and stop here when HHVM has been up for too long.
+if (( ${RUN_TIME} > ${MAX_RUN} ))
+then
+    echo "HHVM needs restarting: running since ${RUN_TIME} seconds"
+    exit 0
+fi
+
+# Used Memory
+/bin/ps -C hhvm -o pmem= | awk -v max_mem=${MAX_MEM} '{sum+=$1}
+END {
+  if (sum > max_mem) {
+    print "HHVM needs restart: using " sum "% of available memory";
+    exit 0;
+  }
+}'
+
+# Queue size (experimental, only active when a ratio was set with -q)
+# If not defined, just exit as if everything is fine
+test -z "${MAX_QUEUE_RATIO}" && exit 1
+
+# jq prints 1 when the "queued" value of the health report exceeds
+# MAX_QUEUE_RATIO times its "load" value, and 0 otherwise.
+HIGH_RATIO=$(hhvmadm check-health | \
+                    jq "if (.queued > (${MAX_QUEUE_RATIO} * .load)) then 1 else 0 end")
+# Fall back to 0 when the health check produced no output, so a failing
+# hhvmadm/jq is never read as "restart needed".
+if (( ${HIGH_RATIO:-0} )); then
+    echo "HHVM needs restart: queue > ${MAX_QUEUE_RATIO} * load"
+    exit 0
+fi
+# No need for a restart
+exit 1
diff --git a/modules/hhvm/manifests/init.pp b/modules/hhvm/manifests/init.pp
index 74535cd..adc75ad 100644
--- a/modules/hhvm/manifests/init.pp
+++ b/modules/hhvm/manifests/init.pp
@@ -264,6 +264,13 @@
         mode   => '0555',
     }
 
+    # Script whose exit status tells whether HHVM should be restarted.
+    # Shipped from this module's files directory, root-owned, and
+    # readable/executable (but not writable) by everyone.
+    file { '/usr/local/bin/hhvm-needs-restart':
+        ensure => present,
+        source => 'puppet:///modules/hhvm/hhvm-needs-restart.sh',
+        mode   => '0555',
+        owner  => 'root',
+        group  => 'root',
+    }
 
     ## Run-time data and logging
 
diff --git a/modules/role/manifests/mediawiki/webserver.pp b/modules/role/manifests/mediawiki/webserver.pp
index 6d47a9b..beefddf 100644
--- a/modules/role/manifests/mediawiki/webserver.pp
+++ b/modules/role/manifests/mediawiki/webserver.pp
@@ -12,10 +12,30 @@
     }
 
     if hiera('has_lvs', true) {
+        include ::lvs::configuration
         include ::role::lvs::realserver
 
         # Conftool config
         include ::mediawiki::conftool
+
+        # Restart HHVM if it has been running for more than 3 days or
+        # memory occupation exceeds 50% of the available RAM.
+        # This should prevent a series of cpu usage surges we've been seeing
+        # on long-running HHVM processes. T147773
+        #
+        # The node list of this host's conftool cluster is read from the
+        # conftool-data YAML for the current site; the cluster name falls back
+        # to 'appserver' when the LVS service has no conftool configuration.
+        # NOTE(review): cron_splay presumably spreads the daily run time across
+        # the nodes of the pool so they do not all restart together - confirm.
+        $pool = $::role::lvs::realserver::lvs_pools['hhvm']['lvs_name']
+        $lvs_service = pick($::lvs::configuration::lvs_services[$pool], {})
+        $conftool_config = pick($lvs_service['conftool'], {'cluster' => 'appserver'})
+        $module_path = get_module_path($module_name)
+        $site_nodes = loadyaml("${module_path}/../../conftool-data/nodes/${::site}.yaml")
+        $pool_nodes = keys($site_nodes[$conftool_config['cluster']])
+        # Only hosts listed in the pool get the cron job.
+        if member($pool_nodes, $::fqdn) {
+            $times = cron_splay($pool_nodes, 'daily', 'hhvm-conditional-restarts')
+            # The restart only runs when hhvm-needs-restart exits 0. Only the
+            # restart's stdout is discarded; the check's own message is not.
+            cron { 'hhvm-conditional-restart':
+                command => '/usr/local/bin/hhvm-needs-restart && /usr/local/bin/run-no-puppet /usr/local/bin/restart-hhvm > /dev/null',
+                hour    => $times['hour'],
+                minute  => $times['minute'],
+            }
+        }
     }
 
     ferm::service { 'mediawiki-http':

-- 
To view, visit https://gerrit.wikimedia.org/r/315938
To unsubscribe, visit https://gerrit.wikimedia.org/r/settings

Gerrit-MessageType: merged
Gerrit-Change-Id: I204331607ba80169fafdd205ba9bffeeabf9a443
Gerrit-PatchSet: 14
Gerrit-Project: operations/puppet
Gerrit-Branch: production
Gerrit-Owner: Giuseppe Lavagetto <glavage...@wikimedia.org>
Gerrit-Reviewer: BBlack <bbl...@wikimedia.org>
Gerrit-Reviewer: Dzahn <dz...@wikimedia.org>
Gerrit-Reviewer: Giuseppe Lavagetto <glavage...@wikimedia.org>
Gerrit-Reviewer: jenkins-bot <>

_______________________________________________
MediaWiki-commits mailing list
MediaWiki-commits@lists.wikimedia.org
https://lists.wikimedia.org/mailman/listinfo/mediawiki-commits

Reply via email to