Volans has submitted this change and it was merged. (
https://gerrit.wikimedia.org/r/349737 )
Change subject: Mediawiki: refactor stop/start maintenance
......................................................................
Mediawiki: refactor stop/start maintenance
- move common calls for jobrunners, videoscalers and cronjobs to the
mediawiki library
- re-enable and run puppet also in dc_from, where it was disabled too
- add verification in t09_start_maintenance that all the services are
still stopped in dc_from after the puppet run
Bug: T163372
Change-Id: I4fc3a9632fcc6cd3737188e29c10a9bb30708907
---
M switchdc/lib/mediawiki.py
M switchdc/stages/t01_stop_maintenance.py
M switchdc/stages/t09_start_maintenance.py
3 files changed, 119 insertions(+), 53 deletions(-)
Approvals:
Giuseppe Lavagetto: Looks good to me, but someone else must approve
jenkins-bot: Verified
Volans: Looks good to me, approved
diff --git a/switchdc/lib/mediawiki.py b/switchdc/lib/mediawiki.py
index 791f4a2..9981919 100644
--- a/switchdc/lib/mediawiki.py
+++ b/switchdc/lib/mediawiki.py
@@ -2,8 +2,9 @@
import requests
+from switchdc import SwitchdcError
from switchdc.log import logger
-from switchdc.lib.remote import Remote
+from switchdc.lib.remote import Remote, RemoteExecutionError
def check_config_line(filename, expected):
@@ -42,3 +43,101 @@
command = 'su - {user} -c \'scap sync-file --force
wmf-config/{filename}.php "{message}"\''.format(
user=os.getlogin(), filename=filename, message=message)
remote.sync(command)
+
+
+def jobrunners(dc, verify_status, stop=False):
+ """Manage and verify the MediaWiki jobrunners.
+
+ Arguments:
+ dc -- the name of the datacenter to filter for.
+ verify_status -- the status to verify that the jobrunners are in. Accepted
values: 'running', 'stopped'.
+ stop -- whether to stop the jobrunners (True) or left them
untouched (False).
+ """
+ remote = Remote(site=dc)
+ remote.select('R:class = role::mediawiki::jobrunner')
+
+ if stop:
+ logger.info('Stopping jobrunners in {dc}'.format(dc=dc))
+ # We wait for all jobs on HHVM on jobrunners to finish before
proceeding
+ remote.async('service jobrunner stop', 'service jobchron stop',
+ 'while [ "$(hhvmadm /check-load)" -gt 1 ]; do sleep 1;
done')
+
+ _validate_status(verify_status)
+ if verify_status == 'stopped':
+ prefix = '! '
+ elif verify_status == 'running':
+ prefix = ''
+
+ remote.async('{prefix}service jobrunner status >
/dev/null'.format(prefix=prefix),
+ '{prefix}service jobchron status >
/dev/null'.format(prefix=prefix), is_safe=True)
+
+
+def videoscalers(dc, verify_status, stop=False):
+ """Manage and verify the MediaWiki videoscalers.
+
+ Arguments:
+ dc -- the name of the datacenter to filter for.
+ verify_status -- the status to verify that the videoscalers are in.
Accepted values: 'running', 'stopped'.
+ stop -- whether to stop the videoscalers (True) or left them
untouched (False).
+ """
+ remote = Remote(site=dc)
+ remote.select('R:class = role::mediawiki::videoscaler')
+
+ if stop:
+ logger.info('Stopping videoscalers in {dc}'.format(dc=dc))
+ # On videoscalers we are forced to restart HHVM without waiting as
transcodes can take a long time
+ remote.async('stop jobrunner || exit 0', 'stop jobchron || exit 0',
'restart hhvm')
+
+ _validate_status(verify_status)
+ if verify_status == 'stopped':
+ option = 'v'
+ elif verify_status == 'running':
+ option = ''
+
+ remote.async('status jobrunner | grep -q{option}
running'.format(option=option),
+ 'status jobchron | grep -q{option}
running'.format(option=option), is_safe=True)
+
+
+def cronjobs(dc, verify_status, stop=False):
+ """Manage and verify the MediaWiki cronjobs.
+
+ Arguments:
+ dc -- the name of the datacenter to filter for.
+ verify_status -- the status to verify that the cronjobs are in. Accepted
values: 'running', 'stopped'.
+ stop -- whether to stop the cronjobs (True) or left them
untouched (False).
+ """
+ remote = Remote(site=dc)
+ remote.select('R:class = role::mediawiki::maintenance')
+
+ if stop:
+ logger.info('Disabling MediaWiki cronjobs in {dc}'.format(dc=dc))
+ remote.async('crontab -u www-data -r', 'killall -r php', 'sleep 5',
'killall -9 -r php')
+
+ _validate_status(verify_status)
+ if verify_status == 'stopped':
+ option = '-z '
+ elif verify_status == 'running':
+ option = ''
+
+ remote.sync('test {option}"$(crontab -u www-data -l | sed -r
\'/^(#|$)/d\')"'.format(option=option), is_safe=True)
+
+ if verify_status == 'stopped':
+ # We just log an error, don't actually report a failure to the system.
We can live with this.
+ try:
+ remote.sync('pgrep -c php', is_safe=True)
+ logger.error('Stray php processes still present on the maintenance
host, please check')
+ except RemoteExecutionError:
+ pass
+
+
+def _validate_status(status):
+ """Raise SwitchdcError if the status have not a valid value.
+
+ Arguments:
+ status -- the status to validate. Accepted values are 'running', 'stopped'.
+ """
+ valid_statuses = ('running', 'stopped')
+ if status not in valid_statuses:
+ logger.error("Got invalid status '{status}', expected one of
{valid}".format(
+ status=status, valid=valid_statuses))
+ raise SwitchdcError(1)
diff --git a/switchdc/stages/t01_stop_maintenance.py
b/switchdc/stages/t01_stop_maintenance.py
index 6da2677..975050e 100644
--- a/switchdc/stages/t01_stop_maintenance.py
+++ b/switchdc/stages/t01_stop_maintenance.py
@@ -1,40 +1,11 @@
-from switchdc.lib.remote import Remote, RemoteExecutionError
-from switchdc.log import logger
+from switchdc.lib import mediawiki
-__title__ = 'Stop MediaWiki jobrunners, videoscalers and maintenance in
{dc_from}'
+__title__ = 'Stop MediaWiki jobrunners, videoscalers and cronjobs in {dc_from}'
def execute(dc_from, dc_to):
- """Sets mediawiki-maintenance offline, stopping jobrunners and cronjobs."""
- # Note: the two steps here could be run in parallel; split the file here if
- # deemed necessary. Since this is pre-read-only, I didn't think it would be
- # an issue
+ """Sets mediawiki-maintenance offline, stopping jobrunners, videoscalers
and cronjobs."""
- # 1: Stop the jobrunners in dc_from
- remote = Remote(site=dc_from)
- logger.info('Stopping jobrunners in {dc}'.format(dc=dc_from))
- remote.select('R:class = role::mediawiki::jobrunner')
- # We wait for all jobs on HHVM on jobrunners to finish before proceeding
- remote.async('service jobrunner stop', 'service jobchron stop',
- 'while [ "$(hhvmadm /check-load)" -gt 1 ]; do sleep 1; done')
- remote.async('! service jobrunner status > /dev/null', '! service jobchron
status > /dev/null', is_safe=True)
-
- remote.select('R:class = role::mediawiki::videoscaler')
- # On videoscalers we are forced to restart HHVM as transcodes can take a
long time
- remote.async('stop jobrunner || exit 0', 'stop jobchron || exit 0',
'restart hhvm')
- remote.async('status jobrunner | grep -qv running', 'status jobchron |
grep -qv running')
-
- # 2: disable and kill cronjobs
- logger.info('Disabling MediaWiki cronjobs in {dc}'.format(dc=dc_from))
- remote.select('R:class = role::mediawiki::maintenance')
- remote.async('crontab -u www-data -r', 'killall -r php', 'sleep 5',
'killall -9 -r php')
-
- # Verify that the crontab has no entries
- remote.sync('test -z "$(crontab -u www-data -l | sed -r \'/^(#|$)/d\')"',
is_safe=True)
-
- # We just log an error, don't actually report a failure to the system. We
can live with this.
- try:
- remote.sync('pgrep -c php', is_safe=True)
- logger.error('Stray php processes still present on the maintenance
host, please check')
- except RemoteExecutionError:
- pass
+ mediawiki.jobrunners(dc_from, 'stopped', stop=True)
+ mediawiki.videoscalers(dc_from, 'stopped', stop=True)
+ mediawiki.cronjobs(dc_from, 'stopped', stop=True)
diff --git a/switchdc/stages/t09_start_maintenance.py
b/switchdc/stages/t09_start_maintenance.py
index 045ccd8..2ecc39a 100644
--- a/switchdc/stages/t09_start_maintenance.py
+++ b/switchdc/stages/t09_start_maintenance.py
@@ -1,28 +1,24 @@
from switchdc import get_reason
+from switchdc.lib import mediawiki
from switchdc.lib.remote import Remote
__title__ = 'Start MediaWiki jobrunners, videoscalers and maintenance in
{dc_to}'
def execute(dc_from, dc_to):
- """Sets mediawiki-maintenance online, starting jobrunners and cronjobs."""
- remote = Remote(site=dc_to)
-
- # 1: Run puppet on all jobrunner and maintenace machines in dc_to
- jobrunners = Remote.query('R:class = role::mediawiki::jobrunner')
- videoscalers = Remote.query('R:class = role::mediawiki::videoscaler')
- maintenance = Remote.query('R:class = role::mediawiki::maintenance')
- all_jobs = videoscalers | jobrunners | maintenance
- remote.select(all_jobs)
+ """Sets mediawiki-maintenance online, starting jobrunners, videoscalers
and cronjobs."""
+ # Enable and run puppet on the hosts where it was disabled
+ remote = Remote()
+ remote.select('R:class = profile::mediawiki::jobrunner or R:class =
role::mediawiki::maintenance')
command = 'run-puppet-agent --enable
"{message}"'.format(message=get_reason())
remote.async(command, batch_size=30)
- # Verify
- remote.select(jobrunners)
- remote.async('service jobrunner status > /dev/null', 'service jobchron
status > /dev/null', is_safe=True)
+ # Verify all services are started in dc_to
+ mediawiki.jobrunners(dc_to, 'running')
+ mediawiki.videoscalers(dc_to, 'running')
+ mediawiki.cronjobs(dc_to, 'running')
- remote.select(videoscalers)
- remote.async('status jobrunner | grep -q running', 'status jobchron | grep
-q running')
- # Verify that the crontab has entries
- remote.select(maintenance)
- remote.sync('test "$(crontab -u www-data -l | sed -r \'/^(#|$)/d\')"',
is_safe=True)
+ # Verify all services are still stopped in dc_from
+ mediawiki.jobrunners(dc_from, 'stopped')
+ mediawiki.videoscalers(dc_from, 'stopped')
+ mediawiki.cronjobs(dc_from, 'stopped')
--
To view, visit https://gerrit.wikimedia.org/r/349737
To unsubscribe, visit https://gerrit.wikimedia.org/r/settings
Gerrit-MessageType: merged
Gerrit-Change-Id: I4fc3a9632fcc6cd3737188e29c10a9bb30708907
Gerrit-PatchSet: 4
Gerrit-Project: operations/switchdc
Gerrit-Branch: master
Gerrit-Owner: Volans <[email protected]>
Gerrit-Reviewer: Giuseppe Lavagetto <[email protected]>
Gerrit-Reviewer: Volans <[email protected]>
Gerrit-Reviewer: jenkins-bot <>
_______________________________________________
MediaWiki-commits mailing list
[email protected]
https://lists.wikimedia.org/mailman/listinfo/mediawiki-commits