Rush has submitted this change and it was merged.

Change subject: bdsync backup setup for labstore
......................................................................
bdsync backup setup for labstore

Change-Id: I67cb9b5579788ae4542f86646d01f502f95733df
---
A modules/labstore/files/block_sync.sh
A modules/labstore/files/snapshot-manager.py
A modules/labstore/manifests/bdsync.pp
A modules/labstore/manifests/device_backup.pp
M modules/labstore/manifests/init.pp
5 files changed, 373 insertions(+), 0 deletions(-)

Approvals:
  Rush: Looks good to me, approved
  Madhuvishy: Looks good to me, but someone else must approve
  jenkins-bot: Verified

diff --git a/modules/labstore/files/block_sync.sh b/modules/labstore/files/block_sync.sh
new file mode 100644
index 0000000..beac4f8
--- /dev/null
+++ b/modules/labstore/files/block_sync.sh
@@ -0,0 +1,48 @@
+#!/bin/sh
+
+function usage {
+    echo -e "Usage:\n"
+    echo -e "This script replicates an LVM2 block device across the network by taking a remote snapshot\n"
+    echo -e "block_sync.sh remote_host remote_volume_group remote_logical_volume snapshot_name local_device\n"
+    echo -e "Example: block_sync.sh 10.64.37.20 misc test snaptest /dev/backup/test\n"
+}
+
+if [[ "$#" -ne 5 || "$1" == '-h' ]]; then
+    usage
+    exit 1
+fi
+
+BDSYNC='/usr/bin/bdsync'
+SNAPSHOT_MGR='/usr/local/sbin/snapshot-manager'
+PV_OPTIONS='-p -t -e -r -a -b'
+r_user='root'
+r_host=$1
+r_vg=$2
+r_lv=$3
+r_snapshot_name=$4
+remotenice=10
+
+localdev=$5
+blocksize=16384
+
+remote_connect="ssh -i /root/.ssh/id_labstore ${r_user}@${r_host}"
+
+/bin/findmnt --notruncate -P -n -c $localdev
+if [ $? -eq 0 ]
+then
+    echo "Local device is mounted. Operations may be unsafe."
+    exit 1
+fi
+
+set -e
+
+$remote_connect "/usr/bin/test -e ${BDSYNC}"
+$remote_connect "/usr/bin/test -e ${SNAPSHOT_MGR}"
+
+$remote_connect "${SNAPSHOT_MGR} create ${r_snapshot_name} ${r_vg}/${r_lv} --force"
+
+$BDSYNC --blocksize=$blocksize \
+    --remdata "${remote_connect} 'nice -${remotenice} ${BDSYNC} --server'" \
+    $localdev "/dev/${r_vg}/${r_snapshot_name}" | \
+    pv $PV_OPTIONS | \
+    sudo $BDSYNC --patch=$localdev
diff --git a/modules/labstore/files/snapshot-manager.py b/modules/labstore/files/snapshot-manager.py
new file mode 100644
index 0000000..c4404f1
--- /dev/null
+++ b/modules/labstore/files/snapshot-manager.py
@@ -0,0 +1,269 @@
+#!/usr/bin/python3
+
+import argparse
+import datetime
+import operator
+import logging
+import os
+import sys
+import subprocess
+
+from dateutil.parser import parse
+
+
+def runcmd(command, cwd=None, stdin=None, shell=True):
+    """ Run a command
+    :param command: str
+    :param cwd: str
+    :param stdin: str
+    :param shell: bool
+    :return: tuple
+    """
+    p = subprocess.Popen(
+        command,
+        shell=shell,
+        stdout=subprocess.PIPE,
+        stderr=subprocess.PIPE,
+    )
+    (out, error) = p.communicate(input=stdin)
+    retcode = p.wait()
+    return out, error, retcode
+
+
+class LVMSnapshot:
+    def __init__(self, name, vg, lv):
+        self.name = name
+        self.lv = lv
+        self.vg = vg
+
+    def run(self, cmd, shell=False):
+        """Execute shell command
+        :param cmd: list
+        :param shell: bool
+        :returns: str
+        """
+        logging.debug("Run: {}".format(' '.join(cmd)))
+        out, error, retcode = runcmd(cmd, shell=shell)
+        if error:
+            logging.error(error)
+        return retcode, out.decode('utf-8')
+
+    def status(self):
+        """ Get status information for a snapshot
+        :returns: dict
+        """
+
+        params = [
+            'lv_name',
+            'lv_path',
+            'lv_attr',
+            'lv_size',
+            'lv_time',
+            'origin',
+            'snap_percent'
+        ]
+
+        status = [
+            '/sbin/lvs',
+            '--noheadings',
+            '--select',
+            'lv_name={}'.format(self.name),
+            '-o',
+            ','.join(params),
+            "--separator=';'",
+        ]
+
+        # if lvname does not exist will
still exit 0
+        retcode, out = self.run(status)
+        if retcode:
+            return {}
+
+        param_values = [x.strip().strip("'") for x in out.split(';')]
+        param_dict = dict(zip(params, param_values))
+
+        if 'lv_time' in param_dict:
+            param_dict['lv_time'] = parse(param_dict['lv_time'])
+        else:
+            # should mean it does not exist
+            param_dict = {}
+
+        logging.debug('status: {}'.format(str(param_dict)))
+        return param_dict
+
+    def is_snapshot(self):
+        """ Ensure that a logical volume is a snapshot
+        The lv_attr is representative of properties for the lv.
+        If it starts with an 's' it is a snapshot
+        :returns: bool
+        """
+        meta = self.status()
+        return bool(meta['lv_attr'].startswith('s'))
+
+    def create(self, size):
+        """ Create a snapshot
+        :param size: str
+        :returns: int
+        """
+
+        if self.status():
+            logging.debug('creation aborted as {} already exists'.format(self.name))
+            return 1
+
+        cmd = [
+            '/sbin/lvcreate',
+            '--snapshot',
+            '--size', size,
+            '--name', self.name,
+            '/dev/{}/{}'.format(self.vg, self.lv),
+        ]
+
+        logging.info('creating {} at {}'.format(self.name, size))
+        out, retcode = self.run(cmd)
+
+        return retcode
+
+    def remove(self):
+        """ Discard a snapshot with validation it is indeed a snapshot
+        :returns: bool
+        """
+
+        if not self.status() or not self.is_snapshot():
+            logging.info('{} cannot be removed'.format(self.name))
+            return False
+
+        # It is very dangerous to do removal operations
+        # without explicit names set here as removal
+        # aimed only at a vg can be treated as wildcard
+        if not self.name or not self.vg:
+            logging('{} or {} not set'.format(self.name, self.vg))
+            return False
+
+        logging.info('removing {}'.format(self.name))
+        retcode, out = self.run([
+            '/sbin/lvremove',
+            '{}/{}'.format(self.vg, self.name),
+            '--force',
+        ])
+
+        return not bool(self.status())
+
+
+def main():
+
+    argparser = argparse.ArgumentParser(
+        os.path.basename(sys.argv[0]),
+        description="Manage LVM2 Snapshots"
+    )
+
+    argparser.add_argument('action', help='execute this
action')
+    argparser.add_argument('name', help='snapshot name')
+    argparser.add_argument('volume', help="logical volume source for snapshot $vg/$lv")
+
+    argparser.add_argument(
+        "--size",
+        help="size matching lvcreate expectations e.g. [1T|10G|100m]",
+        default="1T"
+    )
+
+    argparser.add_argument(
+        "--max-age",
+        help="Ensure snapshot is no older than in minutes",
+        type=int,
+        default=86400
+    )
+
+    argparser.add_argument(
+        '--force',
+        help='Forcefully execute operation where applicable',
+        action='store_true'
+    )
+
+    argparser.add_argument(
+        '--debug',
+        help='Turn on debug logging',
+        action='store_true'
+    )
+
+    # re: making flag arguments required http://bugs.python.org/issue9694
+    args = argparser.parse_args()
+
+    def help():
+        argparser.print_help()
+        sys.exit(1)
+
+    logging.basicConfig(
+        format='%(asctime)s %(levelname)s %(message)s',
+        level=logging.DEBUG if args.debug else logging.INFO)
+
+    logging.debug(args)
+    try:
+        vg, lv = args.volume.split('/')
+    except:
+        logging.critical('source is a bad format {}'.format(args.volume))
+        help()
+
+    ss = LVMSnapshot(args.name, vg, lv)
+
+    logging.debug("initial status {}".format(ss.status()))
+
+    if args.force:
+        logging.info("force is enabled")
+
+    def status():
+
+        status = ss.status()
+
+        if not status:
+            logging.warning('null status as {} does not exist'.format(args.name))
+            sys.exit(1)
+
+        ssorted = sorted(status.items(), key=operator.itemgetter(0))
+        for value in ssorted:
+            print('%s - %s' % (value[0], value[1]))
+
+    def create():
+
+        status = ss.status()
+        if status:
+            logging.debug('{} already exists'.format(args.name))
+            creation_epoch = int(status['lv_time'].strftime('%s'))
+            now = int(datetime.datetime.now().strftime('%s'))
+            oldest_possible = now - args.max_age
+            logging.debug('current epoch: {}'.format(now))
+            logging.debug('max age epoch: {}'.format(oldest_possible))
+            logging.debug('creation epoch: {}'.format(creation_epoch))
+
+            if creation_epoch < oldest_possible or args.force:
+                
logging.info('removing {}'.format(args.name))
+                ss.remove()
+            else:
+                logging.info('skipping creation as snapshot exists')
+                sys.exit(1)
+                return
+
+        ss.create(args.size)
+        if not ss.status():
+            logging.critical('failed to create {}'.format(args.name))
+
+    def remove():
+
+        if not ss.status():
+            logging.info('{} does not exist'.format(args.name))
+            sys.exit(1)
+
+        ss.remove()
+        if ss.status():
+            logging.criticial('{} still exists'.format(args.name))
+            status()
+
+    actions = {
+        'status': status,
+        'create': create,
+        'remove': remove,
+    }
+
+    actions.get(args.action, help)()
+
+
+if __name__ == '__main__':
+    main()
diff --git a/modules/labstore/manifests/bdsync.pp b/modules/labstore/manifests/bdsync.pp
new file mode 100644
index 0000000..21ac945
--- /dev/null
+++ b/modules/labstore/manifests/bdsync.pp
@@ -0,0 +1,14 @@
+class labstore::bdsync {
+
+    package { 'bdsync':
+        ensure => present,
+        before => File['/usr/local/sbin/block_sync'],
+    }
+
+    file { '/usr/local/sbin/block_sync':
+        source => 'puppet:///modules/labstore/block_sync.sh',
+        owner => 'root',
+        group => 'root',
+        mode => '0755',
+    }
+}
diff --git a/modules/labstore/manifests/device_backup.pp b/modules/labstore/manifests/device_backup.pp
new file mode 100644
index 0000000..2b61604
--- /dev/null
+++ b/modules/labstore/manifests/device_backup.pp
@@ -0,0 +1,34 @@
+define labstore::device_backup (
+    $remotehost,
+    $remote_vg,
+    $remote_lv,
+    $remote_snapshot,
+    $localdev,
+    $weekday,
+    $hour=0,
+) {
+
+    include labstore::bdsync
+    $remote_ip = ipresolve($remotehost, 4)
+
+    $day = {
+        'sunday' => 0,
+        'monday' => 1,
+        'tuesday' => 2,
+        'wednesday' => 3,
+        'thursday' => 4,
+        'friday' => 5,
+        'saturday' => 6,
+    }
+
+    # Establish what list these alerts should go to
+    # environment => 'MAILTO=labs-adm...@wikimedia.org'
+    $block_sync='/usr/local/sbin/block_sync'
+    cron { "block_sync-${remote_vg}/${remote_lv}=>${localdev}":
+        ensure => 'present',
+        user => 'root',
+        command => "${block_sync} ${remote_ip} ${remote_vg}
${remote_lv} ${remote_snapshot} ${localdev}",
+        weekday => $day[$weekday],
+        hour => $hour,
+    }
+}
diff --git a/modules/labstore/manifests/init.pp b/modules/labstore/manifests/init.pp
index 07166e2..7af4315 100644
--- a/modules/labstore/manifests/init.pp
+++ b/modules/labstore/manifests/init.pp
@@ -14,6 +14,14 @@
     $ldapincludes = ['openldap', 'nss', 'utils']
     class { 'ldap::role::client::labs': ldapincludes => $ldapincludes }
 
+    file { '/usr/local/sbin/snapshot-manager':
+        ensure => present,
+        owner => 'root',
+        group => 'root',
+        mode => '0755',
+        source => 'puppet:///modules/labstore/snapshot-manager.py',
+    }
+
     file { '/usr/local/sbin/set-stripe-cache':
         ensure => present,
         owner => 'root',

-- 
To view, visit https://gerrit.wikimedia.org/r/315595
To unsubscribe, visit https://gerrit.wikimedia.org/r/settings

Gerrit-MessageType: merged
Gerrit-Change-Id: I67cb9b5579788ae4542f86646d01f502f95733df
Gerrit-PatchSet: 18
Gerrit-Project: operations/puppet
Gerrit-Branch: production
Gerrit-Owner: Rush <r...@wikimedia.org>
Gerrit-Reviewer: Madhuvishy <mviswanat...@wikimedia.org>
Gerrit-Reviewer: Rush <r...@wikimedia.org>
Gerrit-Reviewer: jenkins-bot <>

_______________________________________________
MediaWiki-commits mailing list
MediaWiki-commits@lists.wikimedia.org
https://lists.wikimedia.org/mailman/listinfo/mediawiki-commits