ArielGlenn has uploaded a new change for review. ( https://gerrit.wikimedia.org/r/392875 )
Change subject: rsync all dumps status files to web servers and unpack them periodically ...................................................................... rsync all dumps status files to web servers and unpack them periodically Make sure that the index.html files and various other dumps status files don't provide links to dumps files that haven't been copied over yet from internal servers. Bug: T179857 Change-Id: I7fd30f713387da25d061a452786c71786f372a37 --- M modules/dumps/files/generation/rsync-to-peers.sh A modules/dumps/files/web/unpack-dumpstatusfiles.sh A modules/dumps/manifests/web/statusfiles.pp A modules/profile/manifests/dumps/web/statusfiles_sync.pp M modules/role/manifests/dumps/generation/server/fallback.pp M modules/role/manifests/dumps/public/server.pp M modules/role/manifests/dumps/web/xmldumps_active.pp M modules/role/manifests/dumps/web/xmldumps_fallback.pp 8 files changed, 109 insertions(+), 2 deletions(-) git pull ssh://gerrit.wikimedia.org:29418/operations/puppet refs/changes/75/392875/1 diff --git a/modules/dumps/files/generation/rsync-to-peers.sh b/modules/dumps/files/generation/rsync-to-peers.sh index fe6730d..451cce0 100644 --- a/modules/dumps/files/generation/rsync-to-peers.sh +++ b/modules/dumps/files/generation/rsync-to-peers.sh @@ -78,10 +78,18 @@ while [ 1 ]; do + # make xml/sql dumps tarball of all status files etc + tarballpath="${xmldumpsdir}/public/dumpstatusfiles.tar" + /usr/bin/find "${xmldumpsdir}/public/" -maxdepth 3 -regextype sed -regex '.*/*\.\(html\|json\|txt\)' | /usr/bin/xargs /bin/tar cvfp "$tarballpath" + /bin/gzip "$tarballpath" + # rsync of xml/sql dumps for public wikis for dest in $xmlremotedirs_list; do - /usr/bin/rsync -a --contimeout=600 --timeout=600 ${xmldumpsdir}/public/*html "$dest" > /dev/null 2>&1 - /usr/bin/rsync -a --contimeout=600 --timeout=600 --exclude='**bad/' --exclude='**save/' --exclude='**not/' --exclude='**temp/' --exclude='**tmp/' --exclude='*.inprog' ${xmldumpsdir}/public/*wik* "$dest" > /dev/null 2>&1 + /usr/bin/rsync -a --contimeout=600 --timeout=600 --exclude='**bad/' --exclude='**save/' --exclude='**not/' --exclude='**temp/' --exclude='**tmp/' --exclude='*.inprog' --exclude='*.html' --exclude='*.txt' --exclude-'*.json' ${xmldumpsdir}/public/*wik* "$dest" > /dev/null 2>&1 + # send tarball over last, remote can unpack it when it notices the arrival + # this way, content of status and html files always reflects dump output already + # made available via rsync + /usr/bin/rsync -a --contimeout=600 --timeout=600 "${tarballpath}.gz" "$dest" > /dev/null 2>&1 done # rsync of misc dumps, not necessarily to/from the same tree as the public wikis diff --git a/modules/dumps/files/web/unpack-dumpstatusfiles.sh b/modules/dumps/files/web/unpack-dumpstatusfiles.sh new file mode 100644 index 0000000..21ea731 --- /dev/null +++ b/modules/dumps/files/web/unpack-dumpstatusfiles.sh @@ -0,0 +1,63 @@ +#!/bin/bash + +############################## +# This file is managed by puppet! +# puppet:///modules/dumps/web/unpack-dumpstatusfiles.sh +############################## + +# This script checks for the arrival of a tarball +# of dump status files; if a new one has arrived, it unpacks +# the tarball in the appropriate location. +# +# This ensures that html and other dumps status files +# on public-facing servers always reflect dump content +# files that have actually been made available via +# rsync from internal servers. + +usage() { + cat<<EOF +Usage: $0 --xmldumpsdir <path> --newer <minutes> + + --xmldumpsdir path to root of xml/sql dumps tree for unpacking tarball + --newer file must be newer than this many minutes ago to unpack it + +Example: $0 --xmldumpsdir /data/xmldatadumps --newer 10 +EOF + exit 1 +} + +xmldumpsdir="" +newer="" + +while [ $# -gt 0 ]; do + if [ $1 == "--xmldumpsdir" ]; then + xmldumpsdir="$2" + shift; shift + elif [ $1 == "--newer" ]; then + newer="$2" + shift; shift + else + echo "$0: Unknown option $1" >& 2 + usage + fi +done + +if [ -z "$xmldumpsdir" ]; then + echo "$0: missing argument --xmldumpsdir" + usage && exit 1 +elif [ -z "$newer" ]; then + echo "$0: missing argument --newer" + usage && exit 1 +fi + +tarballpath="${xmldumpsdir}/public/dumpstatusfiles.tar.gz" +if [ ! -e "$tarballpath" ]; then + # no file so do no check + exit 0 +fi + +result=$( /usr/bin/find "$tarballpath" -mmin "+${newer}" ) +if [ -n "$result" ]; then + cd "${xmldumpsdir}/public/" + /bin/zcat "$tarballpath" | tar xvfp - +fi diff --git a/modules/dumps/manifests/web/statusfiles.pp b/modules/dumps/manifests/web/statusfiles.pp new file mode 100644 index 0000000..a008b48 --- /dev/null +++ b/modules/dumps/manifests/web/statusfiles.pp @@ -0,0 +1,24 @@ +class dumps::web::statusfiles( + $xmldumpsdir = undef, +) { + file { '/usr/local/bin/unpack-statusfiles.sh': + ensure => 'present', + mode => '0755', + owner => 'root', + group => 'root', + source => 'puppet:///modules/dumps/generation/unpack-statusfiles.sh', + } + + # interval passed to script should match interval cron job runs; + # check every interval minutes for a new tarball of status files + # and unpack them + cron { 'update-dump-statusfiles': + ensure => 'present', + environment => 'MAILTO=ops-du...@wikimedia.org', + command => "/bin/bash /usr/local/bin/unpack-statusfiles.sh --xmldumpsdir $xmldumpsdir --interval 5", + user => root, + minute => '*/5', + require => File['/usr/local/bin/unpack-statusfiles.sh'], + } + +} diff --git a/modules/profile/manifests/dumps/web/statusfiles_sync.pp b/modules/profile/manifests/dumps/web/statusfiles_sync.pp new file mode 100644 index 0000000..5ad5506 --- /dev/null +++ b/modules/profile/manifests/dumps/web/statusfiles_sync.pp @@ -0,0 +1,8 @@ +class profile::dumps::web::statusfiles_sync( + $rsyncer_peer_settings = hiera('profile::dumps::rsyncer_peer'), +) { + $mntpoint = $rsyncer_peer_settings['dumps_mntpoint'] + class {'::dumps::web::statusfiles': + $xmldumpsdir => "${mntpoint/xmldatadumps/public}", + } +} diff --git a/modules/role/manifests/dumps/generation/server/fallback.pp b/modules/role/manifests/dumps/generation/server/fallback.pp index d16d8a4..c85ba68 100644 --- a/modules/role/manifests/dumps/generation/server/fallback.pp +++ b/modules/role/manifests/dumps/generation/server/fallback.pp @@ -7,4 +7,5 @@ include ::profile::dumps::generation::server::rsync include ::profile::dumps::rsyncer_peer include ::profile::dumps::nfs::generation + include ::profile::dumps::web::statusfiles_sync } diff --git a/modules/role/manifests/dumps/public/server.pp b/modules/role/manifests/dumps/public/server.pp index b7642f6..47cb652 100644 --- a/modules/role/manifests/dumps/public/server.pp +++ b/modules/role/manifests/dumps/public/server.pp @@ -6,4 +6,5 @@ include ::profile::dumps::public_server include ::profile::dumps::web::rsync_server include ::profile::dumps::rsyncer + include ::profile::dumps::web::statusfiles_sync } diff --git a/modules/role/manifests/dumps/web/xmldumps_active.pp b/modules/role/manifests/dumps/web/xmldumps_active.pp index 88f2693..78292a8 100644 --- a/modules/role/manifests/dumps/web/xmldumps_active.pp +++ b/modules/role/manifests/dumps/web/xmldumps_active.pp @@ -6,6 +6,7 @@ include ::profile::base::firewall include ::profile::dumps::web::xmldumps_active include ::profile::dumps::web::rsync_server + include ::profile::dumps::web::statusfiles_sync include ::profile::dumps::rsyncer include ::profile::dumps::fetcher include ::profile::dumps::nfs::all diff --git a/modules/role/manifests/dumps/web/xmldumps_fallback.pp b/modules/role/manifests/dumps/web/xmldumps_fallback.pp index bb4f482..65984eb 100644 --- a/modules/role/manifests/dumps/web/xmldumps_fallback.pp +++ b/modules/role/manifests/dumps/web/xmldumps_fallback.pp @@ -6,6 +6,7 @@ include ::profile::base::firewall include ::profile::dumps::web::xmldumps_fallback include ::profile::dumps::web::rsync_server + include ::profile::dumps::web::statusfiles_sync include ::profile::dumps::rsyncer_peer include ::profile::dumps::nfs::all -- To view, visit https://gerrit.wikimedia.org/r/392875 To unsubscribe, visit https://gerrit.wikimedia.org/r/settings Gerrit-MessageType: newchange Gerrit-Change-Id: I7fd30f713387da25d061a452786c71786f372a37 Gerrit-PatchSet: 1 Gerrit-Project: operations/puppet Gerrit-Branch: production Gerrit-Owner: ArielGlenn <ar...@wikimedia.org> _______________________________________________ MediaWiki-commits mailing list MediaWiki-commits@lists.wikimedia.org https://lists.wikimedia.org/mailman/listinfo/mediawiki-commits