ArielGlenn has uploaded a new change for review. ( 
https://gerrit.wikimedia.org/r/392875 )

Change subject: rsync all dumps status files to web servers and unpack them 
periodically
......................................................................

rsync all dumps status files to web servers and unpack them periodically

Make sure that the index.html files and various other dumps status files
don't provide links to dumps files that haven't been copied over yet from
internal servers.

Bug: T179857
Change-Id: I7fd30f713387da25d061a452786c71786f372a37
---
M modules/dumps/files/generation/rsync-to-peers.sh
A modules/dumps/files/web/unpack-dumpstatusfiles.sh
A modules/dumps/manifests/web/statusfiles.pp
A modules/profile/manifests/dumps/web/statusfiles_sync.pp
M modules/role/manifests/dumps/generation/server/fallback.pp
M modules/role/manifests/dumps/public/server.pp
M modules/role/manifests/dumps/web/xmldumps_active.pp
M modules/role/manifests/dumps/web/xmldumps_fallback.pp
8 files changed, 109 insertions(+), 2 deletions(-)


  git pull ssh://gerrit.wikimedia.org:29418/operations/puppet 
refs/changes/75/392875/1

diff --git a/modules/dumps/files/generation/rsync-to-peers.sh 
b/modules/dumps/files/generation/rsync-to-peers.sh
index fe6730d..451cce0 100644
--- a/modules/dumps/files/generation/rsync-to-peers.sh
+++ b/modules/dumps/files/generation/rsync-to-peers.sh
@@ -78,10 +78,18 @@
 
 while [ 1 ]; do
 
+    # make xml/sql dumps tarball of all status files etc
+    tarballpath="${xmldumpsdir}/public/dumpstatusfiles.tar"
+    /usr/bin/find "${xmldumpsdir}/public/" -maxdepth 3 -regextype sed -regex 
'.*/*\.\(html\|json\|txt\)' | /usr/bin/xargs /bin/tar cvfp "$tarballpath"
+    /bin/gzip "$tarballpath"
+
     # rsync of xml/sql dumps for public wikis
     for dest in $xmlremotedirs_list; do
-       /usr/bin/rsync -a  --contimeout=600 --timeout=600 
${xmldumpsdir}/public/*html "$dest" > /dev/null 2>&1
-        /usr/bin/rsync -a  --contimeout=600 --timeout=600 --exclude='**bad/' 
--exclude='**save/' --exclude='**not/' --exclude='**temp/' --exclude='**tmp/' 
--exclude='*.inprog'  ${xmldumpsdir}/public/*wik* "$dest" > /dev/null 2>&1
+        /usr/bin/rsync -a  --contimeout=600 --timeout=600 --exclude='**bad/' --exclude='**save/' --exclude='**not/' --exclude='**temp/' --exclude='**tmp/' --exclude='*.inprog'  --exclude='*.html' --exclude='*.txt' --exclude='*.json' "${xmldumpsdir}/public/"*wik* "$dest" > /dev/null 2>&1
+        # send tarball over last, remote can unpack it when it notices the arrival
+        # this way, content of status and html files always reflects dump output already
+        # made available via rsync
+        /usr/bin/rsync -a  --contimeout=600 --timeout=600 "${tarballpath}.gz" "$dest" > /dev/null 2>&1
     done
 
     # rsync of misc dumps, not necessarily to/from the same tree as the public 
wikis
diff --git a/modules/dumps/files/web/unpack-dumpstatusfiles.sh 
b/modules/dumps/files/web/unpack-dumpstatusfiles.sh
new file mode 100644
index 0000000..21ea731
--- /dev/null
+++ b/modules/dumps/files/web/unpack-dumpstatusfiles.sh
@@ -0,0 +1,63 @@
+#!/bin/bash
+
+##############################
+# This file is managed by puppet!
+# puppet:///modules/dumps/web/unpack-dumpstatusfiles.sh
+##############################
+
+# This script checks for the arrival of a tarball
+# of dump status files; if a new one has arrived, it unpacks
+# the tarball in the appropriate location.
+#
+# This ensures that html and other dumps status files
+# on public-facing servers always reflect dump content
+# files that have actually been made available via
+# rsync from internal servers.
+
+usage() {  # print the help text to stderr and exit 1; only called on bad invocations
+    cat >&2 <<EOF
+Usage: $0 --xmldumpsdir <path> --newer <minutes>
+
+  --xmldumpsdir   path to root of xml/sql dumps tree for unpacking tarball
+  --newer         file must be newer than this many minutes ago to unpack it
+
+Example:  $0 --xmldumpsdir /data/xmldatadumps --newer 10
+EOF
+    exit 1
+}
+
+xmldumpsdir=""
+newer=""
+
+while [ $# -gt 0 ]; do  # every option takes exactly one value
+    if [ "$1" = "--xmldumpsdir" ]; then
+        xmldumpsdir="$2"
+        shift; shift
+    elif [ "$1" = "--newer" ]; then
+        newer="$2"
+        shift; shift
+    else
+        echo "$0: Unknown option $1" >&2
+        usage
+    fi
+done
+
+if [ -z "$xmldumpsdir" ]; then
+    echo "$0: missing argument --xmldumpsdir" >&2
+    usage  # usage exits with status 1
+elif [ -z "$newer" ]; then
+    echo "$0: missing argument --newer" >&2
+    usage  # usage exits with status 1
+fi
+
+tarballpath="${xmldumpsdir}/public/dumpstatusfiles.tar.gz"
+if [ ! -e "$tarballpath" ]; then
+    # no file so do no check
+    exit 0
+fi
+# -mmin "-N" matches only a tarball modified less than N minutes ago, i.e. newly arrived
+result=$( /usr/bin/find "$tarballpath" -mmin "-${newer}" )
+if [ -n "$result" ]; then
+    cd "${xmldumpsdir}/public/" || exit 1
+    /bin/zcat "$tarballpath" | tar xvfp -
+fi
diff --git a/modules/dumps/manifests/web/statusfiles.pp 
b/modules/dumps/manifests/web/statusfiles.pp
new file mode 100644
index 0000000..a008b48
--- /dev/null
+++ b/modules/dumps/manifests/web/statusfiles.pp
@@ -0,0 +1,24 @@
+class dumps::web::statusfiles(
+    $xmldumpsdir = undef,
+) {
+    file { '/usr/local/bin/unpack-statusfiles.sh':
+        ensure => 'present',
+        mode   => '0755',
+        owner  => 'root',
+        group  => 'root',
+        source => 'puppet:///modules/dumps/web/unpack-dumpstatusfiles.sh',
+    }
+
+    # the --newer value passed to the script should match the interval at which
+    # the cron job runs; check every interval minutes for a new tarball of
+    # status files and unpack them
+    cron { 'update-dump-statusfiles':
+        ensure      => 'present',
+        environment => 'MAILTO=ops-du...@wikimedia.org',
+        command     => "/bin/bash /usr/local/bin/unpack-statusfiles.sh --xmldumpsdir ${xmldumpsdir} --newer 5",
+        user        => root,
+        minute      => '*/5',
+        require     => File['/usr/local/bin/unpack-statusfiles.sh'],
+    }
+
+}
diff --git a/modules/profile/manifests/dumps/web/statusfiles_sync.pp 
b/modules/profile/manifests/dumps/web/statusfiles_sync.pp
new file mode 100644
index 0000000..5ad5506
--- /dev/null
+++ b/modules/profile/manifests/dumps/web/statusfiles_sync.pp
@@ -0,0 +1,8 @@
+class profile::dumps::web::statusfiles_sync(
+    $rsyncer_peer_settings = hiera('profile::dumps::rsyncer_peer'),
+) {
+    $mntpoint = $rsyncer_peer_settings['dumps_mntpoint']
+    class {'::dumps::web::statusfiles':
+        xmldumpsdir => "${mntpoint}/xmldatadumps", # dumps root; the unpack script appends /public itself
+    }
+}
diff --git a/modules/role/manifests/dumps/generation/server/fallback.pp 
b/modules/role/manifests/dumps/generation/server/fallback.pp
index d16d8a4..c85ba68 100644
--- a/modules/role/manifests/dumps/generation/server/fallback.pp
+++ b/modules/role/manifests/dumps/generation/server/fallback.pp
@@ -7,4 +7,5 @@
     include ::profile::dumps::generation::server::rsync
     include ::profile::dumps::rsyncer_peer
     include ::profile::dumps::nfs::generation
+    include ::profile::dumps::web::statusfiles_sync
 }
diff --git a/modules/role/manifests/dumps/public/server.pp 
b/modules/role/manifests/dumps/public/server.pp
index b7642f6..47cb652 100644
--- a/modules/role/manifests/dumps/public/server.pp
+++ b/modules/role/manifests/dumps/public/server.pp
@@ -6,4 +6,5 @@
     include ::profile::dumps::public_server
     include ::profile::dumps::web::rsync_server
     include ::profile::dumps::rsyncer
+    include ::profile::dumps::web::statusfiles_sync
 }
diff --git a/modules/role/manifests/dumps/web/xmldumps_active.pp 
b/modules/role/manifests/dumps/web/xmldumps_active.pp
index 88f2693..78292a8 100644
--- a/modules/role/manifests/dumps/web/xmldumps_active.pp
+++ b/modules/role/manifests/dumps/web/xmldumps_active.pp
@@ -6,6 +6,7 @@
     include ::profile::base::firewall
     include ::profile::dumps::web::xmldumps_active
     include ::profile::dumps::web::rsync_server
+    include ::profile::dumps::web::statusfiles_sync
     include ::profile::dumps::rsyncer
     include ::profile::dumps::fetcher
     include ::profile::dumps::nfs::all
diff --git a/modules/role/manifests/dumps/web/xmldumps_fallback.pp 
b/modules/role/manifests/dumps/web/xmldumps_fallback.pp
index bb4f482..65984eb 100644
--- a/modules/role/manifests/dumps/web/xmldumps_fallback.pp
+++ b/modules/role/manifests/dumps/web/xmldumps_fallback.pp
@@ -6,6 +6,7 @@
     include ::profile::base::firewall
     include ::profile::dumps::web::xmldumps_fallback
     include ::profile::dumps::web::rsync_server
+    include ::profile::dumps::web::statusfiles_sync
     include ::profile::dumps::rsyncer_peer
     include ::profile::dumps::nfs::all
 

-- 
To view, visit https://gerrit.wikimedia.org/r/392875
To unsubscribe, visit https://gerrit.wikimedia.org/r/settings

Gerrit-MessageType: newchange
Gerrit-Change-Id: I7fd30f713387da25d061a452786c71786f372a37
Gerrit-PatchSet: 1
Gerrit-Project: operations/puppet
Gerrit-Branch: production
Gerrit-Owner: ArielGlenn <ar...@wikimedia.org>

_______________________________________________
MediaWiki-commits mailing list
MediaWiki-commits@lists.wikimedia.org
https://lists.wikimedia.org/mailman/listinfo/mediawiki-commits

Reply via email to