ArielGlenn has uploaded a new change for review. ( 
https://gerrit.wikimedia.org/r/394842 )

Change subject: cleanup xml/sql dumps on all hosts that have them
......................................................................

cleanup xml/sql dumps on all hosts that have them

on generator hosts, keep a small number for use for prefetch
for the next run; on hosts serving the dumps, keep as many as
we can make available to the public, given disk space

Bug: T181895
Change-Id: I04fc060fa607e0bedc7971e2fd4b830aec1b66d2
---
M hieradata/hosts/dataset1001.yaml
M hieradata/hosts/dumpsdata1001.yaml
M hieradata/hosts/dumpsdata1002.yaml
M hieradata/hosts/labstore1006.yaml
M hieradata/hosts/labstore1007.yaml
M hieradata/hosts/ms1001.yaml
M modules/dumps/manifests/web/cleanup.pp
M modules/dumps/manifests/web/cleanups/xmldumps.pp
M modules/dumps/manifests/web/xmldumps_active.pp
M modules/profile/manifests/dumps/web/cleanup.pp
10 files changed, 35 insertions(+), 16 deletions(-)


  git pull ssh://gerrit.wikimedia.org:29418/operations/puppet 
refs/changes/42/394842/1

diff --git a/hieradata/hosts/dataset1001.yaml b/hieradata/hosts/dataset1001.yaml
index 967f0c7..47ec977 100644
--- a/hieradata/hosts/dataset1001.yaml
+++ b/hieradata/hosts/dataset1001.yaml
@@ -1,5 +1,6 @@
 profile::dumps::cleanup::isreplica: true
 profile::dumps::miscdumpsdir: '/data/xmldatadumps/public/other'
+profile::dumps::xmldumpspublicdir: '/data/xmldatadumps/public'
 
 profile::dumps::rsyncer:
   dumps_user: 'datasets'
diff --git a/hieradata/hosts/dumpsdata1001.yaml 
b/hieradata/hosts/dumpsdata1001.yaml
index 12f28b6..a435ccd 100644
--- a/hieradata/hosts/dumpsdata1001.yaml
+++ b/hieradata/hosts/dumpsdata1001.yaml
@@ -1,2 +1,4 @@
 profile::dumps::miscdumpsdir: '/data/otherdumps'
+profile::dumps::xmldumpspublicdir: '/data/xmldatadumps/public'
+
 profile::dumps::cleanup::isreplica: false
diff --git a/hieradata/hosts/dumpsdata1002.yaml 
b/hieradata/hosts/dumpsdata1002.yaml
index 6cba27b..871da47 100644
--- a/hieradata/hosts/dumpsdata1002.yaml
+++ b/hieradata/hosts/dumpsdata1002.yaml
@@ -1,4 +1,5 @@
 profile::dumps::miscdumpsdir: '/data/otherdumps'
+profile::dumps::xmldumpspublicdir: '/data/xmldatadumps/public'
 
 # this is currently a dumps generation fallback host,
 # we configure cleanups of old files there the
diff --git a/hieradata/hosts/labstore1006.yaml 
b/hieradata/hosts/labstore1006.yaml
index 549439b..e0cbf4f 100644
--- a/hieradata/hosts/labstore1006.yaml
+++ b/hieradata/hosts/labstore1006.yaml
@@ -1,4 +1,5 @@
 profile::dumps::miscdumpsdir: '/srv/dumps/xmldatadumps/public/other'
+profile::dumps::xmldumpspublicdir: '/srv/dumps/xmldatadumps/public'
 profile::dumps::cleanup::isreplica: true
 
 profile::dumps::rsyncer:
diff --git a/hieradata/hosts/labstore1007.yaml 
b/hieradata/hosts/labstore1007.yaml
index c74bbdb..e0cbf4f 100644
--- a/hieradata/hosts/labstore1007.yaml
+++ b/hieradata/hosts/labstore1007.yaml
@@ -1,3 +1,7 @@
+profile::dumps::miscdumpsdir: '/srv/dumps/xmldatadumps/public/other'
+profile::dumps::xmldumpspublicdir: '/srv/dumps/xmldatadumps/public'
+profile::dumps::cleanup::isreplica: true
+
 profile::dumps::rsyncer:
   dumps_user: 'dumpsgen'
   dumps_group: 'dumpsgen'
diff --git a/hieradata/hosts/ms1001.yaml b/hieradata/hosts/ms1001.yaml
index 1f7dd46..0dd4675 100644
--- a/hieradata/hosts/ms1001.yaml
+++ b/hieradata/hosts/ms1001.yaml
@@ -1,5 +1,6 @@
 profile::dumps::cleanup::isreplica: true
 profile::dumps::miscdumpsdir: '/data/xmldatadumps/public/other'
+profile::dumps::xmldumpspublicdir: '/data/xmldatadumps/public'
 
 profile::dumps::rsyncer:
   dumps_user: 'datasets'
diff --git a/modules/dumps/manifests/web/cleanup.pp 
b/modules/dumps/manifests/web/cleanup.pp
index e173d52..12cf8a4 100644
--- a/modules/dumps/manifests/web/cleanup.pp
+++ b/modules/dumps/manifests/web/cleanup.pp
@@ -1,6 +1,8 @@
 class dumps::web::cleanup(
     $miscdumpsdir = undef,
     $isreplica = undef,
+    $publicdir = undef,
+    $user = undef,
 ) {
     file { '/etc/dumps':
         ensure => 'directory',
@@ -22,4 +24,9 @@
         miscdumpsdir => $miscdumpsdir,
         isreplica    => $isreplica,
     }
+
+    class {'::dumps::web::cleanups::xmldumps':
+        publicdir    => $publicdir,
+        user         => $user,
+    }
 }
diff --git a/modules/dumps/manifests/web/cleanups/xmldumps.pp 
b/modules/dumps/manifests/web/cleanups/xmldumps.pp
index 9784525..c36527d 100644
--- a/modules/dumps/manifests/web/cleanups/xmldumps.pp
+++ b/modules/dumps/manifests/web/cleanups/xmldumps.pp
@@ -44,8 +44,20 @@
     # less, so that when a new dump run starts and partial dumps are
     # copied over to the web server, space is available for that new
     # run BEFORE it is copied.
-    $keeps = ['hugewikis.dblist:7', 'bigwikis.dblist:8', 'default:10']
-    $keeps_content = join($keeps, "\n")
+
+    # on generator hosts we must keep a minimum of 3 so that at any time
+    # we have at least one old full dump around, with all revision content
+    # which can be stolen from for the next dump run.  This is due to
+    # the way we run dumps: one full run, then one run without full
+    # revision content, etc.
+    $keep_generator = ['hugewikis.dblist:3', 'bigwikis.dblist:3', 'default:3']
+    $keep_replica = ['hugewikis.dblist:7', 'bigwikis.dblist:8', 'default:10']
+
+    if ($isreplica == true) {  # replica hosts get the larger keep list
+        $content = join($keep_replica, "\n")
+    } else {  # generator hosts keep only the minimum described above
+        $content = join($keep_generator, "\n")
+    }
 
     file { '/etc/dumps/xml_keeps.conf':
         ensure  => 'present',
@@ -53,7 +65,7 @@
         mode    => '0644',
         owner   => 'root',
         group   => 'root',
-        content => "${keeps_content}\n",
+        content => "${content}\n",
     }
 
     file { '/usr/local/bin/cleanup_old_xmldumps.py':
@@ -77,5 +89,4 @@
         hour        => '1',
         require     => File['/usr/local/bin/cleanup_old_xmldumps.py'],
     }
-
 }
diff --git a/modules/dumps/manifests/web/xmldumps_active.pp 
b/modules/dumps/manifests/web/xmldumps_active.pp
index 82ccc48..e8ca5c8 100644
--- a/modules/dumps/manifests/web/xmldumps_active.pp
+++ b/modules/dumps/manifests/web/xmldumps_active.pp
@@ -6,9 +6,6 @@
     $logs_dest        = undef,
     $htmldumps_server = undef,
     $xmldumps_server  = undef,
-    $wikilist_url     = undef,
-    $wikilist_dir     = undef,
-    $user             = undef,
     $webuser          = undef,
     $webgroup         = undef,
 ) {
@@ -28,13 +25,4 @@
     class {'::dumps::web::rsync::nginxlogs':
         dest   => $logs_dest,
     }
-
-    # only the active web server needs to cleanup old files
-    # rsync between peers will take care of the other hosts
-    class {'::dumps::web::cleanups::xmldumps':
-        wikilist_url => $wikilist_url,
-        publicdir    => $publicdir,
-        user         => $user,
-    }
-
 }
diff --git a/modules/profile/manifests/dumps/web/cleanup.pp 
b/modules/profile/manifests/dumps/web/cleanup.pp
index dc09811..f49d7ca 100644
--- a/modules/profile/manifests/dumps/web/cleanup.pp
+++ b/modules/profile/manifests/dumps/web/cleanup.pp
@@ -1,9 +1,12 @@
 class profile::dumps::web::cleanup(
     $isreplica = hiera('profile::dumps::cleanup::isreplica'),
     $miscdumpsdir = hiera('profile::dumps::miscdumpsdir'),
+    $publicdir = hiera('profile::dumps::xmldumpspublicdir'),
 ) {
     class {'::dumps::web::cleanup':
         isreplica    => $isreplica,
         miscdumpsdir => $miscdumpsdir,
+        publicdir    => $publicdir,
+        user         => 'dumpsgen',
     }
 }

-- 
To view, visit https://gerrit.wikimedia.org/r/394842
To unsubscribe, visit https://gerrit.wikimedia.org/r/settings

Gerrit-MessageType: newchange
Gerrit-Change-Id: I04fc060fa607e0bedc7971e2fd4b830aec1b66d2
Gerrit-PatchSet: 1
Gerrit-Project: operations/puppet
Gerrit-Branch: production
Gerrit-Owner: ArielGlenn <ar...@wikimedia.org>

_______________________________________________
MediaWiki-commits mailing list
MediaWiki-commits@lists.wikimedia.org
https://lists.wikimedia.org/mailman/listinfo/mediawiki-commits

Reply via email to