[MediaWiki-commits] [Gerrit] operations/puppet[production]: cleanup xml/sql dumps on all hosts that have them
ArielGlenn has submitted this change and it was merged. ( https://gerrit.wikimedia.org/r/394842 ) Change subject: cleanup xml/sql dumps on all hosts that have them .. cleanup xml/sql dumps on all hosts that have them On generator hosts, keep a small number for use for prefetch for the next run. On hosts serving the dumps, keep as many as we can make available to the public, given disk space. Bug: T181895 Change-Id: I04fc060fa607e0bedc7971e2fd4b830aec1b66d2 --- M hieradata/hosts/dataset1001.yaml M hieradata/hosts/dumpsdata1001.yaml M hieradata/hosts/dumpsdata1002.yaml M hieradata/hosts/labstore1006.yaml M hieradata/hosts/labstore1007.yaml M hieradata/hosts/ms1001.yaml M modules/dumps/manifests/web/cleanup.pp M modules/dumps/manifests/web/cleanups/xmldumps.pp M modules/dumps/manifests/web/xmldumps_active.pp M modules/profile/manifests/dumps/web/cleanup.pp M modules/profile/manifests/dumps/web/xmldumps_active.pp 11 files changed, 38 insertions(+), 20 deletions(-) Approvals: ArielGlenn: Looks good to me, approved jenkins-bot: Verified diff --git a/hieradata/hosts/dataset1001.yaml b/hieradata/hosts/dataset1001.yaml index 967f0c7..47ec977 100644 --- a/hieradata/hosts/dataset1001.yaml +++ b/hieradata/hosts/dataset1001.yaml @@ -1,5 +1,6 @@ profile::dumps::cleanup::isreplica: true profile::dumps::miscdumpsdir: '/data/xmldatadumps/public/other' +profile::dumps::xmldumpspublicdir: '/data/xmldatadumps/public' profile::dumps::rsyncer: dumps_user: 'datasets' diff --git a/hieradata/hosts/dumpsdata1001.yaml b/hieradata/hosts/dumpsdata1001.yaml index 12f28b6..a435ccd 100644 --- a/hieradata/hosts/dumpsdata1001.yaml +++ b/hieradata/hosts/dumpsdata1001.yaml @@ -1,2 +1,4 @@ profile::dumps::miscdumpsdir: '/data/otherdumps' +profile::dumps::xmldumpspublicdir: '/data/xmldatadumps/public' + profile::dumps::cleanup::isreplica: false diff --git a/hieradata/hosts/dumpsdata1002.yaml b/hieradata/hosts/dumpsdata1002.yaml index 6cba27b..871da47 100644 --- a/hieradata/hosts/dumpsdata1002.yaml +++ 
b/hieradata/hosts/dumpsdata1002.yaml @@ -1,4 +1,5 @@ profile::dumps::miscdumpsdir: '/data/otherdumps' +profile::dumps::xmldumpspublicdir: '/data/xmldatadumps/public' # this is currently a dumps generation fallback host, # we configure cleanups of old files there the diff --git a/hieradata/hosts/labstore1006.yaml b/hieradata/hosts/labstore1006.yaml index 549439b..e0cbf4f 100644 --- a/hieradata/hosts/labstore1006.yaml +++ b/hieradata/hosts/labstore1006.yaml @@ -1,4 +1,5 @@ profile::dumps::miscdumpsdir: '/srv/dumps/xmldatadumps/public/other' +profile::dumps::xmldumpspublicdir: '/srv/dumps/xmldatadumps/public' profile::dumps::cleanup::isreplica: true profile::dumps::rsyncer: diff --git a/hieradata/hosts/labstore1007.yaml b/hieradata/hosts/labstore1007.yaml index c74bbdb..e0cbf4f 100644 --- a/hieradata/hosts/labstore1007.yaml +++ b/hieradata/hosts/labstore1007.yaml @@ -1,3 +1,7 @@ +profile::dumps::miscdumpsdir: '/srv/dumps/xmldatadumps/public/other' +profile::dumps::xmldumpspublicdir: '/srv/dumps/xmldatadumps/public' +profile::dumps::cleanup::isreplica: true + profile::dumps::rsyncer: dumps_user: 'dumpsgen' dumps_group: 'dumpsgen' diff --git a/hieradata/hosts/ms1001.yaml b/hieradata/hosts/ms1001.yaml index 1f7dd46..0dd4675 100644 --- a/hieradata/hosts/ms1001.yaml +++ b/hieradata/hosts/ms1001.yaml @@ -1,5 +1,6 @@ profile::dumps::cleanup::isreplica: true profile::dumps::miscdumpsdir: '/data/xmldatadumps/public/other' +profile::dumps::xmldumpspublicdir: '/data/xmldatadumps/public' profile::dumps::rsyncer: dumps_user: 'datasets' diff --git a/modules/dumps/manifests/web/cleanup.pp b/modules/dumps/manifests/web/cleanup.pp index e173d52..6360fc6 100644 --- a/modules/dumps/manifests/web/cleanup.pp +++ b/modules/dumps/manifests/web/cleanup.pp @@ -1,6 +1,8 @@ class dumps::web::cleanup( $miscdumpsdir = undef, $isreplica = undef, +$publicdir = undef, +$user = undef, ) { file { '/etc/dumps': ensure => 'directory', @@ -22,4 +24,10 @@ miscdumpsdir => $miscdumpsdir, isreplica=> 
$isreplica, } + +class {'::dumps::web::cleanups::xmldumps': +publicdir => $publicdir, +user => $user, +isreplica => $isreplica, +} } diff --git a/modules/dumps/manifests/web/cleanups/xmldumps.pp b/modules/dumps/manifests/web/cleanups/xmldumps.pp index 9784525..0b140ef 100644 --- a/modules/dumps/manifests/web/cleanups/xmldumps.pp +++ b/modules/dumps/manifests/web/cleanups/xmldumps.pp @@ -1,7 +1,7 @@ class dumps::web::cleanups::xmldumps( -$wikilist_url = undef, $publicdir = undef, $user = undef, +$isreplica = undef, ) { $wikilist_dir = '/etc/dumps/dblists' file { $wikilist_dir: @@ -16,7 +16,7 @@ # each type of wiki we keep. $bigwikis = ['dewiki', 'eswiki', 'frwiki', 'itwiki', 'jawiki', 'metawiki', 'nlwiki', 'plwiki', 'p
[MediaWiki-commits] [Gerrit] operations/puppet[production]: cleanup xml/sql dumps on all hosts that have them
ArielGlenn has uploaded a new change for review. ( https://gerrit.wikimedia.org/r/394842 ) Change subject: cleanup xml/sql dumps on all hosts that have them .. cleanup xml/sql dumps on all hosts that have them on generator hosts, keep a small number for use for prefetch for the next run; on hosts serving the dumps, keep as many as we can make available to the public, given disk space Bug: T181895 Change-Id: I04fc060fa607e0bedc7971e2fd4b830aec1b66d2 --- M hieradata/hosts/dataset1001.yaml M hieradata/hosts/dumpsdata1001.yaml M hieradata/hosts/dumpsdata1002.yaml M hieradata/hosts/labstore1006.yaml M hieradata/hosts/labstore1007.yaml M hieradata/hosts/ms1001.yaml M modules/dumps/manifests/web/cleanup.pp M modules/dumps/manifests/web/cleanups/xmldumps.pp M modules/dumps/manifests/web/xmldumps_active.pp M modules/profile/manifests/dumps/web/cleanup.pp 10 files changed, 35 insertions(+), 16 deletions(-) git pull ssh://gerrit.wikimedia.org:29418/operations/puppet refs/changes/42/394842/1 diff --git a/hieradata/hosts/dataset1001.yaml b/hieradata/hosts/dataset1001.yaml index 967f0c7..47ec977 100644 --- a/hieradata/hosts/dataset1001.yaml +++ b/hieradata/hosts/dataset1001.yaml @@ -1,5 +1,6 @@ profile::dumps::cleanup::isreplica: true profile::dumps::miscdumpsdir: '/data/xmldatadumps/public/other' +profile::dumps::xmldumpspublicdir: '/data/xmldatadumps/public' profile::dumps::rsyncer: dumps_user: 'datasets' diff --git a/hieradata/hosts/dumpsdata1001.yaml b/hieradata/hosts/dumpsdata1001.yaml index 12f28b6..a435ccd 100644 --- a/hieradata/hosts/dumpsdata1001.yaml +++ b/hieradata/hosts/dumpsdata1001.yaml @@ -1,2 +1,4 @@ profile::dumps::miscdumpsdir: '/data/otherdumps' +profile::dumps::xmldumpspublicdir: '/data/xmldatadumps/public' + profile::dumps::cleanup::isreplica: false diff --git a/hieradata/hosts/dumpsdata1002.yaml b/hieradata/hosts/dumpsdata1002.yaml index 6cba27b..871da47 100644 --- a/hieradata/hosts/dumpsdata1002.yaml +++ b/hieradata/hosts/dumpsdata1002.yaml @@ -1,4 +1,5 @@ 
profile::dumps::miscdumpsdir: '/data/otherdumps' +profile::dumps::xmldumpspublicdir: '/data/xmldatadumps/public' # this is currently a dumps generation fallback host, # we configure cleanups of old files there the diff --git a/hieradata/hosts/labstore1006.yaml b/hieradata/hosts/labstore1006.yaml index 549439b..e0cbf4f 100644 --- a/hieradata/hosts/labstore1006.yaml +++ b/hieradata/hosts/labstore1006.yaml @@ -1,4 +1,5 @@ profile::dumps::miscdumpsdir: '/srv/dumps/xmldatadumps/public/other' +profile::dumps::xmldumpspublicdir: '/srv/dumps/xmldatadumps/public' profile::dumps::cleanup::isreplica: true profile::dumps::rsyncer: diff --git a/hieradata/hosts/labstore1007.yaml b/hieradata/hosts/labstore1007.yaml index c74bbdb..e0cbf4f 100644 --- a/hieradata/hosts/labstore1007.yaml +++ b/hieradata/hosts/labstore1007.yaml @@ -1,3 +1,7 @@ +profile::dumps::miscdumpsdir: '/srv/dumps/xmldatadumps/public/other' +profile::dumps::xmldumpspublicdir: '/srv/dumps/xmldatadumps/public' +profile::dumps::cleanup::isreplica: true + profile::dumps::rsyncer: dumps_user: 'dumpsgen' dumps_group: 'dumpsgen' diff --git a/hieradata/hosts/ms1001.yaml b/hieradata/hosts/ms1001.yaml index 1f7dd46..0dd4675 100644 --- a/hieradata/hosts/ms1001.yaml +++ b/hieradata/hosts/ms1001.yaml @@ -1,5 +1,6 @@ profile::dumps::cleanup::isreplica: true profile::dumps::miscdumpsdir: '/data/xmldatadumps/public/other' +profile::dumps::xmldumpspublicdir: '/data/xmldatadumps/public' profile::dumps::rsyncer: dumps_user: 'datasets' diff --git a/modules/dumps/manifests/web/cleanup.pp b/modules/dumps/manifests/web/cleanup.pp index e173d52..12cf8a4 100644 --- a/modules/dumps/manifests/web/cleanup.pp +++ b/modules/dumps/manifests/web/cleanup.pp @@ -1,6 +1,8 @@ class dumps::web::cleanup( $miscdumpsdir = undef, $isreplica = undef, +$publicdir = undef, +$user = undef, ) { file { '/etc/dumps': ensure => 'directory', @@ -22,4 +24,9 @@ miscdumpsdir => $miscdumpsdir, isreplica=> $isreplica, } + +class {'::dumps::web::cleanups::xmldumps': 
+publicdir=> $publicdir, +user => $user, +} } diff --git a/modules/dumps/manifests/web/cleanups/xmldumps.pp b/modules/dumps/manifests/web/cleanups/xmldumps.pp index 9784525..c36527d 100644 --- a/modules/dumps/manifests/web/cleanups/xmldumps.pp +++ b/modules/dumps/manifests/web/cleanups/xmldumps.pp @@ -44,8 +44,20 @@ # less, so that when a new dump run starts and partial dumps are # copied over to the web server, space is available for that new # run BEFORE it is copied. -$keeps = ['hugewikis.dblist:7', 'bigwikis.dblist:8', 'default:10'] -$keeps_content = join($keeps, "\n") + +# on generator hosts we must keep a minimum of 3 so that at any time +# we have at least one old full dump around, with all revision content +# which can be stolen f