Ottomata has submitted this change and it was merged. ( https://gerrit.wikimedia.org/r/377352 )
Change subject: Fetch Hadoop NameNode fsimage backups daily and also save them in bacula
......................................................................
Fetch Hadoop NameNode fsimage backups daily and also save them in bacula
Change-Id: I5d8b772c8501e914858cac16aaa6d3c9f3a3c028
---
M manifests/site.pp
M modules/profile/manifests/backup/director.pp
A modules/role/manifests/analytics_cluster/backup.pp
M modules/role/manifests/analytics_cluster/database/meta/backup_dest.pp
A modules/role/manifests/analytics_cluster/hadoop/backup/namenode.pp
5 files changed, 65 insertions(+), 4 deletions(-)
Approvals:
Ottomata: Looks good to me, approved
jenkins-bot: Verified
diff --git a/manifests/site.pp b/manifests/site.pp
index e67e628..64eeb84 100644
--- a/manifests/site.pp
+++ b/manifests/site.pp
@@ -54,10 +54,12 @@
         analytics_cluster::users,
         # analytics1002 is usually inactive, and it has a
         # decent amount of disk space. We use it to
-        # store backups of the analytics_cluster::database::meta
-        # (MySQL analytics-meta) instance. If you move this,
-        # make sure /srv/backup/mysql/analytics-meta has
+        # store some backups, including fsimage snapshots
+        # of Hadoop NameNode metadata, and of the
+        # analytics_cluster::database::meta (MySQL analytics-meta) instance.
+        # If you move these, make sure /srv/backup has
         # enough space to store backups.
+        analytics_cluster::hadoop::backup::namenode,
         analytics_cluster::database::meta::backup_dest)
 
     include ::standard
diff --git a/modules/profile/manifests/backup/director.pp b/modules/profile/manifests/backup/director.pp
index d55e62f..c034fe1 100644
--- a/modules/profile/manifests/backup/director.pp
+++ b/modules/profile/manifests/backup/director.pp
@@ -233,6 +233,10 @@
         includes => [ '/var/lib/rancid' ]
     }
 
+    bacula::director::fileset { 'hadoop-namenode-backup':
+        includes => [ '/srv/backup/hadoop/namenode' ]
+    }
+
     # The console should be on the director
     class { 'bacula::console':
         director => $::fqdn,
diff --git a/modules/role/manifests/analytics_cluster/backup.pp b/modules/role/manifests/analytics_cluster/backup.pp
new file mode 100644
index 0000000..b3bc9a1
--- /dev/null
+++ b/modules/role/manifests/analytics_cluster/backup.pp
@@ -0,0 +1,10 @@
+# == Class role::analytics_cluster::backup
+# Simple wrapper class to create and manage /srv/backup
+class role::analytics_cluster::backup {
+    file { '/srv/backup':
+        ensure => 'directory',
+        owner  => 'root',
+        group  => 'analytics-admins',
+        mode   => '0750',
+    }
+}
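A note on the pattern: several roles need /srv/backup to exist, and Puppet rejects duplicate declarations of the same file resource, while include of a class is idempotent. A minimal illustration, with two hypothetical consumer classes invented for the example:

# Hypothetical illustration only. Declaring file { '/srv/backup': }
# in each of these roles would fail catalog compilation with a
# duplicate resource declaration error; include-ing the shared
# wrapper class is safe to repeat.
class role::analytics_cluster::example_a {
    include ::role::analytics_cluster::backup
}

class role::analytics_cluster::example_b {
    include ::role::analytics_cluster::backup
}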
diff --git a/modules/role/manifests/analytics_cluster/database/meta/backup_dest.pp b/modules/role/manifests/analytics_cluster/database/meta/backup_dest.pp
index c85f0a1..fd691d6 100644
--- a/modules/role/manifests/analytics_cluster/database/meta/backup_dest.pp
+++ b/modules/role/manifests/analytics_cluster/database/meta/backup_dest.pp
@@ -1,8 +1,10 @@
 # == Class role::analytics_cluster::database::meta::backup_dest
 #
 class role::analytics_cluster::database::meta::backup_dest {
+    # Ensure /srv/backup exists
+    include ::role::analytics_cluster::backup
+
     file { [
-        '/srv/backup',
         '/srv/backup/mysql',
         '/srv/backup/mysql/analytics-meta',
     ]:
diff --git a/modules/role/manifests/analytics_cluster/hadoop/backup/namenode.pp b/modules/role/manifests/analytics_cluster/hadoop/backup/namenode.pp
new file mode 100644
index 0000000..c86e50e5
--- /dev/null
+++ b/modules/role/manifests/analytics_cluster/hadoop/backup/namenode.pp
@@ -0,0 +1,43 @@
+# == Class role::analytics_cluster::hadoop::backup::namenode
+# Periodically runs hdfs dfsadmin -fetchImage
+# and ensures that bacula backs up Hadoop NameNode fsimages,
+# in case we need to recover when both Hadoop NameNodes fail.
+#
+class role::analytics_cluster::hadoop::backup::namenode {
+    require ::profile::hadoop::client
+
+    include ::role::analytics_cluster::backup
+
+    $destination = '/srv/backup/hadoop/namenode'
+    file { [
+        '/srv/backup/hadoop',
+        $destination
+    ]:
+        ensure => 'directory',
+        owner  => 'hdfs',
+        group  => 'analytics-admins',
+        mode   => '0750',
+    }
+
+    cron { 'hadoop-namenode-backup-fetchimage':
+        command => "/usr/bin/hdfs dfsadmin -fetchImage ${destination} > /dev/null 2>&1",
+        user    => 'hdfs',
+        hour    => 0,
+        minute  => 0,
+    }
+
+    $retention_days = 30
+    # Delete files older than $retention_days.
+    # -type f restricts the find to fsimage files, so the backup
+    # directories themselves are never matched for deletion.
+    cron { 'hadoop-namenode-backup-prune':
+        command => "/usr/bin/find ${destination} -type f -mtime +${retention_days} -delete > /dev/null 2>&1",
+        user    => 'hdfs',
+        hour    => 1,
+        minute  => 0,
+    }
+
+    # Bacula will also back up this directory.
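The diff of namenode.pp breaks off above, mid-comment. In this codebase a bacula::director::fileset is typically paired with a matching client-side backup set, so the class plausibly closes along these lines; a minimal sketch only, where backup::set and profile::backup::host are assumed conventions not confirmed by the excerpt:

    # Assumed sketch: enroll this host as a bacula client and
    # schedule the 'hadoop-namenode-backup' fileset defined in
    # profile::backup::director.
    include ::profile::backup::host
    backup::set { 'hadoop-namenode-backup': }
}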