Elukey has submitted this change and it was merged.
Change subject: Add automatic failover to Hadoop Namenodes.
......................................................................
Add automatic failover to Hadoop Namenodes.
Hadoop namenodes can be configured for automatic failover if HA is enabled.
This requires Zookeeper and a new daemon (hadoop-hdfs-zkfc) that periodically
executes health checks and decides which Namenode is the active master.
Bug: T129838
Change-Id: I0a5c34c130909d3f4a058800b09970de65c29596
---
M README.md
M manifests/hadoop.pp
M manifests/hadoop/namenode.pp
M templates/hadoop/core-site.xml.erb
M templates/hadoop/hdfs-site.xml.erb
5 files changed, 56 insertions(+), 4 deletions(-)
Approvals:
Elukey: Looks good to me, approved
jenkins-bot: Verified
diff --git a/README.md b/README.md
index 30c71be..7b87f00 100644
--- a/README.md
+++ b/README.md
@@ -248,9 +248,8 @@
sudo service hadoop-hdfs-datanode start
```
-When there are multiple NameNodes and automatic failover is not configured
-(it is not yet supported by this puppet module), both NameNodes start up
-in standby mode. You will have to manually transition one of them to active.
+When there are multiple NameNodes and automatic failover is not configured (it
is enabled by setting
+```zookeeper_hosts```), both NameNodes start up in standby mode. You will have
to manually transition one of them to active.
```bash
# on your hadoop master node:
diff --git a/manifests/hadoop.pp b/manifests/hadoop.pp
index 7b09979..a48ccc3 100644
--- a/manifests/hadoop.pp
+++ b/manifests/hadoop.pp
@@ -38,7 +38,8 @@
# for $namenode_hosts. Please be sure to
include cdh::hadoop::resourcemanager
# directly on any standby RM hosts. (The
master RM will be included automatically)
# when you include cdh::hadoop::master).
-# $zookeeper_hosts - Array of Zookeeper hosts to use for HA YARN
ResouceManager.
+# $zookeeper_hosts - Array of Zookeeper hosts to use for HA
failover. If provided, HA YARN Resourcemanager will be enabled.
+# If this is set AND $ha_enabled is set for HDFS,
automatic failover for HDFS NameNodes will also be enabled.
# Default: undef
# $enable_jmxremote - enables remote JMX connections for all
Hadoop services.
# Ports are not currently configurable.
Default: true.
diff --git a/manifests/hadoop/namenode.pp b/manifests/hadoop/namenode.pp
index fbc150a..bd95758 100644
--- a/manifests/hadoop/namenode.pp
+++ b/manifests/hadoop/namenode.pp
@@ -16,6 +16,12 @@
ensure => 'installed',
}
+ if ($::cdh::hadoop::ha_enabled and $::cdh::hadoop::zookeeper_hosts) {
+ package { 'hadoop-hdfs-zkfc':
+ ensure => 'installed',
+ }
+ }
+
# NameNodes expect that the hosts.exclude file exists.
# I don't want to manage this as a puppet file resource,
# as users of this class might want to manage it themselves.
@@ -53,4 +59,36 @@
alias => 'namenode',
require => Exec['hadoop-namenode-format'],
}
+
+ if ($::cdh::hadoop::ha_enabled and $::cdh::hadoop::zookeeper_hosts) {
+     # Automatic failover keeps its data in a ZooKeeper znode, which is created
+     # by 'hdfs zkfc -formatZK'. The 'unless' guard below skips the command
+     # when the zkCli.sh 'stat' output for that znode already contains 'ctime'.
+     $zookeeper_hosts_string = join($::cdh::hadoop::zookeeper_hosts, ',')
+     exec { 'hadoop-hdfs-zkfc-init':
+         command => '/usr/bin/hdfs zkfc -formatZK',
+         user    => 'hdfs',
+         require => [
+             Service['hadoop-hdfs-namenode'],
+             File['/usr/lib/zookeeper/bin/zkCli.sh']
+         ],
+         unless  => "/usr/lib/zookeeper/bin/zkCli.sh \
+             -server ${zookeeper_hosts_string} \
+             stat /hadoop-ha/${::cdh::hadoop::cluster_name} 2>&1 \
+             | /bin/grep -q ctime",
+     }
+
+     # Supporting daemon to enable automatic-failover via health-check.
+     # Stores its state in ZooKeeper.
+     service { 'hadoop-hdfs-zkfc':
+         ensure     => 'running',
+         enable     => true,
+         hasstatus  => true,
+         hasrestart => true,
+         require    => [
+             Exec['hadoop-hdfs-zkfc-init'],
+             Service['hadoop-hdfs-namenode'],
+         ],
+     }
+ }
}
diff --git a/templates/hadoop/core-site.xml.erb
b/templates/hadoop/core-site.xml.erb
index 1e8af34..a8df776 100644
--- a/templates/hadoop/core-site.xml.erb
+++ b/templates/hadoop/core-site.xml.erb
@@ -12,6 +12,13 @@
<value>hdfs://<%= @ha_enabled ? @nameservice_id : @primary_namenode_host
%>/</value>
</property>
+<% if @ha_enabled and @zookeeper_hosts %>
+ <property>
+ <name>ha.zookeeper.quorum</name>
+ <value><%= Array(@zookeeper_hosts).sort.join(',') %></value>
+ </property>
+<% end -%>
+
<% if @io_file_buffer_size -%>
<property>
<name>io.file.buffer.size</name>
diff --git a/templates/hadoop/hdfs-site.xml.erb
b/templates/hadoop/hdfs-site.xml.erb
index 3ccba3e..d27b76b 100644
--- a/templates/hadoop/hdfs-site.xml.erb
+++ b/templates/hadoop/hdfs-site.xml.erb
@@ -21,6 +21,13 @@
<value>hadoop</value>
</property>
+<% if @ha_enabled and @zookeeper_hosts %>
+ <property>
+ <name>dfs.ha.automatic-failover.enabled</name>
+ <value>true</value>
+ </property>
+<% end -%>
+
<% if @ha_enabled -%>
<property>
<name>dfs.nameservices</name>
--
To view, visit https://gerrit.wikimedia.org/r/277984
To unsubscribe, visit https://gerrit.wikimedia.org/r/settings
Gerrit-MessageType: merged
Gerrit-Change-Id: I0a5c34c130909d3f4a058800b09970de65c29596
Gerrit-PatchSet: 5
Gerrit-Project: operations/puppet/cdh
Gerrit-Branch: master
Gerrit-Owner: Elukey <[email protected]>
Gerrit-Reviewer: Elukey <[email protected]>
Gerrit-Reviewer: Ottomata <[email protected]>
Gerrit-Reviewer: jenkins-bot <>
_______________________________________________
MediaWiki-commits mailing list
[email protected]
https://lists.wikimedia.org/mailman/listinfo/mediawiki-commits