Ottomata has uploaded a new change for review.
https://gerrit.wikimedia.org/r/209019
Change subject: Configure YARN HA ResourceManager
......................................................................
Configure YARN HA ResourceManager
Change-Id: I614968e8892392bfa1f0cf6e579a1f79d931682a
---
M manifests/hadoop.pp
M manifests/hadoop/resourcemanager.pp
M templates/hadoop/yarn-site.xml.erb
3 files changed, 159 insertions(+), 24 deletions(-)
git pull ssh://gerrit.wikimedia.org:29418/operations/puppet/cdh
refs/changes/19/209019/1
diff --git a/manifests/hadoop.pp b/manifests/hadoop.pp
index 2b4a59a..6c4ea82 100644
--- a/manifests/hadoop.pp
+++ b/manifests/hadoop.pp
@@ -107,6 +107,9 @@
$datanode_mounts =
$::cdh::hadoop::defaults::datanode_mounts,
$dfs_data_path =
$::cdh::hadoop::defaults::dfs_data_path,
+ $resourcemanager_hosts = undef,
+ $zookeeper_hosts = ['localhost:2181'],
+
$yarn_local_path =
$::cdh::hadoop::defaults::yarn_local_path,
$yarn_logs_path =
$::cdh::hadoop::defaults::yarn_logs_path,
$dfs_block_size =
$::cdh::hadoop::defaults::dfs_block_size,
@@ -155,6 +158,10 @@
# This used in a couple of execs throughout this module.
$dfs_name_dir_main = inline_template('<%= (@dfs_name_dir.class == Array) ?
@dfs_name_dir[0] : @dfs_name_dir %>')
+ # Config files are installed into a directory
+ # based on the value of $cluster_name.
+ $config_directory = "/etc/hadoop/conf.${cluster_name}"
+
# Set a boolean used to indicate that HA NameNodes
# are intended to be used for this cluster. HA NameNodes
# require the JournalNodes are configured.
@@ -162,22 +169,11 @@
undef => false,
default => true,
}
-
# If $ha_enabled is true, use $cluster_name as $nameservice_id.
$nameservice_id = $ha_enabled ? {
true => $cluster_name,
default => undef,
}
-
- # Config files are installed into a directory
- # based on the value of $cluster_name.
- $config_directory = "/etc/hadoop/conf.${cluster_name}"
-
- # Parameter Validation:
- if ($ha_enabled and !$journalnode_hosts) {
- fail('Must provide multiple $journalnode_hosts when using HA and
setting $nameservice_id.')
- }
-
# Assume the primary namenode is the first entry in $namenode_hosts,
# Set a variable here for reference in other classes.
$primary_namenode_host = $namenode_hosts[0]
@@ -188,6 +184,31 @@
# which are '.' delimited.
$primary_namenode_id = inline_template('<%=
@primary_namenode_host.tr(\'.\', \'-\') %>')
+
+
+ # Set a boolean used to indicate that HA YARN
+ # is intended to be used for this cluster. HA YARN
+ # requires that ZooKeeper is configured, and that
+ # multiple ResourceManagers are specified.
+ $yarn_ha_enabled = $resourcemanager_hosts ? {
+ undef => false,
+ default => true,
+ }
+ # If $yarn_ha_enabled is true, use $cluster_name as the RM cluster id
+ $yarn_cluster_id = $yarn_ha_enabled ? {
+ true => $cluster_name,
+ default => undef,
+ }
+
+ # Parameter Validation:
+ if ($yarn_ha_enabled and !$zookeeper_hosts) {
+ fail('Must provide $zookeeper_hosts when using YARN HA by setting
$resourcemanager_hosts')
+ }
+ # Assume the primary resourcemanager is the first entry in
+ # $resourcemanager_hosts. Set a variable here for reference in other classes.
+ # Only index into $resourcemanager_hosts when YARN HA is enabled: in the
+ # non-HA case it is undef, and undef cannot be accessed with [0].
+ $primary_resourcemanager_host = $yarn_ha_enabled ? {
+ true => $resourcemanager_hosts[0],
+ default => undef,
+ }
+
+
package { 'hadoop-client':
ensure => 'installed'
}
diff --git a/manifests/hadoop/resourcemanager.pp
b/manifests/hadoop/resourcemanager.pp
index 679a60e..75b2906 100644
--- a/manifests/hadoop/resourcemanager.pp
+++ b/manifests/hadoop/resourcemanager.pp
@@ -5,23 +5,29 @@
class cdh::hadoop::resourcemanager {
Class['cdh::hadoop::namenode'] -> Class['cdh::hadoop::resourcemanager']
- # Create YARN HDFS directories.
- # See:
http://www.cloudera.com/content/cloudera-content/cloudera-docs/CDH5/latest/CDH5-Installation-Guide/cdh5ig_yarn_cluster_deploy.html?scroll=topic_11_4_10_unique_1
- cdh::hadoop::directory { '/var/log/hadoop-yarn':
- # sudo -u hdfs hdfs dfs -mkdir /var/log/hadoop-yarn
- # sudo -u hdfs hdfs dfs -chown yarn:mapred /var/log/hadoop-yarn
- owner => 'yarn',
- group => 'mapred',
- mode => '0755',
- # Make sure HDFS directories are created before
- # resourcemanager is installed and started, but after
- # the namenode.
- require => [Service['hadoop-hdfs-namenode'],
Cdh::Hadoop::Directory['/var/log']],
+
+ # In an HA YARN ResourceManager setup, this class will be included on
multiple nodes.
+ # In order to have this directory check performed by only one
resourcemanager,
+ # we only use it on the first node in the $resourcemanager_hosts array.
+ if !$::cdh::hadoop::yarn_ha_enabled or $::fqdn ==
$::cdh::hadoop::primary_resourcemanager_host {
+ # Create YARN HDFS directories.
+ # See:
http://www.cloudera.com/content/cloudera-content/cloudera-docs/CDH5/latest/CDH5-Installation-Guide/cdh5ig_yarn_cluster_deploy.html?scroll=topic_11_4_10_unique_1
+ cdh::hadoop::directory { '/var/log/hadoop-yarn':
+ # sudo -u hdfs hdfs dfs -mkdir /var/log/hadoop-yarn
+ # sudo -u hdfs hdfs dfs -chown yarn:mapred /var/log/hadoop-yarn
+ owner => 'yarn',
+ group => 'mapred',
+ mode => '0755',
+ # Make sure HDFS directories are created before
+ # resourcemanager is installed and started, but after
+ # the namenode.
+ require => [Service['hadoop-hdfs-namenode'],
Cdh::Hadoop::Directory['/var/log']],
+ before => Package['hadoop-yarn-resourcemanager'],
+ }
}
package { 'hadoop-yarn-resourcemanager':
ensure => 'installed',
- require => Cdh::Hadoop::Directory['/var/log/hadoop-yarn'],
}
service { 'hadoop-yarn-resourcemanager':
diff --git a/templates/hadoop/yarn-site.xml.erb
b/templates/hadoop/yarn-site.xml.erb
index 1ffb5c9..233bc2d 100644
--- a/templates/hadoop/yarn-site.xml.erb
+++ b/templates/hadoop/yarn-site.xml.erb
@@ -1,3 +1,13 @@
+<%
+# Convert a hostname to a Node ID.
+# We can't use '.' characters because IDs
+# will be used in the names of some Java properties,
+# which are '.' delimited.
+def host_to_id(host)
+ host.tr('.', '-')
+end
+
+-%>
<?xml version="1.0"?>
<!-- NOTE: This file is managed by Puppet. -->
@@ -7,6 +17,104 @@
<configuration>
+<% if @yarn_ha_enabled -%>
+ <property>
+ <name>yarn.resourcemanager.cluster-id</name>
+ <value><%= @yarn_cluster_id %></value>
+ </property>
+
+ <property>
+ <name>yarn.resourcemanager.ha.rm-ids</name>
+ <value><%= @resourcemanager_hosts.sort.collect { |host| host_to_id(host)
}.join(',') %></value>
+ </property>
+
+<% if @resourcemanager_hosts.include?(@fqdn) -%>
+ <property>
+ <name>yarn.resourcemanager.ha.id</name>
+ <value><%= host_to_id(@fqdn) %></value>
+ </property>
+<% end -%>
+
+ <property>
+ <name>yarn.resourcemanager.connect.retry-interval.ms</name>
+ <value>2000</value>
+ </property>
+
+ <property>
+ <name>yarn.resourcemanager.ha.enabled</name>
+ <value>true</value>
+ </property>
+
+ <property>
+ <name>yarn.resourcemanager.ha.automatic-failover.enabled</name>
+ <value>true</value>
+ </property>
+
+ <property>
+ <name>yarn.resourcemanager.ha.automatic-failover.embedded</name>
+ <value>true</value>
+ </property>
+
+ <property>
+ <name>yarn.resourcemanager.recovery.enabled</name>
+ <value>true</value>
+ </property>
+
+ <property>
+ <name>yarn.resourcemanager.store.class</name>
+
<value>org.apache.hadoop.yarn.server.resourcemanager.recovery.ZKRMStateStore</value>
+ </property>
+
+ <property>
+ <name>yarn.resourcemanager.zk-address</name>
+ <value><%= Array(@zookeeper_hosts).sort.join(',') %></value>
+ </property>
+
+ <property>
+ <name>yarn.app.mapreduce.am.scheduler.connection.wait.interval-ms</name>
+ <value>5000</value>
+ </property>
+
+ <property>
+ <name>yarn.resourcemanager.work-preserving-recovery.enabled</name>
+ <value>true</value>
+ </property>
+
+ <property>
+ <name>yarn.resourcemanager.am.max-attempts</name>
+ <value>6</value>
+ </property>
+
+<!--HA YARN ResourceManager addresses for each ResourceManager id -->
+<% @resourcemanager_hosts.sort.each do |host| -%>
+ <property>
+ <name>yarn.resourcemanager.scheduler.address.<%= host_to_id(host) %></name>
+ <value><%= host %>:8030</value>
+ </property>
+ <property>
+ <name>yarn.resourcemanager.resource-tracker.address.<%= host_to_id(host)
%></name>
+ <value><%= host %>:8031</value>
+ </property>
+ <property>
+ <name>yarn.resourcemanager.address.<%= host_to_id(host) %></name>
+ <value><%= host %>:8032</value>
+ </property>
+ <property>
+ <name>yarn.resourcemanager.admin.address.<%= host_to_id(host) %></name>
+ <value><%= host %>:8033</value>
+ </property>
+ <property>
+ <name>yarn.resourcemanager.webapp.address.<%= host_to_id(host) %></name>
+ <value><%= host %>:8088</value>
+ </property>
+ <property>
+ <name>yarn.resourcemanager.webapp.https.address.<%= host_to_id(host)
%></name>
+ <value><%= host %>:8090</value>
+ </property>
+<% end # @resourcemanager_hosts.each -%>
+
+<% end # if @yarn_ha_enabled -%>
+
<property>
<name>yarn.resourcemanager.hostname</name>
<value><%= @primary_namenode_host %></value>
--
To view, visit https://gerrit.wikimedia.org/r/209019
To unsubscribe, visit https://gerrit.wikimedia.org/r/settings
Gerrit-MessageType: newchange
Gerrit-Change-Id: I614968e8892392bfa1f0cf6e579a1f79d931682a
Gerrit-PatchSet: 1
Gerrit-Project: operations/puppet/cdh
Gerrit-Branch: master
Gerrit-Owner: Ottomata <[email protected]>
_______________________________________________
MediaWiki-commits mailing list
[email protected]
https://lists.wikimedia.org/mailman/listinfo/mediawiki-commits