Ottomata has uploaded a new change for review.

  https://gerrit.wikimedia.org/r/209019

Change subject: Configure YARN HA ResourceManager
......................................................................

Configure YARN HA ResourceManager

Change-Id: I614968e8892392bfa1f0cf6e579a1f79d931682a
---
M manifests/hadoop.pp
M manifests/hadoop/resourcemanager.pp
M templates/hadoop/yarn-site.xml.erb
3 files changed, 159 insertions(+), 24 deletions(-)


  git pull ssh://gerrit.wikimedia.org:29418/operations/puppet/cdh 
refs/changes/19/209019/1

diff --git a/manifests/hadoop.pp b/manifests/hadoop.pp
index 2b4a59a..6c4ea82 100644
--- a/manifests/hadoop.pp
+++ b/manifests/hadoop.pp
@@ -107,6 +107,9 @@
     $datanode_mounts                             = 
$::cdh::hadoop::defaults::datanode_mounts,
     $dfs_data_path                               = 
$::cdh::hadoop::defaults::dfs_data_path,
 
+    $resourcemanager_hosts                       = undef,
+    $zookeeper_hosts                             = ['localhost:2181'],
+
     $yarn_local_path                             = 
$::cdh::hadoop::defaults::yarn_local_path,
     $yarn_logs_path                              = 
$::cdh::hadoop::defaults::yarn_logs_path,
     $dfs_block_size                              = 
$::cdh::hadoop::defaults::dfs_block_size,
@@ -155,6 +158,10 @@
     # This used in a couple of execs throughout this module.
     $dfs_name_dir_main = inline_template('<%= (@dfs_name_dir.class == Array) ? 
@dfs_name_dir[0] : @dfs_name_dir %>')
 
+    # Config files are installed into a directory
+    # based on the value of $cluster_name.
+    $config_directory = "/etc/hadoop/conf.${cluster_name}"
+
     # Set a boolean used to indicate that HA NameNodes
     # are intended to be used for this cluster.  HA NameNodes
     # require the JournalNodes are configured.
@@ -162,22 +169,11 @@
         undef   => false,
         default => true,
     }
-
     # If $ha_enabled is true, use $cluster_name as $nameservice_id.
     $nameservice_id = $ha_enabled ? {
         true    => $cluster_name,
         default => undef,
     }
-
-    # Config files are installed into a directory
-    # based on the value of $cluster_name.
-    $config_directory = "/etc/hadoop/conf.${cluster_name}"
-
-    # Parameter Validation:
-    if ($ha_enabled and !$journalnode_hosts) {
-        fail('Must provide multiple $journalnode_hosts when using HA and 
setting $nameservice_id.')
-    }
-
     # Assume the primary namenode is the first entry in $namenode_hosts,
     # Set a variable here for reference in other classes.
     $primary_namenode_host = $namenode_hosts[0]
@@ -188,6 +184,31 @@
     # which are '.' delimited.
     $primary_namenode_id   = inline_template('<%= 
@primary_namenode_host.tr(\'.\', \'-\') %>')
 
+    # Set a boolean used to indicate that HA YARN
+    # is intended to be used for this cluster.  HA YARN
+    # requires that ZooKeeper is configured, and that
+    # multiple ResourceManagers are specified.
+    $yarn_ha_enabled = $resourcemanager_hosts ? {
+        undef   => false,
+        default => true,
+    }
+    # If $yarn_ha_enabled is true, use $cluster_name as the RM cluster id
+    $yarn_cluster_id = $yarn_ha_enabled ? {
+        true    => $cluster_name,
+        default => undef,
+    }
+
+    # Parameter Validation:
+    if ($yarn_ha_enabled and !$zookeeper_hosts) {
+        fail('Must provide $zookeeper_hosts when using YARN HA by setting 
$resourcemanager_hosts')
+    }
+    # The primary resourcemanager is the first entry in $resourcemanager_hosts.
+    # Only index it when YARN HA is enabled; it is undef (not an array) otherwise.
+    $primary_resourcemanager_host = $yarn_ha_enabled ? {
+        true    => $resourcemanager_hosts[0],
+        default => undef,
+    }
+
     package { 'hadoop-client':
         ensure => 'installed'
     }
diff --git a/manifests/hadoop/resourcemanager.pp 
b/manifests/hadoop/resourcemanager.pp
index 679a60e..75b2906 100644
--- a/manifests/hadoop/resourcemanager.pp
+++ b/manifests/hadoop/resourcemanager.pp
@@ -5,23 +5,29 @@
 class cdh::hadoop::resourcemanager {
     Class['cdh::hadoop::namenode'] -> Class['cdh::hadoop::resourcemanager']
 
-    # Create YARN HDFS directories.
-    # See: 
http://www.cloudera.com/content/cloudera-content/cloudera-docs/CDH5/latest/CDH5-Installation-Guide/cdh5ig_yarn_cluster_deploy.html?scroll=topic_11_4_10_unique_1
-    cdh::hadoop::directory { '/var/log/hadoop-yarn':
-        # sudo -u hdfs hdfs dfs -mkdir /var/log/hadoop-yarn
-        # sudo -u hdfs hdfs dfs -chown yarn:mapred /var/log/hadoop-yarn
-        owner   => 'yarn',
-        group   => 'mapred',
-        mode    => '0755',
-        # Make sure HDFS directories are created before
-        # resourcemanager is installed and started, but after
-        # the namenode.
-        require => [Service['hadoop-hdfs-namenode'], 
Cdh::Hadoop::Directory['/var/log']],
+
+    # In an HA YARN ResourceManager setup, this class will be included on 
multiple nodes.
+    # In order to have this directory check performed by only one 
resourcemanager,
+    # we only use it on the first node in the $resourcemanager_hosts array.
+    if !$::cdh::hadoop::yarn_ha_enabled or $::fqdn == 
$::cdh::hadoop::primary_resourcemanager_host {
+        # Create YARN HDFS directories.
+        # See: 
http://www.cloudera.com/content/cloudera-content/cloudera-docs/CDH5/latest/CDH5-Installation-Guide/cdh5ig_yarn_cluster_deploy.html?scroll=topic_11_4_10_unique_1
+        cdh::hadoop::directory { '/var/log/hadoop-yarn':
+            # sudo -u hdfs hdfs dfs -mkdir /var/log/hadoop-yarn
+            # sudo -u hdfs hdfs dfs -chown yarn:mapred /var/log/hadoop-yarn
+            owner   => 'yarn',
+            group   => 'mapred',
+            mode    => '0755',
+            # Make sure HDFS directories are created before
+            # resourcemanager is installed and started, but after
+            # the namenode.
+            require => [Service['hadoop-hdfs-namenode'], 
Cdh::Hadoop::Directory['/var/log']],
+            before  => Package['hadoop-yarn-resourcemanager'],
+        }
     }
 
     package { 'hadoop-yarn-resourcemanager':
         ensure  => 'installed',
-        require => Cdh::Hadoop::Directory['/var/log/hadoop-yarn'],
     }
 
     service { 'hadoop-yarn-resourcemanager':
diff --git a/templates/hadoop/yarn-site.xml.erb 
b/templates/hadoop/yarn-site.xml.erb
index 1ffb5c9..233bc2d 100644
--- a/templates/hadoop/yarn-site.xml.erb
+++ b/templates/hadoop/yarn-site.xml.erb
@@ -1,3 +1,13 @@
+<%
+# Convert a hostname to a Node ID.
+# We can't use '.' characters because IDs
+# will be used in the names of some Java properties,
+# which are '.' delimited.
+def host_to_id(host)
+  host.tr('.', '-')
+end
+
+-%>
 <?xml version="1.0"?>
 <!-- NOTE:  This file is managed by Puppet. -->
 
@@ -7,6 +17,104 @@
 
 <configuration>
 
+<% if @yarn_ha_enabled -%>
+  <property>
+    <name>yarn.resourcemanager.cluster-id</name>
+    <value><%= @yarn_cluster_id %></value>
+  </property>
+
+  <property>
+    <name>yarn.resourcemanager.ha.rm-ids</name>
+    <value><%= @resourcemanager_hosts.sort.collect { |host| host_to_id(host) 
}.join(',') %></value>
+  </property>
+
+<% if @resourcemanager_hosts.include?(@fqdn) -%>
+  <property>
+    <name>yarn.resourcemanager.ha.id</name>
+    <value><%= host_to_id(@fqdn) %></value>
+  </property>
+<% end -%>
+
+  <property>
+    <name>yarn.resourcemanager.connect.retry-interval.ms</name>
+    <value>2000</value>
+  </property>
+
+  <property>
+    <name>yarn.resourcemanager.ha.enabled</name>
+    <value>true</value>
+  </property>
+
+  <property>
+    <name>yarn.resourcemanager.ha.automatic-failover.enabled</name>
+    <value>true</value>
+  </property>
+
+  <property>
+    <name>yarn.resourcemanager.ha.automatic-failover.embedded</name>
+    <value>true</value>
+  </property>
+
+  <property>
+    <name>yarn.resourcemanager.recovery.enabled</name>
+    <value>true</value>
+  </property>
+
+  <property>
+    <name>yarn.resourcemanager.store.class</name>
+    
<value>org.apache.hadoop.yarn.server.resourcemanager.recovery.ZKRMStateStore</value>
+  </property>
+
+  <property>
+    <name>yarn.resourcemanager.zk-address</name>
+    <value><%= Array(@zookeeper_hosts).sort.join(',') %></value>
+  </property>
+
+  <property>
+    <name>yarn.app.mapreduce.am.scheduler.connection.wait.interval-ms</name>
+    <value>5000</value>
+  </property>
+
+  <property>
+    <name>yarn.resourcemanager.work-preserving-recovery.enabled</name>
+    <value>true</value>
+  </property>
+
+  <property>
+    <name>yarn.resourcemanager.am.max-attempts</name>
+    <value>6</value>
+  </property>
+
+<!--HA YARN ResourceManager addresses for each ResourceManager id -->
+<% @resourcemanager_hosts.sort.each do |host| -%>
+  <property>
+    <name>yarn.resourcemanager.scheduler.address.<%= host_to_id(host) %></name>
+    <value><%= host %>:8030</value>
+  </property>
+  <property>
+    <name>yarn.resourcemanager.resource-tracker.address.<%= host_to_id(host) 
%></name>
+    <value><%= host %>:8031</value>
+  </property>
+  <property>
+    <name>yarn.resourcemanager.address.<%= host_to_id(host) %></name>
+    <value><%= host %>:8032</value>
+  </property>
+  <property>
+    <name>yarn.resourcemanager.admin.address.<%= host_to_id(host) %></name>
+    <value><%= host %>:8033</value>
+  </property>
+  <property>
+    <name>yarn.resourcemanager.webapp.address.<%= host_to_id(host) %></name>
+    <value><%= host %>:8088</value>
+  </property>
+  <property>
+    <name>yarn.resourcemanager.webapp.https.address.<%= host_to_id(host) 
%></name>
+    <value><%= host %>:8090</value>
+  </property>
+<% end # @resourcemanager_hosts.each -%>
+
+<% end # if @yarn_ha_enabled -%>
+
   <property>
     <name>yarn.resourcemanager.hostname</name>
     <value><%= @primary_namenode_host %></value>

-- 
To view, visit https://gerrit.wikimedia.org/r/209019
To unsubscribe, visit https://gerrit.wikimedia.org/r/settings

Gerrit-MessageType: newchange
Gerrit-Change-Id: I614968e8892392bfa1f0cf6e579a1f79d931682a
Gerrit-PatchSet: 1
Gerrit-Project: operations/puppet/cdh
Gerrit-Branch: master
Gerrit-Owner: Ottomata <[email protected]>

_______________________________________________
MediaWiki-commits mailing list
[email protected]
https://lists.wikimedia.org/mailman/listinfo/mediawiki-commits

Reply via email to