Ottomata has uploaded a new change for review.

  https://gerrit.wikimedia.org/r/76018


Change subject: Puppetizing HA NameNode via Quorum Based JournalNode.
......................................................................

Puppetizing HA NameNode via Quorum Based JournalNode.

THIS COMMIT NOT YET READY FOR REVIEW.

Change-Id: I78b1e51a5d027a6f48b8392b2ca0a35b6199a97b
---
M README.md
M manifests/hadoop.pp
M manifests/hadoop/defaults.pp
M manifests/hue.pp
M templates/hadoop/core-site.xml.erb
M templates/hadoop/hdfs-site.xml.erb
M templates/hadoop/mapred-site.xml.erb
M templates/hadoop/yarn-site.xml.erb
M tests/datanode.pp
M tests/hadoop.pp
M tests/historyserver.pp
11 files changed, 107 insertions(+), 24 deletions(-)


  git pull ssh://gerrit.wikimedia.org:29418/operations/puppet/cdh4 
refs/changes/18/76018/1

diff --git a/README.md b/README.md
index b73cea6..e9070fd 100644
--- a/README.md
+++ b/README.md
@@ -45,7 +45,7 @@
 
 include cdh4
 class { "cdh4::hadoop":
-       namenode_hostname => "namenode.hostname.org",
+       namenode_hostnames => ["namenode.hostname.org"],
        datanode_mounts   => [
            "/var/lib/hadoop/data/a",
            "/var/lib/hadoop/data/b",
diff --git a/manifests/hadoop.pp b/manifests/hadoop.pp
index 6c7a4a5..f3da4b0 100644
--- a/manifests/hadoop.pp
+++ b/manifests/hadoop.pp
@@ -12,12 +12,18 @@
 # yarn.nodemanager.log-dirs will be set to each of 
${dfs_data_dir_mounts}/$yarn_logs_path
 #
 # == Parameters
-#   $namenode_hostname   - hostname of the NameNode.  This will also be used 
as the hostname for the historyserver, proxyserver, and resourcemanager.
-#   $dfs_name_dir        - full path to hadoop NameNode name directory.  This 
can be an array of paths or a single string path.
+#   $namenode_hostnames  - hostname(s) of the NameNode(s).  This will also be
+#                          used as the hostname for the historyserver, 
proxyserver,
+#                          and resourcemanager.   Use an array of hostnames if 
you
+#                          are configuring Hadoop with HA NameNodes.
+#   $dfs_name_dir        - Path to hadoop NameNode name directory.  This
+#                          can be an array of paths or a single string path.
 #   $config_directory    - path of the hadoop config directory.
-#   $datanode_mounts     - array of JBOD mount points.  Hadoop datanode and 
mapreduce/yarn directories will be here.
+#   $datanode_mounts     - array of JBOD mount points.  Hadoop datanode and
+#                          mapreduce/yarn directories will be here.
 #   $dfs_data_path       - path relative to JBOD mount point for HDFS data 
directories.
-#   $enable_jmxremote    - enables remote JMX connections for all Hadoop 
services.  Ports are not currently configurable.  Default: true.
+#   $enable_jmxremote    - enables remote JMX connections for all Hadoop 
services.
+#                          Ports are not currently configurable.  Default: 
true.
 #   $yarn_local_path     - path relative to JBOD mount point for yarn local 
directories.
 #   $yarn_logs_path      - path relative to JBOD mount point for yarn log 
directories.
 #   $dfs_block_size      - HDFS block size in bytes.  Default 64MB.
@@ -33,19 +39,32 @@
 #   $mapreduce_task_io_sort_factor
 #   $mapreduce_map_java_opts
 #   $mapreduce_child_java_opts
-#   $mapreduce_intermediate_compression   - If true, intermediate MapReduce 
data will be compressed with Snappy.    Default: true.
-#   $mapreduce_final_compession           - If true, Final output of MapReduce 
jobs will be compressed with Snappy. Default: false.
+#   $mapreduce_intermediate_compression   - If true, intermediate MapReduce 
data
+#                                           will be compressed with Snappy.  
Default: true.
+#   $mapreduce_final_compession           - If true, Final output of MapReduce
+#                                           jobs will be compressed with 
Snappy. Default: false.
 #   $yarn_nodemanager_resource_memory_mb
-#   $yarn_resourcemanager_scheduler_class - If you change this (e.g. to 
FairScheduler), you should also provide your own scheduler config .xml files 
outside of the cdh4 module.
+#   $yarn_resourcemanager_scheduler_class - If you change this (e.g. to
+#                                           FairScheduler), you should also 
provide
+#                                           your own scheduler config .xml 
files
+#                                           outside of the cdh4 module.
 #   $use_yarn
-#   $ganglia_hosts                        - Set this to an array of ganglia 
host:ports if you want to enable ganglia sinks in hadoop-metrics2.properites
+#   $ganglia_hosts                        - Set this to an array of ganglia 
host:ports
+#                                           if you want to enable ganglia 
sinks in hadoop-metrics2.properties
 #
 class cdh4::hadoop(
-    $namenode_hostname,
+    $namenode_hostnames,
     $dfs_name_dir,
+
     $config_directory                        = 
$::cdh4::hadoop::defaults::config_directory,
+    
+    $nameservice_id                          = 
$::cdh4::hadoop::defaults::nameservice_id,
+    $journalnode_hostnames                   = 
$::cdh4::hadoop::defaults::journalnode_hostnames,
+    $dfs_journalnode_edits_dir               = 
$::cdh4::hadoop::defaults::dfs_journalnode_edits_dir,
+
     $datanode_mounts                         = 
$::cdh4::hadoop::defaults::datanode_mounts,
     $dfs_data_path                           = 
$::cdh4::hadoop::defaults::dfs_data_path,
+
     $yarn_local_path                         = 
$::cdh4::hadoop::defaults::yarn_local_path,
     $yarn_logs_path                          = 
$::cdh4::hadoop::defaults::yarn_logs_path,
     $dfs_block_size                          = 
$::cdh4::hadoop::defaults::dfs_block_size,
@@ -71,6 +90,9 @@
     $ganglia_hosts                           = 
$::cdh4::hadoop::defaults::ganglia_hosts
 ) inherits cdh4::hadoop::defaults
 {
+    # TODO: parameter validation for HA NameNode parameters.
+    # TODO: HA Fencing methods
+
     # JMX Ports
     $namenode_jmxremote_port           = 9980
     $datanode_jmxremote_port           = 9981
diff --git a/manifests/hadoop/defaults.pp b/manifests/hadoop/defaults.pp
index d5df0d0..d355631 100644
--- a/manifests/hadoop/defaults.pp
+++ b/manifests/hadoop/defaults.pp
@@ -3,6 +3,11 @@
 #
 class cdh4::hadoop::defaults {
     $config_directory                        = '/etc/hadoop/conf'
+
+    $nameservice_id                          = undef
+    $journalnode_hostnames                   = undef
+    $dfs_journalnode_edits_dir               = undef
+
     $datanode_mounts                         = undef
     $dfs_data_path                           = 'hdfs/dn'
     $yarn_local_path                         = 'yarn/local'
diff --git a/manifests/hue.pp b/manifests/hue.pp
index 6d40b4e..c51c948 100644
--- a/manifests/hue.pp
+++ b/manifests/hue.pp
@@ -221,7 +221,7 @@
         $sqoop = false
     }
 
-    $namenode_hostname = $cdh4::hadoop::namenode_hostname
+    $namenode_hostname = $cdh4::hadoop::namenode_hostnames[0]
     file { '/etc/hue/hue.ini':
         content => template($hue_ini_template),
         require => Package['hue-server'],
diff --git a/templates/hadoop/core-site.xml.erb 
b/templates/hadoop/core-site.xml.erb
index 56a83f6..fc28c28 100644
--- a/templates/hadoop/core-site.xml.erb
+++ b/templates/hadoop/core-site.xml.erb
@@ -9,9 +9,9 @@
 
   <property>
     <name><%= @use_yarn ? 'fs.defaultFS' : 'fs.default.name' %></name>
-    <value>hdfs://<%= namenode_hostname %>/</value>
+    <value>hdfs://<%= @nameservice_id ? @nameservice_id : 
(@namenode_hostnames.class == Array ? @namenode_hostnames[0] : 
@namenode_hostnames) %>/</value>
   </property>
-  
+
 <% if @io_file_buffer_size -%>
   <property>
     <name>io.file.buffer.size</name>
diff --git a/templates/hadoop/hdfs-site.xml.erb 
b/templates/hadoop/hdfs-site.xml.erb
index 81d7d4d..16d46ac 100644
--- a/templates/hadoop/hdfs-site.xml.erb
+++ b/templates/hadoop/hdfs-site.xml.erb
@@ -1,3 +1,16 @@
+<%
+# If @namenode_hostnames was not provided as an array,
+# Then assume it was a string.  Turn it into an array here
+# to make logic below easier.
+if @namenode_hostnames.class != Array
+  @namenode_hostnames = [@namenode_hostnames]
+end
+
+def namenode_name_to_id(hostname)
+  hostname.tr('.', '-')
+end
+
+-%>
 <?xml version="1.0"?>
 <!-- NOTE:  This file is managed by Puppet. -->
 
@@ -11,6 +24,47 @@
    <value>hadoop</value>
   </property>
 
+<% if @nameservice_id -%>
+  <property>
+    <name>dfs.nameservices</name>
+    <value><%= @nameservice_id %></value>
+  </property>
+
+  <property>
+    <name>dfs.ha.namenodes.<%= @nameservice_id %></name>
+    <value><%= @namenode_hostnames.collect { |host| namenode_name_to_id(host) 
}.join(',') %></value>
+  </property>
+
+<% @namenode_hostnames.each do |host| -%>
+  <property>
+    <name>dfs.namenode.rpc-address.<%= @nameservice_id %>.<%= 
namenode_name_to_id(host) %></name>
+    <value><%= host %>:8020</value>
+  </property>
+<% end # @namenode_hostnames.each -%>
+
+<% @namenode_hostnames.each do |host| -%>
+  <property>
+    <name>dfs.namenode.http-address.<%= @nameservice_id %>.<%= 
namenode_name_to_id(host) %></name>
+    <value><%= host %>:50070</value>
+  </property>
+<% end # @namenode_hostnames.each -%>
+
+  <property>
+    <name>dfs.namenode.shared.edits.dir</name>
+    <value>qjournal://<%= @journalnode_hostnames.join(':8485;') %>:8485/<%= 
@nameservice_id %></value>
+  </property>
+
+  <property>
+    <name>dfs.journalnode.edits.dir</name>
+    <value><%= @dfs_journalnode_edits_dir %></value>
+  </property>
+
+  <property>
+    <name>dfs.client.failover.proxy.provider.<%= @nameservice_id %></name>
+    
<value>org.apache.hadoop.hdfs.server.namenode.ha.ConfiguredFailoverProxyProvider</value>
+  </property>
+
+<% end # if @nameservice_id -%>
   <property>
    <name>dfs.namenode.name.dir</name>
    <value>file://<%= (dfs_name_dir.class == Array) ? 
dfs_name_dir.join(',file://') : dfs_name_dir %></value>
@@ -41,4 +95,5 @@
       This is useful for decommissioning nodes.
     </description>
   </property>
-</configuration>
\ No newline at end of file
+
+</configuration>
diff --git a/templates/hadoop/mapred-site.xml.erb 
b/templates/hadoop/mapred-site.xml.erb
index 974eb86..2bae750 100644
--- a/templates/hadoop/mapred-site.xml.erb
+++ b/templates/hadoop/mapred-site.xml.erb
@@ -14,7 +14,7 @@
 
   <property>
    <name>mapreduce.jobhistory.address</name>
-   <value><%= namenode_hostname %>:10020</value>
+   <value><%= @namenode_hostnames.class == Array ? @namenode_hostnames[0] : 
@namenode_hostnames %>:10020</value>
   </property>
 
   <property>
@@ -41,9 +41,10 @@
 <% else -%>
   <property>
     <name>mapred.job.tracker</name>
-    <value><%= namenode_hostname %>:8021</value>
+    <value><%= @namenode_hostnames.class == Array ? @namenode_hostnames[0] : 
@namenode_hostnames %>:8021</value>
   </property>
 <% if @mapreduce_system_dir -%>
+
   <property>
     <name>mapred.system.dir</name>
     <value><%= @mapreduce_system_dir %></value>
diff --git a/templates/hadoop/yarn-site.xml.erb 
b/templates/hadoop/yarn-site.xml.erb
index 3d51195..7373820 100644
--- a/templates/hadoop/yarn-site.xml.erb
+++ b/templates/hadoop/yarn-site.xml.erb
@@ -9,19 +9,19 @@
 
   <property>
     <name>yarn.resourcemanager.scheduler.address</name>
-    <value><%= namenode_hostname %>:8030</value>
+    <value><%= @namenode_hostnames.class == Array ? @namenode_hostnames[0] : 
@namenode_hostnames %>:8030</value>
   </property>
   <property>
     <name>yarn.resourcemanager.resource-tracker.address</name>
-    <value><%= namenode_hostname %>:8031</value>
+    <value><%= @namenode_hostnames.class == Array ? @namenode_hostnames[0] : 
@namenode_hostnames %>:8031</value>
   </property>
   <property>
     <name>yarn.resourcemanager.address</name>
-    <value><%= namenode_hostname %>:8032</value>
+    <value><%= @namenode_hostnames.class == Array ? @namenode_hostnames[0] : 
@namenode_hostnames %>:8032</value>
   </property>
   <property>
     <name>yarn.resourcemanager.admin.address</name>
-    <value><%= namenode_hostname %>:8033</value>
+    <value><%= @namenode_hostnames.class == Array ? @namenode_hostnames[0] : 
@namenode_hostnames %>:8033</value>
   </property>
   <property>
     <name>yarn.resourcemanager.webapp.address</name>
diff --git a/tests/datanode.pp b/tests/datanode.pp
index f2faa1d..2a27602 100644
--- a/tests/datanode.pp
+++ b/tests/datanode.pp
@@ -1,7 +1,7 @@
 # 
 
 class { '::cdh4::hadoop':
-  namenode_hostname    => 'localhost',
+  namenode_hostnames   => 'localhost',
   dfs_name_dir         => '/var/lib/hadoop/name',
 }
 
diff --git a/tests/hadoop.pp b/tests/hadoop.pp
index d760a7d..1272b30 100644
--- a/tests/hadoop.pp
+++ b/tests/hadoop.pp
@@ -1,7 +1,7 @@
 #
 
 class { '::cdh4::hadoop':
-  namenode_hostname    => 'localhost',
+  namenode_hostnames   => 'localhost',
   dfs_name_dir         => '/var/lib/hadoop/name',
 }
 
diff --git a/tests/historyserver.pp b/tests/historyserver.pp
index 5df020b..e173370 100644
--- a/tests/historyserver.pp
+++ b/tests/historyserver.pp
@@ -1,8 +1,8 @@
 #
 
 class { '::cdh4::hadoop':
-  namenode_hostname    => 'localhost',
-  dfs_name_dir         => '/var/lib/hadoop/name',
+    namenode_hostnames => 'localhost',
+    dfs_name_dir       => '/var/lib/hadoop/name',
 }
 
 # historyserver requires namenode

-- 
To view, visit https://gerrit.wikimedia.org/r/76018
To unsubscribe, visit https://gerrit.wikimedia.org/r/settings

Gerrit-MessageType: newchange
Gerrit-Change-Id: I78b1e51a5d027a6f48b8392b2ca0a35b6199a97b
Gerrit-PatchSet: 1
Gerrit-Project: operations/puppet/cdh4
Gerrit-Branch: master
Gerrit-Owner: Ottomata <[email protected]>

_______________________________________________
MediaWiki-commits mailing list
[email protected]
https://lists.wikimedia.org/mailman/listinfo/mediawiki-commits

Reply via email to