Ottomata has uploaded a new change for review.
https://gerrit.wikimedia.org/r/76018
Change subject: Puppetizing HA NameNode via Quorum Based JournalNode.
......................................................................
Puppetizing HA NameNode via Quorum Based JournalNode.
THIS COMMIT IS NOT YET READY FOR REVIEW.
Change-Id: I78b1e51a5d027a6f48b8392b2ca0a35b6199a97b
---
M README.md
M manifests/hadoop.pp
M manifests/hadoop/defaults.pp
M manifests/hue.pp
M templates/hadoop/core-site.xml.erb
M templates/hadoop/hdfs-site.xml.erb
M templates/hadoop/mapred-site.xml.erb
M templates/hadoop/yarn-site.xml.erb
M tests/datanode.pp
M tests/hadoop.pp
M tests/historyserver.pp
11 files changed, 107 insertions(+), 24 deletions(-)
git pull ssh://gerrit.wikimedia.org:29418/operations/puppet/cdh4
refs/changes/18/76018/1
diff --git a/README.md b/README.md
index b73cea6..e9070fd 100644
--- a/README.md
+++ b/README.md
@@ -45,7 +45,7 @@
include cdh4
class { "cdh4::hadoop":
- namenode_hostname => "namenode.hostname.org",
+ namenode_hostnames => ["namenode.hostname.org"],
datanode_mounts => [
"/var/lib/hadoop/data/a",
"/var/lib/hadoop/data/b",
diff --git a/manifests/hadoop.pp b/manifests/hadoop.pp
index 6c7a4a5..f3da4b0 100644
--- a/manifests/hadoop.pp
+++ b/manifests/hadoop.pp
@@ -12,12 +12,18 @@
# yarn.nodemanager.log-dirs will be set to each of
${dfs_data_dir_mounts}/$yarn_logs_path
#
# == Parameters
-# $namenode_hostname - hostname of the NameNode. This will also be used
as the hostname for the historyserver, proxyserver, and resourcemanager.
-# $dfs_name_dir - full path to hadoop NameNode name directory. This
can be an array of paths or a single string path.
+# $namenode_hostnames - hostname(s) of the NameNode(s). This will also be
+# used as the hostname for the historyserver,
proxyserver,
+#                        and resourcemanager. Use an array of hostnames if
you
+#                        are configuring Hadoop with HA NameNodes.
+# $dfs_name_dir - Path to hadoop NameNode name directory. This
+# can be an array of paths or a single string path.
# $config_directory - path of the hadoop config directory.
-# $datanode_mounts - array of JBOD mount points. Hadoop datanode and
mapreduce/yarn directories will be here.
+# $datanode_mounts - array of JBOD mount points. Hadoop datanode and
+# mapreduce/yarn directories will be here.
# $dfs_data_path - path relative to JBOD mount point for HDFS data
directories.
-# $enable_jmxremote - enables remote JMX connections for all Hadoop
services. Ports are not currently configurable. Default: true.
+# $enable_jmxremote - enables remote JMX connections for all Hadoop
services.
+# Ports are not currently configurable. Default:
true.
# $yarn_local_path - path relative to JBOD mount point for yarn local
directories.
# $yarn_logs_path - path relative to JBOD mount point for yarn log
directories.
# $dfs_block_size - HDFS block size in bytes. Default 64MB.
@@ -33,19 +39,32 @@
# $mapreduce_task_io_sort_factor
# $mapreduce_map_java_opts
# $mapreduce_child_java_opts
-# $mapreduce_intermediate_compression - If true, intermediate MapReduce
data will be compressed with Snappy. Default: true.
-# $mapreduce_final_compession - If true, Final output of MapReduce
jobs will be compressed with Snappy. Default: false.
+# $mapreduce_intermediate_compression - If true, intermediate MapReduce
data
+# will be compressed with Snappy.
Default: true.
+# $mapreduce_final_compession - If true, Final output of MapReduce
+# jobs will be compressed with
Snappy. Default: false.
# $yarn_nodemanager_resource_memory_mb
-# $yarn_resourcemanager_scheduler_class - If you change this (e.g. to
FairScheduler), you should also provide your own scheduler config .xml files
outside of the cdh4 module.
+# $yarn_resourcemanager_scheduler_class - If you change this (e.g. to
+# FairScheduler), you should also
provide
+# your own scheduler config .xml
files
+# outside of the cdh4 module.
# $use_yarn
-# $ganglia_hosts - Set this to an array of ganglia
host:ports if you want to enable ganglia sinks in hadoop-metrics2.properites
+# $ganglia_hosts - Set this to an array of ganglia
host:ports
+# if you want to enable ganglia
sinks in hadoop-metrics2.properties
#
class cdh4::hadoop(
- $namenode_hostname,
+ $namenode_hostnames,
$dfs_name_dir,
+
$config_directory =
$::cdh4::hadoop::defaults::config_directory,
+
+ $nameservice_id =
$::cdh4::hadoop::defaults::nameservice_id,
+ $journalnode_hostnames =
$::cdh4::hadoop::defaults::journalnode_hostnames,
+ $dfs_journalnode_edits_dir =
$::cdh4::hadoop::defaults::dfs_journalnode_edits_dir,
+
$datanode_mounts =
$::cdh4::hadoop::defaults::datanode_mounts,
$dfs_data_path =
$::cdh4::hadoop::defaults::dfs_data_path,
+
$yarn_local_path =
$::cdh4::hadoop::defaults::yarn_local_path,
$yarn_logs_path =
$::cdh4::hadoop::defaults::yarn_logs_path,
$dfs_block_size =
$::cdh4::hadoop::defaults::dfs_block_size,
@@ -71,6 +90,9 @@
$ganglia_hosts =
$::cdh4::hadoop::defaults::ganglia_hosts
) inherits cdh4::hadoop::defaults
{
+ # TODO: parameter validation for HA NameNode parameters.
+ # TODO: HA Fencing methods
+
# JMX Ports
$namenode_jmxremote_port = 9980
$datanode_jmxremote_port = 9981
diff --git a/manifests/hadoop/defaults.pp b/manifests/hadoop/defaults.pp
index d5df0d0..d355631 100644
--- a/manifests/hadoop/defaults.pp
+++ b/manifests/hadoop/defaults.pp
@@ -3,6 +3,11 @@
#
class cdh4::hadoop::defaults {
$config_directory = '/etc/hadoop/conf'
+
+ $nameservice_id = undef
+ $journalnode_hostnames = undef
+ $dfs_journalnode_edits_dir = undef
+
$datanode_mounts = undef
$dfs_data_path = 'hdfs/dn'
$yarn_local_path = 'yarn/local'
diff --git a/manifests/hue.pp b/manifests/hue.pp
index 6d40b4e..c51c948 100644
--- a/manifests/hue.pp
+++ b/manifests/hue.pp
@@ -221,7 +221,7 @@
$sqoop = false
}
- $namenode_hostname = $cdh4::hadoop::namenode_hostname
+ $namenode_hostname = $cdh4::hadoop::namenode_hostnames[0]
file { '/etc/hue/hue.ini':
content => template($hue_ini_template),
require => Package['hue-server'],
diff --git a/templates/hadoop/core-site.xml.erb
b/templates/hadoop/core-site.xml.erb
index 56a83f6..fc28c28 100644
--- a/templates/hadoop/core-site.xml.erb
+++ b/templates/hadoop/core-site.xml.erb
@@ -9,9 +9,9 @@
<property>
<name><%= @use_yarn ? 'fs.defaultFS' : 'fs.default.name' %></name>
- <value>hdfs://<%= namenode_hostname %>/</value>
+ <value>hdfs://<%= @nameservice_id ? @nameservice_id :
(@namenode_hostnames.class == Array ? @namenode_hostnames[0] :
@namenode_hostnames) %>/</value>
</property>
-
+
<% if @io_file_buffer_size -%>
<property>
<name>io.file.buffer.size</name>
diff --git a/templates/hadoop/hdfs-site.xml.erb
b/templates/hadoop/hdfs-site.xml.erb
index 81d7d4d..16d46ac 100644
--- a/templates/hadoop/hdfs-site.xml.erb
+++ b/templates/hadoop/hdfs-site.xml.erb
@@ -1,3 +1,16 @@
+<%
+# If @namenode_hostnames was not provided as an array,
+# then assume it was a string. Turn it into an array here
+# to make logic below easier.
+if @namenode_hostnames.class != Array
+ @namenode_hostnames = [@namenode_hostnames]
+end
+
+def namenode_name_to_id(hostname)
+ hostname.tr('.', '-')
+end
+
+-%>
<?xml version="1.0"?>
<!-- NOTE: This file is managed by Puppet. -->
@@ -11,6 +24,47 @@
<value>hadoop</value>
</property>
+<% if @nameservice_id -%>
+ <property>
+ <name>dfs.nameservices</name>
+ <value><%= @nameservice_id %></value>
+ </property>
+
+ <property>
+ <name>dfs.ha.namenodes.<%= @nameservice_id %></name>
+ <value><%= @namenode_hostnames.collect { |host| namenode_name_to_id(host)
}.join(',') %></value>
+ </property>
+
+<% @namenode_hostnames.each do |host| -%>
+ <property>
+ <name>dfs.namenode.rpc-address.<%= @nameservice_id %>.<%=
namenode_name_to_id(host) %></name>
+ <value><%= host %>:8020</value>
+ </property>
+<% end # @namenode_hostnames.each -%>
+
+<% @namenode_hostnames.each do |host| -%>
+ <property>
+ <name>dfs.namenode.http-address.<%= @nameservice_id %>.<%=
namenode_name_to_id(host) %></name>
+ <value><%= host %>:50070</value>
+ </property>
+<% end # @namenode_hostnames.each -%>
+
+ <property>
+ <name>dfs.namenode.shared.edits.dir</name>
+ <value>qjournal://<%= @journalnode_hostnames.join(':8485;') %>:8485/<%=
@nameservice_id %></value>
+ </property>
+
+ <property>
+ <name>dfs.journalnode.edits.dir</name>
+ <value><%= @dfs_journalnode_edits_dir %></value>
+ </property>
+
+ <property>
+ <name>dfs.client.failover.proxy.provider.<%= @nameservice_id %></name>
+
<value>org.apache.hadoop.hdfs.server.namenode.ha.ConfiguredFailoverProxyProvider</value>
+ </property>
+
+<% end # if @nameservice_id -%>
<property>
<name>dfs.namenode.name.dir</name>
<value>file://<%= (dfs_name_dir.class == Array) ?
dfs_name_dir.join(',file://') : dfs_name_dir %></value>
@@ -41,4 +95,5 @@
This is useful for decommissioning nodes.
</description>
</property>
-</configuration>
\ No newline at end of file
+
+</configuration>
diff --git a/templates/hadoop/mapred-site.xml.erb
b/templates/hadoop/mapred-site.xml.erb
index 974eb86..2bae750 100644
--- a/templates/hadoop/mapred-site.xml.erb
+++ b/templates/hadoop/mapred-site.xml.erb
@@ -14,7 +14,7 @@
<property>
<name>mapreduce.jobhistory.address</name>
- <value><%= namenode_hostname %>:10020</value>
+ <value><%= @namenode_hostnames.class == Array ? @namenode_hostnames[0] :
@namenode_hostnames %>:10020</value>
</property>
<property>
@@ -41,9 +41,10 @@
<% else -%>
<property>
<name>mapred.job.tracker</name>
- <value><%= namenode_hostname %>:8021</value>
+ <value><%= @namenode_hostnames.class == Array ? @namenode_hostnames[0] :
@namenode_hostnames %>:8021</value>
</property>
<% if @mapreduce_system_dir -%>
+
<property>
<name>mapred.system.dir</name>
<value><%= @mapreduce_system_dir %></value>
diff --git a/templates/hadoop/yarn-site.xml.erb
b/templates/hadoop/yarn-site.xml.erb
index 3d51195..7373820 100644
--- a/templates/hadoop/yarn-site.xml.erb
+++ b/templates/hadoop/yarn-site.xml.erb
@@ -9,19 +9,19 @@
<property>
<name>yarn.resourcemanager.scheduler.address</name>
- <value><%= namenode_hostname %>:8030</value>
+ <value><%= @namenode_hostnames.class == Array ? @namenode_hostnames[0] :
@namenode_hostnames %>:8030</value>
</property>
<property>
<name>yarn.resourcemanager.resource-tracker.address</name>
- <value><%= namenode_hostname %>:8031</value>
+ <value><%= @namenode_hostnames.class == Array ? @namenode_hostnames[0] :
@namenode_hostnames %>:8031</value>
</property>
<property>
<name>yarn.resourcemanager.address</name>
- <value><%= namenode_hostname %>:8032</value>
+ <value><%= @namenode_hostnames.class == Array ? @namenode_hostnames[0] :
@namenode_hostnames %>:8032</value>
</property>
<property>
<name>yarn.resourcemanager.admin.address</name>
- <value><%= namenode_hostname %>:8033</value>
+ <value><%= @namenode_hostnames.class == Array ? @namenode_hostnames[0] :
@namenode_hostnames %>:8033</value>
</property>
<property>
<name>yarn.resourcemanager.webapp.address</name>
diff --git a/tests/datanode.pp b/tests/datanode.pp
index f2faa1d..2a27602 100644
--- a/tests/datanode.pp
+++ b/tests/datanode.pp
@@ -1,7 +1,7 @@
#
class { '::cdh4::hadoop':
- namenode_hostname => 'localhost',
+ namenode_hostnames => 'localhost',
dfs_name_dir => '/var/lib/hadoop/name',
}
diff --git a/tests/hadoop.pp b/tests/hadoop.pp
index d760a7d..1272b30 100644
--- a/tests/hadoop.pp
+++ b/tests/hadoop.pp
@@ -1,7 +1,7 @@
#
class { '::cdh4::hadoop':
- namenode_hostname => 'localhost',
+ namenode_hostnames => 'localhost',
dfs_name_dir => '/var/lib/hadoop/name',
}
diff --git a/tests/historyserver.pp b/tests/historyserver.pp
index 5df020b..e173370 100644
--- a/tests/historyserver.pp
+++ b/tests/historyserver.pp
@@ -1,8 +1,8 @@
#
class { '::cdh4::hadoop':
- namenode_hostname => 'localhost',
- dfs_name_dir => '/var/lib/hadoop/name',
+ namenode_hostnames => 'localhost',
+ dfs_name_dir => '/var/lib/hadoop/name',
}
# historyserver requires namenode
--
To view, visit https://gerrit.wikimedia.org/r/76018
To unsubscribe, visit https://gerrit.wikimedia.org/r/settings
Gerrit-MessageType: newchange
Gerrit-Change-Id: I78b1e51a5d027a6f48b8392b2ca0a35b6199a97b
Gerrit-PatchSet: 1
Gerrit-Project: operations/puppet/cdh4
Gerrit-Branch: master
Gerrit-Owner: Ottomata <[email protected]>
_______________________________________________
MediaWiki-commits mailing list
[email protected]
https://lists.wikimedia.org/mailman/listinfo/mediawiki-commits