Ottomata has submitted this change and it was merged. Change subject: Add new 14 DataNode to Hadoop puppetization ......................................................................
Add new 14 DataNode to Hadoop puppetization Also repurpose analytics1004 as Hadoop Standby NameNode. cdh4 module is no longer used, so it is being removed. Change-Id: I1a118b6184c69f5f134f2839e6165a62c9d108cb --- M .gitmodules M manifests/role/analytics/hadoop.pp M manifests/site.pp D modules/cdh4 M templates/hadoop/net-topology.py.erb 5 files changed, 93 insertions(+), 56 deletions(-) Approvals: Ottomata: Verified; Looks good to me, approved diff --git a/.gitmodules b/.gitmodules index 001370d..f07ab5a 100644 --- a/.gitmodules +++ b/.gitmodules @@ -1,6 +1,3 @@ -[submodule "modules/cdh4"] - path = modules/cdh4 - url = https://gerrit.wikimedia.org/r/operations/puppet/cdh4 [submodule "modules/zookeeper"] path = modules/zookeeper url = https://gerrit.wikimedia.org/r/operations/puppet/zookeeper diff --git a/manifests/role/analytics/hadoop.pp b/manifests/role/analytics/hadoop.pp index f6f23f4..84de1d0 100644 --- a/manifests/role/analytics/hadoop.pp +++ b/manifests/role/analytics/hadoop.pp @@ -67,24 +67,45 @@ # JournalNodes are colocated on worker DataNodes. $journalnode_hosts = [ 'analytics1011.eqiad.wmnet', # Row A2 - 'analytics1014.eqiad.wmnet', # Row C7 + 'analytics1028.eqiad.wmnet', # Row C2 'analytics1019.eqiad.wmnet', # Row D2 ] - $datanode_mounts = [ - "${hadoop_data_directory}/a", - "${hadoop_data_directory}/b", - "${hadoop_data_directory}/c", - "${hadoop_data_directory}/d", - "${hadoop_data_directory}/e", - "${hadoop_data_directory}/f", - "${hadoop_data_directory}/g", - "${hadoop_data_directory}/h", - "${hadoop_data_directory}/i", - "${hadoop_data_directory}/j", - "${hadoop_data_directory}/k", - "${hadoop_data_directory}/l" - ] + # analytics1011-analytics1020 have 12 mounts on disks sda - sdl. + if $::hostname =~ /analytics10(1[1-9]|20)/ { + $datanode_mounts = [ + "${hadoop_data_directory}/a", + "${hadoop_data_directory}/b", + "${hadoop_data_directory}/c", + "${hadoop_data_directory}/d", + "${hadoop_data_directory}/e", + "${hadoop_data_directory}/f", + "${hadoop_data_directory}/g", + "${hadoop_data_directory}/h", + "${hadoop_data_directory}/i", + "${hadoop_data_directory}/j", + "${hadoop_data_directory}/k", + "${hadoop_data_directory}/l", + ] + } + # analytics1028-analytics1041 have mounts on disks sdb - sdm. + # (sda is hardware raid on the 2 2.5 drives in the flex bays.) + elsif $::hostname =~ /analytics10(2[89]|3[0-9]|4[01])/ { + $datanode_mounts = [ + "${hadoop_data_directory}/b", + "${hadoop_data_directory}/c", + "${hadoop_data_directory}/d", + "${hadoop_data_directory}/e", + "${hadoop_data_directory}/f", + "${hadoop_data_directory}/g", + "${hadoop_data_directory}/h", + "${hadoop_data_directory}/i", + "${hadoop_data_directory}/j", + "${hadoop_data_directory}/k", + "${hadoop_data_directory}/l", + "${hadoop_data_directory}/m", + ] + } $mapreduce_map_tasks_maximum = ($::processorcount - 2) / 2 $mapreduce_reduce_tasks_maximum = ($::processorcount - 2) / 2 @@ -101,8 +122,9 @@ $hadoop_heapsize = undef $yarn_heapsize = undef - $ganglia_host = 'aggregator.eqiad.wmflabs' - $ganglia_port = 50090 + # TODO: use variables from new ganglia module once it is finished. + $ganglia_host = '239.192.1.32' + $ganglia_port = 8649 } # Configs specific to Labs. @@ -152,9 +174,8 @@ $yarn_nodemanager_resource_memory_mb = undef $net_topology_script_template = undef - # TODO: use variables from new ganglia module once it is finished. - $ganglia_host = '239.192.1.32' - $ganglia_port = 8649 + $ganglia_host = 'aggregator.eqiad.wmflabs' + $ganglia_port = 50090 } } diff --git a/manifests/site.pp b/manifests/site.pp index 56b3528..405c0ea 100644 --- a/manifests/site.pp +++ b/manifests/site.pp @@ -114,28 +114,37 @@ include role::logging::udp2log::misc } -node 'analytics1004.eqiad.wmnet' { - include standard - - # search-users using this node while it is idle - # to do some elasticsearch testing. - class { 'admin': groups => ['search-users'] } - include role::analytics -} - - -# analytics1009 is the Hadoop standby NameNode +# analytics1009 used to be the standby NameNode, +# but during cluster reinstall in 2014-07, it +# had an error when booting. analytics1004 +# has been repurposed as analytics standby NameNode. node 'analytics1009.eqiad.wmnet' { $nagios_group = 'analytics_eqiad' # ganglia cluster name. $cluster = 'analytics' # analytics1009 is analytics Ganglia aggregator for Row A - $ganglia_aggregator = true + # $ganglia_aggregator = true + + # class { 'admin': groups => ['analytics-users'] } + include admin + include standard + # include role::analytics::hadoop::standby +} + + + + +# analytics1004 is the Hadoop standby NameNode +# TODO: either fix analytics1009, or move this +# node to Row A. +node 'analytics1004.eqiad.wmnet' { + $nagios_group = 'analytics_eqiad' + # ganglia cluster name. + $cluster = 'analytics' class { 'admin': groups => ['analytics-users'] } include standard - include role::analytics::kraken include role::analytics::hadoop::standby } @@ -150,17 +159,18 @@ include standard class { 'admin': groups => ['analytics-users'] } - include role::analytics::kraken + include role::analytics::hadoop::master } -# analytics1011, analytics1013-analytics1017, analytics1019 and analytics1020 -# are Hadoop worker nodes. +# analytics1011, analytics1013-analytics1017, analytics1019, analytics1020, +# analytics1028-analytics1041 are Hadoop worker nodes. +# # NOTE: If you add, remove or move Hadoop nodes, you should edit # templates/hadoop/net-topology.py.erb to make sure the # hostname -> /datacenter/rack/row id is correct. This is # used for Hadoop network topology awareness. -node /analytics10(11|1[3-7]|19|20).eqiad.wmnet/ { +node /analytics10(11|1[3-7]|19|2[089]|3[0-9]|4[01]).eqiad.wmnet/ { $nagios_group = 'analytics_eqiad' # ganglia cluster name. $cluster = 'analytics' @@ -170,7 +180,7 @@ } include standard class { 'admin': groups => ['analytics-users'] } - include role::analytics::kraken + include role::analytics::hadoop::worker } @@ -219,27 +229,14 @@ include standard class { 'admin': groups => ['analytics-users'] } - include role::analytics::kraken - - # Including kraken import and hive partition cron jobs. - - # Imports pagecount files from dumps.wikimedia.org into Hadoop - include role::analytics::kraken::jobs::import::pagecounts - # Imports logs from Kafka into Hadoop (via Camus) - include role::analytics::kraken::jobs::import::kafka - # Creates hive partitions on all data in HDFS /wmf/data/external - include role::analytics::kraken::jobs::hive::partitions::external # Include analytics/refinery deployment target. - # NOTE: refinery roles will soon replace kraken classes. include role::analytics::refinery - include role::analytics::refinery::data::drop } # analytics1027 hosts the frontend # interfaces to Kraken and Hadoop. # (Hue, Oozie, Hive, etc.) - node 'analytics1027.eqiad.wmnet' { $nagios_group = 'analytics_eqiad' # ganglia cluster name. @@ -247,6 +244,7 @@ include standard class { 'admin': groups => ['analytics-users'] } + include role::analytics::clients include role::analytics::hive::server include role::analytics::oozie::server diff --git a/modules/cdh4 b/modules/cdh4 deleted file mode 160000 index 807519d..0000000 --- a/modules/cdh4 +++ /dev/null -Subproject commit 807519d285672cc6abbb2d3f22000285d8a7a6f9 diff --git a/templates/hadoop/net-topology.py.erb b/templates/hadoop/net-topology.py.erb index 35f2698..c4a27f7 100755 --- a/templates/hadoop/net-topology.py.erb +++ b/templates/hadoop/net-topology.py.erb @@ -5,23 +5,44 @@ # Returns a rack/row name for the given node name. # Usage: -# net-topology.sh <ipaddr|fqdn> +# net-topology.py <ipaddr|fqdn> import socket from sys import argv # Maps fqdn to a unique /datacenter/rack/row id. nodes = { - 'analytics1009.eqiad.wmnet': '/eqiad/A/2', + 'analytics1004.eqiad.wmnet': '/eqiad/B/3', 'analytics1010.eqiad.wmnet': '/eqiad/B/3', + 'analytics1011.eqiad.wmnet': '/eqiad/A/2', 'analytics1013.eqiad.wmnet': '/eqiad/A/2', + 'analytics1014.eqiad.wmnet': '/eqiad/C/7', 'analytics1015.eqiad.wmnet': '/eqiad/C/7', 'analytics1016.eqiad.wmnet': '/eqiad/C/7', 'analytics1017.eqiad.wmnet': '/eqiad/C/7', + 'analytics1019.eqiad.wmnet': '/eqiad/D/2', 'analytics1020.eqiad.wmnet': '/eqiad/D/2', + + 'analytics1028.eqiad.wmnet': '/eqiad/C/2', + 'analytics1029.eqiad.wmnet': '/eqiad/C/2', + 'analytics1030.eqiad.wmnet': '/eqiad/C/2', + 'analytics1031.eqiad.wmnet': '/eqiad/C/2', + + 'analytics1032.eqiad.wmnet': '/eqiad/C/3', + 'analytics1033.eqiad.wmnet': '/eqiad/C/3', + 'analytics1034.eqiad.wmnet': '/eqiad/C/3', + + 'analytics1035.eqiad.wmnet': '/eqiad/D/2', + 'analytics1036.eqiad.wmnet': '/eqiad/D/2', + 'analytics1037.eqiad.wmnet': '/eqiad/D/2', + + 'analytics1038.eqiad.wmnet': '/eqiad/D/4', + 'analytics1039.eqiad.wmnet': '/eqiad/D/4', + 'analytics1040.eqiad.wmnet': '/eqiad/D/4', + 'analytics1041.eqiad.wmnet': '/eqiad/D/4' } if len(argv) < 2: -- To view, visit https://gerrit.wikimedia.org/r/145675 To unsubscribe, visit https://gerrit.wikimedia.org/r/settings Gerrit-MessageType: merged Gerrit-Change-Id: I1a118b6184c69f5f134f2839e6165a62c9d108cb Gerrit-PatchSet: 4 Gerrit-Project: operations/puppet Gerrit-Branch: production Gerrit-Owner: Ottomata <[email protected]> Gerrit-Reviewer: Ottomata <[email protected]> Gerrit-Reviewer: jenkins-bot <> _______________________________________________ MediaWiki-commits mailing list [email protected] https://lists.wikimedia.org/mailman/listinfo/mediawiki-commits
