Ori.livneh has submitted this change and it was merged.

Change subject: Revert "Include hiera configuration for labs and prod analytics_cluster role"
......................................................................

Revert "Include hiera configuration for labs and prod analytics_cluster role" This reverts commit 1f3a4a6a7b3406a7ae043157ddde41a7f72a077d. Change-Id: I87020f77d38bd163b302d7c3b7965e6d1958d20f --- M hieradata/eqiad.yaml M hieradata/labs.yaml D hieradata/role/common/analytics_cluster/database/meta.yaml D hieradata/role/common/analytics_cluster/hadoop/master.yaml D hieradata/role/common/analytics_cluster/hadoop/standby.yaml D hieradata/role/common/analytics_cluster/hadoop/worker.yaml D hieradata/role/common/analytics_cluster/hue.yaml M modules/cdh M modules/role/manifests/analytics_cluster/database/meta.pp M modules/role/manifests/analytics_cluster/hadoop/client.pp M modules/role/manifests/analytics_cluster/hue.pp 11 files changed, 20 insertions(+), 225 deletions(-) Approvals: Ori.livneh: Verified; Looks good to me, approved diff --git a/hieradata/eqiad.yaml b/hieradata/eqiad.yaml index 4b5524e..67ee866 100644 --- a/hieradata/eqiad.yaml +++ b/hieradata/eqiad.yaml @@ -149,55 +149,3 @@ oozie_host: analytics1027.eqiad.wmnet ldap_labs_hostname: ldap-labs.eqiad.wikimedia.org - -# -# Analytics Cluster Configuration: -# -cdh::hadoop::cluster_name: analytics-hadoop - -cdh::hadoop::namenode_hosts: - - analytics1001.eqiad.wmnet - - analytics1002.eqiad.wmnet - -cdh::hadoop::journalnode_hosts: - - analytics1052.eqiad.wmnet # Row A3 - - analytics1028.eqiad.wmnet # Row C2 - - analytics1035.eqiad.wmnet # Row D2 - -# analytics* Dell R720s have mounts on disks sdb - sdm. -# (sda is hardware raid on the 2 2.5 drives in the flex bays.) -cdh::hadoop::datanode_mounts: - - /var/lib/hadoop/data/b - - /var/lib/hadoop/data/c - - /var/lib/hadoop/data/d - - /var/lib/hadoop/data/e - - /var/lib/hadoop/data/f - - /var/lib/hadoop/data/g - - /var/lib/hadoop/data/h - - /var/lib/hadoop/data/i - - /var/lib/hadoop/data/j - - /var/lib/hadoop/data/k - - /var/lib/hadoop/data/l - - /var/lib/hadoop/data/m - -cdh::hadoop::net_topology_script_template: 'role/analytics_cluster/hadoop/net-topology.py.erb' - -# Increase NameNode heapsize independent from other daemons -cdh::hadoop::namenode_opts: -Xmx4096m - -# Ensure that users in these groups have home directories in HDFS. -cdh::hadoop::hadoop_users_posix_groups: "analytics-users analytics-privatedata-users analytics-admins analytics-search-users" - -cdh::hadoop::mapreduce_reduce_shuffle_parallelcopies: 10 -cdh::hadoop::mapreduce_task_io_sort_mb: 200 -cdh::hadoop::mapreduce_task_io_sort_factor: 10 - -# NOTE: Hadoop Memory Settings are configured in the -# role::analytics_cluster::hadoop::client class. -# Many of these settings are configured programatically. - -cdh::hive::metastore_host: analytics1027.eqiad.wmnet -cdh::oozie::oozie_host: analytics1027.eqiad.wmnet - -# Don't auto create Hue users from LDAP in production. 
-cdh::hue::ldap_create_users_on_login

diff --git a/hieradata/labs.yaml b/hieradata/labs.yaml
index dcaf509..2e4c9a1 100644
--- a/hieradata/labs.yaml
+++ b/hieradata/labs.yaml
@@ -60,9 +60,3 @@
 
 # By default, nag project admins about puppet breakage
 send_puppet_failure_emails: true
-
-
-# For any Analytics Cluster in labs:
-"cdh::hadoop::datanode_mounts":
-  - /var/lib/hadoop/data/a
-  - /var/lib/hadoop/data/b

diff --git a/hieradata/role/common/analytics_cluster/database/meta.yaml b/hieradata/role/common/analytics_cluster/database/meta.yaml
deleted file mode 100644
index 1c3255a..0000000
--- a/hieradata/role/common/analytics_cluster/database/meta.yaml
+++ /dev/null
@@ -1,3 +0,0 @@
-debdeploy::grains:
-  debdeploy-mysql-analytics:
-    value: standard

diff --git a/hieradata/role/common/analytics_cluster/hadoop/master.yaml b/hieradata/role/common/analytics_cluster/hadoop/master.yaml
deleted file mode 100644
index 74f7c05..0000000
--- a/hieradata/role/common/analytics_cluster/hadoop/master.yaml
+++ /dev/null
@@ -1,14 +0,0 @@
-nagios_group: analytics_eqiad
-cluster: analytics
-admin::groups:
-  - analytics-users
-  - analytics-privatedata-users
-  - analytics-roots
-  - analytics-admins
-  # elasticsearch::analytics creates the analytics-search user and group
-  # that analytics-search-users are allowed to sudo to. This is used
-  # for deploying files to HDFS.
-  - analytics-search-users
-debdeploy::grains:
-  debdeploy-hadoop-master:
-    value: standard

diff --git a/hieradata/role/common/analytics_cluster/hadoop/standby.yaml b/hieradata/role/common/analytics_cluster/hadoop/standby.yaml
deleted file mode 100644
index 1813013..0000000
--- a/hieradata/role/common/analytics_cluster/hadoop/standby.yaml
+++ /dev/null
@@ -1,14 +0,0 @@
-nagios_group: analytics_eqiad
-cluster: analytics
-admin::groups:
-  - analytics-users
-  - analytics-privatedata-users
-  - analytics-roots
-  - analytics-admins
-  # elasticsearch::analytics creates the analytics-search user and group
-  # that analytics-search-users are allowed to sudo to. This is used
-  # for deploying files to HDFS.
-  - analytics-search-users
-debdeploy::grains:
-  debdeploy-hadoop-standby:
-    value: standard

diff --git a/hieradata/role/common/analytics_cluster/hadoop/worker.yaml b/hieradata/role/common/analytics_cluster/hadoop/worker.yaml
deleted file mode 100644
index 25d88ba..0000000
--- a/hieradata/role/common/analytics_cluster/hadoop/worker.yaml
+++ /dev/null
@@ -1,12 +0,0 @@
-nagios_group: analytics_eqiad
-cluster: analytics
-admin::groups:
-  - analytics-roots
-  - analytics-admins
-debdeploy::grains:
-  debdeploy-hadoop-worker:
-    value: standard
-
-# Analytics worker disks are large. We will install a custom
-# NRPE check for them, so the base module's should ignore them.
-base::monitoring::host::nrpe_check_disk_options: -w 6% -c 3% -l -e -A -i "/var/lib/hadoop/data"

diff --git a/hieradata/role/common/analytics_cluster/hue.yaml b/hieradata/role/common/analytics_cluster/hue.yaml
deleted file mode 100644
index f3c3b72..0000000
--- a/hieradata/role/common/analytics_cluster/hue.yaml
+++ /dev/null
@@ -1,3 +0,0 @@
-debdeploy::grains:
-  debdeploy-hue:
-    value: standard

diff --git a/modules/cdh b/modules/cdh
index 23e1347..424de83 160000
--- a/modules/cdh
+++ b/modules/cdh
-Subproject commit 23e134768be8009e696242a78eb9ddb842db9fbe
+Subproject commit 424de833758b1723fe0b4e3164048428993685c1

diff --git a/modules/role/manifests/analytics_cluster/database/meta.pp b/modules/role/manifests/analytics_cluster/database/meta.pp
index f745e0b..a3efd12 100644
--- a/modules/role/manifests/analytics_cluster/database/meta.pp
+++ b/modules/role/manifests/analytics_cluster/database/meta.pp
@@ -3,7 +3,7 @@
 #
 class role::analytics_cluster::database::meta {
     # Some CDH database init scripts need Java to run.
-    require role::analytics_cluster::java
+    require_package('openjdk-7-jdk')
 
     class { 'mariadb::packages_wmf':
         mariadb10 => true

diff --git a/modules/role/manifests/analytics_cluster/hadoop/client.pp b/modules/role/manifests/analytics_cluster/hadoop/client.pp
index 430244e..c5427aa 100644
--- a/modules/role/manifests/analytics_cluster/hadoop/client.pp
+++ b/modules/role/manifests/analytics_cluster/hadoop/client.pp
@@ -22,12 +22,12 @@
             'cdh::hadoop::resourcemanager_hosts', hiera('cdh::hadoop::namenode_hosts')
         ),
         zookeeper_hosts => keys(hiera('zookeeper_hosts', undef)),
+        datanode_mounts => [
+            "${hadoop_data_directory}/a",
+            "${hadoop_data_directory}/b",
+        ],
         dfs_name_dir => [$hadoop_name_directory],
         dfs_journalnode_edits_dir => $hadoop_journal_directory,
-
-        # 256 MB
-        dfs_block_size => 268435456,
-        io_file_buffer_size => 131072,
 
         # Turn on Snappy compression by default for maps and final outputs
         mapreduce_intermediate_compression_codec => 'org.apache.hadoop.io.compress.SnappyCodec',
@@ -111,122 +111,6 @@
         exec { 'hadoop-yarn-logging-helper-reset':
             command   => '/usr/local/bin/hadoop-yarn-logging-helper.sh reset',
             subscribe => File['/usr/local/bin/hadoop-yarn-logging-helper.sh'],
-        }
-    }
-
-    # NOTE: Hadoop Memory Settings are configured here instead of
-    # hiera. Many of these settings are configured programatically and
-    # based on dynamic facter variables.
-    if $::realm == 'production' {
-        # Configure memory based on these recommendations and then adjusted:
-        # http://docs.hortonworks.com/HDPDocuments/HDP2/HDP-2.0.6.0/bk_installing_manually_book/content/rpm-chap1-11.html
-
-        ### These Map/Reduce and YARN ApplicationMaster master settings are
-        # settable per job, and the defaults when clients submit them are often
-        # picked up from the local versions of the /etc/hadoop/conf/{mapred,yarn}-site.xml files.
-        # That means they should not be set relative to the local node facter variables, and as such
-        # use a hardcoded value of memory_per_container to work from. Otherwise a job
-        # submitted from a relatively small client node will use bad job defaults.
-        #
-        # We currently run two different types of worker nodes in production.
-        # The older Dells have 48G of RAM, and the newer ones have 64G.
-        #
-        # Using + 0 here ensures that these variables are
-        # integers (Fixnums) and won't throw errors
-        # when used with min()/max() functions.
-
-        # Worker nodes are heterogenous, so I don't want to use a variable
-        # memory per container size across the cluster. Larger nodes will just
-        # allocate a few more containers. Setting this to 2G.
-        $memory_per_container_mb = 2048 + 0
-
-        # Map container size and JVM max heap size (-XmX)
-        $mapreduce_map_memory_mb = floor($memory_per_container_mb)
-        $mapreduce_reduce_memory_mb = floor(2 * $memory_per_container_mb)
-        $map_jvm_heap_size = floor(0.8 * $memory_per_container_mb)
-        # Reduce container size and JVM max heap size (-Xmx)
-        $mapreduce_map_java_opts = "-Xmx${map_jvm_heap_size}m"
-        $reduce_jvm_heap_size = floor(0.8 * 2 * $memory_per_container_mb)
-        $mapreduce_reduce_java_opts = "-Xmx${reduce_jvm_heap_size}m"
-
-        # Yarn ApplicationMaster container size and max heap size (-Xmx)
-        $yarn_app_mapreduce_am_resource_mb = floor(2 * $memory_per_container_mb)
-        $mapreduce_am_heap_size = floor(0.8 * 2 * $memory_per_container_mb)
-        $yarn_app_mapreduce_am_command_opts = "-Xmx${mapreduce_am_heap_size}m"
-
-        # The amount of RAM for NodeManagers will only be be used by
-        # NodeManager processes running on the worker nodes themselves.
-        # Client nodes that submit jobs will ignore these settings.
-        # These are safe to set relative to the node currently evaluating
-        # puppet's facter variables.
-
-        # Select a 'reserve' memory size for the
-        # OS and other Hadoop processes.
-        if $::memorysize_mb <= 1024 {
-            $reserve_memory_mb = 256
-        }
-        elsif $::memorysize_mb <= 2048 {
-            $reserve_memory_mb = 512
-        }
-        elsif $::memorysize_mb <= 4096 {
-            $reserve_memory_mb = 1024
-        }
-        elsif $::memorysize_mb <= 16384 {
-            $reserve_memory_mb = 2048
-        }
-        elsif $::memorysize_mb <= 24576 {
-            $reserve_memory_mb = 4096
-        }
-        elsif $::memorysize_mb <= 49152 {
-            $reserve_memory_mb = 6144
-        }
-        elsif $::memorysize_mb <= 73728 {
-            $reserve_memory_mb = 8192
-        }
-        elsif $::memorysize_mb <= 98304 {
-            $reserve_memory_mb = 12288
-        }
-        elsif $::memorysize_mb <= 131072 {
-            $reserve_memory_mb = 24576
-        }
-        elsif $::memorysize_mb <= 262144 {
-            $reserve_memory_mb = 32768
-        }
-        else {
-            $reserve_memory_mb = 65536
-        }
-
-        # Memory available for use by Hadoop jobs.
-        $available_memory_mb = $::memorysize_mb - $reserve_memory_mb
-
-        # Since I have chosen a static $memory_per_container of 2048 across all
-        # node sizes, we should just choose to give NodeManagers
-        # $available_memory_mb to work with.
-        # This will give nodes with 48G of memory about 21 containers, and
-        # nodes with 64G memory about 28 containers.
-        #
-        # This is the total amount of memory that NodeManagers
-        # will use for allocation to containers.
-        $yarn_nodemanager_resource_memory_mb = floor($available_memory_mb)
-
-        # Setting _minimum_allocation_mb to 0 to allow Impala to submit small reservation requests.
-        $yarn_scheduler_minimum_allocation_mb = 0
-        $yarn_scheduler_maximum_allocation_mb = $yarn_nodemanager_resource_memory_mb
-        # Setting minimum_allocation_vcores to 0 to allow Impala to submit small reservation requests.
-        $yarn_scheduler_minimum_allocation_vcores = 0
-
-        Class['cdh::hadoop'] {
-            mapreduce_map_memory_mb => $mapreduce_map_memory_mb,
-            mapreduce_reduce_memory_mb => $mapreduce_reduce_memory_mb,
-            mapreduce_map_java_opts => $mapreduce_map_java_opts,
-            mapreduce_reduce_java_opts => $mapreduce_reduce_java_opts,
-            yarn_app_mapreduce_am_resource_mb => $yarn_app_mapreduce_am_resource_mb,
-            yarn_app_mapreduce_am_command_opts => $yarn_app_mapreduce_am_command_opts,
-
-            yarn_nodemanager_resource_memory_mb => $yarn_nodemanager_resource_memory_mb,
-            yarn_scheduler_minimum_allocation_mb => $yarn_scheduler_minimum_allocation_mb,
-            yarn_scheduler_maximum_allocation_mb => $yarn_scheduler_maximum_allocation_mb,
-            yarn_scheduler_minimum_allocation_vcores => $yarn_scheduler_minimum_allocation_vcores,
         }
     }

diff --git a/modules/role/manifests/analytics_cluster/hue.pp b/modules/role/manifests/analytics_cluster/hue.pp
index 4e208be..a70459d 100644
--- a/modules/role/manifests/analytics_cluster/hue.pp
+++ b/modules/role/manifests/analytics_cluster/hue.pp
@@ -23,6 +23,21 @@
     # LDAP Labs config is the same as LDAP in production.
     include ldap::role::config::labs
 
+    # if ($::realm == 'production') {
+    #     include passwords::analytics
+    #
+    #     $secret_key = $passwords::analytics::hue_secret_key
+    #     $hive_server_host = 'analytics1027.eqiad.wmnet'
+    #     # Disable automatic Hue user creation in production.
+    #     $ldap_create_users_on_login = false
+    # }
+    # elsif ($::realm == 'labs') {
+    #     $secret_key = 'oVEAAG5dp02MAuIScIetX3NZlmBkhOpagK92wY0GhBbq6ooc0B3rosmcxDg2fJBM'
+    #     # Assume that in Labs, Hue should run on the main master Hadoop NameNode.
+    #     $hive_server_host = $role::analytics::hadoop::config::namenode_hosts[0]
+    #     $ldap_create_users_on_login = true
+    # }
+
     class { 'cdh::hue':
         # We always host hive-server on the same node as hive-metastore.
         hive_server_host => hiera('cdh::hive::metastore_host'),

--
To view, visit https://gerrit.wikimedia.org/r/269849
To unsubscribe, visit https://gerrit.wikimedia.org/r/settings

Gerrit-MessageType: merged
Gerrit-Change-Id: I87020f77d38bd163b302d7c3b7965e6d1958d20f
Gerrit-PatchSet: 1
Gerrit-Project: operations/puppet
Gerrit-Branch: production
Gerrit-Owner: Ori.livneh <o...@wikimedia.org>
Gerrit-Reviewer: Ori.livneh <o...@wikimedia.org>

_______________________________________________
MediaWiki-commits mailing list
MediaWiki-commits@lists.wikimedia.org
https://lists.wikimedia.org/mailman/listinfo/mediawiki-commits