Ori.livneh has submitted this change and it was merged.

Change subject: Revert "Include hiera configuration for labs and prod analytics_cluster role"
......................................................................


Revert "Include hiera configuration for labs and prod analytics_cluster role"

This reverts commit 1f3a4a6a7b3406a7ae043157ddde41a7f72a077d.

Change-Id: I87020f77d38bd163b302d7c3b7965e6d1958d20f
---
M hieradata/eqiad.yaml
M hieradata/labs.yaml
D hieradata/role/common/analytics_cluster/database/meta.yaml
D hieradata/role/common/analytics_cluster/hadoop/master.yaml
D hieradata/role/common/analytics_cluster/hadoop/standby.yaml
D hieradata/role/common/analytics_cluster/hadoop/worker.yaml
D hieradata/role/common/analytics_cluster/hue.yaml
M modules/cdh
M modules/role/manifests/analytics_cluster/database/meta.pp
M modules/role/manifests/analytics_cluster/hadoop/client.pp
M modules/role/manifests/analytics_cluster/hue.pp
11 files changed, 20 insertions(+), 225 deletions(-)

Approvals:
  Ori.livneh: Verified; Looks good to me, approved



diff --git a/hieradata/eqiad.yaml b/hieradata/eqiad.yaml
index 4b5524e..67ee866 100644
--- a/hieradata/eqiad.yaml
+++ b/hieradata/eqiad.yaml
@@ -149,55 +149,3 @@
 oozie_host: analytics1027.eqiad.wmnet
 
 ldap_labs_hostname: ldap-labs.eqiad.wikimedia.org
-
-#
-# Analytics Cluster Configuration:
-#
-cdh::hadoop::cluster_name: analytics-hadoop
-
-cdh::hadoop::namenode_hosts:
-    - analytics1001.eqiad.wmnet
-    - analytics1002.eqiad.wmnet
-
-cdh::hadoop::journalnode_hosts:
-    - analytics1052.eqiad.wmnet  # Row A3
-    - analytics1028.eqiad.wmnet  # Row C2
-    - analytics1035.eqiad.wmnet  # Row D2
-
-# analytics* Dell R720s have mounts on disks sdb - sdm.
-# (sda is hardware raid on the 2 2.5 drives in the flex bays.)
-cdh::hadoop::datanode_mounts:
-    - /var/lib/hadoop/data/b
-    - /var/lib/hadoop/data/c
-    - /var/lib/hadoop/data/d
-    - /var/lib/hadoop/data/e
-    - /var/lib/hadoop/data/f
-    - /var/lib/hadoop/data/g
-    - /var/lib/hadoop/data/h
-    - /var/lib/hadoop/data/i
-    - /var/lib/hadoop/data/j
-    - /var/lib/hadoop/data/k
-    - /var/lib/hadoop/data/l
-    - /var/lib/hadoop/data/m
-
-cdh::hadoop::net_topology_script_template: 'role/analytics_cluster/hadoop/net-topology.py.erb'
-
-# Increase NameNode heapsize independent from other daemons
-cdh::hadoop::namenode_opts: -Xmx4096m
-
-# Ensure that users in these groups have home directories in HDFS.
-cdh::hadoop::hadoop_users_posix_groups: "analytics-users analytics-privatedata-users analytics-admins analytics-search-users"
-
-cdh::hadoop::mapreduce_reduce_shuffle_parallelcopies: 10
-cdh::hadoop::mapreduce_task_io_sort_mb: 200
-cdh::hadoop::mapreduce_task_io_sort_factor: 10
-
-# NOTE: Hadoop Memory Settings are configured in the
-# role::analytics_cluster::hadoop::client class.
-# Many of these settings are configured programatically.
-
-cdh::hive::metastore_host: analytics1027.eqiad.wmnet
-cdh::oozie::oozie_host: analytics1027.eqiad.wmnet
-
-# Don't auto create Hue users from LDAP in production.
-cdh::hue::ldap_create_users_on_login
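
For context: these keys were consumed by Puppet through explicit hiera() lookups (the client.pp hunk further down still shows hiera('cdh::hadoop::namenode_hosts')). A minimal sketch of that pattern, assuming the cdh::hadoop parameter names mirror the removed hiera keys:

    # Sketch only, not part of this change; parameter names are assumed
    # to match the hiera keys removed above.
    class { 'cdh::hadoop':
        cluster_name    => hiera('cdh::hadoop::cluster_name'),
        namenode_hosts  => hiera('cdh::hadoop::namenode_hosts'),
        datanode_mounts => hiera('cdh::hadoop::datanode_mounts'),
    }
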
diff --git a/hieradata/labs.yaml b/hieradata/labs.yaml
index dcaf509..2e4c9a1 100644
--- a/hieradata/labs.yaml
+++ b/hieradata/labs.yaml
@@ -60,9 +60,3 @@
 
 # By default, nag project admins about puppet breakage
 send_puppet_failure_emails: true
-
-
-# For any Analytics Cluster in labs:
-"cdh::hadoop::datanode_mounts":
-    - /var/lib/hadoop/data/a
-    - /var/lib/hadoop/data/b
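
The two mounts removed from labs.yaml here appear to be hardcoded back into role::analytics_cluster::hadoop::client by this revert, as the client.pp hunk below shows:

    # Restored in client.pp by this revert; $hadoop_data_directory is the
    # role's data directory variable, defined elsewhere in the class
    # (presumably /var/lib/hadoop/data in labs).
    datanode_mounts => [
        "${hadoop_data_directory}/a",
        "${hadoop_data_directory}/b",
    ],
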
diff --git a/hieradata/role/common/analytics_cluster/database/meta.yaml b/hieradata/role/common/analytics_cluster/database/meta.yaml
deleted file mode 100644
index 1c3255a..0000000
--- a/hieradata/role/common/analytics_cluster/database/meta.yaml
+++ /dev/null
@@ -1,3 +0,0 @@
-debdeploy::grains:
-  debdeploy-mysql-analytics:
-    value: standard
diff --git a/hieradata/role/common/analytics_cluster/hadoop/master.yaml b/hieradata/role/common/analytics_cluster/hadoop/master.yaml
deleted file mode 100644
index 74f7c05..0000000
--- a/hieradata/role/common/analytics_cluster/hadoop/master.yaml
+++ /dev/null
@@ -1,14 +0,0 @@
-nagios_group: analytics_eqiad
-cluster: analytics
-admin::groups:
-  - analytics-users
-  - analytics-privatedata-users
-  - analytics-roots
-  - analytics-admins
-  # elasticsearch::analytics creates the analytics-search user and group
-  # that analytics-search-users are allowed to sudo to.  This is used
-  # for deploying files to HDFS.
-  - analytics-search-users
-debdeploy::grains:
-  debdeploy-hadoop-master:
-    value: standard
diff --git a/hieradata/role/common/analytics_cluster/hadoop/standby.yaml b/hieradata/role/common/analytics_cluster/hadoop/standby.yaml
deleted file mode 100644
index 1813013..0000000
--- a/hieradata/role/common/analytics_cluster/hadoop/standby.yaml
+++ /dev/null
@@ -1,14 +0,0 @@
-nagios_group: analytics_eqiad
-cluster: analytics
-admin::groups:
-  - analytics-users
-  - analytics-privatedata-users
-  - analytics-roots
-  - analytics-admins
-  # elasticsearch::analytics creates the analytics-search user and group
-  # that analytics-search-users are allowed to sudo to.  This is used
-  # for deploying files to HDFS.
-  - analytics-search-users
-debdeploy::grains:
-  debdeploy-hadoop-standby:
-    value: standard
diff --git a/hieradata/role/common/analytics_cluster/hadoop/worker.yaml b/hieradata/role/common/analytics_cluster/hadoop/worker.yaml
deleted file mode 100644
index 25d88ba..0000000
--- a/hieradata/role/common/analytics_cluster/hadoop/worker.yaml
+++ /dev/null
@@ -1,12 +0,0 @@
-nagios_group: analytics_eqiad
-cluster: analytics
-admin::groups:
-  - analytics-roots
-  - analytics-admins
-debdeploy::grains:
-  debdeploy-hadoop-worker:
-    value: standard
-
-# Analytics worker disks are large.  We will install a custom
-# NRPE check for them, so the base module's should ignore them.
-base::monitoring::host::nrpe_check_disk_options: -w 6% -c 3% -l -e -A -i "/var/lib/hadoop/data"
diff --git a/hieradata/role/common/analytics_cluster/hue.yaml b/hieradata/role/common/analytics_cluster/hue.yaml
deleted file mode 100644
index f3c3b72..0000000
--- a/hieradata/role/common/analytics_cluster/hue.yaml
+++ /dev/null
@@ -1,3 +0,0 @@
-debdeploy::grains:
-  debdeploy-hue:
-    value: standard
diff --git a/modules/cdh b/modules/cdh
index 23e1347..424de83 160000
--- a/modules/cdh
+++ b/modules/cdh
-Subproject commit 23e134768be8009e696242a78eb9ddb842db9fbe
+Subproject commit 424de833758b1723fe0b4e3164048428993685c1
diff --git a/modules/role/manifests/analytics_cluster/database/meta.pp b/modules/role/manifests/analytics_cluster/database/meta.pp
index f745e0b..a3efd12 100644
--- a/modules/role/manifests/analytics_cluster/database/meta.pp
+++ b/modules/role/manifests/analytics_cluster/database/meta.pp
@@ -3,7 +3,7 @@
 #
 class role::analytics_cluster::database::meta {
     # Some CDH database init scripts need Java to run.
-    require role::analytics_cluster::java
+    require_package('openjdk-7-jdk')
 
     class { 'mariadb::packages_wmf':
         mariadb10 => true
diff --git a/modules/role/manifests/analytics_cluster/hadoop/client.pp b/modules/role/manifests/analytics_cluster/hadoop/client.pp
index 430244e..c5427aa 100644
--- a/modules/role/manifests/analytics_cluster/hadoop/client.pp
+++ b/modules/role/manifests/analytics_cluster/hadoop/client.pp
@@ -22,12 +22,12 @@
             'cdh::hadoop::resourcemanager_hosts', hiera('cdh::hadoop::namenode_hosts')
         ),
         zookeeper_hosts                             => keys(hiera('zookeeper_hosts', undef)),
+        datanode_mounts => [
+            "${hadoop_data_directory}/a",
+            "${hadoop_data_directory}/b",
+        ],
         dfs_name_dir                                => [$hadoop_name_directory],
         dfs_journalnode_edits_dir                   => $hadoop_journal_directory,
-
-        # 256 MB
-        dfs_block_size                              => 268435456,
-        io_file_buffer_size                         => 131072,
 
         # Turn on Snappy compression by default for maps and final outputs
         mapreduce_intermediate_compression_codec    => 'org.apache.hadoop.io.compress.SnappyCodec',
@@ -111,122 +111,6 @@
         exec { 'hadoop-yarn-logging-helper-reset':
             command   => '/usr/local/bin/hadoop-yarn-logging-helper.sh reset',
             subscribe => File['/usr/local/bin/hadoop-yarn-logging-helper.sh'],
-        }
-    }
-
-    # NOTE: Hadoop Memory Settings are configured here instead of
-    # hiera. Many of these settings are configured programatically and
-    # based on dynamic facter variables.
-    if $::realm == 'production' {
-        # Configure memory based on these recommendations and then adjusted:
-        # http://docs.hortonworks.com/HDPDocuments/HDP2/HDP-2.0.6.0/bk_installing_manually_book/content/rpm-chap1-11.html
-
-        ### These Map/Reduce and YARN ApplicationMaster master settings are
-        # settable per job, and the defaults when clients submit them are often
-        # picked up from the local versions of the /etc/hadoop/conf/{mapred,yarn}-site.xml files.
-        # That means they should not be set relative to the local node facter variables, and as such
-        # use a hardcoded value of memory_per_container to work from.  Otherwise a job
-        # submitted from a relatively small client node will use bad job defaults.
-        #
-        # We currently run two different types of worker nodes in production.
-        # The older Dells have 48G of RAM, and the newer ones have 64G.
-        #
-        # Using + 0 here ensures that these variables are
-        # integers (Fixnums) and won't throw errors
-        # when used with min()/max() functions.
-
-        # Worker nodes are heterogenous, so I don't want to use a variable
-        # memory per container size across the cluster.  Larger nodes will just
-        # allocate a few more containers.  Setting this to 2G.
-        $memory_per_container_mb                  = 2048 + 0
-
-        # Map container size and JVM max heap size (-XmX)
-        $mapreduce_map_memory_mb                  = floor($memory_per_container_mb)
-        $mapreduce_reduce_memory_mb               = floor(2 * $memory_per_container_mb)
-        $map_jvm_heap_size                        = floor(0.8 * $memory_per_container_mb)
-        # Reduce container size and JVM max heap size (-Xmx)
-        $mapreduce_map_java_opts                  = "-Xmx${map_jvm_heap_size}m"
-        $reduce_jvm_heap_size                     = floor(0.8 * 2 * $memory_per_container_mb)
-        $mapreduce_reduce_java_opts               = "-Xmx${reduce_jvm_heap_size}m"
-
-        # Yarn ApplicationMaster container size and  max heap size (-Xmx)
-        $yarn_app_mapreduce_am_resource_mb        = floor(2 * $memory_per_container_mb)
-        $mapreduce_am_heap_size                   = floor(0.8 * 2 * $memory_per_container_mb)
-        $yarn_app_mapreduce_am_command_opts       = "-Xmx${mapreduce_am_heap_size}m"
-
-        # The amount of RAM for NodeManagers will only be be used by
-        # NodeManager processes running on the worker nodes themselves.
-        # Client nodes that submit jobs will ignore these settings.
-        # These are safe to set relative to the node currently evaluating
-        # puppet's facter variables.
-
-        # Select a 'reserve' memory size for the
-        # OS and other Hadoop processes.
-        if $::memorysize_mb <= 1024 {
-            $reserve_memory_mb = 256
-        }
-        elsif $::memorysize_mb <= 2048 {
-            $reserve_memory_mb = 512
-        }
-        elsif $::memorysize_mb <= 4096 {
-            $reserve_memory_mb = 1024
-        }
-        elsif $::memorysize_mb <= 16384 {
-            $reserve_memory_mb = 2048
-        }
-        elsif $::memorysize_mb <= 24576 {
-            $reserve_memory_mb = 4096
-        }
-        elsif $::memorysize_mb <= 49152 {
-            $reserve_memory_mb = 6144
-        }
-        elsif $::memorysize_mb <= 73728 {
-            $reserve_memory_mb = 8192
-        }
-        elsif $::memorysize_mb <= 98304 {
-            $reserve_memory_mb = 12288
-        }
-        elsif $::memorysize_mb <= 131072 {
-            $reserve_memory_mb = 24576
-        }
-        elsif $::memorysize_mb <= 262144 {
-            $reserve_memory_mb = 32768
-        }
-        else {
-            $reserve_memory_mb = 65536
-        }
-
-        # Memory available for use by Hadoop jobs.
-        $available_memory_mb = $::memorysize_mb - $reserve_memory_mb
-
-        # Since I have chosen a static $memory_per_container of 2048 across all
-        # node sizes, we should just choose to give NodeManagers
-        # $available_memory_mb to work with.
-        # This will give nodes with 48G of memory about 21 containers, and
-        # nodes with 64G memory about 28 containers.
-        #
-        # This is the total amount of memory that NodeManagers
-        # will use for allocation to containers.
-        $yarn_nodemanager_resource_memory_mb      = floor($available_memory_mb)
-
-        # Setting _minimum_allocation_mb to 0 to allow Impala to submit small reservation requests.
-        $yarn_scheduler_minimum_allocation_mb     = 0
-        $yarn_scheduler_maximum_allocation_mb     = $yarn_nodemanager_resource_memory_mb
-        # Setting minimum_allocation_vcores to 0 to allow Impala to submit small reservation requests.
-        $yarn_scheduler_minimum_allocation_vcores = 0
-
-        Class['cdh::hadoop'] {
-            mapreduce_map_memory_mb                     => $mapreduce_map_memory_mb,
-            mapreduce_reduce_memory_mb                  => $mapreduce_reduce_memory_mb,
-            mapreduce_map_java_opts                     => $mapreduce_map_java_opts,
-            mapreduce_reduce_java_opts                  => $mapreduce_reduce_java_opts,
-            yarn_app_mapreduce_am_resource_mb           => $yarn_app_mapreduce_am_resource_mb,
-            yarn_app_mapreduce_am_command_opts          => $yarn_app_mapreduce_am_command_opts,
-
-            yarn_nodemanager_resource_memory_mb         => $yarn_nodemanager_resource_memory_mb,
-            yarn_scheduler_minimum_allocation_mb        => $yarn_scheduler_minimum_allocation_mb,
-            yarn_scheduler_maximum_allocation_mb        => $yarn_scheduler_maximum_allocation_mb,
-            yarn_scheduler_minimum_allocation_vcores    => $yarn_scheduler_minimum_allocation_vcores,
         }
     }
 
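
To make the reverted sizing logic concrete, the arithmetic it performed for the two production worker sizes mentioned in its comments works out as follows (illustrative values, assuming facter reports exactly 48G/64G):

    # Worked example of the removed sizing code above; not part of this change.
    $memory_per_container_mb = 2048
    # 48G worker: memorysize_mb = 49152 -> reserve = 6144,
    #             available = 49152 - 6144 = 43008 -> ~21 containers
    # 64G worker: memorysize_mb = 65536 -> reserve = 8192,
    #             available = 65536 - 8192 = 57344 -> ~28 containers
    # Map JVM heap: floor(0.8 * 2048) = 1638 -> mapreduce_map_java_opts = '-Xmx1638m'
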
diff --git a/modules/role/manifests/analytics_cluster/hue.pp b/modules/role/manifests/analytics_cluster/hue.pp
index 4e208be..a70459d 100644
--- a/modules/role/manifests/analytics_cluster/hue.pp
+++ b/modules/role/manifests/analytics_cluster/hue.pp
@@ -23,6 +23,21 @@
     # LDAP Labs config is the same as LDAP in production.
     include ldap::role::config::labs
 
+    # if ($::realm == 'production') {
+    #     include passwords::analytics
+    #
+    #     $secret_key       = $passwords::analytics::hue_secret_key
+    #     $hive_server_host = 'analytics1027.eqiad.wmnet'
+    #     # Disable automatic Hue user creation in production.
+    #     $ldap_create_users_on_login = false
+    # }
+    # elsif ($::realm == 'labs') {
+    #     $secret_key       = 'oVEAAG5dp02MAuIScIetX3NZlmBkhOpagK92wY0GhBbq6ooc0B3rosmcxDg2fJBM'
+    #     # Assume that in Labs, Hue should run on the main master Hadoop NameNode.
+    #     $hive_server_host = $role::analytics::hadoop::config::namenode_hosts[0]
+    #     $ldap_create_users_on_login = true
+    # }
+
     class { 'cdh::hue':
         # We always host hive-server on the same node as hive-metastore.
         hive_server_host           => hiera('cdh::hive::metastore_host'),
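
For reference, the hiera-driven variant being reverted would have let cdh::hue read the LDAP behaviour from the (now removed) eqiad.yaml key. A minimal sketch; note that the removed key was left without a value, so the false default below is only illustrative:

    # Sketch only, not part of this change.
    class { 'cdh::hue':
        hive_server_host           => hiera('cdh::hive::metastore_host'),
        ldap_create_users_on_login => hiera('cdh::hue::ldap_create_users_on_login', false),
    }
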

-- 
To view, visit https://gerrit.wikimedia.org/r/269849
To unsubscribe, visit https://gerrit.wikimedia.org/r/settings

Gerrit-MessageType: merged
Gerrit-Change-Id: I87020f77d38bd163b302d7c3b7965e6d1958d20f
Gerrit-PatchSet: 1
Gerrit-Project: operations/puppet
Gerrit-Branch: production
Gerrit-Owner: Ori.livneh <o...@wikimedia.org>
Gerrit-Reviewer: Ori.livneh <o...@wikimedia.org>
