Ottomata has submitted this change and it was merged.

Change subject: Apply new analytics_cluster role to analytics1027
......................................................................


Apply new analytics_cluster role to analytics1027

Bug: T109859
Change-Id: Id7294fd7ffd076e7a44f6e8695cd4d5be6b85b12
---
A hieradata/eqiad/cdh/hive/metastore.yaml
A hieradata/eqiad/cdh/hive/server.yaml
M manifests/site.pp
A modules/role/manifests/analytics_cluster/hadoop/balancer.pp
M modules/role/manifests/analytics_cluster/hadoop/client.pp
M modules/role/manifests/analytics_cluster/hadoop/worker.pp
M modules/role/manifests/analytics_cluster/hive/client.pp
M modules/role/manifests/analytics_cluster/hive/metastore.pp
M modules/role/manifests/analytics_cluster/hive/server.pp
M modules/role/manifests/analytics_cluster/oozie/server.pp
M modules/role/manifests/analytics_cluster/refinery/camus.pp
11 files changed, 62 insertions(+), 44 deletions(-)

Approvals:
  Ottomata: Verified; Looks good to me, approved



diff --git a/hieradata/eqiad/cdh/hive/metastore.yaml 
b/hieradata/eqiad/cdh/hive/metastore.yaml
new file mode 100644
index 0000000..2d68efe
--- /dev/null
+++ b/hieradata/eqiad/cdh/hive/metastore.yaml
@@ -0,0 +1 @@
+heapsize: 256
diff --git a/hieradata/eqiad/cdh/hive/server.yaml 
b/hieradata/eqiad/cdh/hive/server.yaml
new file mode 100644
index 0000000..67545e5
--- /dev/null
+++ b/hieradata/eqiad/cdh/hive/server.yaml
@@ -0,0 +1 @@
+heapsize: 1024
\ No newline at end of file
diff --git a/manifests/site.pp b/manifests/site.pp
index 1874151..00695e5 100644
--- a/manifests/site.pp
+++ b/manifests/site.pp
@@ -123,46 +123,31 @@
 # (Hue, Oozie, Hive, etc.).  It also submits regularly scheduled
 # batch Hadoop jobs.
 node 'analytics1027.eqiad.wmnet' {
-    role analytics::hive::server, analytics::oozie::server, analytics::hue
+    role analytics_cluster::client,
+        analytics_cluster::hive::metastore,
+        analytics_cluster::hive::server,
+        analytics_cluster::oozie::server,
+        analytics_cluster::hue,
+
+        # Include a weekly cron job to run hdfs balancer.
+        analytics_cluster::hadoop::balancer,
+
+        # Include analytics/refinery deployment target.
+        analytics_cluster::refinery,
+
+        # Add cron jobs to run Camus to import data into
+        # HDFS from Kafka.
+        analytics_cluster::refinery::camus,
+
+        # Add cron job to delete old data in HDFS
+        analytics_cluster::refinery::data::drop
 
     include standard
     include base::firewall
 
-    # Make sure refinery happens before analytics::clients,
-    # so that the hive role can properly configure Hive's
-    # auxpath to include refinery-hive.jar.
-    Class['role::analytics::refinery'] -> Class['role::analytics::clients']
-
-    # Include analytics/refinery deployment target.
-    include role::analytics::refinery
-    # Include analytics clients (Hadoop, Hive etc.)
-    include role::analytics::clients
-
-
-    # Add cron jobs to run Camus to import data into
-    # HDFS from Kafka.
-    include role::analytics::refinery::camus
-
-    # Add cron job to delete old data in HDFS
-    include role::analytics::refinery::data::drop
-
-    # Oozie runs a monitor_done_flag job to make
-    # sure the _SUCCESS done-flag is written
-    # for each hourly webrequest import.  This
-    # file is written only if the hourly import
-    # reports a 0.0 percent_different in expected
-    # vs actual number of sequence numbers per host.
-    # These are passive checks, so if
-    # icinga is not notified of a successful import
-    # hourly, icinga should generate an alert.
-    include role::analytics::refinery::data::check::icinga
-
-    # Include a weekly cron job to run hdfs balancer.
-    include role::analytics::hadoop::balancer
-
     # Allow access to this analytics mysql instance from analytics networks
     # NOTE: an27's mysql instance will soon be managed by the
-    # role::analytics::mysql::meta class.
+    # role::analytics_cluster::database::meta class.
     ferm::service{ 'analytics-mysql-meta':
         proto  => 'tcp',
         port   => '3306',
diff --git a/modules/role/manifests/analytics_cluster/hadoop/balancer.pp 
b/modules/role/manifests/analytics_cluster/hadoop/balancer.pp
new file mode 100644
index 0000000..05d0d4c
--- /dev/null
+++ b/modules/role/manifests/analytics_cluster/hadoop/balancer.pp
@@ -0,0 +1,21 @@
+# == Class role::analytics_cluster::hadoop::balancer
+# Runs hdfs balancer periodically to keep data balanced across all DataNodes
+class role::analytics_cluster::hadoop::balancer {
+    Class['role::analytics_cluster::hadoop::client'] -> 
Class['role::analytics_cluster::hadoop::balancer']
+
+    file { '/usr/local/bin/hdfs-balancer':
+        source => 
'puppet:///modules/role/analytics_cluster/hadoop/hdfs-balancer',
+        mode   => '0754',
+        owner  => 'hdfs',
+        group  => 'hdfs',
+    }
+
+    cron { 'hdfs-balancer':
+        command => '/usr/local/bin/hdfs-balancer >> 
/var/log/hadoop-hdfs/balancer.log 2>&1',
+        user    => 'hdfs',
+        # Every day at 6am UTC.
+        minute  => 0,
+        hour    => 6,
+        require => File['/usr/local/bin/hdfs-balancer'],
+    }
+}
diff --git a/modules/role/manifests/analytics_cluster/hadoop/client.pp 
b/modules/role/manifests/analytics_cluster/hadoop/client.pp
index 52028d7..338593a 100644
--- a/modules/role/manifests/analytics_cluster/hadoop/client.pp
+++ b/modules/role/manifests/analytics_cluster/hadoop/client.pp
@@ -1,4 +1,4 @@
-# == Class role::role::analytics_cluster::hadoop::client
+# == Class role::analytics_cluster::hadoop::client
 # Installs Hadoop client packages and configuration.
 #
 class role::analytics_cluster::hadoop::client {
diff --git a/modules/role/manifests/analytics_cluster/hadoop/worker.pp 
b/modules/role/manifests/analytics_cluster/hadoop/worker.pp
index ced50e8..78a688a 100644
--- a/modules/role/manifests/analytics_cluster/hadoop/worker.pp
+++ b/modules/role/manifests/analytics_cluster/hadoop/worker.pp
@@ -1,7 +1,7 @@
 # == Class role::role::analytics_cluster::hadoop::worker
 # Includes cdh::hadoop::worker classes
 class role::analytics_cluster::hadoop::worker {
-    system::role { 'role::role::analytics_cluster::hadoop::worker':
+    system::role { 'role::analytics_cluster::hadoop::worker':
         description => 'Hadoop Worker (DataNode & NodeManager)',
     }
 
diff --git a/modules/role/manifests/analytics_cluster/hive/client.pp 
b/modules/role/manifests/analytics_cluster/hive/client.pp
index 5cbc758..d6d4955 100644
--- a/modules/role/manifests/analytics_cluster/hive/client.pp
+++ b/modules/role/manifests/analytics_cluster/hive/client.pp
@@ -11,7 +11,7 @@
     # TODO Remove this: https://phabricator.wikimedia.org/T114769
     # If refinery is included on this node, then add
     # refinery-hive.jar to the auxpath as well.
-    if $::hostname == 'stat1002' or $::hostname == 'analytics1027' {
+    if $::hostname == 'stat1002' {
         $auxpath = 
"${hcatalog_jar},file:///srv/deployment/analytics/refinery/artifacts/refinery-hive.jar"
     }
     else {
diff --git a/modules/role/manifests/analytics_cluster/hive/metastore.pp 
b/modules/role/manifests/analytics_cluster/hive/metastore.pp
index 20763ed..71a7b91 100644
--- a/modules/role/manifests/analytics_cluster/hive/metastore.pp
+++ b/modules/role/manifests/analytics_cluster/hive/metastore.pp
@@ -15,4 +15,4 @@
         port   => '9083',
         srange => '$INTERNAL',
     }
-}
\ No newline at end of file
+}
diff --git a/modules/role/manifests/analytics_cluster/hive/server.pp 
b/modules/role/manifests/analytics_cluster/hive/server.pp
index 3da900d..5795fdb 100644
--- a/modules/role/manifests/analytics_cluster/hive/server.pp
+++ b/modules/role/manifests/analytics_cluster/hive/server.pp
@@ -16,4 +16,4 @@
         srange => '$INTERNAL',
     }
 
-}
\ No newline at end of file
+}
diff --git a/modules/role/manifests/analytics_cluster/oozie/server.pp 
b/modules/role/manifests/analytics_cluster/oozie/server.pp
index 2b06d6c..ec546db 100644
--- a/modules/role/manifests/analytics_cluster/oozie/server.pp
+++ b/modules/role/manifests/analytics_cluster/oozie/server.pp
@@ -29,6 +29,16 @@
         authorization_service_authorization_enabled => false,
     }
 
+    # Oozie is creating event logs in /var/log/oozie.
+    # It rotates them but does not delete old ones.  Set up cronjob to
+    # delete old files in this directory.
+    cron { 'oozie-clean-logs':
+        command => 'test -d /var/log/oozie && /usr/bin/find /var/log/oozie 
-type f -mtime +62 -exec rm {} >/dev/null \;',
+        minute  => 5,
+        hour    => 0,
+        require => Class['cdh::oozie::server'],
+    }
+
     ferm::service{ 'oozie_server':
         proto  => 'tcp',
         port   => '11000',
diff --git a/modules/role/manifests/analytics_cluster/refinery/camus.pp 
b/modules/role/manifests/analytics_cluster/refinery/camus.pp
index 69be4f9..f0c01e3 100644
--- a/modules/role/manifests/analytics_cluster/refinery/camus.pp
+++ b/modules/role/manifests/analytics_cluster/refinery/camus.pp
@@ -3,8 +3,8 @@
 # import data from Kafka into Hadoop.
 #
 class role::analytics_cluster::refinery::camus {
-    require role::role::analytics_cluster::refinery
-    include role::kafka::role::analytics::config
+    require role::analytics_cluster::refinery
+    include role::kafka::analytics::config
 
     # Make all uses of camus::job set default kafka_brokers and camus_jar.
     # If you build a new camus or refinery, and you want to use it, you'll
@@ -13,8 +13,8 @@
     # the camus::job declaration.
     Camus::Job {
         kafka_brokers => 
suffix($role::kafka::analytics::config::brokers_array, ':9092'),
-        camus_jar     => 
"${role::analytics::refinery::path}/artifacts/org/wikimedia/analytics/camus-wmf/camus-wmf-0.1.0-wmf6.jar",
-        check_jar     => 
"${role::analytics::refinery::path}/artifacts/org/wikimedia/analytics/refinery/refinery-job-0.0.26.jar",
+        camus_jar     => 
"${role::analytics_cluster::refinery::path}/artifacts/org/wikimedia/analytics/camus-wmf/camus-wmf-0.1.0-wmf6.jar",
+        check_jar     => 
"${role::analytics_cluster::refinery::path}/artifacts/org/wikimedia/analytics/refinery/refinery-job-0.0.26.jar",
     }
 
     # Import webrequest_* topics into /wmf/data/raw/webrequest
@@ -37,6 +37,6 @@
         minute  => '15',
         # refinery-camus contains some custom decoder classes which
         # are needed to import Avro binary data.
-        libjars => 
"${role::analytics::refinery::path}/artifacts/org/wikimedia/analytics/refinery/refinery-camus-0.0.23.jar",
+        libjars => 
"${role::analytics_cluster::refinery::path}/artifacts/org/wikimedia/analytics/refinery/refinery-camus-0.0.23.jar",
     }
 }

-- 
To view, visit https://gerrit.wikimedia.org/r/270795
To unsubscribe, visit https://gerrit.wikimedia.org/r/settings

Gerrit-MessageType: merged
Gerrit-Change-Id: Id7294fd7ffd076e7a44f6e8695cd4d5be6b85b12
Gerrit-PatchSet: 8
Gerrit-Project: operations/puppet
Gerrit-Branch: production
Gerrit-Owner: Ottomata <[email protected]>
Gerrit-Reviewer: Ottomata <[email protected]>
Gerrit-Reviewer: jenkins-bot <>

_______________________________________________
MediaWiki-commits mailing list
[email protected]
https://lists.wikimedia.org/mailman/listinfo/mediawiki-commits

Reply via email to