Ottomata has submitted this change and it was merged.
Change subject: Apply new analytics_cluster role to analytics1027
......................................................................
Apply new analytics_cluster role to analytics1027
Bug: T109859
Change-Id: Id7294fd7ffd076e7a44f6e8695cd4d5be6b85b12
---
A hieradata/eqiad/cdh/hive/metastore.yaml
A hieradata/eqiad/cdh/hive/server.yaml
M manifests/site.pp
A modules/role/manifests/analytics_cluster/hadoop/balancer.pp
M modules/role/manifests/analytics_cluster/hadoop/client.pp
M modules/role/manifests/analytics_cluster/hadoop/worker.pp
M modules/role/manifests/analytics_cluster/hive/client.pp
M modules/role/manifests/analytics_cluster/hive/metastore.pp
M modules/role/manifests/analytics_cluster/hive/server.pp
M modules/role/manifests/analytics_cluster/oozie/server.pp
M modules/role/manifests/analytics_cluster/refinery/camus.pp
11 files changed, 62 insertions(+), 44 deletions(-)
Approvals:
Ottomata: Verified; Looks good to me, approved
diff --git a/hieradata/eqiad/cdh/hive/metastore.yaml b/hieradata/eqiad/cdh/hive/metastore.yaml
new file mode 100644
index 0000000..2d68efe
--- /dev/null
+++ b/hieradata/eqiad/cdh/hive/metastore.yaml
@@ -0,0 +1 @@
+heapsize: 256
diff --git a/hieradata/eqiad/cdh/hive/server.yaml b/hieradata/eqiad/cdh/hive/server.yaml
new file mode 100644
index 0000000..67545e5
--- /dev/null
+++ b/hieradata/eqiad/cdh/hive/server.yaml
@@ -0,0 +1 @@
+heapsize: 1024
\ No newline at end of file
diff --git a/manifests/site.pp b/manifests/site.pp
index 1874151..00695e5 100644
--- a/manifests/site.pp
+++ b/manifests/site.pp
@@ -123,46 +123,31 @@
# (Hue, Oozie, Hive, etc.). It also submits regularly scheduled
# batch Hadoop jobs.
node 'analytics1027.eqiad.wmnet' {
- role analytics::hive::server, analytics::oozie::server, analytics::hue
+ role analytics_cluster::client,
+ analytics_cluster::hive::metastore,
+ analytics_cluster::hive::server,
+ analytics_cluster::oozie::server,
+ analytics_cluster::hue,
+
+ # Include a daily cron job to run hdfs balancer.
+ analytics_cluster::hadoop::balancer,
+
+ # Include analytics/refinery deployment target.
+ analytics_cluster::refinery,
+
+ # Add cron jobs to run Camus to import data into
+ # HDFS from Kafka.
+ analytics_cluster::refinery::camus,
+
+ # Add cron job to delete old data in HDFS
+ analytics_cluster::refinery::data::drop
include standard
include base::firewall
- # Make sure refinery happens before analytics::clients,
- # so that the hive role can properly configure Hive's
- # auxpath to include refinery-hive.jar.
- Class['role::analytics::refinery'] -> Class['role::analytics::clients']
-
- # Include analytics/refinery deployment target.
- include role::analytics::refinery
- # Include analytics clients (Hadoop, Hive etc.)
- include role::analytics::clients
-
-
- # Add cron jobs to run Camus to import data into
- # HDFS from Kafka.
- include role::analytics::refinery::camus
-
- # Add cron job to delete old data in HDFS
- include role::analytics::refinery::data::drop
-
- # Oozie runs a monitor_done_flag job to make
- # sure the _SUCCESS done-flag is written
- # for each hourly webrequest import. This
- # file is written only if the hourly import
- # reports a 0.0 percent_different in expected
- # vs actual number of sequence numbers per host.
- # These are passive checks, so if
- # icinga is not notified of a successful import
- # hourly, icinga should generate an alert.
- include role::analytics::refinery::data::check::icinga
-
- # Include a weekly cron job to run hdfs balancer.
- include role::analytics::hadoop::balancer
-
# Allow access to this analytics mysql instance from analytics networks
# NOTE: an27's mysql instance will soon be managed by the
- # role::analytics::mysql::meta class.
+ # role::analytics_cluster::database::meta class.
ferm::service{ 'analytics-mysql-meta':
proto => 'tcp',
port => '3306',
diff --git a/modules/role/manifests/analytics_cluster/hadoop/balancer.pp b/modules/role/manifests/analytics_cluster/hadoop/balancer.pp
new file mode 100644
index 0000000..05d0d4c
--- /dev/null
+++ b/modules/role/manifests/analytics_cluster/hadoop/balancer.pp
@@ -0,0 +1,21 @@
+# == Class role::analytics_cluster::hadoop::balancer
+# Runs hdfs balancer periodically to keep data balanced across all DataNodes
+class role::analytics_cluster::hadoop::balancer {
+ Class['role::analytics_cluster::hadoop::client'] -> Class['role::analytics_cluster::hadoop::balancer']
+
+ file { '/usr/local/bin/hdfs-balancer':
+ source => 'puppet:///modules/role/analytics_cluster/hadoop/hdfs-balancer',
+ mode => '0754',
+ owner => 'hdfs',
+ group => 'hdfs',
+ }
+
+ cron { 'hdfs-balancer':
+ command => '/usr/local/bin/hdfs-balancer >> /var/log/hadoop-hdfs/balancer.log 2>&1',
+ user => 'hdfs',
+ # Every day at 6am UTC.
+ minute => 0,
+ hour => 6,
+ require => File['/usr/local/bin/hdfs-balancer'],
+ }
+}
diff --git a/modules/role/manifests/analytics_cluster/hadoop/client.pp b/modules/role/manifests/analytics_cluster/hadoop/client.pp
index 52028d7..338593a 100644
--- a/modules/role/manifests/analytics_cluster/hadoop/client.pp
+++ b/modules/role/manifests/analytics_cluster/hadoop/client.pp
@@ -1,4 +1,4 @@
-# == Class role::role::analytics_cluster::hadoop::client
+# == Class role::analytics_cluster::hadoop::client
# Installs Hadoop client packages and configuration.
#
class role::analytics_cluster::hadoop::client {
diff --git a/modules/role/manifests/analytics_cluster/hadoop/worker.pp b/modules/role/manifests/analytics_cluster/hadoop/worker.pp
index ced50e8..78a688a 100644
--- a/modules/role/manifests/analytics_cluster/hadoop/worker.pp
+++ b/modules/role/manifests/analytics_cluster/hadoop/worker.pp
@@ -1,7 +1,7 @@
# == Class role::analytics_cluster::hadoop::worker
# Includes cdh::hadoop::worker classes
class role::analytics_cluster::hadoop::worker {
- system::role { 'role::role::analytics_cluster::hadoop::worker':
+ system::role { 'role::analytics_cluster::hadoop::worker':
description => 'Hadoop Worker (DataNode & NodeManager)',
}
diff --git a/modules/role/manifests/analytics_cluster/hive/client.pp b/modules/role/manifests/analytics_cluster/hive/client.pp
index 5cbc758..d6d4955 100644
--- a/modules/role/manifests/analytics_cluster/hive/client.pp
+++ b/modules/role/manifests/analytics_cluster/hive/client.pp
@@ -11,7 +11,7 @@
# TODO Remove this: https://phabricator.wikimedia.org/T114769
# If refinery is included on this node, then add
# refinery-hive.jar to the auxpath as well.
- if $::hostname == 'stat1002' or $::hostname == 'analytics1027' {
+ if $::hostname == 'stat1002' {
$auxpath = "${hcatalog_jar},file:///srv/deployment/analytics/refinery/artifacts/refinery-hive.jar"
}
else {
diff --git a/modules/role/manifests/analytics_cluster/hive/metastore.pp b/modules/role/manifests/analytics_cluster/hive/metastore.pp
index 20763ed..71a7b91 100644
--- a/modules/role/manifests/analytics_cluster/hive/metastore.pp
+++ b/modules/role/manifests/analytics_cluster/hive/metastore.pp
@@ -15,4 +15,4 @@
port => '9083',
srange => '$INTERNAL',
}
-}
\ No newline at end of file
+}
diff --git a/modules/role/manifests/analytics_cluster/hive/server.pp b/modules/role/manifests/analytics_cluster/hive/server.pp
index 3da900d..5795fdb 100644
--- a/modules/role/manifests/analytics_cluster/hive/server.pp
+++ b/modules/role/manifests/analytics_cluster/hive/server.pp
@@ -16,4 +16,4 @@
srange => '$INTERNAL',
}
-}
\ No newline at end of file
+}
diff --git a/modules/role/manifests/analytics_cluster/oozie/server.pp b/modules/role/manifests/analytics_cluster/oozie/server.pp
index 2b06d6c..ec546db 100644
--- a/modules/role/manifests/analytics_cluster/oozie/server.pp
+++ b/modules/role/manifests/analytics_cluster/oozie/server.pp
@@ -29,6 +29,16 @@
authorization_service_authorization_enabled => false,
}
+ # Oozie is creating event logs in /var/log/oozie.
+ # It rotates them but does not delete old ones. Set up cronjob to
+ # delete old files in this directory.
+ cron { 'oozie-clean-logs':
+ command => 'test -d /var/log/oozie && /usr/bin/find /var/log/oozie -type f -mtime +62 -exec rm {} >/dev/null \;',
+ minute => 5,
+ hour => 0,
+ require => Class['cdh::oozie::server'],
+ }
+
ferm::service{ 'oozie_server':
proto => 'tcp',
port => '11000',
diff --git a/modules/role/manifests/analytics_cluster/refinery/camus.pp b/modules/role/manifests/analytics_cluster/refinery/camus.pp
index 69be4f9..f0c01e3 100644
--- a/modules/role/manifests/analytics_cluster/refinery/camus.pp
+++ b/modules/role/manifests/analytics_cluster/refinery/camus.pp
@@ -3,8 +3,8 @@
# import data from Kafka into Hadoop.
#
class role::analytics_cluster::refinery::camus {
- require role::role::analytics_cluster::refinery
- include role::kafka::role::analytics::config
+ require role::analytics_cluster::refinery
+ include role::kafka::analytics::config
# Make all uses of camus::job set default kafka_brokers and camus_jar.
# If you build a new camus or refinery, and you want to use it, you'll
@@ -13,8 +13,8 @@
# the camus::job declaration.
Camus::Job {
kafka_brokers => suffix($role::kafka::analytics::config::brokers_array, ':9092'),
- camus_jar => "${role::analytics::refinery::path}/artifacts/org/wikimedia/analytics/camus-wmf/camus-wmf-0.1.0-wmf6.jar",
- check_jar => "${role::analytics::refinery::path}/artifacts/org/wikimedia/analytics/refinery/refinery-job-0.0.26.jar",
+ camus_jar => "${role::analytics_cluster::refinery::path}/artifacts/org/wikimedia/analytics/camus-wmf/camus-wmf-0.1.0-wmf6.jar",
+ check_jar => "${role::analytics_cluster::refinery::path}/artifacts/org/wikimedia/analytics/refinery/refinery-job-0.0.26.jar",
}
# Import webrequest_* topics into /wmf/data/raw/webrequest
@@ -37,6 +37,6 @@
minute => '15',
# refinery-camus contains some custom decoder classes which
# are needed to import Avro binary data.
- libjars => "${role::analytics::refinery::path}/artifacts/org/wikimedia/analytics/refinery/refinery-camus-0.0.23.jar",
+ libjars => "${role::analytics_cluster::refinery::path}/artifacts/org/wikimedia/analytics/refinery/refinery-camus-0.0.23.jar",
}
}
--
To view, visit https://gerrit.wikimedia.org/r/270795
To unsubscribe, visit https://gerrit.wikimedia.org/r/settings
Gerrit-MessageType: merged
Gerrit-Change-Id: Id7294fd7ffd076e7a44f6e8695cd4d5be6b85b12
Gerrit-PatchSet: 8
Gerrit-Project: operations/puppet
Gerrit-Branch: production
Gerrit-Owner: Ottomata <[email protected]>
Gerrit-Reviewer: Ottomata <[email protected]>
Gerrit-Reviewer: jenkins-bot <>
_______________________________________________
MediaWiki-commits mailing list
[email protected]
https://lists.wikimedia.org/mailman/listinfo/mediawiki-commits