Elukey has uploaded a new change for review. (
https://gerrit.wikimedia.org/r/372131 )
Change subject: Introduce role::analytics_cluster::coordinator
......................................................................
Introduce role::analytics_cluster::coordinator
Bug: T167790
Change-Id: I94480677b16821054aaa85ac7d0d7d345c0fec40
---
M manifests/site.pp
A modules/role/manifests/analytics_cluster/coordinator.pp
2 files changed, 52 insertions(+), 43 deletions(-)
git pull ssh://gerrit.wikimedia.org:29418/operations/puppet
refs/changes/31/372131/1
diff --git a/manifests/site.pp b/manifests/site.pp
index 64f9d64..9779104 100644
--- a/manifests/site.pp
+++ b/manifests/site.pp
@@ -64,50 +64,8 @@
include ::base::firewall
}
-# This node hosts Oozie and Hive servers,
-# as well as a MySQL instance that stores
-# meta data associated with those and other
-# Analytics Cluster services.
-#
-# This node is also is a launch pad for various cron based Hadoop jobs.
-# Many ingestion jobs need a starting point. Oozie is a great
-# Hadoop job scheduler, but it is not better than cron
-# for some jobs that need to be launched at regular time
-# intervals. Cron is used for those. These crons
-# do not use local resources, instead, they launch
-# Hadoop jobs that run throughout the cluster.
-#
node 'analytics1003.eqiad.wmnet' {
- role(analytics_cluster::client,
- analytics_cluster::database::meta,
- # Back up analytics-meta MySQL instance
- # to analytics1002. $dest is configured in
- # hieradata/role/eqiad/analytics_cluster/database/meta/backup.yaml
- analytics_cluster::database::meta::backup,
- analytics_cluster::hive::metastore::database,
- analytics_cluster::oozie::server::database,
- analytics_cluster::hive::metastore,
- analytics_cluster::hive::server,
- analytics_cluster::oozie::server,
-
- # Include a weekly cron job to run hdfs balancer.
- analytics_cluster::hadoop::balancer,
-
- # We need hive-site.xml in HDFS. This can be included
- # on any node with a Hive client, but we really only
- # want to include it in one place. analytics1003
- # is a little special and standalone, so we do it here.
- analytics_cluster::hive::site_hdfs,
-
- # Camus crons import data into
- # from Kafka into HDFS.
- analytics_cluster::refinery::job::camus,
-
- # Various crons that launch Hadoop jobs.
- analytics_cluster::refinery,
- analytics_cluster::refinery::job::data_drop,
- analytics_cluster::refinery::job::project_namespace_map,
- analytics_cluster::refinery::job::sqoop_mediawiki)
+ role(analytics_cluster::coordinator)
include ::standard
include ::base::firewall
diff --git a/modules/role/manifests/analytics_cluster/coordinator.pp
b/modules/role/manifests/analytics_cluster/coordinator.pp
new file mode 100644
index 0000000..e34f3ec
--- /dev/null
+++ b/modules/role/manifests/analytics_cluster/coordinator.pp
@@ -0,0 +1,51 @@
+# == Class role::analytics_cluster::coordinator
+#
+# This role includes Oozie and Hive servers, as well as a MySQL instance
+# that stores meta data associated with those and other
+# Analytics Cluster services.
+#
+# This role is a launch pad for various cron based Hadoop jobs.
+# Many ingestion jobs need a starting point. Oozie is a great
+# Hadoop job scheduler, but it is not better than cron
+# for some jobs that need to be launched at regular time
+# intervals. Cron is used for those. These crons
+# do not use local resources, instead, they launch
+# Hadoop jobs that run throughout the cluster.
+#
+# This role sets up a node responsible for coordinating and orchestrating
+# a Hadoop cluster equipped with tools like Camus, Hive, Oozie and
+# the Analytics Refinery.
+#
+class role::analytics_cluster::coordinator {
+    include ::role::analytics_cluster::client
+    include ::role::analytics_cluster::database::meta
+
+    # Back up analytics-meta MySQL instance
+    # to analytics1002. $dest is configured in
+    # hieradata/role/eqiad/analytics_cluster/database/meta/backup.yaml
+    include ::role::analytics_cluster::database::meta::backup
+    include ::role::analytics_cluster::hive::metastore::database
+    include ::role::analytics_cluster::oozie::server::database
+    include ::role::analytics_cluster::hive::metastore
+    include ::role::analytics_cluster::hive::server
+    include ::role::analytics_cluster::oozie::server
+
+    # Include a weekly cron job to run hdfs balancer.
+    include ::role::analytics_cluster::hadoop::balancer
+
+    # We need hive-site.xml in HDFS. This can be included
+    # on any node with a Hive client, but we really only
+    # want to include it in one place. analytics1003
+    # is a little special and standalone, so we do it here.
+    include ::role::analytics_cluster::hive::site_hdfs
+
+    # Camus crons import data
+    # from Kafka into HDFS.
+    include ::role::analytics_cluster::refinery::job::camus
+
+    # Various crons that launch Hadoop jobs.
+    include ::role::analytics_cluster::refinery
+    include ::role::analytics_cluster::refinery::job::data_drop
+    include ::role::analytics_cluster::refinery::job::project_namespace_map
+    include ::role::analytics_cluster::refinery::job::sqoop_mediawiki
+}
\ No newline at end of file
--
To view, visit https://gerrit.wikimedia.org/r/372131
To unsubscribe, visit https://gerrit.wikimedia.org/r/settings
Gerrit-MessageType: newchange
Gerrit-Change-Id: I94480677b16821054aaa85ac7d0d7d345c0fec40
Gerrit-PatchSet: 1
Gerrit-Project: operations/puppet
Gerrit-Branch: production
Gerrit-Owner: Elukey <[email protected]>
_______________________________________________
MediaWiki-commits mailing list
[email protected]
https://lists.wikimedia.org/mailman/listinfo/mediawiki-commits