Ottomata has uploaded a new change for review.
https://gerrit.wikimedia.org/r/94148
Change subject: Cleaning up analytics role classes
......................................................................
Cleaning up analytics role classes
Change-Id: I3ab3d1cc7d3c5a99ae00096d1499a56d7bb9cf3f
---
M manifests/role/analytics.pp
M manifests/role/analytics/kraken.pp
M manifests/site.pp
3 files changed, 63 insertions(+), 69 deletions(-)
git pull ssh://gerrit.wikimedia.org:29418/operations/puppet refs/changes/48/94148/1
diff --git a/manifests/role/analytics.pp b/manifests/role/analytics.pp
index 0c5f0dc..0623a48 100644
--- a/manifests/role/analytics.pp
+++ b/manifests/role/analytics.pp
@@ -2,15 +2,14 @@
@monitor_group { 'analytics-eqiad': description => 'analytics servers in eqiad' }
+# == Class role::analytics
# Base class for all analytics nodes.
# All analytics nodes should include this.
class role::analytics {
system::role { 'role::analytics': description => 'analytics server' }
$nagios_group = 'analytics-eqiad'
# ganglia cluster name.
- $cluster = "analytics"
-
- package { 'openjdk-7-jdk': }
+ $cluster = 'analytics'
include standard
include admins::roots
@@ -18,13 +17,17 @@
# run automated jobs and for file
# ownership.
include misc::statistics::user
+
+ package { 'openjdk-7-jdk': }
}
-# == Class role::analytics::common
+# == Class role::analytics::clients
# Includes common client classes for
# working with hadoop and other analytics services.
-#
-class role::analytics::common {
+# This class is often included by including
+# role::analytics::kraken, but you may include
+# it on its own if you don't need any kraken code.
+class role::analytics::clients {
include role::analytics
# Include Hadoop ecosystem client classes.
@@ -33,19 +36,14 @@
role::analytics::oozie::client,
role::analytics::pig,
role::analytics::sqoop
-
- # We want to be able to geolocate IP addresses
- include geoip
- # udp-filter is a useful thing!
- include misc::udp2log::udp_filter
- # include dclass for device classification
- include role::analytics::dclass
-
- # Include Kraken repository deployments.
- include role::analytics::kraken
}
-
+# == Class role::analytics::users
+# Users that should be on analytics nodes.
+# This class is not included on *all* analytics
+# nodes, just ones where it is useful for users to
+# have accounts. I.e. hadoop related nodes.
+# Users do not need accounts on Kafka or Zookeeper nodes.
class role::analytics::users {
# Analytics user accounts will be added to the
# 'stats' group which gets created by this class.
@@ -72,46 +70,30 @@
accounts::halfak, # RT 5836
accounts::ypanda # RT 6103
- # add Analytics team members to the stats group so they can
- # access data group owned by 'stats'.
- User<|title == milimetric|> { groups +> [ "stats" ] }
- User<|title == yurik|> { groups +> [ "stats" ] }
- User<|title == dartar|> { groups +> [ "stats" ] }
- User<|title == dsc|> { groups +> [ "stats" ] }
- User<|title == diederik|> { groups +> [ "stats" ] }
- User<|title == erik|> { groups +> [ "stats" ] }
- User<|title == erosen|> { groups +> [ "stats" ] }
- User<|title == olivneh|> { groups +> [ "stats" ] }
- User<|title == otto|> { groups +> [ "stats" ] }
- User<|title == spetrea|> { groups +> [ "stats" ] }
- User<|title == abaso|> { groups +> [ "stats" ] }
- User<|title == qchris|> { groups +> [ "stats" ] }
+ # NOTE: If you are filling an RT request for Hadoop access,
+ # you will need to add the user to the list of accounts above,
+ # as well as manually create the user's HDFS home directory.
+ # Once the user's posix account is created on analytics1010
+ # (the Hadoop NameNode), run these commands:
+ #
+ # sudo -u hdfs hadoop fs -mkdir /user/<username>
+ # sudo -u hdfs hadoop fs -chown <username>:stats /user/<username>
+ #
+ # Users in the stats group will be able to read private data in HDFS.
+ User<|title == milimetric|> { groups +> [ 'stats' ] }
+ User<|title == yurik|> { groups +> [ 'stats' ] }
+ User<|title == dartar|> { groups +> [ 'stats' ] }
+ User<|title == dsc|> { groups +> [ 'stats' ] }
+ User<|title == diederik|> { groups +> [ 'stats' ] }
+ User<|title == erik|> { groups +> [ 'stats' ] }
+ User<|title == erosen|> { groups +> [ 'stats' ] }
+ User<|title == olivneh|> { groups +> [ 'stats' ] }
+ User<|title == otto|> { groups +> [ 'stats' ] }
+ User<|title == spetrea|> { groups +> [ 'stats' ] }
+ User<|title == abaso|> { groups +> [ 'stats' ] }
+ User<|title == qchris|> { groups +> [ 'stats' ] }
- # Diederik, David and Otto have sudo privileges on Analytics nodes.
- sudo_user { [ "diederik", "dsc", "otto" ]: privileges => ['ALL = (ALL) NOPASSWD: ALL'] }
+ # Diederik and Otto have sudo privileges on Analytics nodes.
+ sudo_user { [ 'diederik', 'otto' ]: privileges => ['ALL = (ALL) NOPASSWD: ALL'] }
}
-
-
-class role::analytics::dclass {
- # install dclass JNI package
- # for device classification.
- if !defined(Package['libdclass-java']) {
- package { 'libdclass-java':
- ensure => 'installed',
- }
- }
- # Symlink libdclass* .so into /usr/lib.
- # (Oracle java does not support multiarch.)
- file { '/usr/lib/libdclass.so':
- ensure => 'link',
- target => '/usr/lib/x86_64-linux-gnu/libdclass.so.0',
- require => Package['libdclass-java'],
- }
- file { '/usr/lib/libdclassjni.so':
- ensure => 'link',
- target => '/usr/lib/x86_64-linux-gnu/jni/libdclassjni.so',
- require => Package['libdclass-java'],
- }
-}
-
diff --git a/manifests/role/analytics/kraken.pp b/manifests/role/analytics/kraken.pp
index e407d25..69c4cbf 100644
--- a/manifests/role/analytics/kraken.pp
+++ b/manifests/role/analytics/kraken.pp
@@ -1,11 +1,27 @@
-# kraken.pp - role classes dealing with Kraken repository related puppetization.
+# kraken.pp - role classes dealing with Kraken data analysis.
# == Class role::analytics::kraken
# Kraken refers to the Analytics codebase used to generate
# analytics for WMF.
class role::analytics::kraken {
# Need Hadoop client classes included to use Kraken.
- include role::analytics::common
+ include role::analytics::clients
+
+ # We want to be able to geolocate IP addresses
+ include geoip
+ # udp-filter is a useful thing!
+ include misc::udp2log::udp_filter
+
+ # many Kraken python scripts use docopt for CLI parsing.
+ package { 'python-docopt':
+ ensure => 'installed',
+ }
+
+ # Many kraken jobs use dclass for
+ # User Agent Device classification
+ package { 'libdclass-java':
+ ensure => 'installed',
+ }
# Include Kraken repository deployment target.
deployment::target { 'analytics-kraken': }
@@ -24,13 +40,9 @@
group => 'stats',
# setgid bit here to make kraken log files writeable
# by users in the stats group.
- mode => 2775,
+ mode => '2775',
}
- # many Kraken python scripts use docopt for CLI parsing.
- package { 'python-docopt':
- ensure => 'installed',
- }
}
# == Class role::analytics::kraken::import::pagecounts
diff --git a/manifests/site.pp b/manifests/site.pp
index fd0c11b..646f9d6 100644
--- a/manifests/site.pp
+++ b/manifests/site.pp
@@ -151,7 +151,7 @@
# include analytics user accounts
include role::analytics::users
- include role::analytics::common
+ include role::analytics::kraken
include role::analytics::hadoop::standby
}
@@ -161,7 +161,7 @@
# include analytics user accounts
include role::analytics::users
- include role::analytics::common
+ include role::analytics::kraken
include role::analytics::hadoop::master
}
@@ -174,7 +174,7 @@
# include analytics user accounts
include role::analytics::users
- include role::analytics::common
+ include role::analytics::kraken
include role::analytics::hadoop::worker
}
@@ -195,7 +195,7 @@
# include analytics user accounts
include role::analytics::users
- include role::analytics::common
+ include role::analytics::kraken
# Including kraken import and hive partition cron jobs.
include role::analytics::kraken::jobs::import::pagecounts
include role::analytics::kraken::jobs::hive::partitions::external
@@ -205,7 +205,7 @@
# interfaces to Kraken and Hadoop.
# (Hue, Oozie, Hive, etc.)
node "analytics1027.eqiad.wmnet" {
- include role::analytics::common
+ include role::analytics::clients
include role::analytics::hive::server
include role::analytics::oozie::server
include role::analytics::hue
--
To view, visit https://gerrit.wikimedia.org/r/94148
To unsubscribe, visit https://gerrit.wikimedia.org/r/settings
Gerrit-MessageType: newchange
Gerrit-Change-Id: I3ab3d1cc7d3c5a99ae00096d1499a56d7bb9cf3f
Gerrit-PatchSet: 1
Gerrit-Project: operations/puppet
Gerrit-Branch: production
Gerrit-Owner: Ottomata <[email protected]>
_______________________________________________
MediaWiki-commits mailing list
[email protected]
https://lists.wikimedia.org/mailman/listinfo/mediawiki-commits