Ottomata has submitted this change and it was merged.

Change subject: Cleaning up analytics role classes
......................................................................


Cleaning up analytics role classes

Change-Id: I3ab3d1cc7d3c5a99ae00096d1499a56d7bb9cf3f
---
M manifests/role/analytics.pp
M manifests/role/analytics/kraken.pp
M manifests/site.pp
3 files changed, 63 insertions(+), 69 deletions(-)

Approvals:
  Ottomata: Verified; Looks good to me, approved



diff --git a/manifests/role/analytics.pp b/manifests/role/analytics.pp
index 0c5f0dc..0623a48 100644
--- a/manifests/role/analytics.pp
+++ b/manifests/role/analytics.pp
@@ -2,15 +2,14 @@
 
 @monitor_group { 'analytics-eqiad': description => 'analytics servers in 
eqiad' }
 
+# == Class role::analytics
 # Base class for all analytics nodes.
 # All analytics nodes should include this.
 class role::analytics {
     system::role { 'role::analytics': description => 'analytics server' }
     $nagios_group = 'analytics-eqiad'
     # ganglia cluster name.
-    $cluster = "analytics"
-
-    package { 'openjdk-7-jdk': }
+    $cluster = 'analytics'
 
     include standard
     include admins::roots
@@ -18,13 +17,17 @@
     # run automated jobs and for file
     # ownership.
     include misc::statistics::user
+
+    package { 'openjdk-7-jdk': }
 }
 
-# == Class role::analytics::common
+# == Class role::analytics::clients
 # Includes common client classes for
 # working with hadoop and other analytics services.
-#
-class role::analytics::common {
+# This class is often included by including
+# role::analytics::kraken, but you may include
+# it on its own if you don't need any kraken code.
+class role::analytics::clients {
     include role::analytics
 
     # Include Hadoop ecosystem client classes.
@@ -33,19 +36,14 @@
         role::analytics::oozie::client,
         role::analytics::pig,
         role::analytics::sqoop
-
-    # We want to be able to geolocate IP addresses
-    include geoip
-    # udp-filter is a useful thing!
-    include misc::udp2log::udp_filter
-    # include dclass for device classification
-    include role::analytics::dclass
-
-    # Include Kraken repository deployments.
-    include role::analytics::kraken
 }
 
-
+# == Class role::analytics::users
+# Users that should be on analytics nodes.
+# This class is not included on *all* analytics
+# nodes, just ones where it is useful for users to
+# have accounts.  I.e. hadoop related nodes.
+# Users do not need accounts on Kafka or Zookeeper nodes.
 class role::analytics::users {
     # Analytics user accounts will be added to the
     # 'stats' group which gets created by this class.
@@ -72,46 +70,30 @@
         accounts::halfak,    # RT 5836
         accounts::ypanda     # RT 6103
 
-    # add Analytics team members to the stats group so they can
-    # access data group owned by 'stats'.
-    User<|title == milimetric|>  { groups +> [ "stats" ] }
-    User<|title == yurik|>       { groups +> [ "stats" ] }
-    User<|title == dartar|>      { groups +> [ "stats" ] }
-    User<|title == dsc|>         { groups +> [ "stats" ] }
-    User<|title == diederik|>    { groups +> [ "stats" ] }
-    User<|title == erik|>        { groups +> [ "stats" ] }
-    User<|title == erosen|>      { groups +> [ "stats" ] }
-    User<|title == olivneh|>     { groups +> [ "stats" ] }
-    User<|title == otto|>        { groups +> [ "stats" ] }
-    User<|title == spetrea|>     { groups +> [ "stats" ] }
-    User<|title == abaso|>       { groups +> [ "stats" ] }
-    User<|title == qchris|>      { groups +> [ "stats" ] }
+# NOTE:  If you are filing an RT request for Hadoop access,
+    # you will need to add the user to the list of accounts above,
+    # as well as manually create the user's HDFS home directory.
+    # Once the user's posix account is created on analytics1010
+    # (the Hadoop NameNode), run these commands:
+    #
+    #   sudo -u hdfs hadoop fs -mkdir /user/<username>
+    #   sudo -u hdfs hadoop fs -chown <username>:stats /user/<username>
+    #
 
+    # Users in the stats group will be able to read private data in HDFS.
+    User<|title == milimetric|>  { groups +> [ 'stats' ] }
+    User<|title == yurik|>       { groups +> [ 'stats' ] }
+    User<|title == dartar|>      { groups +> [ 'stats' ] }
+    User<|title == dsc|>         { groups +> [ 'stats' ] }
+    User<|title == diederik|>    { groups +> [ 'stats' ] }
+    User<|title == erik|>        { groups +> [ 'stats' ] }
+    User<|title == erosen|>      { groups +> [ 'stats' ] }
+    User<|title == olivneh|>     { groups +> [ 'stats' ] }
+    User<|title == otto|>        { groups +> [ 'stats' ] }
+    User<|title == spetrea|>     { groups +> [ 'stats' ] }
+    User<|title == abaso|>       { groups +> [ 'stats' ] }
+    User<|title == qchris|>      { groups +> [ 'stats' ] }
 
-    # Diederik, David and Otto have sudo privileges on Analytics nodes.
-    sudo_user { [ "diederik", "dsc", "otto" ]: privileges => ['ALL = (ALL) 
NOPASSWD: ALL'] }
+    # Diederik and Otto have sudo privileges on Analytics nodes.
+    sudo_user { [ 'diederik', 'otto' ]: privileges => ['ALL = (ALL) NOPASSWD: 
ALL'] }
 }
-
-
-class role::analytics::dclass {
-    # install dclass JNI package
-    # for device classification.
-    if !defined(Package['libdclass-java']) {
-        package { 'libdclass-java':
-            ensure  => 'installed',
-        }
-    }
-    # Symlink libdclass* .so into /usr/lib.
-    # (Oracle java does not support multiarch.)
-    file { '/usr/lib/libdclass.so':
-        ensure => 'link',
-        target => '/usr/lib/x86_64-linux-gnu/libdclass.so.0',
-        require => Package['libdclass-java'],
-    }
-    file { '/usr/lib/libdclassjni.so':
-        ensure => 'link',
-        target => '/usr/lib/x86_64-linux-gnu/jni/libdclassjni.so',
-        require => Package['libdclass-java'],
-    }
-}
-
diff --git a/manifests/role/analytics/kraken.pp 
b/manifests/role/analytics/kraken.pp
index e407d25..69c4cbf 100644
--- a/manifests/role/analytics/kraken.pp
+++ b/manifests/role/analytics/kraken.pp
@@ -1,11 +1,27 @@
-# kraken.pp - role classes dealing with Kraken repository related 
puppetization.
+# kraken.pp - role classes dealing with Kraken data analysis.
 
 # == Class role::analytics::kraken
 # Kraken refers to the Analytics codebase used to generate
 # analytics for WMF.
 class role::analytics::kraken {
     # Need Hadoop client classes included to use Kraken.
-    include role::analytics::common
+    include role::analytics::clients
+
+    # We want to be able to geolocate IP addresses
+    include geoip
+    # udp-filter is a useful thing!
+    include misc::udp2log::udp_filter
+
+    # many Kraken python scripts use docopt for CLI parsing.
+    package { 'python-docopt':
+        ensure => 'installed',
+    }
+
+    # Many kraken jobs use dclass for
+    # User Agent Device classification
+    package { 'libdclass-java':
+        ensure  => 'installed',
+    }
 
     # Include Kraken repository deployment target.
     deployment::target { 'analytics-kraken': }
@@ -24,13 +40,9 @@
         group  => 'stats',
         # setgid bit here to make kraken log files writeable
         # by users in the stats group.
-        mode   => 2775,
+        mode   => '2775',
     }
 
-    # many Kraken python scripts use docopt for CLI parsing.
-    package { 'python-docopt':
-        ensure => 'installed',
-    }
 }
 
 # == Class role::analytics::kraken::import::pagecounts
diff --git a/manifests/site.pp b/manifests/site.pp
index 7a93799..5eec915 100644
--- a/manifests/site.pp
+++ b/manifests/site.pp
@@ -151,7 +151,7 @@
     # include analytics user accounts
     include role::analytics::users
 
-    include role::analytics::common
+    include role::analytics::kraken
     include role::analytics::hadoop::standby
 }
 
@@ -161,7 +161,7 @@
     # include analytics user accounts
     include role::analytics::users
 
-    include role::analytics::common
+    include role::analytics::kraken
     include role::analytics::hadoop::master
 }
 
@@ -174,7 +174,7 @@
     # include analytics user accounts
     include role::analytics::users
 
-    include role::analytics::common
+    include role::analytics::kraken
     include role::analytics::hadoop::worker
 }
 
@@ -195,7 +195,7 @@
     # include analytics user accounts
     include role::analytics::users
     
-    include role::analytics::common
+    include role::analytics::kraken
     # Including kraken import and hive partition cron jobs.
     include role::analytics::kraken::jobs::import::pagecounts
     include role::analytics::kraken::jobs::hive::partitions::external
@@ -205,7 +205,7 @@
 # interfaces to Kraken and Hadoop.
 # (Hue, Oozie, Hive, etc.)
 node "analytics1027.eqiad.wmnet" {
-    include role::analytics::common
+    include role::analytics::clients
     include role::analytics::hive::server
     include role::analytics::oozie::server
     include role::analytics::hue

-- 
To view, visit https://gerrit.wikimedia.org/r/94148
To unsubscribe, visit https://gerrit.wikimedia.org/r/settings

Gerrit-MessageType: merged
Gerrit-Change-Id: I3ab3d1cc7d3c5a99ae00096d1499a56d7bb9cf3f
Gerrit-PatchSet: 2
Gerrit-Project: operations/puppet
Gerrit-Branch: production
Gerrit-Owner: Ottomata <[email protected]>
Gerrit-Reviewer: Faidon Liambotis <[email protected]>
Gerrit-Reviewer: Ottomata <[email protected]>
Gerrit-Reviewer: jenkins-bot

_______________________________________________
MediaWiki-commits mailing list
[email protected]
https://lists.wikimedia.org/mailman/listinfo/mediawiki-commits

Reply via email to