Ottomata has uploaded a new change for review.

  https://gerrit.wikimedia.org/r/72618


Change subject: Puppetizing analytics udp2log instances
......................................................................

Puppetizing analytics udp2log instances

Change-Id: I3fecaa06637bcb4e6ecb8c0d1858da38dc545ddb
---
M manifests/role/analytics.pp
M manifests/site.pp
A templates/udp2log/filters.analytics-mobile.erb
A templates/udp2log/filters.analytics-sampled.erb
A templates/udp2log/filters.analytics-wikipedia-mobile.erb
5 files changed, 120 insertions(+), 24 deletions(-)


  git pull ssh://gerrit.wikimedia.org:29418/operations/puppet 
refs/changes/18/72618/1

diff --git a/manifests/role/analytics.pp b/manifests/role/analytics.pp
index 07e3e60..8de0d27 100644
--- a/manifests/role/analytics.pp
+++ b/manifests/role/analytics.pp
@@ -114,3 +114,70 @@
 class role::analytics::kafka::server inherits role::analytics {
   include misc::analytics::monitoring::kafka::server
 }
+
+
+
+# == role::analytics::udp2log::mobile
+#
+# Geocodes and anonymizes webrequest logs generated on mobile hosts
+# and pipes them into Kafka.
+#
+class role::analytics::udp2log::mobile {
+       include misc::udp2log,
+               misc::udp2log::utilities,
+               role::cache::configuration,
+               passwords::analytics
+
+       $log_directory   = '/var/log/udp2log/webrequest'
+       $packet_loss_log = "${log_directory}/packet-loss.log"
+
+       misc::udp2log::instance { 'analytics-mobile':
+               multicast          => true,
+               log_directory      => $log_directory,
+               packet_loss_log    => $packet_loss_log,
+               monitor_log_age    => false,
+       }
+}
+
+# == role::analytics::udp2log::wikipedia_mobile
+#
+# Pipes webrequest logs generated on mobile hosts into Kafka.
+# This stream is not geocoded or anonymized.
+#
+class role::analytics::udp2log::wikipedia_mobile {
+       include misc::udp2log,
+               misc::udp2log::utilities,
+               role::cache::configuration
+
+       $log_directory   = '/var/log/udp2log/webrequest'
+       $packet_loss_log = "${log_directory}/packet-loss.log"
+
+       misc::udp2log::instance { 'analytics-wikipedia-mobile':
+               multicast          => true,
+               log_directory      => $log_directory,
+               packet_loss_log    => $packet_loss_log,
+               monitor_log_age    => false,
+       }
+}
+
+
+# == role::analytics::udp2log::sampled
+#
+# Geocodes and anonymizes a full sampled 1000 stream
+# and pipes it into Kafka.
+#
+class role::analytics::udp2log::sampled {
+       include misc::udp2log,
+               misc::udp2log::utilities,
+               passwords::analytics
+
+       $log_directory   = '/var/log/udp2log/misc'
+       $packet_loss_log = "${log_directory}/packet-loss.log"
+
+       misc::udp2log::instance { 'analytics-sampled':
+               multicast          => true,
+               log_directory      => $log_directory,
+               packet_loss_log    => $packet_loss_log,
+               monitor_log_age    => false,
+       }
+}
diff --git a/manifests/site.pp b/manifests/site.pp
index 2be04c2..369146b 100644
--- a/manifests/site.pp
+++ b/manifests/site.pp
@@ -148,34 +148,17 @@
 }
 
 #### analytics udp2log kafka producer instances:
-
-#   analytics1006 - webrequest-wikipedia-mobile (unanonymized)
-#   analytics1009 - webrequest-mobile (geocoded and anonymized)
-node /analytics100[69].eqiad.wmnet/ {
+node "analytics1006.eqiad.wmnet" {
        include role::analytics
-
-       # monitor the (currently unpuppetized)
-       # udp2log instances.
-       misc::udp2log::instance::monitoring { 'webrequest':
-               log_directory       => '/var/log/udp2log/webrequest',
-               monitor_packet_loss => true,
-               monitor_processes   => true,
-               monitor_log_age     => false,
-       }
+       include role::analytics::udp2log::wikipedia_mobile
 }
-
-#   analytics1008 - webrequest-all-1000 (sampled)
 node "analytics1008.eqiad.wmnet" {
        include role::analytics
-
-       # monitor the (currently unpuppetized)
-       # udp2log instance.  (geocoded anonymized sampled-1000)
-       misc::udp2log::instance::monitoring { 'misc':
-               log_directory       => '/var/log/udp2log/misc',
-               monitor_packet_loss => true,
-               monitor_processes   => true,
-               monitor_log_age     => false,
-       }
+       include role::analytics::udp2log::mobile
+}
+node "analytics1009.eqiad.wmnet" {
+       include role::analytics
+       include role::analytics::udp2log::sampled
 }
 
 # analytics1021 and analytics1022 are Kafka Brokers.
diff --git a/templates/udp2log/filters.analytics-mobile.erb 
b/templates/udp2log/filters.analytics-mobile.erb
new file mode 100644
index 0000000..eb4284b
--- /dev/null
+++ b/templates/udp2log/filters.analytics-mobile.erb
@@ -0,0 +1,17 @@
+# Note: This file is managed by Puppet.
+
+<%
+# pull in $role::cache::configuration::active_nodes
+# to find mobile host names and build a regex on which to grep.
+cache_configuration = 
scope.lookupvar('::role::cache::configuration::active_nodes')
+mobile_hosts_regex = '(' + 
cache_configuration['production']['mobile'].values.flatten.join('|') + ')'
+
+-%>
+
+# udp2log packet loss monitoring
+pipe 10 /usr/bin/packet-loss 10 '\t' >> <%= packet_loss_log %>
+
+# Produce logs into Kafka:
+
+# pipe all requests from mobile frontend cache servers into kafka
+pipe 1 /bin/grep -P '<%= mobile_hosts_regex %>' | /usr/bin/udp-filter -F '\t' 
--geocode --bird=country --anonymize='<%= 
scope.lookupvar('::passwords::analytics::libanon_salt') %>' | 
/opt/kraken/bin/kafka-produce webrequest-mobile 9951 > /dev/null
\ No newline at end of file
diff --git a/templates/udp2log/filters.analytics-sampled.erb 
b/templates/udp2log/filters.analytics-sampled.erb
new file mode 100644
index 0000000..343e014
--- /dev/null
+++ b/templates/udp2log/filters.analytics-sampled.erb
@@ -0,0 +1,9 @@
+# Note: This file is managed by Puppet.
+
+# udp2log packet loss monitoring
+pipe 10 /usr/bin/packet-loss 10 '\t' >> <%= packet_loss_log %>
+
+# Produce logs into Kafka:
+
+# pipe geocoded and anonymized sampled 1000 into kraken.
+pipe 1000 /usr/bin/udp-filter --geocode --bird=country --anonymize='<%= 
scope.lookupvar('::passwords::analytics::libanon_salt') %>' -F '\t' | 
/opt/kraken/bin/kafka-produce webrequest-all-sampled-1000 9954 > /dev/null
\ No newline at end of file
diff --git a/templates/udp2log/filters.analytics-wikipedia-mobile.erb 
b/templates/udp2log/filters.analytics-wikipedia-mobile.erb
new file mode 100644
index 0000000..8910ea9
--- /dev/null
+++ b/templates/udp2log/filters.analytics-wikipedia-mobile.erb
@@ -0,0 +1,20 @@
+# Note: This file is managed by Puppet.
+
+<%
+# pull in $role::cache::configuration::active_nodes
+# to find mobile host names and build a regex on which to grep.
+cache_configuration = 
scope.lookupvar('::role::cache::configuration::active_nodes')
+mobile_hosts_regex = '(' + 
cache_configuration['production']['mobile'].values.flatten.join('|') + ')'
+
+-%>
+
+# Note:  packet-loss is not debianized or puppetized.
+# I have manually copied it into /usr/local/bin. - otto
+
+# udp2log packet loss monitoring
+pipe 10 /usr/bin/packet-loss 10 '\t' >> <%= packet_loss_log %>
+
+# Produce logs into Kafka:
+
+# pipe all requests from mobile frontend cache servers into kafka
+pipe 1 /bin/grep -P '<%= mobile_hosts_regex %>' | 
/opt/kraken/bin/kafka-produce webrequest-wikipedia-mobile 9951 > /dev/null
\ No newline at end of file

-- 
To view, visit https://gerrit.wikimedia.org/r/72618
To unsubscribe, visit https://gerrit.wikimedia.org/r/settings

Gerrit-MessageType: newchange
Gerrit-Change-Id: I3fecaa06637bcb4e6ecb8c0d1858da38dc545ddb
Gerrit-PatchSet: 1
Gerrit-Project: operations/puppet
Gerrit-Branch: production
Gerrit-Owner: Ottomata <[email protected]>

_______________________________________________
MediaWiki-commits mailing list
[email protected]
https://lists.wikimedia.org/mailman/listinfo/mediawiki-commits

Reply via email to