Ottomata has uploaded a new change for review.
https://gerrit.wikimedia.org/r/72618
Change subject: Puppetizing analytics udp2log instances
......................................................................
Puppetizing analytics udp2log instances
Change-Id: I3fecaa06637bcb4e6ecb8c0d1858da38dc545ddb
---
M manifests/role/analytics.pp
M manifests/site.pp
A templates/udp2log/filters.analytics-mobile.erb
A templates/udp2log/filters.analytics-sampled.erb
A templates/udp2log/filters.analytics-wikipedia-mobile.erb
5 files changed, 120 insertions(+), 24 deletions(-)
git pull ssh://gerrit.wikimedia.org:29418/operations/puppet
refs/changes/18/72618/1
diff --git a/manifests/role/analytics.pp b/manifests/role/analytics.pp
index 07e3e60..8de0d27 100644
--- a/manifests/role/analytics.pp
+++ b/manifests/role/analytics.pp
@@ -114,3 +114,70 @@
class role::analytics::kafka::server inherits role::analytics {
include misc::analytics::monitoring::kafka::server
}
+
+
+
+# == role::analytics::udp2log::mobile
+#
+# Geocodes and anonymizes webrequest logs generated on mobile hosts
+# and pipes them into Kafka.
+#
+class role::analytics::udp2log::mobile {
+ include misc::udp2log,
+ misc::udp2log::utilities,
+ role::cache::configuration,
+ passwords::analytics
+
+ $log_directory = '/var/log/udp2log/webrequest'
+ $packet_loss_log = "${log_directory}/packet-loss.log"
+
+ misc::udp2log::instance { 'analytics-mobile':
+ multicast => true,
+ log_directory => $log_directory,
+ packet_loss_log => $packet_loss_log,
+ monitor_log_age => false,
+ }
+}
+
+# == role::analytics::udp2log::wikipedia_mobile
+#
+# Pipes webrequest logs generated on mobile hosts into Kafka.
+# This stream is not geocoded or anonymized.
+#
+class role::analytics::udp2log::wikipedia_mobile {
+ include misc::udp2log,
+ misc::udp2log::utilities,
+ role::cache::configuration
+
+ $log_directory = '/var/log/udp2log/webrequest'
+ $packet_loss_log = "${log_directory}/packet-loss.log"
+
+ misc::udp2log::instance { 'analytics-wikipedia-mobile':
+ multicast => true,
+ log_directory => $log_directory,
+ packet_loss_log => $packet_loss_log,
+ monitor_log_age => false,
+ }
+}
+
+
+# == role::analytics::udp2log::sampled
+#
+# Geocodes and anonymizes a full sampled 1000 stream
+# and pipes it into Kafka.
+#
+class role::analytics::udp2log::sampled {
+ include misc::udp2log,
+ misc::udp2log::utilities,
+ passwords::analytics
+
+ $log_directory = '/var/log/udp2log/misc'
+ $packet_loss_log = "${log_directory}/packet-loss.log"
+
+ misc::udp2log::instance { 'analytics-sampled':
+ multicast => true,
+ log_directory => $log_directory,
+ packet_loss_log => $packet_loss_log,
+ monitor_log_age => false,
+ }
+}
diff --git a/manifests/site.pp b/manifests/site.pp
index 2be04c2..369146b 100644
--- a/manifests/site.pp
+++ b/manifests/site.pp
@@ -148,34 +148,17 @@
}
#### analytics udp2log kafka producer instances:
-
-# analytics1006 - webrequest-wikipedia-mobile (unanonymized)
-# analytics1009 - webrequest-mobile (geocoded and anonymized)
-node /analytics100[69].eqiad.wmnet/ {
+node "analytics1006.eqiad.wmnet" {
include role::analytics
-
- # monitor the (currently unpuppetized)
- # udp2log instances.
- misc::udp2log::instance::monitoring { 'webrequest':
- log_directory => '/var/log/udp2log/webrequest',
- monitor_packet_loss => true,
- monitor_processes => true,
- monitor_log_age => false,
- }
+ include role::analytics::udp2log::wikipedia_mobile
}
-
-# analytics1008 - webrequest-all-1000 (sampled)
node "analytics1008.eqiad.wmnet" {
include role::analytics
-
- # monitor the (currently unpuppetized)
- # udp2log instance. (geocoded anonymized sampled-1000)
- misc::udp2log::instance::monitoring { 'misc':
- log_directory => '/var/log/udp2log/misc',
- monitor_packet_loss => true,
- monitor_processes => true,
- monitor_log_age => false,
- }
+ include role::analytics::udp2log::sampled
+}
+node "analytics1009.eqiad.wmnet" {
+ include role::analytics
+ include role::analytics::udp2log::mobile
}
# analytics1021 and analytics1022 are Kafka Brokers.
diff --git a/templates/udp2log/filters.analytics-mobile.erb
b/templates/udp2log/filters.analytics-mobile.erb
new file mode 100644
index 0000000..eb4284b
--- /dev/null
+++ b/templates/udp2log/filters.analytics-mobile.erb
@@ -0,0 +1,17 @@
+# Note: This file is managed by Puppet.
+
+<%
+# pull in $role::cache::configuration::active_nodes
+# to find mobile host names and build a regex on which to grep.
+cache_configuration =
scope.lookupvar('::role::cache::configuration::active_nodes')
+mobile_hosts_regex = '(' +
cache_configuration['production']['mobile'].values.flatten.join('|') + ')'
+
+-%>
+
+# udp2log packet loss monitoring
+pipe 10 /usr/bin/packet-loss 10 '\t' >> <%= packet_loss_log %>
+
+# Produce logs into Kafka:
+
+# pipe all requests from mobile frontend cache servers into kafka
+pipe 1 /bin/grep -P '<%= mobile_hosts_regex %>' | /usr/bin/udp-filter -F '\t'
--geocode --bird=country --anonymize='<%=
scope.lookupvar('::passwords::analytics::libanon_salt') %>' |
/opt/kraken/bin/kafka-produce webrequest-mobile 9951 > /dev/null
\ No newline at end of file
diff --git a/templates/udp2log/filters.analytics-sampled.erb
b/templates/udp2log/filters.analytics-sampled.erb
new file mode 100644
index 0000000..343e014
--- /dev/null
+++ b/templates/udp2log/filters.analytics-sampled.erb
@@ -0,0 +1,9 @@
+# Note: This file is managed by Puppet.
+
+# udp2log packet loss monitoring
+pipe 10 /usr/bin/packet-loss 10 '\t' >> <%= packet_loss_log %>
+
+# Produce logs into Kafka:
+
+# pipe geocoded and anonymized sampled 1000 into kraken.
+pipe 1000 /usr/bin/udp-filter --geocode --bird=country --anonymize='<%=
scope.lookupvar('::passwords::analytics::libanon_salt') %>' -F '\t' |
/opt/kraken/bin/kafka-produce webrequest-all-sampled-1000 9954 > /dev/null
\ No newline at end of file
diff --git a/templates/udp2log/filters.analytics-wikipedia-mobile.erb
b/templates/udp2log/filters.analytics-wikipedia-mobile.erb
new file mode 100644
index 0000000..8910ea9
--- /dev/null
+++ b/templates/udp2log/filters.analytics-wikipedia-mobile.erb
@@ -0,0 +1,20 @@
+# Note: This file is managed by Puppet.
+
+<%
+# pull in $role::cache::configuration::active_nodes
+# to find mobile host names and build a regex on which to grep.
+cache_configuration =
scope.lookupvar('::role::cache::configuration::active_nodes')
+mobile_hosts_regex = '(' +
cache_configuration['production']['mobile'].values.flatten.join('|') + ')'
+
+-%>
+
+# Note: packet-loss is not debianized or puppetized.
+# I have manually copied it into /usr/local/bin. - otto
+
+# udp2log packet loss monitoring
+pipe 10 /usr/bin/packet-loss 10 '\t' >> <%= packet_loss_log %>
+
+# Produce logs into Kafka:
+
+# pipe all requests from mobile frontend cache servers into kafka
+pipe 1 /bin/grep -P '<%= mobile_hosts_regex %>' |
/opt/kraken/bin/kafka-produce webrequest-wikipedia-mobile 9951 > /dev/null
\ No newline at end of file
--
To view, visit https://gerrit.wikimedia.org/r/72618
To unsubscribe, visit https://gerrit.wikimedia.org/r/settings
Gerrit-MessageType: newchange
Gerrit-Change-Id: I3fecaa06637bcb4e6ecb8c0d1858da38dc545ddb
Gerrit-PatchSet: 1
Gerrit-Project: operations/puppet
Gerrit-Branch: production
Gerrit-Owner: Ottomata <[email protected]>
_______________________________________________
MediaWiki-commits mailing list
[email protected]
https://lists.wikimedia.org/mailman/listinfo/mediawiki-commits