Ottomata has uploaded a new change for review.
https://gerrit.wikimedia.org/r/56537
Change subject: Puppetizing udp2log instances on analytics nodes.
......................................................................
Puppetizing udp2log instances on analytics nodes.
This is meant as a temporary measure to get udp2log
back under puppetization on the analytics cluster.
This is not a well-formed puppet commit, and is
intended to be scrapped once the analytics cluster
is repuppetized.
Change-Id: I0d6c49b8acafd0fcdc3e0a379bdb2f6b0129c4bc
---
M manifests/misc/analytics.pp
M manifests/site.pp
A templates/udp2log/filters.webrequest.erb
3 files changed, 109 insertions(+), 9 deletions(-)
git pull ssh://gerrit.wikimedia.org:29418/operations/puppet
refs/changes/37/56537/1
diff --git a/manifests/misc/analytics.pp b/manifests/misc/analytics.pp
index 9b66a11..35107ad 100644
--- a/manifests/misc/analytics.pp
+++ b/manifests/misc/analytics.pp
@@ -23,4 +23,35 @@
minute => 15,
require => File[$tmp_dir],
}
-}
\ No newline at end of file
+}
+
+
+# Temporary class to manage udp2log instances
+# on analytics nodes. This class will be refactored
+# and deleted soon.
+#
+# analytics udp2log instances currently shard the
+# webrequest stream into $producer_count pieces.
+# $producer_id tells the current node which shard
+# it is responsible for.
+class misc::analytics::udp2log::webrequest($producer_id, $producer_count) {
+ include misc::udp2log,
+ misc::udp2log::utilities
+
+ # Starts a multicast listening udp2log instance
+ # to read from the webrequest log stream.
+ misc::udp2log::instance { "webrequest":
+ port => "8420",
+ multicast => true,
+ log_directory => "/var/log/udp2log/webrequest",
+ logrotate => false,
+ monitor_packet_loss => true,
+ monitor_processes => true,
+ monitor_log_age => false,
+ template_variables => {
+ 'producer_count' => $producer_count,
+ 'producer_id' => $producer_id,
+ }
+ }
+}
+
diff --git a/manifests/site.pp b/manifests/site.pp
index 22bb58f..5940ba2 100644
--- a/manifests/site.pp
+++ b/manifests/site.pp
@@ -134,6 +134,50 @@
include misc::udp2log::iptables
}
+node "analytics1002.eqiad.wmnet" {
+ include role::analytics
+}
+
+# analytics1003 - analytics1006 are udp2log instances.
+node "analytics1003.eqiad.wmnet" inherits analytics_basenode {
+ # ganglia aggregator for the Analytics cluster.
+ $ganglia_aggregator = "true"
+
+ class { 'misc::analytics::udp2log::webrequest':
+ producer_id => 0,
+ producer_count => 4,
+ }
+}
+node "analytics1004.eqiad.wmnet" inherits analytics_basenode {
+ class { 'misc::analytics::udp2log::webrequest':
+ producer_id => 1,
+ producer_count => 4,
+ }
+}
+node "analytics1005.eqiad.wmnet" inherits analytics_basenode {
+ class { 'misc::analytics::udp2log::webrequest':
+ producer_id => 2,
+ producer_count => 4,
+ }
+}
+node "analytics1006.eqiad.wmnet" inherits analytics_basenode {
+ class { 'misc::analytics::udp2log::webrequest':
+ producer_id => 3,
+ producer_count => 4,
+ }
+}
+
+
+# analytics1007 - analytics1026
+node /analytics10(0[7-9]|1[0-9]|2[0-6])\.eqiad\.wmnet/ {
+ # ganglia aggregator for the Analytics cluster.
+ if ($hostname == "analytics1011") {
+ $ganglia_aggregator = "true"
+ }
+
+ include role::analytics
+}
+
# analytics1027 hosts the frontend
# interfaces to Kraken and Hadoop.
node "analytics1027.eqiad.wmnet" {
@@ -149,15 +193,11 @@
}
}
-# analytics1002 - analytics1026
-node /analytics10(0[2-9]|1[0-9]|2[0-6])\.eqiad\.wmnet/ {
- # ganglia aggregator for the Analytics cluster.
- if ($hostname == "analytics1003" or $hostname == "analytics1011") {
- $ganglia_aggregator = "true"
- }
- include role::analytics
-}
+
+
+
+
node /(arsenic|niobium|strontium|palladium)\.(wikimedia\.org|eqiad\.wmnet)/ {
if $hostname =~ /^(arsenic|niobium)$/ {
diff --git a/templates/udp2log/filters.webrequest.erb
b/templates/udp2log/filters.webrequest.erb
new file mode 100644
index 0000000..80c9b74
--- /dev/null
+++ b/templates/udp2log/filters.webrequest.erb
@@ -0,0 +1,29 @@
+# Note: This file is managed by Puppet.
+
+# Analytics udp2log webrequest instance configuration file.
+
+<%
+# Quick and easy method for DRYing piping into kafka producer.
+# This uses the bin/kafka-produce script from the Kraken repository.
+def kafka_producer(topic, jmx_port='')
+# "/usr/lib/kafka/bin/kafka-producer-shell.sh
--props=/etc/kafka/producer.properties --topic=#{topic} > /dev/null"
+ "/opt/kraken/bin/kafka-produce #{topic} #{jmx_port} > /dev/null"
+end
+
+webrequest_producer_count = template_variables['producer_count']
+webrequest_producer_id = template_variables['producer_id']
+
+# We're splitting the webrequest stream into
+# a number of streams across a few different machines.
+# Hopefully this will let us avoid packet loss.
+mod_filter_command = "/usr/bin/awk '{if ($2 % #{webrequest_producer_count} ==
#{webrequest_producer_id}) print $0; }' | "
+-%>
+
+# udp2log packet loss monitoring
+pipe 10 /usr/bin/packet-loss 10 '\t' >>
/var/log/udp2log/webrequest/packet-loss.log
+
+
+# pipe all requests from mobile frontend cache servers into Kafka
+pipe 1 <%= mod_filter_command %>/bin/grep -P '^cp104[1-4]' | <%=
kafka_producer("webrequest-wikipedia-mobile", 9951) %>
+
+
--
To view, visit https://gerrit.wikimedia.org/r/56537
To unsubscribe, visit https://gerrit.wikimedia.org/r/settings
Gerrit-MessageType: newchange
Gerrit-Change-Id: I0d6c49b8acafd0fcdc3e0a379bdb2f6b0129c4bc
Gerrit-PatchSet: 1
Gerrit-Project: operations/puppet
Gerrit-Branch: production
Gerrit-Owner: Ottomata <[email protected]>
_______________________________________________
MediaWiki-commits mailing list
[email protected]
https://lists.wikimedia.org/mailman/listinfo/mediawiki-commits