Ottomata has uploaded a new change for review.

  https://gerrit.wikimedia.org/r/56537


Change subject: Puppetizing udp2log instances on analytics nodes.
......................................................................

Puppetizing udp2log instances on analytics nodes.

This is meant as a temporary measure to get udp2log
back under puppetization on the analytics cluster.
This is not a well formed puppet commit, and is
intended to be scrapped once the analytics cluster
is repuppetized.

Change-Id: I0d6c49b8acafd0fcdc3e0a379bdb2f6b0129c4bc
---
M manifests/misc/analytics.pp
M manifests/site.pp
A templates/udp2log/filters.webrequest.erb
3 files changed, 109 insertions(+), 9 deletions(-)


  git pull ssh://gerrit.wikimedia.org:29418/operations/puppet 
refs/changes/37/56537/1

diff --git a/manifests/misc/analytics.pp b/manifests/misc/analytics.pp
index 9b66a11..35107ad 100644
--- a/manifests/misc/analytics.pp
+++ b/manifests/misc/analytics.pp
@@ -23,4 +23,35 @@
                minute  => 15,
                require => File[$tmp_dir],
        }
-}
\ No newline at end of file
+}
+
+
+# Temporary class to manage udp2log instances 
+# on analytics nodes.  This class will be refactored
+# and deleted soon.
+# 
+# analytics udp2log instances currently shard the
+# webrequest stream into $producer_count pieces.
+# $producer_id tells the current node which shard
+# it is responsible for.
+class misc::analytics::udp2log::webrequest($producer_id, $producer_count) {
+       include misc::udp2log,
+               misc::udp2log::utilities
+
+       # Starts a multicast listening udp2log instance
+       # to read from the webrequest log stream.
+       misc::udp2log::instance { "webrequest":
+               port                => "8420",
+               multicast           => true,
+               log_directory       => "/var/log/udp2log/webrequest",
+               logrotate           => false,
+               monitor_packet_loss => true,
+               monitor_processes   => true,
+               monitor_log_age     => false,
+               template_variables  => {
+                       'producer_count' => $producer_count,
+                       'producer_id'    => $producer_id,
+               }
+       }
+}
+
diff --git a/manifests/site.pp b/manifests/site.pp
index 22bb58f..5940ba2 100644
--- a/manifests/site.pp
+++ b/manifests/site.pp
@@ -134,6 +134,50 @@
        include misc::udp2log::iptables
 }
 
+node "analytics1002.eqiad.wmnet" {
+       include role::analytics
+}
+
+# analytics1003 - analytics1006 are udp2log instances.
+node "analytics1003.eqiad.wmnet" inherits analytics_basenode {
+       # ganglia aggregator for the Analytics cluster.
+       $ganglia_aggregator = "true"
+
+       class { 'misc::analytics::udp2log::webrequest':
+               producer_id    => 0,
+               producer_count => 4,
+       }
+}
+node "analytics1004.eqiad.wmnet" inherits analytics_basenode {
+       class { 'misc::analytics::udp2log::webrequest':
+               producer_id    => 1,
+               producer_count => 4,
+       }
+}
+node "analytics1005.eqiad.wmnet" inherits analytics_basenode {
+       class { 'misc::analytics::udp2log::webrequest':
+               producer_id    => 2,
+               producer_count => 4,
+       }
+}
+node "analytics1006.eqiad.wmnet" inherits analytics_basenode {
+       class { 'misc::analytics::udp2log::webrequest':
+               producer_id    => 3,
+               producer_count => 4,
+       }
+}
+
+
+# analytics1007 - analytics1026
+node /analytics10(0[7-9]|1[0-9]|2[0-6])\.eqiad\.wmnet/ {
+       # ganglia aggregator for the Analytics cluster.
+       if ($hostname == "analytics1011") {
+               $ganglia_aggregator = "true"
+       }
+
+       include role::analytics
+}
+
 # analytics1027 hosts the frontend
 # interfaces to Kraken and Hadoop.
 node "analytics1027.eqiad.wmnet" {
@@ -149,15 +193,11 @@
        }
 }
 
-# analytics1002 - analytics1026
-node /analytics10(0[2-9]|1[0-9]|2[0-6])\.eqiad\.wmnet/ {
-       # ganglia aggregator for the Analytics cluster.
-       if ($hostname == "analytics1003" or $hostname == "analytics1011") {
-               $ganglia_aggregator = "true"
-       }
 
-       include role::analytics
-}
+
+
+
+
 
 node /(arsenic|niobium|strontium|palladium)\.(wikimedia\.org|eqiad\.wmnet)/ {
        if $hostname =~ /^(arsenic|niobium)$/ {
diff --git a/templates/udp2log/filters.webrequest.erb 
b/templates/udp2log/filters.webrequest.erb
new file mode 100644
index 0000000..80c9b74
--- /dev/null
+++ b/templates/udp2log/filters.webrequest.erb
@@ -0,0 +1,29 @@
+# Note: This file is managed by Puppet.
+
+# Analytics udp2log webrequest instance configuration file.
+
+<%
+# Quick and easy method for DRYing piping into kafka producer.
+# This uses the bin/kafka-produce script from the Kraken repository.
+def kafka_producer(topic, jmx_port='')
+#  "/usr/lib/kafka/bin/kafka-producer-shell.sh 
--props=/etc/kafka/producer.properties --topic=#{topic} > /dev/null"
+  "/opt/kraken/bin/kafka-produce #{topic} #{jmx_port} > /dev/null"
+end
+
+webrequest_producer_count = template_variables['producer_count']
+webrequest_producer_id    = template_variables['producer_id']
+
+# We're splitting the webrequest stream into
+# a number of streams across a few different machines.
+# Hopefully this will let us avoid packet loss.
+mod_filter_command = "/usr/bin/awk '{if ($2 % #{webrequest_producer_count} == 
#{webrequest_producer_id}) print $0; }' | "
+-%>
+
+# udp2log packet loss monitoring
+pipe 10 /usr/bin/packet-loss 10 '\t' >> 
/var/log/udp2log/webrequest/packet-loss.log
+
+
+# pipe all requests from mobile frontend cache servers into Kafka
+pipe 1 <%= mod_filter_command %>/bin/grep -P '^cp104[1-4]' | <%= 
kafka_producer("webrequest-wikipedia-mobile", 9951) %>
+
+

-- 
To view, visit https://gerrit.wikimedia.org/r/56537
To unsubscribe, visit https://gerrit.wikimedia.org/r/settings

Gerrit-MessageType: newchange
Gerrit-Change-Id: I0d6c49b8acafd0fcdc3e0a379bdb2f6b0129c4bc
Gerrit-PatchSet: 1
Gerrit-Project: operations/puppet
Gerrit-Branch: production
Gerrit-Owner: Ottomata <[email protected]>

_______________________________________________
MediaWiki-commits mailing list
[email protected]
https://lists.wikimedia.org/mailman/listinfo/mediawiki-commits

Reply via email to