Ottomata has submitted this change and it was merged.

Change subject: add varnish::kafka::statsv
......................................................................


add varnish::kafka::statsv

Add a varnishkafka instance on the bits varnishes that logs requests to
bits.wikimedia.org/statsv/*. This will be used for reporting performance
metrics from client-side code.
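
As a rough illustration (not part of this change), a client-side report
could be as simple as a GET request whose query string carries the metric.
Only the /statsv/ path prefix below comes from the varnish match in this
change; the metric name and "<value>ms" encoding are hypothetical:

    # Hypothetical beacon sketch, in Python for brevity (real callers would
    # be browser-side JS). Only the /statsv/ prefix is from this change.
    from urllib.parse import urlencode
    from urllib.request import urlopen

    def report_metric(name, value_ms):
        # Metrics ride in the query string; varnishkafka logs %{@uri_query}q.
        qs = urlencode({name: '%dms' % value_ms})
        # The response body does not matter; only the logged request does.
        urlopen('https://bits.wikimedia.org/statsv/?' + qs).close()

    report_metric('frontend.pageLoad', 412)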

This also includes a slight refactor of the cache.pp varnish::kafka classes
to abstract out the defaults we want applied to every varnishkafka::instance.

Change-Id: I3d74c9da11c979a9f3cdcb4ebf1b34d225d56061
---
M manifests/role/cache.pp
1 file changed, 148 insertions(+), 90 deletions(-)

Approvals:
  Ori.livneh: Looks good to me, but someone else must approve
  Ottomata: Looks good to me, approved
  jenkins-bot: Verified



diff --git a/manifests/role/cache.pp b/manifests/role/cache.pp
index 2e623cb..6747cb3 100644
--- a/manifests/role/cache.pp
+++ b/manifests/role/cache.pp
@@ -392,81 +392,122 @@
     }
 
     # == Class varnish::kafka
-    # Sets up a varnishkafka instance producing varnish
-    # logs to the analytics Kafka brokers in eqiad.
-    class varnish::kafka($topic, $varnish_name = 'frontend')
-    {
-        # ToDo: Remove production conditional once this is
-        # verified to work in labs.
-        if $::realm == 'production' {
-            require role::analytics::kafka::config
-            # All producers currently produce to the (only) Kafka cluster in eqiad.
-            $kafka_brokers = keys($role::analytics::kafka::config::cluster_config['eqiad'])
+    # Base class for instances of varnishkafka on cache servers.
+    #
+    class varnish::kafka {
+        require role::analytics::kafka::config
+        # All producers currently produce to the (only) Kafka cluster in eqiad.
+        $kafka_brokers = keys($role::analytics::kafka::config::cluster_config['eqiad'])
 
-
-            # Make sure varnishkafka rsyslog file is in place properly.
-            rsyslog::conf { 'varnishkafka':
-                source   => 'puppet:///files/varnish/varnishkafka_rsyslog.conf',
-                priority => 70,
-            }
-
-            varnishkafka::instance { $varnish_name:
-                brokers                      => $kafka_brokers,
-                topic                        => $topic,
-                format_type                  => 'json',
-                compression_codec            => 'snappy',
-                varnish_name                 => $varnish_name,
-                # Note: fake_tag tricks varnishkafka into allowing a hardcoded string into a JSON field.
-                # Hardcoding the $fqdn into hostname rather than using %l to account for
-                # possible slip-ups where varnish only writes the short hostname for %l.
-                format                       => "%{fake_tag0@hostname?${::fqdn}}x %{@sequence!num?0}n %{%FT%T@dt}t %{Varnish:time_firstbyte@time_firstbyte!num?0.0}x %{@ip}h %{Varnish:handling@cache_status}x %{@http_status}s %{@response_size!num?0}b %{@http_method}m %{Host@uri_host}i %{@uri_path}U %{@uri_query}q %{Content-Type@content_type}o %{Referer@referer}i %{X-Forwarded-For@x_forwarded_for}i %{User-Agent@user_agent}i %{Accept-Language@accept_language}i %{X-Analytics@x_analytics}o %{Range@range}i",
-                message_send_max_retries     => 3,
-                # At ~6000 msgs per second, 500000 messages is over 1 minute
-                # of buffering, which should be more than enough.
-                queue_buffering_max_messages => 500000,
-                # bits varnishes can do about 6000 reqs / sec each.
-                # We want to send batches at least once a second.
-                batch_num_messages           => 6000,
-                # large timeout to account for potential cross DC latencies
-                topic_request_timeout_ms     => 30000, # request ack timeout
-                # By requiring 2 ACKs per message batch, we survive a
-                # single broker dropping out of its leader role,
-                # without seeing lost messages.
-                topic_request_required_acks  => '2',
-                # Write out stats to varnishkafka.stats.json
-                # this often.  This is set at 15 so that
-                # stats will be fresh when polled from gmetad.
-                log_statistics_interval      => 15,
-                # Require the varnishkafka rsyslog file so that
-                # logs will go to rsyslog the first time puppet
-                # sets up varnishkafka.
-                require                      => Rsyslog::Conf['varnishkafka'],
-            }
-
-            varnishkafka::monitor { $varnish_name:
-                # The primary webrequest varnishkafka instance was formerly the
-                # only one running, so we don't prefix its Ganglia metric keys.
-                key_prefix => '',
-            }
-
-            # Generate icinga alert if varnishkafka is not running.
-            nrpe::monitor_service { 'varnishkafka':
-                description  => 'Varnishkafka log producer',
-                nrpe_command => '/usr/lib/nagios/plugins/check_procs -c 1: -C varnishkafka',
-                require      => Class['::varnishkafka'],
-            }
-
-            # Generate an alert if too many delivery report errors
-            monitoring::ganglia { 'varnishkafka-drerr':
-                description => 'Varnishkafka Delivery Errors',
-                metric      => 'kafka.varnishkafka.kafka_drerr.per_second',
-                # Warn if more than 0 but less than 30
-                warning     => '0.1:29.9',
-                # Critical if greater than 30.
-                critical    => '30.0',
-                require     => Varnishkafka::Monitor[$varnish_name],
-            }
+        # Make sure varnishkafka rsyslog file is in place properly.
+        rsyslog::conf { 'varnishkafka':
+            source   => 'puppet:///files/varnish/varnishkafka_rsyslog.conf',
+            priority => 70,
         }
+
+        # Make sure that Rsyslog::Conf['varnishkafka'] happens
+        # before the first varnishkafka::instance
+        # so that logs will go to rsyslog the first time puppet
+        # sets up varnishkafka.
+        Rsyslog::Conf['varnishkafka'] -> Varnishkafka::Instance <|  |>
+    }
+
+    # == Class varnish::kafka::webrequest
+    # Sets up a varnishkafka instance producing varnish
+    # webrequest logs to the analytics Kafka brokers in eqiad.
+    #
+    # == Parameters
+    # $topic        - the name of the Kafka topic to which to send messages
+    # $varnish_name - the name of the varnish instance to read shared logs from.  Default 'frontend'
+    #
+    class varnish::kafka::webrequest(
+        $topic,
+        $varnish_name = 'frontend'
+    ) inherits role::cache::varnish::kafka
+    {
+        varnishkafka::instance { $varnish_name:
+            brokers                      => $kafka_brokers,
+            topic                        => $topic,
+            format_type                  => 'json',
+            compression_codec            => 'snappy',
+            varnish_name                 => $varnish_name,
+            # Note: fake_tag tricks varnishkafka into allowing a hardcoded string into a JSON field.
+            # Hardcoding the $fqdn into hostname rather than using %l to account for
+            # possible slip-ups where varnish only writes the short hostname for %l.
+            format                       => "%{fake_tag0@hostname?${::fqdn}}x %{@sequence!num?0}n %{%FT%T@dt}t %{Varnish:time_firstbyte@time_firstbyte!num?0.0}x %{@ip}h %{Varnish:handling@cache_status}x %{@http_status}s %{@response_size!num?0}b %{@http_method}m %{Host@uri_host}i %{@uri_path}U %{@uri_query}q %{Content-Type@content_type}o %{Referer@referer}i %{X-Forwarded-For@x_forwarded_for}i %{User-Agent@user_agent}i %{Accept-Language@accept_language}i %{X-Analytics@x_analytics}o %{Range@range}i",
+            message_send_max_retries     => 3,
+            # At ~6000 msgs per second, 500000 messages is over 1 minute
+            # of buffering, which should be more than enough.
+            queue_buffering_max_messages => 500000,
+            # bits varnishes can do about 6000 reqs / sec each.
+            # We want to send batches at least once a second.
+            batch_num_messages           => 6000,
+            # large timeout to account for potential cross DC latencies
+            topic_request_timeout_ms     => 30000, # request ack timeout
+            # By requiring 2 ACKs per message batch, we survive a
+            # single broker dropping out of its leader role,
+            # without seeing lost messages.
+            topic_request_required_acks  => '2',
+            # Write out stats to varnishkafka.stats.json
+            # this often.  This is set at 15 so that
+            # stats will be fresh when polled from gmetad.
+            log_statistics_interval      => 15,
+        }
+
+        varnishkafka::monitor { $varnish_name:
+            # The primary webrequest varnishkafka instance was formerly the
+            # only one running, so we don't prefix its Ganglia metric keys.
+            key_prefix => '',
+        }
+
+        # Generate icinga alert if varnishkafka is not running.
+        nrpe::monitor_service { 'varnishkafka':
+            description  => 'Varnishkafka log producer',
+            nrpe_command => '/usr/lib/nagios/plugins/check_procs -c 1: -C varnishkafka',
+            require      => Class['::varnishkafka'],
+        }
+
+        # Generate an alert if too many delivery report errors
+        monitoring::ganglia { 'varnishkafka-drerr':
+            description => 'Varnishkafka Delivery Errors',
+            metric      => 'kafka.varnishkafka.kafka_drerr.per_second',
+            # Warn if more than 0 but less than 30
+            warning     => '0.1:29.9',
+            # Critical if greater than 30.
+            critical    => '30.0',
+            require     => Varnishkafka::Monitor[$varnish_name],
+        }
+    }
+
+    # == Class varnish::kafka::statsv
+    # Sets up a varnishkafka logging endpoint for collecting
+    # application level metrics. We are calling this system
+    # statsv, as it is similar to statsd, but uses varnish
+    # as its logging endpoint.
+    #
+    # == Parameters
+    # $varnish_name - the name of the varnish instance to read shared logs from.  Default $::hostname
+    #
+    class varnish::kafka::statsv(
+        $varnish_name = $::hostname,
+    ) inherits role::cache::varnish::kafka
+    {
+        $format = "%{fake_tag0@hostname?${::fqdn}}x %{%FT%T@dt}t %{@ip}h %{@uri_path}U %{@uri_query}q %{User-Agent@user_agent}i"
+
+        varnishkafka::instance { 'statsv':
+            brokers                     => $kafka_brokers,
+            format                      => $format,
+            format_type                 => 'json',
+            topic                       => 'statsv',
+            varnish_name                => $varnish_name,
+            varnish_opts                => { 'm' => 'RxURL:^/statsv\//', },
+            # By requiring 2 ACKs per message batch, we survive a
+            # single broker dropping out of its leader role,
+            # without seeing lost messages.
+            topic_request_required_acks => '2',
+        }
+
+        varnishkafka::monitor { 'statsv': }
     }
 
     class varnish::logging::eventlistener {
@@ -846,10 +887,14 @@
             include misc::monitoring::htcp-loss
         }
 
-        # Install a varnishkafka producer to send
-        # varnish webrequest logs to Kafka.
-        class { 'role::cache::varnish::kafka':
-            topic => 'webrequest_text',
+        # ToDo: Remove production conditional once this is
+        # verified to work in labs.
+        if $::realm == 'production' {
+            # Install a varnishkafka producer to send
+            # varnish webrequest logs to Kafka.
+            class { 'role::cache::varnish::kafka::webrequest':
+                topic => 'webrequest_text',
+            }
         }
     }
 
@@ -1021,10 +1066,14 @@
             include misc::monitoring::htcp-loss
         }
 
-        # Install a varnishkafka producer to send
-        # varnish webrequest logs to Kafka.
-        class { 'role::cache::varnish::kafka':
-            topic => 'webrequest_upload',
+        # ToDo: Remove production conditional once this is
+        # verified to work in labs.
+        if $::realm == 'production' {
+            # Install a varnishkafka producer to send
+            # varnish webrequest logs to Kafka.
+            class { 'role::cache::varnish::kafka::webrequest':
+                topic => 'webrequest_upload',
+            }
         }
     }
 
@@ -1113,12 +1162,17 @@
         }
 
         include role::cache::varnish::logging::eventlistener
+        include role::cache::varnish::kafka::statsv
 
-        # Install a varnishkafka producer to send
-        # varnish webrequest logs to Kafka.
-        class { 'role::cache::varnish::kafka':
-            topic        => 'webrequest_bits',
-            varnish_name => $::hostname,
+        # ToDo: Remove production conditional once this is
+        # verified to work in labs.
+        if $::realm == 'production' {
+            # Install a varnishkafka producer to send
+            # varnish webrequest logs to Kafka.
+            class { 'role::cache::varnish::kafka::webrequest':
+                topic        => 'webrequest_bits',
+                varnish_name => $::hostname,
+            }
         }
     }
 
@@ -1295,10 +1349,14 @@
         # udp2log kafka consumer is implemented and deployed.
         include role::cache::varnish::logging
 
-        # Install a varnishkafka producer to send
-        # varnish webrequest logs to Kafka.
-        class { 'role::cache::varnish::kafka':
-            topic => 'webrequest_mobile',
+        # ToDo: Remove production conditional once this is
+        # verified to work in labs.
+        if $::realm == 'production' {
+            # Install a varnishkafka producer to send
+            # varnish webrequest logs to Kafka.
+            class { 'role::cache::varnish::kafka::webrequest':
+                topic => 'webrequest_mobile',
+            }
         }
     }
 

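For reference, given format_type => 'json', the statsv $format above should
yield one JSON object per matching request, with keys taken from the @name
annotations in the format string. A sketch of such a record, with all field
values invented:

    # Sketch of a statsv record as varnishkafka would emit it to the
    # 'statsv' topic. Keys follow $format above; values are invented.
    import json

    sample_record = {
        'hostname':   'cp1056.eqiad.wmnet',       # %{fake_tag0@hostname?${::fqdn}}x
        'dt':         '2014-11-20T21:30:00',      # %{%FT%T@dt}t
        'ip':         '203.0.113.7',              # %{@ip}h
        'uri_path':   '/statsv/',                 # %{@uri_path}U
        'uri_query':  'frontend.pageLoad=412ms',  # %{@uri_query}q
        'user_agent': 'Mozilla/5.0 ...',          # %{User-Agent@user_agent}i
    }
    print(json.dumps(sample_record))
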
-- 
To view, visit https://gerrit.wikimedia.org/r/174195
To unsubscribe, visit https://gerrit.wikimedia.org/r/settings

Gerrit-MessageType: merged
Gerrit-Change-Id: I3d74c9da11c979a9f3cdcb4ebf1b34d225d56061
Gerrit-PatchSet: 8
Gerrit-Project: operations/puppet
Gerrit-Branch: production
Gerrit-Owner: Ori.livneh <o...@wikimedia.org>
Gerrit-Reviewer: Ori.livneh <o...@wikimedia.org>
Gerrit-Reviewer: Ottomata <o...@wikimedia.org>
Gerrit-Reviewer: jenkins-bot <>
