Ottomata has uploaded a new change for review.

  https://gerrit.wikimedia.org/r/207858

Change subject: Set up kafkatee instance on erbium to output fundraising logs
......................................................................

Set up kafkatee instance on erbium to output fundraising logs

Bug: T97294
Change-Id: I8f00895ba2733461ef452006a1321e6ca339bd68
---
M manifests/role/logging.pp
M manifests/site.pp
2 files changed, 92 insertions(+), 2 deletions(-)


  git pull ssh://gerrit.wikimedia.org:29418/operations/puppet refs/changes/58/207858/1

diff --git a/manifests/role/logging.pp b/manifests/role/logging.pp
index b6b31f6..ba99160 100644
--- a/manifests/role/logging.pp
+++ b/manifests/role/logging.pp
@@ -383,13 +383,99 @@
 # Includes output filters useful for operational debugging.
 #
 class role::logging::kafkatee::webrequest::ops inherits role::logging::kafkatee::webrequest  {
-    ::kafkatee::output { 'sampled-1000':
+    kafkatee::output { 'sampled-1000':
         destination => "${webrequest_log_directory}/sampled-1000.json",
         sample      => 1000,
     }
 
-    ::kafkatee::output { '5xx':
+    kafkatee::output { '5xx':
+        destination => "/bin/grep '\"http_status\":\"5' >> ${webrequest_log_directory}/5xx.json",
         type        => 'pipe',
     }
 }
+
+# This does not inherit from role::logging::kafkatee::webrequest
+# because we need to use a custom output format, and don't need
+# all webrequest sources.
+class role::logging::kafkatee::webrequest::fundraising {
+    require role::analytics::kafka::config
+
+    # The fundraising outputs use udp-filter
+    require misc::udp2log::udp_filter
+
+
+    # Install kafkatee configured to consume from
+    # the Analytics Kafka cluster.  The webrequest logs are
+    # in json, but are re-encoded on output (see output_format).
+    class { '::kafkatee':
+        kafka_brokers           => $role::analytics::kafka::config::brokers_array,
+        # convert the json logs into the old udp2log tsv format.
+        output_encoding         => 'string',
+        output_format           => '%{hostname}        %{sequence}     %{dt}   
%{time_firstbyte}       %{ip}   %{cache_status}/%{http_status}  
%{response_size}        %{http_method}  
http://%{uri_host}%{uri_path}%{uri_query}       -       %{content_type} 
%{referer}      %{x_forwarded_for}      %{user_agent}   %{accept_language}      
%{x_analytics}',
+    }
+    include kafkatee::monitoring
+
+    # TODO: Do we need all topics for ops debugging of webrequest logs?
+
+    # Fundraising logs only need mobile and text as inputs.
+    # Setting offset to 'end' instead of 'stored', only
+    # because that is how udp2log worked, and I don't want to
+    # cause any weirdness with downstream consumers if an instance
+    # starts up late and consumes older data.
+    kafkatee::input { 'kafka-webrequest_mobile':
+        topic       => 'webrequest_mobile',
+        partitions  => '0-11',
+        options     => { 'encoding' => 'json' },
+        offset      => 'end',
+    }
+    kafkatee::input { 'kafka-webrequest_text':
+        topic       => 'webrequest_text',
+        partitions  => '0-11',
+        options     => { 'encoding' => 'json' },
+        offset      => 'end',
+    }
+
+    # Declare packaged rsyslog config to ensure it isn't purged.
+    file { '/etc/rsyslog.d/75-kafkatee.conf':
+        ensure  => file,
+        require => Class['::kafkatee'],
+    }
+
+    # Temporarily use a different log directory than udp2log, while we run
+    # both kafkatee and udp2log side by side so that FR techs can
+    # validate that we can use kafkatee instead of udp2log.
+    $log_directory              = '/a/log/fundraising-kafkatee'
+    file { $log_directory:
+        ensure      => 'directory',
+        owner       => 'kafkatee',
+        group       => 'kafkatee',
+        require     => Class['::kafkatee'],
+    }
+
+    # Install a logrotate.d script so that the logs
+    # in $log_directory are rotated.
+    file { '/etc/logrotate.d/kafkatee-fundraising':
+        mode    => '0444',
+        owner   => 'root',
+        group   => 'root',
+        content => template('kafkatee/logrotate_fundraising.erb'),
+    }
+
+
+    kafkatee::output { 'fundraising-landingpages':
+        destination => "/usr/bin/udp-filter -F '\t' -d wikimediafoundation.org,donate.wikimedia.org >> ${log_directory}/landingpages.tsv.log",
+        type        => 'pipe',
+    }
+
+    kafkatee::output { 'fundraising-bannerImpressions':
+        destination => "/usr/bin/udp-filter -F '\t' -p Special:RecordImpression >> ${log_directory}/bannerImpressions-sampled100.tsv.log",
+        sample      => 100,
+        type        => 'pipe',
+    }
+
+    kafkatee::output { 'fundraising-bannerRequests':
+        destination => "/usr/bin/udp-filter -F '\t' -p Special:BannerRandom >> ${log_directory}/bannerRequests-sampled100.tsv.log",
+        sample      => 100,
+        type        => 'pipe',
+    }
+}
diff --git a/manifests/site.pp b/manifests/site.pp
index 4c22a4b..8825cf3 100644
--- a/manifests/site.pp
+++ b/manifests/site.pp
@@ -901,6 +901,10 @@
 node 'erbium.eqiad.wmnet' inherits 'base_analytics_logging_node' {
     # gadolinium hosts the separate nginx webrequest udp2log instance.
     include role::logging::udp2log::erbium
+
+    # Include kafkatee fundraising outputs alongside of udp2log
+    # while FR techs verify that they can use this output.
+    include role::logging::kafkatee::webrequest::fundraising
 }
 
 # es1 equad

-- 
To view, visit https://gerrit.wikimedia.org/r/207858
To unsubscribe, visit https://gerrit.wikimedia.org/r/settings

Gerrit-MessageType: newchange
Gerrit-Change-Id: I8f00895ba2733461ef452006a1321e6ca339bd68
Gerrit-PatchSet: 1
Gerrit-Project: operations/puppet
Gerrit-Branch: production
Gerrit-Owner: Ottomata <o...@wikimedia.org>

_______________________________________________
MediaWiki-commits mailing list
MediaWiki-commits@lists.wikimedia.org
https://lists.wikimedia.org/mailman/listinfo/mediawiki-commits

Reply via email to