Ottomata has uploaded a new change for review. https://gerrit.wikimedia.org/r/207858
Change subject: Set up kafkatee instance on erbium to output fundraising logs ...................................................................... Set up kafkatee instance on erbium to output fundraising logs Bug: T97294 Change-Id: I8f00895ba2733461ef452006a1321e6ca339bd68 --- M manifests/role/logging.pp M manifests/site.pp 2 files changed, 92 insertions(+), 2 deletions(-) git pull ssh://gerrit.wikimedia.org:29418/operations/puppet refs/changes/58/207858/1 diff --git a/manifests/role/logging.pp b/manifests/role/logging.pp index b6b31f6..ba99160 100644 --- a/manifests/role/logging.pp +++ b/manifests/role/logging.pp @@ -383,13 +383,99 @@ # Includes output filters useful for operational debugging. # class role::logging::kafkatee::webrequest::ops inherits role::logging::kafkatee::webrequest { - ::kafkatee::output { 'sampled-1000': + kafkatee::output { 'sampled-1000': destination => "${webrequest_log_directory}/sampled-1000.json", sample => 1000, } - ::kafkatee::output { '5xx': + kafkatee::output { '5xx': destination => "/bin/grep '\"http_status\":\"5' >> ${webrequest_log_directory}/5xx.json", type => 'pipe', } } + +# This does not inherit from role::logging::kafkatee::webrequest +# because we need to use a custom output format, and don't need +# all webrequest sources. +class role::logging::kafkatee::webrequest::fundraising { + require role::analytics::kafka::config + + # The fundraising outputs use udp-filter + require misc::udp2log::udp_filter + + + # Install kafkatee configured to consume from + # the Analytics Kafka cluster. The webrequest logs are + # in json, so we output them in the format they are received. + class { '::kafkatee': + kafka_brokers => $role::analytics::kafka::config::brokers_array, + # convert the json logs into the old udp2log tsv format. 
+ output_encoding => 'string', + output_format => '%{hostname} %{sequence} %{dt} %{time_firstbyte} %{ip} %{cache_status}/%{http_status} %{response_size} %{http_method} http://%{uri_host}%{uri_path}%{uri_query} - %{content_type} %{referer} %{x_forwarded_for} %{user_agent} %{accept_language} %{x_analytics}', + } + include kafkatee::monitoring + + # TODO: Do we need all topics for ops debugging of webrequest logs? + + # Fundraising logs only need mobile and text as inputs. + # Setting offset to 'end' instead of 'stored', only + # because that is how udp2log worked, and I don't want to + # cause any weirdness with downstream consumers if an instance + # starts up late and consumes older data. + kafkatee::input { 'kafka-webrequest_mobile': + topic => 'webrequest_mobile', + partitions => '0-11', + options => { 'encoding' => 'json' }, + offset => 'end', + } + kafkatee::input { 'kafka-webrequest_text': + topic => 'webrequest_text', + partitions => '0-11', + options => { 'encoding' => 'json' }, + offset => 'end', + } + + # Declare packaged rsyslog config to ensure it isn't purged. + file { '/etc/rsyslog.d/75-kafkatee.conf': + ensure => file, + require => Class['::kafkatee'], + } + + # Temporarily use a different log directory than udp2log, while we run + # both kafkatee and udp2log side by side so that FR techs can + # validate that we can use kafkatee instead of udp2log. + $log_directory = '/a/log/fundraising-kafkatee' + file { $log_directory: + ensure => 'directory', + owner => 'kafkatee', + group => 'kafkatee', + require => Class['::kafkatee'], + } + + # if the logs in $log_directory should be rotated + # then configure a logrotate.d script to do so. 
+ file { '/etc/logrotate.d/kafkatee-fundraising': + mode => '0444', + owner => 'root', + group => 'root', + content => template('kafkatee/logrotate_fundraising.erb'), + } + + + kafkatee::output { 'fundraising-landingpages': + destination => "/usr/bin/udp-filter -F '\t' -d wikimediafoundation.org,donate.wikimedia.org >> ${log_directory}/landingpages.tsv.log", + type => 'pipe', + } + + kafkatee::output { 'fundraising-bannerImpressions': + destination => "/usr/bin/udp-filter -F '\t' -p Special:RecordImpression >> ${log_directory}/bannerImpressions-sampled100.tsv.log", + sample => 100, + type => 'pipe', + } + + kafkatee::output { 'fundraising-bannerRequests': + destination => "/usr/bin/udp-filter -F '\t' -p Special:BannerRandom >> ${log_directory}/bannerRequests-sampled100.tsv.log", + sample => 100, + type => 'pipe', + } +} diff --git a/manifests/site.pp b/manifests/site.pp index 4c22a4b..8825cf3 100644 --- a/manifests/site.pp +++ b/manifests/site.pp @@ -901,6 +901,10 @@ node 'erbium.eqiad.wmnet' inherits 'base_analytics_logging_node' { # gadolinium hosts the separate nginx webrequest udp2log instance. include role::logging::udp2log::erbium + + # Include kafkatee fundraising outputs alongside of udp2log + # while FR techs verify that they can use this output. + include role::logging::kafkatee::webrequest::fundraising } # es1 equad -- To view, visit https://gerrit.wikimedia.org/r/207858 To unsubscribe, visit https://gerrit.wikimedia.org/r/settings Gerrit-MessageType: newchange Gerrit-Change-Id: I8f00895ba2733461ef452006a1321e6ca339bd68 Gerrit-PatchSet: 1 Gerrit-Project: operations/puppet Gerrit-Branch: production Gerrit-Owner: Ottomata <o...@wikimedia.org> _______________________________________________ MediaWiki-commits mailing list MediaWiki-commits@lists.wikimedia.org https://lists.wikimedia.org/mailman/listinfo/mediawiki-commits