Madhuvishy has uploaded a new change for review. https://gerrit.wikimedia.org/r/248079
Change subject: [WIP] burrow: Add new module for burrow
......................................................................

[WIP] burrow: Add new module for burrow

Burrow(https://github.com/linkedin/burrow) is a consumer lag monitoring tool for Kafka,
it will monitor the consumer groups on our Kafka clusters and send emails on their status.

Bug: T115669
Change-Id: Iaf6b2ee804df8f5e951fc7ced770b2565e3adb65
---
A modules/burrow/files/default-email.tmpl
A modules/burrow/files/logging.cfg
A modules/burrow/manifests/init.pp
A modules/burrow/templates/burrow.cfg.erb
A modules/burrow/templates/initscripts/burrow.systemd.erb
5 files changed, 179 insertions(+), 0 deletions(-)


  git pull ssh://gerrit.wikimedia.org:29418/operations/puppet refs/changes/79/248079/1

diff --git a/modules/burrow/files/default-email.tmpl b/modules/burrow/files/default-email.tmpl
new file mode 100644
index 0000000..2e82447
--- /dev/null
+++ b/modules/burrow/files/default-email.tmpl
@@ -0,0 +1,16 @@
+From: {{.From}}
+To: {{.To}}
+Subject: [Burrow] Kafka Consumer Lag Alert
+
+The Kafka consumer groups you are monitoring are currently showing problems. The following groups are in a problem state (groups not listed are OK):
+
+{{range .Results}}{{if and (ne 0 .Status) (ne 5 .Status)}}Cluster: {{.Cluster}}
+Group: {{.Group}}
+Status: {{if eq 1 .Status}}WARNING{{else if eq 2 .Status}}ERROR{{end}}
+Complete: {{.Complete}}
+Errors: {{len .Partitions}} partitions have problems
+{{range .Partitions}} {{if eq 1 .Status}} WARN{{else if eq 2 .Status}} ERR{{else if eq 3 .Status}} STOP{{else if eq 4 .Status}}STALL{{end}} {{.Topic}}:{{.Partition}} ({{.Start.Timestamp}}, {{.Start.Offset}}, {{.Start.Lag}}) -> ({{.End.Timestamp}}, {{.End.Offset}}, {{.End.Lag}})
+{{end}}{{end}}
+
+----------------------------------------------------------------------
+{{end}}
diff --git a/modules/burrow/files/logging.cfg b/modules/burrow/files/logging.cfg
new file mode 100644
index 0000000..410183a
--- /dev/null
+++ b/modules/burrow/files/logging.cfg
@@ -0,0 +1,8 @@
+<seelog minlevel="info">
+  <outputs formatid="main">
+    <rollingfile type="date" filename="/var/log/burrow/burrow.log" datepattern="2006-01-02-15" maxrolls="168" />
+  </outputs>
+  <formats>
+    <format id="main" format="%Date(2006-01-02 15:04:05) [%LEVEL] %Msg%n"/>
+  </formats>
+</seelog>
\ No newline at end of file
diff --git a/modules/burrow/manifests/init.pp b/modules/burrow/manifests/init.pp
new file mode 100644
index 0000000..86262d8
--- /dev/null
+++ b/modules/burrow/manifests/init.pp
@@ -0,0 +1,96 @@
+# == Class: burrow
+#
+# Requires the golang-burrow package, manages Burrow's log and config
+# directories and config files, and declares the 'burrow' service unit.
+#
+# === Parameters
+#
+# [*ensure*]
+#   Passed as 'ensure' to the managed config files and the service unit.
+#   Default: present
+#
+# [*client_id*]
+#   Rendered as 'client-id' in the [general] section of burrow.cfg.
+#   Default: 'burrow-client'
+#
+# [*zk_hosts*]
+#   List of Zookeeper hosts; burrow.cfg gets one entry per host.
+#
+# [*zk_path*]
+#   Rendered as 'zookeeper-path'; the template prepends the leading '/'.
+#
+# [*kafka_cluster_name*]
+#   Names the [kafka "..."] section and prefixes each monitored group entry.
+#
+# [*kafka_hosts*]
+#   List of Kafka broker hosts; burrow.cfg gets one entry per host.
+#
+# [*consumer_groups*]
+#   List of consumer groups rendered as 'group' entries in the [email] section.
+#
+# [*smtp_server*]
+#   Rendered as 'server' in the [smtp] section.
+#
+# [*from_email*]
+#   Rendered as 'from' in the [smtp] section.
+#
+# [*to_emails*]
+#   Rendered as the name of the [email "..."] section.
+#
+class burrow (
+    $ensure = present,
+    $client_id = 'burrow-client',
+    $zk_hosts,
+    $zk_path,
+    $kafka_cluster_name,
+    $kafka_hosts,
+    $consumer_groups,
+    $smtp_server,
+    $from_email,
+    $to_emails,
+)
+{
+    require_package('golang-burrow')
+
+    $config_dir = '/etc/burrow/config'
+    $log_dir = '/var/log/burrow'
+    $burrow_config_file = "${config_dir}/burrow.cfg"
+
+    # make sure the log directory exists
+    file { $log_dir:
+        ensure => directory,
+        owner  => 'root',
+        group  => 'root',
+        mode   => '0775',
+    }
+
+    # make sure the config directory exists
+    file { $config_dir:
+        ensure => directory,
+        owner  => 'root',
+        group  => 'root',
+        mode   => '0775',
+    }
+
+    # template() takes a '<module>/<file>' reference, not a filesystem path
+    file { $burrow_config_file:
+        ensure  => $ensure,
+        content => template('burrow/burrow.cfg.erb'),
+    }
+
+    file { "${config_dir}/logging.cfg":
+        ensure => $ensure,
+        source => 'puppet:///modules/burrow/logging.cfg',
+    }
+
+    file { "${config_dir}/default-email.tmpl":
+        ensure => $ensure,
+        source => 'puppet:///modules/burrow/default-email.tmpl',
+    }
+
+    base::service_unit { 'burrow':
+        ensure  => $ensure,
+        systemd => true,
+    }
+
+}
\ No newline at end of file
diff --git a/modules/burrow/templates/burrow.cfg.erb b/modules/burrow/templates/burrow.cfg.erb
new file mode 100644
index 0000000..a669277
--- /dev/null
+++ b/modules/burrow/templates/burrow.cfg.erb
@@ -0,0 +1,50 @@
+# This file is managed by Puppet (module burrow); local changes will be overwritten.
+[general]
+logdir=/var/log/burrow
+logconfig=/etc/burrow/config/logging.cfg
+pidfile=burrow.pid
+client-id=<%= @client_id %>
+group-blacklist=^(console-consumer-|python-kafka-consumer-).*$
+
+[zookeeper]
+<% @zk_hosts.each do |host| -%>
+  hostname=<%= host %>
+<% end -%>
+port=2181
+timeout=6
+lock-path=/etc/burrow/notifier
+
+[kafka "<%= @kafka_cluster_name %>"]
+<% @kafka_hosts.each do |host| -%>
+  broker=<%= host %>
+<% end -%>
+broker-port=9092
+<% @zk_hosts.each do |host| -%>
+  zookeeper=<%= host %>
+<% end -%>
+zookeeper-port=2181
+zookeeper-path=/<%= @zk_path %>
+offsets-topic=__consumer_offsets
+
+[tickers]
+broker-offsets=60
+
+[lagcheck]
+intervals=10
+expire-group=604800
+
+[httpserver]
+server=on
+port=8000
+
+[smtp]
+server=<%= @smtp_server %>
+port=25
+from=<%= @from_email %>
+template=/etc/burrow/config/default-email.tmpl
+
+[email "<%= @to_emails %>"]
+<% @consumer_groups.each do |group| -%>
+  group=<%= @kafka_cluster_name %>,<%= group %>
+<% end -%>
+interval=120
diff --git a/modules/burrow/templates/initscripts/burrow.systemd.erb b/modules/burrow/templates/initscripts/burrow.systemd.erb
new file mode 100644
index 0000000..8622522
--- /dev/null
+++ b/modules/burrow/templates/initscripts/burrow.systemd.erb
@@ -0,0 +1,9 @@
+[Unit]
+Description=Burrow: Kafka consumer lag monitor
+
+[Service]
+ExecStart=/usr/bin/burrow \
+    --config=<%= @burrow_config_file %>
+
+[Install]
+WantedBy=multi-user.target
\ No newline at end of file

-- 
To view, visit https://gerrit.wikimedia.org/r/248079
To unsubscribe, visit https://gerrit.wikimedia.org/r/settings

Gerrit-MessageType: newchange
Gerrit-Change-Id: Iaf6b2ee804df8f5e951fc7ced770b2565e3adb65
Gerrit-PatchSet: 1
Gerrit-Project: operations/puppet
Gerrit-Branch: production
Gerrit-Owner: Madhuvishy <mviswanat...@wikimedia.org>

_______________________________________________
MediaWiki-commits mailing list
MediaWiki-commits@lists.wikimedia.org
https://lists.wikimedia.org/mailman/listinfo/mediawiki-commits