Ottomata has uploaded a new change for review. (
https://gerrit.wikimedia.org/r/381489 )
Change subject: [WIP] Prometheus based Kafka broker alerts, take 1
......................................................................
[WIP] Prometheus based Kafka broker alerts, take 1
This refactors the Prometheus JMX exporter just a bit, moving
it to a separate profile::kafka::broker::monitoring class,
along with icinga alerts.
Bug: T175923
Change-Id: I839d5de4110da245f712e23285280c2fd546fe8f
---
M hieradata/role/common/kafka/jumbo/broker.yaml
M modules/profile/manifests/kafka/broker.pp
A modules/profile/manifests/kafka/broker/monitoring.pp
3 files changed, 87 insertions(+), 39 deletions(-)
git pull ssh://gerrit.wikimedia.org:29418/operations/puppet
refs/changes/89/381489/1
diff --git a/hieradata/role/common/kafka/jumbo/broker.yaml
b/hieradata/role/common/kafka/jumbo/broker.yaml
index 5fb6770..5b18607 100644
--- a/hieradata/role/common/kafka/jumbo/broker.yaml
+++ b/hieradata/role/common/kafka/jumbo/broker.yaml
@@ -2,8 +2,8 @@
profile::kafka::broker::kafka_cluster_name: jumbo
-# Enable the Prometheus JMX Exporter
-profile::kafka::broker::prometheus_monitoring_enabled: true
+# Enable Monitoring (via Prometheus) and icinga alerts
+profile::kafka::broker::monitoring_enabled: true
profile::kafka::broker::log_dirs: [/srv/kafka/data]
profile::kafka::broker::plaintext: true
@@ -28,5 +28,5 @@
profile::kafka::broker::num_recovery_threads_per_data_dir: 12
profile::kafka::broker::num_io_threads: 12
-profile::kafka::broker::replica_maxlag_warning: "1000000"
-profile::kafka::broker::replica_maxlag_critical: "5000000"
+profile::kafka::broker::monitoring::replica_maxlag_warning: 1000000
+profile::kafka::broker::monitoring::replica_maxlag_critical: 5000000
diff --git a/modules/profile/manifests/kafka/broker.pp
b/modules/profile/manifests/kafka/broker.pp
index 0ce4f1a..e0b3e71 100644
--- a/modules/profile/manifests/kafka/broker.pp
+++ b/modules/profile/manifests/kafka/broker.pp
@@ -87,7 +87,7 @@
$replica_maxlag_critical =
hiera('profile::kafka::broker::replica_maxlag_critical'),
# This is set via top level hiera variable so it can be synchronized
between roles and clients.
$message_max_bytes = hiera('kafka_message_max_bytes'),
- $prometheus_monitoring_enabled =
hiera('profile::kafka::broker::prometheus_monitoring_enabled'),
+ $monitoring_enabled =
hiera('profile::kafka::broker::monitoring_enabled'),
$prometheus_nodes = hiera('prometheus_nodes'),
) {
# TODO: WIP
@@ -185,39 +185,6 @@
java_home => '/usr/lib/jvm/java-8-openjdk-amd64',
}
- if $prometheus_monitoring_enabled {
- # Allow automatic generation of config on the
- # Prometheus master
- prometheus::jmx_exporter_instance { $::hostname:
- address => $::ipaddress,
- port => 7800,
- }
-
- $prometheus_nodes_ferm = join($prometheus_nodes, ' ')
- ferm::service { 'kafka-broker-jmx_exporter':
- proto => 'tcp',
- port => '7800',
- srange => "@resolve((${prometheus_nodes_ferm}))",
- }
-
- require_package('prometheus-jmx-exporter')
-
- $jmx_exporter_config_file =
'/etc/kafka/broker_prometheus_jmx_exporter.yaml'
- $java_opts =
"-javaagent:/usr/share/java/prometheus/jmx_prometheus_javaagent.jar=${::ipaddress}:7800:${jmx_exporter_config_file}"
-
- # Create the Prometheus JMX Exporter configuration
- file { $jmx_exporter_config_file:
- ensure => present,
- source =>
'puppet:///modules/profile/kafka/broker_prometheus_jmx_exporter.yaml',
- owner => 'kafka',
- group => 'kafka',
- mode => '0400',
- require => Class['::confluent::kafka::broker'],
- }
- } else {
- $java_opts = undef
- }
-
class { '::confluent::kafka::broker':
log_dirs => $log_dirs,
brokers => $config['brokers']['hash'],
@@ -233,7 +200,6 @@
# https://kafka.apache.org/documentation/#java
# Note that MetaspaceSize is a Java 8 setting.
jvm_performance_opts => '-server -XX:MetaspaceSize=96m
-XX:+UseG1GC -XX:MaxGCPauseMillis=20 -XX:InitiatingHeapOccupancyPercent=35
-XX:G1HeapRegionSize=16M -XX:MinMetaspaceFreeRatio=50
-XX:MaxMetaspaceFreeRatio=80',
- java_opts => $java_opts,
listeners => $listeners,
security_inter_broker_protocol => $security_inter_broker_protocol,
@@ -249,6 +215,15 @@
message_max_bytes => $message_max_bytes,
}
+ # If monitoring is enabled, then include the monitoring profile and set
$java_opts
+ # for exposing the Prometheus JMX Exporter in the Kafka Broker process.
+ if $monitoring_enabled {
+ include ::profile::kafka::broker::monitoring
+ Class['::confluent::kafka::broker'] {
+ java_opts => $::profile::kafka::broker::monitoring::java_opts
+ }
+ }
+
$ferm_plaintext_ensure = $plaintext ? {
false => 'absent',
undef => 'absent',
diff --git a/modules/profile/manifests/kafka/broker/monitoring.pp
b/modules/profile/manifests/kafka/broker/monitoring.pp
new file mode 100644
index 0000000..628972b
--- /dev/null
+++ b/modules/profile/manifests/kafka/broker/monitoring.pp
@@ -0,0 +1,95 @@
+# Class: profile::kafka::broker::monitoring
+#
+# Sets up Prometheus based monitoring and icinga alerts for a Kafka broker:
+# installs the Prometheus JMX exporter and its configuration file, declares
+# the exporter instance, opens the exporter port to the Prometheus nodes in
+# ferm, and declares the icinga checks.
+#
+# The $java_opts variable set here carries the -javaagent option for the
+# exporter; it is meant to be passed to the Kafka broker process.
+#
+# Parameters (all looked up from hiera):
+#
+# [*cluster*]
+#   Used to build the 'cluster' Prometheus label (as "kafka_<cluster>").
+#
+# [*prometheus_nodes*]
+#   Hosts allowed to reach the JMX exporter port through ferm.
+#
+# [*replica_maxlag_warning*]
+#   Warning threshold for the replica max lag check.
+#
+# [*replica_maxlag_critical*]
+#   Critical threshold for the replica max lag check.
+#
+class profile::kafka::broker::monitoring (
+    $cluster                 = hiera('cluster'),
+    $prometheus_nodes        = hiera('prometheus_nodes'),
+    $replica_maxlag_warning  = hiera('profile::kafka::broker::monitoring::replica_maxlag_warning'),
+    $replica_maxlag_critical = hiera('profile::kafka::broker::monitoring::replica_maxlag_critical'),
+) {
+    ### Expose Kafka Broker JMX metrics to Prometheus
+    require_package('prometheus-jmx-exporter')
+
+    $prometheus_jmx_exporter_port = 7800
+    $jmx_exporter_config_file     = '/etc/kafka/broker_prometheus_jmx_exporter.yaml'
+
+    # Pass this in the JAVA_OPTS given to the Kafka broker process.
+    $java_opts = "-javaagent:/usr/share/java/prometheus/jmx_prometheus_javaagent.jar=${::ipaddress}:${prometheus_jmx_exporter_port}:${jmx_exporter_config_file}"
+
+    # Create the Prometheus JMX Exporter configuration
+    file { $jmx_exporter_config_file:
+        ensure  => present,
+        source  => 'puppet:///modules/profile/kafka/broker_prometheus_jmx_exporter.yaml',
+        owner   => 'kafka',
+        group   => 'kafka',
+        mode    => '0400',
+        # Require this to make sure that kafka user and group are already created.
+        require => Class['::confluent::kafka::broker'],
+    }
+
+    # Allow automatic generation of config on the Prometheus master
+    prometheus::jmx_exporter_instance { $::hostname:
+        address => $::ipaddress,
+        port    => $prometheus_jmx_exporter_port,
+    }
+
+    # Let the Prometheus nodes reach the exporter. The port is interpolated from
+    # the variable above (still a string, as before) so the two cannot drift.
+    $prometheus_nodes_ferm = join($prometheus_nodes, ' ')
+    ferm::service { 'kafka-broker-jmx_exporter':
+        proto  => 'tcp',
+        port   => "${prometheus_jmx_exporter_port}",
+        srange => "@resolve((${prometheus_nodes_ferm}))",
+    }
+
+    ### Icinga alerts
+    # Generate icinga alert if Kafka Broker Server is not running.
+    nrpe::monitor_service { 'kafka':
+        description  => 'Kafka Broker Server',
+        nrpe_command => '/usr/lib/nagios/plugins/check_procs -c 1:1 -C java -a "Kafka /etc/kafka/server.properties"',
+        critical     => true,
+    }
+
+    # Prometheus labels for this Kafka Broker instance. PromQL label matchers
+    # need quoted string values, hence the escaped double quotes.
+    $prometheus_labels = "cluster=\"kafka_${cluster}\",instance=\"${::hostname}:${prometheus_jmx_exporter_port}\",job=\"jmx_kafka\""
+
+    # Alert on the average number of under replicated partitions over the last 30 minutes.
+    monitoring::check_prometheus { 'kafka_broker_under_replicated_partitions':
+        description    => 'Kafka Broker Under Replicated Partitions',
+        metric         => "scalar(avg_over_time(kafka_server_replicamanager_underreplicatedpartitions{${prometheus_labels}}[30m]))",
+        warning        => 5,
+        critical       => 10,
+        prometheus_url => "http://prometheus.svc.${::site}.wmnet/ops",
+    }
+
+    # Alert on the average max replica lag over the last 30 minutes.
+    monitoring::check_prometheus { 'kafka_broker_replica_max_lag':
+        description    => 'Kafka Broker Replica Max Lag',
+        metric         => "scalar(avg_over_time(kafka_server_replicafetchermanager_maxlag{${prometheus_labels}}[30m]))",
+        warning        => $replica_maxlag_warning,
+        critical       => $replica_maxlag_critical,
+        prometheus_url => "http://prometheus.svc.${::site}.wmnet/ops",
+    }
+}
\ No newline at end of file
--
To view, visit https://gerrit.wikimedia.org/r/381489
To unsubscribe, visit https://gerrit.wikimedia.org/r/settings
Gerrit-MessageType: newchange
Gerrit-Change-Id: I839d5de4110da245f712e23285280c2fd546fe8f
Gerrit-PatchSet: 1
Gerrit-Project: operations/puppet
Gerrit-Branch: production
Gerrit-Owner: Ottomata <[email protected]>
_______________________________________________
MediaWiki-commits mailing list
[email protected]
https://lists.wikimedia.org/mailman/listinfo/mediawiki-commits