Alexandros Kosiaris has uploaded a new change for review. (
https://gerrit.wikimedia.org/r/399160 )
Change subject: Introduce profile::prometheus::k8s::staging
......................................................................
Introduce profile::prometheus::k8s::staging
This is almost identical to the non staging profile and is meant to be
used to monitor the staging cluster
Change-Id: Iba4d0a4a0c4f87b4ed98d51024f40c636a7da2ec
---
A modules/profile/manifests/prometheus/k8s/staging.pp
1 file changed, 142 insertions(+), 0 deletions(-)
git pull ssh://gerrit.wikimedia.org:29418/operations/puppet
refs/changes/60/399160/1
diff --git a/modules/profile/manifests/prometheus/k8s/staging.pp
b/modules/profile/manifests/prometheus/k8s/staging.pp
new file mode 100644
index 0000000..3e082e2
--- /dev/null
+++ b/modules/profile/manifests/prometheus/k8s/staging.pp
@@ -0,0 +1,142 @@
+# Uses the prometheus module and generates the specific configuration
+# needed to monitor the WMF staging Kubernetes cluster
+#
+# Declares a Prometheus instance (title 'k8s', listening on 127.0.0.1:9907)
+# that scrapes the Kubernetes API servers and nodes discovered through
+# the API server on $master_host, plus the web proxy, rules file and bearer
+# token file that instance needs.
+#
+# Parameters:
+#  [*users*] Hash of infrastructure users, by default looked up from the
+#            'k8s_infrastructure_users' hiera key. Only
+#            $users['prometheus']['token'] is read here; it becomes the
+#            content of the bearer token file.
+#
+class profile::prometheus::k8s::staging (
+    $users = hiera('k8s_infrastructure_users'), # lint:ignore:wmf_styleguide
+){
+    # NOTE(review): $targets_path is not referenced anywhere else in this
+    # class; confirm whether anything outside still reads it.
+    $targets_path = '/srv/prometheus/k8s/targets'
+    $storage_retention = hiera('prometheus::server::storage_retention', '2190h0m0s')
+    $max_chunks_to_persist = hiera('prometheus::server::max_chunks_to_persist', '524288')
+    $memory_chunks = hiera('prometheus::server::memory_chunks', '1048576')
+    $bearer_token_file = '/srv/prometheus/k8s/k8s.token'
+    $master_host = 'neon.eqiad.wmnet'
+    $client_token = $users['prometheus']['token']
+
+    $config_extra = {
+        # All metrics will get an additional 'site' label when queried by
+        # external systems (e.g. via federation)
+        'external_labels' => {
+            'site' => $::site,
+        },
+    }
+
+    # Configure scraping from k8s cluster with distinct jobs:
+    # - k8s-api: api server metrics (each one, as returned by k8s)
+    # - k8s-node: metrics from each node running k8s
+    # - k8s-node-cadvisor: same node targets, scraped at /metrics/cadvisor
+    # See also:
+    # * https://prometheus.io/docs/operating/configuration/#<kubernetes_sd_config>
+    # * https://github.com/prometheus/prometheus/blob/master/documentation/examples/prometheus-kubernetes.yml
+    $scrape_configs_extra = [
+        {
+            'job_name'              => 'k8s-api',
+            'bearer_token_file'     => $bearer_token_file,
+            'scheme'                => 'https',
+            'tls_config'            => {
+                'server_name' => $master_host,
+            },
+            'kubernetes_sd_configs' => [
+                {
+                    'api_server'        => "https://${master_host}:6443",
+                    'bearer_token_file' => $bearer_token_file,
+                    'role'              => 'endpoints',
+                },
+            ],
+            # Scrape config for API servers, keep only endpoints for
+            # default/kubernetes to poll only api servers
+            'relabel_configs'       => [
+                {
+                    'source_labels' => ['__meta_kubernetes_namespace',
+                                        '__meta_kubernetes_service_name',
+                                        '__meta_kubernetes_endpoint_port_name'],
+                    'action'        => 'keep',
+                    'regex'         => 'default;kubernetes;https',
+                },
+            ],
+        },
+        {
+            'job_name'              => 'k8s-node',
+            'bearer_token_file'     => $bearer_token_file,
+            'kubernetes_sd_configs' => [
+                {
+                    'api_server'        => "https://${master_host}:6443",
+                    'bearer_token_file' => $bearer_token_file,
+                    'role'              => 'node',
+                },
+            ],
+            'relabel_configs'       => [
+                # Map kubernetes node labels to prometheus metric labels
+                {
+                    'action' => 'labelmap',
+                    'regex'  => '__meta_kubernetes_node_label_(.+)',
+                },
+                {
+                    # Force read-only API for nodes. This listens on port 10255
+                    # so rewrite the __address__ label to use that port. It's
+                    # also HTTP, not HTTPS
+                    'action'        => 'replace', # Redundant but clearer
+                    'source_labels' => ['__address__'],
+                    'target_label'  => '__address__',
+                    'regex'         => '([\d\.]+):(\d+)',
+                    'replacement'   => "\${1}:10255",
+                },
+            ],
+        },
+        {
+            'job_name'              => 'k8s-node-cadvisor',
+            'bearer_token_file'     => $bearer_token_file,
+            'metrics_path'          => '/metrics/cadvisor',
+            'kubernetes_sd_configs' => [
+                {
+                    'api_server'        => "https://${master_host}:6443",
+                    'bearer_token_file' => $bearer_token_file,
+                    'role'              => 'node',
+                },
+            ],
+            'relabel_configs'       => [
+                # Map kubernetes node labels to prometheus metric labels
+                {
+                    'action' => 'labelmap',
+                    'regex'  => '__meta_kubernetes_node_label_(.+)',
+                },
+                {
+                    # Force read-only API for nodes. This listens on port 10255
+                    # so rewrite the __address__ label to use that port. It's
+                    # also HTTP, not HTTPS
+                    'action'        => 'replace', # Redundant but clearer
+                    'source_labels' => ['__address__'],
+                    'target_label'  => '__address__',
+                    'regex'         => '([\d\.]+):(\d+)',
+                    'replacement'   => "\${1}:10255",
+                },
+            ],
+        },
+    ]
+
+    # NOTE(review): the commit describes this profile as near-identical to
+    # the non-staging one; confirm the 'k8s' instance title and the
+    # /srv/prometheus/k8s paths cannot collide if both profiles are ever
+    # applied to the same host.
+    prometheus::server { 'k8s':
+        storage_encoding      => '2',
+        listen_address        => '127.0.0.1:9907',
+        storage_retention     => $storage_retention,
+        max_chunks_to_persist => $max_chunks_to_persist,
+        memory_chunks         => $memory_chunks,
+        global_config_extra   => $config_extra,
+        scrape_configs_extra  => $scrape_configs_extra,
+    }
+
+    prometheus::web { 'k8s':
+        proxy_pass => 'http://localhost:9907/k8s',
+    }
+
+    prometheus::rule { 'rules_k8s.conf':
+        instance => 'k8s',
+        source   => 'puppet:///modules/profile/prometheus/rules_k8s.conf',
+    }
+
+    # This file holds the API bearer token. It is only readable by the
+    # prometheus user, and show_diff is disabled so that the secret is not
+    # written to Puppet logs or reports when the content changes.
+    file { $bearer_token_file:
+        ensure    => present,
+        content   => $client_token,
+        mode      => '0400',
+        owner     => 'prometheus',
+        group     => 'prometheus',
+        show_diff => false,
+        require   => Prometheus::Server['k8s'],
+    }
+}
--
To view, visit https://gerrit.wikimedia.org/r/399160
To unsubscribe, visit https://gerrit.wikimedia.org/r/settings
Gerrit-MessageType: newchange
Gerrit-Change-Id: Iba4d0a4a0c4f87b4ed98d51024f40c636a7da2ec
Gerrit-PatchSet: 1
Gerrit-Project: operations/puppet
Gerrit-Branch: production
Gerrit-Owner: Alexandros Kosiaris <[email protected]>
_______________________________________________
MediaWiki-commits mailing list
[email protected]
https://lists.wikimedia.org/mailman/listinfo/mediawiki-commits