Filippo Giunchedi has uploaded a new change for review. https://gerrit.wikimedia.org/r/318251
Change subject: prometheus::tools: fix k8s discovery after upgrade ...................................................................... prometheus::tools: fix k8s discovery after upgrade Newer Prometheus k8s discovery shuffled things around, and discovery 'type' selection is done in the configuration itself. Also split the job in two, one for the apiserver and one for the nodes; this allows e.g. forcing insecure https and other settings only where needed. Finally, drop spammy metrics whose cardinality would be out of control, e.g. rest_client_request_status_codes{code="Get https://k8s-master.tools.wmflabs.org:6443/api/v1/watch/services?resourceVersion=41811919&timeoutSeconds=377: dial tcp 10.68.17.142:6443: getsockopt: connection refused",host="k8s-master.tools.wmflabs.org:6443",method="GET"} 1 Bug: T147207 Change-Id: I36dbc5c58f7619d8785b28201efc7c9b73b1c884 --- M modules/role/manifests/prometheus/tools.pp 1 file changed, 22 insertions(+), 10 deletions(-) git pull ssh://gerrit.wikimedia.org:29418/operations/puppet refs/changes/51/318251/1 diff --git a/modules/role/manifests/prometheus/tools.pp b/modules/role/manifests/prometheus/tools.pp index e0f60a9..1593b4a 100644 --- a/modules/role/manifests/prometheus/tools.pp +++ b/modules/role/manifests/prometheus/tools.pp @@ -15,30 +15,42 @@ listen_address => '127.0.0.1:9902', scrape_configs_extra => [ { - 'job_name' => 'k8s', + 'job_name' => 'k8s-api', 'bearer_token_file' => $bearer_token_file, 'kubernetes_sd_configs' => [ { 'api_servers' => [ "https://${master_host}:6443" ], 'bearer_token_file' => $bearer_token_file, + 'role' => 'apiserver', }, ], - # keep metrics coming from apiserver or node kubernetes roles - # and map kubernetes node labels to prometheus metric labels - 'relabel_configs' => [ + }, + { + 'job_name' => 'k8s-node', + 'bearer_token_file' => $bearer_token_file, + # Force (insecure) https only for node servers + 'scheme' => 'https', + 'tls_configs' => { + 'insecure_skip_verify' => 'True', + }, + 'kubernetes_sd_configs' => [ 
{ - 'source_labels' => ['__meta_kubernetes_role'], - 'action' => 'keep', - 'regex' => '(?:apiserver|node)', + 'api_servers' => [ "https://${master_host}:6443" ], + 'bearer_token_file' => $bearer_token_file, + 'role' => 'node', }, + ], + 'relabel_configs' => [ + # Map kubernetes node labels to prometheus metric labels { 'action' => 'labelmap', 'regex' => '__meta_kubernetes_node_label_(.+)', }, + # Drop spammy metrics (i.e. with high cardinality k/v pairs) { - 'source_labels' => ['__meta_kubernetes_role'], - 'action' => 'replace', - 'target_label' => 'kubernetes_role', + 'action' => 'drop', + 'regex' => 'rest_client_request.*', + 'source_labels' => [ '__name__' ], }, ] }, -- To view, visit https://gerrit.wikimedia.org/r/318251 To unsubscribe, visit https://gerrit.wikimedia.org/r/settings Gerrit-MessageType: newchange Gerrit-Change-Id: I36dbc5c58f7619d8785b28201efc7c9b73b1c884 Gerrit-PatchSet: 1 Gerrit-Project: operations/puppet Gerrit-Branch: production Gerrit-Owner: Filippo Giunchedi <fgiunch...@wikimedia.org> _______________________________________________ MediaWiki-commits mailing list MediaWiki-commits@lists.wikimedia.org https://lists.wikimedia.org/mailman/listinfo/mediawiki-commits