Hi.
I get an error "context deadline exceeded" .
This error reproduces once in 5 mins and the cadvisor x.y.z.n:8080 goes
down.
Once I refresh prometheus after 10 mins it is up again.
Scrape_interval is 40 s
relabeling is done, scrapes around 15 metrics using cadvisor .
no. of containers running per host 2k.
No. of host 4
Following Errors appear in the log
Could not configure a source for OOM detection, disabling OOM events: open
/dev/kmsg: no such file or directory
Failed to create summary reader for "/user.slice/user-0
Below is my docker-compose file running on the monitoring host.
version: '3.7'
volumes:
prometheus_data: {}
grafana_data: {}
networks:
front-tier:
back-tier:
services:
prometheus:
image: prom/prometheus:v2.1.0
volumes:
- ./prometheus/:/etc/prometheus/
- prometheus_data:/prometheus
command:
- '--config.file=/etc/prometheus/prometheus.yml'
- '--web.listen-address=:9010'
- '--storage.tsdb.path=/prometheus'
- '--web.console.libraries=/usr/share/prometheus/console_libraries'
- '--web.console.templates=/usr/share/prometheus/consoles'
ports:
- 9010:9010
links:
- cadvisor:cadvisor
- alertmanager:alertmanager
depends_on:
- cadvisor
networks:
- back-tier
restart: always
deploy:
placement:
constraints:
- node.hostname == ${HOSTNAME}
node-exporter:
image: prom/node-exporter
volumes:
- /proc:/host/proc:ro
- /sys:/host/sys:ro
- /:/rootfs:ro
command:
- '--path.procfs=/host/proc'
- '--path.sysfs=/host/sys'
- --collector.filesystem.ignored-mount-points
-
"^/(sys|proc|dev|host|etc|rootfs/var/lib/docker/containers|rootfs/var/lib/docker/overlay2|rootfs/run/docker/netns|rootfs/var/lib/docker/aufs)($$|/)"
ports:
- 9100:9100
networks:
- back-tier
restart: always
deploy:
mode: global
alertmanager:
image: prom/alertmanager
ports:
- 9093:9093
volumes:
- ./alertmanager/:/etc/alertmanager/
networks:
- back-tier
restart: always
command:
- '--config.file=/etc/alertmanager/config.yml'
- '--storage.path=/alertmanager'
# deploy:
# placement:
# constraints:
# - node.hostname == ${HOSTNAME}
cadvisor:
image: google/cadvisor
volumes:
- /:/rootfs:ro
- /var/run:/var/run:rw
- /sys:/sys:ro
- /var/lib/docker/:/var/lib/docker:ro
ports:
- 8080:8080
networks:
- back-tier
restart: always
deploy:
mode: global
grafana:
image: grafana/grafana:6.5.0
user: "104"
depends_on:
- prometheus
ports:
- 3000:3000
volumes:
- grafana_data:/var/lib/grafana
- ./grafana/provisioning/:/etc/grafana/provisioning/
env_file:
- ./grafana/config.monitoring
networks:
- back-tier
- front-tier
restart: always
Below is the docker-compose file running in each node .Around 4 instances.
version: '3.7'
services:
cadvisor:
image: google/cadvisor
volumes:
- /:/rootfs:ro
- /var/run:/var/run:rw
- /sys:/sys:ro
- /var/lib/docker/:/var/lib/docker:ro
ports:
- 8080:8080
restart: always
deploy:
mode: global
node-exporter:
image: prom/node-exporter
volumes:
- /proc:/host/proc:ro
- /sys:/host/sys:ro
- /:/rootfs:ro
command:
- '--path.procfs=/host/proc'
- '--path.sysfs=/host/sys'
- --collector.filesystem.ignored-mount-points
-
"^/(sys|proc|dev|host|etc|rootfs/var/lib/docker/containers|rootfs/var/lib/docker/overlay2|rootfs/run/docker/netns|rootfs/var/lib/docker/aufs)($$|/)"
ports:
- 9100:9100
restart: always
deploy:
mode: global
Any leads would be appreciated.
Thanks
Regards,
Isabel
--
You received this message because you are subscribed to the Google Groups
"Prometheus Users" group.
To unsubscribe from this group and stop receiving emails from it, send an email
to [email protected].
To view this discussion on the web visit
https://groups.google.com/d/msgid/prometheus-users/71d468bb-80b4-4c6d-a164-1d104b86c1f6%40googlegroups.com.