Hi Lucas,

Thanks for picking this up. This is a fresh install, and we diagnosed the 
problem with the help of AI. What we did and how to reproduce it:
- fresh EC2 instance with Ubuntu 24.04 on it
- installed keydb on it in docker
- installed haproxy in order to monitor the master keydb node. Full config 
below (we tried several configs; this is the latest one):
root@keydb1:/home/ubuntu# cat /etc/haproxy/haproxy.cfg
global
  log /dev/log local0
  daemon
#  master-worker
  maxconn 50000
  stats socket /run/haproxy/admin.sock mode 660 level admin

defaults
  log     global
  mode    tcp
  option  tcplog
  timeout connect 2s
  timeout client  2m
  timeout server  2m
  timeout check   2s

frontend fe_keydb
  bind 0.0.0.0:6380
  default_backend be_keydb

backend be_keydb
  mode tcp
  balance first
  option tcp-check
  # No AUTH — just probe replication role
  tcp-check send "INFO replication\r\n"
  tcp-check expect rstring role:master
  server keydb1 10.3.59.16:6379 check inter 1000ms fall 2 rise 1
  server keydb2 10.3.72.20:6379 check inter 1000ms fall 2 rise 1 backup

frontend fe_health
  mode http
  bind 0.0.0.0:8100
  acl master_up nbsrv(be_keydb) gt 0
  http-request return status 200 content-type text/plain string "ok\n" if master_up
  http-request return status 503 content-type text/plain string "no-master\n"

root@keydb1:/home/ubuntu#


This is the systemd override file, where the watchdog is enabled:
root@keydb1:/home/ubuntu# cat /etc/systemd/system/haproxy.service.d/override.conf
[Service]
Type=notify
Restart=always
RestartSec=0.5s
WatchdogSec=30s
root@keydb1:/home/ubuntu#

With this, we observed restarts every 30s in haproxy logs:
Nov 18 08:31:25 keydb1 systemd[1]: haproxy.service: Watchdog timeout (limit 
30s)!
Nov 18 08:31:25 keydb1 systemd[1]: haproxy.service: Killing process 1748781 
(haproxy) with signal SIGABRT.
Nov 18 08:31:25 keydb1 systemd[1]: haproxy.service: Main process exited, 
code=dumped, status=6/ABRT
Nov 18 08:31:25 keydb1 systemd[1]: haproxy.service: Killing process 1748783 
(haproxy) with signal SIGKILL.
Nov 18 08:31:25 keydb1 systemd[1]: haproxy.service: Failed to kill control 
group /system.slice/haproxy.service, ignoring: Invalid argument
Nov 18 08:31:25 keydb1 systemd[1]: haproxy.service: Failed with result 
'watchdog'.
Nov 18 08:31:26 keydb1 systemd[1]: haproxy.service: Scheduled restart job, 
restart counter is at 10227.
Nov 18 08:31:26 keydb1 systemd[1]: Starting haproxy.service - HAProxy Load 
Balancer...
Nov 18 08:31:26 keydb1 haproxy[1748790]: [NOTICE]   (1748790) : New worker 
(1748792) forked
Nov 18 08:31:26 keydb1 systemd[1]: Started haproxy.service - HAProxy Load 
Balancer.
Nov 18 08:31:26 keydb1 haproxy[1748790]: [NOTICE]   (1748790) : Loading success.


What we did to diagnose the missing notifications:
# Run manually with strace (watch for ~30 seconds)
strace -f -e trace=sendmsg -o /tmp/haproxy-strace.log /usr/sbin/haproxy -Ws -f /etc/haproxy/haproxy.cfg -p /run/haproxy.pid

# In another terminal after 30 seconds:
grep -i notify /tmp/haproxy-strace.log

Result:
root@keydb1:/home/ubuntu# systemctl stop haproxy
root@keydb1:/home/ubuntu# strace -f -e trace=sendmsg -o /tmp/haproxy-strace.log /usr/sbin/haproxy -Ws -f /etc/haproxy/haproxy.cfg -p /run/haproxy.pid
[NOTICE]   (1749916) : New worker (1749918) forked
[NOTICE]   (1749916) : Loading success.
[WARNING]  (1749918) : Backup Server be_keydb/keydb2 is DOWN, reason: Layer7 
timeout, info: " at step 2 of tcp-check (expect regex)", check duration: 
2005ms. 1 active and 0 backup servers left. 0 sessions active, 0 requeued, 0 
remaining in queue.

root@keydb1:/home/ubuntu# grep -i notify /tmp/haproxy-strace.log
root@keydb1:/home/ubuntu# grep -i notify /tmp/haproxy-strace.log
root@keydb1:/home/ubuntu# grep -i notify /tmp/haproxy-strace.log
root@keydb1:/home/ubuntu# grep -i notify /tmp/haproxy-strace.log
root@keydb1:/home/ubuntu# grep -i notify /tmp/haproxy-strace.log
root@keydb1:/home/ubuntu#


This may also be relevant for the investigation:
root@keydb1:/home/ubuntu# systemctl cat haproxy
# /usr/lib/systemd/system/haproxy.service
[Unit]
Description=HAProxy Load Balancer
Documentation=man:haproxy(1)
Documentation=file:/usr/share/doc/haproxy/configuration.txt.gz
After=network-online.target rsyslog.service
Wants=network-online.target
[Service]
EnvironmentFile=-/etc/default/haproxy
EnvironmentFile=-/etc/sysconfig/haproxy
BindReadOnlyPaths=/dev/log:/var/lib/haproxy/dev/log
Environment="CONFIG=/etc/haproxy/haproxy.cfg" "PIDFILE=/run/haproxy.pid" "EXTRAOPTS=-S /run/haproxy-master.sock"
ExecStart=/usr/sbin/haproxy -Ws -f $CONFIG -p $PIDFILE $EXTRAOPTS
ExecReload=/usr/sbin/haproxy -Ws -f $CONFIG -c -q $EXTRAOPTS
ExecReload=/bin/kill -USR2 $MAINPID
KillMode=mixed
Restart=always
SuccessExitStatus=143
Type=notify
# The following lines leverage SystemD's sandboxing options to provide
# defense in depth protection at the expense of restricting some flexibility
# in your setup (e.g. placement of your configuration files) or possibly
# reduced performance. See systemd.service(5) and systemd.exec(5) for further
# information.
# NoNewPrivileges=true
# ProtectHome=true
# If you want to use 'ProtectSystem=strict' you should whitelist the PIDFILE,
# any state files and any other files written using 'ReadWritePaths' or
# 'RuntimeDirectory'.
# ProtectSystem=true
# ProtectKernelTunables=true
# ProtectKernelModules=true
# ProtectControlGroups=true
# If your SystemD version supports them, you can add: @reboot, @swap, @sync
# SystemCallFilter=~@cpu-emulation @keyring @module @obsolete @raw-io
[Install]
WantedBy=multi-user.target
# /etc/systemd/system/haproxy.service.d/override.conf
[Service]
Restart=always
RestartSec=0.5s
WatchdogSec=30s
root@keydb1:/home/ubuntu#

Thanks and regards,
Stour NOC

-- 
You received this bug notification because you are a member of Ubuntu
Bugs, which is subscribed to Ubuntu.
https://bugs.launchpad.net/bugs/2131789

Title:
  Doesn't send periodic WATCHDOG=1 notifications when using systemd-
  notify (compiled with USE_SYSTEMD=1)

To manage notifications about this bug go to:
https://bugs.launchpad.net/ubuntu/+source/haproxy/+bug/2131789/+subscriptions


-- 
ubuntu-bugs mailing list
ubuntu-bugs@lists.ubuntu.com
https://lists.ubuntu.com/mailman/listinfo/ubuntu-bugs

Reply via email to