Hi,
Sometimes after HAProxy reload it starts to loop infinitely, for example 9
of 10 threads using 100% CPU (gdb sessions attached). I've also dumped the
core file from gdb.
# haproxy -v
HA-Proxy version 2.2.9-a947cc2 2021/02/06 - https://haproxy.org/
Status: long-term supported branch - will stop receiving fixes around Q2
2025.
Known bugs: http://www.haproxy.org/bugs/bugs-2.2.9.html
Running on: Linux 4.15.0-55-generic #60-Ubuntu SMP Tue Jul 2 18:22:20 UTC
2019 x86_64
thread with high CPU is looping on:
809 if (HA_ATOMIC_CAS(&threads_to_dump, &old, all_threads_mask))
(gdb)
811 ha_thread_relax();
(gdb)
809 if (HA_ATOMIC_CAS(&threads_to_dump, &old, all_threads_mask))
(gdb)
811 ha_thread_relax();
it tried to panic but is failing (from bt full):
#0 0x00007fa73309cf37 in sched_yield () at
../sysdeps/unix/syscall-template.S:78
No locals.
#1 0x0000559253cb7fce in ha_thread_relax () at include/haproxy/thread.h:233
No locals.
#2 ha_thread_dump_all_to_trash () at src/debug.c:811
old = <optimized out>
#3 0x0000559253cb802f in ha_panic () at src/debug.c:268
[Thread debugging using libthread_db enabled]
Using host libthread_db library "/lib/x86_64-linux-gnu/libthread_db.so.1".
0x00007fa73309cf37 in sched_yield () at ../sysdeps/unix/syscall-template.S:78
78 ../sysdeps/unix/syscall-template.S: No such file or directory.
(gdb) bt full
#0 0x00007fa73309cf37 in sched_yield () at
../sysdeps/unix/syscall-template.S:78
No locals.
#1 0x0000559253cb7fce in ha_thread_relax () at include/haproxy/thread.h:233
No locals.
#2 ha_thread_dump_all_to_trash () at src/debug.c:811
old = <optimized out>
#3 0x0000559253cb802f in ha_panic () at src/debug.c:268
No locals.
#4 0x0000559253d23f08 in wdt_handler (sig=14, si=<optimized out>,
arg=<optimized out>) at src/wdt.c:119
n = <optimized out>
p = <optimized out>
thr = 0
#5 <signal handler called>
No locals.
#6 0x00007fa73309cf37 in sched_yield () at
../sysdeps/unix/syscall-template.S:78
No locals.
#7 0x0000559253cb9be5 in ha_thread_relax () at include/haproxy/thread.h:233
No locals.
#8 debug_handler (sig=<optimized out>, si=<optimized out>, arg=<optimized
out>) at src/debug.c:859
sig = <optimized out>
si = <optimized out>
arg = <optimized out>
#9 <signal handler called>
No locals.
#10 0x0000559253bc642f in hlua_action (rule=0x5592596b5ba0, px=0x559259686260,
sess=<optimized out>, s=0x55925b8f2fb0, flags=2) at src/hlua.c:6711
__pl_r = 4294967300
ret = <optimized out>
arg = <optimized out>
dir = 0
act_ret = 0
error = <optimized out>
#11 0x0000559253c353c3 in http_req_get_intercept_rule
(px=px@entry=0x559259686260, rules=rules@entry=0x5592596862b0,
s=s@entry=0x55925b8f2fb0) at src/http_ana.c:2884
sess = <optimized out>
txn = 0x55925b8f3790
rule = 0x5592596b5ba0
rule_ret = HTTP_RULE_RES_CONT
act_opts = 2
#12 0x0000559253c382f8 in http_process_req_common (s=s@entry=0x55925b8f2fb0,
req=req@entry=0x55925b8f2fc0, an_bit=an_bit@entry=16, px=<optimized out>) at
src/http_ana.c:501
sess = <optimized out>
txn = <optimized out>
msg = <optimized out>
---Type <return> to continue, or q <return> to quit---
htx = <optimized out>
rule = <optimized out>
verdict = <optimized out>
conn = <optimized out>
#13 0x0000559253c48669 in process_stream (t=<optimized out>,
context=0x55925b8f2fb0, state=<optimized out>) at src/stream.c:1781
max_loops = <optimized out>
ana_list = 409648
ana_back = 409648
flags = <optimized out>
srv = <optimized out>
s = 0x55925b8f2fb0
sess = <optimized out>
rqf_last = <optimized out>
rpf_last = 2147483648
rq_prod_last = <optimized out>
rq_cons_last = 0
rp_cons_last = 8
rp_prod_last = 0
req_ana_back = <optimized out>
req = 0x55925b8f2fc0
res = 0x55925b8f3020
si_f = 0x55925b8f32e8
si_b = 0x55925b8f3340
rate = <optimized out>
#14 0x0000559253d0a643 in run_tasks_from_lists
(budgets=budgets@entry=0x7ffc669f811c) at src/task.c:483
process = <optimized out>
tl_queues = <optimized out>
t = 0x55925b8f34a0
budget_mask = 6 '\006'
done = <optimized out>
queue = <optimized out>
state = <optimized out>
ctx = <optimized out>
__ret = <optimized out>
__n = <optimized out>
__p = <optimized out>
#15 0x0000559253d0b05d in process_runnable_tasks () at src/task.c:679
tt = 0x55925412f7c0 <task_per_thread>
lrq = <optimized out>
grq = <optimized out>
t = <optimized out>
---Type <return> to continue, or q <return> to quit---
max = {0, 37, 25}
max_total = <optimized out>
tmp_list = <optimized out>
queue = 3
max_processed = <optimized out>
#16 0x0000559253cc1df7 in run_poll_loop () at src/haproxy.c:2939
next = <optimized out>
wake = <optimized out>
#17 0x0000559253cc21a9 in run_thread_poll_loop (data=<optimized out>) at
src/haproxy.c:3104
ptaf = <optimized out>
ptif = <optimized out>
ptdf = <optimized out>
ptff = <optimized out>
init_left = 0
init_mutex = pthread_mutex_t = {Type = Normal, Status = Not acquired,
Robust = No, Shared = No, Protocol = None}
init_cond = pthread_cond_t = {Threads known to still execute a wait
function = 0, Clock ID = CLOCK_REALTIME, Shared = No}
#18 0x0000559253b95840 in main (argc=<optimized out>, argv=0x7ffc669f8658) at
src/haproxy.c:3803
blocked_sig = {__val = {18446744067199990583, 18446744073709551615
<repeats 15 times>}}
old_sig = {__val = {0, 13925524187434950144, 0, 140722030216352, 8, 32,
12884901897, 7, 48, 94086972698648, 80, 18446744073709409504, 0, 206158430211,
0, 0}}
i = <optimized out>
err = <optimized out>
retry = <optimized out>
limit = {rlim_cur = 4000364, rlim_max = 6000000}
errmsg =
"\000\206\237fü\177\000\000úàËS\222U\000\000\060DþS\222U\000\000
\204\237fü\177\000\000\b\000\000\000\000\000\000\000\177d¹S\222U\000\000\220Ñ\227T\222U\000\000\000\207ù2§\177\000\000\017",
'\000' <repeats 34 times>
pidfd = <optimized out>
(gdb) n
sched_yield () at ../sysdeps/unix/syscall-template.S:79
79 in ../sysdeps/unix/syscall-template.S
(gdb)
ha_thread_dump_all_to_trash () at src/debug.c:809
809 if (HA_ATOMIC_CAS(&threads_to_dump, &old,
all_threads_mask))
(gdb)
811 ha_thread_relax();
(gdb)
809 if (HA_ATOMIC_CAS(&threads_to_dump, &old,
all_threads_mask))
(gdb)
811 ha_thread_relax();
(gdb)
809 if (HA_ATOMIC_CAS(&threads_to_dump, &old,
all_threads_mask))
(gdb)
811 ha_thread_relax();
(gdb)
809 if (HA_ATOMIC_CAS(&threads_to_dump, &old,
all_threads_mask))
(gdb)
811 ha_thread_relax();
(gdb)
809 if (HA_ATOMIC_CAS(&threads_to_dump, &old,
all_threads_mask))
(gdb)
811 ha_thread_relax();
(gdb)
809 if (HA_ATOMIC_CAS(&threads_to_dump, &old,
all_threads_mask))
(gdb)
811 ha_thread_relax();
(gdb)
809 if (HA_ATOMIC_CAS(&threads_to_dump, &old,
all_threads_mask))
(gdb)
811 ha_thread_relax();
(gdb)
809 if (HA_ATOMIC_CAS(&threads_to_dump, &old,
all_threads_mask))
(gdb)
811 ha_thread_relax();
(gdb)
809 if (HA_ATOMIC_CAS(&threads_to_dump, &old,
all_threads_mask))
(gdb)
811 ha_thread_relax();
(gdb)
809 if (HA_ATOMIC_CAS(&threads_to_dump, &old,
all_threads_mask))
(gdb)
811 ha_thread_relax();
(gdb)
809 if (HA_ATOMIC_CAS(&threads_to_dump, &old,
all_threads_mask))
(gdb)
811 ha_thread_relax();
(gdb)
809 if (HA_ATOMIC_CAS(&threads_to_dump, &old,
all_threads_mask))
(gdb)
811 ha_thread_relax();
(gdb)
809 if (HA_ATOMIC_CAS(&threads_to_dump, &old,
all_threads_mask))
(gdb)
811 ha_thread_relax();
(gdb)
809 if (HA_ATOMIC_CAS(&threads_to_dump, &old,
all_threads_mask))
(gdb)
811 ha_thread_relax();
(gdb)
809 if (HA_ATOMIC_CAS(&threads_to_dump, &old,
all_threads_mask))
(gdb)
811 ha_thread_relax();
Attaching to program: /usr/sbin/haproxy, process 32064
[New LWP 32065]
[New LWP 32066]
[New LWP 32067]
[New LWP 32068]
[New LWP 32069]
[New LWP 32070]
[New LWP 32071]
[New LWP 32072]
[New LWP 32073]
[New LWP 32074]
[New LWP 32075]
[Thread debugging using libthread_db enabled]
Using host libthread_db library "/lib/x86_64-linux-gnu/libthread_db.so.1".
0x00007fa73309cf37 in sched_yield () at ../sysdeps/unix/syscall-template.S:78
78 ../sysdeps/unix/syscall-template.S: No such file or directory.
(gdb) thread 0
Invalid thread ID: 0
(gdb) thread 2
[Switching to thread 2 (Thread 0x7fa732f98700 (LWP 32065))]
#0 __lll_lock_wait_private () at
../sysdeps/unix/sysv/linux/x86_64/lowlevellock.S:95
95 ../sysdeps/unix/sysv/linux/x86_64/lowlevellock.S: No such file or
directory.
(gdb) n
96 in ../sysdeps/unix/sysv/linux/x86_64/lowlevellock.S
(gdb)
98 in ../sysdeps/unix/sysv/linux/x86_64/lowlevellock.S
(gdb)
99 in ../sysdeps/unix/sysv/linux/x86_64/lowlevellock.S
(gdb)
91 in ../sysdeps/unix/sysv/linux/x86_64/lowlevellock.S
(gdb)
92 in ../sysdeps/unix/sysv/linux/x86_64/lowlevellock.S
(gdb)
93 in ../sysdeps/unix/sysv/linux/x86_64/lowlevellock.S
(gdb)
95 in ../sysdeps/unix/sysv/linux/x86_64/lowlevellock.S
(gdb)
96 in ../sysdeps/unix/sysv/linux/x86_64/lowlevellock.S
(gdb)
98 in ../sysdeps/unix/sysv/linux/x86_64/lowlevellock.S
(gdb) bt full
#0 __lll_lock_wait_private () at
../sysdeps/unix/sysv/linux/x86_64/lowlevellock.S:98
No locals.
#1 0x00007fa73303034b in __GI___libc_malloc (bytes=bytes@entry=27) at
malloc.c:3063
ignore1 = <optimized out>
ignore2 = <optimized out>
ignore3 = <optimized out>
ar_ptr = 0x7fa72c000020
victim = <optimized out>
hook = <optimized out>
tbytes = <optimized out>
tc_idx = <optimized out>
__x = <optimized out>
ignore1 = <optimized out>
ignore2 = <optimized out>
ignore3 = <optimized out>
ignore = <optimized out>
#2 0x0000559253bb8b29 in hlua_alloc (ud=0x5592540000d0
<hlua_global_allocator>, ptr=<optimized out>, osize=4, nsize=27) at
src/hlua.c:8204
zone = 0x5592540000d0 <hlua_global_allocator>
nsize = 27
zone = <optimized out>
ptr = 0x0
osize = 4
ud = 0x5592540000d0 <hlua_global_allocator>
zone = 0x5592540000d0 <hlua_global_allocator>
#3 0x0000559253d39561 in luaM_realloc_ ()
No symbol table info available.
#4 0x0000559253d390ae in luaC_newobj ()
No symbol table info available.
#5 0x0000559253d3e23d in internshrstr ()
No symbol table info available.
#6 0x0000559253d39782 in pushstr ()
No symbol table info available.
#7 0x0000559253d3a0da in luaO_pushvfstring ()
No symbol table info available.
#8 0x0000559253d33111 in lua_pushfstring ()
No symbol table info available.
#9 0x0000559253d447e8 in luaL_traceback ()
No symbol table info available.
#10 0x0000559253cb94f0 in ha_task_dump (buf=buf@entry=0x7fa73214a628,
task=0x7fa72d7a2b40, pfx=pfx@entry=0x559253d8a099 ' ' <repeats 13 times>) at
src/debug.c:226
s = <optimized out>
appctx = <optimized out>
---Type <return> to continue, or q <return> to quit---
hlua = 0x7fa72e412100
#11 0x0000559253cb9947 in ha_thread_dump (buf=0x7fa73214a628, thr=<optimized
out>, calling_tid=2) at src/debug.c:91
thr_bit = 2
p = 3226949469
n = <optimized out>
stuck = 0
#12 0x0000559253cb9ba6 in debug_handler (sig=<optimized out>, si=<optimized
out>, arg=<optimized out>) at src/debug.c:843
sig = <optimized out>
si = <optimized out>
arg = <optimized out>
#13 <signal handler called>
No locals.
#14 0x00007fa7330b4c07 in mprotect () at ../sysdeps/unix/syscall-template.S:78
No locals.
#15 0x00007fa73302cf5b in grow_heap (diff=4096, h=0x7fa72c000000) at arena.c:542
pagesize = 4096
new_size = 48644096
pagesize = <optimized out>
new_size = <optimized out>
#16 sysmalloc (nb=nb@entry=64, av=av@entry=0x7fa72c000020) at malloc.c:2416
old_heap = 0x7fa72c000000
heap = <optimized out>
old_heap_size = 48640000
old_top = 0x7fa72ee62fe0
old_size = 32
old_end = 0x7fa72ee63000 ""
size = <optimized out>
brk = 0x0
correction = <optimized out>
snd_brk = 0x0
front_misalign = <optimized out>
end_misalign = <optimized out>
aligned_brk = <optimized out>
p = <optimized out>
remainder = <optimized out>
remainder_size = <optimized out>
pagesize = 4096
tried_mmap = false
__PRETTY_FUNCTION__ = "sysmalloc"
#17 0x00007fa73302e060 in _int_malloc (av=av@entry=0x7fa72c000020,
bytes=bytes@entry=56) at malloc.c:4125
p = <optimized out>
---Type <return> to continue, or q <return> to quit---
iters = <optimized out>
nb = 64
idx = 5
bin = <optimized out>
victim = <optimized out>
size = <optimized out>
victim_index = <optimized out>
remainder = <optimized out>
remainder_size = <optimized out>
block = <optimized out>
bit = <optimized out>
map = <optimized out>
fwd = <optimized out>
bck = <optimized out>
tcache_unsorted_count = 0
tcache_nb = 64
tc_idx = 2
return_cached = <optimized out>
__PRETTY_FUNCTION__ = "_int_malloc"
#18 0x00007fa73303035d in __GI___libc_malloc (bytes=bytes@entry=56) at
malloc.c:3065
ar_ptr = 0x7fa72c000020
victim = <optimized out>
hook = <optimized out>
tbytes = <optimized out>
tc_idx = <optimized out>
__x = <optimized out>
ignore1 = <optimized out>
ignore2 = <optimized out>
ignore3 = <optimized out>
ignore = <optimized out>
#19 0x0000559253bb8b29 in hlua_alloc (ud=0x5592540000d0
<hlua_global_allocator>, ptr=<optimized out>, osize=5, nsize=56) at
src/hlua.c:8204
zone = 0x5592540000d0 <hlua_global_allocator>
nsize = 56
zone = <optimized out>
ptr = 0x0
osize = 5
ud = 0x5592540000d0 <hlua_global_allocator>
zone = 0x5592540000d0 <hlua_global_allocator>
#20 0x0000559253d39561 in luaM_realloc_ ()
No symbol table info available.
#21 0x0000559253d390ae in luaC_newobj ()
---Type <return> to continue, or q <return> to quit---
No symbol table info available.
#22 0x0000559253d3eae3 in luaH_new ()
No symbol table info available.
#23 0x0000559253d33511 in lua_createtable ()
No symbol table info available.
#24 0x0000559253bb8530 in hlua_http_get_headers (L=0x7fa72e417fe8,
msg=<optimized out>) at src/hlua.c:4827
blk = <optimized out>
type = <optimized out>
len = <optimized out>
n = {
ptr = 0x7fa72db91eda
"trace-reported0span-id717d855dce3e24c2trace-id717d855dce3e24c2x-request-id17faa3c2-b085-4311-9a57-0f169868b7fcx-sniallegro.plx-siteplx-client-regionPLcookie_cmuid=1r06mqt2-3b2a-5m0c-79pn-qcbsuey1x98f;"...,
len = 14}
v = {
ptr = 0x7fa72db91ee8
"0span-id717d855dce3e24c2trace-id717d855dce3e24c2x-request-id17faa3c2-b085-4311-9a57-0f169868b7fcx-sniallegro.plx-siteplx-client-regionPLcookie_cmuid=1r06mqt2-3b2a-5m0c-79pn-qcbsuey1x98f;
datadome=5xiY"..., len = 1}
htx = <optimized out>
pos = 22
#25 0x0000559253d36246 in luaD_precall ()
No symbol table info available.
#26 0x0000559253d41a1d in luaV_execute ()
No symbol table info available.
#27 0x0000559253d35992 in luaD_rawrunprotected ()
No symbol table info available.
#28 0x0000559253d36669 in lua_resume ()
No symbol table info available.
#29 0x0000559253bc1fb1 in hlua_ctx_resume (lua=0x7fa72e412100,
yield_allowed=yield_allowed@entry=1) at src/hlua.c:1178
ret = <optimized out>
msg = <optimized out>
trace = <optimized out>
#30 0x0000559253bc6661 in hlua_action (rule=0x5592596b5ba0, px=0x559259686260,
sess=<optimized out>, s=0x7fa72d7a2650, flags=2) at src/hlua.c:6770
arg = <optimized out>
dir = 0
act_ret = <optimized out>
error = <optimized out>
#31 0x0000559253c353c3 in http_req_get_intercept_rule
(px=px@entry=0x559259686260, rules=rules@entry=0x5592596862b0,
s=s@entry=0x7fa72d7a2650) at src/http_ana.c:2884
sess = <optimized out>
txn = 0x7fa72d7a2e30
rule = 0x5592596b5ba0
rule_ret = HTTP_RULE_RES_CONT
act_opts = 2
#32 0x0000559253c382f8 in http_process_req_common (s=s@entry=0x7fa72d7a2650,
req=req@entry=0x7fa72d7a2660, an_bit=an_bit@entry=16, px=<optimized out>) at
src/http_ana.c:501
---Type <return> to continue, or q <return> to quit---
sess = <optimized out>
txn = <optimized out>
msg = <optimized out>
htx = <optimized out>
rule = <optimized out>
verdict = <optimized out>
conn = <optimized out>
#33 0x0000559253c48669 in process_stream (t=<optimized out>,
context=0x7fa72d7a2650, state=<optimized out>) at src/stream.c:1781
max_loops = <optimized out>
ana_list = 409648
ana_back = 409648
flags = <optimized out>
srv = <optimized out>
s = 0x7fa72d7a2650
sess = <optimized out>
rqf_last = <optimized out>
rpf_last = 2147483648
rq_prod_last = <optimized out>
rq_cons_last = 0
rp_cons_last = 8
rp_prod_last = 0
req_ana_back = <optimized out>
req = 0x7fa72d7a2660
res = 0x7fa72d7a26c0
si_f = 0x7fa72d7a2988
si_b = 0x7fa72d7a29e0
rate = <optimized out>
#34 0x0000559253d0a643 in run_tasks_from_lists
(budgets=budgets@entry=0x7fa732f7535c) at src/task.c:483
process = <optimized out>
tl_queues = <optimized out>
t = 0x7fa72d7a2b40
budget_mask = 6 '\006'
done = <optimized out>
queue = <optimized out>
state = <optimized out>
ctx = <optimized out>
__ret = <optimized out>
__n = <optimized out>
__p = <optimized out>
#35 0x0000559253d0b05d in process_runnable_tasks () at src/task.c:679
tt = 0x55925412f840 <task_per_thread+128>
---Type <return> to continue, or q <return> to quit---
lrq = <optimized out>
grq = <optimized out>
t = <optimized out>
max = {0, 52, 25}
max_total = <optimized out>
tmp_list = <optimized out>
queue = 3
max_processed = <optimized out>
#36 0x0000559253cc1df7 in run_poll_loop () at src/haproxy.c:2939
next = <optimized out>
wake = <optimized out>
#37 0x0000559253cc21a9 in run_thread_poll_loop (data=<optimized out>) at
src/haproxy.c:3104
ptaf = <optimized out>
ptif = <optimized out>
ptdf = <optimized out>
ptff = <optimized out>
init_left = 0
init_mutex = pthread_mutex_t = {Type = Normal, Status = Not acquired,
Robust = No, Shared = No, Protocol = None}
init_cond = pthread_cond_t = {Threads known to still execute a wait
function = 0, Clock ID = CLOCK_REALTIME, Shared = No}
#38 0x00007fa7341246db in start_thread (arg=0x7fa732f98700) at
pthread_create.c:463
pd = 0x7fa732f98700
now = <optimized out>
unwind_buf = {cancel_jmp_buf = {{jmp_buf = {140356091479808,
-7771812612016694466, 140356091335936, 0, 1, 140722030215552,
7740170938185423678, 7740159161842539326}, mask_was_saved = 0}},
priv = {pad = {0x0, 0x0, 0x0, 0x0}, data = {prev = 0x0, cleanup =
0x0, canceltype = 0}}}
not_first_call = <optimized out>
#39 0x00007fa7330baa3f in clone () at
../sysdeps/unix/sysv/linux/x86_64/clone.S:95
No locals.
(gdb)