Hi,

After a couple weeks running HAProxy 2.2.11, one server started to loop on
thread 9. If I'm reading this correctly something went wrong on h2c at
0x7fd7b08d0530.

### show threads
Thread 1 : id=0x7fd8855a2200 act=0 glob=0 wq=0 rq=0 tl=0 tlsz=0 rqsz=0
             stuck=0 prof=0 harmless=1 wantrdv=0
             cpu_ns: poll=24099004674002 now=24099004739108 diff=65106
             curr_task=0
  Thread 2 : id=0x7fd883786700 act=0 glob=0 wq=0 rq=0 tl=0 tlsz=0 rqsz=0
             stuck=0 prof=0 harmless=1 wantrdv=0
             cpu_ns: poll=23776498018239 now=23776498104181 diff=85942
             curr_task=0
  Thread 3 : id=0x7fd882b5b700 act=0 glob=0 wq=0 rq=0 tl=0 tlsz=0 rqsz=0
             stuck=0 prof=0 harmless=1 wantrdv=0
             cpu_ns: poll=23713672613057 now=23713672680973 diff=67916
             curr_task=0
  Thread 4 : id=0x7fd881416700 act=0 glob=0 wq=0 rq=0 tl=0 tlsz=0 rqsz=0
             stuck=0 prof=0 harmless=1 wantrdv=0
             cpu_ns: poll=23752667768434 now=23752667910419 diff=141985
             curr_task=0
  Thread 5 : id=0x7fd880c15700 act=0 glob=0 wq=0 rq=0 tl=0 tlsz=0 rqsz=0
             stuck=0 prof=0 harmless=1 wantrdv=0
             cpu_ns: poll=23726859650451 now=23726859726954 diff=76503
             curr_task=0
  Thread 6 : id=0x7fd7d1488700 act=0 glob=0 wq=0 rq=0 tl=0 tlsz=0 rqsz=0
             stuck=0 prof=0 harmless=1 wantrdv=0
             cpu_ns: poll=23982640999956 now=23982641100021 diff=100065
             curr_task=0
* Thread 7 : id=0x7fd7d0c87700 act=1 glob=0 wq=0 rq=0 tl=0 tlsz=0 rqsz=0
             stuck=0 prof=0 harmless=0 wantrdv=0
             cpu_ns: poll=23482398415461 now=23482398534664 diff=119203
             curr_task=0x5557150b7400 (task) calls=1 last=0
               fct=0x55570d2c8f40(task_run_applet) ctx=0x7fd7949a1a20(<CLI>)
             strm=0x7fd87c322d70 src=unix fe=GLOBAL be=GLOBAL dst=<CLI>
             rqf=c48200 rqa=0 rpf=80008000 rpa=0 sif=EST,200008
sib=EST,204018
             af=(nil),0 csf=0x555716465060,8200
             ab=0x7fd7949a1a20,9 csb=(nil),0

 cof=0x7fd791941e90,1300:PASS(0x7fd78411c1f0)/RAW((nil))/unix_stream(1271)
             cob=(nil),0:NONE((nil))/NONE((nil))/NONE(0)

  Thread 8 : id=0x7fd7d0486700 act=0 glob=0 wq=0 rq=0 tl=0 tlsz=0 rqsz=0
             stuck=0 prof=0 harmless=1 wantrdv=0
             cpu_ns: poll=23504865560248 now=23504865701145 diff=140897
             curr_task=0
  Thread 9 : id=0x7fd7cfc85700 act=1 glob=0 wq=1 rq=0 tl=0 tlsz=0 rqsz=0
             stuck=0 prof=0 harmless=0 wantrdv=0
             cpu_ns: poll=24731954111541 now=24731954183811 diff=72270
             curr_task=0x7fd79e235bf0 (tasklet) calls=1301021937
               fct=0x55570d1c1390(h2_io_cb) ctx=0x7fd7b08d0530

  Thread 10: id=0x7fd7cf484700 act=0 glob=0 wq=0 rq=0 tl=0 tlsz=0 rqsz=0
             stuck=0 prof=0 harmless=1 wantrdv=0
             cpu_ns: poll=23544763552254 now=23544763716578 diff=164324
             curr_task=0
  Thread 11: id=0x7fd7cec83700 act=0 glob=0 wq=0 rq=0 tl=0 tlsz=0 rqsz=0
             stuck=0 prof=0 harmless=1 wantrdv=0
             cpu_ns: poll=23918013478538 now=23918013624911 diff=146373
             curr_task=0
  Thread 12: id=0x7fd7ce482700 act=0 glob=0 wq=0 rq=0 tl=0 tlsz=0 rqsz=0
             stuck=0 prof=0 harmless=1 wantrdv=0
             cpu_ns: poll=23657530305687 now=23657530468631 diff=162944
             curr_task=0
  Thread 13: id=0x7fd7cdc81700 act=0 glob=0 wq=0 rq=0 tl=0 tlsz=0 rqsz=0
             stuck=0 prof=0 harmless=1 wantrdv=0
             cpu_ns: poll=23491571084184 now=23491571212288 diff=128104
             curr_task=0
  Thread 14: id=0x7fd7cd480700 act=0 glob=0 wq=0 rq=0 tl=0 tlsz=0 rqsz=0
             stuck=0 prof=0 harmless=1 wantrdv=0
             cpu_ns: poll=24048462813219 now=24048462979499 diff=166280
             curr_task=0
  Thread 15: id=0x7fd7ccc7f700 act=0 glob=0 wq=1 rq=0 tl=0 tlsz=0 rqsz=0
             stuck=0 prof=0 harmless=1 wantrdv=0
             cpu_ns: poll=23948328115728 now=23948328275800 diff=160072
             curr_task=0
  Thread 16: id=0x7fd7cc47e700 act=0 glob=0 wq=0 rq=0 tl=0 tlsz=0 rqsz=0
             stuck=0 prof=0 harmless=1 wantrdv=0
             cpu_ns: poll=24133419414190 now=24133419605024 diff=190834
             curr_task=0
  Thread 17: id=0x7fd7cbc7d700 act=0 glob=0 wq=0 rq=0 tl=0 tlsz=0 rqsz=0
             stuck=0 prof=0 harmless=1 wantrdv=0
             cpu_ns: poll=24124631248817 now=24124631415673 diff=166856
             curr_task=0
  Thread 18: id=0x7fd7cb47c700 act=0 glob=0 wq=0 rq=0 tl=0 tlsz=0 rqsz=0
             stuck=0 prof=0 harmless=1 wantrdv=0
             cpu_ns: poll=23906001641575 now=23906001827864 diff=186289
             curr_task=0
  Thread 19: id=0x7fd7cac7b700 act=0 glob=0 wq=0 rq=0 tl=0 tlsz=0 rqsz=0
             stuck=0 prof=0 harmless=1 wantrdv=0
             cpu_ns: poll=23969313722856 now=23969313891707 diff=168851
             curr_task=0
  Thread 20: id=0x7fd7ca47a700 act=0 glob=0 wq=0 rq=0 tl=0 tlsz=0 rqsz=0
             stuck=0 prof=0 harmless=1 wantrdv=0
             cpu_ns: poll=24159930828789 now=24159931032613 diff=203824
             curr_task=0

### show fd
[...]
16551 : st=0x22(R:Ra W:Ra) ev=0x01(heopI) [lc] tmask=0x100 umask=0x0
owner=0x7fd785b87990 iocb=0x55570d2b9db0(conn_fd_handler) back=0
cflg=0x80001300 fam=ipv4 lport=443 rport=52713 fe=XXX_443_front mux=H2
ctx=0x7fd7b08d0530 h2c.st0=FRP .err=0 .maxid=0 .lastid=-1 .flg=0x0008
.nbst=0 .nbcs=0 .fctl_cnt=0 .send_cnt=0 .tree_cnt=0 .orph_cnt=0 .sub=0
.dsi=1 .dbuf=16384@0x7fd7b1dec0b0+48/16384 .msi=-1 .mbuf=[1..1|32],h=[0@
(nil)+0/0],t=[0@(nil)+0/0] xprt=SSL xprt_ctx=0x555715e2b460 xctx.st=0
.xprt=RAW .wait.ev=0 .subs=(nil) .sent_early=0 .early_in=0
[...]


### show activity
thread_id: 14 (1..20)
date_now: 1618852171.949847
ctxsw: 1288425435 [ 1035929756 1032771863 1031705413 1030724655 1035339156
1034721090 1034310542 1031563885 3075542472 1033284107 1038448472
1034757368 1032557432 1047375707 1052208164 1034367503 1034600491
1040490873 1039628079 1032934887 ]
tasksw: 3773379840 [ 188378854 188548505 188100970 188548238 188791210
188172001 188741628 188714456 188745134 188574058 188102598 188560452
188615118 189090727 189331307 188822213 188331261 190458889 188705521
188046700 ]
empty_rq: 1113540496 [ 55665602 55349045 55271324 56571248 55600768
55286754 55742826 55486155 55331783 55783054 56158632 56325210 55401431
55480262 55489614 55403740 56490657 55730228 55503633 55468530 ]
long_rq: 31755535 [ 112 321 602 45 1261 832 1877 85 31720412 905 5312 2512
151 4508 7194 242 2323 62 1449 5330 ]
loops: 2371176475 [ 332113345 330644622 330771161 331624384 330894079
331123604 331644038 330856422 360628903 331339563 332562551 331855592
331204726 334101754 335769516 330741123 332496454 332795067 332746318
330230549 ]
wake_tasks: 32171268 [ 22061 21309 21298 20837 22090 21025 22680 20119
31741000 21056 25996 22518 21920 24895 27680 20409 23054 21865 22935 26521 ]
wake_signal: 0 [ 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 ]
poll_io: 2226719665 [ 326403635 324889890 325015011 325980984 325190099
325377837 325930344 325119413 324551602 325638523 326899700 326209941
325469134 328427751 330068634 325013699 326879365 327077720 327049489
324494190 ]
poll_exp: 131999 [ 6598 6598 6684 6746 6498 6396 6563 6537 6457 6739 6839
6576 6521 6667 6683 6473 6692 6684 6520 6528 ]
poll_drop_fd: 5423594 [ 270546 270832 270320 269361 269230 270802 270049
271511 274714 271284 270897 271024 270838 273400 272201 271225 271838
271395 271572 270555 ]
poll_dead_fd: 84 [ 3 6 7 3 6 2 2 7 3 4 3 3 8 6 3 2 4 5 4 3 ]
poll_skip_fd: 1204744899 [ 60168872 60213667 60124096 60180288 60307006
60088733 60244699 60273871 60194737 60247317 60114630 60223673 60204002
60335593 60337309 60291405 60175135 60643642 60270658 60105566 ]
conn_dead: 0 [ 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 ]
stream_calls: 3630147526 [ 181218689 181407095 180970982 181419529
181650906 181019691 181597571 181566162 181413716 181437034 180964955
181417014 181470642 181841863 182137528 181661421 181184631 183309986
181535959 180922152 ]
pool_fail: 0 [ 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 ]
buf_wait: 0 [ 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 ]
cpust_ms_tot: 4856 [ 245 208 234 279 241 253 241 274 282 51 253 242 271 283
227 233 308 242 221 268 ]
cpust_ms_1s: 0 [ 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 ]
cpust_ms_15s: 4856 [ 245 208 234 279 241 253 241 274 282 51 253 242 271 283
227 233 308 242 221 268 ]
avg_loop_us: 11 [ 8 9 8 11 8 9 9 10 32 11 9 10 9 10 8 10 9 10 10 10 ]
accepted: 96187920 [ 4808943 4809282 4809649 4812013 4811331 4805072
4809925 4808252 4811684 4807859 4802746 4811326 4813495 4807539 4807561
4813529 4806575 4813311 4808558 4809270 ]
accq_pushed: 14239 [ 713 712 712 712 712 712 712 712 712 712 712 712 712
712 711 711 712 712 712 712 ]
accq_full: 0 [ 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 ]
accq_ring: 0 [ 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 ]
fd_takeover: 1234181377 [ 61641198 61683379 61587126 61653301 61773400
61552609 61699310 61736661 61661670 61692746 61571748 61687767 61665431
61805399 61845090 61779562 61653101 62170758 61744171 61576950 ]

### (gdb) bt
#0  0x000055570d153ee6 in free_trash_chunk (chunk=0x0) at
include/haproxy/chunk.h:57
#1  h2c_decode_headers (h2c=h2c@entry=0x7fd7b08d0530,
rxbuf=rxbuf@entry=0x7fd7cfc61ee0,
flags=flags@entry=0x7fd7cfc61ed4,
    body_len=body_len@entry=0x7fd7cfc61ed8) at src/mux_h2.c:4520
#2  0x000055570d154ab8 in h2c_frt_handle_headers (h2c=h2c@entry=0x7fd7b08d0530,
h2s=h2s@entry=0x55570d329400 <__compound_literal.8>) at src/mux_h2.c:2497
#3  0x000055570d1c07e3 in h2_process_demux (h2c=0x7fd7b08d0530) at
src/mux_h2.c:3123
#4  h2_process (h2c=h2c@entry=0x7fd7b08d0530) at src/mux_h2.c:3616
#5  0x000055570d1c1678 in h2_io_cb (t=<optimized out>, ctx=0x7fd7b08d0530,
status=<optimized out>) at src/mux_h2.c:3583
#6  0x000055570d2ccf62 in run_tasks_from_lists
(budgets=budgets@entry=0x7fd7cfc6235c)
at src/task.c:454
#7  0x000055570d2cd82d in process_runnable_tasks () at src/task.c:679
#8  0x000055570d284467 in run_poll_loop () at src/haproxy.c:2942
#9  0x000055570d284819 in run_thread_poll_loop (data=<optimized out>) at
src/haproxy.c:3107
#10 0x00007fd8849126db in start_thread (arg=0x7fd7cfc85700) at
pthread_create.c:463
#11 0x00007fd8838a8a3f in epoll_wait (epfd=-808954112, events=0x0,
maxevents=-809097984, timeout=0) at
../sysdeps/unix/sysv/linux/epoll_wait.c:30
#12 0x0000000000000000 in ?? ()


### (gdb) p *((struct h2c *)0x7fd7b08d0530)
$1 = {conn = 0x7fd785b87990, st0 = H2_CS_FRAME_P, errcode =
H2_ERR_NO_ERROR, flags = 8, streams_limit = 100, max_id = 0, rcvd_c = 0,
rcvd_s = 0,
  ddht = 0x7fd7b0820d60, dbuf = {size = 16384,
    area = 0x7fd7b1dec0b0
"¤\226\205\064\f\212jܧâ\201\004A\fN\177jC]t\027\221cÌd°à\033\\+\205µ?¬Ø÷èÏô¥\006êU1\024\235O",
data = 16384, head = 48},
  dsi = 1, dfl = 16383, dft = 1 '\001', dff = 1 '\001', dpl = 0 '\000',
last_sid = -1, mbuf = {{size = 32,
      area = 0x2 <error: Cannot access memory at address 0x2>, data = 1,
head = 1}, {size = 0, area = 0x0, data = 0, head = 0} <repeats 29 times>, {
      size = 0, area = 0x0, data = 0, head = 12}, {size = 1249, area =
0x7fd7b0000080 "ðHX²×\177", data = 140564347486336, head = 0}}, msi = -1,
mfl = 0,
  mft = 32 ' ', mff = -96 ' ', miw = 65535, mws = 65535, mfs = 16384,
timeout = 20000, shut_timeout = 20000, nb_streams = 0, nb_cs = 0,
nb_reserved = 0,
  stream_cnt = 0, proxy = 0x5557136cbe60, task = 0x7fd792079be0,
streams_by_id = {b = {0x0, 0x0}}, send_list = {n = 0x7fd7b08d09d8, p =
0x7fd7b08d09d8},
  fctl_list = {n = 0x7fd7b08d09e8, p = 0x7fd7b08d09e8}, blocked_list = {n =
0x7fd7b08d09f8, p = 0x7fd7b08d09f8}, buf_wait = {target = 0x0, wakeup_cb =
0x0,
    list = {next = 0x7fd7b08d0a18, prev = 0x7fd7b08d0a18}}, wait_event =
{tasklet = 0x7fd79e235bf0, events = 0}}

That is all I've managed to collect. I have also dumped the core file
manually.

Kind regards,
Maciej

Reply via email to