Hi again,

I verified that local DoS did not cause that high cpu usage. I'm upgrading
from 1.8.19 to 1.9.6.

Please look at strace:

of process with 100% cpu usage:
strace -fp 9790 -c
Process 9790 attached
% time     seconds  usecs/call     calls    errors syscall
------ ----------- ----------- --------- --------- ----------------
 27.51    0.009501           0    942536           clock_gettime
 13.65    0.004716           0    471268           epoll_wait
 12.09    0.004176           0    305211    157644 recvfrom
 11.09    0.003831           0    109003     64867 connect
  9.04    0.003122           0    141908      6479 sendto
  6.55    0.002264           0    193372           epoll_ctl
  5.05    0.001746           0     57430           close
  4.46    0.001539           0    118067           setsockopt
  3.05    0.001053           0     94756      1860 getsockopt
  2.55    0.000882           0     50009           socket
  1.43    0.000493           0     50009           fcntl
  1.32    0.000457           0     14947      7353 accept4
  0.89    0.000306           0     11660         1 shutdown
  0.58    0.000199           0     12172      3226 read
  0.47    0.000164           0      3838       279 write
  0.21    0.000074           0      6640           bind
  0.06    0.000019           0      1860           getsockname
  0.00    0.000000           0         4           brk
  0.00    0.000000           0        24           sendmsg
------ ----------- ----------- --------- --------- ----------------
100.00    0.034542               2584714    241709 total

fragment of strace:
[...]
epoll_ctl(3, EPOLL_CTL_ADD, 2020, {EPOLLOUT, {u32=2020, u64=2020}}) = 0
clock_gettime(CLOCK_THREAD_CPUTIME_ID, {848, 87792876}) = 0
epoll_wait(3, {}, 200, 0)               = 0
clock_gettime(CLOCK_THREAD_CPUTIME_ID, {848, 87804387}) = 0
clock_gettime(CLOCK_THREAD_CPUTIME_ID, {848, 87811541}) = 0
epoll_wait(3, {}, 200, 0)               = 0
clock_gettime(CLOCK_THREAD_CPUTIME_ID, {848, 87823913}) = 0
clock_gettime(CLOCK_THREAD_CPUTIME_ID, {848, 87830810}) = 0
epoll_wait(3, {}, 200, 0)               = 0
clock_gettime(CLOCK_THREAD_CPUTIME_ID, {848, 87842796}) = 0
clock_gettime(CLOCK_THREAD_CPUTIME_ID, {848, 87849448}) = 0
epoll_wait(3, {}, 200, 0)               = 0
clock_gettime(CLOCK_THREAD_CPUTIME_ID, {848, 87859750}) = 0
clock_gettime(CLOCK_THREAD_CPUTIME_ID, {848, 87866006}) = 0
epoll_wait(3, {}, 200, 0)               = 0
clock_gettime(CLOCK_THREAD_CPUTIME_ID, {848, 87877936}) = 0
clock_gettime(CLOCK_THREAD_CPUTIME_ID, {848, 87885329}) = 0
epoll_wait(3, {}, 200, 0)               = 0
clock_gettime(CLOCK_THREAD_CPUTIME_ID, {848, 87897045}) = 0
clock_gettime(CLOCK_THREAD_CPUTIME_ID, {848, 87904108}) = 0
epoll_wait(3, {}, 200, 0)               = 0
clock_gettime(CLOCK_THREAD_CPUTIME_ID, {848, 87915961}) = 0
clock_gettime(CLOCK_THREAD_CPUTIME_ID, {848, 87922580}) = 0
epoll_wait(3, {}, 200, 0)               = 0
clock_gettime(CLOCK_THREAD_CPUTIME_ID, {848, 87933941}) = 0
clock_gettime(CLOCK_THREAD_CPUTIME_ID, {848, 87940258}) = 0
epoll_wait(3, {}, 200, 0)               = 0
clock_gettime(CLOCK_THREAD_CPUTIME_ID, {848, 87952045}) = 0
clock_gettime(CLOCK_THREAD_CPUTIME_ID, {848, 87959070}) = 0
epoll_wait(3, {}, 200, 0)               = 0
clock_gettime(CLOCK_THREAD_CPUTIME_ID, {848, 87971104}) = 0
clock_gettime(CLOCK_THREAD_CPUTIME_ID, {848, 87978136}) = 0
epoll_wait(3, {}, 200, 0)               = 0
clock_gettime(CLOCK_THREAD_CPUTIME_ID, {848, 87990210}) = 0
clock_gettime(CLOCK_THREAD_CPUTIME_ID, {848, 87998045}) = 0
epoll_wait(3, {}, 200, 0)               = 0
clock_gettime(CLOCK_THREAD_CPUTIME_ID, {848, 88009585}) = 0
clock_gettime(CLOCK_THREAD_CPUTIME_ID, {848, 88015976}) = 0
epoll_wait(3, {}, 200, 0)               = 0
clock_gettime(CLOCK_THREAD_CPUTIME_ID, {848, 88027627}) = 0
clock_gettime(CLOCK_THREAD_CPUTIME_ID, {848, 88034407}) = 0
epoll_wait(3, {}, 200, 0)               = 0
clock_gettime(CLOCK_THREAD_CPUTIME_ID, {848, 88046698}) = 0
clock_gettime(CLOCK_THREAD_CPUTIME_ID, {848, 88053419}) = 0
epoll_wait(3, {}, 200, 0)               = 0
clock_gettime(CLOCK_THREAD_CPUTIME_ID, {848, 88065381}) = 0
clock_gettime(CLOCK_THREAD_CPUTIME_ID, {848, 88071551}) = 0
epoll_wait(3, {}, 200, 0)               = 0
clock_gettime(CLOCK_THREAD_CPUTIME_ID, {848, 88082999}) = 0
clock_gettime(CLOCK_THREAD_CPUTIME_ID, {848, 88089594}) = 0
epoll_wait(3, {}, 200, 0)               = 0
clock_gettime(CLOCK_THREAD_CPUTIME_ID, {848, 88112393}) = 0
clock_gettime(CLOCK_THREAD_CPUTIME_ID, {848, 88118910}) = 0
epoll_wait(3, {}, 200, 0)               = 0
clock_gettime(CLOCK_THREAD_CPUTIME_ID, {848, 88130295}) = 0
clock_gettime(CLOCK_THREAD_CPUTIME_ID, {848, 88136707}) = 0
epoll_wait(3, {}, 200, 0)               = 0
clock_gettime(CLOCK_THREAD_CPUTIME_ID, {848, 88148429}) = 0
clock_gettime(CLOCK_THREAD_CPUTIME_ID, {848, 88155219}) = 0
epoll_wait(3, {}, 200, 0)               = 0
clock_gettime(CLOCK_THREAD_CPUTIME_ID, {848, 88167019}) = 0
clock_gettime(CLOCK_THREAD_CPUTIME_ID, {848, 88173660}) = 0
epoll_wait(3, {}, 200, 0)               = 0
clock_gettime(CLOCK_THREAD_CPUTIME_ID, {848, 88185332}) = 0
clock_gettime(CLOCK_THREAD_CPUTIME_ID, {848, 88191732}) = 0
epoll_wait(3, {}, 200, 0)               = 0
clock_gettime(CLOCK_THREAD_CPUTIME_ID, {848, 88203660}) = 0
clock_gettime(CLOCK_THREAD_CPUTIME_ID, {848, 88210542}) = 0
epoll_wait(3, {}, 200, 0)               = 0
clock_gettime(CLOCK_THREAD_CPUTIME_ID, {848, 88223145}) = 0
clock_gettime(CLOCK_THREAD_CPUTIME_ID, {848, 88230090}) = 0
epoll_wait(3, {}, 200, 0)               = 0
clock_gettime(CLOCK_THREAD_CPUTIME_ID, {848, 88242627}) = 0
clock_gettime(CLOCK_THREAD_CPUTIME_ID, {848, 88250786}) = 0
epoll_wait(3, {}, 200, 0)               = 0
clock_gettime(CLOCK_THREAD_CPUTIME_ID, {848, 88262976}) = 0
clock_gettime(CLOCK_THREAD_CPUTIME_ID, {848, 88270439}) = 0
epoll_wait(3, {}, 200, 0)               = 0
clock_gettime(CLOCK_THREAD_CPUTIME_ID, {848, 88282001}) = 0
clock_gettime(CLOCK_THREAD_CPUTIME_ID, {848, 88288596}) = 0
epoll_wait(3, {}, 200, 0)               = 0
clock_gettime(CLOCK_THREAD_CPUTIME_ID, {848, 88299666}) = 0
clock_gettime(CLOCK_THREAD_CPUTIME_ID, {848, 88305992}) = 0
epoll_wait(3, {}, 200, 0)               = 0
clock_gettime(CLOCK_THREAD_CPUTIME_ID, {848, 88316967}) = 0
clock_gettime(CLOCK_THREAD_CPUTIME_ID, {848, 88324161}) = 0
epoll_wait(3, {}, 200, 0)               = 0
clock_gettime(CLOCK_THREAD_CPUTIME_ID, {848, 88335865}) = 0
clock_gettime(CLOCK_THREAD_CPUTIME_ID, {848, 88342721}) = 0
epoll_wait(3, {}, 200, 0)               = 0
clock_gettime(CLOCK_THREAD_CPUTIME_ID, {848, 88354743}) = 0
clock_gettime(CLOCK_THREAD_CPUTIME_ID, {848, 88361932}) = 0
epoll_wait(3, {}, 200, 0)               = 0
clock_gettime(CLOCK_THREAD_CPUTIME_ID, {848, 88385864}) = 0
clock_gettime(CLOCK_THREAD_CPUTIME_ID, {848, 88392851}) = 0
epoll_wait(3, {}, 200, 0)               = 0
clock_gettime(CLOCK_THREAD_CPUTIME_ID, {848, 88405104}) = 0
clock_gettime(CLOCK_THREAD_CPUTIME_ID, {848, 88411895}) = 0
epoll_wait(3, {}, 200, 0)               = 0
clock_gettime(CLOCK_THREAD_CPUTIME_ID, {848, 88423943}) = 0
clock_gettime(CLOCK_THREAD_CPUTIME_ID, {848, 88430569}) = 0
epoll_wait(3, {}, 200, 0)               = 0
clock_gettime(CLOCK_THREAD_CPUTIME_ID, {848, 88442977}) = 0
clock_gettime(CLOCK_THREAD_CPUTIME_ID, {848, 88450051}) = 0
epoll_wait(3, {}, 200, 0)               = 0
clock_gettime(CLOCK_THREAD_CPUTIME_ID, {848, 88462006}) = 0
clock_gettime(CLOCK_THREAD_CPUTIME_ID, {848, 88468393}) = 0
epoll_wait(3, {}, 200, 0)               = 0
clock_gettime(CLOCK_THREAD_CPUTIME_ID, {848, 88479825}) = 0
clock_gettime(CLOCK_THREAD_CPUTIME_ID, {848, 88497719}) = 0
epoll_wait(3, {{EPOLLIN, {u32=1142, u64=1142}}}, 200, 0) = 1
clock_gettime(CLOCK_THREAD_CPUTIME_ID, {848, 88510556}) = 0
[...]

After some time I have 2 then 3 haproxy processes with 100% cpu.

This is strace of process with normal 10% cpu usage:
strace -fp 9809 -c
Process 9809 attached
% time     seconds  usecs/call     calls    errors syscall
------ ----------- ----------- --------- --------- ----------------
 38.20    0.020981           0    615163           epoll_wait
 15.60    0.008565           0   1230326           clock_gettime
 10.32    0.005667           0    110707     66703 connect
  8.51    0.004673           0    391704    201233 recvfrom
  7.82    0.004292           0     14450      7157 accept4
  7.48    0.004106           0    183790      6254 sendto
  3.48    0.001909           0     57090           close
  3.02    0.001658           0    240248           epoll_ctl
  1.66    0.000914           0     50016           socket
  1.43    0.000786           0    117936           setsockopt
  1.21    0.000666           0     97594      1660 getsockopt
  0.51    0.000281           0     50016           fcntl
  0.42    0.000229           0     12275           shutdown
  0.22    0.000119           0     13236      3752 read
  0.08    0.000043           0      4169       280 write
  0.03    0.000016           0      6313           bind
  0.02    0.000012           0      1660           getsockname
  0.00    0.000000           0         4           brk
------ ----------- ----------- --------- --------- ----------------
100.00    0.054917               3196697    287039 total

Any guidance how to debug it further would be very helpful.

czw., 4 kwi 2019 o 14:56 Maciej Zdeb <[email protected]> napisał(a):

> I'm very sorry, it is probably "real" traffic that haproxy needs to
> handle. Some local DDoS occurred just in time when I upgraded to 1.9 (what
> terribly bad luck).
>
> Please ignore previous email! Sorry for wasting your time.
>
> czw., 4 kwi 2019 o 14:36 Maciej Zdeb <[email protected]> napisał(a):
>
>> Hi,
>>
>> After haproxy starts everything is fine, but after some time one of
>> haproxy processes uses 100% of CPU. I've managed to dump some "show"
>> commands (I have also "show fd", "show sess all" and "strace" but if needed
>> I would prefer to send it privately). If you could give any advice howto
>> debug it further it would be great.
>>
>> ps aux | grep haprox
>> haproxy   99023 97.5  0.6 3139384 623476 ?      Rs   14:07  16:38
>> /usr/sbin/haproxy -f /etc/haproxy/haproxy.cfg -p /var/run/haproxy.pid -D
>> -sf 15434 15443 15444 15445 15454 15455 15456 15457
>> haproxy   99024 13.0  0.6 3138964 623096 ?      Ss   14:07   2:13
>> /usr/sbin/haproxy -f /etc/haproxy/haproxy.cfg -p /var/run/haproxy.pid -D
>> -sf 15434 15443 15444 15445 15454 15455 15456 15457
>> haproxy   99025 14.1  0.6 3139044 623160 ?      Rs   14:07   2:25
>> /usr/sbin/haproxy -f /etc/haproxy/haproxy.cfg -p /var/run/haproxy.pid -D
>> -sf 15434 15443 15444 15445 15454 15455 15456 15457
>> haproxy   99034 11.9  0.6 3139024 623496 ?      Ss   14:07   2:02
>> /usr/sbin/haproxy -f /etc/haproxy/haproxy.cfg -p /var/run/haproxy.pid -D
>> -sf 15434 15443 15444 15445 15454 15455 15456 15457
>> haproxy   99035 16.3  0.6 3139184 623116 ?      Ss   14:07   2:47
>> /usr/sbin/haproxy -f /etc/haproxy/haproxy.cfg -p /var/run/haproxy.pid -D
>> -sf 15434 15443 15444 15445 15454 15455 15456 15457
>> haproxy   99037 13.0  0.6 3138724 623076 ?      Ss   14:07   2:13
>> /usr/sbin/haproxy -f /etc/haproxy/haproxy.cfg -p /var/run/haproxy.pid -D
>> -sf 15434 15443 15444 15445 15454 15455 15456 15457
>> haproxy   99045 16.3  0.6 3139332 623216 ?      Ss   14:07   2:47
>> /usr/sbin/haproxy -f /etc/haproxy/haproxy.cfg -p /var/run/haproxy.pid -D
>> -sf 15434 15443 15444 15445 15454 15455 15456 15457
>> haproxy   99046 12.0  0.6 3137792 622208 ?      Rs   14:07   2:03
>> /usr/sbin/haproxy -f /etc/haproxy/haproxy.cfg -p /var/run/haproxy.pid -D
>> -sf 15434 15443 15444 15445 15454 15455 15456 15457
>>
>> socat /var/run/haproxy/haproxy1.sock - <<< "show info"
>> Name: HAProxy
>> Version: 1.9.6
>> Release_date: 2019/03/29
>> Nbthread: 1
>> Nbproc: 8
>> Process_num: 1
>> Pid: 99023
>> Uptime: 0d 0h12m26s
>> Uptime_sec: 746
>> Memmax_MB: 0
>> PoolAlloc_MB: 8
>> PoolUsed_MB: 8
>> PoolFailed: 0
>> Ulimit-n: 1000000
>> Maxsock: 4012387
>> Maxconn: 2000000
>> Hard_maxconn: 2000000
>> CurrConns: 850
>> CumConns: 114038
>> CumReq: 179023
>> MaxSslConns: 0
>> CurrSslConns: 4
>> CumSslConns: 6225
>> Maxpipes: 0
>> PipesUsed: 0
>> PipesFree: 0
>> ConnRate: 161
>> ConnRateLimit: 0
>> MaxConnRate: 366
>> SessRate: 161
>> SessRateLimit: 0
>> MaxSessRate: 366
>> SslRate: 6
>> SslRateLimit: 0
>> MaxSslRate: 18
>> SslFrontendKeyRate: 3
>> SslFrontendMaxKeyRate: 7
>> SslFrontendSessionReuse_pct: 50
>> SslBackendKeyRate: 0
>> SslBackendMaxKeyRate: 10
>> SslCacheLookups: 1054
>> SslCacheMisses: 103
>> CompressBpsIn: 0
>> CompressBpsOut: 0
>> CompressBpsRateLim: 0
>> ZlibMemUsage: 0
>> MaxZlibMemUsage: 0
>> Tasks: 26283
>> Run_queue: 3
>> Idle_pct: 39
>> node: XXXX
>> Stopping: 0
>> Jobs: 7181
>> Unstoppable Jobs: 0
>> Listeners: 6330
>> ActivePeers: 0
>> ConnectedPeers: 0
>> DroppedLogs: 6
>> BusyPolling: 0
>>
>> socat /var/run/haproxy/haproxy1.sock - <<< "show activity"
>> thread_id: 0
>> date_now: 1554380334.734878
>> loops: 355064374
>> wake_cache: 2498224
>> wake_tasks: 352539647
>> wake_signal: 0
>> poll_exp: 355037871
>> poll_drop: 194560
>> poll_dead: 0
>> poll_skip: 0
>> fd_skip: 0
>> fd_lock: 0
>> fd_del: 0
>> conn_dead: 0
>> stream: 710586336
>> empty_rq: 15313
>> long_rq: 57
>> cpust_ms_tot: 747
>> cpust_ms_1s: 0
>> cpust_ms_15s: 0
>> avg_loop_us: 1
>>
>> haproxy -vv
>> HA-Proxy version 1.9.6 2019/03/29 - https://haproxy.org/
>> Build options :
>>   TARGET  = linux2628
>>   CPU     = generic
>>   CC      = gcc
>>   CFLAGS  = -O2 -g -fno-strict-aliasing -Wdeclaration-after-statement
>> -fwrapv -Wno-unused-label -Wno-sign-compare -Wno-unused-parameter
>> -Wno-old-style-declaration -Wno-ignored-qualifiers -Wno-clobbered
>> -Wno-missing-field-initializers -Wtype-limits -DIP_BIND_ADDRESS_NO_PORT=24
>>   OPTIONS = USE_GETADDRINFO=1 USE_ZLIB=1 USE_DL=1 USE_OPENSSL=1 USE_LUA=1
>> USE_PCRE=1 USE_PCRE_JIT=1
>>
>> Default settings :
>>   maxconn = 2000, bufsize = 16384, maxrewrite = 1024, maxpollevents = 200
>>
>> Built with OpenSSL version : OpenSSL 1.1.1b  26 Feb 2019
>> Running on OpenSSL version : OpenSSL 1.1.1b  26 Feb 2019
>> OpenSSL library supports TLS extensions : yes
>> OpenSSL library supports SNI : yes
>> OpenSSL library supports : TLSv1.0 TLSv1.1 TLSv1.2 TLSv1.3
>> Built with Lua version : Lua 5.3.5
>> Built with transparent proxy support using: IP_TRANSPARENT
>> IPV6_TRANSPARENT IP_FREEBIND
>> Built with zlib version : 1.2.8
>> Running on zlib version : 1.2.8
>> Compression algorithms supported : identity("identity"),
>> deflate("deflate"), raw-deflate("deflate"), gzip("gzip")
>> Built with PCRE version : 8.42 2018-03-20
>> Running on PCRE version : 8.42 2018-03-20
>> PCRE library supports JIT : yes
>> Encrypted password support via crypt(3): yes
>> Built with multi-threading support.
>>
>> Available polling systems :
>>       epoll : pref=300,  test result OK
>>        poll : pref=200,  test result OK
>>      select : pref=150,  test result OK
>> Total: 3 (3 usable), will use epoll.
>>
>> Available multiplexer protocols :
>> (protocols marked as <default> cannot be specified using 'proto' keyword)
>>               h2 : mode=HTX        side=FE|BE
>>               h2 : mode=HTTP       side=FE
>>        <default> : mode=HTX        side=FE|BE
>>        <default> : mode=TCP|HTTP   side=FE|BE
>>
>> Available filters :
>> [SPOE] spoe
>> [COMP] compression
>> [CACHE] cache
>> [TRACE] trace
>>
>

Reply via email to