Our HAProxy 1.5.4 MS cluster performed quite well before, with peak
concurrent connections around 6k. HAProxy forwards requests from clients to
Nginx, and Nginx sends them on to upstream JVM servers, like this:
client -> HAProxy -> Nginx -> upstream
This week we put a CDN in front to accept client requests, which then pass
through HAProxy and Nginx to the upstream servers, like this:
client -> CDN -> HAProxy -> Nginx -> upstream
Unfortunately, since the CDN was added, HAProxy seems to break during peak
hours. What we see during a peak hour is:
1. HAProxy's backlog fills up and it starts dropping new TCP connections as
soon as peak traffic begins. Before the CDN, our net.core.somaxconn was
1024 with HAProxy's default backlog, and everything performed well. After
adding the CDN, even with somaxconn and the backlog parameter raised to
40000, the queue soon fills again and new connections are refused, so no
normal HTTP requests reach HAProxy and our clients can't open the webpage.
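For reference, on Linux the backlog value passed to listen() is silently
capped at net.core.somaxconn, which is why the two had to be raised
together. A minimal sketch of the effective accept-queue size, using the
values from our setup:

```shell
# Linux caps the listen() backlog at net.core.somaxconn, so HAProxy's
# `backlog 40960` is only effective once somaxconn is raised as well.
somaxconn=1024   # our old net.core.somaxconn
backlog=40960    # the backlog set in haproxy.cfg
effective=$(( backlog < somaxconn ? backlog : somaxconn ))
echo "effective accept queue: $effective"   # prints 1024 here
```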
2. Before the CDN, the TCP connection states looked like:
7k ESTABLISHED, 700 FIN-WAIT-1, 5k FIN-WAIT-2, 14k TIME-WAIT, 10
CLOSE-WAIT, 230 LAST-ACK.
Now they look like:
22k ESTABLISHED, 260 FIN-WAIT-1, 1.6k FIN-WAIT-2, 23k TIME-WAIT, 25k
CLOSE-WAIT, 5k LAST-ACK.
The strangest part is the abnormal increase in CLOSE-WAIT; after a packet
capture, most of the CLOSE-WAIT sockets turn out to be between HAProxy and
the CDN.
3. HAProxy's concurrent connection count used to be around 6k. Now, since
HAProxy can't be reached at all while it is broken, the most recent data we
have from that period shows at least 30k connections, most of them from the
CDN.
So the puzzle is: why does HAProxy have so many CLOSE-WAIT connections, and
why does the backlog queue fill up so quickly? Usually that many CLOSE-WAIT
sockets means HAProxy isn't closing connections correctly.
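For reference, the state counts above came from `ss -tan` output, tallied
roughly like this (the `count_states` helper is just an illustration,
assuming Linux with iproute2; the canned printf lets the sketch run without
live traffic):

```shell
# Tally TCP sockets per state; with live data: ss -tan | count_states
count_states() {
  awk 'NR > 1 { states[$1]++ } END { for (s in states) print states[s], s }'
}

# Sanity check on canned lines shaped like `ss -tan` output:
printf 'State Recv-Q Send-Q Local Peer\nESTAB 0 0 a b\nCLOSE-WAIT 0 0 a b\nCLOSE-WAIT 0 0 a b\n' \
  | count_states | sort -rn
# prints:
# 2 CLOSE-WAIT
# 1 ESTAB
```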
Below is our config of haproxy.cfg:
global
log 127.0.0.1 local2
chroot /var/lib/haproxy
pidfile /var/run/haproxy.pid
maxconn 500000
user haproxy
group haproxy
daemon
debug
# socket monitoring
stats socket /var/run/haproxy.sock mode 666 level admin
stats timeout 2m
ssl-default-bind-ciphers ECDH+AESGCM:DH+AESGCM:ECDH+AES256:DH+AES256:ECDH+AES128:DH+AES:ECDH+3DES:DH+3DES:RSA+AESGCM:RSA+AES:RSA+3DES:!aNULL:!MD5:!DSS
ssl-default-bind-options no-sslv3
tune.ssl.default-dh-param 4096
defaults
mode http
log global
option httplog
option dontlognull
option forwardfor except 127.0.0.0/8
option redispatch
retries 3
timeout http-request 10s
timeout queue 1m
timeout connect 10s
timeout client 30s
timeout server 30s
timeout http-keep-alive 10s
timeout check 5s
maxconn 500000
errorfile 403 /etc/haproxy/errors/403.http
listen stats *:60912
mode http
stats enable
stats uri /stats
stats realm HAProxy\ Statistics
stats refresh 3s
frontend fe-wechat-80
bind *:80
mode http
backlog 40960
redirect scheme https if !{ ssl_fc }
http-request set-header X-Forwarded-Port %[dst_port]
default_backend be-wechat
# wechat.example.com for public
acl acl_wechat_example_com hdr_end(host) -i wechat.example.com
redirect scheme https code 301 if acl_wechat_example_com
frontend fe-wechat-443
bind *:443 ssl crt /etc/haproxy/key/example.com.pem
mode http
backlog 40960
redirect scheme https if !{ ssl_fc }
http-request set-header X-Forwarded-Port %[dst_port]
reqadd X-Forwarded-Proto:\ https
default_backend be-wechat
# wechat.example.com for public
acl acl_wechat_example_com hdr_end(host) -i wechat.example.com
use_backend be-wechat if acl_wechat_example_com
backend be-wechat
balance roundrobin
mode http
server nginx1 10.32.132.119:80 check
server nginx2 10.32.132.114:80 check
server nginx3 10.32.132.116:80 check
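For what it's worth, the numbers in point 3 can also be read from the admin
socket declared above (stats socket /var/run/haproxy.sock). A sketch
assuming socat is installed; the live command is left commented out, and
the `cut` is demonstrated on a canned CSV line shaped like `show stat`
output:

```shell
# With a running HAProxy, dump per-backend queue and session counters:
#   echo "show stat" | socat stdio UNIX-CONNECT:/var/run/haproxy.sock \
#     | cut -d, -f1,2,3,5     # pxname, svname, qcur, scur
#
# The same cut shown on a canned line (pxname,svname,qcur,qmax,scur):
printf 'be-wechat,nginx1,0,0,12\n' | cut -d, -f1,2,3,5
# prints: be-wechat,nginx1,0,12
```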
Below is our nginx.conf:
user nginx;
worker_processes 16;
error_log logs/error.log;
pid logs/nginx.pid;
worker_rlimit_nofile 102400;
events {
worker_connections 102400;
use epoll;
}
http {
include mime.types;
default_type application/octet-stream;
log_format ppformat '$msec $time_iso8601 $remote_addr $host $remote_user '
    '"$request" $status $bytes_sent "$http_referer" '
    '"$http_user_agent" "$http_x_forwarded_for" '
    '$http_x_up_calling_line_id $request_time '
    '$upstream_addr $upstream_status $upstream_response_time';
access_log logs/access.log ppformat;
sendfile on;
tcp_nopush on;
keepalive_timeout 60;
gzip on;
gzip_types text/css text/xml application/x-javascript text/plain
text/vnd.sun.j2me.app-descriptor text/vnd.wap.wml
application/vnd.wap.xhtml+xml;
client_max_body_size 20M;
client_body_buffer_size 256k;
client_header_buffer_size 256k;
proxy_http_version 1.1;
proxy_ignore_client_abort on;
proxy_set_header Connection "";
proxy_set_header Host $host;
proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for;
proxy_set_header X-Real-IP $remote_addr;
proxy_cache_path /usr/local/nginx/cache levels=1:2
keys_zone=lev_cache:100m inactive=120m max_size=1000m;
include conf.d/wechat.example.com.conf;
}
wechat.example.com.conf:
server {
listen 80;
server_name wechat.example.com;
if ( $request_method !~ ^(GET|POST|HEAD)$ ) {
return 403;
}
proxy_buffers 64 4k;
gzip on;
gzip_http_version 1.1;
gzip_buffers 256 64k;
gzip_comp_level 5;
gzip_min_length 1000;
gzip_types text/javascript application/x-javascript
application/javascript text/css text/plain;
location /mobile {
proxy_pass http://mts_mobile;
proxy_set_header Host $host;
proxy_set_header X-Real-IP $remote_addr;
proxy_set_header X-Forwarded-Scheme $scheme;
proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for;
}
}
upstream.conf:
upstream mts_mobile {
server 10.32.128.124:8080;
server 10.32.128.127:8080;
server 10.32.130.138:8080;
server 10.32.130.134:8080;
check interval=3000 rise=2 fall=3 timeout=2000 type=http;
check_http_send "GET /healthcheck.html HTTP/1.1\r\nHost: wechat.example.com\r\n\r\n";
check_http_expect_alive http_2xx http_3xx;
}
After much searching of the haproxy mailing list for version 1.5.4, I
can't find an issue similar to ours. Can anyone give some hints or help?
Thanks.