Hello,

We have deployed HAProxy (1.4.24 2013/06/17) on a new, fully updated
CentOS 6.4 machine, with transparent proxying and routing. It worked
fine in testing, but in production there is a strange issue. This
system is doing TCP balancing across three backend MTA/anti-spam
servers on the "private" 172.18.31.0/24 network on eth1. The "public"
interface eth0 is on 172.18.30.0/24. Those backend servers then
forward the mail on to the proper final destinations internally, on
the 172.18.30.0/24 network. The IP addresses of the HAProxy server are
172.18.30.13 and 172.18.31.13.

The issue is that one of the three servers (asav-1) is apparently
experiencing very slow throughput for those outbound connections to
our local network. The other two are fine. On the one slow system, the
queue builds up, as there are timeouts at our other internal MTAs.

We have added a static route on the problem system (asav-1) to bypass
the HAProxy/CentOS balancer for outbound connections to our network:

[root@asav-1 ~]# ip route show
172.18.30.185 via 172.18.31.1 dev eth0
172.18.30.0/24 via 172.18.31.1 dev eth0
172.18.31.0/24 dev eth0  proto kernel  scope link  src 172.18.31.101
169.254.0.0/16 dev eth0  scope link
default via 172.18.31.13 dev eth0

Via the 172.18.31.1 gateway, things work fine. The other two servers
are still routed through the balancer (172.18.31.13):

[root@asav-2 ~]# ip route show
172.18.31.0/24 dev eth0  proto kernel  scope link  src 172.18.31.102
169.254.0.0/16 dev eth0  scope link
default via 172.18.31.13 dev eth0

I am very perplexed by this issue, as the three scanner nodes are
identical, and the configuration in HAProxy/iptables treats them all
identically. Any suggestions would be most appreciated.

HAProxy config excerpt:

global
    log         127.0.0.1 local2
    chroot      /var/lib/haproxy
    pidfile     /var/run/haproxy.pid
    maxconn     4000
    daemon
    stats socket /var/lib/haproxy/stats

defaults
    mode                    tcp
    log                     global
    option                  tcplog
    retries                 3
    timeout http-request    10s
    timeout queue           1m
    timeout connect         10s
    timeout client          1m
    timeout server          1m
    timeout http-keep-alive 10s
    timeout check           10s
    maxconn                 3000

frontend  mx_smtp
    log                 global
    option              tcplog
    bind                172.18.30.182:25
    default_backend     mx_scan

frontend  mx-a_smtp
    log                 global
    option              tcplog
    bind                172.18.30.183:25
    default_backend     mx_scan

backend mx_scan
    balance             roundrobin
    option              smtpchk HELO msh-balance
    source              172.18.31.13 usesrc clientip
    server              asav-1 172.18.31.101:25 check inter 10s
    server              asav-2 172.18.31.102:25 check inter 10s
    server              asav-3 172.18.31.103:25 check inter 10s

All ipv4 sysctls:

net.ipv4.ip_forward = 1
net.ipv4.conf.default.rp_filter = 1
net.ipv4.conf.lo.rp_filter = 0
net.ipv4.conf.all.send_redirects = 0
net.ipv4.conf.default.send_redirects = 0
net.ipv4.conf.eth0.send_redirects = 0
net.ipv4.conf.eth1.send_redirects = 0
net.ipv4.conf.lo.send_redirects = 0
net.ipv4.conf.default.accept_source_route = 0
net.ipv4.tcp_syncookies = 1

# ip addr
1: lo: <LOOPBACK,UP,LOWER_UP> mtu 16436 qdisc noqueue state UNKNOWN
    link/loopback 00:00:00:00:00:00 brd 00:00:00:00:00:00
    inet 127.0.0.1/8 scope host lo
    inet6 ::1/128 scope host
       valid_lft forever preferred_lft forever
2: eth0: <BROADCAST,MULTICAST,UP,LOWER_UP> mtu 1500 qdisc pfifo_fast state UNKNOWN qlen 1000
    link/ether 00:50:56:a4:4e:b0 brd ff:ff:ff:ff:ff:ff
    inet 172.18.30.13/24 brd 172.18.30.255 scope global eth0
    inet 172.18.30.182/32 scope global eth0
    inet 172.18.30.183/32 scope global eth0
    inet 172.18.30.197/32 scope global eth0
    inet6 fe80::250:56ff:fea4:4eb0/64 scope link
       valid_lft forever preferred_lft forever
3: eth1: <BROADCAST,MULTICAST,UP,LOWER_UP> mtu 1500 qdisc pfifo_fast state UNKNOWN qlen 1000
    link/ether 00:50:56:a4:4e:b1 brd ff:ff:ff:ff:ff:ff
    inet 172.18.31.13/24 brd 172.18.31.255 scope global eth1
    inet6 fe80::250:56ff:fea4:4eb1/64 scope link
       valid_lft forever preferred_lft forever

[root@msh-balance haproxy]# ip route show
172.18.30.0/24 dev eth0  proto kernel  scope link  src 172.18.30.13
172.18.31.0/24 dev eth1  proto kernel  scope link  src 172.18.31.13
169.254.0.0/16 dev eth0  scope link  metric 1002
169.254.0.0/16 dev eth1  scope link  metric 1003
default via 172.18.30.1 dev eth0
[root@msh-balance haproxy]# ip rule show
0: from all lookup local
32765: from all fwmark 0x6f lookup 100
32766: from all lookup main
32767: from all lookup default
[root@msh-balance haproxy]# ip route show table 100
local default dev lo  scope host

[root@msh-balance haproxy]# egrep -v '^(#|:)' /etc/sysconfig/iptables
*nat
-A POSTROUTING -s 172.18.31.101/32 -o eth0 -j SNAT --to-source 172.18.30.182
-A POSTROUTING -s 172.18.31.102/32 -o eth0 -j SNAT --to-source 172.18.30.182
-A POSTROUTING -s 172.18.31.103/32 -o eth0 -j SNAT --to-source 172.18.30.182
-A POSTROUTING -s 172.18.31.104/32 -o eth0 -j SNAT --to-source 172.18.30.197
COMMIT
*filter
-A INPUT -m state --state RELATED,ESTABLISHED -j ACCEPT
-A INPUT -p icmp -j ACCEPT
-A INPUT -i lo -j ACCEPT
-A INPUT -s 172.18.31.0/24 -j ACCEPT
-A INPUT -p tcp -m tcp --dport 22 -j ACCEPT
-A INPUT -p tcp -m tcp --dport 25 -j ACCEPT
-A INPUT -p tcp -m tcp --dport 80 -j ACCEPT
-A INPUT -p tcp -m tcp --dport 443 -j ACCEPT
-A INPUT -j LOG --log-prefix "rej-INPUT "
-A INPUT -j REJECT --reject-with icmp-host-prohibited
-A FORWARD -i eth0 -o eth1 -j ACCEPT
-A FORWARD -i eth1 -o eth0 -j ACCEPT
-A FORWARD -j LOG --log-prefix "rej-FWD "
-A FORWARD -j REJECT --reject-with icmp-host-prohibited
COMMIT
*mangle
-A PREROUTING -p tcp -m socket -j DIVERT
-A DIVERT -j MARK --set-xmark 0x6f/0xffffffff
-A DIVERT -j ACCEPT
COMMIT


-- 
Lawrence Weeks                                    [email protected]
Anabasis Consulting Ltd

Reply via email to