Hello,
We have deployed HAProxy (1.4.24 2013/06/17) on a new, fully updated
CentOS 6.4 machine, with transparent proxying and routing. It worked
fine in testing, but in production there is a strange issue. This
system is doing TCP balancing across three backend MTA/anti-spam
servers on the "private" 172.18.31.0/24 network on eth1. The "public"
interface eth0 is on 172.18.30.0/24. Those backend servers then
forward the mail on to the proper final destinations internally, on
the 172.18.30.0/24 network. The IP addresses of the HAProxy server are
172.18.30.13 and 172.18.31.13.
The issue is that one of the three servers (asav-1) is apparently
experiencing very slow throughput for those outbound connections to
our local network. The other two are fine. On the one slow system, the
queue builds up, as there are timeouts at our other internal MTAs.
We have added a static route on the problem system to bypass the
HAProxy/CentOS balancer for outbound connections to our network:
[root@asav-1 ~]# ip route show
172.18.30.185 via 172.18.31.1 dev eth0
172.18.30.0/24 via 172.18.31.1 dev eth0
172.18.31.0/24 dev eth0 proto kernel scope link src 172.18.31.101
169.254.0.0/16 dev eth0 scope link
default via 172.18.31.13 dev eth0
Via the 172.18.31.1 gateway, things work fine. The other two still route outbound traffic through the balancer (172.18.31.13):
[root@asav-2 ~]# ip route show
172.18.31.0/24 dev eth0 proto kernel scope link src 172.18.31.102
169.254.0.0/16 dev eth0 scope link
default via 172.18.31.13 dev eth0
I am very perplexed by this issue, as the three scanner nodes are
identical, and the configuration in HAProxy/iptables treats them all
identically. Any suggestions would be most appreciated.
HAProxy config excerpt:
global
log 127.0.0.1 local2
chroot /var/lib/haproxy
pidfile /var/run/haproxy.pid
maxconn 4000
daemon
stats socket /var/lib/haproxy/stats
defaults
mode tcp
log global
option tcplog
retries 3
timeout http-request 10s
timeout queue 1m
timeout connect 10s
timeout client 1m
timeout server 1m
timeout http-keep-alive 10s
timeout check 10s
maxconn 3000
frontend mx_smtp
log global
option tcplog
bind 172.18.30.182:25
default_backend mx_scan
frontend mx-a_smtp
log global
option tcplog
bind 172.18.30.183:25
default_backend mx_scan
backend mx_scan
balance roundrobin
option smtpchk HELO msh-balance
source 172.18.31.13 usesrc clientip
server asav-1 172.18.31.101:25 check inter 10s
server asav-2 172.18.31.102:25 check inter 10s
server asav-3 172.18.31.103:25 check inter 10s
All IPv4 sysctls:
net.ipv4.ip_forward = 1
net.ipv4.conf.default.rp_filter = 1
net.ipv4.conf.lo.rp_filter = 0
net.ipv4.conf.all.send_redirects = 0
net.ipv4.conf.default.send_redirects = 0
net.ipv4.conf.eth0.send_redirects = 0
net.ipv4.conf.eth1.send_redirects = 0
net.ipv4.conf.lo.send_redirects = 0
net.ipv4.conf.default.accept_source_route = 0
net.ipv4.tcp_syncookies = 1
# ip addr
1: lo: <LOOPBACK,UP,LOWER_UP> mtu 16436 qdisc noqueue state UNKNOWN
link/loopback 00:00:00:00:00:00 brd 00:00:00:00:00:00
inet 127.0.0.1/8 scope host lo
inet6 ::1/128 scope host
valid_lft forever preferred_lft forever
2: eth0: <BROADCAST,MULTICAST,UP,LOWER_UP> mtu 1500 qdisc pfifo_fast
state UNKNOWN qlen 1000
link/ether 00:50:56:a4:4e:b0 brd ff:ff:ff:ff:ff:ff
inet 172.18.30.13/24 brd 172.18.30.255 scope global eth0
inet 172.18.30.182/32 scope global eth0
inet 172.18.30.183/32 scope global eth0
inet 172.18.30.197/32 scope global eth0
inet6 fe80::250:56ff:fea4:4eb0/64 scope link
valid_lft forever preferred_lft forever
3: eth1: <BROADCAST,MULTICAST,UP,LOWER_UP> mtu 1500 qdisc pfifo_fast
state UNKNOWN qlen 1000
link/ether 00:50:56:a4:4e:b1 brd ff:ff:ff:ff:ff:ff
inet 172.18.31.13/24 brd 172.18.31.255 scope global eth1
inet6 fe80::250:56ff:fea4:4eb1/64 scope link
valid_lft forever preferred_lft forever
[root@msh-balance haproxy]# ip route show
172.18.30.0/24 dev eth0 proto kernel scope link src 172.18.30.13
172.18.31.0/24 dev eth1 proto kernel scope link src 172.18.31.13
169.254.0.0/16 dev eth0 scope link metric 1002
169.254.0.0/16 dev eth1 scope link metric 1003
default via 172.18.30.1 dev eth0
[root@msh-balance haproxy]# ip rule show
0: from all lookup local
32765: from all fwmark 0x6f lookup 100
32766: from all lookup main
32767: from all lookup default
[root@msh-balance haproxy]# ip route show table 100
local default dev lo scope host
[root@msh-balance haproxy]# egrep -v '^(#|:)' /etc/sysconfig/iptables
*nat
-A POSTROUTING -s 172.18.31.101/32 -o eth0 -j SNAT --to-source 172.18.30.182
-A POSTROUTING -s 172.18.31.102/32 -o eth0 -j SNAT --to-source 172.18.30.182
-A POSTROUTING -s 172.18.31.103/32 -o eth0 -j SNAT --to-source 172.18.30.182
-A POSTROUTING -s 172.18.31.104/32 -o eth0 -j SNAT --to-source 172.18.30.197
COMMIT
*filter
-A INPUT -m state --state RELATED,ESTABLISHED -j ACCEPT
-A INPUT -p icmp -j ACCEPT
-A INPUT -i lo -j ACCEPT
-A INPUT -s 172.18.31.0/24 -j ACCEPT
-A INPUT -p tcp -m tcp --dport 22 -j ACCEPT
-A INPUT -p tcp -m tcp --dport 25 -j ACCEPT
-A INPUT -p tcp -m tcp --dport 80 -j ACCEPT
-A INPUT -p tcp -m tcp --dport 443 -j ACCEPT
-A INPUT -j LOG --log-prefix "rej-INPUT "
-A INPUT -j REJECT --reject-with icmp-host-prohibited
-A FORWARD -i eth0 -o eth1 -j ACCEPT
-A FORWARD -i eth1 -o eth0 -j ACCEPT
-A FORWARD -j LOG --log-prefix "rej-FWD "
-A FORWARD -j REJECT --reject-with icmp-host-prohibited
COMMIT
*mangle
-A PREROUTING -p tcp -m socket -j DIVERT
-A DIVERT -j MARK --set-xmark 0x6f/0xffffffff
-A DIVERT -j ACCEPT
COMMIT
--
Lawrence Weeks [email protected]
Anabasis Consulting Ltd