Max Laier wrote:

There is clearly something very wrong with how the vswitch works and it's not really FreeBSD's job to work around these issues. The patch you posted is rather intrusive and certainly not something we want in the tree. You should talk to VMWare's support to fix the obvious short-comings in the vswitch design.


I agree completely. We have a support contract with VMWare and I intend to open a ticket. However, I think the likelihood of them providing a fix for this is just about zero. Who knows, maybe they will surprise me.

As for your patch - you want "IF_ADDR_[UN]LOCK(ifp);" around walking the address list. Don't forget to unlock before the return.


Thanks for the help. Here is an updated patch against 7.2 if anyone else has a VMWare + FreeBSD + CARP problem child and needs a fix today ...

http://www.shrew.net/static/patches/esx-carp.diff

The IPv6 code path is untested. Also, the changes were placed under a sysctl conditional so the following is required in /etc/sysctl.conf to enable it at boot time ...

net.inet.carp.drop_echoed=1

Thanks again,

-Matthew
Index: ip_carp.c
===================================================================
RCS file: /home/ncvs/src/sys/netinet/ip_carp.c,v
retrieving revision 1.52.2.3
diff -u -r1.52.2.3 ip_carp.c
--- ip_carp.c   9 May 2009 00:35:38 -0000       1.52.2.3
+++ ip_carp.c   26 Jul 2009 22:46:19 -0000
@@ -143,6 +143,8 @@
     &carp_opts[CARPCTL_LOG], 0, "log bad carp packets");
 SYSCTL_INT(_net_inet_carp, CARPCTL_ARPBALANCE, arpbalance, CTLFLAG_RW,
     &carp_opts[CARPCTL_ARPBALANCE], 0, "balance arp responses");
+SYSCTL_INT(_net_inet_carp, CARPCTL_DROPECHOED, drop_echoed, CTLFLAG_RW,
+    &carp_opts[CARPCTL_DROPECHOED], 0, "drop packets echoed to sender");
 SYSCTL_INT(_net_inet_carp, OID_AUTO, suppress_preempt, CTLFLAG_RD,
     &carp_suppress_preempt, 0, "Preemption is suppressed");
 
@@ -552,6 +554,28 @@
                return;
        }
 
+       /*
+        * verify that the source address is not valid
+        * for the interface it was received on. this
+        * tends to happen with VMWare ESX vSwitches.
+        */
+       if (carp_opts[CARPCTL_DROPECHOED]) {
+               struct ifnet *ifp = m->m_pkthdr.rcvif;
+               struct ifaddr *ifa;
+               IF_ADDR_LOCK(ifp);
+               TAILQ_FOREACH(ifa, &ifp->if_addrlist, ifa_list) {
+                       struct in_addr in4;
+                       in4 = ifatoia(ifa)->ia_addr.sin_addr;
+                       if (ifa->ifa_addr->sa_family == AF_INET &&
+                           in4.s_addr == ip->ip_src.s_addr) {
+                               IF_ADDR_UNLOCK(ifp);
+                               m_freem(m);
+                               return;
+                       }
+               }
+               IF_ADDR_UNLOCK(ifp);
+       }
+
        /* verify that the IP TTL is 255.  */
        if (ip->ip_ttl != CARP_DFLTTL) {
                carpstats.carps_badttl++;
@@ -644,6 +668,28 @@
                return (IPPROTO_DONE);
        }
 
+       /*
+        * verify that the source address is not valid
+        * for the interface it was received on. this
+        * tends to happen with VMWare ESX vSwitches.
+        */
+       if (carp_opts[CARPCTL_DROPECHOED]) {
+               struct ifnet *ifp = m->m_pkthdr.rcvif;
+               struct ifaddr *ifa;
+               IF_ADDR_LOCK(ifp);
+               TAILQ_FOREACH(ifa, &ifp->if_addrlist, ifa_list) {
+                       struct in6_addr in6;
+                       in6 = ifatoia6(ifa)->ia_addr.sin6_addr;
+                       if (ifa->ifa_addr->sa_family == AF_INET6 &&
+                           memcmp(&in6, &ip6->ip6_src, sizeof(in6)) == 0) {
+                               IF_ADDR_UNLOCK(ifp);
+                               m_freem(m);
+                               return (IPPROTO_DONE);
+                       }
+               }
+               IF_ADDR_UNLOCK(ifp);
+       }
+
        /* verify that the IP TTL is 255 */
        if (ip6->ip6_hlim != CARP_DFLTTL) {
                carpstats.carps_badttl++;
Index: ip_carp.h
===================================================================
RCS file: /home/ncvs/src/sys/netinet/ip_carp.h,v
retrieving revision 1.3
diff -u -r1.3 ip_carp.h
--- ip_carp.h   1 Dec 2006 18:37:41 -0000       1.3
+++ ip_carp.h   26 Jul 2009 22:46:19 -0000
@@ -140,7 +140,8 @@
 #define        CARPCTL_LOG             3       /* log bad packets */
 #define        CARPCTL_STATS           4       /* statistics (read-only) */
 #define        CARPCTL_ARPBALANCE      5       /* balance arp responses */
-#define        CARPCTL_MAXID           6
+#define        CARPCTL_DROPECHOED      6       /* drop packets echoed to the sender */
+#define        CARPCTL_MAXID           7
 
 #define        CARPCTL_NAMES { \
        { 0, 0 }, \
_______________________________________________
freebsd-net@freebsd.org mailing list
http://lists.freebsd.org/mailman/listinfo/freebsd-net
To unsubscribe, send any mail to "freebsd-net-unsubscr...@freebsd.org"

Reply via email to