Hi,

Now that the fix for carp balancing ip-stealth is in, here is the fix for
balancing ip.

Non-stealth balancing traffic needs some special treatment since it contains
layer 3 unicast inside layer 2 multicast.

The idea is to handle these frames at layer 2 (ether_input()) like regular
multicast. Once layer 2 processing is done, ip(6)_input() clears the M_MCAST
flag and the packet is treated as unicast from then on.

To achieve this, I mark incoming packets that match a balancing MAC address
with an mbuf tag. In ip(6)_input() I remove M_MCAST from the mbuf's m_flags if
the tag exists. Thanks to mpi@, who led me to this idea.

The current code tries to solve the problem by clearing the multicast bit in
the destination MAC address so that the kernel does not treat the frame as
multicast. This is very fragile and has been broken more than once. At the
moment it is broken due to the MAC address checks at the beginning of
ether_input().

From my point of view, carp balancing is fully working again once this
patch is in. No further issues are known at the moment. Feel free to
test and report.

Regards,

Florian

Index: share/man/man9/mbuf_tags.9
===================================================================
RCS file: /cvs/src/share/man/man9/mbuf_tags.9,v
retrieving revision 1.37
diff -u -p -r1.37 mbuf_tags.9
--- share/man/man9/mbuf_tags.9  24 Nov 2015 19:58:48 -0000      1.37
+++ share/man/man9/mbuf_tags.9  28 May 2017 08:14:31 -0000
@@ -170,6 +170,13 @@ Used by the IPv4 stack to keep track of 
 IP packet, in case a protocol wants to respond over the same route.
 The tag contains a
 .Va struct ip_srcrt .
+.It PACKET_TAG_CARP_BAL_IP
+Used by
+.Xr carp 4
+to mark packets received in mode
+.Va balancing ip .
+These packets need some special treatment since they contain layer 3 unicast
+inside layer 2 multicast. The tag contains no data.
 .El
 .Pp
 .Fn m_tag_find
Index: sys/netinet/ip_carp.c
===================================================================
RCS file: /cvs/src/sys/netinet/ip_carp.c,v
retrieving revision 1.310
diff -u -p -r1.310 ip_carp.c
--- sys/netinet/ip_carp.c       27 May 2017 21:55:52 -0000      1.310
+++ sys/netinet/ip_carp.c       28 May 2017 08:14:32 -0000
@@ -1422,8 +1422,23 @@ carp_input(struct ifnet *ifp0, struct mb
                    (IFF_UP|IFF_RUNNING))
                        continue;
 
-               if (carp_vhe_match(sc, eh->ether_dhost))
+               if (carp_vhe_match(sc, eh->ether_dhost)) {
+                       /*
+                        * These packets look like layer 2 multicast but they
+                        * are unicast at layer 3. With help of the tag the
+                        * mbuf's M_MCAST flag can be removed in ip(6)_input,
+                        * after we have passed layer 2.
+                        */
+                       if (sc->sc_balancing == CARP_BAL_IP) {
+                               struct m_tag *mtag;
+                               mtag = m_tag_get(PACKET_TAG_CARP_BAL_IP, 0,
+                                   M_NOWAIT);
+                               if (mtag == NULL)
+                                       return (0);
+                               m_tag_prepend(m, mtag);
+                       }
                        break;
+               }
        }
 
        if (sc == NULL) {
@@ -1455,13 +1470,6 @@ carp_input(struct ifnet *ifp0, struct mb
 
                return (0);
        }
-
-       /*
-        * Clear mcast if received on a carp IP balanced address.
-        */
-       if (sc->sc_balancing == CARP_BAL_IP &&
-           ETHER_IS_MULTICAST(eh->ether_dhost))
-               *(eh->ether_dhost) &= ~0x01;
 
        ml_enqueue(&ml, m);
        if_input(&sc->sc_if, &ml);
Index: sys/netinet/ip_input.c
===================================================================
RCS file: /cvs/src/sys/netinet/ip_input.c,v
retrieving revision 1.304
diff -u -p -r1.304 ip_input.c
--- sys/netinet/ip_input.c      22 May 2017 22:23:11 -0000      1.304
+++ sys/netinet/ip_input.c      28 May 2017 08:14:32 -0000
@@ -319,9 +319,18 @@ ipv4_input(struct mbuf *m)
        }
 
 #if NCARP > 0
-       if (ifp->if_type == IFT_CARP && ip->ip_p != IPPROTO_ICMP &&
-           carp_lsdrop(m, AF_INET, &ip->ip_src.s_addr, &ip->ip_dst.s_addr))
-               goto bad;
+       if (ifp->if_type == IFT_CARP) {
+               struct m_tag *mtag;
+               if (m->m_flags & M_MCAST &&
+                   (mtag = m_tag_find(m, PACKET_TAG_CARP_BAL_IP, NULL))) {
+                       m_tag_delete(m, mtag);
+                       m->m_flags &= ~M_MCAST;
+               }
+
+               if (ip->ip_p != IPPROTO_ICMP && carp_lsdrop(m, AF_INET,
+                   &ip->ip_src.s_addr, &ip->ip_dst.s_addr))
+                       goto bad;
+       }
 #endif
 
 #if NPF > 0
Index: sys/netinet6/ip6_input.c
===================================================================
RCS file: /cvs/src/sys/netinet6/ip6_input.c,v
retrieving revision 1.189
diff -u -p -r1.189 ip6_input.c
--- sys/netinet6/ip6_input.c    23 May 2017 08:13:10 -0000      1.189
+++ sys/netinet6/ip6_input.c    28 May 2017 08:14:32 -0000
@@ -207,10 +207,18 @@ ip6_input(struct mbuf *m)
        }
 
 #if NCARP > 0
-       if (ifp->if_type == IFT_CARP && ip6->ip6_nxt != IPPROTO_ICMPV6 &&
-           carp_lsdrop(m, AF_INET6, ip6->ip6_src.s6_addr32,
-           ip6->ip6_dst.s6_addr32))
-               goto bad;
+       if (ifp->if_type == IFT_CARP) {
+               struct m_tag *mtag;
+               if (m->m_flags & M_MCAST &&
+                   (mtag = m_tag_find(m, PACKET_TAG_CARP_BAL_IP, NULL))) {
+                       m_tag_delete(m, mtag);
+                       m->m_flags &= ~M_MCAST;
+               }
+
+               if (ip6->ip6_nxt != IPPROTO_ICMPV6 && carp_lsdrop(m, AF_INET6,
+                   ip6->ip6_src.s6_addr32, ip6->ip6_dst.s6_addr32))
+                       goto bad;
+       }
 #endif
        ip6stat_inc(ip6s_nxthist + ip6->ip6_nxt);
 
Index: sys/sys/mbuf.h
===================================================================
RCS file: /cvs/src/sys/sys/mbuf.h,v
retrieving revision 1.228
diff -u -p -r1.228 mbuf.h
--- sys/sys/mbuf.h      16 May 2017 15:57:03 -0000      1.228
+++ sys/sys/mbuf.h      28 May 2017 08:14:32 -0000
@@ -485,11 +485,12 @@ struct m_tag *m_tag_next(struct mbuf *, 
 #define PACKET_TAG_PF_REASSEMBLED      0x0800 /* pf reassembled ipv6 packet */
 #define PACKET_TAG_SRCROUTE            0x1000 /* IPv4 source routing options */
 #define PACKET_TAG_TUNNEL              0x2000  /* Tunnel endpoint address */
+#define PACKET_TAG_CARP_BAL_IP         0x4000  /* carp(4) ip balanced marker */
 
 #define MTAG_BITS \
     ("\20\1IPSEC_IN_DONE\2IPSEC_OUT_DONE\3IPSEC_IN_CRYPTO_DONE" \
     "\4IPSEC_OUT_CRYPTO_NEEDED\5IPSEC_PENDING_TDB\6BRIDGE\7GIF\10GRE\11DLT" \
-    "\12PF_DIVERT\14PF_REASSEMBLED\15SRCROUTE\16TUNNEL")
+    "\12PF_DIVERT\14PF_REASSEMBLED\15SRCROUTE\16TUNNEL\17CARP_BAL_IP")
 
 /*
  * Maximum tag payload length (that is excluding the m_tag structure).

Reply via email to