On 05/28/17 14:04, Martin Pieuchot wrote:
> On 28/05/17(Sun) 13:58, Florian Riehm wrote:
>> On 05/28/17 11:33, Martin Pieuchot wrote:
>>> On 28/05/17(Sun) 10:34, Florian Riehm wrote:
>>>> Hi,
>>>>
>>>> after the fix for carp balancing ip-stealth is in, here is the fix for
>>>> balancing ip.
>>>
>>> Great!
>>>
>>>>
>>>> Non-stealth balancing traffic needs some special treatment since it 
>>>> contains
>>>> layer 3 unicast inside layer 2 multicast.
>>>>
>>>> Now the idea is to deal at layer 2 (ether_input()) with the multicast 
>>>> frames
>>>> like regular multicast. After layer 2 processing is done, ip(6)_input() 
>>>> resets
>>>> the M_MCAST flag and we are unicast.
>>>>
>>>> To achieve this I mark incoming packets matching to balancing mac 
>>>> addresses with
>>>> a mbuf tag. In ip(6)_input() I remove M_MCAST from mbuf's m_flags if the 
>>>> tag
>>>> exists. Thanks to mpi@ who brought me to this idea.
>>>
>>> Could you remove this flag in carp_lsdrop() instead?  That would keep
>>> carp logic's in netinet/ip_carp.c which makes it more resilient to
>>> future changes.
>>
>> Actually I did this in my first attempt and basically it worked.
>> Then I decided to move it out of carp_lsdrop() because carp_lsdrop()
>> is called twice in ip(6)_input(). ICMP has to be handled later,
>> to make sure we don't drop the wrong ICMP packets.
>>
>> My intention was to remove the flag as early as possible to avoid any
>> potential problems. Before carp_lsdrop() is called for ICMP, ip_input()
>> is already dealing with the M_MCAST flag. As I saw that, I decided to move
>> my fix out of carp_lsdrop(). Even it would work at the moment, it would
>> be more fragile. In example a change in pf_test() in the future could
>> break it.
>>
>> So I think a direkt fix inside ip(6)_input() is a better solution.
>> What do you think?
> 
> If you need a special case for ICMP, then do this check inside
> carp_lsdrop().
> 

Ok, new diff below.

Index: share/man/man9/mbuf_tags.9
===================================================================
RCS file: /cvs/src/share/man/man9/mbuf_tags.9,v
retrieving revision 1.37
diff -u -p -r1.37 mbuf_tags.9
--- share/man/man9/mbuf_tags.9  24 Nov 2015 19:58:48 -0000      1.37
+++ share/man/man9/mbuf_tags.9  28 May 2017 13:50:23 -0000
@@ -170,6 +170,13 @@ Used by the IPv4 stack to keep track of 
 IP packet, in case a protocol wants to respond over the same route.
 The tag contains a
 .Va struct ip_srcrt .
+.It PACKET_TAG_CARP_BAL_IP
+Used by
+.Xr carp 4
+to mark packets received in mode 
+.Va balancing ip .
+This packets need some special treatment since they contain layer 3 unicast
+inside layer 2 multicast. The tag contains no data.
 .El
 .Pp
 .Fn m_tag_find
Index: sys/netinet/ip_carp.c
===================================================================
RCS file: /cvs/src/sys/netinet/ip_carp.c,v
retrieving revision 1.310
diff -u -p -r1.310 ip_carp.c
--- sys/netinet/ip_carp.c       27 May 2017 21:55:52 -0000      1.310
+++ sys/netinet/ip_carp.c       28 May 2017 13:50:25 -0000
@@ -1422,8 +1422,23 @@ carp_input(struct ifnet *ifp0, struct mb
                    (IFF_UP|IFF_RUNNING))
                        continue;
 
-               if (carp_vhe_match(sc, eh->ether_dhost))
+               if (carp_vhe_match(sc, eh->ether_dhost)) {
+                       /*
+                        * These packets look like layer 2 multicast but they
+                        * are unicast at layer 3. With help of the tag the
+                        * mbuf's M_MCAST flag can be removed by carp_lsdrop()
+                        * after we have passed layer 2.
+                        */
+                       if (sc->sc_balancing == CARP_BAL_IP) {
+                               struct m_tag *mtag;
+                               mtag = m_tag_get(PACKET_TAG_CARP_BAL_IP, 0,
+                                   M_NOWAIT);
+                               if (mtag == NULL)
+                                       return (0);
+                               m_tag_prepend(m, mtag);
+                       }
                        break;
+               }
        }
 
        if (sc == NULL) {
@@ -1456,13 +1471,6 @@ carp_input(struct ifnet *ifp0, struct mb
                return (0);
        }
 
-       /*
-        * Clear mcast if received on a carp IP balanced address.
-        */
-       if (sc->sc_balancing == CARP_BAL_IP &&
-           ETHER_IS_MULTICAST(eh->ether_dhost))
-               *(eh->ether_dhost) &= ~0x01;
-
        ml_enqueue(&ml, m);
        if_input(&sc->sc_if, &ml);
        SRPL_LEAVE(&sr);
@@ -1471,12 +1479,14 @@ carp_input(struct ifnet *ifp0, struct mb
 }
 
 int
-carp_lsdrop(struct mbuf *m, sa_family_t af, u_int32_t *src, u_int32_t *dst)
+carp_lsdrop(struct mbuf *m, sa_family_t af, u_int32_t *src, u_int32_t *dst,
+   int drop)
 {
        struct ifnet *ifp;
        struct carp_softc *sc;
        int match = 1;
        u_int32_t fold;
+       struct m_tag *mtag;
 
        ifp = if_get(m->m_pkthdr.ph_ifidx);
        KASSERT(ifp != NULL);
@@ -1484,6 +1494,25 @@ carp_lsdrop(struct mbuf *m, sa_family_t 
        sc = ifp->if_softc;
        if (sc->sc_balancing == CARP_BAL_NONE)
                goto done;
+
+       /*
+        * Remove M_MCAST flag from mbuf of balancing ip traffic, since the fact
+        * that it is layer 2 multicast does not implicate that it is also layer
+        * 3 multicast.
+        */
+       if (m->m_flags & M_MCAST &&
+           (mtag = m_tag_find(m, PACKET_TAG_CARP_BAL_IP, NULL))) {
+               m_tag_delete(m, mtag);
+               m->m_flags &= ~M_MCAST;
+       }
+       
+       /*
+        * Return without making a drop decision. This allows to clear the
+        * M_MCAST flag and do nothing else.
+        */
+       if (!drop)
+               goto done;
+
        /*
         * Never drop carp advertisements.
         * XXX Bad idea to pass all broadcast / multicast traffic?
Index: sys/netinet/ip_carp.h
===================================================================
RCS file: /cvs/src/sys/netinet/ip_carp.h,v
retrieving revision 1.42
diff -u -p -r1.42 ip_carp.h
--- sys/netinet/ip_carp.h       14 Apr 2017 20:46:31 -0000      1.42
+++ sys/netinet/ip_carp.h       28 May 2017 13:50:25 -0000
@@ -204,6 +204,7 @@ struct ifnet        *carp_ourether(void *, u_in
 int             carp_output(struct ifnet *, struct mbuf *, struct sockaddr *,
                     struct rtentry *);
 int             carp_sysctl(int *, u_int,  void *, size_t *, void *, size_t);
-int             carp_lsdrop(struct mbuf *, sa_family_t, u_int32_t *, u_int32_t 
*);
+int             carp_lsdrop(struct mbuf *, sa_family_t, u_int32_t *,
+                    u_int32_t *, int);
 #endif /* _KERNEL */
 #endif /* _NETINET_IP_CARP_H_ */
Index: sys/netinet/ip_icmp.c
===================================================================
RCS file: /cvs/src/sys/netinet/ip_icmp.c,v
retrieving revision 1.168
diff -u -p -r1.168 ip_icmp.c
--- sys/netinet/ip_icmp.c       22 May 2017 14:26:08 -0000      1.168
+++ sys/netinet/ip_icmp.c       28 May 2017 13:50:25 -0000
@@ -498,7 +498,7 @@ icmp_input_if(struct ifnet *ifp, struct 
 #if NCARP > 0
                if (ifp->if_type == IFT_CARP &&
                    carp_lsdrop(m, AF_INET, &sin.sin_addr.s_addr,
-                   &ip->ip_dst.s_addr))
+                   &ip->ip_dst.s_addr, 1))
                        goto freeit;
 #endif
                /*
@@ -582,7 +582,7 @@ reflect:
 #if NCARP > 0
                if (ifp->if_type == IFT_CARP &&
                    carp_lsdrop(m, AF_INET, &ip->ip_src.s_addr,
-                   &ip->ip_dst.s_addr))
+                   &ip->ip_dst.s_addr, 1))
                        goto freeit;
 #endif
                /* Free packet atttributes */
@@ -650,7 +650,7 @@ reflect:
 #if NCARP > 0
                if (ifp->if_type == IFT_CARP &&
                    carp_lsdrop(m, AF_INET, &sdst.sin_addr.s_addr,
-                   &ip->ip_dst.s_addr))
+                   &ip->ip_dst.s_addr, 1))
                        goto freeit;
 #endif
                rtredirect(sintosa(&sdst), sintosa(&sgw),
Index: sys/netinet/ip_input.c
===================================================================
RCS file: /cvs/src/sys/netinet/ip_input.c,v
retrieving revision 1.304
diff -u -p -r1.304 ip_input.c
--- sys/netinet/ip_input.c      22 May 2017 22:23:11 -0000      1.304
+++ sys/netinet/ip_input.c      28 May 2017 13:50:25 -0000
@@ -319,8 +319,9 @@ ipv4_input(struct mbuf *m)
        }
 
 #if NCARP > 0
-       if (ifp->if_type == IFT_CARP && ip->ip_p != IPPROTO_ICMP &&
-           carp_lsdrop(m, AF_INET, &ip->ip_src.s_addr, &ip->ip_dst.s_addr))
+       if (ifp->if_type == IFT_CARP &&
+           carp_lsdrop(m, AF_INET, &ip->ip_src.s_addr, &ip->ip_dst.s_addr,
+           (ip->ip_p == IPPROTO_ICMP ? 0 : 1)))
                goto bad;
 #endif
 
@@ -427,7 +428,7 @@ ipv4_input(struct mbuf *m)
 
 #if NCARP > 0
        if (ifp->if_type == IFT_CARP && ip->ip_p == IPPROTO_ICMP &&
-           carp_lsdrop(m, AF_INET, &ip->ip_src.s_addr, &ip->ip_dst.s_addr))
+           carp_lsdrop(m, AF_INET, &ip->ip_src.s_addr, &ip->ip_dst.s_addr, 1))
                goto bad;
 #endif
        /*
Index: sys/netinet6/icmp6.c
===================================================================
RCS file: /cvs/src/sys/netinet6/icmp6.c,v
retrieving revision 1.210
diff -u -p -r1.210 icmp6.c
--- sys/netinet6/icmp6.c        8 May 2017 16:14:47 -0000       1.210
+++ sys/netinet6/icmp6.c        28 May 2017 13:50:25 -0000
@@ -446,7 +446,7 @@ icmp6_input(struct mbuf **mp, int *offp,
        if (ifp->if_type == IFT_CARP &&
            icmp6->icmp6_type == ICMP6_ECHO_REQUEST &&
            carp_lsdrop(m, AF_INET6, ip6->ip6_src.s6_addr32,
-           ip6->ip6_dst.s6_addr32)) {
+           ip6->ip6_dst.s6_addr32, 1)) {
                if_put(ifp);
                goto freeit;
        }
Index: sys/netinet6/ip6_input.c
===================================================================
RCS file: /cvs/src/sys/netinet6/ip6_input.c,v
retrieving revision 1.189
diff -u -p -r1.189 ip6_input.c
--- sys/netinet6/ip6_input.c    23 May 2017 08:13:10 -0000      1.189
+++ sys/netinet6/ip6_input.c    28 May 2017 13:50:25 -0000
@@ -207,9 +207,9 @@ ip6_input(struct mbuf *m)
        }
 
 #if NCARP > 0
-       if (ifp->if_type == IFT_CARP && ip6->ip6_nxt != IPPROTO_ICMPV6 &&
+       if (ifp->if_type == IFT_CARP &&
            carp_lsdrop(m, AF_INET6, ip6->ip6_src.s6_addr32,
-           ip6->ip6_dst.s6_addr32))
+           ip6->ip6_dst.s6_addr32, (ip6->ip6_nxt == IPPROTO_ICMPV6 ? 0 : 1)))
                goto bad;
 #endif
        ip6stat_inc(ip6s_nxthist + ip6->ip6_nxt);
@@ -448,7 +448,7 @@ ip6_input(struct mbuf *m)
 #if NCARP > 0
        if (ifp->if_type == IFT_CARP && ip6->ip6_nxt == IPPROTO_ICMPV6 &&
            carp_lsdrop(m, AF_INET6, ip6->ip6_src.s6_addr32,
-           ip6->ip6_dst.s6_addr32))
+           ip6->ip6_dst.s6_addr32, 1))
                goto bad;
 #endif
        /*
Index: sys/sys/mbuf.h
===================================================================
RCS file: /cvs/src/sys/sys/mbuf.h,v
retrieving revision 1.228
diff -u -p -r1.228 mbuf.h
--- sys/sys/mbuf.h      16 May 2017 15:57:03 -0000      1.228
+++ sys/sys/mbuf.h      28 May 2017 13:50:25 -0000
@@ -485,11 +485,12 @@ struct m_tag *m_tag_next(struct mbuf *, 
 #define PACKET_TAG_PF_REASSEMBLED      0x0800 /* pf reassembled ipv6 packet */
 #define PACKET_TAG_SRCROUTE            0x1000 /* IPv4 source routing options */
 #define PACKET_TAG_TUNNEL              0x2000  /* Tunnel endpoint address */
+#define PACKET_TAG_CARP_BAL_IP         0x4000  /* carp(4) ip balanced marker */
 
 #define MTAG_BITS \
     ("\20\1IPSEC_IN_DONE\2IPSEC_OUT_DONE\3IPSEC_IN_CRYPTO_DONE" \
     "\4IPSEC_OUT_CRYPTO_NEEDED\5IPSEC_PENDING_TDB\6BRIDGE\7GIF\10GRE\11DLT" \
-    "\12PF_DIVERT\14PF_REASSEMBLED\15SRCROUTE\16TUNNEL")
+    "\12PF_DIVERT\14PF_REASSEMBLED\15SRCROUTE\16TUNNEL\17CARP_BAL_IP")
 
 /*
  * Maximum tag payload length (that is excluding the m_tag structure).

Reply via email to