On 28/05/17(Sun) 10:34, Florian Riehm wrote:
> Hi,
> 
> after the fix for carp balancing ip-stealth is in, here is the fix for
> balancing ip.

Great!

> 
> Non-stealth balancing traffic needs some special treatment since it contains
> layer 3 unicast inside layer 2 multicast.
> 
> Now the idea is to deal at layer 2 (ether_input()) with the multicast frames
> like regular multicast. After layer 2 processing is done, ip(6)_input() resets
> the M_MCAST flag and we are unicast.
> 
> To achieve this I mark incoming packets matching balancing MAC addresses with
> an mbuf tag. In ip(6)_input() I remove M_MCAST from the mbuf's m_flags if the
> tag exists. Thanks to mpi@ who brought me to this idea.

Could you remove this flag in carp_lsdrop() instead?  That would keep
the carp logic in netinet/ip_carp.c, which makes it more resilient to
future changes.

> The current code tried to solve the problem by clearing the multicast bit of
> the MAC address so that the kernel would not treat it as multicast. This is
> very fragile and it has been broken more than once. At the moment it is broken
> due to the MAC address checks at the beginning of ether_input().
> 
> From my point of view carp balancing is fully working again after
> this patch is in. No further issues are known at the moment. Feel
> free to test and report.
> 
> Regards,
> 
> Florian
> 
> Index: share/man/man9/mbuf_tags.9
> ===================================================================
> RCS file: /cvs/src/share/man/man9/mbuf_tags.9,v
> retrieving revision 1.37
> diff -u -p -r1.37 mbuf_tags.9
> --- share/man/man9/mbuf_tags.9        24 Nov 2015 19:58:48 -0000      1.37
> +++ share/man/man9/mbuf_tags.9        28 May 2017 08:14:31 -0000
> @@ -170,6 +170,13 @@ Used by the IPv4 stack to keep track of 
>  IP packet, in case a protocol wants to respond over the same route.
>  The tag contains a
>  .Va struct ip_srcrt .
> +.It PACKET_TAG_CARP_BAL_IP
> +Used by
> +.Xr carp 4
> +to mark packets received in mode 
> +.Va balancing ip .
> +These packets need some special treatment since they contain layer 3 unicast
> +inside layer 2 multicast. The tag contains no data.
>  .El
>  .Pp
>  .Fn m_tag_find
> Index: sys/netinet/ip_carp.c
> ===================================================================
> RCS file: /cvs/src/sys/netinet/ip_carp.c,v
> retrieving revision 1.310
> diff -u -p -r1.310 ip_carp.c
> --- sys/netinet/ip_carp.c     27 May 2017 21:55:52 -0000      1.310
> +++ sys/netinet/ip_carp.c     28 May 2017 08:14:32 -0000
> @@ -1422,8 +1422,23 @@ carp_input(struct ifnet *ifp0, struct mb
>                   (IFF_UP|IFF_RUNNING))
>                       continue;
>  
> -             if (carp_vhe_match(sc, eh->ether_dhost))
> +             if (carp_vhe_match(sc, eh->ether_dhost)) {
> +                     /*
> +                      * These packets look like layer 2 multicast but they
> +                      * are unicast at layer 3. With help of the tag the
> +                      * mbuf's M_MCAST flag can be removed in ip(6)_input,
> +                      * after we have passed layer 2.
> +                      */
> +                     if (sc->sc_balancing == CARP_BAL_IP) {
> +                             struct m_tag *mtag;
> +                             mtag = m_tag_get(PACKET_TAG_CARP_BAL_IP, 0,
> +                                 M_NOWAIT);
> +                             if (mtag == NULL)
> +                                     return (0);
> +                             m_tag_prepend(m, mtag);
> +                     }
>                       break;
> +             }
>       }
>  
>       if (sc == NULL) {
> @@ -1455,13 +1470,6 @@ carp_input(struct ifnet *ifp0, struct mb
>  
>               return (0);
>       }
> -
> -     /*
> -      * Clear mcast if received on a carp IP balanced address.
> -      */
> -     if (sc->sc_balancing == CARP_BAL_IP &&
> -         ETHER_IS_MULTICAST(eh->ether_dhost))
> -             *(eh->ether_dhost) &= ~0x01;
>  
>       ml_enqueue(&ml, m);
>       if_input(&sc->sc_if, &ml);
> Index: sys/netinet/ip_input.c
> ===================================================================
> RCS file: /cvs/src/sys/netinet/ip_input.c,v
> retrieving revision 1.304
> diff -u -p -r1.304 ip_input.c
> --- sys/netinet/ip_input.c    22 May 2017 22:23:11 -0000      1.304
> +++ sys/netinet/ip_input.c    28 May 2017 08:14:32 -0000
> @@ -319,9 +319,18 @@ ipv4_input(struct mbuf *m)
>       }
>  
>  #if NCARP > 0
> -     if (ifp->if_type == IFT_CARP && ip->ip_p != IPPROTO_ICMP &&
> -         carp_lsdrop(m, AF_INET, &ip->ip_src.s_addr, &ip->ip_dst.s_addr))
> -             goto bad;
> +     if (ifp->if_type == IFT_CARP) {
> +             struct m_tag *mtag;
> +             if (m->m_flags & M_MCAST &&
> +                 (mtag = m_tag_find(m, PACKET_TAG_CARP_BAL_IP, NULL))) {
> +                     m_tag_delete(m, mtag);
> +                     m->m_flags &= ~M_MCAST;
> +             }
> +
> +             if (ip->ip_p != IPPROTO_ICMP && carp_lsdrop(m, AF_INET,
> +                 &ip->ip_src.s_addr, &ip->ip_dst.s_addr))
> +                     goto bad;
> +     }
>  #endif
>  
>  #if NPF > 0
> Index: sys/netinet6/ip6_input.c
> ===================================================================
> RCS file: /cvs/src/sys/netinet6/ip6_input.c,v
> retrieving revision 1.189
> diff -u -p -r1.189 ip6_input.c
> --- sys/netinet6/ip6_input.c  23 May 2017 08:13:10 -0000      1.189
> +++ sys/netinet6/ip6_input.c  28 May 2017 08:14:32 -0000
> @@ -207,10 +207,18 @@ ip6_input(struct mbuf *m)
>       }
>  
>  #if NCARP > 0
> -     if (ifp->if_type == IFT_CARP && ip6->ip6_nxt != IPPROTO_ICMPV6 &&
> -         carp_lsdrop(m, AF_INET6, ip6->ip6_src.s6_addr32,
> -         ip6->ip6_dst.s6_addr32))
> -             goto bad;
> +     if (ifp->if_type == IFT_CARP) {
> +             struct m_tag *mtag;
> +             if (m->m_flags & M_MCAST &&
> +                 (mtag = m_tag_find(m, PACKET_TAG_CARP_BAL_IP, NULL))) {
> +                     m_tag_delete(m, mtag);
> +                     m->m_flags &= ~M_MCAST;
> +             }
> +
> +             if (ip6->ip6_nxt != IPPROTO_ICMPV6 && carp_lsdrop(m, AF_INET6,
> +                 ip6->ip6_src.s6_addr32, ip6->ip6_dst.s6_addr32))
> +                     goto bad;
> +     }
>  #endif
>       ip6stat_inc(ip6s_nxthist + ip6->ip6_nxt);
>  
> Index: sys/sys/mbuf.h
> ===================================================================
> RCS file: /cvs/src/sys/sys/mbuf.h,v
> retrieving revision 1.228
> diff -u -p -r1.228 mbuf.h
> --- sys/sys/mbuf.h    16 May 2017 15:57:03 -0000      1.228
> +++ sys/sys/mbuf.h    28 May 2017 08:14:32 -0000
> @@ -485,11 +485,12 @@ struct m_tag *m_tag_next(struct mbuf *, 
>  #define PACKET_TAG_PF_REASSEMBLED    0x0800 /* pf reassembled ipv6 packet */
>  #define PACKET_TAG_SRCROUTE          0x1000 /* IPv4 source routing options */
>  #define PACKET_TAG_TUNNEL            0x2000  /* Tunnel endpoint address */
> +#define PACKET_TAG_CARP_BAL_IP               0x4000  /* carp(4) ip balanced marker */
>  
>  #define MTAG_BITS \
>      ("\20\1IPSEC_IN_DONE\2IPSEC_OUT_DONE\3IPSEC_IN_CRYPTO_DONE" \
>      "\4IPSEC_OUT_CRYPTO_NEEDED\5IPSEC_PENDING_TDB\6BRIDGE\7GIF\10GRE\11DLT" \
> -    "\12PF_DIVERT\14PF_REASSEMBLED\15SRCROUTE\16TUNNEL")
> +    "\12PF_DIVERT\14PF_REASSEMBLED\15SRCROUTE\16TUNNEL\17CARP_BAL_IP")
>  
>  /*
>   * Maximum tag payload length (that is excluding the m_tag structure).
> 

Reply via email to