On 28/05/17(Sun) 10:34, Florian Riehm wrote:
> Hi,
>
> after the fix for carp balancing ip-stealth is in, here is the fix for
> balancing ip.
Great!
>
> Non-stealth balancing traffic needs some special treatment since it contains
> layer 3 unicast inside layer 2 multicast.
>
> Now the idea is to deal at layer 2 (ether_input()) with the multicast frames
> like regular multicast. After layer 2 processing is done, ip(6)_input() resets
> the M_MCAST flag and we are unicast.
>
> To achieve this I mark incoming packets matching balancing MAC addresses with
> an mbuf tag. In ip(6)_input() I remove M_MCAST from the mbuf's m_flags if the
> tag exists. Thanks to mpi@ who brought me to this idea.
Could you remove this flag in carp_lsdrop() instead? That would keep
the carp logic in netinet/ip_carp.c, which makes it more resilient to
future changes.
> The current code tried to solve the problem by removing the MCAST bit from the
> MAC address to keep the kernel from treating it as multicast. This is very
> fragile and it was broken more than once. At the moment it is broken
> due to the MAC address checks at the beginning of ether_input().
>
> From my point of view carp balancing is fully working again after
> this patch is in. No further issues are known at the moment. Feel
> free to test and report.
>
> Regards,
>
> Florian
>
> Index: share/man/man9/mbuf_tags.9
> ===================================================================
> RCS file: /cvs/src/share/man/man9/mbuf_tags.9,v
> retrieving revision 1.37
> diff -u -p -r1.37 mbuf_tags.9
> --- share/man/man9/mbuf_tags.9 24 Nov 2015 19:58:48 -0000 1.37
> +++ share/man/man9/mbuf_tags.9 28 May 2017 08:14:31 -0000
> @@ -170,6 +170,13 @@ Used by the IPv4 stack to keep track of
> IP packet, in case a protocol wants to respond over the same route.
> The tag contains a
> .Va struct ip_srcrt .
> +.It PACKET_TAG_CARP_BAL_IP
> +Used by
> +.Xr carp 4
> +to mark packets received in mode
> +.Va balancing ip .
> +These packets need some special treatment since they contain layer 3 unicast
> +inside layer 2 multicast. The tag contains no data.
> .El
> .Pp
> .Fn m_tag_find
> Index: sys/netinet/ip_carp.c
> ===================================================================
> RCS file: /cvs/src/sys/netinet/ip_carp.c,v
> retrieving revision 1.310
> diff -u -p -r1.310 ip_carp.c
> --- sys/netinet/ip_carp.c 27 May 2017 21:55:52 -0000 1.310
> +++ sys/netinet/ip_carp.c 28 May 2017 08:14:32 -0000
> @@ -1422,8 +1422,23 @@ carp_input(struct ifnet *ifp0, struct mb
> (IFF_UP|IFF_RUNNING))
> continue;
>
> - if (carp_vhe_match(sc, eh->ether_dhost))
> + if (carp_vhe_match(sc, eh->ether_dhost)) {
> + /*
> + * These packets look like layer 2 multicast but they
> + * are unicast at layer 3. With help of the tag the
> + * mbuf's M_MCAST flag can be removed in ip(6)_input,
> + * after we have passed layer 2.
> + */
> + if (sc->sc_balancing == CARP_BAL_IP) {
> + struct m_tag *mtag;
> + mtag = m_tag_get(PACKET_TAG_CARP_BAL_IP, 0,
> + M_NOWAIT);
> + if (mtag == NULL)
> + return (0);
> + m_tag_prepend(m, mtag);
> + }
> break;
> + }
> }
>
> if (sc == NULL) {
> @@ -1455,13 +1470,6 @@ carp_input(struct ifnet *ifp0, struct mb
>
> return (0);
> }
> -
> - /*
> - * Clear mcast if received on a carp IP balanced address.
> - */
> - if (sc->sc_balancing == CARP_BAL_IP &&
> - ETHER_IS_MULTICAST(eh->ether_dhost))
> - *(eh->ether_dhost) &= ~0x01;
>
> ml_enqueue(&ml, m);
> if_input(&sc->sc_if, &ml);
> Index: sys/netinet/ip_input.c
> ===================================================================
> RCS file: /cvs/src/sys/netinet/ip_input.c,v
> retrieving revision 1.304
> diff -u -p -r1.304 ip_input.c
> --- sys/netinet/ip_input.c 22 May 2017 22:23:11 -0000 1.304
> +++ sys/netinet/ip_input.c 28 May 2017 08:14:32 -0000
> @@ -319,9 +319,18 @@ ipv4_input(struct mbuf *m)
> }
>
> #if NCARP > 0
> - if (ifp->if_type == IFT_CARP && ip->ip_p != IPPROTO_ICMP &&
> - carp_lsdrop(m, AF_INET, &ip->ip_src.s_addr, &ip->ip_dst.s_addr))
> - goto bad;
> + if (ifp->if_type == IFT_CARP) {
> + struct m_tag *mtag;
> + if (m->m_flags & M_MCAST &&
> + (mtag = m_tag_find(m, PACKET_TAG_CARP_BAL_IP, NULL))) {
> + m_tag_delete(m, mtag);
> + m->m_flags &= ~M_MCAST;
> + }
> +
> + if (ip->ip_p != IPPROTO_ICMP && carp_lsdrop(m, AF_INET,
> + &ip->ip_src.s_addr, &ip->ip_dst.s_addr))
> + goto bad;
> + }
> #endif
>
> #if NPF > 0
> Index: sys/netinet6/ip6_input.c
> ===================================================================
> RCS file: /cvs/src/sys/netinet6/ip6_input.c,v
> retrieving revision 1.189
> diff -u -p -r1.189 ip6_input.c
> --- sys/netinet6/ip6_input.c 23 May 2017 08:13:10 -0000 1.189
> +++ sys/netinet6/ip6_input.c 28 May 2017 08:14:32 -0000
> @@ -207,10 +207,18 @@ ip6_input(struct mbuf *m)
> }
>
> #if NCARP > 0
> - if (ifp->if_type == IFT_CARP && ip6->ip6_nxt != IPPROTO_ICMPV6 &&
> - carp_lsdrop(m, AF_INET6, ip6->ip6_src.s6_addr32,
> - ip6->ip6_dst.s6_addr32))
> - goto bad;
> + if (ifp->if_type == IFT_CARP) {
> + struct m_tag *mtag;
> + if (m->m_flags & M_MCAST &&
> + (mtag = m_tag_find(m, PACKET_TAG_CARP_BAL_IP, NULL))) {
> + m_tag_delete(m, mtag);
> + m->m_flags &= ~M_MCAST;
> + }
> +
> + if (ip6->ip6_nxt != IPPROTO_ICMPV6 && carp_lsdrop(m, AF_INET6,
> + ip6->ip6_src.s6_addr32, ip6->ip6_dst.s6_addr32))
> + goto bad;
> + }
> #endif
> ip6stat_inc(ip6s_nxthist + ip6->ip6_nxt);
>
> Index: sys/sys/mbuf.h
> ===================================================================
> RCS file: /cvs/src/sys/sys/mbuf.h,v
> retrieving revision 1.228
> diff -u -p -r1.228 mbuf.h
> --- sys/sys/mbuf.h 16 May 2017 15:57:03 -0000 1.228
> +++ sys/sys/mbuf.h 28 May 2017 08:14:32 -0000
> @@ -485,11 +485,12 @@ struct m_tag *m_tag_next(struct mbuf *,
> #define PACKET_TAG_PF_REASSEMBLED 0x0800 /* pf reassembled ipv6 packet */
> #define PACKET_TAG_SRCROUTE 0x1000 /* IPv4 source routing options */
> #define PACKET_TAG_TUNNEL 0x2000 /* Tunnel endpoint address */
> +#define PACKET_TAG_CARP_BAL_IP 0x4000 /* carp(4) ip balanced marker */
>
> #define MTAG_BITS \
> ("\20\1IPSEC_IN_DONE\2IPSEC_OUT_DONE\3IPSEC_IN_CRYPTO_DONE" \
> "\4IPSEC_OUT_CRYPTO_NEEDED\5IPSEC_PENDING_TDB\6BRIDGE\7GIF\10GRE\11DLT" \
> - "\12PF_DIVERT\14PF_REASSEMBLED\15SRCROUTE\16TUNNEL")
> + "\12PF_DIVERT\14PF_REASSEMBLED\15SRCROUTE\16TUNNEL\17CARP_BAL_IP")
>
> /*
> * Maximum tag payload length (that is excluding the m_tag structure).
>