On 05/28/17 14:04, Martin Pieuchot wrote:
> On 28/05/17(Sun) 13:58, Florian Riehm wrote:
>> On 05/28/17 11:33, Martin Pieuchot wrote:
>>> On 28/05/17(Sun) 10:34, Florian Riehm wrote:
>>>> Hi,
>>>>
>>>> after the fix for carp balancing ip-stealth is in, here is the fix for
>>>> balancing ip.
>>>
>>> Great!
>>>
>>>>
>>>> Non-stealth balancing traffic needs some special treatment since it
>>>> contains
>>>> layer 3 unicast inside layer 2 multicast.
>>>>
>>>> Now the idea is to deal at layer 2 (ether_input()) with the multicast
>>>> frames
>>>> like regular multicast. After layer 2 processing is done, ip(6)_input()
>>>> resets
>>>> the M_MCAST flag and we are unicast.
>>>>
>>>> To achieve this I mark incoming packets matching to balancing mac
>>>> addresses with
>>>> a mbuf tag. In ip(6)_input() I remove M_MCAST from mbuf's m_flags if the
>>>> tag
>>>> exists. Thanks to mpi@ who brought me to this idea.
>>>
>>> Could you remove this flag in carp_lsdrop() instead? That would keep
>>> carp logic's in netinet/ip_carp.c which makes it more resilient to
>>> future changes.
>>
>> Actually I did this in my first attempt and basically it worked.
>> Then I decided to move it out of carp_lsdrop() because carp_lsdrop()
>> is called twice in ip(6)_input(). ICMP has to be handled later,
>> to make sure we don't drop the wrong ICMP packets.
>>
>> My intention was to remove the flag as early as possible to avoid any
>> potential problems. Before carp_lsdrop() is called for ICMP, ip_input()
>> is already dealing with the M_MCAST flag. As I saw that, I decided to move
>> my fix out of carp_lsdrop(). Even it would work at the moment, it would
>> be more fragile. In example a change in pf_test() in the future could
>> break it.
>>
>> So I think a direkt fix inside ip(6)_input() is a better solution.
>> What do you think?
>
> If you need a special case for ICMP, then do this check inside
> carp_lsdrop().
>
Ok, new diff below.
Index: share/man/man9/mbuf_tags.9
===================================================================
RCS file: /cvs/src/share/man/man9/mbuf_tags.9,v
retrieving revision 1.37
diff -u -p -r1.37 mbuf_tags.9
--- share/man/man9/mbuf_tags.9 24 Nov 2015 19:58:48 -0000 1.37
+++ share/man/man9/mbuf_tags.9 28 May 2017 13:50:23 -0000
@@ -170,6 +170,13 @@ Used by the IPv4 stack to keep track of
IP packet, in case a protocol wants to respond over the same route.
The tag contains a
.Va struct ip_srcrt .
+.It PACKET_TAG_CARP_BAL_IP
+Used by
+.Xr carp 4
+to mark packets received in mode
+.Va balancing ip .
+This packets need some special treatment since they contain layer 3 unicast
+inside layer 2 multicast. The tag contains no data.
.El
.Pp
.Fn m_tag_find
Index: sys/netinet/ip_carp.c
===================================================================
RCS file: /cvs/src/sys/netinet/ip_carp.c,v
retrieving revision 1.310
diff -u -p -r1.310 ip_carp.c
--- sys/netinet/ip_carp.c 27 May 2017 21:55:52 -0000 1.310
+++ sys/netinet/ip_carp.c 28 May 2017 13:50:25 -0000
@@ -1422,8 +1422,23 @@ carp_input(struct ifnet *ifp0, struct mb
(IFF_UP|IFF_RUNNING))
continue;
- if (carp_vhe_match(sc, eh->ether_dhost))
+ if (carp_vhe_match(sc, eh->ether_dhost)) {
+ /*
+ * These packets look like layer 2 multicast but they
+ * are unicast at layer 3. With help of the tag the
+ * mbuf's M_MCAST flag can be removed by carp_lsdrop()
+ * after we have passed layer 2.
+ */
+ if (sc->sc_balancing == CARP_BAL_IP) {
+ struct m_tag *mtag;
+ mtag = m_tag_get(PACKET_TAG_CARP_BAL_IP, 0,
+ M_NOWAIT);
+ if (mtag == NULL)
+ return (0);
+ m_tag_prepend(m, mtag);
+ }
break;
+ }
}
if (sc == NULL) {
@@ -1456,13 +1471,6 @@ carp_input(struct ifnet *ifp0, struct mb
return (0);
}
- /*
- * Clear mcast if received on a carp IP balanced address.
- */
- if (sc->sc_balancing == CARP_BAL_IP &&
- ETHER_IS_MULTICAST(eh->ether_dhost))
- *(eh->ether_dhost) &= ~0x01;
-
ml_enqueue(&ml, m);
if_input(&sc->sc_if, &ml);
SRPL_LEAVE(&sr);
@@ -1471,12 +1479,14 @@ carp_input(struct ifnet *ifp0, struct mb
}
int
-carp_lsdrop(struct mbuf *m, sa_family_t af, u_int32_t *src, u_int32_t *dst)
+carp_lsdrop(struct mbuf *m, sa_family_t af, u_int32_t *src, u_int32_t *dst,
+ int drop)
{
struct ifnet *ifp;
struct carp_softc *sc;
int match = 1;
u_int32_t fold;
+ struct m_tag *mtag;
ifp = if_get(m->m_pkthdr.ph_ifidx);
KASSERT(ifp != NULL);
@@ -1484,6 +1494,25 @@ carp_lsdrop(struct mbuf *m, sa_family_t
sc = ifp->if_softc;
if (sc->sc_balancing == CARP_BAL_NONE)
goto done;
+
+ /*
+ * Remove M_MCAST flag from mbuf of balancing ip traffic, since the fact
+ * that it is layer 2 multicast does not implicate that it is also layer
+ * 3 multicast.
+ */
+ if (m->m_flags & M_MCAST &&
+ (mtag = m_tag_find(m, PACKET_TAG_CARP_BAL_IP, NULL))) {
+ m_tag_delete(m, mtag);
+ m->m_flags &= ~M_MCAST;
+ }
+
+ /*
+ * Return without making a drop decision. This allows to clear the
+ * M_MCAST flag and do nothing else.
+ */
+ if (!drop)
+ goto done;
+
/*
* Never drop carp advertisements.
* XXX Bad idea to pass all broadcast / multicast traffic?
Index: sys/netinet/ip_carp.h
===================================================================
RCS file: /cvs/src/sys/netinet/ip_carp.h,v
retrieving revision 1.42
diff -u -p -r1.42 ip_carp.h
--- sys/netinet/ip_carp.h 14 Apr 2017 20:46:31 -0000 1.42
+++ sys/netinet/ip_carp.h 28 May 2017 13:50:25 -0000
@@ -204,6 +204,7 @@ struct ifnet *carp_ourether(void *, u_in
int carp_output(struct ifnet *, struct mbuf *, struct sockaddr *,
struct rtentry *);
int carp_sysctl(int *, u_int, void *, size_t *, void *, size_t);
-int carp_lsdrop(struct mbuf *, sa_family_t, u_int32_t *, u_int32_t
*);
+int carp_lsdrop(struct mbuf *, sa_family_t, u_int32_t *,
+ u_int32_t *, int);
#endif /* _KERNEL */
#endif /* _NETINET_IP_CARP_H_ */
Index: sys/netinet/ip_icmp.c
===================================================================
RCS file: /cvs/src/sys/netinet/ip_icmp.c,v
retrieving revision 1.168
diff -u -p -r1.168 ip_icmp.c
--- sys/netinet/ip_icmp.c 22 May 2017 14:26:08 -0000 1.168
+++ sys/netinet/ip_icmp.c 28 May 2017 13:50:25 -0000
@@ -498,7 +498,7 @@ icmp_input_if(struct ifnet *ifp, struct
#if NCARP > 0
if (ifp->if_type == IFT_CARP &&
carp_lsdrop(m, AF_INET, &sin.sin_addr.s_addr,
- &ip->ip_dst.s_addr))
+ &ip->ip_dst.s_addr, 1))
goto freeit;
#endif
/*
@@ -582,7 +582,7 @@ reflect:
#if NCARP > 0
if (ifp->if_type == IFT_CARP &&
carp_lsdrop(m, AF_INET, &ip->ip_src.s_addr,
- &ip->ip_dst.s_addr))
+ &ip->ip_dst.s_addr, 1))
goto freeit;
#endif
/* Free packet atttributes */
@@ -650,7 +650,7 @@ reflect:
#if NCARP > 0
if (ifp->if_type == IFT_CARP &&
carp_lsdrop(m, AF_INET, &sdst.sin_addr.s_addr,
- &ip->ip_dst.s_addr))
+ &ip->ip_dst.s_addr, 1))
goto freeit;
#endif
rtredirect(sintosa(&sdst), sintosa(&sgw),
Index: sys/netinet/ip_input.c
===================================================================
RCS file: /cvs/src/sys/netinet/ip_input.c,v
retrieving revision 1.304
diff -u -p -r1.304 ip_input.c
--- sys/netinet/ip_input.c 22 May 2017 22:23:11 -0000 1.304
+++ sys/netinet/ip_input.c 28 May 2017 13:50:25 -0000
@@ -319,8 +319,9 @@ ipv4_input(struct mbuf *m)
}
#if NCARP > 0
- if (ifp->if_type == IFT_CARP && ip->ip_p != IPPROTO_ICMP &&
- carp_lsdrop(m, AF_INET, &ip->ip_src.s_addr, &ip->ip_dst.s_addr))
+ if (ifp->if_type == IFT_CARP &&
+ carp_lsdrop(m, AF_INET, &ip->ip_src.s_addr, &ip->ip_dst.s_addr,
+ (ip->ip_p == IPPROTO_ICMP ? 0 : 1)))
goto bad;
#endif
@@ -427,7 +428,7 @@ ipv4_input(struct mbuf *m)
#if NCARP > 0
if (ifp->if_type == IFT_CARP && ip->ip_p == IPPROTO_ICMP &&
- carp_lsdrop(m, AF_INET, &ip->ip_src.s_addr, &ip->ip_dst.s_addr))
+ carp_lsdrop(m, AF_INET, &ip->ip_src.s_addr, &ip->ip_dst.s_addr, 1))
goto bad;
#endif
/*
Index: sys/netinet6/icmp6.c
===================================================================
RCS file: /cvs/src/sys/netinet6/icmp6.c,v
retrieving revision 1.210
diff -u -p -r1.210 icmp6.c
--- sys/netinet6/icmp6.c 8 May 2017 16:14:47 -0000 1.210
+++ sys/netinet6/icmp6.c 28 May 2017 13:50:25 -0000
@@ -446,7 +446,7 @@ icmp6_input(struct mbuf **mp, int *offp,
if (ifp->if_type == IFT_CARP &&
icmp6->icmp6_type == ICMP6_ECHO_REQUEST &&
carp_lsdrop(m, AF_INET6, ip6->ip6_src.s6_addr32,
- ip6->ip6_dst.s6_addr32)) {
+ ip6->ip6_dst.s6_addr32, 1)) {
if_put(ifp);
goto freeit;
}
Index: sys/netinet6/ip6_input.c
===================================================================
RCS file: /cvs/src/sys/netinet6/ip6_input.c,v
retrieving revision 1.189
diff -u -p -r1.189 ip6_input.c
--- sys/netinet6/ip6_input.c 23 May 2017 08:13:10 -0000 1.189
+++ sys/netinet6/ip6_input.c 28 May 2017 13:50:25 -0000
@@ -207,9 +207,9 @@ ip6_input(struct mbuf *m)
}
#if NCARP > 0
- if (ifp->if_type == IFT_CARP && ip6->ip6_nxt != IPPROTO_ICMPV6 &&
+ if (ifp->if_type == IFT_CARP &&
carp_lsdrop(m, AF_INET6, ip6->ip6_src.s6_addr32,
- ip6->ip6_dst.s6_addr32))
+ ip6->ip6_dst.s6_addr32, (ip6->ip6_nxt == IPPROTO_ICMPV6 ? 0 : 1)))
goto bad;
#endif
ip6stat_inc(ip6s_nxthist + ip6->ip6_nxt);
@@ -448,7 +448,7 @@ ip6_input(struct mbuf *m)
#if NCARP > 0
if (ifp->if_type == IFT_CARP && ip6->ip6_nxt == IPPROTO_ICMPV6 &&
carp_lsdrop(m, AF_INET6, ip6->ip6_src.s6_addr32,
- ip6->ip6_dst.s6_addr32))
+ ip6->ip6_dst.s6_addr32, 1))
goto bad;
#endif
/*
Index: sys/sys/mbuf.h
===================================================================
RCS file: /cvs/src/sys/sys/mbuf.h,v
retrieving revision 1.228
diff -u -p -r1.228 mbuf.h
--- sys/sys/mbuf.h 16 May 2017 15:57:03 -0000 1.228
+++ sys/sys/mbuf.h 28 May 2017 13:50:25 -0000
@@ -485,11 +485,12 @@ struct m_tag *m_tag_next(struct mbuf *,
#define PACKET_TAG_PF_REASSEMBLED 0x0800 /* pf reassembled ipv6 packet */
#define PACKET_TAG_SRCROUTE 0x1000 /* IPv4 source routing options */
#define PACKET_TAG_TUNNEL 0x2000 /* Tunnel endpoint address */
+#define PACKET_TAG_CARP_BAL_IP 0x4000 /* carp(4) ip balanced marker */
#define MTAG_BITS \
("\20\1IPSEC_IN_DONE\2IPSEC_OUT_DONE\3IPSEC_IN_CRYPTO_DONE" \
"\4IPSEC_OUT_CRYPTO_NEEDED\5IPSEC_PENDING_TDB\6BRIDGE\7GIF\10GRE\11DLT" \
- "\12PF_DIVERT\14PF_REASSEMBLED\15SRCROUTE\16TUNNEL")
+ "\12PF_DIVERT\14PF_REASSEMBLED\15SRCROUTE\16TUNNEL\17CARP_BAL_IP")
/*
* Maximum tag payload length (that is excluding the m_tag structure).