[patch] bridge_fragment can leak mbufs

Karim Fodil-Lemelin Fri, 16 Sep 2016 12:27:07 -0700

Hello,

It has come to my attention that when bridge_fragment() has to fragmentan IP packet it can cause mbuf leaks if, for example m_pullup() ends upallocating a new buffer at the front of the mbuf chain or if the callsto M_PREPEND would fail due to the mbuf pool being depleted.

One can easily convince himself that something is wrong by observingthat, in bridge_pfil(), bridge_fragment() gets passed an mbuf pointerand if the call to m_pullup() in bridge_fragment ends up allocating anew mbuf at the front of the chain, then whatever happens next isn'tgoing to give bridge_pfil() the updated mbuf pointer. You can see belowthis is why I had to change the arguments to bridge_fragment to take adouble pointer to the mbuf.

The problem actually goes a lot deeper since, after having calledip_fragment(), bridge_fragment() simply goes over the list of packets itgot and carelessly calls into M_PREPEND() to make space for the ethernetheader. Now this isn't a problem if M_PREPEND() can find the space atthe beginning of the mbuf but will wreak havoc if it must take the leastused code path and prepend an mbuf to the chain. In this case we have m0pointing to a newly allocated mbuf but the list (walked throughm_nextpkt) will never get updated to point to m0 (the previous mbuf'sm_nextpkt pointer should now point to the new m0).

And it goes on, the error case isn't' handled properly since if MPREPENDdeleted m0 we not have a list of packets that is potentially pointed toan already freed mbuf. Nowhere can we see that m_nextpkt is updated totake care of that case. This is why in my patch below I detach each m0from the packet list so if a failure occurs in M_PREPEND of if a newmbuf is added at the beginning of the chain I can keep the list ofpackets updated.

Same thing with the goto out; at the end. If we lost a fragment whileadding ethernet header to it we must free the entire chain of packets orwe will leak mbufs. There is no point in sending fragments if we aren'tgoing to send them all.

To convince yourself of this problem, one can simple create, on hisfavorite bridge one 'reass' rule in ipfw. I use the following rule:


00080 253 27878 reass ip from any to any { proto udp or proto gre }

Then send (assuming your MTU is 1500), a big udp packet using iperf. Iuse this command:


 iperf -c 10.10.73.2 -u -b16Mb -t 240 -i 10 -l 5000

After iperf has finished, look at your mbuf count with netstat -m.

I have made a patch to fix this problem (attached). The patch was testedunder load when we actually do run out of mbuf to trigger the errorcases as well as with all sorts of number of fragments (varying theinitial packet size).

I see this seems to affect all modern versions of FreeBSD, please feelfree to test or contest. I would like to see this added to FreeBSDeventually in one shape or another.


Best regards,

Karim.

diff --git a/freebsd/sys/net/if_bridge.c b/freebsd/sys/net/if_bridge.c
index b9c3bca..a550bc1 100644
--- a/freebsd/sys/net/if_bridge.c
+++ b/freebsd/sys/net/if_bridge.c
@@ -344,7 +344,7 @@ static int	bridge_ip_checkbasic(struct mbuf **mp);
 #ifdef INET6
 static int	bridge_ip6_checkbasic(struct mbuf **mp);
 #endif /* INET6 */
-static int	bridge_fragment(struct ifnet *, struct mbuf *,
+static int	bridge_fragment(struct ifnet *, struct mbuf **,
 		    struct ether_header *, int, struct llc *);
 static void	bridge_linkstate(struct ifnet *ifp);
 static void	bridge_linkcheck(struct bridge_softc *sc);
@@ -4025,7 +4025,7 @@ ipfwpass:
 		if (pfil_member && ifp != NULL && dir == PFIL_OUT) {
 			i = (*mp)->m_pkthdr.len;
 			if (i > ifp->if_mtu) {
-				error = bridge_fragment(ifp, *mp, &eh2, snap,
+				error = bridge_fragment(ifp, mp, &eh2, snap,
 					    &llc1);
 				return (error);
 			}
@@ -4313,10 +4313,13 @@ bad:
  *	Return a fragmented mbuf chain.
  */
 static int
-bridge_fragment(struct ifnet *ifp, struct mbuf *m, struct ether_header *eh,
+bridge_fragment(struct ifnet *ifp, struct mbuf **mp, struct ether_header *eh,
     int snap, struct llc *llc)
 {
 	struct mbuf *m0;
+	struct mbuf *prev = NULL;
+	struct mbuf *next;
+	struct mbuf *m = *mp;
 	struct ip *ip;
 	int error = -1;
 
@@ -4324,42 +4327,78 @@ bridge_fragment(struct ifnet *ifp, struct mbuf *m, struct ether_header *eh,
 	    (m = m_pullup(m, sizeof(struct ip))) == NULL)
 		goto out;
 	ip = mtod(m, struct ip *);
+	/* m could have changed */
+	*mp = m;
 
 	error = ip_fragment(ip, &m, ifp->if_mtu, ifp->if_hwassist,
 		    CSUM_DELAY_IP);
 	if (error)
 		goto out;
 
-	/* walk the chain and re-add the Ethernet header */
-	for (m0 = m; m0; m0 = m0->m_nextpkt) {
-		if (error == 0) {
-			if (snap) {
-				M_PREPEND(m0, sizeof(struct llc), M_DONTWAIT);
-				if (m0 == NULL) {
-					error = ENOBUFS;
-					continue;
-				}
-				bcopy(llc, mtod(m0, caddr_t),
-				    sizeof(struct llc));
-			}
-			M_PREPEND(m0, ETHER_HDR_LEN, M_DONTWAIT);
+	/* m could have changed */
+	*mp = m;
+	m0 = m;
+	next = m->m_nextpkt;
+	/*
+	 * walk the chain and re-add the Ethernet header:
+	 *
+	 * We can't trust M_PREPEND to keep the list of packets consistent since it may
+	 * allocate new pkt headers to be put in front and won't copy m_nextpkt over.
+	 */
+	while (m0) {
+		/* disconnect from list of fragments the mbuf we may potentially free */
+		if (prev != NULL)
+			prev->m_nextpkt = NULL;
+
+		if (snap) {
+			M_PREPEND(m0, sizeof(struct llc), M_DONTWAIT);
 			if (m0 == NULL) {
 				error = ENOBUFS;
-				continue;
+				if (prev) /* there are pkt fragments before the one we lost */
+					prev->m_nextpkt = next;
+				else
+					m = next;
+				goto out;
 			}
-			bcopy(eh, mtod(m0, caddr_t), ETHER_HDR_LEN);
-		} else
-			m_freem(m);
+			bcopy(llc, mtod(m0, caddr_t),
+			    sizeof(struct llc));
+		}
+		M_PREPEND(m0, ETHER_HDR_LEN, M_DONTWAIT);
+		if (m0 == NULL) {
+			error = ENOBUFS;
+			if (prev) /* there are pkt fragments before the one we lost */
+				prev->m_nextpkt = next;
+			else
+				m = next;
+			goto out;
+		}
+		bcopy(eh, mtod(m0, caddr_t), ETHER_HDR_LEN);
+		/* m0 could have changed due to m_prepend */
+		if (prev != NULL)
+			prev->m_nextpkt = m0;
+		else
+			m = m0;	/* first fragment has no previous mbufs */
+		prev = m0;
+		m0 = next;
+		if (next)
+			next = next->m_nextpkt;
 	}
 
 	if (error == 0)
 		KMOD_IPSTAT_INC(ips_fragmented);
 
+	*mp = m;
 	return (error);
 
 out:
-	if (m != NULL)
+	/* free the whole list of potential fragments */
+	while (m) {
+		next = m->m_nextpkt;
 		m_freem(m);
+		m = next;
+	}
+
+	*mp = NULL;
 	return (error);
 }

_______________________________________________
freebsd-net@freebsd.org mailing list
https://lists.freebsd.org/mailman/listinfo/freebsd-net
To unsubscribe, send any mail to "freebsd-net-unsubscr...@freebsd.org"

[patch] bridge_fragment can leak mbufs

Reply via email to