Module Name:    src
Committed By:   rmind
Date:           Sun Oct  3 19:44:47 UTC 2010

Modified Files:
        src/sys/netinet: ip_reass.c

Log Message:
Re-structure IPv4 reassembly code to make it more MP-friendly and simplify
some code fragments while here.  Also, use pool_cache(9) and mutex(9).

IPv4 reassembly mechanism is MP-safe now.


To generate a diff of this commit:
cvs rdiff -u -r1.3 -r1.4 src/sys/netinet/ip_reass.c

Please note that diffs are not public domain; they are subject to the
copyright notices on the relevant files.

Modified files:

Index: src/sys/netinet/ip_reass.c
diff -u src/sys/netinet/ip_reass.c:1.3 src/sys/netinet/ip_reass.c:1.4
--- src/sys/netinet/ip_reass.c:1.3	Wed Aug 25 00:05:14 2010
+++ src/sys/netinet/ip_reass.c	Sun Oct  3 19:44:47 2010
@@ -1,4 +1,4 @@
-/*	$NetBSD: ip_reass.c,v 1.3 2010/08/25 00:05:14 rmind Exp $	*/
+/*	$NetBSD: ip_reass.c,v 1.4 2010/10/03 19:44:47 rmind Exp $	*/
 
 /*
  * Copyright (c) 1982, 1986, 1988, 1993
@@ -46,13 +46,14 @@
  */
 
 #include <sys/cdefs.h>
-__KERNEL_RCSID(0, "$NetBSD: ip_reass.c,v 1.3 2010/08/25 00:05:14 rmind Exp $");
+__KERNEL_RCSID(0, "$NetBSD: ip_reass.c,v 1.4 2010/10/03 19:44:47 rmind Exp $");
 
 #include <sys/param.h>
 #include <sys/types.h>
 
 #include <sys/malloc.h>
 #include <sys/mbuf.h>
+#include <sys/mutex.h>
 #include <sys/domain.h>
 #include <sys/protosw.h>
 #include <sys/pool.h>
@@ -107,8 +108,8 @@
 	(((((x) & 0xf) | ((((x) >> 8) & 0xf) << 4)) ^ (y)) & IPREASS_HASH_MASK)
 
 static LIST_HEAD(, ipfr_queue)	ip_frags[IPREASS_HASH_SIZE];
-static struct pool	ipqent_pool;
-static int		ipq_locked;
+static pool_cache_t	ipfren_cache;
+static kmutex_t		ipfr_lock;
 
 /* Number of packets in reassembly queue and total number of fragments. */
 static int		ip_nfragpackets;
@@ -129,6 +130,8 @@
  */
 static u_int		fragttl_histo[IPFRAGTTL + 1];
 
+static struct sysctllog *ip_reass_sysctllog;
+
 void			sysctl_ip_reass_setup(void);
 static void		ip_nmbclusters_changed(void);
 
@@ -147,8 +150,9 @@
 {
 	int i;
 
-	pool_init(&ipqent_pool, sizeof(ipfr_qent_t), 0, 0, 0, "ipqepl",
-	    NULL, IPL_VM);
+	ipfren_cache = pool_cache_init(sizeof(ipfr_qent_t), coherency_unit,
+	    0, 0, "ipfrenpl", NULL, IPL_NET, NULL, NULL, NULL);
+	mutex_init(&ipfr_lock, MUTEX_DEFAULT, IPL_SOFTNET);
 
 	for (i = 0; i < IPREASS_HASH_SIZE; i++) {
 		LIST_INIT(&ip_frags[i]);
@@ -160,8 +164,6 @@
 	sysctl_ip_reass_setup();
 }
 
-static struct sysctllog *ip_reass_sysctllog;
-
 void
 sysctl_ip_reass_setup(void)
 {
@@ -209,60 +211,6 @@
 	ip_nmbclusters = nmbclusters;
 }
 
-static inline int	ipq_lock_try(void);
-static inline void	ipq_unlock(void);
-
-static inline int
-ipq_lock_try(void)
-{
-	int s;
-
-	/*
-	 * Use splvm() -- we're blocking things that would cause
-	 * mbuf allocation.
-	 */
-	s = splvm();
-	if (ipq_locked) {
-		splx(s);
-		return (0);
-	}
-	ipq_locked = 1;
-	splx(s);
-	return (1);
-}
-
-static inline void
-ipq_unlock(void)
-{
-	int s;
-
-	s = splvm();
-	ipq_locked = 0;
-	splx(s);
-}
-
-#ifdef DIAGNOSTIC
-#define	IPQ_LOCK()							\
-do {									\
-	if (ipq_lock_try() == 0) {					\
-		printf("%s:%d: ipq already locked\n", __FILE__, __LINE__); \
-		panic("ipq_lock");					\
-	}								\
-} while (/*CONSTCOND*/ 0)
-#define	IPQ_LOCK_CHECK()						\
-do {									\
-	if (ipq_locked == 0) {						\
-		printf("%s:%d: ipq lock not held\n", __FILE__, __LINE__); \
-		panic("ipq lock check");				\
-	}								\
-} while (/*CONSTCOND*/ 0)
-#else
-#define	IPQ_LOCK()		(void) ipq_lock_try()
-#define	IPQ_LOCK_CHECK()	/* nothing */
-#endif
-
-#define	IPQ_UNLOCK()		ipq_unlock()
-
 /*
  * ip_reass:
  *
@@ -277,9 +225,9 @@
 	struct mbuf *m = ipqe->ipqe_m, *t;
 	ipfr_qent_t *nq, *p, *q;
 	struct ip *ip;
-	int i, next, s;
+	int i, next;
 
-	IPQ_LOCK_CHECK();
+	KASSERT(mutex_owned(&ipfr_lock));
 
 	/*
 	 * Presence of header sizes in mbufs would confuse code below.
@@ -385,9 +333,7 @@
 		nq = TAILQ_NEXT(q, ipqe_q);
 		m_freem(q->ipqe_m);
 		TAILQ_REMOVE(&fp->ipq_fragq, q, ipqe_q);
-		s = splvm();
-		pool_put(&ipqent_pool, q);
-		splx(s);
+		pool_cache_put(ipfren_cache, q);
 		fp->ipq_nfrags--;
 		ip_nfrags--;
 	}
@@ -405,44 +351,46 @@
 	for (p = NULL, q = TAILQ_FIRST(&fp->ipq_fragq); q != NULL;
 	    p = q, q = TAILQ_NEXT(q, ipqe_q)) {
 		if (ntohs(q->ipqe_ip->ip_off) != next) {
-			IPQ_UNLOCK();
+			mutex_exit(&ipfr_lock);
 			return NULL;
 		}
 		next += ntohs(q->ipqe_ip->ip_len);
 	}
 	if (p->ipqe_mff) {
-		IPQ_UNLOCK();
+		mutex_exit(&ipfr_lock);
 		return NULL;
 	}
 	/*
-	 * Reassembly is complete.  Check for a bogus message size and
-	 * concatenate fragments.
+	 * Reassembly is complete.  Check for a bogus message size.
 	 */
 	q = TAILQ_FIRST(&fp->ipq_fragq);
 	ip = q->ipqe_ip;
 	if ((next + (ip->ip_hl << 2)) > IP_MAXPACKET) {
 		IP_STATINC(IP_STAT_TOOLONG);
 		ip_freef(fp);
-		IPQ_UNLOCK();
+		mutex_exit(&ipfr_lock);
 		return NULL;
 	}
+	LIST_REMOVE(fp, ipq_q);
+	ip_nfrags -= fp->ipq_nfrags;
+	ip_nfragpackets--;
+	mutex_exit(&ipfr_lock);
+
+	/* Concatenate all fragments. */
 	m = q->ipqe_m;
 	t = m->m_next;
 	m->m_next = NULL;
 	m_cat(m, t);
 	nq = TAILQ_NEXT(q, ipqe_q);
-	s = splvm();
-	pool_put(&ipqent_pool, q);
-	splx(s);
+	pool_cache_put(ipfren_cache, q);
+
 	for (q = nq; q != NULL; q = nq) {
 		t = q->ipqe_m;
 		nq = TAILQ_NEXT(q, ipqe_q);
-		s = splvm();
-		pool_put(&ipqent_pool, q);
-		splx(s);
+		pool_cache_put(ipfren_cache, q);
 		m_cat(m, t);
 	}
-	ip_nfrags -= fp->ipq_nfrags;
+	free(fp, M_FTABLE);
 
 	/*
 	 * Create header for new packet by modifying header of first
@@ -453,13 +401,11 @@
 	ip->ip_src = fp->ipq_src;
 	ip->ip_dst = fp->ipq_dst;
 
-	LIST_REMOVE(fp, ipq_q);
-	free(fp, M_FTABLE);
-	ip_nfragpackets--;
 	m->m_len += (ip->ip_hl << 2);
 	m->m_data -= (ip->ip_hl << 2);
-	/* some debugging cruft by sklower, below, will go away soon */
-	if (m->m_flags & M_PKTHDR) { /* XXX this should be done elsewhere */
+
+	/* Fix up mbuf.  XXX This should be done elsewhere. */
+	if (m->m_flags & M_PKTHDR) {
 		int plen = 0;
 		for (t = m; t; t = t->m_next) {
 			plen += t->m_len;
@@ -467,7 +413,6 @@
 		m->m_pkthdr.len = plen;
 		m->m_pkthdr.csum_flags = 0;
 	}
-	IPQ_UNLOCK();
 	return m;
 
 dropfrag:
@@ -476,11 +421,10 @@
 	}
 	ip_nfrags--;
 	IP_STATINC(IP_STAT_FRAGDROPPED);
+	mutex_exit(&ipfr_lock);
+
+	pool_cache_put(ipfren_cache, ipqe);
 	m_freem(m);
-	s = splvm();
-	pool_put(&ipqent_pool, ipqe);
-	splx(s);
-	IPQ_UNLOCK();
 	return NULL;
 }
 
@@ -492,29 +436,20 @@
 static void
 ip_freef(ipfr_queue_t *fp)
 {
-	ipfr_qent_t *q, *p;
-	u_int nfrags = 0;
-	int s;
+	ipfr_qent_t *q;
 
-	IPQ_LOCK_CHECK();
+	KASSERT(mutex_owned(&ipfr_lock));
 
-	for (q = TAILQ_FIRST(&fp->ipq_fragq); q != NULL; q = p) {
-		p = TAILQ_NEXT(q, ipqe_q);
-		m_freem(q->ipqe_m);
-		nfrags++;
-		TAILQ_REMOVE(&fp->ipq_fragq, q, ipqe_q);
-		s = splvm();
-		pool_put(&ipqent_pool, q);
-		splx(s);
-	}
+	LIST_REMOVE(fp, ipq_q);
+	ip_nfrags -= fp->ipq_nfrags;
+	ip_nfragpackets--;
 
-	if (nfrags != fp->ipq_nfrags) {
-		printf("ip_freef: nfrags %d != %d\n", fp->ipq_nfrags, nfrags);
+	while ((q = TAILQ_FIRST(&fp->ipq_fragq)) != NULL) {
+		TAILQ_REMOVE(&fp->ipq_fragq, q, ipqe_q);
+		m_freem(q->ipqe_m);
+		pool_cache_put(ipfren_cache, q);
 	}
-	ip_nfrags -= nfrags;
-	LIST_REMOVE(fp, ipq_q);
 	free(fp, M_FTABLE);
-	ip_nfragpackets--;
 }
 
 /*
@@ -571,6 +506,8 @@
 {
 	u_int median_ticks;
 
+	KASSERT(mutex_owned(&ipfr_lock));
+
 	/*
 	 * Compute median TTL of all fragments, and count frags
 	 * with that TTL or lower (roughly half of all fragments).
@@ -593,13 +530,13 @@
 	 * We may be called from a device's interrupt context.  If
 	 * the ipq is already busy, just bail out now.
 	 */
-	if (ipq_lock_try() != 0) {
+	if (mutex_tryenter(&ipfr_lock)) {
 		/*
 		 * Drop half the total fragments now. If more mbufs are
 		 * needed, we will be called again soon.
 		 */
 		ip_reass_drophalf();
-		IPQ_UNLOCK();
+		mutex_exit(&ipfr_lock);
 	}
 }
 
@@ -614,7 +551,7 @@
 	static u_int dropscanidx = 0;
 	u_int i, median_ttl;
 
-	IPQ_LOCK();
+	mutex_enter(&ipfr_lock);
 
 	/* Age TTL of all fragments by 1 tick .*/
 	median_ttl = ip_reass_ttl_decr(1);
@@ -656,7 +593,7 @@
 		}
 		dropscanidx = i;
 	}
-	IPQ_UNLOCK();
+	mutex_exit(&ipfr_lock);
 }
 
 /*
@@ -676,7 +613,7 @@
 	u_int hash;
 
 	/* Look for queue of fragments of this datagram. */
-	IPQ_LOCK();
+	mutex_enter(&ipfr_lock);
 	hash = IPREASS_HASH(ip->ip_src.s_addr, ip->ip_id);
 	LIST_FOREACH(fp, &ip_frags[hash], ipq_q) {
 		if (ip->ip_id != fp->ipq_id)
@@ -693,7 +630,7 @@
 	/* Make sure that TOS matches previous fragments. */
 	if (fp && fp->ipq_tos != ip->ip_tos) {
 		IP_STATINC(IP_STAT_BADFRAGS);
-		IPQ_UNLOCK();
+		mutex_exit(&ipfr_lock);
 		return EINVAL;
 	}
 
@@ -701,12 +638,10 @@
 	 * Create new entry and attempt to reassembly.
 	 */
 	IP_STATINC(IP_STAT_FRAGMENTS);
-	int s = splvm();
-	ipqe = pool_get(&ipqent_pool, PR_NOWAIT);
-	splx(s);
+	ipqe = pool_cache_get(ipfren_cache, PR_NOWAIT);
 	if (ipqe == NULL) {
 		IP_STATINC(IP_STAT_RCVMEMDROP);
-		IPQ_UNLOCK();
+		mutex_exit(&ipfr_lock);
 		return ENOMEM;
 	}
 	ipqe->ipqe_mff = mff;

Reply via email to