The following diff replaces the existing hash table with a red black
tree, to the acclaim of network hackers worldwide.
This has already received some testing, but needs some more shakedown
in order to make sure that nobody gets hosed by this. Folks with
more esoteric (read: vether and/or gif) setups are especially encouraged
to test, and may receive cookies.
- Bert
Index: if_bridge.h
===================================================================
RCS file: /cvs/src/sys/net/if_bridge.h,v
retrieving revision 1.32
diff -u -p -r1.32 if_bridge.h
--- if_bridge.h 28 Oct 2010 13:49:54 -0000 1.32
+++ if_bridge.h 29 Oct 2010 19:45:38 -0000
@@ -397,7 +397,7 @@ struct bridge_iflist {
* Bridge route node
*/
struct bridge_rtnode {
- LIST_ENTRY(bridge_rtnode) brt_next; /* next in list */
+ RB_ENTRY(bridge_rtnode) brt_entry; /* entry in rb tree */
struct ifnet *brt_if; /* destination ifs */
u_int8_t brt_flags; /* address flags */
u_int8_t brt_age; /* age counter */
@@ -423,7 +423,7 @@ struct bridge_softc {
struct timeout sc_brtimeout; /* timeout state */
struct bstp_state *sc_stp; /* stp state */
LIST_HEAD(, bridge_iflist) sc_iflist; /* interface list */
- LIST_HEAD(, bridge_rtnode) sc_rts[BRIDGE_RTABLE_SIZE]; /* hash
table */
+ RB_HEAD(brt_tree, bridge_rtnode) sc_rts; /* route tree */
LIST_HEAD(, bridge_iflist) sc_spanlist; /* span ports */
};
Index: if_bridge.c
===================================================================
RCS file: /cvs/src/sys/net/if_bridge.c,v
retrieving revision 1.186
diff -u -p -r1.186 if_bridge.c
--- if_bridge.c 28 Oct 2010 19:00:57 -0000 1.186
+++ if_bridge.c 29 Oct 2010 19:45:38 -0000
@@ -45,6 +45,7 @@
#include <sys/ioctl.h>
#include <sys/errno.h>
#include <sys/kernel.h>
+#include <sys/tree.h>
#include <machine/cpu.h>
#include <net/if.h>
@@ -135,7 +136,7 @@ int bridge_rtfind(struct bridge_softc *,
void bridge_rtage(struct bridge_softc *);
void bridge_rttrim(struct bridge_softc *);
int bridge_rtdaddr(struct bridge_softc *, struct ether_addr *);
-int bridge_rtflush(struct bridge_softc *, int);
+void bridge_rtflush(struct bridge_softc *, int);
struct ifnet * bridge_rtupdate(struct bridge_softc *,
struct ether_addr *, struct ifnet *ifp, int, u_int8_t);
struct ifnet * bridge_rtlookup(struct bridge_softc *,
@@ -180,6 +181,16 @@ LIST_HEAD(, bridge_softc) bridge_list;
struct if_clone bridge_cloner =
IF_CLONE_INITIALIZER("bridge", bridge_clone_create, bridge_clone_destroy);
+static __inline int brt_cmp(struct bridge_rtnode *, struct bridge_rtnode *);
+RB_PROTOTYPE(brt_tree, bridge_rtnode, brt_entry, brt_cmp);
+RB_GENERATE(brt_tree, bridge_rtnode, brt_entry, brt_cmp);
+
+static __inline int
+brt_cmp(struct bridge_rtnode *a, struct bridge_rtnode *b)
+{
+ return (memcmp(&a->brt_addr, &b->brt_addr, sizeof(a->brt_addr)));
+}
+
/* ARGSUSED */
void
bridgeattach(int n)
@@ -194,7 +205,7 @@ bridge_clone_create(struct if_clone *ifc
{
struct bridge_softc *sc;
struct ifnet *ifp;
- int i, s;
+ int s;
sc = malloc(sizeof(*sc), M_DEVBUF, M_NOWAIT|M_ZERO);
if (!sc)
@@ -211,8 +222,7 @@ bridge_clone_create(struct if_clone *ifc
timeout_set(&sc->sc_brtimeout, bridge_timer, sc);
LIST_INIT(&sc->sc_iflist);
LIST_INIT(&sc->sc_spanlist);
- for (i = 0; i < BRIDGE_RTABLE_SIZE; i++)
- LIST_INIT(&sc->sc_rts[i]);
+ RB_INIT(&sc->sc_rts);
sc->sc_hashkey = arc4random();
ifp = &sc->sc_if;
snprintf(ifp->if_xname, sizeof ifp->if_xname, "%s%d", ifc->ifc_name,
@@ -560,7 +570,7 @@ bridge_ioctl(struct ifnet *ifp, u_long c
if ((error = suser(curproc, 0)) != 0)
break;
- error = bridge_rtflush(sc, req->ifbr_ifsflags);
+ bridge_rtflush(sc, req->ifbr_ifsflags);
break;
case SIOCBRDGSADDR:
if ((error = suser(curproc, 0)) != 0)
@@ -1003,7 +1013,7 @@ bridge_output(struct ifnet *ifp, struct
struct ifnet *dst_if;
struct ether_addr *dst;
struct bridge_softc *sc;
- int s, error, len;
+ int s, error;
#ifdef IPSEC
struct m_tag *mtag;
#endif /* IPSEC */
@@ -1109,33 +1119,12 @@ bridge_output(struct ifnet *ifp, struct
used = 1;
mc = m;
} else {
- struct mbuf *m1, *m2, *mx;
-
- m1 = m_copym2(m, 0, ETHER_HDR_LEN,
- M_DONTWAIT);
- if (m1 == NULL) {
- sc->sc_if.if_oerrors++;
- continue;
- }
- m2 = m_copym2(m, ETHER_HDR_LEN,
+ mc = m_copym2(m, 0,
M_COPYALL, M_DONTWAIT);
- if (m2 == NULL) {
- m_freem(m1);
+ if (mc == NULL) {
sc->sc_if.if_oerrors++;
continue;
}
-
- for (mx = m1; mx->m_next != NULL; mx =
mx->m_next)
- /*EMPTY*/;
- mx->m_next = m2;
-
- if (m1->m_flags & M_PKTHDR) {
- len = 0;
- for (mx = m1; mx != NULL; mx =
mx->m_next)
- len += mx->m_len;
- m1->m_pkthdr.len = len;
- }
- mc = m1;
}
error = bridge_ifenqueue(sc, dst_if, mc);
@@ -1762,151 +1751,46 @@ struct ifnet *
bridge_rtupdate(struct bridge_softc *sc, struct ether_addr *ea,
struct ifnet *ifp, int setflags, u_int8_t flags)
{
- struct bridge_rtnode *p, *q;
- u_int32_t h;
- int dir;
-
- h = bridge_hash(sc, ea);
- p = LIST_FIRST(&sc->sc_rts[h]);
- if (p == LIST_END(&sc->sc_rts[h])) {
+ struct bridge_rtnode *brt, tmp;
+
+ bcopy(ea, &tmp.brt_addr, sizeof(*ea));
+
+ if ((brt = RB_FIND(brt_tree, &sc->sc_rts, &tmp)) == NULL) {
+
if (sc->sc_brtcnt >= sc->sc_brtmax)
- goto done;
- p = malloc(sizeof(*p), M_DEVBUF, M_NOWAIT);
- if (p == NULL)
- goto done;
+ goto fail;
+ if ((brt = malloc(sizeof(*brt), M_DEVBUF, M_NOWAIT)) == NULL)
+ goto fail;
- bcopy(ea, &p->brt_addr, sizeof(p->brt_addr));
- p->brt_if = ifp;
- p->brt_age = 1;
+ bcopy(ea, &brt->brt_addr, sizeof(brt->brt_addr));
+ brt->brt_if = ifp;
+ brt->brt_age = 1;
if (setflags)
- p->brt_flags = flags;
+ brt->brt_flags = flags;
else
- p->brt_flags = IFBAF_DYNAMIC;
+ brt->brt_flags = IFBAF_DYNAMIC;
- LIST_INSERT_HEAD(&sc->sc_rts[h], p, brt_next);
+ RB_INSERT(brt_tree, &sc->sc_rts, brt);
sc->sc_brtcnt++;
- goto want;
}
- do {
- q = p;
- p = LIST_NEXT(p, brt_next);
-
- dir = memcmp(ea, &q->brt_addr, sizeof(q->brt_addr));
- if (dir == 0) {
- if (setflags) {
- q->brt_if = ifp;
- q->brt_flags = flags;
- } else if (!(q->brt_flags & IFBAF_STATIC))
- q->brt_if = ifp;
-
- if (q->brt_if == ifp)
- q->brt_age = 1;
- ifp = q->brt_if;
- goto want;
- }
-
- if (dir > 0) {
- if (sc->sc_brtcnt >= sc->sc_brtmax)
- goto done;
- p = malloc(sizeof(*p), M_DEVBUF, M_NOWAIT);
- if (p == NULL)
- goto done;
-
- bcopy(ea, &p->brt_addr, sizeof(p->brt_addr));
- p->brt_if = ifp;
- p->brt_age = 1;
-
- if (setflags)
- p->brt_flags = flags;
- else
- p->brt_flags = IFBAF_DYNAMIC;
-
- LIST_INSERT_BEFORE(q, p, brt_next);
- sc->sc_brtcnt++;
- goto want;
- }
-
- if (p == LIST_END(&sc->sc_rts[h])) {
- if (sc->sc_brtcnt >= sc->sc_brtmax)
- goto done;
- p = malloc(sizeof(*p), M_DEVBUF, M_NOWAIT);
- if (p == NULL)
- goto done;
-
- bcopy(ea, &p->brt_addr, sizeof(p->brt_addr));
- p->brt_if = ifp;
- p->brt_age = 1;
-
- if (setflags)
- p->brt_flags = flags;
- else
- p->brt_flags = IFBAF_DYNAMIC;
- LIST_INSERT_AFTER(q, p, brt_next);
- sc->sc_brtcnt++;
- goto want;
- }
- } while (p != LIST_END(&sc->sc_rts[h]));
-
-done:
- ifp = NULL;
-want:
- return (ifp);
+ return (brt->brt_if);
+fail:
+ return (NULL);
}
struct ifnet *
bridge_rtlookup(struct bridge_softc *sc, struct ether_addr *ea)
{
- struct bridge_rtnode *p;
- u_int32_t h;
- int dir;
-
- h = bridge_hash(sc, ea);
- LIST_FOREACH(p, &sc->sc_rts[h], brt_next) {
- dir = memcmp(ea, &p->brt_addr, sizeof(p->brt_addr));
- if (dir == 0)
- return (p->brt_if);
- if (dir > 0)
- goto fail;
- }
-fail:
- return (NULL);
-}
+ struct bridge_rtnode *brt, tmp;
-/*
- * The following hash function is adapted from 'Hash Functions' by Bob Jenkins
- * ("Algorithm Alley", Dr. Dobbs Journal, September 1997).
- * "You may use this code any way you wish, private, educational, or
- * commercial. It's free."
- */
-#define mix(a,b,c) \
- do { \
- a -= b; a -= c; a ^= (c >> 13); \
- b -= c; b -= a; b ^= (a << 8); \
- c -= a; c -= b; c ^= (b >> 13); \
- a -= b; a -= c; a ^= (c >> 12); \
- b -= c; b -= a; b ^= (a << 16); \
- c -= a; c -= b; c ^= (b >> 5); \
- a -= b; a -= c; a ^= (c >> 3); \
- b -= c; b -= a; b ^= (a << 10); \
- c -= a; c -= b; c ^= (b >> 15); \
- } while (0)
-
-u_int32_t
-bridge_hash(struct bridge_softc *sc, struct ether_addr *addr)
-{
- u_int32_t a = 0x9e3779b9, b = 0x9e3779b9, c = sc->sc_hashkey;
-
- b += addr->ether_addr_octet[5] << 8;
- b += addr->ether_addr_octet[4];
- a += addr->ether_addr_octet[3] << 24;
- a += addr->ether_addr_octet[2] << 16;
- a += addr->ether_addr_octet[1] << 8;
- a += addr->ether_addr_octet[0];
+ bcopy(ea, &tmp.brt_addr, sizeof(*ea));
+
+ if ((brt = RB_FIND(brt_tree, &sc->sc_rts, &tmp)))
+ return (brt->brt_if);
- mix(a, b, c);
- return (c & BRIDGE_RTABLE_MASK);
+ return (NULL);
}
/*
@@ -1917,7 +1801,6 @@ void
bridge_rttrim(struct bridge_softc *sc)
{
struct bridge_rtnode *n, *p;
- int i;
/*
* Make sure we have to trim the address table
@@ -1933,18 +1816,14 @@ bridge_rttrim(struct bridge_softc *sc)
if (sc->sc_brtcnt <= sc->sc_brtmax)
return;
- for (i = 0; i < BRIDGE_RTABLE_SIZE; i++) {
- n = LIST_FIRST(&sc->sc_rts[i]);
- while (n != LIST_END(&sc->sc_rts[i])) {
- p = LIST_NEXT(n, brt_next);
- if ((n->brt_flags & IFBAF_TYPEMASK) == IFBAF_DYNAMIC) {
- LIST_REMOVE(n, brt_next);
- sc->sc_brtcnt--;
- free(n, M_DEVBUF);
- if (sc->sc_brtcnt <= sc->sc_brtmax)
- return;
- }
- n = p;
+ for (n = RB_MIN(brt_tree, &sc->sc_rts); n != NULL; n = p) {
+ p = RB_NEXT(brt_tree, &sc->sc_rts, n);
+ if ((n->brt_flags & IFBAF_TYPEMASK) == IFBAF_DYNAMIC) {
+ RB_REMOVE(brt_tree, &sc->sc_rts, n);
+ sc->sc_brtcnt--;
+ free(n, M_DEVBUF);
+ if (sc->sc_brtcnt <= sc->sc_brtmax)
+ return;
}
}
}
@@ -1967,26 +1846,19 @@ void
bridge_rtage(struct bridge_softc *sc)
{
struct bridge_rtnode *n, *p;
- int i;
- for (i = 0; i < BRIDGE_RTABLE_SIZE; i++) {
- n = LIST_FIRST(&sc->sc_rts[i]);
- while (n != LIST_END(&sc->sc_rts[i])) {
- if ((n->brt_flags & IFBAF_TYPEMASK) == IFBAF_STATIC) {
- n->brt_age = !n->brt_age;
- if (n->brt_age)
- n->brt_age = 0;
- n = LIST_NEXT(n, brt_next);
- } else if (n->brt_age) {
+ for (n = RB_MIN(brt_tree, &sc->sc_rts); n != NULL; n = p) {
+ p = RB_NEXT(brt_tree, &sc->sc_rts, n);
+ if ((n->brt_flags & IFBAF_TYPEMASK) == IFBAF_STATIC) {
+ n->brt_age = !n->brt_age;
+ if (n->brt_age)
n->brt_age = 0;
- n = LIST_NEXT(n, brt_next);
- } else {
- p = LIST_NEXT(n, brt_next);
- LIST_REMOVE(n, brt_next);
- sc->sc_brtcnt--;
- free(n, M_DEVBUF);
- n = p;
- }
+ } else if (n->brt_age) {
+ n->brt_age = 0;
+ } else {
+ RB_REMOVE(brt_tree, &sc->sc_rts, n);
+ sc->sc_brtcnt--;
+ free(n, M_DEVBUF);
}
}
@@ -1999,7 +1871,6 @@ bridge_rtagenode(struct ifnet *ifp, int
{
struct bridge_softc *sc = (struct bridge_softc *)ifp->if_bridge;
struct bridge_rtnode *n;
- int i;
if (sc == NULL)
return;
@@ -2011,14 +1882,12 @@ bridge_rtagenode(struct ifnet *ifp, int
if (age == 0)
bridge_rtdelete(sc, ifp, 1);
else {
- for (i = 0; i < BRIDGE_RTABLE_SIZE; i++) {
- LIST_FOREACH(n, &sc->sc_rts[i], brt_next) {
- /* Cap the expiry time to 'age' */
- if (n->brt_if == ifp &&
- n->brt_age > time_uptime + age &&
- (n->brt_flags & IFBAF_TYPEMASK) ==
IFBAF_DYNAMIC)
- n->brt_age = time_uptime + age;
- }
+ RB_FOREACH(n, brt_tree, &sc->sc_rts) {
+ /* Cap the expiry time to 'age' */
+ if (n->brt_if == ifp &&
+ n->brt_age > time_uptime + age &&
+ (n->brt_flags & IFBAF_TYPEMASK) == IFBAF_DYNAMIC)
+ n->brt_age = time_uptime + age;
}
}
}
@@ -2028,28 +1897,20 @@ bridge_rtagenode(struct ifnet *ifp, int
/*
* Remove all dynamic addresses from the cache
*/
-int
+void
bridge_rtflush(struct bridge_softc *sc, int full)
{
- int i;
struct bridge_rtnode *p, *n;
- for (i = 0; i < BRIDGE_RTABLE_SIZE; i++) {
- n = LIST_FIRST(&sc->sc_rts[i]);
- while (n != LIST_END(&sc->sc_rts[i])) {
- if (full ||
- (n->brt_flags & IFBAF_TYPEMASK) == IFBAF_DYNAMIC) {
- p = LIST_NEXT(n, brt_next);
- LIST_REMOVE(n, brt_next);
- sc->sc_brtcnt--;
- free(n, M_DEVBUF);
- n = p;
- } else
- n = LIST_NEXT(n, brt_next);
+ for (n = RB_MIN(brt_tree, &sc->sc_rts); n != NULL; n = p) {
+ p = RB_NEXT(brt_tree, &sc->sc_rts, n);
+
+ if (full || (n->brt_flags & IFBAF_TYPEMASK) == IFBAF_DYNAMIC) {
+ RB_REMOVE(brt_tree, &sc->sc_rts, n);
+ sc->sc_brtcnt--;
+ free(n, M_DEVBUF);
}
}
-
- return (0);
}
/*
@@ -2058,54 +1919,44 @@ bridge_rtflush(struct bridge_softc *sc,
int
bridge_rtdaddr(struct bridge_softc *sc, struct ether_addr *ea)
{
- int h;
- struct bridge_rtnode *p;
+ struct bridge_rtnode *brt, tmp;
- h = bridge_hash(sc, ea);
- LIST_FOREACH(p, &sc->sc_rts[h], brt_next) {
- if (bcmp(ea, &p->brt_addr, sizeof(p->brt_addr)) == 0) {
- LIST_REMOVE(p, brt_next);
- sc->sc_brtcnt--;
- free(p, M_DEVBUF);
- return (0);
- }
- }
+ bcopy(ea, &tmp.brt_addr, sizeof(*ea));
+
+ if ((brt = RB_FIND(brt_tree, &sc->sc_rts, &tmp)) == NULL)
+ return (ENOENT);
- return (ENOENT);
+ RB_REMOVE(brt_tree, &sc->sc_rts, brt);
+ sc->sc_brtcnt--;
+ free(brt, M_DEVBUF);
+
+ return (0);
}
+
/*
* Delete routes to a specific interface member.
*/
void
bridge_rtdelete(struct bridge_softc *sc, struct ifnet *ifp, int dynonly)
{
- int i;
struct bridge_rtnode *n, *p;
/*
- * Loop through all of the hash buckets and traverse each
- * chain looking for routes to this interface.
+ * traverse tree looking for routes to this interface.
*/
- for (i = 0; i < BRIDGE_RTABLE_SIZE; i++) {
- n = LIST_FIRST(&sc->sc_rts[i]);
- while (n != LIST_END(&sc->sc_rts[i])) {
- if (n->brt_if != ifp) {
- /* Not ours */
- n = LIST_NEXT(n, brt_next);
- continue;
- }
- if (dynonly &&
- (n->brt_flags & IFBAF_TYPEMASK) != IFBAF_DYNAMIC) {
- /* only deleting dynamics */
- n = LIST_NEXT(n, brt_next);
- continue;
- }
- p = LIST_NEXT(n, brt_next);
- LIST_REMOVE(n, brt_next);
- sc->sc_brtcnt--;
- free(n, M_DEVBUF);
- n = p;
- }
+ for (n = RB_MIN(brt_tree, &sc->sc_rts); n != NULL; n = p) {
+ p = RB_NEXT(brt_tree, &sc->sc_rts, n);
+
+ if (n->brt_if != ifp)
+ continue;
+
+ if (dynonly &&
+ (n->brt_flags & IFBAF_TYPEMASK) != IFBAF_DYNAMIC)
+ continue;
+
+ RB_REMOVE(brt_tree, &sc->sc_rts, n);
+ sc->sc_brtcnt--;
+ free(n, M_DEVBUF);
}
}
@@ -2115,35 +1966,33 @@ bridge_rtdelete(struct bridge_softc *sc,
int
bridge_rtfind(struct bridge_softc *sc, struct ifbaconf *baconf)
{
- int i, error = 0, onlycnt = 0;
- u_int32_t cnt = 0;
- struct bridge_rtnode *n;
struct ifbareq bareq;
+ struct bridge_rtnode *brt;
+ int error = 0, onlycnt = 0;
+ u_int32_t cnt = 0;
if (baconf->ifbac_len == 0)
onlycnt = 1;
- for (i = 0, cnt = 0; i < BRIDGE_RTABLE_SIZE; i++) {
- LIST_FOREACH(n, &sc->sc_rts[i], brt_next) {
- if (!onlycnt) {
- if (baconf->ifbac_len < sizeof(struct ifbareq))
- goto done;
- bcopy(sc->sc_if.if_xname, bareq.ifba_name,
- sizeof(bareq.ifba_name));
- bcopy(n->brt_if->if_xname, bareq.ifba_ifsname,
- sizeof(bareq.ifba_ifsname));
- bcopy(&n->brt_addr, &bareq.ifba_dst,
- sizeof(bareq.ifba_dst));
- bareq.ifba_age = n->brt_age;
- bareq.ifba_flags = n->brt_flags;
- error = copyout((caddr_t)&bareq,
- (caddr_t)(baconf->ifbac_req + cnt),
sizeof(bareq));
- if (error)
- goto done;
- baconf->ifbac_len -= sizeof(struct ifbareq);
- }
- cnt++;
+ RB_FOREACH(brt, brt_tree, &sc->sc_rts) {
+ if (!onlycnt) {
+ if (baconf->ifbac_len < sizeof(struct ifbareq))
+ goto done;
+ bcopy(sc->sc_if.if_xname, bareq.ifba_name,
+ sizeof(bareq.ifba_name));
+ bcopy(brt->brt_if->if_xname, bareq.ifba_ifsname,
+ sizeof(bareq.ifba_ifsname));
+ bcopy(&brt->brt_addr, &bareq.ifba_dst,
+ sizeof(bareq.ifba_dst));
+ bareq.ifba_age = brt->brt_age;
+ bareq.ifba_flags = brt->brt_flags;
+ error = copyout((caddr_t)&bareq,
+ (caddr_t)(baconf->ifbac_req + cnt), sizeof(bareq));
+ if (error)
+ goto done;
+ baconf->ifbac_len -= sizeof(struct ifbareq);
}
+ cnt++;
}
done:
baconf->ifbac_len = cnt * sizeof(struct ifbareq);