> On 6 Jun 2016, at 17:14, Jonathan Matthew <[email protected]> wrote:
>
> We've finally got srp and art to the point where we can use srp to manage the
> internal links in the art data structures. This allows us to do route lookups
> without holding any locks, which is kind of nice.
>
> As we're not doing unlocked insert, delete or walk (yet), those art functions
> use the locked variants of the srp functions. Currently the lock that
> protects those operations is the kernel lock.
>
> Callers to art_lookup and art_match now pass in a pointer to an srp_ref, which
> is always active on return, even if no route is found. In some situations we
> use these functions while modifying the routing table, in which case the
> kernel lock already protects against concurrent modifications so the srp_ref
> is unnecessary, so we srp_leave it immediately.
So it just replaces the pointers used for lookups.
This is OK by me.
>
> I'd also like to draw attention to the comment in rtable_match. Is it OK that
> we might choose a multipath route at random if the set of available routes
> changes while we're choosing?
I don't know the answer to this one :/
>
>
> Index: art.h
> ===================================================================
> RCS file: /cvs/src/sys/net/art.h,v
> retrieving revision 1.13
> diff -u -p -u -p -r1.13 art.h
> --- art.h 3 Jun 2016 03:59:43 -0000 1.13
> +++ art.h 6 Jun 2016 00:32:31 -0000
> @@ -19,13 +19,15 @@
> #ifndef _NET_ART_H_
> #define _NET_ART_H_
>
> +#include <sys/srp.h>
> +
> #define ART_MAXLVL 32 /* We currently use 32 levels for IPv6. */
>
> /*
> * Root of the ART tables, equivalent to the radix head.
> */
> struct art_root {
> - struct art_table *ar_root; /* First table */
> + struct srp ar_root; /* First table */
> uint8_t ar_bits[ART_MAXLVL]; /* Per level stride */
> uint8_t ar_nlvl; /* Number of levels */
> uint8_t ar_alen; /* Address length in bits */
> @@ -59,8 +61,9 @@ struct art_node *art_insert(struct art_r
> int);
> struct art_node *art_delete(struct art_root *, struct art_node *, uint8_t *,
> int);
> -struct art_node *art_match(struct art_root *, uint8_t *);
> -struct art_node *art_lookup(struct art_root *, uint8_t *, int);
> +struct art_node *art_match(struct art_root *, uint8_t *, struct srp_ref
> *);
> +struct art_node *art_lookup(struct art_root *, uint8_t *, int,
> + struct srp_ref *);
> int art_walk(struct art_root *,
> int (*)(struct art_node *, void *), void *);
>
> Index: art.c
> ===================================================================
> RCS file: /cvs/src/sys/net/art.c,v
> retrieving revision 1.18
> diff -u -p -u -p -r1.18 art.c
> --- art.c 3 Jun 2016 03:59:43 -0000 1.18
> +++ art.c 6 Jun 2016 00:32:31 -0000
> @@ -37,8 +37,8 @@
>
> #include <net/art.h>
>
> -#define ISLEAF(e) (((unsigned long)(e).node & 1) == 0)
> -#define SUBTABLE(e) (((struct art_table *)((unsigned long)(e).child & ~1)))
> +#define ISLEAF(e) (((unsigned long)(e) & 1) == 0)
> +#define SUBTABLE(e) ((struct art_table *)((unsigned long)(e) & ~1))
> #define ASNODE(t) ((struct art_node *)((unsigned long)(t) | 1))
>
> /*
> @@ -58,8 +58,7 @@ struct art_table {
> * is a route counter.
> */
> union {
> - struct art_node *node;
> - struct art_table *child;
> + struct srp node;
> unsigned long count;
> } *at_heap; /* Array of 2^(slen+1) items */
> };
> @@ -249,41 +248,60 @@ art_findex(struct art_table *at, uint8_t
> * Return the best existing match for a destination.
> */
> struct art_node *
> -art_match(struct art_root *ar, uint8_t *addr)
> +art_match(struct art_root *ar, uint8_t *addr, struct srp_ref *nsr)
> {
> + struct srp_ref dsr, ndsr;
> + void *entry;
> struct art_table *at;
> - struct art_node *dflt = NULL;
> - int j;
> + struct art_node *dflt, *ndflt;
> + int j;
> +
> + entry = srp_enter(nsr, &ar->ar_root);
> + at = entry;
>
> - at = ar->ar_root;
> if (at == NULL)
> - return (NULL);
> + goto done;
> +
> + /*
> + * Remember the default route of each table we visit in case
> + * we do not find a better matching route.
> + */
> + dflt = srp_enter(&dsr, &at->at_default);
>
> /*
> * Iterate until we find a leaf.
> */
> while (1) {
> - /*
> - * Rember the default route of this table in case
> - * we do not find a better matching route.
> - */
> - if (at->at_default != NULL)
> - dflt = at->at_default;
> -
> /* Do a single level route lookup. */
> j = art_findex(at, addr);
> + entry = srp_follow(nsr, &at->at_heap[j].node);
>
> - /* If this is a leaf we're done. */
> - if (ISLEAF(at->at_heap[j]))
> + /* If this is a leaf (NULL is a leaf) we're done. */
> + if (ISLEAF(entry))
> break;
>
> - at = SUBTABLE(at->at_heap[j]);
> + at = SUBTABLE(entry);
> +
> + ndflt = srp_enter(&ndsr, &at->at_default);
> + if (ndflt != NULL) {
> + srp_leave(&dsr);
> + dsr = ndsr;
> + dflt = ndflt;
> + } else
> + srp_leave(&ndsr);
> }
>
> - if (at->at_heap[j].node != NULL)
> - return (at->at_heap[j].node);
> + if (entry == NULL) {
> + srp_leave(nsr);
> + *nsr = dsr;
> + KASSERT(ISLEAF(dflt));
> + return (dflt);
> + }
>
> - return (dflt);
> + srp_leave(&dsr);
> +done:
> + KASSERT(ISLEAF(entry));
> + return (entry);
> }
>
> /*
> @@ -293,21 +311,25 @@ art_match(struct art_root *ar, uint8_t *
> * it does not exist.
> */
> struct art_node *
> -art_lookup(struct art_root *ar, uint8_t *addr, int plen)
> +art_lookup(struct art_root *ar, uint8_t *addr, int plen, struct srp_ref *nsr)
> {
> + void *entry;
> struct art_table *at;
> - struct art_node *an;
> int i, j;
>
> KASSERT(plen >= 0 && plen <= ar->ar_alen);
>
> - at = ar->ar_root;
> + entry = srp_enter(nsr, &ar->ar_root);
> + at = entry;
> +
> if (at == NULL)
> - return (NULL);
> + goto done;
>
> /* Default route */
> - if (plen == 0)
> - return (at->at_default);
> + if (plen == 0) {
> + entry = srp_follow(nsr, &at->at_default);
> + goto done;
> + }
>
> /*
> * If the prefix length is smaller than the sum of
> @@ -317,24 +339,26 @@ art_lookup(struct art_root *ar, uint8_t
> while (plen > (at->at_offset + at->at_bits)) {
> /* Do a single level route lookup. */
> j = art_findex(at, addr);
> + entry = srp_follow(nsr, &at->at_heap[j].node);
>
> /* A leaf is a match, but not a perfect one. */
> - if (ISLEAF(at->at_heap[j]))
> + if (ISLEAF(entry))
> return (NULL);
>
> - at = SUBTABLE(at->at_heap[j]);
> + at = SUBTABLE(entry);
> }
>
> i = art_bindex(at, addr, plen);
> if (i == -1)
> return (NULL);
>
> - if (!ISLEAF(at->at_heap[i]))
> - an = SUBTABLE(at->at_heap[i])->at_default;
> - else
> - an = at->at_heap[i].node;
> -
> - return (an);
> + entry = srp_follow(nsr, &at->at_heap[i].node);
> + if (!ISLEAF(entry))
> + entry = srp_follow(nsr, &SUBTABLE(entry)->at_default);
> +
> +done:
> + KASSERT(ISLEAF(entry));
> + return (entry);
> }
>
>
> @@ -347,27 +371,30 @@ art_lookup(struct art_root *ar, uint8_t
> struct art_node *
> art_insert(struct art_root *ar, struct art_node *an, uint8_t *addr, int plen)
> {
> - struct art_table *at;
> + struct art_table *at, *child;
> + struct art_node *node;
> int i, j;
>
> + KERNEL_ASSERT_LOCKED();
> KASSERT(plen >= 0 && plen <= ar->ar_alen);
>
> - at = ar->ar_root;
> + at = srp_get_locked(&ar->ar_root);
> if (at == NULL) {
> at = art_table_get(ar, NULL, -1);
> if (at == NULL)
> return (NULL);
>
> - ar->ar_root = at;
> + srp_swap_locked(&ar->ar_root, at);
> }
>
> /* Default route */
> if (plen == 0) {
> - if (at->at_default != NULL)
> - return (at->at_default);
> + node = srp_get_locked(&at->at_default);
> + if (node != NULL)
> + return (node);
>
> art_table_ref(ar, at);
> - at->at_default = an;
> + srp_swap_locked(&at->at_default, an);
> return (an);
> }
>
> @@ -379,6 +406,7 @@ art_insert(struct art_root *ar, struct a
> while (plen > (at->at_offset + at->at_bits)) {
> /* Do a single level route lookup. */
> j = art_findex(at, addr);
> + node = srp_get_locked(&at->at_heap[j].node);
>
> /*
> * If the node corresponding to the fringe index is
> @@ -386,18 +414,16 @@ art_insert(struct art_root *ar, struct a
> * entry of this node will then become the default
> * route of the subtable.
> */
> - if (ISLEAF(at->at_heap[j])) {
> - struct art_table *child;
> -
> + if (ISLEAF(node)) {
> child = art_table_get(ar, at, j);
> if (child == NULL)
> return (NULL);
>
> art_table_ref(ar, at);
> - at->at_heap[j].node = ASNODE(child);
> - }
> -
> - at = SUBTABLE(at->at_heap[j]);
> + srp_swap_locked(&at->at_heap[j].node, ASNODE(child));
> + at = child;
> + } else
> + at = SUBTABLE(node);
> }
>
> i = art_bindex(at, addr, plen);
> @@ -414,12 +440,13 @@ struct art_node *
> art_table_insert(struct art_root *ar, struct art_table *at, int i,
> struct art_node *an)
> {
> - struct art_node *prev;
> + struct art_node *prev, *node;
>
> - if (!ISLEAF(at->at_heap[i]))
> - prev = SUBTABLE(at->at_heap[i])->at_default;
> + node = srp_get_locked(&at->at_heap[i].node);
> + if (!ISLEAF(node))
> + prev = srp_get_locked(&SUBTABLE(node)->at_default);
> else
> - prev = at->at_heap[i].node;
> + prev = node;
>
> if (art_check_duplicate(ar, prev, an))
> return (prev);
> @@ -433,10 +460,10 @@ art_table_insert(struct art_root *ar, st
> */
> if (i < at->at_minfringe)
> art_allot(at, i, prev, an);
> - else if (!ISLEAF(at->at_heap[i]))
> - SUBTABLE(at->at_heap[i])->at_default = an;
> + else if (!ISLEAF(node))
> + srp_swap_locked(&SUBTABLE(node)->at_default, an);
> else
> - at->at_heap[i].node = an;
> + srp_swap_locked(&at->at_heap[i].node, an);
>
> return (an);
> }
> @@ -449,21 +476,22 @@ struct art_node *
> art_delete(struct art_root *ar, struct art_node *an, uint8_t *addr, int plen)
> {
> struct art_table *at;
> - struct art_node *dflt;
> + struct art_node *node;
> int i, j;
>
> + KERNEL_ASSERT_LOCKED();
> KASSERT(plen >= 0 && plen <= ar->ar_alen);
>
> - at = ar->ar_root;
> + at = srp_get_locked(&ar->ar_root);
> if (at == NULL)
> return (NULL);
>
> /* Default route */
> if (plen == 0) {
> - dflt = at->at_default;
> - at->at_default = NULL;
> + node = srp_get_locked(&at->at_default);
> + srp_swap_locked(&at->at_default, NULL);
> art_table_free(ar, at);
> - return (dflt);
> + return (node);
> }
>
> /*
> @@ -474,12 +502,13 @@ art_delete(struct art_root *ar, struct a
> while (plen > (at->at_offset + at->at_bits)) {
> /* Do a single level route lookup. */
> j = art_findex(at, addr);
> + node = srp_get_locked(&at->at_heap[j].node);
>
> /* If this is a leaf, there is no route to delete. */
> - if (ISLEAF(at->at_heap[j]))
> + if (ISLEAF(node))
> return (NULL);
>
> - at = SUBTABLE(at->at_heap[j]);
> + at = SUBTABLE(node);
> }
>
> i = art_bindex(at, addr, plen);
> @@ -494,24 +523,27 @@ art_delete(struct art_root *ar, struct a
> */
> struct art_node *
> art_table_delete(struct art_root *ar, struct art_table *at, int i,
> - struct art_node *node)
> + struct art_node *an)
> {
> - struct art_node *next;
> + struct art_node *next, *node;
>
> #ifdef DIAGNOSTIC
> struct art_node *prev;
> +#endif
>
> - if (!ISLEAF(at->at_heap[i]))
> - prev = SUBTABLE(at->at_heap[i])->at_default;
> + node = srp_get_locked(&at->at_heap[i].node);
> +#ifdef DIAGNOSTIC
> + if (!ISLEAF(node))
> + prev = srp_get_locked(&SUBTABLE(node)->at_default);
> else
> - prev = at->at_heap[i].node;
> + prev = node;
>
> - KASSERT(prev == node);
> + KASSERT(prev == an);
> #endif
>
> /* Get the next most specific route for the index `i'. */
> if ((i >> 1) > 1)
> - next = at->at_heap[i >> 1].node;
> + next = srp_get_locked(&at->at_heap[i >> 1].node);
> else
> next = NULL;
>
> @@ -521,16 +553,16 @@ art_table_delete(struct art_root *ar, st
> * route pointer to all the corresponding fringe indices.
> */
> if (i < at->at_minfringe)
> - art_allot(at, i, node, next);
> - else if (!ISLEAF(at->at_heap[i]))
> - SUBTABLE(at->at_heap[i])->at_default = next;
> + art_allot(at, i, an, next);
> + else if (!ISLEAF(node))
> + srp_swap_locked(&SUBTABLE(node)->at_default, next);
> else
> - at->at_heap[i].node = next;
> + srp_swap_locked(&at->at_heap[i].node, next);
>
> /* We have removed an entry from this table. */
> art_table_free(ar, at);
>
> - return (node);
> + return (an);
> }
>
> void
> @@ -539,17 +571,26 @@ art_table_ref(struct art_root *ar, struc
> at->at_refcnt++;
> }
>
> +static inline int
> +art_table_rele(struct art_table *at)
> +{
> + if (at == NULL)
> + return (0);
> +
> + return (--at->at_refcnt == 0);
> +}
> +
> int
> art_table_free(struct art_root *ar, struct art_table *at)
> {
> - if (--at->at_refcnt == 0) {
> + if (art_table_rele(at)) {
> /*
> * Garbage collect this table and all its parents
> * that are empty.
> */
> do {
> at = art_table_put(ar, at);
> - } while (at != NULL && --at->at_refcnt == 0);
> + } while (art_table_rele(at));
>
> return (1);
> }
> @@ -564,9 +605,12 @@ int
> art_walk(struct art_root *ar, int (*f)(struct art_node *, void *), void *arg)
> {
> struct art_table *at;
> + struct art_node *node;
> int error;
>
> - at = ar->ar_root;
> + KERNEL_ASSERT_LOCKED();
> +
> + at = srp_get_locked(&ar->ar_root);
> if (at == NULL)
> return (0);
>
> @@ -574,8 +618,9 @@ art_walk(struct art_root *ar, int (*f)(s
> * The default route should be processed here because the root
> * table does not have a parent.
> */
> - if (at->at_default != NULL) {
> - error = (*f)(at->at_default, arg);
> + node = srp_get_locked(&at->at_default);
> + if (node != NULL) {
> + error = (*f)(node, arg);
> if (error)
> return (error);
> }
> @@ -588,6 +633,7 @@ art_table_walk(struct art_root *ar, stru
> int (*f)(struct art_node *, void *), void *arg)
> {
> struct art_node *next, *an = NULL;
> + struct art_node *node;
> int i, j, error = 0;
> uint32_t maxfringe = (at->at_minfringe << 1);
>
> @@ -604,8 +650,8 @@ art_table_walk(struct art_root *ar, stru
> * be processed more than once.
> */
> for (i = max(j, 2); i < at->at_minfringe; i <<= 1) {
> - next = at->at_heap[i >> 1].node;
> - an = at->at_heap[i].node;
> + next = srp_get_locked(&at->at_heap[i >> 1].node);
> + an = srp_get_locked(&at->at_heap[i].node);
> if ((an != NULL) && (an != next)) {
> error = (*f)(an, arg);
> if (error)
> @@ -618,21 +664,23 @@ art_table_walk(struct art_root *ar, stru
> * Iterate fringe nodes.
> */
> for (i = at->at_minfringe; i < maxfringe; i++) {
> - next = at->at_heap[i >> 1].node;
> - if (!ISLEAF(at->at_heap[i]))
> - an = SUBTABLE(at->at_heap[i])->at_default;
> + next = srp_get_locked(&at->at_heap[i >> 1].node);
> + node = srp_get_locked(&at->at_heap[i].node);
> + if (!ISLEAF(node))
> + an = srp_get_locked(&SUBTABLE(node)->at_default);
> else
> - an = at->at_heap[i].node;
> + an = node;
> +
> if ((an != NULL) && (an != next)) {
> error = (*f)(an, arg);
> if (error)
> goto out;
> }
>
> - if (ISLEAF(at->at_heap[i]))
> + if (ISLEAF(node))
> continue;
>
> - error = art_table_walk(ar, SUBTABLE(at->at_heap[i]), f, arg);
> + error = art_table_walk(ar, SUBTABLE(node), f, arg);
> if (error)
> break;
> }
> @@ -652,6 +700,7 @@ struct art_table *
> art_table_get(struct art_root *ar, struct art_table *parent, int j)
> {
> struct art_table *at;
> + struct art_node *node;
> void *at_heap;
> uint32_t lvl;
>
> @@ -694,7 +743,9 @@ art_table_get(struct art_root *ar, struc
> at->at_refcnt = 0;
>
> if (parent != NULL) {
> - at->at_default = parent->at_heap[j].node;
> + node = srp_get_locked(&parent->at_heap[j].node);
> + /* node isn't being deleted, no srp_finalize needed */
> + srp_swap_locked(&at->at_default, node);
> at->at_offset = (parent->at_offset + parent->at_bits);
> }
>
> @@ -711,20 +762,24 @@ struct art_table *
> art_table_put(struct art_root *ar, struct art_table *at)
> {
> struct art_table *parent = at->at_parent;
> - uint32_t lvl = at->at_level;
> + struct art_node *node;
> uint32_t j = at->at_index;
>
> + KASSERT(at->at_refcnt == 0);
> KASSERT(j != 0 && j != 1);
> - KASSERT(parent != NULL || j == -1);
>
> if (parent != NULL) {
> - KASSERT(lvl == parent->at_level + 1);
> + KASSERT(j != -1);
> + KASSERT(at->at_level == parent->at_level + 1);
> KASSERT(parent->at_refcnt >= 1);
>
> /* Give the route back to its parent. */
> - parent->at_heap[j].node = at->at_default;
> + node = srp_get_locked(&at->at_default);
> + srp_swap_locked(&parent->at_heap[j].node, node);
> } else {
> - ar->ar_root = NULL;
> + KASSERT(j == -1);
> + KASSERT(at->at_level == 0);
> + srp_swap_locked(&ar->ar_root, NULL);
> }
>
> mtx_enter(&art_table_gc_mtx);
> @@ -750,6 +805,11 @@ art_table_gc(void *null)
> while (at != NULL) {
> next = at->at_parent;
>
> + if (at->at_level == 0)
> + srp_finalize(at, "arttfini");
> + else
> + srp_finalize(ASNODE(at), "arttfini");
> +
> switch (AT_HEAPSIZE(at->at_bits)) {
> case AT_HEAPSIZE(4):
> pool_put(&at_heap_4_pool, at->at_heap);
> @@ -794,7 +854,8 @@ void
> art_allot(struct art_table *at, int i, struct art_node *old,
> struct art_node *new)
> {
> - int k = i;
> + struct art_node *node, *dflt;
> + int k = i;
>
> KASSERT(i < at->at_minfringe);
>
> @@ -805,12 +866,15 @@ again:
>
> /* Change fringe nodes. */
> while (1) {
> - if (!ISLEAF(at->at_heap[k])) {
> - if (SUBTABLE(at->at_heap[k])->at_default == old) {
> - SUBTABLE(at->at_heap[k])->at_default = new;
> + node = srp_get_locked(&at->at_heap[k].node);
> + if (!ISLEAF(node)) {
> + dflt = srp_get_locked(&SUBTABLE(node)->at_default);
> + if (dflt == old) {
> + srp_swap_locked(&SUBTABLE(node)->at_default,
> + new);
> }
> - } else if (at->at_heap[k].node == old) {
> - at->at_heap[k].node = new;
> + } else if (node == old) {
> + srp_swap_locked(&at->at_heap[k].node, new);
> }
> if (k % 2)
> goto moveup;
> @@ -818,7 +882,8 @@ again:
> }
>
> nonfringe:
> - if (at->at_heap[k].node == old)
> + node = srp_get_locked(&at->at_heap[k].node);
> + if (node == old)
> goto again;
> moveon:
> if (k % 2)
> @@ -827,7 +892,7 @@ moveon:
> goto nonfringe;
> moveup:
> k >>= 1;
> - at->at_heap[k].node = new;
> + srp_swap_locked(&at->at_heap[k].node, new);
>
> /* Change non-fringe node. */
> if (k != i)
> @@ -875,6 +940,8 @@ art_gc(void *null)
>
> while (an != NULL) {
> next = an->an_gc;
> +
> + srp_finalize(an, "artnfini");
>
> pool_put(&an_pool, an);
>
> Index: rtable.c
> ===================================================================
> RCS file: /cvs/src/sys/net/rtable.c,v
> retrieving revision 1.45
> diff -u -p -u -p -r1.45 rtable.c
> --- rtable.c 1 Jun 2016 09:46:19 -0000 1.45
> +++ rtable.c 6 Jun 2016 00:32:31 -0000
> @@ -535,8 +535,8 @@ rtable_lookup(unsigned int rtableid, str
> {
> struct art_root *ar;
> struct art_node *an;
> - struct rtentry *rt;
> - struct srp_ref sr;
> + struct rtentry *rt = NULL;
> + struct srp_ref sr, nsr;
> uint8_t *addr;
> int plen;
>
> @@ -548,19 +548,20 @@ rtable_lookup(unsigned int rtableid, str
>
> /* No need for a perfect match. */
> if (mask == NULL) {
> - an = art_match(ar, addr);
> + an = art_match(ar, addr, &nsr);
> if (an == NULL)
> - return (NULL);
> + goto out;
> } else {
> plen = rtable_satoplen(dst->sa_family, mask);
> if (plen == -1)
> return (NULL);
>
> - an = art_lookup(ar, addr, plen);
> + an = art_lookup(ar, addr, plen, &nsr);
> +
> /* Make sure we've got a perfect match. */
> if (an == NULL || an->an_plen != plen ||
> memcmp(an->an_dst, dst, dst->sa_len))
> - return (NULL);
> + goto out;
> }
>
> #ifdef SMALL_KERNEL
> @@ -578,14 +579,13 @@ rtable_lookup(unsigned int rtableid, str
> memcmp(rt->rt_gateway, gateway, gateway->sa_len) == 0)
> break;
> }
> - if (rt == NULL) {
> - SRPL_LEAVE(&sr);
> - return (NULL);
> - }
> #endif /* SMALL_KERNEL */
> + if (rt != NULL)
> + rtref(rt);
>
> - rtref(rt);
> SRPL_LEAVE(&sr);
> +out:
> + srp_leave(&nsr);
>
> return (rt);
> }
> @@ -596,7 +596,7 @@ rtable_match(unsigned int rtableid, stru
> struct art_root *ar;
> struct art_node *an;
> struct rtentry *rt = NULL;
> - struct srp_ref sr;
> + struct srp_ref sr, nsr;
> uint8_t *addr;
> #ifndef SMALL_KERNEL
> int hash;
> @@ -608,8 +608,7 @@ rtable_match(unsigned int rtableid, stru
>
> addr = satoaddr(ar, dst);
>
> - KERNEL_LOCK();
> - an = art_match(ar, addr);
> + an = art_match(ar, addr, &nsr);
> if (an == NULL)
> goto out;
>
> @@ -634,6 +633,16 @@ rtable_match(unsigned int rtableid, stru
>
> threshold = (0xffff / npaths) + 1;
>
> + /*
> + * we have no protection against concurrent modification of the
> + * route list attached to the node, so we won't necessarily
> + * have the same number of routes. for most modifications,
> + * we'll pick a route that we wouldn't have if we only saw the
> + * list before or after the change. if we were going to use
> + * the last available route, but it got removed, we'll hit
> + * the end of the list and then pick the first route.
> + */
> +
> mrt = SRPL_ENTER(&sr, &an->an_rtlist);
> while (hash > threshold && mrt != NULL) {
> if (mrt->rt_priority == rt->rt_priority)
> @@ -650,7 +659,7 @@ rtable_match(unsigned int rtableid, stru
> }
> #endif /* SMALL_KERNEL */
> out:
> - KERNEL_UNLOCK();
> + srp_leave(&nsr);
> return (rt);
> }
>
> @@ -661,12 +670,14 @@ rtable_insert(unsigned int rtableid, str
> {
> #ifndef SMALL_KERNEL
> struct rtentry *mrt;
> + struct srp_ref sr;
> #endif /* SMALL_KERNEL */
> struct art_root *ar;
> struct art_node *an, *prev;
> uint8_t *addr;
> int plen;
> unsigned int rt_flags;
> + int error = 0;
>
> KERNEL_ASSERT_LOCKED();
>
> @@ -679,12 +690,15 @@ rtable_insert(unsigned int rtableid, str
> if (plen == -1)
> return (EINVAL);
>
> + rtref(rt); /* guarantee rtfree won't do anything during insert */
> +
> #ifndef SMALL_KERNEL
> /* Do not permit exactly the same dst/mask/gw pair. */
> - an = art_lookup(ar, addr, plen);
> + an = art_lookup(ar, addr, plen, &sr);
> + srp_leave(&sr); /* an can't go away while we have the lock */
> if (an != NULL && an->an_plen == plen &&
> !memcmp(an->an_dst, dst, dst->sa_len)) {
> - struct rtentry *mrt;
> + struct rtentry *mrt;
> int mpathok = ISSET(rt->rt_flags, RTF_MPATH);
>
> SRPL_FOREACH_LOCKED(mrt, &an->an_rtlist, rt_next) {
> @@ -695,15 +709,18 @@ rtable_insert(unsigned int rtableid, str
> if (!mpathok ||
> (mrt->rt_gateway->sa_len == gateway->sa_len &&
> !memcmp(mrt->rt_gateway, gateway,
> gateway->sa_len))){
> - return (EEXIST);
> + error = EEXIST;
> + goto leave;
> }
> }
> }
> #endif /* SMALL_KERNEL */
>
> an = art_get(dst, plen);
> - if (an == NULL)
> - return (ENOBUFS);
> + if (an == NULL) {
> + error = ENOBUFS;
> + goto leave;
> + }
>
> /* prepare for immediate operation if insert succeeds */
> rt_flags = rt->rt_flags;
> @@ -718,9 +735,11 @@ rtable_insert(unsigned int rtableid, str
> rt_next);
> rt->rt_flags = rt_flags;
> art_put(an);
> -
> - if (prev == NULL)
> - return ESRCH;
> +
> + if (prev == NULL) {
> + error = ESRCH;
> + goto leave;
> + }
>
> #ifndef SMALL_KERNEL
> an = prev;
> @@ -752,11 +771,12 @@ rtable_insert(unsigned int rtableid, str
> /* Put newly inserted entry at the right place. */
> rtable_mpath_reprio(rtableid, dst, mask, rt->rt_priority, rt);
> #else
> - return (EEXIST);
> + error = EEXIST;
> #endif /* SMALL_KERNEL */
> }
> -
> - return (0);
> +leave:
> + rtfree(rt);
> + return (error);
> }
>
> int
> @@ -765,6 +785,7 @@ rtable_delete(unsigned int rtableid, str
> {
> struct art_root *ar;
> struct art_node *an;
> + struct srp_ref sr;
> uint8_t *addr;
> int plen;
> #ifndef SMALL_KERNEL
> @@ -772,8 +793,6 @@ rtable_delete(unsigned int rtableid, str
> int npaths = 0;
> #endif /* SMALL_KERNEL */
>
> - KERNEL_ASSERT_LOCKED();
> -
> ar = rtable_get(rtableid, dst->sa_family);
> if (ar == NULL)
> return (EAFNOSUPPORT);
> @@ -781,7 +800,11 @@ rtable_delete(unsigned int rtableid, str
> addr = satoaddr(ar, dst);
> plen = rtable_satoplen(dst->sa_family, mask);
>
> - an = art_lookup(ar, addr, plen);
> + KERNEL_ASSERT_LOCKED();
> +
> + an = art_lookup(ar, addr, plen, &sr);
> + srp_leave(&sr); /* an can't go away while we have the lock */
> +
> /* Make sure we've got a perfect match. */
> if (an == NULL || an->an_plen != plen ||
> memcmp(an->an_dst, dst, dst->sa_len))
> @@ -796,7 +819,7 @@ rtable_delete(unsigned int rtableid, str
> npaths++;
>
> if (npaths > 1) {
> - KASSERT(rt->rt_refcnt >= 2);
> + KASSERT(rt->rt_refcnt >= 1);
> SRPL_REMOVE_LOCKED(&rt_rc, &an->an_rtlist, rt, rtentry,
> rt_next);
>
> @@ -811,7 +834,7 @@ rtable_delete(unsigned int rtableid, str
> if (art_delete(ar, an, addr, plen) == NULL)
> return (ESRCH);
>
> - KASSERT(rt->rt_refcnt >= 2);
> + KASSERT(rt->rt_refcnt >= 1);
> SRPL_REMOVE_LOCKED(&rt_rc, &an->an_rtlist, rt, rtentry, rt_next);
>
> art_put(an);
> @@ -879,6 +902,7 @@ rtable_mpath_reprio(unsigned int rtablei
> {
> struct art_root *ar;
> struct art_node *an;
> + struct srp_ref sr;
> uint8_t *addr;
> int plen;
> struct rtentry *mrt, *prt = NULL;
> @@ -890,13 +914,15 @@ rtable_mpath_reprio(unsigned int rtablei
> addr = satoaddr(ar, dst);
> plen = rtable_satoplen(dst->sa_family, mask);
>
> - an = art_lookup(ar, addr, plen);
> + KERNEL_ASSERT_LOCKED();
> +
> + an = art_lookup(ar, addr, plen, &sr);
> + srp_leave(&sr); /* an can't go away while we have the lock */
> +
> /* Make sure we've got a perfect match. */
> if (an == NULL || an->an_plen != plen ||
> memcmp(an->an_dst, dst, dst->sa_len))
> return (ESRCH);
> -
> - KERNEL_ASSERT_LOCKED();
>
> rtref(rt); /* keep rt alive in between remove and add */
> SRPL_REMOVE_LOCKED(&rt_rc, &an->an_rtlist, rt, rtentry, rt_next);
>