> On 6 Jun 2016, at 17:14, Jonathan Matthew <[email protected]> wrote:
> 
> We've finally got srp and art to the point where we can use srp to manage the
> internal links in the art data structures.  This allows us to do route lookups
> without holding any locks, which is kind of nice.
> 
> As we're not doing unlocked insert, delete or walk (yet), those art functions
> use the locked variants of the srp functions.  Currently the lock that
> protects those operations is the kernel lock.
> 
> Callers to art_lookup and art_match now pass in a pointer to an srp_ref, which
> is always active on return, even if no route is found.  In some situations we
> use these functions while modifying the routing table, in which case the
> kernel lock already protects against concurrent modifications, making the
> srp_ref unnecessary, so we srp_leave it immediately.

so it just replaces the pointers used for lookups.

this is ok by me.

> 
> I'd also like to draw attention to the comment in rtable_match.  Is it OK that
> we might choose a multipath route at random if the set of available routes
> changes while we're choosing?

i don't know the answer to this one :/

> 
> 
> Index: art.h
> ===================================================================
> RCS file: /cvs/src/sys/net/art.h,v
> retrieving revision 1.13
> diff -u -p -u -p -r1.13 art.h
> --- art.h     3 Jun 2016 03:59:43 -0000       1.13
> +++ art.h     6 Jun 2016 00:32:31 -0000
> @@ -19,13 +19,15 @@
> #ifndef _NET_ART_H_
> #define _NET_ART_H_
> 
> +#include <sys/srp.h>
> +
> #define ART_MAXLVL    32      /* We currently use 32 levels for IPv6. */
> 
> /*
>  * Root of the ART tables, equivalent to the radix head.
>  */
> struct art_root {
> -     struct art_table        *ar_root;       /* First table */
> +     struct srp               ar_root;       /* First table */
>       uint8_t                  ar_bits[ART_MAXLVL];   /* Per level stride */
>       uint8_t                  ar_nlvl;       /* Number of levels */
>       uint8_t                  ar_alen;       /* Address length in bits */
> @@ -59,8 +61,9 @@ struct art_node *art_insert(struct art_r
>                    int);
> struct art_node *art_delete(struct art_root *, struct art_node *, uint8_t *,
>                    int);
> -struct art_node      *art_match(struct art_root *, uint8_t *);
> -struct art_node *art_lookup(struct art_root *, uint8_t *, int);
> +struct art_node      *art_match(struct art_root *, uint8_t *, struct srp_ref 
> *);
> +struct art_node *art_lookup(struct art_root *, uint8_t *, int,
> +                  struct srp_ref *);
> int            art_walk(struct art_root *,
>                    int (*)(struct art_node *, void *), void *);
> 
> Index: art.c
> ===================================================================
> RCS file: /cvs/src/sys/net/art.c,v
> retrieving revision 1.18
> diff -u -p -u -p -r1.18 art.c
> --- art.c     3 Jun 2016 03:59:43 -0000       1.18
> +++ art.c     6 Jun 2016 00:32:31 -0000
> @@ -37,8 +37,8 @@
> 
> #include <net/art.h>
> 
> -#define ISLEAF(e)    (((unsigned long)(e).node & 1) == 0)
> -#define SUBTABLE(e)  (((struct art_table *)((unsigned long)(e).child & ~1)))
> +#define ISLEAF(e)    (((unsigned long)(e) & 1) == 0)
> +#define SUBTABLE(e)  ((struct art_table *)((unsigned long)(e) & ~1))
> #define ASNODE(t)     ((struct art_node *)((unsigned long)(t) | 1))
> 
> /*
> @@ -58,8 +58,7 @@ struct art_table {
>        * is a route counter.
>        */
>       union {
> -             struct art_node         *node;
> -             struct art_table        *child;
> +             struct srp               node;
>               unsigned long            count;
>       } *at_heap;                             /* Array of 2^(slen+1) items */
> };
> @@ -249,41 +248,60 @@ art_findex(struct art_table *at, uint8_t
>  * Return the best existing match for a destination.
>  */
> struct art_node *
> -art_match(struct art_root *ar, uint8_t *addr)
> +art_match(struct art_root *ar, uint8_t *addr, struct srp_ref *nsr)
> {
> +     struct srp_ref          dsr, ndsr;
> +     void                    *entry;
>       struct art_table        *at;
> -     struct art_node         *dflt = NULL;
> -     int                      j;
> +     struct art_node         *dflt, *ndflt;
> +     int                     j;
> +
> +     entry = srp_enter(nsr, &ar->ar_root);
> +     at = entry;
> 
> -     at = ar->ar_root;
>       if (at == NULL)
> -             return (NULL);
> +             goto done;
> +
> +     /*
> +      * Remember the default route of each table we visit in case
> +      * we do not find a better matching route.
> +      */
> +     dflt = srp_enter(&dsr, &at->at_default);
> 
>       /*
>        * Iterate until we find a leaf.
>        */
>       while (1) {
> -             /*
> -              * Rember the default route of this table in case
> -              * we do not find a better matching route.
> -              */
> -             if (at->at_default != NULL)
> -                     dflt = at->at_default;
> -
>               /* Do a single level route lookup. */
>               j = art_findex(at, addr);
> +             entry = srp_follow(nsr, &at->at_heap[j].node);
> 
> -             /* If this is a leaf we're done. */
> -             if (ISLEAF(at->at_heap[j]))
> +             /* If this is a leaf (NULL is a leaf) we're done. */
> +             if (ISLEAF(entry))
>                       break;
> 
> -             at = SUBTABLE(at->at_heap[j]);
> +             at = SUBTABLE(entry);
> +
> +             ndflt = srp_enter(&ndsr, &at->at_default);
> +             if (ndflt != NULL) {
> +                     srp_leave(&dsr);
> +                     dsr = ndsr;
> +                     dflt = ndflt;
> +             } else
> +                     srp_leave(&ndsr);
>       }
> 
> -     if (at->at_heap[j].node != NULL)
> -             return (at->at_heap[j].node);
> +     if (entry == NULL) {
> +             srp_leave(nsr);
> +             *nsr = dsr;
> +             KASSERT(ISLEAF(dflt));
> +             return (dflt);
> +     }
> 
> -     return (dflt);
> +     srp_leave(&dsr);
> +done:
> +     KASSERT(ISLEAF(entry));
> +     return (entry);
> }
> 
> /*
> @@ -293,21 +311,25 @@ art_match(struct art_root *ar, uint8_t *
>  * it does not exist.
>  */
> struct art_node *
> -art_lookup(struct art_root *ar, uint8_t *addr, int plen)
> +art_lookup(struct art_root *ar, uint8_t *addr, int plen, struct srp_ref *nsr)
> {
> +     void                    *entry;
>       struct art_table        *at;
> -     struct art_node         *an;
>       int                      i, j;
> 
>       KASSERT(plen >= 0 && plen <= ar->ar_alen);
> 
> -     at = ar->ar_root;
> +     entry = srp_enter(nsr, &ar->ar_root);
> +     at = entry;
> +
>       if (at == NULL)
> -             return (NULL);
> +             goto done;
> 
>       /* Default route */
> -     if (plen == 0)
> -             return (at->at_default);
> +     if (plen == 0) {
> +             entry = srp_follow(nsr, &at->at_default);
> +             goto done;
> +     }
> 
>       /*
>        * If the prefix length is smaller than the sum of
> @@ -317,24 +339,26 @@ art_lookup(struct art_root *ar, uint8_t 
>       while (plen > (at->at_offset + at->at_bits)) {
>               /* Do a single level route lookup. */
>               j = art_findex(at, addr);
> +             entry = srp_follow(nsr, &at->at_heap[j].node);
> 
>               /* A leaf is a match, but not a perfect one. */
> -             if (ISLEAF(at->at_heap[j]))
> +             if (ISLEAF(entry))
>                       return (NULL);
> 
> -             at = SUBTABLE(at->at_heap[j]);
> +             at = SUBTABLE(entry);
>       }
> 
>       i = art_bindex(at, addr, plen);
>       if (i == -1)
>               return (NULL);
> 
> -     if (!ISLEAF(at->at_heap[i]))
> -             an = SUBTABLE(at->at_heap[i])->at_default;
> -     else
> -             an = at->at_heap[i].node;
> -
> -     return (an);
> +     entry = srp_follow(nsr, &at->at_heap[i].node);
> +     if (!ISLEAF(entry))
> +             entry = srp_follow(nsr, &SUBTABLE(entry)->at_default);
> +
> +done:
> +     KASSERT(ISLEAF(entry));
> +     return (entry);
> }
> 
> 
> @@ -347,27 +371,30 @@ art_lookup(struct art_root *ar, uint8_t 
> struct art_node *
> art_insert(struct art_root *ar, struct art_node *an, uint8_t *addr, int plen)
> {
> -     struct art_table        *at;
> +     struct art_table        *at, *child;
> +     struct art_node         *node;
>       int                      i, j;
> 
> +     KERNEL_ASSERT_LOCKED();
>       KASSERT(plen >= 0 && plen <= ar->ar_alen);
> 
> -     at = ar->ar_root;
> +     at = srp_get_locked(&ar->ar_root);
>       if (at == NULL) {
>               at = art_table_get(ar, NULL, -1);
>               if (at == NULL)
>                       return (NULL);
> 
> -             ar->ar_root = at;
> +             srp_swap_locked(&ar->ar_root, at);
>       }
> 
>       /* Default route */
>       if (plen == 0) {
> -             if (at->at_default != NULL)
> -                     return (at->at_default);
> +             node = srp_get_locked(&at->at_default);
> +             if (node != NULL)
> +                     return (node);
> 
>               art_table_ref(ar, at);
> -             at->at_default = an;
> +             srp_swap_locked(&at->at_default, an);
>               return (an);
>       }
> 
> @@ -379,6 +406,7 @@ art_insert(struct art_root *ar, struct a
>       while (plen > (at->at_offset + at->at_bits)) {
>               /* Do a single level route lookup. */
>               j = art_findex(at, addr);
> +             node = srp_get_locked(&at->at_heap[j].node);
> 
>               /*
>                * If the node corresponding to the fringe index is
> @@ -386,18 +414,16 @@ art_insert(struct art_root *ar, struct a
>                * entry of this node will then become the default
>                * route of the subtable.
>                */
> -             if (ISLEAF(at->at_heap[j])) {
> -                     struct art_table  *child;
> -
> +             if (ISLEAF(node)) {
>                       child = art_table_get(ar, at, j);
>                       if (child == NULL)
>                               return (NULL);
> 
>                       art_table_ref(ar, at);
> -                     at->at_heap[j].node = ASNODE(child);
> -             }
> -
> -             at = SUBTABLE(at->at_heap[j]);
> +                     srp_swap_locked(&at->at_heap[j].node, ASNODE(child));
> +                     at = child;
> +             } else
> +                     at = SUBTABLE(node);
>       }
> 
>       i = art_bindex(at, addr, plen);
> @@ -414,12 +440,13 @@ struct art_node *
> art_table_insert(struct art_root *ar, struct art_table *at, int i,
>     struct art_node *an)
> {
> -     struct art_node *prev;
> +     struct art_node *prev, *node;
> 
> -     if (!ISLEAF(at->at_heap[i]))
> -             prev = SUBTABLE(at->at_heap[i])->at_default;
> +     node = srp_get_locked(&at->at_heap[i].node);
> +     if (!ISLEAF(node))
> +             prev = srp_get_locked(&SUBTABLE(node)->at_default);
>       else
> -             prev = at->at_heap[i].node;
> +             prev = node;
> 
>       if (art_check_duplicate(ar, prev, an))
>               return (prev);
> @@ -433,10 +460,10 @@ art_table_insert(struct art_root *ar, st
>        */
>       if (i < at->at_minfringe)
>               art_allot(at, i, prev, an);
> -     else if (!ISLEAF(at->at_heap[i]))
> -             SUBTABLE(at->at_heap[i])->at_default = an;
> +     else if (!ISLEAF(node))
> +             srp_swap_locked(&SUBTABLE(node)->at_default, an);
>       else
> -             at->at_heap[i].node = an;
> +             srp_swap_locked(&at->at_heap[i].node, an);
> 
>       return (an);
> }
> @@ -449,21 +476,22 @@ struct art_node *
> art_delete(struct art_root *ar, struct art_node *an, uint8_t *addr, int plen)
> {
>       struct art_table        *at;
> -     struct art_node         *dflt;
> +     struct art_node         *node;
>       int                      i, j;
> 
> +     KERNEL_ASSERT_LOCKED();
>       KASSERT(plen >= 0 && plen <= ar->ar_alen);
> 
> -     at = ar->ar_root;
> +     at = srp_get_locked(&ar->ar_root);
>       if (at == NULL)
>               return (NULL);
> 
>       /* Default route */
>       if (plen == 0) {
> -             dflt = at->at_default;
> -             at->at_default = NULL;
> +             node = srp_get_locked(&at->at_default);
> +             srp_swap_locked(&at->at_default, NULL);
>               art_table_free(ar, at);
> -             return (dflt);
> +             return (node);
>       }
> 
>       /*
> @@ -474,12 +502,13 @@ art_delete(struct art_root *ar, struct a
>       while (plen > (at->at_offset + at->at_bits)) {
>               /* Do a single level route lookup. */
>               j = art_findex(at, addr);
> +             node = srp_get_locked(&at->at_heap[j].node);
> 
>               /* If this is a leaf, there is no route to delete. */
> -             if (ISLEAF(at->at_heap[j]))
> +             if (ISLEAF(node))
>                       return (NULL);
> 
> -             at = SUBTABLE(at->at_heap[j]);
> +             at = SUBTABLE(node);
>       }
> 
>       i = art_bindex(at, addr, plen);
> @@ -494,24 +523,27 @@ art_delete(struct art_root *ar, struct a
>  */
> struct art_node *
> art_table_delete(struct art_root *ar, struct art_table *at, int i,
> -    struct art_node *node)
> +    struct art_node *an)
> {
> -     struct art_node         *next;
> +     struct art_node         *next, *node;
> 
> #ifdef DIAGNOSTIC
>       struct art_node         *prev;
> +#endif
> 
> -     if (!ISLEAF(at->at_heap[i]))
> -             prev = SUBTABLE(at->at_heap[i])->at_default;
> +     node = srp_get_locked(&at->at_heap[i].node);
> +#ifdef DIAGNOSTIC
> +     if (!ISLEAF(node))
> +             prev = srp_get_locked(&SUBTABLE(node)->at_default);
>       else
> -             prev = at->at_heap[i].node;
> +             prev = node;
> 
> -     KASSERT(prev == node);
> +     KASSERT(prev == an);
> #endif
> 
>       /* Get the next most specific route for the index `i'. */
>       if ((i >> 1) > 1)
> -             next = at->at_heap[i >> 1].node;
> +             next = srp_get_locked(&at->at_heap[i >> 1].node);
>       else
>               next = NULL;
> 
> @@ -521,16 +553,16 @@ art_table_delete(struct art_root *ar, st
>        * route pointer to all the corresponding fringe indices.
>        */
>       if (i < at->at_minfringe)
> -             art_allot(at, i, node, next);
> -     else if (!ISLEAF(at->at_heap[i]))
> -             SUBTABLE(at->at_heap[i])->at_default = next;
> +             art_allot(at, i, an, next);
> +     else if (!ISLEAF(node))
> +             srp_swap_locked(&SUBTABLE(node)->at_default, next);
>       else
> -             at->at_heap[i].node = next;
> +             srp_swap_locked(&at->at_heap[i].node, next);
> 
>       /* We have removed an entry from this table. */
>       art_table_free(ar, at);
> 
> -     return (node);
> +     return (an);
> }
> 
> void
> @@ -539,17 +571,26 @@ art_table_ref(struct art_root *ar, struc
>       at->at_refcnt++;
> }
> 
> +static inline int
> +art_table_rele(struct art_table *at)
> +{
> +     if (at == NULL)
> +             return (0);
> +
> +     return (--at->at_refcnt == 0);
> +}
> +
> int
> art_table_free(struct art_root *ar, struct art_table *at)
> {
> -     if (--at->at_refcnt == 0) {
> +     if (art_table_rele(at)) {
>               /*
>                * Garbage collect this table and all its parents
>                * that are empty.
>                */
>               do {
>                       at = art_table_put(ar, at);
> -             } while (at != NULL && --at->at_refcnt == 0);
> +             } while (art_table_rele(at));
> 
>               return (1);
>       }
> @@ -564,9 +605,12 @@ int
> art_walk(struct art_root *ar, int (*f)(struct art_node *, void *), void *arg)
> {
>       struct art_table        *at;
> +     struct art_node         *node;
>       int                      error;
> 
> -     at = ar->ar_root;
> +     KERNEL_ASSERT_LOCKED();
> +
> +     at = srp_get_locked(&ar->ar_root);
>       if (at == NULL)
>               return (0);
> 
> @@ -574,8 +618,9 @@ art_walk(struct art_root *ar, int (*f)(s
>        * The default route should be processed here because the root
>        * table does not have a parent.
>        */
> -     if (at->at_default != NULL) {
> -             error = (*f)(at->at_default, arg);
> +     node = srp_get_locked(&at->at_default);
> +     if (node != NULL) {
> +             error = (*f)(node, arg);
>               if (error)
>                       return (error);
>       }
> @@ -588,6 +633,7 @@ art_table_walk(struct art_root *ar, stru
>     int (*f)(struct art_node *, void *), void *arg)
> {
>       struct art_node         *next, *an = NULL;
> +     struct art_node         *node;
>       int                      i, j, error = 0;
>       uint32_t                 maxfringe = (at->at_minfringe << 1);
> 
> @@ -604,8 +650,8 @@ art_table_walk(struct art_root *ar, stru
>                * be processed more than once.
>                */
>               for (i = max(j, 2); i < at->at_minfringe; i <<= 1) {
> -                     next = at->at_heap[i >> 1].node;
> -                     an = at->at_heap[i].node;
> +                     next = srp_get_locked(&at->at_heap[i >> 1].node);
> +                     an = srp_get_locked(&at->at_heap[i].node);
>                       if ((an != NULL) && (an != next)) {
>                               error = (*f)(an, arg);
>                               if (error)
> @@ -618,21 +664,23 @@ art_table_walk(struct art_root *ar, stru
>        * Iterate fringe nodes.
>        */
>       for (i = at->at_minfringe; i < maxfringe; i++) {
> -             next = at->at_heap[i >> 1].node;
> -             if (!ISLEAF(at->at_heap[i]))
> -                     an = SUBTABLE(at->at_heap[i])->at_default;
> +             next = srp_get_locked(&at->at_heap[i >> 1].node);
> +             node = srp_get_locked(&at->at_heap[i].node);
> +             if (!ISLEAF(node))
> +                     an = srp_get_locked(&SUBTABLE(node)->at_default);
>               else
> -                     an = at->at_heap[i].node;
> +                     an = node;
> +
>               if ((an != NULL) && (an != next)) {
>                       error = (*f)(an, arg);
>                       if (error)
>                               goto out;
>               }
> 
> -             if (ISLEAF(at->at_heap[i]))
> +             if (ISLEAF(node))
>                       continue;
> 
> -             error = art_table_walk(ar, SUBTABLE(at->at_heap[i]), f, arg);
> +             error = art_table_walk(ar, SUBTABLE(node), f, arg);
>               if (error)
>                       break;
>       }
> @@ -652,6 +700,7 @@ struct art_table *
> art_table_get(struct art_root *ar, struct art_table *parent, int j)
> {
>       struct art_table        *at;
> +     struct art_node         *node;
>       void                    *at_heap;
>       uint32_t                 lvl;
> 
> @@ -694,7 +743,9 @@ art_table_get(struct art_root *ar, struc
>       at->at_refcnt = 0;
> 
>       if (parent != NULL) {
> -             at->at_default = parent->at_heap[j].node;
> +             node = srp_get_locked(&parent->at_heap[j].node);
> +             /* node isn't being deleted, no srp_finalize needed */
> +             srp_swap_locked(&at->at_default, node);
>               at->at_offset = (parent->at_offset + parent->at_bits);
>       }
> 
> @@ -711,20 +762,24 @@ struct art_table *
> art_table_put(struct art_root *ar, struct art_table *at)
> {
>       struct art_table        *parent = at->at_parent;
> -     uint32_t                 lvl = at->at_level;
> +     struct art_node         *node;
>       uint32_t                 j = at->at_index;
> 
> +     KASSERT(at->at_refcnt == 0);
>       KASSERT(j != 0 && j != 1);
> -     KASSERT(parent != NULL || j == -1);
> 
>       if (parent != NULL) {
> -             KASSERT(lvl == parent->at_level + 1);
> +             KASSERT(j != -1);
> +             KASSERT(at->at_level == parent->at_level + 1);
>               KASSERT(parent->at_refcnt >= 1);
> 
>               /* Give the route back to its parent. */
> -             parent->at_heap[j].node = at->at_default;
> +             node = srp_get_locked(&at->at_default);
> +             srp_swap_locked(&parent->at_heap[j].node, node);
>       } else {
> -             ar->ar_root = NULL;
> +             KASSERT(j == -1);
> +             KASSERT(at->at_level == 0);
> +             srp_swap_locked(&ar->ar_root, NULL);
>       }
> 
>       mtx_enter(&art_table_gc_mtx);
> @@ -750,6 +805,11 @@ art_table_gc(void *null)
>       while (at != NULL) {
>               next = at->at_parent;
> 
> +             if (at->at_level == 0)
> +                     srp_finalize(at, "arttfini");
> +             else
> +                     srp_finalize(ASNODE(at), "arttfini");
> +
>               switch (AT_HEAPSIZE(at->at_bits)) {
>               case AT_HEAPSIZE(4):
>                       pool_put(&at_heap_4_pool, at->at_heap);
> @@ -794,7 +854,8 @@ void
> art_allot(struct art_table *at, int i, struct art_node *old,
>     struct art_node *new)
> {
> -     int                     k = i;
> +     struct art_node         *node, *dflt;
> +     int                      k = i;
> 
>       KASSERT(i < at->at_minfringe);
> 
> @@ -805,12 +866,15 @@ again:
> 
>       /* Change fringe nodes. */
>       while (1) {
> -             if (!ISLEAF(at->at_heap[k])) {
> -                     if (SUBTABLE(at->at_heap[k])->at_default == old) {
> -                             SUBTABLE(at->at_heap[k])->at_default = new;
> +             node = srp_get_locked(&at->at_heap[k].node);
> +             if (!ISLEAF(node)) {
> +                     dflt = srp_get_locked(&SUBTABLE(node)->at_default);
> +                     if (dflt == old) {
> +                             srp_swap_locked(&SUBTABLE(node)->at_default,
> +                                 new);
>                       }
> -             } else if (at->at_heap[k].node == old) {
> -                     at->at_heap[k].node = new;
> +             } else if (node == old) {
> +                     srp_swap_locked(&at->at_heap[k].node, new);
>               }
>               if (k % 2)
>                       goto moveup;
> @@ -818,7 +882,8 @@ again:
>       }
> 
> nonfringe:
> -     if (at->at_heap[k].node == old)
> +     node = srp_get_locked(&at->at_heap[k].node);
> +     if (node == old)
>               goto again;
> moveon:
>       if (k % 2)
> @@ -827,7 +892,7 @@ moveon:
>       goto nonfringe;
> moveup:
>       k >>= 1;
> -     at->at_heap[k].node = new;
> +     srp_swap_locked(&at->at_heap[k].node, new);
> 
>       /* Change non-fringe node. */
>       if (k != i)
> @@ -875,6 +940,8 @@ art_gc(void *null)
> 
>       while (an != NULL) {
>               next = an->an_gc;
> +
> +             srp_finalize(an, "artnfini");
> 
>               pool_put(&an_pool, an);
> 
> Index: rtable.c
> ===================================================================
> RCS file: /cvs/src/sys/net/rtable.c,v
> retrieving revision 1.45
> diff -u -p -u -p -r1.45 rtable.c
> --- rtable.c  1 Jun 2016 09:46:19 -0000       1.45
> +++ rtable.c  6 Jun 2016 00:32:31 -0000
> @@ -535,8 +535,8 @@ rtable_lookup(unsigned int rtableid, str
> {
>       struct art_root                 *ar;
>       struct art_node                 *an;
> -     struct rtentry                  *rt;
> -     struct srp_ref                   sr;
> +     struct rtentry                  *rt = NULL;
> +     struct srp_ref                   sr, nsr;
>       uint8_t                         *addr;
>       int                              plen;
> 
> @@ -548,19 +548,20 @@ rtable_lookup(unsigned int rtableid, str
> 
>       /* No need for a perfect match. */
>       if (mask == NULL) {
> -             an = art_match(ar, addr);
> +             an = art_match(ar, addr, &nsr);
>               if (an == NULL)
> -                     return (NULL);
> +                     goto out;
>       } else {
>               plen = rtable_satoplen(dst->sa_family, mask);
>               if (plen == -1)
>                       return (NULL);
> 
> -             an = art_lookup(ar, addr, plen);
> +             an = art_lookup(ar, addr, plen, &nsr);
> +
>               /* Make sure we've got a perfect match. */
>               if (an == NULL || an->an_plen != plen ||
>                   memcmp(an->an_dst, dst, dst->sa_len))
> -                     return (NULL);
> +                     goto out;
>       }
> 
> #ifdef SMALL_KERNEL
> @@ -578,14 +579,13 @@ rtable_lookup(unsigned int rtableid, str
>                   memcmp(rt->rt_gateway, gateway, gateway->sa_len) == 0)
>                       break;
>       }
> -     if (rt == NULL) {
> -             SRPL_LEAVE(&sr);
> -             return (NULL);
> -     }
> #endif /* SMALL_KERNEL */
> +     if (rt != NULL)
> +             rtref(rt);
> 
> -     rtref(rt);
>       SRPL_LEAVE(&sr);
> +out:
> +     srp_leave(&nsr);
> 
>       return (rt);
> }
> @@ -596,7 +596,7 @@ rtable_match(unsigned int rtableid, stru
>       struct art_root                 *ar;
>       struct art_node                 *an;
>       struct rtentry                  *rt = NULL;
> -     struct srp_ref                   sr;
> +     struct srp_ref                   sr, nsr;
>       uint8_t                         *addr;
> #ifndef SMALL_KERNEL
>       int                              hash;
> @@ -608,8 +608,7 @@ rtable_match(unsigned int rtableid, stru
> 
>       addr = satoaddr(ar, dst);
> 
> -     KERNEL_LOCK();
> -     an = art_match(ar, addr);
> +     an = art_match(ar, addr, &nsr);
>       if (an == NULL)
>               goto out;
> 
> @@ -634,6 +633,16 @@ rtable_match(unsigned int rtableid, stru
> 
>               threshold = (0xffff / npaths) + 1;
> 
> +             /*
> +              * we have no protection against concurrent modification of the
> +              * route list attached to the node, so we won't necessarily
> +              * have the same number of routes.  for most modifications,
> +              * we'll pick a route that we wouldn't have if we only saw the
> +              * list before or after the change.  if we were going to use
> +              * the last available route, but it got removed, we'll hit
> +              * the end of the list and then pick the first route.
> +              */
> +
>               mrt = SRPL_ENTER(&sr, &an->an_rtlist);
>               while (hash > threshold && mrt != NULL) {
>                       if (mrt->rt_priority == rt->rt_priority)
> @@ -650,7 +659,7 @@ rtable_match(unsigned int rtableid, stru
>       }
> #endif /* SMALL_KERNEL */
> out:
> -     KERNEL_UNLOCK();
> +     srp_leave(&nsr);
>       return (rt);
> }
> 
> @@ -661,12 +670,14 @@ rtable_insert(unsigned int rtableid, str
> {
> #ifndef SMALL_KERNEL
>       struct rtentry                  *mrt;
> +     struct srp_ref                   sr;
> #endif /* SMALL_KERNEL */
>       struct art_root                 *ar;
>       struct art_node                 *an, *prev;
>       uint8_t                         *addr;
>       int                              plen;
>       unsigned int                     rt_flags;
> +     int                              error = 0;
> 
>       KERNEL_ASSERT_LOCKED();
> 
> @@ -679,12 +690,15 @@ rtable_insert(unsigned int rtableid, str
>       if (plen == -1)
>               return (EINVAL);
> 
> +     rtref(rt); /* guarantee rtfree won't do anything during insert */
> +
> #ifndef SMALL_KERNEL
>       /* Do not permit exactly the same dst/mask/gw pair. */
> -     an = art_lookup(ar, addr, plen);
> +     an = art_lookup(ar, addr, plen, &sr);
> +     srp_leave(&sr); /* an can't go away while we have the lock */
>       if (an != NULL && an->an_plen == plen &&
>           !memcmp(an->an_dst, dst, dst->sa_len)) {
> -             struct rtentry  *mrt;
> +             struct rtentry  *mrt;
>               int              mpathok = ISSET(rt->rt_flags, RTF_MPATH);
> 
>               SRPL_FOREACH_LOCKED(mrt, &an->an_rtlist, rt_next) {
> @@ -695,15 +709,18 @@ rtable_insert(unsigned int rtableid, str
>                       if (!mpathok ||
>                           (mrt->rt_gateway->sa_len == gateway->sa_len &&
>                           !memcmp(mrt->rt_gateway, gateway, 
> gateway->sa_len))){
> -                             return (EEXIST);
> +                             error = EEXIST;
> +                             goto leave;
>                       }
>               }
>       }
> #endif /* SMALL_KERNEL */
> 
>       an = art_get(dst, plen);
> -     if (an == NULL)
> -             return (ENOBUFS);
> +     if (an == NULL) {
> +             error = ENOBUFS;
> +             goto leave;
> +     }
> 
>       /* prepare for immediate operation if insert succeeds */
>       rt_flags = rt->rt_flags;
> @@ -718,9 +735,11 @@ rtable_insert(unsigned int rtableid, str
>                   rt_next);
>               rt->rt_flags = rt_flags;
>               art_put(an);
> - 
> -             if (prev == NULL)
> -                     return ESRCH;
> +
> +             if (prev == NULL) {
> +                     error = ESRCH;
> +                     goto leave;
> +             }
> 
> #ifndef SMALL_KERNEL
>               an = prev;
> @@ -752,11 +771,12 @@ rtable_insert(unsigned int rtableid, str
>               /* Put newly inserted entry at the right place. */
>               rtable_mpath_reprio(rtableid, dst, mask, rt->rt_priority, rt);
> #else
> -             return (EEXIST);
> +             error = EEXIST;
> #endif /* SMALL_KERNEL */
>       }
> -
> -     return (0);
> +leave:
> +     rtfree(rt);
> +     return (error);
> }
> 
> int
> @@ -765,6 +785,7 @@ rtable_delete(unsigned int rtableid, str
> {
>       struct art_root                 *ar;
>       struct art_node                 *an;
> +     struct srp_ref                   sr;
>       uint8_t                         *addr;
>       int                              plen;
> #ifndef SMALL_KERNEL
> @@ -772,8 +793,6 @@ rtable_delete(unsigned int rtableid, str
>       int                              npaths = 0;
> #endif /* SMALL_KERNEL */
> 
> -     KERNEL_ASSERT_LOCKED();
> -
>       ar = rtable_get(rtableid, dst->sa_family);
>       if (ar == NULL)
>               return (EAFNOSUPPORT);
> @@ -781,7 +800,11 @@ rtable_delete(unsigned int rtableid, str
>       addr = satoaddr(ar, dst);
>       plen = rtable_satoplen(dst->sa_family, mask);
> 
> -     an = art_lookup(ar, addr, plen);
> +     KERNEL_ASSERT_LOCKED();
> +
> +     an = art_lookup(ar, addr, plen, &sr);
> +     srp_leave(&sr); /* an can't go away while we have the lock */
> +
>       /* Make sure we've got a perfect match. */
>       if (an == NULL || an->an_plen != plen ||
>           memcmp(an->an_dst, dst, dst->sa_len))
> @@ -796,7 +819,7 @@ rtable_delete(unsigned int rtableid, str
>               npaths++;
> 
>       if (npaths > 1) {
> -             KASSERT(rt->rt_refcnt >= 2);
> +             KASSERT(rt->rt_refcnt >= 1);
>               SRPL_REMOVE_LOCKED(&rt_rc, &an->an_rtlist, rt, rtentry,
>                   rt_next);
> 
> @@ -811,7 +834,7 @@ rtable_delete(unsigned int rtableid, str
>       if (art_delete(ar, an, addr, plen) == NULL)
>               return (ESRCH);
> 
> -     KASSERT(rt->rt_refcnt >= 2);
> +     KASSERT(rt->rt_refcnt >= 1);
>       SRPL_REMOVE_LOCKED(&rt_rc, &an->an_rtlist, rt, rtentry, rt_next);
> 
>       art_put(an);
> @@ -879,6 +902,7 @@ rtable_mpath_reprio(unsigned int rtablei
> {
>       struct art_root                 *ar;
>       struct art_node                 *an;
> +     struct srp_ref                   sr;
>       uint8_t                         *addr;
>       int                              plen;
>       struct rtentry                  *mrt, *prt = NULL;
> @@ -890,13 +914,15 @@ rtable_mpath_reprio(unsigned int rtablei
>       addr = satoaddr(ar, dst);
>       plen = rtable_satoplen(dst->sa_family, mask);
> 
> -     an = art_lookup(ar, addr, plen);
> +     KERNEL_ASSERT_LOCKED();
> +
> +     an = art_lookup(ar, addr, plen, &sr);
> +     srp_leave(&sr); /* an can't go away while we have the lock */
> +
>       /* Make sure we've got a perfect match. */
>       if (an == NULL || an->an_plen != plen ||
>           memcmp(an->an_dst, dst, dst->sa_len))
>               return (ESRCH);
> -
> -     KERNEL_ASSERT_LOCKED();
> 
>       rtref(rt); /* keep rt alive in between remove and add */
>       SRPL_REMOVE_LOCKED(&rt_rc, &an->an_rtlist, rt, rtentry, rt_next);
> 

Reply via email to