On Mon, Aug 15, 2016 at 10:08:16PM +1000, Jonathan Matthew wrote:
> This removes ART's reliance on the kernel lock to serialise updates.
> I sent out an earlier version of this prior to 6.0, but it didn't make it in.
> Since then, we've decided to go with rwlocks in the packet processing path,
> so here's a version with an rwlock instead of a mutex.
ok by me.
>
>
> Index: art.c
> ===================================================================
> RCS file: /cvs/src/sys/net/art.c,v
> retrieving revision 1.22
> diff -u -p -r1.22 art.c
> --- art.c 19 Jul 2016 10:51:44 -0000 1.22
> +++ art.c 14 Aug 2016 11:10:45 -0000
> @@ -152,6 +152,7 @@ art_alloc(unsigned int rtableid, unsigne
>
> ar->ar_off = off;
> ar->ar_rtableid = rtableid;
> + rw_init(&ar->ar_lock, "art");
>
> return (ar);
> }
> @@ -378,7 +379,7 @@ art_insert(struct art_root *ar, struct a
> struct art_node *node;
> int i, j;
>
> - KERNEL_ASSERT_LOCKED();
> + rw_assert_wrlock(&ar->ar_lock);
> KASSERT(plen >= 0 && plen <= ar->ar_alen);
>
> at = srp_get_locked(&ar->ar_root);
> @@ -482,7 +483,7 @@ art_delete(struct art_root *ar, struct a
> struct art_node *node;
> int i, j;
>
> - KERNEL_ASSERT_LOCKED();
> + rw_assert_wrlock(&ar->ar_lock);
> KASSERT(plen >= 0 && plen <= ar->ar_alen);
>
> at = srp_get_locked(&ar->ar_root);
> @@ -613,7 +614,7 @@ art_walk(struct art_root *ar, int (*f)(s
> struct art_node *node;
> int error = 0;
>
> - KERNEL_LOCK();
> + rw_enter_write(&ar->ar_lock);
> at = srp_get_locked(&ar->ar_root);
> if (at != NULL) {
> art_table_ref(ar, at);
> @@ -631,7 +632,7 @@ art_walk(struct art_root *ar, int (*f)(s
>
> art_table_free(ar, at);
> }
> - KERNEL_UNLOCK();
> + rw_exit_write(&ar->ar_lock);
>
> return (error);
> }
> @@ -708,9 +709,9 @@ art_walk_apply(struct art_root *ar,
>
> if ((an != NULL) && (an != next)) {
> /* this assumes an->an_dst is not used by f */
> - KERNEL_UNLOCK();
> + rw_exit_write(&ar->ar_lock);
> error = (*f)(an, arg);
> - KERNEL_LOCK();
> + rw_enter_write(&ar->ar_lock);
> }
>
> return (error);
> Index: art.h
> ===================================================================
> RCS file: /cvs/src/sys/net/art.h,v
> retrieving revision 1.14
> diff -u -p -r1.14 art.h
> --- art.h 14 Jun 2016 04:42:02 -0000 1.14
> +++ art.h 14 Aug 2016 11:10:45 -0000
> @@ -19,6 +19,8 @@
> #ifndef _NET_ART_H_
> #define _NET_ART_H_
>
> +#include <sys/rwlock.h>
> +
> #define ART_MAXLVL 32 /* We currently use 32 levels for IPv6. */
>
> /*
> @@ -26,6 +28,7 @@
> */
> struct art_root {
> struct srp ar_root; /* First table */
> + struct rwlock ar_lock; /* Serialise modifications */
> uint8_t ar_bits[ART_MAXLVL]; /* Per level stride */
> uint8_t ar_nlvl; /* Number of levels */
> uint8_t ar_alen; /* Address length in bits */
> Index: rtable.c
> ===================================================================
> RCS file: /cvs/src/sys/net/rtable.c,v
> retrieving revision 1.50
> diff -u -p -r1.50 rtable.c
> --- rtable.c 19 Jul 2016 10:51:44 -0000 1.50
> +++ rtable.c 14 Aug 2016 11:10:45 -0000
> @@ -515,6 +515,10 @@ static inline uint8_t *satoaddr(struct a
> void rtentry_ref(void *, void *);
> void rtentry_unref(void *, void *);
>
> +#ifndef SMALL_KERNEL
> +void rtable_mpath_insert(struct art_node *, struct rtentry *);
> +#endif
> +
> struct srpl_rc rt_rc = SRPL_RC_INITIALIZER(rtentry_ref, rtentry_unref, NULL);
>
> void
> @@ -679,8 +683,6 @@ rtable_insert(unsigned int rtableid, str
> unsigned int rt_flags;
> int error = 0;
>
> - KERNEL_ASSERT_LOCKED();
> -
> ar = rtable_get(rtableid, dst->sa_family);
> if (ar == NULL)
> return (EAFNOSUPPORT);
> @@ -691,6 +693,7 @@ rtable_insert(unsigned int rtableid, str
> return (EINVAL);
>
> rtref(rt); /* guarantee rtfree won't do anything during insert */
> + rw_enter_write(&ar->ar_lock);
>
> #ifndef SMALL_KERNEL
> /* Do not permit exactly the same dst/mask/gw pair. */
> @@ -766,15 +769,14 @@ rtable_insert(unsigned int rtableid, str
> }
> }
>
> - SRPL_INSERT_HEAD_LOCKED(&rt_rc, &an->an_rtlist, rt, rt_next);
> -
> /* Put newly inserted entry at the right place. */
> - rtable_mpath_reprio(rtableid, dst, mask, rt->rt_priority, rt);
> + rtable_mpath_insert(an, rt);
> #else
> error = EEXIST;
> #endif /* SMALL_KERNEL */
> }
> leave:
> + rw_exit_write(&ar->ar_lock);
> rtfree(rt);
> return (error);
> }
> @@ -792,6 +794,7 @@ rtable_delete(unsigned int rtableid, str
> struct rtentry *mrt;
> int npaths = 0;
> #endif /* SMALL_KERNEL */
> + int error = 0;
>
> ar = rtable_get(rtableid, dst->sa_family);
> if (ar == NULL)
> @@ -800,15 +803,17 @@ rtable_delete(unsigned int rtableid, str
> addr = satoaddr(ar, dst);
> plen = rtable_satoplen(dst->sa_family, mask);
>
> - KERNEL_ASSERT_LOCKED();
> -
> + rtref(rt); /* guarantee rtfree won't do anything under ar_lock */
> + rw_enter_write(&ar->ar_lock);
> an = art_lookup(ar, addr, plen, &sr);
> srp_leave(&sr); /* an can't go away while we have the lock */
>
> /* Make sure we've got a perfect match. */
> if (an == NULL || an->an_plen != plen ||
> - memcmp(an->an_dst, dst, dst->sa_len))
> - return (ESRCH);
> + memcmp(an->an_dst, dst, dst->sa_len)) {
> + error = ESRCH;
> + goto leave;
> + }
>
> #ifndef SMALL_KERNEL
> /*
> @@ -827,18 +832,23 @@ rtable_delete(unsigned int rtableid, str
> an->an_dst = mrt->rt_dest;
> if (npaths == 2)
> mrt->rt_flags &= ~RTF_MPATH;
> - return (0);
> +
> + goto leave;
> }
> #endif /* SMALL_KERNEL */
>
> if (art_delete(ar, an, addr, plen) == NULL)
> - return (ESRCH);
> + panic("art_delete failed to find node %p", an);
>
> KASSERT(rt->rt_refcnt >= 1);
> SRPL_REMOVE_LOCKED(&rt_rc, &an->an_rtlist, rt, rtentry, rt_next);
> -
> art_put(an);
> - return (0);
> +
> +leave:
> + rw_exit_write(&ar->ar_lock);
> + rtfree(rt);
> +
> + return (error);
> }
>
> struct rtable_walk_cookie {
> @@ -905,7 +915,7 @@ rtable_mpath_reprio(unsigned int rtablei
> struct srp_ref sr;
> uint8_t *addr;
> int plen;
> - struct rtentry *mrt, *prt = NULL;
> + int error = 0;
>
> ar = rtable_get(rtableid, dst->sa_family);
> if (ar == NULL)
> @@ -914,19 +924,32 @@ rtable_mpath_reprio(unsigned int rtablei
> addr = satoaddr(ar, dst);
> plen = rtable_satoplen(dst->sa_family, mask);
>
> - KERNEL_ASSERT_LOCKED();
> -
> + rw_enter_write(&ar->ar_lock);
> an = art_lookup(ar, addr, plen, &sr);
> srp_leave(&sr); /* an can't go away while we have the lock */
>
> /* Make sure we've got a perfect match. */
> if (an == NULL || an->an_plen != plen ||
> memcmp(an->an_dst, dst, dst->sa_len))
> - return (ESRCH);
> + error = ESRCH;
> + else {
> + rtref(rt); /* keep rt alive in between remove and insert */
> + SRPL_REMOVE_LOCKED(&rt_rc, &an->an_rtlist,
> + rt, rtentry, rt_next);
> + rt->rt_priority = prio;
> + rtable_mpath_insert(an, rt);
> + rtfree(rt);
> + }
> + rw_exit_write(&ar->ar_lock);
>
> - rtref(rt); /* keep rt alive in between remove and add */
> - SRPL_REMOVE_LOCKED(&rt_rc, &an->an_rtlist, rt, rtentry, rt_next);
> - rt->rt_priority = prio;
> + return (error);
> +}
> +
> +void
> +rtable_mpath_insert(struct art_node *an, struct rtentry *rt)
> +{
> + struct rtentry *mrt, *prt = NULL;
> + uint8_t prio = rt->rt_priority;
>
> if ((mrt = SRPL_FIRST_LOCKED(&an->an_rtlist)) != NULL) {
> /*
> @@ -957,9 +980,6 @@ rtable_mpath_reprio(unsigned int rtablei
> } else {
> SRPL_INSERT_HEAD_LOCKED(&rt_rc, &an->an_rtlist, rt, rt_next);
> }
> - rtfree(rt);
> -
> - return (0);
> }
>
> struct rtentry *
>