On 29/06/22(Wed) 19:40, Alexander Bluhm wrote:
> Hi,
>
> To fix the KASSERT(la != NULL) we have to protect the rt_llinfo
> with a mutex. The idea is to keep rt_llinfo and RTF_LLINFO consistent.
> Also do not put the mutex in the fast path.
Losing the RTM_ADD/DELETE race is not a bug. I would not add a printf
in these cases. I understand you might want one for debugging purposes
but I don't see any value in committing it. Do you agree?
Note that some times the code checks for the RTF_LLINFO flags and some
time for rt_llinfo != NULL. This is inconsistent and a bit confusing
now that we use a mutex to protect those states.
Could you document that rt_llinfo is now protected by the mutex (or
KERNEL_LOCK())?
Anyway this is an improvement ok mpi@
PS: What about ND6?
> Index: netinet/if_ether.c
> ===================================================================
> RCS file: /data/mirror/openbsd/cvs/src/sys/netinet/if_ether.c,v
> retrieving revision 1.250
> diff -u -p -r1.250 if_ether.c
> --- netinet/if_ether.c 27 Jun 2022 20:47:10 -0000 1.250
> +++ netinet/if_ether.c 28 Jun 2022 14:00:12 -0000
> @@ -101,6 +101,8 @@ void arpreply(struct ifnet *, struct mbu
> unsigned int);
>
> struct niqueue arpinq = NIQUEUE_INITIALIZER(50, NETISR_ARP);
> +
> +/* llinfo_arp live time, rt_llinfo and RTF_LLINFO are protected by arp_mtx */
> struct mutex arp_mtx = MUTEX_INITIALIZER(IPL_SOFTNET);
>
> LIST_HEAD(, llinfo_arp) arp_list; /* [mN] list of all llinfo_arp structures
> */
> @@ -155,7 +157,7 @@ void
> arp_rtrequest(struct ifnet *ifp, int req, struct rtentry *rt)
> {
> struct sockaddr *gate = rt->rt_gateway;
> - struct llinfo_arp *la = (struct llinfo_arp *)rt->rt_llinfo;
> + struct llinfo_arp *la;
> time_t uptime;
>
> NET_ASSERT_LOCKED();
> @@ -171,7 +173,7 @@ arp_rtrequest(struct ifnet *ifp, int req
> rt->rt_expire = 0;
> break;
> }
> - if ((rt->rt_flags & RTF_LOCAL) && !la)
> + if ((rt->rt_flags & RTF_LOCAL) && rt->rt_llinfo == NULL)
> rt->rt_expire = 0;
> /*
> * Announce a new entry if requested or warn the user
> @@ -192,44 +194,54 @@ arp_rtrequest(struct ifnet *ifp, int req
> }
> satosdl(gate)->sdl_type = ifp->if_type;
> satosdl(gate)->sdl_index = ifp->if_index;
> - if (la != NULL)
> - break; /* This happens on a route change */
> /*
> * Case 2: This route may come from cloning, or a manual route
> * add with a LL address.
> */
> la = pool_get(&arp_pool, PR_NOWAIT | PR_ZERO);
> - rt->rt_llinfo = (caddr_t)la;
> if (la == NULL) {
> log(LOG_DEBUG, "%s: pool get failed\n", __func__);
> break;
> }
>
> + mtx_enter(&arp_mtx);
> + if (rt->rt_llinfo != NULL) {
> + /* we lost the race, another thread has entered it */
> + mtx_leave(&arp_mtx);
> + printf("%s: llinfo exists\n", __func__);
> + pool_put(&arp_pool, la);
> + break;
> + }
> mq_init(&la->la_mq, LA_HOLD_QUEUE, IPL_SOFTNET);
> + rt->rt_llinfo = (caddr_t)la;
> la->la_rt = rt;
> rt->rt_flags |= RTF_LLINFO;
> + LIST_INSERT_HEAD(&arp_list, la, la_list);
> if ((rt->rt_flags & RTF_LOCAL) == 0)
> rt->rt_expire = uptime;
> - mtx_enter(&arp_mtx);
> - LIST_INSERT_HEAD(&arp_list, la, la_list);
> mtx_leave(&arp_mtx);
> +
> break;
>
> case RTM_DELETE:
> - if (la == NULL)
> - break;
> mtx_enter(&arp_mtx);
> + la = (struct llinfo_arp *)rt->rt_llinfo;
> + if (la == NULL) {
> + /* we lost the race, another thread has removed it */
> + mtx_leave(&arp_mtx);
> + printf("%s: llinfo missing\n", __func__);
> + break;
> + }
> LIST_REMOVE(la, la_list);
> - mtx_leave(&arp_mtx);
> rt->rt_llinfo = NULL;
> rt->rt_flags &= ~RTF_LLINFO;
> atomic_sub_int(&la_hold_total, mq_purge(&la->la_mq));
> + mtx_leave(&arp_mtx);
> +
> pool_put(&arp_pool, la);
> break;
>
> case RTM_INVALIDATE:
> - if (la == NULL)
> - break;
> if (!ISSET(rt->rt_flags, RTF_LOCAL))
> arpinvalidate(rt);
> break;
> @@ -363,8 +375,6 @@ arpresolve(struct ifnet *ifp, struct rte
> goto bad;
> }
>
> - la = (struct llinfo_arp *)rt->rt_llinfo;
> - KASSERT(la != NULL);
>
> /*
> * Check the address family and length is valid, the address
> @@ -372,13 +382,27 @@ arpresolve(struct ifnet *ifp, struct rte
> */
> if ((rt->rt_expire == 0 || rt->rt_expire > uptime) &&
> sdl->sdl_family == AF_LINK && sdl->sdl_alen != 0) {
> + int refresh = 0;
> +
> memcpy(desten, LLADDR(sdl), sdl->sdl_alen);
>
> /* refresh ARP entry when timeout gets close */
> if (rt->rt_expire != 0 &&
> - rt->rt_expire - arpt_keep / 8 < uptime &&
> - la->la_refreshed + 30 < uptime) {
> - la->la_refreshed = uptime;
> + rt->rt_expire - arpt_keep / 8 < uptime) {
> +
> + mtx_enter(&arp_mtx);
> + if (ISSET(rt->rt_flags, RTF_LLINFO)) {
> + la = (struct llinfo_arp *)rt->rt_llinfo;
> + KASSERT(la != NULL);
> +
> + if (la->la_refreshed + 30 < uptime) {
> + la->la_refreshed = uptime;
> + refresh = 1;
> + }
> + }
> + mtx_leave(&arp_mtx);
> + }
> + if (refresh) {
> arprequest(ifp,
> &satosin(rt->rt_ifa->ifa_addr)->sin_addr.s_addr,
> &satosin(dst)->sin_addr.s_addr,
> @@ -724,12 +748,19 @@ arpcache(struct ifnet *ifp, struct ether
> void
> arpinvalidate(struct rtentry *rt)
> {
> - struct llinfo_arp *la = (struct llinfo_arp *)rt->rt_llinfo;
> + struct llinfo_arp *la;
> struct sockaddr_dl *sdl = satosdl(rt->rt_gateway);
>
> + mtx_enter(&arp_mtx);
> + la = (struct llinfo_arp *)rt->rt_llinfo;
> + if (la == NULL) {
> + mtx_leave(&arp_mtx);
> + return;
> + }
> atomic_sub_int(&la_hold_total, mq_purge(&la->la_mq));
> sdl->sdl_alen = 0;
> la->la_asked = 0;
> + mtx_leave(&arp_mtx);
> }
>
> /*
>