On Tue, Apr 27, 2021 at 07:27:43PM +0200, Alexander Bluhm wrote:
> Hi,
>
> I would like to document the locking mechanism of the global variables
> in ARP.
>
> The global arp_list is protected by net lock. This is not sufficent
> when we switch to shared netlock. So I added a mutex for insertion
> and removal if netlock is not exclusive.
>
> ok?
>
> bluhm
>
Hi.
I doubt `la_mq' should be marked as [I]. This is not true. It's mutable
but through api, so related locking description should be placed where
`mbuf_queue' structure is defined. Just leave `la_mq' without any marks
as we do in other similar places.
Also do you want to run arp_rtrequest() with shared lock in future? I'm
asking because we run in with exclusive lock held.
> Index: netinet/if_ether.c
> ===================================================================
> RCS file: /data/mirror/openbsd/cvs/src/sys/netinet/if_ether.c,v
> retrieving revision 1.246
> diff -u -p -r1.246 if_ether.c
> --- netinet/if_ether.c 26 Apr 2021 07:55:16 -0000 1.246
> +++ netinet/if_ether.c 27 Apr 2021 17:23:17 -0000
> @@ -65,10 +65,19 @@
> #include <netinet/ip_carp.h>
> #endif
>
> +/*
> + * Locks used to protect struct members in this file:
> + * a atomic operations
> + * I immutable after creation
> + * K kernel lock
> + * m arp mutex, needed when net lock is shared
> + * N net lock
> + */
> +
> struct llinfo_arp {
> - LIST_ENTRY(llinfo_arp) la_list;
> - struct rtentry *la_rt; /* backpointer to rtentry */
> - struct mbuf_queue la_mq; /* packet hold queue */
> + LIST_ENTRY(llinfo_arp) la_list; /* [mN] global arp_list */
> + struct rtentry *la_rt; /* [I] backpointer to rtentry */
> + struct mbuf_queue la_mq; /* [I] packet hold queue */
> time_t la_refreshed; /* when was refresh sent */
> int la_asked; /* number of queries sent */
> };
> @@ -76,9 +85,9 @@ struct llinfo_arp {
> #define LA_HOLD_TOTAL 100
>
> /* timer values */
> -int arpt_prune = (5 * 60); /* walk list every 5 minutes */
> -int arpt_keep = (20 * 60); /* once resolved, cache for 20 minutes */
> -int arpt_down = 20; /* once declared down, don't send for 20 secs */
> +int arpt_prune = (5 * 60); /* [I] walk list every 5 minutes */
> +int arpt_keep = (20 * 60); /* [a] once resolved, cache for 20 minutes */
> +int arpt_down = 20; /* [a] once declared down, don't send for 20 secs */
>
> struct mbuf *arppullup(struct mbuf *m);
> void arpinvalidate(struct rtentry *);
> @@ -92,11 +101,12 @@ void arpreply(struct ifnet *, struct mbu
> unsigned int);
>
> struct niqueue arpinq = NIQUEUE_INITIALIZER(50, NETISR_ARP);
> +struct mutex arp_mtx = MUTEX_INITIALIZER(IPL_SOFTNET);
>
> -LIST_HEAD(, llinfo_arp) arp_list;
> -struct pool arp_pool; /* pool for llinfo_arp structures */
> -int arp_maxtries = 5;
> -int la_hold_total;
> +LIST_HEAD(, llinfo_arp) arp_list; /* [Nm] list of all llinfo_arp structures
> */
> +struct pool arp_pool; /* [I] pool for llinfo_arp structures */
> +int arp_maxtries = 5; /* [I] arp requests before set to rejected */
> +int la_hold_total; /* [a] packets currently in the arp queue */
>
> #ifdef NFSCLIENT
> /* revarp state */
> @@ -117,6 +127,7 @@ arptimer(void *arg)
>
> NET_LOCK();
> timeout_add_sec(to, arpt_prune);
> + /* Net lock is exclusive, no arp mutex needed for arp_list here. */
> LIST_FOREACH_SAFE(la, &arp_list, la_list, nla) {
> struct rtentry *rt = la->la_rt;
>
> @@ -144,6 +155,8 @@ arp_rtrequest(struct ifnet *ifp, int req
> struct sockaddr *gate = rt->rt_gateway;
> struct llinfo_arp *la = (struct llinfo_arp *)rt->rt_llinfo;
>
> + NET_ASSERT_LOCKED();
> +
> if (ISSET(rt->rt_flags,
> RTF_GATEWAY|RTF_BROADCAST|RTF_MULTICAST|RTF_MPLS))
> return;
> @@ -194,13 +207,17 @@ arp_rtrequest(struct ifnet *ifp, int req
> rt->rt_flags |= RTF_LLINFO;
> if ((rt->rt_flags & RTF_LOCAL) == 0)
> rt->rt_expire = getuptime();
> + mtx_enter(&arp_mtx);
> LIST_INSERT_HEAD(&arp_list, la, la_list);
> + mtx_leave(&arp_mtx);
> break;
>
> case RTM_DELETE:
> if (la == NULL)
> break;
> + mtx_enter(&arp_mtx);
> LIST_REMOVE(la, la_list);
> + mtx_leave(&arp_mtx);
> rt->rt_llinfo = NULL;
> rt->rt_flags &= ~RTF_LLINFO;
> atomic_sub_int(&la_hold_total, mq_purge(&la->la_mq));
>