On 04/12/21(Sat) 01:02, Alexander Bluhm wrote:
> Hi,
>
> As I want a read-only net lock for forwarding, all paths should be
> checked for the correct net lock and asserts. I found code in
> in_pcbhashlookup() where reading the PCB table results in a write
> to optimize the cache.
>
> Porperly protecting PCB hashes is out of scope for parallel forwarding.
> Can we get away with this hack? Only update the cache when we are
> in TCP oder UDP stack with the write lock. The access from pf is
> read-only.
>
> NET_WLOCKED() indicates whether we may change data structures.
I recall that we currently do not want to introduce such idiom: change
the behavior of the code depending on which lock is held by the caller.
Can we instead assert that a write-lock is held before modifying the
list?
> Also move the assert from pf to in_pcb where the critical section
> is.
>
> bluhm
>
> Index: net/pf.c
> ===================================================================
> RCS file: /data/mirror/openbsd/cvs/src/sys/net/pf.c,v
> retrieving revision 1.1122
> diff -u -p -r1.1122 pf.c
> --- net/pf.c 7 Jul 2021 18:38:25 -0000 1.1122
> +++ net/pf.c 3 Dec 2021 22:20:32 -0000
> @@ -3317,14 +3317,12 @@ pf_socket_lookup(struct pf_pdesc *pd)
> sport = pd->hdr.tcp.th_sport;
> dport = pd->hdr.tcp.th_dport;
> PF_ASSERT_LOCKED();
> - NET_ASSERT_LOCKED();
> tb = &tcbtable;
> break;
> case IPPROTO_UDP:
> sport = pd->hdr.udp.uh_sport;
> dport = pd->hdr.udp.uh_dport;
> PF_ASSERT_LOCKED();
> - NET_ASSERT_LOCKED();
> tb = &udbtable;
> break;
> default:
> Index: netinet/in_pcb.c
> ===================================================================
> RCS file: /data/mirror/openbsd/cvs/src/sys/netinet/in_pcb.c,v
> retrieving revision 1.256
> diff -u -p -r1.256 in_pcb.c
> --- netinet/in_pcb.c 25 Oct 2021 22:20:47 -0000 1.256
> +++ netinet/in_pcb.c 3 Dec 2021 22:20:32 -0000
> @@ -1069,6 +1069,8 @@ in_pcbhashlookup(struct inpcbtable *tabl
> u_int16_t fport = fport_arg, lport = lport_arg;
> u_int rdomain;
>
> + NET_ASSERT_LOCKED();
> +
> rdomain = rtable_l2(rtable);
> head = in_pcbhash(table, rdomain, &faddr, fport, &laddr, lport);
> LIST_FOREACH(inp, head, inp_hash) {
> @@ -1085,7 +1087,7 @@ in_pcbhashlookup(struct inpcbtable *tabl
> * repeated accesses are quicker. This is analogous to
> * the historic single-entry PCB cache.
> */
> - if (inp != LIST_FIRST(head)) {
> + if (NET_WLOCKED() && inp != LIST_FIRST(head)) {
> LIST_REMOVE(inp, inp_hash);
> LIST_INSERT_HEAD(head, inp, inp_hash);
> }
> @@ -1119,6 +1121,8 @@ in_pcblookup_listen(struct inpcbtable *t
> u_int16_t lport = lport_arg;
> u_int rdomain;
>
> + NET_ASSERT_LOCKED();
> +
> key1 = &laddr;
> key2 = &zeroin_addr;
> #if NPF > 0
> @@ -1185,7 +1189,7 @@ in_pcblookup_listen(struct inpcbtable *t
> * repeated accesses are quicker. This is analogous to
> * the historic single-entry PCB cache.
> */
> - if (inp != NULL && inp != LIST_FIRST(head)) {
> + if (NET_WLOCKED() && inp != NULL && inp != LIST_FIRST(head)) {
> LIST_REMOVE(inp, inp_hash);
> LIST_INSERT_HEAD(head, inp, inp_hash);
> }
> Index: sys/systm.h
> ===================================================================
> RCS file: /data/mirror/openbsd/cvs/src/sys/sys/systm.h,v
> retrieving revision 1.154
> diff -u -p -r1.154 systm.h
> --- sys/systm.h 2 Jun 2021 00:39:25 -0000 1.154
> +++ sys/systm.h 3 Dec 2021 22:20:32 -0000
> @@ -344,6 +344,8 @@ extern struct rwlock netlock;
> #define NET_RLOCK_IN_IOCTL() do { rw_enter_read(&netlock); } while
> (0)
> #define NET_RUNLOCK_IN_IOCTL() do { rw_exit_read(&netlock); } while (0)
>
> +#define NET_WLOCKED() (rw_status(&netlock) == RW_WRITE)
> +
> #ifdef DIAGNOSTIC
>
> #define NET_ASSERT_UNLOCKED()
> \
>