On Mon, Jul 19, 2021 at 08:02:30PM +0300, Vitaliy Makkoveev wrote: > I mean the case when ip_local() called by ip_ours(). Unfortunately, I'm > not familiar with PPTP but it looks affected because it don't use tcp or > udp as transport but encapsulates them into ip frames. Sorry for noise > if I'm wrong. > > +ip_ours(struct mbuf **mp, int *offp, int nxt, int af) > +{ > + /* We are already in a IPv4/IPv6 local deliver loop. */ > + if (af != AF_UNSPEC) > + return ip_local(mp, offp, nxt, af); > + > + niq_enqueue(&ipintrq, *mp); > + *mp = NULL; > + return IPPROTO_DONE; > +}
The af != AF_UNSPEC case already has the exclusive net lock. ipv4_input() sets AF_UNSPEC, the other case is for IP in IP header. The latter is called from ipintr(). I can put a NET_ASSERT_WLOCKED() into ip_local(). Still running a full regress with that. bluhm Index: net/if.c =================================================================== RCS file: /data/mirror/openbsd/cvs/src/sys/net/if.c,v retrieving revision 1.642 diff -u -p -r1.642 if.c --- net/if.c 30 Jun 2021 13:23:33 -0000 1.642 +++ net/if.c 19 Jul 2021 14:51:31 -0000 @@ -109,6 +109,10 @@ #include <netinet6/ip6_var.h> #endif +#ifdef IPSEC +#include <netinet/ip_ipsp.h> +#endif + #ifdef MPLS #include <netmpls/mpls.h> #endif @@ -238,7 +242,7 @@ int ifq_congestion; int netisr; -#define NET_TASKQ 1 +#define NET_TASKQ 4 struct taskq *nettqmp[NET_TASKQ]; struct task if_input_task_locked = TASK_INITIALIZER(if_netisr, NULL); @@ -815,6 +819,7 @@ void if_input_process(struct ifnet *ifp, struct mbuf_list *ml) { struct mbuf *m; + int exclusive_lock = 0; if (ml_empty(ml)) return; @@ -834,10 +839,25 @@ if_input_process(struct ifnet *ifp, stru * to PF globals, pipex globals, unicast and multicast addresses * lists and the socket layer. */ - NET_LOCK(); + + /* + * XXXSMP IPsec data structures are not ready to be + * accessed by multiple Network threads in parallel. + */ + if (ipsec_in_use) + exclusive_lock = 1; + if (exclusive_lock) + NET_LOCK(); + else + NET_RLOCK_IN_SOFTNET(); + while ((m = ml_dequeue(ml)) != NULL) (*ifp->if_input)(ifp, m); - NET_UNLOCK(); + + if (exclusive_lock) + NET_UNLOCK(); + else + NET_RUNLOCK_IN_SOFTNET(); } void @@ -895,6 +915,12 @@ if_netisr(void *unused) KERNEL_UNLOCK(); } #endif + if (n & (1 << NETISR_IP)) + ipintr(); +#ifdef INET6 + if (n & (1 << NETISR_IPV6)) + ip6intr(); +#endif #if NPPP > 0 if (n & (1 << NETISR_PPP)) { KERNEL_LOCK(); @@ -3311,17 +3337,14 @@ unhandled_af(int af) panic("unhandled af %d", af); } -/* - * XXXSMP This tunable is here to work around the fact that IPsec - * globals aren't ready to be accessed by multiple threads in - * parallel. - */ -int nettaskqs = NET_TASKQ; - struct taskq * net_tq(unsigned int ifindex) { struct taskq *t = NULL; + static int nettaskqs; + + if (nettaskqs == 0) + nettaskqs = min(NET_TASKQ, ncpus); t = nettqmp[ifindex % nettaskqs]; Index: net/if_ethersubr.c =================================================================== RCS file: /data/mirror/openbsd/cvs/src/sys/net/if_ethersubr.c,v retrieving revision 1.275 diff -u -p -r1.275 if_ethersubr.c --- net/if_ethersubr.c 7 Jul 2021 20:19:01 -0000 1.275 +++ net/if_ethersubr.c 19 Jul 2021 14:32:48 -0000 @@ -222,7 +222,10 @@ ether_resolve(struct ifnet *ifp, struct switch (af) { case AF_INET: + KERNEL_LOCK(); + /* XXXSMP there is a MP race in arpresolve() */ error = arpresolve(ifp, rt, m, dst, eh->ether_dhost); + KERNEL_UNLOCK(); if (error) return (error); eh->ether_type = htons(ETHERTYPE_IP); @@ -245,7 +248,10 @@ ether_resolve(struct ifnet *ifp, struct break; #ifdef INET6 case AF_INET6: + KERNEL_LOCK(); + /* XXXSMP there is a MP race in nd6_resolve() */ error = nd6_resolve(ifp, rt, m, dst, eh->ether_dhost); + KERNEL_UNLOCK(); if (error) return (error); eh->ether_type = htons(ETHERTYPE_IPV6); Index: net/ifq.c =================================================================== RCS file: /data/mirror/openbsd/cvs/src/sys/net/ifq.c,v retrieving revision 1.44 diff -u -p -r1.44 ifq.c --- net/ifq.c 9 Jul 2021 01:22:05 -0000 1.44 +++ net/ifq.c 19 Jul 2021 14:32:48 -0000 @@ -243,7 +243,7 @@ void ifq_init(struct ifqueue *ifq, struct ifnet *ifp, unsigned int idx) { ifq->ifq_if = ifp; - ifq->ifq_softnet = net_tq(ifp->if_index); /* + idx */ + ifq->ifq_softnet = net_tq(ifp->if_index + idx); ifq->ifq_softc = NULL; mtx_init(&ifq->ifq_mtx, IPL_NET); @@ -620,7 +620,7 @@ void ifiq_init(struct ifiqueue *ifiq, struct ifnet *ifp, unsigned int idx) { ifiq->ifiq_if = ifp; - ifiq->ifiq_softnet = net_tq(ifp->if_index); /* + idx */ + ifiq->ifiq_softnet = net_tq(ifp->if_index + idx); ifiq->ifiq_softc = NULL; mtx_init(&ifiq->ifiq_mtx, IPL_NET); Index: net/netisr.h =================================================================== RCS file: /data/mirror/openbsd/cvs/src/sys/net/netisr.h,v retrieving revision 1.55 diff -u -p -r1.55 netisr.h --- net/netisr.h 5 Jan 2021 20:43:36 -0000 1.55 +++ net/netisr.h 19 Jul 2021 14:32:48 -0000 @@ -41,8 +41,10 @@ * interrupt used for scheduling the network code to calls * on the lowest level routine of each protocol. */ +#define NETISR_IP 2 /* same as AF_INET */ #define NETISR_PFSYNC 5 /* for pfsync "immediate" tx */ #define NETISR_ARP 18 /* same as AF_LINK */ +#define NETISR_IPV6 24 /* same as AF_INET6 */ #define NETISR_PPP 28 /* for PPP processing */ #define NETISR_BRIDGE 29 /* for bridge processing */ #define NETISR_SWITCH 31 /* for switch dataplane */ @@ -57,6 +59,8 @@ extern int netisr; /* scheduling bits extern struct task if_input_task_locked; void arpintr(void); +void ipintr(void); +void ip6intr(void); void pppintr(void); void bridgeintr(void); void switchintr(void); Index: net/pfkeyv2.c =================================================================== RCS file: /data/mirror/openbsd/cvs/src/sys/net/pfkeyv2.c,v retrieving revision 1.218 diff -u -p -r1.218 pfkeyv2.c --- net/pfkeyv2.c 14 Jul 2021 22:39:26 -0000 1.218 +++ net/pfkeyv2.c 19 Jul 2021 14:48:34 -0000 @@ -2019,14 +2019,6 @@ pfkeyv2_send(struct socket *so, void *me } TAILQ_INSERT_HEAD(&ipsec_policy_head, ipo, ipo_list); ipsec_in_use++; - /* - * XXXSMP IPsec data structures are not ready to be - * accessed by multiple Network threads in parallel, - * so force all packets to be processed by the first - * one. - */ - extern int nettaskqs; - nettaskqs = 1; } else { ipo->ipo_last_searched = ipo->ipo_flags = 0; } Index: netinet/ip_input.c =================================================================== RCS file: /data/mirror/openbsd/cvs/src/sys/netinet/ip_input.c,v retrieving revision 1.363 diff -u -p -r1.363 ip_input.c --- netinet/ip_input.c 21 Jun 2021 22:09:14 -0000 1.363 +++ netinet/ip_input.c 19 Jul 2021 20:32:15 -0000 @@ -130,6 +130,8 @@ const struct sysctl_bounded_args ipctl_v { IPCTL_ARPDOWN, &arpt_down, 0, INT_MAX }, }; +struct niqueue ipintrq = NIQUEUE_INITIALIZER(IPQ_MAXLEN, NETISR_IP); + struct pool ipqent_pool; struct pool ipq_pool; @@ -143,6 +145,7 @@ static struct mbuf_queue ipsendraw_mq; extern struct niqueue arpinq; int ip_ours(struct mbuf **, int *, int, int); +int ip_local(struct mbuf **, int *, int, int); int ip_dooptions(struct mbuf *, struct ifnet *); int in_ouraddr(struct mbuf *, struct ifnet *, struct rtentry **); @@ -230,6 +233,43 @@ ip_init(void) } /* + * Enqueue packet for local delivery. Queuing is used as a boundary + * between the network layer (input/forward path) running with shared + * NET_RLOCK_IN_SOFTNET() and the transport layer needing it exclusively. + */ +int +ip_ours(struct mbuf **mp, int *offp, int nxt, int af) +{ + /* We are already in a IPv4/IPv6 local deliver loop. */ + if (af != AF_UNSPEC) + return ip_local(mp, offp, nxt, af); + + niq_enqueue(&ipintrq, *mp); + *mp = NULL; + return IPPROTO_DONE; +} + +/* + * Dequeue and process locally delivered packets. + */ +void +ipintr(void) +{ + struct mbuf *m; + int off, nxt; + + while ((m = niq_dequeue(&ipintrq)) != NULL) { +#ifdef DIAGNOSTIC + if ((m->m_flags & M_PKTHDR) == 0) + panic("ipintr no HDR"); +#endif + off = 0; + nxt = ip_local(&m, &off, IPPROTO_IPV4, AF_UNSPEC); + KASSERT(nxt == IPPROTO_DONE); + } +} + +/* * IPv4 input routine. * * Checksum and byte swap header. Process options. Forward or deliver. @@ -514,7 +554,7 @@ ip_input_if(struct mbuf **mp, int *offp, * If fragmented try to reassemble. Pass to next level. */ int -ip_ours(struct mbuf **mp, int *offp, int nxt, int af) +ip_local(struct mbuf **mp, int *offp, int nxt, int af) { struct mbuf *m = *mp; struct ip *ip = mtod(m, struct ip *); @@ -522,6 +562,8 @@ ip_ours(struct mbuf **mp, int *offp, int struct ipqent *ipqe; int mff, hlen; + NET_ASSERT_WLOCKED(); + hlen = ip->ip_hl << 2; /* @@ -1665,7 +1707,8 @@ ip_sysctl(int *name, u_int namelen, void newlen)); #endif case IPCTL_IFQUEUE: - return (EOPNOTSUPP); + return (sysctl_niq(name + 1, namelen - 1, + oldp, oldlenp, newp, newlen, &ipintrq)); case IPCTL_ARPQUEUE: return (sysctl_niq(name + 1, namelen - 1, oldp, oldlenp, newp, newlen, &arpinq)); Index: netinet/ip_var.h =================================================================== RCS file: /data/mirror/openbsd/cvs/src/sys/netinet/ip_var.h,v retrieving revision 1.88 diff -u -p -r1.88 ip_var.h --- netinet/ip_var.h 30 Mar 2021 08:37:11 -0000 1.88 +++ netinet/ip_var.h 19 Jul 2021 14:32:48 -0000 @@ -248,7 +248,6 @@ void ip_stripoptions(struct mbuf *); int ip_sysctl(int *, u_int, void *, size_t *, void *, size_t); void ip_savecontrol(struct inpcb *, struct mbuf **, struct ip *, struct mbuf *); -void ipintr(void); int ip_input_if(struct mbuf **, int *, int, int, struct ifnet *); int ip_deliver(struct mbuf **, int *, int, int); void ip_forward(struct mbuf *, struct ifnet *, struct rtentry *, int); Index: netinet6/ip6_input.c =================================================================== RCS file: /data/mirror/openbsd/cvs/src/sys/netinet6/ip6_input.c,v retrieving revision 1.237 diff -u -p -r1.237 ip6_input.c --- netinet6/ip6_input.c 3 Jun 2021 04:47:54 -0000 1.237 +++ netinet6/ip6_input.c 19 Jul 2021 20:31:14 -0000 @@ -115,11 +115,14 @@ #include <netinet/ip_carp.h> #endif +struct niqueue ip6intrq = NIQUEUE_INITIALIZER(IPQ_MAXLEN, NETISR_IPV6); + struct cpumem *ip6counters; uint8_t ip6_soiikey[IP6_SOIIKEY_LEN]; int ip6_ours(struct mbuf **, int *, int, int); +int ip6_local(struct mbuf **, int *, int, int); int ip6_check_rh0hdr(struct mbuf *, int *); int ip6_hbhchcheck(struct mbuf *, int *, int *, int *); int ip6_hopopts_input(u_int32_t *, u_int32_t *, struct mbuf **, int *); @@ -162,6 +165,43 @@ ip6_init(void) ip6counters = counters_alloc(ip6s_ncounters); } +/* + * Enqueue packet for local delivery. Queuing is used as a boundary + * between the network layer (input/forward path) running with shared + * NET_RLOCK_IN_SOFTNET() and the transport layer needing it exclusively. + */ +int +ip6_ours(struct mbuf **mp, int *offp, int nxt, int af) +{ + /* We are already in a IPv4/IPv6 local deliver loop. */ + if (af != AF_UNSPEC) + return ip6_local(mp, offp, nxt, af); + + niq_enqueue(&ip6intrq, *mp); + *mp = NULL; + return IPPROTO_DONE; +} + +/* + * Dequeue and process locally delivered packets. + */ +void +ip6intr(void) +{ + struct mbuf *m; + int off, nxt; + + while ((m = niq_dequeue(&ip6intrq)) != NULL) { +#ifdef DIAGNOSTIC + if ((m->m_flags & M_PKTHDR) == 0) + panic("ip6intr no HDR"); +#endif + off = 0; + nxt = ip6_local(&m, &off, IPPROTO_IPV6, AF_UNSPEC); + KASSERT(nxt == IPPROTO_DONE); + } +} + void ipv6_input(struct ifnet *ifp, struct mbuf *m) { @@ -544,8 +584,10 @@ ip6_input_if(struct mbuf **mp, int *offp } int -ip6_ours(struct mbuf **mp, int *offp, int nxt, int af) +ip6_local(struct mbuf **mp, int *offp, int nxt, int af) { + NET_ASSERT_WLOCKED(); + if (ip6_hbhchcheck(*mp, offp, &nxt, NULL)) return IPPROTO_DONE; @@ -1470,7 +1512,8 @@ ip6_sysctl(int *name, u_int namelen, voi NET_UNLOCK(); return (error); case IPV6CTL_IFQUEUE: - return (EOPNOTSUPP); + return (sysctl_niq(name + 1, namelen - 1, + oldp, oldlenp, newp, newlen, &ip6intrq)); case IPV6CTL_SOIIKEY: return (ip6_sysctl_soiikey(oldp, oldlenp, newp, newlen)); default: