On Fri, Dec 03, 2021 at 08:35:45PM +0100, Alexander Bluhm wrote: > Note that IPsec still has the workaround to disable multiple queues.
I think we can remove the ipsec_in_use workaround now. The IPsec path is protected with the kernel lock. There are some issues left: - npppd l2pt ipsecflowinfo is not MP safe - the acquire SA feature is not MP safe - Hrvoje has seen a panic with sasync If you use one of these, the diff below should trigger crashes faster. If you use only regular IPsec or forwarding, I hope it is stable. bluhm Index: net/if.c =================================================================== RCS file: /cvs/src/sys/net/if.c,v retrieving revision 1.644 diff -u -p -r1.644 if.c --- net/if.c 11 Nov 2021 10:03:10 -0000 1.644 +++ net/if.c 23 Dec 2021 23:11:20 -0000 @@ -108,6 +108,10 @@ #include <netinet6/ip6_var.h> #endif +#ifdef IPSEC +#include <netinet/ip_ipsp.h> +#endif + #ifdef MPLS #include <netmpls/mpls.h> #endif @@ -237,7 +241,7 @@ int ifq_congestion; int netisr; -#define NET_TASKQ 1 +#define NET_TASKQ 4 struct taskq *nettqmp[NET_TASKQ]; struct task if_input_task_locked = TASK_INITIALIZER(if_netisr, NULL); @@ -834,15 +838,10 @@ if_input_process(struct ifnet *ifp, stru * lists and the socket layer. */ - /* - * XXXSMP IPsec data structures are not ready to be accessed - * by multiple network threads in parallel. In this case - * use an exclusive lock. - */ - NET_LOCK(); + NET_RLOCK_IN_SOFTNET(); while ((m = ml_dequeue(ml)) != NULL) (*ifp->if_input)(ifp, m); - NET_UNLOCK(); + NET_RUNLOCK_IN_SOFTNET(); } void @@ -899,6 +898,12 @@ if_netisr(void *unused) arpintr(); KERNEL_UNLOCK(); } +#endif + if (n & (1 << NETISR_IP)) + ipintr(); +#ifdef INET6 + if (n & (1 << NETISR_IPV6)) + ip6intr(); #endif #if NPPP > 0 if (n & (1 << NETISR_PPP)) { Index: net/if_ethersubr.c =================================================================== RCS file: /cvs/src/sys/net/if_ethersubr.c,v retrieving revision 1.276 diff -u -p -r1.276 if_ethersubr.c --- net/if_ethersubr.c 19 Aug 2021 10:22:00 -0000 1.276 +++ net/if_ethersubr.c 23 Dec 2021 23:11:20 -0000 @@ -222,7 +222,10 @@ ether_resolve(struct ifnet *ifp, struct switch (af) { case AF_INET: + KERNEL_LOCK(); + /* XXXSMP there is a MP race in arpresolve() */ error = arpresolve(ifp, rt, m, dst, eh->ether_dhost); + KERNEL_UNLOCK(); if (error) return (error); eh->ether_type = htons(ETHERTYPE_IP); @@ -245,7 +248,10 @@ ether_resolve(struct ifnet *ifp, struct break; #ifdef INET6 case AF_INET6: + KERNEL_LOCK(); + /* XXXSMP there is a MP race in nd6_resolve() */ error = nd6_resolve(ifp, rt, m, dst, eh->ether_dhost); + KERNEL_UNLOCK(); if (error) return (error); eh->ether_type = htons(ETHERTYPE_IPV6); @@ -271,13 +277,19 @@ ether_resolve(struct ifnet *ifp, struct break; #ifdef INET6 case AF_INET6: + KERNEL_LOCK(); + /* XXXSMP there is a MP race in nd6_resolve() */ error = nd6_resolve(ifp, rt, m, dst, eh->ether_dhost); + KERNEL_UNLOCK(); if (error) return (error); break; #endif case AF_INET: + KERNEL_LOCK(); + /* XXXSMP there is a MP race in arpresolve() */ error = arpresolve(ifp, rt, m, dst, eh->ether_dhost); + KERNEL_UNLOCK(); if (error) return (error); break; @@ -529,12 +541,14 @@ ether_input(struct ifnet *ifp, struct mb case ETHERTYPE_PPPOE: if (m->m_flags & (M_MCAST | M_BCAST)) goto dropanyway; + KERNEL_LOCK(); #ifdef PIPEX if (pipex_enable) { struct pipex_session *session; if ((session = pipex_pppoe_lookup_session(m)) != NULL) { pipex_pppoe_input(m, session); + KERNEL_UNLOCK(); return; } } @@ -543,6 +557,7 @@ ether_input(struct ifnet *ifp, struct mb pppoe_disc_input(m); else pppoe_data_input(m); + KERNEL_UNLOCK(); return; #endif #ifdef MPLS Index: net/ifq.c =================================================================== RCS file: /cvs/src/sys/net/ifq.c,v retrieving revision 1.44 diff -u -p -r1.44 ifq.c --- net/ifq.c 9 Jul 2021 01:22:05 -0000 1.44 +++ net/ifq.c 23 Dec 2021 23:11:20 -0000 @@ -243,7 +243,7 @@ void ifq_init(struct ifqueue *ifq, struct ifnet *ifp, unsigned int idx) { ifq->ifq_if = ifp; - ifq->ifq_softnet = net_tq(ifp->if_index); /* + idx */ + ifq->ifq_softnet = net_tq(ifp->if_index + idx); ifq->ifq_softc = NULL; mtx_init(&ifq->ifq_mtx, IPL_NET); @@ -620,7 +620,7 @@ void ifiq_init(struct ifiqueue *ifiq, struct ifnet *ifp, unsigned int idx) { ifiq->ifiq_if = ifp; - ifiq->ifiq_softnet = net_tq(ifp->if_index); /* + idx */ + ifiq->ifiq_softnet = net_tq(ifp->if_index + idx); ifiq->ifiq_softc = NULL; mtx_init(&ifiq->ifiq_mtx, IPL_NET); Index: net/netisr.h =================================================================== RCS file: /cvs/src/sys/net/netisr.h,v retrieving revision 1.55 diff -u -p -r1.55 netisr.h --- net/netisr.h 5 Jan 2021 20:43:36 -0000 1.55 +++ net/netisr.h 23 Dec 2021 23:11:20 -0000 @@ -41,8 +41,10 @@ * interrupt used for scheduling the network code to calls * on the lowest level routine of each protocol. */ +#define NETISR_IP 2 /* same as AF_INET */ #define NETISR_PFSYNC 5 /* for pfsync "immediate" tx */ #define NETISR_ARP 18 /* same as AF_LINK */ +#define NETISR_IPV6 24 /* same as AF_INET6 */ #define NETISR_PPP 28 /* for PPP processing */ #define NETISR_BRIDGE 29 /* for bridge processing */ #define NETISR_SWITCH 31 /* for switch dataplane */ @@ -57,6 +59,8 @@ extern int netisr; /* scheduling bits extern struct task if_input_task_locked; void arpintr(void); +void ipintr(void); +void ip6intr(void); void pppintr(void); void bridgeintr(void); void switchintr(void); Index: netinet/ip_input.c =================================================================== RCS file: /cvs/src/sys/netinet/ip_input.c,v retrieving revision 1.364 diff -u -p -r1.364 ip_input.c --- netinet/ip_input.c 22 Nov 2021 13:47:10 -0000 1.364 +++ netinet/ip_input.c 23 Dec 2021 23:11:21 -0000 @@ -130,6 +130,8 @@ const struct sysctl_bounded_args ipctl_v { IPCTL_ARPDOWN, &arpt_down, 0, INT_MAX }, }; +struct niqueue ipintrq = NIQUEUE_INITIALIZER(IPQ_MAXLEN, NETISR_IP); + struct pool ipqent_pool; struct pool ipq_pool; @@ -143,6 +145,7 @@ static struct mbuf_queue ipsendraw_mq; extern struct niqueue arpinq; int ip_ours(struct mbuf **, int *, int, int); +int ip_local(struct mbuf **, int *, int, int); int ip_dooptions(struct mbuf *, struct ifnet *); int in_ouraddr(struct mbuf *, struct ifnet *, struct rtentry **); @@ -230,6 +233,43 @@ ip_init(void) } /* + * Enqueue packet for local delivery. Queuing is used as a boundary + * between the network layer (input/forward path) running with shared + * NET_RLOCK_IN_SOFTNET() and the transport layer needing it exclusively. + */ +int +ip_ours(struct mbuf **mp, int *offp, int nxt, int af) +{ + /* We are already in a IPv4/IPv6 local deliver loop. */ + if (af != AF_UNSPEC) + return ip_local(mp, offp, nxt, af); + + niq_enqueue(&ipintrq, *mp); + *mp = NULL; + return IPPROTO_DONE; +} + +/* + * Dequeue and process locally delivered packets. + */ +void +ipintr(void) +{ + struct mbuf *m; + int off, nxt; + + while ((m = niq_dequeue(&ipintrq)) != NULL) { +#ifdef DIAGNOSTIC + if ((m->m_flags & M_PKTHDR) == 0) + panic("ipintr no HDR"); +#endif + off = 0; + nxt = ip_local(&m, &off, IPPROTO_IPV4, AF_UNSPEC); + KASSERT(nxt == IPPROTO_DONE); + } +} + +/* * IPv4 input routine. * * Checksum and byte swap header. Process options. Forward or deliver. @@ -514,7 +554,7 @@ ip_input_if(struct mbuf **mp, int *offp, * If fragmented try to reassemble. Pass to next level. */ int -ip_ours(struct mbuf **mp, int *offp, int nxt, int af) +ip_local(struct mbuf **mp, int *offp, int nxt, int af) { struct mbuf *m = *mp; struct ip *ip = mtod(m, struct ip *); @@ -522,6 +562,8 @@ ip_ours(struct mbuf **mp, int *offp, int struct ipqent *ipqe; int mff, hlen; + NET_ASSERT_WLOCKED(); + hlen = ip->ip_hl << 2; /* @@ -1663,7 +1705,8 @@ ip_sysctl(int *name, u_int namelen, void newlen)); #endif case IPCTL_IFQUEUE: - return (EOPNOTSUPP); + return (sysctl_niq(name + 1, namelen - 1, + oldp, oldlenp, newp, newlen, &ipintrq)); case IPCTL_ARPQUEUE: return (sysctl_niq(name + 1, namelen - 1, oldp, oldlenp, newp, newlen, &arpinq)); Index: netinet/ip_var.h =================================================================== RCS file: /cvs/src/sys/netinet/ip_var.h,v retrieving revision 1.88 diff -u -p -r1.88 ip_var.h --- netinet/ip_var.h 30 Mar 2021 08:37:11 -0000 1.88 +++ netinet/ip_var.h 23 Dec 2021 23:11:21 -0000 @@ -248,7 +248,6 @@ void ip_stripoptions(struct mbuf *); int ip_sysctl(int *, u_int, void *, size_t *, void *, size_t); void ip_savecontrol(struct inpcb *, struct mbuf **, struct ip *, struct mbuf *); -void ipintr(void); int ip_input_if(struct mbuf **, int *, int, int, struct ifnet *); int ip_deliver(struct mbuf **, int *, int, int); void ip_forward(struct mbuf *, struct ifnet *, struct rtentry *, int); Index: netinet6/ip6_input.c =================================================================== RCS file: /cvs/src/sys/netinet6/ip6_input.c,v retrieving revision 1.237 diff -u -p -r1.237 ip6_input.c --- netinet6/ip6_input.c 3 Jun 2021 04:47:54 -0000 1.237 +++ netinet6/ip6_input.c 23 Dec 2021 23:11:22 -0000 @@ -115,11 +115,14 @@ #include <netinet/ip_carp.h> #endif +struct niqueue ip6intrq = NIQUEUE_INITIALIZER(IPQ_MAXLEN, NETISR_IPV6); + struct cpumem *ip6counters; uint8_t ip6_soiikey[IP6_SOIIKEY_LEN]; int ip6_ours(struct mbuf **, int *, int, int); +int ip6_local(struct mbuf **, int *, int, int); int ip6_check_rh0hdr(struct mbuf *, int *); int ip6_hbhchcheck(struct mbuf *, int *, int *, int *); int ip6_hopopts_input(u_int32_t *, u_int32_t *, struct mbuf **, int *); @@ -162,6 +165,43 @@ ip6_init(void) ip6counters = counters_alloc(ip6s_ncounters); } +/* + * Enqueue packet for local delivery. Queuing is used as a boundary + * between the network layer (input/forward path) running with shared + * NET_RLOCK_IN_SOFTNET() and the transport layer needing it exclusively. + */ +int +ip6_ours(struct mbuf **mp, int *offp, int nxt, int af) +{ + /* We are already in a IPv4/IPv6 local deliver loop. */ + if (af != AF_UNSPEC) + return ip6_local(mp, offp, nxt, af); + + niq_enqueue(&ip6intrq, *mp); + *mp = NULL; + return IPPROTO_DONE; +} + +/* + * Dequeue and process locally delivered packets. + */ +void +ip6intr(void) +{ + struct mbuf *m; + int off, nxt; + + while ((m = niq_dequeue(&ip6intrq)) != NULL) { +#ifdef DIAGNOSTIC + if ((m->m_flags & M_PKTHDR) == 0) + panic("ip6intr no HDR"); +#endif + off = 0; + nxt = ip6_local(&m, &off, IPPROTO_IPV6, AF_UNSPEC); + KASSERT(nxt == IPPROTO_DONE); + } +} + void ipv6_input(struct ifnet *ifp, struct mbuf *m) { @@ -544,8 +584,10 @@ ip6_input_if(struct mbuf **mp, int *offp } int -ip6_ours(struct mbuf **mp, int *offp, int nxt, int af) +ip6_local(struct mbuf **mp, int *offp, int nxt, int af) { + NET_ASSERT_WLOCKED(); + if (ip6_hbhchcheck(*mp, offp, &nxt, NULL)) return IPPROTO_DONE; @@ -1470,7 +1512,8 @@ ip6_sysctl(int *name, u_int namelen, voi NET_UNLOCK(); return (error); case IPV6CTL_IFQUEUE: - return (EOPNOTSUPP); + return (sysctl_niq(name + 1, namelen - 1, + oldp, oldlenp, newp, newlen, &ip6intrq)); case IPV6CTL_SOIIKEY: return (ip6_sysctl_soiikey(oldp, oldlenp, newp, newlen)); default: