On Thu, Apr 22, 2021 at 11:36:07AM +0200, Hrvoje Popovski wrote:
> On 22.4.2021. 11:02, Alexander Bluhm wrote:
> > This was without my kernel lock around ARP bandage, right?
>
> yes, yes ...
Good. Just wanted to be sure.
> > Did you enter boot reboot before doing mach ddbcpu 0xa?
>
> nope... is doing that ever useful?
No. I was looking at this strange trace. It is a bug in panic and
ddb, you did nothing wrong.
> > The if (db_panic) in the panic() function was not written with
> > simultaneous panics on multiple CPUs in mind.
>
> if you want i'll try to reproduce in on other boxes..
> maybe i can trigger it here easily because of 2 sockets ?
The panic and ddb bug is pretty clear. We just need an idea for
the fix and someone who wants to fix it.
But if you have more than one machine, it would be great to stress
it with "network in parallel" and the "kernel lock around ARP
bandage" diff. Attached together for convenience.
It is not clear why the lock helps. Is it a bug in routing or ARP?
Or is it just different timing introduced by the additional kernel
lock? The parallel network task stress the subsystems of the kernel
more than before with MP load. Having more results from machines
with different timing would help.
bluhm
Index: net/if.c
===================================================================
RCS file: /data/mirror/openbsd/cvs/src/sys/net/if.c,v
retrieving revision 1.640
diff -u -p -r1.640 if.c
--- net/if.c 26 Mar 2021 22:41:06 -0000 1.640
+++ net/if.c 21 Apr 2021 12:52:08 -0000
@@ -238,7 +238,7 @@ int ifq_congestion;
int netisr;
-#define NET_TASKQ 1
+#define NET_TASKQ 4
struct taskq *nettqmp[NET_TASKQ];
struct task if_input_task_locked = TASK_INITIALIZER(if_netisr, NULL);
@@ -834,10 +834,10 @@ if_input_process(struct ifnet *ifp, stru
* to PF globals, pipex globals, unicast and multicast addresses
* lists and the socket layer.
*/
- NET_LOCK();
+ NET_RLOCK_IN_SOFTNET();
while ((m = ml_dequeue(ml)) != NULL)
(*ifp->if_input)(ifp, m);
- NET_UNLOCK();
+ NET_RUNLOCK_IN_SOFTNET();
}
void
@@ -895,6 +895,12 @@ if_netisr(void *unused)
KERNEL_UNLOCK();
}
#endif
+ if (n & (1 << NETISR_IP))
+ ipintr();
+#ifdef INET6
+ if (n & (1 << NETISR_IPV6))
+ ip6intr();
+#endif
#if NPPP > 0
if (n & (1 << NETISR_PPP)) {
KERNEL_LOCK();
@@ -3316,12 +3322,15 @@ unhandled_af(int af)
* globals aren't ready to be accessed by multiple threads in
* parallel.
*/
-int nettaskqs = NET_TASKQ;
+int nettaskqs;
struct taskq *
net_tq(unsigned int ifindex)
{
struct taskq *t = NULL;
+
+ if (nettaskqs == 0)
+ nettaskqs = min(NET_TASKQ, ncpus);
t = nettqmp[ifindex % nettaskqs];
Index: net/if_ethersubr.c
===================================================================
RCS file: /data/mirror/openbsd/cvs/src/sys/net/if_ethersubr.c,v
retrieving revision 1.274
diff -u -p -r1.274 if_ethersubr.c
--- net/if_ethersubr.c 7 Mar 2021 06:02:32 -0000 1.274
+++ net/if_ethersubr.c 21 Apr 2021 22:16:05 -0000
@@ -222,7 +222,10 @@ ether_resolve(struct ifnet *ifp, struct
switch (af) {
case AF_INET:
+ KERNEL_LOCK();
+ /* XXXSMP there is a MP race in arpresolve() */
error = arpresolve(ifp, rt, m, dst, eh->ether_dhost);
+ KERNEL_UNLOCK();
if (error)
return (error);
eh->ether_type = htons(ETHERTYPE_IP);
@@ -245,7 +248,10 @@ ether_resolve(struct ifnet *ifp, struct
break;
#ifdef INET6
case AF_INET6:
+ KERNEL_LOCK();
+ /* XXXSMP there is a MP race in nd6_resolve() */
error = nd6_resolve(ifp, rt, m, dst, eh->ether_dhost);
+ KERNEL_UNLOCK();
if (error)
return (error);
eh->ether_type = htons(ETHERTYPE_IPV6);
Index: net/ifq.c
===================================================================
RCS file: /data/mirror/openbsd/cvs/src/sys/net/ifq.c,v
retrieving revision 1.43
diff -u -p -r1.43 ifq.c
--- net/ifq.c 20 Feb 2021 04:37:26 -0000 1.43
+++ net/ifq.c 21 Apr 2021 13:12:44 -0000
@@ -243,7 +243,7 @@ void
ifq_init(struct ifqueue *ifq, struct ifnet *ifp, unsigned int idx)
{
ifq->ifq_if = ifp;
- ifq->ifq_softnet = net_tq(ifp->if_index); /* + idx */
+ ifq->ifq_softnet = net_tq(ifp->if_index + idx);
ifq->ifq_softc = NULL;
mtx_init(&ifq->ifq_mtx, IPL_NET);
@@ -617,7 +617,7 @@ void
ifiq_init(struct ifiqueue *ifiq, struct ifnet *ifp, unsigned int idx)
{
ifiq->ifiq_if = ifp;
- ifiq->ifiq_softnet = net_tq(ifp->if_index); /* + idx */
+ ifiq->ifiq_softnet = net_tq(ifp->if_index + idx);
ifiq->ifiq_softc = NULL;
mtx_init(&ifiq->ifiq_mtx, IPL_NET);
Index: net/netisr.h
===================================================================
RCS file: /data/mirror/openbsd/cvs/src/sys/net/netisr.h,v
retrieving revision 1.55
diff -u -p -r1.55 netisr.h
--- net/netisr.h 5 Jan 2021 20:43:36 -0000 1.55
+++ net/netisr.h 21 Apr 2021 12:52:08 -0000
@@ -41,8 +41,10 @@
* interrupt used for scheduling the network code to calls
* on the lowest level routine of each protocol.
*/
+#define NETISR_IP 2 /* same as AF_INET */
#define NETISR_PFSYNC 5 /* for pfsync "immediate" tx */
#define NETISR_ARP 18 /* same as AF_LINK */
+#define NETISR_IPV6 24 /* same as AF_INET6 */
#define NETISR_PPP 28 /* for PPP processing */
#define NETISR_BRIDGE 29 /* for bridge processing */
#define NETISR_SWITCH 31 /* for switch dataplane */
@@ -57,6 +59,8 @@ extern int netisr; /* scheduling bits
extern struct task if_input_task_locked;
void arpintr(void);
+void ipintr(void);
+void ip6intr(void);
void pppintr(void);
void bridgeintr(void);
void switchintr(void);
Index: netinet/ip_input.c
===================================================================
RCS file: /data/mirror/openbsd/cvs/src/sys/netinet/ip_input.c,v
retrieving revision 1.356
diff -u -p -r1.356 ip_input.c
--- netinet/ip_input.c 30 Mar 2021 08:37:10 -0000 1.356
+++ netinet/ip_input.c 21 Apr 2021 12:52:08 -0000
@@ -131,6 +131,8 @@ const struct sysctl_bounded_args ipctl_v
{ IPCTL_ARPDOWN, &arpt_down, 0, INT_MAX },
};
+struct niqueue ipintrq = NIQUEUE_INITIALIZER(IPQ_MAXLEN, NETISR_IP);
+
struct pool ipqent_pool;
struct pool ipq_pool;
@@ -144,6 +146,7 @@ static struct mbuf_queue ipsendraw_mq;
extern struct niqueue arpinq;
int ip_ours(struct mbuf **, int *, int, int);
+int ip_local(struct mbuf **, int *, int, int);
int ip_dooptions(struct mbuf *, struct ifnet *);
int in_ouraddr(struct mbuf *, struct ifnet *, struct rtentry **);
@@ -230,6 +233,43 @@ ip_init(void)
}
/*
+ * Enqueue packet for local delivery. Queuing is used as a boundary
+ * between the network layer (input/forward path) running with shared
+ * NET_RLOCK_IN_SOFTNET() and the transport layer needing it exclusively.
+ */
+int
+ip_ours(struct mbuf **mp, int *offp, int nxt, int af)
+{
+ /* We are already in a IPv4/IPv6 local deliver loop. */
+ if (af != AF_UNSPEC)
+ return ip_local(mp, offp, nxt, af);
+
+ niq_enqueue(&ipintrq, *mp);
+ *mp = NULL;
+ return IPPROTO_DONE;
+}
+
+/*
+ * Dequeue and process locally delivered packets.
+ */
+void
+ipintr(void)
+{
+ struct mbuf *m;
+ int off, nxt;
+
+ while ((m = niq_dequeue(&ipintrq)) != NULL) {
+#ifdef DIAGNOSTIC
+ if ((m->m_flags & M_PKTHDR) == 0)
+ panic("ipintr no HDR");
+#endif
+ off = 0;
+ nxt = ip_local(&m, &off, IPPROTO_IPV4, AF_UNSPEC);
+ KASSERT(nxt == IPPROTO_DONE);
+ }
+}
+
+/*
* IPv4 input routine.
*
* Checksum and byte swap header. Process options. Forward or deliver.
@@ -488,7 +528,7 @@ ip_input_if(struct mbuf **mp, int *offp,
* If fragmented try to reassemble. Pass to next level.
*/
int
-ip_ours(struct mbuf **mp, int *offp, int nxt, int af)
+ip_local(struct mbuf **mp, int *offp, int nxt, int af)
{
struct mbuf *m = *mp;
struct ip *ip = mtod(m, struct ip *);
@@ -1639,7 +1679,8 @@ ip_sysctl(int *name, u_int namelen, void
newlen));
#endif
case IPCTL_IFQUEUE:
- return (EOPNOTSUPP);
+ return (sysctl_niq(name + 1, namelen - 1,
+ oldp, oldlenp, newp, newlen, &ipintrq));
case IPCTL_ARPQUEUE:
return (sysctl_niq(name + 1, namelen - 1,
oldp, oldlenp, newp, newlen, &arpinq));
Index: netinet/ip_var.h
===================================================================
RCS file: /data/mirror/openbsd/cvs/src/sys/netinet/ip_var.h,v
retrieving revision 1.88
diff -u -p -r1.88 ip_var.h
--- netinet/ip_var.h 30 Mar 2021 08:37:11 -0000 1.88
+++ netinet/ip_var.h 21 Apr 2021 12:52:08 -0000
@@ -248,7 +248,6 @@ void ip_stripoptions(struct mbuf *);
int ip_sysctl(int *, u_int, void *, size_t *, void *, size_t);
void ip_savecontrol(struct inpcb *, struct mbuf **, struct ip *,
struct mbuf *);
-void ipintr(void);
int ip_input_if(struct mbuf **, int *, int, int, struct ifnet *);
int ip_deliver(struct mbuf **, int *, int, int);
void ip_forward(struct mbuf *, struct ifnet *, struct rtentry *, int);
Index: netinet6/ip6_input.c
===================================================================
RCS file: /data/mirror/openbsd/cvs/src/sys/netinet6/ip6_input.c,v
retrieving revision 1.232
diff -u -p -r1.232 ip6_input.c
--- netinet6/ip6_input.c 10 Mar 2021 10:21:49 -0000 1.232
+++ netinet6/ip6_input.c 21 Apr 2021 12:52:08 -0000
@@ -115,11 +115,14 @@
#include <netinet/ip_carp.h>
#endif
+struct niqueue ip6intrq = NIQUEUE_INITIALIZER(IPQ_MAXLEN, NETISR_IPV6);
+
struct cpumem *ip6counters;
uint8_t ip6_soiikey[IP6_SOIIKEY_LEN];
int ip6_ours(struct mbuf **, int *, int, int);
+int ip6_local(struct mbuf **, int *, int, int);
int ip6_check_rh0hdr(struct mbuf *, int *);
int ip6_hbhchcheck(struct mbuf *, int *, int *, int *);
int ip6_hopopts_input(u_int32_t *, u_int32_t *, struct mbuf **, int *);
@@ -162,6 +165,43 @@ ip6_init(void)
ip6counters = counters_alloc(ip6s_ncounters);
}
+/*
+ * Enqueue packet for local delivery. Queuing is used as a boundary
+ * between the network layer (input/forward path) running with shared
+ * NET_RLOCK_IN_SOFTNET() and the transport layer needing it exclusively.
+ */
+int
+ip6_ours(struct mbuf **mp, int *offp, int nxt, int af)
+{
+ /* We are already in a IPv4/IPv6 local deliver loop. */
+ if (af != AF_UNSPEC)
+ return ip6_local(mp, offp, nxt, af);
+
+ niq_enqueue(&ip6intrq, *mp);
+ *mp = NULL;
+ return IPPROTO_DONE;
+}
+
+/*
+ * Dequeue and process locally delivered packets.
+ */
+void
+ip6intr(void)
+{
+ struct mbuf *m;
+ int off, nxt;
+
+ while ((m = niq_dequeue(&ip6intrq)) != NULL) {
+#ifdef DIAGNOSTIC
+ if ((m->m_flags & M_PKTHDR) == 0)
+ panic("ip6intr no HDR");
+#endif
+ off = 0;
+ nxt = ip6_local(&m, &off, IPPROTO_IPV6, AF_UNSPEC);
+ KASSERT(nxt == IPPROTO_DONE);
+ }
+}
+
void
ipv6_input(struct ifnet *ifp, struct mbuf *m)
{
@@ -526,7 +566,7 @@ ip6_input_if(struct mbuf **mp, int *offp
}
int
-ip6_ours(struct mbuf **mp, int *offp, int nxt, int af)
+ip6_local(struct mbuf **mp, int *offp, int nxt, int af)
{
if (ip6_hbhchcheck(*mp, offp, &nxt, NULL))
return IPPROTO_DONE;
@@ -1452,7 +1492,8 @@ ip6_sysctl(int *name, u_int namelen, voi
NET_UNLOCK();
return (error);
case IPV6CTL_IFQUEUE:
- return (EOPNOTSUPP);
+ return (sysctl_niq(name + 1, namelen - 1,
+ oldp, oldlenp, newp, newlen, &ip6intrq));
case IPV6CTL_SOIIKEY:
return (ip6_sysctl_soiikey(oldp, oldlenp, newp, newlen));
default: