On Mon, Apr 18, 2022 at 12:27:23PM +0200, Hrvoje Popovski wrote:
> On 8.4.2022. 12:56, Alexander Bluhm wrote:
> > Is now the right time to commit the parallel forwarding diff?
> > 
> > Known limitations are:
> > - Hrvoje has seen a crash with both pfsync and ipsec on his production
> >   machine.  But he cannot reproduce it in his lab.
> 
> This is resolved. At least this panic doesn't happen any more.

Good to hear.  I guess the crash seen before is related to an
uncommitted diff that was tested in Hrvoje's setup.

So we can run IP forwarding in parallel.  I would like to commit
this diff now.  Then we can learn whether there are other limitations
and fix them in tree.

ok?

bluhm

Index: net/if.c
===================================================================
RCS file: /data/mirror/openbsd/cvs/src/sys/net/if.c,v
retrieving revision 1.649
diff -u -p -r1.649 if.c
--- net/if.c    25 Feb 2022 23:51:03 -0000      1.649
+++ net/if.c    18 Apr 2022 17:12:59 -0000
@@ -237,7 +237,7 @@ int ifq_congestion;
 
 int             netisr;
 
-#define        NET_TASKQ       1
+#define        NET_TASKQ       4
 struct taskq   *nettqmp[NET_TASKQ];
 
 struct task if_input_task_locked = TASK_INITIALIZER(if_netisr, NULL);
@@ -834,15 +834,10 @@ if_input_process(struct ifnet *ifp, stru
         * lists and the socket layer.
         */
 
-       /*
-        * XXXSMP IPsec data structures are not ready to be accessed
-        * by multiple network threads in parallel.  In this case
-        * use an exclusive lock.
-        */
-       NET_LOCK();
+       NET_RLOCK_IN_SOFTNET();
        while ((m = ml_dequeue(ml)) != NULL)
                (*ifp->if_input)(ifp, m);
-       NET_UNLOCK();
+       NET_RUNLOCK_IN_SOFTNET();
 }
 
 void
@@ -899,6 +894,12 @@ if_netisr(void *unused)
                        arpintr();
                        KERNEL_UNLOCK();
                }
+#endif
+               if (n & (1 << NETISR_IP))
+                       ipintr();
+#ifdef INET6
+               if (n & (1 << NETISR_IPV6))
+                       ip6intr();
 #endif
 #if NPPP > 0
                if (n & (1 << NETISR_PPP)) {
Index: net/if_ethersubr.c
===================================================================
RCS file: /data/mirror/openbsd/cvs/src/sys/net/if_ethersubr.c,v
retrieving revision 1.278
diff -u -p -r1.278 if_ethersubr.c
--- net/if_ethersubr.c  22 Feb 2022 01:15:02 -0000      1.278
+++ net/if_ethersubr.c  18 Apr 2022 17:12:59 -0000
@@ -221,7 +221,10 @@ ether_resolve(struct ifnet *ifp, struct 
 
        switch (af) {
        case AF_INET:
+               KERNEL_LOCK();
+               /* XXXSMP there is a MP race in arpresolve() */
                error = arpresolve(ifp, rt, m, dst, eh->ether_dhost);
+               KERNEL_UNLOCK();
                if (error)
                        return (error);
                eh->ether_type = htons(ETHERTYPE_IP);
@@ -244,7 +247,10 @@ ether_resolve(struct ifnet *ifp, struct 
                break;
 #ifdef INET6
        case AF_INET6:
+               KERNEL_LOCK();
+               /* XXXSMP there is a MP race in nd6_resolve() */
                error = nd6_resolve(ifp, rt, m, dst, eh->ether_dhost);
+               KERNEL_UNLOCK();
                if (error)
                        return (error);
                eh->ether_type = htons(ETHERTYPE_IPV6);
@@ -270,13 +276,19 @@ ether_resolve(struct ifnet *ifp, struct 
                        break;
 #ifdef INET6
                case AF_INET6:
+                       KERNEL_LOCK();
+                       /* XXXSMP there is a MP race in nd6_resolve() */
                        error = nd6_resolve(ifp, rt, m, dst, eh->ether_dhost);
+                       KERNEL_UNLOCK();
                        if (error)
                                return (error);
                        break;
 #endif
                case AF_INET:
+                       KERNEL_LOCK();
+                       /* XXXSMP there is a MP race in arpresolve() */
                        error = arpresolve(ifp, rt, m, dst, eh->ether_dhost);
+                       KERNEL_UNLOCK();
                        if (error)
                                return (error);
                        break;
@@ -528,12 +540,14 @@ ether_input(struct ifnet *ifp, struct mb
        case ETHERTYPE_PPPOE:
                if (m->m_flags & (M_MCAST | M_BCAST))
                        goto dropanyway;
+               KERNEL_LOCK();
 #ifdef PIPEX
                if (pipex_enable) {
                        struct pipex_session *session;
 
                        if ((session = pipex_pppoe_lookup_session(m)) != NULL) {
                                pipex_pppoe_input(m, session);
+                               KERNEL_UNLOCK();
                                return;
                        }
                }
@@ -542,6 +556,7 @@ ether_input(struct ifnet *ifp, struct mb
                        pppoe_disc_input(m);
                else
                        pppoe_data_input(m);
+               KERNEL_UNLOCK();
                return;
 #endif
 #ifdef MPLS
Index: net/ifq.c
===================================================================
RCS file: /data/mirror/openbsd/cvs/src/sys/net/ifq.c,v
retrieving revision 1.45
diff -u -p -r1.45 ifq.c
--- net/ifq.c   18 Jan 2022 10:54:05 -0000      1.45
+++ net/ifq.c   18 Apr 2022 17:12:59 -0000
@@ -243,7 +243,7 @@ void
 ifq_init(struct ifqueue *ifq, struct ifnet *ifp, unsigned int idx)
 {
        ifq->ifq_if = ifp;
-       ifq->ifq_softnet = net_tq(ifp->if_index); /* + idx */
+       ifq->ifq_softnet = net_tq(ifp->if_index + idx);
        ifq->ifq_softc = NULL;
 
        mtx_init(&ifq->ifq_mtx, IPL_NET);
@@ -620,7 +620,7 @@ void
 ifiq_init(struct ifiqueue *ifiq, struct ifnet *ifp, unsigned int idx)
 {
        ifiq->ifiq_if = ifp;
-       ifiq->ifiq_softnet = net_tq(ifp->if_index); /* + idx */
+       ifiq->ifiq_softnet = net_tq(ifp->if_index + idx);
        ifiq->ifiq_softc = NULL;
 
        mtx_init(&ifiq->ifiq_mtx, IPL_NET);
Index: net/netisr.h
===================================================================
RCS file: /data/mirror/openbsd/cvs/src/sys/net/netisr.h,v
retrieving revision 1.55
diff -u -p -r1.55 netisr.h
--- net/netisr.h        5 Jan 2021 20:43:36 -0000       1.55
+++ net/netisr.h        18 Apr 2022 17:12:59 -0000
@@ -41,8 +41,10 @@
  * interrupt used for scheduling the network code to calls
  * on the lowest level routine of each protocol.
  */
+#define        NETISR_IP       2               /* same as AF_INET */
 #define        NETISR_PFSYNC   5               /* for pfsync "immediate" tx */
 #define        NETISR_ARP      18              /* same as AF_LINK */
+#define        NETISR_IPV6     24              /* same as AF_INET6 */
 #define        NETISR_PPP      28              /* for PPP processing */
 #define        NETISR_BRIDGE   29              /* for bridge processing */
 #define        NETISR_SWITCH   31              /* for switch dataplane */
@@ -57,6 +59,8 @@ extern int    netisr;                 /* scheduling bits 
 extern struct task if_input_task_locked;
 
 void   arpintr(void);
+void   ipintr(void);
+void   ip6intr(void);
 void   pppintr(void);
 void   bridgeintr(void);
 void   switchintr(void);
Index: netinet/ip_input.c
===================================================================
RCS file: /data/mirror/openbsd/cvs/src/sys/netinet/ip_input.c,v
retrieving revision 1.366
diff -u -p -r1.366 ip_input.c
--- netinet/ip_input.c  22 Feb 2022 01:35:40 -0000      1.366
+++ netinet/ip_input.c  18 Apr 2022 17:12:59 -0000
@@ -129,6 +129,8 @@ const struct sysctl_bounded_args ipctl_v
        { IPCTL_ARPDOWN, &arpt_down, 0, INT_MAX },
 };
 
+struct niqueue ipintrq = NIQUEUE_INITIALIZER(IPQ_MAXLEN, NETISR_IP);
+
 struct pool ipqent_pool;
 struct pool ipq_pool;
 
@@ -142,6 +144,7 @@ static struct mbuf_queue    ipsendraw_mq;
 extern struct niqueue          arpinq;
 
 int    ip_ours(struct mbuf **, int *, int, int);
+int    ip_local(struct mbuf **, int *, int, int);
 int    ip_dooptions(struct mbuf *, struct ifnet *);
 int    in_ouraddr(struct mbuf *, struct ifnet *, struct rtentry **);
 
@@ -229,6 +232,43 @@ ip_init(void)
 }
 
 /*
+ * Enqueue packet for local delivery.  Queuing is used as a boundary
+ * between the network layer (input/forward path) running with shared
+ * NET_RLOCK_IN_SOFTNET() and the transport layer needing it exclusively.
+ */
+int
+ip_ours(struct mbuf **mp, int *offp, int nxt, int af)
+{
+       /* We are already in a IPv4/IPv6 local deliver loop. */
+       if (af != AF_UNSPEC)
+               return ip_local(mp, offp, nxt, af);
+
+       niq_enqueue(&ipintrq, *mp);
+       *mp = NULL;
+       return IPPROTO_DONE;
+}
+
+/*
+ * Dequeue and process locally delivered packets.
+ */
+void
+ipintr(void)
+{
+       struct mbuf *m;
+       int off, nxt;
+
+       while ((m = niq_dequeue(&ipintrq)) != NULL) {
+#ifdef DIAGNOSTIC
+               if ((m->m_flags & M_PKTHDR) == 0)
+                       panic("ipintr no HDR");
+#endif
+               off = 0;
+               nxt = ip_local(&m, &off, IPPROTO_IPV4, AF_UNSPEC);
+               KASSERT(nxt == IPPROTO_DONE);
+       }
+}
+
+/*
  * IPv4 input routine.
  *
  * Checksum and byte swap header.  Process options. Forward or deliver.
@@ -513,7 +553,7 @@ ip_input_if(struct mbuf **mp, int *offp,
  * If fragmented try to reassemble.  Pass to next level.
  */
 int
-ip_ours(struct mbuf **mp, int *offp, int nxt, int af)
+ip_local(struct mbuf **mp, int *offp, int nxt, int af)
 {
        struct mbuf *m = *mp;
        struct ip *ip = mtod(m, struct ip *);
@@ -521,6 +561,8 @@ ip_ours(struct mbuf **mp, int *offp, int
        struct ipqent *ipqe;
        int mff, hlen;
 
+       NET_ASSERT_WLOCKED();
+
        hlen = ip->ip_hl << 2;
 
        /*
@@ -1657,7 +1699,8 @@ ip_sysctl(int *name, u_int namelen, void
                    newlen));
 #endif
        case IPCTL_IFQUEUE:
-               return (EOPNOTSUPP);
+               return (sysctl_niq(name + 1, namelen - 1,
+                   oldp, oldlenp, newp, newlen, &ipintrq));
        case IPCTL_ARPQUEUE:
                return (sysctl_niq(name + 1, namelen - 1,
                    oldp, oldlenp, newp, newlen, &arpinq));
Index: netinet/ip_var.h
===================================================================
RCS file: /data/mirror/openbsd/cvs/src/sys/netinet/ip_var.h,v
retrieving revision 1.90
diff -u -p -r1.90 ip_var.h
--- netinet/ip_var.h    25 Feb 2022 23:51:03 -0000      1.90
+++ netinet/ip_var.h    18 Apr 2022 17:12:59 -0000
@@ -248,7 +248,6 @@ void         ip_stripoptions(struct mbuf *);
 int     ip_sysctl(int *, u_int, void *, size_t *, void *, size_t);
 void    ip_savecontrol(struct inpcb *, struct mbuf **, struct ip *,
            struct mbuf *);
-void    ipintr(void);
 int     ip_input_if(struct mbuf **, int *, int, int, struct ifnet *);
 int     ip_deliver(struct mbuf **, int *, int, int);
 void    ip_forward(struct mbuf *, struct ifnet *, struct rtentry *, int);
Index: netinet6/ip6_input.c
===================================================================
RCS file: /data/mirror/openbsd/cvs/src/sys/netinet6/ip6_input.c,v
retrieving revision 1.240
diff -u -p -r1.240 ip6_input.c
--- netinet6/ip6_input.c        22 Feb 2022 01:35:41 -0000      1.240
+++ netinet6/ip6_input.c        18 Apr 2022 17:12:59 -0000
@@ -113,11 +113,14 @@
 #include <netinet/ip_carp.h>
 #endif
 
+struct niqueue ip6intrq = NIQUEUE_INITIALIZER(IPQ_MAXLEN, NETISR_IPV6);
+
 struct cpumem *ip6counters;
 
 uint8_t ip6_soiikey[IP6_SOIIKEY_LEN];
 
 int ip6_ours(struct mbuf **, int *, int, int);
+int ip6_local(struct mbuf **, int *, int, int);
 int ip6_check_rh0hdr(struct mbuf *, int *);
 int ip6_hbhchcheck(struct mbuf *, int *, int *, int *);
 int ip6_hopopts_input(u_int32_t *, u_int32_t *, struct mbuf **, int *);
@@ -160,6 +163,43 @@ ip6_init(void)
        ip6counters = counters_alloc(ip6s_ncounters);
 }
 
+/*
+ * Enqueue packet for local delivery.  Queuing is used as a boundary
+ * between the network layer (input/forward path) running with shared
+ * NET_RLOCK_IN_SOFTNET() and the transport layer needing it exclusively.
+ */
+int
+ip6_ours(struct mbuf **mp, int *offp, int nxt, int af)
+{
+       /* We are already in a IPv4/IPv6 local deliver loop. */
+       if (af != AF_UNSPEC)
+               return ip6_local(mp, offp, nxt, af);
+
+       niq_enqueue(&ip6intrq, *mp);
+       *mp = NULL;
+       return IPPROTO_DONE;
+}
+
+/*
+ * Dequeue and process locally delivered packets.
+ */
+void
+ip6intr(void)
+{
+       struct mbuf *m;
+       int off, nxt;
+
+       while ((m = niq_dequeue(&ip6intrq)) != NULL) {
+#ifdef DIAGNOSTIC
+               if ((m->m_flags & M_PKTHDR) == 0)
+                       panic("ip6intr no HDR");
+#endif
+               off = 0;
+               nxt = ip6_local(&m, &off, IPPROTO_IPV6, AF_UNSPEC);
+               KASSERT(nxt == IPPROTO_DONE);
+       }
+}
+
 void
 ipv6_input(struct ifnet *ifp, struct mbuf *m)
 {
@@ -536,8 +576,10 @@ ip6_input_if(struct mbuf **mp, int *offp
 }
 
 int
-ip6_ours(struct mbuf **mp, int *offp, int nxt, int af)
+ip6_local(struct mbuf **mp, int *offp, int nxt, int af)
 {
+       NET_ASSERT_WLOCKED();
+
        if (ip6_hbhchcheck(*mp, offp, &nxt, NULL))
                return IPPROTO_DONE;
 
@@ -1462,7 +1504,8 @@ ip6_sysctl(int *name, u_int namelen, voi
                NET_UNLOCK();
                return (error);
        case IPV6CTL_IFQUEUE:
-               return (EOPNOTSUPP);
+               return (sysctl_niq(name + 1, namelen - 1,
+                   oldp, oldlenp, newp, newlen, &ip6intrq));
        case IPV6CTL_SOIIKEY:
                return (ip6_sysctl_soiikey(oldp, oldlenp, newp, newlen));
        default:

Reply via email to