On Mon, Jul 19, 2021 at 08:02:30PM +0300, Vitaliy Makkoveev wrote:
> I mean the case when ip_local() is called by ip_ours(). Unfortunately, I'm
> not familiar with PPTP, but it looks affected because it doesn't use tcp or
> udp as transport but encapsulates them into ip frames. Sorry for the noise
> if I'm wrong.
> 
>  +ip_ours(struct mbuf **mp, int *offp, int nxt, int af)
>  +{
>  +    /* We are already in a IPv4/IPv6 local deliver loop. */
>  +    if (af != AF_UNSPEC)
>  +            return ip_local(mp, offp, nxt, af);
>  +
>  +    niq_enqueue(&ipintrq, *mp);
>  +    *mp = NULL;
>  +    return IPPROTO_DONE;
>  +}

The af != AF_UNSPEC case already holds the exclusive net lock.
ipv4_input() sets AF_UNSPEC; the other case is for an IP in IP header.
The latter is called from ipintr().

I can put a NET_ASSERT_WLOCKED() into ip_local().  I am still running a
full regress with that.

bluhm

Index: net/if.c
===================================================================
RCS file: /data/mirror/openbsd/cvs/src/sys/net/if.c,v
retrieving revision 1.642
diff -u -p -r1.642 if.c
--- net/if.c    30 Jun 2021 13:23:33 -0000      1.642
+++ net/if.c    19 Jul 2021 14:51:31 -0000
@@ -109,6 +109,10 @@
 #include <netinet6/ip6_var.h>
 #endif
 
+#ifdef IPSEC
+#include <netinet/ip_ipsp.h>
+#endif
+
 #ifdef MPLS
 #include <netmpls/mpls.h>
 #endif
@@ -238,7 +242,7 @@ int ifq_congestion;
 
 int             netisr;
 
-#define        NET_TASKQ       1
+#define        NET_TASKQ       4
 struct taskq   *nettqmp[NET_TASKQ];
 
 struct task if_input_task_locked = TASK_INITIALIZER(if_netisr, NULL);
@@ -815,6 +819,7 @@ void
 if_input_process(struct ifnet *ifp, struct mbuf_list *ml)
 {
        struct mbuf *m;
+       int exclusive_lock = 0;
 
        if (ml_empty(ml))
                return;
@@ -834,10 +839,25 @@ if_input_process(struct ifnet *ifp, stru
         * to PF globals, pipex globals, unicast and multicast addresses
         * lists and the socket layer.
         */
-       NET_LOCK();
+
+       /*
+        * XXXSMP IPsec data structures are not ready to be
+        * accessed by multiple Network threads in parallel.
+        */
+       if (ipsec_in_use)
+               exclusive_lock = 1;
+       if (exclusive_lock)
+               NET_LOCK();
+       else
+               NET_RLOCK_IN_SOFTNET();
+
        while ((m = ml_dequeue(ml)) != NULL)
                (*ifp->if_input)(ifp, m);
-       NET_UNLOCK();
+
+       if (exclusive_lock)
+               NET_UNLOCK();
+       else
+               NET_RUNLOCK_IN_SOFTNET();
 }
 
 void
@@ -895,6 +915,12 @@ if_netisr(void *unused)
                        KERNEL_UNLOCK();
                }
 #endif
+               if (n & (1 << NETISR_IP))
+                       ipintr();
+#ifdef INET6
+               if (n & (1 << NETISR_IPV6))
+                       ip6intr();
+#endif
 #if NPPP > 0
                if (n & (1 << NETISR_PPP)) {
                        KERNEL_LOCK();
@@ -3311,17 +3337,14 @@ unhandled_af(int af)
        panic("unhandled af %d", af);
 }
 
-/*
- * XXXSMP This tunable is here to work around the fact that IPsec
- * globals aren't ready to be accessed by multiple threads in
- * parallel.
- */
-int             nettaskqs = NET_TASKQ;
-
 struct taskq *
 net_tq(unsigned int ifindex)
 {
        struct taskq *t = NULL;
+       static int nettaskqs;
+
+       if (nettaskqs == 0)
+               nettaskqs = min(NET_TASKQ, ncpus);
 
        t = nettqmp[ifindex % nettaskqs];
 
Index: net/if_ethersubr.c
===================================================================
RCS file: /data/mirror/openbsd/cvs/src/sys/net/if_ethersubr.c,v
retrieving revision 1.275
diff -u -p -r1.275 if_ethersubr.c
--- net/if_ethersubr.c  7 Jul 2021 20:19:01 -0000       1.275
+++ net/if_ethersubr.c  19 Jul 2021 14:32:48 -0000
@@ -222,7 +222,10 @@ ether_resolve(struct ifnet *ifp, struct 
 
        switch (af) {
        case AF_INET:
+               KERNEL_LOCK();
+               /* XXXSMP there is a MP race in arpresolve() */
                error = arpresolve(ifp, rt, m, dst, eh->ether_dhost);
+               KERNEL_UNLOCK();
                if (error)
                        return (error);
                eh->ether_type = htons(ETHERTYPE_IP);
@@ -245,7 +248,10 @@ ether_resolve(struct ifnet *ifp, struct 
                break;
 #ifdef INET6
        case AF_INET6:
+               KERNEL_LOCK();
+               /* XXXSMP there is a MP race in nd6_resolve() */
                error = nd6_resolve(ifp, rt, m, dst, eh->ether_dhost);
+               KERNEL_UNLOCK();
                if (error)
                        return (error);
                eh->ether_type = htons(ETHERTYPE_IPV6);
Index: net/ifq.c
===================================================================
RCS file: /data/mirror/openbsd/cvs/src/sys/net/ifq.c,v
retrieving revision 1.44
diff -u -p -r1.44 ifq.c
--- net/ifq.c   9 Jul 2021 01:22:05 -0000       1.44
+++ net/ifq.c   19 Jul 2021 14:32:48 -0000
@@ -243,7 +243,7 @@ void
 ifq_init(struct ifqueue *ifq, struct ifnet *ifp, unsigned int idx)
 {
        ifq->ifq_if = ifp;
-       ifq->ifq_softnet = net_tq(ifp->if_index); /* + idx */
+       ifq->ifq_softnet = net_tq(ifp->if_index + idx);
        ifq->ifq_softc = NULL;
 
        mtx_init(&ifq->ifq_mtx, IPL_NET);
@@ -620,7 +620,7 @@ void
 ifiq_init(struct ifiqueue *ifiq, struct ifnet *ifp, unsigned int idx)
 {
        ifiq->ifiq_if = ifp;
-       ifiq->ifiq_softnet = net_tq(ifp->if_index); /* + idx */
+       ifiq->ifiq_softnet = net_tq(ifp->if_index + idx);
        ifiq->ifiq_softc = NULL;
 
        mtx_init(&ifiq->ifiq_mtx, IPL_NET);
Index: net/netisr.h
===================================================================
RCS file: /data/mirror/openbsd/cvs/src/sys/net/netisr.h,v
retrieving revision 1.55
diff -u -p -r1.55 netisr.h
--- net/netisr.h        5 Jan 2021 20:43:36 -0000       1.55
+++ net/netisr.h        19 Jul 2021 14:32:48 -0000
@@ -41,8 +41,10 @@
  * interrupt used for scheduling the network code to calls
  * on the lowest level routine of each protocol.
  */
+#define        NETISR_IP       2               /* same as AF_INET */
 #define        NETISR_PFSYNC   5               /* for pfsync "immediate" tx */
 #define        NETISR_ARP      18              /* same as AF_LINK */
+#define        NETISR_IPV6     24              /* same as AF_INET6 */
 #define        NETISR_PPP      28              /* for PPP processing */
 #define        NETISR_BRIDGE   29              /* for bridge processing */
 #define        NETISR_SWITCH   31              /* for switch dataplane */
@@ -57,6 +59,8 @@ extern int    netisr;                 /* scheduling bits 
 extern struct task if_input_task_locked;
 
 void   arpintr(void);
+void   ipintr(void);
+void   ip6intr(void);
 void   pppintr(void);
 void   bridgeintr(void);
 void   switchintr(void);
Index: net/pfkeyv2.c
===================================================================
RCS file: /data/mirror/openbsd/cvs/src/sys/net/pfkeyv2.c,v
retrieving revision 1.218
diff -u -p -r1.218 pfkeyv2.c
--- net/pfkeyv2.c       14 Jul 2021 22:39:26 -0000      1.218
+++ net/pfkeyv2.c       19 Jul 2021 14:48:34 -0000
@@ -2019,14 +2019,6 @@ pfkeyv2_send(struct socket *so, void *me
                        }
                        TAILQ_INSERT_HEAD(&ipsec_policy_head, ipo, ipo_list);
                        ipsec_in_use++;
-                       /*
-                        * XXXSMP IPsec data structures are not ready to be
-                        * accessed by multiple Network threads in parallel,
-                        * so force all packets to be processed by the first
-                        * one.
-                        */
-                       extern int nettaskqs;
-                       nettaskqs = 1;
                } else {
                        ipo->ipo_last_searched = ipo->ipo_flags = 0;
                }
Index: netinet/ip_input.c
===================================================================
RCS file: /data/mirror/openbsd/cvs/src/sys/netinet/ip_input.c,v
retrieving revision 1.363
diff -u -p -r1.363 ip_input.c
--- netinet/ip_input.c  21 Jun 2021 22:09:14 -0000      1.363
+++ netinet/ip_input.c  19 Jul 2021 20:32:15 -0000
@@ -130,6 +130,8 @@ const struct sysctl_bounded_args ipctl_v
        { IPCTL_ARPDOWN, &arpt_down, 0, INT_MAX },
 };
 
+struct niqueue ipintrq = NIQUEUE_INITIALIZER(IPQ_MAXLEN, NETISR_IP);
+
 struct pool ipqent_pool;
 struct pool ipq_pool;
 
@@ -143,6 +145,7 @@ static struct mbuf_queue    ipsendraw_mq;
 extern struct niqueue          arpinq;
 
 int    ip_ours(struct mbuf **, int *, int, int);
+int    ip_local(struct mbuf **, int *, int, int);
 int    ip_dooptions(struct mbuf *, struct ifnet *);
 int    in_ouraddr(struct mbuf *, struct ifnet *, struct rtentry **);
 
@@ -230,6 +233,43 @@ ip_init(void)
 }
 
 /*
+ * Enqueue packet for local delivery.  Queuing is used as a boundary
+ * between the network layer (input/forward path) running with shared
+ * NET_RLOCK_IN_SOFTNET() and the transport layer needing it exclusively.
+ */
+int
+ip_ours(struct mbuf **mp, int *offp, int nxt, int af)
+{
+       /* We are already in an IPv4/IPv6 local delivery loop. */
+       if (af != AF_UNSPEC)
+               return ip_local(mp, offp, nxt, af);
+
+       niq_enqueue(&ipintrq, *mp);
+       *mp = NULL;
+       return IPPROTO_DONE;
+}
+
+/*
+ * Dequeue packets queued by ip_ours() and pass each one to ip_local().
+ */
+void
+ipintr(void)
+{
+       struct mbuf *m;
+       int off, nxt;
+
+       while ((m = niq_dequeue(&ipintrq)) != NULL) {
+#ifdef DIAGNOSTIC
+               if ((m->m_flags & M_PKTHDR) == 0)
+                       panic("ipintr no HDR");
+#endif
+               off = 0;
+               nxt = ip_local(&m, &off, IPPROTO_IPV4, AF_UNSPEC);
+               KASSERT(nxt == IPPROTO_DONE);
+       }
+}
+
+/*
  * IPv4 input routine.
  *
  * Checksum and byte swap header.  Process options. Forward or deliver.
@@ -514,7 +554,7 @@ ip_input_if(struct mbuf **mp, int *offp,
  * If fragmented try to reassemble.  Pass to next level.
  */
 int
-ip_ours(struct mbuf **mp, int *offp, int nxt, int af)
+ip_local(struct mbuf **mp, int *offp, int nxt, int af)
 {
        struct mbuf *m = *mp;
        struct ip *ip = mtod(m, struct ip *);
@@ -522,6 +562,8 @@ ip_ours(struct mbuf **mp, int *offp, int
        struct ipqent *ipqe;
        int mff, hlen;
 
+       NET_ASSERT_WLOCKED();
+
        hlen = ip->ip_hl << 2;
 
        /*
@@ -1665,7 +1707,8 @@ ip_sysctl(int *name, u_int namelen, void
                    newlen));
 #endif
        case IPCTL_IFQUEUE:
-               return (EOPNOTSUPP);
+               return (sysctl_niq(name + 1, namelen - 1,
+                   oldp, oldlenp, newp, newlen, &ipintrq));
        case IPCTL_ARPQUEUE:
                return (sysctl_niq(name + 1, namelen - 1,
                    oldp, oldlenp, newp, newlen, &arpinq));
Index: netinet/ip_var.h
===================================================================
RCS file: /data/mirror/openbsd/cvs/src/sys/netinet/ip_var.h,v
retrieving revision 1.88
diff -u -p -r1.88 ip_var.h
--- netinet/ip_var.h    30 Mar 2021 08:37:11 -0000      1.88
+++ netinet/ip_var.h    19 Jul 2021 14:32:48 -0000
@@ -248,7 +248,6 @@ void         ip_stripoptions(struct mbuf *);
 int     ip_sysctl(int *, u_int, void *, size_t *, void *, size_t);
 void    ip_savecontrol(struct inpcb *, struct mbuf **, struct ip *,
            struct mbuf *);
-void    ipintr(void);
 int     ip_input_if(struct mbuf **, int *, int, int, struct ifnet *);
 int     ip_deliver(struct mbuf **, int *, int, int);
 void    ip_forward(struct mbuf *, struct ifnet *, struct rtentry *, int);
Index: netinet6/ip6_input.c
===================================================================
RCS file: /data/mirror/openbsd/cvs/src/sys/netinet6/ip6_input.c,v
retrieving revision 1.237
diff -u -p -r1.237 ip6_input.c
--- netinet6/ip6_input.c        3 Jun 2021 04:47:54 -0000       1.237
+++ netinet6/ip6_input.c        19 Jul 2021 20:31:14 -0000
@@ -115,11 +115,14 @@
 #include <netinet/ip_carp.h>
 #endif
 
+struct niqueue ip6intrq = NIQUEUE_INITIALIZER(IPQ_MAXLEN, NETISR_IPV6);
+
 struct cpumem *ip6counters;
 
 uint8_t ip6_soiikey[IP6_SOIIKEY_LEN];
 
 int ip6_ours(struct mbuf **, int *, int, int);
+int ip6_local(struct mbuf **, int *, int, int);
 int ip6_check_rh0hdr(struct mbuf *, int *);
 int ip6_hbhchcheck(struct mbuf *, int *, int *, int *);
 int ip6_hopopts_input(u_int32_t *, u_int32_t *, struct mbuf **, int *);
@@ -162,6 +165,43 @@ ip6_init(void)
        ip6counters = counters_alloc(ip6s_ncounters);
 }
 
+/*
+ * Enqueue packet for local delivery.  Queuing is used as a boundary
+ * between the network layer (input/forward path) running with shared
+ * NET_RLOCK_IN_SOFTNET() and the transport layer needing it exclusively.
+ */
+int
+ip6_ours(struct mbuf **mp, int *offp, int nxt, int af)
+{
+       /* We are already in an IPv4/IPv6 local delivery loop. */
+       if (af != AF_UNSPEC)
+               return ip6_local(mp, offp, nxt, af);
+
+       niq_enqueue(&ip6intrq, *mp);
+       *mp = NULL;
+       return IPPROTO_DONE;
+}
+
+/*
+ * Dequeue packets queued by ip6_ours() and pass each one to ip6_local().
+ */
+void
+ip6intr(void)
+{
+       struct mbuf *m;
+       int off, nxt;
+
+       while ((m = niq_dequeue(&ip6intrq)) != NULL) {
+#ifdef DIAGNOSTIC
+               if ((m->m_flags & M_PKTHDR) == 0)
+                       panic("ip6intr no HDR");
+#endif
+               off = 0;
+               nxt = ip6_local(&m, &off, IPPROTO_IPV6, AF_UNSPEC);
+               KASSERT(nxt == IPPROTO_DONE);
+       }
+}
+
 void
 ipv6_input(struct ifnet *ifp, struct mbuf *m)
 {
@@ -544,8 +584,10 @@ ip6_input_if(struct mbuf **mp, int *offp
 }
 
 int
-ip6_ours(struct mbuf **mp, int *offp, int nxt, int af)
+ip6_local(struct mbuf **mp, int *offp, int nxt, int af)
 {
+       NET_ASSERT_WLOCKED();
+
        if (ip6_hbhchcheck(*mp, offp, &nxt, NULL))
                return IPPROTO_DONE;
 
@@ -1470,7 +1512,8 @@ ip6_sysctl(int *name, u_int namelen, voi
                NET_UNLOCK();
                return (error);
        case IPV6CTL_IFQUEUE:
-               return (EOPNOTSUPP);
+               return (sysctl_niq(name + 1, namelen - 1,
+                   oldp, oldlenp, newp, newlen, &ip6intrq));
        case IPV6CTL_SOIIKEY:
                return (ip6_sysctl_soiikey(oldp, oldlenp, newp, newlen));
        default:

Reply via email to