On Fri, Dec 03, 2021 at 08:35:45PM +0100, Alexander Bluhm wrote:
> Hi,
> 
> After implementing MP safe refcounting for IPsec TDB, I wonder how
> stable my diff for forwarding on multiple CPU is.
> 
> Note that IPsec still has the workaround to disable multiple queues.
> We do not have a mutex that protects the TDB fields yet.  We have
> only fixed the memory management.
> 
> My goal is to get real MP pressure on the lower part of the IP
> network stack.  Only this will show remaining bugs.
> 
> bluhm
> 

This unlocked `ipsec_in_use' check doesn't work. 

SADB_X_ADDFLOW and `ipsec_in_use' increment are netlock serialized. 

>  if_input_process(struct ifnet *ifp, struct mbuf_list *ml)
>  {
>       struct mbuf *m;
> +     int exclusive_lock = 0;
>  
>       if (ml_empty(ml))
>               return;
> @@ -834,15 +839,27 @@ if_input_process(struct ifnet *ifp, stru
>        * lists and the socket layer.
>        */
>  
> +#ifdef IPSEC
>       /*
>        * XXXSMP IPsec data structures are not ready to be accessed
>        * by multiple network threads in parallel.  In this case
>        * use an exclusive lock.
>        */
> -     NET_LOCK();
> +     if (ipsec_in_use)
> +             exclusive_lock = 1;
> +#endif

Concurrent SADB_X_ADDFLOW threads could add some SA between your
unlocked `ipsec_in_use' check and the following shared netlock. This
time `ipsec_in_use' modification is netlock serialized but you could add
the new rwlock(9). Since we are only interesting in non-null value of
`ipsec_in_use' it's enough to add it here and around SADB_X_ADDFLOW case
within pfkeyv2_send().

I already posted the fix for this diff to your old [1] thread.

> +     if (exclusive_lock)
> +             NET_LOCK();
> +     else
> +             NET_RLOCK_IN_SOFTNET();
> +
>       while ((m = ml_dequeue(ml)) != NULL)
>               (*ifp->if_input)(ifp, m);

1. https://marc.info/?l=openbsd-tech&m=162698721127298&w=2

> Index: net/if.c
> ===================================================================
> RCS file: /data/mirror/openbsd/cvs/src/sys/net/if.c,v
> retrieving revision 1.644
> diff -u -p -r1.644 if.c
> --- net/if.c  11 Nov 2021 10:03:10 -0000      1.644
> +++ net/if.c  3 Dec 2021 18:51:29 -0000
> @@ -108,6 +108,10 @@
>  #include <netinet6/ip6_var.h>
>  #endif
>  
> +#ifdef IPSEC
> +#include <netinet/ip_ipsp.h>
> +#endif
> +
>  #ifdef MPLS
>  #include <netmpls/mpls.h>
>  #endif
> @@ -237,7 +241,7 @@ int       ifq_congestion;
>  
>  int           netisr;
>  
> -#define      NET_TASKQ       1
> +#define      NET_TASKQ       4
>  struct taskq *nettqmp[NET_TASKQ];
>  
>  struct task if_input_task_locked = TASK_INITIALIZER(if_netisr, NULL);
> @@ -814,6 +818,7 @@ void
>  if_input_process(struct ifnet *ifp, struct mbuf_list *ml)
>  {
>       struct mbuf *m;
> +     int exclusive_lock = 0;
>  
>       if (ml_empty(ml))
>               return;
> @@ -834,15 +839,27 @@ if_input_process(struct ifnet *ifp, stru
>        * lists and the socket layer.
>        */
>  
> +#ifdef IPSEC
>       /*
>        * XXXSMP IPsec data structures are not ready to be accessed
>        * by multiple network threads in parallel.  In this case
>        * use an exclusive lock.
>        */
> -     NET_LOCK();
> +     if (ipsec_in_use)
> +             exclusive_lock = 1;
> +#endif
> +     if (exclusive_lock)
> +             NET_LOCK();
> +     else
> +             NET_RLOCK_IN_SOFTNET();
> +
>       while ((m = ml_dequeue(ml)) != NULL)
>               (*ifp->if_input)(ifp, m);
> -     NET_UNLOCK();
> +
> +     if (exclusive_lock)
> +             NET_UNLOCK();
> +     else
> +             NET_RUNLOCK_IN_SOFTNET();
>  }
>  
>  void
> @@ -899,6 +916,12 @@ if_netisr(void *unused)
>                       arpintr();
>                       KERNEL_UNLOCK();
>               }
> +#endif
> +             if (n & (1 << NETISR_IP))
> +                     ipintr();
> +#ifdef INET6
> +             if (n & (1 << NETISR_IPV6))
> +                     ip6intr();
>  #endif
>  #if NPPP > 0
>               if (n & (1 << NETISR_PPP)) {
> Index: net/if_ethersubr.c
> ===================================================================
> RCS file: /data/mirror/openbsd/cvs/src/sys/net/if_ethersubr.c,v
> retrieving revision 1.276
> diff -u -p -r1.276 if_ethersubr.c
> --- net/if_ethersubr.c        19 Aug 2021 10:22:00 -0000      1.276
> +++ net/if_ethersubr.c        3 Dec 2021 18:51:29 -0000
> @@ -222,7 +222,10 @@ ether_resolve(struct ifnet *ifp, struct 
>  
>       switch (af) {
>       case AF_INET:
> +             KERNEL_LOCK();
> +             /* XXXSMP there is a MP race in arpresolve() */
>               error = arpresolve(ifp, rt, m, dst, eh->ether_dhost);
> +             KERNEL_UNLOCK();
>               if (error)
>                       return (error);
>               eh->ether_type = htons(ETHERTYPE_IP);
> @@ -245,7 +248,10 @@ ether_resolve(struct ifnet *ifp, struct 
>               break;
>  #ifdef INET6
>       case AF_INET6:
> +             KERNEL_LOCK();
> +             /* XXXSMP there is a MP race in nd6_resolve() */
>               error = nd6_resolve(ifp, rt, m, dst, eh->ether_dhost);
> +             KERNEL_UNLOCK();
>               if (error)
>                       return (error);
>               eh->ether_type = htons(ETHERTYPE_IPV6);
> @@ -271,13 +277,19 @@ ether_resolve(struct ifnet *ifp, struct 
>                       break;
>  #ifdef INET6
>               case AF_INET6:
> +                     KERNEL_LOCK();
> +                     /* XXXSMP there is a MP race in nd6_resolve() */
>                       error = nd6_resolve(ifp, rt, m, dst, eh->ether_dhost);
> +                     KERNEL_UNLOCK();
>                       if (error)
>                               return (error);
>                       break;
>  #endif
>               case AF_INET:
> +                     KERNEL_LOCK();
> +                     /* XXXSMP there is a MP race in arpresolve() */
>                       error = arpresolve(ifp, rt, m, dst, eh->ether_dhost);
> +                     KERNEL_UNLOCK();
>                       if (error)
>                               return (error);
>                       break;
> @@ -529,12 +541,14 @@ ether_input(struct ifnet *ifp, struct mb
>       case ETHERTYPE_PPPOE:
>               if (m->m_flags & (M_MCAST | M_BCAST))
>                       goto dropanyway;
> +             KERNEL_LOCK();
>  #ifdef PIPEX
>               if (pipex_enable) {
>                       struct pipex_session *session;
>  
>                       if ((session = pipex_pppoe_lookup_session(m)) != NULL) {
>                               pipex_pppoe_input(m, session);
> +                             KERNEL_UNLOCK();
>                               return;
>                       }
>               }
> @@ -543,6 +557,7 @@ ether_input(struct ifnet *ifp, struct mb
>                       pppoe_disc_input(m);
>               else
>                       pppoe_data_input(m);
> +             KERNEL_UNLOCK();
>               return;
>  #endif
>  #ifdef MPLS
> Index: net/ifq.c
> ===================================================================
> RCS file: /data/mirror/openbsd/cvs/src/sys/net/ifq.c,v
> retrieving revision 1.44
> diff -u -p -r1.44 ifq.c
> --- net/ifq.c 9 Jul 2021 01:22:05 -0000       1.44
> +++ net/ifq.c 3 Dec 2021 18:51:29 -0000
> @@ -243,7 +243,7 @@ void
>  ifq_init(struct ifqueue *ifq, struct ifnet *ifp, unsigned int idx)
>  {
>       ifq->ifq_if = ifp;
> -     ifq->ifq_softnet = net_tq(ifp->if_index); /* + idx */
> +     ifq->ifq_softnet = net_tq(ifp->if_index + idx);
>       ifq->ifq_softc = NULL;
>  
>       mtx_init(&ifq->ifq_mtx, IPL_NET);
> @@ -620,7 +620,7 @@ void
>  ifiq_init(struct ifiqueue *ifiq, struct ifnet *ifp, unsigned int idx)
>  {
>       ifiq->ifiq_if = ifp;
> -     ifiq->ifiq_softnet = net_tq(ifp->if_index); /* + idx */
> +     ifiq->ifiq_softnet = net_tq(ifp->if_index + idx);
>       ifiq->ifiq_softc = NULL;
>  
>       mtx_init(&ifiq->ifiq_mtx, IPL_NET);
> Index: net/netisr.h
> ===================================================================
> RCS file: /data/mirror/openbsd/cvs/src/sys/net/netisr.h,v
> retrieving revision 1.55
> diff -u -p -r1.55 netisr.h
> --- net/netisr.h      5 Jan 2021 20:43:36 -0000       1.55
> +++ net/netisr.h      3 Dec 2021 18:51:29 -0000
> @@ -41,8 +41,10 @@
>   * interrupt used for scheduling the network code to calls
>   * on the lowest level routine of each protocol.
>   */
> +#define      NETISR_IP       2               /* same as AF_INET */
>  #define      NETISR_PFSYNC   5               /* for pfsync "immediate" tx */
>  #define      NETISR_ARP      18              /* same as AF_LINK */
> +#define      NETISR_IPV6     24              /* same as AF_INET6 */
>  #define      NETISR_PPP      28              /* for PPP processing */
>  #define      NETISR_BRIDGE   29              /* for bridge processing */
>  #define      NETISR_SWITCH   31              /* for switch dataplane */
> @@ -57,6 +59,8 @@ extern int  netisr;                 /* scheduling bits 
>  extern struct task if_input_task_locked;
>  
>  void arpintr(void);
> +void ipintr(void);
> +void ip6intr(void);
>  void pppintr(void);
>  void bridgeintr(void);
>  void switchintr(void);
> Index: netinet/ip_input.c
> ===================================================================
> RCS file: /data/mirror/openbsd/cvs/src/sys/netinet/ip_input.c,v
> retrieving revision 1.364
> diff -u -p -r1.364 ip_input.c
> --- netinet/ip_input.c        22 Nov 2021 13:47:10 -0000      1.364
> +++ netinet/ip_input.c        3 Dec 2021 18:52:11 -0000
> @@ -130,6 +130,8 @@ const struct sysctl_bounded_args ipctl_v
>       { IPCTL_ARPDOWN, &arpt_down, 0, INT_MAX },
>  };
>  
> +struct niqueue ipintrq = NIQUEUE_INITIALIZER(IPQ_MAXLEN, NETISR_IP);
> +
>  struct pool ipqent_pool;
>  struct pool ipq_pool;
>  
> @@ -143,6 +145,7 @@ static struct mbuf_queue  ipsendraw_mq;
>  extern struct niqueue                arpinq;
>  
>  int  ip_ours(struct mbuf **, int *, int, int);
> +int  ip_local(struct mbuf **, int *, int, int);
>  int  ip_dooptions(struct mbuf *, struct ifnet *);
>  int  in_ouraddr(struct mbuf *, struct ifnet *, struct rtentry **);
>  
> @@ -230,6 +233,43 @@ ip_init(void)
>  }
>  
>  /*
> + * Enqueue packet for local delivery.  Queuing is used as a boundary
> + * between the network layer (input/forward path) running with shared
> + * NET_RLOCK_IN_SOFTNET() and the transport layer needing it exclusively.
> + */
> +int
> +ip_ours(struct mbuf **mp, int *offp, int nxt, int af)
> +{
> +     /* We are already in a IPv4/IPv6 local deliver loop. */
> +     if (af != AF_UNSPEC)
> +             return ip_local(mp, offp, nxt, af);
> +
> +     niq_enqueue(&ipintrq, *mp);
> +     *mp = NULL;
> +     return IPPROTO_DONE;
> +}
> +
> +/*
> + * Dequeue and process locally delivered packets.
> + */
> +void
> +ipintr(void)
> +{
> +     struct mbuf *m;
> +     int off, nxt;
> +
> +     while ((m = niq_dequeue(&ipintrq)) != NULL) {
> +#ifdef DIAGNOSTIC
> +             if ((m->m_flags & M_PKTHDR) == 0)
> +                     panic("ipintr no HDR");
> +#endif
> +             off = 0;
> +             nxt = ip_local(&m, &off, IPPROTO_IPV4, AF_UNSPEC);
> +             KASSERT(nxt == IPPROTO_DONE);
> +     }
> +}
> +
> +/*
>   * IPv4 input routine.
>   *
>   * Checksum and byte swap header.  Process options. Forward or deliver.
> @@ -514,7 +554,7 @@ ip_input_if(struct mbuf **mp, int *offp,
>   * If fragmented try to reassemble.  Pass to next level.
>   */
>  int
> -ip_ours(struct mbuf **mp, int *offp, int nxt, int af)
> +ip_local(struct mbuf **mp, int *offp, int nxt, int af)
>  {
>       struct mbuf *m = *mp;
>       struct ip *ip = mtod(m, struct ip *);
> @@ -1663,7 +1703,8 @@ ip_sysctl(int *name, u_int namelen, void
>                   newlen));
>  #endif
>       case IPCTL_IFQUEUE:
> -             return (EOPNOTSUPP);
> +             return (sysctl_niq(name + 1, namelen - 1,
> +                 oldp, oldlenp, newp, newlen, &ipintrq));
>       case IPCTL_ARPQUEUE:
>               return (sysctl_niq(name + 1, namelen - 1,
>                   oldp, oldlenp, newp, newlen, &arpinq));
> Index: netinet/ip_var.h
> ===================================================================
> RCS file: /data/mirror/openbsd/cvs/src/sys/netinet/ip_var.h,v
> retrieving revision 1.88
> diff -u -p -r1.88 ip_var.h
> --- netinet/ip_var.h  30 Mar 2021 08:37:11 -0000      1.88
> +++ netinet/ip_var.h  3 Dec 2021 18:51:29 -0000
> @@ -248,7 +248,6 @@ void       ip_stripoptions(struct mbuf *);
>  int   ip_sysctl(int *, u_int, void *, size_t *, void *, size_t);
>  void  ip_savecontrol(struct inpcb *, struct mbuf **, struct ip *,
>           struct mbuf *);
> -void  ipintr(void);
>  int   ip_input_if(struct mbuf **, int *, int, int, struct ifnet *);
>  int   ip_deliver(struct mbuf **, int *, int, int);
>  void  ip_forward(struct mbuf *, struct ifnet *, struct rtentry *, int);
> Index: netinet6/ip6_input.c
> ===================================================================
> RCS file: /data/mirror/openbsd/cvs/src/sys/netinet6/ip6_input.c,v
> retrieving revision 1.237
> diff -u -p -r1.237 ip6_input.c
> --- netinet6/ip6_input.c      3 Jun 2021 04:47:54 -0000       1.237
> +++ netinet6/ip6_input.c      3 Dec 2021 18:52:28 -0000
> @@ -115,11 +115,14 @@
>  #include <netinet/ip_carp.h>
>  #endif
>  
> +struct niqueue ip6intrq = NIQUEUE_INITIALIZER(IPQ_MAXLEN, NETISR_IPV6);
> +
>  struct cpumem *ip6counters;
>  
>  uint8_t ip6_soiikey[IP6_SOIIKEY_LEN];
>  
>  int ip6_ours(struct mbuf **, int *, int, int);
> +int ip6_local(struct mbuf **, int *, int, int);
>  int ip6_check_rh0hdr(struct mbuf *, int *);
>  int ip6_hbhchcheck(struct mbuf *, int *, int *, int *);
>  int ip6_hopopts_input(u_int32_t *, u_int32_t *, struct mbuf **, int *);
> @@ -162,6 +165,43 @@ ip6_init(void)
>       ip6counters = counters_alloc(ip6s_ncounters);
>  }
>  
> +/*
> + * Enqueue packet for local delivery.  Queuing is used as a boundary
> + * between the network layer (input/forward path) running with shared
> + * NET_RLOCK_IN_SOFTNET() and the transport layer needing it exclusively.
> + */
> +int
> +ip6_ours(struct mbuf **mp, int *offp, int nxt, int af)
> +{
> +     /* We are already in a IPv4/IPv6 local deliver loop. */
> +     if (af != AF_UNSPEC)
> +             return ip6_local(mp, offp, nxt, af);
> +
> +     niq_enqueue(&ip6intrq, *mp);
> +     *mp = NULL;
> +     return IPPROTO_DONE;
> +}
> +
> +/*
> + * Dequeue and process locally delivered packets.
> + */
> +void
> +ip6intr(void)
> +{
> +     struct mbuf *m;
> +     int off, nxt;
> +
> +     while ((m = niq_dequeue(&ip6intrq)) != NULL) {
> +#ifdef DIAGNOSTIC
> +             if ((m->m_flags & M_PKTHDR) == 0)
> +                     panic("ip6intr no HDR");
> +#endif
> +             off = 0;
> +             nxt = ip6_local(&m, &off, IPPROTO_IPV6, AF_UNSPEC);
> +             KASSERT(nxt == IPPROTO_DONE);
> +     }
> +}
> +
>  void
>  ipv6_input(struct ifnet *ifp, struct mbuf *m)
>  {
> @@ -544,7 +584,7 @@ ip6_input_if(struct mbuf **mp, int *offp
>  }
>  
>  int
> -ip6_ours(struct mbuf **mp, int *offp, int nxt, int af)
> +ip6_local(struct mbuf **mp, int *offp, int nxt, int af)
>  {
>       if (ip6_hbhchcheck(*mp, offp, &nxt, NULL))
>               return IPPROTO_DONE;
> @@ -1470,7 +1510,8 @@ ip6_sysctl(int *name, u_int namelen, voi
>               NET_UNLOCK();
>               return (error);
>       case IPV6CTL_IFQUEUE:
> -             return (EOPNOTSUPP);
> +             return (sysctl_niq(name + 1, namelen - 1,
> +                 oldp, oldlenp, newp, newlen, &ip6intrq));
>       case IPV6CTL_SOIIKEY:
>               return (ip6_sysctl_soiikey(oldp, oldlenp, newp, newlen));
>       default:
> 

Reply via email to