Not a big issue, but 'tdb->tdb_odrops' should be incremented with
netlock held too.

On Fri, Jul 16, 2021 at 10:57:24PM +0200, Alexander Bluhm wrote:
> On Fri, Jul 16, 2021 at 08:02:18PM +0200, Hrvoje Popovski wrote:
> > x3550m4# panic: malloc: out of space in kmem_map
> 
> > ddb{3}> show malloc
> >            Type InUse  MemUse  HighUse   Limit  Requests Type Lim
> >      xform_data 322973  40366K   70359K  78643K   5546065        0
> 
> > ddb{3}> show all pool
> > Name      Size Requests Fail Releases Pgreq Pgrel Npage Hiwat Minpg Maxpg 
> > Idle
> > cryptop    352  5872133    0  5549196 259257 229899 29358 51176   0     8   
> >  0
> > mcl2k2     2112  572160    0        0 38138     3 38135 38138     0     8   
> >  0
> > mbufpl     256   572222    0        0 35753     2 35751 35753     0     8   
> >  0
> 
> Queuing crypto operations in unlimited task queues is a bad idea.
> If new packets arrive faster than crypto and packet output is
> processing them, kernel will run out of memory.
> 
> Goal should be to run crypto synchronously, but use multiple cores
> for IPsec.  Queues in the packet path result in strange system
> behavior.  But the may be necessary on our way to MP.
> 
> I was fixing some MP bugs in the crypto layer anyway.  With the
> diff below, we can disable crypto queueing.  Then you cannot run
> into the out of memory situation.
> 
> The diff, you wanted to test, fixes a bug that only shows up if
> packets are delayed in queues.  So with the diff below, the hanging
> IPsec shoud go way no matter if you use my other fix or not.
> 
> All I said is more or less theory, I did not test it yet.
> 
> bluhm
> 
> Index: crypto/crypto.c
> ===================================================================
> RCS file: /data/mirror/openbsd/cvs/src/sys/crypto/crypto.c,v
> retrieving revision 1.83
> diff -u -p -r1.83 crypto.c
> --- crypto/crypto.c   30 Jun 2021 12:21:02 -0000      1.83
> +++ crypto/crypto.c   16 Jul 2021 20:35:24 -0000
> @@ -387,26 +387,33 @@ crypto_unregister(u_int32_t driverid, in
>  int
>  crypto_dispatch(struct cryptop *crp)
>  {
> -     struct taskq *tq = crypto_taskq;
> -     int s;
> +     int error = 0, lock = 1, s;
>       u_int32_t hid;
>  
>       s = splvm();
>       hid = (crp->crp_sid >> 32) & 0xffffffff;
>       if (hid < crypto_drivers_num) {
>               if (crypto_drivers[hid].cc_flags & CRYPTOCAP_F_MPSAFE)
> -                     tq = crypto_taskq_mpsafe;
> +                     /* XXXSMP crypto_invoke() is not MP safe */
> +                     lock = 1;
>       }
>       splx(s);
>  
> -     if (tq && !(crp->crp_flags & CRYPTO_F_NOQUEUE)) {
> +     if (crp->crp_flags & CRYPTO_F_NOQUEUE) {
> +             if (lock)
> +                     KERNEL_LOCK();
> +             error = crypto_invoke(crp);
> +             if (lock)
> +                     KERNEL_UNLOCK();
> +     } else {
> +             struct taskq *tq;
> +
> +             tq = lock ? crypto_taskq : crypto_taskq_mpsafe;
>               task_set(&crp->crp_task, (void (*))crypto_invoke, crp);
>               task_add(tq, &crp->crp_task);
> -     } else {
> -             crypto_invoke(crp);
>       }
>  
> -     return 0;
> +     return error;
>  }
>  
>  /*
> @@ -424,6 +431,8 @@ crypto_invoke(struct cryptop *crp)
>       if (crp == NULL || crp->crp_callback == NULL)
>               return EINVAL;
>  
> +     KERNEL_ASSERT_LOCKED();
> +
>       s = splvm();
>       if (crp->crp_ndesc < 1 || crypto_drivers == NULL) {
>               crp->crp_etype = EINVAL;
> @@ -534,12 +543,25 @@ crypto_init(void)
>  void
>  crypto_done(struct cryptop *crp)
>  {
> +     int lock = 1;
> +
>       crp->crp_flags |= CRYPTO_F_DONE;
> +
> +     if (crp->crp_flags & CRYPTO_F_MPSAFE)
> +             lock = 0;
> +
>       if (crp->crp_flags & CRYPTO_F_NOQUEUE) {
>               /* not from the crypto queue, wakeup the userland process */
> +             if (lock)
> +                     KERNEL_LOCK();
>               crp->crp_callback(crp);
> +             if (lock)
> +                     KERNEL_UNLOCK();
>       } else {
> +             struct taskq *tq;
> +
> +             tq = lock ? crypto_taskq : crypto_taskq_mpsafe;
>               task_set(&crp->crp_task, (void (*))crp->crp_callback, crp);
> -             task_add(crypto_taskq, &crp->crp_task);
> +             task_add(tq, &crp->crp_task);
>       }
>  }
> Index: crypto/cryptodev.h
> ===================================================================
> RCS file: /data/mirror/openbsd/cvs/src/sys/crypto/cryptodev.h,v
> retrieving revision 1.73
> diff -u -p -r1.73 cryptodev.h
> --- crypto/cryptodev.h        9 Jul 2021 20:43:28 -0000       1.73
> +++ crypto/cryptodev.h        16 Jul 2021 20:35:24 -0000
> @@ -171,6 +171,7 @@ struct cryptop {
>  
>  #define CRYPTO_F_IMBUF       0x0001  /* Input/output are mbuf chains, 
> otherwise contig */
>  #define CRYPTO_F_IOV 0x0002  /* Input/output are uio */
> +#define CRYPTO_F_MPSAFE      0x0004  /* Do not use kernel lock for callback 
> */
>  #define CRYPTO_F_NOQUEUE     0x0008  /* Don't use crypto queue/thread */
>  #define CRYPTO_F_DONE        0x0010  /* request completed */
>  
> Index: netinet/ip_ah.c
> ===================================================================
> RCS file: /data/mirror/openbsd/cvs/src/sys/netinet/ip_ah.c,v
> retrieving revision 1.150
> diff -u -p -r1.150 ip_ah.c
> --- netinet/ip_ah.c   8 Jul 2021 21:07:19 -0000       1.150
> +++ netinet/ip_ah.c   16 Jul 2021 20:38:20 -0000
> @@ -684,7 +684,7 @@ ah_input(struct mbuf *m, struct tdb *tdb
>  
>       /* Crypto operation descriptor. */
>       crp->crp_ilen = m->m_pkthdr.len; /* Total input length. */
> -     crp->crp_flags = CRYPTO_F_IMBUF;
> +     crp->crp_flags = CRYPTO_F_IMBUF | CRYPTO_F_MPSAFE | CRYPTO_F_NOQUEUE;
>       crp->crp_buf = (caddr_t)m;
>       crp->crp_callback = ipsec_input_cb;
>       crp->crp_sid = tdb->tdb_cryptoid;
> @@ -698,9 +698,7 @@ ah_input(struct mbuf *m, struct tdb *tdb
>       tc->tc_rdomain = tdb->tdb_rdomain;
>       memcpy(&tc->tc_dst, &tdb->tdb_dst, sizeof(union sockaddr_union));
>  
> -     KERNEL_LOCK();
>       error = crypto_dispatch(crp);
> -     KERNEL_UNLOCK();
>       return error;
>  
>   drop:
> @@ -1131,7 +1129,7 @@ ah_output(struct mbuf *m, struct tdb *td
>  
>       /* Crypto operation descriptor. */
>       crp->crp_ilen = m->m_pkthdr.len; /* Total input length. */
> -     crp->crp_flags = CRYPTO_F_IMBUF;
> +     crp->crp_flags = CRYPTO_F_IMBUF | CRYPTO_F_MPSAFE | CRYPTO_F_NOQUEUE;
>       crp->crp_buf = (caddr_t)m;
>       crp->crp_callback = ipsec_output_cb;
>       crp->crp_sid = tdb->tdb_cryptoid;
> @@ -1145,9 +1143,7 @@ ah_output(struct mbuf *m, struct tdb *td
>       tc->tc_rdomain = tdb->tdb_rdomain;
>       memcpy(&tc->tc_dst, &tdb->tdb_dst, sizeof(union sockaddr_union));
>  
> -     KERNEL_LOCK();
>       error = crypto_dispatch(crp);
> -     KERNEL_UNLOCK();
>       return error;
>  
>   drop:
> Index: netinet/ip_esp.c
> ===================================================================
> RCS file: /data/mirror/openbsd/cvs/src/sys/netinet/ip_esp.c,v
> retrieving revision 1.168
> diff -u -p -r1.168 ip_esp.c
> --- netinet/ip_esp.c  16 Jul 2021 15:08:39 -0000      1.168
> +++ netinet/ip_esp.c  16 Jul 2021 20:38:27 -0000
> @@ -498,7 +498,7 @@ esp_input(struct mbuf *m, struct tdb *td
>  
>       /* Crypto operation descriptor */
>       crp->crp_ilen = m->m_pkthdr.len; /* Total input length */
> -     crp->crp_flags = CRYPTO_F_IMBUF;
> +     crp->crp_flags = CRYPTO_F_IMBUF | CRYPTO_F_MPSAFE | CRYPTO_F_NOQUEUE;
>       crp->crp_buf = (caddr_t)m;
>       crp->crp_callback = ipsec_input_cb;
>       crp->crp_sid = tdb->tdb_cryptoid;
> @@ -527,9 +527,7 @@ esp_input(struct mbuf *m, struct tdb *td
>                       crde->crd_len = m->m_pkthdr.len - (skip + hlen + alen);
>       }
>  
> -     KERNEL_LOCK();
>       error = crypto_dispatch(crp);
> -     KERNEL_UNLOCK();
>       return error;
>  
>   drop:
> @@ -980,7 +978,7 @@ esp_output(struct mbuf *m, struct tdb *t
>  
>       /* Crypto operation descriptor. */
>       crp->crp_ilen = m->m_pkthdr.len; /* Total input length. */
> -     crp->crp_flags = CRYPTO_F_IMBUF;
> +     crp->crp_flags = CRYPTO_F_IMBUF | CRYPTO_F_MPSAFE | CRYPTO_F_NOQUEUE;
>       crp->crp_buf = (caddr_t)m;
>       crp->crp_callback = ipsec_output_cb;
>       crp->crp_opaque = (caddr_t)tc;
> @@ -1012,9 +1010,7 @@ esp_output(struct mbuf *m, struct tdb *t
>                       crda->crd_len = m->m_pkthdr.len - (skip + alen);
>       }
>  
> -     KERNEL_LOCK();
>       error = crypto_dispatch(crp);
> -     KERNEL_UNLOCK();
>       return error;
>  
>   drop:
> Index: netinet/ip_ipcomp.c
> ===================================================================
> RCS file: /data/mirror/openbsd/cvs/src/sys/netinet/ip_ipcomp.c,v
> retrieving revision 1.71
> diff -u -p -r1.71 ip_ipcomp.c
> --- netinet/ip_ipcomp.c       8 Jul 2021 21:07:19 -0000       1.71
> +++ netinet/ip_ipcomp.c       16 Jul 2021 20:38:32 -0000
> @@ -174,7 +174,7 @@ ipcomp_input(struct mbuf *m, struct tdb 
>  
>       /* Crypto operation descriptor */
>       crp->crp_ilen = m->m_pkthdr.len - (skip + hlen);
> -     crp->crp_flags = CRYPTO_F_IMBUF;
> +     crp->crp_flags = CRYPTO_F_IMBUF | CRYPTO_F_MPSAFE | CRYPTO_F_NOQUEUE;
>       crp->crp_buf = (caddr_t)m;
>       crp->crp_callback = ipsec_input_cb;
>       crp->crp_sid = tdb->tdb_cryptoid;
> @@ -188,9 +188,7 @@ ipcomp_input(struct mbuf *m, struct tdb 
>       tc->tc_rdomain = tdb->tdb_rdomain;
>       tc->tc_dst = tdb->tdb_dst;
>  
> -     KERNEL_LOCK();
>       error = crypto_dispatch(crp);
> -     KERNEL_UNLOCK();
>       return error;
>  }
>  
> @@ -479,15 +477,13 @@ ipcomp_output(struct mbuf *m, struct tdb
>  
>       /* Crypto operation descriptor */
>       crp->crp_ilen = m->m_pkthdr.len;        /* Total input length */
> -     crp->crp_flags = CRYPTO_F_IMBUF;
> +     crp->crp_flags = CRYPTO_F_IMBUF | CRYPTO_F_MPSAFE | CRYPTO_F_NOQUEUE;
>       crp->crp_buf = (caddr_t)m;
>       crp->crp_callback = ipsec_output_cb;
>       crp->crp_opaque = (caddr_t)tc;
>       crp->crp_sid = tdb->tdb_cryptoid;
>  
> -     KERNEL_LOCK();
>       error = crypto_dispatch(crp);
> -     KERNEL_UNLOCK();
>       return error;
>  
>   drop:
> Index: netinet/ipsec_input.c
> ===================================================================
> RCS file: /data/mirror/openbsd/cvs/src/sys/netinet/ipsec_input.c,v
> retrieving revision 1.175
> diff -u -p -r1.175 ipsec_input.c
> --- netinet/ipsec_input.c     8 Jul 2021 15:13:14 -0000       1.175
> +++ netinet/ipsec_input.c     16 Jul 2021 20:35:24 -0000
> @@ -380,14 +380,11 @@ ipsec_input_cb(struct cryptop *crp)
>       struct tdb *tdb = NULL;
>       int clen, error;
>  
> -     KERNEL_ASSERT_LOCKED();
> -
>       if (m == NULL) {
>               DPRINTF("bogus returned buffer from crypto");
>               ipsecstat_inc(ipsec_crypto);
>               goto droponly;
>       }
> -
>  
>       NET_LOCK();
>       tdb = gettdb(tc->tc_rdomain, tc->tc_spi, &tc->tc_dst, tc->tc_proto);
> Index: netinet/ipsec_output.c
> ===================================================================
> RCS file: /data/mirror/openbsd/cvs/src/sys/netinet/ipsec_output.c,v
> retrieving revision 1.82
> diff -u -p -r1.82 ipsec_output.c
> --- netinet/ipsec_output.c    8 Jul 2021 15:13:14 -0000       1.82
> +++ netinet/ipsec_output.c    16 Jul 2021 20:35:24 -0000
> @@ -395,8 +395,6 @@ ipsec_output_cb(struct cryptop *crp)
>       struct tdb *tdb = NULL;
>       int error, ilen, olen;
>  
> -     KERNEL_ASSERT_LOCKED();
> -
>       if (m == NULL) {
>               DPRINTF("bogus returned buffer from crypto");
>               ipsecstat_inc(ipsec_crypto);
> @@ -418,7 +416,12 @@ ipsec_output_cb(struct cryptop *crp)
>                       if (tdb->tdb_cryptoid != 0)
>                               tdb->tdb_cryptoid = crp->crp_sid;
>                       NET_UNLOCK();
> -                     crypto_dispatch(crp);
> +                     error = crypto_dispatch(crp);
> +                     if (error) {
> +                             DPRINTF("crypto dispatch error %d", error);
> +                             ipsecstat_inc(ipsec_odrops);
> +                             tdb->tdb_odrops++;
> +                     }
>                       return;
>               }
>               DPRINTF("crypto error %d", crp->crp_etype);
> @@ -479,6 +482,8 @@ ipsp_process_done(struct mbuf *m, struct
>       struct tdb_ident *tdbi;
>       struct m_tag *mtag;
>       int roff, error;
> +
> +     NET_ASSERT_LOCKED();
>  
>       tdb->tdb_last_used = gettime();
>  
> 

Reply via email to