On Fri, Jul 16, 2021 at 08:02:18PM +0200, Hrvoje Popovski wrote:
> x3550m4# panic: malloc: out of space in kmem_map

> ddb{3}> show malloc
>            Type InUse  MemUse  HighUse   Limit  Requests Type Lim
>      xform_data 322973  40366K   70359K  78643K   5546065        0

> ddb{3}> show all pool
> Name      Size Requests Fail Releases Pgreq Pgrel Npage Hiwat Minpg Maxpg Idle
> cryptop    352  5872133    0  5549196 259257 229899 29358 51176   0     8    0
> mcl2k2     2112  572160    0        0 38138     3 38135 38138     0     8    0
> mbufpl     256   572222    0        0 35753     2 35751 35753     0     8    0

Queuing crypto operations in unlimited task queues is a bad idea.
If new packets arrive faster than crypto and packet output is
processing them, kernel will run out of memory.

Goal should be to run crypto synchronously, but use multiple cores
for IPsec.  Queues in the packet path result in strange system
behavior.  But the may be necessary on our way to MP.

I was fixing some MP bugs in the crypto layer anyway.  With the
diff below, we can disable crypto queueing.  Then you cannot run
into the out of memory situation.

The diff, you wanted to test, fixes a bug that only shows up if
packets are delayed in queues.  So with the diff below, the hanging
IPsec shoud go way no matter if you use my other fix or not.

All I said is more or less theory, I did not test it yet.

bluhm

Index: crypto/crypto.c
===================================================================
RCS file: /data/mirror/openbsd/cvs/src/sys/crypto/crypto.c,v
retrieving revision 1.83
diff -u -p -r1.83 crypto.c
--- crypto/crypto.c     30 Jun 2021 12:21:02 -0000      1.83
+++ crypto/crypto.c     16 Jul 2021 20:35:24 -0000
@@ -387,26 +387,33 @@ crypto_unregister(u_int32_t driverid, in
 int
 crypto_dispatch(struct cryptop *crp)
 {
-       struct taskq *tq = crypto_taskq;
-       int s;
+       int error = 0, lock = 1, s;
        u_int32_t hid;
 
        s = splvm();
        hid = (crp->crp_sid >> 32) & 0xffffffff;
        if (hid < crypto_drivers_num) {
                if (crypto_drivers[hid].cc_flags & CRYPTOCAP_F_MPSAFE)
-                       tq = crypto_taskq_mpsafe;
+                       /* XXXSMP crypto_invoke() is not MP safe */
+                       lock = 1;
        }
        splx(s);
 
-       if (tq && !(crp->crp_flags & CRYPTO_F_NOQUEUE)) {
+       if (crp->crp_flags & CRYPTO_F_NOQUEUE) {
+               if (lock)
+                       KERNEL_LOCK();
+               error = crypto_invoke(crp);
+               if (lock)
+                       KERNEL_UNLOCK();
+       } else {
+               struct taskq *tq;
+
+               tq = lock ? crypto_taskq : crypto_taskq_mpsafe;
                task_set(&crp->crp_task, (void (*))crypto_invoke, crp);
                task_add(tq, &crp->crp_task);
-       } else {
-               crypto_invoke(crp);
        }
 
-       return 0;
+       return error;
 }
 
 /*
@@ -424,6 +431,8 @@ crypto_invoke(struct cryptop *crp)
        if (crp == NULL || crp->crp_callback == NULL)
                return EINVAL;
 
+       KERNEL_ASSERT_LOCKED();
+
        s = splvm();
        if (crp->crp_ndesc < 1 || crypto_drivers == NULL) {
                crp->crp_etype = EINVAL;
@@ -534,12 +543,25 @@ crypto_init(void)
 void
 crypto_done(struct cryptop *crp)
 {
+       int lock = 1;
+
        crp->crp_flags |= CRYPTO_F_DONE;
+
+       if (crp->crp_flags & CRYPTO_F_MPSAFE)
+               lock = 0;
+
        if (crp->crp_flags & CRYPTO_F_NOQUEUE) {
                /* not from the crypto queue, wakeup the userland process */
+               if (lock)
+                       KERNEL_LOCK();
                crp->crp_callback(crp);
+               if (lock)
+                       KERNEL_UNLOCK();
        } else {
+               struct taskq *tq;
+
+               tq = lock ? crypto_taskq : crypto_taskq_mpsafe;
                task_set(&crp->crp_task, (void (*))crp->crp_callback, crp);
-               task_add(crypto_taskq, &crp->crp_task);
+               task_add(tq, &crp->crp_task);
        }
 }
Index: crypto/cryptodev.h
===================================================================
RCS file: /data/mirror/openbsd/cvs/src/sys/crypto/cryptodev.h,v
retrieving revision 1.73
diff -u -p -r1.73 cryptodev.h
--- crypto/cryptodev.h  9 Jul 2021 20:43:28 -0000       1.73
+++ crypto/cryptodev.h  16 Jul 2021 20:35:24 -0000
@@ -171,6 +171,7 @@ struct cryptop {
 
 #define CRYPTO_F_IMBUF 0x0001  /* Input/output are mbuf chains, otherwise 
contig */
 #define CRYPTO_F_IOV   0x0002  /* Input/output are uio */
+#define CRYPTO_F_MPSAFE        0x0004  /* Do not use kernel lock for callback 
*/
 #define CRYPTO_F_NOQUEUE       0x0008  /* Don't use crypto queue/thread */
 #define CRYPTO_F_DONE  0x0010  /* request completed */
 
Index: netinet/ip_ah.c
===================================================================
RCS file: /data/mirror/openbsd/cvs/src/sys/netinet/ip_ah.c,v
retrieving revision 1.150
diff -u -p -r1.150 ip_ah.c
--- netinet/ip_ah.c     8 Jul 2021 21:07:19 -0000       1.150
+++ netinet/ip_ah.c     16 Jul 2021 20:38:20 -0000
@@ -684,7 +684,7 @@ ah_input(struct mbuf *m, struct tdb *tdb
 
        /* Crypto operation descriptor. */
        crp->crp_ilen = m->m_pkthdr.len; /* Total input length. */
-       crp->crp_flags = CRYPTO_F_IMBUF;
+       crp->crp_flags = CRYPTO_F_IMBUF | CRYPTO_F_MPSAFE | CRYPTO_F_NOQUEUE;
        crp->crp_buf = (caddr_t)m;
        crp->crp_callback = ipsec_input_cb;
        crp->crp_sid = tdb->tdb_cryptoid;
@@ -698,9 +698,7 @@ ah_input(struct mbuf *m, struct tdb *tdb
        tc->tc_rdomain = tdb->tdb_rdomain;
        memcpy(&tc->tc_dst, &tdb->tdb_dst, sizeof(union sockaddr_union));
 
-       KERNEL_LOCK();
        error = crypto_dispatch(crp);
-       KERNEL_UNLOCK();
        return error;
 
  drop:
@@ -1131,7 +1129,7 @@ ah_output(struct mbuf *m, struct tdb *td
 
        /* Crypto operation descriptor. */
        crp->crp_ilen = m->m_pkthdr.len; /* Total input length. */
-       crp->crp_flags = CRYPTO_F_IMBUF;
+       crp->crp_flags = CRYPTO_F_IMBUF | CRYPTO_F_MPSAFE | CRYPTO_F_NOQUEUE;
        crp->crp_buf = (caddr_t)m;
        crp->crp_callback = ipsec_output_cb;
        crp->crp_sid = tdb->tdb_cryptoid;
@@ -1145,9 +1143,7 @@ ah_output(struct mbuf *m, struct tdb *td
        tc->tc_rdomain = tdb->tdb_rdomain;
        memcpy(&tc->tc_dst, &tdb->tdb_dst, sizeof(union sockaddr_union));
 
-       KERNEL_LOCK();
        error = crypto_dispatch(crp);
-       KERNEL_UNLOCK();
        return error;
 
  drop:
Index: netinet/ip_esp.c
===================================================================
RCS file: /data/mirror/openbsd/cvs/src/sys/netinet/ip_esp.c,v
retrieving revision 1.168
diff -u -p -r1.168 ip_esp.c
--- netinet/ip_esp.c    16 Jul 2021 15:08:39 -0000      1.168
+++ netinet/ip_esp.c    16 Jul 2021 20:38:27 -0000
@@ -498,7 +498,7 @@ esp_input(struct mbuf *m, struct tdb *td
 
        /* Crypto operation descriptor */
        crp->crp_ilen = m->m_pkthdr.len; /* Total input length */
-       crp->crp_flags = CRYPTO_F_IMBUF;
+       crp->crp_flags = CRYPTO_F_IMBUF | CRYPTO_F_MPSAFE | CRYPTO_F_NOQUEUE;
        crp->crp_buf = (caddr_t)m;
        crp->crp_callback = ipsec_input_cb;
        crp->crp_sid = tdb->tdb_cryptoid;
@@ -527,9 +527,7 @@ esp_input(struct mbuf *m, struct tdb *td
                        crde->crd_len = m->m_pkthdr.len - (skip + hlen + alen);
        }
 
-       KERNEL_LOCK();
        error = crypto_dispatch(crp);
-       KERNEL_UNLOCK();
        return error;
 
  drop:
@@ -980,7 +978,7 @@ esp_output(struct mbuf *m, struct tdb *t
 
        /* Crypto operation descriptor. */
        crp->crp_ilen = m->m_pkthdr.len; /* Total input length. */
-       crp->crp_flags = CRYPTO_F_IMBUF;
+       crp->crp_flags = CRYPTO_F_IMBUF | CRYPTO_F_MPSAFE | CRYPTO_F_NOQUEUE;
        crp->crp_buf = (caddr_t)m;
        crp->crp_callback = ipsec_output_cb;
        crp->crp_opaque = (caddr_t)tc;
@@ -1012,9 +1010,7 @@ esp_output(struct mbuf *m, struct tdb *t
                        crda->crd_len = m->m_pkthdr.len - (skip + alen);
        }
 
-       KERNEL_LOCK();
        error = crypto_dispatch(crp);
-       KERNEL_UNLOCK();
        return error;
 
  drop:
Index: netinet/ip_ipcomp.c
===================================================================
RCS file: /data/mirror/openbsd/cvs/src/sys/netinet/ip_ipcomp.c,v
retrieving revision 1.71
diff -u -p -r1.71 ip_ipcomp.c
--- netinet/ip_ipcomp.c 8 Jul 2021 21:07:19 -0000       1.71
+++ netinet/ip_ipcomp.c 16 Jul 2021 20:38:32 -0000
@@ -174,7 +174,7 @@ ipcomp_input(struct mbuf *m, struct tdb 
 
        /* Crypto operation descriptor */
        crp->crp_ilen = m->m_pkthdr.len - (skip + hlen);
-       crp->crp_flags = CRYPTO_F_IMBUF;
+       crp->crp_flags = CRYPTO_F_IMBUF | CRYPTO_F_MPSAFE | CRYPTO_F_NOQUEUE;
        crp->crp_buf = (caddr_t)m;
        crp->crp_callback = ipsec_input_cb;
        crp->crp_sid = tdb->tdb_cryptoid;
@@ -188,9 +188,7 @@ ipcomp_input(struct mbuf *m, struct tdb 
        tc->tc_rdomain = tdb->tdb_rdomain;
        tc->tc_dst = tdb->tdb_dst;
 
-       KERNEL_LOCK();
        error = crypto_dispatch(crp);
-       KERNEL_UNLOCK();
        return error;
 }
 
@@ -479,15 +477,13 @@ ipcomp_output(struct mbuf *m, struct tdb
 
        /* Crypto operation descriptor */
        crp->crp_ilen = m->m_pkthdr.len;        /* Total input length */
-       crp->crp_flags = CRYPTO_F_IMBUF;
+       crp->crp_flags = CRYPTO_F_IMBUF | CRYPTO_F_MPSAFE | CRYPTO_F_NOQUEUE;
        crp->crp_buf = (caddr_t)m;
        crp->crp_callback = ipsec_output_cb;
        crp->crp_opaque = (caddr_t)tc;
        crp->crp_sid = tdb->tdb_cryptoid;
 
-       KERNEL_LOCK();
        error = crypto_dispatch(crp);
-       KERNEL_UNLOCK();
        return error;
 
  drop:
Index: netinet/ipsec_input.c
===================================================================
RCS file: /data/mirror/openbsd/cvs/src/sys/netinet/ipsec_input.c,v
retrieving revision 1.175
diff -u -p -r1.175 ipsec_input.c
--- netinet/ipsec_input.c       8 Jul 2021 15:13:14 -0000       1.175
+++ netinet/ipsec_input.c       16 Jul 2021 20:35:24 -0000
@@ -380,14 +380,11 @@ ipsec_input_cb(struct cryptop *crp)
        struct tdb *tdb = NULL;
        int clen, error;
 
-       KERNEL_ASSERT_LOCKED();
-
        if (m == NULL) {
                DPRINTF("bogus returned buffer from crypto");
                ipsecstat_inc(ipsec_crypto);
                goto droponly;
        }
-
 
        NET_LOCK();
        tdb = gettdb(tc->tc_rdomain, tc->tc_spi, &tc->tc_dst, tc->tc_proto);
Index: netinet/ipsec_output.c
===================================================================
RCS file: /data/mirror/openbsd/cvs/src/sys/netinet/ipsec_output.c,v
retrieving revision 1.82
diff -u -p -r1.82 ipsec_output.c
--- netinet/ipsec_output.c      8 Jul 2021 15:13:14 -0000       1.82
+++ netinet/ipsec_output.c      16 Jul 2021 20:35:24 -0000
@@ -395,8 +395,6 @@ ipsec_output_cb(struct cryptop *crp)
        struct tdb *tdb = NULL;
        int error, ilen, olen;
 
-       KERNEL_ASSERT_LOCKED();
-
        if (m == NULL) {
                DPRINTF("bogus returned buffer from crypto");
                ipsecstat_inc(ipsec_crypto);
@@ -418,7 +416,12 @@ ipsec_output_cb(struct cryptop *crp)
                        if (tdb->tdb_cryptoid != 0)
                                tdb->tdb_cryptoid = crp->crp_sid;
                        NET_UNLOCK();
-                       crypto_dispatch(crp);
+                       error = crypto_dispatch(crp);
+                       if (error) {
+                               DPRINTF("crypto dispatch error %d", error);
+                               ipsecstat_inc(ipsec_odrops);
+                               tdb->tdb_odrops++;
+                       }
                        return;
                }
                DPRINTF("crypto error %d", crp->crp_etype);
@@ -479,6 +482,8 @@ ipsp_process_done(struct mbuf *m, struct
        struct tdb_ident *tdbi;
        struct m_tag *mtag;
        int roff, error;
+
+       NET_ASSERT_LOCKED();
 
        tdb->tdb_last_used = gettime();
 

Reply via email to