Not a big issue, but 'tdb->tdb_odrops' should be incremented with
netlock held too.
On Fri, Jul 16, 2021 at 10:57:24PM +0200, Alexander Bluhm wrote:
> On Fri, Jul 16, 2021 at 08:02:18PM +0200, Hrvoje Popovski wrote:
> > x3550m4# panic: malloc: out of space in kmem_map
>
> > ddb{3}> show malloc
> > Type InUse MemUse HighUse Limit Requests Type Lim
> > xform_data 322973 40366K 70359K 78643K 5546065 0
>
> > ddb{3}> show all pool
> > Name Size Requests Fail Releases Pgreq Pgrel Npage Hiwat Minpg Maxpg
> > Idle
> > cryptop 352 5872133 0 5549196 259257 229899 29358 51176 0 8
> > 0
> > mcl2k2 2112 572160 0 0 38138 3 38135 38138 0 8
> > 0
> > mbufpl 256 572222 0 0 35753 2 35751 35753 0 8
> > 0
>
> Queuing crypto operations in unlimited task queues is a bad idea.
> If new packets arrive faster than crypto and packet output is
> processing them, kernel will run out of memory.
>
> Goal should be to run crypto synchronously, but use multiple cores
> for IPsec. Queues in the packet path result in strange system
> behavior. But the may be necessary on our way to MP.
>
> I was fixing some MP bugs in the crypto layer anyway. With the
> diff below, we can disable crypto queueing. Then you cannot run
> into the out of memory situation.
>
> The diff, you wanted to test, fixes a bug that only shows up if
> packets are delayed in queues. So with the diff below, the hanging
> IPsec shoud go way no matter if you use my other fix or not.
>
> All I said is more or less theory, I did not test it yet.
>
> bluhm
>
> Index: crypto/crypto.c
> ===================================================================
> RCS file: /data/mirror/openbsd/cvs/src/sys/crypto/crypto.c,v
> retrieving revision 1.83
> diff -u -p -r1.83 crypto.c
> --- crypto/crypto.c 30 Jun 2021 12:21:02 -0000 1.83
> +++ crypto/crypto.c 16 Jul 2021 20:35:24 -0000
> @@ -387,26 +387,33 @@ crypto_unregister(u_int32_t driverid, in
> int
> crypto_dispatch(struct cryptop *crp)
> {
> - struct taskq *tq = crypto_taskq;
> - int s;
> + int error = 0, lock = 1, s;
> u_int32_t hid;
>
> s = splvm();
> hid = (crp->crp_sid >> 32) & 0xffffffff;
> if (hid < crypto_drivers_num) {
> if (crypto_drivers[hid].cc_flags & CRYPTOCAP_F_MPSAFE)
> - tq = crypto_taskq_mpsafe;
> + /* XXXSMP crypto_invoke() is not MP safe */
> + lock = 1;
> }
> splx(s);
>
> - if (tq && !(crp->crp_flags & CRYPTO_F_NOQUEUE)) {
> + if (crp->crp_flags & CRYPTO_F_NOQUEUE) {
> + if (lock)
> + KERNEL_LOCK();
> + error = crypto_invoke(crp);
> + if (lock)
> + KERNEL_UNLOCK();
> + } else {
> + struct taskq *tq;
> +
> + tq = lock ? crypto_taskq : crypto_taskq_mpsafe;
> task_set(&crp->crp_task, (void (*))crypto_invoke, crp);
> task_add(tq, &crp->crp_task);
> - } else {
> - crypto_invoke(crp);
> }
>
> - return 0;
> + return error;
> }
>
> /*
> @@ -424,6 +431,8 @@ crypto_invoke(struct cryptop *crp)
> if (crp == NULL || crp->crp_callback == NULL)
> return EINVAL;
>
> + KERNEL_ASSERT_LOCKED();
> +
> s = splvm();
> if (crp->crp_ndesc < 1 || crypto_drivers == NULL) {
> crp->crp_etype = EINVAL;
> @@ -534,12 +543,25 @@ crypto_init(void)
> void
> crypto_done(struct cryptop *crp)
> {
> + int lock = 1;
> +
> crp->crp_flags |= CRYPTO_F_DONE;
> +
> + if (crp->crp_flags & CRYPTO_F_MPSAFE)
> + lock = 0;
> +
> if (crp->crp_flags & CRYPTO_F_NOQUEUE) {
> /* not from the crypto queue, wakeup the userland process */
> + if (lock)
> + KERNEL_LOCK();
> crp->crp_callback(crp);
> + if (lock)
> + KERNEL_UNLOCK();
> } else {
> + struct taskq *tq;
> +
> + tq = lock ? crypto_taskq : crypto_taskq_mpsafe;
> task_set(&crp->crp_task, (void (*))crp->crp_callback, crp);
> - task_add(crypto_taskq, &crp->crp_task);
> + task_add(tq, &crp->crp_task);
> }
> }
> Index: crypto/cryptodev.h
> ===================================================================
> RCS file: /data/mirror/openbsd/cvs/src/sys/crypto/cryptodev.h,v
> retrieving revision 1.73
> diff -u -p -r1.73 cryptodev.h
> --- crypto/cryptodev.h 9 Jul 2021 20:43:28 -0000 1.73
> +++ crypto/cryptodev.h 16 Jul 2021 20:35:24 -0000
> @@ -171,6 +171,7 @@ struct cryptop {
>
> #define CRYPTO_F_IMBUF 0x0001 /* Input/output are mbuf chains,
> otherwise contig */
> #define CRYPTO_F_IOV 0x0002 /* Input/output are uio */
> +#define CRYPTO_F_MPSAFE 0x0004 /* Do not use kernel lock for callback
> */
> #define CRYPTO_F_NOQUEUE 0x0008 /* Don't use crypto queue/thread */
> #define CRYPTO_F_DONE 0x0010 /* request completed */
>
> Index: netinet/ip_ah.c
> ===================================================================
> RCS file: /data/mirror/openbsd/cvs/src/sys/netinet/ip_ah.c,v
> retrieving revision 1.150
> diff -u -p -r1.150 ip_ah.c
> --- netinet/ip_ah.c 8 Jul 2021 21:07:19 -0000 1.150
> +++ netinet/ip_ah.c 16 Jul 2021 20:38:20 -0000
> @@ -684,7 +684,7 @@ ah_input(struct mbuf *m, struct tdb *tdb
>
> /* Crypto operation descriptor. */
> crp->crp_ilen = m->m_pkthdr.len; /* Total input length. */
> - crp->crp_flags = CRYPTO_F_IMBUF;
> + crp->crp_flags = CRYPTO_F_IMBUF | CRYPTO_F_MPSAFE | CRYPTO_F_NOQUEUE;
> crp->crp_buf = (caddr_t)m;
> crp->crp_callback = ipsec_input_cb;
> crp->crp_sid = tdb->tdb_cryptoid;
> @@ -698,9 +698,7 @@ ah_input(struct mbuf *m, struct tdb *tdb
> tc->tc_rdomain = tdb->tdb_rdomain;
> memcpy(&tc->tc_dst, &tdb->tdb_dst, sizeof(union sockaddr_union));
>
> - KERNEL_LOCK();
> error = crypto_dispatch(crp);
> - KERNEL_UNLOCK();
> return error;
>
> drop:
> @@ -1131,7 +1129,7 @@ ah_output(struct mbuf *m, struct tdb *td
>
> /* Crypto operation descriptor. */
> crp->crp_ilen = m->m_pkthdr.len; /* Total input length. */
> - crp->crp_flags = CRYPTO_F_IMBUF;
> + crp->crp_flags = CRYPTO_F_IMBUF | CRYPTO_F_MPSAFE | CRYPTO_F_NOQUEUE;
> crp->crp_buf = (caddr_t)m;
> crp->crp_callback = ipsec_output_cb;
> crp->crp_sid = tdb->tdb_cryptoid;
> @@ -1145,9 +1143,7 @@ ah_output(struct mbuf *m, struct tdb *td
> tc->tc_rdomain = tdb->tdb_rdomain;
> memcpy(&tc->tc_dst, &tdb->tdb_dst, sizeof(union sockaddr_union));
>
> - KERNEL_LOCK();
> error = crypto_dispatch(crp);
> - KERNEL_UNLOCK();
> return error;
>
> drop:
> Index: netinet/ip_esp.c
> ===================================================================
> RCS file: /data/mirror/openbsd/cvs/src/sys/netinet/ip_esp.c,v
> retrieving revision 1.168
> diff -u -p -r1.168 ip_esp.c
> --- netinet/ip_esp.c 16 Jul 2021 15:08:39 -0000 1.168
> +++ netinet/ip_esp.c 16 Jul 2021 20:38:27 -0000
> @@ -498,7 +498,7 @@ esp_input(struct mbuf *m, struct tdb *td
>
> /* Crypto operation descriptor */
> crp->crp_ilen = m->m_pkthdr.len; /* Total input length */
> - crp->crp_flags = CRYPTO_F_IMBUF;
> + crp->crp_flags = CRYPTO_F_IMBUF | CRYPTO_F_MPSAFE | CRYPTO_F_NOQUEUE;
> crp->crp_buf = (caddr_t)m;
> crp->crp_callback = ipsec_input_cb;
> crp->crp_sid = tdb->tdb_cryptoid;
> @@ -527,9 +527,7 @@ esp_input(struct mbuf *m, struct tdb *td
> crde->crd_len = m->m_pkthdr.len - (skip + hlen + alen);
> }
>
> - KERNEL_LOCK();
> error = crypto_dispatch(crp);
> - KERNEL_UNLOCK();
> return error;
>
> drop:
> @@ -980,7 +978,7 @@ esp_output(struct mbuf *m, struct tdb *t
>
> /* Crypto operation descriptor. */
> crp->crp_ilen = m->m_pkthdr.len; /* Total input length. */
> - crp->crp_flags = CRYPTO_F_IMBUF;
> + crp->crp_flags = CRYPTO_F_IMBUF | CRYPTO_F_MPSAFE | CRYPTO_F_NOQUEUE;
> crp->crp_buf = (caddr_t)m;
> crp->crp_callback = ipsec_output_cb;
> crp->crp_opaque = (caddr_t)tc;
> @@ -1012,9 +1010,7 @@ esp_output(struct mbuf *m, struct tdb *t
> crda->crd_len = m->m_pkthdr.len - (skip + alen);
> }
>
> - KERNEL_LOCK();
> error = crypto_dispatch(crp);
> - KERNEL_UNLOCK();
> return error;
>
> drop:
> Index: netinet/ip_ipcomp.c
> ===================================================================
> RCS file: /data/mirror/openbsd/cvs/src/sys/netinet/ip_ipcomp.c,v
> retrieving revision 1.71
> diff -u -p -r1.71 ip_ipcomp.c
> --- netinet/ip_ipcomp.c 8 Jul 2021 21:07:19 -0000 1.71
> +++ netinet/ip_ipcomp.c 16 Jul 2021 20:38:32 -0000
> @@ -174,7 +174,7 @@ ipcomp_input(struct mbuf *m, struct tdb
>
> /* Crypto operation descriptor */
> crp->crp_ilen = m->m_pkthdr.len - (skip + hlen);
> - crp->crp_flags = CRYPTO_F_IMBUF;
> + crp->crp_flags = CRYPTO_F_IMBUF | CRYPTO_F_MPSAFE | CRYPTO_F_NOQUEUE;
> crp->crp_buf = (caddr_t)m;
> crp->crp_callback = ipsec_input_cb;
> crp->crp_sid = tdb->tdb_cryptoid;
> @@ -188,9 +188,7 @@ ipcomp_input(struct mbuf *m, struct tdb
> tc->tc_rdomain = tdb->tdb_rdomain;
> tc->tc_dst = tdb->tdb_dst;
>
> - KERNEL_LOCK();
> error = crypto_dispatch(crp);
> - KERNEL_UNLOCK();
> return error;
> }
>
> @@ -479,15 +477,13 @@ ipcomp_output(struct mbuf *m, struct tdb
>
> /* Crypto operation descriptor */
> crp->crp_ilen = m->m_pkthdr.len; /* Total input length */
> - crp->crp_flags = CRYPTO_F_IMBUF;
> + crp->crp_flags = CRYPTO_F_IMBUF | CRYPTO_F_MPSAFE | CRYPTO_F_NOQUEUE;
> crp->crp_buf = (caddr_t)m;
> crp->crp_callback = ipsec_output_cb;
> crp->crp_opaque = (caddr_t)tc;
> crp->crp_sid = tdb->tdb_cryptoid;
>
> - KERNEL_LOCK();
> error = crypto_dispatch(crp);
> - KERNEL_UNLOCK();
> return error;
>
> drop:
> Index: netinet/ipsec_input.c
> ===================================================================
> RCS file: /data/mirror/openbsd/cvs/src/sys/netinet/ipsec_input.c,v
> retrieving revision 1.175
> diff -u -p -r1.175 ipsec_input.c
> --- netinet/ipsec_input.c 8 Jul 2021 15:13:14 -0000 1.175
> +++ netinet/ipsec_input.c 16 Jul 2021 20:35:24 -0000
> @@ -380,14 +380,11 @@ ipsec_input_cb(struct cryptop *crp)
> struct tdb *tdb = NULL;
> int clen, error;
>
> - KERNEL_ASSERT_LOCKED();
> -
> if (m == NULL) {
> DPRINTF("bogus returned buffer from crypto");
> ipsecstat_inc(ipsec_crypto);
> goto droponly;
> }
> -
>
> NET_LOCK();
> tdb = gettdb(tc->tc_rdomain, tc->tc_spi, &tc->tc_dst, tc->tc_proto);
> Index: netinet/ipsec_output.c
> ===================================================================
> RCS file: /data/mirror/openbsd/cvs/src/sys/netinet/ipsec_output.c,v
> retrieving revision 1.82
> diff -u -p -r1.82 ipsec_output.c
> --- netinet/ipsec_output.c 8 Jul 2021 15:13:14 -0000 1.82
> +++ netinet/ipsec_output.c 16 Jul 2021 20:35:24 -0000
> @@ -395,8 +395,6 @@ ipsec_output_cb(struct cryptop *crp)
> struct tdb *tdb = NULL;
> int error, ilen, olen;
>
> - KERNEL_ASSERT_LOCKED();
> -
> if (m == NULL) {
> DPRINTF("bogus returned buffer from crypto");
> ipsecstat_inc(ipsec_crypto);
> @@ -418,7 +416,12 @@ ipsec_output_cb(struct cryptop *crp)
> if (tdb->tdb_cryptoid != 0)
> tdb->tdb_cryptoid = crp->crp_sid;
> NET_UNLOCK();
> - crypto_dispatch(crp);
> + error = crypto_dispatch(crp);
> + if (error) {
> + DPRINTF("crypto dispatch error %d", error);
> + ipsecstat_inc(ipsec_odrops);
> + tdb->tdb_odrops++;
> + }
> return;
> }
> DPRINTF("crypto error %d", crp->crp_etype);
> @@ -479,6 +482,8 @@ ipsp_process_done(struct mbuf *m, struct
> struct tdb_ident *tdbi;
> struct m_tag *mtag;
> int roff, error;
> +
> + NET_ASSERT_LOCKED();
>
> tdb->tdb_last_used = gettime();
>
>