ok mvs@
> On 11 Dec 2021, at 22:03, Alexander Bluhm <[email protected]> wrote:
>
> On Sat, Dec 11, 2021 at 12:53:35AM +0100, Alexander Bluhm wrote:
>> To cache lookups, the policy ipo is linked to its SA tdb. There
>> is a list of SAs that belong to a policy. To make it MP safe we
>> need a mutex around these pointers.
>>
>> Hrvoje: Can you test this alone and together with the ip forwarding
>> diff. I protects the data structure where the latter crashed.
>> Wonder if this helps.
>
> updated diff, merged with -current
>
> Index: net/pfkeyv2.c
> ===================================================================
> RCS file: /data/mirror/openbsd/cvs/src/sys/net/pfkeyv2.c,v
> retrieving revision 1.227
> diff -u -p -r1.227 pfkeyv2.c
> --- net/pfkeyv2.c 8 Dec 2021 14:24:18 -0000 1.227
> +++ net/pfkeyv2.c 11 Dec 2021 18:52:54 -0000
> @@ -2004,12 +2004,15 @@ pfkeyv2_send(struct socket *so, void *me
> (caddr_t)&ipo->ipo_mask, rnh,
> ipo->ipo_nodes, 0)) == NULL) {
> /* Remove from linked list of policies on TDB */
> + mtx_enter(&ipo_tdb_mtx);
> if (ipo->ipo_tdb != NULL) {
> TAILQ_REMOVE(
> &ipo->ipo_tdb->tdb_policy_head,
> ipo, ipo_tdb_next);
> tdb_unref(ipo->ipo_tdb);
> + ipo->ipo_tdb = NULL;
> }
> + mtx_leave(&ipo_tdb_mtx);
> if (ipo->ipo_ids)
> ipsp_ids_free(ipo->ipo_ids);
> pool_put(&ipsec_policy_pool, ipo);
> Index: netinet/ip_ipsp.c
> ===================================================================
> RCS file: /data/mirror/openbsd/cvs/src/sys/netinet/ip_ipsp.c,v
> retrieving revision 1.264
> diff -u -p -r1.264 ip_ipsp.c
> --- netinet/ip_ipsp.c 11 Dec 2021 16:33:47 -0000 1.264
> +++ netinet/ip_ipsp.c 11 Dec 2021 18:52:54 -0000
> @@ -934,12 +934,14 @@ tdb_cleanspd(struct tdb *tdbp)
> {
> struct ipsec_policy *ipo;
>
> + mtx_enter(&ipo_tdb_mtx);
> while ((ipo = TAILQ_FIRST(&tdbp->tdb_policy_head)) != NULL) {
> TAILQ_REMOVE(&tdbp->tdb_policy_head, ipo, ipo_tdb_next);
> tdb_unref(ipo->ipo_tdb);
> ipo->ipo_tdb = NULL;
> ipo->ipo_last_searched = 0; /* Force a re-search. */
> }
> + mtx_leave(&ipo_tdb_mtx);
> }
>
> void
> Index: netinet/ip_ipsp.h
> ===================================================================
> RCS file: /data/mirror/openbsd/cvs/src/sys/netinet/ip_ipsp.h,v
> retrieving revision 1.230
> diff -u -p -r1.230 ip_ipsp.h
> --- netinet/ip_ipsp.h 11 Dec 2021 16:33:47 -0000 1.230
> +++ netinet/ip_ipsp.h 11 Dec 2021 18:53:53 -0000
> @@ -257,6 +257,10 @@ struct ipsec_acquire {
> TAILQ_ENTRY(ipsec_acquire) ipa_next;
> };
>
> +/*
> + * Locks used to protect struct members in this file:
> + * p ipo_tdb_mtx link policy to TDB global mutex
> + */
> struct ipsec_policy {
> struct radix_node ipo_nodes[2]; /* radix tree glue */
> struct sockaddr_encap ipo_addr;
> @@ -274,7 +278,7 @@ struct ipsec_policy {
> * mode was used.
> */
>
> - u_int64_t ipo_last_searched; /* Timestamp of last
> lookup */
> + u_int64_t ipo_last_searched; /* [p] Timestamp of lookup */
>
> u_int8_t ipo_flags; /* See IPSP_POLICY_*
> definitions */
> u_int8_t ipo_type; /* USE/ACQUIRE/... */
> @@ -283,7 +287,7 @@ struct ipsec_policy {
>
> int ipo_ref_count;
>
> - struct tdb *ipo_tdb; /* Cached entry */
> + struct tdb *ipo_tdb; /* [p] Cached TDB entry */
>
> struct ipsec_ids *ipo_ids;
>
> @@ -313,8 +317,9 @@ struct ipsec_policy {
> * Locks used to protect struct members in this file:
> * I immutable after creation
> * N net lock
> - * s tdb_sadb_mtx
> * m tdb_mtx
> + * p ipo_tdb_mtx link policy to TDB global mutex
> + * s tdb_sadb_mtx SA database global mutex
> */
> struct tdb { /* tunnel descriptor block */
> /*
> @@ -438,7 +443,7 @@ struct tdb { /* tunnel
> descriptor blo
> struct sockaddr_encap tdb_filter; /* What traffic is acceptable */
> struct sockaddr_encap tdb_filtermask; /* And the mask */
>
> - TAILQ_HEAD(tdb_policy_head, ipsec_policy) tdb_policy_head;
> + TAILQ_HEAD(tdb_policy_head, ipsec_policy) tdb_policy_head; /* [p] */
> TAILQ_ENTRY(tdb) tdb_sync_entry;
> };
> #define tdb_ipackets tdb_data.tdd_ipackets
> @@ -546,6 +551,7 @@ extern char ipsec_def_comp[];
> extern TAILQ_HEAD(ipsec_policy_head, ipsec_policy) ipsec_policy_head;
>
> extern struct mutex tdb_sadb_mtx;
> +extern struct mutex ipo_tdb_mtx;
>
> struct cryptop;
>
> Index: netinet/ip_spd.c
> ===================================================================
> RCS file: /data/mirror/openbsd/cvs/src/sys/netinet/ip_spd.c,v
> retrieving revision 1.108
> diff -u -p -r1.108 ip_spd.c
> --- netinet/ip_spd.c 3 Dec 2021 17:18:34 -0000 1.108
> +++ netinet/ip_spd.c 11 Dec 2021 18:52:54 -0000
> @@ -53,6 +53,12 @@ void ipsp_delete_acquire(struct ipsec_ac
> struct pool ipsec_policy_pool;
> struct pool ipsec_acquire_pool;
>
> +/*
> + * For tdb_walk() calling tdb_delete_locked() we need lock order
> + * tdb_sadb_mtx before ipo_tdb_mtx.
> + */
> +struct mutex ipo_tdb_mtx = MUTEX_INITIALIZER(IPL_SOFTNET);
> +
> /* Protected by the NET_LOCK(). */
> struct radix_node_head **spd_tables;
> unsigned int spd_table_max;
> @@ -360,6 +366,7 @@ ipsp_spd_lookup(struct mbuf *m, int af,
> }
>
> /* Do we have a cached entry ? If so, check if it's still valid. */
> + mtx_enter(&ipo_tdb_mtx);
> if (ipo->ipo_tdb != NULL &&
> (ipo->ipo_tdb->tdb_flags & TDBF_INVALID)) {
> TAILQ_REMOVE(&ipo->ipo_tdb->tdb_policy_head, ipo,
> @@ -367,6 +374,7 @@ ipsp_spd_lookup(struct mbuf *m, int af,
> tdb_unref(ipo->ipo_tdb);
> ipo->ipo_tdb = NULL;
> }
> + mtx_leave(&ipo_tdb_mtx);
>
> /* Outgoing packet policy check. */
> if (direction == IPSP_DIRECTION_OUT) {
> @@ -393,6 +401,7 @@ ipsp_spd_lookup(struct mbuf *m, int af,
> ids = ipsp_ids_lookup(ipsecflowinfo);
>
> /* Check that the cached TDB (if present), is appropriate. */
> + mtx_enter(&ipo_tdb_mtx);
> if (ipo->ipo_tdb != NULL) {
> if ((ipo->ipo_last_searched <= ipsec_last_added) ||
> (ipo->ipo_sproto != ipo->ipo_tdb->tdb_sproto) ||
> @@ -407,7 +416,9 @@ ipsp_spd_lookup(struct mbuf *m, int af,
> goto nomatchout;
>
> /* Cached entry is good. */
> - return ipsp_spd_inp(m, inp, ipo, tdbout);
> + error = ipsp_spd_inp(m, inp, ipo, tdbout);
> + mtx_leave(&ipo_tdb_mtx);
> + return error;
>
> nomatchout:
> /* Cached TDB was not good. */
> @@ -428,23 +439,38 @@ ipsp_spd_lookup(struct mbuf *m, int af,
> * explosion in the number of acquired SAs).
> */
> if (ipo->ipo_last_searched <= ipsec_last_added) {
> + struct tdb *tdbp_new;
> +
> /* "Touch" the entry. */
> if (dignore == 0)
> ipo->ipo_last_searched = getuptime();
>
> + /* gettdb() takes tdb_sadb_mtx, preserve lock order */
> + mtx_leave(&ipo_tdb_mtx);
> /* Find an appropriate SA from the existing ones. */
> - ipo->ipo_tdb = gettdbbydst(rdomain,
> + tdbp_new = gettdbbydst(rdomain,
> dignore ? &sdst : &ipo->ipo_dst,
> ipo->ipo_sproto, ids ? ids: ipo->ipo_ids,
> &ipo->ipo_addr, &ipo->ipo_mask);
> + mtx_enter(&ipo_tdb_mtx);
> + if (ipo->ipo_tdb != NULL) {
> + /* Remove cached TDB from parallel thread. */
> + TAILQ_REMOVE(&ipo->ipo_tdb->tdb_policy_head,
> + ipo, ipo_tdb_next);
> + tdb_unref(ipo->ipo_tdb);
> + }
> + ipo->ipo_tdb = tdbp_new;
> if (ipo->ipo_tdb != NULL) {
> /* gettdbbydst() has already refcounted tdb */
> TAILQ_INSERT_TAIL(
> &ipo->ipo_tdb->tdb_policy_head,
> ipo, ipo_tdb_next);
> - return ipsp_spd_inp(m, inp, ipo, tdbout);
> + error = ipsp_spd_inp(m, inp, ipo, tdbout);
> + mtx_leave(&ipo_tdb_mtx);
> + return error;
> }
> }
> + mtx_leave(&ipo_tdb_mtx);
>
> /* So, we don't have an SA -- just a policy. */
> switch (ipo->ipo_type) {
> @@ -490,8 +516,13 @@ ipsp_spd_lookup(struct mbuf *m, int af,
> tdbp = tdbp->tdb_inext;
>
> /* Direct match in the cache. */
> - if (ipo->ipo_tdb == tdbp)
> - return ipsp_spd_inp(m, inp, ipo, tdbout);
> + mtx_enter(&ipo_tdb_mtx);
> + if (ipo->ipo_tdb == tdbp) {
> + error = ipsp_spd_inp(m, inp, ipo, tdbout);
> + mtx_leave(&ipo_tdb_mtx);
> + return error;
> + }
> + mtx_leave(&ipo_tdb_mtx);
>
> if (memcmp(dignore ? &ssrc : &ipo->ipo_dst,
> &tdbp->tdb_src, tdbp->tdb_src.sa.sa_len) ||
> @@ -505,6 +536,7 @@ ipsp_spd_lookup(struct mbuf *m, int af,
> goto nomatchin;
>
> /* Add it to the cache. */
> + mtx_enter(&ipo_tdb_mtx);
> if (ipo->ipo_tdb != NULL) {
> TAILQ_REMOVE(&ipo->ipo_tdb->tdb_policy_head,
> ipo, ipo_tdb_next);
> @@ -513,13 +545,16 @@ ipsp_spd_lookup(struct mbuf *m, int af,
> ipo->ipo_tdb = tdb_ref(tdbp);
> TAILQ_INSERT_TAIL(&tdbp->tdb_policy_head, ipo,
> ipo_tdb_next);
> - return ipsp_spd_inp(m, inp, ipo, tdbout);
> + error = ipsp_spd_inp(m, inp, ipo, tdbout);
> + mtx_leave(&ipo_tdb_mtx);
> + return error;
>
> nomatchin: /* Nothing needed here, falling through */
> ;
> }
>
> /* Check whether cached entry applies. */
> + mtx_enter(&ipo_tdb_mtx);
> if (ipo->ipo_tdb != NULL) {
> /*
> * We only need to check that the correct
> @@ -543,13 +578,25 @@ ipsp_spd_lookup(struct mbuf *m, int af,
>
> /* Find whether there exists an appropriate SA. */
> if (ipo->ipo_last_searched <= ipsec_last_added) {
> + struct tdb *tdbp_new;
> +
> if (dignore == 0)
> ipo->ipo_last_searched = getuptime();
>
> - ipo->ipo_tdb = gettdbbysrc(rdomain,
> + /* gettdb() takes tdb_sadb_mtx, preserve lock order */
> + mtx_leave(&ipo_tdb_mtx);
> + tdbp_new = gettdbbysrc(rdomain,
> dignore ? &ssrc : &ipo->ipo_dst,
> ipo->ipo_sproto, ipo->ipo_ids,
> &ipo->ipo_addr, &ipo->ipo_mask);
> + mtx_enter(&ipo_tdb_mtx);
> + if (ipo->ipo_tdb != NULL) {
> + /* Remove cached TDB from parallel thread. */
> + TAILQ_REMOVE(&ipo->ipo_tdb->tdb_policy_head,
> + ipo, ipo_tdb_next);
> + tdb_unref(ipo->ipo_tdb);
> + }
> + ipo->ipo_tdb = tdbp_new;
> if (ipo->ipo_tdb != NULL) {
> /* gettdbbysrc() has already refcounted tdb */
> TAILQ_INSERT_TAIL(
> @@ -558,6 +605,7 @@ ipsp_spd_lookup(struct mbuf *m, int af,
> }
> }
> skipinputsearch:
> + mtx_leave(&ipo_tdb_mtx);
>
> switch (ipo->ipo_type) {
> case IPSP_IPSEC_REQUIRE:
> @@ -615,12 +663,14 @@ ipsec_delete_policy(struct ipsec_policy
> rn_delete(&ipo->ipo_addr, &ipo->ipo_mask, rnh, rn) == NULL)
> return (ESRCH);
>
> + mtx_enter(&ipo_tdb_mtx);
> if (ipo->ipo_tdb != NULL) {
> TAILQ_REMOVE(&ipo->ipo_tdb->tdb_policy_head, ipo,
> ipo_tdb_next);
> tdb_unref(ipo->ipo_tdb);
> ipo->ipo_tdb = NULL;
> }
> + mtx_leave(&ipo_tdb_mtx);
>
> while ((ipa = TAILQ_FIRST(&ipo->ipo_acquires)) != NULL)
> ipsp_delete_acquire(ipa);
> @@ -825,10 +875,9 @@ ipsp_spd_inp(struct mbuf *m, struct inpc
>
> justreturn:
> if (tdbout != NULL) {
> - if (ipo != NULL) {
> - tdb_ref(ipo->ipo_tdb);
> - *tdbout = ipo->ipo_tdb;
> - } else
> + if (ipo != NULL)
> + *tdbout = tdb_ref(ipo->ipo_tdb);
> + else
> *tdbout = NULL;
> }
> return 0;
>