Yes please. ok dlg@.

> On 10 Nov 2022, at 10:10 pm, Alexandr Nedvedicky <sas...@fastmail.net> wrote:
> 
> Hello,
> 
> David (dlg@) asked me to shrink the scope of the change.  The new diff
> introduces a mutex to pf_state and adjusts pf_detach_state() to use the
> newly introduced mutex.  I've also added annotations to the pf_state
> members.  The members marked with '?' need our attention: we need to
> verify whether they are protected by the global state lock, or whether
> we should cover them with the newly introduced mutex.
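
[For readers skimming the thread: the core of the pf_detach_state() change is
a snapshot-under-mutex pattern, where the key pointers are copied and cleared
inside one critical section and the actual teardown happens afterwards.  The
sketch below restates that pattern from the diff with explanatory comments;
example_detach() is a made-up name, and the pf_state_key_detach() prototype
assumed here is the patched one from pf.c below.]

/*
 * Sketch only (hypothetical helper name): restates the pf_detach_state()
 * change from the diff below, to show why the key pointers are snapshotted
 * and cleared under the new per-state mutex before the keys are torn down.
 */
#include <sys/mutex.h>          /* mtx_enter(), mtx_leave() */
#include <net/pfvar.h>          /* struct pf_state, PF_SK_WIRE, PF_SK_STACK */

void
example_detach(struct pf_state *s)
{
        struct pf_state_key     *key[2];

        /* Snapshot both keys and clear them in one critical section. */
        mtx_enter(&s->mtx);
        key[PF_SK_WIRE] = s->key[PF_SK_WIRE];
        key[PF_SK_STACK] = s->key[PF_SK_STACK];
        s->key[PF_SK_WIRE] = NULL;
        s->key[PF_SK_STACK] = NULL;
        mtx_leave(&s->mtx);

        /* If both slots share one key, detach it only once. */
        if (key[PF_SK_WIRE] == key[PF_SK_STACK])
                key[PF_SK_WIRE] = NULL;

        /* Teardown happens outside the mutex, under the state rw-lock. */
        if (key[PF_SK_STACK] != NULL)
                pf_state_key_detach(s, key[PF_SK_STACK]);
        if (key[PF_SK_WIRE] != NULL)
                pf_state_key_detach(s, key[PF_SK_WIRE]);
}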
> 
> The new diff also adds mtx_init() to pf_state_import().  This was missing
> from my earlier diff; it was found and reported by Hrvoje@.
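
[A minimal illustration of that fix, using a hypothetical helper name: every
code path that creates a struct pf_state has to initialize the new mutex
before the state becomes visible, which is what the diff does next to
refcnt_init() in pf_create_state() and pf_state_import().]

/*
 * Sketch only, hypothetical helper name: mirrors the mtx_init() calls the
 * diff adds alongside refcnt_init() when a new state is set up.
 */
#include <sys/mutex.h>          /* mtx_init() */
#include <net/pfvar.h>          /* struct pf_state, refcnt_init() users */

void
example_init_state(struct pf_state *st)
{
        refcnt_init(&st->refcnt);
        mtx_init(&st->mtx, IPL_NET);    /* protects st->key[] and friends */
}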
> 
> thanks and
> regards
> sashan
> 
> 
> --------8<---------------8<---------------8<------------------8<--------
> diff --git a/sys/net/pf.c b/sys/net/pf.c
> index 2c13c99baac..c6022867dd4 100644
> --- a/sys/net/pf.c
> +++ b/sys/net/pf.c
> @@ -185,7 +185,8 @@ int pf_translate_icmp_af(struct pf_pdesc*, int, void *);
> void pf_send_icmp(struct mbuf *, u_int8_t, u_int8_t, int,
>    sa_family_t, struct pf_rule *, u_int);
> void pf_detach_state(struct pf_state *);
> -void pf_state_key_detach(struct pf_state *, int);
> +void pf_state_key_detach(struct pf_state *,
> +    struct pf_state_key *);
> u_int32_t pf_tcp_iss(struct pf_pdesc *);
> void pf_rule_to_actions(struct pf_rule *,
>    struct pf_rule_actions *);
> @@ -779,7 +780,8 @@ pf_state_key_attach(struct pf_state_key *sk, struct pf_state *s, int idx)
> s->key[idx] = sk;
> 
> if ((si = pool_get(&pf_state_item_pl, PR_NOWAIT)) == NULL) {
> - pf_state_key_detach(s, idx);
> + pf_state_key_detach(s, s->key[idx]);
> + s->key[idx] = NULL;
> return (-1);
> }
> si->s = s;
> @@ -799,42 +801,50 @@ pf_state_key_attach(struct pf_state_key *sk, struct pf_state *s, int idx)
> void
> pf_detach_state(struct pf_state *s)
> {
> - if (s->key[PF_SK_WIRE] == s->key[PF_SK_STACK])
> - s->key[PF_SK_WIRE] = NULL;
> + struct pf_state_key *key[2];
> +
> + mtx_enter(&s->mtx);
> + key[PF_SK_WIRE] = s->key[PF_SK_WIRE];
> + key[PF_SK_STACK] = s->key[PF_SK_STACK];
> + s->key[PF_SK_WIRE] = NULL;
> + s->key[PF_SK_STACK] = NULL;
> + mtx_leave(&s->mtx);
> +
> + if (key[PF_SK_WIRE] == key[PF_SK_STACK])
> + key[PF_SK_WIRE] = NULL;
> 
> - if (s->key[PF_SK_STACK] != NULL)
> - pf_state_key_detach(s, PF_SK_STACK);
> + if (key[PF_SK_STACK] != NULL)
> + pf_state_key_detach(s, key[PF_SK_STACK]);
> 
> - if (s->key[PF_SK_WIRE] != NULL)
> - pf_state_key_detach(s, PF_SK_WIRE);
> + if (key[PF_SK_WIRE] != NULL)
> + pf_state_key_detach(s, key[PF_SK_WIRE]);
> }
> 
> void
> -pf_state_key_detach(struct pf_state *s, int idx)
> +pf_state_key_detach(struct pf_state *s, struct pf_state_key *key)
> {
> struct pf_state_item *si;
> - struct pf_state_key *sk;
> 
> - if (s->key[idx] == NULL)
> + PF_STATE_ASSERT_LOCKED();
> +
> + if (key == NULL)
> return;
> 
> - si = TAILQ_FIRST(&s->key[idx]->states);
> + si = TAILQ_FIRST(&key->states);
> while (si && si->s != s)
>    si = TAILQ_NEXT(si, entry);
> 
> if (si) {
> - TAILQ_REMOVE(&s->key[idx]->states, si, entry);
> + TAILQ_REMOVE(&key->states, si, entry);
> pool_put(&pf_state_item_pl, si);
> }
> 
> - sk = s->key[idx];
> - s->key[idx] = NULL;
> - if (TAILQ_EMPTY(&sk->states)) {
> - RB_REMOVE(pf_state_tree, &pf_statetbl, sk);
> - sk->removed = 1;
> - pf_state_key_unlink_reverse(sk);
> - pf_state_key_unlink_inpcb(sk);
> - pf_state_key_unref(sk);
> + if (TAILQ_EMPTY(&key->states)) {
> + RB_REMOVE(pf_state_tree, &pf_statetbl, key);
> + key->removed = 1;
> + pf_state_key_unlink_reverse(key);
> + pf_state_key_unlink_inpcb(key);
> + pf_state_key_unref(key);
> }
> }
> 
> @@ -997,7 +1007,9 @@ pf_state_insert(struct pfi_kif *kif, struct pf_state_key **skw,
> }
> *skw = s->key[PF_SK_WIRE];
> if (pf_state_key_attach(*sks, s, PF_SK_STACK)) {
> - pf_state_key_detach(s, PF_SK_WIRE);
> + pf_state_key_detach(s, s->key[PF_SK_WIRE]);
> + s->key[PF_SK_WIRE] = NULL;
> + *skw = NULL;
> PF_STATE_EXIT_WRITE();
> return (-1);
> }
> @@ -1429,6 +1441,7 @@ pf_state_import(const struct pfsync_state *sp, int flags)
> st->sync_state = PFSYNC_S_NONE;
> 
> refcnt_init(&st->refcnt);
> + mtx_init(&st->mtx, IPL_NET);
> 
> /* XXX when we have anchors, use STATE_INC_COUNTERS */
> r->states_cur++;
> @@ -4348,6 +4361,7 @@ pf_create_state(struct pf_pdesc *pd, struct pf_rule *r, struct pf_rule *a,
> * pf_state_inserts() grabs reference for pfsync!
> */
> refcnt_init(&s->refcnt);
> + mtx_init(&s->mtx, IPL_NET);
> 
> switch (pd->proto) {
> case IPPROTO_TCP:
> diff --git a/sys/net/pfvar.h b/sys/net/pfvar.h
> index cc6c9f3dedc..9a75f9a1e6c 100644
> --- a/sys/net/pfvar.h
> +++ b/sys/net/pfvar.h
> @@ -743,37 +743,49 @@ struct pf_state_cmp {
> u_int8_t pad[3];
> };
> 
> +/*
> + * pf_state structure members require protection either by the
> + * global state lock (an rw-lock) or by the mutex embedded in
> + * the state itself (pf_state::mtx).  The annotations are as
> + * follows:
> + * - stateRW: protected by the exclusive state lock
> + * - mtx: protected by the per-state mutex (pf_state::mtx)
> + * - syncMTX: protected by a mutex in pfsync
> + * - I: immutable since creation
> + * - ?: needs to be revisited (may require mtx)
> + */
> struct pf_state {
> - u_int64_t id;
> - u_int32_t creatorid;
> - u_int8_t direction;
> + u_int64_t id; /* I */
> + u_int32_t creatorid; /* I */
> + u_int8_t direction; /* I */
> u_int8_t pad[3];
> 
> - TAILQ_ENTRY(pf_state) sync_list;
> - TAILQ_ENTRY(pf_state) sync_snap;
> - TAILQ_ENTRY(pf_state) entry_list;
> - SLIST_ENTRY(pf_state) gc_list;
> - RB_ENTRY(pf_state) entry_id;
> + TAILQ_ENTRY(pf_state) sync_list; /* syncMTX */
> + TAILQ_ENTRY(pf_state) sync_snap; /* syncMTX */
> + TAILQ_ENTRY(pf_state) entry_list; /* stateRW */
> + SLIST_ENTRY(pf_state) gc_list; /* syncMTX */
> + RB_ENTRY(pf_state) entry_id; /* stateRW */
> struct pf_state_peer src;
> struct pf_state_peer dst;
> - struct pf_rule_slist match_rules;
> - union pf_rule_ptr rule;
> - union pf_rule_ptr anchor;
> - union pf_rule_ptr natrule;
> - struct pf_addr rt_addr;
> - struct pf_sn_head src_nodes;
> - struct pf_state_key *key[2]; /* addresses stack and wire  */
> - struct pfi_kif *kif;
> - u_int64_t packets[2];
> - u_int64_t bytes[2];
> - int32_t creation;
> - int32_t expire;
> - int32_t pfsync_time;
> - int rtableid[2]; /* rtables stack and wire */
> - u_int16_t qid;
> - u_int16_t pqid;
> - u_int16_t tag;
> - u_int16_t state_flags;
> + struct pf_rule_slist match_rules; /* I */
> + union pf_rule_ptr rule; /* I */
> + union pf_rule_ptr anchor; /* I */
> + union pf_rule_ptr natrule; /* I */
> + struct pf_addr rt_addr; /* I */
> + struct pf_sn_head src_nodes; /* I */
> + struct pf_state_key *key[2]; /* mtx, stack and wire  */
> + struct pfi_kif *kif; /* I */
> + struct mutex mtx;
> + u_int64_t packets[2]; /* ? */
> + u_int64_t bytes[2]; /* ? */
> + int32_t creation; /* I */
> + int32_t expire; /* ? */
> + int32_t pfsync_time; /* ? */
> + int rtableid[2]; /* I rtables stack and wire */
> + u_int16_t qid; /* I */
> + u_int16_t pqid; /* I */
> + u_int16_t tag; /* I */
> + u_int16_t state_flags; /* ? */
> #define PFSTATE_ALLOWOPTS 0x0001
> #define PFSTATE_SLOPPY 0x0002
> #define PFSTATE_PFLOW 0x0004
> @@ -787,20 +799,20 @@ struct pf_state {
> #define PFSTATE_INP_UNLINKED 0x0400
> #define PFSTATE_SCRUBMASK (PFSTATE_NODF|PFSTATE_RANDOMID|PFSTATE_SCRUB_TCP)
> #define PFSTATE_SETMASK   (PFSTATE_SETTOS|PFSTATE_SETPRIO)
> - u_int8_t log;
> - u_int8_t timeout;
> - u_int8_t sync_state; /* PFSYNC_S_x */
> - u_int8_t sync_updates;
> - u_int8_t min_ttl;
> - u_int8_t set_tos;
> - u_int8_t set_prio[2];
> - u_int16_t max_mss;
> - u_int16_t if_index_in;
> - u_int16_t if_index_out;
> + u_int8_t log; /* I */
> + u_int8_t timeout; /* ? */
> + u_int8_t sync_state; /* ? PFSYNC_S_x */
> + u_int8_t sync_updates; /* ? */
> + u_int8_t min_ttl; /* I */
> + u_int8_t set_tos; /* I */
> + u_int8_t set_prio[2]; /* I */
> + u_int16_t max_mss; /* I */
> + u_int16_t if_index_in; /* I */
> + u_int16_t if_index_out; /* I */
> pf_refcnt_t refcnt;
> - u_int16_t delay;
> - u_int8_t rt;
> - u_int8_t snapped;
> + u_int16_t delay; /* I */
> + u_int8_t rt; /* I */
> + u_int8_t snapped; /* syncMTX */
> };
> 
> /*
> 
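
[To make the annotation legend concrete: the following is a hypothetical
helper, not part of the diff, showing what it would look like for members
currently marked '?' (here the packet/byte counters) to be covered by the
per-state mutex.  Whether those members end up under pf_state::mtx, under the
state rw-lock, or handled some other way is exactly the question the '?'
annotations are meant to flag.]

/*
 * Hypothetical illustration, not part of the diff: updating a member
 * marked '?' as if it were covered by the per-state mutex (pf_state::mtx).
 */
#include <sys/mutex.h>          /* mtx_enter(), mtx_leave() */
#include <net/pfvar.h>          /* struct pf_state */

void
example_update_counters(struct pf_state *s, int dirndx, u_int64_t len)
{
        mtx_enter(&s->mtx);
        s->packets[dirndx]++;
        s->bytes[dirndx] += len;
        mtx_leave(&s->mtx);
}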
