hello, diff below adds a mutex to pf_state. It fixes a NULL pointer dereference panic reported by Hrvoje sometime ago [1]. Besides adding a mutex to state the diff addresses a race between pfsync and state purge thread. What happened in this particular case was that state expired and its state keys got detached while it was waiting to be processed by pfsync. Once pfsync got to it found state keys detached and tripped on null pointer dereference. This is the race change below fixes.
I'm not too much worried about contention on newly introduced mutex. the thing is it is not a global mutex it is a per state mutex (per object mutex). I don't expect to see two cpu's will be updating same state very often. thanks and regards sashan [1] https://marc.info/?l=openbsd-bugs&m=166006758231954&w=2 --------8<---------------8<---------------8<------------------8<-------- diff --git a/sys/net/if_pfsync.c b/sys/net/if_pfsync.c index d279ede9cd6..5f92ae6ec45 100644 --- a/sys/net/if_pfsync.c +++ b/sys/net/if_pfsync.c @@ -157,16 +157,16 @@ const struct { }; struct pfsync_q { - void (*write)(struct pf_state *, void *); + int (*write)(struct pf_state *, void *); size_t len; u_int8_t action; }; /* we have one of these for every PFSYNC_S_ */ -void pfsync_out_state(struct pf_state *, void *); -void pfsync_out_iack(struct pf_state *, void *); -void pfsync_out_upd_c(struct pf_state *, void *); -void pfsync_out_del(struct pf_state *, void *); +int pfsync_out_state(struct pf_state *, void *); +int pfsync_out_iack(struct pf_state *, void *); +int pfsync_out_upd_c(struct pf_state *, void *); +int pfsync_out_del(struct pf_state *, void *); struct pfsync_q pfsync_qs[] = { { pfsync_out_iack, sizeof(struct pfsync_ins_ack), PFSYNC_ACT_INS_ACK }, @@ -1301,24 +1301,26 @@ pfsyncioctl(struct ifnet *ifp, u_long cmd, caddr_t data) return (0); } -void +int pfsync_out_state(struct pf_state *st, void *buf) { struct pfsync_state *sp = buf; pf_state_export(sp, st); + return (0); } -void +int pfsync_out_iack(struct pf_state *st, void *buf) { struct pfsync_ins_ack *iack = buf; iack->id = st->id; iack->creatorid = st->creatorid; + return (0); } -void +int pfsync_out_upd_c(struct pf_state *st, void *buf) { struct pfsync_upd_c *up = buf; @@ -1329,9 +1331,10 @@ pfsync_out_upd_c(struct pf_state *st, void *buf) pf_state_peer_hton(&st->dst, &up->dst); up->creatorid = st->creatorid; up->timeout = st->timeout; + return (0); } -void +int pfsync_out_del(struct pf_state *st, void *buf) { struct pfsync_del_c *dp = buf; @@ -1340,6 +1343,7 @@ pfsync_out_del(struct pf_state *st, void *buf) dp->creatorid = st->creatorid; SET(st->state_flags, PFSTATE_NOSYNC); + return (0); } void @@ -1664,8 +1668,8 @@ pfsync_sendout(void) KASSERT(st->snapped == 1); st->sync_state = PFSYNC_S_NONE; st->snapped = 0; - pfsync_qs[q].write(st, m->m_data + offset); - offset += pfsync_qs[q].len; + if (pfsync_qs[q].write(st, m->m_data + offset) == 0) + offset += pfsync_qs[q].len; pf_state_unref(st); count++; diff --git a/sys/net/if_pfsync.h b/sys/net/if_pfsync.h index 5447b829d74..30c1df0de9c 100644 --- a/sys/net/if_pfsync.h +++ b/sys/net/if_pfsync.h @@ -326,7 +326,7 @@ int pfsync_sysctl(int *, u_int, void *, size_t *, #define PFSYNC_SI_CKSUM 0x02 #define PFSYNC_SI_ACK 0x04 int pfsync_state_import(struct pfsync_state *, int); -void pfsync_state_export(struct pfsync_state *, +int pfsync_state_export(struct pfsync_state *, struct pf_state *); void pfsync_insert_state(struct pf_state *); diff --git a/sys/net/pf.c b/sys/net/pf.c index c42f76dbc67..1083ee95b9a 100644 --- a/sys/net/pf.c +++ b/sys/net/pf.c @@ -185,7 +185,8 @@ int pf_translate_icmp_af(struct pf_pdesc*, int, void *); void pf_send_icmp(struct mbuf *, u_int8_t, u_int8_t, int, sa_family_t, struct pf_rule *, u_int); void pf_detach_state(struct pf_state *); -void pf_state_key_detach(struct pf_state *, int); +void pf_state_key_detach(struct pf_state *, + struct pf_state_key *); u_int32_t pf_tcp_iss(struct pf_pdesc *); void pf_rule_to_actions(struct pf_rule *, struct pf_rule_actions *); @@ -260,6 +261,9 @@ void pf_state_key_unlink_inpcb(struct pf_state_key *); void pf_inpcb_unlink_state_key(struct inpcb *); void pf_pktenqueue_delayed(void *); int32_t pf_state_expires(const struct pf_state *, uint8_t); +void pf_state_keys_take(struct pf_state *, + struct pf_state_key **); +void pf_state_keys_rele(struct pf_state_key **); #if NPFLOG > 0 void pf_log_matches(struct pf_pdesc *, struct pf_rule *, @@ -779,7 +783,8 @@ pf_state_key_attach(struct pf_state_key *sk, struct pf_state *s, int idx) s->key[idx] = sk; if ((si = pool_get(&pf_state_item_pl, PR_NOWAIT)) == NULL) { - pf_state_key_detach(s, idx); + pf_state_key_detach(s, s->key[idx]); + s->key[idx] = NULL; return (-1); } si->s = s; @@ -799,42 +804,50 @@ pf_state_key_attach(struct pf_state_key *sk, struct pf_state *s, int idx) void pf_detach_state(struct pf_state *s) { - if (s->key[PF_SK_WIRE] == s->key[PF_SK_STACK]) - s->key[PF_SK_WIRE] = NULL; + struct pf_state_key *key[2]; + + mtx_enter(&s->mtx); + key[PF_SK_WIRE] = s->key[PF_SK_WIRE]; + key[PF_SK_STACK] = s->key[PF_SK_STACK]; + s->key[PF_SK_WIRE] = NULL; + s->key[PF_SK_STACK] = NULL; + mtx_leave(&s->mtx); + + if (key[PF_SK_WIRE] == key[PF_SK_STACK]) + key[PF_SK_WIRE] = NULL; - if (s->key[PF_SK_STACK] != NULL) - pf_state_key_detach(s, PF_SK_STACK); + if (key[PF_SK_STACK] != NULL) + pf_state_key_detach(s, key[PF_SK_STACK]); - if (s->key[PF_SK_WIRE] != NULL) - pf_state_key_detach(s, PF_SK_WIRE); + if (key[PF_SK_WIRE] != NULL) + pf_state_key_detach(s, key[PF_SK_WIRE]); } void -pf_state_key_detach(struct pf_state *s, int idx) +pf_state_key_detach(struct pf_state *s, struct pf_state_key *key) { struct pf_state_item *si; - struct pf_state_key *sk; - if (s->key[idx] == NULL) + PF_STATE_ASSERT_LOCKED(); + + if (key == NULL) return; - si = TAILQ_FIRST(&s->key[idx]->states); + si = TAILQ_FIRST(&key->states); while (si && si->s != s) si = TAILQ_NEXT(si, entry); if (si) { - TAILQ_REMOVE(&s->key[idx]->states, si, entry); + TAILQ_REMOVE(&key->states, si, entry); pool_put(&pf_state_item_pl, si); } - sk = s->key[idx]; - s->key[idx] = NULL; - if (TAILQ_EMPTY(&sk->states)) { - RB_REMOVE(pf_state_tree, &pf_statetbl, sk); - sk->removed = 1; - pf_state_key_unlink_reverse(sk); - pf_state_key_unlink_inpcb(sk); - pf_state_key_unref(sk); + if (TAILQ_EMPTY(&key->states)) { + RB_REMOVE(pf_state_tree, &pf_statetbl, key); + key->removed = 1; + pf_state_key_unlink_reverse(key); + pf_state_key_unlink_inpcb(key); + pf_state_key_unref(key); } } @@ -997,7 +1010,9 @@ pf_state_insert(struct pfi_kif *kif, struct pf_state_key **skw, } *skw = s->key[PF_SK_WIRE]; if (pf_state_key_attach(*sks, s, PF_SK_STACK)) { - pf_state_key_detach(s, PF_SK_WIRE); + pf_state_key_detach(s, s->key[PF_SK_WIRE]); + s->key[PF_SK_WIRE] = NULL; + *skw = NULL; PF_STATE_EXIT_WRITE(); return (-1); } @@ -1189,30 +1204,35 @@ pf_find_state_all(struct pf_state_key_cmp *key, u_int dir, int *more) return (ret ? ret->s : NULL); } -void +int pf_state_export(struct pfsync_state *sp, struct pf_state *st) { int32_t expire; + struct pf_state_key *key[2] = { NULL, NULL }; memset(sp, 0, sizeof(struct pfsync_state)); /* copy from state key */ - sp->key[PF_SK_WIRE].addr[0] = st->key[PF_SK_WIRE]->addr[0]; - sp->key[PF_SK_WIRE].addr[1] = st->key[PF_SK_WIRE]->addr[1]; - sp->key[PF_SK_WIRE].port[0] = st->key[PF_SK_WIRE]->port[0]; - sp->key[PF_SK_WIRE].port[1] = st->key[PF_SK_WIRE]->port[1]; - sp->key[PF_SK_WIRE].rdomain = htons(st->key[PF_SK_WIRE]->rdomain); - sp->key[PF_SK_WIRE].af = st->key[PF_SK_WIRE]->af; - sp->key[PF_SK_STACK].addr[0] = st->key[PF_SK_STACK]->addr[0]; - sp->key[PF_SK_STACK].addr[1] = st->key[PF_SK_STACK]->addr[1]; - sp->key[PF_SK_STACK].port[0] = st->key[PF_SK_STACK]->port[0]; - sp->key[PF_SK_STACK].port[1] = st->key[PF_SK_STACK]->port[1]; - sp->key[PF_SK_STACK].rdomain = htons(st->key[PF_SK_STACK]->rdomain); - sp->key[PF_SK_STACK].af = st->key[PF_SK_STACK]->af; + pf_state_keys_take(st, key); + if ((key[PF_SK_WIRE] == NULL) || (key[PF_SK_STACK] == NULL)) + return (-1); + + sp->key[PF_SK_WIRE].addr[0] = key[PF_SK_WIRE]->addr[0]; + sp->key[PF_SK_WIRE].addr[1] = key[PF_SK_WIRE]->addr[1]; + sp->key[PF_SK_WIRE].port[0] = key[PF_SK_WIRE]->port[0]; + sp->key[PF_SK_WIRE].port[1] = key[PF_SK_WIRE]->port[1]; + sp->key[PF_SK_WIRE].rdomain = htons(key[PF_SK_WIRE]->rdomain); + sp->key[PF_SK_WIRE].af = key[PF_SK_WIRE]->af; + sp->key[PF_SK_STACK].addr[0] = key[PF_SK_STACK]->addr[0]; + sp->key[PF_SK_STACK].addr[1] = key[PF_SK_STACK]->addr[1]; + sp->key[PF_SK_STACK].port[0] = key[PF_SK_STACK]->port[0]; + sp->key[PF_SK_STACK].port[1] = key[PF_SK_STACK]->port[1]; + sp->key[PF_SK_STACK].rdomain = htons(key[PF_SK_STACK]->rdomain); + sp->key[PF_SK_STACK].af = key[PF_SK_STACK]->af; sp->rtableid[PF_SK_WIRE] = htonl(st->rtableid[PF_SK_WIRE]); sp->rtableid[PF_SK_STACK] = htonl(st->rtableid[PF_SK_STACK]); - sp->proto = st->key[PF_SK_WIRE]->proto; - sp->af = st->key[PF_SK_WIRE]->af; + sp->proto = key[PF_SK_WIRE]->proto; + sp->af = key[PF_SK_WIRE]->af; /* copy from state */ strlcpy(sp->ifname, st->kif->pfik_name, sizeof(sp->ifname)); @@ -1259,6 +1279,10 @@ pf_state_export(struct pfsync_state *sp, struct pf_state *st) sp->set_tos = st->set_tos; sp->set_prio[0] = st->set_prio[0]; sp->set_prio[1] = st->set_prio[1]; + + pf_state_keys_rele(key); + + return (0); } int @@ -4348,6 +4372,7 @@ pf_create_state(struct pf_pdesc *pd, struct pf_rule *r, struct pf_rule *a, * pf_state_inserts() grabs reference for pfsync! */ refcnt_init(&s->refcnt); + mtx_init(&s->mtx, IPL_NET); switch (pd->proto) { case IPPROTO_TCP: @@ -8019,3 +8044,19 @@ pf_pktenqueue_delayed(void *arg) pool_put(&pf_pktdelay_pl, pdy); } + +void +pf_state_keys_take(struct pf_state *st, struct pf_state_key *keys[]) +{ + mtx_enter(&st->mtx); + keys[PF_SK_WIRE] = pf_state_key_ref(st->key[PF_SK_WIRE]); + keys[PF_SK_STACK] = pf_state_key_ref(st->key[PF_SK_STACK]); + mtx_leave(&st->mtx); +} + +void +pf_state_keys_rele(struct pf_state_key *keys[]) +{ + pf_state_key_unref(keys[PF_SK_WIRE]); + pf_state_key_unref(keys[PF_SK_STACK]); +} diff --git a/sys/net/pfvar.h b/sys/net/pfvar.h index cc6c9f3dedc..e99f5993783 100644 --- a/sys/net/pfvar.h +++ b/sys/net/pfvar.h @@ -764,6 +764,7 @@ struct pf_state { struct pf_sn_head src_nodes; struct pf_state_key *key[2]; /* addresses stack and wire */ struct pfi_kif *kif; + struct mutex mtx; u_int64_t packets[2]; u_int64_t bytes[2]; int32_t creation; @@ -1739,7 +1740,7 @@ void pf_state_rm_src_node(struct pf_state *, extern struct pf_state *pf_find_state_byid(struct pf_state_cmp *); extern struct pf_state *pf_find_state_all(struct pf_state_key_cmp *, u_int, int *); -extern void pf_state_export(struct pfsync_state *, +extern int pf_state_export(struct pfsync_state *, struct pf_state *); int pf_state_import(const struct pfsync_state *, int);