Hello, if you don't use pfsync on 7.2 stop reading now.
There are two reports already [1] which have the same cause: NULL pointer dereference in pf_state_export(). dlg@ has fixed this bug in CUURENT back in December [2]. Unfortunately the patch for 7.2 needs a manual merge, because CURRENT slightly differs. Below you can find a patch for 7.2. There is a positive feedback from Tamas [3]. I'd like to ask more people who run pfsync on 7.2 to give it a try and come back with report. Please also keep an eye on memory leaks: vmstat -m |egrep -e '^Name|^pfst' the InUse counter should be oscilating around some mean value which depends on your network load. thanks for your help regards sashan [1] https://marc.info/?l=openbsd-bugs&m=167195892512328&w=2 https://marc.info/?l=openbsd-bugs&m=167318980023225&w=2 [2] https://marc.info/?l=openbsd-cvs&m=167115592605571&w=2 [3] https://marc.info/?l=openbsd-bugs&m=167319072223544&w=2 --------8<----------------8<---------------8<-----------------8<-------- Index: sys/net/pf.c =================================================================== RCS file: /cvs/src/sys/net/pf.c,v retrieving revision 1.1140.2.1 diff -u -p -u -r1.1140.2.1 pf.c --- sys/net/pf.c 24 Nov 2022 22:51:23 -0000 1.1140.2.1 +++ sys/net/pf.c 9 Jan 2023 21:55:58 -0000 @@ -185,6 +185,8 @@ int pf_translate_icmp_af(struct pf_pd void pf_send_icmp(struct mbuf *, u_int8_t, u_int8_t, int, sa_family_t, struct pf_rule *, u_int); void pf_detach_state(struct pf_state *); +struct pf_state_key *pf_state_key_attach(struct pf_state_key *, + struct pf_state *, int); void pf_state_key_detach(struct pf_state *, int); u_int32_t pf_tcp_iss(struct pf_pdesc *); void pf_rule_to_actions(struct pf_rule *, @@ -723,17 +725,26 @@ pf_state_compare_id(struct pf_state *a, return (0); } -int +/* + * on failure, pf_state_key_attach() releases the pf_state_key + * reference and returns NULL. + */ +struct pf_state_key * pf_state_key_attach(struct pf_state_key *sk, struct pf_state *s, int idx) { struct pf_state_item *si; struct pf_state_key *cur; struct pf_state *olds = NULL; + PF_ASSERT_LOCKED(); + KASSERT(s->key[idx] == NULL); - if ((cur = RB_INSERT(pf_state_tree, &pf_statetbl, sk)) != NULL) { + sk->removed = 0; + cur = RB_INSERT(pf_state_tree, &pf_statetbl, sk); + if (cur != NULL) { + sk->removed = 1; /* key exists. check for same kif, if none, add to key */ - TAILQ_FOREACH(si, &cur->states, entry) + TAILQ_FOREACH(si, &cur->states, entry) { if (si->s->kif == s->kif && ((si->s->key[PF_SK_WIRE]->af == sk->af && si->s->direction == s->direction) || @@ -769,44 +780,53 @@ pf_state_key_attach(struct pf_state_key /* remove late or sks can go away */ olds = si->s; } else { - pool_put(&pf_state_key_pl, sk); - return (-1); /* collision! */ + pf_state_key_unref(sk); + return (NULL); /* collision! */ } } - pool_put(&pf_state_key_pl, sk); - s->key[idx] = cur; - } else - s->key[idx] = sk; + } + + /* reuse the existing state key */ + pf_state_key_unref(sk); + sk = cur; + } if ((si = pool_get(&pf_state_item_pl, PR_NOWAIT)) == NULL) { - pf_state_key_detach(s, idx); - return (-1); + if (TAILQ_EMPTY(&sk->states)) { + KASSERT(cur == NULL); + RB_REMOVE(pf_state_tree, &pf_statetbl, sk); + sk->removed = 1; + pf_state_key_unref(sk); + } + + return (NULL); } - si->s = s; + + s->key[idx] = pf_state_key_ref(sk); /* give a ref to state */ + si->s = pf_state_ref(s); /* list is sorted, if-bound states before floating */ if (s->kif == pfi_all) - TAILQ_INSERT_TAIL(&s->key[idx]->states, si, entry); + TAILQ_INSERT_TAIL(&sk->states, si, entry); else - TAILQ_INSERT_HEAD(&s->key[idx]->states, si, entry); + TAILQ_INSERT_HEAD(&sk->states, si, entry); if (olds) pf_remove_state(olds); - return (0); + /* caller owns the pf_state ref, which owns a pf_state_key ref now */ + return (sk); } void pf_detach_state(struct pf_state *s) { - if (s->key[PF_SK_WIRE] == s->key[PF_SK_STACK]) - s->key[PF_SK_WIRE] = NULL; + KASSERT(s->key[PF_SK_WIRE] != NULL); + pf_state_key_detach(s, PF_SK_WIRE); - if (s->key[PF_SK_STACK] != NULL) + KASSERT(s->key[PF_SK_STACK] != NULL); + if (s->key[PF_SK_STACK] != s->key[PF_SK_WIRE]) pf_state_key_detach(s, PF_SK_STACK); - - if (s->key[PF_SK_WIRE] != NULL) - pf_state_key_detach(s, PF_SK_WIRE); } void @@ -815,20 +835,22 @@ pf_state_key_detach(struct pf_state *s, struct pf_state_item *si; struct pf_state_key *sk; - if (s->key[idx] == NULL) + PF_ASSERT_LOCKED(); + + sk = s->key[idx]; + if (sk == NULL) return; - si = TAILQ_FIRST(&s->key[idx]->states); - while (si && si->s != s) - si = TAILQ_NEXT(si, entry); - - if (si) { - TAILQ_REMOVE(&s->key[idx]->states, si, entry); - pool_put(&pf_state_item_pl, si); + TAILQ_FOREACH(si, &sk->states, entry) { + if (si->s == s) + break; } + if (si == NULL) + return; + + TAILQ_REMOVE(&sk->states, si, entry); + pool_put(&pf_state_item_pl, si); - sk = s->key[idx]; - s->key[idx] = NULL; if (TAILQ_EMPTY(&sk->states)) { RB_REMOVE(pf_state_tree, &pf_statetbl, sk); sk->removed = 1; @@ -836,6 +858,8 @@ pf_state_key_detach(struct pf_state *s, pf_state_key_unlink_inpcb(sk); pf_state_key_unref(sk); } + + pf_state_unref(s); } struct pf_state_key * @@ -845,7 +869,10 @@ pf_alloc_state_key(int pool_flags) if ((sk = pool_get(&pf_state_key_pl, pool_flags)) == NULL) return (NULL); + + PF_REF_INIT(sk->refcnt); TAILQ_INIT(&sk->states); + sk->removed = 1; return (sk); } @@ -919,8 +946,6 @@ pf_state_key_setup(struct pf_pdesc *pd, sk1->proto = pd->proto; sk1->af = pd->af; sk1->rdomain = pd->rdomain; - PF_REF_INIT(sk1->refcnt); - sk1->removed = 0; if (rtableid >= 0) wrdom = rtable_l2(rtableid); @@ -929,7 +954,7 @@ pf_state_key_setup(struct pf_pdesc *pd, pd->nsport != pd->osport || pd->ndport != pd->odport || wrdom != pd->rdomain || afto) { /* NAT/NAT64 */ if ((sk2 = pf_alloc_state_key(PR_NOWAIT | PR_ZERO)) == NULL) { - pool_put(&pf_state_key_pl, sk1); + pf_state_key_unref(sk1); return (ENOMEM); } pf_state_key_addr_setup(pd, sk2, afto ? pd->didx : pd->sidx, @@ -952,10 +977,8 @@ pf_state_key_setup(struct pf_pdesc *pd, sk2->proto = pd->proto; sk2->af = pd->naf; sk2->rdomain = wrdom; - PF_REF_INIT(sk2->refcnt); - sk2->removed = 0; } else - sk2 = sk1; + sk2 = pf_state_key_ref(sk1); if (pd->dir == PF_IN) { *skw = sk1; @@ -974,34 +997,47 @@ pf_state_key_setup(struct pf_pdesc *pd, return (0); } +/* + * pf_state_insert() does the following: + * - links the pf_state up with pf_state_key(s). + * - inserts the pf_state_keys into pf_state_tree. + * - inserts the pf_state into the into pf_state_tree_id. + * - tells pfsync about the state. + * + * pf_state_insert() owns the references to the pf_state_key structs + * it is given. on failure to insert, these references are released. + * on success, the caller owns a pf_state reference that allows it + * to access the state keys. + */ + int -pf_state_insert(struct pfi_kif *kif, struct pf_state_key **skw, - struct pf_state_key **sks, struct pf_state *s) +pf_state_insert(struct pfi_kif *kif, struct pf_state_key **skwp, + struct pf_state_key **sksp, struct pf_state *s) { + struct pf_state_key *skw = *skwp; + struct pf_state_key *sks = *sksp; + int same = (skw == sks); + PF_ASSERT_LOCKED(); s->kif = kif; PF_STATE_ENTER_WRITE(); - if (*skw == *sks) { - if (pf_state_key_attach(*skw, s, PF_SK_WIRE)) { - PF_STATE_EXIT_WRITE(); - return (-1); - } - *skw = *sks = s->key[PF_SK_WIRE]; - s->key[PF_SK_STACK] = s->key[PF_SK_WIRE]; - } else { - if (pf_state_key_attach(*skw, s, PF_SK_WIRE)) { - pool_put(&pf_state_key_pl, *sks); - PF_STATE_EXIT_WRITE(); - return (-1); - } - *skw = s->key[PF_SK_WIRE]; - if (pf_state_key_attach(*sks, s, PF_SK_STACK)) { - pf_state_key_detach(s, PF_SK_WIRE); - PF_STATE_EXIT_WRITE(); - return (-1); - } - *sks = s->key[PF_SK_STACK]; + + skw = pf_state_key_attach(skw, s, PF_SK_WIRE); + if (skw == NULL) { + pf_state_key_unref(sks); + PF_STATE_EXIT_WRITE(); + return (-1); + } + + if (same) { + /* pf_state_key_attach might have swapped skw */ + pf_state_key_unref(sks); + s->key[PF_SK_STACK] = sks = pf_state_key_ref(skw); + } else if (pf_state_key_attach(sks, s, PF_SK_STACK) == NULL) { + pf_state_key_detach(s, PF_SK_WIRE); + PF_STATE_EXIT_WRITE(); + return (-1); } if (s->id == 0 && s->creatorid == 0) { @@ -1027,6 +1063,10 @@ pf_state_insert(struct pfi_kif *kif, str pfsync_insert_state(s); #endif /* NPFSYNC > 0 */ PF_STATE_EXIT_WRITE(); + + *skwp = skw; + *sksp = sks; + return (0); } @@ -1349,10 +1389,9 @@ pf_state_expires(const struct pf_state * */ /* handle all PFTM_* > PFTM_MAX here */ - if (stimeout == PFTM_PURGE) + if (stimeout > PFTM_MAX) return (0); - KASSERT(stimeout != PFTM_UNLINKED); KASSERT(stimeout < PFTM_MAX); timeout = state->rule.ptr->timeout[stimeout]; @@ -4228,7 +4267,6 @@ pf_create_state(struct pf_pdesc *pd, str } if (pf_state_insert(BOUND_IFACE(r, pd->kif), skw, sks, s)) { - pf_detach_state(s); *sks = *skw = NULL; REASON_SET(&reason, PFRES_STATEINS); goto csfailed; @@ -7761,8 +7799,9 @@ pf_state_unref(struct pf_state *s) #endif /* NPFSYNC */ KASSERT((TAILQ_NEXT(s, entry_list) == NULL) || (TAILQ_NEXT(s, entry_list) == _Q_INVALID)); - KASSERT((s->key[PF_SK_WIRE] == NULL) && - (s->key[PF_SK_STACK] == NULL)); + + pf_state_key_unref(s->key[PF_SK_WIRE]); + pf_state_key_unref(s->key[PF_SK_STACK]); pool_put(&pf_state_pl, s); } Index: sys/net/if_pfsync.c =================================================================== RCS file: /cvs/src/sys/net/if_pfsync.c,v retrieving revision 1.305 diff -u -p -u -r1.305 if_pfsync.c --- sys/net/if_pfsync.c 21 Apr 2022 15:22:49 -0000 1.305 +++ sys/net/if_pfsync.c 9 Jan 2023 21:55:59 -0000 @@ -99,6 +99,9 @@ sizeof(struct ip) + \ sizeof(struct pfsync_header)) +extern struct pf_state_key *pf_state_key_ref(struct pf_state_key *); +extern void pf_state_key_unref(struct pf_state_key *); + int pfsync_upd_tcp(struct pf_state *, struct pfsync_state_peer *, struct pfsync_state_peer *); @@ -589,7 +592,7 @@ pfsync_state_import(struct pfsync_state if ((sks = pf_alloc_state_key(pool_flags)) == NULL) goto cleanup; } else - sks = skw; + sks = pf_state_key_ref(skw); /* allocate memory for scrub info */ if (pfsync_alloc_scrub_memory(&sp->src, &st->src) || @@ -602,7 +605,6 @@ pfsync_state_import(struct pfsync_state skw->port[0] = sp->key[PF_SK_WIRE].port[0]; skw->port[1] = sp->key[PF_SK_WIRE].port[1]; skw->rdomain = ntohs(sp->key[PF_SK_WIRE].rdomain); - PF_REF_INIT(skw->refcnt); skw->proto = sp->proto; if (!(skw->af = sp->key[PF_SK_WIRE].af)) skw->af = sp->af; @@ -612,7 +614,6 @@ pfsync_state_import(struct pfsync_state sks->port[0] = sp->key[PF_SK_STACK].port[0]; sks->port[1] = sp->key[PF_SK_STACK].port[1]; sks->rdomain = ntohs(sp->key[PF_SK_STACK].rdomain); - PF_REF_INIT(sks->refcnt); if (!(sks->af = sp->key[PF_SK_STACK].af)) sks->af = sp->af; if (sks->af != skw->af) { @@ -711,12 +712,10 @@ pfsync_state_import(struct pfsync_state return (0); cleanup: - if (skw == sks) - sks = NULL; if (skw != NULL) - pool_put(&pf_state_key_pl, skw); + pf_state_key_unref(skw); if (sks != NULL) - pool_put(&pf_state_key_pl, sks); + pf_state_key_unref(sks); cleanup_state: /* pf_state_insert frees the state keys */ if (st) { Index: sys/net/pfvar.h =================================================================== RCS file: /cvs/src/sys/net/pfvar.h,v retrieving revision 1.510 diff -u -p -u -r1.510 pfvar.h --- sys/net/pfvar.h 3 Sep 2022 14:57:54 -0000 1.510 +++ sys/net/pfvar.h 9 Jan 2023 21:55:59 -0000 @@ -1806,7 +1806,6 @@ void pf_pkt_addr_changed(struct mbuf *); struct inpcb *pf_inp_lookup(struct mbuf *); void pf_inp_link(struct mbuf *, struct inpcb *); void pf_inp_unlink(struct inpcb *); -int pf_state_key_attach(struct pf_state_key *, struct pf_state *, int); int pf_translate(struct pf_pdesc *, struct pf_addr *, u_int16_t, struct pf_addr *, u_int16_t, u_int16_t, int); int pf_translate_af(struct pf_pdesc *);