If I understand this patch: Rule removal requires the consistency lock but to acquire it one must be able to sleep. pf_test_rule() cannot sleep as it executes in a (soft) interrupt handler, so it passes the expired rule to a thread which can.
However, the code to remove a 'once' rule may wish to also remove its parent rule, now computed in the other thread. The updated patch below avoids passing that state between threads in struct pf_rule by explictly passing the parent rule to be purged when necessary. There also seem to me a couple of issues, one hairier than the other: - r->rule_flag should be updated atomically with respect to other (potential) updates (included in the attached patch). - Multiple rule-matching threads may match the same 'once' rule before one of them reaches the code to set PFRULE_EXPIRED. A thread may even match a 'once' rule after PFRULE_EXPIRED is set, if that occurs during the 'once' rule's candidacy as a potential match. (This issue is not addressed by the attached patch.) This creates two problems: a) rules may be purged more than once b) what to do with packets matched by expired 'once' rules In general, we do not know whether or not a 'once' rule candidate is the match until a new candidate rule replaces it or the rule matching process completes. Serialising this window of candicacy would prevent a 'once' rule becoming a candidate, and possibly the match, for multiple packets at once. That's one approach. A weaker approach instead permits multiple matches of one 'once' rule but processes these serially in a critical section afterwards. The first thread to reach it is deemed the first match (note this isn't necessarily the first matching packet received at the interface!). It then atomically marks it PFRULE_EXPIRED and places it on the purge queue. Packets for matches of expired 'once' rule, it seems to me, can be dropped as if we never saw them. An alternative might be to retry them against the updated rules. Another possibility would be to require 'once' rules to be 'quick'. This closes the candidacy window and makes its serialisation, to preclude multiple matches, more feasible. Yet another possibility is to drop 'once' rules as too complex to implement for multiprocessor but I have no idea if this is viable. best, Richard. Tested via pf.conf set skip on lo pass block in proto icmp anchor selfdestruct { pass in proto icmp once no state } Index: net/pf.c =================================================================== --- net.orig/pf.c +++ net/pf.c @@ -295,6 +295,9 @@ RB_GENERATE(pf_state_tree, pf_state_key, RB_GENERATE(pf_state_tree_id, pf_state, entry_id, pf_state_compare_id); +SLIST_HEAD(pf_rule_gcl, pf_rule) pf_rule_gcl = + SLIST_HEAD_INITIALIZER(pf_rule_gcl); + __inline int pf_addr_compare(struct pf_addr *a, struct pf_addr *b, sa_family_t af) { @@ -1137,6 +1140,24 @@ pf_state_export(struct pfsync_state *sp, /* END state table stuff */ void +pf_purge_expired_rules(void) +{ + struct pf_rule *r; + + if (SLIST_EMPTY(&pf_rule_gcl)) { + return; + } + + rw_enter_write(&pf_consistency_lock); + while ((r = SLIST_FIRST(&pf_rule_gcl)) != NULL) { + SLIST_REMOVE(&pf_rule_gcl, r, pf_rule, gcle); + KASSERT(r->rule_flag & PFRULE_EXPIRED); + pf_purge_rule(r); + } + rw_exit_write(&pf_consistency_lock); +} + +void pf_purge_thread(void *v) { int nloops = 0, s; @@ -1154,6 +1175,7 @@ pf_purge_thread(void *v) if (++nloops >= pf_default_rule.timeout[PFTM_INTERVAL]) { pf_purge_expired_fragments(); pf_purge_expired_src_nodes(0); + pf_purge_expired_rules(); nloops = 0; } @@ -3135,6 +3157,10 @@ pf_test_rule(struct pf_pdesc *pd, struct ruleset = &pf_main_ruleset; r = TAILQ_FIRST(pf_main_ruleset.rules.active.ptr); while (r != NULL) { + if (r->rule_flag & PFRULE_EXPIRED) { + r = TAILQ_NEXT(r, entries); + goto nextrule; + } r->evaluations++; PF_TEST_ATTRIB((pfi_kif_match(r->kif, pd->kif) == r->ifnot), r->skip[PF_SKIP_IFP].ptr); @@ -3433,8 +3459,18 @@ pf_test_rule(struct pf_pdesc *pd, struct } #endif /* NPFSYNC > 0 */ - if (r->rule_flag & PFRULE_ONCE) - pf_purge_rule(ruleset, r, aruleset, a); + if (r->rule_flag & PFRULE_ONCE) { + int only_rule = (TAILQ_NEXT(r, entries) == NULL && + TAILQ_PREV(r, pf_rulequeue, entries) == NULL); + + if (only_rule && a) { + atomic_setbits_int(&a->rule_flag, PFRULE_EXPIRED); + SLIST_INSERT_HEAD(&pf_rule_gcl, a, gcle); + } + + atomic_setbits_int(&r->rule_flag, PFRULE_EXPIRED); + SLIST_INSERT_HEAD(&pf_rule_gcl, r, gcle); + } #ifdef INET6 if (rewrite && skw->af != sks->af) Index: net/pf_ioctl.c =================================================================== --- net.orig/pf_ioctl.c +++ net/pf_ioctl.c @@ -301,12 +301,13 @@ pf_rm_rule(struct pf_rulequeue *rulequeu } void -pf_purge_rule(struct pf_ruleset *ruleset, struct pf_rule *rule, - struct pf_ruleset *aruleset, struct pf_rule *arule) +pf_purge_rule(struct pf_rule *rule) { u_int32_t nr = 0; + struct pf_ruleset *ruleset; - KASSERT(ruleset != NULL && rule != NULL); + KASSERT((rule != NULL) && (rule->myruleset != NULL)); + ruleset = rule->myruleset; pf_rm_rule(ruleset->rules.active.ptr, rule); ruleset->rules.active.rcount--; @@ -314,16 +315,6 @@ pf_purge_rule(struct pf_ruleset *ruleset rule->nr = nr++; ruleset->rules.active.ticket++; pf_calc_skip_steps(ruleset->rules.active.ptr); - - /* remove the parent anchor rule */ - if (nr == 0 && arule && aruleset) { - pf_rm_rule(aruleset->rules.active.ptr, arule); - aruleset->rules.active.rcount--; - TAILQ_FOREACH(rule, aruleset->rules.active.ptr, entries) - rule->nr = nr++; - aruleset->rules.active.ticket++; - pf_calc_skip_steps(aruleset->rules.active.ptr); - } } u_int16_t @@ -1209,6 +1200,7 @@ pfioctl(dev_t dev, u_long cmd, caddr_t a } TAILQ_INSERT_TAIL(ruleset->rules.inactive.ptr, rule, entries); + rule->myruleset = ruleset; ruleset->rules.inactive.rcount++; break; } Index: net/pfvar.h =================================================================== --- net.orig/pfvar.h +++ net/pfvar.h @@ -563,6 +563,9 @@ struct pf_rule { struct pf_addr addr; u_int16_t port; } divert, divert_packet; + + SLIST_ENTRY(pf_rule) gcle; + struct pf_ruleset *myruleset; }; /* rule flags */ @@ -581,6 +584,7 @@ struct pf_rule { #define PFRULE_PFLOW 0x00040000 #define PFRULE_ONCE 0x00100000 /* one shot rule */ #define PFRULE_AFTO 0x00200000 /* af-to rule */ +#define PFRULE_EXPIRED 0x00400000 /* one shot rule hit by pkt */ #define PFSTATE_HIWAT 10000 /* default state table size */ #define PFSTATE_ADAPT_START 6000 /* default adaptive timeout start */ @@ -1701,9 +1705,7 @@ extern void pf_addrcpy(struct pf_addr sa_family_t); void pf_rm_rule(struct pf_rulequeue *, struct pf_rule *); -void pf_purge_rule(struct pf_ruleset *, - struct pf_rule *, struct pf_ruleset *, - struct pf_rule *); +void pf_purge_rule(struct pf_rule *); struct pf_divert *pf_find_divert(struct mbuf *); int pf_setup_pdesc(struct pf_pdesc *, void *, sa_family_t, int, struct pfi_kif *,