Hello, I've reworked the anchor handling so the traversal now uses true recursion. Using recursion here will allow us to implement ruleset locking in a nicer fashion. The idea is to split the current pf_test_rule() into two functions: pf_test_rule() and pf_match_rule().
pf_step_into_anchor() is changed to drive the recursive anchor traversal. It calls pf_match_rule() to match rules in nested rulesets. pf_step_out_of_anchor() has been merged into the new pf_step_into_anchor(). To minimize the stack frame size, a pf_test_ctx structure is introduced. Its members are the various variables which used to be locals of the former pf_test_rule(). The pf_test_ctx instance is a local variable of the new pf_test_rule(). pf_match_rule() receives a pointer to the pf_test_ctx as its argument so it can reach all the variables it needs. The goal is to move as many local variables as possible out of pf_match_rule() and pf_step_into_anchor() to save memory. To minimize the amount of differences, macros to access members of pf_test_ctx are introduced. Once consensus on the proposed approach is reached, we can polish the patch a bit. I did some basic testing with rules as follows: pass all anchor "ap" self to 10.0.0.0/8 { block proto tcp from self to 10.0.0.138 port 23 pass proto tcp from self to 10.0.0.138 port 23 once } and a wildcard variant. It seems to me that it works, but I'll be glad for any further testing tips. 
regards sasha ---------8<----------------8<---------------8<-------- Index: pf.c =================================================================== RCS file: /cvs/src/sys/net/pf.c,v retrieving revision 1.935 diff -u -p -r1.935 pf.c --- pf.c 21 Jul 2015 02:32:04 -0000 1.935 +++ pf.c 8 Aug 2015 09:44:01 -0000 @@ -114,13 +114,36 @@ u_char pf_tcp_secret[16]; int pf_tcp_secret_init; int pf_tcp_iss_off; -struct pf_anchor_stackframe { - struct pf_ruleset *rs; - struct pf_rule *r; - struct pf_anchor_node *parent; - struct pf_anchor *child; -} pf_anchor_stack[64]; +struct pf_test_ctx { + int _test_status; + struct pf_pdesc *_pd; + struct pf_rule_actions _act; + u_int8_t _icmpcode; + u_int8_t _icmptype; + int _icmp_dir; + int _state_icmp; + int _tag; + u_short _reason; + struct pf_rule_item *_ri; + struct pf_src_node *_sns[PF_SN_MAX]; + struct pf_rule_slist _rules; + struct pf_rule *_nr; + struct pf_rule **_rm; + struct pf_rule *_a; + struct pf_rule **_am; + struct pf_ruleset **_rsm; + struct pf_ruleset *_arsm; + struct pf_ruleset *_aruleset; + int _depth; +}; + +#define PF_ANCHOR_STACK_MAX 64 +enum { + PF_FAIL = -1, + PF_OK, + PF_QUICK +}; /* * Cannot fold into pf_pdesc directly, unknown storage size outside pf.c. * Keep in sync with union pf_headers in pflog_bpfcopy() in if_pflog.c. 
@@ -225,11 +248,8 @@ struct pf_state *pf_find_state(struct p struct pf_state_key_cmp *, u_int, struct mbuf *); int pf_src_connlimit(struct pf_state **); int pf_match_rcvif(struct mbuf *, struct pf_rule *); -void pf_step_into_anchor(int *, struct pf_ruleset **, - struct pf_rule **, struct pf_rule **); -int pf_step_out_of_anchor(int *, struct pf_ruleset **, - struct pf_rule **, struct pf_rule **, - int *); +int pf_step_into_anchor(struct pf_test_ctx *, struct pf_rule *); +int pf_match_rule(struct pf_test_ctx *, struct pf_ruleset *); void pf_counters_inc(int, struct pf_pdesc *, struct pf_state *, struct pf_rule *, struct pf_rule *); @@ -2626,74 +2646,44 @@ pf_tag_packet(struct mbuf *m, int tag, i m->m_pkthdr.ph_rtableid = (u_int)rtableid; } -void -pf_step_into_anchor(int *depth, struct pf_ruleset **rs, - struct pf_rule **r, struct pf_rule **a) +int +pf_step_into_anchor(struct pf_test_ctx *tctx, struct pf_rule *r) { - struct pf_anchor_stackframe *f; +#define depth (tctx->_depth) +#define am (tctx->_am) +#define parent r->anchor->children + int rv; - if (*depth >= sizeof(pf_anchor_stack) / - sizeof(pf_anchor_stack[0])) { + if (depth >= PF_ANCHOR_STACK_MAX) { log(LOG_ERR, "pf_step_into_anchor: stack overflow\n"); - *r = TAILQ_NEXT(*r, entries); - return; - } else if (a != NULL) - *a = *r; - f = pf_anchor_stack + (*depth)++; - f->rs = *rs; - f->r = *r; - if ((*r)->anchor_wildcard) { - f->parent = &(*r)->anchor->children; - if ((f->child = RB_MIN(pf_anchor_node, f->parent)) == NULL) { - *r = NULL; - return; - } - *rs = &f->child->ruleset; - } else { - f->parent = NULL; - f->child = NULL; - *rs = &(*r)->anchor->ruleset; + return (PF_FAIL); } - *r = TAILQ_FIRST((*rs)->rules.active.ptr); -} -int -pf_step_out_of_anchor(int *depth, struct pf_ruleset **rs, - struct pf_rule **r, struct pf_rule **a, int *match) -{ - struct pf_anchor_stackframe *f; - int quick = 0; + depth++; - do { - if (*depth <= 0) - break; - f = pf_anchor_stack + *depth - 1; - if (f->parent != NULL && f->child 
!= NULL) { - f->child = RB_NEXT(pf_anchor_node, f->parent, f->child); - if (f->child != NULL) { - *rs = &f->child->ruleset; - *r = TAILQ_FIRST((*rs)->rules.active.ptr); - if (*r == NULL) - continue; - else - break; + if (r->anchor_wildcard) { + struct pf_anchor *child; + rv = PF_OK; + RB_FOREACH(child, pf_anchor_node, &parent) { + rv = pf_match_rule(tctx, &child->ruleset); + if (rv != 0) { + /* + * break on quick rule or failure + */ + break; } } - (*depth)--; - if (*depth == 0 && a != NULL) - *a = NULL; - else if (a != NULL) - *a = f->r; - *rs = f->rs; - if (*match > *depth) { - *match = *depth; - if (f->r->quick) - quick = 1; - } - *r = TAILQ_NEXT(f->r, entries); - } while (*r == NULL); + } else { + rv = pf_match_rule(tctx, &r->anchor->ruleset); + } + + depth--; + + return (rv); - return (quick); +#undef depth +#undef am +#undef parent } void @@ -3067,74 +3057,31 @@ pf_rule_to_actions(struct pf_rule *r, st } while (0) int -pf_test_rule(struct pf_pdesc *pd, struct pf_rule **rm, struct pf_state **sm, - struct pf_rule **am, struct pf_ruleset **rsm) +pf_match_rule(struct pf_test_ctx *tctx, struct pf_ruleset *ruleset) { - struct pf_rule *r; - struct pf_rule *nr = NULL; - struct pf_rule *a = NULL; - struct pf_ruleset *arsm = NULL; - struct pf_ruleset *aruleset = NULL; - struct pf_ruleset *ruleset = NULL; - struct pf_rule_slist rules; - struct pf_rule_item *ri; - struct pf_src_node *sns[PF_SN_MAX]; - struct tcphdr *th = pd->hdr.tcp; - struct pf_state_key *skw = NULL, *sks = NULL; - struct pf_rule_actions act; - u_short reason; - int rewrite = 0; - int tag = -1; - int asd = 0; - int match = 0; - int state_icmp = 0, icmp_dir = 0; - u_int16_t virtual_type, virtual_id; - u_int8_t icmptype = 0, icmpcode = 0; - int action = PF_DROP; +#define test_status (tctx->_test_status) +#define pd (tctx->_pd) +#define act (tctx->_act) +#define icmpcode (tctx->_icmpcode) +#define icmptype (tctx->_icmptype) +#define icmp_dir (tctx->_icmp_dir) +#define state_icmp (tctx->_state_icmp) 
+#define _tag (tctx->_tag) +#define reason (tctx->_reason) +#define ri (tctx->_ri) +#define sns (tctx->_sns) +#define _rules (tctx->_rules) +#define nr (tctx->_nr) +#define rm (tctx->_rm) +#define _a (tctx->_a) +#define am (tctx->_am) +#define rsm (tctx->_rsm) +#define arsm (tctx->_arsm) +#define aruleset (tctx->_aruleset) +#define th (tctx->_pd->hdr.tcp) + struct pf_rule *r; - bzero(&act, sizeof(act)); - bzero(sns, sizeof(sns)); - act.rtableid = pd->rdomain; - SLIST_INIT(&rules); - - if (pd->dir == PF_IN && if_congested()) { - REASON_SET(&reason, PFRES_CONGEST); - return (PF_DROP); - } - - switch (pd->virtual_proto) { - case IPPROTO_ICMP: - icmptype = pd->hdr.icmp->icmp_type; - icmpcode = pd->hdr.icmp->icmp_code; - state_icmp = pf_icmp_mapping(pd, icmptype, - &icmp_dir, &virtual_id, &virtual_type); - if (icmp_dir == PF_IN) { - pd->osport = pd->nsport = virtual_id; - pd->odport = pd->ndport = virtual_type; - } else { - pd->osport = pd->nsport = virtual_type; - pd->odport = pd->ndport = virtual_id; - } - break; -#ifdef INET6 - case IPPROTO_ICMPV6: - icmptype = pd->hdr.icmp6->icmp6_type; - icmpcode = pd->hdr.icmp6->icmp6_code; - state_icmp = pf_icmp_mapping(pd, icmptype, - &icmp_dir, &virtual_id, &virtual_type); - if (icmp_dir == PF_IN) { - pd->osport = pd->nsport = virtual_id; - pd->odport = pd->ndport = virtual_type; - } else { - pd->osport = pd->nsport = virtual_type; - pd->odport = pd->ndport = virtual_id; - } - break; -#endif /* INET6 */ - } - - ruleset = &pf_main_ruleset; - r = TAILQ_FIRST(pf_main_ruleset.rules.active.ptr); + r = TAILQ_FIRST(ruleset->rules.active.ptr); while (r != NULL) { r->evaluations++; PF_TEST_ATTRIB((pfi_kif_match(r->kif, pd->kif) == r->ifnot), @@ -3233,7 +3180,7 @@ pf_test_rule(struct pf_pdesc *pd, struct PF_TEST_ATTRIB((r->prob && r->prob <= arc4random_uniform(UINT_MAX - 1) + 1), TAILQ_NEXT(r, entries)); - PF_TEST_ATTRIB((r->match_tag && !pf_match_tag(pd->m, r, &tag)), + PF_TEST_ATTRIB((r->match_tag && !pf_match_tag(pd->m, r, &_tag)), 
TAILQ_NEXT(r, entries)); PF_TEST_ATTRIB((r->rcv_kif && pf_match_rcvif(pd->m, r) == r->rcvifnot), @@ -3244,57 +3191,177 @@ pf_test_rule(struct pf_pdesc *pd, struct /* FALLTHROUGH */ if (r->tag) - tag = r->tag; + _tag = r->tag; if (r->anchor == NULL) { if (r->action == PF_MATCH) { if ((ri = pool_get(&pf_rule_item_pl, PR_NOWAIT)) == NULL) { REASON_SET(&reason, PFRES_MEMORY); - goto cleanup; + test_status = PF_FAIL; + break; } ri->r = r; /* order is irrelevant */ - SLIST_INSERT_HEAD(&rules, ri, entry); + SLIST_INSERT_HEAD(&_rules, ri, entry); + ri = NULL; pf_rule_to_actions(r, &act); if (r->rule_flag & PFRULE_AFTO) pd->naf = r->naf; if (pf_get_transaddr(r, pd, sns, &nr) == -1) { REASON_SET(&reason, PFRES_TRANSLATE); - goto cleanup; + test_status = PF_FAIL; + break; } #if NPFLOG > 0 if (r->log) { REASON_SET(&reason, PFRES_MATCH); - PFLOG_PACKET(pd, reason, r, a, ruleset, + PFLOG_PACKET(pd, reason, r, _a, ruleset, NULL); } #endif /* NPFLOG > 0 */ } else { - match = asd; + /* + * found matching r + */ *rm = r; - *am = a; + /* + * anchor, with ruleset, where r belongs to + */ + *am = _a; + /* + * ruleset where r belongs to + */ *rsm = ruleset; + /* + * ruleset, where anchor belongs to. 
+ */ arsm = aruleset; } #if NPFLOG > 0 if (act.log & PF_LOG_MATCHES) - pf_log_matches(pd, r, a, ruleset, &rules); + pf_log_matches(pd, r, _a, ruleset, &_rules); #endif /* NPFLOG > 0 */ - if (r->quick) + if (r->quick) { + test_status = PF_QUICK; break; - r = TAILQ_NEXT(r, entries); + } } else { - aruleset = ruleset; - pf_step_into_anchor(&asd, &ruleset, &r, &a); + _a = r; /* remember anchor */ + aruleset = ruleset; /* and its ruleset */ + if (pf_step_into_anchor(tctx, r) != PF_OK) { + break; + } } + r = TAILQ_NEXT(r, entries); +nextrule:; + } - nextrule: - if (r == NULL && pf_step_out_of_anchor(&asd, &ruleset, - &r, &a, &match)) - break; + return (test_status); +#undef test_status +#undef pd +#undef act +#undef icmpcode +#undef icmptype +#undef icmp_dir +#undef state_icmp +#undef _tag +#undef reason +#undef ri +#undef sns +#undef _rules +#undef nr +#undef rm +#undef _a +#undef am +#undef rsm +#undef arsm +#undef aruleset +#undef th +} + +int +pf_test_rule(struct pf_pdesc *pd, struct pf_rule **rm, struct pf_state **sm, + struct pf_rule **am, struct pf_ruleset **rsm) +{ +#define act (tctx._act) +#define sns (tctx._sns) +#define _rules (tctx._rules) +#define icmptype (tctx._icmptype) +#define icmpcode (tctx._icmpcode) +#define icmp_dir (tctx._icmp_dir) +#define state_icmp (tctx._state_icmp) +#define reason (tctx._reason) +#define aruleset (tctx._aruleset) +#define arsm (tctx._arsm) +#define nr (tctx._nr) +#define th (tctx._pd->hdr.tcp) +#define _tag (tctx._tag) +#define ri (tctx._ri) + struct pf_rule *r = NULL; + struct pf_rule *a = NULL; + struct pf_ruleset *ruleset = NULL; + struct pf_state_key *skw = NULL, *sks = NULL; + int rewrite = 0; + u_int16_t virtual_type, virtual_id; + int action = PF_DROP; + struct pf_test_ctx tctx; + int rv; + + bzero(&tctx, sizeof(tctx)); + tctx._pd = pd; + tctx._rm = rm; + tctx._am = am; + tctx._rsm = rsm; + act.rtableid = pd->rdomain; + SLIST_INIT(&_rules); + + if (pd->dir == PF_IN && if_congested()) { + REASON_SET(&reason, 
PFRES_CONGEST); + return (PF_DROP); } + + switch (pd->virtual_proto) { + case IPPROTO_ICMP: + icmptype = pd->hdr.icmp->icmp_type; + icmpcode = pd->hdr.icmp->icmp_code; + state_icmp = pf_icmp_mapping(pd, icmptype, + &icmp_dir, &virtual_id, &virtual_type); + if (icmp_dir == PF_IN) { + pd->osport = pd->nsport = virtual_id; + pd->odport = pd->ndport = virtual_type; + } else { + pd->osport = pd->nsport = virtual_type; + pd->odport = pd->ndport = virtual_id; + } + break; +#ifdef INET6 + case IPPROTO_ICMPV6: + icmptype = pd->hdr.icmp6->icmp6_type; + icmpcode = pd->hdr.icmp6->icmp6_code; + state_icmp = pf_icmp_mapping(pd, icmptype, + &icmp_dir, &virtual_id, &virtual_type); + if (icmp_dir == PF_IN) { + pd->osport = pd->nsport = virtual_id; + pd->odport = pd->ndport = virtual_type; + } else { + pd->osport = pd->nsport = virtual_type; + pd->odport = pd->ndport = virtual_id; + } + break; +#endif /* INET6 */ + } + + ruleset = &pf_main_ruleset; + rv = pf_match_rule(&tctx, ruleset); + if (rv == PF_FAIL) { + /* + * Reason has been set in pf_match_rule() already. 
+ */ + goto cleanup; + } + r = *rm; /* matching rule */ a = *am; /* rule that defines an anchor containing 'r' */ ruleset = *rsm; /* ruleset of the anchor defined by the rule 'a' */ @@ -3314,7 +3381,7 @@ pf_test_rule(struct pf_pdesc *pd, struct if (r->log) PFLOG_PACKET(pd, reason, r, a, ruleset, NULL); if (act.log & PF_LOG_MATCHES) - pf_log_matches(pd, r, a, ruleset, &rules); + pf_log_matches(pd, r, a, ruleset, &_rules); #endif /* NPFLOG > 0 */ if (pd->virtual_proto != PF_VPROTO_FRAGMENT && @@ -3357,7 +3424,7 @@ pf_test_rule(struct pf_pdesc *pd, struct if (r->action == PF_DROP) goto cleanup; - pf_tag_packet(pd->m, tag, act.rtableid); + pf_tag_packet(pd->m, _tag, act.rtableid); if (act.rtableid >= 0 && rtable_l2(act.rtableid) != pd->rdomain) pd->destchg = 1; @@ -3389,7 +3456,7 @@ pf_test_rule(struct pf_pdesc *pd, struct } action = pf_create_state(pd, r, a, nr, &skw, &sks, &rewrite, - sm, tag, &rules, &act, sns); + sm, _tag, &_rules, &act, sns); if (action != PF_PASS) goto cleanup; @@ -3408,8 +3475,8 @@ pf_test_rule(struct pf_pdesc *pd, struct virtual_type, icmp_dir); } } else { - while ((ri = SLIST_FIRST(&rules))) { - SLIST_REMOVE_HEAD(&rules, entry); + while ((ri = SLIST_FIRST(&_rules))) { + SLIST_REMOVE_HEAD(&_rules, entry); pool_put(&pf_rule_item_pl, ri); } } @@ -3445,12 +3512,26 @@ pf_test_rule(struct pf_pdesc *pd, struct return (PF_PASS); cleanup: - while ((ri = SLIST_FIRST(&rules))) { - SLIST_REMOVE_HEAD(&rules, entry); + while ((ri = SLIST_FIRST(&_rules))) { + SLIST_REMOVE_HEAD(&_rules, entry); pool_put(&pf_rule_item_pl, ri); } return (action); +#undef act +#undef sns +#undef _rules +#undef icmptype +#undef icmpcode +#undef icmp_dir +#undef state_icmp +#undef reason +#undef aruleset +#undef arsm +#undef nr +#undef th +#undef _tag +#undef ri } static __inline int