Re: pf: percpu anchor stacks
On Fri, May 19, 2017 at 11:47:21PM +0200, Alexandr Nedvedicky wrote: > would you be able to try patch below to check if it will fix pf_forward > failures? Yes, this fixes it. OK bluhm@ > thanks a lot > and sorry for inconveniences Thanks for the quick fix. And there was no inconvenience; I have written the pf tests to find regressions. bluhm > 8<---8<---8<--8< > diff -r eb40d8d52679 src/sys/net/pf.c > --- a/src/sys/net/pf.c Fri May 19 23:35:22 2017 +0200 > +++ b/src/sys/net/pf.c Fri May 19 23:40:35 2017 +0200 > @@ -3644,6 +3644,7 @@ pf_test_rule(struct pf_pdesc *pd, struct > ctx.rsm = rsm; > ctx.th = &pd->hdr.tcp; > ctx.act.rtableid = pd->rdomain; > + ctx.tag = -1; > SLIST_INIT(&ctx.rules); > > if (pd->dir == PF_IN && if_congested()) { > 8<---8<---8<--8<
Re: pf: percpu anchor stacks
Hello, would you be able to try the patch below to check whether it fixes the pf_forward failures? Thanks a lot, and sorry for the inconvenience. regards sasha 8<---8<---8<--8< diff -r eb40d8d52679 src/sys/net/pf.c --- a/src/sys/net/pf.c Fri May 19 23:35:22 2017 +0200 +++ b/src/sys/net/pf.c Fri May 19 23:40:35 2017 +0200 @@ -3644,6 +3644,7 @@ pf_test_rule(struct pf_pdesc *pd, struct ctx.rsm = rsm; ctx.th = &pd->hdr.tcp; ctx.act.rtableid = pd->rdomain; + ctx.tag = -1; SLIST_INIT(&ctx.rules); if (pd->dir == PF_IN && if_congested()) { 8<---8<---8<--8<
Re: pf: percpu anchor stacks
Hello, On Fri, May 19, 2017 at 06:10:54PM +0200, Alexander Bluhm wrote: > On Mon, May 15, 2017 at 03:19:19PM +0200, Alexandr Nedvedicky wrote: > > I'm attaching updated final patch, which accepts your suggestion. > > I think this broke sys/net/pf_forward. > http://bluhm.genua.de/regress/results/regress.html > When backing out pf.c rev 1.1024 it works again. > > I guess it is a problem with tagged route-to rules in an anchor. > I cannot investigate right now, but will do later. > I have not seen those failures when running the pf_forward test. I guess my test setup is somewhat broken. I'll need your help to get it into shape. I'll try to follow up on this off-list. regards sasha
Re: pf: percpu anchor stacks
On Mon, May 15, 2017 at 03:19:19PM +0200, Alexandr Nedvedicky wrote: > I'm attaching updated final patch, which accepts your suggestion. I think this broke sys/net/pf_forward. http://bluhm.genua.de/regress/results/regress.html When backing out pf.c rev 1.1024 it works again. I guess it is a problem with tagged route-to rules in an anchor. I cannot investigate right now, but will do later. bluhm > > thanks and > regards > sasha > > 8<---8<---8<--8< > diff -r d1adecdc78cc src/sys/net/pf.c > --- a/src/sys/net/pf.cFri May 12 00:09:06 2017 +0200 > +++ b/src/sys/net/pf.cMon May 15 13:36:45 2017 +0200 > @@ -119,12 +119,54 @@ u_char pf_tcp_secret[16]; > int pf_tcp_secret_init; > int pf_tcp_iss_off; > > -struct pf_anchor_stackframe { > - struct pf_ruleset *rs; > - struct pf_rule *r; > - struct pf_anchor_node *parent; > - struct pf_anchor*child; > -} pf_anchor_stack[64]; > +enum pf_test_status { > + PF_TEST_FAIL = -1, > + PF_TEST_OK, > + PF_TEST_QUICK > +}; > + > +struct pf_test_ctx { > + enum pf_test_status test_status; > + struct pf_pdesc *pd; > + struct pf_rule_actionsact; > + u_int8_t icmpcode; > + u_int8_t icmptype; > + int icmp_dir; > + int state_icmp; > + int tag; > + u_short reason; > + struct pf_rule_item *ri; > + struct pf_src_node *sns[PF_SN_MAX]; > + struct pf_rule_slist rules; > + struct pf_rule *nr; > + struct pf_rule **rm; > + struct pf_rule *a; > + struct pf_rule **am; > + struct pf_ruleset **rsm; > + struct pf_ruleset*arsm; > + struct pf_ruleset*aruleset; > + struct tcphdr*th; > + int depth; > +}; > + > +#define PF_ANCHOR_STACK_MAX 64 > + > +/* > + * Cannot fold into pf_pdesc directly, unknown storage size outside pf.c. > + * Keep in sync with union pf_headers in pflog_bpfcopy() in if_pflog.c. 
> + */ > +union pf_headers { > + struct tcphdr tcp; > + struct udphdr udp; > + struct icmp icmp; > +#ifdef INET6 > + struct icmp6_hdricmp6; > + struct mld_hdr mld; > + struct nd_neighbor_solicit nd_ns; > +#endif /* INET6 */ > +}; > + > + > > struct pool pf_src_tree_pl, pf_rule_pl, pf_queue_pl; > struct pool pf_state_pl, pf_state_key_pl, pf_state_item_pl; > @@ -211,11 +253,8 @@ struct pf_state *pf_find_state(struct p > struct pf_state_key_cmp *, u_int, struct mbuf *); > int pf_src_connlimit(struct pf_state **); > int pf_match_rcvif(struct mbuf *, struct pf_rule *); > -void pf_step_into_anchor(int *, struct pf_ruleset **, > - struct pf_rule **, struct pf_rule **); > -int pf_step_out_of_anchor(int *, struct pf_ruleset **, > - struct pf_rule **, struct pf_rule **, > - int *); > +int pf_step_into_anchor(struct pf_test_ctx *, struct > pf_rule *); > +int pf_match_rule(struct pf_test_ctx *, struct pf_ruleset > *); > void pf_counters_inc(int, struct pf_pdesc *, > struct pf_state *, struct pf_rule *, > struct pf_rule *); > @@ -3020,74 +3059,39 @@ pf_tag_packet(struct mbuf *m, int tag, i > m->m_pkthdr.ph_rtableid = (u_int)rtableid; > } > > -void > -pf_step_into_anchor(int *depth, struct pf_ruleset **rs, > -struct pf_rule **r, struct pf_rule **a) > +enum pf_test_status > +pf_step_into_anchor(struct pf_test_ctx *ctx, struct pf_rule *r) > { > - struct pf_anchor_stackframe *f; > - > - if (*depth >= sizeof(pf_anchor_stack) / > - sizeof(pf_anchor_stack[0])) { > - log(LOG_ERR, "pf: anchor stack overflow\n"); > - *r = TAILQ_NEXT(*r, entries); > - return; > - } else if (a != NULL) > - *a = *r; > - f = pf_anchor_stack + (*depth)++; > - f->rs = *rs; > - f->r = *r; > - if ((*r)->anchor_wildcard) { > - f->parent = &(*r)->anchor->children; > - if ((f->child = RB_MIN(pf_anchor_node, f->parent)) == NULL) { > - *r = NULL; > - return; > - } > - *rs = >child->ruleset; > - } else { > - f->parent = NULL; > - f->child = NULL; > - *rs = &(*r)->anchor->ruleset; > - } > - *r = 
TAILQ_FIRST((*rs)->rules.active.ptr); > -} > - > -int >
Re: pf: percpu anchor stacks
On Mon, May 15, 2017 at 15:19 +0200, Alexandr Nedvedicky wrote: > Hello, > > > Now *is* the time to commit the first step, the refactoring. Once > > that's done we can discuss the introduction of the context. > > > > Could you come up with such diff? > > first of all: I have not managed to finish the re-factoring step yet, work > is still in progress. I stole some cycles from other projects, but it was > not enough apparently. Must try harder next time... > > > > > > Does this pass pfctl regress tests? > > > > > > I'm about to run those tests for OpenBSD. > > > > Did you manage to do that? > > I have some update on testing of final patch. I've used pf_forward tests > to > make sure the code I'm changing gets executed. I'm still working on > testcase, which covers deeper anchor tree with once-rules. > > the pf_forward tests show no harm caused by my changes, though I saw some > failures: > > Makefile:217 'run-regress-udp-inet-RTT_IN' > Makefile:217 'run-regress-udp-inet6-ECO_IN' > Makefile:217 'run-regress-udp-inet6-ECO_OUT' > Makefile:217 'run-regress-udp-inet6-RDR_IN' > Makefile:217 'run-regress-udp-inet6-RDR_OUT' > Makefile:217 'run-regress-udp-inet6-RTT_IN' > Makefile:215 'run-regress-udp-inet6-RPT_OUT' > Makefile:257 'run-regress-traceroute-udp-inet6-AF_IN' > > I could see same failures in baseline (tree _without_ my changes). I took > a > closer look to find out what's going on there. I took a tcpdump at ECO: > # > # tcpdump -i vnet1 running on ECO (192.168.214.188, 192.168.3.20) > # > 13:27:31.712955 192.168.1.10.42707 > 192.168.214.188.echo: udp 3 > 13:27:31.713616 192.168.3.20.echo > 192.168.1.10.42707: udp 3 > 13:27:31.714693 192.168.1.10 > 192.168.3.20: icmp: 192.168.1.10 > udp port 42707 unreachable > # > # output above shows we get answer from .3.20 instead of .214.188 > # looks as a kind of yet another bug. > # > > There are multiple IP addresses bound to ECO IN/OUT interface. 
However > UDP socket at ECO always answers using primary IP address bound to ECO > interface. The answer triggers ICMP port unreachable at SRC (192.168.1.10) > > > > > - s/test_status/action/ as it's done everywhere else? > > > > > > I've opted to test_status, because it's something different to > > > 'action' > > > as we use it in current code. > > > > I agree with you for test_status. What about naming the enum and use it > > instead of 'int' for the variable? This implicitly documents the possible > > option and allow the compiler to check for missing cases in switch. > > I'm attaching updated final patch, which accepts your suggestion. > > thanks and > regards > sasha > I think you can go ahead with your change. OK mikeb
Re: pf: percpu anchor stacks
Hello, > Now *is* the time to commit the first step, the refactoring. Once > that's done we can discuss the introduction of the context. > > Could you come up with such diff? first of all: I have not managed to finish the re-factoring step yet, work is still in progress. I stole some cycles from other projects, but it was not enough apparently. Must try harder next time... > > > Does this pass pfctl regress tests? > > > > I'm about to run those tests for OpenBSD. > > Did you manage to do that? I have some update on testing of final patch. I've used pf_forward tests to make sure the code I'm changing gets executed. I'm still working on testcase, which covers deeper anchor tree with once-rules. the pf_forward tests show no harm caused by my changes, though I saw some failures: Makefile:217 'run-regress-udp-inet-RTT_IN' Makefile:217 'run-regress-udp-inet6-ECO_IN' Makefile:217 'run-regress-udp-inet6-ECO_OUT' Makefile:217 'run-regress-udp-inet6-RDR_IN' Makefile:217 'run-regress-udp-inet6-RDR_OUT' Makefile:217 'run-regress-udp-inet6-RTT_IN' Makefile:215 'run-regress-udp-inet6-RPT_OUT' Makefile:257 'run-regress-traceroute-udp-inet6-AF_IN' I could see same failures in baseline (tree _without_ my changes). I took a closer look to find out what's going on there. I took a tcpdump at ECO: # # tcpdump -i vnet1 running on ECO (192.168.214.188, 192.168.3.20) # 13:27:31.712955 192.168.1.10.42707 > 192.168.214.188.echo: udp 3 13:27:31.713616 192.168.3.20.echo > 192.168.1.10.42707: udp 3 13:27:31.714693 192.168.1.10 > 192.168.3.20: icmp: 192.168.1.10 udp port 42707 unreachable # # output above shows we get answer from .3.20 instead of .214.188 # looks as a kind of yet another bug. # There are multiple IP addresses bound to ECO IN/OUT interface. However UDP socket at ECO always answers using primary IP address bound to ECO interface. The answer triggers ICMP port unreachable at SRC (192.168.1.10) > > > - s/test_status/action/ as it's done everywhere else? 
> > > > I've opted to test_status, because it's something different to 'action' > > as we use it in current code. > > I agree with you for test_status. What about naming the enum and use it > instead of 'int' for the variable? This implicitly documents the possible > option and allow the compiler to check for missing cases in switch. I'm attaching updated final patch, which accepts your suggestion. thanks and regards sasha 8<---8<---8<--8< diff -r d1adecdc78cc src/sys/net/pf.c --- a/src/sys/net/pf.c Fri May 12 00:09:06 2017 +0200 +++ b/src/sys/net/pf.c Mon May 15 13:36:45 2017 +0200 @@ -119,12 +119,54 @@ u_char pf_tcp_secret[16]; int pf_tcp_secret_init; int pf_tcp_iss_off; -struct pf_anchor_stackframe { - struct pf_ruleset *rs; - struct pf_rule *r; - struct pf_anchor_node *parent; - struct pf_anchor*child; -} pf_anchor_stack[64]; +enum pf_test_status { + PF_TEST_FAIL = -1, + PF_TEST_OK, + PF_TEST_QUICK +}; + +struct pf_test_ctx { + enum pf_test_status test_status; + struct pf_pdesc *pd; + struct pf_rule_actionsact; + u_int8_t icmpcode; + u_int8_t icmptype; + int icmp_dir; + int state_icmp; + int tag; + u_short reason; + struct pf_rule_item *ri; + struct pf_src_node *sns[PF_SN_MAX]; + struct pf_rule_slist rules; + struct pf_rule *nr; + struct pf_rule **rm; + struct pf_rule *a; + struct pf_rule **am; + struct pf_ruleset **rsm; + struct pf_ruleset*arsm; + struct pf_ruleset*aruleset; + struct tcphdr*th; + int depth; +}; + +#definePF_ANCHOR_STACK_MAX 64 + +/* + * Cannot fold into pf_pdesc directly, unknown storage size outside pf.c. + * Keep in sync with union pf_headers in pflog_bpfcopy() in if_pflog.c. 
+ */ +union pf_headers { + struct tcphdr tcp; + struct udphdr udp; + struct icmp icmp; +#ifdef INET6 + struct icmp6_hdricmp6; + struct mld_hdr mld; + struct nd_neighbor_solicit nd_ns; +#endif /* INET6 */ +}; + + struct pool pf_src_tree_pl, pf_rule_pl, pf_queue_pl; struct pool pf_state_pl, pf_state_key_pl, pf_state_item_pl; @@ -211,11 +253,8 @@ struct pf_state*pf_find_state(struct p
Re: pf: percpu anchor stacks
On 28/03/17(Tue) 13:02, Alexandr Nedvedicky wrote: > [...] > > > > - s/test_status/action/ as it's done everywhere else? > > I've opted to test_status, because it's something different to 'action' > as we use it in current code. I agree with you on test_status. What about naming the enum and using it instead of 'int' for the variable? This implicitly documents the possible options and allows the compiler to check for missing cases in a switch. > > Does this pass pfctl regress tests? > > I'm about to run those tests for OpenBSD. Did you manage to do that? > > While I haven't noticed anything criminal here, it makes me > > wonder if it'd be possible to do this change in a few steps: > > factor out rule maching from pf_test_rule and then bring in > > anchor changes? > > > > if I understand you right, you basically want me to make change > in two steps: > > the first step splits current pf_test_rule() to pf_match_rule() and > pf_test_rule() > > the second step will kill global anchor stack array by introducing > a true recursion. The patch will remove pf_step_out_of_anchor() > function. > > I think I can do it. And also as Theo pointed out there is no rush > to get that patch to tree. Now *is* the time to commit the first step, the refactoring. Once that's done we can discuss the introduction of the context. Could you come up with such a diff? Cheers, Martin
Re: pf: percpu anchor stacks
Hello Mike, thank you for looking at my patch. I accept most of your comments. I believe the items below deserve further discussion. > - instead of checking "rv" against 0 in the "break on quick >rule or failure" I'd like to see an actual check against >PF_TEST_* values so that it's grep'able; This is the 'edited' diff highlighting the place your comment relates to. It shows the change relative to the patch I sent in an earlier mail [1]. [1] http://openbsd-archive.7691.n7.nabble.com/pf-percpu-anchor-stacks-tt314935.html#a315309 8<---8<---8<--8< -pf_step_into_anchor(struct pf_test_ctx *cx, struct pf_rule *r) +pf_step_into_anchor(struct pf_test_ctx *ctx, struct pf_rule *r) { int rv; if (r->anchor_wildcard) { struct pf_anchor *child; rv = PF_TEST_OK; RB_FOREACH(child, pf_anchor_node, &r->anchor->children) { - rv = pf_match_rule(cx, &child->ruleset); - if (rv != 0) { + rv = pf_match_rule(ctx, &child->ruleset); + if ((rv == PF_TEST_QUICK) || (rv == PF_TEST_FAIL)) { /* -* break on quick rule or failure +* we either hit a rule with quick action +* (more likely), or hit some runtime +* error (e.g. pool_get() failure). */ break; } } } else { - rv = pf_match_rule(cx, &r->anchor->ruleset); - } - - cx->depth--; + rv = pf_match_rule(ctx, &r->anchor->ruleset); + } + + ctx->depth--; return (rv); } 8<---8<---8<--8< > > - s/test_status/action/ as it's done everywhere else? I've opted for test_status, because it's something different from 'action' as we use it in the current code. The 'action' usually comes from a rule (or state match) and dictates what PF should do with the packet (pass/block/translate/...). The test_status rather indicates the status of rule processing, which is one of: terminate due to failure (PF_TEST_FAIL), caused by a runtime error; continue (PF_TEST_OK); terminate (PF_TEST_QUICK) due to hitting a rule with 'quick' action. > > - I'm not certain I like extra set of PASS/BLOCK macros. >I know you want to represent the "quick" pass separately, >but perhaps there's a way to do it while using PF_PASS...
I somewhat understand your point, but as I've said earlier, I see 'action' and 'test_status' as two distinct things, which I prefer to clearly separate. However I don't insist on the current code in patch. I think the existing enum, which defines PF_PASS/PF_BLOCK/... can be extended. If we will go this way, then I would rather do s/test_status/virtual_action using virtual, follows the pattern, which got established for protocol: proto & virtual_proto > > Does this pass pfctl regress tests? I'm about to run those tests for OpenBSD. > > While I haven't noticed anything criminal here, it makes me > wonder if it'd be possible to do this change in a few steps: > factor out rule maching from pf_test_rule and then bring in > anchor changes? > if I understand you right, you basically want me to make change in two steps: the first step splits current pf_test_rule() to pf_match_rule() and pf_test_rule() the second step will kill global anchor stack array by introducing a true recursion. The patch will remove pf_step_out_of_anchor() function. I think I can do it. And also as Theo pointed out there is no rush to get that patch to tree. thanks and regards sasha 8<---8<---8<--8< diff -r b483ee1b4a65 src/sys/net/pf.c --- a/src/sys/net/pf.c Tue Mar 28 10:39:14 2017 +0200 +++ b/src/sys/net/pf.c Tue Mar 28 11:44:12 2017 +0200 @@ -119,12 +119,53 @@ u_char pf_tcp_secret[16]; int pf_tcp_secret_init; int pf_tcp_iss_off; -struct pf_anchor_stackframe { - struct pf_ruleset *rs; - struct pf_rule *r; - struct pf_anchor_node *parent; - struct pf_anchor*child; -} pf_anchor_stack[64]; +struct pf_test_ctx { + int test_status; + struct pf_pdesc *pd; + struct pf_rule_actions act; + u_int8_ticmpcode; + u_int8_ticmptype; + int icmp_dir; + int state_icmp; + int tag; + u_short reason; +
Re: pf: percpu anchor stacks
On Fri, Mar 24, 2017 at 12:19 +0100, Alexandr Nedvedicky wrote: > Hello, > > I'm attaching patch, which removes stack-as-a-global variable. > it's updated patch [1] to current tree. > > sorry for being pushy advocating my old, rusty patch. > I think your diff is the way to go indeed. If we can avoid using the global stack altogether, then all the better. This diff also splits the giant pf_test_rule into several chunks, which is a good thing in my opinion. A few random comments: - some lines appear to be longer than 80 characters; - "cx" is an uncommon abbreviation for a "context" in OpenBSD, we normally use "ctx"; - PF_TEST_ATTRIB could use a "continue" statement instead of the goto; - instead of checking "rv" against 0 in the "break on quick rule or failure" I'd like to see an actual check against PF_TEST_* values so that it's grep'able; - s/test_status/action/ as it's done everywhere else? - I'm not certain I like the extra set of PASS/BLOCK macros. I know you want to represent the "quick" pass separately, but perhaps there's a way to do it while using PF_PASS... Does this pass the pfctl regress tests? While I haven't noticed anything criminal here, it makes me wonder if it'd be possible to do this change in a few steps: factor out rule matching from pf_test_rule and then bring in anchor changes?
> thanks and > regards > sasha > > [1] > http://openbsd-archive.7691.n7.nabble.com/Re-PF-SMP-making-anchor-stack-multithreaded-tt275915.html#none > > 8<---8<---8<--8< > diff -r d6e3a6338889 src/sys/net/pf.c > --- a/src/sys/net/pf.cMon Mar 20 01:10:40 2017 +0100 > +++ b/src/sys/net/pf.cFri Mar 24 11:28:18 2017 +0100 > @@ -119,12 +119,37 @@ u_char pf_tcp_secret[16]; > int pf_tcp_secret_init; > int pf_tcp_iss_off; > > -struct pf_anchor_stackframe { > - struct pf_ruleset *rs; > - struct pf_rule *r; > - struct pf_anchor_node *parent; > - struct pf_anchor*child; > -} pf_anchor_stack[64]; > +struct pf_test_ctx { > + int test_status; > + struct pf_pdesc *pd; > + struct pf_rule_actions act; > + u_int8_ticmpcode; > + u_int8_ticmptype; > + int icmp_dir; > + int state_icmp; > + int tag; > + u_short reason; > + struct pf_rule_item *ri; > + struct pf_src_node *sns[PF_SN_MAX]; > + struct pf_rule_slistrules; > + struct pf_rule *nr; > + struct pf_rule **rm; > + struct pf_rule *a; > + struct pf_rule **am; > + struct pf_ruleset **rsm; > + struct pf_ruleset *arsm; > + struct pf_ruleset *aruleset; > + struct tcphdr *th; > + int depth; > +}; > + > +#define PF_ANCHOR_STACK_MAX 64 > + > +enum { > + PF_TEST_FAIL = -1, > + PF_TEST_OK, > + PF_TEST_QUICK > +}; > > struct pool pf_src_tree_pl, pf_rule_pl, pf_queue_pl; > struct pool pf_state_pl, pf_state_key_pl, pf_state_item_pl; > @@ -211,11 +236,8 @@ struct pf_state *pf_find_state(struct p > struct pf_state_key_cmp *, u_int, struct mbuf *); > int pf_src_connlimit(struct pf_state **); > int pf_match_rcvif(struct mbuf *, struct pf_rule *); > -void pf_step_into_anchor(int *, struct pf_ruleset **, > - struct pf_rule **, struct pf_rule **); > -int pf_step_out_of_anchor(int *, struct pf_ruleset **, > - struct pf_rule **, struct pf_rule **, > - int *); > +int pf_step_into_anchor(struct pf_test_ctx *, struct > pf_rule *); > +int pf_match_rule(struct pf_test_ctx *, struct pf_ruleset > *); > void pf_counters_inc(int, struct pf_pdesc *, > struct pf_state 
*, struct pf_rule *, > struct pf_rule *); > @@ -3019,74 +3041,37 @@ pf_tag_packet(struct mbuf *m, int tag, i > m->m_pkthdr.ph_rtableid = (u_int)rtableid; > } > > -void > -pf_step_into_anchor(int *depth, struct pf_ruleset **rs, > -struct pf_rule **r, struct pf_rule **a) > +int > +pf_step_into_anchor(struct pf_test_ctx *cx, struct pf_rule *r) > { > - struct pf_anchor_stackframe *f; > - > - if (*depth >= sizeof(pf_anchor_stack) / > - sizeof(pf_anchor_stack[0])) { > - log(LOG_ERR, "pf: anchor stack overflow\n"); > - *r = TAILQ_NEXT(*r, entries); > - return; > - } else if (a != NULL) > - *a = *r; > - f = pf_anchor_stack + (*depth)++; > - f->rs = *rs; > -
Re: pf: percpu anchor stacks
Hello, I'm attaching patch, which removes stack-as-a-global variable. it's updated patch [1] to current tree. sorry for being pushy advocating my old, rusty patch. thanks and regards sasha [1] http://openbsd-archive.7691.n7.nabble.com/Re-PF-SMP-making-anchor-stack-multithreaded-tt275915.html#none 8<---8<---8<--8< diff -r d6e3a6338889 src/sys/net/pf.c --- a/src/sys/net/pf.c Mon Mar 20 01:10:40 2017 +0100 +++ b/src/sys/net/pf.c Fri Mar 24 11:28:18 2017 +0100 @@ -119,12 +119,37 @@ u_char pf_tcp_secret[16]; int pf_tcp_secret_init; int pf_tcp_iss_off; -struct pf_anchor_stackframe { - struct pf_ruleset *rs; - struct pf_rule *r; - struct pf_anchor_node *parent; - struct pf_anchor*child; -} pf_anchor_stack[64]; +struct pf_test_ctx { + int test_status; + struct pf_pdesc *pd; + struct pf_rule_actions act; + u_int8_ticmpcode; + u_int8_ticmptype; + int icmp_dir; + int state_icmp; + int tag; + u_short reason; + struct pf_rule_item *ri; + struct pf_src_node *sns[PF_SN_MAX]; + struct pf_rule_slistrules; + struct pf_rule *nr; + struct pf_rule **rm; + struct pf_rule *a; + struct pf_rule **am; + struct pf_ruleset **rsm; + struct pf_ruleset *arsm; + struct pf_ruleset *aruleset; + struct tcphdr *th; + int depth; +}; + +#definePF_ANCHOR_STACK_MAX 64 + +enum { + PF_TEST_FAIL = -1, + PF_TEST_OK, + PF_TEST_QUICK +}; struct pool pf_src_tree_pl, pf_rule_pl, pf_queue_pl; struct pool pf_state_pl, pf_state_key_pl, pf_state_item_pl; @@ -211,11 +236,8 @@ struct pf_state*pf_find_state(struct p struct pf_state_key_cmp *, u_int, struct mbuf *); int pf_src_connlimit(struct pf_state **); int pf_match_rcvif(struct mbuf *, struct pf_rule *); -voidpf_step_into_anchor(int *, struct pf_ruleset **, - struct pf_rule **, struct pf_rule **); -int pf_step_out_of_anchor(int *, struct pf_ruleset **, -struct pf_rule **, struct pf_rule **, -int *); +int pf_step_into_anchor(struct pf_test_ctx *, struct pf_rule *); +int pf_match_rule(struct pf_test_ctx *, struct pf_ruleset *); voidpf_counters_inc(int, struct 
pf_pdesc *, struct pf_state *, struct pf_rule *, struct pf_rule *); @@ -3019,74 +3041,37 @@ pf_tag_packet(struct mbuf *m, int tag, i m->m_pkthdr.ph_rtableid = (u_int)rtableid; } -void -pf_step_into_anchor(int *depth, struct pf_ruleset **rs, -struct pf_rule **r, struct pf_rule **a) +int +pf_step_into_anchor(struct pf_test_ctx *cx, struct pf_rule *r) { - struct pf_anchor_stackframe *f; - - if (*depth >= sizeof(pf_anchor_stack) / - sizeof(pf_anchor_stack[0])) { - log(LOG_ERR, "pf: anchor stack overflow\n"); - *r = TAILQ_NEXT(*r, entries); - return; - } else if (a != NULL) - *a = *r; - f = pf_anchor_stack + (*depth)++; - f->rs = *rs; - f->r = *r; - if ((*r)->anchor_wildcard) { - f->parent = &(*r)->anchor->children; - if ((f->child = RB_MIN(pf_anchor_node, f->parent)) == NULL) { - *r = NULL; - return; - } - *rs = >child->ruleset; - } else { - f->parent = NULL; - f->child = NULL; - *rs = &(*r)->anchor->ruleset; - } - *r = TAILQ_FIRST((*rs)->rules.active.ptr); -} - -int -pf_step_out_of_anchor(int *depth, struct pf_ruleset **rs, -struct pf_rule **r, struct pf_rule **a, int *match) -{ - struct pf_anchor_stackframe *f; - int quick = 0; - - do { - if (*depth <= 0) - break; - f = pf_anchor_stack + *depth - 1; - if (f->parent != NULL && f->child != NULL) { - f->child = RB_NEXT(pf_anchor_node, f->parent, f->child); - if (f->child != NULL) { - *rs = >child->ruleset; - *r = TAILQ_FIRST((*rs)->rules.active.ptr); - if (*r == NULL) - continue; -
Re: pf: percpu anchor stacks
Hello, some time ago I sent a different patch [1], which touched the same functions. The old patch [1] basically splits pf_test_rule() into two functions: pf_test_rule() and pf_match_rule(), which walks the anchor stack recursively. The recursion depth is limited to 64. The memory footprint of true recursion is the same as that of the array of stack frames. The only difference is the place where the memory gets allocated. regards sasha [1] https://marc.info/?l=openbsd-tech&m=143902905917671&w=4