Re: pf: percpu anchor stacks

2017-05-19 Thread Alexander Bluhm
On Fri, May 19, 2017 at 11:47:21PM +0200, Alexandr Nedvedicky wrote:
> would you be able to try patch below to check if it will fix pf_forward 
> failures?

Yes, this fixes it.  OK bluhm@

> thanks a lot
> and sorry for inconveniences

Thanks for the quick fix.  And there was no inconvenience, I have
written the pf tests to find regressions.

bluhm

> 8<---8<---8<--8<
> diff -r eb40d8d52679 src/sys/net/pf.c
> --- a/src/sys/net/pf.c  Fri May 19 23:35:22 2017 +0200
> +++ b/src/sys/net/pf.c  Fri May 19 23:40:35 2017 +0200
> @@ -3644,6 +3644,7 @@ pf_test_rule(struct pf_pdesc *pd, struct
> ctx.rsm = rsm;
> ctx.th = >hdr.tcp;
> ctx.act.rtableid = pd->rdomain;
> +   ctx.tag = -1;
> SLIST_INIT();
>  
> if (pd->dir == PF_IN && if_congested()) {
> 8<---8<---8<--8<



Re: pf: percpu anchor stacks

2017-05-19 Thread Alexandr Nedvedicky
Hello,

would you be able to try patch below to check if it will fix pf_forward 
failures?

thanks a lot
and sorry for inconveniences

regards
sasha

8<---8<---8<--8<
diff -r eb40d8d52679 src/sys/net/pf.c
--- a/src/sys/net/pf.c  Fri May 19 23:35:22 2017 +0200
+++ b/src/sys/net/pf.c  Fri May 19 23:40:35 2017 +0200
@@ -3644,6 +3644,7 @@ pf_test_rule(struct pf_pdesc *pd, struct
ctx.rsm = rsm;
ctx.th = >hdr.tcp;
ctx.act.rtableid = pd->rdomain;
+   ctx.tag = -1;
SLIST_INIT();
 
if (pd->dir == PF_IN && if_congested()) {
8<---8<---8<--8<



Re: pf: percpu anchor stacks

2017-05-19 Thread Alexandr Nedvedicky
Hello,

On Fri, May 19, 2017 at 06:10:54PM +0200, Alexander Bluhm wrote:
> On Mon, May 15, 2017 at 03:19:19PM +0200, Alexandr Nedvedicky wrote:
> > I'm attaching updated final patch, which accepts your suggestion.
> 
> I think this broke sys/net/pf_forward.
> http://bluhm.genua.de/regress/results/regress.html
> When backing out pf.c rev 1.1024 it works again.
> 
> I guess it is a problem with tagged route-to rules in an anchor.
> I cannot investigate right now, but will do later.
> 

I have not seen those failures when running pf_forward test. I guess my
test set up is somewhat broken. I'll need your help to get it into shape.
I'll try to follow up on this off-list.


regards
sasha



Re: pf: percpu anchor stacks

2017-05-19 Thread Alexander Bluhm
On Mon, May 15, 2017 at 03:19:19PM +0200, Alexandr Nedvedicky wrote:
> I'm attaching updated final patch, which accepts your suggestion.

I think this broke sys/net/pf_forward.
http://bluhm.genua.de/regress/results/regress.html
When backing out pf.c rev 1.1024 it works again.

I guess it is a problem with tagged route-to rules in an anchor.
I cannot investigate right now, but will do later.

bluhm

> 
> thanks and
> regards
> sasha
> 
> 8<---8<---8<--8<
> diff -r d1adecdc78cc src/sys/net/pf.c
> --- a/src/sys/net/pf.cFri May 12 00:09:06 2017 +0200
> +++ b/src/sys/net/pf.cMon May 15 13:36:45 2017 +0200
> @@ -119,12 +119,54 @@ u_char   pf_tcp_secret[16];
>  int   pf_tcp_secret_init;
>  int   pf_tcp_iss_off;
>  
> -struct pf_anchor_stackframe {
> - struct pf_ruleset   *rs;
> - struct pf_rule  *r;
> - struct pf_anchor_node   *parent;
> - struct pf_anchor*child;
> -} pf_anchor_stack[64];
> +enum pf_test_status {
> + PF_TEST_FAIL = -1,
> + PF_TEST_OK,
> + PF_TEST_QUICK
> +};
> +
> +struct pf_test_ctx {
> + enum pf_test_status   test_status;
> + struct pf_pdesc  *pd;
> + struct pf_rule_actionsact;
> + u_int8_t  icmpcode;
> + u_int8_t  icmptype;
> + int   icmp_dir;
> + int   state_icmp;
> + int   tag;
> + u_short   reason;
> + struct pf_rule_item  *ri;
> + struct pf_src_node   *sns[PF_SN_MAX];
> + struct pf_rule_slist  rules;
> + struct pf_rule   *nr;
> + struct pf_rule  **rm;
> + struct pf_rule   *a;
> + struct pf_rule  **am;
> + struct pf_ruleset   **rsm;
> + struct pf_ruleset*arsm;
> + struct pf_ruleset*aruleset;
> + struct tcphdr*th;
> + int   depth;
> +};
> +
> +#define  PF_ANCHOR_STACK_MAX 64
> +
> +/*
> + * Cannot fold into pf_pdesc directly, unknown storage size outside pf.c.
> + * Keep in sync with union pf_headers in pflog_bpfcopy() in if_pflog.c.
> + */
> +union pf_headers {
> + struct tcphdr   tcp;
> + struct udphdr   udp;
> + struct icmp icmp;
> +#ifdef INET6
> + struct icmp6_hdricmp6;
> + struct mld_hdr  mld;
> + struct nd_neighbor_solicit nd_ns;
> +#endif /* INET6 */
> +};
> +
> +
>  
>  struct pool   pf_src_tree_pl, pf_rule_pl, pf_queue_pl;
>  struct pool   pf_state_pl, pf_state_key_pl, pf_state_item_pl;
> @@ -211,11 +253,8 @@ struct pf_state  *pf_find_state(struct p
>   struct pf_state_key_cmp *, u_int, struct mbuf *);
>  int   pf_src_connlimit(struct pf_state **);
>  int   pf_match_rcvif(struct mbuf *, struct pf_rule *);
> -void  pf_step_into_anchor(int *, struct pf_ruleset **,
> - struct pf_rule **, struct pf_rule **);
> -int   pf_step_out_of_anchor(int *, struct pf_ruleset **,
> -  struct pf_rule **, struct pf_rule **,
> -  int *);
> +int   pf_step_into_anchor(struct pf_test_ctx *, struct 
> pf_rule *);
> +int   pf_match_rule(struct pf_test_ctx *, struct pf_ruleset 
> *);
>  void  pf_counters_inc(int, struct pf_pdesc *,
>   struct pf_state *, struct pf_rule *,
>   struct pf_rule *);
> @@ -3020,74 +3059,39 @@ pf_tag_packet(struct mbuf *m, int tag, i
>   m->m_pkthdr.ph_rtableid = (u_int)rtableid;
>  }
>  
> -void
> -pf_step_into_anchor(int *depth, struct pf_ruleset **rs,
> -struct pf_rule **r, struct pf_rule **a)
> +enum pf_test_status
> +pf_step_into_anchor(struct pf_test_ctx *ctx, struct pf_rule *r)
>  {
> - struct pf_anchor_stackframe *f;
> -
> - if (*depth >= sizeof(pf_anchor_stack) /
> - sizeof(pf_anchor_stack[0])) {
> - log(LOG_ERR, "pf: anchor stack overflow\n");
> - *r = TAILQ_NEXT(*r, entries);
> - return;
> - } else if (a != NULL)
> - *a = *r;
> - f = pf_anchor_stack + (*depth)++;
> - f->rs = *rs;
> - f->r = *r;
> - if ((*r)->anchor_wildcard) {
> - f->parent = &(*r)->anchor->children;
> - if ((f->child = RB_MIN(pf_anchor_node, f->parent)) == NULL) {
> - *r = NULL;
> - return;
> - }
> - *rs = >child->ruleset;
> - } else {
> - f->parent = NULL;
> - f->child = NULL;
> - *rs = &(*r)->anchor->ruleset;
> - }
> - *r = TAILQ_FIRST((*rs)->rules.active.ptr);
> -}
> -
> -int
> 

Re: pf: percpu anchor stacks

2017-05-15 Thread Mike Belopuhov
On Mon, May 15, 2017 at 15:19 +0200, Alexandr Nedvedicky wrote:
> Hello,
> 
> > Now *is* the time to commit the first step, the refactoring.  Once
> > that's done we can discuss the introduction of the context.
> > 
> > Could you come up with such diff?
> 
> first of all: I have not managed to finish the re-factoring step yet, work
> is still in progress. I stole some cycles from other projects, but it was
> not enough apparently. Must try harder next time...
> 
> 
> > > > Does this pass pfctl regress tests?
> > > 
> > > I'm about to run those tests for OpenBSD.
> > 
> > Did you manage to do that?
> 
> I have some update on testing of final patch. I've used pf_forward tests to
> make sure the code I'm changing gets executed. I'm still working on
> testcase, which covers deeper anchor tree with once-rules.
> 
> the pf_forward tests show no harm caused by my changes, though I saw some
> failures:
> 
>   Makefile:217 'run-regress-udp-inet-RTT_IN'
>   Makefile:217 'run-regress-udp-inet6-ECO_IN'
>   Makefile:217 'run-regress-udp-inet6-ECO_OUT'
>   Makefile:217 'run-regress-udp-inet6-RDR_IN'
>   Makefile:217 'run-regress-udp-inet6-RDR_OUT'
>   Makefile:217 'run-regress-udp-inet6-RTT_IN'
>   Makefile:215 'run-regress-udp-inet6-RPT_OUT'
>   Makefile:257 'run-regress-traceroute-udp-inet6-AF_IN'
> 
> I could see same failures in baseline (tree _without_ my changes). I took a
> closer look to find out what's going on there. I took a tcpdump at ECO:
>   #
>   # tcpdump -i vnet1 running on ECO (192.168.214.188, 192.168.3.20)
>   #
>   13:27:31.712955 192.168.1.10.42707 > 192.168.214.188.echo: udp 3
>   13:27:31.713616 192.168.3.20.echo > 192.168.1.10.42707: udp 3
>   13:27:31.714693 192.168.1.10 > 192.168.3.20: icmp: 192.168.1.10
>   udp port 42707 unreachable
>   #
>   # output above shows we get answer from .3.20 instead of .214.188
>   # looks as a kind of yet another bug.
>   #
> 
> There are multiple IP addresses bound to ECO IN/OUT interface. However
> UDP socket at ECO always answers using primary IP address bound to ECO
> interface. The answer triggers ICMP port unreachable at SRC (192.168.1.10)
> 
> > > >  - s/test_status/action/ as it's done everywhere else?
> > > 
> > > I've opted to test_status, because it's something different to 'action'
> > > as we use it in current code.
> > 
> > I agree with you for test_status.  What about naming the enum and use it
> > instead of 'int' for the variable?  This implicitly documents the possible
> > option and allow the compiler to check for missing cases in switch.
> 
> I'm attaching updated final patch, which accepts your suggestion.
> 
> thanks and
> regards
> sasha
> 

I think you can go ahead with your change.  OK mikeb



Re: pf: percpu anchor stacks

2017-05-15 Thread Alexandr Nedvedicky
Hello,

> Now *is* the time to commit the first step, the refactoring.  Once
> that's done we can discuss the introduction of the context.
> 
> Could you come up with such diff?

first of all: I have not managed to finish the re-factoring step yet, work
is still in progress. I stole some cycles from other projects, but it was
not enough apparently. Must try harder next time...


> > > Does this pass pfctl regress tests?
> > 
> > I'm about to run those tests for OpenBSD.
> 
> Did you manage to do that?

I have some update on testing of final patch. I've used pf_forward tests to
make sure the code I'm changing gets executed. I'm still working on
testcase, which covers deeper anchor tree with once-rules.

the pf_forward tests show no harm caused by my changes, though I saw some
failures:

Makefile:217 'run-regress-udp-inet-RTT_IN'
Makefile:217 'run-regress-udp-inet6-ECO_IN'
Makefile:217 'run-regress-udp-inet6-ECO_OUT'
Makefile:217 'run-regress-udp-inet6-RDR_IN'
Makefile:217 'run-regress-udp-inet6-RDR_OUT'
Makefile:217 'run-regress-udp-inet6-RTT_IN'
Makefile:215 'run-regress-udp-inet6-RPT_OUT'
Makefile:257 'run-regress-traceroute-udp-inet6-AF_IN'

I could see same failures in baseline (tree _without_ my changes). I took a
closer look to find out what's going on there. I took a tcpdump at ECO:
#
# tcpdump -i vnet1 running on ECO (192.168.214.188, 192.168.3.20)
#
13:27:31.712955 192.168.1.10.42707 > 192.168.214.188.echo: udp 3
13:27:31.713616 192.168.3.20.echo > 192.168.1.10.42707: udp 3
13:27:31.714693 192.168.1.10 > 192.168.3.20: icmp: 192.168.1.10
udp port 42707 unreachable
#
# output above shows we get answer from .3.20 instead of .214.188
# looks as a kind of yet another bug.
#

There are multiple IP addresses bound to ECO IN/OUT interface. However
UDP socket at ECO always answers using primary IP address bound to ECO
interface. The answer triggers ICMP port unreachable at SRC (192.168.1.10)

> > >  - s/test_status/action/ as it's done everywhere else?
> > 
> > I've opted to test_status, because it's something different to 'action'
> > as we use it in current code.
> 
> I agree with you for test_status.  What about naming the enum and use it
> instead of 'int' for the variable?  This implicitly documents the possible
> option and allow the compiler to check for missing cases in switch.

I'm attaching updated final patch, which accepts your suggestion.

thanks and
regards
sasha

8<---8<---8<--8<
diff -r d1adecdc78cc src/sys/net/pf.c
--- a/src/sys/net/pf.c  Fri May 12 00:09:06 2017 +0200
+++ b/src/sys/net/pf.c  Mon May 15 13:36:45 2017 +0200
@@ -119,12 +119,54 @@ u_char pf_tcp_secret[16];
 int pf_tcp_secret_init;
 int pf_tcp_iss_off;
 
-struct pf_anchor_stackframe {
-   struct pf_ruleset   *rs;
-   struct pf_rule  *r;
-   struct pf_anchor_node   *parent;
-   struct pf_anchor*child;
-} pf_anchor_stack[64];
+enum pf_test_status {
+   PF_TEST_FAIL = -1,
+   PF_TEST_OK,
+   PF_TEST_QUICK
+};
+
+struct pf_test_ctx {
+   enum pf_test_status   test_status;
+   struct pf_pdesc  *pd;
+   struct pf_rule_actionsact;
+   u_int8_t  icmpcode;
+   u_int8_t  icmptype;
+   int   icmp_dir;
+   int   state_icmp;
+   int   tag;
+   u_short   reason;
+   struct pf_rule_item  *ri;
+   struct pf_src_node   *sns[PF_SN_MAX];
+   struct pf_rule_slist  rules;
+   struct pf_rule   *nr;
+   struct pf_rule  **rm;
+   struct pf_rule   *a;
+   struct pf_rule  **am;
+   struct pf_ruleset   **rsm;
+   struct pf_ruleset*arsm;
+   struct pf_ruleset*aruleset;
+   struct tcphdr*th;
+   int   depth;
+};
+
+#definePF_ANCHOR_STACK_MAX 64
+
+/*
+ * Cannot fold into pf_pdesc directly, unknown storage size outside pf.c.
+ * Keep in sync with union pf_headers in pflog_bpfcopy() in if_pflog.c.
+ */
+union pf_headers {
+   struct tcphdr   tcp;
+   struct udphdr   udp;
+   struct icmp icmp;
+#ifdef INET6
+   struct icmp6_hdricmp6;
+   struct mld_hdr  mld;
+   struct nd_neighbor_solicit nd_ns;
+#endif /* INET6 */
+};
+
+
 
 struct pool pf_src_tree_pl, pf_rule_pl, pf_queue_pl;
 struct pool pf_state_pl, pf_state_key_pl, pf_state_item_pl;
@@ -211,11 +253,8 @@ struct pf_state*pf_find_state(struct p
   

Re: pf: percpu anchor stacks

2017-05-08 Thread Martin Pieuchot
On 28/03/17(Tue) 13:02, Alexandr Nedvedicky wrote:
> [...] 
> > 
> >  - s/test_status/action/ as it's done everywhere else?
> 
> I've opted to test_status, because it's something different to 'action'
> as we use it in current code.

I agree with you for test_status.  What about naming the enum and use it
instead of 'int' for the variable?  This implicitly documents the possible
option and allow the compiler to check for missing cases in switch.

> > Does this pass pfctl regress tests?
> 
> I'm about to run those tests for OpenBSD.

Did you manage to do that?

> > While I haven't noticed anything criminal here, it makes me
> > wonder if it'd be possible to do this change in a few steps:
> > factor out rule matching from pf_test_rule and then bring in
> > anchor changes?
> > 
> 
> if I understand you right, you basically want me to make change
> in two steps:
> 
>   the first step splits current pf_test_rule() to pf_match_rule() and
>   pf_test_rule()
> 
>   the second step will kill global anchor stack array by introducing
>   a true recursion. The patch will remove pf_step_out_of_anchor()
>   function.
> 
> I think I can do it. And also as Theo pointed out there is no rush
> to get that patch to tree.

Now *is* the time to commit the first step, the refactoring.  Once
that's done we can discuss the introduction of the context.

Could you come up with such diff?

Cheers,
Martin



Re: pf: percpu anchor stacks

2017-03-28 Thread Alexandr Nedvedicky
Hello Mike,

thank you for looking at my patch. I accept most of your comments.
I believe the items below deserve further discussion.

>  - instead of checking "rv" against 0 in the "break on quick
>rule or failure" I'd like to see an actual check against
>PF_TEST_* values so that it's grep'able;

this is the 'edited' diff to highlight the place, which your comment is
related to. It shows the change to patch I've sent in earlier mail [1].

[1] 
http://openbsd-archive.7691.n7.nabble.com/pf-percpu-anchor-stacks-tt314935.html#a315309
8<---8<---8<--8<
-pf_step_into_anchor(struct pf_test_ctx *cx, struct pf_rule *r)
+pf_step_into_anchor(struct pf_test_ctx *ctx, struct pf_rule *r)
 {
int rv;
 
if (r->anchor_wildcard) {
struct pf_anchor*child;
rv = PF_TEST_OK;
RB_FOREACH(child, pf_anchor_node, >anchor->children) {
-   rv = pf_match_rule(cx, >ruleset);
-   if (rv != 0) {
+   rv = pf_match_rule(ctx, >ruleset);
+   if ((rv == PF_TEST_QUICK) || (rv == PF_TEST_FAIL)) {
/*
-* break on quick rule or failure
+* we either hit a rule qith quick action
+* (more likely), or hit some runtime
+* error (e.g. pool_get() faillure).
 */
break;
}
}
} else {
-   rv = pf_match_rule(cx, >anchor->ruleset);
-   }
-
-   cx->depth--;
+   rv = pf_match_rule(ctx, >anchor->ruleset);
+   }
+
+   ctx->depth--;
 
return (rv);
 }
8<---8<---8<--8<

> 
>  - s/test_status/action/ as it's done everywhere else?

I've opted to test_status, because it's something different to 'action'
as we use it in current code.

the 'action' usually comes from a rule (or a state match) and dictates what
PF should do with the packet (pass/block/translate/...)

the test_status rather indicates the status of rule processing, which is:
terminate due to failure (PF_TEST_FAIL), caused by a runtime error,
continue (PF_TEST_OK),
terminate (PF_TEST_QUICK) due to hitting a rule with 'quick' action

> 
>  - I'm not certain I like extra set of PASS/BLOCK macros.
>I know you want to represent the "quick" pass separately,
>but perhaps there's a way to do it while using PF_PASS...

I somewhat understand your point, but as I've said earlier, I see 'action'
and 'test_status' as two distinct things, which I prefer to clearly
separate.

However I don't insist on the current code in the patch. I think the existing
enum, which defines PF_PASS/PF_BLOCK/..., can be extended. If we go
this way, then I would rather do s/test_status/virtual_action/:
using 'virtual' follows the pattern that got established for
protocol: proto & virtual_proto


> 
> Does this pass pfctl regress tests?

I'm about to run those tests for OpenBSD.

> 
> While I haven't noticed anything criminal here, it makes me
> wonder if it'd be possible to do this change in a few steps:
> factor out rule matching from pf_test_rule and then bring in
> anchor changes?
> 

if I understand you right, you basically want me to make change
in two steps:

the first step splits current pf_test_rule() to pf_match_rule() and
pf_test_rule()

the second step will kill global anchor stack array by introducing
a true recursion. The patch will remove pf_step_out_of_anchor()
function.

I think I can do it. And also as Theo pointed out there is no rush
to get that patch to tree.

thanks and
regards
sasha

8<---8<---8<--8<

diff -r b483ee1b4a65 src/sys/net/pf.c
--- a/src/sys/net/pf.c  Tue Mar 28 10:39:14 2017 +0200
+++ b/src/sys/net/pf.c  Tue Mar 28 11:44:12 2017 +0200
@@ -119,12 +119,53 @@ u_char pf_tcp_secret[16];
 int pf_tcp_secret_init;
 int pf_tcp_iss_off;
 
-struct pf_anchor_stackframe {
-   struct pf_ruleset   *rs;
-   struct pf_rule  *r;
-   struct pf_anchor_node   *parent;
-   struct pf_anchor*child;
-} pf_anchor_stack[64];
+struct pf_test_ctx {
+   int test_status;
+   struct pf_pdesc *pd;
+   struct pf_rule_actions  act;
+   u_int8_ticmpcode;
+   u_int8_ticmptype;
+   int icmp_dir;
+   int state_icmp;
+   int tag;
+   u_short reason;
+ 

Re: pf: percpu anchor stacks

2017-03-27 Thread Mike Belopuhov
On Fri, Mar 24, 2017 at 12:19 +0100, Alexandr Nedvedicky wrote:
> Hello,
> 
> I'm attaching patch, which removes stack-as-a-global variable.
> it's updated patch [1] to current tree.
> 
> sorry for being pushy advocating my old, rusty patch.
>

I think your diff is the way to go indeed.  If we can avoid
using the global stack altogether, then all the better.
This diff also splits giant pf_test_rule into several chunks
which is a good thing in my opinion.

A few random comments:

 - some lines appear to be longer than 80 symbols;

 - "cx" is an uncommon abbreviation for a "context" in OpenBSD,
   we normally use "ctx";

 - PF_TEST_ATTRIB could use a "continue" statement instead of
   the goto;

 - instead of checking "rv" against 0 in the "break on quick
   rule or failure" I'd like to see an actual check against
   PF_TEST_* values so that it's grep'able;

 - s/test_status/action/ as it's done everywhere else?

 - I'm not certain I like extra set of PASS/BLOCK macros.
   I know you want to represent the "quick" pass separately,
   but perhaps there's a way to do it while using PF_PASS...

Does this pass pfctl regress tests?

While I haven't noticed anything criminal here, it makes me
wonder if it'd be possible to do this change in a few steps:
factor out rule matching from pf_test_rule and then bring in
anchor changes?

> thanks and
> regards
> sasha
> 
> [1] 
> http://openbsd-archive.7691.n7.nabble.com/Re-PF-SMP-making-anchor-stack-multithreaded-tt275915.html#none
> 
> 8<---8<---8<--8<
> diff -r d6e3a6338889 src/sys/net/pf.c
> --- a/src/sys/net/pf.cMon Mar 20 01:10:40 2017 +0100
> +++ b/src/sys/net/pf.cFri Mar 24 11:28:18 2017 +0100
> @@ -119,12 +119,37 @@ u_char   pf_tcp_secret[16];
>  int   pf_tcp_secret_init;
>  int   pf_tcp_iss_off;
>  
> -struct pf_anchor_stackframe {
> - struct pf_ruleset   *rs;
> - struct pf_rule  *r;
> - struct pf_anchor_node   *parent;
> - struct pf_anchor*child;
> -} pf_anchor_stack[64];
> +struct pf_test_ctx {
> + int test_status;
> + struct pf_pdesc *pd;
> + struct pf_rule_actions  act;
> + u_int8_ticmpcode;
> + u_int8_ticmptype;
> + int icmp_dir;
> + int state_icmp;
> + int tag;
> + u_short reason;
> + struct pf_rule_item *ri;
> + struct pf_src_node  *sns[PF_SN_MAX];
> + struct pf_rule_slistrules;
> + struct pf_rule  *nr;
> + struct pf_rule  **rm;
> + struct pf_rule  *a;
> + struct pf_rule  **am;
> + struct pf_ruleset   **rsm;
> + struct pf_ruleset   *arsm;
> + struct pf_ruleset   *aruleset;
> + struct tcphdr   *th;
> + int  depth;
> +};
> +
> +#define  PF_ANCHOR_STACK_MAX 64
> +
> +enum {
> + PF_TEST_FAIL = -1,
> + PF_TEST_OK,
> + PF_TEST_QUICK
> +};
>  
>  struct pool   pf_src_tree_pl, pf_rule_pl, pf_queue_pl;
>  struct pool   pf_state_pl, pf_state_key_pl, pf_state_item_pl;
> @@ -211,11 +236,8 @@ struct pf_state  *pf_find_state(struct p
>   struct pf_state_key_cmp *, u_int, struct mbuf *);
>  int   pf_src_connlimit(struct pf_state **);
>  int   pf_match_rcvif(struct mbuf *, struct pf_rule *);
> -void  pf_step_into_anchor(int *, struct pf_ruleset **,
> - struct pf_rule **, struct pf_rule **);
> -int   pf_step_out_of_anchor(int *, struct pf_ruleset **,
> -  struct pf_rule **, struct pf_rule **,
> -  int *);
> +int   pf_step_into_anchor(struct pf_test_ctx *, struct 
> pf_rule *);
> +int   pf_match_rule(struct pf_test_ctx *, struct pf_ruleset 
> *);
>  void  pf_counters_inc(int, struct pf_pdesc *,
>   struct pf_state *, struct pf_rule *,
>   struct pf_rule *);
> @@ -3019,74 +3041,37 @@ pf_tag_packet(struct mbuf *m, int tag, i
>   m->m_pkthdr.ph_rtableid = (u_int)rtableid;
>  }
>  
> -void
> -pf_step_into_anchor(int *depth, struct pf_ruleset **rs,
> -struct pf_rule **r, struct pf_rule **a)
> +int
> +pf_step_into_anchor(struct pf_test_ctx *cx, struct pf_rule *r)
>  {
> - struct pf_anchor_stackframe *f;
> -
> - if (*depth >= sizeof(pf_anchor_stack) /
> - sizeof(pf_anchor_stack[0])) {
> - log(LOG_ERR, "pf: anchor stack overflow\n");
> - *r = TAILQ_NEXT(*r, entries);
> - return;
> - } else if (a != NULL)
> - *a = *r;
> - f = pf_anchor_stack + (*depth)++;
> - f->rs = *rs;
> - 

Re: pf: percpu anchor stacks

2017-03-24 Thread Alexandr Nedvedicky
Hello,

I'm attaching patch, which removes stack-as-a-global variable.
it's updated patch [1] to current tree.

sorry for being pushy advocating my old, rusty patch.

thanks and
regards
sasha

[1] 
http://openbsd-archive.7691.n7.nabble.com/Re-PF-SMP-making-anchor-stack-multithreaded-tt275915.html#none

8<---8<---8<--8<
diff -r d6e3a6338889 src/sys/net/pf.c
--- a/src/sys/net/pf.c  Mon Mar 20 01:10:40 2017 +0100
+++ b/src/sys/net/pf.c  Fri Mar 24 11:28:18 2017 +0100
@@ -119,12 +119,37 @@ u_char pf_tcp_secret[16];
 int pf_tcp_secret_init;
 int pf_tcp_iss_off;
 
-struct pf_anchor_stackframe {
-   struct pf_ruleset   *rs;
-   struct pf_rule  *r;
-   struct pf_anchor_node   *parent;
-   struct pf_anchor*child;
-} pf_anchor_stack[64];
+struct pf_test_ctx {
+   int test_status;
+   struct pf_pdesc *pd;
+   struct pf_rule_actions  act;
+   u_int8_ticmpcode;
+   u_int8_ticmptype;
+   int icmp_dir;
+   int state_icmp;
+   int tag;
+   u_short reason;
+   struct pf_rule_item *ri;
+   struct pf_src_node  *sns[PF_SN_MAX];
+   struct pf_rule_slistrules;
+   struct pf_rule  *nr;
+   struct pf_rule  **rm;
+   struct pf_rule  *a;
+   struct pf_rule  **am;
+   struct pf_ruleset   **rsm;
+   struct pf_ruleset   *arsm;
+   struct pf_ruleset   *aruleset;
+   struct tcphdr   *th;
+   int  depth;
+};
+
+#definePF_ANCHOR_STACK_MAX 64
+
+enum {
+   PF_TEST_FAIL = -1,
+   PF_TEST_OK,
+   PF_TEST_QUICK
+};
 
 struct pool pf_src_tree_pl, pf_rule_pl, pf_queue_pl;
 struct pool pf_state_pl, pf_state_key_pl, pf_state_item_pl;
@@ -211,11 +236,8 @@ struct pf_state*pf_find_state(struct p
struct pf_state_key_cmp *, u_int, struct mbuf *);
 int pf_src_connlimit(struct pf_state **);
 int pf_match_rcvif(struct mbuf *, struct pf_rule *);
-voidpf_step_into_anchor(int *, struct pf_ruleset **,
-   struct pf_rule **, struct pf_rule **);
-int pf_step_out_of_anchor(int *, struct pf_ruleset **,
-struct pf_rule **, struct pf_rule **,
-int *);
+int pf_step_into_anchor(struct pf_test_ctx *, struct 
pf_rule *);
+int pf_match_rule(struct pf_test_ctx *, struct pf_ruleset 
*);
 voidpf_counters_inc(int, struct pf_pdesc *,
struct pf_state *, struct pf_rule *,
struct pf_rule *);
@@ -3019,74 +3041,37 @@ pf_tag_packet(struct mbuf *m, int tag, i
m->m_pkthdr.ph_rtableid = (u_int)rtableid;
 }
 
-void
-pf_step_into_anchor(int *depth, struct pf_ruleset **rs,
-struct pf_rule **r, struct pf_rule **a)
+int
+pf_step_into_anchor(struct pf_test_ctx *cx, struct pf_rule *r)
 {
-   struct pf_anchor_stackframe *f;
-
-   if (*depth >= sizeof(pf_anchor_stack) /
-   sizeof(pf_anchor_stack[0])) {
-   log(LOG_ERR, "pf: anchor stack overflow\n");
-   *r = TAILQ_NEXT(*r, entries);
-   return;
-   } else if (a != NULL)
-   *a = *r;
-   f = pf_anchor_stack + (*depth)++;
-   f->rs = *rs;
-   f->r = *r;
-   if ((*r)->anchor_wildcard) {
-   f->parent = &(*r)->anchor->children;
-   if ((f->child = RB_MIN(pf_anchor_node, f->parent)) == NULL) {
-   *r = NULL;
-   return;
-   }
-   *rs = >child->ruleset;
-   } else {
-   f->parent = NULL;
-   f->child = NULL;
-   *rs = &(*r)->anchor->ruleset;
-   }
-   *r = TAILQ_FIRST((*rs)->rules.active.ptr);
-}
-
-int
-pf_step_out_of_anchor(int *depth, struct pf_ruleset **rs,
-struct pf_rule **r, struct pf_rule **a, int *match)
-{
-   struct pf_anchor_stackframe *f;
-   int quick = 0;
-
-   do {
-   if (*depth <= 0)
-   break;
-   f = pf_anchor_stack + *depth - 1;
-   if (f->parent != NULL && f->child != NULL) {
-   f->child = RB_NEXT(pf_anchor_node, f->parent, f->child);
-   if (f->child != NULL) {
-   *rs = >child->ruleset;
-   *r = TAILQ_FIRST((*rs)->rules.active.ptr);
-   if (*r == NULL)
-   continue;
-

Re: pf: percpu anchor stacks

2017-03-19 Thread Alexandr Nedvedicky
Hello,

I sent a different patch [1], which touched the same functions, some time ago.
The old patch [1] basically splits pf_test_rule() into two functions:
pf_test_rule()

pf_match_rule(), which walks the anchor stack recursively. The recursion depth
is limited to 64.

The memory footprint for true recursion is the same as for an array of stack
frames. The only difference comes from the place where the memory gets allocated.

regards
sasha

[1] https://marc.info/?l=openbsd-tech&m=143902905917671&w=4