On Mon, Dec 12, 2022 at 1:28 AM venu iyer <[email protected]> wrote: > > Currently, even stateless flows are subject to connection tracking when there are > LB rules (for DNAT). However, if a flow needs to be subjected to LB, then it shouldn't > be configured as stateless. > > Stateless flow means we should not track it, and this change exempts stateless > flows from being tracked regardless of whether LB rules are present or not. > > Signed-off-by: venu iyer <[email protected]> > Acked-by: Han Zhou <[email protected]> > --- > northd/northd.c | 25 +++- > northd/ovn-northd.8.xml | 57 ++++---- > ovn-nb.xml | 3 + > tests/ovn-northd.at | 76 +++++------ > tests/ovn.at | 4 +- > tests/system-ovn.at | 296 ++++++++++++++++++++++++++++++++++++++++ > 6 files changed, 383 insertions(+), 78 deletions(-) > > diff --git a/northd/northd.c b/northd/northd.c > index 7c48bb3b4..5d8ef612f 100644 > --- a/northd/northd.c > +++ b/northd/northd.c > @@ -140,8 +140,8 @@ enum ovn_stage { > PIPELINE_STAGE(SWITCH, IN, L2_UNKNOWN, 26, "ls_in_l2_unknown") \ > \ > /* Logical switch egress stages. 
*/ \ > - PIPELINE_STAGE(SWITCH, OUT, PRE_LB, 0, "ls_out_pre_lb") \ > - PIPELINE_STAGE(SWITCH, OUT, PRE_ACL, 1, "ls_out_pre_acl") \ > + PIPELINE_STAGE(SWITCH, OUT, PRE_ACL, 0, "ls_out_pre_acl") \ > + PIPELINE_STAGE(SWITCH, OUT, PRE_LB, 1, "ls_out_pre_lb") \ > PIPELINE_STAGE(SWITCH, OUT, PRE_STATEFUL, 2, "ls_out_pre_stateful") \ > PIPELINE_STAGE(SWITCH, OUT, ACL_HINT, 3, "ls_out_acl_hint") \ > PIPELINE_STAGE(SWITCH, OUT, ACL, 4, "ls_out_acl") \ > @@ -215,6 +215,7 @@ enum ovn_stage { > #define REGBIT_ACL_LABEL "reg0[13]" > #define REGBIT_FROM_RAMP "reg0[14]" > #define REGBIT_PORT_SEC_DROP "reg0[15]" > +#define REGBIT_ACL_STATELESS "reg0[16]" > > #define REG_ORIG_DIP_IPV4 "reg1" > #define REG_ORIG_DIP_IPV6 "xxreg1" > @@ -290,7 +291,7 @@ enum ovn_stage { > * | R0 | REGBIT_{CONNTRACK/DHCP/DNS} | | | > * | | REGBIT_{HAIRPIN/HAIRPIN_REPLY} | | | > * | | REGBIT_ACL_HINT_{ALLOW_NEW/ALLOW/DROP/BLOCK} | | | > - * | | REGBIT_ACL_LABEL | X | | > + * | | REGBIT_ACL_{LABEL/STATELESS} | X | | > * +----+----------------------------------------------+ X | | > * | R5 | UNUSED | X | LB_L2_AFF_BACKEND_IP6 | > * | R1 | ORIG_DIP_IPV4 (>= IN_PRE_STATEFUL) | R | | > @@ -5693,17 +5694,18 @@ build_stateless_filter(struct ovn_datapath *od, > const struct nbrec_acl *acl, > struct hmap *lflows) > { > + const char *action = REGBIT_ACL_STATELESS" = 1; next;"; > if (!strcmp(acl->direction, "from-lport")) { > ovn_lflow_add_with_hint(lflows, od, S_SWITCH_IN_PRE_ACL, > acl->priority + OVN_ACL_PRI_OFFSET, > acl->match, > - "next;", > + action, > &acl->header_); > } else { > ovn_lflow_add_with_hint(lflows, od, S_SWITCH_OUT_PRE_ACL, > acl->priority + OVN_ACL_PRI_OFFSET, > acl->match, > - "next;", > + action, > &acl->header_); > } > } > @@ -5795,6 +5797,10 @@ build_pre_acls(struct ovn_datapath *od, const struct hmap *port_groups, > REGBIT_CONNTRACK_DEFRAG" = 1; next;"); > ovn_lflow_add(lflows, od, S_SWITCH_OUT_PRE_ACL, 100, "ip", > REGBIT_CONNTRACK_DEFRAG" = 1; next;"); > + } else if (od->has_lb_vip) { > 
+ /* We'll build stateless filters if there are LB rules so that > + * the stateless flows are not tracked in pre-lb. */ > + build_stateless_filters(od, port_groups, lflows); > } > } > > @@ -5930,6 +5936,12 @@ build_pre_lb(struct ovn_datapath *od, const struct shash *meter_groups, > 110, lflows); > } > > + /* Do not send stateless flows via conntrack */ > + ovn_lflow_add(lflows, od, S_SWITCH_IN_PRE_LB, 110, > + REGBIT_ACL_STATELESS" == 1", "next;"); > + ovn_lflow_add(lflows, od, S_SWITCH_OUT_PRE_LB, 110, > + REGBIT_ACL_STATELESS" == 1", "next;"); > + > /* 'REGBIT_CONNTRACK_NAT' is set to let the pre-stateful table send > * packet to conntrack for defragmentation and possibly for unNATting. > * > @@ -6935,7 +6947,8 @@ build_lb_rules_pre_stateful(struct hmap *lflows, struct ovn_northd_lb *lb, > } > ds_put_format(action, "%s;", ct_lb_mark ? "ct_lb_mark" : "ct_lb"); > > - ds_put_format(match, "%s.dst == %s", ip_match, lb_vip->vip_str); > + ds_put_format(match, REGBIT_CONNTRACK_NAT" == 1 && %s.dst == %s", > + ip_match, lb_vip->vip_str); > if (lb_vip->port_str) { > ds_put_format(match, " && %s.dst == %s", proto, lb_vip->port_str); > } > diff --git a/northd/ovn-northd.8.xml b/northd/ovn-northd.8.xml > index dffbba96d..ce5603169 100644 > --- a/northd/ovn-northd.8.xml > +++ b/northd/ovn-northd.8.xml > @@ -474,7 +474,9 @@ > priority-110 flow is added to skip over stateful ACLs. Multicast, IPv6 > Neighbor Discovery and MLD traffic also skips stateful ACLs. For > "allow-stateless" ACLs, a flow is added to bypass setting the hint for > - connection tracker processing. > + connection tracker processing when there are stateful ACLs or LB rules; > + <code>REGBIT_ACL_STATELESS</code> is set for traffic matching stateless > + ACL flows. > </p> > > <p> > @@ -494,8 +496,10 @@ > in ingress table <code>LB</code> and <code>Stateful</code>. It contains > a priority-0 flow that simply moves traffic to the next table.
Moreover > it contains two priority-110 flows to move multicast, IPv6 Neighbor > - Discovery and MLD traffic to the next table. If load balancing rules with > - virtual IP addresses (and ports) are configured in > + Discovery and MLD traffic to the next table. It also contains a > + priority-110 flow to move stateless traffic, i.e., traffic for which > + <code>REGBIT_ACL_STATELESS</code> is set, to the next table. If load > + balancing rules with virtual IP addresses (and ports) are configured in > <code>OVN_Northbound</code> database for a logical switch datapath, a > priority-100 flow is added with the match <code>ip</code> to match on IP > packets and sets the action <code>reg0[2] = 1; next;</code> to act as a > @@ -1973,19 +1977,11 @@ output; > </li> > </ul> > > - <h3>Egress Table 0: Pre-LB</h3> > + <h3>Egress Table 0: <code>to-lport</code> Pre-ACLs</h3> > > <p> > - This table is similar to ingress table <code>Pre-LB</code>. It > - contains a priority-0 flow that simply moves traffic to the next table. > - Moreover it contains two priority-110 flows to move multicast, IPv6 > - Neighbor Discovery and MLD traffic to the next table. If any load > - balancing rules exist for the datapath, a priority-100 flow is added with > - a match of <code>ip</code> and action of <code>reg0[2] = 1; next;</code> > - to act as a hint for table <code>Pre-stateful</code> to send IP packets > - to the connection tracker for packet de-fragmentation and possibly DNAT > - the destination VIP to one of the selected backend for already committed > - load balanced traffic. > + This is similar to ingress table <code>Pre-ACLs</code> except for > + <code>to-lport</code> traffic. > </p> > > <p> > @@ -1998,11 +1994,29 @@ output; > db="OVN_Northbound"/> table.
> </p> > > - <h3>Egress Table 1: <code>to-lport</code> Pre-ACLs</h3> > + <p> > + This table also has a priority-110 flow with the match > + <code>outport == <var>I</var></code> for all logical switch > + datapaths to move traffic to the next table. Where <var>I</var> > + is the peer of a logical router port. This flow is added to > + skip the connection tracking of packets which will be entering > + logical router datapath from logical switch datapath for routing. > + </p> > + > + > + <h3>Egress Table 1: Pre-LB</h3> > > <p> > - This is similar to ingress table <code>Pre-ACLs</code> except for > - <code>to-lport</code> traffic. > + This table is similar to ingress table <code>Pre-LB</code>. It > + contains a priority-0 flow that simply moves traffic to the next table. > + Moreover it contains two priority-110 flows to move multicast, IPv6 > + Neighbor Discovery and MLD traffic to the next table. If any load > + balancing rules exist for the datapath, a priority-100 flow is added with > + a match of <code>ip</code> and action of <code>reg0[2] = 1; next;</code> > + to act as a hint for table <code>Pre-stateful</code> to send IP packets > + to the connection tracker for packet de-fragmentation and possibly DNAT > + the destination VIP to one of the selected backend for already committed > + load balanced traffic. > </p> > > <p> > @@ -2015,15 +2029,6 @@ output; > db="OVN_Northbound"/> table. > </p> > > - <p> > - This table also has a priority-110 flow with the match > - <code>outport == <var>I</var></code> for all logical switch > - datapaths to move traffic to the next table. Where <var>I</var> > - is the peer of a logical router port. This flow is added to > - skip the connection tracking of packets which will be entering > - logical router datapath from logical switch datapath for routing. 
> - </p> > - > <h3>Egress Table 2: Pre-stateful</h3> > > <p> > diff --git a/ovn-nb.xml b/ovn-nb.xml > index 0edc3da96..3ac7785e1 100644 > --- a/ovn-nb.xml > +++ b/ovn-nb.xml > @@ -2159,6 +2159,9 @@ or > outgoing TCP traffic directed to an IP address, then you probably > also want to define another rule to allow incoming TCP traffic coming > from this same IP address. > + In addition, traffic that matches stateless ACLs will bypass > + load-balancer DNAT/un-DNAT processing. Stateful ACLs should be > + used instead if the traffic is supposed to be load-balanced. > </li> > > <li> > diff --git a/tests/ovn-northd.at b/tests/ovn-northd.at > index ca4263eac..a2f8e8a20 100644 > --- a/tests/ovn-northd.at > +++ b/tests/ovn-northd.at > @@ -2024,7 +2024,7 @@ AT_CLEANUP > > # This test case tests that when a logical switch has load balancers associated > # (with VIPs configured), the below logical flow is added by ovn-northd. > -# table=0 (ls_out_pre_lb ), priority=100 , match=(ip), action=(reg0[[0]] = 1; next;) > +# table=1 (ls_out_pre_lb ), priority=100 , match=(ip), action=(reg0[[0]] = 1; next;) > # This test case is added for the BZ - > # https://bugzilla.redhat.com/show_bug.cgi?id=1849162 > # > @@ -2063,27 +2063,27 @@ check ovn-nbctl ls-lb-add sw0 lb1 > check ovn-nbctl add load_balancer_group $lbg load_balancer $lb3 > check ovn-nbctl --wait=sb sync > AT_CHECK([ovn-sbctl lflow-list | grep "ls_out_pre_lb.*priority=100" | grep reg0 | sort], [0], [dnl > - table=0 (ls_out_pre_lb ), priority=100 , match=(ip), action=(reg0[[2]] = 1; next;) > + table=1 (ls_out_pre_lb ), priority=100 , match=(ip), action=(reg0[[2]] = 1; next;) > ]) > > check ovn-nbctl ls-lb-add sw0 lb2 > check ovn-nbctl add load_balancer_group $lbg load_balancer $lb4 > check ovn-nbctl --wait=sb sync > AT_CHECK([ovn-sbctl lflow-list | grep "ls_out_pre_lb.*priority=100" | grep reg0 | sort], [0], [dnl > - table=0 (ls_out_pre_lb ), priority=100 , match=(ip), action=(reg0[[2]] = 1; next;) > + table=1 (ls_out_pre_lb ), 
priority=100 , match=(ip), action=(reg0[[2]] = 1; next;) > ]) > > check ovn-nbctl clear load_balancer $lb1 vips > check ovn-nbctl clear load_balancer $lb3 vips > check ovn-nbctl --wait=sb sync > AT_CHECK([ovn-sbctl lflow-list | grep "ls_out_pre_lb.*priority=100" | grep reg0 | sort], [0], [dnl > - table=0 (ls_out_pre_lb ), priority=100 , match=(ip), action=(reg0[[2]] = 1; next;) > + table=1 (ls_out_pre_lb ), priority=100 , match=(ip), action=(reg0[[2]] = 1; next;) > ]) > > check ovn-nbctl clear load_balancer $lb2 vips > check ovn-nbctl --wait=sb sync > AT_CHECK([ovn-sbctl lflow-list | grep "ls_out_pre_lb.*priority=100" | grep reg0 | sort], [0], [dnl > - table=0 (ls_out_pre_lb ), priority=100 , match=(ip), action=(reg0[[2]] = 1; next;) > + table=1 (ls_out_pre_lb ), priority=100 , match=(ip), action=(reg0[[2]] = 1; next;) > ]) > > check ovn-nbctl clear load_balancer $lb4 vips > @@ -2098,7 +2098,7 @@ check ovn-nbctl set load_balancer $lb4 vips:"10.0.0.13"="10.0.0.6" > > check ovn-nbctl --wait=sb sync > AT_CHECK([ovn-sbctl lflow-list | grep "ls_out_pre_lb.*priority=100" | grep reg0 | sort], [0], [dnl > - table=0 (ls_out_pre_lb ), priority=100 , match=(ip), action=(reg0[[2]] = 1; next;) > + table=1 (ls_out_pre_lb ), priority=100 , match=(ip), action=(reg0[[2]] = 1; next;) > ]) > > # Now reverse the order of clearing the vip. 
> @@ -2106,13 +2106,13 @@ check ovn-nbctl clear load_balancer $lb2 vips > check ovn-nbctl clear load_balancer $lb4 vips > check ovn-nbctl --wait=sb sync > AT_CHECK([ovn-sbctl lflow-list | grep "ls_out_pre_lb.*priority=100" | grep reg0 | sort], [0], [dnl > - table=0 (ls_out_pre_lb ), priority=100 , match=(ip), action=(reg0[[2]] = 1; next;) > + table=1 (ls_out_pre_lb ), priority=100 , match=(ip), action=(reg0[[2]] = 1; next;) > ]) > > check ovn-nbctl clear load_balancer $lb1 vips > check ovn-nbctl --wait=sb sync > AT_CHECK([ovn-sbctl lflow-list | grep "ls_out_pre_lb.*priority=100" | grep reg0 | sort], [0], [dnl > - table=0 (ls_out_pre_lb ), priority=100 , match=(ip), action=(reg0[[2]] = 1; next;) > + table=1 (ls_out_pre_lb ), priority=100 , match=(ip), action=(reg0[[2]] = 1; next;) > ]) > > check ovn-nbctl clear load_balancer $lb3 vips > @@ -3057,18 +3057,10 @@ for direction in from to; do > done > ovn-nbctl --wait=sb sync > > -# TCP packets should go to conntrack for load balancing. > +# TCP packets should not go to conntrack for load balancing. > flow="inport == \"lsp1\" && ${flow_eth} && ${flow_ip} && ${flow_tcp}" > AT_CHECK_UNQUOTED([ovn_trace --ct new --ct new --minimal ls "${flow}"], [0], [dnl > -ct_lb_mark { > - ct_lb_mark { > - reg0[[6]] = 0; > - reg0[[12]] = 0; > - ct_lb_mark /* default (use --ct to customize) */ { > - output("lsp2"); > - }; > - }; > -}; > +output("lsp2"); > ]) > > # UDP packets still go to conntrack. > @@ -3201,18 +3193,10 @@ for direction in from to; do > done > ovn-nbctl --wait=sb sync > > -# TCP packets should go to conntrack for load balancing. > +# TCP packets should not go to conntrack for load balancing. 
> flow="inport == \"lsp1\" && ${flow_eth} && ${flow_ip} && ${flow_tcp}" > AT_CHECK_UNQUOTED([ovn_trace --ct new --ct new --minimal ls "${flow}"], [0], [dnl > -ct_lb_mark { > - ct_lb_mark { > - reg0[[6]] = 0; > - reg0[[12]] = 0; > - ct_lb_mark /* default (use --ct to customize) */ { > - output("lsp2"); > - }; > - }; > -}; > +output("lsp2"); > ]) > > # UDP packets still go to conntrack. > @@ -4026,14 +4010,15 @@ check_stateful_flows() { > table=? (ls_in_pre_lb ), priority=110 , match=(eth.mcast), action=(next;) > table=? (ls_in_pre_lb ), priority=110 , match=(ip && inport == "sw0-lr0"), action=(next;) > table=? (ls_in_pre_lb ), priority=110 , match=(nd || nd_rs || nd_ra || mldv1 || mldv2), action=(next;) > + table=? (ls_in_pre_lb ), priority=110 , match=(reg0[[16]] == 1), action=(next;) > ]) > > AT_CHECK([grep "ls_in_pre_stateful" sw0flows | sort | sed 's/table=./table=?/'], [0], [dnl > table=? (ls_in_pre_stateful ), priority=0 , match=(1), action=(next;) > table=? (ls_in_pre_stateful ), priority=100 , match=(reg0[[0]] == 1), action=(ct_next;) > table=? (ls_in_pre_stateful ), priority=110 , match=(reg0[[2]] == 1), action=(ct_lb_mark;) > - table=? (ls_in_pre_stateful ), priority=120 , match=(ip4.dst == 10.0.0.10 && tcp.dst == 80), action=(reg1 = 10.0.0.10; reg2[[0..15]] = 80; ct_lb_mark;) > - table=? (ls_in_pre_stateful ), priority=120 , match=(ip4.dst == 10.0.0.20 && tcp.dst == 80), action=(reg1 = 10.0.0.20; reg2[[0..15]] = 80; ct_lb_mark;) > + table=? (ls_in_pre_stateful ), priority=120 , match=(reg0[[2]] == 1 && ip4.dst == 10.0.0.10 && tcp.dst == 80), action=(reg1 = 10.0.0.10; reg2[[0..15]] = 80; ct_lb_mark;) > + table=? 
(ls_in_pre_stateful ), priority=120 , match=(reg0[[2]] == 1 && ip4.dst == 10.0.0.20 && tcp.dst == 80), action=(reg1 = 10.0.0.20; reg2[[0..15]] = 80; ct_lb_mark;) > ]) > > AT_CHECK([grep "ls_in_lb " sw0flows | sort | sed 's/table=../table=??/'], [0], [dnl > @@ -4049,12 +4034,13 @@ check_stateful_flows() { > ]) > > AT_CHECK([grep "ls_out_pre_lb" sw0flows | sort], [0], [dnl > - table=0 (ls_out_pre_lb ), priority=0 , match=(1), action=(next;) > - table=0 (ls_out_pre_lb ), priority=100 , match=(ip), action=(reg0[[2]] = 1; next;) > - table=0 (ls_out_pre_lb ), priority=110 , match=(eth.mcast), action=(next;) > - table=0 (ls_out_pre_lb ), priority=110 , match=(eth.src == $svc_monitor_mac), action=(next;) > - table=0 (ls_out_pre_lb ), priority=110 , match=(ip && outport == "sw0-lr0"), action=(next;) > - table=0 (ls_out_pre_lb ), priority=110 , match=(nd || nd_rs || nd_ra || mldv1 || mldv2), action=(next;) > + table=1 (ls_out_pre_lb ), priority=0 , match=(1), action=(next;) > + table=1 (ls_out_pre_lb ), priority=100 , match=(ip), action=(reg0[[2]] = 1; next;) > + table=1 (ls_out_pre_lb ), priority=110 , match=(eth.mcast), action=(next;) > + table=1 (ls_out_pre_lb ), priority=110 , match=(eth.src == $svc_monitor_mac), action=(next;) > + table=1 (ls_out_pre_lb ), priority=110 , match=(ip && outport == "sw0-lr0"), action=(next;) > + table=1 (ls_out_pre_lb ), priority=110 , match=(nd || nd_rs || nd_ra || mldv1 || mldv2), action=(next;) > + table=1 (ls_out_pre_lb ), priority=110 , match=(reg0[[16]] == 1), action=(next;) > ]) > > AT_CHECK([grep "ls_out_pre_stateful" sw0flows | sort], [0], [dnl > @@ -4094,6 +4080,7 @@ AT_CHECK([grep "ls_in_pre_lb" sw0flows | sort | sed 's/table=./table=?/'], [0], > table=? (ls_in_pre_lb ), priority=110 , match=(eth.mcast), action=(next;) > table=? (ls_in_pre_lb ), priority=110 , match=(ip && inport == "sw0-lr0"), action=(next;) > table=? (ls_in_pre_lb ), priority=110 , match=(nd || nd_rs || nd_ra || mldv1 || mldv2), action=(next;) > + table=? 
(ls_in_pre_lb ), priority=110 , match=(reg0[[16]] == 1), action=(next;) > ]) > > AT_CHECK([grep "ls_in_pre_stateful" sw0flows | sort | sed 's/table=./table=?/'], [0], [dnl > @@ -4113,11 +4100,12 @@ AT_CHECK([grep "ls_in_stateful" sw0flows | sort | sed 's/table=../table=??/'], [ > ]) > > AT_CHECK([grep "ls_out_pre_lb" sw0flows | sort], [0], [dnl > - table=0 (ls_out_pre_lb ), priority=0 , match=(1), action=(next;) > - table=0 (ls_out_pre_lb ), priority=110 , match=(eth.mcast), action=(next;) > - table=0 (ls_out_pre_lb ), priority=110 , match=(eth.src == $svc_monitor_mac), action=(next;) > - table=0 (ls_out_pre_lb ), priority=110 , match=(ip && outport == "sw0-lr0"), action=(next;) > - table=0 (ls_out_pre_lb ), priority=110 , match=(nd || nd_rs || nd_ra || mldv1 || mldv2), action=(next;) > + table=1 (ls_out_pre_lb ), priority=0 , match=(1), action=(next;) > + table=1 (ls_out_pre_lb ), priority=110 , match=(eth.mcast), action=(next;) > + table=1 (ls_out_pre_lb ), priority=110 , match=(eth.src == $svc_monitor_mac), action=(next;) > + table=1 (ls_out_pre_lb ), priority=110 , match=(ip && outport == "sw0-lr0"), action=(next;) > + table=1 (ls_out_pre_lb ), priority=110 , match=(nd || nd_rs || nd_ra || mldv1 || mldv2), action=(next;) > + table=1 (ls_out_pre_lb ), priority=110 , match=(reg0[[16]] == 1), action=(next;) > ]) > > AT_CHECK([grep "ls_out_pre_stateful" sw0flows | sort], [0], [dnl > @@ -7677,7 +7665,7 @@ check ovn-nbctl --wait=sb sync > AT_CHECK([ovn-sbctl lflow-list | grep -e natted -e ct_lb], [0], [dnl > table=7 (lr_in_dnat ), priority=110 , match=(ct.est && !ct.rel && ip4 && reg0 == 66.66.66.66 && ct_mark.natted == 1), action=(next;) > table=7 (lr_in_dnat ), priority=110 , match=(ct.new && !ct.rel && ip4 && reg0 == 66.66.66.66), action=(ct_lb_mark(backends=42.42.42.2);) > - table=6 (ls_in_pre_stateful ), priority=120 , match=(ip4.dst == 66.66.66.66), action=(reg1 = 66.66.66.66; ct_lb_mark;) > + table=6 (ls_in_pre_stateful ), priority=120 , match=(reg0[[2]] == 1 
&& ip4.dst == 66.66.66.66), action=(reg1 = 66.66.66.66; ct_lb_mark;) > table=6 (ls_in_pre_stateful ), priority=110 , match=(reg0[[2]] == 1), action=(ct_lb_mark;) > table=12(ls_in_lb ), priority=110 , match=(ct.new && ip4.dst == 66.66.66.66), action=(reg0[[1]] = 0; ct_lb_mark(backends=42.42.42.2);) > table=2 (ls_out_pre_stateful), priority=110 , match=(reg0[[2]] == 1), action=(ct_lb_mark;) > @@ -7689,7 +7677,7 @@ check ovn-nbctl --wait=sb sync > AT_CHECK([ovn-sbctl lflow-list | grep -e natted -e ct_lb], [0], [dnl > table=7 (lr_in_dnat ), priority=110 , match=(ct.est && !ct.rel && ip4 && reg0 == 66.66.66.66 && ct_label.natted == 1), action=(next;) > table=7 (lr_in_dnat ), priority=110 , match=(ct.new && !ct.rel && ip4 && reg0 == 66.66.66.66), action=(ct_lb(backends=42.42.42.2);) > - table=6 (ls_in_pre_stateful ), priority=120 , match=(ip4.dst == 66.66.66.66), action=(reg1 = 66.66.66.66; ct_lb;) > + table=6 (ls_in_pre_stateful ), priority=120 , match=(reg0[[2]] == 1 && ip4.dst == 66.66.66.66), action=(reg1 = 66.66.66.66; ct_lb;) > table=6 (ls_in_pre_stateful ), priority=110 , match=(reg0[[2]] == 1), action=(ct_lb;) > table=12(ls_in_lb ), priority=110 , match=(ct.new && ip4.dst == 66.66.66.66), action=(reg0[[1]] = 0; ct_lb(backends=42.42.42.2);) > table=2 (ls_out_pre_stateful), priority=110 , match=(reg0[[2]] == 1), action=(ct_lb;) > @@ -7701,7 +7689,7 @@ check ovn-nbctl --wait=sb sync > AT_CHECK([ovn-sbctl lflow-list | grep -e natted -e ct_lb], [0], [dnl > table=7 (lr_in_dnat ), priority=110 , match=(ct.est && !ct.rel && ip4 && reg0 == 66.66.66.66 && ct_mark.natted == 1), action=(next;) > table=7 (lr_in_dnat ), priority=110 , match=(ct.new && !ct.rel && ip4 && reg0 == 66.66.66.66), action=(ct_lb_mark(backends=42.42.42.2);) > - table=6 (ls_in_pre_stateful ), priority=120 , match=(ip4.dst == 66.66.66.66), action=(reg1 = 66.66.66.66; ct_lb_mark;) > + table=6 (ls_in_pre_stateful ), priority=120 , match=(reg0[[2]] == 1 && ip4.dst == 66.66.66.66), action=(reg1 = 
66.66.66.66; ct_lb_mark;) > table=6 (ls_in_pre_stateful ), priority=110 , match=(reg0[[2]] == 1), action=(ct_lb_mark;) > table=12(ls_in_lb ), priority=110 , match=(ct.new && ip4.dst == 66.66.66.66), action=(reg0[[1]] = 0; ct_lb_mark(backends=42.42.42.2);) > table=2 (ls_out_pre_stateful), priority=110 , match=(reg0[[2]] == 1), action=(ct_lb_mark;) > diff --git a/tests/ovn.at b/tests/ovn.at > index f3bd53242..7abded46c 100644 > --- a/tests/ovn.at > +++ b/tests/ovn.at > @@ -23777,7 +23777,7 @@ OVS_WAIT_FOR_OUTPUT( > [ovn-sbctl dump-flows > sbflows > ovn-sbctl dump-flows sw0 | grep ct_lb_mark | grep priority=120 | sed 's/table=..//'], 0, > [dnl > - (ls_in_pre_stateful ), priority=120 , match=(ip4.dst == 10.0.0.10 && tcp.dst == 80), action=(reg1 = 10.0.0.10; reg2[[0..15]] = 80; ct_lb_mark;) > + (ls_in_pre_stateful ), priority=120 , match=(reg0[[2]] == 1 && ip4.dst == 10.0.0.10 && tcp.dst == 80), action=(reg1 = 10.0.0.10; reg2[[0..15]] = 80; ct_lb_mark;) > (ls_in_lb ), priority=120 , match=(ct.new && ip4.dst == 10.0.0.10 && tcp.dst == 80), action=(reg0[[1]] = 0; ct_lb_mark(backends= 10.0.0.3:80,20.0.0.3:80; hash_fields="ip_dst,ip_src,tcp_dst,tcp_src");) > ]) > > @@ -23820,7 +23820,7 @@ ovn-sbctl dump-flows sw0 > sbflows3 > AT_CHECK( > [grep "ip4.dst == 10.0.0.10 && tcp.dst == 80" sbflows3 | grep priority=120 |\ > sed 's/table=../table=??/'], [0], [dnl > - table=??(ls_in_pre_stateful ), priority=120 , match=(ip4.dst == 10.0.0.10 && tcp.dst == 80), action=(reg1 = 10.0.0.10; reg2[[0..15]] = 80; ct_lb_mark;) > + table=??(ls_in_pre_stateful ), priority=120 , match=(reg0[[2]] == 1 && ip4.dst == 10.0.0.10 && tcp.dst == 80), action=(reg1 = 10.0.0.10; reg2[[0..15]] = 80; ct_lb_mark;) > table=??(ls_in_lb ), priority=120 , match=(ct.new && ip4.dst == 10.0.0.10 && tcp.dst == 80), action=(drop;) > ]) > > diff --git a/tests/system-ovn.at b/tests/system-ovn.at > index b99578b9e..4bc9fb84f 100644 > --- a/tests/system-ovn.at > +++ b/tests/system-ovn.at > @@ -9511,3 +9511,299 @@ 
NS_CHECK_EXEC([vm3], [nc 6666::1 666 -z], [0], [ignore], [ignore]) > > AT_CLEANUP > ]) > + > +# for packets that match stateless ACL flows, make sure we bypass > +# connection tracking, even with a LB in the switch. Testing for > +# TCP should suffice. For v4 and v6. > +# > +OVN_FOR_EACH_NORTHD([ > +AT_SETUP([omit connection tracking for stateless flows v4]) > + > +CHECK_CONNTRACK() > +ovn_start > +OVS_TRAFFIC_VSWITCHD_START() > +ADD_BR([br-int]) > + > +# Set external-ids in br-int needed for ovn-controller > +ovs-vsctl \ > + -- set Open_vSwitch . external-ids:system-id=hv1 \ > + -- set Open_vSwitch . external-ids:ovn-remote=unix:$ovs_base/ovn-sb/ovn-sb.sock \ > + -- set Open_vSwitch . external-ids:ovn-encap-type=geneve \ > + -- set Open_vSwitch . external-ids:ovn-encap-ip=169.0.0.1 \ > + -- set bridge br-int fail-mode=secure other-config:disable-in-band=true > + > +# Start ovn-controller > +start_daemon ovn-controller > + > +# Logical network: > +# One LR R1 with switches foo (192.168.1.0/24), bar (192.168.2.0/24), > +# > +# foo -- R1 -- bar > + > +ovn-nbctl lr-add R1 > + > +ovn-nbctl ls-add foo > +ovn-nbctl ls-add bar > + > +ovn-nbctl lrp-add R1 foo 00:00:01:01:02:03 192.168.1.1/24 > +ovn-nbctl lrp-add R1 bar 00:00:01:01:02:04 192.168.2.1/24 > + > +# Connect foo to R1 > +ovn-nbctl lsp-add foo rp-foo -- set Logical_Switch_Port rp-foo \ > + type=router options:router-port=foo \ > + -- lsp-set-addresses rp-foo router > + > +# Connect bar to R1 > +ovn-nbctl lsp-add bar rp-bar -- set Logical_Switch_Port rp-bar \ > + type=router options:router-port=bar \ > + -- lsp-set-addresses rp-bar router > + > +# Logical port 'foo1' in switch 'foo'. > +ADD_NAMESPACES(foo1) > +ADD_VETH(foo1, foo1, br-int, "192.168.1.2/24", "f0:00:00:01:02:03", \ > + "192.168.1.1") > +ovn-nbctl lsp-add foo foo1 \ > +-- lsp-set-addresses foo1 "f0:00:00:01:02:03 192.168.1.2" > + > +# Logical port 'bar1' in switch 'bar'. 
> +ADD_NAMESPACES(bar1) > +ADD_VETH(bar1, bar1, br-int, "192.168.2.2/24", "f0:00:00:01:02:04", \ > + "192.168.2.1") > +ovn-nbctl lsp-add bar bar1 \ > +-- lsp-set-addresses bar1 "f0:00:00:01:02:04 192.168.2.2" > + > +# Config OVN load-balancer with a VIP. > +ovn-nbctl lb-add lb1 30.30.30.30:80 "192.168.2.2:80" tcp > +ovn-nbctl ls-lb-add foo lb1 > + > +# Wait for ovn-controller to catch up. > +ovn-nbctl --wait=hv sync > + > +OVS_WAIT_UNTIL([ovs-ofctl -O OpenFlow13 dump-groups br-int | \ > +grep 'nat(dst=192.168.2.2:80)']) > + > +zone_id=$(ovn-appctl -t ovn-controller ct-zone-list | grep foo1 | cut -d ' ' -f2) > + > +OVS_START_L7([bar1], [http]) > + > +AT_CHECK([ip netns exec foo1 wget 192.168.2.2 -t 3 -T 1], [0], [ignore], [ignore]) > + > +# check conntrack zone has tcp entry > +AT_CHECK([ovs-appctl dpctl/dump-conntrack zone=$zone_id | \ > +FORMAT_CT(192.168.1.2) | \ > +sed -e 's/zone=[[0-9]]*/zone=<cleared>/'], [0], [dnl > +tcp,orig=(src=192.168.1.2,dst=192.168.2.2,sport=<cleared>,dport=<cleared>),reply=(src=192.168.2.2,dst=192.168.1.2,sport=<cleared>,dport=<cleared>),zone=<cleared>,protoinfo=(state=<cleared>) > +]) > + > +AT_CHECK([ovs-appctl dpctl/flush-conntrack]) > + > +# now check wirh VIP > +AT_CHECK([ip netns exec foo1 wget 30.30.30.30 -t 3 -T 1], [0], [ignore], [ignore]) > + > +# check conntrack zone has tcp entry > +AT_CHECK([ovs-appctl dpctl/dump-conntrack zone=$zone_id | \ > +FORMAT_CT(30.30.30.30) | \ > +sed -e 's/zone=[[0-9]]*/zone=<cleared>/'], [0], [dnl > +tcp,orig=(src=192.168.1.2,dst=30.30.30.30,sport=<cleared>,dport=<cleared>),reply=(src=192.168.2.2,dst=192.168.1.2,sport=<cleared>,dport=<cleared>),zone=<cleared>,mark=2,protoinfo=(state=<cleared>) > +]) > + > +AT_CHECK([ovs-appctl dpctl/flush-conntrack]) > + > +# remove lb > +ovn-nbctl ls-lb-del foo lb1 > + > +# add stateless acl > +check ovn-nbctl acl-add foo from-lport 1 1 allow-stateless > +check ovn-nbctl acl-add foo to-lport 1 1 allow-stateless > + > +AT_CHECK([ip netns exec foo1 wget 
192.168.2.2 -t 3 -T 1], [0], [ignore], [ignore]) > + > +# check conntrack zone has no tcp entry > +AT_CHECK([ovs-appctl dpctl/dump-conntrack zone=$zone_id | \ > +FORMAT_CT(192.168.1.2) | \ > +sed -e 's/zone=[[0-9]]*/zone=<cleared>/'], [0], [dnl > +]) > + > +AT_CHECK([ovs-appctl dpctl/flush-conntrack]) > + > +# add lb back > +ovn-nbctl ls-lb-add foo lb1 > + > +# Wait for ovn-controller to catch up. > +ovn-nbctl --wait=hv sync > + > +OVS_WAIT_UNTIL([ovs-ofctl -O OpenFlow13 dump-groups br-int | \ > +grep 'nat(dst=192.168.2.2:80)']) > + > +# should not dnat so will not be able to connect > +AT_CHECK([ip netns exec foo1 wget 30.30.30.30 -t 3 -T 1], [4], [ignore], [ignore]) > + > +# check conntrack zone has no tcp entry > +AT_CHECK([ovs-appctl dpctl/dump-conntrack zone=$zone_id | \ > +FORMAT_CT(30.30.30.30) | \ > +sed -e 's/zone=[[0-9]]*/zone=<cleared>/'], [0], [dnl > +]) > + > +AT_CHECK([ovs-appctl dpctl/flush-conntrack]) > + > +OVS_APP_EXIT_AND_WAIT([ovn-controller]) > + > +as ovn-sb > +OVS_APP_EXIT_AND_WAIT([ovsdb-server]) > + > +as ovn-nb > +OVS_APP_EXIT_AND_WAIT([ovsdb-server]) > + > +as northd > +OVS_APP_EXIT_AND_WAIT([NORTHD_TYPE]) > + > +as > +OVS_TRAFFIC_VSWITCHD_STOP(["/failed to query port patch-.*/d > +/connection dropped.*/d"]) > +AT_CLEANUP > +]) > + > +OVN_FOR_EACH_NORTHD([ > +AT_SETUP([omit connection tracking for stateless flows v6]) > + > +CHECK_CONNTRACK() > +ovn_start > +OVS_TRAFFIC_VSWITCHD_START() > +ADD_BR([br-int]) > + > +# Set external-ids in br-int needed for ovn-controller > +ovs-vsctl \ > + -- set Open_vSwitch . external-ids:system-id=hv1 \ > + -- set Open_vSwitch . external-ids:ovn-remote=unix:$ovs_base/ovn-sb/ovn-sb.sock \ > + -- set Open_vSwitch . external-ids:ovn-encap-type=geneve \ > + -- set Open_vSwitch . 
external-ids:ovn-encap-ip=169.0.0.1 \ > + -- set bridge br-int fail-mode=secure other-config:disable-in-band=true > + > +# Start ovn-controller > +start_daemon ovn-controller > + > +# Logical network: > +# One LR - R1 with switches foo (fd11::/64) and > +# bar (fd12::/64) connected to it > +# > +# foo -- R1 -- bar > + > +ovn-nbctl lr-add R1 > + > +ovn-nbctl ls-add foo > +ovn-nbctl ls-add bar > + > +# Connect foo to R1 > +ovn-nbctl lrp-add R1 foo 00:00:01:01:02:03 fd11::1/64 > +ovn-nbctl lsp-add foo rp-foo -- set Logical_Switch_Port rp-foo \ > + type=router options:router-port=foo addresses=\"00:00:01:01:02:03\" > + > +# Connect bar to R1 > +ovn-nbctl lrp-add R1 bar 00:00:01:01:02:04 fd12::1/64 > +ovn-nbctl lsp-add bar rp-bar -- set Logical_Switch_Port rp-bar \ > + type=router options:router-port=bar addresses=\"00:00:01:01:02:04\" > + > +# Logical port 'foo1' in switch 'foo'. > +ADD_NAMESPACES(foo1) > +ADD_VETH(foo1, foo1, br-int, "fd11::2/64", "f0:00:00:01:02:03", \ > + "fd11::1") > +ovn-nbctl lsp-add foo foo1 \ > +-- lsp-set-addresses foo1 "f0:00:00:01:02:03 fd11::2" > + > +# Logical port 'bar1' in switch 'bar'. > +ADD_NAMESPACES(bar1) > +ADD_VETH(bar1, bar1, br-int, "fd12::2/64", "f0:00:00:01:02:05", \ > +"fd12::1") > +ovn-nbctl lsp-add bar bar1 \ > +-- lsp-set-addresses bar1 "f0:00:00:01:02:05 fd12::2" > + > +# Config OVN load-balancer with a VIP. > +ovn-nbctl lb-add lb1 [[fd30::2]]:80 [[fd12::2]]:80 tcp > +ovn-nbctl ls-lb-add foo lb1 > + > +# Wait for
> +ovn-nbctl --wait=hv sync > + > +OVS_WAIT_UNTIL([ovs-ofctl -O OpenFlow13 dump-groups br-int | \ > +grep 'nat(dst=\[[fd12::2\]]:80)']) > + > +zone_id=$(ovn-appctl -t ovn-controller ct-zone-list | grep foo1 | cut -d ' ' -f2) > + > +OVS_START_L7([bar1], [http6]) > + > +AT_CHECK([ip netns exec foo1 wget http://[[fd12::2]] -t 3 -T 1], [0], [ignore], [ignore]) > + > +# check conntrack zone has tcp entry > +AT_CHECK([ovs-appctl dpctl/dump-conntrack zone=$zone_id | \ > +FORMAT_CT(fd12::2) | grep -v fe80 | \ > +sed -e 's/zone=[[0-9]]*/zone=<cleared>/'], [0], [dnl > +tcp,orig=(src=fd11::2,dst=fd12::2,sport=<cleared>,dport=<cleared>),reply=(src=fd12::2,dst=fd11::2,sport=<cleared>,dport=<cleared>),zone=<cleared>,protoinfo=(state=<cleared>) > +]) > + > +AT_CHECK([ovs-appctl dpctl/flush-conntrack]) > + > +# now check wirh VIP > +AT_CHECK([ip netns exec foo1 wget http://[[fd30::2]] -t 3 -T 1], [0], [ignore], [ignore]) > + > +# check conntrack zone has tcp entry > +AT_CHECK([ovs-appctl dpctl/dump-conntrack zone=$zone_id | \ > +FORMAT_CT(fd30::2) | grep -v fe80 | \ > +sed -e 's/zone=[[0-9]]*/zone=<cleared>/'], [0], [dnl > +tcp,orig=(src=fd11::2,dst=fd30::2,sport=<cleared>,dport=<cleared>),reply=(src=fd12::2,dst=fd11::2,sport=<cleared>,dport=<cleared>),zone=<cleared>,mark=2,protoinfo=(state=<cleared>) > +]) > + > +AT_CHECK([ovs-appctl dpctl/flush-conntrack]) > + > +# remove lb > +ovn-nbctl ls-lb-del foo lb1 > + > +# add stateless acl > +check ovn-nbctl acl-add foo from-lport 1 1 allow-stateless > +check ovn-nbctl acl-add foo to-lport 1 1 allow-stateless > + > +AT_CHECK([ip netns exec foo1 wget http://[[fd12::2]] -t 3 -T 1], [0], [ignore], [ignore]) > + > +# check conntrack zone has no tcp entry > +AT_CHECK([ovs-appctl dpctl/dump-conntrack zone=$zone_id | \ > +FORMAT_CT(fd12::2) | grep -v fe80 | \ > +sed -e 's/zone=[[0-9]]*/zone=<cleared>/'], [0], [dnl > +]) > + > +AT_CHECK([ovs-appctl dpctl/flush-conntrack]) > + > +# add lb back > +ovn-nbctl ls-lb-add foo lb1 > + > +# Wait for 
ovn-controller to catch up. > +ovn-nbctl --wait=hv sync > + > +OVS_WAIT_UNTIL([ovs-ofctl -O OpenFlow13 dump-groups br-int | \ > +grep 'nat(dst=\[[fd12::2\]]:80)']) > + > +# should not dnat so will not be able to connect > +AT_CHECK([ip netns exec foo1 wget http://[[fd30::2]] -t 3 -T 1], [4], [ignore], [ignore]) > +# > +# check conntrack zone has no tcp entry > +AT_CHECK([ovs-appctl dpctl/dump-conntrack zone=$zone_id | \ > +FORMAT_CT(fd30::2) | grep -v fe80 | \ > +sed -e 's/zone=[[0-9]]*/zone=<cleared>/'], [0], [dnl > +]) > + > +AT_CHECK([ovs-appctl dpctl/flush-conntrack]) > + > +OVS_APP_EXIT_AND_WAIT([ovn-controller]) > + > +as ovn-sb > +OVS_APP_EXIT_AND_WAIT([ovsdb-server]) > + > +as ovn-nb > +OVS_APP_EXIT_AND_WAIT([ovsdb-server]) > + > +as northd > +OVS_APP_EXIT_AND_WAIT([NORTHD_TYPE]) > + > +as > +OVS_TRAFFIC_VSWITCHD_STOP(["/failed to query port patch-.*/d > +/connection dropped.*/d"]) > +AT_CLEANUP > +]) > -- > 2.17.1 >
Thanks Venu for v2. Also thanks Numan for reviewing v1, and I saw that Numan's comment was addressed. So, applied to main branch. Han _______________________________________________ dev mailing list [email protected] https://mail.openvswitch.org/mailman/listinfo/ovs-dev
