On Mon, Dec 12, 2022 at 1:28 AM venu iyer <[email protected]> wrote:
>
> Currently, even stateless flows are subject to connection tracking when
> there are LB rules (for DNAT). However, if a flow needs to be subjected
> to LB, then it shouldn't be configured as stateless.
>
> Stateless flow means we should not track it, and this change exempts
> stateless flows from being tracked regardless of whether LB rules are
> present or not.
>
> Signed-off-by: venu iyer <[email protected]>
> Acked-by: Han Zhou <[email protected]>
> ---
>  northd/northd.c         |  25 +++-
>  northd/ovn-northd.8.xml |  57 ++++----
>  ovn-nb.xml              |   3 +
>  tests/ovn-northd.at     |  76 +++++------
>  tests/ovn.at            |   4 +-
>  tests/system-ovn.at     | 296 ++++++++++++++++++++++++++++++++++++++++
>  6 files changed, 383 insertions(+), 78 deletions(-)
>
> diff --git a/northd/northd.c b/northd/northd.c
> index 7c48bb3b4..5d8ef612f 100644
> --- a/northd/northd.c
> +++ b/northd/northd.c
> @@ -140,8 +140,8 @@ enum ovn_stage {
>      PIPELINE_STAGE(SWITCH, IN,  L2_UNKNOWN,    26, "ls_in_l2_unknown")
 \
>
 \
>      /* Logical switch egress stages. */
  \
> -    PIPELINE_STAGE(SWITCH, OUT, PRE_LB,       0, "ls_out_pre_lb")
  \
> -    PIPELINE_STAGE(SWITCH, OUT, PRE_ACL,      1, "ls_out_pre_acl")
 \
> +    PIPELINE_STAGE(SWITCH, OUT, PRE_ACL,      0, "ls_out_pre_acl")
 \
> +    PIPELINE_STAGE(SWITCH, OUT, PRE_LB,       1, "ls_out_pre_lb")
  \
>      PIPELINE_STAGE(SWITCH, OUT, PRE_STATEFUL, 2, "ls_out_pre_stateful")
  \
>      PIPELINE_STAGE(SWITCH, OUT, ACL_HINT,     3, "ls_out_acl_hint")
  \
>      PIPELINE_STAGE(SWITCH, OUT, ACL,          4, "ls_out_acl")
 \
> @@ -215,6 +215,7 @@ enum ovn_stage {
>  #define REGBIT_ACL_LABEL          "reg0[13]"
>  #define REGBIT_FROM_RAMP          "reg0[14]"
>  #define REGBIT_PORT_SEC_DROP      "reg0[15]"
> +#define REGBIT_ACL_STATELESS      "reg0[16]"
>
>  #define REG_ORIG_DIP_IPV4         "reg1"
>  #define REG_ORIG_DIP_IPV6         "xxreg1"
> @@ -290,7 +291,7 @@ enum ovn_stage {
>   * | R0 |     REGBIT_{CONNTRACK/DHCP/DNS}              |   |
                      |
>   * |    |     REGBIT_{HAIRPIN/HAIRPIN_REPLY}           |   |
                      |
>   * |    | REGBIT_ACL_HINT_{ALLOW_NEW/ALLOW/DROP/BLOCK} |   |
                      |
> - * |    |     REGBIT_ACL_LABEL                         | X |
                      |
> + * |    |     REGBIT_ACL_{LABEL/STATELESS}             | X |
                      |
>   * +----+----------------------------------------------+ X |
                      |
>   * | R5 |                   UNUSED                     | X |
LB_L2_AFF_BACKEND_IP6       |
>   * | R1 |         ORIG_DIP_IPV4 (>= IN_PRE_STATEFUL)   | R |
                      |
> @@ -5693,17 +5694,18 @@ build_stateless_filter(struct ovn_datapath *od,
>                         const struct nbrec_acl *acl,
>                         struct hmap *lflows)
>  {
> +    const char *action = REGBIT_ACL_STATELESS" = 1; next;";
>      if (!strcmp(acl->direction, "from-lport")) {
>          ovn_lflow_add_with_hint(lflows, od, S_SWITCH_IN_PRE_ACL,
>                                  acl->priority + OVN_ACL_PRI_OFFSET,
>                                  acl->match,
> -                                "next;",
> +                                action,
>                                  &acl->header_);
>      } else {
>          ovn_lflow_add_with_hint(lflows, od, S_SWITCH_OUT_PRE_ACL,
>                                  acl->priority + OVN_ACL_PRI_OFFSET,
>                                  acl->match,
> -                                "next;",
> +                                action,
>                                  &acl->header_);
>      }
>  }
> @@ -5795,6 +5797,10 @@ build_pre_acls(struct ovn_datapath *od, const
struct hmap *port_groups,
>                        REGBIT_CONNTRACK_DEFRAG" = 1; next;");
>          ovn_lflow_add(lflows, od, S_SWITCH_OUT_PRE_ACL, 100, "ip",
>                        REGBIT_CONNTRACK_DEFRAG" = 1; next;");
> +    } else if (od->has_lb_vip) {
> +        /* We'll build stateless filters if there are LB rules so that
> +         * the stateless flows are not tracked in pre-lb. */
> +         build_stateless_filters(od, port_groups, lflows);
>      }
>  }
>
> @@ -5930,6 +5936,12 @@ build_pre_lb(struct ovn_datapath *od, const struct
shash *meter_groups,
>                                   110, lflows);
>      }
>
> +    /* Do not send stateless flows via conntrack. */
> +    ovn_lflow_add(lflows, od, S_SWITCH_IN_PRE_LB, 110,
> +                  REGBIT_ACL_STATELESS" == 1", "next;");
> +    ovn_lflow_add(lflows, od, S_SWITCH_OUT_PRE_LB, 110,
> +                  REGBIT_ACL_STATELESS" == 1", "next;");
> +
>      /* 'REGBIT_CONNTRACK_NAT' is set to let the pre-stateful table send
>       * packet to conntrack for defragmentation and possibly for
unNATting.
>       *
> @@ -6935,7 +6947,8 @@ build_lb_rules_pre_stateful(struct hmap *lflows,
struct ovn_northd_lb *lb,
>          }
>          ds_put_format(action, "%s;", ct_lb_mark ? "ct_lb_mark" :
"ct_lb");
>
> -        ds_put_format(match, "%s.dst == %s", ip_match, lb_vip->vip_str);
> +        ds_put_format(match, REGBIT_CONNTRACK_NAT" == 1 && %s.dst == %s",
> +                      ip_match, lb_vip->vip_str);
>          if (lb_vip->port_str) {
>              ds_put_format(match, " && %s.dst == %s", proto,
lb_vip->port_str);
>          }
> diff --git a/northd/ovn-northd.8.xml b/northd/ovn-northd.8.xml
> index dffbba96d..ce5603169 100644
> --- a/northd/ovn-northd.8.xml
> +++ b/northd/ovn-northd.8.xml
> @@ -474,7 +474,9 @@
>        priority-110 flow is added to skip over stateful ACLs. Multicast,
IPv6
>        Neighbor Discovery and MLD traffic also skips stateful ACLs. For
>        "allow-stateless" ACLs, a flow is added to bypass setting the hint
for
> -      connection tracker processing.
> +      connection tracker processing when there are stateful ACLs or LB
rules;
> +      <code>REGBIT_ACL_STATELESS</code> is set for traffic matching
stateless
> +      ACL flows.
>      </p>
>
>      <p>
> @@ -494,8 +496,10 @@
>        in ingress table <code>LB</code> and <code>Stateful</code>.  It
contains
>        a priority-0 flow that simply moves traffic to the next table.
Moreover
>        it contains two priority-110 flows to move multicast, IPv6 Neighbor
> -      Discovery and MLD traffic to the next table. If load balancing
rules with
> -      virtual IP addresses (and ports) are configured in
> +      Discovery and MLD traffic to the next table. It also contains a
> +      priority-110 flow to move stateless traffic, i.e. traffic for which
> +      <code>REGBIT_ACL_STATELESS</code> is set, to the next table. If
load
> +      balancing rules with virtual IP addresses (and ports) are
configured in
>        <code>OVN_Northbound</code> database for a logical switch
datapath, a
>        priority-100 flow is added with the match <code>ip</code> to match
on IP
>        packets and sets the action <code>reg0[2] = 1; next;</code> to act
as a
> @@ -1973,19 +1977,11 @@ output;
>        </li>
>      </ul>
>
> -    <h3>Egress Table 0: Pre-LB</h3>
> +    <h3>Egress Table 0: <code>to-lport</code> Pre-ACLs</h3>
>
>      <p>
> -      This table is similar to ingress table <code>Pre-LB</code>.  It
> -      contains a priority-0 flow that simply moves traffic to the next
table.
> -      Moreover it contains two priority-110 flows to move multicast, IPv6
> -      Neighbor Discovery and MLD traffic to the next table. If any load
> -      balancing rules exist for the datapath, a priority-100 flow is
added with
> -      a match of <code>ip</code> and action of <code>reg0[2] = 1;
next;</code>
> -      to act as a hint for table <code>Pre-stateful</code> to send IP
packets
> -      to the connection tracker for packet de-fragmentation and possibly
DNAT
> -      the destination VIP to one of the selected backend for already
committed
> -      load balanced traffic.
> +      This is similar to ingress table <code>Pre-ACLs</code> except for
> +     <code>to-lport</code> traffic.
>      </p>
>
>      <p>
> @@ -1998,11 +1994,29 @@ output;
>        db="OVN_Northbound"/> table.
>      </p>
>
> -    <h3>Egress Table 1: <code>to-lport</code> Pre-ACLs</h3>
> +    <p>
> +      This table also has a priority-110 flow with the match
> +      <code>outport == <var>I</var></code> for all logical switch
> +      datapaths to move traffic to the next table. Where <var>I</var>
> +      is the peer of a logical router port. This flow is added to
> +      skip the connection tracking of packets which will be entering
> +      logical router datapath from logical switch datapath for routing.
> +    </p>
> +
> +
> +    <h3>Egress Table 1: Pre-LB</h3>
>
>      <p>
> -      This is similar to ingress table <code>Pre-ACLs</code> except for
> -     <code>to-lport</code> traffic.
> +      This table is similar to ingress table <code>Pre-LB</code>.  It
> +      contains a priority-0 flow that simply moves traffic to the next
table.
> +      Moreover it contains two priority-110 flows to move multicast, IPv6
> +      Neighbor Discovery and MLD traffic to the next table. If any load
> +      balancing rules exist for the datapath, a priority-100 flow is
added with
> +      a match of <code>ip</code> and action of <code>reg0[2] = 1;
next;</code>
> +      to act as a hint for table <code>Pre-stateful</code> to send IP
packets
> +      to the connection tracker for packet de-fragmentation and possibly
DNAT
> +      the destination VIP to one of the selected backend for already
committed
> +      load balanced traffic.
>      </p>
>
>      <p>
> @@ -2015,15 +2029,6 @@ output;
>        db="OVN_Northbound"/> table.
>      </p>
>
> -    <p>
> -      This table also has a priority-110 flow with the match
> -      <code>outport == <var>I</var></code> for all logical switch
> -      datapaths to move traffic to the next table. Where <var>I</var>
> -      is the peer of a logical router port. This flow is added to
> -      skip the connection tracking of packets which will be entering
> -      logical router datapath from logical switch datapath for routing.
> -    </p>
> -
>      <h3>Egress Table 2: Pre-stateful</h3>
>
>      <p>
> diff --git a/ovn-nb.xml b/ovn-nb.xml
> index 0edc3da96..3ac7785e1 100644
> --- a/ovn-nb.xml
> +++ b/ovn-nb.xml
> @@ -2159,6 +2159,9 @@ or
>            outgoing TCP traffic directed to an IP address, then you
probably
>            also want to define another rule to allow incoming TCP traffic
coming
>            from this same IP address.
> +          In addition, traffic that matches stateless ACLs will bypass
> +          load-balancer DNAT/un-DNAT processing. Stateful ACLs should be
> +          used instead if the traffic is supposed to be load-balanced.
>          </li>
>
>          <li>
> diff --git a/tests/ovn-northd.at b/tests/ovn-northd.at
> index ca4263eac..a2f8e8a20 100644
> --- a/tests/ovn-northd.at
> +++ b/tests/ovn-northd.at
> @@ -2024,7 +2024,7 @@ AT_CLEANUP
>
>  # This test case tests that when a logical switch has load balancers
associated
>  # (with VIPs configured), the below logical flow is added by ovn-northd.
> -# table=0 (ls_out_pre_lb      ), priority=100  , match=(ip),
action=(reg0[[0]] = 1; next;)
> +# table=1 (ls_out_pre_lb      ), priority=100  , match=(ip),
action=(reg0[[0]] = 1; next;)
>  # This test case is added for the BZ -
>  # https://bugzilla.redhat.com/show_bug.cgi?id=1849162
>  #
> @@ -2063,27 +2063,27 @@ check ovn-nbctl ls-lb-add sw0 lb1
>  check ovn-nbctl add load_balancer_group $lbg load_balancer $lb3
>  check ovn-nbctl --wait=sb sync
>  AT_CHECK([ovn-sbctl lflow-list | grep "ls_out_pre_lb.*priority=100" |
grep reg0 | sort], [0], [dnl
> -  table=0 (ls_out_pre_lb      ), priority=100  , match=(ip),
action=(reg0[[2]] = 1; next;)
> +  table=1 (ls_out_pre_lb      ), priority=100  , match=(ip),
action=(reg0[[2]] = 1; next;)
>  ])
>
>  check ovn-nbctl ls-lb-add sw0 lb2
>  check ovn-nbctl add load_balancer_group $lbg load_balancer $lb4
>  check ovn-nbctl --wait=sb sync
>  AT_CHECK([ovn-sbctl lflow-list | grep "ls_out_pre_lb.*priority=100" |
grep reg0 | sort], [0], [dnl
> -  table=0 (ls_out_pre_lb      ), priority=100  , match=(ip),
action=(reg0[[2]] = 1; next;)
> +  table=1 (ls_out_pre_lb      ), priority=100  , match=(ip),
action=(reg0[[2]] = 1; next;)
>  ])
>
>  check ovn-nbctl clear load_balancer $lb1 vips
>  check ovn-nbctl clear load_balancer $lb3 vips
>  check ovn-nbctl --wait=sb sync
>  AT_CHECK([ovn-sbctl lflow-list | grep "ls_out_pre_lb.*priority=100" |
grep reg0 | sort], [0], [dnl
> -  table=0 (ls_out_pre_lb      ), priority=100  , match=(ip),
action=(reg0[[2]] = 1; next;)
> +  table=1 (ls_out_pre_lb      ), priority=100  , match=(ip),
action=(reg0[[2]] = 1; next;)
>  ])
>
>  check ovn-nbctl clear load_balancer $lb2 vips
>  check ovn-nbctl --wait=sb sync
>  AT_CHECK([ovn-sbctl lflow-list | grep "ls_out_pre_lb.*priority=100" |
grep reg0 | sort], [0], [dnl
> -  table=0 (ls_out_pre_lb      ), priority=100  , match=(ip),
action=(reg0[[2]] = 1; next;)
> +  table=1 (ls_out_pre_lb      ), priority=100  , match=(ip),
action=(reg0[[2]] = 1; next;)
>  ])
>
>  check ovn-nbctl clear load_balancer $lb4 vips
> @@ -2098,7 +2098,7 @@ check ovn-nbctl set load_balancer $lb4
vips:"10.0.0.13"="10.0.0.6"
>
>  check ovn-nbctl --wait=sb sync
>  AT_CHECK([ovn-sbctl lflow-list | grep "ls_out_pre_lb.*priority=100" |
grep reg0 | sort], [0], [dnl
> -  table=0 (ls_out_pre_lb      ), priority=100  , match=(ip),
action=(reg0[[2]] = 1; next;)
> +  table=1 (ls_out_pre_lb      ), priority=100  , match=(ip),
action=(reg0[[2]] = 1; next;)
>  ])
>
>  # Now reverse the order of clearing the vip.
> @@ -2106,13 +2106,13 @@ check ovn-nbctl clear load_balancer $lb2 vips
>  check ovn-nbctl clear load_balancer $lb4 vips
>  check ovn-nbctl --wait=sb sync
>  AT_CHECK([ovn-sbctl lflow-list | grep "ls_out_pre_lb.*priority=100" |
grep reg0 | sort], [0], [dnl
> -  table=0 (ls_out_pre_lb      ), priority=100  , match=(ip),
action=(reg0[[2]] = 1; next;)
> +  table=1 (ls_out_pre_lb      ), priority=100  , match=(ip),
action=(reg0[[2]] = 1; next;)
>  ])
>
>  check ovn-nbctl clear load_balancer $lb1 vips
>  check ovn-nbctl --wait=sb sync
>  AT_CHECK([ovn-sbctl lflow-list | grep "ls_out_pre_lb.*priority=100" |
grep reg0 | sort], [0], [dnl
> -  table=0 (ls_out_pre_lb      ), priority=100  , match=(ip),
action=(reg0[[2]] = 1; next;)
> +  table=1 (ls_out_pre_lb      ), priority=100  , match=(ip),
action=(reg0[[2]] = 1; next;)
>  ])
>
>  check ovn-nbctl clear load_balancer $lb3 vips
> @@ -3057,18 +3057,10 @@ for direction in from to; do
>  done
>  ovn-nbctl --wait=sb sync
>
> -# TCP packets should go to conntrack for load balancing.
> +# TCP packets should not go to conntrack for load balancing.
>  flow="inport == \"lsp1\" && ${flow_eth} && ${flow_ip} && ${flow_tcp}"
>  AT_CHECK_UNQUOTED([ovn_trace --ct new --ct new --minimal ls "${flow}"],
[0], [dnl
> -ct_lb_mark {
> -    ct_lb_mark {
> -        reg0[[6]] = 0;
> -        reg0[[12]] = 0;
> -        ct_lb_mark /* default (use --ct to customize) */ {
> -            output("lsp2");
> -        };
> -    };
> -};
> +output("lsp2");
>  ])
>
>  # UDP packets still go to conntrack.
> @@ -3201,18 +3193,10 @@ for direction in from to; do
>  done
>  ovn-nbctl --wait=sb sync
>
> -# TCP packets should go to conntrack for load balancing.
> +# TCP packets should not go to conntrack for load balancing.
>  flow="inport == \"lsp1\" && ${flow_eth} && ${flow_ip} && ${flow_tcp}"
>  AT_CHECK_UNQUOTED([ovn_trace --ct new --ct new --minimal ls "${flow}"],
[0], [dnl
> -ct_lb_mark {
> -    ct_lb_mark {
> -        reg0[[6]] = 0;
> -        reg0[[12]] = 0;
> -        ct_lb_mark /* default (use --ct to customize) */ {
> -            output("lsp2");
> -        };
> -    };
> -};
> +output("lsp2");
>  ])
>
>  # UDP packets still go to conntrack.
> @@ -4026,14 +4010,15 @@ check_stateful_flows() {
>    table=? (ls_in_pre_lb       ), priority=110  , match=(eth.mcast),
action=(next;)
>    table=? (ls_in_pre_lb       ), priority=110  , match=(ip && inport ==
"sw0-lr0"), action=(next;)
>    table=? (ls_in_pre_lb       ), priority=110  , match=(nd || nd_rs ||
nd_ra || mldv1 || mldv2), action=(next;)
> +  table=? (ls_in_pre_lb       ), priority=110  , match=(reg0[[16]] ==
1), action=(next;)
>  ])
>
>      AT_CHECK([grep "ls_in_pre_stateful" sw0flows | sort | sed
's/table=./table=?/'], [0], [dnl
>    table=? (ls_in_pre_stateful ), priority=0    , match=(1),
action=(next;)
>    table=? (ls_in_pre_stateful ), priority=100  , match=(reg0[[0]] == 1),
action=(ct_next;)
>    table=? (ls_in_pre_stateful ), priority=110  , match=(reg0[[2]] == 1),
action=(ct_lb_mark;)
> -  table=? (ls_in_pre_stateful ), priority=120  , match=(ip4.dst ==
10.0.0.10 && tcp.dst == 80), action=(reg1 = 10.0.0.10; reg2[[0..15]] = 80;
ct_lb_mark;)
> -  table=? (ls_in_pre_stateful ), priority=120  , match=(ip4.dst ==
10.0.0.20 && tcp.dst == 80), action=(reg1 = 10.0.0.20; reg2[[0..15]] = 80;
ct_lb_mark;)
> +  table=? (ls_in_pre_stateful ), priority=120  , match=(reg0[[2]] == 1
&& ip4.dst == 10.0.0.10 && tcp.dst == 80), action=(reg1 = 10.0.0.10;
reg2[[0..15]] = 80; ct_lb_mark;)
> +  table=? (ls_in_pre_stateful ), priority=120  , match=(reg0[[2]] == 1
&& ip4.dst == 10.0.0.20 && tcp.dst == 80), action=(reg1 = 10.0.0.20;
reg2[[0..15]] = 80; ct_lb_mark;)
>  ])
>
>      AT_CHECK([grep "ls_in_lb " sw0flows | sort | sed
's/table=../table=??/'], [0], [dnl
> @@ -4049,12 +4034,13 @@ check_stateful_flows() {
>  ])
>
>      AT_CHECK([grep "ls_out_pre_lb" sw0flows | sort], [0], [dnl
> -  table=0 (ls_out_pre_lb      ), priority=0    , match=(1),
action=(next;)
> -  table=0 (ls_out_pre_lb      ), priority=100  , match=(ip),
action=(reg0[[2]] = 1; next;)
> -  table=0 (ls_out_pre_lb      ), priority=110  , match=(eth.mcast),
action=(next;)
> -  table=0 (ls_out_pre_lb      ), priority=110  , match=(eth.src ==
$svc_monitor_mac), action=(next;)
> -  table=0 (ls_out_pre_lb      ), priority=110  , match=(ip && outport ==
"sw0-lr0"), action=(next;)
> -  table=0 (ls_out_pre_lb      ), priority=110  , match=(nd || nd_rs ||
nd_ra || mldv1 || mldv2), action=(next;)
> +  table=1 (ls_out_pre_lb      ), priority=0    , match=(1),
action=(next;)
> +  table=1 (ls_out_pre_lb      ), priority=100  , match=(ip),
action=(reg0[[2]] = 1; next;)
> +  table=1 (ls_out_pre_lb      ), priority=110  , match=(eth.mcast),
action=(next;)
> +  table=1 (ls_out_pre_lb      ), priority=110  , match=(eth.src ==
$svc_monitor_mac), action=(next;)
> +  table=1 (ls_out_pre_lb      ), priority=110  , match=(ip && outport ==
"sw0-lr0"), action=(next;)
> +  table=1 (ls_out_pre_lb      ), priority=110  , match=(nd || nd_rs ||
nd_ra || mldv1 || mldv2), action=(next;)
> +  table=1 (ls_out_pre_lb      ), priority=110  , match=(reg0[[16]] ==
1), action=(next;)
>  ])
>
>      AT_CHECK([grep "ls_out_pre_stateful" sw0flows | sort], [0], [dnl
> @@ -4094,6 +4080,7 @@ AT_CHECK([grep "ls_in_pre_lb" sw0flows | sort | sed
's/table=./table=?/'], [0],
>    table=? (ls_in_pre_lb       ), priority=110  , match=(eth.mcast),
action=(next;)
>    table=? (ls_in_pre_lb       ), priority=110  , match=(ip && inport ==
"sw0-lr0"), action=(next;)
>    table=? (ls_in_pre_lb       ), priority=110  , match=(nd || nd_rs ||
nd_ra || mldv1 || mldv2), action=(next;)
> +  table=? (ls_in_pre_lb       ), priority=110  , match=(reg0[[16]] ==
1), action=(next;)
>  ])
>
>  AT_CHECK([grep "ls_in_pre_stateful" sw0flows | sort | sed
's/table=./table=?/'], [0], [dnl
> @@ -4113,11 +4100,12 @@ AT_CHECK([grep "ls_in_stateful" sw0flows | sort |
sed 's/table=../table=??/'], [
>  ])
>
>  AT_CHECK([grep "ls_out_pre_lb" sw0flows | sort], [0], [dnl
> -  table=0 (ls_out_pre_lb      ), priority=0    , match=(1),
action=(next;)
> -  table=0 (ls_out_pre_lb      ), priority=110  , match=(eth.mcast),
action=(next;)
> -  table=0 (ls_out_pre_lb      ), priority=110  , match=(eth.src ==
$svc_monitor_mac), action=(next;)
> -  table=0 (ls_out_pre_lb      ), priority=110  , match=(ip && outport ==
"sw0-lr0"), action=(next;)
> -  table=0 (ls_out_pre_lb      ), priority=110  , match=(nd || nd_rs ||
nd_ra || mldv1 || mldv2), action=(next;)
> +  table=1 (ls_out_pre_lb      ), priority=0    , match=(1),
action=(next;)
> +  table=1 (ls_out_pre_lb      ), priority=110  , match=(eth.mcast),
action=(next;)
> +  table=1 (ls_out_pre_lb      ), priority=110  , match=(eth.src ==
$svc_monitor_mac), action=(next;)
> +  table=1 (ls_out_pre_lb      ), priority=110  , match=(ip && outport ==
"sw0-lr0"), action=(next;)
> +  table=1 (ls_out_pre_lb      ), priority=110  , match=(nd || nd_rs ||
nd_ra || mldv1 || mldv2), action=(next;)
> +  table=1 (ls_out_pre_lb      ), priority=110  , match=(reg0[[16]] ==
1), action=(next;)
>  ])
>
>  AT_CHECK([grep "ls_out_pre_stateful" sw0flows | sort], [0], [dnl
> @@ -7677,7 +7665,7 @@ check ovn-nbctl --wait=sb sync
>  AT_CHECK([ovn-sbctl lflow-list | grep -e natted -e ct_lb], [0], [dnl
>    table=7 (lr_in_dnat         ), priority=110  , match=(ct.est &&
!ct.rel && ip4 && reg0 == 66.66.66.66 && ct_mark.natted == 1),
action=(next;)
>    table=7 (lr_in_dnat         ), priority=110  , match=(ct.new &&
!ct.rel && ip4 && reg0 == 66.66.66.66),
action=(ct_lb_mark(backends=42.42.42.2);)
> -  table=6 (ls_in_pre_stateful ), priority=120  , match=(ip4.dst ==
66.66.66.66), action=(reg1 = 66.66.66.66; ct_lb_mark;)
> +  table=6 (ls_in_pre_stateful ), priority=120  , match=(reg0[[2]] == 1
&& ip4.dst == 66.66.66.66), action=(reg1 = 66.66.66.66; ct_lb_mark;)
>    table=6 (ls_in_pre_stateful ), priority=110  , match=(reg0[[2]] == 1),
action=(ct_lb_mark;)
>    table=12(ls_in_lb           ), priority=110  , match=(ct.new &&
ip4.dst == 66.66.66.66), action=(reg0[[1]] = 0;
ct_lb_mark(backends=42.42.42.2);)
>    table=2 (ls_out_pre_stateful), priority=110  , match=(reg0[[2]] == 1),
action=(ct_lb_mark;)
> @@ -7689,7 +7677,7 @@ check ovn-nbctl --wait=sb sync
>  AT_CHECK([ovn-sbctl lflow-list | grep -e natted -e ct_lb], [0], [dnl
>    table=7 (lr_in_dnat         ), priority=110  , match=(ct.est &&
!ct.rel && ip4 && reg0 == 66.66.66.66 && ct_label.natted == 1),
action=(next;)
>    table=7 (lr_in_dnat         ), priority=110  , match=(ct.new &&
!ct.rel && ip4 && reg0 == 66.66.66.66), action=(ct_lb(backends=42.42.42.2);)
> -  table=6 (ls_in_pre_stateful ), priority=120  , match=(ip4.dst ==
66.66.66.66), action=(reg1 = 66.66.66.66; ct_lb;)
> +  table=6 (ls_in_pre_stateful ), priority=120  , match=(reg0[[2]] == 1
&& ip4.dst == 66.66.66.66), action=(reg1 = 66.66.66.66; ct_lb;)
>    table=6 (ls_in_pre_stateful ), priority=110  , match=(reg0[[2]] == 1),
action=(ct_lb;)
>    table=12(ls_in_lb           ), priority=110  , match=(ct.new &&
ip4.dst == 66.66.66.66), action=(reg0[[1]] = 0; ct_lb(backends=42.42.42.2);)
>    table=2 (ls_out_pre_stateful), priority=110  , match=(reg0[[2]] == 1),
action=(ct_lb;)
> @@ -7701,7 +7689,7 @@ check ovn-nbctl --wait=sb sync
>  AT_CHECK([ovn-sbctl lflow-list | grep -e natted -e ct_lb], [0], [dnl
>    table=7 (lr_in_dnat         ), priority=110  , match=(ct.est &&
!ct.rel && ip4 && reg0 == 66.66.66.66 && ct_mark.natted == 1),
action=(next;)
>    table=7 (lr_in_dnat         ), priority=110  , match=(ct.new &&
!ct.rel && ip4 && reg0 == 66.66.66.66),
action=(ct_lb_mark(backends=42.42.42.2);)
> -  table=6 (ls_in_pre_stateful ), priority=120  , match=(ip4.dst ==
66.66.66.66), action=(reg1 = 66.66.66.66; ct_lb_mark;)
> +  table=6 (ls_in_pre_stateful ), priority=120  , match=(reg0[[2]] == 1
&& ip4.dst == 66.66.66.66), action=(reg1 = 66.66.66.66; ct_lb_mark;)
>    table=6 (ls_in_pre_stateful ), priority=110  , match=(reg0[[2]] == 1),
action=(ct_lb_mark;)
>    table=12(ls_in_lb           ), priority=110  , match=(ct.new &&
ip4.dst == 66.66.66.66), action=(reg0[[1]] = 0;
ct_lb_mark(backends=42.42.42.2);)
>    table=2 (ls_out_pre_stateful), priority=110  , match=(reg0[[2]] == 1),
action=(ct_lb_mark;)
> diff --git a/tests/ovn.at b/tests/ovn.at
> index f3bd53242..7abded46c 100644
> --- a/tests/ovn.at
> +++ b/tests/ovn.at
> @@ -23777,7 +23777,7 @@ OVS_WAIT_FOR_OUTPUT(
>    [ovn-sbctl dump-flows > sbflows
>     ovn-sbctl dump-flows sw0 | grep ct_lb_mark | grep priority=120 | sed
's/table=..//'], 0,
>    [dnl
> -  (ls_in_pre_stateful ), priority=120  , match=(ip4.dst == 10.0.0.10 &&
tcp.dst == 80), action=(reg1 = 10.0.0.10; reg2[[0..15]] = 80; ct_lb_mark;)
> +  (ls_in_pre_stateful ), priority=120  , match=(reg0[[2]] == 1 &&
ip4.dst == 10.0.0.10 && tcp.dst == 80), action=(reg1 = 10.0.0.10;
reg2[[0..15]] = 80; ct_lb_mark;)
>    (ls_in_lb           ), priority=120  , match=(ct.new && ip4.dst ==
10.0.0.10 && tcp.dst == 80), action=(reg0[[1]] = 0; ct_lb_mark(backends=
10.0.0.3:80,20.0.0.3:80; hash_fields="ip_dst,ip_src,tcp_dst,tcp_src");)
>  ])
>
> @@ -23820,7 +23820,7 @@ ovn-sbctl dump-flows sw0 > sbflows3
>  AT_CHECK(
>    [grep "ip4.dst == 10.0.0.10 && tcp.dst == 80" sbflows3 | grep
priority=120 |\
>     sed 's/table=../table=??/'], [0], [dnl
> -  table=??(ls_in_pre_stateful ), priority=120  , match=(ip4.dst ==
10.0.0.10 && tcp.dst == 80), action=(reg1 = 10.0.0.10; reg2[[0..15]] = 80;
ct_lb_mark;)
> +  table=??(ls_in_pre_stateful ), priority=120  , match=(reg0[[2]] == 1
&& ip4.dst == 10.0.0.10 && tcp.dst == 80), action=(reg1 = 10.0.0.10;
reg2[[0..15]] = 80; ct_lb_mark;)
>    table=??(ls_in_lb           ), priority=120  , match=(ct.new &&
ip4.dst == 10.0.0.10 && tcp.dst == 80), action=(drop;)
>  ])
>
> diff --git a/tests/system-ovn.at b/tests/system-ovn.at
> index b99578b9e..4bc9fb84f 100644
> --- a/tests/system-ovn.at
> +++ b/tests/system-ovn.at
> @@ -9511,3 +9511,299 @@ NS_CHECK_EXEC([vm3], [nc 6666::1 666 -z], [0],
[ignore], [ignore])
>
>  AT_CLEANUP
>  ])
> +
> +# for packets that match stateless ACL flows, make sure we bypass
> +# connection tracking, even with a LB in the switch. Testing for
> +# TCP should suffice. For v4 and v6.
> +#
> +OVN_FOR_EACH_NORTHD([
> +AT_SETUP([omit connection  tracking for stateless flows v4])
> +
> +CHECK_CONNTRACK()
> +ovn_start
> +OVS_TRAFFIC_VSWITCHD_START()
> +ADD_BR([br-int])
> +
> +# Set external-ids in br-int needed for ovn-controller
> +ovs-vsctl \
> +        -- set Open_vSwitch . external-ids:system-id=hv1 \
> +        -- set Open_vSwitch .
external-ids:ovn-remote=unix:$ovs_base/ovn-sb/ovn-sb.sock \
> +        -- set Open_vSwitch . external-ids:ovn-encap-type=geneve \
> +        -- set Open_vSwitch . external-ids:ovn-encap-ip=169.0.0.1 \
> +        -- set bridge br-int fail-mode=secure
other-config:disable-in-band=true
> +
> +# Start ovn-controller
> +start_daemon ovn-controller
> +
> +# Logical network:
> +# One LR R1 with switches foo (192.168.1.0/24), bar (192.168.2.0/24),
> +#
> +#    foo -- R1 -- bar
> +
> +ovn-nbctl lr-add R1
> +
> +ovn-nbctl ls-add foo
> +ovn-nbctl ls-add bar
> +
> +ovn-nbctl lrp-add R1 foo 00:00:01:01:02:03 192.168.1.1/24
> +ovn-nbctl lrp-add R1 bar 00:00:01:01:02:04 192.168.2.1/24
> +
> +# Connect foo to R1
> +ovn-nbctl lsp-add foo rp-foo -- set Logical_Switch_Port rp-foo \
> +    type=router options:router-port=foo \
> +    -- lsp-set-addresses rp-foo router
> +
> +# Connect bar to R1
> +ovn-nbctl lsp-add bar rp-bar -- set Logical_Switch_Port rp-bar \
> +    type=router options:router-port=bar \
> +    -- lsp-set-addresses rp-bar router
> +
> +# Logical port 'foo1' in switch 'foo'.
> +ADD_NAMESPACES(foo1)
> +ADD_VETH(foo1, foo1, br-int, "192.168.1.2/24", "f0:00:00:01:02:03", \
> +         "192.168.1.1")
> +ovn-nbctl lsp-add foo foo1 \
> +-- lsp-set-addresses foo1 "f0:00:00:01:02:03 192.168.1.2"
> +
> +# Logical port 'bar1' in switch 'bar'.
> +ADD_NAMESPACES(bar1)
> +ADD_VETH(bar1, bar1, br-int, "192.168.2.2/24", "f0:00:00:01:02:04", \
> +         "192.168.2.1")
> +ovn-nbctl lsp-add bar bar1 \
> +-- lsp-set-addresses bar1 "f0:00:00:01:02:04 192.168.2.2"
> +
> +# Config OVN load-balancer with a VIP.
> +ovn-nbctl lb-add lb1 30.30.30.30:80 "192.168.2.2:80" tcp
> +ovn-nbctl ls-lb-add foo lb1
> +
> +# Wait for ovn-controller to catch up.
> +ovn-nbctl --wait=hv sync
> +
> +OVS_WAIT_UNTIL([ovs-ofctl -O OpenFlow13 dump-groups br-int | \
> +grep 'nat(dst=192.168.2.2:80)'])
> +
> +zone_id=$(ovn-appctl -t ovn-controller ct-zone-list | grep foo1 | cut -d
' ' -f2)
> +
> +OVS_START_L7([bar1], [http])
> +
> +AT_CHECK([ip netns exec foo1 wget   192.168.2.2 -t 3 -T 1], [0],
[ignore], [ignore])
> +
> +# check conntrack zone has tcp entry
> +AT_CHECK([ovs-appctl dpctl/dump-conntrack zone=$zone_id | \
> +FORMAT_CT(192.168.1.2) | \
> +sed -e 's/zone=[[0-9]]*/zone=<cleared>/'], [0], [dnl
>
+tcp,orig=(src=192.168.1.2,dst=192.168.2.2,sport=<cleared>,dport=<cleared>),reply=(src=192.168.2.2,dst=192.168.1.2,sport=<cleared>,dport=<cleared>),zone=<cleared>,protoinfo=(state=<cleared>)
> +])
> +
> +AT_CHECK([ovs-appctl dpctl/flush-conntrack])
> +
> +# now check with VIP
> +AT_CHECK([ip netns exec foo1 wget   30.30.30.30  -t 3 -T 1], [0],
[ignore], [ignore])
> +
> +# check conntrack zone has tcp entry
> +AT_CHECK([ovs-appctl dpctl/dump-conntrack zone=$zone_id | \
> +FORMAT_CT(30.30.30.30) | \
> +sed -e 's/zone=[[0-9]]*/zone=<cleared>/'], [0], [dnl
>
+tcp,orig=(src=192.168.1.2,dst=30.30.30.30,sport=<cleared>,dport=<cleared>),reply=(src=192.168.2.2,dst=192.168.1.2,sport=<cleared>,dport=<cleared>),zone=<cleared>,mark=2,protoinfo=(state=<cleared>)
> +])
> +
> +AT_CHECK([ovs-appctl dpctl/flush-conntrack])
> +
> +# remove lb
> +ovn-nbctl ls-lb-del foo lb1
> +
> +# add stateless acl
> +check ovn-nbctl acl-add foo from-lport 1 1 allow-stateless
> +check ovn-nbctl acl-add foo to-lport 1 1 allow-stateless
> +
> +AT_CHECK([ip netns exec foo1 wget   192.168.2.2 -t 3 -T 1], [0],
[ignore], [ignore])
> +
> +# check conntrack zone has no tcp entry
> +AT_CHECK([ovs-appctl dpctl/dump-conntrack zone=$zone_id | \
> +FORMAT_CT(192.168.1.2) | \
> +sed -e 's/zone=[[0-9]]*/zone=<cleared>/'], [0], [dnl
> +])
> +
> +AT_CHECK([ovs-appctl dpctl/flush-conntrack])
> +
> +# add lb back
> +ovn-nbctl ls-lb-add foo lb1
> +
> +# Wait for ovn-controller to catch up.
> +ovn-nbctl --wait=hv sync
> +
> +OVS_WAIT_UNTIL([ovs-ofctl -O OpenFlow13 dump-groups br-int | \
> +grep 'nat(dst=192.168.2.2:80)'])
> +
> +# should not dnat so will not be able to connect
> +AT_CHECK([ip netns exec foo1 wget   30.30.30.30  -t 3 -T 1], [4],
[ignore], [ignore])
> +
> +# check conntrack zone has no tcp entry
> +AT_CHECK([ovs-appctl dpctl/dump-conntrack zone=$zone_id | \
> +FORMAT_CT(30.30.30.30) | \
> +sed -e 's/zone=[[0-9]]*/zone=<cleared>/'], [0], [dnl
> +])
> +
> +AT_CHECK([ovs-appctl dpctl/flush-conntrack])
> +
> +OVS_APP_EXIT_AND_WAIT([ovn-controller])
> +
> +as ovn-sb
> +OVS_APP_EXIT_AND_WAIT([ovsdb-server])
> +
> +as ovn-nb
> +OVS_APP_EXIT_AND_WAIT([ovsdb-server])
> +
> +as northd
> +OVS_APP_EXIT_AND_WAIT([NORTHD_TYPE])
> +
> +as
> +OVS_TRAFFIC_VSWITCHD_STOP(["/failed to query port patch-.*/d
> +/connection dropped.*/d"])
> +AT_CLEANUP
> +])
> +
> +OVN_FOR_EACH_NORTHD([
> +AT_SETUP([omit connection  tracking for stateless flows v6])
> +
> +CHECK_CONNTRACK()
> +ovn_start
> +OVS_TRAFFIC_VSWITCHD_START()
> +ADD_BR([br-int])
> +
> +# Set external-ids in br-int needed for ovn-controller
> +ovs-vsctl \
> +        -- set Open_vSwitch . external-ids:system-id=hv1 \
> +        -- set Open_vSwitch .
external-ids:ovn-remote=unix:$ovs_base/ovn-sb/ovn-sb.sock \
> +        -- set Open_vSwitch . external-ids:ovn-encap-type=geneve \
> +        -- set Open_vSwitch . external-ids:ovn-encap-ip=169.0.0.1 \
> +        -- set bridge br-int fail-mode=secure
other-config:disable-in-band=true
> +
> +# Start ovn-controller
> +start_daemon ovn-controller
> +
> +# Logical network:
> +# One LR - R1 with switches foo (fd11::/64) and
> +# bar (fd12::/64) connected to it
> +#
> +#    foo -- R1 -- bar
> +
> +ovn-nbctl lr-add R1
> +
> +ovn-nbctl ls-add foo
> +ovn-nbctl ls-add bar
> +
> +# Connect foo to R1
> +ovn-nbctl lrp-add R1 foo 00:00:01:01:02:03 fd11::1/64
> +ovn-nbctl lsp-add foo rp-foo -- set Logical_Switch_Port rp-foo \
> +    type=router options:router-port=foo addresses=\"00:00:01:01:02:03\"
> +
> +# Connect bar to R1
> +ovn-nbctl lrp-add R1 bar 00:00:01:01:02:04 fd12::1/64
> +ovn-nbctl lsp-add bar rp-bar -- set Logical_Switch_Port rp-bar \
> +    type=router options:router-port=bar addresses=\"00:00:01:01:02:04\"
> +
> +# Logical port 'foo1' in switch 'foo'.
> +ADD_NAMESPACES(foo1)
> +ADD_VETH(foo1, foo1, br-int, "fd11::2/64", "f0:00:00:01:02:03", \
> +         "fd11::1")
> +ovn-nbctl lsp-add foo foo1 \
> +-- lsp-set-addresses foo1 "f0:00:00:01:02:03 fd11::2"
> +
> +# Logical port 'bar1' in switch 'bar'.
> +ADD_NAMESPACES(bar1)
> +ADD_VETH(bar1, bar1, br-int, "fd12::2/64", "f0:00:00:01:02:05", \
> +"fd12::1")
> +ovn-nbctl lsp-add bar bar1 \
> +-- lsp-set-addresses bar1 "f0:00:00:01:02:05 fd12::2"
> +
> +# Config OVN load-balancer with a VIP.
> +ovn-nbctl lb-add lb1 [[fd30::2]]:80 [[fd12::2]]:80 tcp
> +ovn-nbctl ls-lb-add foo lb1
> +
> +# Wait for ovn-controller to catch up.
> +ovn-nbctl --wait=hv sync
> +
> +OVS_WAIT_UNTIL([ovs-ofctl -O OpenFlow13 dump-groups br-int | \
> +grep 'nat(dst=\[[fd12::2\]]:80)'])
> +
> +zone_id=$(ovn-appctl -t ovn-controller ct-zone-list | grep foo1 | cut -d
' ' -f2)
> +
> +OVS_START_L7([bar1], [http6])
> +
> +AT_CHECK([ip netns exec foo1  wget http://[[fd12::2]] -t 3 -T 1], [0],
[ignore], [ignore])
> +
> +# check conntrack zone has tcp entry
> +AT_CHECK([ovs-appctl dpctl/dump-conntrack zone=$zone_id | \
> +FORMAT_CT(fd12::2) |  grep -v fe80 | \
> +sed -e 's/zone=[[0-9]]*/zone=<cleared>/'], [0], [dnl
>
+tcp,orig=(src=fd11::2,dst=fd12::2,sport=<cleared>,dport=<cleared>),reply=(src=fd12::2,dst=fd11::2,sport=<cleared>,dport=<cleared>),zone=<cleared>,protoinfo=(state=<cleared>)
> +])
> +
> +AT_CHECK([ovs-appctl dpctl/flush-conntrack])
> +
> +# now check with VIP
> +AT_CHECK([ip netns exec foo1 wget  http://[[fd30::2]]  -t 3 -T 1], [0],
[ignore], [ignore])
> +
> +# check conntrack zone has tcp entry
> +AT_CHECK([ovs-appctl dpctl/dump-conntrack zone=$zone_id | \
> +FORMAT_CT(fd30::2) |  grep -v fe80 | \
> +sed -e 's/zone=[[0-9]]*/zone=<cleared>/'], [0], [dnl
>
+tcp,orig=(src=fd11::2,dst=fd30::2,sport=<cleared>,dport=<cleared>),reply=(src=fd12::2,dst=fd11::2,sport=<cleared>,dport=<cleared>),zone=<cleared>,mark=2,protoinfo=(state=<cleared>)
> +])
> +
> +AT_CHECK([ovs-appctl dpctl/flush-conntrack])
> +
> +# remove lb
> +ovn-nbctl ls-lb-del foo lb1
> +
> +# add stateless acl
> +check ovn-nbctl acl-add foo from-lport 1 1 allow-stateless
> +check ovn-nbctl acl-add foo to-lport 1 1 allow-stateless
> +
> +AT_CHECK([ip netns exec foo1  wget http://[[fd12::2]] -t 3 -T 1], [0],
[ignore], [ignore])
> +
> +# check conntrack zone has no tcp entry
> +AT_CHECK([ovs-appctl dpctl/dump-conntrack zone=$zone_id | \
> +FORMAT_CT(fd12::2) |  grep -v fe80 | \
> +sed -e 's/zone=[[0-9]]*/zone=<cleared>/'], [0], [dnl
> +])
> +
> +AT_CHECK([ovs-appctl dpctl/flush-conntrack])
> +
> +# add lb back
> +ovn-nbctl ls-lb-add foo lb1
> +
> +# Wait for ovn-controller to catch up.
> +ovn-nbctl --wait=hv sync
> +
> +OVS_WAIT_UNTIL([ovs-ofctl -O OpenFlow13 dump-groups br-int | \
> +grep 'nat(dst=\[[fd12::2\]]:80)'])
> +
> +# should not dnat so will not be able to connect
> +AT_CHECK([ip netns exec foo1 wget  http://[[fd30::2]]  -t 3 -T 1], [4],
[ignore], [ignore])
> +#
> +# check conntrack zone has no tcp entry
> +AT_CHECK([ovs-appctl dpctl/dump-conntrack zone=$zone_id | \
> +FORMAT_CT(fd30::2) | grep -v fe80 | \
> +sed -e 's/zone=[[0-9]]*/zone=<cleared>/'], [0], [dnl
> +])
> +
> +AT_CHECK([ovs-appctl dpctl/flush-conntrack])
> +
> +OVS_APP_EXIT_AND_WAIT([ovn-controller])
> +
> +as ovn-sb
> +OVS_APP_EXIT_AND_WAIT([ovsdb-server])
> +
> +as ovn-nb
> +OVS_APP_EXIT_AND_WAIT([ovsdb-server])
> +
> +as northd
> +OVS_APP_EXIT_AND_WAIT([NORTHD_TYPE])
> +
> +as
> +OVS_TRAFFIC_VSWITCHD_STOP(["/failed to query port patch-.*/d
> +/connection dropped.*/d"])
> +AT_CLEANUP
> +])
> --
> 2.17.1
>

Thanks Venu for v2. Also thanks Numan for reviewing v1, and I saw that
Numan's comment was addressed. So, applied to main branch.

Han
_______________________________________________
dev mailing list
[email protected]
https://mail.openvswitch.org/mailman/listinfo/ovs-dev


Reply via email to