Hi Dumitru,

Please see my comments below.

Thanks,
Han

On Thu, Aug 20, 2020 at 4:19 AM Dumitru Ceara <[email protected]> wrote:
>
> A new table is added to OVN_Northbound: Stateless_Filter. Users can
> populate this table with records consisting of <priority, match>. These
> records generate logical flows in the PRE_ACL stages of the logical
> switch pipeline.
>
> Packets matching these flows will completely bypass connection tracking
> for ACL purposes. In specific scenarios CMSs can predetermine which
> traffic must be firewalled statefully or not, e.g., UDP vs TCP. However,
> until now, if at least one stateful ACL (allow-related) is configured
> on the switch, all traffic gets sent to connection tracking.
> This induces a hit in performance when forwarding packets that don't
> need stateful processing.
>
> New command line arguments are added to ovn-nbctl (stateless-filter-*)
> to allow the users to interact with the Stateless_Filter table.
>
> Signed-off-by: Dumitru Ceara <[email protected]>
> ---
> V2:
> - address Numan's comments:
>   - fix spacing in the logical flow match.
>   - add a new table to the NB DB instead of using a config option on the
>     logical switch.
> - add ovn-nbctl CLI commands for the new table and also unit tests for
>   them.
> - reword the commit message.
> NOTE: checkpatch.py will complain about lines lacking whitespace around
> operators in the ovn-nbctl help string but this is a false positive and
> should be ignored.
> ---
>  NEWS                          |   3 +
>  northd/ovn-northd.8.xml       |  20 ++++
>  northd/ovn-northd.c           | 146 ++++++++++++++++++-----
>  ovn-nb.ovsschema              |  26 ++++-
>  ovn-nb.xml                    |  57 ++++++++-
>  tests/ovn-nbctl.at            |  53 +++++++++
>  tests/ovn-northd.at           | 263
++++++++++++++++++++++++++++++++++++++++++
>  tests/system-common-macros.at |   8 ++
>  tests/system-ovn.at           | 113 ++++++++++++++++++
>  utilities/ovn-detrace.in      |  12 ++
>  utilities/ovn-nbctl.c         | 213 ++++++++++++++++++++++++++++++++--
>  11 files changed, 871 insertions(+), 43 deletions(-)
>
> diff --git a/NEWS b/NEWS
> index a1ce4e8..eedd091 100644
> --- a/NEWS
> +++ b/NEWS
> @@ -11,6 +11,9 @@ Post-v20.06.0
>       called Chassis_Private now contains the nb_cfg column which is
updated
>       by incrementing the value in the NB_Global table, CMSes relying on
>       this mechanism should update their code to use this new table.
> +   - Added support for bypassing connection tracking for ACL processing
for
> +     specific types of traffic through the user supplied Stateless_Filter
> +     configuration.
>
>  OVN v20.06.0
>  --------------------------
> diff --git a/northd/ovn-northd.8.xml b/northd/ovn-northd.8.xml
> index 989e364..1f89942 100644
> --- a/northd/ovn-northd.8.xml
> +++ b/northd/ovn-northd.8.xml
> @@ -322,6 +322,16 @@
>      </p>
>
>      <p>
> +      For each record in table <code>Stateless_Filter</code> in the
> +      <code>OVN_Northbound</code> database, a flow with
> +      <code>priority + 1000</code> is added and sets <code>reg0[7] =
1</code>
> +      for traffic that matches the condition in the <code>match</code>
> +      column and advances to next table.  <code>reg0[7]</code> acts as a
hint
> +      for tables <code>Pre-Stateful</code> and <code>ACL</code> to avoid
> +      sending this traffic to the connection tracker.
> +    </p>
> +

It seems documentation is missing for the flows that use reg0[7] in the
Pre-Stateful and ACL stages.

> +    <p>
>        This table also has a priority-110 flow with the match
>        <code>eth.dst == <var>E</var></code> for all logical switch
>        datapaths to move traffic to the next table. Where <var>E</var>
> @@ -1383,6 +1393,16 @@ output;
>      </p>
>
>      <p>
> +      For each record in table <code>Stateless_Filter</code> in the
> +      <code>OVN_Northbound</code> database, a flow with
> +      <code>priority + 1000</code> is added and sets <code>reg0[7] =
1</code>
> +      for traffic that matches the condition in the <code>match</code>
> +      column and advances to next table.  <code>reg0[7]</code> acts as a
hint
> +      for tables <code>Pre-Stateful</code> and <code>ACL</code> to avoid
> +      sending this traffic to the connection tracker.
> +    </p>
> +
> +    <p>
>        This table also has a priority-110 flow with the match
>        <code>eth.src == <var>E</var></code> for all logical switch
>        datapaths to move traffic to the next table. Where <var>E</var>
> diff --git a/northd/ovn-northd.c b/northd/ovn-northd.c
> index 212de2f..b8f457b 100644
> --- a/northd/ovn-northd.c
> +++ b/northd/ovn-northd.c
> @@ -211,6 +211,7 @@ enum ovn_stage {
>  #define REGBIT_DNS_LOOKUP_RESULT "reg0[4]"
>  #define REGBIT_ND_RA_OPTS_RESULT "reg0[5]"
>  #define REGBIT_HAIRPIN           "reg0[6]"
> +#define REGBIT_SKIP_ACL_CT       "reg0[7]"
>
>  /* Register definitions for switches and routers. */
>
> @@ -245,11 +246,11 @@ enum ovn_stage {
>   * OVS register usage:
>   *
>   * Logical Switch pipeline:
> - * +---------+-------------------------------------+
> - * | R0      | REGBIT_{CONNTRACK/DHCP/DNS/HAIRPIN} |
> - * +---------+-------------------------------------+
> - * | R1 - R9 |              UNUSED                 |
> - * +---------+-------------------------------------+
> + * +---------+-------------------------------------------------+
> + * | R0      | REGBIT_{CONNTRACK/DHCP/DNS/HAIRPIN/SKIP_ACL_CT} |
> + * +---------+-------------------------------------------------+
> + * | R1 - R9 |              UNUSED                             |
> + * +---------+-------------------------------------------------+
>   *
>   * Logical Router pipeline:
>   *
+-----+--------------------------+---+-----------------+---+---------------+
> @@ -4713,6 +4714,12 @@ has_stateful_acl(struct ovn_datapath *od)
>      return false;
>  }
>
> +static bool
> +has_stateful_acl_bypass(struct ovn_datapath *od)
> +{
> +    return od->nbs->n_stateless_filters > 0;
> +}
> +
>  static void
>  build_lswitch_input_port_sec(struct hmap *ports, struct hmap *datapaths,
>                               struct hmap *lflows)
> @@ -4881,7 +4888,47 @@ skip_port_from_conntrack(struct ovn_datapath *od,
struct ovn_port *op,
>  }
>
>  static void
> -build_pre_acls(struct ovn_datapath *od, struct hmap *lflows)
> +build_stateless_filter(struct ovn_datapath *od,
> +                       const struct nbrec_stateless_filter *filter,
> +                       struct hmap *lflows)
> +{
> +    /* Stateless filters must be applied in both directions so that reply
> +     * traffic bypasses conntrack too.
> +     */
> +    ovn_lflow_add_with_hint(lflows, od, S_SWITCH_IN_PRE_ACL,
> +                            filter->priority + OVN_ACL_PRI_OFFSET,
> +                            filter->match,
> +                            REGBIT_SKIP_ACL_CT" = 1; next;",
> +                            &filter->header_);
> +    ovn_lflow_add_with_hint(lflows, od, S_SWITCH_OUT_PRE_ACL,
> +                            filter->priority + OVN_ACL_PRI_OFFSET,
> +                            filter->match,
> +                            REGBIT_SKIP_ACL_CT" = 1; next;",
> +                            &filter->header_);
> +}
> +
> +static void
> +build_stateless_filters(struct ovn_datapath *od, struct hmap
*port_groups,
> +                        struct hmap *lflows)
> +{
> +    for (size_t i = 0; i < od->nbs->n_stateless_filters; i++) {
> +        build_stateless_filter(od, od->nbs->stateless_filters[i],
lflows);
> +    }
> +
> +    struct ovn_port_group *pg;
> +    HMAP_FOR_EACH (pg, key_node, port_groups) {
> +        if (ovn_port_group_ls_find(pg, &od->nbs->header_.uuid)) {
> +            for (size_t i = 0; i < pg->nb_pg->n_stateless_filters; i++) {
> +                build_stateless_filter(od,
pg->nb_pg->stateless_filters[i],
> +                                       lflows);
> +            }
> +        }
> +    }
> +}
> +
> +static void
> +build_pre_acls(struct ovn_datapath *od, struct hmap *port_groups,
> +               struct hmap *lflows)
>  {
>      bool has_stateful = has_stateful_acl(od);
>
> @@ -4926,6 +4973,13 @@ build_pre_acls(struct ovn_datapath *od, struct
hmap *lflows)
>                        "nd || nd_rs || nd_ra || "
>                        "(udp && udp.src == 546 && udp.dst == 547)",
"next;");
>
> +        /* Ingress and Egress Pre-ACL Table (Stateless_Filter).
> +         *
> +         * If the logical switch is configured to bypass conntrack for
> +         * specific types of traffic, skip conntrack for that traffic.
> +         */
> +        build_stateless_filters(od, port_groups, lflows);
> +
>          /* Ingress and Egress Pre-ACL Table (Priority 100).
>           *
>           * Regardless of whether the ACL is "from-lport" or "to-lport",
> @@ -5260,7 +5314,8 @@ build_reject_acl_rules(struct ovn_datapath *od,
struct hmap *lflows,
>
>  static void
>  consider_acl(struct hmap *lflows, struct ovn_datapath *od,
> -             struct nbrec_acl *acl, bool has_stateful)
> +             struct nbrec_acl *acl, bool has_stateful,
> +             bool has_stateful_bypass)
>  {
>      bool ingress = !strcmp(acl->direction, "from-lport") ? true :false;
>      enum ovn_stage stage = ingress ? S_SWITCH_IN_ACL : S_SWITCH_OUT_ACL;
> @@ -5285,7 +5340,19 @@ consider_acl(struct hmap *lflows, struct
ovn_datapath *od,
>              struct ds match = DS_EMPTY_INITIALIZER;
>              struct ds actions = DS_EMPTY_INITIALIZER;
>
> -            /* Commit the connection tracking entry if it's a new
> +            /* If traffic matched the acl-stateful-bypass rule, we don't
> +             * need to commit the connection tracking entry.
> +             */
> +            if (has_stateful_bypass) {
> +                ds_put_format(&match, "(" REGBIT_SKIP_ACL_CT "== 1 &&
(%s)",
> +                              acl->match);
> +                build_acl_log(&actions, acl);
> +                ds_put_format(&actions, "next;");
> +                ds_clear(&match);
> +                ds_clear(&actions);

It seems this whole "if" block is useless. The match and actions are set
but then cleared without being used.

> +            }
> +
> +            /* Otherwise commit the connection tracking entry if it's a
new
>               * connection that matches this ACL.  After this commit,
>               * the reply traffic is allowed by a flow we create at
>               * priority 65535, defined earlier.
> @@ -5297,10 +5364,11 @@ consider_acl(struct hmap *lflows, struct
ovn_datapath *od,
>               * by ct_commit in the "stateful" stage) to indicate that the
>               * connection should be allowed to resume.
>               */
> -            ds_put_format(&match, "((ct.new && !ct.est)"
> -                                  " || (!ct.new && ct.est && !ct.rpl "
> -                                       "&& ct_label.blocked == 1)) "
> -                                  "&& (%s)", acl->match);
> +            ds_put_format(&match, REGBIT_SKIP_ACL_CT " == 0 "
> +                          "&& ((ct.new && !ct.est)"
> +                          " || (!ct.new && ct.est && !ct.rpl "
> +                               "&& ct_label.blocked == 1)) "
> +                          "&& (%s)", acl->match);
>              ds_put_cstr(&actions, REGBIT_CONNTRACK_COMMIT" = 1; ");
>              build_acl_log(&actions, acl);
>              ds_put_cstr(&actions, "next;");
> @@ -5315,11 +5383,16 @@ consider_acl(struct hmap *lflows, struct
ovn_datapath *od,
>               * deletion.  There is no need to commit here, so we can just
>               * proceed to the next table. We use this to ensure that this
>               * connection is still allowed by the currently defined
> -             * policy. Match untracked packets too. */
> +             * policy. Match untracked packets too.
> +             *
> +             * This flow also allows traffic that matches the
> +             * acl-stateful-bypass rule.
> +             */
>              ds_clear(&match);
>              ds_clear(&actions);
>              ds_put_format(&match,
> -                          "(!ct.trk || (!ct.new && ct.est && !ct.rpl"
> +                          "(" REGBIT_SKIP_ACL_CT " == 1 || !ct.trk || "
> +                          "(!ct.new && ct.est && !ct.rpl"
>                            " && ct_label.blocked == 0)) && (%s)",
>                            acl->match);

Because of this lflow, each ACL is translated to 3 extra OVS flows (2
before this patch). If large address sets/port groups are used in the ACL,
the cost can be huge. One way to optimize it could be to introduce a new
stage with a single logical flow that matches (REGBIT_SKIP_ACL_CT == 1 ||
!ct.trk || (!ct.new && ct.est && !ct.rpl && ct_label.blocked == 0)) and
sets a new flag "NO_TRACK"; the current ACL table would then need only a
single (extra) flow for each ACL: NO_TRACK == 1 && <acl match>.

Something similar can be done for the handling of "reject/drop" rules,
i.e., for the lflows that combine several (x '||' y) terms with an "&&"
on the real ACL match.

I am not 100% sure whether a new stage is worth it, but I think it is at
least something to be considered.

>
> @@ -5346,7 +5419,7 @@ consider_acl(struct hmap *lflows, struct
ovn_datapath *od,
>              /* If the packet is not tracked or not part of an established
>               * connection, then we can simply reject/drop it. */
>              ds_put_cstr(&match,
> -                        "(!ct.trk || !ct.est"
> +                        "(" REGBIT_SKIP_ACL_CT " == 1 || !ct.trk ||
!ct.est"
>                          " || (ct.est && ct_label.blocked == 1))");
>              if (!strcmp(acl->action, "reject")) {
>                  build_reject_acl_rules(od, lflows, stage, acl, &match,
> @@ -5373,7 +5446,8 @@ consider_acl(struct hmap *lflows, struct
ovn_datapath *od,
>               */
>              ds_clear(&match);
>              ds_clear(&actions);
> -            ds_put_cstr(&match, "ct.est && ct_label.blocked == 0");
> +            ds_put_cstr(&match, REGBIT_SKIP_ACL_CT " == 0 "
> +                        "&& ct.est && ct_label.blocked == 0");
>              ds_put_cstr(&actions, "ct_commit { ct_label.blocked = 1; };
");
>              if (!strcmp(acl->action, "reject")) {
>                  build_reject_acl_rules(od, lflows, stage, acl, &match,
> @@ -5478,6 +5552,7 @@ build_acls(struct ovn_datapath *od, struct hmap
*lflows,
>             struct hmap *port_groups)
>  {
>      bool has_stateful = has_stateful_acl(od);
> +    bool has_stateful_bypass = has_stateful_acl_bypass(od);
>
>      /* Ingress and Egress ACL Table (Priority 0): Packets are allowed by
>       * default.  A related rule at priority 1 is added below if there
> @@ -5508,11 +5583,15 @@ build_acls(struct ovn_datapath *od, struct hmap
*lflows,
>           * Subsequent packets will hit the flow at priority 0 that just
>           * uses "next;". */
>          ovn_lflow_add(lflows, od, S_SWITCH_IN_ACL, 1,
> -                      "ip && (!ct.est || (ct.est && ct_label.blocked ==
1))",
> -                       REGBIT_CONNTRACK_COMMIT" = 1; next;");
> +                      REGBIT_SKIP_ACL_CT " == 0 "
> +                      "&& ip "
> +                      "&& (!ct.est || (ct.est && ct_label.blocked ==
1))",
> +                      REGBIT_CONNTRACK_COMMIT" = 1; next;");
>          ovn_lflow_add(lflows, od, S_SWITCH_OUT_ACL, 1,
> -                      "ip && (!ct.est || (ct.est && ct_label.blocked ==
1))",
> -                       REGBIT_CONNTRACK_COMMIT" = 1; next;");
> +                      REGBIT_SKIP_ACL_CT " == 0 "
> +                      "&& ip "
> +                      "&& (!ct.est || (ct.est && ct_label.blocked ==
1))",
> +                      REGBIT_CONNTRACK_COMMIT" = 1; next;");
>
>          /* Ingress and Egress ACL Table (Priority 65535).
>           *
> @@ -5522,10 +5601,14 @@ build_acls(struct ovn_datapath *od, struct hmap
*lflows,
>           *
>           * This is enforced at a higher priority than ACLs can be
defined. */
>          ovn_lflow_add(lflows, od, S_SWITCH_IN_ACL, UINT16_MAX,
> -                      "ct.inv || (ct.est && ct.rpl && ct_label.blocked
== 1)",
> +                      REGBIT_SKIP_ACL_CT " == 0 "
> +                      "&& (ct.inv "
> +                           "|| (ct.est && ct.rpl && ct_label.blocked ==
1))",
>                        "drop;");
>          ovn_lflow_add(lflows, od, S_SWITCH_OUT_ACL, UINT16_MAX,
> -                      "ct.inv || (ct.est && ct.rpl && ct_label.blocked
== 1)",
> +                      REGBIT_SKIP_ACL_CT " == 0 "
> +                      "&& (ct.inv "
> +                           "|| (ct.est && ct.rpl && ct_label.blocked ==
1))",
>                        "drop;");
>
>          /* Ingress and Egress ACL Table (Priority 65535).
> @@ -5538,11 +5621,13 @@ build_acls(struct ovn_datapath *od, struct hmap
*lflows,
>           *
>           * This is enforced at a higher priority than ACLs can be
defined. */
>          ovn_lflow_add(lflows, od, S_SWITCH_IN_ACL, UINT16_MAX,
> -                      "ct.est && !ct.rel && !ct.new && !ct.inv "
> +                      REGBIT_SKIP_ACL_CT "== 0 "
> +                      "&& ct.est && !ct.rel && !ct.new && !ct.inv "
>                        "&& ct.rpl && ct_label.blocked == 0",
>                        "next;");
>          ovn_lflow_add(lflows, od, S_SWITCH_OUT_ACL, UINT16_MAX,
> -                      "ct.est && !ct.rel && !ct.new && !ct.inv "
> +                      REGBIT_SKIP_ACL_CT "== 0 "
> +                      "&& ct.est && !ct.rel && !ct.new && !ct.inv "
>                        "&& ct.rpl && ct_label.blocked == 0",
>                        "next;");
>
> @@ -5558,11 +5643,13 @@ build_acls(struct ovn_datapath *od, struct hmap
*lflows,
>           * related traffic such as an ICMP Port Unreachable through
>           * that's generated from a non-listening UDP port.  */
>          ovn_lflow_add(lflows, od, S_SWITCH_IN_ACL, UINT16_MAX,
> -                      "!ct.est && ct.rel && !ct.new && !ct.inv "
> +                      REGBIT_SKIP_ACL_CT "== 0 "
> +                      "&& !ct.est && ct.rel && !ct.new && !ct.inv "
>                        "&& ct_label.blocked == 0",
>                        "next;");
>          ovn_lflow_add(lflows, od, S_SWITCH_OUT_ACL, UINT16_MAX,
> -                      "!ct.est && ct.rel && !ct.new && !ct.inv "
> +                      REGBIT_SKIP_ACL_CT "== 0 "
> +                      "&& !ct.est && ct.rel && !ct.new && !ct.inv "
>                        "&& ct_label.blocked == 0",
>                        "next;");
>
> @@ -5578,13 +5665,14 @@ build_acls(struct ovn_datapath *od, struct hmap
*lflows,
>      /* Ingress or Egress ACL Table (Various priorities). */
>      for (size_t i = 0; i < od->nbs->n_acls; i++) {
>          struct nbrec_acl *acl = od->nbs->acls[i];
> -        consider_acl(lflows, od, acl, has_stateful);
> +        consider_acl(lflows, od, acl, has_stateful, has_stateful_bypass);
>      }
>      struct ovn_port_group *pg;
>      HMAP_FOR_EACH (pg, key_node, port_groups) {
>          if (ovn_port_group_ls_find(pg, &od->nbs->header_.uuid)) {
>              for (size_t i = 0; i < pg->nb_pg->n_acls; i++) {
> -                consider_acl(lflows, od, pg->nb_pg->acls[i],
has_stateful);
> +                consider_acl(lflows, od, pg->nb_pg->acls[i],
has_stateful,
> +                             has_stateful_bypass);
>              }
>          }
>      }
> @@ -6617,7 +6705,7 @@ build_lswitch_flows(struct hmap *datapaths, struct
hmap *ports,
>              continue;
>          }
>
> -        build_pre_acls(od, lflows);
> +        build_pre_acls(od, port_groups, lflows);
>          build_pre_lb(od, lflows, meter_groups, lbs);
>          build_pre_stateful(od, lflows);
>          build_acls(od, lflows, port_groups);
> diff --git a/ovn-nb.ovsschema b/ovn-nb.ovsschema
> index 0c939b7..ef0121d 100644
> --- a/ovn-nb.ovsschema
> +++ b/ovn-nb.ovsschema
> @@ -1,7 +1,7 @@
>  {
>      "name": "OVN_Northbound",
> -    "version": "5.25.0",
> -    "cksum": "1354137211 26116",
> +    "version": "5.26.0",
> +    "cksum": "1450952466 27225",
>      "tables": {
>          "NB_Global": {
>              "columns": {
> @@ -35,6 +35,12 @@
>                                             "refType": "strong"},
>                                     "min": 0,
>                                     "max": "unlimited"}},
> +                "stateless_filters": {
> +                    "type": {"key": {"type": "uuid",
> +                                     "refTable": "Stateless_Filter",
> +                                     "refType": "strong"},
> +                             "min": 0,
> +                             "max": "unlimited"}},
>                  "acls": {"type": {"key": {"type": "uuid",
>                                            "refTable": "ACL",
>                                            "refType": "strong"},
> @@ -150,6 +156,12 @@
>                                             "refType": "weak"},
>                                     "min": 0,
>                                     "max": "unlimited"}},
> +                "stateless_filters": {
> +                    "type": {"key": {"type": "uuid",
> +                                     "refTable": "Stateless_Filter",
> +                                     "refType": "strong"},
> +                             "min": 0,
> +                             "max": "unlimited"}},
>                  "acls": {"type": {"key": {"type": "uuid",
>                                            "refTable": "ACL",
>                                            "refType": "strong"},
> @@ -201,6 +213,16 @@
>                      "type": {"key": "string", "value": "string",
>                               "min": 0, "max": "unlimited"}}},
>              "isRoot": false},
> +        "Stateless_Filter": {
> +            "columns": {
> +                "priority": {"type": {"key": {"type": "integer",
> +                                              "minInteger": 0,
> +                                              "maxInteger": 32767}}},
> +                "match": {"type": "string"},
> +                "external_ids": {
> +                    "type": {"key": "string", "value": "string",
> +                             "min": 0, "max": "unlimited"}}},
> +            "isRoot": false},

Is there any specific reason why "direction" is not needed?

>          "ACL": {
>              "columns": {
>                  "name": {"type": {"key": {"type": "string",
> diff --git a/ovn-nb.xml b/ovn-nb.xml
> index 9f3621d..ccb0cbc 100644
> --- a/ovn-nb.xml
> +++ b/ovn-nb.xml
> @@ -271,9 +271,16 @@
>        ip addresses.
>      </column>
>
> -    <column name="acls">
> -      Access control rules that apply to packets within the logical
switch.
> -    </column>
> +    <group title="ACL processing">
> +      <column name="acls">
> +        Access control rules that apply to packets within the logical
switch.
> +      </column>
> +
> +      <column name="stateless_filters">
> +        Stateless filters to bypass connection tracking that apply to
packets
> +        within the logical switch.
> +      </column>
> +    </group>
>
>      <column name="qos_rules">
>        QoS marking and metering rules that apply to packets within the
> @@ -1430,6 +1437,11 @@
>        lswitches that the ports of the port group belong to.
>      </column>
>
> +    <column name="stateless_filters">
> +      Stateless filters to bypass connection tracking that apply to the
> +      port_group.
> +    </column>
> +
>      <group title="Common Columns">
>        <column name="external_ids">
>          See <em>External IDs</em> at the beginning of this document.
> @@ -1589,6 +1601,45 @@
>      </group>
>    </table>
>
> +  <table name="Stateless_Filter" title="Filters to bypass ACL
processing">
> +    <p>
> +      Each row in this table represents a rule to determine if traffic
should
> +      be processed in a stateless way in the ACL stage, without
recirculating
> +      through connection tracking, regardless of the type of ACL that is
hit.
> +
> +      In normal operation, whenever an ACL associated to a Logical_Switch
> +      has action <code>allow-related</code>, all IP traffic gets sent
> +      to conntrack and related traffic is allowed.
> +
> +      If <ref column="match"/> is set to <code>E</code> all
> +      <code>allow</code> and <code>allow-related</code> ACLs that match

Shall we simply say that all ACLs that match the filter are considered
stateless, regardless of the action? (Even reject/drop has some stateful
implications, so I think it would be better not to mention
allow/allow-related, to avoid confusion.)

> +      packets for which <code>E</code> is true are applied
> +      in a stateless way, without recirculating through connection
tracking.
> +
> +      This also implies that the CMS should add an explicit
<code>allow</code>
> +      ACL for return traffic, because return traffic will not go to
conntrack
> +      either so it has to be explicitly allowed.
> +
> +      This is useful when some specific types of traffic do not need
> +      stateful processing.
> +    </p>
> +    <column name="priority">
> +      The priority of the filter rule.  Rules with numerically higher
priority
> +      take precedence.
> +    </column>
> +    <column name="match">
> +      The packets that the stateless filter should match, in the same
> +      expression language used for the <ref column="match"
table="Logical_Flow"
> +      db="OVN_Southbound"/> column in the OVN Southbound database's
> +      <ref table="Logical_Flow" db="OVN_Southbound"/> table.
> +    </column>
> +    <group title="Common Columns">
> +      <column name="external_ids">
> +        See <em>External IDs</em> at the beginning of this document.
> +      </column>
> +    </group>
> +  </table>
> +
>    <table name="ACL" title="Access Control List (ACL) rule">
>      <p>
>        Each row in this table represents one ACL rule for a logical switch
> diff --git a/tests/ovn-nbctl.at b/tests/ovn-nbctl.at
> index 619051d..b55ee03 100644
> --- a/tests/ovn-nbctl.at
> +++ b/tests/ovn-nbctl.at
> @@ -270,6 +270,59 @@ AT_CHECK([ovn-nbctl --type=port-group acl-add ls0
to-lport 100 ip drop], [0], [i
>
>  dnl ---------------------------------------------------------------------
>
> +OVN_NBCTL_TEST([ovn_nbctl_stateless_filters], [Stateless_Filters], [
> +ovn_nbctl_test_stateless_filters() {
> +   AT_CHECK([ovn-nbctl $2 stateless-filter-add $1 300 udp])
> +   AT_CHECK([ovn-nbctl $2 stateless-filter-add $1 200 tcp])
> +   AT_CHECK([ovn-nbctl $2 stateless-filter-add $1 100 ip])
> +   dnl Add duplicated Stateless_Filter
> +   AT_CHECK([ovn-nbctl $2 stateless-filter-add $1 100 ip], [1], [],
[stderr])
> +   AT_CHECK([grep 'already existed' stderr], [0], [ignore])
> +   AT_CHECK([ovn-nbctl $2 --may-exist stateless-filter-add $1 100 ip])
> +
> +   AT_CHECK([ovn-nbctl $2 stateless-filter-list $1], [0], [dnl
> +  300 (udp)
> +  200 (tcp)
> +  100 (ip)
> +])
> +
> +   dnl Delete all Stateless_Filters.
> +   AT_CHECK([ovn-nbctl $2 stateless-filter-del $1])
> +   AT_CHECK([ovn-nbctl $2 stateless-filter-list $1], [0], [dnl
> +])
> +
> +   AT_CHECK([ovn-nbctl $2 stateless-filter-add $1 300 udp])
> +   AT_CHECK([ovn-nbctl $2 stateless-filter-add $1 200 tcp])
> +   AT_CHECK([ovn-nbctl $2 stateless-filter-add $1 100 ip])
> +
> +   dnl Delete a single filter.
> +   AT_CHECK([ovn-nbctl $2 stateless-filter-del $1 200 tcp])
> +   AT_CHECK([ovn-nbctl $2 stateless-filter-list $1], [0], [dnl
> +  300 (udp)
> +  100 (ip)
> +])
> +}
> +
> +AT_CHECK([ovn-nbctl ls-add ls0])
> +ovn_nbctl_test_stateless_filters ls0
> +AT_CHECK([ovn-nbctl ls-add ls1])
> +ovn_nbctl_test_stateless_filters ls1 --type=switch
> +AT_CHECK([ovn-nbctl create port_group name=pg0], [0], [ignore])
> +ovn_nbctl_test_stateless_filters pg0 --type=port-group
> +
> +dnl Test when port group doesn't exist
> +AT_CHECK([ovn-nbctl --type=port-group stateless-filter-add pg1 100 ip],
[1], [], [dnl
> +ovn-nbctl: pg1: port group name not found
> +])
> +
> +dnl Test when same name exists in logical switches and portgroups
> +AT_CHECK([ovn-nbctl create port_group name=ls0], [0], [ignore])
> +AT_CHECK([ovn-nbctl stateless-filter-add ls0 100 ip], [1], [], [stderr])
> +AT_CHECK([grep 'exists in both' stderr], [0], [ignore])
> +AT_CHECK([ovn-nbctl --type=port-group stateless-filter-add ls0 100 ip],
[0], [ignore])])
> +
> +dnl ---------------------------------------------------------------------
> +
>  OVN_NBCTL_TEST([ovn_nbctl_qos], [QoS], [
>  AT_CHECK([ovn-nbctl ls-add ls0])
>  AT_CHECK([ovn-nbctl qos-add ls0 from-lport 600 tcp dscp=63])
> diff --git a/tests/ovn-northd.at b/tests/ovn-northd.at
> index 8344c7f..0cbc092 100644
> --- a/tests/ovn-northd.at
> +++ b/tests/ovn-northd.at
> @@ -1781,3 +1781,266 @@ AT_CHECK([ovn-sbctl lflow-list | grep
"ls_out_pre_lb.*priority=100" | grep reg0
>  ])
>
>  AT_CLEANUP
> +
> +AT_SETUP([ovn -- ACL Stateful Bypass - Logical_Switch])
> +ovn_start
> +
> +ovn-nbctl ls-add ls
> +ovn-nbctl lsp-add ls lsp1
> +ovn-nbctl lsp-set-addresses lsp1 00:00:00:00:00:01
> +ovn-nbctl lsp-add ls lsp2
> +ovn-nbctl lsp-set-addresses lsp2 00:00:00:00:00:02
> +
> +ovn-nbctl acl-add ls from-lport 3 "tcp" allow
> +ovn-nbctl acl-add ls from-lport 2 "udp" allow-related
> +ovn-nbctl acl-add ls from-lport 1 "ip" drop
> +ovn-nbctl --wait=sb sync
> +
> +flow_eth='eth.src == 00:00:00:00:00:01 && eth.dst == 00:00:00:00:00:02'
> +flow_ip='ip.ttl==64 && ip4.src == 42.42.42.1 && ip4.dst == 66.66.66.66'
> +flow_tcp='tcp && tcp.dst == 80'
> +flow_udp='udp && udp.dst == 80'
> +
> +# TCP packets should go to conntrack.
> +flow="inport == \"lsp1\" && ${flow_eth} && ${flow_ip} && ${flow_tcp}"
> +AT_CHECK([ovn-trace --ct new --ct new --minimal ls "${flow}"], [0], [dnl
> +#
tcp,reg14=0x1,vlan_tci=0x0000,dl_src=00:00:00:00:00:01,dl_dst=00:00:00:00:00:02,nw_src=42.42.42.1,nw_dst=66.66.66.66,nw_tos=0,nw_ecn=0,nw_ttl=64,tp_src=0,tp_dst=80,tcp_flags=0
> +ct_next(ct_state=new|trk) {
> +    ct_next(ct_state=new|trk) {
> +        output("lsp2");
> +    };
> +};
> +])
> +
> +# UDP packets should go to conntrack.
> +flow="inport == \"lsp1\" && ${flow_eth} && ${flow_ip} && ${flow_udp}"
> +AT_CHECK([ovn-trace --ct new --ct new --minimal ls "${flow}"], [0], [dnl
> +#
udp,reg14=0x1,vlan_tci=0x0000,dl_src=00:00:00:00:00:01,dl_dst=00:00:00:00:00:02,nw_src=42.42.42.1,nw_dst=66.66.66.66,nw_tos=0,nw_ecn=0,nw_ttl=64,tp_src=0,tp_dst=80
> +ct_next(ct_state=new|trk) {
> +    ct_next(ct_state=new|trk) {
> +        output("lsp2");
> +    };
> +};
> +])
> +
> +# Enable Stateful Bypass for TCP.
> +ovn-nbctl stateless-filter-add ls 1 tcp
> +ovn-nbctl --wait=sb sync
> +
> +# TCP packets should not go to conntrack anymore.
> +flow="inport == \"lsp1\" && ${flow_eth} && ${flow_ip} && ${flow_tcp}"
> +AT_CHECK([ovn-trace --minimal ls "${flow}"], [0], [dnl
> +#
tcp,reg14=0x1,vlan_tci=0x0000,dl_src=00:00:00:00:00:01,dl_dst=00:00:00:00:00:02,nw_src=42.42.42.1,nw_dst=66.66.66.66,nw_tos=0,nw_ecn=0,nw_ttl=64,tp_src=0,tp_dst=80,tcp_flags=0
> +output("lsp2");
> +])
> +
> +# UDP packets still go to conntrack.
> +flow="inport == \"lsp1\" && ${flow_eth} && ${flow_ip} && ${flow_udp}"
> +AT_CHECK([ovn-trace --ct new --ct new --minimal ls "${flow}"], [0], [dnl
> +#
udp,reg14=0x1,vlan_tci=0x0000,dl_src=00:00:00:00:00:01,dl_dst=00:00:00:00:00:02,nw_src=42.42.42.1,nw_dst=66.66.66.66,nw_tos=0,nw_ecn=0,nw_ttl=64,tp_src=0,tp_dst=80
> +ct_next(ct_state=new|trk) {
> +    ct_next(ct_state=new|trk) {
> +        output("lsp2");
> +    };
> +};
> +])
> +
> +# Add a load balancer.
> +ovn-nbctl lb-add lb-tcp 66.66.66.66:80 42.42.42.2:8080 tcp
> +ovn-nbctl lb-add lb-udp 66.66.66.66:80 42.42.42.2:8080 udp
> +ovn-nbctl ls-lb-add ls lb-tcp
> +ovn-nbctl ls-lb-add ls lb-udp
> +
> +# Disable Stateful Bypass for TCP.
> +ovn-nbctl stateless-filter-del ls
> +ovn-nbctl --wait=sb sync
> +
> +# TCP packets should go to conntrack.
> +flow="inport == \"lsp1\" && ${flow_eth} && ${flow_ip} && ${flow_tcp}"
> +AT_CHECK([ovn-trace --ct new --ct new --minimal ls "${flow}"], [0], [dnl
> +# tcp,reg14=0x1,vlan_tci=0x0000,dl_src=00:00:00:00:00:01,dl_dst=00:00:00:00:00:02,nw_src=42.42.42.1,nw_dst=66.66.66.66,nw_tos=0,nw_ecn=0,nw_ttl=64,tp_src=0,tp_dst=80,tcp_flags=0
> +ct_next(ct_state=new|trk) {
> +    ct_lb {
> +        ct_next(ct_state=new|trk) {
> +            output("lsp2");
> +        };
> +    };
> +};
> +])
> +
> +# UDP packets should go to conntrack.
> +flow="inport == \"lsp1\" && ${flow_eth} && ${flow_ip} && ${flow_udp}"
> +AT_CHECK([ovn-trace --ct new --ct new --minimal ls "${flow}"], [0], [dnl
> +# udp,reg14=0x1,vlan_tci=0x0000,dl_src=00:00:00:00:00:01,dl_dst=00:00:00:00:00:02,nw_src=42.42.42.1,nw_dst=66.66.66.66,nw_tos=0,nw_ecn=0,nw_ttl=64,tp_src=0,tp_dst=80
> +ct_next(ct_state=new|trk) {
> +    ct_lb {
> +        ct_next(ct_state=new|trk) {
> +            output("lsp2");
> +        };
> +    };
> +};
> +])
> +
> +# Enable Stateful Bypass for TCP.
> +ovn-nbctl stateless-filter-add ls 1 tcp
> +ovn-nbctl --wait=sb sync
> +
> +# TCP packets should go to conntrack for load balancing.
> +flow="inport == \"lsp1\" && ${flow_eth} && ${flow_ip} && ${flow_tcp}"
> +AT_CHECK([ovn-trace --ct new --ct new --minimal ls "${flow}"], [0], [dnl
> +# tcp,reg14=0x1,vlan_tci=0x0000,dl_src=00:00:00:00:00:01,dl_dst=00:00:00:00:00:02,nw_src=42.42.42.1,nw_dst=66.66.66.66,nw_tos=0,nw_ecn=0,nw_ttl=64,tp_src=0,tp_dst=80,tcp_flags=0
> +ct_next(ct_state=new|trk) {
> +    ct_lb {
> +        ct_next(ct_state=new|trk) {
> +            output("lsp2");
> +        };
> +    };
> +};
> +])
> +
> +# UDP packets still go to conntrack.
> +flow="inport == \"lsp1\" && ${flow_eth} && ${flow_ip} && ${flow_udp}"
> +AT_CHECK([ovn-trace --ct new --ct new --minimal ls "${flow}"], [0], [dnl
> +# udp,reg14=0x1,vlan_tci=0x0000,dl_src=00:00:00:00:00:01,dl_dst=00:00:00:00:00:02,nw_src=42.42.42.1,nw_dst=66.66.66.66,nw_tos=0,nw_ecn=0,nw_ttl=64,tp_src=0,tp_dst=80
> +ct_next(ct_state=new|trk) {
> +    ct_lb {
> +        ct_next(ct_state=new|trk) {
> +            output("lsp2");
> +        };
> +    };
> +};
> +])
> +
> +AT_CLEANUP
> +
> +AT_SETUP([ovn -- ACL Stateful Bypass - Port_Group])
> +ovn_start
> +
> +ovn-nbctl ls-add ls
> +ovn-nbctl lsp-add ls lsp1
> +ovn-nbctl lsp-set-addresses lsp1 00:00:00:00:00:01
> +ovn-nbctl lsp-add ls lsp2
> +ovn-nbctl lsp-set-addresses lsp2 00:00:00:00:00:02
> +
> +ovn-nbctl pg-add pg lsp1 lsp2
> +ovn-nbctl acl-add pg from-lport 3 "tcp" allow
> +ovn-nbctl acl-add pg from-lport 2 "udp" allow-related
> +ovn-nbctl acl-add pg from-lport 1 "ip" drop
> +ovn-nbctl --wait=sb sync
> +
> +flow_eth='eth.src == 00:00:00:00:00:01 && eth.dst == 00:00:00:00:00:02'
> +flow_ip='ip.ttl==64 && ip4.src == 42.42.42.1 && ip4.dst == 66.66.66.66'
> +flow_tcp='tcp && tcp.dst == 80'
> +flow_udp='udp && udp.dst == 80'
> +
> +# TCP packets should go to conntrack.
> +flow="inport == \"lsp1\" && ${flow_eth} && ${flow_ip} && ${flow_tcp}"
> +AT_CHECK([ovn-trace --ct new --ct new --minimal ls "${flow}"], [0], [dnl
> +# tcp,reg14=0x1,vlan_tci=0x0000,dl_src=00:00:00:00:00:01,dl_dst=00:00:00:00:00:02,nw_src=42.42.42.1,nw_dst=66.66.66.66,nw_tos=0,nw_ecn=0,nw_ttl=64,tp_src=0,tp_dst=80,tcp_flags=0
> +ct_next(ct_state=new|trk) {
> +    ct_next(ct_state=new|trk) {
> +        output("lsp2");
> +    };
> +};
> +])
> +
> +# UDP packets should go to conntrack.
> +flow="inport == \"lsp1\" && ${flow_eth} && ${flow_ip} && ${flow_udp}"
> +AT_CHECK([ovn-trace --ct new --ct new --minimal ls "${flow}"], [0], [dnl
> +# udp,reg14=0x1,vlan_tci=0x0000,dl_src=00:00:00:00:00:01,dl_dst=00:00:00:00:00:02,nw_src=42.42.42.1,nw_dst=66.66.66.66,nw_tos=0,nw_ecn=0,nw_ttl=64,tp_src=0,tp_dst=80
> +ct_next(ct_state=new|trk) {
> +    ct_next(ct_state=new|trk) {
> +        output("lsp2");
> +    };
> +};
> +])
> +
> +# Enable Stateful Bypass for TCP.
> +ovn-nbctl stateless-filter-add pg 1 tcp
> +ovn-nbctl --wait=sb sync
> +
> +# TCP packets should not go to conntrack anymore.
> +flow="inport == \"lsp1\" && ${flow_eth} && ${flow_ip} && ${flow_tcp}"
> +AT_CHECK([ovn-trace --minimal ls "${flow}"], [0], [dnl
> +# tcp,reg14=0x1,vlan_tci=0x0000,dl_src=00:00:00:00:00:01,dl_dst=00:00:00:00:00:02,nw_src=42.42.42.1,nw_dst=66.66.66.66,nw_tos=0,nw_ecn=0,nw_ttl=64,tp_src=0,tp_dst=80,tcp_flags=0
> +output("lsp2");
> +])
> +
> +# UDP packets still go to conntrack.
> +flow="inport == \"lsp1\" && ${flow_eth} && ${flow_ip} && ${flow_udp}"
> +AT_CHECK([ovn-trace --ct new --ct new --minimal ls "${flow}"], [0], [dnl
> +# udp,reg14=0x1,vlan_tci=0x0000,dl_src=00:00:00:00:00:01,dl_dst=00:00:00:00:00:02,nw_src=42.42.42.1,nw_dst=66.66.66.66,nw_tos=0,nw_ecn=0,nw_ttl=64,tp_src=0,tp_dst=80
> +ct_next(ct_state=new|trk) {
> +    ct_next(ct_state=new|trk) {
> +        output("lsp2");
> +    };
> +};
> +])
> +
> +# Add a load balancer.
> +ovn-nbctl lb-add lb-tcp 66.66.66.66:80 42.42.42.2:8080 tcp
> +ovn-nbctl lb-add lb-udp 66.66.66.66:80 42.42.42.2:8080 udp
> +ovn-nbctl ls-lb-add ls lb-tcp
> +ovn-nbctl ls-lb-add ls lb-udp
> +
> +# Disable Stateful Bypass for TCP.
> +ovn-nbctl stateless-filter-del pg
> +ovn-nbctl --wait=sb sync
> +
> +# TCP packets should go to conntrack.
> +flow="inport == \"lsp1\" && ${flow_eth} && ${flow_ip} && ${flow_tcp}"
> +AT_CHECK([ovn-trace --ct new --ct new --minimal ls "${flow}"], [0], [dnl
> +# tcp,reg14=0x1,vlan_tci=0x0000,dl_src=00:00:00:00:00:01,dl_dst=00:00:00:00:00:02,nw_src=42.42.42.1,nw_dst=66.66.66.66,nw_tos=0,nw_ecn=0,nw_ttl=64,tp_src=0,tp_dst=80,tcp_flags=0
> +ct_next(ct_state=new|trk) {
> +    ct_lb {
> +        ct_next(ct_state=new|trk) {
> +            output("lsp2");
> +        };
> +    };
> +};
> +])
> +
> +# UDP packets should go to conntrack.
> +flow="inport == \"lsp1\" && ${flow_eth} && ${flow_ip} && ${flow_udp}"
> +AT_CHECK([ovn-trace --ct new --ct new --minimal ls "${flow}"], [0], [dnl
> +# udp,reg14=0x1,vlan_tci=0x0000,dl_src=00:00:00:00:00:01,dl_dst=00:00:00:00:00:02,nw_src=42.42.42.1,nw_dst=66.66.66.66,nw_tos=0,nw_ecn=0,nw_ttl=64,tp_src=0,tp_dst=80
> +ct_next(ct_state=new|trk) {
> +    ct_lb {
> +        ct_next(ct_state=new|trk) {
> +            output("lsp2");
> +        };
> +    };
> +};
> +])
> +
> +# Enable Stateful Bypass for TCP.
> +ovn-nbctl stateless-filter-add pg 1 tcp
> +ovn-nbctl --wait=sb sync
> +
> +# TCP packets should go to conntrack for load balancing.
> +flow="inport == \"lsp1\" && ${flow_eth} && ${flow_ip} && ${flow_tcp}"
> +AT_CHECK([ovn-trace --ct new --ct new --minimal ls "${flow}"], [0], [dnl
> +# tcp,reg14=0x1,vlan_tci=0x0000,dl_src=00:00:00:00:00:01,dl_dst=00:00:00:00:00:02,nw_src=42.42.42.1,nw_dst=66.66.66.66,nw_tos=0,nw_ecn=0,nw_ttl=64,tp_src=0,tp_dst=80,tcp_flags=0
> +ct_next(ct_state=new|trk) {
> +    ct_lb {
> +        ct_next(ct_state=new|trk) {
> +            output("lsp2");
> +        };
> +    };
> +};
> +])
> +
> +# UDP packets still go to conntrack.
> +flow="inport == \"lsp1\" && ${flow_eth} && ${flow_ip} && ${flow_udp}"
> +AT_CHECK([ovn-trace --ct new --ct new --minimal ls "${flow}"], [0], [dnl
> +# udp,reg14=0x1,vlan_tci=0x0000,dl_src=00:00:00:00:00:01,dl_dst=00:00:00:00:00:02,nw_src=42.42.42.1,nw_dst=66.66.66.66,nw_tos=0,nw_ecn=0,nw_ttl=64,tp_src=0,tp_dst=80
> +ct_next(ct_state=new|trk) {
> +    ct_lb {
> +        ct_next(ct_state=new|trk) {
> +            output("lsp2");
> +        };
> +    };
> +};
> +])
> +
> +AT_CLEANUP
> diff --git a/tests/system-common-macros.at b/tests/system-common-macros.at
> index c8fa6f0..65904ed 100644
> --- a/tests/system-common-macros.at
> +++ b/tests/system-common-macros.at
> @@ -234,6 +234,14 @@ m4_define([FORMAT_PING], [grep "transmitted" | sed 's/time.*ms$/time 0ms/'])
>  #
>  m4_define([STRIP_MONITOR_CSUM], [grep "csum:" | sed 's/csum:.*/csum: <skip>/'])
>
> +# FORMAT_CT_STATE([ip-addr])
> +#
> +# Strip content from the piped input which would differ from test to test
> +# and limit the output to the rows containing 'ip-addr'. Don't strip state.
> +#
> +m4_define([FORMAT_CT_STATE],
> +    [[grep "dst=$1" | sed -e 's/port=[0-9]*/port=<cleared>/g' -e 's/id=[0-9]*/id=<cleared>/g' | sort | uniq]])
> +
>  # FORMAT_CT([ip-addr])
>  #
>  # Strip content from the piped input which would differ from test to test
> diff --git a/tests/system-ovn.at b/tests/system-ovn.at
> index 40ba6e4..b1c890b 100644
> --- a/tests/system-ovn.at
> +++ b/tests/system-ovn.at
> @@ -5397,3 +5397,116 @@ as
>  OVS_TRAFFIC_VSWITCHD_STOP(["/.*error receiving.*/d
>  /.*terminating with signal 15.*/d"])
>  AT_CLEANUP
> +
> +AT_SETUP([ovn -- ACL Stateful Bypass + Load balancer])
> +AT_SKIP_IF([test $HAVE_NC = no])
> +AT_KEYWORDS([lb])
> +AT_KEYWORDS([conntrack])
> +ovn_start
> +
> +OVS_TRAFFIC_VSWITCHD_START()
> +ADD_BR([br-int])
> +
> +# Set external-ids in br-int needed for ovn-controller
> +ovs-vsctl \
> +        -- set Open_vSwitch . external-ids:system-id=hv1 \
> +        -- set Open_vSwitch . external-ids:ovn-remote=unix:$ovs_base/ovn-sb/ovn-sb.sock \
> +        -- set Open_vSwitch . external-ids:ovn-encap-type=geneve \
> +        -- set Open_vSwitch . external-ids:ovn-encap-ip=169.0.0.1 \
> +        -- set bridge br-int fail-mode=secure other-config:disable-in-band=true
> +
> +# Start ovn-controller
> +start_daemon ovn-controller
> +
> +# Logical network:
> +# One logical switch with a load balancer with one backend.
> +# On the LS we add "allow" ACLs for TCP and "allow-related" ACLs for UDP.
> +# The "allow-related" ACL normally forces all traffic to go to conntrack.
> +# We enable ACL stateful bypass for TCP so TCP traffic should not be
> +# sent to conntrack for ACLs (only for LB).
> +
> +ovn-nbctl ls-add ls
> +ovn-nbctl lsp-add ls lsp1
> +ovn-nbctl lsp-set-addresses lsp1 00:00:00:00:00:01
> +ovn-nbctl lsp-add ls lsp2
> +ovn-nbctl lsp-set-addresses lsp2 00:00:00:00:00:02
> +
> +ovn-nbctl acl-add ls from-lport 3 "tcp" allow
> +ovn-nbctl acl-add ls from-lport 2 "udp" allow-related
> +ovn-nbctl acl-add ls from-lport 1 "ip" drop
> +
> +ovn-nbctl lr-add rtr
> +ovn-nbctl lrp-add rtr rtr-ls 00:00:00:00:01:00 42.42.42.254/24
> +ovn-nbctl lsp-add ls ls-rtr                       \
> +    -- lsp-set-type ls-rtr router                 \
> +    -- lsp-set-addresses ls-rtr 00:00:00:00:01:00 \
> +    -- lsp-set-options ls-rtr router-port=rtr-ls
> +
> +# Add a load balancer.
> +ovn-nbctl lb-add lb-tcp 66.66.66.66:80 42.42.42.2:8080 tcp
> +ovn-nbctl lb-add lb-udp 66.66.66.66:80 42.42.42.2:8080 udp
> +ovn-nbctl ls-lb-add ls lb-tcp
> +ovn-nbctl ls-lb-add ls lb-udp
> +
> +# Enable Stateful Bypass for TCP.
> +ovn-nbctl \
> +    --id=@f1 create Stateless_Filter priority=1 match="tcp" -- \
> +    set Logical_Switch ls stateless_filters='@f1'
> +
> +ADD_NAMESPACES(lsp1)
> +ADD_VETH(lsp1, lsp1, br-int, "42.42.42.1/24", "00:00:00:00:00:01", \
> +         "42.42.42.254")
> +
> +ADD_NAMESPACES(lsp2)
> +ADD_VETH(lsp2, lsp2, br-int, "42.42.42.2/24", "00:00:00:00:00:02", \
> +         "42.42.42.254")
> +
> +ovn-nbctl --wait=hv sync
> +
> +# Start a UDP server on lsp2.
> +NETNS_DAEMONIZE([lsp2], [nc -l --no-shutdown -u 42.42.42.2 8080], [nc2.pid])
> +
> +# Start a UDP connection.
> +NS_CHECK_EXEC([lsp1], [echo "foo" | nc --no-shutdown -u 66.66.66.66 80])
> +
> +# There should be 2 UDP conntrack entries:
> +# - one for the allow-related ACL.
> +# - one for the LB dnat.
> +OVS_WAIT_UNTIL([test "$(ovs-appctl dpctl/dump-conntrack | grep udp | grep '42.42.42.1' -c)" = "2"])
> +
> +AT_CHECK([ovs-appctl dpctl/dump-conntrack | FORMAT_CT_STATE(42.42.42.1) | grep udp | \
> +sed -e 's/zone=[[0-9]]*/zone=<cleared>/'], [0], [dnl
> +udp,orig=(src=42.42.42.1,dst=42.42.42.2,sport=<cleared>,dport=<cleared>),reply=(src=42.42.42.2,dst=42.42.42.1,sport=<cleared>,dport=<cleared>),zone=<cleared>
> +udp,orig=(src=42.42.42.1,dst=66.66.66.66,sport=<cleared>,dport=<cleared>),reply=(src=42.42.42.2,dst=42.42.42.1,sport=<cleared>,dport=<cleared>),zone=<cleared>,labels=0x2
> +])
> +
> +# Start a TCP server on lsp2.
> +NETNS_DAEMONIZE([lsp2], [nc -l --no-shutdown 42.42.42.2 8080], [nc0.pid])
> +
> +# Start a TCP connection.
> +NETNS_DAEMONIZE([lsp1], [nc --no-shutdown 66.66.66.66 80], [nc1.pid])
> +
> +OVS_WAIT_UNTIL([test "$(ovs-appctl dpctl/dump-conntrack | grep tcp | grep '42.42.42.1' -c)" = "1"])
> +
> +# There should be only one TCP conntrack entry, for the LB dnat.
> +AT_CHECK([ovs-appctl dpctl/dump-conntrack | FORMAT_CT_STATE(42.42.42.1) | grep tcp | \
> +sed -e 's/zone=[[0-9]]*/zone=<cleared>/'], [0], [dnl
> +tcp,orig=(src=42.42.42.1,dst=66.66.66.66,sport=<cleared>,dport=<cleared>),reply=(src=42.42.42.2,dst=42.42.42.1,sport=<cleared>,dport=<cleared>),zone=<cleared>,labels=0x2,protoinfo=(state=ESTABLISHED)
> +])
> +
> +OVS_APP_EXIT_AND_WAIT([ovn-controller])
> +
> +as ovn-sb
> +OVS_APP_EXIT_AND_WAIT([ovsdb-server])
> +
> +as ovn-nb
> +OVS_APP_EXIT_AND_WAIT([ovsdb-server])
> +
> +as northd
> +OVS_APP_EXIT_AND_WAIT([ovn-northd])
> +
> +as
> +OVS_TRAFFIC_VSWITCHD_STOP(["/failed to query port patch-.*/d
> +/connection dropped.*/d"])
> +
> +AT_CLEANUP
> diff --git a/utilities/ovn-detrace.in b/utilities/ovn-detrace.in
> index 4f8dd5f..343965d 100755
> --- a/utilities/ovn-detrace.in
> +++ b/utilities/ovn-detrace.in
> @@ -232,6 +232,17 @@ class StaticRouteHintHandler(CookieHandlerByUUUID):
>                      route.ip_prefix, route.nexthop, route.output_port,
>                      route.policy))
>
> +class StatelessFilterHintHandler(CookieHandlerByUUUID):
> +    def __init__(self, ovnnb_db):
> +        super(StatelessFilterHintHandler, self).__init__(ovnnb_db,
> +
'Stateless_Filter')
> +
> +    def print_record(self, s_filter):
> +        output = 'Stateless_Filter: priority=%s, match=(%s)' % (
> +            s_filter.priority,
> +            s_filter.match.strip('"'))
> +        print_h(output)
> +
>  class QoSHintHandler(CookieHandlerByUUUID):
>      def __init__(self, ovnnb_db):
>          super(QoSHintHandler, self).__init__(ovnnb_db, 'QoS')
> @@ -254,6 +265,7 @@ class LogicalFlowHandler(CookieHandlerByUUUID):
>              LoadBalancerHintHandler(ovnnb_db),
>              NATHintHandler(ovnnb_db),
>              StaticRouteHintHandler(ovnnb_db),
> +            StatelessFilterHintHandler(ovnnb_db),
>              QoSHintHandler(ovnnb_db),
>          ]
>
> diff --git a/utilities/ovn-nbctl.c b/utilities/ovn-nbctl.c
> index d7bb4b4..7716dcd 100644
> --- a/utilities/ovn-nbctl.c
> +++ b/utilities/ovn-nbctl.c
> @@ -601,6 +601,17 @@ ACL commands:\n\
>    acl-list {SWITCH | PORTGROUP}\n\
>                              print ACLs for SWITCH\n\
>  \n\
> +Stateless filter commands:\n\
> +  [--type={switch | port-group}] [--may-exist]\n\
> +  stateless-filter-add {SWITCH | PORTGROUP} PRIORITY MATCH \n\
> +                            add a stateless filter to SWITCH/PORTGROUP\n\
> +  [--type={switch | port-group}]\n\
> +  stateless-filter-del {SWITCH | PORTGROUP} [PRIORITY MATCH]\n\
> +                            remove stateless filters from SWITCH/PORTGROUP\n\
> +  [--type={switch | port-group}]\n\
> +  stateless-filter-list {SWITCH | PORTGROUP}\n\
> +                            print stateless filters for SWITCH\n\
> +\n\
>  QoS commands:\n\
>    qos-add SWITCH DIRECTION PRIORITY MATCH [rate=RATE [burst=BURST]] [dscp=DSCP]\n\
>                              add an QoS rule to SWITCH\n\
> @@ -725,7 +736,8 @@ LB commands:\n\
>    ls-lb-add SWITCH LB       add a load-balancer to SWITCH\n\
>    ls-lb-del SWITCH [LB]     remove load-balancers from SWITCH\n\
>    ls-lb-list SWITCH         print load-balancers\n\
> -\n\
> +\n\n",program_name, program_name);
> +    printf("\
>  DHCP Options commands:\n\
>    dhcp-options-create CIDR [EXTERNAL_IDS]\n\
>                             create a DHCP options row with CIDR\n\
> @@ -743,8 +755,7 @@ Connection commands:\n\
>    del-connection             delete the connections\n\
>    [--inactivity-probe=MSECS]\n\
>    set-connection TARGET...   set the list of connections to TARGET...\n\
> -\n\n",program_name, program_name);
> -    printf("\
> +\n\
>  SSL commands:\n\
>    get-ssl                     print the SSL configuration\n\
>    del-ssl                     delete the SSL configuration\n\
> @@ -2021,9 +2032,9 @@ acl_cmp(const void *acl1_, const void *acl2_)
>  }
>
>  static char * OVS_WARN_UNUSED_RESULT
> -acl_cmd_get_pg_or_ls(struct ctl_context *ctx,
> -                     const struct nbrec_logical_switch **ls,
> -                     const struct nbrec_port_group **pg)
> +cmd_get_pg_or_ls(struct ctl_context *ctx,
> +                 const struct nbrec_logical_switch **ls,
> +                 const struct nbrec_port_group **pg)
>  {
>      const char *opt_type = shash_find_data(&ctx->options, "--type");
>      char *error;
> @@ -2073,7 +2084,7 @@ nbctl_acl_list(struct ctl_context *ctx)
>      const struct nbrec_acl **acls;
>      size_t i;
>
> -    char *error = acl_cmd_get_pg_or_ls(ctx, &ls, &pg);
> +    char *error = cmd_get_pg_or_ls(ctx, &ls, &pg);
>      if (error) {
>          ctx->error = error;
>          return;
> @@ -2173,7 +2184,7 @@ nbctl_acl_add(struct ctl_context *ctx)
>      const struct nbrec_port_group *pg = NULL;
>      const char *action = ctx->argv[5];
>
> -    char *error = acl_cmd_get_pg_or_ls(ctx, &ls, &pg);
> +    char *error = cmd_get_pg_or_ls(ctx, &ls, &pg);
>      if (error) {
>          ctx->error = error;
>          return;
> @@ -2264,7 +2275,7 @@ nbctl_acl_del(struct ctl_context *ctx)
>      const struct nbrec_logical_switch *ls = NULL;
>      const struct nbrec_port_group *pg = NULL;
>
> -    char *error = acl_cmd_get_pg_or_ls(ctx, &ls, &pg);
> +    char *error = cmd_get_pg_or_ls(ctx, &ls, &pg);
>      if (error) {
>          ctx->error = error;
>          return;
> @@ -2351,6 +2362,181 @@ nbctl_acl_del(struct ctl_context *ctx)
>      }
>  }
>
> +static int
> +stateless_filter_cmp(const void *filter1_, const void *filter2_)
> +{
> +    const struct nbrec_stateless_filter *const *filter1p = filter1_;
> +    const struct nbrec_stateless_filter *const *filter2p = filter2_;
> +    const struct nbrec_stateless_filter *filter1 = *filter1p;
> +    const struct nbrec_stateless_filter *filter2 = *filter2p;
> +
> +    if (filter1->priority != filter2->priority) {
> +        return filter1->priority > filter2->priority ? -1 : 1;
> +    } else {
> +        return strcmp(filter1->match, filter2->match);
> +    }
> +}
> +
> +static void
> +nbctl_stateless_filter_list(struct ctl_context *ctx)
> +{
> +    const struct nbrec_logical_switch *ls = NULL;
> +    const struct nbrec_port_group *pg = NULL;
> +    const struct nbrec_stateless_filter **filters;
> +    size_t i;
> +
> +    char *error = cmd_get_pg_or_ls(ctx, &ls, &pg);
> +    if (error) {
> +        ctx->error = error;
> +        return;
> +    }
> +
> +    size_t n_filters = pg ? pg->n_stateless_filters : ls->n_stateless_filters;
> +    struct nbrec_stateless_filter **nb_filters = pg
> +                                                 ? pg->stateless_filters
> +                                                 : ls->stateless_filters;
> +
> +    filters = xmalloc(sizeof *filters * n_filters);
> +    for (i = 0; i < n_filters; i++) {
> +        filters[i] = nb_filters[i];
> +    }
> +
> +    qsort(filters, n_filters, sizeof *filters, stateless_filter_cmp);
> +
> +    for (i = 0; i < n_filters; i++) {
> +        const struct nbrec_stateless_filter *filter = filters[i];
> +        ds_put_format(&ctx->output, "%5"PRId64" (%s)\n",
> +                      filter->priority, filter->match);
> +    }
> +
> +    free(filters);
> +}
> +
> +static void
> +nbctl_stateless_filter_add(struct ctl_context *ctx)
> +{
> +    const struct nbrec_logical_switch *ls = NULL;
> +    const struct nbrec_port_group *pg = NULL;
> +
> +    char *error = cmd_get_pg_or_ls(ctx, &ls, &pg);
> +    if (error) {
> +        ctx->error = error;
> +        return;
> +    }
> +
> +    int64_t priority;
> +    error = parse_priority(ctx->argv[2], &priority);
> +    if (error) {
> +        ctx->error = error;
> +        return;
> +    }
> +
> +    /* Create the filter. */
> +    struct nbrec_stateless_filter *filter =
> +        nbrec_stateless_filter_insert(ctx->txn);
> +    nbrec_stateless_filter_set_priority(filter, priority);
> +    nbrec_stateless_filter_set_match(filter, ctx->argv[3]);
> +
> +    /* Check if same filter already exists for the ls/portgroup */
> +    size_t n_filters = pg ? pg->n_stateless_filters : ls->n_stateless_filters;
> +    struct nbrec_stateless_filter **filters = pg
> +                                              ? pg->stateless_filters
> +                                              : ls->stateless_filters;
> +    for (size_t i = 0; i < n_filters; i++) {
> +        if (!stateless_filter_cmp(&filters[i], &filter)) {
> +            bool may_exist = shash_find(&ctx->options, "--may-exist") != NULL;
> +            if (!may_exist) {
> +                ctl_error(ctx,
> +                          "Same filter already existed on ls or pg %s.",
> +                          ctx->argv[1]);
> +                return;
> +            }
> +            return;
> +        }
> +    }
> +
> +    /* Insert the filter into the logical switch/port group. */
> +    struct nbrec_stateless_filter **new_filters =
> +        xmalloc(sizeof *new_filters * (n_filters + 1));
> +    nullable_memcpy(new_filters, filters, sizeof *new_filters * n_filters);
> +    new_filters[n_filters] = filter;
> +    if (pg) {
> +        nbrec_port_group_verify_stateless_filters(pg);
> +        nbrec_port_group_set_stateless_filters(pg, new_filters,
> +                                               n_filters + 1);
> +    } else {
> +        nbrec_logical_switch_verify_stateless_filters(ls);
> +        nbrec_logical_switch_set_stateless_filters(ls, new_filters,
> +                                                   n_filters + 1);
> +    }
> +    free(new_filters);
> +}
> +
> +static void
> +nbctl_stateless_filter_del(struct ctl_context *ctx)
> +{
> +    const struct nbrec_logical_switch *ls = NULL;
> +    const struct nbrec_port_group *pg = NULL;
> +
> +    char *error = cmd_get_pg_or_ls(ctx, &ls, &pg);
> +    if (error) {
> +        ctx->error = error;
> +        return;
> +    }
> +
> +    if (ctx->argc == 2) {
> +        /* If priority and match are not specified, delete filters. */
> +        if (pg) {
> +            nbrec_port_group_verify_stateless_filters(pg);
> +            nbrec_port_group_set_stateless_filters(pg, NULL, 0);
> +        } else {
> +            nbrec_logical_switch_verify_stateless_filters(ls);
> +            nbrec_logical_switch_set_stateless_filters(ls, NULL, 0);
> +        }
> +        return;
> +    }
> +
> +    int64_t priority;
> +    error = parse_priority(ctx->argv[2], &priority);
> +    if (error) {
> +        ctx->error = error;
> +        return;
> +    }
> +
> +    if (ctx->argc == 3) {
> +        ctl_error(ctx, "cannot specify priority without match");
> +        return;
> +    }
> +
> +    size_t n_filters = pg ? pg->n_stateless_filters : ls->n_stateless_filters;
> +    struct nbrec_stateless_filter **filters = pg
> +                                              ? pg->stateless_filters
> +                                              : ls->stateless_filters;
> +
> +    /* Remove the matching rule. */
> +    for (size_t i = 0; i < n_filters; i++) {
> +        struct nbrec_stateless_filter *filter = filters[i];
> +
> +        if (priority == filter->priority
> +            && !strcmp(ctx->argv[3], filter->match)) {
> +            struct nbrec_stateless_filter **new_filters
> +                = xmemdup(filters, sizeof *new_filters * n_filters);
> +            new_filters[i] = filters[n_filters - 1];
> +            if (pg) {
> +                nbrec_port_group_verify_stateless_filters(pg);
> +                nbrec_port_group_set_stateless_filters(pg, new_filters,
> +                                                       n_filters - 1);
> +            } else {
> +                nbrec_logical_switch_verify_stateless_filters(ls);
> +                nbrec_logical_switch_set_stateless_filters(ls, new_filters,
> +                                                           n_filters - 1);
> +            }
> +            free(new_filters);
> +            return;
> +        }
> +    }
> +}
> +
>  static void
>  nbctl_qos_list(struct ctl_context *ctx)
>  {
> @@ -6283,6 +6469,15 @@ static const struct ctl_command_syntax nbctl_commands[] = {
>      { "acl-list", 1, 1, "{SWITCH | PORTGROUP}",
>        NULL, nbctl_acl_list, NULL, "--type=", RO },
>
> +    /* stateless filter commands. */
> +    { "stateless-filter-add", 3, 4, "{SWITCH | PORTGROUP} PRIORITY MATCH",
> +      NULL, nbctl_stateless_filter_add, NULL,
> +      "--may-exist,--type=", RW },
> +    { "stateless-filter-del", 1, 4, "{SWITCH | PORTGROUP} [PRIORITY MATCH]",
> +      NULL, nbctl_stateless_filter_del, NULL, "--type=", RW },
> +    { "stateless-filter-list", 1, 1, "{SWITCH | PORTGROUP}",
> +      NULL, nbctl_stateless_filter_list, NULL, "--type=", RO },
> +
>      /* qos commands. */
>      { "qos-add", 5, 7,
>        "SWITCH DIRECTION PRIORITY MATCH [rate=RATE [burst=BURST]] [dscp=DSCP]",
> --
> 1.8.3.1
>
> _______________________________________________
> dev mailing list
> [email protected]
> https://mail.openvswitch.org/mailman/listinfo/ovs-dev
_______________________________________________
dev mailing list
[email protected]
https://mail.openvswitch.org/mailman/listinfo/ovs-dev

Reply via email to