From: Numan Siddique <[email protected]> Presently we add 65535 priority lflows in the stages - 'ls_in_acl' and 'ls_out_acl' to drop packets which match on 'ct.inv'.
This patch provides an option to not use this field in the logical flow matches for the following reasons: • Some of the smart NICs which support offloading datapath flows may not support this field. In such cases, almost all the datapath flows (matching on the ct_state field +inv or -inv cannot be offloaded if ACLs/load balancers are configured on the logical switch datapath. • A recent commit in kernel ovs datapath sets the committed connection tracking entry to be liberal for out-of-window tcp packets (nf_ct_set_tcp_be_liberal()). Such TCP packets will not be marked as invalid. There are still certain scenarios where a packet can be marked invalid. Like • If the tcp flags are not correct. • If there are checksum errors. In such cases, the packet will be delivered to the destination port. So CMS should evaluate their usecases before enabling this option. Signed-off-by: Numan Siddique <[email protected]> Co-authored-by: Ben Pfaff <[email protected]> Signed-off-by: Ben Pfaff <[email protected]> --- v2 -> v3 ---- * Updated the documentation in ovn-nb.xml as per Mark G's suggestion. v1 -> v2 ---- * Adopted the code changes shared by Ben Pfaff for the ddlog code. NEWS | 7 +- northd/lswitch.dl | 7 + northd/ovn-northd.c | 40 +++--- northd/ovn_northd.dl | 304 ++++++++++++++++++++++--------------------- ovn-nb.xml | 15 +++ 5 files changed, 207 insertions(+), 166 deletions(-) diff --git a/NEWS b/NEWS index 1ddde15f86..5f1365775d 100644 --- a/NEWS +++ b/NEWS @@ -6,11 +6,16 @@ Post-v21.03.0 expected to scale better than the C implementation, for large deployments. (This may take testing and tuning to be effective.) This version of OVN requires DDLog 0.36. - - Introduce ovn-controller incremetal processing engine statistics + - Introduce ovn-controller incremental processing engine statistics - Introduced parallel processing in ovn-northd with the NB_Global config option 'use_parallel_build' to enable it. It is disabled by default. - Support vlan-passthru mode for tag=0 localnet ports. - Support custom 802.11ad EthType for localnet ports. + - Add a new NB Global option - use_ct_inv_match with the default value of true. + If this is set to false, then the logical field - "ct.inv" will not be + used in the logical flow matches. CMS can consider setting this to false, + if they want to use smart NICs which don't support offloading datapath flows + with this field used. OVN v21.03.0 - 12 Mar 2021 ------------------------- diff --git a/northd/lswitch.dl b/northd/lswitch.dl index 47c497e0cf..27ac3cadc5 100644 --- a/northd/lswitch.dl +++ b/northd/lswitch.dl @@ -742,3 +742,10 @@ function put_svc_monitor_mac(options: Map<string,string>, relation SvcMonitorMac(mac: eth_addr) SvcMonitorMac(get_svc_monitor_mac(options, uuid)) :- nb::NB_Global(._uuid = uuid, .options = options). + +relation UseCtInvMatch[bool] +UseCtInvMatch[options.get_bool_def("use_ct_inv_match", true)] :- + nb::NB_Global(.options = options). +UseCtInvMatch[true] :- + Unit(), + not nb in nb::NB_Global(). diff --git a/northd/ovn-northd.c b/northd/ovn-northd.c index de1aa896d3..750e943ab8 100644 --- a/northd/ovn-northd.c +++ b/northd/ovn-northd.c @@ -4099,6 +4099,9 @@ do_ovn_lflow_add(struct hmap *lflow_map, bool shared, hmap_insert_fast(lflow_map, &lflow->hmap_node, hash); } +/* If this option is 'true' northd will make use of ct.inv match fields. + * Otherwise, it will avoid using it. The default is true. */ +static bool use_ct_inv_match = true; /* Adds a row with the specified contents to the Logical_Flow table. */ static void @@ -5712,12 +5715,13 @@ build_acls(struct ovn_datapath *od, struct hmap *lflows, * for deletion (bit 0 of ct_label is set). * * This is enforced at a higher priority than ACLs can be defined. */ + char *match = xasprintf("%s(ct.est && ct.rpl && ct_label.blocked == 1)", + use_ct_inv_match ? "ct.inv || " : ""); ovn_lflow_add(lflows, od, S_SWITCH_IN_ACL, UINT16_MAX, - "ct.inv || (ct.est && ct.rpl && ct_label.blocked == 1)", - "drop;"); + match, "drop;"); ovn_lflow_add(lflows, od, S_SWITCH_OUT_ACL, UINT16_MAX, - "ct.inv || (ct.est && ct.rpl && ct_label.blocked == 1)", - "drop;"); + match, "drop;"); + free(match); /* Ingress and Egress ACL Table (Priority 65535). * @@ -5728,14 +5732,15 @@ build_acls(struct ovn_datapath *od, struct hmap *lflows, * direction to hit the currently defined policy from ACLs. * * This is enforced at a higher priority than ACLs can be defined. */ + match = xasprintf("ct.est && !ct.rel && !ct.new%s && " + "ct.rpl && ct_label.blocked == 0", + use_ct_inv_match ? " && !ct.inv" : ""); + ovn_lflow_add(lflows, od, S_SWITCH_IN_ACL, UINT16_MAX, - "ct.est && !ct.rel && !ct.new && !ct.inv " - "&& ct.rpl && ct_label.blocked == 0", - "next;"); + match, "next;"); ovn_lflow_add(lflows, od, S_SWITCH_OUT_ACL, UINT16_MAX, - "ct.est && !ct.rel && !ct.new && !ct.inv " - "&& ct.rpl && ct_label.blocked == 0", - "next;"); + match, "next;"); + free(match); /* Ingress and Egress ACL Table (Priority 65535). * @@ -5748,14 +5753,14 @@ build_acls(struct ovn_datapath *od, struct hmap *lflows, * a dynamically negotiated FTP data channel), but will allow * related traffic such as an ICMP Port Unreachable through * that's generated from a non-listening UDP port. */ + match = xasprintf("!ct.est && ct.rel && !ct.new%s && " + "ct_label.blocked == 0", + use_ct_inv_match ? " && !ct.inv" : ""); ovn_lflow_add(lflows, od, S_SWITCH_IN_ACL, UINT16_MAX, - "!ct.est && ct.rel && !ct.new && !ct.inv " - "&& ct_label.blocked == 0", - "next;"); + match, "next;"); ovn_lflow_add(lflows, od, S_SWITCH_OUT_ACL, UINT16_MAX, - "!ct.est && ct.rel && !ct.new && !ct.inv " - "&& ct_label.blocked == 0", - "next;"); + match, "next;"); + free(match); /* Ingress and Egress ACL Table (Priority 65535). * @@ -13195,6 +13200,9 @@ ovnnb_db_run(struct northd_context *ctx, use_logical_dp_groups = smap_get_bool(&nb->options, "use_logical_dp_groups", false); + use_ct_inv_match = smap_get_bool(&nb->options, + "use_ct_inv_match", true); + /* deprecated, use --event instead */ controller_event_en = smap_get_bool(&nb->options, "controller_event", false); diff --git a/northd/ovn_northd.dl b/northd/ovn_northd.dl index 2f5d36c599..a0f7944103 100644 --- a/northd/ovn_northd.dl +++ b/northd/ovn_northd.dl @@ -2280,173 +2280,179 @@ for (Reject(lsuuid, pipeline, stage, acl, fair_meter, extra_match_, extra_action } /* build_acls */ -for (sw in &Switch(.ls = ls)) -var has_stateful = sw.has_stateful_acl or sw.has_lb_vip in -{ - /* Ingress and Egress ACL Table (Priority 0): Packets are allowed by - * default. A related rule at priority 1 is added below if there - * are any stateful ACLs in this datapath. */ - Flow(.logical_datapath = ls._uuid, - .stage = s_SWITCH_IN_ACL(), - .priority = 0, - .__match = "1", - .actions = "next;", - .external_ids = map_empty()); - Flow(.logical_datapath = ls._uuid, - .stage = s_SWITCH_OUT_ACL(), - .priority = 0, - .__match = "1", - .actions = "next;", - .external_ids = map_empty()); - - if (has_stateful) { - /* Ingress and Egress ACL Table (Priority 1). - * - * By default, traffic is allowed. This is partially handled by - * the Priority 0 ACL flows added earlier, but we also need to - * commit IP flows. This is because, while the initiater's - * direction may not have any stateful rules, the server's may - * and then its return traffic would not have an associated - * conntrack entry and would return "+invalid". - * - * We use "ct_commit" for a connection that is not already known - * by the connection tracker. Once a connection is committed, - * subsequent packets will hit the flow at priority 0 that just - * uses "next;" - * - * We also check for established connections that have ct_label.blocked - * set on them. That's a connection that was disallowed, but is - * now allowed by policy again since it hit this default-allow flow. - * We need to set ct_label.blocked=0 to let the connection continue, - * which will be done by ct_commit() in the "stateful" stage. - * Subsequent packets will hit the flow at priority 0 that just - * uses "next;". */ - Flow(.logical_datapath = ls._uuid, - .stage = s_SWITCH_IN_ACL(), - .priority = 1, - .__match = "ip && (!ct.est || (ct.est && ct_label.blocked == 1))", - .actions = "${rEGBIT_CONNTRACK_COMMIT()} = 1; next;", - .external_ids = map_empty()); - Flow(.logical_datapath = ls._uuid, - .stage = s_SWITCH_OUT_ACL(), - .priority = 1, - .__match = "ip && (!ct.est || (ct.est && ct_label.blocked == 1))", - .actions = "${rEGBIT_CONNTRACK_COMMIT()} = 1; next;", - .external_ids = map_empty()); - - /* Ingress and Egress ACL Table (Priority 65535). - * - * Always drop traffic that's in an invalid state. Also drop - * reply direction packets for connections that have been marked - * for deletion (bit 0 of ct_label is set). - * - * This is enforced at a higher priority than ACLs can be defined. */ - Flow(.logical_datapath = ls._uuid, - .stage = s_SWITCH_IN_ACL(), - .priority = 65535, - .__match = "ct.inv || (ct.est && ct.rpl && ct_label.blocked == 1)", - .actions = "drop;", - .external_ids = map_empty()); - Flow(.logical_datapath = ls._uuid, - .stage = s_SWITCH_OUT_ACL(), - .priority = 65535, - .__match = "ct.inv || (ct.est && ct.rpl && ct_label.blocked == 1)", - .actions = "drop;", - .external_ids = map_empty()); - - /* Ingress and Egress ACL Table (Priority 65535). - * - * Allow reply traffic that is part of an established - * conntrack entry that has not been marked for deletion - * (bit 0 of ct_label). We only match traffic in the - * reply direction because we want traffic in the request - * direction to hit the currently defined policy from ACLs. - * - * This is enforced at a higher priority than ACLs can be defined. */ +for (UseCtInvMatch[use_ct_inv_match]) { + (var ct_inv_or, var and_not_ct_inv) = match (use_ct_inv_match) { + true -> ("ct.inv || ", "&& !ct.inv "), + false -> ("", ""), + } in + for (sw in &Switch(.ls = ls)) + var has_stateful = sw.has_stateful_acl or sw.has_lb_vip in + { + /* Ingress and Egress ACL Table (Priority 0): Packets are allowed by + * default. A related rule at priority 1 is added below if there + * are any stateful ACLs in this datapath. */ Flow(.logical_datapath = ls._uuid, .stage = s_SWITCH_IN_ACL(), - .priority = 65535, - .__match = "ct.est && !ct.rel && !ct.new && !ct.inv " - "&& ct.rpl && ct_label.blocked == 0", + .priority = 0, + .__match = "1", .actions = "next;", .external_ids = map_empty()); Flow(.logical_datapath = ls._uuid, .stage = s_SWITCH_OUT_ACL(), - .priority = 65535, - .__match = "ct.est && !ct.rel && !ct.new && !ct.inv " - "&& ct.rpl && ct_label.blocked == 0", + .priority = 0, + .__match = "1", .actions = "next;", .external_ids = map_empty()); - /* Ingress and Egress ACL Table (Priority 65535). - * - * Allow traffic that is related to an existing conntrack entry that - * has not been marked for deletion (bit 0 of ct_label). - * - * This is enforced at a higher priority than ACLs can be defined. - * - * NOTE: This does not support related data sessions (eg, - * a dynamically negotiated FTP data channel), but will allow - * related traffic such as an ICMP Port Unreachable through - * that's generated from a non-listening UDP port. */ - Flow(.logical_datapath = ls._uuid, - .stage = s_SWITCH_IN_ACL(), - .priority = 65535, - .__match = "!ct.est && ct.rel && !ct.new && !ct.inv " - "&& ct_label.blocked == 0", - .actions = "next;", - .external_ids = map_empty()); - Flow(.logical_datapath = ls._uuid, - .stage = s_SWITCH_OUT_ACL(), - .priority = 65535, - .__match = "!ct.est && ct.rel && !ct.new && !ct.inv " - "&& ct_label.blocked == 0", - .actions = "next;", - .external_ids = map_empty()); + if (has_stateful) { + /* Ingress and Egress ACL Table (Priority 1). + * + * By default, traffic is allowed. This is partially handled by + * the Priority 0 ACL flows added earlier, but we also need to + * commit IP flows. This is because, while the initiater's + * direction may not have any stateful rules, the server's may + * and then its return traffic would not have an associated + * conntrack entry and would return "+invalid". + * + * We use "ct_commit" for a connection that is not already known + * by the connection tracker. Once a connection is committed, + * subsequent packets will hit the flow at priority 0 that just + * uses "next;" + * + * We also check for established connections that have ct_label.blocked + * set on them. That's a connection that was disallowed, but is + * now allowed by policy again since it hit this default-allow flow. + * We need to set ct_label.blocked=0 to let the connection continue, + * which will be done by ct_commit() in the "stateful" stage. + * Subsequent packets will hit the flow at priority 0 that just + * uses "next;". */ + Flow(.logical_datapath = ls._uuid, + .stage = s_SWITCH_IN_ACL(), + .priority = 1, + .__match = "ip && (!ct.est || (ct.est && ct_label.blocked == 1))", + .actions = "${rEGBIT_CONNTRACK_COMMIT()} = 1; next;", + .external_ids = map_empty()); + Flow(.logical_datapath = ls._uuid, + .stage = s_SWITCH_OUT_ACL(), + .priority = 1, + .__match = "ip && (!ct.est || (ct.est && ct_label.blocked == 1))", + .actions = "${rEGBIT_CONNTRACK_COMMIT()} = 1; next;", + .external_ids = map_empty()); - /* Ingress and Egress ACL Table (Priority 65535). - * - * Not to do conntrack on ND packets. */ + /* Ingress and Egress ACL Table (Priority 65535). + * + * Always drop traffic that's in an invalid state. Also drop + * reply direction packets for connections that have been marked + * for deletion (bit 0 of ct_label is set). + * + * This is enforced at a higher priority than ACLs can be defined. */ + Flow(.logical_datapath = ls._uuid, + .stage = s_SWITCH_IN_ACL(), + .priority = 65535, + .__match = ct_inv_or ++ "(ct.est && ct.rpl && ct_label.blocked == 1)", + .actions = "drop;", + .external_ids = map_empty()); + Flow(.logical_datapath = ls._uuid, + .stage = s_SWITCH_OUT_ACL(), + .priority = 65535, + .__match = ct_inv_or ++ "(ct.est && ct.rpl && ct_label.blocked == 1)", + .actions = "drop;", + .external_ids = map_empty()); + + /* Ingress and Egress ACL Table (Priority 65535). + * + * Allow reply traffic that is part of an established + * conntrack entry that has not been marked for deletion + * (bit 0 of ct_label). We only match traffic in the + * reply direction because we want traffic in the request + * direction to hit the currently defined policy from ACLs. + * + * This is enforced at a higher priority than ACLs can be defined. */ + Flow(.logical_datapath = ls._uuid, + .stage = s_SWITCH_IN_ACL(), + .priority = 65535, + .__match = "ct.est && !ct.rel && !ct.new " ++ and_not_ct_inv ++ + "&& ct.rpl && ct_label.blocked == 0", + .actions = "next;", + .external_ids = map_empty()); + Flow(.logical_datapath = ls._uuid, + .stage = s_SWITCH_OUT_ACL(), + .priority = 65535, + .__match = "ct.est && !ct.rel && !ct.new " ++ and_not_ct_inv ++ + "&& ct.rpl && ct_label.blocked == 0", + .actions = "next;", + .external_ids = map_empty()); + + /* Ingress and Egress ACL Table (Priority 65535). + * + * Allow traffic that is related to an existing conntrack entry that + * has not been marked for deletion (bit 0 of ct_label). + * + * This is enforced at a higher priority than ACLs can be defined. + * + * NOTE: This does not support related data sessions (eg, + * a dynamically negotiated FTP data channel), but will allow + * related traffic such as an ICMP Port Unreachable through + * that's generated from a non-listening UDP port. */ + Flow(.logical_datapath = ls._uuid, + .stage = s_SWITCH_IN_ACL(), + .priority = 65535, + .__match = "!ct.est && ct.rel && !ct.new " ++ and_not_ct_inv ++ + "&& ct_label.blocked == 0", + .actions = "next;", + .external_ids = map_empty()); + Flow(.logical_datapath = ls._uuid, + .stage = s_SWITCH_OUT_ACL(), + .priority = 65535, + .__match = "!ct.est && ct.rel && !ct.new " ++ and_not_ct_inv ++ + "&& ct_label.blocked == 0", + .actions = "next;", + .external_ids = map_empty()); + + /* Ingress and Egress ACL Table (Priority 65535). + * + * Not to do conntrack on ND packets. */ + Flow(.logical_datapath = ls._uuid, + .stage = s_SWITCH_IN_ACL(), + .priority = 65535, + .__match = "nd || nd_ra || nd_rs || mldv1 || mldv2", + .actions = "next;", + .external_ids = map_empty()); + Flow(.logical_datapath = ls._uuid, + .stage = s_SWITCH_OUT_ACL(), + .priority = 65535, + .__match = "nd || nd_ra || nd_rs || mldv1 || mldv2", + .actions = "next;", + .external_ids = map_empty()) + }; + + /* Add a 34000 priority flow to advance the DNS reply from ovn-controller, + * if the CMS has configured DNS records for the datapath. + */ + if (sw.has_dns_records) { + Flow(.logical_datapath = ls._uuid, + .stage = s_SWITCH_OUT_ACL(), + .priority = 34000, + .__match = "udp.src == 53", + .actions = if has_stateful "ct_commit; next;" else "next;", + .external_ids = map_empty()) + }; + + /* Add a 34000 priority flow to advance the service monitor reply + * packets to skip applying ingress ACLs. */ Flow(.logical_datapath = ls._uuid, .stage = s_SWITCH_IN_ACL(), - .priority = 65535, - .__match = "nd || nd_ra || nd_rs || mldv1 || mldv2", + .priority = 34000, + .__match = "eth.dst == $svc_monitor_mac", .actions = "next;", .external_ids = map_empty()); - Flow(.logical_datapath = ls._uuid, - .stage = s_SWITCH_OUT_ACL(), - .priority = 65535, - .__match = "nd || nd_ra || nd_rs || mldv1 || mldv2", - .actions = "next;", - .external_ids = map_empty()) - }; - - /* Add a 34000 priority flow to advance the DNS reply from ovn-controller, - * if the CMS has configured DNS records for the datapath. - */ - if (sw.has_dns_records) { Flow(.logical_datapath = ls._uuid, .stage = s_SWITCH_OUT_ACL(), .priority = 34000, - .__match = "udp.src == 53", - .actions = if has_stateful "ct_commit; next;" else "next;", + .__match = "eth.src == $svc_monitor_mac", + .actions = "next;", .external_ids = map_empty()) - }; - - /* Add a 34000 priority flow to advance the service monitor reply - * packets to skip applying ingress ACLs. */ - Flow(.logical_datapath = ls._uuid, - .stage = s_SWITCH_IN_ACL(), - .priority = 34000, - .__match = "eth.dst == $svc_monitor_mac", - .actions = "next;", - .external_ids = map_empty()); - Flow(.logical_datapath = ls._uuid, - .stage = s_SWITCH_OUT_ACL(), - .priority = 34000, - .__match = "eth.src == $svc_monitor_mac", - .actions = "next;", - .external_ids = map_empty()) + } } /* This stage builds hints for the IN/OUT_ACL stage. Based on various diff --git a/ovn-nb.xml b/ovn-nb.xml index feb38b5d31..e337be4eb3 100644 --- a/ovn-nb.xml +++ b/ovn-nb.xml @@ -240,6 +240,21 @@ </p> </column> + <column name="options" key="use_ct_inv_match"> + <p> + If set to false, <code>ovn-northd</code> will not use the + <code>ct.inv</code> field in any of the logical flow matches. + The default value is true. If the NIC supports offloading + OVS datapath flows but doesn't support offloading ct_state + <code>inv</code> flag, then the datapath flows matching on this flag + (either <code>+inv</code> or <code>-inv</code>) will not be + offloaded. CMS should consider setting <code>use_ct_inv_match</code> + to <code>false</code> in such cases. This results in a side effect + of the invalid packets getting delivered to the destination VIF, which + otherwise would have been dropped by <code>OVN</code>. + </p> + </column> + <group title="Options for configuring interconnection route advertisement"> <p> These options control how routes are advertised between OVN -- 2.29.2 _______________________________________________ dev mailing list [email protected] https://mail.openvswitch.org/mailman/listinfo/ovs-dev
