Add an LR option to commit all traffic that is not already committed by either NAT or LB. This ensures that the traffic is tracked, so that we don't erroneously commit reply traffic and reply traffic is not marked as invalid.
To achieve the commit we need to perform a lookup on every packet that goes through the LR pipeline whenever there is stateful NAT. The SNAT lookup requires an additional flag, because the unSNAT happens in the ingress pipeline and at that point we need to know whether the packet is a reply or not. This is not required for DNAT, because the unDNAT stage happens in egress. This also helps with HWOL, as there shouldn't be any match on ct.new for established sessions, because we will commit everything in addition to the already existing stateful NATs and LBs. Reported-at: https://issues.redhat.com/browse/FDP-787 Acked-by: Mark Michelson <[email protected]> Acked-by: Han Zhou <[email protected]> Fixes: ffe267317c25 ("northd: Don't skip the unSNAT stage for traffic towards VIPs.") Signed-off-by: Ales Musil <[email protected]> --- v6: Rebase on top of latest main. Address typos in documentation. Avoid smap parsing for each NAT and pass it as argument. Add missing documentation about HWOL. Add missing Fixes tag. Add ack from Mark. Add ack from Han. v5: Rebase on top of latest main. Address typos in commit message and documentation. Keep the flow ct_commit {} intact if all NATs are stateless. Add extra optimization flag: when the unSNAT reports ct.new, skip the additional lookup in the egress pipeline. v4: Rebase on top of latest main. Adjust the option name. Adjust the NEWS entry. Update ovn-northd.8.xml. Change the option so it affects both zones regardless if there is only stateful SNAT or DNAT. Add comment about !ct.rpl optimization. v3: Rebase on top of latest main. Add extra system test that checks specific scenario which was broken without this option. 
--- NEWS | 2 + include/ovn/logical-fields.h | 8 + lib/logical-fields.c | 8 + northd/northd.c | 202 +++++++++++-- northd/northd.h | 39 +-- northd/ovn-northd.8.xml | 119 ++++++-- ovn-nb.xml | 11 + tests/ovn-northd.at | 268 +++++++++++++++++ tests/system-ovn-kmod.at | 564 +++++++++++++++++++++++++++++++++++ 9 files changed, 1155 insertions(+), 66 deletions(-) diff --git a/NEWS b/NEWS index 0b57792a2..f4eaa39d3 100644 --- a/NEWS +++ b/NEWS @@ -78,6 +78,8 @@ Post v24.09.0 learned from a linux interfaces that is locally bound to the referenced LSP will be learned. Additionally support local overwrites for arbitrary interface names using "dynamic-routing-port-mapping". + - Add "options:ct-commit-all" to LR, that enables commit of all traffic + to DNAT and SNAT zone when LR is stateful. OVN v24.09.0 - 13 Sep 2024 -------------------------- diff --git a/include/ovn/logical-fields.h b/include/ovn/logical-fields.h index fbe73763e..196ac9dd8 100644 --- a/include/ovn/logical-fields.h +++ b/include/ovn/logical-fields.h @@ -95,6 +95,8 @@ enum mff_log_flags_bits { MLF_ICMP_SNAT_BIT = 17, MLF_OVERRIDE_LOCAL_ONLY_BIT = 18, MLF_FROM_CTRL_BIT = 19, + MLF_UNSNAT_NEW_BIT = 20, + MLF_UNSNAT_NOT_TRACKED_BIT = 21, }; /* MFF_LOG_FLAGS_REG flag assignments */ @@ -152,6 +154,12 @@ enum mff_log_flags { MLF_ICMP_SNAT = (1 << MLF_ICMP_SNAT_BIT), MLF_OVERRIDE_LOCAL_ONLY = (1 << MLF_OVERRIDE_LOCAL_ONLY_BIT), + + /* Indicate that the packet went through unSNAT and had ct.new state. */ + MLF_UNSNAT_NEW = (1 << MLF_UNSNAT_NEW_BIT), + + /* Indicate that the packet didn't go through unSNAT. 
*/ + MLF_UNSNAT_NOT_TRACKED = (1 << MLF_UNSNAT_NOT_TRACKED_BIT) }; /* OVN logical fields diff --git a/lib/logical-fields.c b/lib/logical-fields.c index df1b4243c..ed287f42b 100644 --- a/lib/logical-fields.c +++ b/lib/logical-fields.c @@ -139,6 +139,14 @@ ovn_init_symtab(struct shash *symtab) flags_str); snprintf(flags_str, sizeof flags_str, "flags[%d]", MLF_RX_FROM_TUNNEL_BIT); expr_symtab_add_subfield(symtab, "flags.tunnel_rx", NULL, flags_str); + snprintf(flags_str, sizeof flags_str, "flags[%d]", + MLF_UNSNAT_NEW_BIT); + expr_symtab_add_subfield(symtab, "flags.unsnat_new", NULL, + flags_str); + snprintf(flags_str, sizeof flags_str, "flags[%d]", + MLF_UNSNAT_NOT_TRACKED_BIT); + expr_symtab_add_subfield(symtab, "flags.unsnat_not_tracked", NULL, + flags_str); snprintf(flags_str, sizeof flags_str, "flags[%d]", MLF_FROM_CTRL_BIT); expr_symtab_add_subfield(symtab, "flags.from_ctrl", NULL, flags_str); diff --git a/northd/northd.c b/northd/northd.c index c6d632266..84e45f0ff 100644 --- a/northd/northd.c +++ b/northd/northd.c @@ -16316,7 +16316,7 @@ build_lrouter_out_snat_flow(struct lflow_table *lflows, struct ds *actions, bool distributed_nat, struct eth_addr mac, int cidr_bits, bool is_v6, struct ovn_port *l3dgw_port, - struct lflow_ref *lflow_ref, + struct lflow_ref *lflow_ref, bool commit_all, const struct chassis_features *features) { if (!(nat_entry->type == SNAT || nat_entry->type == DNAT_AND_SNAT)) { @@ -16353,7 +16353,7 @@ build_lrouter_out_snat_flow(struct lflow_table *lflows, * properly tracked so we can decide whether to perform SNAT on traffic * exiting the network. */ if (features->ct_commit_to_zone && features->ct_next_zone && - nat_entry->type == SNAT && !od->is_gw_router) { + nat_entry->type == SNAT && !od->is_gw_router && !commit_all) { /* For traffic that comes from SNAT network, initiate CT state before * entering S_ROUTER_OUT_SNAT to allow matching on various CT states. 
*/ @@ -16657,6 +16657,8 @@ static void build_lr_nat_defrag_and_lb_default_flows( /* Packets are allowed by default. */ ovn_lflow_add(lflows, od, S_ROUTER_IN_DEFRAG, 0, "1", "next;", lflow_ref); ovn_lflow_add(lflows, od, S_ROUTER_IN_UNSNAT, 0, "1", "next;", lflow_ref); + ovn_lflow_add(lflows, od, S_ROUTER_IN_POST_UNSNAT, 0, "1", "next;", + lflow_ref); ovn_lflow_add(lflows, od, S_ROUTER_OUT_CHECK_DNAT_LOCAL, 0, "1", REGBIT_DST_NAT_IP_LOCAL" = 0; next;", lflow_ref); ovn_lflow_add(lflows, od, S_ROUTER_OUT_SNAT, 0, "1", "next;", lflow_ref); @@ -16685,6 +16687,135 @@ static void build_lr_nat_defrag_and_lb_default_flows( lflow_ref); } +static void +build_gw_lrouter_commit_all(const struct ovn_datapath *od, + struct lflow_table *lflows, + const struct chassis_features *features, + struct lflow_ref *lflow_ref) +{ + ovs_assert(od->is_gw_router); + if (!(features->ct_commit_to_zone && features->ct_next_zone)) { + return; + } + + /* Note: We can use match on "!ct.rpl" as optimization here, even if the + * previous state is from different zone. The packet that is already reply + * should be reply in both zones. */ + ovn_lflow_add(lflows, od, S_ROUTER_IN_DEFRAG, 10, + "ip && (!ct.trk || !ct.rpl)", + "ct_next(dnat);", lflow_ref); + ovn_lflow_add(lflows, od, S_ROUTER_IN_DNAT, 10, + "ip && ct.new", "ct_commit_to_zone(dnat);", lflow_ref); + + /* We would lose the CT state especially the ct.new flag if we have + * mixed SNAT and DNAT on single LR. In order to know if we actually + * can commit into SNAT zone keep the flag in register. The SNAT flows + * in the egress pipeline can then check the flag and commit + * based on that. 
*/ + ovn_lflow_add(lflows, od, S_ROUTER_IN_POST_UNSNAT, 10, + "ip && ct.new", "flags.unsnat_new = 1; next;", lflow_ref); + ovn_lflow_add(lflows, od, S_ROUTER_IN_POST_UNSNAT, 10, + "ip && !ct.trk", "flags.unsnat_not_tracked = 1; next;", + lflow_ref); + + /* Note: We can use match on "!ct.rpl" as optimization here, even if the + * previous state is from different zone. The packet that is already reply + * should be reply in both zones. */ + ovn_lflow_add(lflows, od, S_ROUTER_OUT_POST_UNDNAT, 10, + "ip && (!ct.trk || !ct.rpl) && " + "flags.unsnat_not_tracked == 1", "ct_next(snat);", + lflow_ref); + ovn_lflow_add(lflows, od, S_ROUTER_OUT_POST_UNDNAT, 10, + "ip && flags.unsnat_new == 1", "next;", lflow_ref); + + ovn_lflow_add(lflows, od, S_ROUTER_OUT_SNAT, 10, + "ip && (!ct.trk || !ct.rpl) && flags.unsnat_new == 1", + "ct_commit_to_zone(snat);", lflow_ref); + ovn_lflow_add(lflows, od, S_ROUTER_OUT_SNAT, 10, + "ip && ct.new && flags.unsnat_not_tracked == 1", + "ct_commit_to_zone(snat);", lflow_ref); +} + +static void +build_dgp_lrouter_commit_all(const struct ovn_datapath *od, + const struct ovn_port *l3dgw_port, + struct lflow_table *lflows, + const struct chassis_features *features, + struct ds *match, struct lflow_ref *lflow_ref) +{ + ovs_assert(od->n_l3dgw_ports); + if (!(features->ct_commit_to_zone && features->ct_next_zone)) { + return; + } + + /* Note: We can use match on "!ct.rpl" as optimization here, even if the + * previous state is from different zone. The packet that is already reply + * should be reply in both zones. 
*/ + ds_clear(match); + ds_put_format(match, "ip && (!ct.trk || !ct.rpl) && " + "inport == %s && is_chassis_resident(%s)", + l3dgw_port->json_key, l3dgw_port->cr_port->json_key); + ovn_lflow_add(lflows, od, S_ROUTER_IN_DEFRAG, 10, ds_cstr(match), + "ct_next(dnat);", lflow_ref); + + ds_clear(match); + ds_put_format(match, "ip && ct.new && inport == %s && " + "is_chassis_resident(%s)", l3dgw_port->json_key, + l3dgw_port->cr_port->json_key); + ovn_lflow_add(lflows, od, S_ROUTER_IN_DNAT, 10, ds_cstr(match), + "ct_commit_to_zone(dnat);", lflow_ref); + + /* We would lose the CT state especially the ct.new flag if we have + * mixed SNAT and DNAT on single LR. In order to know if we actually + * can commit into SNAT zone keep the flag in register. The SNAT flows + * in the egress pipeline can then check the flag and commit + * based on that. */ + ds_clear(match); + ds_put_format(match, "ip && ct.new && " + "inport == %s && is_chassis_resident(%s)", + l3dgw_port->json_key, l3dgw_port->cr_port->json_key); + ovn_lflow_add(lflows, od, S_ROUTER_IN_POST_UNSNAT, 10, ds_cstr(match), + "flags.unsnat_new = 1; next;", lflow_ref); + ds_clear(match); + ds_put_format(match, "ip && !ct.trk && " + "inport == %s && is_chassis_resident(%s)", + l3dgw_port->json_key, l3dgw_port->cr_port->json_key); + ovn_lflow_add(lflows, od, S_ROUTER_IN_POST_UNSNAT, 10, ds_cstr(match), + "flags.unsnat_not_tracked = 1; next;", + lflow_ref); + + /* Note: We can use match on "!ct.rpl" as optimization here, even if the + * previous state is from different zone. The packet that is already reply + * should be reply in both zones. 
*/ + ds_clear(match); + ds_put_format(match, "ip && (!ct.trk || !ct.rpl) && " + "flags.unsnat_not_tracked == 1 && outport == %s && " + "is_chassis_resident(%s)", l3dgw_port->json_key, + l3dgw_port->cr_port->json_key); + ovn_lflow_add(lflows, od, S_ROUTER_OUT_POST_UNDNAT, 10, ds_cstr(match), + "ct_next(snat);", lflow_ref); + ds_clear(match); + ds_put_format(match, "ip && flags.unsnat_new == 1 && outport == %s && " + "is_chassis_resident(%s)", l3dgw_port->json_key, + l3dgw_port->cr_port->json_key); + ovn_lflow_add(lflows, od, S_ROUTER_OUT_POST_UNDNAT, 10, ds_cstr(match), + "next;", lflow_ref); + + ds_clear(match); + ds_put_format(match, "ip && (!ct.trk || !ct.rpl) && " + "flags.unsnat_new == 1 && outport == %s && " + "is_chassis_resident(%s)", + l3dgw_port->json_key, l3dgw_port->cr_port->json_key); + ovn_lflow_add(lflows, od, S_ROUTER_OUT_SNAT, 10, ds_cstr(match), + "ct_commit_to_zone(snat);", lflow_ref); + ds_clear(match); + ds_put_format(match, "ip && ct.new && flags.unsnat_not_tracked == 1 && " + "outport == %s && is_chassis_resident(%s)", + l3dgw_port->json_key, l3dgw_port->cr_port->json_key); + ovn_lflow_add(lflows, od, S_ROUTER_OUT_SNAT, 10, ds_cstr(match), + "ct_commit_to_zone(snat);", lflow_ref); +} + static void build_lrouter_nat_defrag_and_lb( const struct lr_stateful_record *lr_stateful_rec, @@ -16695,6 +16826,7 @@ build_lrouter_nat_defrag_and_lb( const struct chassis_features *features, struct lflow_ref *lflow_ref) { + bool commit_all = smap_get_bool(&od->nbr->options, "ct-commit-all", false); /* Ingress DNAT (Priority 50/70). * * Allow traffic that is related to an existing conntrack entry. @@ -16752,28 +16884,6 @@ build_lrouter_nat_defrag_and_lb( "next;", lflow_ref); } - /* If the router has load balancer or DNAT rules, re-circulate every packet - * through the DNAT zone so that packets that need to be unDNATed in the - * reverse direction get unDNATed. - * - * We also commit newly initiated connections in the reply direction to the - * DNAT zone. 
This ensures that these flows are tracked. If the flow was - * not committed, it would produce ongoing datapath flows with the ct.new - * flag set. Some NICs are unable to offload these flows. - */ - if (od->is_gw_router && (od->nbr->n_nat || lr_stateful_rec->has_lb_vip)) { - /* Do not send ND or ICMP packets to connection tracking. */ - ovn_lflow_add(lflows, od, S_ROUTER_OUT_UNDNAT, 100, - "nd || nd_rs || nd_ra", "next;", - lflow_ref); - ovn_lflow_add(lflows, od, S_ROUTER_OUT_UNDNAT, 50, - "ip", "flags.loopback = 1; ct_dnat;", - lflow_ref); - ovn_lflow_add(lflows, od, S_ROUTER_OUT_POST_UNDNAT, 50, - "ip && ct.new", "ct_commit { } ; next; ", - lflow_ref); - } - /* NAT rules are only valid on Gateway routers and routers with * l3dgw_ports (router has port(s) with gateway chassis * specified). */ @@ -16789,7 +16899,8 @@ build_lrouter_nat_defrag_and_lb( !lport_addresses_is_empty(&lrnat_rec->dnat_force_snat_addrs); bool lb_force_snat_ip = !lport_addresses_is_empty(&lrnat_rec->lb_force_snat_addrs); - + bool stateful = (lr_stateful_rec->has_lb_vip || dnat_force_snat_ip || + lb_force_snat_ip || lrnat_rec->lb_force_snat_router_ip); for (size_t i = 0; i < lrnat_rec->n_nat_entries; i++) { struct ovn_nat *nat_entry = &lrnat_rec->nat_entries[i]; const struct nbrec_nat *nat = nat_entry->nb; @@ -16807,6 +16918,8 @@ build_lrouter_nat_defrag_and_lb( continue; } + stateful |= !stateless; + /* S_ROUTER_IN_UNSNAT * Ingress UNSNAT table: It is for already established connections' * reverse traffic. 
i.e., SNAT has already been done in egress @@ -16929,7 +17042,8 @@ build_lrouter_nat_defrag_and_lb( } else { build_lrouter_out_snat_flow(lflows, od, nat_entry, match, actions, distributed_nat, mac, cidr_bits, is_v6, - l3dgw_port, lflow_ref, features); + l3dgw_port, lflow_ref, commit_all, + features); } /* S_ROUTER_IN_ADMISSION - S_ROUTER_IN_IP_INPUT */ @@ -17019,6 +17133,42 @@ build_lrouter_nat_defrag_and_lb( } } + /* If the router has load balancer or DNAT rules, re-circulate every packet + * through the DNAT zone so that packets that need to be unDNATed in the + * reverse direction get unDNATed. + * + * We also commit newly initiated connections in the reply direction to the + * DNAT zone. This ensures that these flows are tracked. If the flow was + * not committed, it would produce ongoing datapath flows with the ct.new + * flag set. Some NICs are unable to offload these flows. + */ + if (od->is_gw_router && (od->nbr->n_nat || lr_stateful_rec->has_lb_vip)) { + /* Do not send ND or ICMP packets to connection tracking. 
*/ + ovn_lflow_add(lflows, od, S_ROUTER_OUT_UNDNAT, 100, + "nd || nd_rs || nd_ra", "next;", + lflow_ref); + ovn_lflow_add(lflows, od, S_ROUTER_OUT_UNDNAT, 50, + "ip", "flags.loopback = 1; ct_dnat;", + lflow_ref); + if (!(commit_all && stateful)) { + ovn_lflow_add(lflows, od, S_ROUTER_OUT_POST_UNDNAT, 50, + "ip && ct.new", "ct_commit { } ; next; ", + lflow_ref); + } + } + + if (commit_all && stateful) { + if (od->is_gw_router) { + build_gw_lrouter_commit_all(od, lflows, features, lflow_ref); + } + + for (size_t i = 0; i < od->n_l3dgw_ports; i++) { + struct ovn_port *l3dgw_port = od->l3dgw_ports[i]; + build_dgp_lrouter_commit_all(od, l3dgw_port, lflows, + features, match, lflow_ref); + } + } + if (use_common_zone && od->nbr->n_nat) { ds_clear(match); ds_put_cstr(match, "ip && ct_mark.natted == 1"); diff --git a/northd/northd.h b/northd/northd.h index 1f29645c7..b2c8da8fb 100644 --- a/northd/northd.h +++ b/northd/northd.h @@ -517,27 +517,28 @@ enum ovn_stage { PIPELINE_STAGE(ROUTER, IN, IP_INPUT, 3, "lr_in_ip_input") \ PIPELINE_STAGE(ROUTER, IN, DHCP_RELAY_REQ, 4, "lr_in_dhcp_relay_req") \ PIPELINE_STAGE(ROUTER, IN, UNSNAT, 5, "lr_in_unsnat") \ - PIPELINE_STAGE(ROUTER, IN, DEFRAG, 6, "lr_in_defrag") \ - PIPELINE_STAGE(ROUTER, IN, LB_AFF_CHECK, 7, "lr_in_lb_aff_check") \ - PIPELINE_STAGE(ROUTER, IN, DNAT, 8, "lr_in_dnat") \ - PIPELINE_STAGE(ROUTER, IN, LB_AFF_LEARN, 9, "lr_in_lb_aff_learn") \ - PIPELINE_STAGE(ROUTER, IN, ECMP_STATEFUL, 10, "lr_in_ecmp_stateful") \ - PIPELINE_STAGE(ROUTER, IN, ND_RA_OPTIONS, 11, "lr_in_nd_ra_options") \ - PIPELINE_STAGE(ROUTER, IN, ND_RA_RESPONSE, 12, "lr_in_nd_ra_response") \ - PIPELINE_STAGE(ROUTER, IN, IP_ROUTING_PRE, 13, "lr_in_ip_routing_pre") \ - PIPELINE_STAGE(ROUTER, IN, IP_ROUTING, 14, "lr_in_ip_routing") \ - PIPELINE_STAGE(ROUTER, IN, IP_ROUTING_ECMP, 15, "lr_in_ip_routing_ecmp") \ - PIPELINE_STAGE(ROUTER, IN, POLICY, 16, "lr_in_policy") \ - PIPELINE_STAGE(ROUTER, IN, POLICY_ECMP, 17, "lr_in_policy_ecmp") \ - 
PIPELINE_STAGE(ROUTER, IN, DHCP_RELAY_RESP_CHK, 18, \ + PIPELINE_STAGE(ROUTER, IN, POST_UNSNAT, 6, "lr_in_post_unsnat") \ + PIPELINE_STAGE(ROUTER, IN, DEFRAG, 7, "lr_in_defrag") \ + PIPELINE_STAGE(ROUTER, IN, LB_AFF_CHECK, 8, "lr_in_lb_aff_check") \ + PIPELINE_STAGE(ROUTER, IN, DNAT, 9, "lr_in_dnat") \ + PIPELINE_STAGE(ROUTER, IN, LB_AFF_LEARN, 10, "lr_in_lb_aff_learn") \ + PIPELINE_STAGE(ROUTER, IN, ECMP_STATEFUL, 11, "lr_in_ecmp_stateful") \ + PIPELINE_STAGE(ROUTER, IN, ND_RA_OPTIONS, 12, "lr_in_nd_ra_options") \ + PIPELINE_STAGE(ROUTER, IN, ND_RA_RESPONSE, 13, "lr_in_nd_ra_response") \ + PIPELINE_STAGE(ROUTER, IN, IP_ROUTING_PRE, 14, "lr_in_ip_routing_pre") \ + PIPELINE_STAGE(ROUTER, IN, IP_ROUTING, 15, "lr_in_ip_routing") \ + PIPELINE_STAGE(ROUTER, IN, IP_ROUTING_ECMP, 16, "lr_in_ip_routing_ecmp") \ + PIPELINE_STAGE(ROUTER, IN, POLICY, 17, "lr_in_policy") \ + PIPELINE_STAGE(ROUTER, IN, POLICY_ECMP, 18, "lr_in_policy_ecmp") \ + PIPELINE_STAGE(ROUTER, IN, DHCP_RELAY_RESP_CHK, 19, \ "lr_in_dhcp_relay_resp_chk") \ - PIPELINE_STAGE(ROUTER, IN, DHCP_RELAY_RESP, 19, \ + PIPELINE_STAGE(ROUTER, IN, DHCP_RELAY_RESP, 20, \ "lr_in_dhcp_relay_resp") \ - PIPELINE_STAGE(ROUTER, IN, ARP_RESOLVE, 20, "lr_in_arp_resolve") \ - PIPELINE_STAGE(ROUTER, IN, CHK_PKT_LEN, 21, "lr_in_chk_pkt_len") \ - PIPELINE_STAGE(ROUTER, IN, LARGER_PKTS, 22, "lr_in_larger_pkts") \ - PIPELINE_STAGE(ROUTER, IN, GW_REDIRECT, 23, "lr_in_gw_redirect") \ - PIPELINE_STAGE(ROUTER, IN, ARP_REQUEST, 24, "lr_in_arp_request") \ + PIPELINE_STAGE(ROUTER, IN, ARP_RESOLVE, 21, "lr_in_arp_resolve") \ + PIPELINE_STAGE(ROUTER, IN, CHK_PKT_LEN, 22, "lr_in_chk_pkt_len") \ + PIPELINE_STAGE(ROUTER, IN, LARGER_PKTS, 23, "lr_in_larger_pkts") \ + PIPELINE_STAGE(ROUTER, IN, GW_REDIRECT, 24, "lr_in_gw_redirect") \ + PIPELINE_STAGE(ROUTER, IN, ARP_REQUEST, 25, "lr_in_arp_request") \ \ /* Logical router egress stages. 
*/ \ PIPELINE_STAGE(ROUTER, OUT, CHECK_DNAT_LOCAL, 0, \ diff --git a/northd/ovn-northd.8.xml b/northd/ovn-northd.8.xml index 93b1a9135..022210948 100644 --- a/northd/ovn-northd.8.xml +++ b/northd/ovn-northd.8.xml @@ -3667,7 +3667,26 @@ next; </li> </ul> - <h3>Ingress Table 6: DEFRAG</h3> + <h3>Ingress Table 6: POST UNSNAT</h3> + + <p> + This is to check whether the packet is already tracked in SNAT zone. + It contains a priority-0 flow that simply moves traffic to the next + table. + </p> + + <p> + If the <code>options:ct-commit-all</code> is set to <code>true</code> the + following two flows are configured matching on <code>ip && + ct.new</code> with an action <code>flags.unsnat_new = 1; next; </code> + and <code>ip && !ct.trk</code> with an action + <code>flags.unsnat_not_tracked = 1; next;</code>, which sets one of the + flags that is used in later stages. There is an extra match on both when + a DGP is configured: + <code>inport == DGP && is_chassis_resident(CHASSIS)</code>. + </p> + + <h3>Ingress Table 7: DEFRAG</h3> <p> This is to send packets to connection tracker for tracking and @@ -3710,7 +3729,15 @@ next; this allows potentially related ICMP traffic to pass through CT. </p> - <h3>Ingress Table 7: Load balancing affinity check</h3> + <p> + If the <code>options:ct-commit-all</code> is set to <code>true</code> + the following flow is configured matching on <code>ip && + (!ct.trk || !ct.rpl)</code> with an action <code>ct_next(dnat);</code>. + There is an extra match when the LR is configured with a DGP: + <code>inport == DGP && is_chassis_resident(CHASSIS)</code>. 
+ </p> + + <h3>Ingress Table 8: Load balancing affinity check</h3> <p> Load balancing affinity check table contains the following @@ -3737,7 +3764,7 @@ next; </li> </ul> - <h3>Ingress Table 8: DNAT</h3> + <h3>Ingress Table 9: DNAT</h3> <p> Packets enter the pipeline with destination IP address that needs to @@ -3866,7 +3893,7 @@ next; </li> </ul> - <p>Ingress Table 8: DNAT on Gateway Routers</p> + <p>Ingress Table 9: DNAT on Gateway Routers</p> <ul> <li> @@ -3919,13 +3946,21 @@ next; </p> </li> + <li> + <p> + If the <code>options:ct-commit-all</code> is set to <code>true</code> + the following flow is configured matching on <code>ip && + ct.new</code> with an action <code>ct_commit_to_zone(dnat);</code>. + </p> + </li> + <li> A priority-0 logical flow with match <code>1</code> has actions <code>next;</code>. </li> </ul> - <p>Ingress Table 8: DNAT on Distributed Routers</p> + <p>Ingress Table 9: DNAT on Distributed Routers</p> <p> On distributed routers, the DNAT table only handles packets @@ -3973,6 +4008,14 @@ next; <code>exempted_ext_ips</code>. </p> + <p> + If the <code>options:ct-commit-all</code> is set to <code>true</code> + the following flow is configured matching on <code>ip && + ct.new && inport == DGP && + is_chassis_resident(CHASSIS)</code> with an action + <code>ct_commit_to_zone(dnat);</code>. + </p> + <p> A priority-0 logical flow with match <code>1</code> has actions <code>next;</code>. 
@@ -3980,7 +4023,7 @@ next; </li> </ul> - <h3>Ingress Table 9: Load balancing affinity learn</h3> + <h3>Ingress Table 10: Load balancing affinity learn</h3> <p> Load balancing affinity learn table contains the following @@ -4008,7 +4051,7 @@ next; </li> </ul> - <h3>Ingress Table 10: ECMP symmetric reply processing</h3> + <h3>Ingress Table 11: ECMP symmetric reply processing</h3> <ul> <li> If ECMP routes with symmetric reply are configured in the @@ -4027,7 +4070,7 @@ next; </li> </ul> - <h3>Ingress Table 11: IPv6 ND RA option processing</h3> + <h3>Ingress Table 12: IPv6 ND RA option processing</h3> <ul> <li> @@ -4057,7 +4100,7 @@ reg0[5] = put_nd_ra_opts(<var>options</var>);next; </li> </ul> - <h3>Ingress Table 12: IPv6 ND RA responder</h3> + <h3>Ingress Table 13: IPv6 ND RA responder</h3> <p> This table implements IPv6 ND RA responder for the IPv6 ND RA replies @@ -4102,7 +4145,7 @@ output; </li> </ul> - <h3>Ingress Table 13: IP Routing Pre</h3> + <h3>Ingress Table 14: IP Routing Pre</h3> <p> If a packet arrived at this table from Logical Router Port <var>P</var> @@ -4132,7 +4175,7 @@ output; </li> </ul> - <h3>Ingress Table 14: IP Routing</h3> + <h3>Ingress Table 15: IP Routing</h3> <p> A packet that arrives at this table is an IP packet that should be @@ -4349,7 +4392,7 @@ reg8[16..31] = <var>MID1</var>); </li> </ul> - <h3>Ingress Table 15: IP_ROUTING_ECMP</h3> + <h3>Ingress Table 16: IP_ROUTING_ECMP</h3> <p> This table implements the second part of IP routing for ECMP routes @@ -4406,7 +4449,7 @@ outport = <var>P</var>; </li> </ul> - <h3>Ingress Table 16: Router policies</h3> + <h3>Ingress Table 17: Router policies</h3> <p> This table adds flows for the logical router policies configured on the logical router. 
Please see the @@ -4478,7 +4521,7 @@ next; </li> </ul> - <h3>Ingress Table 17: ECMP handling for router policies</h3> + <h3>Ingress Table 18: ECMP handling for router policies</h3> <p> This table handles the ECMP for the router policies configured with multiple nexthops. @@ -4527,7 +4570,7 @@ outport = <var>P</var> </li> </ul> - <h3>Ingress Table 18: DHCP Relay Response Check</h3> + <h3>Ingress Table 19: DHCP Relay Response Check</h3> <p> This stage process the DHCP response packets coming from the DHCP server. </p> @@ -4561,7 +4604,7 @@ outport = <var>P</var> </li> </ul> - <h3>Ingress Table 19: DHCP Relay Response</h3> + <h3>Ingress Table 20: DHCP Relay Response</h3> <p> This stage process the DHCP response packets on which <code>dhcp_relay_resp_chk</code> action is applied in the previous stage. @@ -4604,7 +4647,7 @@ output; </li> </ul> - <h3>Ingress Table 20: ARP/ND Resolution</h3> + <h3>Ingress Table 21: ARP/ND Resolution</h3> <p> Any packet that reaches this table is an IP packet whose next-hop @@ -4818,7 +4861,7 @@ output; </ul> - <h3>Ingress Table 21: Check packet length</h3> + <h3>Ingress Table 22: Check packet length</h3> <p> For distributed logical routers or gateway routers with gateway @@ -4855,7 +4898,7 @@ REGBIT_PKT_LARGER = check_pkt_larger(<var>L</var>); next; and advances to the next table. </p> - <h3>Ingress Table 22: Handle larger packets</h3> + <h3>Ingress Table 23: Handle larger packets</h3> <p> For distributed logical routers or gateway routers with gateway port @@ -4918,7 +4961,7 @@ icmp6 { and advances to the next table. 
</p> - <h3>Ingress Table 23: Gateway Redirect</h3> + <h3>Ingress Table 24: Gateway Redirect</h3> <p> For distributed logical routers where one or more of the logical router @@ -5002,7 +5045,7 @@ icmp6 { </li> </ul> - <h3>Ingress Table 24: ARP Request</h3> + <h3>Ingress Table 25: ARP Request</h3> <p> In the common case where the Ethernet destination has been resolved, this @@ -5202,6 +5245,16 @@ nd_ns { with action <code>ct_commit { } ; next; </code>. </li> + <li> + If the <code>options:ct-commit-all</code> is set to <code>true</code> + the following flows are configured matching on <code>ip && + (!ct.trk || !ct.rpl) && flags.unsnat_not_tracked == 1</code> + with an action <code>ct_next(snat);</code> and <code>ip && + flags.unsnat_new == 1</code> with an action <code>next;</code>. There + is extra match when there is configured DGP + <code>outport == DGP && is_chassis_resident(CHASSIS)</code>. + </li> + <li> A priority-0 logical flow with match <code>1</code> has actions <code>next;</code>. @@ -5332,6 +5385,17 @@ nd_ns { </p> </li> + <li> + <p> + If the <code>options:ct-commit-all</code> is set to <code>true</code> + the following two flows are configured matching on <code> ip + && (!ct.trk || !ct.rpl) && + flags.unsnat_new == 1</code> and + <code>ip && ct.new && flags.unsnat_not_tracked == 1 + </code> both with an action <code>ct_commit_to_zone(snat);</code>. + </p> + </li> + <li> <p> A priority-0 logical flow with match <code>1</code> has actions @@ -5412,6 +5476,19 @@ nd_ns { initiated from the internal or external network. </li> + <li> + <p> + If the <code>options:ct-commit-all</code> is set to <code>true</code> + the following two flows are configured matching on <code>ip + && (!ct.trk || !ct.rpl) && flags.unsnat_new == 1 + && outport == DGP && is_chassis_resident(CHASSIS) + </code> and <code>ip && ct.new && + flags.unsnat_not_tracked == 1 && outport == DGP && + is_chassis_resident(CHASSIS)</code>both with an action + <code>ct_commit_to_zone(snat);</code>. 
+ </p> + </li> + <li> A priority-0 logical flow with match <code>1</code> has actions <code>next;</code>. diff --git a/ovn-nb.xml b/ovn-nb.xml index 20d30dd58..c4af28d04 100644 --- a/ovn-nb.xml +++ b/ovn-nb.xml @@ -3132,6 +3132,17 @@ or table="Logical_Router"/> on the Logical_Router. </p> </column> + + <column name="options" key="ct-commit-all" type='{"type": "boolean"}'> + When enabled the LR will commit traffic in a zone that is stateful. + The traffic is not committed to both zones but it is selective based + on whether there is stateful DNAT/SNAT or both. The commit all will + prevent issues with <code>ct.inv</code> packets as it will prevent + the commit of reply traffic, which could happen in some cases. This + also helps with HWOL as there shouldn't be any match on ct.new + for established sessions as we will commit everything in addition + to already existing stateful NATs and LBs. + </column> </group> <group title="Common Columns"> diff --git a/tests/ovn-northd.at b/tests/ovn-northd.at index 7455764fe..7859c51c7 100644 --- a/tests/ovn-northd.at +++ b/tests/ovn-northd.at @@ -15673,3 +15673,271 @@ CHECK_NO_CHANGE_AFTER_RECOMPUTE AT_CLEANUP ]) + +OVN_FOR_EACH_NORTHD_NO_HV_PARALLELIZATION([ +AT_SETUP([ovn -- LR ct-commit-all]) +ovn_start + +check ovn-nbctl ls-add sw0 +check ovn-nbctl lr-add lr0 +check ovn-nbctl lrp-add lr0 lr0-sw0 00:00:00:00:ff:01 10.0.0.1/24 +check ovn-nbctl lsp-add sw0 sw0-lr0 +check ovn-nbctl lsp-set-type sw0-lr0 router +check ovn-nbctl lsp-set-addresses sw0-lr0 00:00:00:00:ff:01 +check ovn-nbctl lsp-set-options sw0-lr0 router-port=lr0-sw0 +check ovn-nbctl set logical_router lr0 options:ct-commit-all="true" +check ovn-nbctl --wait=sb sync + +check ovn-sbctl chassis-add gw1 geneve 127.0.0.1 \ + -- set chassis gw1 other_config:ct-commit-to-zone="true" \ + -- set chassis gw1 other_config:ct-next-zone="true" + +ovn-sbctl dump-flows lr0 > lr0flows +AT_CAPTURE_FILE([lr0flows]) + +AT_CHECK([grep "lr_in_post_unsnat" lr0flows | ovn_strip_lflows], [0], 
[dnl + table=??(lr_in_post_unsnat ), priority=0 , match=(1), action=(next;) +]) + +AT_CHECK([grep "lr_in_defrag" lr0flows | ovn_strip_lflows], [0], [dnl + table=??(lr_in_defrag ), priority=0 , match=(1), action=(next;) +]) + +AT_CHECK([grep "lr_in_dnat" lr0flows | ovn_strip_lflows], [0], [dnl + table=??(lr_in_dnat ), priority=0 , match=(1), action=(next;) +]) + +AT_CHECK([grep "lr_out_post_undnat" lr0flows | ovn_strip_lflows], [0], [dnl + table=??(lr_out_post_undnat ), priority=0 , match=(1), action=(next;) +]) + +AT_CHECK([grep "lr_out_snat" lr0flows | ovn_strip_lflows], [0], [dnl + table=??(lr_out_snat ), priority=0 , match=(1), action=(next;) + table=??(lr_out_snat ), priority=120 , match=(nd_ns), action=(next;) +]) + + +# Create a distributed gw port on lr0 +check ovn-nbctl ls-add public +check ovn-nbctl lrp-add lr0 lr0-public 00:00:00:00:ff:02 172.168.0.10/24 +check ovn-nbctl lrp-set-gateway-chassis lr0-public gw1 + +check ovn-nbctl lsp-add public public-lr0 \ + -- set Logical_Switch_Port public-lr0 \ + type=router options:router-port=lr0-public \ + -- lsp-set-addresses public-lr0 router + +# Add SNAT +check ovn-nbctl lr-nat-add lr0 snat 172.168.0.10 10.0.0.0/24 + +ovn-sbctl dump-flows lr0 > lr0flows +AT_CAPTURE_FILE([lr0flows]) + +AT_CHECK([grep "lr_in_post_unsnat" lr0flows | ovn_strip_lflows], [0], [dnl + table=??(lr_in_post_unsnat ), priority=0 , match=(1), action=(next;) + table=??(lr_in_post_unsnat ), priority=10 , match=(ip && !ct.trk && inport == "lr0-public" && is_chassis_resident("cr-lr0-public")), action=(flags.unsnat_not_tracked = 1; next;) + table=??(lr_in_post_unsnat ), priority=10 , match=(ip && ct.new && inport == "lr0-public" && is_chassis_resident("cr-lr0-public")), action=(flags.unsnat_new = 1; next;) +]) + +AT_CHECK([grep "lr_in_defrag" lr0flows | ovn_strip_lflows], [0], [dnl + table=??(lr_in_defrag ), priority=0 , match=(1), action=(next;) + table=??(lr_in_defrag ), priority=10 , match=(ip && (!ct.trk || !ct.rpl) && inport == "lr0-public" 
&& is_chassis_resident("cr-lr0-public")), action=(ct_next(dnat);) +]) + +AT_CHECK([grep "lr_in_dnat" lr0flows | ovn_strip_lflows], [0], [dnl + table=??(lr_in_dnat ), priority=0 , match=(1), action=(next;) + table=??(lr_in_dnat ), priority=10 , match=(ip && ct.new && inport == "lr0-public" && is_chassis_resident("cr-lr0-public")), action=(ct_commit_to_zone(dnat);) +]) + +AT_CHECK([grep "lr_out_post_undnat" lr0flows | ovn_strip_lflows], [0], [dnl + table=??(lr_out_post_undnat ), priority=0 , match=(1), action=(next;) + table=??(lr_out_post_undnat ), priority=10 , match=(ip && (!ct.trk || !ct.rpl) && flags.unsnat_not_tracked == 1 && outport == "lr0-public" && is_chassis_resident("cr-lr0-public")), action=(ct_next(snat);) + table=??(lr_out_post_undnat ), priority=10 , match=(ip && flags.unsnat_new == 1 && outport == "lr0-public" && is_chassis_resident("cr-lr0-public")), action=(next;) +]) + +AT_CHECK([grep "lr_out_snat" lr0flows | ovn_strip_lflows], [0], [dnl + table=??(lr_out_snat ), priority=0 , match=(1), action=(next;) + table=??(lr_out_snat ), priority=10 , match=(ip && (!ct.trk || !ct.rpl) && flags.unsnat_new == 1 && outport == "lr0-public" && is_chassis_resident("cr-lr0-public")), action=(ct_commit_to_zone(snat);) + table=??(lr_out_snat ), priority=10 , match=(ip && ct.new && flags.unsnat_not_tracked == 1 && outport == "lr0-public" && is_chassis_resident("cr-lr0-public")), action=(ct_commit_to_zone(snat);) + table=??(lr_out_snat ), priority=120 , match=(nd_ns), action=(next;) + table=??(lr_out_snat ), priority=153 , match=(ip && ip4.src == 10.0.0.0/24 && outport == "lr0-public" && is_chassis_resident("cr-lr0-public") && (!ct.trk || !ct.rpl)), action=(ct_snat(172.168.0.10);) +]) + +check ovn-nbctl lr-nat-del lr0 + +# Add LB to lr0 +check ovn-nbctl lb-add lb0 172.168.0.100:8082 "10.0.0.50:82,10.0.0.60:82" +check ovn-nbctl lr-lb-add lr0 lb0 +check ovn-nbctl --wait=sb sync + +ovn-sbctl dump-flows lr0 > lr0flows +AT_CAPTURE_FILE([lr0flows]) + +AT_CHECK([grep 
"lr_in_post_unsnat" lr0flows | ovn_strip_lflows], [0], [dnl + table=??(lr_in_post_unsnat ), priority=0 , match=(1), action=(next;) + table=??(lr_in_post_unsnat ), priority=10 , match=(ip && !ct.trk && inport == "lr0-public" && is_chassis_resident("cr-lr0-public")), action=(flags.unsnat_not_tracked = 1; next;) + table=??(lr_in_post_unsnat ), priority=10 , match=(ip && ct.new && inport == "lr0-public" && is_chassis_resident("cr-lr0-public")), action=(flags.unsnat_new = 1; next;) +]) + +AT_CHECK([grep "lr_in_defrag" lr0flows | ovn_strip_lflows], [0], [dnl + table=??(lr_in_defrag ), priority=0 , match=(1), action=(next;) + table=??(lr_in_defrag ), priority=10 , match=(ip && (!ct.trk || !ct.rpl) && inport == "lr0-public" && is_chassis_resident("cr-lr0-public")), action=(ct_next(dnat);) + table=??(lr_in_defrag ), priority=100 , match=(ip && ip4.dst == 172.168.0.100), action=(ct_dnat;) +]) + +AT_CHECK([grep "lr_in_dnat" lr0flows | ovn_strip_lflows], [0], [dnl + table=??(lr_in_dnat ), priority=0 , match=(1), action=(next;) + table=??(lr_in_dnat ), priority=10 , match=(ip && ct.new && inport == "lr0-public" && is_chassis_resident("cr-lr0-public")), action=(ct_commit_to_zone(dnat);) + table=??(lr_in_dnat ), priority=120 , match=(ct.new && !ct.rel && ip4 && ip4.dst == 172.168.0.100 && tcp && tcp.dst == 8082 && is_chassis_resident("cr-lr0-public")), action=(ct_lb_mark(backends=10.0.0.50:82,10.0.0.60:82);) + table=??(lr_in_dnat ), priority=50 , match=(ct.est && !ct.rel && !ct.new && !ct.rpl && ct_mark.natted), action=(next;) + table=??(lr_in_dnat ), priority=50 , match=(ct.rel && !ct.est && !ct.new && !ct.rpl), action=(ct_commit_nat;) + table=??(lr_in_dnat ), priority=70 , match=(ct.est && !ct.rel && !ct.new && !ct.rpl && ct_mark.natted && ct_mark.force_snat == 1), action=(flags.force_snat_for_lb = 1; next;) + table=??(lr_in_dnat ), priority=70 , match=(ct.est && !ct.rel && !ct.new && !ct.rpl && ct_mark.natted && ct_mark.skip_snat == 1), action=(flags.skip_snat_for_lb = 1; 
next;) + table=??(lr_in_dnat ), priority=70 , match=(ct.rel && !ct.est && !ct.new && !ct.rpl && ct_mark.force_snat == 1), action=(flags.force_snat_for_lb = 1; ct_commit_nat;) + table=??(lr_in_dnat ), priority=70 , match=(ct.rel && !ct.est && !ct.new && !ct.rpl && ct_mark.skip_snat == 1), action=(flags.skip_snat_for_lb = 1; ct_commit_nat;) +]) + +AT_CHECK([grep "lr_out_post_undnat" lr0flows | ovn_strip_lflows], [0], [dnl + table=??(lr_out_post_undnat ), priority=0 , match=(1), action=(next;) + table=??(lr_out_post_undnat ), priority=10 , match=(ip && (!ct.trk || !ct.rpl) && flags.unsnat_not_tracked == 1 && outport == "lr0-public" && is_chassis_resident("cr-lr0-public")), action=(ct_next(snat);) + table=??(lr_out_post_undnat ), priority=10 , match=(ip && flags.unsnat_new == 1 && outport == "lr0-public" && is_chassis_resident("cr-lr0-public")), action=(next;) +]) + +AT_CHECK([grep "lr_out_snat" lr0flows | ovn_strip_lflows], [0], [dnl + table=??(lr_out_snat ), priority=0 , match=(1), action=(next;) + table=??(lr_out_snat ), priority=10 , match=(ip && (!ct.trk || !ct.rpl) && flags.unsnat_new == 1 && outport == "lr0-public" && is_chassis_resident("cr-lr0-public")), action=(ct_commit_to_zone(snat);) + table=??(lr_out_snat ), priority=10 , match=(ip && ct.new && flags.unsnat_not_tracked == 1 && outport == "lr0-public" && is_chassis_resident("cr-lr0-public")), action=(ct_commit_to_zone(snat);) + table=??(lr_out_snat ), priority=120 , match=(nd_ns), action=(next;) +]) + +# Add SNAT again +check ovn-nbctl lr-nat-add lr0 snat 172.168.0.10 10.0.0.0/24 +check ovn-nbctl --wait=sb sync + +ovn-sbctl dump-flows lr0 > lr0flows +AT_CAPTURE_FILE([lr0flows]) + +AT_CHECK([grep "lr_in_post_unsnat" lr0flows | ovn_strip_lflows], [0], [dnl + table=??(lr_in_post_unsnat ), priority=0 , match=(1), action=(next;) + table=??(lr_in_post_unsnat ), priority=10 , match=(ip && !ct.trk && inport == "lr0-public" && is_chassis_resident("cr-lr0-public")), action=(flags.unsnat_not_tracked = 1; next;) + 
table=??(lr_in_post_unsnat ), priority=10 , match=(ip && ct.new && inport == "lr0-public" && is_chassis_resident("cr-lr0-public")), action=(flags.unsnat_new = 1; next;) +]) + +AT_CHECK([grep "lr_in_defrag" lr0flows | ovn_strip_lflows], [0], [dnl + table=??(lr_in_defrag ), priority=0 , match=(1), action=(next;) + table=??(lr_in_defrag ), priority=10 , match=(ip && (!ct.trk || !ct.rpl) && inport == "lr0-public" && is_chassis_resident("cr-lr0-public")), action=(ct_next(dnat);) + table=??(lr_in_defrag ), priority=100 , match=(ip && ip4.dst == 172.168.0.100), action=(ct_dnat;) +]) + +AT_CHECK([grep "lr_in_dnat" lr0flows | ovn_strip_lflows], [0], [dnl + table=??(lr_in_dnat ), priority=0 , match=(1), action=(next;) + table=??(lr_in_dnat ), priority=10 , match=(ip && ct.new && inport == "lr0-public" && is_chassis_resident("cr-lr0-public")), action=(ct_commit_to_zone(dnat);) + table=??(lr_in_dnat ), priority=120 , match=(ct.new && !ct.rel && ip4 && ip4.dst == 172.168.0.100 && tcp && tcp.dst == 8082 && is_chassis_resident("cr-lr0-public")), action=(ct_lb_mark(backends=10.0.0.50:82,10.0.0.60:82);) + table=??(lr_in_dnat ), priority=50 , match=(ct.est && !ct.rel && !ct.new && !ct.rpl && ct_mark.natted), action=(next;) + table=??(lr_in_dnat ), priority=50 , match=(ct.rel && !ct.est && !ct.new && !ct.rpl), action=(ct_commit_nat;) + table=??(lr_in_dnat ), priority=70 , match=(ct.est && !ct.rel && !ct.new && !ct.rpl && ct_mark.natted && ct_mark.force_snat == 1), action=(flags.force_snat_for_lb = 1; next;) + table=??(lr_in_dnat ), priority=70 , match=(ct.est && !ct.rel && !ct.new && !ct.rpl && ct_mark.natted && ct_mark.skip_snat == 1), action=(flags.skip_snat_for_lb = 1; next;) + table=??(lr_in_dnat ), priority=70 , match=(ct.rel && !ct.est && !ct.new && !ct.rpl && ct_mark.force_snat == 1), action=(flags.force_snat_for_lb = 1; ct_commit_nat;) + table=??(lr_in_dnat ), priority=70 , match=(ct.rel && !ct.est && !ct.new && !ct.rpl && ct_mark.skip_snat == 1), 
action=(flags.skip_snat_for_lb = 1; ct_commit_nat;) +]) + +AT_CHECK([grep "lr_out_post_undnat" lr0flows | ovn_strip_lflows], [0], [dnl + table=??(lr_out_post_undnat ), priority=0 , match=(1), action=(next;) + table=??(lr_out_post_undnat ), priority=10 , match=(ip && (!ct.trk || !ct.rpl) && flags.unsnat_not_tracked == 1 && outport == "lr0-public" && is_chassis_resident("cr-lr0-public")), action=(ct_next(snat);) + table=??(lr_out_post_undnat ), priority=10 , match=(ip && flags.unsnat_new == 1 && outport == "lr0-public" && is_chassis_resident("cr-lr0-public")), action=(next;) +]) + +AT_CHECK([grep "lr_out_snat" lr0flows | ovn_strip_lflows], [0], [dnl + table=??(lr_out_snat ), priority=0 , match=(1), action=(next;) + table=??(lr_out_snat ), priority=10 , match=(ip && (!ct.trk || !ct.rpl) && flags.unsnat_new == 1 && outport == "lr0-public" && is_chassis_resident("cr-lr0-public")), action=(ct_commit_to_zone(snat);) + table=??(lr_out_snat ), priority=10 , match=(ip && ct.new && flags.unsnat_not_tracked == 1 && outport == "lr0-public" && is_chassis_resident("cr-lr0-public")), action=(ct_commit_to_zone(snat);) + table=??(lr_out_snat ), priority=120 , match=(nd_ns), action=(next;) + table=??(lr_out_snat ), priority=153 , match=(ip && ip4.src == 10.0.0.0/24 && outport == "lr0-public" && is_chassis_resident("cr-lr0-public") && (!ct.trk || !ct.rpl)), action=(ct_snat(172.168.0.10);) +]) + +# Make the logical router a gateway router +check ovn-nbctl lrp-del-gateway-chassis lr0-public gw1 +check ovn-nbctl set logical_router lr0 options:chassis=gw1 +check ovn-nbctl --wait=sb sync + +ovn-sbctl dump-flows lr0 > lr0flows +AT_CAPTURE_FILE([lr0flows]) + +AT_CHECK([grep "lr_in_post_unsnat" lr0flows | ovn_strip_lflows], [0], [dnl + table=??(lr_in_post_unsnat ), priority=0 , match=(1), action=(next;) + table=??(lr_in_post_unsnat ), priority=10 , match=(ip && !ct.trk), action=(flags.unsnat_not_tracked = 1; next;) + table=??(lr_in_post_unsnat ), priority=10 , match=(ip && ct.new), 
action=(flags.unsnat_new = 1; next;) +]) + +AT_CHECK([grep "lr_in_defrag" lr0flows | ovn_strip_lflows], [0], [dnl + table=??(lr_in_defrag ), priority=0 , match=(1), action=(next;) + table=??(lr_in_defrag ), priority=10 , match=(ip && (!ct.trk || !ct.rpl)), action=(ct_next(dnat);) + table=??(lr_in_defrag ), priority=100 , match=(ip && ip4.dst == 172.168.0.100), action=(ct_dnat;) +]) + +AT_CHECK([grep "lr_in_dnat" lr0flows | ovn_strip_lflows], [0], [dnl + table=??(lr_in_dnat ), priority=0 , match=(1), action=(next;) + table=??(lr_in_dnat ), priority=10 , match=(ip && ct.new), action=(ct_commit_to_zone(dnat);) + table=??(lr_in_dnat ), priority=120 , match=(ct.new && !ct.rel && ip4 && ip4.dst == 172.168.0.100 && tcp && tcp.dst == 8082), action=(ct_lb_mark(backends=10.0.0.50:82,10.0.0.60:82);) + table=??(lr_in_dnat ), priority=50 , match=(ct.est && !ct.rel && !ct.new && !ct.rpl && ct_mark.natted), action=(next;) + table=??(lr_in_dnat ), priority=50 , match=(ct.rel && !ct.est && !ct.new && !ct.rpl), action=(ct_commit_nat;) + table=??(lr_in_dnat ), priority=70 , match=(ct.est && !ct.rel && !ct.new && !ct.rpl && ct_mark.natted && ct_mark.force_snat == 1), action=(flags.force_snat_for_lb = 1; next;) + table=??(lr_in_dnat ), priority=70 , match=(ct.est && !ct.rel && !ct.new && !ct.rpl && ct_mark.natted && ct_mark.skip_snat == 1), action=(flags.skip_snat_for_lb = 1; next;) + table=??(lr_in_dnat ), priority=70 , match=(ct.rel && !ct.est && !ct.new && !ct.rpl && ct_mark.force_snat == 1), action=(flags.force_snat_for_lb = 1; ct_commit_nat;) + table=??(lr_in_dnat ), priority=70 , match=(ct.rel && !ct.est && !ct.new && !ct.rpl && ct_mark.skip_snat == 1), action=(flags.skip_snat_for_lb = 1; ct_commit_nat;) +]) + +AT_CHECK([grep "lr_out_post_undnat" lr0flows | ovn_strip_lflows], [0], [dnl + table=??(lr_out_post_undnat ), priority=0 , match=(1), action=(next;) + table=??(lr_out_post_undnat ), priority=10 , match=(ip && (!ct.trk || !ct.rpl) && flags.unsnat_not_tracked == 1), 
action=(ct_next(snat);) + table=??(lr_out_post_undnat ), priority=10 , match=(ip && flags.unsnat_new == 1), action=(next;) +]) + +AT_CHECK([grep "lr_out_snat" lr0flows | ovn_strip_lflows], [0], [dnl + table=??(lr_out_snat ), priority=0 , match=(1), action=(next;) + table=??(lr_out_snat ), priority=10 , match=(ip && (!ct.trk || !ct.rpl) && flags.unsnat_new == 1), action=(ct_commit_to_zone(snat);) + table=??(lr_out_snat ), priority=10 , match=(ip && ct.new && flags.unsnat_not_tracked == 1), action=(ct_commit_to_zone(snat);) + table=??(lr_out_snat ), priority=120 , match=(nd_ns), action=(next;) + table=??(lr_out_snat ), priority=25 , match=(ip && ip4.src == 10.0.0.0/24 && (!ct.trk || !ct.rpl)), action=(ct_snat(172.168.0.10);) +]) + +# Disable commit all for the router +check ovn-nbctl remove logical_router lr0 options ct-commit-all +check ovn-nbctl --wait=sb sync + +ovn-sbctl dump-flows lr0 > lr0flows +AT_CAPTURE_FILE([lr0flows]) + +AT_CHECK([grep "lr_in_post_unsnat" lr0flows | ovn_strip_lflows], [0], [dnl + table=??(lr_in_post_unsnat ), priority=0 , match=(1), action=(next;) +]) + +AT_CHECK([grep "lr_in_defrag" lr0flows | ovn_strip_lflows], [0], [dnl + table=??(lr_in_defrag ), priority=0 , match=(1), action=(next;) + table=??(lr_in_defrag ), priority=100 , match=(ip && ip4.dst == 172.168.0.100), action=(ct_dnat;) +]) + +AT_CHECK([grep "lr_in_dnat" lr0flows | ovn_strip_lflows], [0], [dnl + table=??(lr_in_dnat ), priority=0 , match=(1), action=(next;) + table=??(lr_in_dnat ), priority=120 , match=(ct.new && !ct.rel && ip4 && ip4.dst == 172.168.0.100 && tcp && tcp.dst == 8082), action=(ct_lb_mark(backends=10.0.0.50:82,10.0.0.60:82);) + table=??(lr_in_dnat ), priority=50 , match=(ct.est && !ct.rel && !ct.new && !ct.rpl && ct_mark.natted), action=(next;) + table=??(lr_in_dnat ), priority=50 , match=(ct.rel && !ct.est && !ct.new && !ct.rpl), action=(ct_commit_nat;) + table=??(lr_in_dnat ), priority=70 , match=(ct.est && !ct.rel && !ct.new && !ct.rpl && ct_mark.natted && 
ct_mark.force_snat == 1), action=(flags.force_snat_for_lb = 1; next;) + table=??(lr_in_dnat ), priority=70 , match=(ct.est && !ct.rel && !ct.new && !ct.rpl && ct_mark.natted && ct_mark.skip_snat == 1), action=(flags.skip_snat_for_lb = 1; next;) + table=??(lr_in_dnat ), priority=70 , match=(ct.rel && !ct.est && !ct.new && !ct.rpl && ct_mark.force_snat == 1), action=(flags.force_snat_for_lb = 1; ct_commit_nat;) + table=??(lr_in_dnat ), priority=70 , match=(ct.rel && !ct.est && !ct.new && !ct.rpl && ct_mark.skip_snat == 1), action=(flags.skip_snat_for_lb = 1; ct_commit_nat;) +]) + +AT_CHECK([grep "lr_out_post_undnat" lr0flows | ovn_strip_lflows], [0], [dnl + table=??(lr_out_post_undnat ), priority=0 , match=(1), action=(next;) + table=??(lr_out_post_undnat ), priority=50 , match=(ip && ct.new), action=(ct_commit { } ; next; ) +]) + +AT_CHECK([grep "lr_out_snat" lr0flows | ovn_strip_lflows], [0], [dnl + table=??(lr_out_snat ), priority=0 , match=(1), action=(next;) + table=??(lr_out_snat ), priority=120 , match=(nd_ns), action=(next;) + table=??(lr_out_snat ), priority=25 , match=(ip && ip4.src == 10.0.0.0/24 && (!ct.trk || !ct.rpl)), action=(ct_snat(172.168.0.10);) +]) + +AT_CLEANUP +]) diff --git a/tests/system-ovn-kmod.at b/tests/system-ovn-kmod.at index f7745b979..17607fbf2 100644 --- a/tests/system-ovn-kmod.at +++ b/tests/system-ovn-kmod.at @@ -1172,3 +1172,567 @@ OVS_TRAFFIC_VSWITCHD_STOP(["/failed to query port patch-.*/d /connection dropped.*/d"]) AT_CLEANUP ]) + +OVN_FOR_EACH_NORTHD([ +AT_SETUP([LR DGP - ct-commit-all]) +AT_KEYWORDS([ovnnat]) + +CHECK_CONNTRACK() +CHECK_CONNTRACK_NAT() +ovn_start +OVS_TRAFFIC_VSWITCHD_START() +ADD_BR([br-int]) + +# Set external-ids in br-int needed for ovn-controller. +check ovs-vsctl \ + -- set Open_vSwitch . external-ids:system-id=hv1 \ + -- set Open_vSwitch . external-ids:ovn-remote=unix:$ovs_base/ovn-sb/ovn-sb.sock \ + -- set Open_vSwitch . external-ids:ovn-encap-type=geneve \ + -- set Open_vSwitch . 
external-ids:ovn-encap-ip=169.0.0.1 \ + -- set bridge br-int fail-mode=secure other-config:disable-in-band=true + +# Start ovn-controller. +start_daemon ovn-controller + +# Logical network: +# One LR R1 with switches foo (192.168.1.0/24 fd11::/64), bar (192.168.2.0/24 fd12::/64), +# and alice (172.16.1.0/24 fd20::/64) connected to it. The port between R1 and +# alice is the router gateway port where the R1 NAT rules are applied. +# +# foo -- R1 -- alice +# | +# bar ---- + +check ovn-nbctl lr-add R1 +check ovn-nbctl set logical_router R1 options:ct-commit-all="true" + +check ovn-nbctl ls-add foo +check ovn-nbctl ls-add bar +check ovn-nbctl ls-add alice + +check ovn-nbctl lrp-add R1 foo 00:00:01:01:02:03 192.168.1.1/24 fd11::1/64 +check ovn-nbctl lrp-add R1 bar 00:00:01:01:02:04 192.168.2.1/24 fd12::1/64 +check ovn-nbctl lrp-add R1 alice 00:00:02:01:02:03 172.16.1.1/24 fd20::1/64 \ + -- lrp-set-gateway-chassis alice hv1 + +# Connect foo to R1. +check ovn-nbctl lsp-add foo rp-foo -- set Logical_Switch_Port rp-foo \ + type=router options:router-port=foo \ + -- lsp-set-addresses rp-foo router + +# Connect bar to R1. +check ovn-nbctl lsp-add bar rp-bar -- set Logical_Switch_Port rp-bar \ + type=router options:router-port=bar \ + -- lsp-set-addresses rp-bar router + +# Connect alice to R1. +check ovn-nbctl lsp-add alice rp-alice -- set Logical_Switch_Port rp-alice \ + type=router options:router-port=alice \ + -- lsp-set-addresses rp-alice router + +# Logical port 'foo1' in switch 'foo'. +ADD_NAMESPACES(foo1) +ADD_VETH(foo1, foo1, br-int, "fd11::2", "f0:00:00:01:02:03", \ + "fd11::1", "nodad", "192.168.1.2/24", "192.168.1.1") +check ovn-nbctl lsp-add foo foo1 \ +-- lsp-set-addresses foo1 "f0:00:00:01:02:03 192.168.1.2 fd11::2" + +# Logical port 'foo2' in switch 'foo'. 
+ADD_NAMESPACES(foo2) +ADD_VETH(foo2, foo2, br-int, "fd11::3/64", "f0:00:00:01:02:06", \ + "fd11::1", "nodad", "192.168.1.3/24", "192.168.1.1") +check ovn-nbctl lsp-add foo foo2 \ +-- lsp-set-addresses foo2 "f0:00:00:01:02:06 192.168.1.3 fd11::3" + +# Logical port 'bar1' in switch 'bar'. +ADD_NAMESPACES(bar1) +ADD_VETH(bar1, bar1, br-int, "fd12::2/64", "f0:00:00:01:02:04", \ + "fd12::1", "nodad", "192.168.2.2/24", "192.168.2.1") +check ovn-nbctl lsp-add bar bar1 \ +-- lsp-set-addresses bar1 "f0:00:00:01:02:04 192.168.2.2 fd12::2" + +# Logical port 'alice1' in switch 'alice'. +ADD_NAMESPACES(alice1) +ADD_VETH(alice1, alice1, br-int, "fd20::2/64", "f0:00:00:01:02:05", \ + "fd20::1", "nodad", "172.16.1.2/24", "172.16.1.1") +check ovn-nbctl lsp-add alice alice1 \ +-- lsp-set-addresses alice1 "f0:00:00:01:02:05 172.16.1.2 fd20::2" + +# Add DNAT and SNAT rules. +check ovn-nbctl lr-nat-add R1 dnat_and_snat 172.16.1.3 192.168.1.2 foo1 00:00:02:02:03:04 +check ovn-nbctl lr-nat-add R1 dnat_and_snat 172.16.1.4 192.168.2.2 bar1 00:00:02:02:03:05 +check ovn-nbctl lr-nat-add R1 dnat_and_snat fd20::3 fd11::2 foo1 00:00:02:02:03:04 +check ovn-nbctl lr-nat-add R1 dnat_and_snat fd20::4 fd12::2 bar1 00:00:02:02:03:05 + +# Add a SNAT rule. 
+check ovn-nbctl lr-nat-add R1 snat 172.16.1.1 192.168.1.0/24 +check ovn-nbctl lr-nat-add R1 snat fd20::1 fd11::/64 + +OVN_POPULATE_ARP + +wait_for_ports_up +check ovn-nbctl --wait=hv sync +OVS_WAIT_UNTIL([ovs-ofctl dump-flows br-int | grep 'nat(src=172.16.1.1)']) +OVS_WAIT_UNTIL([ovs-ofctl dump-flows br-int | grep 'nat(src=fd20::1)']) + + +AT_CHECK([ovs-appctl dpctl/flush-conntrack]) +NS_CHECK_EXEC([alice1], [ping -q -c 3 -i 0.3 -w 2 192.168.2.2 | FORMAT_PING], \ +[0], [dnl +3 packets transmitted, 3 received, 0% packet loss, time 0ms +]) + +AT_CHECK([ovs-appctl dpctl/dump-conntrack | grep icmp | FORMAT_CT(192.168.2.2) | \ +sed -e 's/zone=[[0-9]]*/zone=<cleared>/'], [0], [dnl +icmp,orig=(src=172.16.1.2,dst=192.168.2.2,id=<cleared>,type=8,code=0),reply=(src=192.168.2.2,dst=172.16.1.2,id=<cleared>,type=0,code=0),zone=<cleared> +]) + +AT_CHECK([ovs-appctl dpctl/flush-conntrack]) +NS_CHECK_EXEC([alice1], [ping -q -c 3 -i 0.3 -w 2 fd12::2 | FORMAT_PING], \ +[0], [dnl +3 packets transmitted, 3 received, 0% packet loss, time 0ms +]) + +AT_CHECK([ovs-appctl dpctl/dump-conntrack | grep icmp | FORMAT_CT(fd12::2) | \ +sed -e 's/zone=[[0-9]]*/zone=<cleared>/'], [0], [dnl +icmpv6,orig=(src=fd20::2,dst=fd12::2,id=<cleared>,type=128,code=0),reply=(src=fd12::2,dst=fd20::2,id=<cleared>,type=129,code=0),zone=<cleared> +]) + +AT_CHECK([ovs-appctl dpctl/flush-conntrack]) +NS_CHECK_EXEC([bar1], [ping -q -c 3 -i 0.3 -w 2 172.16.1.2 | FORMAT_PING], \ +[0], [dnl +3 packets transmitted, 3 received, 0% packet loss, time 0ms +]) + +AT_CHECK([ovs-appctl dpctl/dump-conntrack | grep icmp | FORMAT_CT(172.16.1.2) | \ +sed -e 's/zone=[[0-9]]*/zone=<cleared>/'], [0], [dnl +icmp,orig=(src=192.168.2.2,dst=172.16.1.2,id=<cleared>,type=8,code=0),reply=(src=172.16.1.2,dst=172.16.1.4,id=<cleared>,type=0,code=0),zone=<cleared> +]) + +AT_CHECK([ovs-appctl dpctl/flush-conntrack]) +NS_CHECK_EXEC([bar1], [ping -q -c 3 -i 0.3 -w 2 fd20::2 | FORMAT_PING], \ +[0], [dnl +3 packets transmitted, 3 received, 0% 
packet loss, time 0ms +]) + +AT_CHECK([ovs-appctl dpctl/dump-conntrack | grep icmp | FORMAT_CT(fd20::2) | \ +sed -e 's/zone=[[0-9]]*/zone=<cleared>/'], [0], [dnl +icmpv6,orig=(src=fd12::2,dst=fd20::2,id=<cleared>,type=128,code=0),reply=(src=fd20::2,dst=fd20::4,id=<cleared>,type=129,code=0),zone=<cleared> +]) + +AT_CHECK([ovs-appctl dpctl/flush-conntrack]) +# East-West NAT: 'foo1' pings 'bar1' using 172.16.1.4. +NS_CHECK_EXEC([foo1], [ping -q -c 3 -i 0.3 -w 2 172.16.1.4 | FORMAT_PING], \ +[0], [dnl +3 packets transmitted, 3 received, 0% packet loss, time 0ms +]) + +# Check conntrack entries. +AT_CHECK([ovs-appctl dpctl/dump-conntrack | grep icmp | FORMAT_CT(172.16.1.4) | \ +sed -e 's/zone=[[0-9]]*/zone=<cleared>/'], [0], [dnl +icmp,orig=(src=172.16.1.3,dst=172.16.1.4,id=<cleared>,type=8,code=0),reply=(src=192.168.2.2,dst=172.16.1.3,id=<cleared>,type=0,code=0),zone=<cleared> +icmp,orig=(src=192.168.1.2,dst=172.16.1.4,id=<cleared>,type=8,code=0),reply=(src=172.16.1.4,dst=172.16.1.3,id=<cleared>,type=0,code=0),zone=<cleared> +]) + +AT_CHECK([ovs-appctl dpctl/flush-conntrack]) +NS_CHECK_EXEC([foo1], [ping -q -c 3 -i 0.3 -w 2 fd20::4 | FORMAT_PING], [0], [dnl +3 packets transmitted, 3 received, 0% packet loss, time 0ms +]) + +# Check conntrack entries. First SNAT of 'foo1' address happens. +# Then DNAT of 'bar1' address happens (listed first below). 
+AT_CHECK([ovs-appctl dpctl/dump-conntrack | FORMAT_CT(fd20::4) | \ +sed -e 's/zone=[[0-9]]*/zone=<cleared>/'], [0], [dnl +icmpv6,orig=(src=fd11::2,dst=fd20::4,id=<cleared>,type=128,code=0),reply=(src=fd20::4,dst=fd20::3,id=<cleared>,type=129,code=0),zone=<cleared> +icmpv6,orig=(src=fd20::3,dst=fd20::4,id=<cleared>,type=128,code=0),reply=(src=fd12::2,dst=fd20::3,id=<cleared>,type=129,code=0),zone=<cleared> +]) + +AT_CHECK([ovs-appctl dpctl/flush-conntrack]) +NS_CHECK_EXEC([foo2], [ping -q -c 3 -i 0.3 -w 2 172.16.1.4 | FORMAT_PING], [0], [dnl +3 packets transmitted, 3 received, 0% packet loss, time 0ms +]) + +# Check conntrack entries. +AT_CHECK([ovs-appctl dpctl/dump-conntrack | grep icmp | FORMAT_CT(172.16.1.1) | \ +sed -e 's/zone=[[0-9]]*/zone=<cleared>/'], [0], [dnl +icmp,orig=(src=172.16.1.1,dst=172.16.1.4,id=<cleared>,type=8,code=0),reply=(src=192.168.2.2,dst=172.16.1.1,id=<cleared>,type=0,code=0),zone=<cleared> +icmp,orig=(src=192.168.1.3,dst=172.16.1.4,id=<cleared>,type=8,code=0),reply=(src=172.16.1.4,dst=172.16.1.1,id=<cleared>,type=0,code=0),zone=<cleared> +]) + +AT_CHECK([ovs-appctl dpctl/flush-conntrack]) +# East-West NAT: 'foo2' pings 'bar1' using fd20::4. 
+NS_CHECK_EXEC([foo2], [ping -q -c 3 -i 0.3 -w 2 fd20::4 | FORMAT_PING], [0], [dnl +3 packets transmitted, 3 received, 0% packet loss, time 0ms +]) + +AT_CHECK([ovs-appctl dpctl/dump-conntrack | FORMAT_CT(fd20::1) | \ +sed -e 's/zone=[[0-9]]*/zone=<cleared>/'], [0], [dnl +icmpv6,orig=(src=fd11::3,dst=fd20::4,id=<cleared>,type=128,code=0),reply=(src=fd20::4,dst=fd20::1,id=<cleared>,type=129,code=0),zone=<cleared> +icmpv6,orig=(src=fd20::1,dst=fd20::4,id=<cleared>,type=128,code=0),reply=(src=fd12::2,dst=fd20::1,id=<cleared>,type=129,code=0),zone=<cleared> +]) + +OVS_APP_EXIT_AND_WAIT([ovn-controller]) + +as ovn-sb +OVS_APP_EXIT_AND_WAIT([ovsdb-server]) + +as ovn-nb +OVS_APP_EXIT_AND_WAIT([ovsdb-server]) + +as northd +OVS_APP_EXIT_AND_WAIT([ovn-northd]) + +as +OVS_TRAFFIC_VSWITCHD_STOP(["/failed to query port patch-.*/d +/connection dropped.*/d"]) +AT_CLEANUP +]) + +OVN_FOR_EACH_NORTHD([ +AT_SETUP([LR GW router - ct-commit-all]) +AT_KEYWORDS([ovnnat]) + +CHECK_CONNTRACK() +CHECK_CONNTRACK_NAT() +ovn_start +OVS_TRAFFIC_VSWITCHD_START() +ADD_BR([br-int]) + +# Set external-ids in br-int needed for ovn-controller. +check ovs-vsctl \ + -- set Open_vSwitch . external-ids:system-id=hv1 \ + -- set Open_vSwitch . external-ids:ovn-remote=unix:$ovs_base/ovn-sb/ovn-sb.sock \ + -- set Open_vSwitch . external-ids:ovn-encap-type=geneve \ + -- set Open_vSwitch . external-ids:ovn-encap-ip=169.0.0.1 \ + -- set bridge br-int fail-mode=secure other-config:disable-in-band=true + +# Start ovn-controller. +start_daemon ovn-controller + +# Logical network: +# One LR R1 with switches foo (192.168.1.0/24 fd11::/64), bar (192.168.2.0/24 fd12::/64), +# and alice (172.16.1.0/24 fd20::/64) connected to it. The port between R1 and +# alice is the router gateway port where the R1 NAT rules are applied. 
+# +# foo -- R1 -- alice +# | +# bar ---- + +check ovn-nbctl lr-add R1 +check ovn-nbctl set logical_router R1 options:ct-commit-all="true" + +check ovn-nbctl ls-add foo +check ovn-nbctl ls-add bar +check ovn-nbctl ls-add alice + +check ovn-nbctl lrp-add R1 foo 00:00:01:01:02:03 192.168.1.1/24 fd11::1/64 +check ovn-nbctl lrp-add R1 bar 00:00:01:01:02:04 192.168.2.1/24 fd12::1/64 +check ovn-nbctl lrp-add R1 alice 00:00:02:01:02:03 172.16.1.1/24 fd20::1/64 \ + -- set logical_router R1 options:chassis="hv1" + +# Connect foo to R1. +check ovn-nbctl lsp-add foo rp-foo -- set Logical_Switch_Port rp-foo \ + type=router options:router-port=foo \ + -- lsp-set-addresses rp-foo router + +# Connect bar to R1. +check ovn-nbctl lsp-add bar rp-bar -- set Logical_Switch_Port rp-bar \ + type=router options:router-port=bar \ + -- lsp-set-addresses rp-bar router + +# Connect alice to R1. +check ovn-nbctl lsp-add alice rp-alice -- set Logical_Switch_Port rp-alice \ + type=router options:router-port=alice \ + -- lsp-set-addresses rp-alice router + +# Logical port 'foo1' in switch 'foo'. +ADD_NAMESPACES(foo1) +ADD_VETH(foo1, foo1, br-int, "fd11::2", "f0:00:00:01:02:03", \ + "fd11::1", "nodad", "192.168.1.2/24", "192.168.1.1") +check ovn-nbctl lsp-add foo foo1 \ +-- lsp-set-addresses foo1 "f0:00:00:01:02:03 192.168.1.2 fd11::2" + +# Logical port 'foo2' in switch 'foo'. +ADD_NAMESPACES(foo2) +ADD_VETH(foo2, foo2, br-int, "fd11::3/64", "f0:00:00:01:02:06", \ + "fd11::1", "nodad", "192.168.1.3/24", "192.168.1.1") +check ovn-nbctl lsp-add foo foo2 \ +-- lsp-set-addresses foo2 "f0:00:00:01:02:06 192.168.1.3 fd11::3" + +# Logical port 'bar1' in switch 'bar'. +ADD_NAMESPACES(bar1) +ADD_VETH(bar1, bar1, br-int, "fd12::2/64", "f0:00:00:01:02:04", \ + "fd12::1", "nodad", "192.168.2.2/24", "192.168.2.1") +check ovn-nbctl lsp-add bar bar1 \ +-- lsp-set-addresses bar1 "f0:00:00:01:02:04 192.168.2.2 fd12::2" + +# Logical port 'alice1' in switch 'alice'. 
+ADD_NAMESPACES(alice1) +ADD_VETH(alice1, alice1, br-int, "fd20::2/64", "f0:00:00:01:02:05", \ + "fd20::1", "nodad", "172.16.1.2/24", "172.16.1.1") +check ovn-nbctl lsp-add alice alice1 \ +-- lsp-set-addresses alice1 "f0:00:00:01:02:05 172.16.1.2 fd20::2" + +# Add DNAT and SNAT rules. +check ovn-nbctl lr-nat-add R1 dnat_and_snat 172.16.1.3 192.168.1.2 foo1 00:00:02:02:03:04 +check ovn-nbctl lr-nat-add R1 dnat_and_snat 172.16.1.4 192.168.2.2 bar1 00:00:02:02:03:05 +check ovn-nbctl lr-nat-add R1 dnat_and_snat fd20::3 fd11::2 foo1 00:00:02:02:03:04 +check ovn-nbctl lr-nat-add R1 dnat_and_snat fd20::4 fd12::2 bar1 00:00:02:02:03:05 + +# Add a SNAT rule. +check ovn-nbctl lr-nat-add R1 snat 172.16.1.1 192.168.1.0/24 +check ovn-nbctl lr-nat-add R1 snat fd20::1 fd11::/64 + +OVN_POPULATE_ARP + +wait_for_ports_up +check ovn-nbctl --wait=hv sync +OVS_WAIT_UNTIL([ovs-ofctl dump-flows br-int | grep 'nat(src=172.16.1.1)']) +OVS_WAIT_UNTIL([ovs-ofctl dump-flows br-int | grep 'nat(src=fd20::1)']) + +AT_CHECK([ovs-appctl dpctl/flush-conntrack]) +NS_CHECK_EXEC([foo1], [ping -q -c 3 -i 0.3 -w 2 192.168.2.2 | FORMAT_PING], \ +[0], [dnl +3 packets transmitted, 3 received, 0% packet loss, time 0ms +]) + +AT_CHECK([ovs-appctl dpctl/dump-conntrack | grep icmp | FORMAT_CT(192.168.2.2) | \ +sed -e 's/zone=[[0-9]]*/zone=<cleared>/'], [0], [dnl +icmp,orig=(src=192.168.1.2,dst=192.168.2.2,id=<cleared>,type=8,code=0),reply=(src=192.168.2.2,dst=172.16.1.3,id=<cleared>,type=0,code=0),zone=<cleared> +icmp,orig=(src=192.168.1.2,dst=192.168.2.2,id=<cleared>,type=8,code=0),reply=(src=192.168.2.2,dst=192.168.1.2,id=<cleared>,type=0,code=0),zone=<cleared> +]) + +AT_CHECK([ovs-appctl dpctl/flush-conntrack]) +NS_CHECK_EXEC([foo1], [ping -q -c 3 -i 0.3 -w 2 fd12::2 | FORMAT_PING], \ +[0], [dnl +3 packets transmitted, 3 received, 0% packet loss, time 0ms +]) + +AT_CHECK([ovs-appctl dpctl/dump-conntrack | grep icmp | FORMAT_CT(fd12::2) | \ +sed -e 's/zone=[[0-9]]*/zone=<cleared>/'], [0], [dnl 
+icmpv6,orig=(src=fd11::2,dst=fd12::2,id=<cleared>,type=128,code=0),reply=(src=fd12::2,dst=fd11::2,id=<cleared>,type=129,code=0),zone=<cleared> +icmpv6,orig=(src=fd11::2,dst=fd12::2,id=<cleared>,type=128,code=0),reply=(src=fd12::2,dst=fd20::3,id=<cleared>,type=129,code=0),zone=<cleared> +]) + +AT_CHECK([ovs-appctl dpctl/flush-conntrack]) +NS_CHECK_EXEC([foo2], [ping -q -c 3 -i 0.3 -w 2 192.168.2.2 | FORMAT_PING], \ +[0], [dnl +3 packets transmitted, 3 received, 0% packet loss, time 0ms +]) + +AT_CHECK([ovs-appctl dpctl/dump-conntrack | grep icmp | FORMAT_CT(192.168.2.2) | \ +sed -e 's/zone=[[0-9]]*/zone=<cleared>/'], [0], [dnl +icmp,orig=(src=192.168.1.3,dst=192.168.2.2,id=<cleared>,type=8,code=0),reply=(src=192.168.2.2,dst=172.16.1.1,id=<cleared>,type=0,code=0),zone=<cleared> +icmp,orig=(src=192.168.1.3,dst=192.168.2.2,id=<cleared>,type=8,code=0),reply=(src=192.168.2.2,dst=192.168.1.3,id=<cleared>,type=0,code=0),zone=<cleared> +]) + +AT_CHECK([ovs-appctl dpctl/flush-conntrack]) +NS_CHECK_EXEC([foo2], [ping -q -c 3 -i 0.3 -w 2 fd12::2 | FORMAT_PING], \ +[0], [dnl +3 packets transmitted, 3 received, 0% packet loss, time 0ms +]) + +AT_CHECK([ovs-appctl dpctl/dump-conntrack | grep icmp | FORMAT_CT(fd12::2) | \ +sed -e 's/zone=[[0-9]]*/zone=<cleared>/'], [0], [dnl +icmpv6,orig=(src=fd11::3,dst=fd12::2,id=<cleared>,type=128,code=0),reply=(src=fd12::2,dst=fd11::3,id=<cleared>,type=129,code=0),zone=<cleared> +icmpv6,orig=(src=fd11::3,dst=fd12::2,id=<cleared>,type=128,code=0),reply=(src=fd12::2,dst=fd20::1,id=<cleared>,type=129,code=0),zone=<cleared> +]) + +AT_CHECK([ovs-appctl dpctl/flush-conntrack]) +NS_CHECK_EXEC([bar1], [ping -q -c 3 -i 0.3 -w 2 192.168.1.3 | FORMAT_PING], \ +[0], [dnl +3 packets transmitted, 3 received, 0% packet loss, time 0ms +]) + +AT_CHECK([ovs-appctl dpctl/dump-conntrack | grep icmp | FORMAT_CT(192.168.1.3) | \ +sed -e 's/zone=[[0-9]]*/zone=<cleared>/'], [0], [dnl 
+icmp,orig=(src=192.168.2.2,dst=192.168.1.3,id=<cleared>,type=8,code=0),reply=(src=192.168.1.3,dst=172.16.1.4,id=<cleared>,type=0,code=0),zone=<cleared> +icmp,orig=(src=192.168.2.2,dst=192.168.1.3,id=<cleared>,type=8,code=0),reply=(src=192.168.1.3,dst=192.168.2.2,id=<cleared>,type=0,code=0),zone=<cleared> +]) + +AT_CHECK([ovs-appctl dpctl/flush-conntrack]) +NS_CHECK_EXEC([bar1], [ping -q -c 3 -i 0.3 -w 2 fd11::3 | FORMAT_PING], \ +[0], [dnl +3 packets transmitted, 3 received, 0% packet loss, time 0ms +]) + +AT_CHECK([ovs-appctl dpctl/dump-conntrack | grep icmp | FORMAT_CT(fd11::3) | \ +sed -e 's/zone=[[0-9]]*/zone=<cleared>/'], [0], [dnl +icmpv6,orig=(src=fd12::2,dst=fd11::3,id=<cleared>,type=128,code=0),reply=(src=fd11::3,dst=fd12::2,id=<cleared>,type=129,code=0),zone=<cleared> +icmpv6,orig=(src=fd12::2,dst=fd11::3,id=<cleared>,type=128,code=0),reply=(src=fd11::3,dst=fd20::4,id=<cleared>,type=129,code=0),zone=<cleared> +]) + +AT_CHECK([ovs-appctl dpctl/flush-conntrack]) +NS_CHECK_EXEC([foo1], [ping -q -c 3 -i 0.3 -w 2 172.16.1.4 | FORMAT_PING], \ +[0], [dnl +3 packets transmitted, 3 received, 0% packet loss, time 0ms +]) + +# Check conntrack entries. +AT_CHECK([ovs-appctl dpctl/dump-conntrack | grep icmp | FORMAT_CT(172.16.1.4) | \ +sed -e 's/zone=[[0-9]]*/zone=<cleared>/'], [0], [dnl +icmp,orig=(src=192.168.1.2,dst=172.16.1.4,id=<cleared>,type=8,code=0),reply=(src=192.168.2.2,dst=192.168.1.2,id=<cleared>,type=0,code=0),zone=<cleared> +]) + +AT_CHECK([ovs-appctl dpctl/dump-conntrack | grep icmp | FORMAT_CT(172.16.1.3) | \ +sed -e 's/zone=[[0-9]]*/zone=<cleared>/'], [0], [dnl +icmp,orig=(src=192.168.1.2,dst=192.168.2.2,id=<cleared>,type=8,code=0),reply=(src=192.168.2.2,dst=172.16.1.3,id=<cleared>,type=0,code=0),zone=<cleared> +]) + +AT_CHECK([ovs-appctl dpctl/flush-conntrack]) +NS_CHECK_EXEC([foo1], [ping -q -c 3 -i 0.3 -w 2 fd20::4 | FORMAT_PING], [0], [dnl +3 packets transmitted, 3 received, 0% packet loss, time 0ms +]) + +# Check conntrack entries. 
+AT_CHECK([ovs-appctl dpctl/dump-conntrack | FORMAT_CT(fd20::4) | \ +sed -e 's/zone=[[0-9]]*/zone=<cleared>/'], [0], [dnl +icmpv6,orig=(src=fd11::2,dst=fd20::4,id=<cleared>,type=128,code=0),reply=(src=fd12::2,dst=fd11::2,id=<cleared>,type=129,code=0),zone=<cleared> +]) + +AT_CHECK([ovs-appctl dpctl/dump-conntrack | FORMAT_CT(fd20::3) | \ +sed -e 's/zone=[[0-9]]*/zone=<cleared>/'], [0], [dnl +icmpv6,orig=(src=fd11::2,dst=fd12::2,id=<cleared>,type=128,code=0),reply=(src=fd12::2,dst=fd20::3,id=<cleared>,type=129,code=0),zone=<cleared> +]) + +AT_CHECK([ovs-appctl dpctl/flush-conntrack]) +# East-West NAT: 'foo2' pings 'bar1' using 172.16.1.4. +NS_CHECK_EXEC([foo2], [ping -q -c 3 -i 0.3 -w 2 172.16.1.4 | FORMAT_PING], [0], [dnl +3 packets transmitted, 3 received, 0% packet loss, time 0ms +]) + +# Check conntrack entries. +AT_CHECK([ovs-appctl dpctl/dump-conntrack | grep icmp | FORMAT_CT(172.16.1.1) | \ +sed -e 's/zone=[[0-9]]*/zone=<cleared>/'], [0], [dnl +icmp,orig=(src=192.168.1.3,dst=192.168.2.2,id=<cleared>,type=8,code=0),reply=(src=192.168.2.2,dst=172.16.1.1,id=<cleared>,type=0,code=0),zone=<cleared> +]) + +AT_CHECK([ovs-appctl dpctl/dump-conntrack | FORMAT_CT(172.16.1.4) | \ +sed -e 's/zone=[[0-9]]*/zone=<cleared>/'], [0], [dnl +icmp,orig=(src=192.168.1.3,dst=172.16.1.4,id=<cleared>,type=8,code=0),reply=(src=192.168.2.2,dst=192.168.1.3,id=<cleared>,type=0,code=0),zone=<cleared> +]) + +AT_CHECK([ovs-appctl dpctl/flush-conntrack]) +# East-West NAT: 'foo2' pings 'bar1' using fd20::4. +NS_CHECK_EXEC([foo2], [ping -q -c 3 -i 0.3 -w 2 fd20::4 | FORMAT_PING], [0], [dnl +3 packets transmitted, 3 received, 0% packet loss, time 0ms +]) + +# Check conntrack entries. 
+AT_CHECK([ovs-appctl dpctl/dump-conntrack | FORMAT_CT(fd20::1) | \ +sed -e 's/zone=[[0-9]]*/zone=<cleared>/'], [0], [dnl +icmpv6,orig=(src=fd11::3,dst=fd12::2,id=<cleared>,type=128,code=0),reply=(src=fd12::2,dst=fd20::1,id=<cleared>,type=129,code=0),zone=<cleared> +]) + +AT_CHECK([ovs-appctl dpctl/dump-conntrack | FORMAT_CT(fd20::4) | \ +sed -e 's/zone=[[0-9]]*/zone=<cleared>/'], [0], [dnl +icmpv6,orig=(src=fd11::3,dst=fd20::4,id=<cleared>,type=128,code=0),reply=(src=fd12::2,dst=fd11::3,id=<cleared>,type=129,code=0),zone=<cleared> +]) + +OVS_APP_EXIT_AND_WAIT([ovn-controller]) + +as ovn-sb +OVS_APP_EXIT_AND_WAIT([ovsdb-server]) + +as ovn-nb +OVS_APP_EXIT_AND_WAIT([ovsdb-server]) + +as northd +OVS_APP_EXIT_AND_WAIT([ovn-northd]) + +as +OVS_TRAFFIC_VSWITCHD_STOP(["/failed to query port patch-.*/d +/connection dropped.*/d"]) +AT_CLEANUP +]) + +OVN_FOR_EACH_NORTHD([ +AT_SETUP([Commit all - UDN]) +AT_KEYWORDS([ovnnat]) + +CHECK_CONNTRACK() +CHECK_CONNTRACK_NAT() +ovn_start +OVS_TRAFFIC_VSWITCHD_START() +ADD_BR([br-int]) + +# Set external-ids in br-int needed for ovn-controller. +check ovs-vsctl \ + -- set Open_vSwitch . external-ids:system-id=hv1 \ + -- set Open_vSwitch . external-ids:ovn-remote=unix:$ovs_base/ovn-sb/ovn-sb.sock \ + -- set Open_vSwitch . external-ids:ovn-encap-type=geneve \ + -- set Open_vSwitch . external-ids:ovn-encap-ip=169.0.0.1 \ + -- set bridge br-int fail-mode=secure other-config:disable-in-band=true + +# Start ovn-controller. 
+start_daemon ovn-controller + +check ovn-nbctl lr-add lr +check ovn-nbctl ls-add ls + +check ovn-nbctl lrp-add lr lr-ls 00:00:00:00:00:01 192.168.100.1/24 \ + -- lrp-set-gateway-chassis lr-ls hv1 +check ovn-nbctl lsp-add ls ls-lr -- set Logical_Switch_Port ls-lr \ + type=router options:router-port=lr-ls \ + -- lsp-set-addresses ls-lr router + +ADD_NAMESPACES(pod) +ADD_VETH(pod, pod, br-int, "192.168.100.5/24", "00:00:00:00:00:05", "192.168.100.1") +check ovn-nbctl lsp-add ls pod \ + -- lsp-set-addresses pod "00:00:00:00:00:05 192.168.100.5" + +ADD_NAMESPACES(mgmt) +ADD_VETH(mgmt, mgmt, br-int, "192.168.100.2/24", "00:00:00:00:00:02", "192.168.100.1") +NS_EXEC([mgmt], [ip addr add 172.16.100.2/24 dev mgmt]) +check ovn-nbctl lsp-add ls mgmt \ + -- lsp-set-addresses mgmt "00:00:00:00:00:02 192.168.100.2" + +# Source-based route: traffic from 192.168.100.0/24 is sent to the mgmt address. +check ovn-nbctl --policy="src-ip" lr-route-add lr 192.168.100.0/24 192.168.100.2 +# UDP and TCP load balancers pointing at the pod address, attached to the switch. +check ovn-nbctl lb-add lb0 172.16.0.5:5656 192.168.100.5:2323 udp +check ovn-nbctl lb-add lb1 172.16.0.5:5657 192.168.100.5:2324 tcp +check ovn-nbctl ls-lb-add ls lb0 +check ovn-nbctl ls-lb-add ls lb1 + +# SNAT restricted to packets destined to the mgmt MAC, with ct-commit-all enabled. +check ovn-nbctl --match="eth.dst == 00:00:00:00:00:02" lr-nat-add lr snat 172.16.0.2 192.168.100.0/24 +check ovn-nbctl set logical_router lr options:ct-commit-all="true" + +OVN_POPULATE_ARP + +wait_for_ports_up +check ovn-nbctl --wait=hv sync + +AT_CHECK([ovs-appctl dpctl/flush-conntrack]) + +NETNS_START_TCPDUMP([mgmt], [-vnne "ip or icmp"], [mgmt]) +NETNS_START_TCPDUMP([pod], [-vnne "ip or icmp"], [pod]) + +echo -e "Hello UDP\nHello UDP" > udp.expected +echo -e "Hello TCP\nHello TCP" > tcp.expected + +NETNS_DAEMONIZE([pod], [nc -e /bin/cat -v -l -u -o server_udp.log 192.168.100.5 2323], [nc1.pid]) +NETNS_DAEMONIZE([pod], [nc -e /bin/cat -v -l -o server_tcp.log 192.168.100.5 2324], [nc2.pid]) + +NS_CHECK_EXEC([mgmt], [(echo "Hello UDP"; sleep 3) | nc -u -s 172.16.100.2 -o client_udp.log 172.16.0.5 5656], [0], [ignore], [ignore]) +check cmp server_udp.log udp.expected +check cmp
client_udp.log udp.expected + +NS_CHECK_EXEC([mgmt], [(echo "Hello TCP"; sleep 3) | nc -s 172.16.100.2 -o client_tcp.log 172.16.0.5 5657], [0], [ignore], [ignore]) +check cmp server_tcp.log tcp.expected +check cmp client_tcp.log tcp.expected + +OVS_APP_EXIT_AND_WAIT([ovn-controller]) + +as ovn-sb +OVS_APP_EXIT_AND_WAIT([ovsdb-server]) + +as ovn-nb +OVS_APP_EXIT_AND_WAIT([ovsdb-server]) + +as northd +OVS_APP_EXIT_AND_WAIT([ovn-northd]) + +as +OVS_TRAFFIC_VSWITCHD_STOP(["/failed to query port patch-.*/d +/connection dropped.*/d"]) +AT_CLEANUP +]) -- 2.48.1 _______________________________________________ dev mailing list [email protected] https://mail.openvswitch.org/mailman/listinfo/ovs-dev
