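This patch adds the capability to force loopback at the end of the egress pipeline. Two new symbols, flags.force_egress_loopback and flags.egress_loopback_occurred, are defined, along with the corresponding flags bits. When flags.force_egress_loopback is set, then at OFTABLE_LOG_TO_PHY, instead of being sent out to the peer patch port or out the outport, the packet is forced back to the beginning of the ingress pipeline with inport = outport. All other registers are cleared, as if the packet had just arrived on that inport, except that flags.egress_loopback_occurred is set so that the ingress pipeline can tell that the packet was recirculated (for example, to bypass the logical router check that drops packets whose source IP address is one of the router's own addresses).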
This capability is needed in order to implement some of the east/west distributed NAT flows. Note: The existing flags.loopback allows a packet to go from the end of the ingress pipeline to the beginning of the egress pipeline with outport = inport, which is different. Initially, there are no tests incorporated in this patch. This functionality is tested in a subsequent distributed NAT flows patch. Tests specific to egress loopback may be added once the capability to inject a packet with one of the flags bits set is added. Signed-off-by: Mickey Spiegel <mickeys....@gmail.com> --- ovn/controller/physical.c | 38 ++++++++++++++++++++++++++++++++++---- ovn/lib/logical-fields.c | 8 ++++++++ ovn/lib/logical-fields.h | 14 ++++++++++++++ ovn/northd/ovn-northd.8.xml | 4 +++- ovn/northd/ovn-northd.c | 2 ++ ovn/ovn-sb.xml | 2 +- ovn/utilities/ovn-trace.c | 41 ++++++++++++++++++++++++++++++----------- 7 files changed, 92 insertions(+), 17 deletions(-) diff --git a/ovn/controller/physical.c b/ovn/controller/physical.c index 07b7cd5..35f761d 100644 --- a/ovn/controller/physical.c +++ b/ovn/controller/physical.c @@ -183,7 +183,7 @@ get_zone_ids(const struct sbrec_port_binding *binding, } static void -put_local_common_flows(uint32_t dp_key, uint32_t port_key, +put_local_common_flows(uint32_t dp_key, uint32_t port_key, ofp_port_t ofport, bool nested_container, const struct zone_ids *zone_ids, struct ofpbuf *ofpacts_p, struct hmap *flow_table) { @@ -259,6 +259,36 @@ put_local_common_flows(uint32_t dp_key, uint32_t port_key, put_stack(MFF_IN_PORT, ofpact_put_STACK_POP(ofpacts_p)); ofctrl_add_flow(flow_table, OFTABLE_SAVE_INPORT, 100, 0, &match, ofpacts_p); + + /* Table 65, Priority 150. + * ======================= + * + * Send packets with MLF_FORCE_EGRESS_LOOPBACK flag back to the + * ingress pipeline with inport = outport. 
*/ + + match_init_catchall(&match); + ofpbuf_clear(ofpacts_p); + match_set_metadata(&match, htonll(dp_key)); + match_set_reg(&match, MFF_LOG_OUTPORT - MFF_REG0, port_key); + match_set_reg_masked(&match, MFF_LOG_FLAGS - MFF_REG0, + MLF_FORCE_EGRESS_LOOPBACK, MLF_FORCE_EGRESS_LOOPBACK); + + size_t clone_ofs = ofpacts_p->size; + struct ofpact_nest *clone = ofpact_put_CLONE(ofpacts_p); + put_load(ofport, MFF_IN_PORT, 0, 16, ofpacts_p); + put_load(port_key, MFF_LOG_INPORT, 0, 32, ofpacts_p); + put_load(0, MFF_LOG_OUTPORT, 0, 32, ofpacts_p); + put_load(MLF_EGRESS_LOOPBACK_OCCURRED, MFF_LOG_FLAGS, 0, 32, ofpacts_p); + for (int i = 0; i < MFF_N_LOG_REGS; i++) { + put_load(0, MFF_LOG_REG0 + i, 0, 32, ofpacts_p); + } + put_resubmit(OFTABLE_LOG_INGRESS_PIPELINE, ofpacts_p); + clone = ofpbuf_at_assert(ofpacts_p, clone_ofs, sizeof *clone); + ofpacts_p->header = clone; + ofpact_finish_CLONE(ofpacts_p, &clone); + + ofctrl_add_flow(flow_table, OFTABLE_LOG_TO_PHY, 150, 0, + &match, ofpacts_p); } static void @@ -321,7 +351,7 @@ consider_port_binding(enum mf_field_id mff_ovn_geneve, } struct zone_ids binding_zones = get_zone_ids(binding, ct_zones); - put_local_common_flows(dp_key, port_key, false, &binding_zones, + put_local_common_flows(dp_key, port_key, 0, false, &binding_zones, ofpacts_p, flow_table); match_init_catchall(&match); @@ -490,8 +520,8 @@ consider_port_binding(enum mf_field_id mff_ovn_geneve, */ struct zone_ids zone_ids = get_zone_ids(binding, ct_zones); - put_local_common_flows(dp_key, port_key, nested_container, &zone_ids, - ofpacts_p, flow_table); + put_local_common_flows(dp_key, port_key, ofport, nested_container, + &zone_ids, ofpacts_p, flow_table); /* Table 0, Priority 150 and 100. 
* ============================== diff --git a/ovn/lib/logical-fields.c b/ovn/lib/logical-fields.c index fa134d6..c056e41 100644 --- a/ovn/lib/logical-fields.c +++ b/ovn/lib/logical-fields.c @@ -96,6 +96,14 @@ ovn_init_symtab(struct shash *symtab) MLF_FORCE_SNAT_FOR_LB_BIT); expr_symtab_add_subfield(symtab, "flags.force_snat_for_lb", NULL, flags_str); + snprintf(flags_str, sizeof flags_str, "flags[%d]", + MLF_FORCE_EGRESS_LOOPBACK_BIT); + expr_symtab_add_subfield(symtab, "flags.force_egress_loopback", NULL, + flags_str); + snprintf(flags_str, sizeof flags_str, "flags[%d]", + MLF_EGRESS_LOOPBACK_OCCURRED_BIT); + expr_symtab_add_subfield(symtab, "flags.egress_loopback_occurred", NULL, + flags_str); /* Connection tracking state. */ expr_symtab_add_field(symtab, "ct_mark", MFF_CT_MARK, NULL, false); diff --git a/ovn/lib/logical-fields.h b/ovn/lib/logical-fields.h index 696c529..87ce695 100644 --- a/ovn/lib/logical-fields.h +++ b/ovn/lib/logical-fields.h @@ -49,6 +49,8 @@ enum mff_log_flags_bits { MLF_RCV_FROM_VXLAN_BIT = 1, MLF_FORCE_SNAT_FOR_DNAT_BIT = 2, MLF_FORCE_SNAT_FOR_LB_BIT = 3, + MLF_FORCE_EGRESS_LOOPBACK_BIT = 4, + MLF_EGRESS_LOOPBACK_OCCURRED_BIT = 5, }; /* MFF_LOG_FLAGS_REG flag assignments */ @@ -69,6 +71,18 @@ enum mff_log_flags { /* Indicate that a packet needs a force SNAT in the gateway router when * load-balancing has taken place. */ MLF_FORCE_SNAT_FOR_LB = (1 << MLF_FORCE_SNAT_FOR_LB_BIT), + + /* Indicate that at the end of the egress pipeline in table + * OFTABLE_LOG_TO_PHY, instead of being sent to the peer patch port or + * out the outport, the packet should be forced back to the beginning + * of the ingress pipeline with inport = outport. */ + MLF_FORCE_EGRESS_LOOPBACK = (1 << MLF_FORCE_EGRESS_LOOPBACK_BIT), + + /* Indicate that this packet has been recirculated using egress + * loopback. 
This allows certain checks to be bypassed, such as a + * logical router dropping packets with source IP address equals + * one of the logical router's own IP addresses. */ + MLF_EGRESS_LOOPBACK_OCCURRED = (1 << MLF_EGRESS_LOOPBACK_OCCURRED_BIT), }; #endif /* ovn/lib/logical-fields.h */ diff --git a/ovn/northd/ovn-northd.8.xml b/ovn/northd/ovn-northd.8.xml index 49e4291..9adba3b 100644 --- a/ovn/northd/ovn-northd.8.xml +++ b/ovn/northd/ovn-northd.8.xml @@ -928,7 +928,9 @@ output; </li> <li> <code>ip4.src</code> or <code>ip6.src</code> is any IP - address owned by the router. + address owned by the router, unless the packet was recirculated + due to egress loopback as indicated by + <code>flags.egress_loopback_occurred</code> </li> <li> <code>ip4.src</code> is the broadcast address of any IP network diff --git a/ovn/northd/ovn-northd.c b/ovn/northd/ovn-northd.c index 87c80d1..d01c42a 100644 --- a/ovn/northd/ovn-northd.c +++ b/ovn/northd/ovn-northd.c @@ -3765,6 +3765,7 @@ build_lrouter_flows(struct hmap *datapaths, struct hmap *ports, ds_clear(&match); ds_put_cstr(&match, "ip4.src == "); op_put_v4_networks(&match, op, true); + ds_put_cstr(&match, " && flags.egress_loopback_occurred == 0"); ovn_lflow_add(lflows, op->od, S_ROUTER_IN_IP_INPUT, 100, ds_cstr(&match), "drop;"); @@ -4004,6 +4005,7 @@ build_lrouter_flows(struct hmap *datapaths, struct hmap *ports, ds_clear(&match); ds_put_cstr(&match, "ip6.src == "); op_put_v6_networks(&match, op); + ds_put_cstr(&match, " && flags.egress_loopback_occurred == 0"); ovn_lflow_add(lflows, op->od, S_ROUTER_IN_IP_INPUT, 100, ds_cstr(&match), "drop;"); diff --git a/ovn/ovn-sb.xml b/ovn/ovn-sb.xml index 5704f41..b33b437 100644 --- a/ovn/ovn-sb.xml +++ b/ovn/ovn-sb.xml @@ -832,7 +832,7 @@ <li><code>reg0</code>...<code>reg9</code></li> <li><code>xxreg0</code> <code>xxreg1</code></li> <li><code>inport</code> <code>outport</code></li> - <li><code>flags.loopback</code></li> + 
<li><code>flags.loopback</code><code>flags.force_egress_loopback</code><code>flags.egress_loopback_occurred</code></li> <li><code>eth.src</code> <code>eth.dst</code> <code>eth.type</code></li> <li><code>vlan.tci</code> <code>vlan.vid</code> <code>vlan.pcp</code> <code>vlan.present</code></li> <li><code>ip.proto</code> <code>ip.dscp</code> <code>ip.ecn</code> <code>ip.ttl</code> <code>ip.frag</code></li> diff --git a/ovn/utilities/ovn-trace.c b/ovn/utilities/ovn-trace.c index 9487b1f..43a36dc 100644 --- a/ovn/utilities/ovn-trace.c +++ b/ovn/utilities/ovn-trace.c @@ -1131,23 +1131,42 @@ execute_output(const struct ovntrace_datapath *dp, struct flow *uflow, key); } + uint32_t flags = uflow->regs[MFF_LOG_FLAGS - MFF_REG0]; + if (pipeline == P_EGRESS) { - ovntrace_node_append(super, OVNTRACE_NODE_OUTPUT, - "/* output to \"%s\", type \"%s\" */", - out_name, port ? port->type : ""); - if (port && port->peer) { - const struct ovntrace_port *peer = port->peer; + bool force_egress_loopback = (flags & MLF_FORCE_EGRESS_LOOPBACK) != 0; + if (port && (port->peer || force_egress_loopback)) { + const struct ovntrace_port *new_inport = force_egress_loopback ? + port : port->peer; + if (force_egress_loopback) { + ovntrace_node_append(super, OVNTRACE_NODE_OUTPUT, + "/* force egress loopback at output to \"%s\" */", + out_name); + } else { + ovntrace_node_append(super, OVNTRACE_NODE_OUTPUT, + "/* output to \"%s\", type \"%s\" */", + out_name, port ? 
port->type : ""); + } struct ovntrace_node *node = ovntrace_node_append( super, OVNTRACE_NODE_PIPELINE, "ingress(dp=\"%s\", inport=\"%s\")", - peer->dp->name, peer->name); + new_inport->dp->name, new_inport->name); struct flow new_uflow = *uflow; - new_uflow.regs[MFF_LOG_INPORT - MFF_REG0] = peer->tunnel_key; - new_uflow.regs[MFF_LOG_OUTPORT - MFF_REG0] = 0; - trace__(peer->dp, &new_uflow, 0, P_INGRESS, &node->subs); + for (int i = 0; i < FLOW_N_REGS; i++) { + new_uflow.regs[i] = 0; + } + new_uflow.regs[MFF_LOG_INPORT - MFF_REG0] = new_inport->tunnel_key; + if (force_egress_loopback) { + new_uflow.regs[MFF_LOG_FLAGS - MFF_REG0] + = MLF_EGRESS_LOOPBACK_OCCURRED; + } + trace__(new_inport->dp, &new_uflow, 0, P_INGRESS, &node->subs); } else { + ovntrace_node_append(super, OVNTRACE_NODE_OUTPUT, + "/* output to \"%s\", type \"%s\" */", + out_name, port ? port->type : ""); ovntrace_node_append(super, OVNTRACE_NODE_MODIFY, "output(\"%s\")", out_name); @@ -1158,7 +1177,8 @@ execute_output(const struct ovntrace_datapath *dp, struct flow *uflow, struct flow egress_uflow = *uflow; for (int i = 0; i < FLOW_N_REGS; i++) { if (i != MFF_LOG_INPORT - MFF_REG0 && - i != MFF_LOG_OUTPORT - MFF_REG0) { + i != MFF_LOG_OUTPORT - MFF_REG0 && + i != MFF_LOG_FLAGS - MFF_REG0) { egress_uflow.regs[i] = 0; } } @@ -1166,7 +1186,6 @@ execute_output(const struct ovntrace_datapath *dp, struct flow *uflow, uint16_t in_key = uflow->regs[MFF_LOG_INPORT - MFF_REG0]; const struct ovntrace_port *inport = ovntrace_port_find_by_key(dp, in_key); const char *inport_name = !in_key ? "" : inport ? inport->name : "(unnamed)"; - uint32_t flags = uflow->regs[MFF_LOG_FLAGS - MFF_REG0]; bool allow_loopback = (flags & MLF_ALLOW_LOOPBACK) != 0; if (mcgroup) { -- 1.9.1 _______________________________________________ dev mailing list d...@openvswitch.org https://mail.openvswitch.org/mailman/listinfo/ovs-dev