For overlay subnets, all cross-host traffic is tunneled. For VLAN subnets, we need to selectively tunnel traffic sent to or coming from the network function (NF) ports. Consider a from-lport ACL applied to port p1 on host1. The NF ports nfp1 and nfp2 are on host2. A new Logical_Switch_Port (LSP) option allows the NF ports to be linked: the “network-function-linked-port” option of nfp1 must be set to nfp2 and vice versa. The ingress pipeline on host1 sets the outport to nfp1 and the packet is then processed by table 42.
On host1
--------
REMOTE_OUTPUT (table 42): It tunnels traffic destined to all non-local overlay ports to their associated hosts. The same rule is now also added for traffic to non-local NF ports. Thus the packets from p1 get tunneled to host2.

Upon reaching host2
-------------------
PHY_TO_LOG (table 0): Existing priority 100 rule: for each geneve tunnel interface on the chassis, copy info from the header to the inport, outport and metadata registers. Now the same rule also stores the tunnel interface id in a register (reg5[16..31]).

CHECK_LOOPBACK (table 44): This table has a rule that clears all the registers. The change is to skip the clearing of reg5[16..31].

Logical egress pipeline: ls_out_stateful priority 120: If the outport is an NF port, copy reg5[16..31] (set by table 0) to ct_label.tun_if_id.

LOCAL_OUTPUT (table 43): When the packet comes out of the other NF port (nfp2), the following two rules send it back to the host that it originally came from:

Priority 110: For each NF port local to this host, the following rule processes the packet through the CT zone of the linked port:
  match: inport==nfp2 && RECIRC_BIT==0
  action: RECIRC_BIT = 1, ct(zone=nfp1’s zone, table=LOCAL), resubmit table 43

Priority 109: For each local {tunnel_id, nf port}, send the recirculated packet using the tun_if_id found in the ct zone:
  match: inport==nfp2 && RECIRC_BIT==1 && ct_label.tun_if_id==<tun-id>
  action: tunnel packet using tun-id

Case where NF responds back on nfp1, instead of forwarding to nfp2
------------------------------------------------------------------
For example, a SYN packet from p1 got redirected to nfp1. Then the NF, which is a firewall VM, drops the SYN and sends an RST back on port nfp1. In this case, a lookup in the linked port's (nfp2) ct zone will not find anything. The following rule uses ct.inv to identify such a scenario and uses nfp1’s CT zone to send the packet back.
To achieve this, the following 2 rules are installed: in_network_function: Priority 100 rule that allows packets incoming from NF type ports, is enhanced with an additional action to store the tun_if_id from ct_label into reg5[16..31]. LOCAL_OUTPUT (table 43) Priority 110 rule: for recirculated packets, if ct (of the linked port) is invalid, use the tun id from MFF_LOG_TUN_OFPORT to tunnel the packet back (as CT zone info has been overwritten in the above 110 priority rule in table 43). match: inport==nfp1 && RECIRC_BIT==1 && ct.inv && reg5[16..31]==<tun-id> action: tunnel packet using tun-id Acked-by: Naveen Yerramneni <naveen.yerramn...@nutanix.com> Signed-off-by: Sragdhara Datta Chaudhuri <sragdha.chau...@nutanix.com> --- controller/physical.c | 249 +++++++++++++++++++++++++++++++++-- include/ovn/logical-fields.h | 11 +- lib/logical-fields.c | 10 ++ northd/northd.c | 66 +++++++++- tests/ovn-controller.at | 4 +- tests/ovn-northd.at | 32 +++-- tests/ovn.at | 2 + 7 files changed, 345 insertions(+), 29 deletions(-) diff --git a/controller/physical.c b/controller/physical.c index 69bf05347..d3128751f 100644 --- a/controller/physical.c +++ b/controller/physical.c @@ -194,6 +194,8 @@ put_decapsulation(enum mf_field_id mff_ovn_geneve, put_move(MFF_TUN_ID, 0, MFF_LOG_DATAPATH, 0, 24, ofpacts); put_move(mff_ovn_geneve, 16, MFF_LOG_INPORT, 0, 15, ofpacts); put_move(mff_ovn_geneve, 0, MFF_LOG_OUTPORT, 0, 16, ofpacts); + put_load(ofp_to_u16(tun->ofport), MFF_LOG_TUN_OFPORT, + 16, 16, ofpacts); } else if (tun->type == STT) { put_move(MFF_TUN_ID, 40, MFF_LOG_INPORT, 0, 15, ofpacts); put_move(MFF_TUN_ID, 24, MFF_LOG_OUTPORT, 0, 16, ofpacts); @@ -417,6 +419,15 @@ match_outport_dp_and_port_keys(struct match *match, match_set_reg(match, MFF_LOG_OUTPORT - MFF_REG0, port_key); } +static void +match_inport_dp_and_port_keys(struct match *match, + uint32_t dp_key, uint32_t port_key) +{ + match_init_catchall(match); + match_set_metadata(match, htonll(dp_key)); + match_set_reg(match, 
MFF_LOG_INPORT - MFF_REG0, port_key); +} + static struct sbrec_encap * find_additional_encap_for_chassis(const struct sbrec_port_binding *pb, const struct sbrec_chassis *chassis_rec) @@ -529,6 +540,204 @@ put_remote_port_redirect_overlay(const struct sbrec_port_binding *binding, } } +static const struct sbrec_port_binding * +get_binding_network_function_linked_port( + struct ovsdb_idl_index *sbrec_port_binding_by_name, + const struct sbrec_port_binding *binding) +{ + const char *nf_linked_name = smap_get(&binding->options, + "network-function-linked-port"); + if (!nf_linked_name) { + return NULL; + } + VLOG_DBG("get NF linked port_binding %s:%s", + binding->logical_port, nf_linked_name); + const struct sbrec_port_binding *nf_linked_port = lport_lookup_by_name( + sbrec_port_binding_by_name, nf_linked_name); + static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(1, 1); + if (!nf_linked_port) { + VLOG_ERR_RL(&rl, "Binding not found for network-function-linked-port" + " %s", nf_linked_name); + return NULL; + } + if (strcmp(nf_linked_port->type, binding->type)) { + VLOG_ERR_RL(&rl, "Binding type mismatch between %s and " + "network-function-linked-port %s", + binding->logical_port, nf_linked_name); + return NULL; + } + const char *nf_linked_linked_name = smap_get( + &nf_linked_port->options, "network-function-linked-port"); + if (!nf_linked_linked_name || strcmp(nf_linked_linked_name, + binding->logical_port)) { + VLOG_INFO("LSP name %s does not match linked_linked_name", + binding->logical_port); + return NULL; + } + + return nf_linked_port; +} + +static void +send_traffic_by_tunnel( + const struct sbrec_port_binding *binding, + struct match *match, + struct ofpbuf *ofpacts_p, + uint32_t dp_key, + uint32_t port_key, + struct chassis_tunnel *tun, + enum mf_field_id mff_ovn_geneve, + struct ovn_desired_flow_table *flow_table) +{ + match_init_catchall(match); + ofpbuf_clear(ofpacts_p); + + match_inport_dp_and_port_keys(match, dp_key, port_key); + 
match_set_reg_masked(match, MFF_LOG_FLAGS - MFF_REG0, MLF_RECIRC, + MLF_RECIRC); + ovs_u128 of_tun_ct_label_id_val = { + .u64.hi = ((uint32_t) ofp_to_u16(tun->ofport)) << 16, + }; + ovs_u128 of_tun_ct_label_id_mask = { + .u64.hi = 0x00000000ffff0000, + }; + + match_set_ct_label_masked(match, of_tun_ct_label_id_val, + of_tun_ct_label_id_mask); + + put_load(binding->datapath->tunnel_key, MFF_TUN_ID, 0, 24, ofpacts_p); + put_move(MFF_LOG_OUTPORT, 0, mff_ovn_geneve, 0, 32, ofpacts_p); + put_load(port_key, mff_ovn_geneve, 16, 15, ofpacts_p); + + ofpact_put_OUTPUT(ofpacts_p)->port = tun->ofport; + ofctrl_add_flow(flow_table, OFTABLE_LOCAL_OUTPUT, 109, + binding->header_.uuid.parts[0], match, + ofpacts_p, &binding->header_.uuid); +} + +static void +put_redirect_overlay_to_source_from_nf_port( + const struct sbrec_port_binding *binding, + struct ovsdb_idl_index *sbrec_port_binding_by_name, + const struct hmap *chassis_tunnels, + const struct shash *ct_zones, + enum mf_field_id mff_ovn_geneve, + uint32_t port_key, + struct match *match, + struct ofpbuf *ofpacts_p, + struct ovn_desired_flow_table *flow_table) +{ + uint32_t dp_key = binding->datapath->tunnel_key; + const struct sbrec_port_binding *linked_pb; + + /* Say, a network function has ports nf1 and nf2. The source port p1 is on + * a different host. The packet redirected from p1 was tunneled to the NF + * host. In PHY_TO_LOG table the tunnel interface id is stored in + * MFF_LOG_TUN_OFPORT. The egress pipeline then commits it into ct_label + * tun_if_id in nf1's zone (out_stateful priority 120 rule). When the same + * packet comes out from nf2, two rules process it: + * first rule sets recirc bit to 1 and processes the packet through nf1's + * ct zone and resubmits to same table. When the recirculated packet comes + * back, the second rule (which checks recirc bit == 1) uses the tun_if_id + * from ct_label to send the packet back to p1's host. 
+ */ + + linked_pb = get_binding_network_function_linked_port( + sbrec_port_binding_by_name, binding); + if (!linked_pb) { + return; + } + struct zone_ids linked_zone = get_zone_ids(linked_pb, ct_zones); + if (!linked_zone.ct) { + static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(1, 1); + VLOG_ERR_RL(&rl, "Zone not found for linked port"); + return; + } + + /* Table 43 (LOCAL_OUTPUT), priority 110 + * ===================================== + * + * Each flow matches a logical inport to a nf port and checks if + * recirc bit is 0 (i.e. packet first time being processed by this table). + * The action processes the packet through ct zone of the linked nf port + * and resubmits to the same table after setting recirc bit to 1. + * match: inport == svc-port[i] && MLF_RECIRC_BIT = 0 + * action: MLF_RECIRC_BIT = 1, ct(zone=linked-zone[i], table=LOCAL) + */ + match_init_catchall(match); + ofpbuf_clear(ofpacts_p); + match_inport_dp_and_port_keys(match, dp_key, port_key); + match_set_dl_type(match, htons(ETH_TYPE_IP)); + match_set_reg_masked(match, MFF_LOG_FLAGS - MFF_REG0, 0, MLF_RECIRC); + + put_load(1, MFF_LOG_FLAGS, MLF_RECIRC_BIT, 1, ofpacts_p); + put_load(linked_zone.ct, MFF_LOG_CT_ZONE, 0, 16, ofpacts_p); + + struct ofpact_conntrack *ct = ofpact_put_CT(ofpacts_p); + ct->recirc_table = OFTABLE_LOCAL_OUTPUT; + ct->zone_src.field = mf_from_id(MFF_LOG_CT_ZONE); + ct->zone_src.ofs = 0; + ct->zone_src.n_bits = 16; + ct->flags = 0; + ct->alg = 0; + ofpact_finish(ofpacts_p, &ct->ofpact); + + ofctrl_add_flow(flow_table, OFTABLE_LOCAL_OUTPUT, 110, + binding->header_.uuid.parts[0], match, + ofpacts_p, &binding->header_.uuid); + + /* Table 43 (LOCAL_OUTPUT), priority 110 + * In case NF is sending back a response on the port it received the + * packet on, instead of forwarding out of the other port (e.g. NF sending + * RST to the SYN received), the ct lookup in linked port's zone would + * fail. 
Based on ct.inv check the packet is then tunneled back using + * the tunnel id from this port's zone itself. The above rule has + * overwritten the zone info by now, so we recover it from the register + * that was populated by in_network_function stage with the tunnel id. + * match: inport == svc-port[i] && MLF_RECIRC_BIT = 1 + * && ct.inv && MFF_LOG_TUN_OFPORT == <tun-id> + * action: tunnel back using above tun-id + */ + struct chassis_tunnel *tun; + HMAP_FOR_EACH (tun, hmap_node, chassis_tunnels) { + match_init_catchall(match); + ofpbuf_clear(ofpacts_p); + match_inport_dp_and_port_keys(match, dp_key, port_key); + match_set_reg_masked(match, MFF_LOG_FLAGS - MFF_REG0, MLF_RECIRC, + MLF_RECIRC); + match_set_ct_state_masked(match, OVS_CS_F_INVALID, OVS_CS_F_INVALID); + match_set_reg_masked(match, MFF_LOG_TUN_OFPORT - MFF_REG0, + ((uint32_t) ofp_to_u16(tun->ofport)) << 16, + 0xffff << 16); + put_load(binding->datapath->tunnel_key, MFF_TUN_ID, 0, 24, ofpacts_p); + put_move(MFF_LOG_OUTPORT, 0, mff_ovn_geneve, 0, 32, ofpacts_p); + put_load(port_key, mff_ovn_geneve, 16, 15, ofpacts_p); + + ofpact_put_OUTPUT(ofpacts_p)->port = tun->ofport; + ofctrl_add_flow(flow_table, OFTABLE_LOCAL_OUTPUT, 110, + binding->header_.uuid.parts[0], match, + ofpacts_p, &binding->header_.uuid); + } + + /* Table 43 (LOCAL_OUTPUT), priority 109 + * ===================================== + * + * A flow is installed For each {remote tunnel_id, nf port} combination. It + * matches the inport with the nf port and the ct_label.tun_if_id with the + * tunnel_id. Also checks if the recirc bit is 1 (i.e. packet being + * processed by this table second time). The action is to send the packet + * out using the tunnel interface. 
+ * match: inport == svc-port[i] && MLF_RECIRC_BIT = 1 + * && ct_label.tun_if_id == <tun-id> + * action: tunnel back using tun-id + */ + HMAP_FOR_EACH (tun, hmap_node, chassis_tunnels) { + send_traffic_by_tunnel(binding, match, ofpacts_p, dp_key, port_key, + tun, mff_ovn_geneve, flow_table); + } + ofpbuf_clear(ofpacts_p); +} + static void put_remote_port_redirect_overlay_ha_remote( const struct sbrec_port_binding *binding, @@ -1808,10 +2017,12 @@ consider_port_binding(const struct physical_ctx *ctx, /* Determine how the port is accessed. */ enum access_type access_type = PORT_LOCAL; + bool is_nf = (smap_get(&binding->options, "network-function-linked-port") ? + true: false); if (!ofport) { /* Enforce tunneling while we clone packets to additional chassis b/c * otherwise upstream switch won't flood the packet to both chassis. */ - if (localnet_port && !binding->additional_chassis) { + if (localnet_port && !binding->additional_chassis && !is_nf) { ofport = u16_to_ofp(simap_get(ctx->patch_ofports, localnet_port->logical_port)); if (!ofport) { @@ -2041,6 +2252,21 @@ consider_port_binding(const struct physical_ctx *ctx, binding->header_.uuid.parts[0], &match, ofpacts_p, &binding->header_.uuid); } + + /* Packets egressing from network function ports need to be sent to the + * source */ + if (is_nf) { + put_redirect_overlay_to_source_from_nf_port( + binding, + ctx->sbrec_port_binding_by_name, + ctx->chassis_tunnels, + ctx->ct_zones, + ctx->mff_ovn_geneve, + port_key, + &match, + ofpacts_p, + flow_table); + } } else if (access_type == PORT_LOCALNET && !ctx->always_tunnel) { /* Remote port connected by localnet port */ /* Table 43, priority 100. 
@@ -2716,7 +2942,6 @@ physical_run(struct physical_ctx *p_ctx, ofpbuf_clear(&ofpacts); put_decapsulation(p_ctx->mff_ovn_geneve, tun, &ofpacts); - put_resubmit(OFTABLE_LOCAL_OUTPUT, &ofpacts); ofctrl_add_flow(flow_table, OFTABLE_PHY_TO_LOG, 100, 0, &match, &ofpacts, hc_uuid); @@ -2851,7 +3076,7 @@ physical_run(struct physical_ctx *p_ctx, * * Handles packets received from a VXLAN tunnel which get resubmitted to * OFTABLE_LOG_INGRESS_PIPELINE due to lack of needed metadata in VXLAN, - * explicitly skip sending back out any tunnels and resubmit to table 40 + * explicitly skip sending back out any tunnels and resubmit to table 43 * for local delivery, except packets which have MLF_ALLOW_LOOPBACK bit * set. */ @@ -2859,7 +3084,7 @@ physical_run(struct physical_ctx *p_ctx, match_set_reg_masked(&match, MFF_LOG_FLAGS - MFF_REG0, MLF_RCV_FROM_RAMP, MLF_RCV_FROM_RAMP | MLF_ALLOW_LOOPBACK); - /* Resubmit to table 40. */ + /* Resubmit to table 43. */ ofpbuf_clear(&ofpacts); put_resubmit(OFTABLE_LOCAL_OUTPUT, &ofpacts); ofctrl_add_flow(flow_table, OFTABLE_REMOTE_OUTPUT, 150, 0, @@ -2873,7 +3098,7 @@ physical_run(struct physical_ctx *p_ctx, match_init_catchall(&match); match_set_reg_masked(&match, MFF_LOG_FLAGS - MFF_REG0, MLF_LOCAL_ONLY, MLF_LOCAL_ONLY); - /* Resubmit to table 40. */ + /* Resubmit to table 43. */ ofpbuf_clear(&ofpacts); put_resubmit(OFTABLE_LOCAL_OUTPUT, &ofpacts); ofctrl_add_flow(flow_table, OFTABLE_REMOTE_OUTPUT, 150, 0, @@ -2890,24 +3115,30 @@ physical_run(struct physical_ctx *p_ctx, ofctrl_add_flow(flow_table, OFTABLE_REMOTE_OUTPUT, 0, 0, &match, &ofpacts, hc_uuid); - /* Table 40, priority 0. + /* Table 43, priority 0. * ====================== * * Drop packets that do not match previous flows. */ add_default_drop_flow(p_ctx, OFTABLE_LOCAL_OUTPUT, flow_table); - /* Table 41, Priority 0. + /* Table 44, Priority 0. 
* ======================= * * Resubmit packets that don't output to the ingress port (already checked - * in table 40) to the logical egress pipeline, clearing the logical + * in table 43) to the logical egress pipeline, clearing the logical * registers (for consistent behavior with packets that get tunneled). */ match_init_catchall(&match); ofpbuf_clear(&ofpacts); for (int i = 0; i < MFF_N_LOG_REGS; i++) { - put_load(0, MFF_REG0 + i, 0, 32, &ofpacts); + if ((MFF_REG0 + i) != MFF_LOG_TUN_OFPORT) { + put_load(0, MFF_REG0 + i, 0, 32, &ofpacts); + } } + /* In MFF_LOG_TUN_OFPORT, the bits 16..31 are used to store geneve + * tunnel id of received packets and these need to be carried over to + * the egress pipeline. The remaining bits can be reset to zero. */ + put_load(0, MFF_LOG_TUN_OFPORT, 0, 16, &ofpacts); put_resubmit(OFTABLE_LOG_EGRESS_PIPELINE, &ofpacts); ofctrl_add_flow(flow_table, OFTABLE_CHECK_LOOPBACK, 0, 0, &match, &ofpacts, hc_uuid); diff --git a/include/ovn/logical-fields.h b/include/ovn/logical-fields.h index 3a36d0c84..8c20b83a3 100644 --- a/include/ovn/logical-fields.h +++ b/include/ovn/logical-fields.h @@ -42,6 +42,7 @@ enum ovn_controller_event { * (16..31 of the 32 bits). */ #define MFF_LOG_INPORT MFF_REG14 /* Logical input port (32 bits). */ #define MFF_LOG_OUTPORT MFF_REG15 /* Logical output port (32 bits). */ +#define MFF_LOG_TUN_OFPORT MFF_REG5 /* 16..31 of the 32 bits */ /* Logical registers. * @@ -97,6 +98,7 @@ enum mff_log_flags_bits { MLF_FROM_CTRL_BIT = 19, MLF_UNSNAT_NEW_BIT = 20, MLF_UNSNAT_NOT_TRACKED_BIT = 21, + MLF_RECIRC_BIT = 22, }; /* MFF_LOG_FLAGS_REG flag assignments */ @@ -159,7 +161,10 @@ enum mff_log_flags { MLF_UNSNAT_NEW = (1 << MLF_UNSNAT_NEW_BIT), /* Indicate that the packet didn't go through unSNAT. */ - MLF_UNSNAT_NOT_TRACKED = (1 << MLF_UNSNAT_NOT_TRACKED_BIT) + MLF_UNSNAT_NOT_TRACKED = (1 << MLF_UNSNAT_NOT_TRACKED_BIT), + + /* Indicate the packet has been processed by LOCAL table once before. 
*/ + MLF_RECIRC = (1 << MLF_RECIRC_BIT), }; /* OVN logical fields @@ -224,15 +229,19 @@ const struct ovn_field *ovn_field_from_name(const char *name); #define OVN_CT_OBS_STAGE_END_BIT 5 #define OVN_CT_ALLOW_ESTABLISHED_BIT 6 #define OVN_CT_NETWORK_FUNCTION_GROUP_BIT 7 +#define OVN_CT_TUN_IF_BIT 8 #define OVN_CT_BLOCKED 1 #define OVN_CT_NATTED 2 #define OVN_CT_LB_SKIP_SNAT 4 #define OVN_CT_LB_FORCE_SNAT 8 #define OVN_CT_NETWORK_FUNCTION_GROUP 128 +#define OVN_CT_TUN_IF 256 #define OVN_CT_NETWORK_FUNCTION_GROUP_ID_1ST_BIT 17 #define OVN_CT_NETWORK_FUNCTION_GROUP_ID_END_BIT 24 +#define OVN_CT_TUN_IF_1ST_BIT 80 +#define OVN_CT_TUN_IF_END_BIT 95 #define OVN_CT_ECMP_ETH_1ST_BIT 32 #define OVN_CT_ECMP_ETH_END_BIT 79 diff --git a/lib/logical-fields.c b/lib/logical-fields.c index 6a15a783d..4016df72f 100644 --- a/lib/logical-fields.c +++ b/lib/logical-fields.c @@ -221,6 +221,16 @@ ovn_init_symtab(struct shash *symtab) OVN_CT_NETWORK_FUNCTION_GROUP_ID_END_BIT) "]", WR_CT_COMMIT); + expr_symtab_add_subfield_scoped(symtab, "ct_label.tun_if", NULL, + "ct_label[" + OVN_CT_STR(OVN_CT_TUN_IF_BIT) + "]", + WR_CT_COMMIT); + expr_symtab_add_subfield_scoped(symtab, "ct_label.tun_if_id", NULL, + "ct_label[" + OVN_CT_STR(OVN_CT_TUN_IF_1ST_BIT) ".." + OVN_CT_STR(OVN_CT_TUN_IF_END_BIT) "]", + WR_CT_COMMIT); expr_symtab_add_field(symtab, "ct_state", MFF_CT_STATE, NULL, false); diff --git a/northd/northd.c b/northd/northd.c index 8842d507f..f0e998f55 100644 --- a/northd/northd.c +++ b/northd/northd.c @@ -220,6 +220,10 @@ BUILD_ASSERT_DECL(ACL_OBS_STAGE_MAX < (1 << 2)); /* Register used for storing persistent ACL IDs */ #define REG_ACL_ID "reg2[16..31]" +/* Register used for storing tunnel openflow interface id, in a Logical Switch. + * Must match the MFF_LOG_TUN_OFPORT in logical-fields.h */ +#define REG_TUN_OFPORT "reg5[16..31]" + /* Register used for temporarily store ECMP eth.src to avoid masked ct_label * access. 
It doesn't really occupy registers because the content of the * register is saved to stack and then restored in the same flow. @@ -17398,8 +17402,50 @@ network_function_get_active(const struct nbrec_network_function_group *nfg) return nfg->network_function[0]; } +/* For packets received on tunnel and egressing towards a network-function port + * commit the tunnel interface id in CT. This will be utilized when the packet + * comes out of the other network-function interface of the service VM. The + * packet then will be tunneled back to the source host. */ static void -consider_network_function(struct lflow_table *lflows, struct ovn_datapath *od, +build_lswitch_stateful_nf(struct ovn_port *op, + struct lflow_table *lflows, + struct ds *actions, struct ds *match) +{ + ds_clear(actions); + ds_clear(match); + + ds_put_cstr(actions, + "ct_commit { " + "ct_mark.blocked = 0; " + "ct_mark.allow_established = " REGBIT_ACL_PERSIST_ID "; " + "ct_label.acl_id = " REG_ACL_ID "; " + "ct_label.tun_if_id = " REG_TUN_OFPORT "; }; next;"); + ds_put_format(match, + "outport == %s && " REGBIT_ACL_LABEL" == 0", op->json_key); + ovn_lflow_add(lflows, op->od, S_SWITCH_OUT_STATEFUL, 120, + ds_cstr(match), ds_cstr(actions), op->lflow_ref); + + ds_clear(actions); + ds_clear(match); + ds_put_format(match, + "outport == %s && " REGBIT_ACL_LABEL" == 1", + op->json_key); + ds_put_cstr(actions, + "ct_commit { " + "ct_mark.blocked = 0; " + "ct_mark.allow_established = " REGBIT_ACL_PERSIST_ID "; " + "ct_label.acl_id = " REG_ACL_ID "; " + "ct_mark.obs_stage = " REGBIT_ACL_OBS_STAGE "; " + "ct_mark.obs_collector_id = " REG_OBS_COLLECTOR_ID_EST "; " + "ct_label.obs_point_id = " REG_OBS_POINT_ID_EST "; " + "ct_label.tun_if_id = " REG_TUN_OFPORT "; }; next;"); + ovn_lflow_add(lflows, op->od, S_SWITCH_OUT_STATEFUL, 120, + ds_cstr(match), ds_cstr(actions), op->lflow_ref); +} + +static void +consider_network_function( + struct lflow_table *lflows, struct ovn_datapath *od, struct 
nbrec_network_function_group *nfg, const struct hmap *ports, struct lflow_ref *lflow_ref, bool ingress) @@ -17522,7 +17568,7 @@ consider_network_function(struct lflow_table *lflows, struct ovn_datapath *od, * match. */ ds_put_format(&match, "inport == %s", input_port->json_key); - ds_put_format(&action, "next;"); + ds_put_format(&action, REG_TUN_OFPORT" = ct_label.tun_if_id; next;"); ovn_lflow_add(lflows, od, S_SWITCH_IN_NETWORK_FUNCTION, 100, ds_cstr(&match), ds_cstr(&action), lflow_ref); ds_clear(&match); @@ -17564,13 +17610,21 @@ consider_network_function(struct lflow_table *lflows, struct ovn_datapath *od, ovn_lflow_add(lflows, od, S_SWITCH_OUT_PRE_ACL, 110, ds_cstr(&match), ds_cstr(&action), lflow_ref); + /* Priority 120 flows in out_stateful: + * If packet was received on a tunnel interface and being forwarded to a + * NF port, commit openflow tunnel interface id in ct_label. + */ + build_lswitch_stateful_nf(output_port, lflows, &action, &match); + build_lswitch_stateful_nf(input_port, lflows, &action, &match); + ds_destroy(&match); ds_destroy(&action); } static void -build_network_function(struct ovn_datapath *od, struct lflow_table *lflows, +build_network_function( + struct ovn_datapath *od, struct lflow_table *lflows, const struct hmap *ports, const struct ls_port_group_table *ls_pgs, struct lflow_ref *lflow_ref) { @@ -17637,8 +17691,7 @@ build_network_function(struct ovn_datapath *od, struct lflow_table *lflows, VLOG_DBG("Adding %s network_function to switch", ingress == true ? "ingress" : "egress"); consider_network_function(lflows, od, acl->network_function_group, - ports, - lflow_ref, ingress); + ports, lflow_ref, ingress); } } @@ -17669,8 +17722,7 @@ build_network_function(struct ovn_datapath *od, struct lflow_table *lflows, ingress == true ? 
"ingress" : "egress"); consider_network_function(lflows, od, acl->network_function_group, - ports, - lflow_ref, ingress); + ports, lflow_ref, ingress); } } } diff --git a/tests/ovn-controller.at b/tests/ovn-controller.at index 9d282dc60..bf89ce455 100644 --- a/tests/ovn-controller.at +++ b/tests/ovn-controller.at @@ -3747,8 +3747,8 @@ AT_CHECK([grep -c "reg10=0/0x10000" flood_flows], [0], [dnl # Geneve hv2_cookie="$(chassis_cookie hv2)" AT_CHECK_UNQUOTED([grep "cookie=$hv2_cookie," phy_to_log_flows], [0], [dnl - cookie=$hv2_cookie, priority=120,arp,tun_metadata0=0,in_port="ovn-hv2-0",arp_op=2 actions=load:0x1->NXM_NX_REG10[[16]],move:NXM_NX_TUN_ID[[0..23]]->OXM_OF_METADATA[[0..23]],move:NXM_NX_TUN_METADATA0[[16..30]]->NXM_NX_REG14[[0..14]],move:NXM_NX_TUN_METADATA0[[0..15]]->NXM_NX_REG15[[0..15]],resubmit(,OFTABLE_LOG_INGRESS_PIPELINE) - cookie=$hv2_cookie, priority=120,icmp6,tun_metadata0=0,in_port="ovn-hv2-0",icmp_type=136,icmp_code=0 actions=load:0x1->NXM_NX_REG10[[16]],move:NXM_NX_TUN_ID[[0..23]]->OXM_OF_METADATA[[0..23]],move:NXM_NX_TUN_METADATA0[[16..30]]->NXM_NX_REG14[[0..14]],move:NXM_NX_TUN_METADATA0[[0..15]]->NXM_NX_REG15[[0..15]],resubmit(,OFTABLE_LOG_INGRESS_PIPELINE) + cookie=$hv2_cookie, priority=120,arp,tun_metadata0=0,in_port="ovn-hv2-0",arp_op=2 actions=load:0x1->NXM_NX_REG10[[16]],move:NXM_NX_TUN_ID[[0..23]]->OXM_OF_METADATA[[0..23]],move:NXM_NX_TUN_METADATA0[[16..30]]->NXM_NX_REG14[[0..14]],move:NXM_NX_TUN_METADATA0[[0..15]]->NXM_NX_REG15[[0..15]],load:0x1->NXM_NX_REG5[[16..31]],resubmit(,OFTABLE_LOG_INGRESS_PIPELINE) + cookie=$hv2_cookie, priority=120,icmp6,tun_metadata0=0,in_port="ovn-hv2-0",icmp_type=136,icmp_code=0 actions=load:0x1->NXM_NX_REG10[[16]],move:NXM_NX_TUN_ID[[0..23]]->OXM_OF_METADATA[[0..23]],move:NXM_NX_TUN_METADATA0[[16..30]]->NXM_NX_REG14[[0..14]],move:NXM_NX_TUN_METADATA0[[0..15]]->NXM_NX_REG15[[0..15]],load:0x1->NXM_NX_REG5[[16..31]],resubmit(,OFTABLE_LOG_INGRESS_PIPELINE) ]) # VXLAN diff --git a/tests/ovn-northd.at 
b/tests/ovn-northd.at index d0b5b0ad0..b9b7c9acf 100644 --- a/tests/ovn-northd.at +++ b/tests/ovn-northd.at @@ -16829,8 +16829,8 @@ AT_CHECK( [grep -E 'ls_(in|out)_network_function' sw0flows | ovn_strip_lflows | sort], [0], [dnl table=??(ls_in_network_function), priority=0 , match=(1), action=(next;) table=??(ls_in_network_function), priority=1 , match=(reg8[[21]] == 1), action=(drop;) - table=??(ls_in_network_function), priority=100 , match=(inport == "sw0-nf-p1"), action=(next;) - table=??(ls_in_network_function), priority=100 , match=(inport == "sw0-nf-p2"), action=(next;) + table=??(ls_in_network_function), priority=100 , match=(inport == "sw0-nf-p1"), action=(reg5[[16..31]] = ct_label.tun_if_id; next;) + table=??(ls_in_network_function), priority=100 , match=(inport == "sw0-nf-p2"), action=(reg5[[16..31]] = ct_label.tun_if_id; next;) table=??(ls_in_network_function), priority=100 , match=(reg8[[21]] == 1 && eth.mcast), action=(next;) table=??(ls_in_network_function), priority=99 , match=(reg8[[21]] == 1 && reg8[[22]] == 1 && reg5[[0..7]] == 1), action=(outport = "sw0-nf-p1"; output;) table=??(ls_out_network_function), priority=0 , match=(1), action=(next;) @@ -16853,6 +16853,18 @@ AT_CHECK( table=??(ls_out_acl_eval ), priority=65532, match=(ct.est && !ct.rel && !ct.new && !ct.inv && ct.rpl && ct_mark.blocked == 0), action=(reg8[[21]] = ct_label.network_function_group; reg8[[16]] = 1; next;) ]) + AT_CHECK([grep "ls_out_stateful" sw0flows | ovn_strip_lflows], [0], [dnl + table=??(ls_out_stateful ), priority=0 , match=(1), action=(next;) + table=??(ls_out_stateful ), priority=100 , match=(reg0[[1]] == 1 && reg0[[13]] == 0), action=(ct_commit { ct_mark.blocked = 0; ct_mark.allow_established = reg0[[20]]; ct_label.acl_id = reg2[[16..31]]; ct_label.network_function_group = 0; ct_label.network_function_group_id = 0; }; next;) + table=??(ls_out_stateful ), priority=100 , match=(reg0[[1]] == 1 && reg0[[13]] == 1), action=(ct_commit { ct_mark.blocked = 0; 
ct_mark.allow_established = reg0[[20]]; ct_mark.obs_stage = reg8[[19..20]]; ct_mark.obs_collector_id = reg8[[8..15]]; ct_label.obs_point_id = reg9; ct_label.acl_id = reg2[[16..31]]; ct_label.network_function_group = 0; ct_label.network_function_group_id = 0; }; next;) + table=??(ls_out_stateful ), priority=110 , match=(reg0[[1]] == 1 && reg0[[13]] == 0 && reg8[[21]] == 1), action=(ct_commit { ct_mark.blocked = 0; ct_mark.allow_established = reg0[[20]]; ct_label.acl_id = reg2[[16..31]]; ct_label.network_function_group = 1; ct_label.network_function_group_id = reg5[[0..7]]; }; next;) + table=??(ls_out_stateful ), priority=110 , match=(reg0[[1]] == 1 && reg0[[13]] == 1 && reg8[[21]] == 1), action=(ct_commit { ct_mark.blocked = 0; ct_mark.allow_established = reg0[[20]]; ct_mark.obs_stage = reg8[[19..20]]; ct_mark.obs_collector_id = reg8[[8..15]]; ct_label.obs_point_id = reg9; ct_label.acl_id = reg2[[16..31]]; ct_label.network_function_group = 1; ct_label.network_function_group_id = reg5[[0..7]]; }; next;) + table=??(ls_out_stateful ), priority=120 , match=(outport == "sw0-nf-p1" && reg0[[13]] == 0), action=(ct_commit { ct_mark.blocked = 0; ct_mark.allow_established = reg0[[20]]; ct_label.acl_id = reg2[[16..31]]; ct_label.tun_if_id = reg5[[16..31]]; }; next;) + table=??(ls_out_stateful ), priority=120 , match=(outport == "sw0-nf-p1" && reg0[[13]] == 1), action=(ct_commit { ct_mark.blocked = 0; ct_mark.allow_established = reg0[[20]]; ct_label.acl_id = reg2[[16..31]]; ct_mark.obs_stage = reg8[[19..20]]; ct_mark.obs_collector_id = reg8[[8..15]]; ct_label.obs_point_id = reg9; ct_label.tun_if_id = reg5[[16..31]]; }; next;) + table=??(ls_out_stateful ), priority=120 , match=(outport == "sw0-nf-p2" && reg0[[13]] == 0), action=(ct_commit { ct_mark.blocked = 0; ct_mark.allow_established = reg0[[20]]; ct_label.acl_id = reg2[[16..31]]; ct_label.tun_if_id = reg5[[16..31]]; }; next;) + table=??(ls_out_stateful ), priority=120 , match=(outport == "sw0-nf-p2" && reg0[[13]] == 1), 
action=(ct_commit { ct_mark.blocked = 0; ct_mark.allow_established = reg0[[20]]; ct_label.acl_id = reg2[[16..31]]; ct_mark.obs_stage = reg8[[19..20]]; ct_mark.obs_collector_id = reg8[[8..15]]; ct_label.obs_point_id = reg9; ct_label.tun_if_id = reg5[[16..31]]; }; next;) +]) + # ICMP packets from sw0-p1 should be redirected to sw0-nf-p1, but in revervse direction should not. flow_eth_from_p1='eth.src == 00:00:00:00:00:01 && eth.dst == 00:00:00:00:00:02' flow_ip_from_p1='ip.ttl==64 && ip4.src == 10.0.0.2 && ip4.dst == 10.0.0.3' @@ -16901,10 +16913,10 @@ AT_CHECK( [grep -E 'ls_(in|out)_network_function' sw0flows | ovn_strip_lflows | sort], [0], [dnl table=??(ls_in_network_function), priority=0 , match=(1), action=(next;) table=??(ls_in_network_function), priority=1 , match=(reg8[[21]] == 1), action=(drop;) - table=??(ls_in_network_function), priority=100 , match=(inport == "sw0-nf-p1"), action=(next;) - table=??(ls_in_network_function), priority=100 , match=(inport == "sw0-nf-p2"), action=(next;) - table=??(ls_in_network_function), priority=100 , match=(inport == "sw0-nf-p3"), action=(next;) - table=??(ls_in_network_function), priority=100 , match=(inport == "sw0-nf-p4"), action=(next;) + table=??(ls_in_network_function), priority=100 , match=(inport == "sw0-nf-p1"), action=(reg5[[16..31]] = ct_label.tun_if_id; next;) + table=??(ls_in_network_function), priority=100 , match=(inport == "sw0-nf-p2"), action=(reg5[[16..31]] = ct_label.tun_if_id; next;) + table=??(ls_in_network_function), priority=100 , match=(inport == "sw0-nf-p3"), action=(reg5[[16..31]] = ct_label.tun_if_id; next;) + table=??(ls_in_network_function), priority=100 , match=(inport == "sw0-nf-p4"), action=(reg5[[16..31]] = ct_label.tun_if_id; next;) table=??(ls_in_network_function), priority=100 , match=(reg8[[21]] == 1 && eth.mcast), action=(next;) table=??(ls_in_network_function), priority=99 , match=(reg8[[21]] == 1 && reg8[[22]] == 0 && ct_label.network_function_group_id == 2), action=(outport = 
"sw0-nf-p3"; output;) table=??(ls_in_network_function), priority=99 , match=(reg8[[21]] == 1 && reg8[[22]] == 1 && reg5[[0..7]] == 1), action=(outport = "sw0-nf-p1"; output;) @@ -16964,10 +16976,10 @@ AT_CHECK( [grep -E 'ls_(in|out)_network_function' sw1flows | ovn_strip_lflows | sort], [0], [dnl table=??(ls_in_network_function), priority=0 , match=(1), action=(next;) table=??(ls_in_network_function), priority=1 , match=(reg8[[21]] == 1), action=(drop;) - table=??(ls_in_network_function), priority=100 , match=(inport == "sw1-nf-p1"), action=(next;) - table=??(ls_in_network_function), priority=100 , match=(inport == "sw1-nf-p2"), action=(next;) - table=??(ls_in_network_function), priority=100 , match=(inport == "sw1-nf-p3"), action=(next;) - table=??(ls_in_network_function), priority=100 , match=(inport == "sw1-nf-p4"), action=(next;) + table=??(ls_in_network_function), priority=100 , match=(inport == "sw1-nf-p1"), action=(reg5[[16..31]] = ct_label.tun_if_id; next;) + table=??(ls_in_network_function), priority=100 , match=(inport == "sw1-nf-p2"), action=(reg5[[16..31]] = ct_label.tun_if_id; next;) + table=??(ls_in_network_function), priority=100 , match=(inport == "sw1-nf-p3"), action=(reg5[[16..31]] = ct_label.tun_if_id; next;) + table=??(ls_in_network_function), priority=100 , match=(inport == "sw1-nf-p4"), action=(reg5[[16..31]] = ct_label.tun_if_id; next;) table=??(ls_in_network_function), priority=100 , match=(reg8[[21]] == 1 && eth.mcast), action=(next;) table=??(ls_in_network_function), priority=99 , match=(reg8[[21]] == 1 && reg8[[22]] == 0 && ct_label.network_function_group_id == 2), action=(outport = "sw1-nf-p3"; output;) table=??(ls_in_network_function), priority=99 , match=(reg8[[21]] == 1 && reg8[[22]] == 1 && reg5[[0..7]] == 1), action=(outport = "sw1-nf-p1"; output;) diff --git a/tests/ovn.at b/tests/ovn.at index 88aae5398..f2a324130 100644 --- a/tests/ovn.at +++ b/tests/ovn.at @@ -147,6 +147,8 @@ ct_label.network_function_group = ct_label[7] 
ct_label.network_function_group_id = ct_label[17..24] ct_label.obs_point_id = ct_label[96..127] ct_label.obs_unused = ct_label[0..95] +ct_label.tun_if = ct_label[8] +ct_label.tun_if_id = ct_label[80..95] ct_mark = NXM_NX_CT_MARK ct_mark.allow_established = ct_mark[6] ct_mark.blocked = ct_mark[0] -- 2.39.3 _______________________________________________ dev mailing list d...@openvswitch.org https://mail.openvswitch.org/mailman/listinfo/ovs-dev