Implement vtap mode in northd where traffic is cloned to the NF port while the original packet continues to its destination.
- Generate mirror flows that clone packets to NF port - Determine NF health from port binding status (no health probes) - Validate that health_check requires both inport and outport - Clear ct_state for packets egressing through localnet ports to avoid matching flows in egress stage based on egress CT info Note: ---- For inline NF health status, updated the code to consider port binding state along with service monitor health. Signed-off-by: Naveen Yerramneni <[email protected]> Acked-by: Sragdhara Datta Chaudhuri <[email protected]> Acked-by: Aditya Mehakare <[email protected]> --- NEWS | 5 + northd/northd.c | 438 +++++++++++++++++++++++++++++++++++----- northd/ovn-northd.8.xml | 270 +++++++++++++++---------- tests/ovn-northd.at | 231 ++++++++++++++++++++- tests/ovn.at | 372 +++++++++++++++++++++++++++++++++- tests/system-ovn.at | 265 +++++++++++++++++++++++- 6 files changed, 1417 insertions(+), 164 deletions(-) diff --git a/NEWS b/NEWS index 888946b54..d9f437910 100644 --- a/NEWS +++ b/NEWS @@ -1,5 +1,10 @@ Post v26.03.0 ------------- + - Add vtap mode support for Network Function. In vtap mode, traffic matching + ACLs is mirrored to the network function while continuing to flow to the + original destination. This enables passive monitoring use cases where + network functions can observe traffic without being inline in + the data path. 
OVN v26.03.0 - xxx xx xxxx -------------------------- diff --git a/northd/northd.c b/northd/northd.c index ac2361417..c560a65cf 100644 --- a/northd/northd.c +++ b/northd/northd.c @@ -3238,6 +3238,66 @@ create_or_get_service_mon(struct ovsdb_idl_txn *ovnsb_txn, return mon_info; } +enum nf_port_binding_state{ + NF_PORT_STATE_UNKNOWN, + NF_PORT_STATE_CHASSIS_INVALID, + NF_PORT_STATE_DOWN, + NF_PORT_STATE_UP +}; + +static enum nf_port_binding_state +network_function_port_binding_state(const char **ports, uint8_t n_ports, + struct hmap *ls_ports, + const char **chassis_name_pptr) +{ + const char *chassis_name = NULL; + enum nf_port_binding_state port_state = NF_PORT_STATE_UNKNOWN; + uint8_t n_port_up = 0; + + for (int i = 0; i < n_ports; i++) { + const char *port = ports[i]; + struct ovn_port *op = ovn_port_find(ls_ports, port); + if (op == NULL) { + static struct vlog_rate_limit rl = + VLOG_RATE_LIMIT_INIT(1, 1); + VLOG_ERR_RL(&rl, "NetworkFunction: skip health check, port:%s " + "not found", port); + return port_state; + } + if (op->sb && op->sb->chassis) { + if (chassis_name == NULL) { + chassis_name = op->sb->chassis->name; + } else if (strcmp(chassis_name, op->sb->chassis->name)) { + static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(1, 1); + VLOG_ERR_RL(&rl, "NetworkFunction: chassis mismatch " + "for port:%s chassis:%s peer_port_chassis:%s", + port, op->sb->chassis->name, chassis_name); + return NF_PORT_STATE_CHASSIS_INVALID; + } + if (op->sb->n_up && op->sb->up[0]) { + n_port_up++; + } + } else { + static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(1, 1); + VLOG_ERR_RL(&rl, "NetworkFunction: chassis not set for port:%s", + port); + return NF_PORT_STATE_CHASSIS_INVALID; + } + } + + if (chassis_name_pptr) { + *chassis_name_pptr = chassis_name; + } + + if (n_port_up == n_ports) { + port_state = NF_PORT_STATE_UP; + } else { + port_state = NF_PORT_STATE_DOWN; + } + + return port_state; +} + static void ovn_nf_svc_create(struct ovsdb_idl_txn *ovnsb_txn, 
const struct nbrec_network_function *nbrec_nf, @@ -3256,29 +3316,22 @@ ovn_nf_svc_create(struct ovsdb_idl_txn *ovnsb_txn, } const char *ports[] = {nbrec_nf->outport->name, nbrec_nf->inport->name}; + size_t n_ports = ARRAY_SIZE(ports); const char *chassis_name = NULL; - bool port_up = true; - for (size_t i = 0; i < ARRAY_SIZE(ports); i++) { + for (size_t i = 0; i < n_ports; i++) { const char *port = ports[i]; sset_add(svc_monitor_lsps, port); - struct ovn_port *op = ovn_port_find(ls_ports, port); - if (op == NULL) { - VLOG_ERR_RL(&rl, "NetworkFunction: skip health check, port:%s " - "not found", port); - return; - } + } - if (op->sb->chassis) { - if (chassis_name == NULL) { - chassis_name = op->sb->chassis->name; - } else if (strcmp(chassis_name, op->sb->chassis->name)) { - VLOG_ERR_RL(&rl, "NetworkFunction: chassis mismatch " - "chassis:%s port:%s\n", - op->sb->chassis->name, port); - } - } - port_up = port_up && (op->sb->n_up && op->sb->up[0]); + bool port_up = false; + enum nf_port_binding_state port_state = + network_function_port_binding_state(ports, n_ports, ls_ports, + &chassis_name); + if (port_state == NF_PORT_STATE_UNKNOWN) { + return; + } else if (port_state == NF_PORT_STATE_UP) { + port_up = true; } struct service_monitor_info *mon_info = @@ -3801,6 +3854,16 @@ build_svc_monitors_data( NBREC_NETWORK_FUNCTION_TABLE_FOR_EACH (nbrec_nf, nbrec_network_function_table) { if (nbrec_nf->health_check) { + /* For Network Function, health check requires both + * inport and outport to be set. 
+ */ + if (!nbrec_nf->inport || !nbrec_nf->outport) { + static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(1, 1); + VLOG_WARN_RL(&rl, "NetworkFunction: health_check requires " + "both inport and outport, skipping health_check " + "for network_function:%s", nbrec_nf->name); + continue; + } ovn_nf_svc_create(ovnsb_txn, nbrec_nf, svc_global_addresses, @@ -6382,10 +6445,14 @@ skip_port_from_conntrack(const struct ovn_datapath *od, struct ovn_port *op, * router on hostA, not hostB. This would only work with distributed * conntrack state across all chassis. */ + /* Clear the ct_state for packets egressing through localnet ports to + * prevent them from matching flows in ls_out_acl_eval stage based on + * ct_state carried over from ingress pipeline */ const char *ingress_action = "next;"; - const char *egress_action = has_stateful_acl - ? "next;" - : "flags.pkt_sampled = 0; ct_clear; next;"; + const char *egress_action = + (has_stateful_acl && !lsp_is_localnet(op->nbsp)) + ? "next;" + : "flags.pkt_sampled = 0; ct_clear; next;"; char *ingress_match = xasprintf("ip && inport == %s", op->json_key); char *egress_match = xasprintf("ip && outport == %s", op->json_key); @@ -18842,10 +18909,35 @@ build_lswitch_stateful_nf(struct ovn_port *op, ds_cstr(match), ds_cstr(actions), lflow_ref); } +static const char* +network_function_group_get_mode(const struct nbrec_network_function_group *nfg) +{ + if (nfg->mode) { + return nfg->mode; + } + return "inline"; +} + +static bool +network_function_group_is_vtap_mode( + const struct nbrec_network_function_group *nfg) +{ + const char *mode = network_function_group_get_mode(nfg); + if (!strcasecmp(mode, "vtap")) { + return true; + } + return false; +} + static const char* network_function_group_get_fallback( const struct nbrec_network_function_group *nfg) { + /* For vtap mode, fallback is always defaulted to fail-open */ + if (network_function_group_is_vtap_mode(nfg)) { + return "fail-open"; + } + if (nfg->fallback) { return 
nfg->fallback; } @@ -18873,7 +18965,8 @@ static void network_function_update_active(const struct nbrec_network_function_group *nfg, struct hmap *local_svc_monitors_map, struct hmap *ic_learned_svc_monitors_map, - const char *svc_monitor_ip_dst) + const char *svc_monitor_ip_dst, + struct hmap *ls_ports) { if (!nfg->n_network_function) { static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(5, 1); @@ -18885,10 +18978,13 @@ network_function_update_active(const struct nbrec_network_function_group *nfg, } return; } + /* Array to store healthy network functions */ struct nbrec_network_function **healthy_nfs = xmalloc(sizeof *healthy_nfs * nfg->n_network_function); struct nbrec_network_function *nf_active_prev = NULL; + bool is_nfg_vtap = network_function_group_is_vtap_mode(nfg); + if (nfg->network_function_active) { nf_active_prev = nfg->network_function_active; } @@ -18898,25 +18994,62 @@ network_function_update_active(const struct nbrec_network_function_group *nfg, for (size_t i = 0; i < nfg->n_network_function; i++) { struct nbrec_network_function *nf = nfg->network_function[i]; bool is_healthy = false; + const char *inport = nf->inport->name; + const char *ports[2] = {inport, NULL}; + size_t n_ports = 1; - if (nf->health_check == NULL) { - VLOG_DBG("NetworkFunction: Health check is not configured for " - "network_function %s, considering it healthy", nf->name); - is_healthy = true; + if (is_nfg_vtap) { + if (nf->outport) { + static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(1, 1); + VLOG_ERR_RL(&rl, "NetworkFunction: outport should not be set " + "for vtap mode, network_function:%s", nf->name); + continue; + } + + /* For vtap mode, consider network_function healthy based on + * port binding status. 
*/ + if (network_function_port_binding_state(ports, n_ports, ls_ports, + NULL) == NF_PORT_STATE_UP) { + is_healthy = true; + } } else { - struct service_monitor_info *mon_info = - get_service_mon(local_svc_monitors_map, - ic_learned_svc_monitors_map, - svc_monitor_ip_dst, - nf->outport->name, 0, "icmp"); - if (mon_info == NULL) { - static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(5, 1); - VLOG_ERR_RL(&rl, "NetworkFunction: Service_monitor is not " - "found for network_function:%s", nf->name); - is_healthy = false; - } else if (mon_info->sbrec_mon->status - && !strcmp(mon_info->sbrec_mon->status, "online")) { + /* For inline mode, inport and outport must be specified. + * inport is mandatory in schema, check for outport. */ + if (nf->outport == NULL) { + static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(1, 1); + VLOG_ERR_RL(&rl, "NetworkFunction: outport must be set " + "for inline mode, network_function:%s", nf->name); + continue; + } + + const char *outport = nf->outport->name; + ports[n_ports++] = outport; + + /* Always check port binding state first. */ + if (network_function_port_binding_state(ports, n_ports, + ls_ports, NULL) != NF_PORT_STATE_UP) { + continue; + } + + if (nf->health_check == NULL) { + /* Consider network_function healthy based on port binding + * status if health_check is not configured. 
*/ is_healthy = true; + } else { + struct service_monitor_info *mon_info = + get_service_mon(local_svc_monitors_map, + ic_learned_svc_monitors_map, + svc_monitor_ip_dst, + nf->outport->name, 0, "icmp"); + if (mon_info == NULL) { + static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(5, + 1); + VLOG_ERR_RL(&rl, "NetworkFunction: Service_monitor is not " + "found for network_function:%s", nf->name); + } else if (mon_info->sbrec_mon->status + && !strcmp(mon_info->sbrec_mon->status, "online")) { + is_healthy = true; + } } } @@ -18965,15 +19098,15 @@ static void build_network_function_active( const struct nbrec_network_function_group_table *nbrec_nfg_table, struct hmap *local_svc_monitors_map, struct hmap *ic_learned_svc_monitors_map, - const char *svc_monitor_ip_dst) + const char *svc_monitor_ip_dst, + struct hmap *ls_ports) { const struct nbrec_network_function_group *nbrec_nfg; NBREC_NETWORK_FUNCTION_GROUP_TABLE_FOR_EACH (nbrec_nfg, nbrec_nfg_table) { - network_function_update_active(nbrec_nfg, - local_svc_monitors_map, - ic_learned_svc_monitors_map, - svc_monitor_ip_dst); + network_function_update_active(nbrec_nfg, local_svc_monitors_map, + ic_learned_svc_monitors_map, + svc_monitor_ip_dst, ls_ports); } } @@ -19006,10 +19139,10 @@ network_function_configure_fail_open_flows(struct lflow_table *lflows, } static void -consider_network_function(struct lflow_table *lflows, - const struct ovn_datapath *od, - struct nbrec_network_function_group *nfg, - bool ingress, struct lflow_ref *lflow_ref) +consider_network_function_inline(struct lflow_table *lflows, + const struct ovn_datapath *od, + struct nbrec_network_function_group *nfg, + bool ingress, struct lflow_ref *lflow_ref) { struct ds match = DS_EMPTY_INITIALIZER; struct ds action = DS_EMPTY_INITIALIZER; @@ -19034,6 +19167,15 @@ consider_network_function(struct lflow_table *lflows, return; } + if (nf->outport == NULL) { + VLOG_ERR_RL(&rl, "No outport configured for inline mode " + "network function:%s", nf->name); + 
return; + } + + VLOG_DBG("network_function %s: inport %s outport %s", + nf->name, nf->inport->name, nf->outport->name); + /* If NF ports are present on this LS, use those; otherwise look for child * ports. */ struct ovn_port *input_port = @@ -19206,6 +19348,203 @@ consider_network_function(struct lflow_table *lflows, ds_destroy(&action); } +static void +consider_network_function_vtap(struct lflow_table *lflows, + const struct ovn_datapath *od, + struct nbrec_network_function_group *nfg, + bool ingress, struct lflow_ref *lflow_ref) +{ + struct nbrec_network_function *nf; + struct ds match = DS_EMPTY_INITIALIZER; + struct ds action = DS_EMPTY_INITIALIZER; + const struct ovn_stage *fwd_stage, *rev_stage; + struct ovn_port *input_port = NULL; + static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(1, 1); + + if (nfg->fallback && !strcasecmp(nfg->fallback, "fail-close")) { + VLOG_WARN_RL(&rl, "NF vtap mode: fallback is set to fail-close but " + "will be overridden to fail-open for nfg:%s", nfg->name); + } + /* Configure flows with higher priority than default drop rule to allow + * the traffic when there is no active NF available. + */ + network_function_configure_fail_open_flows(lflows, od, lflow_ref, + nfg->id, ingress); + /* Currently we support only one active port-pair in a group. + * If there are multiple active pairs, take the first one. + * Load balancing would be added in future. */ + nf = nf_get_active(nfg); + if (nf == NULL) { + VLOG_ERR_RL(&rl, "No active network function available, nfg:%s", + nfg->name); + return; + } + + if (nf->outport) { + VLOG_ERR_RL(&rl, "Outport is not supported for vtap mode " + "network function:%s", nf->name); + return; + } + + VLOG_DBG("network_function %s: inport %s", + nf->name, nf->inport->name); + + /* If NF ports are present on this LS, use those; otherwise look for child + * ports. 
*/ + input_port = ovn_port_find_port_or_child(od, nf->inport->name); + if (input_port == NULL) { + VLOG_ERR_RL(&rl, "Port not found for network_function %s", nf->name); + return; + } + + if (ingress) { + fwd_stage = S_SWITCH_IN_NF; + rev_stage = S_SWITCH_OUT_NF; + } else { + fwd_stage = S_SWITCH_OUT_NF; + rev_stage = S_SWITCH_IN_NF; + } + + /* Pre NF Table (Priority 99): + * + * Currently, this stage simply writes the active network function ID into + * the nf_id register. + * + * In the future, this stage will be extended to support network function + * load balancing. + */ + ds_put_format(&match, REGBIT_NF_ENABLED" == 1 && " + REGBIT_NF_ORIG_DIR" == 1 && " + REG_NF_GROUP_ID " == %"PRIu8, + (uint8_t) nfg->id); + ds_put_format(&action, REG_NF_ID" = %"PRIu8"; next;", (uint8_t) nf->id); + ovn_lflow_add(lflows, od, ingress ? S_SWITCH_IN_PRE_NF + : S_SWITCH_OUT_PRE_NF, + 99, ds_cstr(&match), ds_cstr(&action), lflow_ref); + ds_clear(&match); + ds_clear(&action); + + /* Add forward flows for mirroring: + * Flows to handle request packets for new or existing connections. + * + * from-lport ACL in_network_function priority 99: + * in_acl_eval has already categorized it and populated nf_enabled, + * direction and nfg_id registers. in_pre_nf sets the active network + * function id in nf_id register. Here this rule sets the outport to the + * NF port for the mirrored packet and does output action to skip the rest + * of the ingress pipeline. Original packet continues with ingress + * pipeline. + * + * to-lport ACL out_network_function priority 99: + * out_acl_eval, and out_pre_nf set the nf related registers. Then the + * out_network_function stage sets the outport to NF port for the mirrored + * packet and submits the packet back to ingress pipeline l2_lkup table. + * The l2_lkup would skip mac based lookup as the + * NETWORK_FUNCTION_EGRESS_LOOPBACK is set. Original packet continues with + * the egress pipeline processing. 
+ */ + if (ingress) { + ds_put_format(&action, "clone {outport = %s; output;}; next;", + input_port->json_key); + } else { + ds_put_format(&action, "clone {outport = %s; " + REGBIT_NF_EGRESS_LOOPBACK" = 1; " + "next(pipeline=ingress, table=%d);}; next;", + input_port->json_key, + ovn_stage_get_table(S_SWITCH_IN_L2_LKUP)); + } + ds_put_format(&match, REGBIT_NF_ENABLED" == 1 && " + REGBIT_NF_ORIG_DIR" == 1 && " + REG_NF_ID " == %"PRIu8, (uint8_t) nf->id); + ovn_lflow_add(lflows, od, fwd_stage, 99, ds_cstr(&match), + ds_cstr(&action), lflow_ref); + ds_clear(&match); + ds_clear(&action); + + /* Add reverse flows for mirroring: + * Flows to handle response packets for existing connections. + * + * from-lport ACL out_network_function priority 99: + * out_acl stage sets the nf_enabled register based on CT label. + * Here this rule sets the outport to the NF port for the mirrored packet + * based on nf_id fetched from the CT label. Then it submits the packet + * back to ingress pipeline l2_lkup table. The l2_lkup would skip mac + * lookup as the NETWORK_FUNCTION_EGRESS_LOOPBACK is set. Original packet + * continues with the egress pipeline. + * + * to-lport ACL in_network_function priority 99: + * in_acl stage sets the nf_enabled register based on CT label. + * Here this rule sets the outport to the NF port for the mirrored packet + * based on nf_id fetched from the CT label and does output action to skip + * the rest of the ingress pipeline. Original packet continues with the + * ingress pipeline. 
+ */ + if (ingress) { + ds_put_format(&action, "clone {outport = %s; " + REGBIT_NF_EGRESS_LOOPBACK" = 1; " + "next(pipeline=ingress, table=%d);}; next;", + input_port->json_key, + ovn_stage_get_table(S_SWITCH_IN_L2_LKUP)); + } else { + ds_put_format(&action, "clone {outport = %s; output;}; next;", + input_port->json_key); + } + ds_put_format(&match, REGBIT_NF_ENABLED" == 1 && " + REGBIT_NF_ORIG_DIR" == 0 && " + "ct_label.nf_id == %"PRIu8, (uint8_t) nf->id); + ovn_lflow_add(lflows, od, rev_stage, 99, ds_cstr(&match), ds_cstr(&action), + lflow_ref); + ds_clear(&match); + ds_clear(&action); + + /* Priority 100 flow in in_network_function: + * Drop packets coming from network-function in vtap mode. + */ + ds_put_format(&match, "inport == %s", input_port->json_key); + ds_put_format(&action, "drop;"); + ovn_lflow_add(lflows, od, S_SWITCH_IN_NF, 100, + ds_cstr(&match), ds_cstr(&action), lflow_ref); + ds_clear(&match); + ds_clear(&action); + + /* Priority 100 flow in out_network_function: + * Allow packets to go through if outport is network-function port as + * we don't want the packets to be mirrored again based on to-lport + * match. + */ + ds_put_format(&match, "outport == %s", input_port->json_key); + ds_put_format(&action, "next;"); + ovn_lflow_add(lflows, od, S_SWITCH_OUT_NF, 100, + ds_cstr(&match), ds_cstr(&action), lflow_ref); + ds_clear(&match); + ds_clear(&action); + + /* Priority 110 flow in out_pre_acl: + * Avoid ct for packets going to network-function port in vtap mode since + * these packets gets consumed at VNF. 
+ */ + ds_put_format(&match, "ip && outport == %s", input_port->json_key); + ds_put_format(&action, "ct_clear; next;"); + ovn_lflow_add(lflows, od, S_SWITCH_OUT_PRE_ACL, 110, ds_cstr(&match), + ds_cstr(&action), lflow_ref); + + ds_destroy(&match); + ds_destroy(&action); +} + +static void +consider_network_function(struct lflow_table *lflows, + const struct ovn_datapath *od, + struct nbrec_network_function_group *nfg, + bool ingress, struct lflow_ref *lflow_ref) +{ + if (network_function_group_is_vtap_mode(nfg)) { + consider_network_function_vtap(lflows, od, nfg, ingress, lflow_ref); + return; + } + consider_network_function_inline(lflows, od, nfg, ingress, lflow_ref); +} + static void build_network_function(const struct ovn_datapath *od, struct lflow_table *lflows, @@ -19232,7 +19571,7 @@ build_network_function(const struct ovn_datapath *od, /* Ingress and Egress PRE NF Table (Priority 1): ACL stage determined these * packets should be redirected, but there is no active NF in NFG. * Reset the nf_id register to 0. This will drop the packet by the - * default drop rule in the subsequent NF table. + * default drop rule in the subsequent NF table if NF is in fail-close mode. 
*/ ovn_lflow_add(lflows, od, S_SWITCH_IN_PRE_NF, 1, REGBIT_NF_ENABLED" == 1 && " REGBIT_NF_ORIG_DIR" == 1", @@ -21171,7 +21510,8 @@ ovnnb_db_run(struct northd_input *input_data, input_data->nbrec_network_function_group_table, &data->local_svc_monitors_map, input_data->ic_learned_svc_monitors_map, - input_data->svc_global_addresses->ip_dst); + input_data->svc_global_addresses->ip_dst, + &data->ls_ports); build_ipam(&data->ls_datapaths.datapaths); build_lrouter_groups(&data->lr_ports, &data->lr_datapaths); build_ip_mcast(ovnsb_txn, input_data->sbrec_ip_multicast_table, diff --git a/northd/ovn-northd.8.xml b/northd/ovn-northd.8.xml index 9f2d118dd..c5d663aeb 100644 --- a/northd/ovn-northd.8.xml +++ b/northd/ovn-northd.8.xml @@ -1523,39 +1523,45 @@ <ul> <li> - For each network_function_group <var>id</var> with an active network - function, a priority-99 flow matches <code>reg8[21] == 1 && - reg8[22] == 1 && reg0[22..29] == <var>id</var></code> and - sets <code>reg0[22..29] = <var>nf_id</var>; next;</code> where + In inline, vtap mode: For each network_function_group <var>id</var> + with an active network function, a priority-99 flow matches + <code>reg8[21] == 1 && reg8[22] == 1 && + reg0[22..29] == <var>id</var></code> and sets + <code>reg0[22..29] = <var>nf_id</var>; next;</code> where <var>nf_id</var> is the ID of the active network function. This prepares request packets that matched a <code>from-lport</code> ACL - with network_function_group for redirection in the subsequent Network - Function table. + with network_function_group for redirection (inline) or mirroring + (vtap) in the subsequent Network Function table. 
</li> <li> - For each network function group with <var>id</var> that has - <code>fallback</code> set to <code>fail-open</code>, a priority-10 flow - matches <code>reg8[21] == 1 && reg8[22] == 1 && + In inline mode: For each network function group with <var>id</var> that + has <code>fallback</code> set to <code>fail-open</code>, a priority-10 + flow matches <code>reg8[21] == 1 && reg8[22] == 1 && reg0[22..29] == <var>id</var></code> and sets <code>reg8[21] = 0; - reg0[22..29] = 0; next;</code>. This clears both the NF enabled bit and - the NF group ID, allowing packets to continue processing through the - pipeline without network function redirection when no active network - function is available (fail-open behavior). + reg0[22..29] = 0; next;</code>. + In vtap mode: A priority-10 flow with the same match and action is + always added (vtap does not support fail-close). This clears both the + NF enabled bit and the NF group ID, allowing packets to continue when + no active network function is available (fail-open behavior). </li> <li> - A priority-1 flow matches <code>reg8[21] == 1 && reg8[22] == 1 - </code> and sets <code>reg0[22..29] = 0; next;</code>. This is a - catch-all flow for network function groups with <code>fallback</code> - set to <code>fail-close</code> (or default) when no active network - function is available. It clears only the NF group ID, leaving the NF - enabled bit set. These packets will be dropped by the priority-1 drop - rule in the subsequent Network Function table (fail-close behavior). + In inline, vtap mode: A priority-1 flow matches <code>reg8[21] == 1 + && reg8[22] == 1</code> and sets <code>reg0[22..29] = 0; + next;</code>. This is a catch-all for when no active network function + is available and no higher-priority flow matched. 
For inline groups + with <code>fallback</code> set to <code>fail-close</code> (or default) + this leaves the NF enabled bit set so the packet is dropped by the + priority-1 drop rule in the subsequent Network Function table + (fail-close behavior). For vtap groups this flow is superseded by + the priority-10 fail-open flow above and is not reached when an NFG + is configured; it acts as a safety net. </li> <li> - A priority-0 flow that simply moves traffic to the next table. + In inline, vtap mode: A priority-0 flow that simply moves traffic to + the next table. </li> </ul> @@ -1579,15 +1585,15 @@ </li> <li> - Corresponding to each of the two priority 100 flows above, a priority - 110 flow is added, which has the following extra match and - action, but otherwise identical to the priority 100 flow. - Match: <code>reg8[21] == 1</code> (packet matched an ACL with - <code>network_function_group</code> set) - Action: <code>ct_label.nf = 1; - ct_label.nf_id = reg0[22..29];</code> - This is to commit the network_function information in conntrack so that - the response and related packets can be redirected to it as well. + In inline, vtap mode: Corresponding to each of the two priority 100 + flows above, a priority 110 flow is added, which has the following + extra match and action, but otherwise identical to the priority 100 + flow. Match: <code>reg8[21] == 1</code> (packet matched an ACL with + <code>network_function_group</code> set). Action: + <code>ct_label.nf = 1; ct_label.nf_id = reg0[22..29];</code> This + commits the network_function information in conntrack so that + response and related packets can be redirected or mirrored to it as + well. </li> <li> @@ -1631,20 +1637,18 @@ <ul> <li> - For each network_function port <var>P</var>, a priority-100 flow is - added that matches <code>inport == <var>P</var></code> and advances - packets to the next table. Thus packets coming from network function - are not subject to redirection. 
This flow also sets - <code>reg5[16..31] = ct_label.tun_if_id</code>. This is used for - tunneling packet to originating host in case of cross host traffic - redirection for VLAN subnet. This ct_label field stores the openflow - tunnel interface id of the originating host for this connection and - gets populated in egress <code>Stateful</code> table. + In inline: For each network_function port <var>P</var>, a + priority-100 flow matches <code>inport == <var>P</var></code> and + advances packets to the next table (packets from the network function + are not subject to redirection). This flow also sets + <code>reg5[16..31] = ct_label.tun_if_id</code> for cross host traffic + redirection for VLAN subnet; the tunnel id is populated in egress + <code>Stateful</code> table. </li> <li> - For each active network function with <var>id</var> that is referenced - in a network function group, a priority-99 flow matches + In inline: For each active network function with <var>id</var> that is + referenced in a network function group, a priority-99 flow matches <code>reg8[21] == 1 && reg8[22] == 1 && reg0[22..29] == <var>id</var></code> and sets <code>outport=<var>P</var>; output;</code> where <var>P</var> is the @@ -1655,37 +1659,62 @@ </li> <li> - For each active network function with <var>id</var> that is referenced - in a network function group, a priority-99 rule matches + In vtap mode: For each active network function with <var>id</var>, a + priority-99 forward flow matches <code>reg8[21] == 1 && + reg8[22] == 1 && reg0[22..29] == <var>id</var></code> and + sets <code>clone { outport = <var>P</var>; output; }; next;</code> + where <var>P</var> is the <code>inport</code> of that network function. + A copy is sent to the NF port while the original packet continues + (mirroring; only inport is used, outport is not supported). 
+ </li> + + <li> + In inline: For each active network function with <var>id</var> that is + referenced in a network function group, a priority-99 rule matches <code>reg8[21] == 1 && reg8[22] == 0 && ct_label.nf_id == <var>id</var></code> and takes identical action as above. This redirects response and related packets for <code>to-lport</code> ACLs to the same network function that handled - the request, using the NF ID stored in the connection tracking label. + the request. </li> <li> - In each of the above cases, when the same packet comes out unchanged - through the other port of the network_function, it would match the - priority 100 flow and be forwarded to the next table. + In vtap mode: A priority-99 reverse flow matches + <code>reg8[21] == 1 && reg8[22] == 0 && + ct_label.nf_id == <var>id</var></code> and sets + <code>clone { outport = <var>P</var>; output; }; next;</code> to mirror + response/related packets to the same NF. </li> <li> - One priority-100 rule to skip redirection of multicast packets that hit - a network_function ACL. Match on <code>reg8[21] == 1 && - eth.mcast</code> and action is to advance to the next table. + In inline: In each of the above cases, when the same packet comes out + unchanged through the other port of the network_function, it would + match the priority 100 flow and be forwarded to the next table. </li> <li> - One priority-1 rule that checks <code>reg8[21] == 1</code>, and drops - such packets. This is to address the case where a packet hit an ACL - with network function but the network function does not have ports or - child ports on this logical switch. + In vtap mode: A priority-100 flow matches + <code>inport == <var>P</var></code> (packets from the NF port) and + drops them. </li> <li> - One priority-0 fallback flow that matches all packets and advances to - the next table. + In inline, vtap mode: One priority-100 rule to skip + redirection/mirroring + of multicast packets that hit a network_function ACL. 
Match on + <code>reg8[21] == 1 && eth.mcast</code> and action is to + advance to the next table. + </li> + + <li> + In inline: One priority-1 rule that checks <code>reg8[21] == 1</code>, + and drops such packets when the network function does not have ports + or child ports on this logical switch. + </li> + + <li> + In inline, vtap mode: One priority-0 fallback flow that matches all + packets and advances to the next table. </li> </ul> @@ -2845,40 +2874,45 @@ output; <ul> <li> - For each network function group with <var>id</var> that has an active - network function, a priority-99 flow matches <code>reg8[21] == 1 - && reg8[22] == 1 && reg0[22..29] == <var>id</var></code> - and sets <code>reg0[22..29] = <var>nf_id</var>; next;</code> where + In inline, vtap mode: For each network function group with + <var>id</var> that has an active network function, a priority-99 flow + matches <code>reg8[21] == 1 && reg8[22] == 1 && + reg0[22..29] == <var>id</var></code> and sets + <code>reg0[22..29] = <var>nf_id</var>; next;</code> where <var>nf_id</var> is the <code>id</code> of the active <code>Network_Function</code> selected from the group. This prepares request packets that matched a <code>to-lport</code> ACL with - network_function_group for redirection in the subsequent Network - Function table. + network_function_group for redirection (inline) or mirroring (vtap) in + the subsequent Network Function table. </li> <li> - For each network function group with <var>id</var> that has + In inline: For each network function group with <var>id</var> that has <code>fallback</code> set to <code>fail-open</code>, a priority-10 flow matches <code>reg8[21] == 1 && reg8[22] == 1 && reg0[22..29] == <var>id</var></code> and sets <code>reg8[21] = 0; - reg0[22..29] = 0; next;</code>. 
This clears both the NF enabled bit and - the NF group ID, allowing packets to continue processing through the - pipeline without network function redirection when no active network - function is available (fail-open behavior). + reg0[22..29] = 0; next;</code>. In vtap mode: A priority-10 flow with + the same match and action is always added. This clears both the NF + enabled bit and the NF group ID when no active network function is + available (fail-open behavior). </li> <li> - A priority-1 flow matches <code>reg8[21] == 1 && reg8[22] == 1 - </code> and sets <code>reg0[22..29] = 0; next;</code>. This is a - catch-all flow for network function groups with <code>fallback</code> - set to <code>fail-close</code> (or default) when no active network - function is available. It clears only the NF group ID, leaving the NF - enabled bit set. These packets will be dropped by the priority-1 drop - rule in the subsequent Network Function table (fail-close behavior). + In inline, vtap mode: A priority-1 flow matches <code>reg8[21] == 1 + && reg8[22] == 1</code> and sets <code>reg0[22..29] = 0; + next;</code>. This is a catch-all for when no active network function + is available and no higher-priority flow matched. For inline groups + with <code>fallback</code> set to <code>fail-close</code> (or default) + this leaves the NF enabled bit set so the packet is dropped by the + priority-1 drop rule in the subsequent Network Function table + (fail-close behavior). For vtap groups this flow is superseded by + the priority-10 fail-open flow above and is not reached when an NFG + is configured; it acts as a safety net. </li> <li> - A priority-0 flow that simply moves traffic to the next table. + In inline, vtap mode: A priority-0 flow that simply moves traffic to + the next table. 
</li> </ul> @@ -2893,18 +2927,16 @@ output; <ul> <li> - A priority 120 flow is added for each network function port + In inline: A priority 120 flow is added for each network function port <var>P</var> that is identical to the priority 100 flow except for additional match <code>outport == <var>P</var></code> and additional - action <code>ct_label.tun_if_id = reg5[16..31]</code>. In case packets - redirected by network function logic gets tunneled from host1 to host2 + action <code>ct_label.tun_if_id = reg5[16..31]</code>. In case packets + redirected by network function logic get tunneled from host1 to host2 where the network function port resides, host2's physical table 0 - populates reg5[16..31] with the openflow tunnel interface id on which - the packet was received. This priority 120 flow commits the tunnel id - to the ct_label. That way, when the same packet comes out of the other - port of the network function it can retrieve this information from the - peer port's CT entry and tunnel the packet back to host1. This is - required to make cross host traffic redirection work for VLAN subnet. + populates reg5[16..31] with the openflow tunnel interface id. This + flow commits the tunnel id to ct_label so the packet can be tunneled + back to host1 when it comes out of the other port of the network + function (required for cross host traffic redirection for VLAN subnet). </li> </ul> @@ -2924,54 +2956,86 @@ output; <ul> <li> - Similar to ingress <code>Network Function</code> a priority-100 flow is - added for each network_function port, that matches the inport with the - network function port and advances the packet to the next table. + In inline: Similar to ingress <code>Network Function</code>, a + priority-100 flow is added for each network_function port that matches + the inport with the network function port and advances the packet to + the next table. 
</li> <li> - For each active network function with <var>id</var> that is + In inline: For each active network function with <var>id</var> that is referenced in a network function group, a priority-99 flow matches <code>reg8[21] == 1 && reg8[22] == 1 && reg0[22..29] == <var>id</var></code> and sets <code>outport=<var>P</var>; reg8[23] = 1; next(pipeline=ingress, table=<var>T</var>)</code> where <var>P</var> is the - <code>outport</code> of that network function and <var>T</var> is - the ingress table <code>Destination Lookup</code>. This redirects - request packets matching <code>to-lport</code> ACLs with + <code>outport</code> of that network function and <var>T</var> is the + ingress table <code>Destination Lookup</code>. This redirects request + packets matching <code>to-lport</code> ACLs with network_function_group to the specific network function selected by - the Pre Network Function stage. The packets are injected back to the - ingress pipeline from where they get sent out, skipping any further - lookup because of <code>reg8[23]</code>. + the Pre Network Function stage. + </li> + + <li> + In vtap mode: For each active network function with <var>id</var>, a + priority-99 forward flow matches <code>reg8[21] == 1 && + reg8[22] == 1 && reg0[22..29] == <var>id</var></code> and sets + <code>clone { outport = <var>P</var>; reg8[23] = 1; + next(pipeline=ingress, table=Destination Lookup); }; next;</code> + where <var>P</var> is the <code>inport</code> of that network function + (mirroring; only inport is used). </li> <li> - For each active network function with <var>id</var> that is referenced - in a network function group, a priority-99 rule matches + In inline: For each active network function with <var>id</var> that is + referenced in a network function group, a priority-99 rule matches <code>reg8[21] == 1 && reg8[22] == 0 && ct_label.nf_id == <var>id</var></code> and takes identical action as above. 
This redirects response and related packets for <code>from-lport</code> ACLs to the same network function that handled - the request, using the NF ID stored in the connection tracking label. + the request. + </li> + + <li> + In vtap mode: A priority-99 reverse flow matches + <code>reg8[21] == 1 && reg8[22] == 0 && + ct_label.nf_id == <var>id</var></code> and takes the same + <code>clone</code> action as the vtap forward flow above, mirroring + response/related packets to the same NF. + </li> + + <li> + In inline: In each of the above cases, when the same packet comes out + unchanged through the other port of the network_function, it would + match the priority 100 flow and be forwarded to the next table. + </li> + + <li> + In vtap mode: A priority-100 flow matches + <code>outport == <var>P</var></code> (packets to the NF port) and + advances to the next table so packets to the NF are not mirrored again. </li> <li> - In each of the above cases, when the same packet comes out unchanged - through the other port of the network_function, it would match the - priority 100 flow and be forwarded to the next table. + In vtap mode: In egress Pre ACL table, a priority-110 flow matches + <code>ip && outport == <var>P</var></code> with action + <code>ct_clear; next;</code> for the vtap NF port so packets toward the + NF are not committed to conntrack. </li> <li> - One priority-100 multicast match flow same as + In inline, vtap mode: One priority-100 multicast match flow same as ingress <code>Network Function</code>. </li> <li> - One priority-1 flow same as ingress <code>Network Function</code>. + In inline, vtap mode: One priority-1 flow same as ingress + <code>Network Function</code>. </li> <li> - One priority-0 flow same as ingress <code>Network Function</code>. + In inline, vtap mode: One priority-0 flow same as ingress + <code>Network Function</code>.
</li> </ul> diff --git a/tests/ovn-northd.at b/tests/ovn-northd.at index 3e7a6f7f8..0c9440982 100644 --- a/tests/ovn-northd.at +++ b/tests/ovn-northd.at @@ -11142,6 +11142,24 @@ AT_CHECK([ovn-sbctl lflow-list sw | grep ls_out_pre_lb | grep priority=110 | gre table=??(ls_out_pre_lb ), priority=110 , match=(ip && outport == "sw-ln"), action=(flags.pkt_sampled = 0; ct_clear; next;) ]) +# Now add a regular port and a stateful ACL to verify that ct_state is +# cleared for packets egressing through the localnet port even when +# stateful ACLs are configured on the switch. +check ovn-nbctl lsp-add sw sw-p1 -- lsp-set-addresses sw-p1 "00:00:00:00:00:01 10.0.0.2" +check ovn-nbctl acl-add sw from-lport 1002 "ip4 && tcp && tcp.dst == 80" allow-related +check ovn-nbctl --wait=sb sync + +# Egress pre_acl: localnet port should get ct_clear even with stateful ACLs. +AT_CHECK([ovn-sbctl lflow-list sw | grep ls_out_pre_acl | grep priority=110 | grep sw-ln | ovn_strip_lflows], [0], [dnl + table=??(ls_out_pre_acl ), priority=110 , match=(ip && outport == "sw-ln"), action=(flags.pkt_sampled = 0; ct_clear; next;) +]) + +# Regular port should not have a skip flow in egress pre_acl when +# stateful ACLs are configured (traffic goes through conntrack normally). 
+AT_CHECK([ovn-sbctl lflow-list sw | grep ls_out_pre_acl | grep priority=110 | grep sw-p1 | ovn_strip_lflows], [0], [dnl +]) + + OVN_CLEANUP_NORTHD AT_CLEANUP ]) @@ -18718,7 +18736,7 @@ AT_CLEANUP ]) OVN_FOR_EACH_NORTHD_NO_HV([ -AT_SETUP([Check network function]) +AT_SETUP([Check network-function in inline mode]) ovn_start AS_BOX([Create a NF and add it to a from-lport ACL]) @@ -18743,11 +18761,12 @@ check ovn-nbctl lsp-add sw0 sw0-p3 -- lsp-set-addresses sw0-p3 "00:00:00:00:00:0 check ovn-nbctl pg-add pg0 sw0-p1 check ovn-nbctl acl-add pg0 from-lport 1002 "inport == @pg0 && ip4.dst == 10.0.0.3" allow-related nfg0 -# Add hypervisor and bind NF ports -check ovn-sbctl chassis-add hv1 geneve 127.0.0.1 -check ovn-sbctl lsp-bind sw0-nf-p1 hv1 -check ovn-sbctl lsp-bind sw0-nf-p2 hv1 - +check ovn-sbctl chassis-add gw1 geneve 127.0.0.1 \ + -- set chassis gw1 other_config:ovn-ct-lb-related=true \ + -- set chassis gw1 other_config:ct-no-masked-label=true +chassis_uuid=$(fetch_column Chassis _uuid name=gw1) +check ovn-sbctl set port_binding sw0-nf-p1 up=true chassis=$chassis_uuid +check ovn-sbctl set port_binding sw0-nf-p2 up=true chassis=$chassis_uuid check ovn-nbctl --wait=sb sync ovn-sbctl dump-flows sw0 > sw0flows @@ -18858,8 +18877,8 @@ check ovn-nbctl set logical_switch_port sw0-nf-p4 \ check ovn-nbctl nf-add nf1 102 sw0-nf-p3 sw0-nf-p4 check ovn-nbctl nfg-add nfg1 202 inline nf1 check ovn-nbctl acl-add pg0 to-lport 1003 "outport == @pg0 && ip4.src == 10.0.0.4" allow-related nfg1 -check ovn-sbctl lsp-bind sw0-nf-p3 hv1 -check ovn-sbctl lsp-bind sw0-nf-p4 hv1 +check ovn-sbctl set port_binding sw0-nf-p3 up=true chassis=$chassis_uuid +check ovn-sbctl set port_binding sw0-nf-p4 up=true chassis=$chassis_uuid check ovn-nbctl --wait=sb sync ovn-sbctl dump-flows sw0 > sw0flows @@ -19034,10 +19053,16 @@ done nfsw="nf-sw" check ovn-nbctl ls-add $nfsw + +check ovn-sbctl chassis-add gw1 geneve 127.0.0.1 \ + -- set chassis gw1 other_config:ovn-ct-lb-related=true \ + -- set chassis 
gw1 other_config:ct-no-masked-label=true +chassis_uuid=$(fetch_column Chassis _uuid name=gw1) + for i in {1..4}; do port=$nfsw-p$i check ovn-nbctl lsp-add $nfsw $port - check ovn-sbctl set port_binding $port up=true + check ovn-sbctl set port_binding $port up=true chassis=$chassis_uuid check ovn-nbctl lsp-add $sw child-$i $port 100 done check ovn-nbctl set logical_switch_port $nfsw-p1 \ @@ -20270,3 +20295,191 @@ check_column "$global_svc_mon_mac" sb:Service_Monitor src_mac port=2 OVN_CLEANUP_NORTHD AT_CLEANUP ]) + +OVN_FOR_EACH_NORTHD_NO_HV([ +AT_SETUP([Check network-function in vtap mode]) +ovn_start + +AS_BOX([Create a NF and add it to a from-lport ACL]) + +# Create a NF and add it to a from-lport ACL. +check ovn-nbctl ls-add sw0 +check ovn-nbctl lsp-add sw0 sw0-nf-p1 +check ovn-nbctl set logical_switch_port sw0-nf-p1 options:receive_multicast=false options:lsp_learn_fdb=false options:is-nf=true +check ovn-nbctl nf-add nf0 1 sw0-nf-p1 +check ovn-nbctl nfg-add nfg0 1 vtap nf0 + +check ovn-nbctl lsp-add sw0 sw0-p1 -- lsp-set-addresses sw0-p1 "00:00:00:00:00:01 10.0.0.2" +check ovn-nbctl lsp-add sw0 sw0-p2 -- lsp-set-addresses sw0-p2 "00:00:00:00:00:02 10.0.0.3" +check ovn-nbctl lsp-add sw0 sw0-p3 -- lsp-set-addresses sw0-p3 "00:00:00:00:00:03 10.0.0.4" + +check ovn-nbctl pg-add pg0 sw0-p1 +check ovn-nbctl acl-add pg0 from-lport 1002 "inport == @pg0 && ip4.dst == 10.0.0.3" allow-related nfg0 + +check ovn-sbctl chassis-add gw1 geneve 127.0.0.1 \ + -- set chassis gw1 other_config:ovn-ct-lb-related=true \ + -- set chassis gw1 other_config:ct-no-masked-label=true +chassis_uuid=$(fetch_column Chassis _uuid name=gw1) +check ovn-sbctl set port_binding sw0-nf-p1 up=true chassis=$chassis_uuid +check ovn-nbctl --wait=sb sync + +ovn-sbctl dump-flows sw0 > sw0flows +AT_CAPTURE_FILE([sw0flows]) + +AT_CHECK( + [grep -E 'ls_(in|out)_acl_eval' sw0flows | ovn_strip_lflows | grep pg0 | sort], [0], [dnl + table=??(ls_in_acl_eval ), priority=2002 , match=(reg0[[7]] == 1 && (inport == 
@pg0 && ip4.dst == 10.0.0.3)), action=(reg8[[16]] = 1; reg8[[21]] = 1; reg8[[22]] = 1; reg0[[22..29]] = 1; next;) + table=??(ls_in_acl_eval ), priority=2002 , match=(reg0[[8]] == 1 && (inport == @pg0 && ip4.dst == 10.0.0.3)), action=(reg8[[16]] = 1; reg0[[1]] = 1; reg8[[21]] = 1; reg8[[22]] = 1; reg0[[22..29]] = 1; next;) +]) + +# Vtap uses Pre NF (priority 99) to set REG_NF_ID for CT commit, aligned with inline mode. +# First box has only from-lport ACL (nfg0), so vtap Pre NF flows are only in IN path. +AT_CHECK( + [grep -E 'ls_(in|out)_pre_network_function' sw0flows | ovn_strip_lflows | sort], [0], [dnl + table=??(ls_in_pre_network_function), priority=0 , match=(1), action=(next;) + table=??(ls_in_pre_network_function), priority=1 , match=(reg8[[21]] == 1 && reg8[[22]] == 1), action=(reg0[[22..29]] = 0; next;) + table=??(ls_in_pre_network_function), priority=10 , match=(reg8[[21]] == 1 && reg8[[22]] == 1 && reg0[[22..29]] == 1), action=(reg8[[21]] = 0; reg0[[22..29]] = 0; next;) + table=??(ls_in_pre_network_function), priority=99 , match=(reg8[[21]] == 1 && reg8[[22]] == 1 && reg0[[22..29]] == 1), action=(reg0[[22..29]] = 1; next;) + table=??(ls_out_pre_network_function), priority=0 , match=(1), action=(next;) + table=??(ls_out_pre_network_function), priority=1 , match=(reg8[[21]] == 1 && reg8[[22]] == 1), action=(reg0[[22..29]] = 0; next;) +]) + +AT_CHECK( + [grep -E 'ls_(in|out)_network_function' sw0flows | ovn_strip_lflows | sort], [0], [dnl + table=??(ls_in_network_function), priority=0 , match=(1), action=(next;) + table=??(ls_in_network_function), priority=1 , match=(reg8[[21]] == 1), action=(drop;) + table=??(ls_in_network_function), priority=100 , match=(inport == "sw0-nf-p1"), action=(drop;) + table=??(ls_in_network_function), priority=100 , match=(reg8[[21]] == 1 && eth.mcast), action=(next;) + table=??(ls_in_network_function), priority=99 , match=(reg8[[21]] == 1 && reg8[[22]] == 1 && reg0[[22..29]] == 1), action=(clone {outport = "sw0-nf-p1"; 
output;}; next;) + table=??(ls_out_network_function), priority=0 , match=(1), action=(next;) + table=??(ls_out_network_function), priority=1 , match=(reg8[[21]] == 1), action=(drop;) + table=??(ls_out_network_function), priority=100 , match=(outport == "sw0-nf-p1"), action=(next;) + table=??(ls_out_network_function), priority=100 , match=(reg8[[21]] == 1 && eth.mcast), action=(next;) + table=??(ls_out_network_function), priority=99 , match=(reg8[[21]] == 1 && reg8[[22]] == 0 && ct_label.nf_id == 1), action=(clone {outport = "sw0-nf-p1"; reg8[[23]] = 1; next(pipeline=ingress, table=??);}; next;) +]) + +AT_CHECK([grep "ls_in_l2_lkup" sw0flows | ovn_strip_lflows | grep 'priority=100'], [0], [dnl + table=??(ls_in_l2_lkup ), priority=100 , match=(reg8[[23]] == 1), action=(output;) +]) + +# Vtap: egress pre-acl ct_clear for packets going to vtap NF port so they are +# not committed to conntrack. +AT_CHECK( + [grep 'ls_out_pre_acl' sw0flows | ovn_strip_lflows | grep 'sw0-nf-p1'], [0], [dnl + table=??(ls_out_pre_acl ), priority=110 , match=(ip && outport == "sw0-nf-p1"), action=(ct_clear; next;) +]) + +AT_CHECK( + [grep -E 'ls_(in|out)_acl_eval' sw0flows | ovn_strip_lflows | grep 'ct_label.nf' | sort], [0], [dnl + table=??(ls_in_acl_eval ), priority=65532, match=(!ct.est && ct.rel && !ct.new && ct_mark.blocked == 0), action=(reg0[[17]] = 1; reg8[[21]] = ct_label.nf; reg8[[16]] = 1; ct_commit_nat;) + table=??(ls_in_acl_eval ), priority=65532, match=(ct.est && !ct.rel && ct.rpl && ct_mark.blocked == 0), action=(reg0[[9]] = 0; reg0[[10]] = 0; reg0[[17]] = 1; reg8[[21]] = ct_label.nf; reg8[[16]] = 1; next;) + table=??(ls_in_acl_eval ), priority=65532, match=(ct.est && ct_mark.allow_established == 1), action=(reg0[[21]] = 1; reg8[[21]] = ct_label.nf; reg8[[16]] = 1; next;) + table=??(ls_out_acl_eval ), priority=65532, match=(!ct.est && ct.rel && !ct.new && ct_mark.blocked == 0), action=(reg8[[21]] = ct_label.nf; reg8[[16]] = 1; ct_commit_nat;) + table=??(ls_out_acl_eval ), 
priority=65532, match=(ct.est && !ct.rel && ct.rpl && ct_mark.blocked == 0), action=(reg8[[21]] = ct_label.nf; reg8[[16]] = 1; next;) + table=??(ls_out_acl_eval ), priority=65532, match=(ct.est && ct_mark.allow_established == 1), action=(reg8[[21]] = ct_label.nf; reg8[[16]] = 1; next;) +]) + +# ICMP packets from sw0-p1 should be mirrored to sw0-nf-p1 but traffic originated +# in opposite direction should not get mirrored. +flow_eth_from_p1='eth.src == 00:00:00:00:00:01 && eth.dst == 00:00:00:00:00:02' +flow_ip_from_p1='ip.ttl==64 && ip4.src == 10.0.0.2 && ip4.dst == 10.0.0.3' +flow_icmp='icmp4.type == 8' +flow_from_p1="inport == \"sw0-p1\" && ${flow_eth_from_p1} && ${flow_ip_from_p1} && ${flow_icmp}" +AT_CHECK_UNQUOTED([ovn_trace --ct new --ct new --minimal sw0 "${flow_from_p1}"], [0], [dnl +ct_next(ct_state=new|trk) { + clone { + output("sw0-nf-p1"); + }; + ct_next(ct_state=new|trk) { + output("sw0-p2"); + }; +}; +]) +flow_eth_rev='eth.src == 00:00:00:00:00:02 && eth.dst == 00:00:00:00:00:01' +flow_ip_rev='ip.ttl==64 && ip4.src == 10.0.0.3 && ip4.dst == 10.0.0.2' +flow_rev="inport == \"sw0-p2\" && ${flow_eth_rev} && ${flow_ip_rev} && ${flow_icmp}" +AT_CHECK_UNQUOTED([ovn_trace --ct new --ct new --minimal sw0 "${flow_rev}"], [0], [dnl +ct_next(ct_state=new|trk) { + ct_next(ct_state=new|trk) { + output("sw0-p1"); + }; +}; +]) + +AS_BOX([Create another NF and add it to a to-lport ACL.]) + +# Create another NF and add it to a to-lport ACL. 
+check ovn-nbctl lsp-add sw0 sw0-nf-p3 +check ovn-nbctl set logical_switch_port sw0-nf-p3 options:receive_multicast=false options:lsp_learn_fdb=false options:is-nf=true +check ovn-nbctl nf-add nf1 2 sw0-nf-p3 +check ovn-nbctl nfg-add nfg1 2 vtap nf1 +check ovn-sbctl set port_binding sw0-nf-p3 up=true chassis=$chassis_uuid +check ovn-nbctl --wait=sb sync +check ovn-nbctl --wait=sb acl-add pg0 to-lport 1003 "outport == @pg0 && ip4.src == 10.0.0.4" allow-related nfg1 + +ovn-sbctl dump-flows sw0 > sw0flows +AT_CAPTURE_FILE([sw0flows]) + +AT_CHECK( + [grep -E 'ls_(in|out)_acl_eval' sw0flows | ovn_strip_lflows | grep pg0 | sort], [0], [dnl + table=??(ls_in_acl_eval ), priority=2002 , match=(reg0[[7]] == 1 && (inport == @pg0 && ip4.dst == 10.0.0.3)), action=(reg8[[16]] = 1; reg8[[21]] = 1; reg8[[22]] = 1; reg0[[22..29]] = 1; next;) + table=??(ls_in_acl_eval ), priority=2002 , match=(reg0[[8]] == 1 && (inport == @pg0 && ip4.dst == 10.0.0.3)), action=(reg8[[16]] = 1; reg0[[1]] = 1; reg8[[21]] = 1; reg8[[22]] = 1; reg0[[22..29]] = 1; next;) + table=??(ls_out_acl_eval ), priority=2003 , match=(reg0[[7]] == 1 && (outport == @pg0 && ip4.src == 10.0.0.4)), action=(reg8[[16]] = 1; reg8[[21]] = 1; reg8[[22]] = 1; reg0[[22..29]] = 2; next;) + table=??(ls_out_acl_eval ), priority=2003 , match=(reg0[[8]] == 1 && (outport == @pg0 && ip4.src == 10.0.0.4)), action=(reg8[[16]] = 1; reg0[[1]] = 1; reg8[[21]] = 1; reg8[[22]] = 1; reg0[[22..29]] = 2; next;) +]) + +# Pre NF: nfg0 (id=1) is from-lport so IN_PRE_NF only; nfg1 (id=2) is to-lport so OUT_PRE_NF only.
+AT_CHECK( + [grep -E 'ls_(in|out)_pre_network_function' sw0flows | ovn_strip_lflows | sort], [0], [dnl + table=??(ls_in_pre_network_function), priority=0 , match=(1), action=(next;) + table=??(ls_in_pre_network_function), priority=1 , match=(reg8[[21]] == 1 && reg8[[22]] == 1), action=(reg0[[22..29]] = 0; next;) + table=??(ls_in_pre_network_function), priority=10 , match=(reg8[[21]] == 1 && reg8[[22]] == 1 && reg0[[22..29]] == 1), action=(reg8[[21]] = 0; reg0[[22..29]] = 0; next;) + table=??(ls_in_pre_network_function), priority=99 , match=(reg8[[21]] == 1 && reg8[[22]] == 1 && reg0[[22..29]] == 1), action=(reg0[[22..29]] = 1; next;) + table=??(ls_out_pre_network_function), priority=0 , match=(1), action=(next;) + table=??(ls_out_pre_network_function), priority=1 , match=(reg8[[21]] == 1 && reg8[[22]] == 1), action=(reg0[[22..29]] = 0; next;) + table=??(ls_out_pre_network_function), priority=10 , match=(reg8[[21]] == 1 && reg8[[22]] == 1 && reg0[[22..29]] == 2), action=(reg8[[21]] = 0; reg0[[22..29]] = 0; next;) + table=??(ls_out_pre_network_function), priority=99 , match=(reg8[[21]] == 1 && reg8[[22]] == 1 && reg0[[22..29]] == 2), action=(reg0[[22..29]] = 2; next;) +]) + +AT_CHECK( + [grep -E 'ls_(in|out)_network_function' sw0flows | ovn_strip_lflows | sort], [0], [dnl + table=??(ls_in_network_function), priority=0 , match=(1), action=(next;) + table=??(ls_in_network_function), priority=1 , match=(reg8[[21]] == 1), action=(drop;) + table=??(ls_in_network_function), priority=100 , match=(inport == "sw0-nf-p1"), action=(drop;) + table=??(ls_in_network_function), priority=100 , match=(inport == "sw0-nf-p3"), action=(drop;) + table=??(ls_in_network_function), priority=100 , match=(reg8[[21]] == 1 && eth.mcast), action=(next;) + table=??(ls_in_network_function), priority=99 , match=(reg8[[21]] == 1 && reg8[[22]] == 0 && ct_label.nf_id == 2), action=(clone {outport = "sw0-nf-p3"; output;}; next;) + table=??(ls_in_network_function), priority=99 , match=(reg8[[21]] == 1 
&& reg8[[22]] == 1 && reg0[[22..29]] == 1), action=(clone {outport = "sw0-nf-p1"; output;}; next;) + table=??(ls_out_network_function), priority=0 , match=(1), action=(next;) + table=??(ls_out_network_function), priority=1 , match=(reg8[[21]] == 1), action=(drop;) + table=??(ls_out_network_function), priority=100 , match=(outport == "sw0-nf-p1"), action=(next;) + table=??(ls_out_network_function), priority=100 , match=(outport == "sw0-nf-p3"), action=(next;) + table=??(ls_out_network_function), priority=100 , match=(reg8[[21]] == 1 && eth.mcast), action=(next;) + table=??(ls_out_network_function), priority=99 , match=(reg8[[21]] == 1 && reg8[[22]] == 0 && ct_label.nf_id == 1), action=(clone {outport = "sw0-nf-p1"; reg8[[23]] = 1; next(pipeline=ingress, table=??);}; next;) + table=??(ls_out_network_function), priority=99 , match=(reg8[[21]] == 1 && reg8[[22]] == 1 && reg0[[22..29]] == 2), action=(clone {outport = "sw0-nf-p3"; reg8[[23]] = 1; next(pipeline=ingress, table=??);}; next;) +]) + +# Vtap: egress pre-acl ct_clear for all vtap NF ports so that they are not committed to conntrack. +AT_CHECK( + [grep 'ls_out_pre_acl' sw0flows | ovn_strip_lflows | grep 'nf-p' | sort], [0], [dnl + table=??(ls_out_pre_acl ), priority=110 , match=(ip && outport == "sw0-nf-p1"), action=(ct_clear; next;) + table=??(ls_out_pre_acl ), priority=110 , match=(ip && outport == "sw0-nf-p3"), action=(ct_clear; next;) +]) + +# ICMP packets to sw0-p1 should be mirrored to sw0-nf-p3. 
+flow_eth_to_p1='eth.src == 00:00:00:00:00:03 && eth.dst == 00:00:00:00:00:01' +flow_ip_to_p1='ip.ttl==64 && ip4.src == 10.0.0.4 && ip4.dst == 10.0.0.2' +flow_to_p1="inport == \"sw0-p3\" && ${flow_eth_to_p1} && ${flow_ip_to_p1} && ${flow_icmp}" +AT_CHECK_UNQUOTED([ovn_trace --ct new --ct new --minimal sw0 "${flow_to_p1}"], [0], [dnl +ct_next(ct_state=new|trk) { + ct_next(ct_state=new|trk) { + clone { + output("sw0-nf-p3"); + }; + output("sw0-p1"); + }; +}; +]) + +AT_CLEANUP +]) diff --git a/tests/ovn.at b/tests/ovn.at index f0d4b1dd3..b202c0a2e 100644 --- a/tests/ovn.at +++ b/tests/ovn.at @@ -44170,7 +44170,7 @@ AT_CLEANUP ]) OVN_FOR_EACH_NORTHD([ -AT_SETUP([Network function packet flow - outbound]) +AT_SETUP([Network function inline packet flow - outbound]) AT_KEYWORDS([ovn]) TAG_UNSTABLE ovn_start @@ -44360,7 +44360,7 @@ AT_CLEANUP ]) OVN_FOR_EACH_NORTHD([ -AT_SETUP([Network function packet flow - inbound]) +AT_SETUP([Network function inline packet flow - inbound]) AT_KEYWORDS([ovn]) TAG_UNSTABLE ovn_start @@ -44553,6 +44553,374 @@ OVN_CLEANUP([hv1],[hv2],[hv3]) AT_CLEANUP ]) +OVN_FOR_EACH_NORTHD([ +AT_SETUP([Network function vtap packet flow - outbound]) +AT_KEYWORDS([ovn]) +TAG_UNSTABLE +ovn_start + +# Create logical topology. One LS sw0 with 3 ports. +# From-lport ACL rule mirrors request packets from sw0-p1 to sw0-p2 via vtap NF port sw0-nf-vtap. +# In vtap mode, traffic is mirrored (copied) to NF, original packets still reach destination. 
+create_logical_topology() { + sw=$1 + check ovn-nbctl ls-add $sw + for i in 1 2; do + check ovn-nbctl lsp-add $sw $sw-p$i -- lsp-set-addresses $sw-p$i "f0:00:00:00:00:0$i 192.168.0.1$i" + done + check ovn-nbctl lsp-add $sw $sw-nf-vtap -- lsp-set-addresses $sw-nf-vtap "f0:00:00:00:01:01" + check ovn-nbctl set logical_switch_port $sw-nf-vtap \ + options:receive_multicast=false options:lsp_learn_mac=false \ + options:is-nf=true + check ovn-nbctl nf-add nf0 1 $sw-nf-vtap + check ovn-nbctl nfg-add nfg0 1 vtap nf0 + check ovn-nbctl pg-add pg0 $sw-p1 + check ovn-nbctl acl-add pg0 from-lport 1002 "inport == @pg0 && ip4.dst == 192.168.0.12" allow-related nfg0 +} + +create_logical_topology sw0 + +# Create three hypervisors +net_add n +for i in 1 2 3; do + sim_add hv$i + as hv$i + ovs-vsctl add-br br-phys + ovs-vsctl set open . external-ids:ovn-bridge-mappings=phys:br-phys + ovn_attach n br-phys 192.168.1.$i +done + +test_icmp() { + local inport=$1 src_mac=$2 dst_mac=$3 src_ip=$4 dst_ip=$5 icmp_type=$6 outport=$7 in_hv=$8 out_hv=$9 + local packet="inport==\"$inport\" && eth.src==$src_mac && + eth.dst==$dst_mac && ip.ttl==64 && ip4.src==$src_ip + && ip4.dst==$dst_ip && icmp4.type==$icmp_type && + icmp4.code==0" + OVS_WAIT_UNTIL([as $in_hv ovs-appctl -t ovn-controller inject-pkt "$packet"]) + echo "INJECTED PACKET $packet" + echo $packet | ovstest test-ovn expr-to-packets >> $out_hv-$outport.expected +} + +test_icmp_mirrored() { + # Inject packet and expect it at both NF (mirrored) and destination + local inport=$1 src_mac=$2 dst_mac=$3 src_ip=$4 dst_ip=$5 icmp_type=$6 + local nf_outport=$7 dst_outport=$8 in_hv=$9 nf_hv=${10} dst_hv=${11} + local packet="inport==\"$inport\" && eth.src==$src_mac && + eth.dst==$dst_mac && ip.ttl==64 && ip4.src==$src_ip + && ip4.dst==$dst_ip && icmp4.type==$icmp_type && + icmp4.code==0" + OVS_WAIT_UNTIL([as $in_hv ovs-appctl -t ovn-controller inject-pkt "$packet"]) + echo "INJECTED PACKET $packet" + # Expect packet at both NF port (mirrored) and 
destination port + echo $packet | ovstest test-ovn expr-to-packets >> $nf_hv-$nf_outport.expected + echo $packet | ovstest test-ovn expr-to-packets >> $dst_hv-$dst_outport.expected +} + +packet_mirroring_test() { + local hvp1=$1 hvp2=$2 hvnf=$3 + + # Test 1: Inject ICMP request from sw0-p1 to sw0-p2 + # In vtap mode: single packet should be mirrored to NF AND reach sw0-p2 + test_icmp_mirrored sw0-p1 "f0:00:00:00:00:01" "f0:00:00:00:00:02" "192.168.0.11" "192.168.0.12" 8 \ + vif-nf vif2 $hvp1 $hvnf $hvp2 + OVN_CHECK_PACKETS_REMOVE_BROADCAST([$hvnf/vif-nf-tx.pcap], [$hvnf-vif-nf.expected]) + OVN_CHECK_PACKETS_REMOVE_BROADCAST([$hvp2/vif2-tx.pcap], [$hvp2-vif2.expected]) + + # Test 2: Reverse direction - ICMP request from sw0-p2 to sw0-p1 + # No mirroring expected (ACL only matches from-lport on pg0 which contains sw0-p1) + test_icmp sw0-p2 "f0:00:00:00:00:02" "f0:00:00:00:00:01" "192.168.0.12" "192.168.0.11" 8 vif1 $hvp2 $hvp1 + OVN_CHECK_PACKETS_REMOVE_BROADCAST([$hvp1/vif1-tx.pcap], [$hvp1-vif1.expected]) +} + +create_port_binding() { + hvp1=$1 hvp2=$2 hvnf=$3 + as $hvp1 + ovs-vsctl add-port br-int vif1 -- \ + set interface vif1 external-ids:iface-id=sw0-p1 \ + options:tx_pcap=$hvp1/vif1-tx.pcap \ + options:rxq_pcap=$hvp1/vif1-rx.pcap + as $hvp2 + ovs-vsctl add-port br-int vif2 -- \ + set interface vif2 external-ids:iface-id=sw0-p2 \ + options:tx_pcap=$hvp2/vif2-tx.pcap \ + options:rxq_pcap=$hvp2/vif2-rx.pcap + as $hvnf + ovs-vsctl add-port br-int vif-nf -- \ + set interface vif-nf external-ids:iface-id=sw0-nf-vtap \ + options:tx_pcap=$hvnf/vif-nf-tx.pcap \ + options:rxq_pcap=$hvnf/vif-nf-rx.pcap + + OVN_POPULATE_ARP + wait_for_ports_up + check ovn-nbctl --wait=hv sync + sleep 1 +} + +cleanup_port_binding() { + hvp1=$1 hvp2=$2 hvnf=$3 + as $hvp1 + ovs-vsctl del-port br-int vif1 + as $hvp2 + ovs-vsctl del-port br-int vif2 + as $hvnf + ovs-vsctl del-port br-int vif-nf + sleep 1 +} + +test_nf_vtap_with_multinodes_outbound() { + mode=$1 + # Test 1: Bind all 3 ports to 
one node + echo "$mode: Network function vtap outbound with single node" + create_port_binding hv1 hv1 hv1 + + packet_mirroring_test hv1 hv1 hv1 sw0 + + cleanup_port_binding hv1 hv1 hv1 + + # Test 2: src & dst ports on one node, NF on another node + echo "$mode: Network function vtap outbound with two nodes - nf separate" + create_port_binding hv1 hv1 hv2 + + packet_mirroring_test hv1 hv1 hv2 sw0 + + cleanup_port_binding hv1 hv1 hv2 + + # Test 3: src and nf on one node, dst on a second node + echo "$mode: Network function vtap outbound with two nodes - nf with src" + create_port_binding hv1 hv2 hv1 + + packet_mirroring_test hv1 hv2 hv1 sw0 + + cleanup_port_binding hv1 hv2 hv1 + + # Test 4: src on one node, nf & dst on a second node + echo "$mode: Network function vtap outbound with two nodes - nf with dst" + create_port_binding hv1 hv2 hv2 + + packet_mirroring_test hv1 hv2 hv2 sw0 + + cleanup_port_binding hv1 hv2 hv2 + + # Test 5: src on one node, dst on another, NF on a 3rd one + echo "$mode: Network function vtap outbound with three nodes" + create_port_binding hv1 hv2 hv3 + + packet_mirroring_test hv1 hv2 hv3 sw0 + + cleanup_port_binding hv1 hv2 hv3 +} + +test_nf_vtap_with_multinodes_outbound overlay + +# Tests for VLAN network +check ovn-nbctl lsp-add-localnet-port sw0 ln0 phys +check ovn-nbctl set logical_switch_port ln0 tag_request=100 + +test_nf_vtap_with_multinodes_outbound VLAN + +# Cleanup logical topology +check ovn-nbctl lsp-del ln0 +check ovn-nbctl acl-del pg0 from-lport 1002 "inport == @pg0 && ip4.dst == 192.168.0.12" +check ovn-nbctl pg-del pg0 +check ovn-nbctl nfg-del nfg0 +check ovn-nbctl nf-del nf0 +check ovn-nbctl clear logical_switch_port sw0-nf-vtap options +for i in 1 2; do + check ovn-nbctl lsp-del sw0-p$i +done +check ovn-nbctl lsp-del sw0-nf-vtap +check ovn-nbctl ls-del sw0 +check ovn-nbctl --wait=hv sync + +OVN_CLEANUP([hv1],[hv2],[hv3]) +AT_CLEANUP +]) + +OVN_FOR_EACH_NORTHD([ +AT_SETUP([Network function vtap packet flow - inbound]) 
+AT_KEYWORDS([ovn]) +TAG_UNSTABLE +ovn_start + +# Create logical topology. One LS sw0 with 3 ports. +# To-lport ACL rule mirrors request packets from sw0-p2 to sw0-p1 via vtap NF port sw0-nf-vtap. +# In vtap mode, traffic is mirrored (copied) to NF, original packets still reach destination. +create_logical_topology() { + sw=$1 + check ovn-nbctl ls-add $sw + for i in 1 2; do + check ovn-nbctl lsp-add $sw $sw-p$i -- lsp-set-addresses $sw-p$i "f0:00:00:00:00:0$i 192.168.0.1$i" + done + check ovn-nbctl lsp-add $sw $sw-nf-vtap -- lsp-set-addresses $sw-nf-vtap "f0:00:00:00:01:01" + check ovn-nbctl set logical_switch_port $sw-nf-vtap \ + options:receive_multicast=false options:lsp_learn_mac=false \ + options:is-nf=true + check ovn-nbctl nf-add nf0 1 $sw-nf-vtap + check ovn-nbctl nfg-add nfg0 1 vtap nf0 + check ovn-nbctl pg-add pg0 $sw-p1 + check ovn-nbctl acl-add pg0 to-lport 1002 "outport == @pg0 && ip4.src == 192.168.0.12" allow-related nfg0 +} + +create_logical_topology sw0 + +# Create three hypervisors +net_add n +for i in 1 2 3; do + sim_add hv$i + as hv$i + ovs-vsctl add-br br-phys + ovs-vsctl set open . 
external-ids:ovn-bridge-mappings=phys:br-phys
+    ovn_attach n br-phys 192.168.1.$i
+done
+
+test_icmp() {
+    local inport=$1 src_mac=$2 dst_mac=$3 src_ip=$4 dst_ip=$5 icmp_type=$6 outport=$7 in_hv=$8 out_hv=$9
+    local packet="inport==\"$inport\" && eth.src==$src_mac &&
+                  eth.dst==$dst_mac && ip.ttl==64 && ip4.src==$src_ip
+                  && ip4.dst==$dst_ip && icmp4.type==$icmp_type &&
+                  icmp4.code==0"
+    OVS_WAIT_UNTIL([as $in_hv ovs-appctl -t ovn-controller inject-pkt "$packet"])
+    echo "INJECTED PACKET $packet"
+    echo $packet | ovstest test-ovn expr-to-packets >> $out_hv-$outport.expected
+}
+
+test_icmp_mirrored() {
+    # Inject one ICMP packet on $in_hv and record it as expected on both the NF port (mirrored copy) and the destination port
+    local inport=$1 src_mac=$2 dst_mac=$3 src_ip=$4 dst_ip=$5 icmp_type=$6
+    local nf_outport=$7 dst_outport=$8 in_hv=$9 nf_hv=${10} dst_hv=${11}
+    local packet="inport==\"$inport\" && eth.src==$src_mac &&
+                  eth.dst==$dst_mac && ip.ttl==64 && ip4.src==$src_ip
+                  && ip4.dst==$dst_ip && icmp4.type==$icmp_type &&
+                  icmp4.code==0"
+    OVS_WAIT_UNTIL([as $in_hv ovs-appctl -t ovn-controller inject-pkt "$packet"])
+    echo "INJECTED PACKET $packet"
+    # Append the same packet to the expected-capture files of the NF port and of the destination port
+    echo $packet | ovstest test-ovn expr-to-packets >> $nf_hv-$nf_outport.expected
+    echo $packet | ovstest test-ovn expr-to-packets >> $dst_hv-$dst_outport.expected
+}
+
+packet_mirroring_test() {
+    local hvp1=$1 hvp2=$2 hvnf=$3
+
+    # Test 1: Inject ICMP request from sw0-p2 to sw0-p1
+    # In vtap mode: single packet should be mirrored to NF AND reach sw0-p1
+    test_icmp_mirrored sw0-p2 "f0:00:00:00:00:02" "f0:00:00:00:00:01" "192.168.0.12" "192.168.0.11" 8 \
+        vif-nf vif1 $hvp2 $hvnf $hvp1
+    OVN_CHECK_PACKETS_REMOVE_BROADCAST([$hvnf/vif-nf-tx.pcap], [$hvnf-vif-nf.expected])
+    OVN_CHECK_PACKETS_REMOVE_BROADCAST([$hvp1/vif1-tx.pcap], [$hvp1-vif1.expected])
+
+    # Test 2: Reverse direction - ICMP request from sw0-p1 to sw0-p2
+    # No mirroring expected (ACL only matches to-lport on pg0 which contains sw0-p1). NOTE(review): only vif2 is checked below; the NF capture is not re-compared, so the absence of a mirrored copy is not asserted here
+    test_icmp sw0-p1 "f0:00:00:00:00:01" "f0:00:00:00:00:02" "192.168.0.11" "192.168.0.12" 8 vif2 $hvp1 $hvp2
+    OVN_CHECK_PACKETS_REMOVE_BROADCAST([$hvp2/vif2-tx.pcap], [$hvp2-vif2.expected])
+}
+
+create_port_binding() {
+    hvp1=$1 hvp2=$2 hvnf=$3
+    as $hvp1
+    ovs-vsctl add-port br-int vif1 -- \
+        set interface vif1 external-ids:iface-id=sw0-p1 \
+        options:tx_pcap=$hvp1/vif1-tx.pcap \
+        options:rxq_pcap=$hvp1/vif1-rx.pcap
+    as $hvp2
+    ovs-vsctl add-port br-int vif2 -- \
+        set interface vif2 external-ids:iface-id=sw0-p2 \
+        options:tx_pcap=$hvp2/vif2-tx.pcap \
+        options:rxq_pcap=$hvp2/vif2-rx.pcap
+    as $hvnf
+    ovs-vsctl add-port br-int vif-nf -- \
+        set interface vif-nf external-ids:iface-id=sw0-nf-vtap \
+        options:tx_pcap=$hvnf/vif-nf-tx.pcap \
+        options:rxq_pcap=$hvnf/vif-nf-rx.pcap
+
+    OVN_POPULATE_ARP
+    wait_for_ports_up
+    check ovn-nbctl --wait=hv sync
+    sleep 1
+}
+
+cleanup_port_binding() {
+    hvp1=$1 hvp2=$2 hvnf=$3
+    as $hvp1
+    ovs-vsctl del-port br-int vif1
+    as $hvp2
+    ovs-vsctl del-port br-int vif2
+    as $hvnf
+    ovs-vsctl del-port br-int vif-nf
+    check ovn-nbctl --wait=hv sync
+    sleep 1
+}
+
+test_nf_vtap_with_multinodes_inbound() {
+    mode=$1
+
+    # Test 1: Bind all 3 ports to one node (create_port_binding args: chassis for sw0-p1, sw0-p2, sw0-nf-vtap)
+    echo "$mode: Network function vtap inbound with single node"
+    create_port_binding hv1 hv1 hv1
+
+    packet_mirroring_test hv1 hv1 hv1 sw0
+
+    cleanup_port_binding hv1 hv1 hv1
+
+    # Test 2: src & dst ports on one node, NF on another node
+    echo "$mode: Network function vtap inbound with two nodes - nf separate"
+    create_port_binding hv1 hv1 hv2
+
+    packet_mirroring_test hv1 hv1 hv2 sw0
+
+    cleanup_port_binding hv1 hv1 hv2
+
+    # Test 3: dst and nf on one node, src on a second node
+    echo "$mode: Network function vtap inbound with two nodes - nf with dst"
+    create_port_binding hv1 hv2 hv1
+
+    packet_mirroring_test hv1 hv2 hv1 sw0
+
+    cleanup_port_binding hv1 hv2 hv1
+
+    # Test 4: dst on one node, nf & src on a second node
+    echo "$mode: Network function vtap inbound with two nodes - nf with src"
+    create_port_binding hv1 hv2 hv2
+
+    packet_mirroring_test hv1 hv2 hv2 sw0
+
+    cleanup_port_binding hv1 hv2 hv2
+
+    # Test 5: src on one node, dst on another, NF on a 3rd one
+    echo "$mode: Network function vtap inbound with three nodes"
+    create_port_binding hv1 hv2 hv3
+
+    packet_mirroring_test hv1 hv2 hv3 sw0
+
+    cleanup_port_binding hv1 hv2 hv3
+}
+
+test_nf_vtap_with_multinodes_inbound overlay
+
+# Repeat the same scenarios with a localnet port (tag_request=100) attached to sw0
+check ovn-nbctl lsp-add-localnet-port sw0 ln0 phys
+check ovn-nbctl set logical_switch_port ln0 tag_request=100
+
+test_nf_vtap_with_multinodes_inbound VLAN
+
+# Cleanup logical topology
+check ovn-nbctl lsp-del ln0
+check ovn-nbctl acl-del pg0 to-lport 1002 "outport == @pg0 && ip4.src == 192.168.0.12"
+check ovn-nbctl pg-del pg0
+check ovn-nbctl nfg-del nfg0
+check ovn-nbctl nf-del nf0
+check ovn-nbctl clear logical_switch_port sw0-nf-vtap options
+for i in 1 2; do
+    check ovn-nbctl lsp-del sw0-p$i
+done
+check ovn-nbctl lsp-del sw0-nf-vtap
+check ovn-nbctl ls-del sw0
+check ovn-nbctl --wait=hv sync
+
+OVN_CLEANUP([hv1],[hv2],[hv3])
+AT_CLEANUP
+])
+
 OVN_FOR_EACH_NORTHD([
 AT_SETUP([Unicast ARP when proxy ARP is configured])
 AT_SKIP_IF([test $HAVE_SCAPY = no])
diff --git a/tests/system-ovn.at b/tests/system-ovn.at
index 8d1f21609..4c3348a56 100644
--- a/tests/system-ovn.at
+++ b/tests/system-ovn.at
@@ -19873,7 +19873,7 @@ AT_CLEANUP
 ])
 
 OVN_FOR_EACH_NORTHD([
-AT_SETUP([Network Function])
+AT_SETUP([Network Function - inline mode])
 AT_SKIP_IF([test $HAVE_TCPDUMP = no])
 ovn_start
 OVS_TRAFFIC_VSWITCHD_START()
@@ -20192,6 +20192,269 @@ OVS_TRAFFIC_VSWITCHD_STOP(["/.*error receiving.*/d
 AT_CLEANUP
 ])
 
+OVN_FOR_EACH_NORTHD([
+AT_SETUP([Network Function - vtap mode])
+AT_SKIP_IF([test $HAVE_TCPDUMP = no])
+ovn_start
+OVS_TRAFFIC_VSWITCHD_START()
+
+ADD_BR([br-int])
+
+# Set external-ids in br-int needed for ovn-controller.
+check ovs-vsctl \
+    -- set Open_vSwitch . external-ids:system-id=hv1 \
+    -- set Open_vSwitch .
external-ids:ovn-remote=unix:$ovs_base/ovn-sb/ovn-sb.sock \
+    -- set Open_vSwitch . external-ids:ovn-encap-type=geneve \
+    -- set Open_vSwitch . external-ids:ovn-encap-ip=169.0.0.1 \
+    -- set bridge br-int fail-mode=secure other-config:disable-in-band=true
+
+start_daemon ovn-controller
+
+# Create namespaces: client, server, and nf (holds both vtap NF interfaces)
+ADD_NAMESPACES(client)
+ADD_VETH(client, client, br-int, "192.168.1.10/24", "f0:00:00:01:02:10")
+ADD_NAMESPACES(server)
+ADD_VETH(server, server, br-int, "192.168.1.20/24", "f0:00:00:01:02:20")
+ADD_NAMESPACES(nf)
+ADD_VETH(nf-vtap, nf, br-int, "0", "f0:00:00:01:02:30")
+ADD_VETH(nf-vtap2, nf, br-int, "0", "f0:00:00:01:02:40")
+
+# Create logical switch and ports
+check ovn-nbctl ls-add sw0
+check ovn-nbctl lsp-add sw0 client \
+    -- lsp-set-addresses client "f0:00:00:01:02:10 192.168.1.10/24"
+check ovn-nbctl lsp-add sw0 server \
+    -- lsp-set-addresses server "f0:00:00:01:02:20 192.168.1.20/24"
+check ovn-nbctl lsp-add sw0 nf-vtap
+check ovn-nbctl set logical_switch_port nf-vtap options:receive_multicast=false \
+    options:lsp_learn_fdb=false \
+    options:is-nf=true
+check ovn-nbctl lsp-add sw0 nf-vtap2
+check ovn-nbctl set logical_switch_port nf-vtap2 options:receive_multicast=false \
+    options:lsp_learn_fdb=false \
+    options:is-nf=true
+
+AS_BOX([Setup: Create 2 NFs in vtap mode with health check])
+
+# Create NF0 with only inport (vtap mode)
+check ovn-nbctl nf-add nf0 1 nf-vtap
+nf0_uuid=$(fetch_column nb:network_function _uuid name=nf0)
+
+# Create NF1 with only inport (vtap mode)
+check ovn-nbctl nf-add nf1 2 nf-vtap2
+nf1_uuid=$(fetch_column nb:network_function _uuid name=nf1)
+
+# Create NFG with both NFs
+check ovn-nbctl nfg-add nfg0 1 vtap nf0
+nfg_uuid=$(fetch_column nb:network_function_group _uuid name=nfg0)
+check ovn-nbctl nfg-add-nf nfg0 nf1
+
+# Set monitor IPs for health check
+check ovn-nbctl set nb_global . options:svc_monitor_ip=169.254.100.10
+check ovn-nbctl set nb_global . options:svc_monitor_ip_dst=169.254.100.11
+
+# Create health check configuration and assign to both NFs
+AT_CHECK(
+  [ovn-nbctl --wait=sb \
+          -- --id=@hc create network_function_health_check name=nf_health_cfg \
+             options:interval=1 options:timeout=1 options:success_count=2 options:failure_count=2 \
+          -- add network_function $nf0_uuid health_check @hc | uuidfilt], [0], [<0>
+])
+nf_health_uuid=$(fetch_column nb:network_function_health_check _uuid name=nf_health_cfg)
+check ovn-nbctl set network_function $nf1_uuid health_check=$nf_health_uuid
+
+# Create port group and ACLs for both from-lport and to-lport traffic mirroring
+check ovn-nbctl pg-add pg0 client
+check ovn-nbctl acl-add pg0 from-lport 1001 "inport == @pg0 && ip4.dst == 192.168.1.20" allow-related nfg0
+check ovn-nbctl acl-add pg0 to-lport 1002 "outport == @pg0 && ip4.src == 192.168.1.20" allow-related nfg0
+
+check ovn-nbctl --wait=hv sync
+
+# Bring up NF ports
+NS_CHECK_EXEC([nf], [ip link set dev nf-vtap up])
+NS_CHECK_EXEC([nf], [ip link set dev nf-vtap2 up])
+
+# Helper function to simulate NF down by removing iface-id from the ovs-<port> interface
+nf_down() {
+    local port=$1
+    ovs-vsctl remove interface ovs-$port external-ids iface-id
+}
+
+# Helper function to simulate NF up by restoring iface-id on the ovs-<port> interface
+nf_up() {
+    local port=$1
+    ovs-vsctl set interface ovs-$port external-ids:iface-id="$port"
+}
+
+validate_nf_vtap_with_traffic() {
+    client_ns=$1; server_ns=$2; sip=$3; direction=$4
+
+    # Use "ping -6" when the target contains ':'; grep is used because m4 strips one level of square brackets from the test body, which breaks double-bracket pattern tests
+    local ping_cmd="ping"
+    if echo "$sip" | grep -q ":"; then
+        ping_cmd="ping -6"
+    fi
+
+    AS_BOX([$direction: Verify traffic mirroring to nf0 when nf0 is active])
+
+    # Ensure nf0 is up, nf1 is down
+    nf_up nf-vtap
+    nf_down nf-vtap2
+    check ovn-nbctl set network_function_group $nfg_uuid fallback=fail-close
+    check ovn-nbctl --wait=hv sync
+
+    # Wait for health check to detect state
+    sleep 5
+
+    # Use broad filter to capture both IPv4 and IPv6 ICMP
+    NETNS_START_TCPDUMP([nf], [-nvvv -i nf-vtap icmp or icmp6], [tcpdump-nf-vtap])
+
+    # Send 5 ICMP packets - in vtap mode, traffic should be mirrored AND reach destination
+    # NF should see 10 packets: 5 echo requests (from-lport) + 5 echo replies (to-lport)
+    NS_CHECK_EXEC([$client_ns], [$ping_cmd -c 5 -i 0.3 $sip], [0], [ignore])
+
+    # Verify all mirrored packets were captured (5 requests + 5 replies = 10 packets); count echo lines, since verbose tcpdump prints an IPv4 packet on two lines
+    OVS_WAIT_UNTIL([
+        n=$(grep -c "echo re" tcpdump-nf-vtap.tcpdump)
+        test "$n" -ge 10
+    ])
+
+    kill $(cat tcpdump-nf-vtap.pid) 2>/dev/null || true
+
+    AS_BOX([$direction: Verify failover - traffic mirroring to nf1 when nf0 is down])
+
+    # Bring nf0 down, nf1 up (failover)
+    nf_down nf-vtap
+    nf_up nf-vtap2
+    check ovn-nbctl --wait=hv sync
+
+    # Wait for health check to detect state change
+    sleep 5
+
+    NETNS_START_TCPDUMP([nf], [-nvvv -i nf-vtap2 icmp or icmp6], [tcpdump-nf-vtap])
+
+    # Send 5 ICMP packets - should now be mirrored to nf1
+    # NF should see 10 packets: 5 echo requests + 5 echo replies
+    NS_CHECK_EXEC([$client_ns], [$ping_cmd -c 5 -i 0.3 $sip], [0], [ignore])
+
+    # Verify all mirrored packets were captured (5 requests + 5 replies = 10 packets); count echo lines, since verbose tcpdump prints an IPv4 packet on two lines
+    OVS_WAIT_UNTIL([
+        n=$(grep -c "echo re" tcpdump-nf-vtap.tcpdump)
+        test "$n" -ge 10
+    ])
+
+    kill $(cat tcpdump-nf-vtap.pid) 2>/dev/null || true
+
+    AS_BOX([$direction: Verify fallback - traffic mirroring back to nf0 when nf0 recovers])
+
+    # Bring nf0 back up and nf1 down (fallback to nf0)
+    nf_up nf-vtap
+    nf_down nf-vtap2
+    check ovn-nbctl --wait=hv sync
+
+    # Wait for health check to detect state change
+    sleep 5
+
+    NETNS_START_TCPDUMP([nf], [-nvvv -i nf-vtap icmp or icmp6], [tcpdump-nf-vtap])
+
+    # Send 5 ICMP packets - should be mirrored back to nf0
+    # NF should see 10 packets: 5 echo requests + 5 echo replies
+    NS_CHECK_EXEC([$client_ns], [$ping_cmd -c 5 -i 0.3 $sip], [0], [ignore])
+
+    # Verify all mirrored packets were captured (5 requests + 5 replies = 10 packets); count echo lines, since verbose tcpdump prints an IPv4 packet on two lines
+    OVS_WAIT_UNTIL([
+        n=$(grep -c "echo re" tcpdump-nf-vtap.tcpdump)
+        test "$n" -ge 10
+    ])
+
+    kill $(cat tcpdump-nf-vtap.pid) 2>/dev/null || true
+
+    AS_BOX([$direction: Verify fail-close - traffic flows but no mirroring when both NFs are down])
+
+    # Bring both NFs down with fail-close
+    nf_down nf-vtap
+    nf_down nf-vtap2
+    check ovn-nbctl set network_function_group $nfg_uuid fallback=fail-close
+    check ovn-nbctl --wait=hv sync
+
+    # Wait for health check to detect both down
+    sleep 5
+
+    NETNS_START_TCPDUMP([nf], [-nvvv -i nf-vtap icmp or icmp6], [tcpdump-nf-vtap])
+
+    # Send ICMP packets - in vtap mode, traffic still flows (mirroring is separate from forwarding)
+    # but no packets should be mirrored to NF with fail-close
+    NS_CHECK_EXEC([$client_ns], [$ping_cmd -c 3 -i 0.3 $sip], [0], [ignore])
+
+    # Verify no packets were mirrored (tcpdump should capture nothing)
+    sleep 1
+    AT_CHECK([cat tcpdump-nf-vtap.tcpdump | wc -l], [0], [0
+])
+
+    kill $(cat tcpdump-nf-vtap.pid) 2>/dev/null || true
+
+    AS_BOX([$direction: Verify fail-open - traffic flows with no mirroring when both NFs are down])
+
+    # Set fail-open mode - in vtap mode, this behaves same as fail-close for traffic flow
+    # (traffic always flows), difference is in ACL behavior
+    check ovn-nbctl set network_function_group $nfg_uuid fallback=fail-open
+    check ovn-nbctl --wait=hv sync
+
+    # Send ICMP packets - traffic should flow
+    NS_CHECK_EXEC([$client_ns], [$ping_cmd -c 3 -i 0.3 $sip], [0], [ignore])
+}
+
+AS_BOX([IPv4 Testing - Inbound traffic])
+validate_nf_vtap_with_traffic "client" "server" "192.168.1.20" "Inbound"
+
+AS_BOX([IPv4 Testing - Outbound traffic])
+validate_nf_vtap_with_traffic "server" "client" "192.168.1.10" "Outbound"
+
+AS_BOX([IPv6 Testing - Setup])
+
+# Remove IPv4 addresses from namespaces
+ip netns exec client ip addr del 192.168.1.10/24 dev client
+ip netns exec server ip addr del 192.168.1.20/24 dev server
+
+# Add IPv6 addresses to client and server
+ip netns exec client ip -6 addr add fd00:192:168:1::10/64 dev client
+ip netns exec server ip -6 addr add fd00:192:168:1::20/64 dev server
+
+# Update service monitor IPs to IPv6 for health check
+check ovn-nbctl set nb_global . options:svc_monitor_ip=fd00:169:254:100::10
+check ovn-nbctl set nb_global . options:svc_monitor_ip_dst=fd00:169:254:100::11
+
+# Configure IPv6-only addresses on logical ports
+check ovn-nbctl lsp-set-addresses client "f0:00:00:01:02:10 fd00:192:168:1::10"
+check ovn-nbctl lsp-set-addresses server "f0:00:00:01:02:20 fd00:192:168:1::20"
+
+# Add IPv6 ACLs
+check ovn-nbctl acl-add pg0 from-lport 1003 "inport == @pg0 && ip6.dst == fd00:192:168:1::20" allow-related nfg0
+check ovn-nbctl acl-add pg0 to-lport 1004 "outport == @pg0 && ip6.src == fd00:192:168:1::20" allow-related nfg0
+
+check ovn-nbctl --wait=hv sync
+
+AS_BOX([IPv6 Testing - Inbound traffic])
+validate_nf_vtap_with_traffic "client" "server" "fd00:192:168:1::20" "IPv6 Inbound"
+
+AS_BOX([IPv6 Testing - Outbound traffic])
+validate_nf_vtap_with_traffic "server" "client" "fd00:192:168:1::10" "IPv6 Outbound"
+
+# Restore NF iface-ids before cleanup
+nf_up nf-vtap
+nf_up nf-vtap2
+check ovn-nbctl --wait=hv sync
+
+OVN_CLEANUP_CONTROLLER([hv1])
+OVN_CLEANUP_NORTHD
+
+as
+OVS_TRAFFIC_VSWITCHD_STOP(["/.*error receiving.*/d
+/failed to query port patch-.*/d
+/.*terminating with signal 15.*/d"])
+AT_CLEANUP
+])
+
 OVN_FOR_EACH_NORTHD([
 AT_SETUP([dynamic-routing - BGP learned routes])
 
-- 
2.43.5

_______________________________________________
dev mailing list
[email protected]
https://mail.openvswitch.org/mailman/listinfo/ovs-dev
