The LB health monitoring functionality has been extended to support NFs. Network_Function_Group has a list of Network_Functions, each of which has a reference to Network_Function_Health_Check that holds the monitoring config. There is a corresponding SB service_monitor maintaining the online/offline status. When the status changes, northd picks one of the “online” NFs and sets it in the network_function_active field of the NFG. The redirection rule in the LS uses the ports from this NF.
Ovn-controller performs the health monitoring by sending ICMP echo request with source IP and MAC from NB global options “svc_monitor_ip4” and “svc_monitor_mac”, and destination IP and MAC from new NB global options “svc_monitor_ip4_dst” and “svc_monitor_mac_dst”. The sequence number and id are randomly generated and stored in service_mon. The NF VM forwards the same packet out of the other port. When it comes out, ovn-controller matches the sequence number and id with stored values and marks online if matched. In SB Service_Monitor table three new fields have been added: type: to indicate “load-balancer” or “network-function” mac: the destination MAC address for the monitor packets logical_input_port: The LSP to which the probe packet would be sent (taken from inport of Network_Function) Co-authored-by: Naveen Yerramneni <naveen.yerramn...@nutanix.com> Co-authored-by: Karthik Chandrashekar <karthi...@nutanix.com> Signed-off-by: Naveen Yerramneni <naveen.yerramn...@nutanix.com> Signed-off-by: Karthik Chandrashekar <karthi...@nutanix.com> Signed-off-by: Sragdhara Datta Chaudhuri <sragdha.chau...@nutanix.com> --- controller/pinctrl.c | 252 +++++++++++++++++++++++++++++------- northd/en-global-config.c | 75 +++++++++++ northd/en-global-config.h | 12 +- northd/en-northd.c | 4 + northd/en-sync-sb.c | 16 ++- northd/northd.c | 263 ++++++++++++++++++++++++++++++++++---- northd/northd.h | 6 +- northd/ovn-northd.8.xml | 239 ++++++++++++++++++++++++++++++++-- ovn-sb.ovsschema | 10 +- ovn-sb.xml | 22 +++- tests/ovn-northd.at | 192 +++++++++++++++++++++++++++- tests/system-ovn.at | 204 +++++++++++++++++++++++++++++ 12 files changed, 1199 insertions(+), 96 deletions(-) diff --git a/controller/pinctrl.c b/controller/pinctrl.c index 9071b2b58..3adc09b3d 100644 --- a/controller/pinctrl.c +++ b/controller/pinctrl.c @@ -6913,8 +6913,17 @@ enum svc_monitor_status { enum svc_monitor_protocol { SVC_MON_PROTO_TCP, SVC_MON_PROTO_UDP, + SVC_MON_PROTO_ICMP, }; +enum svc_monitor_type { + /* 
load balancer */ + SVC_MON_TYPE_LB, + /* network function */ + SVC_MON_TYPE_NF, +}; + + /* Service monitor health checks. */ struct svc_monitor { struct hmap_node hmap_node; @@ -6927,6 +6936,7 @@ struct svc_monitor { /* key */ struct in6_addr ip; uint32_t dp_key; + uint32_t input_port_key; uint32_t port_key; uint32_t proto_port; /* tcp/udp port */ @@ -6959,6 +6969,7 @@ struct svc_monitor { int n_failures; enum svc_monitor_protocol protocol; + enum svc_monitor_type type; enum svc_monitor_state state; enum svc_monitor_status status; struct dp_packet pkt; @@ -6966,6 +6977,9 @@ struct svc_monitor { uint32_t seq_no; ovs_be16 tp_src; + ovs_be16 icmp_id; + ovs_be16 icmp_seq_no; + bool delete; }; @@ -7031,9 +7045,28 @@ sync_svc_monitors(struct ovsdb_idl_txn *ovnsb_idl_txn, const struct sbrec_service_monitor *sb_svc_mon; SBREC_SERVICE_MONITOR_TABLE_FOR_EACH (sb_svc_mon, svc_mon_table) { + enum svc_monitor_type mon_type; + if (sb_svc_mon->type + && !strcmp(sb_svc_mon->type, "network-function")) { + mon_type = SVC_MON_TYPE_NF; + } else { + mon_type = SVC_MON_TYPE_LB; + } + + enum svc_monitor_protocol protocol; + if (!strcmp(sb_svc_mon->protocol, "udp")) { + protocol = SVC_MON_PROTO_UDP; + } else if (!strcmp(sb_svc_mon->protocol, "icmp")) { + protocol = SVC_MON_PROTO_ICMP; + } else { + protocol = SVC_MON_PROTO_TCP; + } + const struct sbrec_port_binding *pb = lport_lookup_by_name(sbrec_port_binding_by_name, sb_svc_mon->logical_port); + const struct sbrec_port_binding *input_pb = NULL; + if (!pb) { continue; } @@ -7053,39 +7086,65 @@ sync_svc_monitors(struct ovsdb_idl_txn *ovnsb_idl_txn, struct eth_addr ea; bool mac_found = false; - for (size_t i = 0; i < pb->n_mac && !mac_found; i++) { - struct lport_addresses laddrs; - if (!extract_lsp_addresses(pb->mac[i], &laddrs)) { + if (mon_type == SVC_MON_TYPE_NF) { + if (protocol != SVC_MON_PROTO_ICMP) { + continue; + } + input_pb = lport_lookup_by_name(sbrec_port_binding_by_name, + sb_svc_mon->logical_input_port); + if (!input_pb) { + 
continue; + } + if (input_pb->chassis != our_chassis) { + continue; + } + if (strcmp(sb_svc_mon->mac, "")) { + if (eth_addr_from_string(sb_svc_mon->mac, &ea)) { + mac_found = true; + } + } + } else { + if (protocol != SVC_MON_PROTO_TCP && + protocol != SVC_MON_PROTO_UDP) { continue; } - if (is_ipv4) { - for (size_t j = 0; j < laddrs.n_ipv4_addrs; j++) { - if (ip4 == laddrs.ipv4_addrs[j].addr) { - ea = laddrs.ea; - mac_found = true; - break; - } + for (size_t i = 0; i < pb->n_mac && !mac_found; i++) { + struct lport_addresses laddrs; + + if (!extract_lsp_addresses(pb->mac[i], &laddrs)) { + continue; } - } else { - for (size_t j = 0; j < laddrs.n_ipv6_addrs; j++) { - if (IN6_ARE_ADDR_EQUAL(&ip_addr, - &laddrs.ipv6_addrs[j].addr)) { - ea = laddrs.ea; - mac_found = true; - break; + + if (is_ipv4) { + for (size_t j = 0; j < laddrs.n_ipv4_addrs; j++) { + if (ip4 == laddrs.ipv4_addrs[j].addr) { + ea = laddrs.ea; + mac_found = true; + break; + } + } + } else { + for (size_t j = 0; j < laddrs.n_ipv6_addrs; j++) { + if (IN6_ARE_ADDR_EQUAL(&ip_addr, + &laddrs.ipv6_addrs[j].addr)) { + ea = laddrs.ea; + mac_found = true; + break; + } } } - } - if (!mac_found && !laddrs.n_ipv4_addrs && !laddrs.n_ipv6_addrs) { - /* IP address(es) are not configured. Use the first mac. */ - ea = laddrs.ea; - mac_found = true; - } + if (!mac_found && !laddrs.n_ipv4_addrs && + !laddrs.n_ipv6_addrs) { + /* IP address(es) are not configured. Use the first mac. */ + ea = laddrs.ea; + mac_found = true; + } - destroy_lport_addresses(&laddrs); + destroy_lport_addresses(&laddrs); + } } if (!mac_found) { @@ -7094,23 +7153,18 @@ sync_svc_monitors(struct ovsdb_idl_txn *ovnsb_idl_txn, uint32_t dp_key = pb->datapath->tunnel_key; uint32_t port_key = pb->tunnel_key; + uint32_t input_port_key = input_pb ? 
input_pb->tunnel_key : UINT32_MAX; uint32_t hash = hash_bytes(&ip_addr, sizeof ip_addr, hash_3words(dp_key, port_key, sb_svc_mon->port)); - enum svc_monitor_protocol protocol; - if (!sb_svc_mon->protocol || strcmp(sb_svc_mon->protocol, "udp")) { - protocol = SVC_MON_PROTO_TCP; - } else { - protocol = SVC_MON_PROTO_UDP; - } - svc_mon = pinctrl_find_svc_monitor(dp_key, port_key, &ip_addr, sb_svc_mon->port, protocol, hash); if (!svc_mon) { svc_mon = xmalloc(sizeof *svc_mon); svc_mon->dp_key = dp_key; + svc_mon->input_port_key = input_port_key; svc_mon->port_key = port_key; svc_mon->proto_port = sb_svc_mon->port; svc_mon->ip = ip_addr; @@ -7118,6 +7172,7 @@ sync_svc_monitors(struct ovsdb_idl_txn *ovnsb_idl_txn, svc_mon->state = SVC_MON_S_INIT; svc_mon->status = SVC_MON_ST_UNKNOWN; svc_mon->protocol = protocol; + svc_mon->type = mon_type; smap_init(&svc_mon->options); svc_mon->interval = @@ -8009,11 +8064,67 @@ svc_monitor_send_udp_health_check(struct rconn *swconn, ofpbuf_uninit(&ofpacts); } + +static void +svc_monitor_send_icmp_health_check__(struct rconn *swconn, + struct svc_monitor *svc_mon) +{ + uint64_t packet_stub[128 / 8]; + struct dp_packet packet; + dp_packet_use_stub(&packet, packet_stub, sizeof packet_stub); + + struct eth_addr eth_src; + eth_addr_from_string(svc_mon->sb_svc_mon->src_mac, ð_src); + + ovs_be32 ip4_src; + ip_parse(svc_mon->sb_svc_mon->src_ip, &ip4_src); + pinctrl_compose_ipv4(&packet, eth_src, svc_mon->ea, ip4_src, + in6_addr_get_mapped_ipv4(&svc_mon->ip), + IPPROTO_ICMP, 255, ICMP_HEADER_LEN); + + struct icmp_header *ih = dp_packet_l4(&packet); + ih->icmp_fields.echo.id = svc_mon->icmp_id; + ih->icmp_fields.echo.seq = svc_mon->icmp_seq_no; + + uint8_t icmp_code = 0; + packet_set_icmp(&packet, ICMP4_ECHO_REQUEST, icmp_code); + + ih->icmp_csum = 0; + ih->icmp_csum = csum(ih, sizeof *ih); + + uint64_t ofpacts_stub[4096 / 8]; + struct ofpbuf ofpacts = OFPBUF_STUB_INITIALIZER(ofpacts_stub); + enum ofp_version version = rconn_get_version(swconn); 
+ put_load(svc_mon->dp_key, MFF_LOG_DATAPATH, 0, 64, &ofpacts); + put_load(svc_mon->input_port_key, MFF_LOG_OUTPORT, 0, 32, &ofpacts); + put_load(1, MFF_LOG_FLAGS, MLF_LOCAL_ONLY, 1, &ofpacts); + struct ofpact_resubmit *resubmit = ofpact_put_RESUBMIT(&ofpacts); + resubmit->in_port = OFPP_CONTROLLER; + resubmit->table_id = OFTABLE_LOCAL_OUTPUT; + + struct ofputil_packet_out po = { + .packet = dp_packet_data(&packet), + .packet_len = dp_packet_size(&packet), + .buffer_id = UINT32_MAX, + .ofpacts = ofpacts.data, + .ofpacts_len = ofpacts.size, + }; + match_set_in_port(&po.flow_metadata, OFPP_CONTROLLER); + enum ofputil_protocol proto = ofputil_protocol_from_ofp_version(version); + queue_msg(swconn, ofputil_encode_packet_out(&po, proto)); + dp_packet_uninit(&packet); + ofpbuf_uninit(&ofpacts); +} + static void svc_monitor_send_health_check(struct rconn *swconn, struct svc_monitor *svc_mon) { - if (svc_mon->protocol == SVC_MON_PROTO_TCP) { + if (svc_mon->protocol == SVC_MON_PROTO_ICMP) { + svc_mon->icmp_id = (OVS_FORCE ovs_be16) random_uint16(); + svc_mon->icmp_seq_no = (OVS_FORCE ovs_be16) random_uint16(); + svc_monitor_send_icmp_health_check__(swconn, svc_mon); + } else if (svc_mon->protocol == SVC_MON_PROTO_TCP) { svc_mon->seq_no = random_uint32(); svc_mon->tp_src = htons(get_random_src_port()); svc_monitor_send_tcp_health_check__(swconn, svc_mon, @@ -8054,13 +8165,14 @@ svc_monitors_run(struct rconn *swconn, case SVC_MON_S_WAITING: if (current_time > svc_mon->wait_time) { - if (svc_mon->protocol == SVC_MON_PROTO_TCP) { - svc_mon->n_failures++; - svc_mon->state = SVC_MON_S_OFFLINE; - } else { + if (svc_mon->protocol == SVC_MON_PROTO_UDP) { svc_mon->n_success++; svc_mon->state = SVC_MON_S_ONLINE; + } else { + svc_mon->n_failures++; + svc_mon->state = SVC_MON_S_OFFLINE; } + svc_mon->next_send_time = current_time + svc_mon->interval; next_run_time = svc_mon->next_send_time; } else { @@ -8121,6 +8233,27 @@ svc_monitors_wait(long long int svc_monitors_next_run_time) } } + 
+static void +pinctrl_handle_icmp_svc_check(struct dp_packet *pkt_in, + struct svc_monitor *svc_mon) +{ + struct icmp_header *ih = dp_packet_l4(pkt_in); + + if (!ih) { + return; + } + + if ((ih->icmp_fields.echo.id != svc_mon->icmp_id) || + (ih->icmp_fields.echo.seq != svc_mon->icmp_seq_no)) { + return; + } + + svc_mon->n_success++; + svc_mon->state = SVC_MON_S_ONLINE; + svc_mon->next_send_time = time_msec() + svc_mon->interval; +} + static bool pinctrl_handle_tcp_svc_check(struct rconn *swconn, struct dp_packet *pkt_in, @@ -8177,6 +8310,7 @@ pinctrl_handle_svc_check(struct rconn *swconn, const struct flow *ip_flow, uint32_t dp_key = ntohll(md->flow.metadata); uint32_t port_key = md->flow.regs[MFF_LOG_INPORT - MFF_REG0]; struct in6_addr ip_addr; + struct in6_addr dst_ip_addr; struct eth_header *in_eth = dp_packet_data(pkt_in); uint8_t ip_proto; @@ -8192,10 +8326,12 @@ pinctrl_handle_svc_check(struct rconn *swconn, const struct flow *ip_flow, } ip_addr = in6_addr_mapped_ipv4(ip_flow->nw_src); + dst_ip_addr = in6_addr_mapped_ipv4(ip_flow->nw_dst); ip_proto = in_ip->ip_proto; } else { struct ovs_16aligned_ip6_hdr *in_ip = dp_packet_l3(pkt_in); ip_addr = ip_flow->ipv6_src; + dst_ip_addr = ip_flow->ipv6_dst; ip_proto = in_ip->ip6_nxt; } @@ -8208,7 +8344,6 @@ pinctrl_handle_svc_check(struct rconn *swconn, const struct flow *ip_flow, return; } - if (ip_proto == IPPROTO_TCP) { uint32_t hash = hash_bytes(&ip_addr, sizeof ip_addr, @@ -8237,17 +8372,36 @@ pinctrl_handle_svc_check(struct rconn *swconn, const struct flow *ip_flow, return; } - const void *in_ip = dp_packet_get_icmp_payload(pkt_in); - if (!in_ip) { - static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(1, 5); - VLOG_WARN_RL(&rl, "Original IP datagram not present in " - "ICMP packet"); - return; - } - if (in_eth->eth_type == htons(ETH_TYPE_IP)) { struct icmp_header *ih = l4h; /* It's ICMP packet. 
*/ + if (ih->icmp_type == ICMP4_ECHO_REQUEST && ih->icmp_code == 0) { + uint32_t hash = hash_bytes(&dst_ip_addr, sizeof dst_ip_addr, + hash_3words(dp_key, port_key, 0)); + struct svc_monitor *svc_mon = + pinctrl_find_svc_monitor(dp_key, port_key, &dst_ip_addr, 0, + SVC_MON_PROTO_ICMP, hash); + if (!svc_mon) { + static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT( + 1, 5); + VLOG_WARN_RL(&rl, "handle service check: Service monitor " + "not found for ICMP request"); + return; + } + if (svc_mon->type == SVC_MON_TYPE_NF) { + pinctrl_handle_icmp_svc_check(pkt_in, svc_mon); + } + return; + } + + const void *in_ip = dp_packet_get_icmp_payload(pkt_in); + if (!in_ip) { + static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(1, 5); + VLOG_WARN_RL(&rl, "Original IP datagram not present in " + "ICMP packet"); + return; + } + if (ih->icmp_type != ICMP4_DST_UNREACH || ih->icmp_code != 3) { return; } @@ -8269,6 +8423,14 @@ pinctrl_handle_svc_check(struct rconn *swconn, const struct flow *ip_flow, return; } } else { + const void *in_ip = dp_packet_get_icmp_payload(pkt_in); + if (!in_ip) { + static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(1, 5); + VLOG_WARN_RL(&rl, "Original IP datagram not present in " + "ICMP packet"); + return; + } + struct icmp6_header *ih6 = l4h; if (ih6->icmp6_type != 1 || ih6->icmp6_code != 4) { return; diff --git a/northd/en-global-config.c b/northd/en-global-config.c index 76046c265..84fa03026 100644 --- a/northd/en-global-config.c +++ b/northd/en-global-config.c @@ -20,6 +20,7 @@ /* OVS includes */ #include "openvswitch/vlog.h" +#include "socket-util.h" /* OVN includes */ #include "debug.h" @@ -74,6 +75,35 @@ get_ovn_max_dp_key_local(bool vxlan_mode, bool vxlan_ic_mode) return vxlan_ic_mode ? 
OVN_MAX_DP_VXLAN_KEY_LOCAL : OVN_MAX_DP_KEY_LOCAL; } +static void +update_svc_monitor_addr(const char *new_ip4, const char **old_ip4_pptr) +{ + if (new_ip4) { + struct sockaddr_storage svc_mon_addr; + if (inet_parse_address(new_ip4, &svc_mon_addr)) { + struct ds ip_s = DS_EMPTY_INITIALIZER; + ss_format_address_nobracks(&svc_mon_addr, &ip_s); + if ((*old_ip4_pptr == NULL) + || strcmp(*old_ip4_pptr, ds_steal_cstr(&ip_s))) { + if (*old_ip4_pptr) { + free(CONST_CAST(void *, *old_ip4_pptr)); + } + *old_ip4_pptr = ds_steal_cstr(&ip_s); + } + } else { + if (*old_ip4_pptr) { + free(CONST_CAST(void *, *old_ip4_pptr)); + *old_ip4_pptr = NULL; + } + } + } else { + if (*old_ip4_pptr) { + free(CONST_CAST(void *, *old_ip4_pptr)); + *old_ip4_pptr = NULL; + } + } +} + enum engine_node_state en_global_config_run(struct engine_node *node , void *data) { @@ -117,6 +147,27 @@ en_global_config_run(struct engine_node *node , void *data) } } + const char *dst_monitor_mac = smap_get(&nb->options, + "svc_monitor_mac_dst"); + if (dst_monitor_mac) { + if (eth_addr_from_string(dst_monitor_mac, + &config_data->svc_monitor_mac_ea_dst)) { + snprintf(config_data->svc_monitor_mac_dst, + sizeof config_data->svc_monitor_mac_dst, + ETH_ADDR_FMT, + ETH_ADDR_ARGS(config_data->svc_monitor_mac_ea_dst)); + } else { + dst_monitor_mac = NULL; + } + } + + const char *monitor_ip4 = smap_get(&nb->options, "svc_monitor_ip4"); + update_svc_monitor_addr(monitor_ip4, &config_data->svc_monitor_ip4); + const char *monitor_ip4_dst = smap_get(&nb->options, + "svc_monitor_ip4_dst"); + update_svc_monitor_addr(monitor_ip4_dst, + &config_data->svc_monitor_ip4_dst); + struct smap *options = &config_data->nb_options; smap_destroy(options); smap_clone(options, &nb->options); @@ -132,6 +183,15 @@ en_global_config_run(struct engine_node *node , void *data) config_data->svc_monitor_mac); } + if (!dst_monitor_mac) { + eth_addr_random(&config_data->svc_monitor_mac_ea_dst); + snprintf(config_data->svc_monitor_mac_dst, + sizeof 
config_data->svc_monitor_mac_dst, ETH_ADDR_FMT, + ETH_ADDR_ARGS(config_data->svc_monitor_mac_ea_dst)); + smap_replace(options, "svc_monitor_mac_dst", + config_data->svc_monitor_mac_dst); + } + bool ic_vxlan_mode = false; const struct nbrec_logical_switch *nbs; NBREC_LOGICAL_SWITCH_TABLE_FOR_EACH (nbs, nbrec_ls_table) { @@ -254,6 +314,21 @@ global_config_nb_global_handler(struct engine_node *node, void *data) return EN_UNHANDLED; } + if (config_out_of_sync(&nb->options, &config_data->nb_options, + "svc_monitor_mac_dst", true)) { + return EN_UNHANDLED; + } + + if (config_out_of_sync(&nb->options, &config_data->nb_options, + "svc_monitor_ip4", false)) { + return EN_UNHANDLED; + } + + if (config_out_of_sync(&nb->options, &config_data->nb_options, + "svc_monitor_ip4_dst", false)) { + return EN_UNHANDLED; + } + /* Check if max_tunid has changed or not. */ if (config_out_of_sync(&nb->options, &config_data->nb_options, "max_tunid", true)) { diff --git a/northd/en-global-config.h b/northd/en-global-config.h index 55a1e420b..b76c0775a 100644 --- a/northd/en-global-config.h +++ b/northd/en-global-config.h @@ -37,13 +37,19 @@ struct ed_type_global_config { const struct nbrec_nb_global *nb_global; const struct sbrec_sb_global *sb_global; - /* MAC allocated for service monitor usage. Just one mac is allocated + /* MAC allocated for service monitor usage. Just one pair is allocated * for this purpose and ovn-controller's on each chassis will make use - * of this mac when sending out the packets to monitor the services + * of this pair when sending out the packets to monitor the services * defined in Service_Monitor Southbound table. Since these packets - * are locally handled, having just one mac is good enough. */ + * are locally handled, having just one pair is good enough. 
*/ char svc_monitor_mac[ETH_ADDR_STRLEN + 1]; struct eth_addr svc_monitor_mac_ea; + char svc_monitor_mac_dst[ETH_ADDR_STRLEN + 1]; + struct eth_addr svc_monitor_mac_ea_dst; + + /* IP configured for LB and NF service monitor usage. */ + const char *svc_monitor_ip4; + const char *svc_monitor_ip4_dst; struct chassis_features features; diff --git a/northd/en-northd.c b/northd/en-northd.c index 595247e97..6c2cb6251 100644 --- a/northd/en-northd.c +++ b/northd/en-northd.c @@ -115,6 +115,10 @@ northd_get_input_data(struct engine_node *node, input_data->sb_options = &global_config->sb_options; input_data->svc_monitor_mac = global_config->svc_monitor_mac; input_data->svc_monitor_mac_ea = global_config->svc_monitor_mac_ea; + input_data->svc_monitor_mac_dst = global_config->svc_monitor_mac_dst; + input_data->svc_monitor_mac_ea_dst = global_config->svc_monitor_mac_ea_dst; + input_data->svc_monitor_ip4 = global_config->svc_monitor_ip4; + input_data->svc_monitor_ip4_dst = global_config->svc_monitor_ip4_dst; input_data->features = &global_config->features; input_data->vxlan_mode = global_config->vxlan_mode; diff --git a/northd/en-sync-sb.c b/northd/en-sync-sb.c index a111f14fd..c37c7bc04 100644 --- a/northd/en-sync-sb.c +++ b/northd/en-sync-sb.c @@ -49,7 +49,8 @@ static void sync_addr_sets(struct ovsdb_idl_txn *ovnsb_txn, const struct sbrec_address_set_table *, const struct lr_stateful_table *, const struct ovn_datapaths *, - const char *svc_monitor_macp); + const char *svc_monitor_macp, + const char *svc_monitor_macp_dst); static const struct sbrec_address_set *sb_address_set_lookup_by_name( struct ovsdb_idl_index *, const char *name); static void update_sb_addr_set(struct sorted_array *, @@ -104,7 +105,8 @@ en_sync_to_sb_addr_set_run(struct engine_node *node, void *data OVS_UNUSED) nb_port_group_table, sb_address_set_table, &lr_stateful_data->table, &northd_data->lr_datapaths, - global_config->svc_monitor_mac); + global_config->svc_monitor_mac, + 
global_config->svc_monitor_mac_dst); return EN_UPDATED; } @@ -464,7 +466,8 @@ sync_addr_sets(struct ovsdb_idl_txn *ovnsb_txn, const struct sbrec_address_set_table *sb_address_set_table, const struct lr_stateful_table *lr_statefuls, const struct ovn_datapaths *lr_datapaths, - const char *svc_monitor_macp) + const char *svc_monitor_macp, + const char *svc_monitor_macp_dst) { struct shash sb_address_sets = SHASH_INITIALIZER(&sb_address_sets); @@ -474,8 +477,11 @@ sync_addr_sets(struct ovsdb_idl_txn *ovnsb_txn, shash_add(&sb_address_sets, sb_address_set->name, sb_address_set); } - /* Service monitor MAC. */ - struct sorted_array svc = sorted_array_create(&svc_monitor_macp, 1, false); + /* Service monitor MACs. */ + const char *svc_macs[] = {svc_monitor_macp, svc_monitor_macp_dst}; + size_t n_macs = sizeof(svc_macs) / sizeof(svc_macs[0]); + struct sorted_array svc = sorted_array_create(svc_macs, n_macs, + false); sync_addr_set(ovnsb_txn, "svc_monitor_mac", &svc, &sb_address_sets); sorted_array_destroy(&svc); diff --git a/northd/northd.c b/northd/northd.c index 382182ca6..6664a3748 100644 --- a/northd/northd.c +++ b/northd/northd.c @@ -2985,7 +2985,9 @@ get_service_mon(const struct hmap *monitor_map, static struct service_monitor_info * create_or_get_service_mon(struct ovsdb_idl_txn *ovnsb_txn, struct hmap *monitor_map, - const char *ip, const char *logical_port, + const char *type, const char *ip, + const char *logical_port, + const char *logical_input_port, uint16_t service_port, const char *protocol, const char *chassis_name) { @@ -3009,9 +3011,14 @@ create_or_get_service_mon(struct ovsdb_idl_txn *ovnsb_txn, struct sbrec_service_monitor *sbrec_mon = sbrec_service_monitor_insert(ovnsb_txn); + sbrec_service_monitor_set_type(sbrec_mon, type); sbrec_service_monitor_set_ip(sbrec_mon, ip); sbrec_service_monitor_set_port(sbrec_mon, service_port); sbrec_service_monitor_set_logical_port(sbrec_mon, logical_port); + if (logical_input_port) { + 
sbrec_service_monitor_set_logical_input_port(sbrec_mon, + logical_input_port); + } sbrec_service_monitor_set_protocol(sbrec_mon, protocol); if (chassis_name) { sbrec_service_monitor_set_chassis_name(sbrec_mon, chassis_name); @@ -3022,6 +3029,99 @@ create_or_get_service_mon(struct ovsdb_idl_txn *ovnsb_txn, return mon_info; } +static void +ovn_nf_svc_create(struct ovsdb_idl_txn *ovnsb_txn, + struct hmap *monitor_map, + struct sset *svc_monitor_lsps, + struct hmap *ls_ports, + const char *mac_src, const char *mac_dst, + const char *ip_src, const char *ip_dst, + const char *logical_port, const char *logical_input_port, + const struct smap *health_check_options) +{ + if (!ip_src || !ip_dst || !mac_src || !mac_dst) { + static struct vlog_rate_limit rl = + VLOG_RATE_LIMIT_INIT(1, 1); + VLOG_ERR_RL(&rl, "NetworkFunction: invalid service monitor src_mac:%s " + "dst_mac:%s src_ip:%s dst_ip:%s\n", + mac_src, mac_dst, ip_src, ip_dst); + return; + } + + const char *ports[] = {logical_port, logical_input_port}; + size_t n_ports = sizeof(ports) / sizeof(ports[0]); + const char *chassis_name = NULL; + bool port_up = true; + + for (int i = 0; i < n_ports; i++) { + const char *port = ports[i]; + sset_add(svc_monitor_lsps, port); + struct ovn_port *op = ovn_port_find(ls_ports, port); + if (op == NULL) { + static struct vlog_rate_limit rl = + VLOG_RATE_LIMIT_INIT(1, 1); + VLOG_ERR_RL(&rl, "NetworkFunction: skip health check, port:%s " + "not found\n", port); + return; + } + + if (op->sb && op->sb->chassis) { + if (chassis_name == NULL) { + chassis_name = op->sb->chassis->name; + } else if (strcmp(chassis_name, op->sb->chassis->name)) { + static struct vlog_rate_limit rl = + VLOG_RATE_LIMIT_INIT(1, 1); + VLOG_ERR_RL(&rl, "NetworkFunction: chassis mismatch " + " chassis:%s port:%s\n", op->sb->chassis->name, port); + } + } + port_up &= (op->sb->n_up && op->sb->up[0]); + } + + + struct service_monitor_info *mon_info = + create_or_get_service_mon(ovnsb_txn, monitor_map, + 
"network-function", ip_dst, + logical_port, + logical_input_port, + 0, + "icmp", + chassis_name); + ovs_assert(mon_info); + sbrec_service_monitor_set_options( + mon_info->sbrec_mon, health_check_options); + + if (!mon_info->sbrec_mon->src_mac || + strcmp(mon_info->sbrec_mon->src_mac, mac_src)) { + sbrec_service_monitor_set_src_mac(mon_info->sbrec_mon, + mac_src); + } + + if (!mon_info->sbrec_mon->mac || + strcmp(mon_info->sbrec_mon->mac, mac_dst)) { + sbrec_service_monitor_set_mac(mon_info->sbrec_mon, + mac_dst); + } + + if (!mon_info->sbrec_mon->src_ip || + strcmp(mon_info->sbrec_mon->src_ip, ip_src)) { + sbrec_service_monitor_set_src_ip(mon_info->sbrec_mon, ip_src); + } + + if (!mon_info->sbrec_mon->ip || + strcmp(mon_info->sbrec_mon->ip, ip_dst)) { + sbrec_service_monitor_set_ip(mon_info->sbrec_mon, ip_dst); + } + + if (!port_up + && mon_info->sbrec_mon->status + && !strcmp(mon_info->sbrec_mon->status, "online")) { + sbrec_service_monitor_set_status(mon_info->sbrec_mon, + "offline"); + } + mon_info->required = true; +} + static void ovn_lb_svc_create(struct ovsdb_idl_txn *ovnsb_txn, const struct ovn_northd_lb *lb, @@ -3068,8 +3168,10 @@ ovn_lb_svc_create(struct ovsdb_idl_txn *ovnsb_txn, struct service_monitor_info *mon_info = create_or_get_service_mon(ovnsb_txn, monitor_map, + "load-balancer", backend->ip_str, backend_nb->logical_port, + NULL, backend->port, protocol, chassis_name); @@ -3306,12 +3408,16 @@ build_lb_datapaths(const struct hmap *lbs, const struct hmap *lb_groups, } static void -build_lb_svcs( +build_svcs( struct ovsdb_idl_txn *ovnsb_txn, const struct sbrec_service_monitor_table *sbrec_service_monitor_table, const char *svc_monitor_mac, const struct eth_addr *svc_monitor_mac_ea, + const char *svc_monitor_mac_dst, + const char *svc_monitor_ip4, + const char *svc_monitor_ip4_dst, struct hmap *ls_ports, struct hmap *lb_dps_map, + const struct nbrec_network_function_table *nbrec_network_function_table, struct sset *svc_monitor_lsps, struct hmap 
*svc_monitor_map) { @@ -3334,6 +3440,21 @@ build_lb_svcs( svc_monitor_lsps); } + const struct nbrec_network_function *nbrec_nf; + NBREC_NETWORK_FUNCTION_TABLE_FOR_EACH (nbrec_nf, + nbrec_network_function_table) { + if (nbrec_nf->health_check) { + ovn_nf_svc_create(ovnsb_txn, + svc_monitor_map, + svc_monitor_lsps, + ls_ports, + svc_monitor_mac, svc_monitor_mac_dst, + svc_monitor_ip4, svc_monitor_ip4_dst, + nbrec_nf->outport->name, nbrec_nf->inport->name, + &nbrec_nf->health_check->options); + } + } + struct service_monitor_info *mon_info; HMAP_FOR_EACH_SAFE (mon_info, hmap_node, svc_monitor_map) { if (!mon_info->required) { @@ -3402,18 +3523,9 @@ build_lb_count_dps(struct hmap *lb_dps_map, */ static void build_lb_port_related_data( - struct ovsdb_idl_txn *ovnsb_txn, - const struct sbrec_service_monitor_table *sbrec_service_monitor_table, - const char *svc_monitor_mac, - const struct eth_addr *svc_monitor_mac_ea, - struct ovn_datapaths *lr_datapaths, struct hmap *ls_ports, - struct hmap *lb_dps_map, struct hmap *lb_group_dps_map, - struct sset *svc_monitor_lsps, - struct hmap *svc_monitor_map) + struct ovn_datapaths *lr_datapaths, + struct hmap *lb_dps_map, struct hmap *lb_group_dps_map) { - build_lb_svcs(ovnsb_txn, sbrec_service_monitor_table, svc_monitor_mac, - svc_monitor_mac_ea, ls_ports, lb_dps_map, - svc_monitor_lsps, svc_monitor_map); build_lswitch_lbs_from_lrouter(lr_datapaths, lb_dps_map, lb_group_dps_map); } @@ -17465,13 +17577,6 @@ build_ls_stateful_flows(const struct ls_stateful_record *ls_stateful_rec, build_lb_hairpin(ls_stateful_rec, od, lflows, ls_stateful_rec->lflow_ref); } -static struct nbrec_network_function * -network_function_get_active(const struct nbrec_network_function_group *nfg) -{ - /* Another patch adds the healthmon support. This is temporary. */ - return nfg->n_network_function ? nfg->network_function[0] : NULL; -} - /* For packets received on tunnel and egressing towards a network-function port * commit the tunnel interface id in CT. 
This will be utilized when the packet * comes out of the other network-function interface of the service VM. The @@ -17514,6 +17619,101 @@ build_lswitch_stateful_nf(struct ovn_port *op, ds_cstr(match), ds_cstr(actions), lflow_ref); } +static struct nbrec_network_function * +network_function_get_active(const struct nbrec_network_function_group *nfg) +{ + return nfg->network_function_active; +} + +static void +network_function_update_active(const struct nbrec_network_function_group *nfg, + const struct hmap *svc_monitor_map, + const char *svc_monitor_ip4_dst) +{ + if (!nfg->n_network_function) { + static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(5, 1); + VLOG_ERR_RL(&rl, "NetworkFunction: No network_function found in " + "network_function_group %s", nfg->name); + if (nfg->network_function_active) { + nbrec_network_function_group_set_network_function_active(nfg, + NULL); + } + return; + } + struct nbrec_network_function *nf_active = nfg->network_function[0]; + struct nbrec_network_function *nf_active_prev = NULL; + uint16_t best_score = 0; + bool healthy_nf_available = false; + if (nfg->network_function_active) { + nf_active_prev = nfg->network_function_active; + } + + for (int i = 0; i < nfg->n_network_function; i++) { + struct nbrec_network_function *nf = nfg->network_function[i]; + uint16_t curr_score = 0; + if (nf->health_check == NULL) { + VLOG_DBG("NetworkFunction: Health check is not configured for " + "network_function %s", nf->name); + /* Consider network_function as healthy if health_check is + * not configured. 
*/ + curr_score += 3; + healthy_nf_available = true; + } else { + struct service_monitor_info *mon_info = + get_service_mon(svc_monitor_map, svc_monitor_ip4_dst, + nf->outport->name, 0, "icmp"); + if (mon_info == NULL) { + static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(5, 1); + VLOG_ERR_RL(&rl, "NetworkFunction: Service_monitor is not " + "found for network_function:%s", nf->name); + } else if (mon_info->sbrec_mon->status + && !strcmp(mon_info->sbrec_mon->status, "online")) { + curr_score += 3; + healthy_nf_available = true; + } + } + + if (nf_active_prev && (nf == nf_active_prev)) { + curr_score += 1; + } + + if (curr_score > best_score) { + nf_active = nf; + best_score = curr_score; + } + } + + if (!healthy_nf_available) { + static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(5, 1); + VLOG_WARN_RL(&rl, "NetworkFunction: No healthy network_function found " + "in network_function_group %s, " + "selected network_function %s as active", nfg->name, + nf_active->name); + } + + if (nf_active_prev != nf_active) { + static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(5, 1); + VLOG_INFO_RL(&rl, "NetworkFunction: Update active network_function %s " + "in network_function_group %s", + nf_active->name, nfg->name); + nbrec_network_function_group_set_network_function_active(nfg, + nf_active); + } +} + +static void build_network_function_active( + const struct nbrec_network_function_group_table *nbrec_nfg_table, + struct hmap *svc_monitor_map, + const char *svc_monitor_ip4_dst) +{ + const struct nbrec_network_function_group *nbrec_nfg; + NBREC_NETWORK_FUNCTION_GROUP_TABLE_FOR_EACH (nbrec_nfg, + nbrec_nfg_table) { + network_function_update_active(nbrec_nfg, svc_monitor_map, + svc_monitor_ip4_dst); + } +} + static void consider_network_function(struct lflow_table *lflows, const struct ovn_datapath *od, @@ -19445,18 +19645,25 @@ ovnnb_db_run(struct northd_input *input_data, input_data->sbrec_ha_chassis_grp_by_name, &data->ls_datapaths.datapaths, 
&data->lr_datapaths.datapaths, &data->ls_ports, &data->lr_ports); - build_lb_port_related_data(ovnsb_txn, - input_data->sbrec_service_monitor_table, - input_data->svc_monitor_mac, - &input_data->svc_monitor_mac_ea, - &data->lr_datapaths, &data->ls_ports, + build_lb_port_related_data(&data->lr_datapaths, &data->lb_datapaths_map, - &data->lb_group_datapaths_map, - &data->svc_monitor_lsps, - &data->svc_monitor_map); + &data->lb_group_datapaths_map); + build_svcs(ovnsb_txn, input_data->sbrec_service_monitor_table, + input_data->svc_monitor_mac, + &input_data->svc_monitor_mac_ea, + input_data->svc_monitor_mac_dst, + input_data->svc_monitor_ip4, + input_data->svc_monitor_ip4_dst, + &data->ls_ports, &data->lb_datapaths_map, + input_data->nbrec_network_function_table, + &data->svc_monitor_lsps, &data->svc_monitor_map); build_lb_count_dps(&data->lb_datapaths_map, ods_size(&data->ls_datapaths), ods_size(&data->lr_datapaths)); + build_network_function_active( + input_data->nbrec_network_function_group_table, + &data->svc_monitor_map, + input_data->svc_monitor_ip4_dst); build_ipam(&data->ls_datapaths.datapaths); build_lrouter_groups(&data->lr_ports, &data->lr_datapaths); build_ip_mcast(ovnsb_txn, input_data->sbrec_ip_multicast_table, diff --git a/northd/northd.h b/northd/northd.h index 98eec03aa..cdeba48e0 100644 --- a/northd/northd.h +++ b/northd/northd.h @@ -66,6 +66,10 @@ struct northd_input { const struct smap *sb_options; const char *svc_monitor_mac; struct eth_addr svc_monitor_mac_ea; + const char *svc_monitor_mac_dst; + struct eth_addr svc_monitor_mac_ea_dst; + const char *svc_monitor_ip4; + const char *svc_monitor_ip4_dst; const struct chassis_features *features; bool vxlan_mode; @@ -246,8 +250,8 @@ struct lflow_input { const struct hmap *lb_datapaths_map; const struct sset *bfd_ports; const struct chassis_features *features; - const struct hmap *svc_monitor_map; bool ovn_internal_version_changed; + const struct hmap *svc_monitor_map; const char *svc_monitor_mac; const 
struct sampling_app_table *sampling_apps; struct group_ecmp_route_data *route_data; diff --git a/northd/ovn-northd.8.xml b/northd/ovn-northd.8.xml index 460a481d9..4c7d958b4 100644 --- a/northd/ovn-northd.8.xml +++ b/northd/ovn-northd.8.xml @@ -802,6 +802,19 @@ <code>reg0[13]</code> bit is set to 1 (which acts as a hint for the next tables to commit the label to conntrack). </li> + <li> + For <code>allow</code> and <code>allow-related</code> ACL, an + additonal set of registers get set in case the ACL has the column + <code>network_function_group</code> set to the <var>id</var> of one + of the entities in <code>Network_Function_Group</code> table. The + <var>id</var> is an internally generated unique identifier for a + <code>Network_Function_Group</code> entity. The flow sets + <code>reg8[21] = 1</code> (to indicate need for packet redirection), + <code>reg8[22] = 1</code> (to indicate this is a request packet) and + <code>reg5[0..7] = <var>id</var></code>. These registers are later + used in the <code>Network Function</code> table. + </li> + <li> <code>allow-stateless</code> ACLs translate into logical flows that set the allow bit and advance to the next table. @@ -886,6 +899,12 @@ accomplishes the same thing but also logs the traffic. </li> + <li> + The priority-65532 flows that allow response and related traffic, also + set <code>reg8[21] = ct_label.network_function_group</code>, which gets + checked in the <code>Network Function</code> table. + </li> + <li> A priority-65532 flow that sets the allow bit for any traffic that is considered related to a committed flow in the connection tracker (e.g., @@ -1470,12 +1489,109 @@ action based on a hint provided by the previous tables (with a match for <code>reg0[1] == 1 && reg0[13] == 0</code>). </li> + + <li> + Correspinding to each of the two priority 100 flows above, a priority + 110 flow is added, which has the following extra match and + action, but otherwise identical to the priority 100 flow. 
+ Match: <code>reg8[21] == 1</code> (packet matched an ACL with + <code>network_function_group</code> set) + Action: <code>ct_label.network_function_group = 1; + ct_label.network_function_group_id = reg5[0..7];</code> + This is to commit the network_function information in conntrack so that + the response and related packets can be redirected to it as well. + </li> + <li> A priority-0 flow that simply moves traffic to the next table. </li> </ul> - <h3>Ingress Table 24: ARP/ND responder</h3> + <h3>Ingress Table 25: Network Function</h3> + <p> + This table implements the packet redirection rules for network function. + If <code>network_function_group</code> column in <code>from-lport</code> + ACL is set to <var>id</var> of a <code>Network_Function_Group</code> + entity, the ingress ACL eval stage sets a set of registers as described + before. Those registers get used here. In case of <code>to-lport</code> + ACLs, the request packet is redirected in egress pipeline as described + later. The response is handled here using the network_function_group id + committed in ct_label during request processing. + </p> + + <p> + There can be one or more network_functions in a group. Health monitoring + is done by sending datapath probes as par parameters defined in + <code>Network_Function_Health_Check</code>. One of the healthy + network_functions is selected for each network_function_group. + If none are healthy, or if health monitoring is not configured, any one + from the group is selected. The rules in this table redirects request + packets for <code>from-lport</code> ACLs and response packets for + <code>to-lport</code> ACLs to the selected network_function's + <code>inport</code>. If the network_function ports are not present on + this logical switch, their child ports if any, are used. In the below + statements when network function ports are referred it implies the parent + or child ports as applicable to this logical switch. 
+ </p> + + <ul> + <li> + For each network_function port <var>P</var>, a priority-100 flow is + added that matches <code>inport == <var>P</var></code> and advances + packets to the next table. Thus packets coming from network function + are not subject to redirection. This flow also sets + <code>reg5[16..31] = ct_label.tun_if_id</code>. This is used for + tunneling packet to originating host in case of cross host traffic + redirection for VLAN subnet. This ct_label field stores the openflow + tunnel interface id of the originating host for this connection and + gets populated in egress <code>Stateful</code> table. + </li> + + <li> + For each network_function_group <var>id</var>, a priority-99 flow + matches <code>reg8[21] == 1 && reg8[22] == 1 && + reg5[0..7] == <var>id</var></code> and sets + <code>outport=<var>P</var>; output;</code> where <var>P</var> is the + <code>inport</code> of the selected network function. This ensures + redirection of request packets for flows matching + <code>from-lport</code> ACLs with network_function. + </li> + + <li> + For each network_function_group <var>id</var>, a priority-99 rule + matches <code>reg8[21] == 1 && reg8[22] == 0 && + ct_label.network_function_group_id == <var>id</var></code> and takes + identical action as above. This ensures redirection of response and + related packets matching <code>to-lport</code> ACLs with + network_function. + </li> + + <li> + In each of the above cases, when the same packet comes out unchanged + through the other port of the network_function, it would match the + priority 100 flow and be forwarded to the next table. + </li> + + <li> + One priority-100 rule to skip redirection of multicast packets that hit + a network_function ACL. Match on <code>8[21] == 1 && + eth.mcast</code> and action is to advance to the next table. + </li> + + <li> + One priority-1 rule that checks <code>reg8[[21]] == 1</code>, and drops + such packets. 
This is to address the case where a packet hit an ACL + with network function but the network function does not have ports or + child ports on this logical switch. + </li> + + <li> + One priority-0 fallback flow that matches all packets and advances to + the next table. + </li> + </ul> + + <h3>Ingress Table 25: ARP/ND responder</h3> <p> This table implements ARP/ND responder in a logical switch for known @@ -1810,7 +1926,7 @@ output; </li> </ul> - <h3>Ingress Table 25: DHCP option processing</h3> + <h3>Ingress Table 26: DHCP option processing</h3> <p> This table adds the DHCPv4 options to a DHCPv4 packet from the @@ -1871,7 +1987,7 @@ next; </li> </ul> - <h3>Ingress Table 26: DHCP responses</h3> + <h3>Ingress Table 27: DHCP responses</h3> <p> This table implements DHCP responder for the DHCP replies generated by @@ -1952,7 +2068,7 @@ output; </li> </ul> - <h3>Ingress Table 27 DNS Lookup</h3> + <h3>Ingress Table 28 DNS Lookup</h3> <p> This table looks up and resolves the DNS names to the corresponding @@ -1981,7 +2097,7 @@ reg0[4] = dns_lookup(); next; </li> </ul> - <h3>Ingress Table 28 DNS Responses</h3> + <h3>Ingress Table 29 DNS Responses</h3> <p> This table implements DNS responder for the DNS replies generated by @@ -2016,7 +2132,7 @@ output; </li> </ul> - <h3>Ingress table 29 External ports</h3> + <h3>Ingress table 30 External ports</h3> <p> Traffic from the <code>external</code> logical ports enter the ingress @@ -2059,7 +2175,7 @@ output; </li> </ul> - <h3>Ingress Table 30 Destination Lookup</h3> + <h3>Ingress Table 31 Destination Lookup</h3> <p> This table implements switching behavior. It contains these logical @@ -2089,6 +2205,14 @@ output; on the logical switch. </li> + <li> + A priority-100 flow that matches <code>reg8[23] == 1</code> and does + <code>output</code> action. 
This ensures that packets that got injected + back into this table from egress table <code>Network Function</code> + (after it set the <code>outport</code> for packet redirection) get + forwarded without any further processing. + </li> + <li> <p> For any logical port that's defined as a target of routing protocol @@ -2285,7 +2409,7 @@ output; </li> </ul> - <h3>Ingress Table 31 Destination unknown</h3> + <h3>Ingress Table 32 Destination unknown</h3> <p> This table handles the packets whose destination was not found or @@ -2384,6 +2508,17 @@ output; logical router datapath from logical switch datapath for routing. </p> + <p> + This table also has a priority-110 flow for each network_function + <code>inport</code> <var>P</var> that matches <code>inport == + <var>P</var></code>. The action is to skip all the egress tables up to + the <code>Network Function</code> table and advance the packet directly + to the table after that. This is for the case where packet redirection + happens in egress <code>Network Function</code> table. The same packet + when it comes out of the other port of network function, they should not + be processed again by the same egress stages, specially they should + skip the conntrack processing. + </p> <h3>Egress Table 3: Pre-LB</h3> @@ -2466,6 +2601,13 @@ output; <code>reg8[18]</code>. </p> + <p> + Also like with ingress ACLs, egress ACLs can have network_function_group + <var>id</var> and in that case the flow will set <code>reg8[21] = 1; + reg8[22] = 1; reg5[0..7] = <var>id</var></code>. These registers are used + in the <code>Network Function</code> table. + </p> + <p> Also like with ingress ACLs, egress ACLs can have a configured <code>tier</code>. If a tier is configured, then the current tier @@ -2565,7 +2707,84 @@ output; there are no rules added for load balancing new connections. 
</p> - <h3>Egress Table 12: Egress Port Security - check</h3> + <ul> + <li> + A priority 120 flow is added for each network function port + <var>P</var> that is identical to the priority 100 flow except for + additional match <code>outport == <var>P</var></code> and additional + action <code>ct_label.tun_if_id = reg5[16..31]</code>. In case packets + redirected by network function logic gets tunneled from host1 to host2 + where the network function port resides, host2's physical table 0 + populates reg5[16..31] with the openflow tunnel interface id on which + the packet was received. This priority 120 flow commits the tunnel id + to the ct_label. That way, when the same packet comes out of the other + port of the network function it can retrieve this information from the + peer port's CT entry and tunnel the packet back to host1. This is + required to make cross host traffic redirection work for VLAN subnet. + </li> + </ul> + + <h3>Egress Table 12: Network Function</h3> + + <p> + This table is similar to ingress table <code>Network Function</code> + except for the role of <code>from-lport</code> and <code>to-lport</code> + ACLs reversed, and the packet redirection happening to the selected + network function's <code>outport</code> rather than to its + <code>inport</code>. Another difference is that the action injects the + packets back into the ingress pipeline. + </p> + + <ul> + <li> + Similar to ingress <code>Network Function</code> a priority-100 flow is + added for each network_function port, that matches the inport with the + network function port and advances the packet to the next table. 
+ </li> + + <li> + For each network_function_group <var>id</var>, a priority-99 flow + matches <code>reg8[21] == 1 && reg8[22] == 1 && + reg5[0..7] == <var>id</var></code> and sets <code>outport=<var>P</var>; + reg8[23] = 1; next(pipeline=ingress, table=<var>T</var>)</code> where + <var>P</var> is the <code>outport</code> of the selected + network_function and <var>T</var> is the ingress table + <code>Destination Lookup</code>. This ensures redirection of request + packets matching <code>to-lport</code> ACL with network_function. The + packets are injected back to the ingress pipeline from where they get + sent out skipping any further lookup because of <code>reg8[23]</code>. + </li> + + <li> + For each network_function_group <var>id</var>, a priority-99 rule + matches <code>reg8[21] == 1 && reg8[22] == 0 && + ct_label.network_function_group_id == <var>id</var></code> and takes + identical action as above. This ensures redirection if response and + related packets for flows matching <code>from-lport</code> ACLs with + network_function. + </li> + + <li> + In each of the above cases, when the same packet comes out unchanged + through the other port of the network_function, it would match the + priority 100 flow and be forwarded to the next table. + </li> + + <li> + One priority-100 multicast match flow same as + ingress <code>Network Function</code>. + </li> + + <li> + One priority-1 flow same as ingress <code>Network Function</code>. + </li> + + <li> + One priority-0 flow same as ingress <code>Network Function</code>. 
+ </li> + </ul> + + <h3>Egress Table 13: Egress Port Security - check</h3> <p> This is similar to the port security logic in table @@ -2594,7 +2813,7 @@ output; </li> </ul> - <h3>Egress Table 13: Egress Port Security - Apply</h3> + <h3>Egress Table 14: Egress Port Security - Apply</h3> <p> This is similar to the ingress port security logic in ingress table diff --git a/ovn-sb.ovsschema b/ovn-sb.ovsschema index 8123679b7..456ee92c1 100644 --- a/ovn-sb.ovsschema +++ b/ovn-sb.ovsschema @@ -1,7 +1,7 @@ { "name": "OVN_Southbound", "version": "21.3.0", - "cksum": "3714238857 35128", + "cksum": "2023557864 35455", "tables": { "SB_Global": { "columns": { @@ -509,14 +509,20 @@ "isRoot": true}, "Service_Monitor": { "columns": { + "type": {"type": {"key": { + "type": "string", + "enum": ["set", ["load-balancer", + "network-function"]]}}}, "ip": {"type": "string"}, + "mac": {"type": "string"}, "protocol": { "type": {"key": {"type": "string", - "enum": ["set", ["tcp", "udp"]]}, + "enum": ["set", ["tcp", "udp", "icmp"]]}, "min": 0, "max": 1}}, "port": {"type": {"key": {"type": "integer", "minInteger": 0, "maxInteger": 65535}}}, + "logical_input_port": {"type": "string"}, "logical_port": {"type": "string"}, "src_mac": {"type": "string"}, "src_ip": {"type": "string"}, diff --git a/ovn-sb.xml b/ovn-sb.xml index 490394ff8..e535b34d9 100644 --- a/ovn-sb.xml +++ b/ovn-sb.xml @@ -4994,10 +4994,20 @@ tcp.flags = RST; service monitor. </p> + <column name="type"> + The type of the service. Supported values are "load-balancer" and + "network-function". + </column> + <column name="ip"> + Destination IP used in monitor packets. For load-balancer this is the IP of the service to be monitored. Only IPv4 is supported. </column> + <column name="mac"> + Destination MAC address used in monitor packets for network-function. + </column> + <column name="protocol"> The protocol of the service. </column> @@ -5006,10 +5016,20 @@ tcp.flags = RST; The TCP or UDP port of the service. 
</column> + <column name="logical_input_port"> + This is applicable only for network-function type. The VIF of the + logical port on which monitor packets have to be sent. The + <code>ovn-controller</code> that binds this <code>logical_port</code> + monitors the service by sending periodic monitor packets. + </column> + <column name="logical_port"> The VIF of the logical port on which the service is running. The <code>ovn-controller</code> that binds this <code>logical_port</code> - monitors the service by sending periodic monitor packets. + monitors the service by sending periodic monitor packets. For + load-balancer this is the port to which monitor packets are sent and + from which response packets are received. For network-function this + is the port from which the forwarded monitor packets are received. </column> <column name="src_mac"> diff --git a/tests/ovn-northd.at b/tests/ovn-northd.at index c80b5aca6..de6c2ad51 100644 --- a/tests/ovn-northd.at +++ b/tests/ovn-northd.at @@ -17847,7 +17847,7 @@ AT_CLEANUP ]) OVN_FOR_EACH_NORTHD_NO_HV([ -AT_SETUP([Check network-function]) +AT_SETUP([Check network function]) ovn_start # Create a NF and add it to a from-lport ACL. 
@@ -17879,6 +17879,13 @@ AT_CHECK( table=??(ls_in_acl_eval ), priority=2002 , match=(reg0[[8]] == 1 && (inport == @pg0 && ip4.dst == 10.0.0.3)), action=(reg8[[16]] = 1; reg0[[1]] = 1; reg8[[21]] = 1; reg8[[22]] = 1; reg5[[0..7]] = 1; next;) ]) +AT_CHECK([grep "ls_in_stateful" sw0flows | ovn_strip_lflows | grep 'network_function'], [0], [dnl + table=??(ls_in_stateful ), priority=100 , match=(reg0[[1]] == 1 && reg0[[13]] == 0), action=(ct_commit { ct_mark.blocked = 0; ct_mark.allow_established = reg0[[20]]; ct_label.acl_id = reg2[[16..31]]; ct_label.network_function_group = 0; ct_label.network_function_group_id = 0; }; next;) + table=??(ls_in_stateful ), priority=100 , match=(reg0[[1]] == 1 && reg0[[13]] == 1), action=(ct_commit { ct_mark.blocked = 0; ct_mark.allow_established = reg0[[20]]; ct_mark.obs_stage = reg8[[19..20]]; ct_mark.obs_collector_id = reg8[[8..15]]; ct_label.obs_point_id = reg9; ct_label.acl_id = reg2[[16..31]]; ct_label.network_function_group = 0; ct_label.network_function_group_id = 0; }; next;) + table=??(ls_in_stateful ), priority=110 , match=(reg0[[1]] == 1 && reg0[[13]] == 0 && reg8[[21]] == 1), action=(ct_commit { ct_mark.blocked = 0; ct_mark.allow_established = reg0[[20]]; ct_label.acl_id = reg2[[16..31]]; ct_label.network_function_group = 1; ct_label.network_function_group_id = reg5[[0..7]]; }; next;) + table=??(ls_in_stateful ), priority=110 , match=(reg0[[1]] == 1 && reg0[[13]] == 1 && reg8[[21]] == 1), action=(ct_commit { ct_mark.blocked = 0; ct_mark.allow_established = reg0[[20]]; ct_mark.obs_stage = reg8[[19..20]]; ct_mark.obs_collector_id = reg8[[8..15]]; ct_label.obs_point_id = reg9; ct_label.acl_id = reg2[[16..31]]; ct_label.network_function_group = 1; ct_label.network_function_group_id = reg5[[0..7]]; }; next;) +]) + AT_CHECK( [grep -E 'ls_(in|out)_network_function' sw0flows | ovn_strip_lflows | sort], [0], [dnl table=??(ls_in_network_function), priority=0 , match=(1), action=(next;) @@ -17899,6 +17906,10 @@ AT_CHECK([grep 
"ls_in_l2_lkup" sw0flows | ovn_strip_lflows | grep 'priority=100' table=??(ls_in_l2_lkup ), priority=100 , match=(reg8[[23]] == 1), action=(output;) ]) +AT_CHECK([grep "ls_out_pre_acl" sw0flows | ovn_strip_lflows | grep 'sw0-nf-p1'], [0], [dnl + table=??(ls_out_pre_acl ), priority=110 , match=(inport == "sw0-nf-p1"), action=(next(pipeline=egress, table=??);) +]) + AT_CHECK( [grep -E 'ls_(in|out)_acl_eval' sw0flows | ovn_strip_lflows | grep network_function_group | sort], [0], [dnl table=??(ls_in_acl_eval ), priority=65532, match=(!ct.est && ct.rel && !ct.new && ct_mark.blocked == 0), action=(reg0[[17]] = 1; reg8[[21]] = ct_label.network_function_group; reg8[[16]] = 1; ct_commit_nat;) @@ -18078,3 +18089,182 @@ ct_next(ct_state=new|trk) { AT_CLEANUP ]) + +OVN_FOR_EACH_NORTHD([ +AT_SETUP([Network function health check]) +AT_KEYWORDS([ovn]) +ovn_start + +check ovn-nbctl set nb_global . options:svc_monitor_ip4=169.254.100.10 +check ovn-nbctl set nb_global . options:svc_monitor_ip4_dst=169.254.100.11 +sw="sw0" +check ovn-nbctl ls-add $sw +for i in 1 2; do + port=$sw-p$i + check ovn-nbctl lsp-add $sw $port + check ovn-nbctl lsp-set-addresses $port "52:54:00:00:00:0$i" +done + +nfsw="nf-sw" +check ovn-nbctl ls-add $nfsw +for i in {1..4}; do + port=$nfsw-p$i + check ovn-nbctl lsp-add $nfsw $port + check ovn-sbctl set port_binding $port up=true + check ovn-nbctl lsp-add $sw child-$i $port 100 +done +check ovn-nbctl set logical_switch_port $nfsw-p1 options:receive_multicast=false options:lsp_learn_fdb=false options:network-function=true options:network-function-linked-port=$nfsw-p2 +check ovn-nbctl set logical_switch_port $nfsw-p2 options:receive_multicast=false options:lsp_learn_fdb=false options:network-function=true options:network-function-linked-port=$nfsw-p1 +check ovn-nbctl set logical_switch_port $nfsw-p3 options:receive_multicast=false options:lsp_learn_fdb=false options:network-function=true options:network-function-linked-port=$nfsw-p4 +check ovn-nbctl set 
logical_switch_port $nfsw-p4 options:receive_multicast=false options:lsp_learn_fdb=false options:network-function=true options:network-function-linked-port=$nfsw-p3 + +ovn-nbctl show +check ovn-nbctl network-function-add nf0 $nfsw-p1 $nfsw-p2 +check ovn-nbctl network-function-add nf1 $nfsw-p3 $nfsw-p4 +check_uuid ovn-nbctl create network_function_health_check name=nf_health_cfg options:interval=5 options:timeout=1 options:success_count=3 options:failure_count=3 +nf_health_uuid=$(ovn-nbctl --bare --columns _uuid find network_function_health_check name=nf_health_cfg) +nf0_uuid=$(ovn-nbctl --bare --columns _uuid find network_function name=nf0) +nf1_uuid=$(ovn-nbctl --bare --columns _uuid find network_function name=nf1) +check ovn-nbctl set network_function $nf0_uuid health_check=$nf_health_uuid +check ovn-nbctl set network_function $nf1_uuid health_check=$nf_health_uuid +check ovn-nbctl network-function-group-add nfg0 nf0 nf1 + +check ovn-nbctl pg-add pg0 $sw-p1 +check ovn-nbctl acl-add pg0 from-lport 1001 "inport == @pg0 && ip4.dst == 192.168.2.10" allow-related nfg0 +check ovn-nbctl acl-add pg0 to-lport 1002 "outport == @pg0 && ip4.src == 192.168.1.10" allow-related nfg0 +check ovn-nbctl --wait=sb sync + +# Set the service monitor for nf0 to online and nf1 to online +# and verify nf0 is considered active. 
+ +AS_BOX([Set the service monitor for nf0 to online and nf1 to offline]) +check ovn-sbctl set service_monitor $nfsw-p2 status=online +check ovn-sbctl set service_monitor $nfsw-p4 status=offline +wait_row_count Service_Monitor 1 logical_port=$nfsw-p2 status=online +wait_row_count Service_Monitor 1 logical_port=$nfsw-p4 status=offline +check ovn-nbctl --wait=sb sync + +ovn-sbctl dump-flows $sw > lflows +AT_CAPTURE_FILE([lflows]) + +AT_CHECK( + [grep -E 'ls_(in|out)_acl_eval' lflows | ovn_strip_lflows | grep pg0 | sort], [0], [dnl + table=??(ls_in_acl_eval ), priority=2001 , match=(reg0[[7]] == 1 && (inport == @pg0 && ip4.dst == 192.168.2.10)), action=(reg8[[16]] = 1; reg8[[21]] = 1; reg8[[22]] = 1; reg5[[0..7]] = 1; next;) + table=??(ls_in_acl_eval ), priority=2001 , match=(reg0[[8]] == 1 && (inport == @pg0 && ip4.dst == 192.168.2.10)), action=(reg8[[16]] = 1; reg0[[1]] = 1; reg8[[21]] = 1; reg8[[22]] = 1; reg5[[0..7]] = 1; next;) + table=??(ls_out_acl_eval ), priority=2002 , match=(reg0[[7]] == 1 && (outport == @pg0 && ip4.src == 192.168.1.10)), action=(reg8[[16]] = 1; reg8[[21]] = 1; reg8[[22]] = 1; reg5[[0..7]] = 1; next;) + table=??(ls_out_acl_eval ), priority=2002 , match=(reg0[[8]] == 1 && (outport == @pg0 && ip4.src == 192.168.1.10)), action=(reg8[[16]] = 1; reg0[[1]] = 1; reg8[[21]] = 1; reg8[[22]] = 1; reg5[[0..7]] = 1; next;) +]) + +AT_CHECK( + [grep -E 'ls_(in|out)_network_function' lflows | ovn_strip_lflows | sort], [0], [dnl + table=??(ls_in_network_function), priority=0 , match=(1), action=(next;) + table=??(ls_in_network_function), priority=1 , match=(reg8[[21]] == 1), action=(drop;) + table=??(ls_in_network_function), priority=100 , match=(inport == "child-1"), action=(reg5[[16..31]] = ct_label.tun_if_id; next;) + table=??(ls_in_network_function), priority=100 , match=(inport == "child-2"), action=(reg5[[16..31]] = ct_label.tun_if_id; next;) + table=??(ls_in_network_function), priority=100 , match=(reg8[[21]] == 1 && eth.mcast), action=(next;) + 
table=??(ls_in_network_function), priority=99 , match=(reg8[[21]] == 1 && reg8[[22]] == 0 && ct_label.network_function_group_id == 1), action=(outport = "child-1"; output;) + table=??(ls_in_network_function), priority=99 , match=(reg8[[21]] == 1 && reg8[[22]] == 1 && reg5[[0..7]] == 1), action=(outport = "child-1"; output;) + table=??(ls_out_network_function), priority=0 , match=(1), action=(next;) + table=??(ls_out_network_function), priority=1 , match=(reg8[[21]] == 1), action=(drop;) + table=??(ls_out_network_function), priority=100 , match=(outport == "child-1"), action=(next;) + table=??(ls_out_network_function), priority=100 , match=(outport == "child-2"), action=(next;) + table=??(ls_out_network_function), priority=100 , match=(reg8[[21]] == 1 && eth.mcast), action=(next;) + table=??(ls_out_network_function), priority=99 , match=(reg8[[21]] == 1 && reg8[[22]] == 0 && ct_label.network_function_group_id == 1), action=(outport = "child-2"; reg8[[23]] = 1; next(pipeline=ingress, table=??);) + table=??(ls_out_network_function), priority=99 , match=(reg8[[21]] == 1 && reg8[[22]] == 1 && reg5[[0..7]] == 1), action=(outport = "child-2"; reg8[[23]] = 1; next(pipeline=ingress, table=??);) +]) + +# Set the service monitor for nf0 to online and nf1 to online +# and verify nf0 is still the active. 
+ +AS_BOX([Set the service monitor for nf0 to online and nf1 to online]) +check ovn-sbctl set service_monitor $nfsw-p2 status=online +check ovn-sbctl set service_monitor $nfsw-p4 status=online +wait_row_count Service_Monitor 1 logical_port=$nfsw-p2 status=online +wait_row_count Service_Monitor 1 logical_port=$nfsw-p4 status=online +check ovn-nbctl --wait=sb sync + +ovn-sbctl dump-flows $sw > lflows +AT_CAPTURE_FILE([lflows]) + +AT_CHECK( + [grep -E 'ls_(in|out)_network_function' lflows | ovn_strip_lflows | sort], [0], [dnl + table=??(ls_in_network_function), priority=0 , match=(1), action=(next;) + table=??(ls_in_network_function), priority=1 , match=(reg8[[21]] == 1), action=(drop;) + table=??(ls_in_network_function), priority=100 , match=(inport == "child-1"), action=(reg5[[16..31]] = ct_label.tun_if_id; next;) + table=??(ls_in_network_function), priority=100 , match=(inport == "child-2"), action=(reg5[[16..31]] = ct_label.tun_if_id; next;) + table=??(ls_in_network_function), priority=100 , match=(reg8[[21]] == 1 && eth.mcast), action=(next;) + table=??(ls_in_network_function), priority=99 , match=(reg8[[21]] == 1 && reg8[[22]] == 0 && ct_label.network_function_group_id == 1), action=(outport = "child-1"; output;) + table=??(ls_in_network_function), priority=99 , match=(reg8[[21]] == 1 && reg8[[22]] == 1 && reg5[[0..7]] == 1), action=(outport = "child-1"; output;) + table=??(ls_out_network_function), priority=0 , match=(1), action=(next;) + table=??(ls_out_network_function), priority=1 , match=(reg8[[21]] == 1), action=(drop;) + table=??(ls_out_network_function), priority=100 , match=(outport == "child-1"), action=(next;) + table=??(ls_out_network_function), priority=100 , match=(outport == "child-2"), action=(next;) + table=??(ls_out_network_function), priority=100 , match=(reg8[[21]] == 1 && eth.mcast), action=(next;) + table=??(ls_out_network_function), priority=99 , match=(reg8[[21]] == 1 && reg8[[22]] == 0 && ct_label.network_function_group_id == 1), 
action=(outport = "child-2"; reg8[[23]] = 1; next(pipeline=ingress, table=??);) + table=??(ls_out_network_function), priority=99 , match=(reg8[[21]] == 1 && reg8[[22]] == 1 && reg5[[0..7]] == 1), action=(outport = "child-2"; reg8[[23]] = 1; next(pipeline=ingress, table=??);) +]) + +# Set the service monitor for nf0 to offline and nf1 to online +# and verify nf1 is the active. + +AS_BOX([Set the service monitor for nf0 to offline and nf1 to online]) +check ovn-sbctl set service_monitor $nfsw-p2 status=offline +check ovn-sbctl set service_monitor $nfsw-p4 status=online +wait_row_count Service_Monitor 1 logical_port=$nfsw-p2 status=offline +wait_row_count Service_Monitor 1 logical_port=$nfsw-p4 status=online +check ovn-nbctl --wait=sb sync + +ovn-sbctl dump-flows $sw > lflows +AT_CAPTURE_FILE([lflows]) + +AT_CHECK( + [grep -E 'ls_(in|out)_network_function' lflows | ovn_strip_lflows | sort], [0], [dnl + table=??(ls_in_network_function), priority=0 , match=(1), action=(next;) + table=??(ls_in_network_function), priority=1 , match=(reg8[[21]] == 1), action=(drop;) + table=??(ls_in_network_function), priority=100 , match=(inport == "child-3"), action=(reg5[[16..31]] = ct_label.tun_if_id; next;) + table=??(ls_in_network_function), priority=100 , match=(inport == "child-4"), action=(reg5[[16..31]] = ct_label.tun_if_id; next;) + table=??(ls_in_network_function), priority=100 , match=(reg8[[21]] == 1 && eth.mcast), action=(next;) + table=??(ls_in_network_function), priority=99 , match=(reg8[[21]] == 1 && reg8[[22]] == 0 && ct_label.network_function_group_id == 1), action=(outport = "child-3"; output;) + table=??(ls_in_network_function), priority=99 , match=(reg8[[21]] == 1 && reg8[[22]] == 1 && reg5[[0..7]] == 1), action=(outport = "child-3"; output;) + table=??(ls_out_network_function), priority=0 , match=(1), action=(next;) + table=??(ls_out_network_function), priority=1 , match=(reg8[[21]] == 1), action=(drop;) + table=??(ls_out_network_function), priority=100 , 
match=(outport == "child-3"), action=(next;) + table=??(ls_out_network_function), priority=100 , match=(outport == "child-4"), action=(next;) + table=??(ls_out_network_function), priority=100 , match=(reg8[[21]] == 1 && eth.mcast), action=(next;) + table=??(ls_out_network_function), priority=99 , match=(reg8[[21]] == 1 && reg8[[22]] == 0 && ct_label.network_function_group_id == 1), action=(outport = "child-4"; reg8[[23]] = 1; next(pipeline=ingress, table=??);) + table=??(ls_out_network_function), priority=99 , match=(reg8[[21]] == 1 && reg8[[22]] == 1 && reg5[[0..7]] == 1), action=(outport = "child-4"; reg8[[23]] = 1; next(pipeline=ingress, table=??);) +]) + +# Set the service monitor for nf0 to offline and nf1 to offline +# and verify nf1 is still the active. + +AS_BOX([Set the service monitor for nf0 to offline and nf1 to offline]) +check ovn-sbctl set service_monitor $nfsw-p2 status=offline +check ovn-sbctl set service_monitor $nfsw-p4 status=offline +wait_row_count Service_Monitor 1 logical_port=$nfsw-p2 status=offline +wait_row_count Service_Monitor 1 logical_port=$nfsw-p4 status=offline +check ovn-nbctl --wait=sb sync + +ovn-sbctl dump-flows $sw > lflows +AT_CAPTURE_FILE([lflows]) + +AT_CHECK( + [grep -E 'ls_(in|out)_network_function' lflows | ovn_strip_lflows | sort], [0], [dnl + table=??(ls_in_network_function), priority=0 , match=(1), action=(next;) + table=??(ls_in_network_function), priority=1 , match=(reg8[[21]] == 1), action=(drop;) + table=??(ls_in_network_function), priority=100 , match=(inport == "child-3"), action=(reg5[[16..31]] = ct_label.tun_if_id; next;) + table=??(ls_in_network_function), priority=100 , match=(inport == "child-4"), action=(reg5[[16..31]] = ct_label.tun_if_id; next;) + table=??(ls_in_network_function), priority=100 , match=(reg8[[21]] == 1 && eth.mcast), action=(next;) + table=??(ls_in_network_function), priority=99 , match=(reg8[[21]] == 1 && reg8[[22]] == 0 && ct_label.network_function_group_id == 1), action=(outport = 
"child-3"; output;) + table=??(ls_in_network_function), priority=99 , match=(reg8[[21]] == 1 && reg8[[22]] == 1 && reg5[[0..7]] == 1), action=(outport = "child-3"; output;) + table=??(ls_out_network_function), priority=0 , match=(1), action=(next;) + table=??(ls_out_network_function), priority=1 , match=(reg8[[21]] == 1), action=(drop;) + table=??(ls_out_network_function), priority=100 , match=(outport == "child-3"), action=(next;) + table=??(ls_out_network_function), priority=100 , match=(outport == "child-4"), action=(next;) + table=??(ls_out_network_function), priority=100 , match=(reg8[[21]] == 1 && eth.mcast), action=(next;) + table=??(ls_out_network_function), priority=99 , match=(reg8[[21]] == 1 && reg8[[22]] == 0 && ct_label.network_function_group_id == 1), action=(outport = "child-4"; reg8[[23]] = 1; next(pipeline=ingress, table=??);) + table=??(ls_out_network_function), priority=99 , match=(reg8[[21]] == 1 && reg8[[22]] == 1 && reg5[[0..7]] == 1), action=(outport = "child-4"; reg8[[23]] = 1; next(pipeline=ingress, table=??);) +]) + +AT_CLEANUP +]) diff --git a/tests/system-ovn.at b/tests/system-ovn.at index 066703418..547d98623 100644 --- a/tests/system-ovn.at +++ b/tests/system-ovn.at @@ -18251,3 +18251,207 @@ OVS_TRAFFIC_VSWITCHD_STOP(["/failed to query port patch-.*/d /connection dropped.*/d"]) AT_CLEANUP ]) + +OVN_FOR_EACH_NORTHD([ +AT_SETUP([Network Function]) +AT_SKIP_IF([test $HAVE_TCPDUMP = no]) +ovn_start +OVS_TRAFFIC_VSWITCHD_START() + +ADD_BR([br-int]) + +# Set external-ids in br-int needed for ovn-controller +ovs-vsctl \ + -- set Open_vSwitch . external-ids:system-id=hv1 \ + -- set Open_vSwitch . external-ids:ovn-remote=unix:$ovs_base/ovn-sb/ovn-sb.sock \ + -- set Open_vSwitch . external-ids:ovn-encap-type=geneve \ + -- set Open_vSwitch . 
external-ids:ovn-encap-ip=169.0.0.1 \ + -- set bridge br-int fail-mode=secure other-config:disable-in-band=true + +# Start ovn-controller +start_daemon ovn-controller + +ADD_NAMESPACES(client) +ADD_VETH(client, client, br-int, "192.168.1.10/24", "f0:00:00:01:02:10") +ADD_NAMESPACES(server) +ADD_VETH(server, server, br-int, "192.168.1.20/24", "f0:00:00:01:02:20") +ADD_NAMESPACES(nf) +ADD_VETH(nf-p1, nf, br-int, "0", "f0:00:00:01:02:30") +ADD_VETH(nf-p2, nf, br-int, "0", "f0:00:00:01:02:40") +ADD_VETH(nf-p3, nf, br-int, "0", "f0:00:00:01:02:50") +ADD_VETH(nf-p4, nf, br-int, "0", "f0:00:00:01:02:60") + +check ovn-nbctl ls-add sw0 +check ovn-nbctl lsp-add sw0 client \ + -- lsp-set-addresses client "f0:00:00:01:02:10 192.168.1.10/24" +check ovn-nbctl lsp-add sw0 server \ + -- lsp-set-addresses server "f0:00:00:01:02:20 192.168.1.20/24" +check ovn-nbctl ls-add nf +check ovn-nbctl lsp-add nf nf-p1 +check ovn-nbctl lsp-add nf nf-p2 +check ovn-nbctl lsp-add nf nf-p3 +check ovn-nbctl lsp-add nf nf-p4 +check ovn-nbctl set logical_switch_port nf-p1 options:receive_multicast=false options:lsp_learn_fdb=false options:network-function=true options:network-function-linked-port=nf-p2 +check ovn-nbctl set logical_switch_port nf-p2 options:receive_multicast=false options:lsp_learn_fdb=false options:network-function=true options:network-function-linked-port=nf-p1 +check ovn-nbctl set logical_switch_port nf-p3 options:receive_multicast=false options:lsp_learn_fdb=false options:network-function=true options:network-function-linked-port=nf-p4 +check ovn-nbctl set logical_switch_port nf-p4 options:receive_multicast=false options:lsp_learn_fdb=false options:network-function=true options:network-function-linked-port=nf-p3 +check ovn-nbctl lsp-add sw0 child-1 nf-p1 100 +check ovn-nbctl lsp-add sw0 child-2 nf-p2 100 +check ovn-nbctl lsp-add sw0 child-3 nf-p3 100 +check ovn-nbctl lsp-add sw0 child-4 nf-p4 100 +check ovn-nbctl set logical_switch_port child-1 options:receive_multicast=false 
options:lsp_learn_fdb=false options:network-function=true options:network-function-linked-port=child-2 +check ovn-nbctl set logical_switch_port child-2 options:receive_multicast=false options:lsp_learn_fdb=false options:network-function=true options:network-function-linked-port=child-1 +check ovn-nbctl set logical_switch_port child-3 options:receive_multicast=false options:lsp_learn_fdb=false options:network-function=true options:network-function-linked-port=child-4 +check ovn-nbctl set logical_switch_port child-4 options:receive_multicast=false options:lsp_learn_fdb=false options:network-function=true options:network-function-linked-port=child-3 + +check ovn-nbctl set nb_global . options:svc_monitor_ip4=169.254.100.10 +check ovn-nbctl set nb_global . options:svc_monitor_ip4_dst=169.254.100.11 +check ovn-nbctl network-function-add nf0 nf-p1 nf-p2 +check ovn-nbctl network-function-add nf1 nf-p3 nf-p4 +check ovn-nbctl create network_function_health_check name=nf_health_cfg options:interval=1 options:timeout=1 options:success_count=2 options:failure_count=2 +nf_health_uuid=$(ovn-nbctl --bare --columns _uuid find network_function_health_check name=nf_health_cfg) +nf0_uuid=$(ovn-nbctl --bare --columns _uuid find network_function name=nf0) +nf1_uuid=$(ovn-nbctl --bare --columns _uuid find network_function name=nf1) +check ovn-nbctl set network_function $nf0_uuid health_check=$nf_health_uuid +check ovn-nbctl set network_function $nf1_uuid health_check=$nf_health_uuid +check ovn-nbctl network-function-group-add nfg0 nf0 nf1 + +check ovn-nbctl pg-add pg0 server +check ovn-nbctl acl-add pg0 from-lport 1001 "inport == @pg0 && ip4.dst == 192.168.1.10" allow-related nfg0 +check ovn-nbctl acl-add pg0 to-lport 1002 "outport == @pg0 && ip4.src == 192.168.1.10" allow-related nfg0 + +check ovn-nbctl --wait=hv sync + +# configure bridge inside nf namespace with NF ports to simulate NF behaviour +# Add bridge for nf0 +NS_CHECK_EXEC([nf], [ip link add name br0 type bridge]) 
+NS_CHECK_EXEC([nf], [ip link set dev nf-p1 master br0]) +NS_CHECK_EXEC([nf], [ip link set dev nf-p2 master br0]) +# Add bridge for nf1 +NS_CHECK_EXEC([nf], [ip link add name br1 type bridge]) +NS_CHECK_EXEC([nf], [ip link set dev nf-p3 master br1]) +NS_CHECK_EXEC([nf], [ip link set dev nf-p4 master br1]) + +validate_traffic() { + send_data=$1; recv_data=$2; pkt_cnt=$3; + AT_CHECK([printf "$send_data\n" > /tmp/nffifo], [0], [dnl +]) + + if [[ -n "$recv_data" ]]; then + OVS_WAIT_FOR_OUTPUT_UNQUOTED([cat output.txt], [0], [dnl +$recv_data +]) + else + OVS_WAIT_FOR_OUTPUT([cat output.txt], [0], [dnl +]) + fi + + : > output.txt + + OVS_WAIT_UNTIL([ + total_pkts=$(cat pkt.pcap | wc -l) + test ${total_pkts} -ge ${pkt_cnt} + ]) +} + +validate_nf_with_traffic() { + client_ns=$1; server_ns=$2; sip=$3; direction=$4 + + # Start a TCP server + NETNS_DAEMONIZE($server_ns, [server.py -i $sip -p 10000], [server.pid]) + on_exit 'kill $(cat server.pid)' + + # Ensure TCP server is ready for connections + OVS_WAIT_FOR_OUTPUT([cat output.txt], [0], [dnl +Server Ready +]) + : > output.txt + + # Make a FIFO and send its output to a server + mkfifo /tmp/nffifo + on_exit 'rm -rf /tmp/nffifo' + + NETNS_DAEMONIZE($client_ns, [client.py -f "/tmp/nffifo" -i $sip -p 10000], [client.pid]) + on_exit 'kill $(cat client.pid)' + + AS_BOX([$direction: Verify traffic forwarding through NF when nf0 is active]) + NS_CHECK_EXEC([nf], [ip link set dev br0 up]) + NS_CHECK_EXEC([nf], [ip link set dev br1 down]) + + NS_CHECK_EXEC([nf], [tcpdump -l -nvv -i nf-p1 tcp > pkt.pcap 2>tcpdump_err &]) + OVS_WAIT_UNTIL([grep "listening" tcpdump_err]) + on_exit 'kill $(pidof tcpdump)' + + # sleep to allow service_monitor to detect the state + sleep 5 + + ovn-sbctl dump-flows sw0 > lflows_nf0_active + ovn-sbctl list service_monitor + + validate_traffic "test" "test" 5 + + AS_BOX([$direction: Verify traffic forwarding through NF when nf1 is active]) + + kill $(pidof tcpdump) + NS_CHECK_EXEC([nf], [tcpdump -l -nvv -i 
nf-p3 tcp > pkt.pcap 2>tcpdump_err &]) + OVS_WAIT_UNTIL([grep "listening" tcpdump_err]) + on_exit 'kill $(pidof tcpdump)' + + # Bring nf0 down and nf1 up + NS_CHECK_EXEC([nf], [ip link set dev br0 down]) + NS_CHECK_EXEC([nf], [ip link set dev br1 up]) + # sleep to allow service_monitor to detect the state + sleep 5 + + ovn-sbctl dump-flows sw0 > lflows_nf1_active + ovn-sbctl list service_monitor + + validate_traffic "test" "test" 2 + + AS_BOX([$direction: Verify traffic forwarding through NF when nf0 and nf1 are down]) + + kill $(pidof tcpdump) + NS_CHECK_EXEC([nf], [tcpdump -l -nvv -i nf-p3 tcp > pkt.pcap 2>tcpdump_err &]) + OVS_WAIT_UNTIL([grep "listening" tcpdump_err]) + on_exit 'kill $(pidof tcpdump)' + + # Bring nf1 down too (nf0 is already down) + NS_CHECK_EXEC([nf], [ip link set dev br1 down]) + # sleep to allow service_monitor to detect the state + sleep 5 + + ovn-sbctl dump-flows sw0 > lflows_nf1_active + ovn-sbctl list service_monitor + + validate_traffic "test" "" 0 + + kill $(cat client.pid) + kill $(cat server.pid) + rm -f client.pid + rm -f server.pid + rm -f /tmp/nffifo +} + +AS_BOX([Verify inbound traffic forwarding through NF when nf0 is active]) +validate_nf_with_traffic "client" "server" "192.168.1.20" "Inbound" + +AS_BOX([Verify outbound traffic forwarding through NF when nf0 is active]) +validate_nf_with_traffic "server" "client" "192.168.1.10" "Outbound" + +OVN_CLEANUP_CONTROLLER([hv1]) + +as ovn-sb +OVS_APP_EXIT_AND_WAIT([ovsdb-server]) + +as ovn-nb +OVS_APP_EXIT_AND_WAIT([ovsdb-server]) + +as northd +OVS_APP_EXIT_AND_WAIT([ovn-northd]) + +as +OVS_TRAFFIC_VSWITCHD_STOP(["/.*error receiving.*/d +/failed to query port patch-.*/d +/.*terminating with signal 15.*/d"]) +AT_CLEANUP +]) -- 2.39.3 _______________________________________________ dev mailing list d...@openvswitch.org https://mail.openvswitch.org/mailman/listinfo/ovs-dev