Introduce BFD state machine for BFD packet parsing according to RFC880 https://tools.ietf.org/html/rfc5880. Introduce BFD logical flows in ovn-northd.
Signed-off-by: Lorenzo Bianconi <[email protected]> --- NEWS | 2 + controller/pinctrl.c | 281 +++++++++++++++++++++++++++++++++++++++- northd/ovn-northd.8.xml | 21 +++ northd/ovn-northd.c | 85 ++++++++++-- tests/ovn-northd.at | 21 +++ 5 files changed, 395 insertions(+), 15 deletions(-) diff --git a/NEWS b/NEWS index f0ac94b8e..306a7ccda 100644 --- a/NEWS +++ b/NEWS @@ -24,6 +24,8 @@ OVN v20.12.0 - xx xxx xxxx significantly decrease size of a Southbound DB. However, in some cases, it could have performance penalty for ovn-controller. Disabled by default. + - BFD protocol support according to RFC880 [0] + [0] https://tools.ietf.org/html/rfc5880) OVN v20.09.0 - 28 Sep 2020 -------------------------- diff --git a/controller/pinctrl.c b/controller/pinctrl.c index 0528fe20d..875049af4 100644 --- a/controller/pinctrl.c +++ b/controller/pinctrl.c @@ -330,7 +330,7 @@ static void bfd_monitor_destroy(void); static void bfd_monitor_send_msg(struct rconn *swconn, long long int *bfd_time) OVS_REQUIRES(pinctrl_mutex); static void -pinctrl_handle_bfd_msg(void) +pinctrl_handle_bfd_msg(const struct flow *ip_flow, struct dp_packet *pkt_in) OVS_REQUIRES(pinctrl_mutex); static void bfd_monitor_run(const struct sbrec_bfd_table *bfd_table, struct ovsdb_idl_index *sbrec_port_binding_by_name, @@ -2980,7 +2980,7 @@ process_packet_in(struct rconn *swconn, const struct ofp_header *msg) case ACTION_OPCODE_BFD_MSG: ovs_mutex_lock(&pinctrl_mutex); - pinctrl_handle_bfd_msg(); + pinctrl_handle_bfd_msg(&headers, &packet); ovs_mutex_unlock(&pinctrl_mutex); break; @@ -6343,8 +6343,48 @@ sync_svc_monitors(struct ovsdb_idl_txn *ovnsb_idl_txn, } +enum bfd_state { + BFD_STATE_ADMIN_DOWN, + BFD_STATE_DOWN, + BFD_STATE_INIT, + BFD_STATE_UP, +}; + +enum bfd_flags { + BFD_FLAG_MULTIPOINT = 1 << 0, + BFD_FLAG_DEMAND = 1 << 1, + BFD_FLAG_AUTH = 1 << 2, + BFD_FLAG_CTL = 1 << 3, + BFD_FLAG_FINAL = 1 << 4, + BFD_FLAG_POLL = 1 << 5 +}; + +#define BFD_FLAGS_MASK 0x3f + +static char * +bfd_get_status(enum bfd_state state) +{ + switch (state) { + case BFD_STATE_ADMIN_DOWN: + return "admin_down"; + case BFD_STATE_DOWN: + return "down"; + case BFD_STATE_INIT: + return "init"; + case BFD_STATE_UP: + return "up"; + default: + return ""; + } +} + static struct hmap bfd_monitor_map; +#define BFD_UPDATE_BATCH_TH 10 +static uint16_t bpd_pending_update; +#define BFD_UPDATE_TIMEOUT 5000LL +static long long bfd_last_update; + struct bfd_entry { struct hmap_node node; @@ -6362,12 +6402,24 @@ struct bfd_entry { * sessions on the system */ uint16_t udp_src; - ovs_be32 disc; + ovs_be32 local_disc; + ovs_be32 remote_disc; + + uint32_t local_min_tx; + uint32_t local_min_rx; + uint32_t remote_min_rx; + + uint8_t local_mult; int64_t port_key; int64_t metadata; + enum bfd_state state; + bool change_state; + + uint32_t detection_timeout; long long int last_update; + long long int last_rx; long long int next_tx; }; @@ -6375,6 +6427,7 @@ static void bfd_monitor_init(void) { hmap_init(&bfd_monitor_map); + bfd_last_update = time_msec(); } static void @@ -6401,6 +6454,24 @@ pinctrl_find_bfd_monitor_entry_by_port(char *ip, uint16_t port) return NULL; } +static struct bfd_entry * +pinctrl_find_bfd_monitor_entry_by_disc(ovs_be32 ip, ovs_be32 disc) +{ + char *ip_src = xasprintf(IP_FMT, IP_ARGS(ip)); + struct bfd_entry *ret = NULL, *entry; + + HMAP_FOR_EACH_WITH_HASH (entry, node, hash_string(ip_src, 0), + &bfd_monitor_map) { + if (entry->local_disc == disc) { + ret = entry; + break; + } + } + + free(ip_src); + return ret; +} + static bool bfd_monitor_should_inject(void) { @@ -6452,9 +6523,36 @@ bfd_monitor_put_bfd_msg(struct bfd_entry *entry, struct dp_packet *packet) udp->udp_dst = htons(BFD_DEST_PORT); udp->udp_len = htons(sizeof *udp + sizeof *msg); - msg = dp_packet_put_uninit(packet, sizeof *msg); + msg = dp_packet_put_zeros(packet, sizeof *msg); msg->vers_diag = (BFD_VERSION << 5); + msg->mult = entry->local_mult; msg->length = BFD_PACKET_LEN; + msg->flags = entry->state << 6; + msg->my_disc = entry->local_disc; + msg->your_disc = entry->remote_disc; + msg->min_tx = htonl(entry->local_min_tx * 1000); + msg->min_rx = htonl(entry->local_min_rx * 1000); +} + +static bool +bpf_monitor_need_update(void) +{ + long long int cur_time = time_msec(); + + if (bpd_pending_update == BFD_UPDATE_BATCH_TH) { + goto update; + } + + if (bpd_pending_update && + bfd_last_update + BFD_UPDATE_TIMEOUT < cur_time) { + goto update; + } + return false; + +update: + bfd_last_update = cur_time; + bpd_pending_update = 0; + return true; } static void @@ -6464,11 +6562,28 @@ bfd_monitor_send_msg(struct rconn *swconn, long long int *bfd_time) long long int cur_time = time_msec(); struct bfd_entry *entry; + if (bpf_monitor_need_update()) { + notify_pinctrl_main(); + } + HMAP_FOR_EACH (entry, node, &bfd_monitor_map) { + if (cur_time > entry->last_rx + entry->detection_timeout && + entry->state != BFD_STATE_ADMIN_DOWN) { + entry->state = BFD_STATE_DOWN; + entry->change_state = true; + bfd_last_update = cur_time; + bpd_pending_update = 0; + notify_pinctrl_main(); + } + if (cur_time < entry->next_tx) { goto next; } + if (!entry->remote_min_rx) { + goto next; + } + uint64_t packet_stub[256 / 8]; struct dp_packet packet; dp_packet_use_stub(&packet, packet_stub, sizeof packet_stub); @@ -6503,7 +6618,10 @@ bfd_monitor_send_msg(struct rconn *swconn, long long int *bfd_time) dp_packet_uninit(&packet); ofpbuf_uninit(&ofpacts); - entry->next_tx = cur_time + 5000; + unsigned long tx_timeout = MAX(entry->local_min_tx, + entry->remote_min_rx); + tx_timeout -= random_range((tx_timeout * 25) / 100); + entry->next_tx = cur_time + tx_timeout; next: if (*bfd_time > entry->next_tx) { *bfd_time = entry->next_tx; @@ -6511,10 +6629,139 @@ next: } } +static bool +pinctrl_check_bfd_msg(const struct flow *ip_flow, struct dp_packet *pkt_in) +{ + if (ip_flow->dl_type != htons(ETH_TYPE_IP) && + ip_flow->dl_type != htons(ETH_TYPE_IPV6)) { + return false; + } + + if (ip_flow->nw_proto != IPPROTO_UDP) { + return false; + } + + struct udp_header *udp_hdr = dp_packet_l4(pkt_in); + if (udp_hdr->udp_dst != htons(BFD_DEST_PORT)) { + return false; + } + + const struct bfd_msg *msg = dp_packet_get_udp_payload(pkt_in); + uint8_t version = msg->vers_diag >> 5; + if (version != BFD_VERSION) { + return false; + } + + enum bfd_flags flags = msg->flags & BFD_FLAGS_MASK; + if (flags & BFD_FLAG_AUTH) { + /* AUTH not supported yet */ + return false; + } + + if (msg->length < BFD_PACKET_LEN) { + return false; + } + + if (!msg->mult) { + return false; + } + + if (flags & BFD_FLAG_MULTIPOINT) { + return false; + } + + if (!msg->my_disc) { + return false; + } + + enum bfd_state peer_state = msg->flags >> 6; + if (peer_state >= BFD_STATE_INIT && !msg->your_disc) { + return false; + } + + return true; +} + static void -pinctrl_handle_bfd_msg(void) +pinctrl_handle_bfd_msg(const struct flow *ip_flow, struct dp_packet *pkt_in) OVS_REQUIRES(pinctrl_mutex) { + if (!pinctrl_check_bfd_msg(ip_flow, pkt_in)) { + static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(1, 5); + VLOG_WARN_RL(&rl, "BFD packet discarded"); + return; + } + + const struct bfd_msg *msg = dp_packet_get_udp_payload(pkt_in); + struct bfd_entry *entry = pinctrl_find_bfd_monitor_entry_by_disc( + ip_flow->nw_src, msg->your_disc); + if (!entry) { + return; + } + + bool change_state = false; + entry->remote_disc = msg->my_disc; + uint32_t remote_min_tx = ntohl(msg->min_tx) / 1000; + entry->remote_min_rx = ntohl(msg->min_rx) / 1000; + entry->detection_timeout = msg->mult * MAX(remote_min_tx, + entry->local_min_rx); + + enum bfd_state peer_state = msg->flags >> 6; + if (peer_state == BFD_STATE_ADMIN_DOWN && + entry->state >= BFD_STATE_INIT) { + entry->state = BFD_STATE_DOWN; + entry->last_rx = time_msec(); + change_state = true; + goto out; + } + + /* bfd state machine */ + switch (entry->state) { + case BFD_STATE_DOWN: + if (peer_state == BFD_STATE_DOWN) { + entry->state = BFD_STATE_INIT; + change_state = true; + } + if (peer_state == BFD_STATE_INIT) { + entry->state = BFD_STATE_UP; + change_state = true; + } + entry->last_rx = time_msec(); + break; + case BFD_STATE_INIT: + if (peer_state == BFD_STATE_INIT || + peer_state == BFD_STATE_UP) { + entry->state = BFD_STATE_UP; + change_state = true; + } + if (peer_state == BFD_STATE_ADMIN_DOWN) { + entry->state = BFD_STATE_DOWN; + change_state = true; + } + entry->last_rx = time_msec(); + break; + case BFD_STATE_UP: + if (peer_state == BFD_STATE_ADMIN_DOWN || + peer_state == BFD_STATE_DOWN) { + entry->state = BFD_STATE_DOWN; + change_state = true; + } + entry->last_rx = time_msec(); + break; + case BFD_STATE_ADMIN_DOWN: + default: + break; + } + +out: + /* let's try to bacth db updates */ + if (change_state) { + entry->change_state = true; + bpd_pending_update++; + } + if (bpf_monitor_need_update()) { + notify_pinctrl_main(); + } } #define BFD_MONITOR_STALE_TIMEOUT 180000LL @@ -6594,14 +6841,34 @@ bfd_monitor_run(const struct sbrec_bfd_table *bfd_table, entry->ip_src = ip_src; entry->ip_dst = ip_dst; entry->udp_src = bt->src_port; - entry->disc = htonl(bt->disc); + entry->local_disc = htonl(bt->disc); entry->next_tx = cur_time; + entry->last_rx = cur_time; + entry->detection_timeout = 30000; entry->metadata = pb->datapath->tunnel_key; entry->port_key = pb->tunnel_key; + entry->state = BFD_STATE_DOWN; + entry->local_min_tx = bt->min_tx; + entry->local_min_rx = bt->min_rx; + entry->remote_min_rx = bt->min_rx; + entry->local_mult = bt->detect_mult; uint32_t hash = hash_string(bt->dst_ip, 0); hmap_insert(&bfd_monitor_map, &entry->node, hash); changed = true; + } else if (!strcmp(bt->status, "admin_down") && + entry->state != BFD_STATE_ADMIN_DOWN) { + entry->state = BFD_STATE_ADMIN_DOWN; + entry->change_state = false; + changed = true; + } else if (strcmp(bt->status, "admin_down") && + entry->state == BFD_STATE_ADMIN_DOWN) { + entry->state = BFD_STATE_DOWN; + entry->change_state = false; + changed = true; + } else if (entry->change_state) { + sbrec_bfd_set_status(bt, bfd_get_status(entry->state)); + entry->change_state = false; } entry->last_update = cur_time; } diff --git a/northd/ovn-northd.8.xml b/northd/ovn-northd.8.xml index 1f0f71f34..64ff831c6 100644 --- a/northd/ovn-northd.8.xml +++ b/northd/ovn-northd.8.xml @@ -1936,6 +1936,27 @@ next; </p> </li> + <li> + <p> + For each BFD port the two following priorirty-110 flows are added + to manage BFD traffic: + + <ul> + <li> + if <code>ip4.src</code> or <code>ip6.src</code> is any IP + address owned by the router port and <code>udp.dst == 3784 + </code>, the packet is advanced to the next pipeline stage. + </li> + + <li> + if <code>ip4.dst</code> or <code>ip6.dst</code> is any IP + address owned by the router port and <code>udp.dst == 3784 + </code>, the <code>handle_bfd_msg</code> action is executed. + </li> + </ul> + </p> + </li> + <li> <p> L3 admission control: A priority-100 flow drops packets that match diff --git a/northd/ovn-northd.c b/northd/ovn-northd.c index 20de84585..f15e1513e 100644 --- a/northd/ovn-northd.c +++ b/northd/ovn-northd.c @@ -1473,6 +1473,8 @@ struct ovn_port { bool has_unknown; /* If the addresses have 'unknown' defined. */ + bool has_bfd; + /* The port's peer: * * - A switch port S of type "router" has a router port R as a peer, @@ -8223,16 +8225,15 @@ add_route(struct hmap *lflows, const struct ovn_port *op, build_route_match(op_inport, network_s, plen, is_src_route, is_ipv4, &match, &priority); - struct ds actions = DS_EMPTY_INITIALIZER; - ds_put_format(&actions, "ip.ttl--; "REG_ECMP_GROUP_ID" = 0; %s = ", + struct ds common_actions = DS_EMPTY_INITIALIZER; + ds_put_format(&common_actions, REG_ECMP_GROUP_ID" = 0; %s = ", is_ipv4 ? REG_NEXT_HOP_IPV4 : REG_NEXT_HOP_IPV6); - if (gateway) { - ds_put_cstr(&actions, gateway); + ds_put_cstr(&common_actions, gateway); } else { - ds_put_format(&actions, "ip%s.dst", is_ipv4 ? "4" : "6"); + ds_put_format(&common_actions, "ip%s.dst", is_ipv4 ? "4" : "6"); } - ds_put_format(&actions, "; " + ds_put_format(&common_actions, "; " "%s = %s; " "eth.src = %s; " "outport = %s; " @@ -8242,11 +8243,20 @@ add_route(struct hmap *lflows, const struct ovn_port *op, lrp_addr_s, op->lrp_networks.ea_s, op->json_key); + struct ds actions = DS_EMPTY_INITIALIZER; + ds_put_format(&actions, "ip.ttl--; %s", ds_cstr(&common_actions)); ovn_lflow_add_with_hint(lflows, op->od, S_ROUTER_IN_IP_ROUTING, priority, ds_cstr(&match), ds_cstr(&actions), stage_hint); + if (op->has_bfd) { + ds_put_format(&match, " && udp.dst == 3784"); + ovn_lflow_add_with_hint(lflows, op->od, S_ROUTER_IN_IP_ROUTING, + priority + 1, ds_cstr(&match), + ds_cstr(&common_actions), stage_hint); + } ds_destroy(&match); + ds_destroy(&common_actions); ds_destroy(&actions); } @@ -8908,6 +8918,52 @@ build_lrouter_force_snat_flows(struct hmap *lflows, struct ovn_datapath *od, ds_destroy(&actions); } +static void +build_lrouter_bfd_flows(struct hmap *lflows, struct ovn_port *op) +{ + if (!op->has_bfd) { + return; + } + + struct ds ip_list = DS_EMPTY_INITIALIZER; + struct ds match = DS_EMPTY_INITIALIZER; + + if (op->lrp_networks.n_ipv4_addrs) { + op_put_v4_networks(&ip_list, op, false); + ds_put_format(&match, "ip4.src == %s && udp.dst == 3784", + ds_cstr(&ip_list)); + ovn_lflow_add_with_hint(lflows, op->od, S_ROUTER_IN_IP_INPUT, 110, + ds_cstr(&match), "next; ", + &op->nbrp->header_); + ds_clear(&match); + ds_put_format(&match, "ip4.dst == %s && udp.dst == 3784", + ds_cstr(&ip_list)); + ovn_lflow_add_with_hint(lflows, op->od, S_ROUTER_IN_IP_INPUT, 110, + ds_cstr(&match), "handle_bfd_msg(); ", + &op->nbrp->header_); + } + if (op->lrp_networks.n_ipv6_addrs) { + ds_clear(&ip_list); + ds_clear(&match); + + op_put_v6_networks(&ip_list, op); + ds_put_format(&match, "ip6.src == %s && udp.dst == 3784", + ds_cstr(&ip_list)); + ovn_lflow_add_with_hint(lflows, op->od, S_ROUTER_IN_IP_INPUT, 110, + ds_cstr(&match), "next; ", + &op->nbrp->header_); + ds_clear(&match); + ds_put_format(&match, "ip6.dst == %s && udp.dst == 3784", + ds_cstr(&ip_list)); + ovn_lflow_add_with_hint(lflows, op->od, S_ROUTER_IN_IP_INPUT, 110, + ds_cstr(&match), "handle_bfd_msg(); ", + &op->nbrp->header_); + } + + ds_destroy(&ip_list); + ds_destroy(&match); +} + static void build_lrouter_flows(struct hmap *datapaths, struct hmap *ports, struct hmap *lflows, struct shash *meter_groups, @@ -9012,6 +9068,9 @@ build_lrouter_flows(struct hmap *datapaths, struct hmap *ports, &op->nbrp->header_); } + /* BFD msg handling */ + build_lrouter_bfd_flows(lflows, op); + /* ICMP time exceeded */ for (int i = 0; i < op->lrp_networks.n_ipv4_addrs; i++) { ds_clear(&match); @@ -11765,7 +11824,7 @@ bfd_table_check_default(const struct nbrec_bfd *nb_bt) } static void -build_bfd_table(struct northd_context *ctx) +build_bfd_table(struct northd_context *ctx, struct hmap *ports) { struct hmap sb_only = HMAP_INITIALIZER(&sb_only); struct bfd_entry *bfd_e, *next_bfd_e; @@ -11817,10 +11876,20 @@ build_bfd_table(struct northd_context *ctx) if (bfd_e) { bfd_e->found = true; } + + struct ovn_port *op = ovn_port_find(ports, nb_bt->logical_port); + if (op) { + op->has_bfd = true; + } } HMAP_FOR_EACH_SAFE (bfd_e, next_bfd_e, hmap_node, &sb_only) { if (!bfd_e->found && bfd_e->sb_bt) { + struct ovn_port *op = ovn_port_find(ports, + bfd_e->sb_bt->logical_port); + if (op) { + op->has_bfd = false; + } sbrec_bfd_delete(bfd_e->sb_bt); } hmap_remove(&sb_only, &bfd_e->hmap_node); @@ -12626,7 +12695,7 @@ ovnnb_db_run(struct northd_context *ctx, build_ip_mcast(ctx, datapaths); build_mcast_groups(ctx, datapaths, ports, &mcast_groups, &igmp_groups); build_meter_groups(ctx, &meter_groups); - build_bfd_table(ctx); + build_bfd_table(ctx, ports); build_lflows(ctx, datapaths, ports, &port_groups, &mcast_groups, &igmp_groups, &meter_groups, &lbs); ovn_update_ipv6_prefix(ports); diff --git a/tests/ovn-northd.at b/tests/ovn-northd.at index ce6c44db4..58d137796 100644 --- a/tests/ovn-northd.at +++ b/tests/ovn-northd.at @@ -2322,3 +2322,24 @@ sed 's/reg8\[[0..15\]] == [[0-9]]*/reg8\[[0..15\]] == <cleared>/' | sort], [0], ]) AT_CLEANUP + +AT_SETUP([ovn -- check BFD config propagation to SBDB]) +ovn_start + +check ovn-nbctl --wait=sb lr-add r0 +check ovn-nbctl --wait=sb lrp-add r0 r0-sw0 00:00:00:00:00:01 192.168.0.1/24 +check ovn-nbctl --wait=sb ls-add sw0 +check ovn-nbctl --wait=sb lsp-add sw0 sw0-r0 +check ovn-nbctl --wait=sb lsp-set-type sw0-r0 router +check ovn-nbctl --wait=sb lsp-set-options sw0-r0 router-port=r0-sw0 +check ovn-nbctl --wait=sb lsp-set-addresses sw0-r0 00:00:00:00:00:01 + +ovn-nbctl create bfd logical_port=r0-sw0 dst_ip=192.168.1.2 min_tx=250 min_rx=250 detect_mult=10 + +check_column 10 bfd detect_mult logical_port=r0-sw0 +check_column "192.168.1.2" bfd dst_ip logical_port=r0-sw0 +check_column 250 bfd min_rx logical_port=r0-sw0 +check_column 250 bfd min_tx logical_port=r0-sw0 +check_column admin_down bfd status logical_port=r0-sw0 + +AT_CLEANUP -- 2.29.2 _______________________________________________ dev mailing list [email protected] https://mail.openvswitch.org/mailman/listinfo/ovs-dev
