Rework the OVN QoS implementation to configure QoS through the OVS QoS table instead of running the tc command directly, bypassing OVS.
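In other words, for a localnet port carrying options qos_min_rate=200000, qos_max_rate=300000 and qos_burst=3000000 bound to OVS port "ovs-public" (the values used by the system-ovn.at test below), ovn-controller now creates QoS and Queue rows in the local Open_vSwitch database and points the egress Port at them, rather than invoking tc. As a rough, illustrative sketch only (nothing in this patch runs ovs-vsctl, and queue id 1 merely stands in for the queue_id that ovn-controller allocates), the resulting database state is approximately what this manual invocation would create:

  # Illustrative only: ovn-controller creates the equivalent QoS/Queue/Port
  # rows itself through the OVSDB IDL.
  ovs-vsctl -- set Port ovs-public qos=@qos \
            -- --id=@qos create QoS type=linux-htb \
                 other-config:max-rate=300000 queues:1=@q \
            -- --id=@q create Queue other-config:min-rate=200000 \
                 other-config:max-rate=300000 other-config:burst=3000000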
Signed-off-by: Lorenzo Bianconi <[email protected]> --- controller/binding.c | 316 ++++++++++++++---------------------- controller/ovn-controller.c | 5 + northd/ovn-northd.8.xml | 6 - tests/system-ovn.at | 7 +- 4 files changed, 129 insertions(+), 205 deletions(-) diff --git a/controller/binding.c b/controller/binding.c index add64b07d..a58183479 100644 --- a/controller/binding.c +++ b/controller/binding.c @@ -172,194 +172,142 @@ get_qos_params(const struct sbrec_port_binding *pb, struct hmap *queue_map) } static const struct ovsrec_qos * -get_noop_qos(struct ovsdb_idl_txn *ovs_idl_txn, - const struct ovsrec_qos_table *qos_table) +add_ovs_qos_table_entry(struct ovsdb_idl_txn *ovs_idl_txn, + const struct ovsrec_qos_table *qos_table, + struct qos_queue *sb_info) { const struct ovsrec_qos *qos; + struct ovsrec_queue *queue; OVSREC_QOS_TABLE_FOR_EACH (qos, qos_table) { - if (!strcmp(qos->type, "linux-noop")) { - return qos; + if (strcmp(qos->type, OVN_QOS_TYPE)) { + continue; } - } - if (!ovs_idl_txn) { - return NULL; - } - qos = ovsrec_qos_insert(ovs_idl_txn); - ovsrec_qos_set_type(qos, "linux-noop"); - return qos; -} - -static bool -set_noop_qos(struct ovsdb_idl_txn *ovs_idl_txn, - const struct ovsrec_port_table *port_table, - const struct ovsrec_qos_table *qos_table, - struct sset *egress_ifaces) -{ - if (!ovs_idl_txn) { - return false; - } - - const struct ovsrec_qos *noop_qos = get_noop_qos(ovs_idl_txn, qos_table); - if (!noop_qos) { - return false; - } + if (!qos->n_queues) { + continue; + } - const struct ovsrec_port *port; - size_t count = 0; + queue = qos->value_queues[0]; + uint32_t max_rate = smap_get_int(&queue->other_config, "max-rate", 0); + if (max_rate != sb_info->max_rate) { + continue; + } - OVSREC_PORT_TABLE_FOR_EACH (port, port_table) { - if (sset_contains(egress_ifaces, port->name)) { - ovsrec_port_set_qos(port, noop_qos); - count++; + uint32_t min_rate = smap_get_int(&queue->other_config, "min-rate", 0); + if (min_rate != sb_info->min_rate) { + continue; } - if (sset_count(egress_ifaces) == count) { - break; + + uint32_t burst = smap_get_int(&queue->other_config, "burst", 0); + if (burst != sb_info->burst) { + continue; } - } - return true; -} -static void -set_qos_type(struct netdev *netdev, const char *type) -{ - /* 34359738360 == (2^32 - 1) * 8. netdev_set_qos() doesn't support - * 64-bit rate netlink attributes, so the maximum value is 2^32 - 1 bytes. - * The 'max-rate' config option is in bits, so multiplying by 8. - * Without setting max-rate the reported link speed will be used, which - * can be unrecognized for certain NICs or reported too low for virtual - * interfaces. */ - const struct smap conf = SMAP_CONST1(&conf, "max-rate", "34359738360"); - int error = netdev_set_qos(netdev, type, &conf); - if (error) { - static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(1, 1); - VLOG_WARN_RL(&rl, "%s: could not set qdisc type \"%s\" (%s)", - netdev_get_name(netdev), type, ovs_strerror(error)); + return qos; } -} -static void -setup_qos(const char *egress_iface, struct hmap *queue_map) -{ - static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(5, 5); - struct netdev *netdev_phy; - - if (!egress_iface) { - /* Queues cannot be configured. */ - return; + if (!ovs_idl_txn) { + return NULL; } - int error = netdev_open(egress_iface, NULL, &netdev_phy); - if (error) { - VLOG_WARN_RL(&rl, "%s: could not open netdev (%s)", - egress_iface, ovs_strerror(error)); - return; - } + queue = ovsrec_queue_insert(ovs_idl_txn); - /* Check current qdisc. 
*/ - const char *qdisc_type; - struct smap qdisc_details; + struct smap other_config = SMAP_INITIALIZER(&other_config); + smap_add_format(&other_config, "max-rate", "%d", sb_info->max_rate); + smap_add_format(&other_config, "min-rate", "%d", sb_info->min_rate); + smap_add_format(&other_config, "burst", "%d", sb_info->burst); + ovsrec_queue_set_other_config(queue, &other_config); - smap_init(&qdisc_details); - if (netdev_get_qos(netdev_phy, &qdisc_type, &qdisc_details) != 0 || - qdisc_type[0] == '\0') { - smap_destroy(&qdisc_details); - netdev_close(netdev_phy); - /* Qos is not supported. */ - return; - } - smap_destroy(&qdisc_details); + const int64_t id = sb_info->queue_id; + qos = ovsrec_qos_insert(ovs_idl_txn); + ovsrec_qos_set_type(qos, OVN_QOS_TYPE); + smap_clear(&other_config); + smap_add_format(&other_config, "max-rate", "%d", sb_info->max_rate); + ovsrec_qos_set_other_config(qos, &other_config); + ovsrec_qos_set_queues(qos, &id, &queue, 1); + smap_destroy(&other_config); - /* If we're not actually being requested to do any QoS: - * - * - If the current qdisc type is OVN_QOS_TYPE, then we clear the qdisc - * type to "". Otherwise, it's possible that our own leftover qdisc - * settings could cause strange behavior on egress. Also, QoS is - * expensive and may waste CPU time even if it's not really in use. - * - * OVN isn't the only software that can configure qdiscs, and - * physical interfaces are shared resources, so there is some risk in - * this strategy: we could disrupt some other program's QoS. - * Probably, to entirely avoid this possibility we would need to add - * a configuration setting. - * - * - Otherwise leave the qdisc alone. */ - if (hmap_is_empty(queue_map)) { - if (!strcmp(qdisc_type, OVN_QOS_TYPE)) { - set_qos_type(netdev_phy, ""); + return qos; +} + +static void +remove_stale_ovs_qos_entry(const struct ovsrec_port_table *port_table, + const struct ovsrec_qos_table *qos_table, + const struct sbrec_port_binding_table *pb_table) +{ + const struct ovsrec_qos *qos, *qos_next; + OVSREC_QOS_TABLE_FOR_EACH_SAFE (qos, qos_next, qos_table) { + if (!qos->n_queues) { + continue; } - netdev_close(netdev_phy); - return; - } - /* Configure qdisc. */ - if (strcmp(qdisc_type, OVN_QOS_TYPE)) { - set_qos_type(netdev_phy, OVN_QOS_TYPE); - } + struct ovsrec_queue *queue = qos->value_queues[0]; + bool stale = true; + const struct sbrec_port_binding *pb; + SBREC_PORT_BINDING_TABLE_FOR_EACH (pb, pb_table) { + if (get_lport_type(pb) != LP_LOCALNET) { + /* QoS is only supported on localnet port for the moment. */ + continue; + } - /* Check and delete if needed. */ - struct netdev_queue_dump dump; - unsigned int queue_id; - struct smap queue_details; - struct qos_queue *sb_info; - struct hmap consistent_queues; - - smap_init(&queue_details); - hmap_init(&consistent_queues); - NETDEV_QUEUE_FOR_EACH (&queue_id, &queue_details, &dump, netdev_phy) { - bool is_queue_needed = false; - - HMAP_FOR_EACH_WITH_HASH (sb_info, node, hash_int(queue_id, 0), - queue_map) { - is_queue_needed = true; - if (sb_info->min_rate == - smap_get_int(&queue_details, "min-rate", 0) - && sb_info->max_rate == - smap_get_int(&queue_details, "max-rate", 0) - && sb_info->burst == - smap_get_int(&queue_details, "burst", 0) && - !strcmp(egress_iface, sb_info->port_name)) { - /* This queue is consistent. 
*/ - hmap_insert(&consistent_queues, &sb_info->node, - hash_int(queue_id, 0)); + uint32_t pb_max_rate = smap_get_int(&pb->options, + "qos_max_rate", 0); + uint32_t pb_min_rate = smap_get_int(&pb->options, + "qos_min_rate", 0); + uint32_t pb_burst = smap_get_int(&pb->options, "qos_burst", 0); + if (!pb_max_rate && !pb_burst) { + continue; + } + + uint32_t max_rate = smap_get_int(&queue->other_config, + "max-rate", 0); + uint32_t min_rate = smap_get_int(&queue->other_config, + "min-rate", 0); + uint32_t burst = smap_get_int(&queue->other_config, "burst", 0); + if (pb_max_rate == max_rate && pb_min_rate == min_rate && + pb_burst == burst) { + stale = false; break; } } - - if (!is_queue_needed) { - error = netdev_delete_queue(netdev_phy, queue_id); - if (error) { - VLOG_WARN_RL(&rl, "%s: could not delete queue %u (%s)", - egress_iface, queue_id, ovs_strerror(error)); + if (stale) { + const struct ovsrec_port *port; + OVSREC_PORT_TABLE_FOR_EACH (port, port_table) { + if (port->qos == qos) { + ovsrec_port_set_qos(port, NULL); + ovsrec_queue_delete(queue); + ovsrec_qos_delete(qos); + break; + } } } } +} - /* Create/Update queues. */ +static void +configure_ovs_qos(struct hmap *queue_map, + struct ovsdb_idl_txn *ovs_idl_txn, + const struct ovsrec_port_table *port_table, + const struct ovsrec_qos_table *qos_table, + const struct sbrec_port_binding_table *pb_table) +{ + struct qos_queue *sb_info; HMAP_FOR_EACH (sb_info, node, queue_map) { - if (hmap_contains(&consistent_queues, &sb_info->node)) { - hmap_remove(&consistent_queues, &sb_info->node); - continue; - } + const struct ovsrec_qos *qos = + add_ovs_qos_table_entry(ovs_idl_txn, qos_table, sb_info); - if (strcmp(egress_iface, sb_info->port_name)) { - continue; + const struct ovsrec_port *port; + OVSREC_PORT_TABLE_FOR_EACH (port, port_table) { + if (!strcmp(port->name, sb_info->port_name)) { + ovsrec_port_set_qos(port, qos); + break; + } } + } - smap_clear(&queue_details); - smap_add_format(&queue_details, "min-rate", "%d", sb_info->min_rate); - smap_add_format(&queue_details, "max-rate", "%d", sb_info->max_rate); - smap_add_format(&queue_details, "burst", "%d", sb_info->burst); - error = netdev_set_queue(netdev_phy, sb_info->queue_id, - &queue_details); - if (error) { - VLOG_WARN_RL(&rl, "%s: could not configure queue %u (%s)", - egress_iface, sb_info->queue_id, ovs_strerror(error)); - } + if (ovs_idl_txn) { + remove_stale_ovs_qos_entry(port_table, qos_table, pb_table); } - smap_destroy(&queue_details); - hmap_destroy(&consistent_queues); - netdev_close(netdev_phy); } static void @@ -1926,9 +1874,6 @@ binding_run(struct binding_ctx_in *b_ctx_in, struct binding_ctx_out *b_ctx_out) build_local_bindings(b_ctx_in, b_ctx_out); } - struct hmap *qos_map_ptr = - !sset_is_empty(b_ctx_out->egress_ifaces) ? 
&qos_map : NULL; - struct ovs_list localnet_lports = OVS_LIST_INITIALIZER(&localnet_lports); struct ovs_list external_lports = OVS_LIST_INITIALIZER(&external_lports); struct ovs_list multichassis_ports = OVS_LIST_INITIALIZER( @@ -1965,7 +1910,7 @@ binding_run(struct binding_ctx_in *b_ctx_in, struct binding_ctx_out *b_ctx_out) break; case LP_VIF: - consider_vif_lport(pb, b_ctx_in, b_ctx_out, NULL, qos_map_ptr); + consider_vif_lport(pb, b_ctx_in, b_ctx_out, NULL, &qos_map); if (pb->additional_chassis) { struct lport *multichassis_lport = xmalloc( sizeof *multichassis_lport); @@ -1976,11 +1921,11 @@ binding_run(struct binding_ctx_in *b_ctx_in, struct binding_ctx_out *b_ctx_out) break; case LP_CONTAINER: - consider_container_lport(pb, b_ctx_in, b_ctx_out, qos_map_ptr); + consider_container_lport(pb, b_ctx_in, b_ctx_out, &qos_map); break; case LP_VIRTUAL: - consider_virtual_lport(pb, b_ctx_in, b_ctx_out, qos_map_ptr); + consider_virtual_lport(pb, b_ctx_in, b_ctx_out, &qos_map); break; case LP_L2GATEWAY: @@ -2060,15 +2005,9 @@ binding_run(struct binding_ctx_in *b_ctx_in, struct binding_ctx_out *b_ctx_out) shash_destroy(&bridge_mappings); - if (!sset_is_empty(b_ctx_out->egress_ifaces) - && set_noop_qos(b_ctx_in->ovs_idl_txn, b_ctx_in->port_table, - b_ctx_in->qos_table, b_ctx_out->egress_ifaces)) { - const char *entry; - SSET_FOR_EACH (entry, b_ctx_out->egress_ifaces) { - setup_qos(entry, &qos_map); - } - } - + configure_ovs_qos(&qos_map, b_ctx_in->ovs_idl_txn, + b_ctx_in->port_table, b_ctx_in->qos_table, + b_ctx_in->port_binding_table); destroy_qos_map(&qos_map); cleanup_claimed_port_timestamps(); @@ -2503,8 +2442,6 @@ binding_handle_ovs_interface_changes(struct binding_ctx_in *b_ctx_in, } struct hmap qos_map = HMAP_INITIALIZER(&qos_map); - struct hmap *qos_map_ptr = - sset_is_empty(b_ctx_out->egress_ifaces) ? NULL : &qos_map; /* * We consider an OVS interface for claiming if the following @@ -2534,23 +2471,18 @@ binding_handle_ovs_interface_changes(struct binding_ctx_in *b_ctx_in, if (iface_id && ofport > 0 && is_iface_in_int_bridge(iface_rec, b_ctx_in->br_int)) { handled = consider_iface_claim(iface_rec, iface_id, b_ctx_in, - b_ctx_out, qos_map_ptr); + b_ctx_out, &qos_map); if (!handled) { break; } } } - if (handled && qos_map_ptr && set_noop_qos(b_ctx_in->ovs_idl_txn, - b_ctx_in->port_table, - b_ctx_in->qos_table, - b_ctx_out->egress_ifaces)) { - const char *entry; - SSET_FOR_EACH (entry, b_ctx_out->egress_ifaces) { - setup_qos(entry, &qos_map); - } + if (handled) { + configure_ovs_qos(&qos_map, b_ctx_in->ovs_idl_txn, + b_ctx_in->port_table, b_ctx_in->qos_table, + b_ctx_in->port_binding_table); } - destroy_qos_map(&qos_map); return handled; } @@ -2987,8 +2919,6 @@ delete_done: } struct hmap qos_map = HMAP_INITIALIZER(&qos_map); - struct hmap *qos_map_ptr = - sset_is_empty(b_ctx_out->egress_ifaces) ? 
NULL : &qos_map; SBREC_PORT_BINDING_TABLE_FOR_EACH_TRACKED (pb, b_ctx_in->port_binding_table) { @@ -3001,7 +2931,7 @@ delete_done: update_ld_peers(pb, b_ctx_out->local_datapaths); } - handled = handle_updated_port(b_ctx_in, b_ctx_out, pb, qos_map_ptr); + handled = handle_updated_port(b_ctx_in, b_ctx_out, pb, &qos_map); if (!handled) { break; } @@ -3018,7 +2948,7 @@ delete_done: sset_find_and_delete(b_ctx_out->postponed_ports, port_name); continue; } - handled = handle_updated_port(b_ctx_in, b_ctx_out, pb, qos_map_ptr); + handled = handle_updated_port(b_ctx_in, b_ctx_out, pb, &qos_map); if (!handled) { break; } @@ -3064,14 +2994,10 @@ delete_done: shash_destroy(&bridge_mappings); } - if (handled && qos_map_ptr && set_noop_qos(b_ctx_in->ovs_idl_txn, - b_ctx_in->port_table, - b_ctx_in->qos_table, - b_ctx_out->egress_ifaces)) { - const char *entry; - SSET_FOR_EACH (entry, b_ctx_out->egress_ifaces) { - setup_qos(entry, &qos_map); - } + if (handled) { + configure_ovs_qos(&qos_map, b_ctx_in->ovs_idl_txn, + b_ctx_in->port_table, b_ctx_in->qos_table, + b_ctx_in->port_binding_table); } destroy_qos_map(&qos_map); diff --git a/controller/ovn-controller.c b/controller/ovn-controller.c index 7dcbfd252..4e2301365 100644 --- a/controller/ovn-controller.c +++ b/controller/ovn-controller.c @@ -1049,6 +1049,11 @@ ctrl_register_ovs_idl(struct ovsdb_idl *ovs_idl) ovsdb_idl_add_table(ovs_idl, &ovsrec_table_datapath); ovsdb_idl_add_column(ovs_idl, &ovsrec_datapath_col_capabilities); ovsdb_idl_add_table(ovs_idl, &ovsrec_table_flow_sample_collector_set); + ovsdb_idl_add_table(ovs_idl, &ovsrec_table_qos); + ovsdb_idl_add_column(ovs_idl, &ovsrec_qos_col_other_config); + ovsdb_idl_add_column(ovs_idl, &ovsrec_qos_col_queues); + ovsdb_idl_add_table(ovs_idl, &ovsrec_table_queue); + ovsdb_idl_add_column(ovs_idl, &ovsrec_queue_col_other_config); chassis_register_ovs_idl(ovs_idl); encaps_register_ovs_idl(ovs_idl); diff --git a/northd/ovn-northd.8.xml b/northd/ovn-northd.8.xml index 5d513e65a..2daa6c105 100644 --- a/northd/ovn-northd.8.xml +++ b/northd/ovn-northd.8.xml @@ -2218,12 +2218,6 @@ output; matches on the localnet <code>outport</code> and applies the action <code>set_queue(id); output;"</code>. </p> - - <p> - Please remember to mark the corresponding physical interface with - <code>ovn-egress-iface</code> set to true in - <ref column="external_ids" table="Interface" db="Open_vSwitch"/>. 
- </p>
</li>
<li>
diff --git a/tests/system-ovn.at b/tests/system-ovn.at
index 8fb45c6b6..971c1af67 100644
--- a/tests/system-ovn.at
+++ b/tests/system-ovn.at
@@ -6582,13 +6582,11 @@ AT_CHECK([ovn-nbctl set Logical_Switch_Port public options:qos_min_rate=200000])
AT_CHECK([ovn-nbctl set Logical_Switch_Port public options:qos_max_rate=300000])
AT_CHECK([ovn-nbctl set Logical_Switch_Port public options:qos_burst=3000000])
AT_CHECK([ovn-nbctl set Logical_Switch_Port public options:qos_ovs_port=\"ovs-public\"])
-AT_CHECK([ovs-vsctl set interface ovs-public external-ids:ovn-egress-iface=true])
AT_CHECK([ovn-nbctl set Logical_Switch_Port ext options:qos_min_rate=400000])
AT_CHECK([ovn-nbctl set Logical_Switch_Port ext options:qos_max_rate=600000])
AT_CHECK([ovn-nbctl set Logical_Switch_Port ext options:qos_burst=6000000])
AT_CHECK([ovn-nbctl set Logical_Switch_Port ext options:qos_ovs_port=\"ovs-ext\"])
-AT_CHECK([ovs-vsctl set interface ovs-ext external-ids:ovn-egress-iface=true])
OVS_WAIT_UNTIL([tc qdisc show | grep -q 'htb 1: dev ovs-public'])
OVS_WAIT_UNTIL([tc class show dev ovs-public | \
grep -q 'class htb .* rate 200Kbit ceil 300Kbit burst 375000b cburst 375000b'])
@@ -6598,11 +6596,12 @@ OVS_WAIT_UNTIL([tc qdisc show | grep -q 'htb 1: dev ovs-ext'])
OVS_WAIT_UNTIL([tc class show dev ovs-ext | \
grep -q 'class htb .* rate 400Kbit ceil 600Kbit burst 750000b cburst 750000b'])
+AT_CHECK([ovn-nbctl remove Logical_Switch_Port public options qos_min_rate=200000])
AT_CHECK([ovn-nbctl remove Logical_Switch_Port public options qos_max_rate=300000])
+
OVS_WAIT_UNTIL([tc class show dev ovs-public | \
- grep -q 'class htb .* rate 200Kbit ceil 34359Mbit burst 375000b .*'])
+ grep -q 'class htb .* rate 12Kbit ceil 10Gbit burst 375000b cburst 365Kb'])
-AT_CHECK([ovn-nbctl remove Logical_Switch_Port public options qos_min_rate=200000])
AT_CHECK([ovn-nbctl remove Logical_Switch_Port public options qos_burst=3000000])
OVS_WAIT_UNTIL([test "$(tc qdisc show | grep 'htb 1: dev ovs-public')" = ""])
-- 
2.39.2
