Rework OVN QoS implementation in order to configure it through OVS QoS
table instead of running tc command directly bypassing OVS.

Signed-off-by: Lorenzo Bianconi <[email protected]>
---
 controller/binding.c        | 316 ++++++++++++++----------------------
 controller/ovn-controller.c |   5 +
 northd/ovn-northd.8.xml     |   6 -
 tests/system-ovn.at         |   7 +-
 4 files changed, 129 insertions(+), 205 deletions(-)

diff --git a/controller/binding.c b/controller/binding.c
index add64b07d..a58183479 100644
--- a/controller/binding.c
+++ b/controller/binding.c
@@ -172,194 +172,142 @@ get_qos_params(const struct sbrec_port_binding *pb, 
struct hmap *queue_map)
 }
 
 static const struct ovsrec_qos *
-get_noop_qos(struct ovsdb_idl_txn *ovs_idl_txn,
-             const struct ovsrec_qos_table *qos_table)
+add_ovs_qos_table_entry(struct ovsdb_idl_txn *ovs_idl_txn,
+                        const struct ovsrec_qos_table *qos_table,
+                        struct qos_queue *sb_info)
 {
     const struct ovsrec_qos *qos;
+    struct ovsrec_queue *queue;
     OVSREC_QOS_TABLE_FOR_EACH (qos, qos_table) {
-        if (!strcmp(qos->type, "linux-noop")) {
-            return qos;
+        if (strcmp(qos->type, OVN_QOS_TYPE)) {
+            continue;
         }
-    }
 
-    if (!ovs_idl_txn) {
-        return NULL;
-    }
-    qos = ovsrec_qos_insert(ovs_idl_txn);
-    ovsrec_qos_set_type(qos, "linux-noop");
-    return qos;
-}
-
-static bool
-set_noop_qos(struct ovsdb_idl_txn *ovs_idl_txn,
-             const struct ovsrec_port_table *port_table,
-             const struct ovsrec_qos_table *qos_table,
-             struct sset *egress_ifaces)
-{
-    if (!ovs_idl_txn) {
-        return false;
-    }
-
-    const struct ovsrec_qos *noop_qos = get_noop_qos(ovs_idl_txn, qos_table);
-    if (!noop_qos) {
-        return false;
-    }
+        if (!qos->n_queues) {
+            continue;
+        }
 
-    const struct ovsrec_port *port;
-    size_t count = 0;
+        queue = qos->value_queues[0];
+        uint32_t max_rate = smap_get_int(&queue->other_config, "max-rate", 0);
+        if (max_rate != sb_info->max_rate) {
+            continue;
+        }
 
-    OVSREC_PORT_TABLE_FOR_EACH (port, port_table) {
-        if (sset_contains(egress_ifaces, port->name)) {
-            ovsrec_port_set_qos(port, noop_qos);
-            count++;
+        uint32_t min_rate = smap_get_int(&queue->other_config, "min-rate", 0);
+        if (min_rate != sb_info->min_rate) {
+            continue;
         }
-        if (sset_count(egress_ifaces) == count) {
-            break;
+
+        uint32_t burst = smap_get_int(&queue->other_config, "burst", 0);
+        if (burst != sb_info->burst) {
+            continue;
         }
-    }
-    return true;
-}
 
-static void
-set_qos_type(struct netdev *netdev, const char *type)
-{
-    /* 34359738360 == (2^32 - 1) * 8.  netdev_set_qos() doesn't support
-     * 64-bit rate netlink attributes, so the maximum value is 2^32 - 1 bytes.
-     * The 'max-rate' config option is in bits, so multiplying by 8.
-     * Without setting max-rate the reported link speed will be used, which
-     * can be unrecognized for certain NICs or reported too low for virtual
-     * interfaces. */
-    const struct smap conf = SMAP_CONST1(&conf, "max-rate", "34359738360");
-    int error = netdev_set_qos(netdev, type, &conf);
-    if (error) {
-        static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(1, 1);
-        VLOG_WARN_RL(&rl, "%s: could not set qdisc type \"%s\" (%s)",
-                     netdev_get_name(netdev), type, ovs_strerror(error));
+        return qos;
     }
-}
 
-static void
-setup_qos(const char *egress_iface, struct hmap *queue_map)
-{
-    static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(5, 5);
-    struct netdev *netdev_phy;
-
-    if (!egress_iface) {
-        /* Queues cannot be configured. */
-        return;
+    if (!ovs_idl_txn) {
+        return NULL;
     }
 
-    int error = netdev_open(egress_iface, NULL, &netdev_phy);
-    if (error) {
-        VLOG_WARN_RL(&rl, "%s: could not open netdev (%s)",
-                     egress_iface, ovs_strerror(error));
-        return;
-    }
+    queue = ovsrec_queue_insert(ovs_idl_txn);
 
-    /* Check current qdisc. */
-    const char *qdisc_type;
-    struct smap qdisc_details;
+    struct smap other_config = SMAP_INITIALIZER(&other_config);
+    smap_add_format(&other_config, "max-rate", "%d", sb_info->max_rate);
+    smap_add_format(&other_config, "min-rate", "%d", sb_info->min_rate);
+    smap_add_format(&other_config, "burst", "%d", sb_info->burst);
+    ovsrec_queue_set_other_config(queue, &other_config);
 
-    smap_init(&qdisc_details);
-    if (netdev_get_qos(netdev_phy, &qdisc_type, &qdisc_details) != 0 ||
-        qdisc_type[0] == '\0') {
-        smap_destroy(&qdisc_details);
-        netdev_close(netdev_phy);
-        /* Qos is not supported. */
-        return;
-    }
-    smap_destroy(&qdisc_details);
+    const int64_t id = sb_info->queue_id;
+    qos = ovsrec_qos_insert(ovs_idl_txn);
+    ovsrec_qos_set_type(qos, OVN_QOS_TYPE);
+    smap_clear(&other_config);
+    smap_add_format(&other_config, "max-rate", "%d", sb_info->max_rate);
+    ovsrec_qos_set_other_config(qos, &other_config);
+    ovsrec_qos_set_queues(qos, &id, &queue, 1);
+    smap_destroy(&other_config);
 
-    /* If we're not actually being requested to do any QoS:
-     *
-     *     - If the current qdisc type is OVN_QOS_TYPE, then we clear the qdisc
-     *       type to "".  Otherwise, it's possible that our own leftover qdisc
-     *       settings could cause strange behavior on egress.  Also, QoS is
-     *       expensive and may waste CPU time even if it's not really in use.
-     *
-     *       OVN isn't the only software that can configure qdiscs, and
-     *       physical interfaces are shared resources, so there is some risk in
-     *       this strategy: we could disrupt some other program's QoS.
-     *       Probably, to entirely avoid this possibility we would need to add
-     *       a configuration setting.
-     *
-     *     - Otherwise leave the qdisc alone. */
-    if (hmap_is_empty(queue_map)) {
-        if (!strcmp(qdisc_type, OVN_QOS_TYPE)) {
-            set_qos_type(netdev_phy, "");
+    return qos;
+}
+
+static void
+remove_stale_ovs_qos_entry(const struct ovsrec_port_table *port_table,
+                           const struct ovsrec_qos_table *qos_table,
+                           const struct sbrec_port_binding_table *pb_table)
+{
+    const struct ovsrec_qos *qos, *qos_next;
+    OVSREC_QOS_TABLE_FOR_EACH_SAFE (qos, qos_next, qos_table) {
+        if (!qos->n_queues) {
+            continue;
         }
-        netdev_close(netdev_phy);
-        return;
-    }
 
-    /* Configure qdisc. */
-    if (strcmp(qdisc_type, OVN_QOS_TYPE)) {
-        set_qos_type(netdev_phy, OVN_QOS_TYPE);
-    }
+        struct ovsrec_queue *queue = qos->value_queues[0];
+        bool stale = true;
+        const struct sbrec_port_binding *pb;
+        SBREC_PORT_BINDING_TABLE_FOR_EACH (pb, pb_table) {
+            if (get_lport_type(pb) != LP_LOCALNET) {
+                /* QoS is only supported on localnet port for the moment. */
+                continue;
+            }
 
-    /* Check and delete if needed. */
-    struct netdev_queue_dump dump;
-    unsigned int queue_id;
-    struct smap queue_details;
-    struct qos_queue *sb_info;
-    struct hmap consistent_queues;
-
-    smap_init(&queue_details);
-    hmap_init(&consistent_queues);
-    NETDEV_QUEUE_FOR_EACH (&queue_id, &queue_details, &dump, netdev_phy) {
-        bool is_queue_needed = false;
-
-        HMAP_FOR_EACH_WITH_HASH (sb_info, node, hash_int(queue_id, 0),
-                                 queue_map) {
-            is_queue_needed = true;
-            if (sb_info->min_rate ==
-                smap_get_int(&queue_details, "min-rate", 0)
-                && sb_info->max_rate ==
-                smap_get_int(&queue_details, "max-rate", 0)
-                && sb_info->burst ==
-                smap_get_int(&queue_details, "burst", 0) &&
-                !strcmp(egress_iface, sb_info->port_name)) {
-                /* This queue is consistent. */
-                hmap_insert(&consistent_queues, &sb_info->node,
-                            hash_int(queue_id, 0));
+            uint32_t pb_max_rate = smap_get_int(&pb->options,
+                                                "qos_max_rate", 0);
+            uint32_t pb_min_rate = smap_get_int(&pb->options,
+                                                "qos_min_rate", 0);
+            uint32_t pb_burst = smap_get_int(&pb->options, "qos_burst", 0);
+            if (!pb_max_rate && !pb_burst) {
+                continue;
+            }
+
+            uint32_t max_rate = smap_get_int(&queue->other_config,
+                                             "max-rate", 0);
+            uint32_t min_rate = smap_get_int(&queue->other_config,
+                                             "min-rate", 0);
+            uint32_t burst = smap_get_int(&queue->other_config, "burst", 0);
+            if (pb_max_rate == max_rate && pb_min_rate == min_rate &&
+                pb_burst == burst) {
+                stale = false;
                 break;
             }
         }
-
-        if (!is_queue_needed) {
-            error = netdev_delete_queue(netdev_phy, queue_id);
-            if (error) {
-                VLOG_WARN_RL(&rl, "%s: could not delete queue %u (%s)",
-                             egress_iface, queue_id, ovs_strerror(error));
+        if (stale) {
+            const struct ovsrec_port *port;
+            OVSREC_PORT_TABLE_FOR_EACH (port, port_table) {
+                if (port->qos == qos) {
+                    ovsrec_port_set_qos(port, NULL);
+                    ovsrec_queue_delete(queue);
+                    ovsrec_qos_delete(qos);
+                    break;
+                }
             }
         }
     }
+}
 
-    /* Create/Update queues. */
+static void
+configure_ovs_qos(struct hmap *queue_map,
+                  struct ovsdb_idl_txn *ovs_idl_txn,
+                  const struct ovsrec_port_table *port_table,
+                  const struct ovsrec_qos_table *qos_table,
+                  const struct sbrec_port_binding_table *pb_table)
+{
+    struct qos_queue *sb_info;
     HMAP_FOR_EACH (sb_info, node, queue_map) {
-        if (hmap_contains(&consistent_queues, &sb_info->node)) {
-            hmap_remove(&consistent_queues, &sb_info->node);
-            continue;
-        }
+        const struct ovsrec_qos *qos =
+            add_ovs_qos_table_entry(ovs_idl_txn, qos_table, sb_info);
 
-        if (strcmp(egress_iface, sb_info->port_name)) {
-            continue;
+        const struct ovsrec_port *port;
+        OVSREC_PORT_TABLE_FOR_EACH (port, port_table) {
+            if (!strcmp(port->name, sb_info->port_name)) {
+                ovsrec_port_set_qos(port, qos);
+                break;
+            }
         }
+    }
 
-        smap_clear(&queue_details);
-        smap_add_format(&queue_details, "min-rate", "%d", sb_info->min_rate);
-        smap_add_format(&queue_details, "max-rate", "%d", sb_info->max_rate);
-        smap_add_format(&queue_details, "burst", "%d", sb_info->burst);
-        error = netdev_set_queue(netdev_phy, sb_info->queue_id,
-                                 &queue_details);
-        if (error) {
-            VLOG_WARN_RL(&rl, "%s: could not configure queue %u (%s)",
-                         egress_iface, sb_info->queue_id, ovs_strerror(error));
-        }
+    if (ovs_idl_txn) {
+        remove_stale_ovs_qos_entry(port_table, qos_table, pb_table);
     }
-    smap_destroy(&queue_details);
-    hmap_destroy(&consistent_queues);
-    netdev_close(netdev_phy);
 }
 
 static void
@@ -1926,9 +1874,6 @@ binding_run(struct binding_ctx_in *b_ctx_in, struct 
binding_ctx_out *b_ctx_out)
         build_local_bindings(b_ctx_in, b_ctx_out);
     }
 
-    struct hmap *qos_map_ptr =
-        !sset_is_empty(b_ctx_out->egress_ifaces) ? &qos_map : NULL;
-
     struct ovs_list localnet_lports = OVS_LIST_INITIALIZER(&localnet_lports);
     struct ovs_list external_lports = OVS_LIST_INITIALIZER(&external_lports);
     struct ovs_list multichassis_ports = OVS_LIST_INITIALIZER(
@@ -1965,7 +1910,7 @@ binding_run(struct binding_ctx_in *b_ctx_in, struct 
binding_ctx_out *b_ctx_out)
             break;
 
         case LP_VIF:
-            consider_vif_lport(pb, b_ctx_in, b_ctx_out, NULL, qos_map_ptr);
+            consider_vif_lport(pb, b_ctx_in, b_ctx_out, NULL, &qos_map);
             if (pb->additional_chassis) {
                 struct lport *multichassis_lport = xmalloc(
                     sizeof *multichassis_lport);
@@ -1976,11 +1921,11 @@ binding_run(struct binding_ctx_in *b_ctx_in, struct 
binding_ctx_out *b_ctx_out)
             break;
 
         case LP_CONTAINER:
-            consider_container_lport(pb, b_ctx_in, b_ctx_out, qos_map_ptr);
+            consider_container_lport(pb, b_ctx_in, b_ctx_out, &qos_map);
             break;
 
         case LP_VIRTUAL:
-            consider_virtual_lport(pb, b_ctx_in, b_ctx_out, qos_map_ptr);
+            consider_virtual_lport(pb, b_ctx_in, b_ctx_out, &qos_map);
             break;
 
         case LP_L2GATEWAY:
@@ -2060,15 +2005,9 @@ binding_run(struct binding_ctx_in *b_ctx_in, struct 
binding_ctx_out *b_ctx_out)
 
     shash_destroy(&bridge_mappings);
 
-    if (!sset_is_empty(b_ctx_out->egress_ifaces)
-        && set_noop_qos(b_ctx_in->ovs_idl_txn, b_ctx_in->port_table,
-                        b_ctx_in->qos_table, b_ctx_out->egress_ifaces)) {
-        const char *entry;
-        SSET_FOR_EACH (entry, b_ctx_out->egress_ifaces) {
-            setup_qos(entry, &qos_map);
-        }
-    }
-
+    configure_ovs_qos(&qos_map, b_ctx_in->ovs_idl_txn,
+                      b_ctx_in->port_table, b_ctx_in->qos_table,
+                      b_ctx_in->port_binding_table);
     destroy_qos_map(&qos_map);
 
     cleanup_claimed_port_timestamps();
@@ -2503,8 +2442,6 @@ binding_handle_ovs_interface_changes(struct 
binding_ctx_in *b_ctx_in,
     }
 
     struct hmap qos_map = HMAP_INITIALIZER(&qos_map);
-    struct hmap *qos_map_ptr =
-        sset_is_empty(b_ctx_out->egress_ifaces) ? NULL : &qos_map;
 
     /*
      * We consider an OVS interface for claiming if the following
@@ -2534,23 +2471,18 @@ binding_handle_ovs_interface_changes(struct 
binding_ctx_in *b_ctx_in,
         if (iface_id && ofport > 0 &&
                 is_iface_in_int_bridge(iface_rec, b_ctx_in->br_int)) {
             handled = consider_iface_claim(iface_rec, iface_id, b_ctx_in,
-                                           b_ctx_out, qos_map_ptr);
+                                           b_ctx_out, &qos_map);
             if (!handled) {
                 break;
             }
         }
     }
 
-    if (handled && qos_map_ptr && set_noop_qos(b_ctx_in->ovs_idl_txn,
-                                               b_ctx_in->port_table,
-                                               b_ctx_in->qos_table,
-                                               b_ctx_out->egress_ifaces)) {
-        const char *entry;
-        SSET_FOR_EACH (entry, b_ctx_out->egress_ifaces) {
-            setup_qos(entry, &qos_map);
-        }
+    if (handled) {
+        configure_ovs_qos(&qos_map, b_ctx_in->ovs_idl_txn,
+                          b_ctx_in->port_table, b_ctx_in->qos_table,
+                          b_ctx_in->port_binding_table);
     }
-
     destroy_qos_map(&qos_map);
     return handled;
 }
@@ -2987,8 +2919,6 @@ delete_done:
     }
 
     struct hmap qos_map = HMAP_INITIALIZER(&qos_map);
-    struct hmap *qos_map_ptr =
-        sset_is_empty(b_ctx_out->egress_ifaces) ? NULL : &qos_map;
 
     SBREC_PORT_BINDING_TABLE_FOR_EACH_TRACKED (pb,
                                                b_ctx_in->port_binding_table) {
@@ -3001,7 +2931,7 @@ delete_done:
             update_ld_peers(pb, b_ctx_out->local_datapaths);
         }
 
-        handled = handle_updated_port(b_ctx_in, b_ctx_out, pb, qos_map_ptr);
+        handled = handle_updated_port(b_ctx_in, b_ctx_out, pb, &qos_map);
         if (!handled) {
             break;
         }
@@ -3018,7 +2948,7 @@ delete_done:
             sset_find_and_delete(b_ctx_out->postponed_ports, port_name);
             continue;
         }
-        handled = handle_updated_port(b_ctx_in, b_ctx_out, pb, qos_map_ptr);
+        handled = handle_updated_port(b_ctx_in, b_ctx_out, pb, &qos_map);
         if (!handled) {
             break;
         }
@@ -3064,14 +2994,10 @@ delete_done:
         shash_destroy(&bridge_mappings);
     }
 
-    if (handled && qos_map_ptr && set_noop_qos(b_ctx_in->ovs_idl_txn,
-                                               b_ctx_in->port_table,
-                                               b_ctx_in->qos_table,
-                                               b_ctx_out->egress_ifaces)) {
-        const char *entry;
-        SSET_FOR_EACH (entry, b_ctx_out->egress_ifaces) {
-            setup_qos(entry, &qos_map);
-        }
+    if (handled) {
+        configure_ovs_qos(&qos_map, b_ctx_in->ovs_idl_txn,
+                          b_ctx_in->port_table, b_ctx_in->qos_table,
+                          b_ctx_in->port_binding_table);
     }
 
     destroy_qos_map(&qos_map);
diff --git a/controller/ovn-controller.c b/controller/ovn-controller.c
index 7dcbfd252..4e2301365 100644
--- a/controller/ovn-controller.c
+++ b/controller/ovn-controller.c
@@ -1049,6 +1049,11 @@ ctrl_register_ovs_idl(struct ovsdb_idl *ovs_idl)
     ovsdb_idl_add_table(ovs_idl, &ovsrec_table_datapath);
     ovsdb_idl_add_column(ovs_idl, &ovsrec_datapath_col_capabilities);
     ovsdb_idl_add_table(ovs_idl, &ovsrec_table_flow_sample_collector_set);
+    ovsdb_idl_add_table(ovs_idl, &ovsrec_table_qos);
+    ovsdb_idl_add_column(ovs_idl, &ovsrec_qos_col_other_config);
+    ovsdb_idl_add_column(ovs_idl, &ovsrec_qos_col_queues);
+    ovsdb_idl_add_table(ovs_idl, &ovsrec_table_queue);
+    ovsdb_idl_add_column(ovs_idl, &ovsrec_queue_col_other_config);
 
     chassis_register_ovs_idl(ovs_idl);
     encaps_register_ovs_idl(ovs_idl);
diff --git a/northd/ovn-northd.8.xml b/northd/ovn-northd.8.xml
index 5d513e65a..2daa6c105 100644
--- a/northd/ovn-northd.8.xml
+++ b/northd/ovn-northd.8.xml
@@ -2218,12 +2218,6 @@ output;
         matches on the localnet <code>outport</code> and applies the action
         <code>set_queue(id); output;"</code>.
         </p>
-
-        <p>
-          Please remember to mark the corresponding physical interface with
-          <code>ovn-egress-iface</code> set to true in
-          <ref column="external_ids" table="Interface" db="Open_vSwitch"/>.
-        </p>
       </li>
 
       <li>
diff --git a/tests/system-ovn.at b/tests/system-ovn.at
index 8fb45c6b6..971c1af67 100644
--- a/tests/system-ovn.at
+++ b/tests/system-ovn.at
@@ -6582,13 +6582,11 @@ AT_CHECK([ovn-nbctl set Logical_Switch_Port public 
options:qos_min_rate=200000])
 AT_CHECK([ovn-nbctl set Logical_Switch_Port public 
options:qos_max_rate=300000])
 AT_CHECK([ovn-nbctl set Logical_Switch_Port public options:qos_burst=3000000])
 AT_CHECK([ovn-nbctl set Logical_Switch_Port public 
options:qos_ovs_port=\"ovs-public\"])
-AT_CHECK([ovs-vsctl set interface ovs-public 
external-ids:ovn-egress-iface=true])
 
 AT_CHECK([ovn-nbctl set Logical_Switch_Port ext options:qos_min_rate=400000])
 AT_CHECK([ovn-nbctl set Logical_Switch_Port ext options:qos_max_rate=600000])
 AT_CHECK([ovn-nbctl set Logical_Switch_Port ext options:qos_burst=6000000])
 AT_CHECK([ovn-nbctl set Logical_Switch_Port ext 
options:qos_ovs_port=\"ovs-ext\"])
-AT_CHECK([ovs-vsctl set interface ovs-ext external-ids:ovn-egress-iface=true])
 
 OVS_WAIT_UNTIL([tc qdisc show | grep -q 'htb 1: dev ovs-public'])
 OVS_WAIT_UNTIL([tc class show dev ovs-public | \
@@ -6598,11 +6596,12 @@ OVS_WAIT_UNTIL([tc qdisc show | grep -q 'htb 1: dev 
ovs-ext'])
 OVS_WAIT_UNTIL([tc class show dev ovs-ext | \
                 grep -q 'class htb .* rate 400Kbit ceil 600Kbit burst 750000b 
cburst 750000b'])
 
+AT_CHECK([ovn-nbctl remove Logical_Switch_Port public options 
qos_min_rate=200000])
 AT_CHECK([ovn-nbctl remove Logical_Switch_Port public options 
qos_max_rate=300000])
+
 OVS_WAIT_UNTIL([tc class show dev ovs-public | \
-                grep -q 'class htb .* rate 200Kbit ceil 34359Mbit burst 
375000b .*'])
+                grep -q 'class htb .* rate 12Kbit ceil 10Gbit burst 375000b 
cburst 365Kb'])
 
-AT_CHECK([ovn-nbctl remove Logical_Switch_Port public options 
qos_min_rate=200000])
 AT_CHECK([ovn-nbctl remove Logical_Switch_Port public options 
qos_burst=3000000])
 OVS_WAIT_UNTIL([test "$(tc qdisc show | grep 'htb 1: dev ovs-public')" = ""])
 
-- 
2.39.2

_______________________________________________
dev mailing list
[email protected]
https://mail.openvswitch.org/mailman/listinfo/ovs-dev

Reply via email to