From: Christophe Fontaine <[email protected]>

Two important parts of how PMD auto load balance operates are how
loaded a core needs to be and how much improvement is estimated
before a PMD auto load balance can trigger.

Previously they were hardcoded to 95% loaded and 25% variance
improvement.

These default values may not be suitable for all use cases and
we may want to use a more (or less) aggressive rebalance, either
on the pmd load threshold or on the minimum variance improvement
threshold.

The defaults are not changed, but "pmd-auto-lb-pmd-load" and
"pmd-auto-lb-improvement" parameters are added to override the
defaults.

$ ovs-vsctl set open_vswitch . other_config:pmd-auto-lb-pmd-load="70"
$ ovs-vsctl set open_vswitch . other_config:pmd-auto-lb-improvement="20"

Signed-off-by: Christophe Fontaine <[email protected]>
Co-Authored-by: Kevin Traynor <[email protected]>
Signed-off-by: Kevin Traynor <[email protected]>
Acked-by: David Marchand <[email protected]>
---
 NEWS                 |  1 +
 lib/dpif-netdev.c    | 54 +++++++++++++++++++++++++++++++++++++-------
 vswitchd/vswitch.xml | 29 ++++++++++++++++++++++--
 3 files changed, 74 insertions(+), 10 deletions(-)

diff --git a/NEWS b/NEWS
index 7e291a180..7994e434d 100644
--- a/NEWS
+++ b/NEWS
@@ -17,4 +17,5 @@ Post-v2.14.0
      * New 'options:dpdk-vf-mac' field for DPDK interface of VF ports,
        that allows configuring the MAC address of a VF representor.
+     * Add parameters to configure PMD auto load balance behaviour.
    - The environment variable OVS_UNBOUND_CONF, if set, is now used
      as the DNS resolver's (unbound) configuration file.
diff --git a/lib/dpif-netdev.c b/lib/dpif-netdev.c
index d19d5bbff..bf2112815 100644
--- a/lib/dpif-netdev.c
+++ b/lib/dpif-netdev.c
@@ -86,7 +86,7 @@ VLOG_DEFINE_THIS_MODULE(dpif_netdev);
 
 /* Auto Load Balancing Defaults */
-#define ALB_ACCEPTABLE_IMPROVEMENT       25
-#define ALB_PMD_LOAD_THRESHOLD           95
-#define ALB_PMD_REBALANCE_POLL_INTERVAL  1 /* 1 Min */
+#define ALB_IMPROVEMENT_THRESHOLD    25
+#define ALB_LOAD_THRESHOLD           95
+#define ALB_REBALANCE_INTERVAL       1 /* 1 Min */
 #define MIN_TO_MSEC                  60000
 
@@ -301,4 +301,6 @@ struct pmd_auto_lb {
     uint64_t rebalance_intvl;
     uint64_t rebalance_poll_timer;
+    uint8_t rebalance_improve_thresh;
+    atomic_uint8_t rebalance_load_thresh;
 };
 
@@ -4205,4 +4207,5 @@ set_pmd_auto_lb(struct dp_netdev *dp)
     struct dp_netdev_pmd_thread *pmd;
     struct pmd_auto_lb *pmd_alb = &dp->pmd_alb;
+    uint8_t rebalance_load_thresh;
 
     bool enable_alb = false;
@@ -4234,7 +4237,14 @@ set_pmd_auto_lb(struct dp_netdev *dp)
         pmd_alb->is_enabled = enable_alb;
         if (pmd_alb->is_enabled) {
+            atomic_read_relaxed(&pmd_alb->rebalance_load_thresh,
+                                &rebalance_load_thresh);
             VLOG_INFO("PMD auto load balance is enabled "
-                      "interval %"PRIu64" mins",
-                       pmd_alb->rebalance_intvl / MIN_TO_MSEC);
+                      "interval %"PRIu64" mins, "
+                      "pmd load threshold %"PRIu8"%%, "
+                      "improvement threshold %"PRIu8"%%",
+                       pmd_alb->rebalance_intvl / MIN_TO_MSEC,
+                       rebalance_load_thresh,
+                       pmd_alb->rebalance_improve_thresh);
+
         } else {
             pmd_alb->rebalance_poll_timer = 0;
@@ -4260,4 +4270,6 @@ dpif_netdev_set_config(struct dpif *dpif, const struct 
smap *other_config)
     uint32_t tx_flush_interval, cur_tx_flush_interval;
     uint64_t rebalance_intvl;
+    uint8_t rebalance_load, cur_rebalance_load;
+    uint8_t rebalance_improve;
 
     tx_flush_interval = smap_get_int(other_config, "tx-flush-interval",
@@ -4337,5 +4349,5 @@ dpif_netdev_set_config(struct dpif *dpif, const struct 
smap *other_config)
 
     rebalance_intvl = smap_get_int(other_config, "pmd-auto-lb-rebal-interval",
-                              ALB_PMD_REBALANCE_POLL_INTERVAL);
+                                   ALB_REBALANCE_INTERVAL);
 
     /* Input is in min, convert it to msec. */
@@ -4349,4 +4361,27 @@ dpif_netdev_set_config(struct dpif *dpif, const struct 
smap *other_config)
     }
 
+    rebalance_improve = smap_get_int(other_config, "pmd-auto-lb-improvement",
+                                     ALB_IMPROVEMENT_THRESHOLD);
+    if (rebalance_improve > 100) {
+        rebalance_improve = ALB_IMPROVEMENT_THRESHOLD;
+    }
+    if (rebalance_improve != pmd_alb->rebalance_improve_thresh) {
+        pmd_alb->rebalance_improve_thresh = rebalance_improve;
+        VLOG_INFO("PMD auto load balance improvement threshold set to "
+                  "%"PRIu8"%%\n", rebalance_improve);
+    }
+
+    rebalance_load = smap_get_int(other_config, "pmd-auto-lb-pmd-load",
+                             ALB_LOAD_THRESHOLD);
+    if (rebalance_load > 100) {
+        rebalance_load = ALB_LOAD_THRESHOLD;
+    }
+    atomic_read_relaxed(&pmd_alb->rebalance_load_thresh, &cur_rebalance_load);
+    if (rebalance_load != cur_rebalance_load) {
+        atomic_store_relaxed(&pmd_alb->rebalance_load_thresh,
+                             rebalance_load);
+        VLOG_INFO("PMD auto load balance pmd load threshold set to "
+                "%"PRIu8"%%\n", rebalance_load);
+    }
     set_pmd_auto_lb(dp);
     return 0;
@@ -5677,5 +5712,5 @@ pmd_rebalance_dry_run(struct dp_netdev *dp)
                 ((curr_variance - new_variance) * 100) / curr_variance;
         }
-        if (improvement < ALB_ACCEPTABLE_IMPROVEMENT) {
+        if (improvement < dp->pmd_alb.rebalance_improve_thresh) {
             ret = false;
         }
@@ -8712,4 +8747,5 @@ dp_netdev_pmd_try_optimize(struct dp_netdev_pmd_thread 
*pmd,
     if (pmd->ctx.now > pmd->rxq_next_cycle_store) {
         uint64_t curr_tsc;
+        uint8_t rebalance_load_trigger;
         struct pmd_auto_lb *pmd_alb = &pmd->dp->pmd_alb;
         if (pmd_alb->is_enabled && !pmd->isolated
@@ -8728,5 +8764,7 @@ dp_netdev_pmd_try_optimize(struct dp_netdev_pmd_thread 
*pmd,
             }
 
-            if (pmd_load >= ALB_PMD_LOAD_THRESHOLD) {
+            atomic_read_relaxed(&pmd_alb->rebalance_load_thresh,
+                                &rebalance_load_trigger);
+            if (pmd_load >= rebalance_load_trigger) {
                 atomic_count_inc(&pmd->pmd_overloaded);
             } else {
diff --git a/vswitchd/vswitch.xml b/vswitchd/vswitch.xml
index 89a876796..76d528ea7 100644
--- a/vswitchd/vswitch.xml
+++ b/vswitchd/vswitch.xml
@@ -654,6 +654,6 @@
         <p>
          Configures PMD Auto Load Balancing that allows automatic assignment of
-         RX queues to PMDs if any of PMDs is overloaded (i.e. processing cycles
-         > 95%).
+         RX queues to PMDs if any of PMDs is overloaded (i.e. a processing
+         cycles > <ref column="other_config" key="pmd-auto-lb-pmd-load"/>).
         </p>
         <p>
@@ -691,4 +691,29 @@
         </p>
       </column>
+      <column name="other_config" key="pmd-auto-lb-pmd-load"
+              type='{"type": "integer", "minInteger": 0, "maxInteger": 100}'>
+        <p>
+         Specifies the minimum pmd load threshold (% of used cycled) of
+         any non-isolated pmds when a PMD auto load balance may be triggered.
+        </p>
+        <p>
+         The default value is <code>95%</code>.
+        </p>
+      </column>
+      <column name="other_config" key="pmd-auto-lb-improvement"
+              type='{"type": "integer", "minInteger": 0, "maxInteger": 100}'>
+        <p>
+         Specifies the minimum evaluated % improvement in load distribution
+         across the non-isolated pmds that will allow a PMD auto load balance
+         to occur.
+        </p>
+        <p>
+         Note, setting this parameter to 0 will always allow an auto load
+         balance to occur regardless of estimated improvement or not.
+        </p>
+        <p>
+         The default value is <code>25%</code>.
+        </p>
+      </column>
       <column name="other_config" key="userspace-tso-enable"
               type='{"type": "boolean"}'>
-- 
2.26.2

_______________________________________________
dev mailing list
[email protected]
https://mail.openvswitch.org/mailman/listinfo/ovs-dev

Reply via email to