From: Gowrishankar Muthukrishnan [mailto:[email protected]]
Sent: Saturday, January 05, 2019 3:24 PM
To: Nitin Katiyar <[email protected]>
Cc: [email protected]
Subject: Re: [ovs-dev] [PATCH] Adding support for PMD auto load balancing

Hi,
Thanks for reviewing it. Some of these are addressed in v2, and the rest I will
try to address in the next version.


 VLOG_DEFINE_THIS_MODULE(dpif_netdev);

Minor suggestions on naming variable/macros follow as below, as coding itself 
is documentation IMO :).

+/* Auto Load Balancing Defaults */
+#define ACCEPT_IMPROVE_DEFAULT       (25)

Instead, how about LB_ACCEPTABLE_IMPROVEMENT ?
prefixing global variables/macros with what you would use for, would always 
help reading code.
Sure, I will change it to ALB_*

+#define PMD_LOAD_THRE_DEFAULT        (95)

LB_PMD_LOAD_THRESHOLD ?

+#define PMD_REBALANCE_POLL_INTERVAL  1 /* 1 Min */

LB_PMD_POLL_REBALANCE_INTERVAL ?

+#define MIN_TO_MSEC                  60000
+
 #define FLOW_DUMP_MAX_BATCH 50
 /* Use per thread recirc_depth to prevent recirculation loop. */
 #define MAX_RECIRC_DEPTH 6
@@ -288,6 +294,13 @@ struct dp_meter {
     struct dp_meter_band bands[];
 };

+struct pmd_auto_lb {
+    bool auto_lb_conf;        //enable-disable auto load balancing
+    bool is_enabled;          //auto_lb current status
+    uint64_t rebalance_intvl;
+    uint64_t rebalance_poll_timer;
+};
+
 /* Datapath based on the network device interface from netdev.h.
  *
  *
@@ -368,6 +381,7 @@ struct dp_netdev {
     uint64_t last_tnl_conf_seq;

     struct conntrack conntrack;
+    struct pmd_auto_lb pmd_alb;
 };

 static void meter_lock(const struct dp_netdev *dp, uint32_t meter_id)
@@ -439,6 +453,10 @@ struct dp_netdev_rxq {
                                           particular core. */
     unsigned intrvl_idx;               /* Write index for 'cycles_intrvl'. */
     struct dp_netdev_pmd_thread *pmd;  /* pmd thread that polls this queue. */
+    struct dp_netdev_pmd_thread *dry_run_pmd;
+                                       /* During auto lb trigger, pmd thread
+                                          associated with this q during dry
+                                          run. */

/* pmd thread that execute(or dry-run) this queue in auto load balance period */
 This is removed in v2
     bool is_vhost;                     /* Is rxq of a vhost port. */

     /* Counters of cycles spent successfully polling and processing pkts. */
@@ -682,6 +700,12 @@ struct dp_netdev_pmd_thread {
     struct ovs_mutex port_mutex;    /* Mutex for 'poll_list' and 'tx_ports'. */
     /* List of rx queues to poll. */
     struct hmap poll_list OVS_GUARDED;
+
+    /* List of rx queues got associated during
+       pmd load balance dry run. These queues are

"during dry run of pmd auto load balance. These queues ..."
Removed in v2

+       not polled by pmd. */
+    struct hmap dry_poll_list OVS_GUARDED;
+
     /* Map of 'tx_port's used for transmission.  Written by the main thread,
      * read by the pmd thread. */
     struct hmap tx_ports OVS_GUARDED;
@@ -702,6 +726,11 @@ struct dp_netdev_pmd_thread {
     /* Keep track of detailed PMD performance statistics. */
     struct pmd_perf_stats perf_stats;

+    /* Some stats from previous iteration used by automatic pmd
+       load balance logic. */

/* stats from previous iteration during auto rebalance of pmds*/
Yes, already taken care of it.

+    uint64_t prev_stats[PMD_N_STATS];
+    atomic_count pmd_overloaded;
+
     /* Set to true if the pmd thread needs to be reloaded. */
     bool need_reload;
 };
@@ -764,7 +793,8 @@ static void dp_netdev_del_port_tx_from_pmd(struct 
dp_netdev_pmd_thread *pmd,
                                            struct tx_port *tx)
     OVS_REQUIRES(pmd->port_mutex);
 static void dp_netdev_add_rxq_to_pmd(struct dp_netdev_pmd_thread *pmd,
-                                     struct dp_netdev_rxq *rxq)
+                                     struct dp_netdev_rxq *rxq,
+                                     bool dry_run)
     OVS_REQUIRES(pmd->port_mutex);
 static void dp_netdev_del_rxq_from_pmd(struct dp_netdev_pmd_thread *pmd,
                                        struct rxq_poll *poll)
@@ -792,9 +822,11 @@ dp_netdev_rxq_get_cycles(struct dp_netdev_rxq *rx,
                          enum rxq_cycles_counter_type type);
 static void
 dp_netdev_rxq_set_intrvl_cycles(struct dp_netdev_rxq *rx,
-                           unsigned long long cycles);
+                                unsigned long long cycles,
+                                unsigned idx);
 static uint64_t
-dp_netdev_rxq_get_intrvl_cycles(struct dp_netdev_rxq *rx, unsigned idx);
+dp_netdev_rxq_get_intrvl_cycles(struct dp_netdev_rxq *rx,
+                                unsigned idx);
 static void
 dpif_netdev_xps_revalidate_pmd(const struct dp_netdev_pmd_thread *pmd,
                                bool purge);
@@ -3734,6 +3766,49 @@ dpif_netdev_operate(struct dpif *dpif, struct dpif_op 
**ops, size_t n_ops,
     }
 }

+/* Enable/Disable PMD auto load balancing */
+static void
+enable_pmd_auto_lb(struct dp_netdev *dp)

As the same function enables as well as disables auto load balancing, its name
could be "configure_pmd_alb()"?
As per Kevin’s comment I made it to set_pmd_auto_lb in v2

+{
+    unsigned int cnt = 0;
+    struct dp_netdev_pmd_thread *pmd;
+    struct pmd_auto_lb * pmd_alb = &dp->pmd_alb;

struct pmd_auto_lb *pmd_alb = &dp->pmd_alb;

+
+    bool enable = false;

Instead of "enable", how about enable_alb ?
It is just local variable so I used it like this. I will change in next version.

+    bool pmd_rxq_assign_cyc = dp->pmd_rxq_assign_cyc;
+    bool is_enabled = pmd_alb->is_enabled;
+
+    /* Ensure there is at least 2 non-isolated PMDs and
+     * one of the PMD is polling more than one rxq
+     */

".. one of them is polling .. "
Sure

+    CMAP_FOR_EACH (pmd, node, &dp->poll_threads) {
+        if (pmd->core_id == NON_PMD_CORE_ID || pmd->isolated) {
+            continue;
+        }
+
+        cnt++;
+        if ((hmap_count(&pmd->poll_list) > 1) && cnt > 1) {
+            enable = true;
+            break;
+        }
+    }

The above has to be fixed for a test that Kevin mentioned (3q,1q)
 Done
+
+    /* Enable auto LB if it is configured and cycle based assignment is true */
+    enable = enable && pmd_rxq_assign_cyc && pmd_alb->auto_lb_conf;
+
+    if (enable && !is_enabled) {

Could have been (enable_alb && !is_enabled) ..
Sure.
+        pmd_alb->is_enabled = true;
+        VLOG_INFO("PMD auto lb is enabled, rebalance intvl:%lu(msec)\n",

As it is log, try to convey complete detail (may be useful when someone parses 
the log for right keywords).

"pmd auto load balance is enabled (with rebalance interval %lu msec)\n" ..
Sure
+                   pmd_alb->rebalance_intvl);
+    }
+
+    if (!enable && is_enabled) {
+        pmd_alb->is_enabled = false;
+        pmd_alb->rebalance_poll_timer = 0;
+        VLOG_INFO("PMD auto lb is disabled\n");

Maybe convey it by saying: "pmd auto lb was enabled, but disabling now"
How about “PMD auto lb is disabled now”?

+    }
+}
+
 /* Applies datapath configuration from the database. Some of the changes are
  * actually applied in dpif_netdev_run(). */
 static int
@@ -3748,6 +3823,7 @@ dpif_netdev_set_config(struct dpif *dpif, const struct 
smap *other_config)
                         DEFAULT_EM_FLOW_INSERT_INV_PROB);
     uint32_t insert_min, cur_min;
     uint32_t tx_flush_interval, cur_tx_flush_interval;
+    uint64_t rebalance_intvl;
     tx_flush_interval = smap_get_int(other_config, "tx-flush-interval",
                                      DEFAULT_TX_FLUSH_INTERVAL);
@@ -3819,6 +3895,23 @@ dpif_netdev_set_config(struct dpif *dpif, const struct 
smap *other_config)
                   pmd_rxq_assign);
         dp_netdev_request_reconfigure(dp);
     }
+
+    struct pmd_auto_lb * pmd_alb = &dp->pmd_alb;
+    pmd_alb->auto_lb_conf = smap_get_bool(other_config, "pmd-auto-lb",
+                              false);
+
+    rebalance_intvl = smap_get_int(other_config, "pmd-auto-lb-rebalance-intvl",
+                              PMD_REBALANCE_POLL_INTERVAL);
+
+    /* Input is in min, convert it to msec */
+    rebalance_intvl =
+        rebalance_intvl ? rebalance_intvl * MIN_TO_MSEC : MIN_TO_MSEC;
+
+    if (pmd_alb->rebalance_intvl != rebalance_intvl) {
+        pmd_alb->rebalance_intvl = rebalance_intvl;
+    }
+
+    enable_pmd_auto_lb(dp);
     return 0;
 }

@@ -3974,9 +4067,9 @@ dp_netdev_rxq_get_cycles(struct dp_netdev_rxq *rx,

 static void
 dp_netdev_rxq_set_intrvl_cycles(struct dp_netdev_rxq *rx,
-                                unsigned long long cycles)
+                                unsigned long long cycles,
+                                unsigned idx)
 {
-    unsigned int idx = rx->intrvl_idx++ % PMD_RXQ_INTERVAL_MAX;
     atomic_store_relaxed(&rx->cycles_intrvl[idx], cycles);
 }

@@ -4194,6 +4287,7 @@ port_reconfigure(struct dp_netdev_port *port)
         }

         port->rxqs[i].port = port;
+        port->rxqs[i].dry_run_pmd = NULL;
         port->rxqs[i].is_vhost = !strncmp(port->type, "dpdkvhost", 9);

         err = netdev_rxq_open(netdev, &port->rxqs[i].rx, i);
@@ -4378,7 +4472,8 @@ compare_rxq_cycles(const void *a, const void *b)
  * The function doesn't touch the pmd threads, it just stores the assignment
  * in the 'pmd' member of each rxq. */
 static void
-rxq_scheduling(struct dp_netdev *dp, bool pinned) OVS_REQUIRES(dp->port_mutex)
+rxq_scheduling(struct dp_netdev *dp, bool pinned, bool dry_run)
+    OVS_REQUIRES(dp->port_mutex)
 {
     struct dp_netdev_port *port;
     struct rr_numa_list rr;
@@ -4389,6 +4484,11 @@ rxq_scheduling(struct dp_netdev *dp, bool pinned) 
OVS_REQUIRES(dp->port_mutex)
     int numa_id;
     bool assign_cyc = dp->pmd_rxq_assign_cyc;

+    if (dry_run) {
+        VLOG_INFO("Doing PMD Auto load balancing dry run: "
+                  "Queue to PMD mapping may change");
+    }
+
     HMAP_FOR_EACH (port, node, &dp->ports) {
         if (!netdev_is_pmd(port->netdev)) {
             continue;
@@ -4401,7 +4501,7 @@ rxq_scheduling(struct dp_netdev *dp, bool pinned) 
OVS_REQUIRES(dp->port_mutex)
                 struct dp_netdev_pmd_thread *pmd;

                 pmd = dp_netdev_get_pmd(dp, q->core_id);
-                if (!pmd) {
+                if (!pmd && !dry_run) {
                     VLOG_WARN("There is no PMD thread on core %d. Queue "
                               "%d on port \'%s\' will not be polled.",
                               q->core_id, qid, netdev_get_name(port->netdev));
@@ -4442,43 +4542,62 @@ rxq_scheduling(struct dp_netdev *dp, bool pinned) 
OVS_REQUIRES(dp->port_mutex)
     rr_numa_list_populate(dp, &rr);
     /* Assign the sorted queues to pmds in round robin. */
     for (int i = 0; i < n_rxqs; i++) {
+        if (!dry_run && rxqs[i]->dry_run_pmd) {
+            rxqs[i]->pmd = rxqs[i]->dry_run_pmd;
+            rxqs[i]->dry_run_pmd = NULL;
+            continue;
+        }
+
         numa_id = netdev_get_numa_id(rxqs[i]->port->netdev);
         numa = rr_numa_list_lookup(&rr, numa_id);
+        struct dp_netdev_pmd_thread **pmd;
+        if (dry_run) {
+            pmd = &rxqs[i]->dry_run_pmd;
+        } else {
+            pmd = &rxqs[i]->pmd;
+        }
         if (!numa) {
             /* There are no pmds on the queue's local NUMA node.
                Round robin on the NUMA nodes that do have pmds. */
             non_local_numa = rr_numa_list_next(&rr, non_local_numa);
             if (!non_local_numa) {
-                VLOG_ERR("There is no available (non-isolated) pmd "
-                         "thread for port \'%s\' queue %d. This queue "
-                         "will not be polled. Is pmd-cpu-mask set to "
-                         "zero? Or are all PMDs isolated to other "
-                         "queues?", netdev_rxq_get_name(rxqs[i]->rx),
-                         netdev_rxq_get_queue_id(rxqs[i]->rx));
+                if (!dry_run) {
+                    VLOG_ERR("There is no available (non-isolated) pmd "
+                             "thread for port \'%s\' queue %d. This queue "
+                             "will not be polled. Is pmd-cpu-mask set to "
+                             "zero? Or are all PMDs isolated to other "
+                             "queues?", netdev_rxq_get_name(rxqs[i]->rx),
+                             netdev_rxq_get_queue_id(rxqs[i]->rx));
+                }
                 continue;
             }
-            rxqs[i]->pmd = rr_numa_get_pmd(non_local_numa, assign_cyc);
-            VLOG_WARN("There's no available (non-isolated) pmd thread "
-                      "on numa node %d. Queue %d on port \'%s\' will "
-                      "be assigned to the pmd on core %d "
-                      "(numa node %d). Expect reduced performance.",
-                      numa_id, netdev_rxq_get_queue_id(rxqs[i]->rx),
-                      netdev_rxq_get_name(rxqs[i]->rx),
-                      rxqs[i]->pmd->core_id, rxqs[i]->pmd->numa_id);
+
+            *pmd = rr_numa_get_pmd(non_local_numa, assign_cyc);
+
+            if (!dry_run) {
+                VLOG_WARN("There's no available (non-isolated) pmd thread "
+                          "on numa node %d. Queue %d on port \'%s\' will "
+                          "be assigned to the pmd on core %d "
+                          "(numa node %d). Expect reduced performance.",
+                          numa_id, netdev_rxq_get_queue_id(rxqs[i]->rx),
+                          netdev_rxq_get_name(rxqs[i]->rx),
+                          (*pmd)->core_id, (*pmd)->numa_id);
+            }
         } else {
-            rxqs[i]->pmd = rr_numa_get_pmd(numa, assign_cyc);
+            *pmd = rr_numa_get_pmd(numa, assign_cyc);
+
             if (assign_cyc) {
                 VLOG_INFO("Core %d on numa node %d assigned port \'%s\' "
                           "rx queue %d "
                           "(measured processing cycles %"PRIu64").",
-                          rxqs[i]->pmd->core_id, numa_id,
+                          (*pmd)->core_id, numa_id,
                           netdev_rxq_get_name(rxqs[i]->rx),
                           netdev_rxq_get_queue_id(rxqs[i]->rx),
                           dp_netdev_rxq_get_cycles(rxqs[i],
                                                    RXQ_CYCLES_PROC_HIST));
             } else {
                 VLOG_INFO("Core %d on numa node %d assigned port \'%s\' "
-                          "rx queue %d.", rxqs[i]->pmd->core_id, numa_id,
+                          "rx queue %d.", (*pmd)->core_id, numa_id,
                           netdev_rxq_get_name(rxqs[i]->rx),
                           netdev_rxq_get_queue_id(rxqs[i]->rx));
             }
@@ -4708,10 +4827,10 @@ reconfigure_datapath(struct dp_netdev *dp)
     }

     /* Add pinned queues and mark pmd threads isolated. */
-    rxq_scheduling(dp, true);
+    rxq_scheduling(dp, true, false);

     /* Add non-pinned queues. */
-    rxq_scheduling(dp, false);
+    rxq_scheduling(dp, false, false);

     /* Step 5: Remove queues not compliant with new scheduling. */
     CMAP_FOR_EACH (pmd, node, &dp->poll_threads) {
@@ -4742,7 +4861,7 @@ reconfigure_datapath(struct dp_netdev *dp)

             if (q->pmd) {
                 ovs_mutex_lock(&q->pmd->port_mutex);
-                dp_netdev_add_rxq_to_pmd(q->pmd, q);
+                dp_netdev_add_rxq_to_pmd(q->pmd, q, false);
                 ovs_mutex_unlock(&q->pmd->port_mutex);
             }
         }
@@ -4762,6 +4881,9 @@ reconfigure_datapath(struct dp_netdev *dp)

     /* Reload affected pmd threads. */
     reload_affected_pmds(dp);
+
+    /* Check if PMD Auto LB is to be enabled */
+    enable_pmd_auto_lb(dp);
 }

 /* Returns true if one of the netdevs in 'dp' requires a reconfiguration */
@@ -4780,6 +4902,183 @@ ports_require_restart(const struct dp_netdev *dp)
     return false;
 }

+/* Function for calculating variance */
+static uint64_t
+variance(uint64_t a[], int n)
+{
+    /* Compute mean (average of elements) */
+    uint64_t sum = 0;
+    uint64_t mean;
+    uint64_t sqDiff = 0;
+
+    if (!n) {
+        return 0;
+    }
+
+    for (int i = 0; i < n; i++) {
+        VLOG_DBG("PMD_AUTO_LB_MON pmd_load[%d]=%"PRIu64"",
+                i, a[i]);

Do we really need this LB-specific debug statement? This utility function
could be used for something else in the future as well.
Agreed, will remove it with some other debugs also.

+        sum += a[i];
+    }
+    mean = sum / n;
+
+    /* Compute sum squared differences with mean. */
+    for (int i = 0; i < n; i++) {
+        sqDiff += (a[i] - mean)*(a[i] - mean);
+    }
+    VLOG_DBG("PMD_AUTO_LB_MON variance %"PRIu64"",
+              sqDiff / n);
+
+    return sqDiff / n;
+}
+
+static bool
+pmd_rebalance_dry_run(struct dp_netdev *dp)
+{
+    struct dp_netdev_pmd_thread *pmd;
+    struct dp_netdev_port *port;
+    struct rxq_poll *poll, *poll_next;
+    uint64_t *curr_pmd_usage;
+    uint64_t *new_pmd_usage;
+
+    uint64_t new_variance;
+    uint64_t curr_variance;
+    uint64_t improvement = 0;
+    uint32_t num_pmds;
+    bool pmd_mapping_changed = false;
+
+    rxq_scheduling(dp, false, true);
+
+    /* Checking mapping of PMD to q's.

.. of pmd to rxqs.

+     * If it remains same then don't do anything.
+     */
+    HMAP_FOR_EACH (port, node, &dp->ports) {
+        if (!netdev_is_pmd(port->netdev)) {
+            /* Port is not polled by PMD */
+            continue;
+        }
+
+        for (int qid = 0; qid < port->n_rxq; qid++) {
+            struct dp_netdev_rxq *q = &port->rxqs[qid];
+
+            if (q->dry_run_pmd) {
+                dp_netdev_add_rxq_to_pmd(q->dry_run_pmd, q, true);
+                if (q->dry_run_pmd != q->pmd) {
+                    pmd_mapping_changed = true;
+                }
+            }
+        }
+    }
+
+    if (!pmd_mapping_changed) {
+        VLOG_DBG("PMD_AUTO_LB_MON Dry Run indicating no pmd-q mapping change,"
+                 "so skipping reconfiguration");

Maybe: "in dry-run, no change in pmd-rxq map observed, hence skip rxq
reconfiguration."
It is removed in v2.

+
+        CMAP_FOR_EACH (pmd, node, &dp->poll_threads) {
+            if (atomic_count_get(&pmd->pmd_overloaded)) {
+                atomic_count_set(&pmd->pmd_overloaded, 0);
+            }
+            HMAP_FOR_EACH_POP (poll, node, &pmd->dry_poll_list) {
+                free(poll);
+            }
+        }
+
+        goto UNDO_DRYRUN;
+    }
+
+    num_pmds = cmap_count(&dp->poll_threads);
+    curr_pmd_usage = xcalloc(num_pmds, sizeof(uint64_t));
+    new_pmd_usage = xcalloc(num_pmds, sizeof(uint64_t));
+
+    num_pmds = 0;
+    CMAP_FOR_EACH (pmd, node, &dp->poll_threads) {
+        uint64_t total_proc = 0;
+        uint64_t total_cycles = 0;
+        uint64_t pmd_usage = 0;
+
+        if ((pmd->core_id == NON_PMD_CORE_ID) || pmd->isolated) {
+            continue;
+        }
+
+        /* Get the total pmd cycles for an interval. */
+        atomic_read_relaxed(&pmd->intrvl_cycles, &total_cycles);
+        /* Estimate the cycles to cover all intervals. */
+        total_cycles *= PMD_RXQ_INTERVAL_MAX;
+
+        HMAP_FOR_EACH_SAFE (poll, poll_next, node, &pmd->dry_poll_list) {
+            total_proc += dp_netdev_rxq_get_cycles(poll->rxq,
+                                                 RXQ_CYCLES_PROC_HIST);
+        }
+
+        if (total_proc) {
+            pmd_usage = (total_proc * 100) / total_cycles;
+            VLOG_DBG("PMD_AUTO_LB_MON new_pmd_usage(%d) %"PRIu64"",
+                      pmd->core_id, pmd_usage);
+        }
+        new_pmd_usage[num_pmds] = pmd_usage;
+
+        total_proc = 0;
+        pmd_usage = 0;
+        HMAP_FOR_EACH_SAFE (poll, poll_next, node, &pmd->poll_list) {
+            total_proc += dp_netdev_rxq_get_cycles(poll->rxq,
+                                                 RXQ_CYCLES_PROC_HIST);
+        }
+
+        if (total_proc) {
+            pmd_usage = (total_proc * 100) / total_cycles;
+            VLOG_DBG("PMD_AUTO_LB_MON curr_pmd_usage(%d)` %"PRIu64"",
+                      pmd->core_id, pmd_usage);
+        }
+
+        curr_pmd_usage[num_pmds] = pmd_usage;
+
+        if (atomic_count_get(&pmd->pmd_overloaded)) {
+            atomic_count_set(&pmd->pmd_overloaded, 0);
+        }
+
+        HMAP_FOR_EACH_POP (poll, node, &pmd->dry_poll_list) {
+            free(poll);
+        }
+        num_pmds++;
+    }
+
+    if (num_pmds) {
+        curr_variance = variance(curr_pmd_usage, num_pmds);
+        new_variance = variance(new_pmd_usage, num_pmds);
+        VLOG_DBG("PMD_AUTO_LB_MON new variance: %"PRIu64","
+                  " curr_variance: %"PRIu64"",
+                  new_variance, curr_variance);
+
+        if (new_variance < curr_variance) {
+            improvement =
+                ((curr_variance - new_variance) * 100) / curr_variance;
+
+            VLOG_DBG("PMD_AUTO_LB_MON improvement %"PRIu64"", improvement);
+        }
+    }
+
+    free(curr_pmd_usage);
+    free(new_pmd_usage);
+
+    if (improvement >= ACCEPT_IMPROVE_DEFAULT) {
+        return true;
+    }
+
+UNDO_DRYRUN:
+    HMAP_FOR_EACH (port, node, &dp->ports) {
+        if (!netdev_is_pmd(port->netdev)) {
+            continue;
+         }
+
+         for (int qid = 0; qid < port->n_rxq; qid++) {
+            struct dp_netdev_rxq *q = &port->rxqs[qid];
+            q->dry_run_pmd = NULL;
+         }
+    }
+    return false;
+}
+
+
 /* Return true if needs to revalidate datapath flows. */
 static bool
 dpif_netdev_run(struct dpif *dpif)
@@ -4789,6 +5088,9 @@ dpif_netdev_run(struct dpif *dpif)
     struct dp_netdev_pmd_thread *non_pmd;
     uint64_t new_tnl_seq;
     bool need_to_flush = true;
+    bool pmd_rebalance = false;
+    long long int now = time_msec();
+    struct dp_netdev_pmd_thread *pmd;

     ovs_mutex_lock(&dp->port_mutex);
     non_pmd = dp_netdev_get_pmd(dp, NON_PMD_CORE_ID);
@@ -4821,6 +5123,37 @@ dpif_netdev_run(struct dpif *dpif)
         dp_netdev_pmd_unref(non_pmd);
     }

+    struct pmd_auto_lb * pmd_alb = &dp->pmd_alb;
+    if (pmd_alb->is_enabled) {
+        if (!pmd_alb->rebalance_poll_timer) {
+            pmd_alb->rebalance_poll_timer = now;
+        } else if ((pmd_alb->rebalance_poll_timer +
+             pmd_alb->rebalance_intvl) < now) {
+            pmd_alb->rebalance_poll_timer = now;
+            CMAP_FOR_EACH (pmd, node, &dp->poll_threads) {
+                if (atomic_count_get(&pmd->pmd_overloaded) >=
+                                    PMD_RXQ_INTERVAL_MAX) {
+                    pmd_rebalance = true;
+                    break;
+                }
+            }
+            VLOG_DBG("PMD_AUTO_LB_MON periodic check:pmd rebalance:%d",
+                      pmd_rebalance);
+
+            if (pmd_rebalance && !dp_netdev_is_reconf_required(dp) &&
+                !ports_require_restart(dp)) {
+                if (pmd_rebalance_dry_run(dp)) {
+                    ovs_mutex_unlock(&dp->port_mutex);
+                    ovs_mutex_lock(&dp_netdev_mutex);
+                    VLOG_DBG("PMD_AUTO_LB_MON Invoking PMD RECONFIGURE");

Not capitalizing log messages, unless there is a specific intention, would
look more consistent in reporting (here and in other places as well).
Sure. Will change it.

Thanks,
Gowrishankar

_______________________________________________
dev mailing list
[email protected]
https://mail.openvswitch.org/mailman/listinfo/ovs-dev

Reply via email to