On Fri, Jun 4, 2021 at 11:19 PM Kevin Traynor <[email protected]> wrote:
>
> PMD auto load balance had its own separate implementation of the
> rxq scheduling that it used for dry runs. This was done because
> previously the rxq scheduling was not reusable for a dry run.
>
> Apart from the code duplication (which alone is a good enough reason
> to replace it), this meant that if any further rxq scheduling changes
> or assignment types were added, they would also have to be duplicated
> in the auto load balance code.
>
> This patch changes the PMD auto load balance code to reuse the
> common rxq scheduling code.
>
> The behaviour does not change from a user perspective, except that
> the logs are updated to be more consistent.
>
> As the dry run will compare the pmd load variances for current and
> estimated assignments, new functions are added to populate the current
> assignments and calculate variance on the rxq scheduling data structs.
>
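(Aside, not part of the patch: the variance here is the ordinary
population variance over the per-PMD estimated loads. A minimal
sketch of that calculation, assuming a plain array of load values
rather than the patch's sched_* structs, might look like:

static uint64_t
example_load_variance(const uint64_t loads[], int n)
{
    uint64_t sum = 0;
    uint64_t sqdiff_sum = 0;
    uint64_t mean;

    if (n <= 0) {
        return 0;
    }
    for (int i = 0; i < n; i++) {
        sum += loads[i];
    }
    mean = sum / n;
    for (int i = 0; i < n; i++) {
        /* Use the absolute distance from the mean to avoid unsigned
         * underflow, then accumulate its square. */
        uint64_t d = loads[i] > mean ? loads[i] - mean : mean - loads[i];
        sqdiff_sum += d * d;
    }
    return sqdiff_sum / n;
}

The dry run then compares this figure for the current and the
estimated assignments.)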
> Now that the new rxq scheduling data structures are being used in
> PMD auto load balance, the older rr_* data structs and associated
> functions can be removed.
>
> Signed-off-by: Kevin Traynor <[email protected]>
> ---
> lib/dpif-netdev.c | 511 +++++++++++++++-------------------------------
> 1 file changed, 164 insertions(+), 347 deletions(-)
>
> diff --git a/lib/dpif-netdev.c b/lib/dpif-netdev.c
> index 57d23e112..eaa4e9733 100644
> --- a/lib/dpif-netdev.c
> +++ b/lib/dpif-netdev.c
> @@ -4872,138 +4872,4 @@ port_reconfigure(struct dp_netdev_port *port)
> }
>
> -struct rr_numa_list {
> - struct hmap numas; /* Contains 'struct rr_numa' */
> -};
> -
> -struct rr_numa {
> - struct hmap_node node;
> -
> - int numa_id;
> -
> - /* Non isolated pmds on numa node 'numa_id' */
> - struct dp_netdev_pmd_thread **pmds;
> - int n_pmds;
> -
> - int cur_index;
> - bool idx_inc;
> -};
> -
> -static size_t
> -rr_numa_list_count(struct rr_numa_list *rr)
> -{
> - return hmap_count(&rr->numas);
> -}
> -
> -static struct rr_numa *
> -rr_numa_list_lookup(struct rr_numa_list *rr, int numa_id)
> -{
> - struct rr_numa *numa;
> -
> - HMAP_FOR_EACH_WITH_HASH (numa, node, hash_int(numa_id, 0), &rr->numas) {
> - if (numa->numa_id == numa_id) {
> - return numa;
> - }
> - }
> -
> - return NULL;
> -}
> -
> -/* Returns the next node in numa list following 'numa' in round-robin fashion.
> - * Returns first node if 'numa' is a null pointer or the last node in 'rr'.
> - * Returns NULL if 'rr' numa list is empty. */
> -static struct rr_numa *
> -rr_numa_list_next(struct rr_numa_list *rr, const struct rr_numa *numa)
> -{
> - struct hmap_node *node = NULL;
> -
> - if (numa) {
> - node = hmap_next(&rr->numas, &numa->node);
> - }
> - if (!node) {
> - node = hmap_first(&rr->numas);
> - }
> -
> - return (node) ? CONTAINER_OF(node, struct rr_numa, node) : NULL;
> -}
> -
> -static void
> -rr_numa_list_populate(struct dp_netdev *dp, struct rr_numa_list *rr)
> -{
> - struct dp_netdev_pmd_thread *pmd;
> - struct rr_numa *numa;
> -
> - hmap_init(&rr->numas);
> -
> - CMAP_FOR_EACH (pmd, node, &dp->poll_threads) {
> - if (pmd->core_id == NON_PMD_CORE_ID || pmd->isolated) {
> - continue;
> - }
> -
> - numa = rr_numa_list_lookup(rr, pmd->numa_id);
> - if (!numa) {
> - numa = xzalloc(sizeof *numa);
> - numa->numa_id = pmd->numa_id;
> - hmap_insert(&rr->numas, &numa->node, hash_int(pmd->numa_id, 0));
> - }
> - numa->n_pmds++;
> - numa->pmds = xrealloc(numa->pmds, numa->n_pmds * sizeof *numa->pmds);
> - numa->pmds[numa->n_pmds - 1] = pmd;
> - /* At least one pmd so initialise curr_idx and idx_inc. */
> - numa->cur_index = 0;
> - numa->idx_inc = true;
> - }
> -}
> -
> -/*
> - * Returns the next pmd from the numa node.
> - *
> - * If 'updown' is 'true' it will alternate between selecting the next pmd in
> - * either an up or down walk, switching between up/down when the first or last
> - * core is reached. e.g. 1,2,3,3,2,1,1,2...
> - *
> - * If 'updown' is 'false' it will select the next pmd wrapping around when last
> - * core reached. e.g. 1,2,3,1,2,3,1,2...
> - */
> -static struct dp_netdev_pmd_thread *
> -rr_numa_get_pmd(struct rr_numa *numa, bool updown)
> -{
> - int numa_idx = numa->cur_index;
> -
> - if (numa->idx_inc == true) {
> - /* Incrementing through list of pmds. */
> - if (numa->cur_index == numa->n_pmds-1) {
> - /* Reached the last pmd. */
> - if (updown) {
> - numa->idx_inc = false;
> - } else {
> - numa->cur_index = 0;
> - }
> - } else {
> - numa->cur_index++;
> - }
> - } else {
> - /* Decrementing through list of pmds. */
> - if (numa->cur_index == 0) {
> - /* Reached the first pmd. */
> - numa->idx_inc = true;
> - } else {
> - numa->cur_index--;
> - }
> - }
> - return numa->pmds[numa_idx];
> -}
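(Aside, not part of the patch: for anyone reading the removed walk
above, a hypothetical caller with three pmds on one numa node would
see the orders below. 'numa' and 'n_rxqs' are assumed context here:

/* 'updown' == true hands out pmd array indexes 0,1,2,2,1,0,0,1,...
 * (the "1,2,3,3,2,1,1,2" pattern from the comment); 'updown' == false
 * wraps instead: 0,1,2,0,1,2,... */
for (int i = 0; i < n_rxqs; i++) {
    struct dp_netdev_pmd_thread *pmd = rr_numa_get_pmd(numa, true);
    /* Assign rxq 'i' to 'pmd'. */
}

As the commit message notes, nothing changes here from a user
perspective once the common code takes over.)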
> -
> -static void
> -rr_numa_list_destroy(struct rr_numa_list *rr)
> -{
> - struct rr_numa *numa;
> -
> - HMAP_FOR_EACH_POP (numa, node, &rr->numas) {
> - free(numa->pmds);
> - free(numa);
> - }
> - hmap_destroy(&rr->numas);
> -}
> -
> struct sched_numa_list {
> struct hmap numas; /* Contains 'struct sched_numa'. */
> @@ -5033,4 +4899,6 @@ struct sched_numa {
> };
>
> +static uint64_t variance(uint64_t a[], int n);
Nit: this fwd declaration can be moved right before
sched_numa_list_variance(), which is the only function that uses it.
Alternatively, variance() itself could be moved.
> +
> static size_t
> sched_numa_list_count(struct sched_numa_list *numa_list)
> @@ -5181,4 +5049,36 @@ sched_add_rxq_to_sched_pmd(struct sched_pmd *sched_pmd,
> }
>
> +static void
> +sched_numa_list_assignments(struct sched_numa_list *numa_list,
> + struct dp_netdev *dp)
Indent of second line.

Don't we need an OVS_REQUIRES(dp->port_mutex) annotation?
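i.e. something along these lines (just a sketch of the prototype,
which would also fix the indentation of the second line):

static void
sched_numa_list_assignments(struct sched_numa_list *numa_list,
                            struct dp_netdev *dp)
    OVS_REQUIRES(dp->port_mutex)
{
    /* ... body as in the patch ... */
}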
> +{
> + struct dp_netdev_port *port;
> +
> + /* For each port. */
> + HMAP_FOR_EACH (port, node, &dp->ports) {
> + if (!netdev_is_pmd(port->netdev)) {
> + continue;
> + }
[snip]
--
David Marchand