On 24/06/2021 15:30, David Marchand wrote:
> On Fri, Jun 4, 2021 at 11:19 PM Kevin Traynor <[email protected]> wrote:
>>
>> PMD auto load balance had its own separate implementation of the
>> rxq scheduling that it used for dry runs, because previously the
>> common rxq scheduling was not reusable for a dry run.
>>
>> Apart from the code duplication (which alone is a good enough
>> reason to replace it), this meant that any further rxq scheduling
>> changes or assignment types would also have to be duplicated in
>> the auto load balance code.
>>
>> This patch replaces the current PMD auto load balance rxq scheduling
>> code to reuse the common rxq scheduling code.
>>
>> The behaviour does not change from a user perspective, except that
>> the logs are updated to be more consistent.
>>
>> As the dry run will compare the pmd load variances for current and
>> estimated assignments, new functions are added to populate the current
>> assignments and calculate variance on the rxq scheduling data structs.
>>
>> Now that the new rxq scheduling data structures are being used in
>> PMD auto load balance, the older rr_* data structs and associated
>> functions can be removed.
>>
>> Signed-off-by: Kevin Traynor <[email protected]>
>> ---
>> lib/dpif-netdev.c | 511 +++++++++++++++-------------------------------
>> 1 file changed, 164 insertions(+), 347 deletions(-)
>>
>> diff --git a/lib/dpif-netdev.c b/lib/dpif-netdev.c
>> index 57d23e112..eaa4e9733 100644
>> --- a/lib/dpif-netdev.c
>> +++ b/lib/dpif-netdev.c
>> @@ -4872,138 +4872,4 @@ port_reconfigure(struct dp_netdev_port *port)
>> }
>>
>> -struct rr_numa_list {
>> - struct hmap numas; /* Contains 'struct rr_numa' */
>> -};
>> -
>> -struct rr_numa {
>> - struct hmap_node node;
>> -
>> - int numa_id;
>> -
>> - /* Non isolated pmds on numa node 'numa_id' */
>> - struct dp_netdev_pmd_thread **pmds;
>> - int n_pmds;
>> -
>> - int cur_index;
>> - bool idx_inc;
>> -};
>> -
>> -static size_t
>> -rr_numa_list_count(struct rr_numa_list *rr)
>> -{
>> - return hmap_count(&rr->numas);
>> -}
>> -
>> -static struct rr_numa *
>> -rr_numa_list_lookup(struct rr_numa_list *rr, int numa_id)
>> -{
>> - struct rr_numa *numa;
>> -
>> - HMAP_FOR_EACH_WITH_HASH (numa, node, hash_int(numa_id, 0), &rr->numas) {
>> - if (numa->numa_id == numa_id) {
>> - return numa;
>> - }
>> - }
>> -
>> - return NULL;
>> -}
>> -
>> -/* Returns the next node in numa list following 'numa' in round-robin fashion.
>> - * Returns first node if 'numa' is a null pointer or the last node in 'rr'.
>> - * Returns NULL if 'rr' numa list is empty. */
>> -static struct rr_numa *
>> -rr_numa_list_next(struct rr_numa_list *rr, const struct rr_numa *numa)
>> -{
>> - struct hmap_node *node = NULL;
>> -
>> - if (numa) {
>> - node = hmap_next(&rr->numas, &numa->node);
>> - }
>> - if (!node) {
>> - node = hmap_first(&rr->numas);
>> - }
>> -
>> - return (node) ? CONTAINER_OF(node, struct rr_numa, node) : NULL;
>> -}
>> -
>> -static void
>> -rr_numa_list_populate(struct dp_netdev *dp, struct rr_numa_list *rr)
>> -{
>> - struct dp_netdev_pmd_thread *pmd;
>> - struct rr_numa *numa;
>> -
>> - hmap_init(&rr->numas);
>> -
>> - CMAP_FOR_EACH (pmd, node, &dp->poll_threads) {
>> - if (pmd->core_id == NON_PMD_CORE_ID || pmd->isolated) {
>> - continue;
>> - }
>> -
>> - numa = rr_numa_list_lookup(rr, pmd->numa_id);
>> - if (!numa) {
>> - numa = xzalloc(sizeof *numa);
>> - numa->numa_id = pmd->numa_id;
>> - hmap_insert(&rr->numas, &numa->node, hash_int(pmd->numa_id, 0));
>> - }
>> - numa->n_pmds++;
>> - numa->pmds = xrealloc(numa->pmds, numa->n_pmds * sizeof *numa->pmds);
>> - numa->pmds[numa->n_pmds - 1] = pmd;
>> - /* At least one pmd so initialise curr_idx and idx_inc. */
>> - numa->cur_index = 0;
>> - numa->idx_inc = true;
>> - }
>> -}
>> -
>> -/*
>> - * Returns the next pmd from the numa node.
>> - *
>> - * If 'updown' is 'true' it will alternate between selecting the next pmd in
>> - * either an up or down walk, switching between up/down when the first or last
>> - * core is reached. e.g. 1,2,3,3,2,1,1,2...
>> - *
>> - * If 'updown' is 'false' it will select the next pmd wrapping around when last
>> - * core reached. e.g. 1,2,3,1,2,3,1,2...
>> - */
>> -static struct dp_netdev_pmd_thread *
>> -rr_numa_get_pmd(struct rr_numa *numa, bool updown)
>> -{
>> - int numa_idx = numa->cur_index;
>> -
>> - if (numa->idx_inc == true) {
>> - /* Incrementing through list of pmds. */
>> - if (numa->cur_index == numa->n_pmds-1) {
>> - /* Reached the last pmd. */
>> - if (updown) {
>> - numa->idx_inc = false;
>> - } else {
>> - numa->cur_index = 0;
>> - }
>> - } else {
>> - numa->cur_index++;
>> - }
>> - } else {
>> - /* Decrementing through list of pmds. */
>> - if (numa->cur_index == 0) {
>> - /* Reached the first pmd. */
>> - numa->idx_inc = true;
>> - } else {
>> - numa->cur_index--;
>> - }
>> - }
>> - return numa->pmds[numa_idx];
>> -}
>> -
>> -static void
>> -rr_numa_list_destroy(struct rr_numa_list *rr)
>> -{
>> - struct rr_numa *numa;
>> -
>> - HMAP_FOR_EACH_POP (numa, node, &rr->numas) {
>> - free(numa->pmds);
>> - free(numa);
>> - }
>> - hmap_destroy(&rr->numas);
>> -}
>> -
>> struct sched_numa_list {
>> struct hmap numas; /* Contains 'struct sched_numa'. */
>> @@ -5033,4 +4899,6 @@ struct sched_numa {
>> };
>>
>> +static uint64_t variance(uint64_t a[], int n);
>
> Nit: this fwd declaration can be moved right before
> sched_numa_list_variance() which is the only function that uses it.
> Or variance() itself could be moved.
>
I wanted to avoid moving the variance() function, to prevent
unnecessary code churn, so I will move the declaration instead.
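
For reference, the helper is just a population variance over the
per-pmd cycle counts. A minimal sketch (simplified, not the exact
patch code) looks like:

static uint64_t
variance(uint64_t a[], int n)
{
    uint64_t sum = 0;
    uint64_t sq_diff_sum = 0;
    uint64_t mean;
    int i;

    if (n == 0) {
        return 0;
    }

    /* Mean of the samples. */
    for (i = 0; i < n; i++) {
        sum += a[i];
    }
    mean = sum / n;

    /* Sum of squared deviations from the mean. */
    for (i = 0; i < n; i++) {
        sq_diff_sum += (a[i] - mean) * (a[i] - mean);
    }

    /* Integer (truncating) population variance. */
    return sq_diff_sum / n;
}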
>
>> +
>> static size_t
>> sched_numa_list_count(struct sched_numa_list *numa_list)
>> @@ -5181,4 +5049,36 @@ sched_add_rxq_to_sched_pmd(struct sched_pmd *sched_pmd,
>> }
>>
>> +static void
>> +sched_numa_list_assignments(struct sched_numa_list *numa_list,
>> + struct dp_netdev *dp)
>
> Indent of second line.
Fixed.
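
i.e. the continuation line will line up under the first parameter:

static void
sched_numa_list_assignments(struct sched_numa_list *numa_list,
                            struct dp_netdev *dp)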
> Don't we need a OVS_REQUIRES(dp->port_mutex) annotation?
>
It will have the lock, but I need to check again about the annotation.
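
If it does need it, I'd expect it to follow the other port_mutex
users in dpif-netdev.c, with the annotation after the parameter list,
something like:

static void
sched_numa_list_assignments(struct sched_numa_list *numa_list,
                            struct dp_netdev *dp)
    OVS_REQUIRES(dp->port_mutex)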
>
>
>> +{
>> + struct dp_netdev_port *port;
>> +
>> + /* For each port. */
>> + HMAP_FOR_EACH (port, node, &dp->ports) {
>> + if (!netdev_is_pmd(port->netdev)) {
>> + continue;
>> + }
>
> [snip]
>
>
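
To recap for anyone following the thread: with the reuse in place, the
dry run boils down to roughly the flow below. This is a simplified
sketch, not the patch verbatim; the helper arguments, the assign-type
field and the threshold field are approximations:

static bool
pmd_rebalance_dry_run(struct dp_netdev *dp)
    OVS_REQUIRES(dp->port_mutex)
{
    struct sched_numa_list numa_list_cur;
    struct sched_numa_list numa_list_est;
    uint64_t cur_variance, new_variance;
    uint64_t improvement = 0;

    /* Capture the current rxq->pmd assignments and their variance. */
    sched_numa_list_populate(&numa_list_cur, dp);
    sched_numa_list_assignments(&numa_list_cur, dp);
    cur_variance = sched_numa_list_variance(&numa_list_cur);

    /* Run the common scheduler for an estimated assignment.
     * Assign-type and log-level arguments are approximate. */
    sched_numa_list_populate(&numa_list_est, dp);
    sched_numa_list_schedule(&numa_list_est, dp,
                             dp->pmd_rxq_assign_type, VLL_DBG);
    new_variance = sched_numa_list_variance(&numa_list_est);

    sched_numa_list_free(&numa_list_cur);
    sched_numa_list_free(&numa_list_est);

    /* A rebalance is worthwhile only if the variance improves by at
     * least the user-configured threshold. */
    if (new_variance < cur_variance) {
        improvement = ((cur_variance - new_variance) * 100) / cur_variance;
    }
    return improvement >= dp->pmd_alb.rebalance_improve_thresh;
}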
_______________________________________________
dev mailing list
[email protected]
https://mail.openvswitch.org/mailman/listinfo/ovs-dev