On 23/06/2021 14:04, David Marchand wrote: > On Fri, Jun 4, 2021 at 11:19 PM Kevin Traynor <[email protected]> wrote: >> >> This reworks the current rxq scheduling code to break it into more >> generic and reusable pieces. >> >> The behaviour does not change from a user perspective, except the logs >> are updated to be more consistent. >> >> From an implementation view, there are some changes with mind to adding >> functionality and reuse in later patches. >> >> The high level reusable functions added in this patch are: >> - Generate a list of current numas and pmds >> - Perform rxq scheduling into the list >> - Effect the rxq scheduling assignments so they are used >> >> The rxq scheduling is updated to handle both pinned and non-pinned rxqs >> in the same call. > > As a global comment, I prefer consistency for function names. > I put suggestions inline, you can take these as simple nits. >
went with the suggestions, thanks for them. > > This patch prepares arrival of new scheduling algorithm, but uses a > single boolean flag. > I would avoid this temp boolean and introduce the enum (used later in > this series). > Added enum (without SCHED_GROUP for now). > >> >> Signed-off-by: Kevin Traynor <[email protected]> >> --- >> lib/dpif-netdev.c | 538 ++++++++++++++++++++++++++++++++++++++-------- >> tests/pmd.at | 2 +- >> 2 files changed, 446 insertions(+), 94 deletions(-) >> >> diff --git a/lib/dpif-netdev.c b/lib/dpif-netdev.c >> index 650e67ab3..57d23e112 100644 >> --- a/lib/dpif-netdev.c >> +++ b/lib/dpif-netdev.c >> @@ -5006,4 +5006,211 @@ rr_numa_list_destroy(struct rr_numa_list *rr) >> } >> >> +struct sched_numa_list { >> + struct hmap numas; /* Contains 'struct sched_numa'. */ >> +}; >> + >> +/* Meta data for out-of-place pmd rxq assignments. */ >> +struct sched_pmd { >> + /* Associated PMD thread. */ >> + struct dp_netdev_pmd_thread *pmd; >> + uint64_t pmd_proc_cycles; >> + struct dp_netdev_rxq **rxqs; >> + unsigned n_rxq; >> + bool isolated; >> +}; > > sched_pmd objects are associated in a unique fashion to a sched_numa object. > Having a back pointer to sched_numa in the sched_pmd object removes > the need for sched_numa_list_find_numa(). > done > >> + >> +struct sched_numa { >> + struct hmap_node node; >> + int numa_id; >> + /* PMDs on numa node. */ >> + struct sched_pmd *pmds; >> + /* Num of PMDs on numa node. */ >> + unsigned n_pmds; >> + /* Num of isolated PMDs on numa node. */ >> + unsigned n_iso; >> + int rr_cur_index; >> + bool rr_idx_inc; >> +}; >> + >> +static size_t >> +sched_numa_list_count(struct sched_numa_list *numa_list) >> +{ >> + return hmap_count(&numa_list->numas); >> +} >> + >> +static struct sched_numa * >> +sched_numa_list_next(struct sched_numa_list *numa_list, >> + const struct sched_numa *numa) >> +{ >> + struct hmap_node *node = NULL; >> + >> + if (numa) { >> + node = hmap_next(&numa_list->numas, &numa->node); >> + } >> + if (!node) { >> + node = hmap_first(&numa_list->numas); >> + } >> + >> + return (node) ? CONTAINER_OF(node, struct sched_numa, node) : NULL; >> +} >> + >> +static struct sched_numa * >> +sched_numa_list_lookup(struct sched_numa_list * numa_list, int numa_id) > > Nit: no space *numa_list > fixed > >> +{ >> + struct sched_numa *numa; >> + >> + HMAP_FOR_EACH_WITH_HASH (numa, node, hash_int(numa_id, 0), >> + &numa_list->numas) { >> + if (numa->numa_id == numa_id) { >> + return numa; >> + } >> + } >> + return NULL; >> +} >> + >> +/* Populate numas and pmds on those numas */ > > Nit: missing a '.'. > fixed > >> +static void >> +sched_numa_list_populate(struct sched_numa_list *numa_list, >> + struct dp_netdev *dp) >> +{ >> + struct dp_netdev_pmd_thread *pmd; > > Nit: missing a blank line after var definition. > fixed > >> + hmap_init(&numa_list->numas); >> + >> + /* For each pmd on this datapath. */ >> + CMAP_FOR_EACH (pmd, node, &dp->poll_threads) { >> + struct sched_numa *numa; >> + struct sched_pmd *sched_pmd; >> + if (pmd->core_id == NON_PMD_CORE_ID) { >> + continue; >> + } >> + >> + /* Get the numa of the PMD. */ >> + numa = sched_numa_list_lookup(numa_list, pmd->numa_id); >> + /* Create a new numa node for it if not already created */ > > Nit: missing a '.'. > fixed > >> + if (!numa) { >> + numa = xzalloc(sizeof *numa); >> + numa->numa_id = pmd->numa_id; >> + hmap_insert(&numa_list->numas, &numa->node, >> + hash_int(pmd->numa_id, 0)); >> + } >> + >> + /* Create a sched_pmd on this numa for the pmd. 
*/ >> + numa->n_pmds++; >> + numa->pmds = xrealloc(numa->pmds, numa->n_pmds * sizeof >> *numa->pmds); >> + sched_pmd = &numa->pmds[numa->n_pmds - 1]; >> + memset(sched_pmd ,0, sizeof *sched_pmd); > > Nit: should be sched_pmd, 0, > fixed > >> + sched_pmd->pmd = pmd; >> + /* At least one pmd is present so initialize curr_idx and idx_inc. >> */ >> + numa->rr_cur_index = 0; >> + numa->rr_idx_inc = true; >> + } >> +} >> + >> +static void >> +sched_numa_list_free_entries(struct sched_numa_list *numa_list) >> +{ >> + struct sched_numa *numa; >> + >> + HMAP_FOR_EACH_POP (numa, node, &numa_list->numas) { >> + for (unsigned i = 0; i < numa->n_pmds; i++) { >> + struct sched_pmd *sched_pmd; >> + >> + sched_pmd = &numa->pmds[i]; >> + sched_pmd->n_rxq = 0; >> + free(sched_pmd->rxqs); >> + } >> + numa->n_pmds = 0; >> + free(numa->pmds); >> + } > > Are we leaking numa objects? > i shouldn't have hinted about it :-) yes, fixed now. > >> + hmap_destroy(&numa_list->numas); >> +} >> + >> +static struct sched_pmd * >> +find_sched_pmd_by_pmd(struct sched_numa_list *numa_list, >> + struct dp_netdev_pmd_thread *pmd) > > Nit: it returns a sched_pmd object, I would go with sched_pmd_find_by_pmd(). > > >> +{ >> + struct sched_numa *numa; >> + >> + HMAP_FOR_EACH (numa, node, &numa_list->numas) { >> + for (unsigned i = 0; i < numa->n_pmds; i++) { >> + struct sched_pmd *sched_pmd; >> + >> + sched_pmd = &numa->pmds[i]; >> + if (pmd == sched_pmd->pmd) { >> + return sched_pmd; >> + } >> + } >> + } >> + return NULL; >> +} >> + >> +static struct sched_numa * >> +sched_numa_list_find_numa(struct sched_numa_list *numa_list, >> + struct sched_pmd *sched_pmd) > > As commented around the sched_pmd struct definition, this function can > be removed if sched_pmd has a reference to sched_numa. > yep, removed it. > >> +{ >> + struct sched_numa *numa; >> + >> + HMAP_FOR_EACH (numa, node, &numa_list->numas) { >> + for (unsigned i = 0; i < numa->n_pmds; i++) { >> + struct sched_pmd *numa_sched_pmd; >> + >> + numa_sched_pmd = &numa->pmds[i]; >> + if (numa_sched_pmd == sched_pmd) { >> + return numa; >> + } >> + } >> + } >> + return NULL; >> +} >> + >> +static void >> +sched_add_rxq_to_sched_pmd(struct sched_pmd *sched_pmd, >> + struct dp_netdev_rxq *rxq, uint64_t cycles) > > Nit: name could be sched_pmd_add_rxq(). > > >> +{ >> + /* As sched_pmd is allocated outside this fn. better to not assume >> + * rxq is initialized to NULL. */ >> + if (sched_pmd->n_rxq == 0) { >> + sched_pmd->rxqs = xmalloc(sizeof *sched_pmd->rxqs); >> + } else { >> + sched_pmd->rxqs = xrealloc(sched_pmd->rxqs, (sched_pmd->n_rxq + 1) * >> + sizeof >> *sched_pmd->rxqs); >> + } >> + >> + sched_pmd->rxqs[sched_pmd->n_rxq++] = rxq; >> + sched_pmd->pmd_proc_cycles += cycles; >> +} >> + >> +static void >> +sched_numa_list_put_in_place(struct sched_numa_list *numa_list) >> +{ >> + struct sched_numa *numa; >> + >> + /* For each numa */ >> + HMAP_FOR_EACH (numa, node, &numa_list->numas) { >> + /* For each pmd */ >> + for (int i = 0; i < numa->n_pmds; i++) { >> + struct sched_pmd *sched_pmd; >> + >> + sched_pmd = &numa->pmds[i]; >> + sched_pmd->pmd->isolated = sched_pmd->isolated; >> + /* For each rxq. 
*/ >> + for (unsigned k = 0; k < sched_pmd->n_rxq; k++) { >> + /* Store the new pmd from the out of place sched_numa_list >> + * struct to the dp_netdev_rxq struct */ >> + sched_pmd->rxqs[k]->pmd = sched_pmd->pmd; >> + } >> + } >> + } >> +} >> + >> +static unsigned >> +sched_get_numa_pmd_noniso(struct sched_numa *numa) > > Nit: we are not getting a pmd from this function, > sched_numa_noniso_pmd_count()? > > >> +{ >> + if (numa->n_pmds > numa->n_iso) { >> + return numa->n_pmds - numa->n_iso; >> + } >> + return 0; >> +} >> + >> /* Sort Rx Queues by the processing cycles they are consuming. */ >> static int >> @@ -5037,22 +5244,106 @@ compare_rxq_cycles(const void *a, const void *b) >> } >> >> -/* Assign pmds to queues. If 'pinned' is true, assign pmds to pinned >> - * queues and marks the pmds as isolated. Otherwise, assign non isolated >> - * pmds to unpinned queues. >> +/* >> + * Returns the next pmd from the numa node. >> * >> - * The function doesn't touch the pmd threads, it just stores the assignment >> - * in the 'pmd' member of each rxq. */ >> + * If 'updown' is 'true' it will alternate between selecting the next pmd in >> + * either an up or down walk, switching between up/down when the first or >> last >> + * core is reached. e.g. 1,2,3,3,2,1,1,2... >> + * >> + * If 'updown' is 'false' it will select the next pmd wrapping around when >> + * last core reached. e.g. 1,2,3,1,2,3,1,2... >> + */ >> +static struct sched_pmd * >> +get_rr_pmd(struct sched_numa *numa, bool updown) > > Nit: sched_pmd_next_rr(). > > >> +{ >> + int numa_idx = numa->rr_cur_index; >> + >> + if (numa->rr_idx_inc == true) { >> + /* Incrementing through list of pmds. */ >> + if (numa->rr_cur_index == numa->n_pmds - 1) { >> + /* Reached the last pmd. */ >> + if (updown) { >> + numa->rr_idx_inc = false; >> + } else { >> + numa->rr_cur_index = 0; >> + } >> + } else { >> + numa->rr_cur_index++; >> + } >> + } else { >> + /* Decrementing through list of pmds. */ >> + if (numa->rr_cur_index == 0) { >> + /* Reached the first pmd. */ >> + numa->rr_idx_inc = true; >> + } else { >> + numa->rr_cur_index--; >> + } >> + } >> + return &numa->pmds[numa_idx]; >> +} >> + >> +static struct sched_pmd * >> +get_available_rr_pmd(struct sched_numa *numa, bool updown) > > Nit: sched_pmd_next_noniso_rr(). > > >> +{ >> + struct sched_pmd *sched_pmd = NULL; >> + >> + /* get_rr_pmd() may return duplicate PMDs before all PMDs have been >> + * returned depending on updown. Extend the number of call to ensure all >> + * PMDs can be checked. */ >> + for (unsigned i = 0; i < numa->n_pmds * 2; i++) { >> + sched_pmd = get_rr_pmd(numa, updown); >> + if (!sched_pmd->isolated) { >> + break; >> + } >> + sched_pmd = NULL; >> + } >> + return sched_pmd; >> +} >> + >> +static struct sched_pmd * >> +get_next_pmd(struct sched_numa *numa, bool algo) > > Nit: sched_pmd_next(). > > >> +{ >> + return get_available_rr_pmd(numa, algo); >> +} >> + >> +static const char * >> +get_assignment_type_string(bool algo) >> +{ >> + if (algo == false) { >> + return "roundrobin"; >> + } >> + return "cycles"; >> +} >> + >> +#define MAX_RXQ_CYC_STRLEN (INT_STRLEN(uint64_t) + 40) > > I am not sure where this 40 comes from. > Is this "big enough in most cases"? > Yes, it was just to allow for the message part and not have it so fine that a small edit would break it. There will be some variance in the size as it is with INT_STRLEN. 
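(To put rough numbers on it: UINT64_MAX is 20 digits, and the fixed text " (measured processing cycles )" is about 31 characters, so INT_STRLEN(uint64_t) + 40 leaves a little headroom for rewording the message.)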
I can do this to try and show what the 40 is for: #define MAX_RXQ_CYC_MSG 40 #define MAX_RXQ_CYC_STRLEN (INT_STRLEN(uint64_t) + MAX_RXQ_CYC_MSG) > >> + >> +static bool >> +get_rxq_cyc_log(char *a, bool algo, uint64_t cycles) >> +{ >> + int ret = 0; >> + >> + if (algo) { >> + ret = snprintf(a, MAX_RXQ_CYC_STRLEN, >> + " (measured processing cycles %"PRIu64").", > > The final "." must be removed since it is not consistent with the !algo case. > > >> + cycles); >> + } >> + return ret > 0; >> +} > > We can do a[0] = '\0' when !algo and on error and return 'a' unconditionally. > This way, callers don't need to check this function succeeded. > That is nice, it avoids the ternary check in the VLOG. > > >> + >> static void >> -rxq_scheduling(struct dp_netdev *dp, bool pinned) >> OVS_REQUIRES(dp->port_mutex) >> +sched_numa_list_schedule(struct sched_numa_list *numa_list, >> + struct dp_netdev *dp, >> + bool algo, >> + enum vlog_level level) >> + OVS_REQUIRES(dp->port_mutex) >> { >> struct dp_netdev_port *port; >> - struct rr_numa_list rr; >> - struct rr_numa *non_local_numa = NULL; >> struct dp_netdev_rxq ** rxqs = NULL; > > **rxqs > fixed >> - int n_rxqs = 0; >> - struct rr_numa *numa = NULL; >> - int numa_id; >> - bool assign_cyc = dp->pmd_rxq_assign_cyc; >> + struct sched_numa *last_cross_numa; >> + unsigned n_rxqs = 0; >> + bool start_logged = false; >> + size_t n_numa; >> >> + /* For each port. */ >> HMAP_FOR_EACH (port, node, &dp->ports) { >> if (!netdev_is_pmd(port->netdev)) { >> @@ -5060,48 +5351,68 @@ rxq_scheduling(struct dp_netdev *dp, bool pinned) >> OVS_REQUIRES(dp->port_mutex) >> } >> >> + /* For each rxq on the port. */ >> for (int qid = 0; qid < port->n_rxq; qid++) { >> - struct dp_netdev_rxq *q = &port->rxqs[qid]; >> + struct dp_netdev_rxq *rxq = &port->rxqs[qid]; >> >> - if (pinned && q->core_id != OVS_CORE_UNSPEC) { >> - struct dp_netdev_pmd_thread *pmd; >> + rxqs = xrealloc(rxqs, (n_rxqs + 1) * sizeof *rxqs); >> + rxqs[n_rxqs++] = rxq; > > We might as well only store unpinned queues in rxqs (maybe rename this > var) in this first loop... (see comment in second loop) > I wasn't convinced when first reading, but by the time i got to the later patch I had been converted to this way of thinking :-) > >> >> - pmd = dp_netdev_get_pmd(dp, q->core_id); >> - if (!pmd) { >> - VLOG_WARN("There is no PMD thread on core %d. Queue " >> - "%d on port \'%s\' will not be polled.", >> - q->core_id, qid, >> netdev_get_name(port->netdev)); >> - } else { >> - q->pmd = pmd; >> - pmd->isolated = true; >> - VLOG_INFO("Core %d on numa node %d assigned port \'%s\' >> " >> - "rx queue %d.", pmd->core_id, pmd->numa_id, >> - netdev_rxq_get_name(q->rx), >> - netdev_rxq_get_queue_id(q->rx)); >> - dp_netdev_pmd_unref(pmd); >> - } >> - } else if (!pinned && q->core_id == OVS_CORE_UNSPEC) { >> + if (algo == true) { >> uint64_t cycle_hist = 0; >> >> - if (n_rxqs == 0) { >> - rxqs = xmalloc(sizeof *rxqs); >> - } else { >> - rxqs = xrealloc(rxqs, sizeof *rxqs * (n_rxqs + 1)); >> + /* Sum the queue intervals and store the cycle history. */ >> + for (unsigned i = 0; i < PMD_RXQ_INTERVAL_MAX; i++) { >> + cycle_hist += dp_netdev_rxq_get_intrvl_cycles(rxq, i); >> } >> + dp_netdev_rxq_set_cycles(rxq, RXQ_CYCLES_PROC_HIST, >> + cycle_hist); >> + } >> >> - if (assign_cyc) { >> - /* Sum the queue intervals and store the cycle history. 
>> */ >> - for (unsigned i = 0; i < PMD_RXQ_INTERVAL_MAX; i++) { >> - cycle_hist += dp_netdev_rxq_get_intrvl_cycles(q, i); >> - } >> - dp_netdev_rxq_set_cycles(q, RXQ_CYCLES_PROC_HIST, >> - cycle_hist); >> + /* Check if this rxq is pinned. */ >> + if (rxq->core_id != OVS_CORE_UNSPEC) { >> + struct sched_pmd *sched_pmd = NULL; > > No need to initialize to NULL. > right, removed it > >> + struct dp_netdev_pmd_thread *pmd; >> + struct sched_numa *numa; >> + uint64_t proc_cycles; >> + char rxq_cyc_log[MAX_RXQ_CYC_STRLEN]; >> + >> + >> + /* This rxq should be pinned, pin it now. */ >> + pmd = dp_netdev_get_pmd(dp, rxq->core_id); >> + sched_pmd = find_sched_pmd_by_pmd(numa_list, pmd); >> + if (!sched_pmd) { >> + /* Cannot find the PMD. Cannot pin this rxq. */ >> + VLOG(level == VLL_DBG ? VLL_DBG : VLL_WARN, >> + "Core %2u cannot be pinned with " >> + "port \'%s\' rx queue %d. Use pmd-cpu-mask to " >> + "enable a pmd on core %u.", >> + rxq->core_id, >> + netdev_rxq_get_name(rxq->rx), >> + netdev_rxq_get_queue_id(rxq->rx), >> + rxq->core_id); >> + continue; >> + } >> + /* Mark PMD as isolated if not done already. */ >> + if (sched_pmd->isolated == false) { >> + sched_pmd->isolated = true; >> + numa = sched_numa_list_find_numa(numa_list, >> + sched_pmd); >> + numa->n_iso++; >> } >> - /* Store the queue. */ >> - rxqs[n_rxqs++] = q; >> + proc_cycles = dp_netdev_rxq_get_cycles(rxq, >> + >> RXQ_CYCLES_PROC_HIST); >> + VLOG(level, "Core %2u on numa node %d is pinned with " >> + "port \'%s\' rx queue %d.%s", > > rx queue %d.%s -> rx queue %d%s. (once get_rxq_cyc_log() does not put > a final '.'). > done > > >> + sched_pmd->pmd->core_id, >> sched_pmd->pmd->numa_id, >> + netdev_rxq_get_name(rxq->rx), >> + netdev_rxq_get_queue_id(rxq->rx), >> + get_rxq_cyc_log(rxq_cyc_log, algo, proc_cycles) >> + ? rxq_cyc_log : ""); >> + sched_add_rxq_to_sched_pmd(sched_pmd, rxq, proc_cycles); >> } >> } >> } >> >> - if (n_rxqs > 1 && assign_cyc) { >> + if (n_rxqs > 1 && algo) { >> /* Sort the queues in order of the processing cycles >> * they consumed during their last pmd interval. */ >> @@ -5109,54 +5420,100 @@ rxq_scheduling(struct dp_netdev *dp, bool pinned) >> OVS_REQUIRES(dp->port_mutex) >> } >> >> - rr_numa_list_populate(dp, &rr); >> - /* Assign the sorted queues to pmds in round robin. */ >> - for (int i = 0; i < n_rxqs; i++) { >> - numa_id = netdev_get_numa_id(rxqs[i]->port->netdev); >> - numa = rr_numa_list_lookup(&rr, numa_id); >> - if (!numa) { >> - /* There are no pmds on the queue's local NUMA node. >> - Round robin on the NUMA nodes that do have pmds. */ >> - non_local_numa = rr_numa_list_next(&rr, non_local_numa); >> - if (!non_local_numa) { >> - VLOG_ERR("There is no available (non-isolated) pmd " >> - "thread for port \'%s\' queue %d. This queue " >> - "will not be polled. Is pmd-cpu-mask set to " >> - "zero? Or are all PMDs isolated to other " >> - "queues?", netdev_rxq_get_name(rxqs[i]->rx), >> - netdev_rxq_get_queue_id(rxqs[i]->rx)); >> - continue; >> + last_cross_numa = NULL; >> + n_numa = sched_numa_list_count(numa_list); >> + for (unsigned i = 0; i < n_rxqs; i++) { >> + struct dp_netdev_rxq *rxq = rxqs[i]; >> + struct sched_pmd *sched_pmd; >> + struct sched_numa *numa; >> + int numa_id; >> + uint64_t proc_cycles; >> + char rxq_cyc_log[MAX_RXQ_CYC_STRLEN]; >> + >> + if (rxq->core_id != OVS_CORE_UNSPEC) { >> + continue; >> + } > > (cont of comment on rxqs array)... this way there is no need for a "is > rxq pinned?" check here. 
> removed > >> + >> + if (start_logged == false && level != VLL_DBG) { >> + VLOG(level, "Performing pmd to rx queue assignment using %s " >> + "algorithm.", get_assignment_type_string(algo)); >> + start_logged = true; >> + } >> + >> + /* Store the cycles for this rxq as we will log these later. */ >> + proc_cycles = dp_netdev_rxq_get_cycles(rxq, RXQ_CYCLES_PROC_HIST); >> + /* Select the numa that should be used for this rxq. */ >> + numa_id = netdev_get_numa_id(rxq->port->netdev); >> + numa = sched_numa_list_lookup(numa_list, numa_id); >> + >> + /* Ensure that there is at least one non-isolated pmd on that numa. >> */ >> + if (numa && !sched_get_numa_pmd_noniso(numa)) { >> + numa = NULL; >> + } > > I think this block above can be covered with the check below. > yes, it was leftover from an earlier version. Harmless but redundant. Removed it. > >> + >> + if (!numa || !sched_get_numa_pmd_noniso(numa)) { >> + /* Find any numa with available pmds. */ >> + for (int k = 0; k < n_numa; k++) { > > Super nit: previous index is 'i', I would expect 'j'. > ok, but if I ever get i and j mixed up, I will blame you. > >> + numa = sched_numa_list_next(numa_list, last_cross_numa); >> + if (sched_get_numa_pmd_noniso(numa)) { >> + break; >> + } >> + last_cross_numa = numa; >> + numa = NULL; >> } >> - rxqs[i]->pmd = rr_numa_get_pmd(non_local_numa, assign_cyc); >> - VLOG_WARN("There's no available (non-isolated) pmd thread " >> - "on numa node %d. Queue %d on port \'%s\' will " >> - "be assigned to the pmd on core %d " >> - "(numa node %d). Expect reduced performance.", >> - numa_id, netdev_rxq_get_queue_id(rxqs[i]->rx), >> - netdev_rxq_get_name(rxqs[i]->rx), >> - rxqs[i]->pmd->core_id, rxqs[i]->pmd->numa_id); >> - } else { >> - rxqs[i]->pmd = rr_numa_get_pmd(numa, assign_cyc); >> - if (assign_cyc) { >> - VLOG_INFO("Core %d on numa node %d assigned port \'%s\' " >> - "rx queue %d " >> - "(measured processing cycles %"PRIu64").", >> - rxqs[i]->pmd->core_id, numa_id, >> - netdev_rxq_get_name(rxqs[i]->rx), >> - netdev_rxq_get_queue_id(rxqs[i]->rx), >> - dp_netdev_rxq_get_cycles(rxqs[i], >> - RXQ_CYCLES_PROC_HIST)); >> - } else { >> - VLOG_INFO("Core %d on numa node %d assigned port \'%s\' " >> - "rx queue %d.", rxqs[i]->pmd->core_id, numa_id, >> - netdev_rxq_get_name(rxqs[i]->rx), >> - netdev_rxq_get_queue_id(rxqs[i]->rx)); >> + } >> + if (numa && numa->numa_id != numa_id) { >> + VLOG(level, "There's no available (non-isolated) pmd thread " >> + "on numa node %d. Port \'%s\' rx queue %d will " >> + "be assigned to a pmd on numa node %d. " >> + "This may lead to reduced performance.", >> + numa_id, netdev_rxq_get_name(rxq->rx), >> + netdev_rxq_get_queue_id(rxq->rx), numa->numa_id); >> + } >> + fyi, i changed the logic around checking (numa) etc in v2 to simplify a bit >> + sched_pmd = NULL; >> + if (numa) { >> + /* Select the PMD that should be used for this rxq. */ >> + sched_pmd = get_next_pmd(numa, algo); >> + if (sched_pmd) { >> + VLOG(level, "Core %2u on numa node %d assigned port \'%s\' " >> + "rx queue %d.%s", > > %d%s. > done > >> + sched_pmd->pmd->core_id, >> sched_pmd->pmd->numa_id, >> + netdev_rxq_get_name(rxq->rx), >> + netdev_rxq_get_queue_id(rxq->rx), >> + get_rxq_cyc_log(rxq_cyc_log, algo, proc_cycles) >> + ? rxq_cyc_log : ""); >> + sched_add_rxq_to_sched_pmd(sched_pmd, rxq, proc_cycles); >> } >> } >> + if (!sched_pmd) { >> + VLOG(level == VLL_DBG ? level : VLL_WARN, >> + "No non-isolated pmd on any numa available for " >> + "port \'%s\' rx queue %d.%s " > > %d%s.
> done > > >> + "This rx queue will not be polled.", >> + netdev_rxq_get_name(rxq->rx), >> + netdev_rxq_get_queue_id(rxq->rx), >> + get_rxq_cyc_log(rxq_cyc_log, algo, proc_cycles) >> + ? rxq_cyc_log : ""); >> + } >> } >> - >> - rr_numa_list_destroy(&rr); >> free(rxqs); >> } >> >> +static void >> +rxq_scheduling(struct dp_netdev *dp) OVS_REQUIRES(dp->port_mutex) >> +{ >> + struct sched_numa_list *numa_list; >> + bool algo = dp->pmd_rxq_assign_cyc; >> + >> + numa_list = xzalloc(sizeof *numa_list); >> + >> + sched_numa_list_populate(numa_list, dp); >> + sched_numa_list_schedule(numa_list, dp, algo, VLL_INFO); >> + sched_numa_list_put_in_place(numa_list); >> + >> + sched_numa_list_free_entries(numa_list); >> + free(numa_list); >> +} >> + >> static void >> reload_affected_pmds(struct dp_netdev *dp) >> @@ -5406,10 +5763,5 @@ reconfigure_datapath(struct dp_netdev *dp) >> } >> } >> - >> - /* Add pinned queues and mark pmd threads isolated. */ >> - rxq_scheduling(dp, true); >> - >> - /* Add non-pinned queues. */ >> - rxq_scheduling(dp, false); >> + rxq_scheduling(dp); >> >> /* Step 5: Remove queues not compliant with new scheduling. */ >> diff --git a/tests/pmd.at b/tests/pmd.at >> index cc5371d5a..78105bf45 100644 >> --- a/tests/pmd.at >> +++ b/tests/pmd.at >> @@ -580,5 +580,5 @@ p1 3 0 2 >> ]) >> >> -OVS_VSWITCHD_STOP(["/dpif_netdev|WARN|There is no PMD thread on core/d"]) >> +OVS_VSWITCHD_STOP(["/cannot be pinned with port/d"]) >> AT_CLEANUP >> >> -- >> 2.31.1 >>
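For anyone reading the archive without the v2 diff, the back pointer David suggested at the top of the mail ends up looking roughly like this ('numa' is my placeholder name for the new field; the rest of the struct is unchanged from this version):

struct sched_pmd {
    struct sched_numa *numa;           /* Back pointer to owning numa. */
    struct dp_netdev_pmd_thread *pmd;  /* Associated PMD thread. */
    uint64_t pmd_proc_cycles;
    struct dp_netdev_rxq **rxqs;
    unsigned n_rxq;
    bool isolated;
};

sched_numa_list_populate() sets it when it creates each sched_pmd, so the isolation accounting in sched_numa_list_schedule() becomes a plain dereference instead of the sched_numa_list_find_numa() walk:

    if (!sched_pmd->isolated) {
        sched_pmd->isolated = true;
        sched_pmd->numa->n_iso++;
    }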

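As an aside on the pmd selection order, since the comment on get_rr_pmd() only lists the resulting sequences: here is a throwaway standalone program (not patch code, just the same stepping logic pulled out) that prints both walks for three pmds:

#include <stdbool.h>
#include <stdio.h>

/* Returns the current index, then steps it. With 'updown' the
 * direction flips at either end, repeating the boundary pmd;
 * otherwise the index wraps back to the start. */
static int
next_rr(int *idx, bool *inc, int n, bool updown)
{
    int cur = *idx;

    if (*inc) {
        if (cur == n - 1) {
            if (updown) {
                *inc = false;
            } else {
                *idx = 0;
            }
        } else {
            (*idx)++;
        }
    } else {
        if (cur == 0) {
            *inc = true;
        } else {
            (*idx)--;
        }
    }
    return cur;
}

int
main(void)
{
    for (int updown = 0; updown <= 1; updown++) {
        int idx = 0;
        bool inc = true;

        for (int i = 0; i < 8; i++) {
            printf("%d ", next_rr(&idx, &inc, 3, updown) + 1);
        }
        printf("\n");
    }
    return 0;
}

It prints 1 2 3 1 2 3 1 2 for the plain wrap and 1 2 3 3 2 1 1 2 for the up/down walk, matching the examples in the patch comment.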