On 08/28/2017 02:28 PM, Ilya Maximets wrote:
>> Count the cycles used for processing an rxq during the
>> pmd rxq interval. As this is an in-flight counter and
>> pmds run independently, also store the total cycles used
>> during the last full interval.
>>
>> Signed-off-by: Kevin Traynor <ktraynor at redhat.com>
>> ---
>> lib/dpif-netdev.c | 75 ++++++++++++++++++++++++++++++++++++++++++++++++++++---
>> 1 file changed, 71 insertions(+), 4 deletions(-)
>>
>> diff --git a/lib/dpif-netdev.c b/lib/dpif-netdev.c
>> index 8731435..cdf2662 100644
>> --- a/lib/dpif-netdev.c
>> +++ b/lib/dpif-netdev.c
>> @@ -182,4 +182,8 @@ struct emc_cache {
>> #define DPCLS_OPTIMIZATION_INTERVAL 1000
>>
>> +/* Time in ms of the interval in which the rxq processing cycles used
>> + * for rxq to pmd assignment are measured and stored. */
>> +#define PMD_RXQ_INTERVAL_LEN 10000
>> +
>> /* Number of intervals for which cycles are stored
>> * and used during rxq to pmd assignment. */
>> @@ -570,4 +574,7 @@ struct dp_netdev_pmd_thread {
>>      /* Periodically sort subtable vectors according to hit frequencies */
>>      long long int next_optimization;
>> +    /* End of the next time interval for which processing cycles
>> +     * are stored for each polled rxq. */
>> +    long long int rxq_interval;
>
> This variable has a misleading name. It's *not* an interval; it's the time
> at which the next cycles will be stored. It also looks like it is related
> to PMD_RXQ_INTERVAL_MAX, but it isn't.
>
I agree the name is not intuitive now (or maybe ever). I will change it to
rxq_next_cycle_store to avoid confusion between it and the #define.
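
For clarity, a standalone sketch of the member under its proposed name
(rxq_next_cycle_store is only the suggestion above; it is not in the
posted patch):

    /* Sketch only; mirrors the relevant member of struct
     * dp_netdev_pmd_thread. */
    struct pmd_sketch {
        /* Time (from time_msec()) at which the in-flight per-rxq cycle
         * counts will next be stored into the interval history and the
         * counters reset; advanced by PMD_RXQ_INTERVAL_LEN each time
         * it is passed. */
        long long int rxq_next_cycle_store;
    };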
>>
>> /* Statistics. */
>> @@ -696,5 +703,16 @@ static void pmd_load_cached_ports(struct dp_netdev_pmd_thread *pmd)
>>      OVS_REQUIRES(pmd->port_mutex);
>>  static inline void
>> -dp_netdev_pmd_try_optimize(struct dp_netdev_pmd_thread *pmd);
>> +dp_netdev_pmd_try_optimize(struct dp_netdev_pmd_thread *pmd,
>> +                           struct polled_queue *poll_list, int poll_cnt);
>> +static void
>> +dp_netdev_rxq_set_cycles(struct dp_netdev_rxq *rx,
>> +                         enum rxq_cycles_counter_type type,
>> +                         unsigned long long cycles);
>> +static uint64_t
>> +dp_netdev_rxq_get_cycles(struct dp_netdev_rxq *rx,
>> +                         enum rxq_cycles_counter_type type);
>> +static void
>> +dp_netdev_rxq_set_intrvl_cycles(struct dp_netdev_rxq *rx,
>> +                                unsigned long long cycles);
>
> Same comment about shortened name.
>
Keeping the names consistent with the other patch.
>> static void
>> dpif_netdev_xps_revalidate_pmd(const struct dp_netdev_pmd_thread *pmd,
>> @@ -3126,4 +3144,29 @@ cycles_count_intermediate(struct dp_netdev_pmd_thread *pmd,
>> }
>>
>> +static void
>> +dp_netdev_rxq_set_cycles(struct dp_netdev_rxq *rx,
>> +                         enum rxq_cycles_counter_type type,
>> +                         unsigned long long cycles)
>> +{
>> +    atomic_store_relaxed(&rx->cycles[type], cycles);
>> +}
>> +
>> +static uint64_t
>> +dp_netdev_rxq_get_cycles(struct dp_netdev_rxq *rx,
>> +                         enum rxq_cycles_counter_type type)
>> +{
>> +    unsigned long long processing_cycles;
>> +    atomic_read_relaxed(&rx->cycles[type], &processing_cycles);
>> +    return processing_cycles;
>> +}
>> +
>> +static void
>> +dp_netdev_rxq_set_intrvl_cycles(struct dp_netdev_rxq *rx,
>> +                                unsigned long long cycles)
>> +{
>> +    atomic_store_relaxed(&rx->cycles_intrvl[rx->intrvl_idx++
>> +                         % PMD_RXQ_INTERVAL_MAX], cycles);
>> +}
>> +
>> static int
>> dp_netdev_process_rxq_port(struct dp_netdev_pmd_thread *pmd,
>> @@ -3179,4 +3222,5 @@ port_reconfigure(struct dp_netdev_port *port)
>>          port->rxqs[i].rx = NULL;
>>      }
>> +    unsigned last_nrxq = port->n_rxq;
>>      port->n_rxq = 0;
>>
>> @@ -3199,4 +3243,12 @@ port_reconfigure(struct dp_netdev_port *port)
>>      for (i = 0; i < netdev_n_rxq(netdev); i++) {
>>          port->rxqs[i].port = port;
>> +        if (i >= last_nrxq) {
>> +            /* Only reset cycle stats for new queues. */
>> +            dp_netdev_rxq_set_cycles(&port->rxqs[i], RXQ_CYCLES_PROC_CURR, 0);
>> +            dp_netdev_rxq_set_cycles(&port->rxqs[i], RXQ_CYCLES_PROC_HIST, 0);
>> +            for (unsigned j = 0; j < PMD_RXQ_INTERVAL_MAX; j++) {
>
> rx->intrvl_idx isn't initialized at this point. I see that the code will
> work correctly with any starting value, but, IMHO, it's much clearer to
> have explicit initialization.
>
I don't want to reset it each time, as it is correct as is; see also my
reply to the comment below.
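
To illustrate why any starting value works, a minimal standalone sketch
(not part of the patch; the value 6 for PMD_RXQ_INTERVAL_MAX is assumed
here for illustration):

    /* An unsigned index used only modulo PMD_RXQ_INTERVAL_MAX always
     * lands in a valid slot, whatever value it starts from, so explicit
     * initialization is not required for correctness. */
    #include <stdio.h>

    #define PMD_RXQ_INTERVAL_MAX 6

    int main(void)
    {
        unsigned intrvl_idx = 4000000000u;  /* arbitrary "garbage" start */

        for (int i = 0; i < 8; i++) {
            printf("writing slot %u\n", intrvl_idx++ % PMD_RXQ_INTERVAL_MAX);
        }
        return 0;
    }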
> Besides that, why do you think we need to initialize only the new queues?
> My concern is that if the number of queues changes, the traffic pattern
> through the existing queues will also change significantly, which means
> the old statistics will no longer be relevant. RSS may distribute traffic
> in a completely different way after changing the number of queues.
>
I understand this, but we will use some data for the calculations. For
existing queues it will either be the measured data or zero, and on
balance I choose to use the measured cycles. I don't think using zero for
existing queues would make things better in general.
>> +                dp_netdev_rxq_set_intrvl_cycles(&port->rxqs[i], 0);
>> +            }
>> +        }
>>          err = netdev_rxq_open(netdev, &port->rxqs[i].rx, i);
>>          if (err) {
>> @@ -3879,5 +3931,5 @@ reload:
>>              dp_netdev_process_rxq_port(pmd, poll_list[i].rxq->rx,
>>                                         poll_list[i].port_no);
>> -            cycles_count_intermediate(pmd, NULL,
>> +            cycles_count_intermediate(pmd, poll_list[i].rxq,
>>                                        process_packets ? PMD_CYCLES_PROCESSING
>>                                                        : PMD_CYCLES_IDLE);
>> @@ -3890,5 +3942,5 @@ reload:
>>
>>          coverage_try_clear();
>> -        dp_netdev_pmd_try_optimize(pmd);
>> +        dp_netdev_pmd_try_optimize(pmd, poll_list, poll_cnt);
>>          if (!ovsrcu_try_quiesce()) {
>>              emc_cache_slow_sweep(&pmd->flow_cache);
>> @@ -4334,4 +4386,5 @@ dp_netdev_configure_pmd(struct dp_netdev_pmd_thread *pmd, struct dp_netdev *dp,
>>      cmap_init(&pmd->classifiers);
>>      pmd->next_optimization = time_msec() + DPCLS_OPTIMIZATION_INTERVAL;
>> +    pmd->rxq_interval = time_msec() + PMD_RXQ_INTERVAL_LEN;
>>      hmap_init(&pmd->poll_list);
>>      hmap_init(&pmd->tx_ports);
>> @@ -5771,9 +5824,23 @@ dpcls_sort_subtable_vector(struct dpcls *cls)
>>
>>  static inline void
>> -dp_netdev_pmd_try_optimize(struct dp_netdev_pmd_thread *pmd)
>> +dp_netdev_pmd_try_optimize(struct dp_netdev_pmd_thread *pmd,
>> +                           struct polled_queue *poll_list, int poll_cnt)
>>  {
>>      struct dpcls *cls;
>>      long long int now = time_msec();
>>
>> +    if (now > pmd->rxq_interval) {
>> +        /* Get the cycles that were used to process each queue and store. */
>> +        for (unsigned i = 0; i < poll_cnt; i++) {
>> +            uint64_t rxq_cyc_curr = dp_netdev_rxq_get_cycles(poll_list[i].rxq,
>> +                                                             RXQ_CYCLES_PROC_CURR);
>> +            dp_netdev_rxq_set_intrvl_cycles(poll_list[i].rxq, rxq_cyc_curr);
>> +            dp_netdev_rxq_set_cycles(poll_list[i].rxq, RXQ_CYCLES_PROC_CURR,
>> +                                     0);
>> +        }
>> +        /* Start a new measuring interval. */
>> +        pmd->rxq_interval = now + PMD_RXQ_INTERVAL_LEN;
>> +    }
>
> IMHO, the above code shouldn't be there; this function is for classifier
> optimization. Also, the above code can't really be called an
> "optimization"; it just collects some statistics. There should be a
> separate function for this, placed in a more relevant part of
> dpif-netdev.c.
>
Yeah, semantically you could say this is not an optimization itself but
rather storing data for a later optimization; still, I think it's ok as is.
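
For reference, if it were split out, the helper might look roughly like
the sketch below. The function name dp_netdev_pmd_store_rxq_cycles is
hypothetical, and rxq_next_cycle_store is the rename discussed earlier;
it would sit in dpif-netdev.c next to the existing rxq cycle helpers and
be called from the pmd main loop alongside dp_netdev_pmd_try_optimize():

    /* Hypothetical alternative, not part of this patch: the interval
     * bookkeeping pulled out of dp_netdev_pmd_try_optimize() into its
     * own helper. */
    static inline void
    dp_netdev_pmd_store_rxq_cycles(struct dp_netdev_pmd_thread *pmd,
                                   struct polled_queue *poll_list,
                                   int poll_cnt)
    {
        long long int now = time_msec();

        if (now > pmd->rxq_next_cycle_store) {
            /* Store the cycles each queue used in this interval, then
             * reset the in-flight counter. */
            for (int i = 0; i < poll_cnt; i++) {
                uint64_t rxq_cyc_curr =
                    dp_netdev_rxq_get_cycles(poll_list[i].rxq,
                                             RXQ_CYCLES_PROC_CURR);
                dp_netdev_rxq_set_intrvl_cycles(poll_list[i].rxq,
                                                rxq_cyc_curr);
                dp_netdev_rxq_set_cycles(poll_list[i].rxq,
                                         RXQ_CYCLES_PROC_CURR, 0);
            }
            /* Start a new measuring interval. */
            pmd->rxq_next_cycle_store = now + PMD_RXQ_INTERVAL_LEN;
        }
    }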
>> +
>>      if (now > pmd->next_optimization) {
>>          /* Try to obtain the flow lock to block out revalidator threads. */
>> --
>> 1.8.3.1
>
_______________________________________________
dev mailing list
[email protected]
https://mail.openvswitch.org/mailman/listinfo/ovs-dev