On 01/13/2018 12:16 PM, Stokes, Ian wrote:
>
>
>> -----Original Message-----
>> From: Ilya Maximets [mailto:[email protected]]
>> Sent: Friday, January 12, 2018 11:17 AM
>> To: [email protected]
>> Cc: Heetae Ahn <[email protected]>; Bodireddy, Bhanuprakash
>> <[email protected]>; Fischetti, Antonio
>> <[email protected]>; Eelco Chaudron <[email protected]>;
>> Loftus, Ciara <[email protected]>; Kevin Traynor
>> <[email protected]>; Jan Scheurich <[email protected]>; O
>> Mahony, Billy <[email protected]>; Stokes, Ian
>> <[email protected]>; Ilya Maximets <[email protected]>
>> Subject: [PATCH v10 2/5] dpif-netdev: Count cycles on per-rxq basis.
>>
>> Upcoming time-based output batching will allow to collect in a single
>> output batch packets from different RX queues. Lets keep the list of RX
>> queues for each output packet and collect cycles for them on send.
>>
>> Signed-off-by: Ilya Maximets <[email protected]>
>> ---
>> lib/dpif-netdev.c | 31 +++++++++++++++++++++++++++++--
>> 1 file changed, 29 insertions(+), 2 deletions(-)
>>
>> diff --git a/lib/dpif-netdev.c b/lib/dpif-netdev.c index b35700d..6909a03
>> 100644
>> --- a/lib/dpif-netdev.c
>> +++ b/lib/dpif-netdev.c
>> @@ -501,6 +501,7 @@ struct tx_port {
>> long long last_used;
>> struct hmap_node node;
>> struct dp_packet_batch output_pkts;
>> + struct dp_netdev_rxq *output_pkts_rxqs[NETDEV_MAX_BURST];
>> };
>>
>> /* A set of properties for the current processing loop that is not
>> directly @@ -510,6 +511,8 @@ struct tx_port { struct
>> dp_netdev_pmd_thread_ctx {
>> /* Latest measured time. See 'pmd_thread_ctx_time_update()'. */
>> long long now;
>> + /* RX queue from which last packet was received. */
>> + struct dp_netdev_rxq *last_rxq;
>> };
>>
>> /* PMD: Poll modes drivers. PMD accesses devices via polling to
>> eliminate @@ -3155,9 +3158,14 @@ static void
>> dp_netdev_pmd_flush_output_on_port(struct dp_netdev_pmd_thread *pmd,
>> struct tx_port *p) {
>> + int i;
>> int tx_qid;
>> int output_cnt;
>> bool dynamic_txqs;
>> + struct cycle_timer timer;
>> + uint64_t cycles;
>> +
>> + cycle_timer_start(&pmd->perf_stats, &timer);
>>
>> dynamic_txqs = p->port->dynamic_txqs;
>> if (dynamic_txqs) {
>> @@ -3167,12 +3175,22 @@ dp_netdev_pmd_flush_output_on_port(struct
>> dp_netdev_pmd_thread *pmd,
>> }
>>
>> output_cnt = dp_packet_batch_size(&p->output_pkts);
>> + ovs_assert(output_cnt > 0);
>>
>> netdev_send(p->port->netdev, tx_qid, &p->output_pkts, dynamic_txqs);
>> dp_packet_batch_init(&p->output_pkts);
>>
>> pmd_perf_update_counter(&pmd->perf_stats, PMD_STAT_SENT_PKTS,
>> output_cnt);
>> pmd_perf_update_counter(&pmd->perf_stats, PMD_STAT_SENT_BATCHES, 1);
>> +
>> + /* Update send cycles for all the rx queues evenly. */
>
> Just a query, is it right that this is distributed evenly?
> If there are more packets from one rx queue than another will it make a
> difference or will the cycles spent sending that batch be the same as a batch
> of a small or larger size?
>
Maybe the "evenly" comment is a misleading? The send cycles for each rxq
is updated proportionally for how many packets that rxq had in the
batch. I think this is about the best that can be done.
>> + cycles = cycle_timer_stop(&pmd->perf_stats, &timer) / output_cnt;
>> + for (i = 0; i < output_cnt; i++) {
>> + if (p->output_pkts_rxqs[i]) {
>> + dp_netdev_rxq_add_cycles(p->output_pkts_rxqs[i],
>> + RXQ_CYCLES_PROC_CURR, cycles);
>> + }
>> + }
>> }
>>
>> static void
>> @@ -3196,10 +3214,14 @@ dp_netdev_process_rxq_port(struct
>> dp_netdev_pmd_thread *pmd,
>> struct cycle_timer timer;
>> int error;
>> int batch_cnt = 0;
>> + uint64_t cycles;
>>
>> /* Measure duration for polling and processing rx burst. */
>> cycle_timer_start(&pmd->perf_stats, &timer);
>> +
>> + pmd->ctx.last_rxq = rxq;
>> dp_packet_batch_init(&batch);
>> +
>> error = netdev_rxq_recv(rxq->rx, &batch);
>> if (!error) {
>> /* At least one packet received. */ @@ -3208,12 +3230,12 @@
>> dp_netdev_process_rxq_port(struct dp_netdev_pmd_thread *pmd,
>>
>> batch_cnt = batch.count;
>> dp_netdev_input(pmd, &batch, port_no);
>> - dp_netdev_pmd_flush_output_packets(pmd);
>>
>> /* Assign processing cycles to rx queue. */
>> - uint64_t cycles = cycle_timer_stop(&pmd->perf_stats, &timer);
>> + cycles = cycle_timer_stop(&pmd->perf_stats, &timer);
>> dp_netdev_rxq_add_cycles(rxq, RXQ_CYCLES_PROC_CURR, cycles);
>>
>> + dp_netdev_pmd_flush_output_packets(pmd);
>> } else {
>> /* Discard cycles. */
>> cycle_timer_stop(&pmd->perf_stats, &timer); @@ -3225,6 +3247,8 @@
>> dp_netdev_process_rxq_port(struct dp_netdev_pmd_thread *pmd,
>> }
>> }
>>
>> + pmd->ctx.last_rxq = NULL;
>> +
>> return batch_cnt;
>> }
>>
>> @@ -4512,6 +4536,7 @@ dp_netdev_configure_pmd(struct dp_netdev_pmd_thread
>> *pmd, struct dp_netdev *dp,
>> ovs_mutex_init(&pmd->port_mutex);
>> cmap_init(&pmd->flow_table);
>> cmap_init(&pmd->classifiers);
>> + pmd->ctx.last_rxq = NULL;
>> pmd_thread_ctx_time_update(pmd);
>> pmd->next_optimization = pmd->ctx.now + DPCLS_OPTIMIZATION_INTERVAL;
>> pmd->rxq_next_cycle_store = pmd->ctx.now + PMD_RXQ_INTERVAL_LEN; @@ -
>> 5389,6 +5414,8 @@ dp_execute_cb(void *aux_, struct dp_packet_batch
>> *packets_,
>> dp_netdev_pmd_flush_output_on_port(pmd, p);
>> }
>> DP_PACKET_BATCH_FOR_EACH (packet, packets_) {
>> + p->output_pkts_rxqs[dp_packet_batch_size(&p-
>>> output_pkts)] =
>> +
>> + pmd->ctx.last_rxq;
>> dp_packet_batch_add(&p->output_pkts, packet);
>> }
>> return;
>> --
>> 2.7.4
>
_______________________________________________
dev mailing list
[email protected]
https://mail.openvswitch.org/mailman/listinfo/ovs-dev