On 12.01.2018 18:52, Jan Scheurich wrote:
> Tested-by: Jan Scheurich <[email protected]>
> Acked-by: Jan Scheurich <[email protected]>
>
> One minor comment inline.
>
> /Jan
>
>> -----Original Message-----
>> From: Ilya Maximets [mailto:[email protected]]
>> Sent: Friday, 12 January, 2018 12:17
>> To: [email protected]
>> Cc: Heetae Ahn <[email protected]>; Bhanuprakash Bodireddy
>> <[email protected]>; Antonio Fischetti
>> <[email protected]>; Eelco Chaudron <[email protected]>; Ciara
>> Loftus <[email protected]>; Kevin Traynor
>> <[email protected]>; Jan Scheurich <[email protected]>; Billy
>> O'Mahony <[email protected]>; Ian Stokes
>> <[email protected]>; Ilya Maximets <[email protected]>
>> Subject: [PATCH v10 2/5] dpif-netdev: Count cycles on per-rxq basis.
>>
>> Upcoming time-based output batching will make it possible to collect
>> packets from different RX queues in a single output batch. Let's keep
>> the list of RX queues for each output packet and collect cycles for
>> them on send.
>>
>> Signed-off-by: Ilya Maximets <[email protected]>
>> ---
>>  lib/dpif-netdev.c | 31 +++++++++++++++++++++++++++++--
>>  1 file changed, 29 insertions(+), 2 deletions(-)
>>
>> diff --git a/lib/dpif-netdev.c b/lib/dpif-netdev.c
>> index b35700d..6909a03 100644
>> --- a/lib/dpif-netdev.c
>> +++ b/lib/dpif-netdev.c
>> @@ -501,6 +501,7 @@ struct tx_port {
>>      long long last_used;
>>      struct hmap_node node;
>>      struct dp_packet_batch output_pkts;
>> +    struct dp_netdev_rxq *output_pkts_rxqs[NETDEV_MAX_BURST];
>>  };
>>
>>  /* A set of properties for the current processing loop that is not directly
>> @@ -510,6 +511,8 @@ struct tx_port {
>>  struct dp_netdev_pmd_thread_ctx {
>>      /* Latest measured time. See 'pmd_thread_ctx_time_update()'. */
>>      long long now;
>> +    /* RX queue from which last packet was received. */
>> +    struct dp_netdev_rxq *last_rxq;
>>  };
>>
>>  /* PMD: Poll modes drivers. PMD accesses devices via polling to eliminate
>> @@ -3155,9 +3158,14 @@ static void
>>  dp_netdev_pmd_flush_output_on_port(struct dp_netdev_pmd_thread *pmd,
>>                                     struct tx_port *p)
>>  {
>> +    int i;
>>      int tx_qid;
>>      int output_cnt;
>>      bool dynamic_txqs;
>> +    struct cycle_timer timer;
>> +    uint64_t cycles;
>> +
>> +    cycle_timer_start(&pmd->perf_stats, &timer);
>>
>>      dynamic_txqs = p->port->dynamic_txqs;
>>      if (dynamic_txqs) {
>> @@ -3167,12 +3175,22 @@ dp_netdev_pmd_flush_output_on_port(struct dp_netdev_pmd_thread *pmd,
>>      }
>>
>>      output_cnt = dp_packet_batch_size(&p->output_pkts);
>> +    ovs_assert(output_cnt > 0);
>>
>>      netdev_send(p->port->netdev, tx_qid, &p->output_pkts, dynamic_txqs);
>>      dp_packet_batch_init(&p->output_pkts);
>>
>>      pmd_perf_update_counter(&pmd->perf_stats, PMD_STAT_SENT_PKTS,
>>                              output_cnt);
>>      pmd_perf_update_counter(&pmd->perf_stats, PMD_STAT_SENT_BATCHES, 1);
>> +
>> +    /* Update send cycles for all the rx queues evenly. */
>> +    cycles = cycle_timer_stop(&pmd->perf_stats, &timer) / output_cnt;
>> +    for (i = 0; i < output_cnt; i++) {
>> +        if (p->output_pkts_rxqs[i]) {
>> +            dp_netdev_rxq_add_cycles(p->output_pkts_rxqs[i],
>> +                                     RXQ_CYCLES_PROC_CURR, cycles);
>> +        }
>> +    }
>>  }
>>
>>  static void
>> @@ -3196,10 +3214,14 @@ dp_netdev_process_rxq_port(struct dp_netdev_pmd_thread *pmd,
>>      struct cycle_timer timer;
>>      int error;
>>      int batch_cnt = 0;
>> +    uint64_t cycles;
>>
>>      /* Measure duration for polling and processing rx burst. */
>>      cycle_timer_start(&pmd->perf_stats, &timer);
>> +
>> +    pmd->ctx.last_rxq = rxq;
>>      dp_packet_batch_init(&batch);
>> +
>>      error = netdev_rxq_recv(rxq->rx, &batch);
>>      if (!error) {
>>          /* At least one packet received. */
>> @@ -3208,12 +3230,12 @@ dp_netdev_process_rxq_port(struct dp_netdev_pmd_thread *pmd,
>>
>>          batch_cnt = batch.count;
>>          dp_netdev_input(pmd, &batch, port_no);
>> -        dp_netdev_pmd_flush_output_packets(pmd);
>>
>>          /* Assign processing cycles to rx queue. */
>> -        uint64_t cycles = cycle_timer_stop(&pmd->perf_stats, &timer);
>> +        cycles = cycle_timer_stop(&pmd->perf_stats, &timer);
>>          dp_netdev_rxq_add_cycles(rxq, RXQ_CYCLES_PROC_CURR, cycles);
>>
>> +        dp_netdev_pmd_flush_output_packets(pmd);
>>      } else {
>>          /* Discard cycles. */
>>          cycle_timer_stop(&pmd->perf_stats, &timer);
>> @@ -3225,6 +3247,8 @@ dp_netdev_process_rxq_port(struct dp_netdev_pmd_thread *pmd,
>>          }
>>      }
>>
>> +    pmd->ctx.last_rxq = NULL;
>> +
>>      return batch_cnt;
>>  }
>>
>> @@ -4512,6 +4536,7 @@ dp_netdev_configure_pmd(struct dp_netdev_pmd_thread *pmd, struct dp_netdev *dp,
>>      ovs_mutex_init(&pmd->port_mutex);
>>      cmap_init(&pmd->flow_table);
>>      cmap_init(&pmd->classifiers);
>> +    pmd->ctx.last_rxq = NULL;
>>      pmd_thread_ctx_time_update(pmd);
>>      pmd->next_optimization = pmd->ctx.now + DPCLS_OPTIMIZATION_INTERVAL;
>>      pmd->rxq_next_cycle_store = pmd->ctx.now + PMD_RXQ_INTERVAL_LEN;
>> @@ -5389,6 +5414,8 @@ dp_execute_cb(void *aux_, struct dp_packet_batch *packets_,
>>              dp_netdev_pmd_flush_output_on_port(pmd, p);
>>          }
>>          DP_PACKET_BATCH_FOR_EACH (packet, packets_) {
>> +            p->output_pkts_rxqs[dp_packet_batch_size(&p->output_pkts)] =
>> +                                                             pmd->ctx.last_rxq;
>
> This works but seems unnecessarily cryptic. Can't you increment a simple int
> variable (i) in the loop and use that to index output_pkts_rxqs[i]?
I'm using 'dp_packet_batch_size()' as the index to make the dependency
between filling the batch itself and filling the rxqs array explicit.
Unless you insist, I'd like to keep it as is.

>
>>              dp_packet_batch_add(&p->output_pkts, packet);
>>          }
>>          return;
>> --
>> 2.7.4
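
For readers following the thread, here is a minimal, self-contained sketch of
the two ideas being discussed. This is illustrative C only, not the actual OVS
code: the "toy_" struct and helper names and the cycle numbers are made up. It
shows the current batch size doubling as the index into the per-packet rxq
array (the indexing question above), and the measured cost of one flush being
split evenly across the rx queues that contributed packets to the batch, in
the spirit of what dp_netdev_pmd_flush_output_on_port() does with
dp_netdev_rxq_add_cycles():

    /* Illustrative sketch only; simplified stand-ins for the OVS structs. */
    #include <stdint.h>
    #include <stdio.h>

    #define MAX_BURST 32

    struct toy_rxq {
        const char *name;
        uint64_t cycles;           /* Cycles charged to this rx queue. */
    };

    struct toy_out_batch {
        int count;                            /* Mirrors dp_packet_batch_size(). */
        struct toy_rxq *pkt_rxqs[MAX_BURST];  /* Source rxq of each queued packet. */
    };

    /* Queue one packet: the current batch size doubles as the index into
     * pkt_rxqs[], which is the indexing question raised in the review. */
    static void
    toy_batch_add(struct toy_out_batch *b, struct toy_rxq *src)
    {
        b->pkt_rxqs[b->count] = src;
        b->count++;
    }

    /* "Flush": split the measured send cost evenly over the queued packets
     * and charge each share back to the packet's source rx queue. */
    static void
    toy_flush(struct toy_out_batch *b, uint64_t send_cycles)
    {
        uint64_t per_pkt = send_cycles / b->count;
        for (int i = 0; i < b->count; i++) {
            if (b->pkt_rxqs[i]) {  /* The patch also skips packets with no rxq. */
                b->pkt_rxqs[i]->cycles += per_pkt;
            }
        }
        b->count = 0;
    }

    int
    main(void)
    {
        struct toy_rxq a = { "rxq-A", 0 }, b = { "rxq-B", 0 };
        struct toy_out_batch batch = { 0 };

        for (int i = 0; i < 8; i++)  { toy_batch_add(&batch, &a); }
        for (int i = 0; i < 24; i++) { toy_batch_add(&batch, &b); }

        toy_flush(&batch, 16000);  /* Hypothetical cost of one netdev_send(). */

        /* rxq-A gets 8 * 500 = 4000 cycles, rxq-B gets 24 * 500 = 12000. */
        printf("%s: %llu, %s: %llu\n",
               a.name, (unsigned long long) a.cycles,
               b.name, (unsigned long long) b.cycles);
        return 0;
    }

A queue that contributed more packets to the batch is thus charged a
proportionally larger share of the send cost, which is the per-rxq accounting
the patch adds.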
