On 12.01.2018 19:42, Jan Scheurich wrote: > Hi Ilya, > > I understand that for the purpose of the present patch this is good enough to > count rx and tx packets together, but the PMD performance metrics patch will > depend on counting them separately. It would have been nice if you had simply > included the changes I proposed. Now I will have to revert some of your > changes to make the tx_packets available to pmd_perf_end_iteration(). But so > be it.
Hello, Jan. I tried to find anything about tx_packets in your v5 of the supervision patch-set and the rebased RFC series on the mailing list but failed. At first glance, these series just ignore the tx packets and the cycles spent for sending them. Am I missing something? Or did you not send these changes to the mailing list? In general, I think it's better if you make these changes yourself, because you know your code better. Best regards, Ilya Maximets. > > Tested-by: Jan Scheurich <[email protected]> > Acked-by: Jan Scheurich <[email protected]> > > Regards, Jan > >> -----Original Message----- >> From: Ilya Maximets [mailto:[email protected]] >> Sent: Friday, 12 January, 2018 12:17 >> To: [email protected] >> Cc: Heetae Ahn <[email protected]>; Bhanuprakash Bodireddy >> <[email protected]>; Antonio Fischetti >> <[email protected]>; Eelco Chaudron <[email protected]>; Ciara >> Loftus <[email protected]>; Kevin Traynor >> <[email protected]>; Jan Scheurich <[email protected]>; Billy >> O'Mahony <[email protected]>; Ian Stokes >> <[email protected]>; Ilya Maximets <[email protected]> >> Subject: [PATCH v10 3/5] dpif-netdev: Time based output batching. >> >> This allows to collect packets from more than one RX burst >> and send them together with a configurable intervals. >> >> 'other_config:tx-flush-interval' can be used to configure >> time that a packet can wait in output batch for sending. >> >> 'tx-flush-interval' has microsecond resolution. 
>> >> Signed-off-by: Ilya Maximets <[email protected]> >> --- >> lib/dpif-netdev.c | 108 >> ++++++++++++++++++++++++++++++++++++++++----------- >> vswitchd/vswitch.xml | 16 ++++++++ >> 2 files changed, 102 insertions(+), 22 deletions(-) >> >> diff --git a/lib/dpif-netdev.c b/lib/dpif-netdev.c >> index 6909a03..f0ba2b2 100644 >> --- a/lib/dpif-netdev.c >> +++ b/lib/dpif-netdev.c >> @@ -87,6 +87,9 @@ VLOG_DEFINE_THIS_MODULE(dpif_netdev); >> #define MAX_RECIRC_DEPTH 6 >> DEFINE_STATIC_PER_THREAD_DATA(uint32_t, recirc_depth, 0) >> >> +/* Use instant packet send by default. */ >> +#define DEFAULT_TX_FLUSH_INTERVAL 0 >> + >> /* Configuration parameters. */ >> enum { MAX_FLOWS = 65536 }; /* Maximum number of flows in flow table. */ >> enum { MAX_METERS = 65536 }; /* Maximum number of meters. */ >> @@ -273,6 +276,9 @@ struct dp_netdev { >> struct hmap ports; >> struct seq *port_seq; /* Incremented whenever a port changes. */ >> >> + /* The time that a packet can wait in output batch for sending. */ >> + atomic_uint32_t tx_flush_interval; >> + >> /* Meters. */ >> struct ovs_mutex meter_locks[N_METER_LOCKS]; >> struct dp_meter *meters[MAX_METERS]; /* Meter bands. */ >> @@ -500,6 +506,7 @@ struct tx_port { >> int qid; >> long long last_used; >> struct hmap_node node; >> + long long flush_time; >> struct dp_packet_batch output_pkts; >> struct dp_netdev_rxq *output_pkts_rxqs[NETDEV_MAX_BURST]; >> }; >> @@ -581,6 +588,9 @@ struct dp_netdev_pmd_thread { >> * than 'cmap_count(dp->poll_threads)'. */ >> uint32_t static_tx_qid; >> >> + /* Number of filled output batches. */ >> + int n_output_batches; >> + >> struct ovs_mutex port_mutex; /* Mutex for 'poll_list' and >> 'tx_ports'. */ >> /* List of rx queues to poll. 
*/ >> struct hmap poll_list OVS_GUARDED; >> @@ -670,8 +680,9 @@ static void dp_netdev_add_rxq_to_pmd(struct >> dp_netdev_pmd_thread *pmd, >> static void dp_netdev_del_rxq_from_pmd(struct dp_netdev_pmd_thread *pmd, >> struct rxq_poll *poll) >> OVS_REQUIRES(pmd->port_mutex); >> -static void >> -dp_netdev_pmd_flush_output_packets(struct dp_netdev_pmd_thread *pmd); >> +static int >> +dp_netdev_pmd_flush_output_packets(struct dp_netdev_pmd_thread *pmd, >> + bool force); >> >> static void reconfigure_datapath(struct dp_netdev *dp) >> OVS_REQUIRES(dp->port_mutex); >> @@ -1245,6 +1256,7 @@ create_dp_netdev(const char *name, const struct >> dpif_class *class, >> conntrack_init(&dp->conntrack); >> >> atomic_init(&dp->emc_insert_min, DEFAULT_EM_FLOW_INSERT_MIN); >> + atomic_init(&dp->tx_flush_interval, DEFAULT_TX_FLUSH_INTERVAL); >> >> cmap_init(&dp->poll_threads); >> >> @@ -2911,7 +2923,7 @@ dpif_netdev_execute(struct dpif *dpif, struct >> dpif_execute *execute) >> dp_packet_batch_init_packet(&pp, execute->packet); >> dp_netdev_execute_actions(pmd, &pp, false, execute->flow, >> execute->actions, execute->actions_len); >> - dp_netdev_pmd_flush_output_packets(pmd); >> + dp_netdev_pmd_flush_output_packets(pmd, true); >> >> if (pmd->core_id == NON_PMD_CORE_ID) { >> ovs_mutex_unlock(&dp->non_pmd_mutex); >> @@ -2960,6 +2972,16 @@ dpif_netdev_set_config(struct dpif *dpif, const >> struct smap *other_config) >> smap_get_ullong(other_config, "emc-insert-inv-prob", >> DEFAULT_EM_FLOW_INSERT_INV_PROB); >> uint32_t insert_min, cur_min; >> + uint32_t tx_flush_interval, cur_tx_flush_interval; >> + >> + tx_flush_interval = smap_get_int(other_config, "tx-flush-interval", >> + DEFAULT_TX_FLUSH_INTERVAL); >> + atomic_read_relaxed(&dp->tx_flush_interval, &cur_tx_flush_interval); >> + if (tx_flush_interval != cur_tx_flush_interval) { >> + atomic_store_relaxed(&dp->tx_flush_interval, tx_flush_interval); >> + VLOG_INFO("Flushing interval for tx queues set to %"PRIu32" us", >> + 
tx_flush_interval); >> + } >> >> if (!nullable_string_is_equal(dp->pmd_cmask, cmask)) { >> free(dp->pmd_cmask); >> @@ -3154,7 +3176,7 @@ dp_netdev_rxq_get_intrvl_cycles(struct dp_netdev_rxq >> *rx, unsigned idx) >> return processing_cycles; >> } >> >> -static void >> +static int >> dp_netdev_pmd_flush_output_on_port(struct dp_netdev_pmd_thread *pmd, >> struct tx_port *p) >> { >> @@ -3164,6 +3186,7 @@ dp_netdev_pmd_flush_output_on_port(struct >> dp_netdev_pmd_thread *pmd, >> bool dynamic_txqs; >> struct cycle_timer timer; >> uint64_t cycles; >> + uint32_t tx_flush_interval; >> >> cycle_timer_start(&pmd->perf_stats, &timer); >> >> @@ -3180,6 +3203,13 @@ dp_netdev_pmd_flush_output_on_port(struct >> dp_netdev_pmd_thread *pmd, >> netdev_send(p->port->netdev, tx_qid, &p->output_pkts, dynamic_txqs); >> dp_packet_batch_init(&p->output_pkts); >> >> + /* Update time of the next flush. */ >> + atomic_read_relaxed(&pmd->dp->tx_flush_interval, &tx_flush_interval); >> + p->flush_time = pmd->ctx.now + tx_flush_interval; >> + >> + ovs_assert(pmd->n_output_batches > 0); >> + pmd->n_output_batches--; >> + >> pmd_perf_update_counter(&pmd->perf_stats, PMD_STAT_SENT_PKTS, >> output_cnt); >> pmd_perf_update_counter(&pmd->perf_stats, PMD_STAT_SENT_BATCHES, 1); >> >> @@ -3191,18 +3221,28 @@ dp_netdev_pmd_flush_output_on_port(struct >> dp_netdev_pmd_thread *pmd, >> RXQ_CYCLES_PROC_CURR, cycles); >> } >> } >> + >> + return output_cnt; >> } >> >> -static void >> -dp_netdev_pmd_flush_output_packets(struct dp_netdev_pmd_thread *pmd) >> +static int >> +dp_netdev_pmd_flush_output_packets(struct dp_netdev_pmd_thread *pmd, >> + bool force) >> { >> struct tx_port *p; >> + int output_cnt = 0; >> + >> + if (!pmd->n_output_batches) { >> + return 0; >> + } >> >> HMAP_FOR_EACH (p, node, &pmd->send_port_cache) { >> - if (!dp_packet_batch_is_empty(&p->output_pkts)) { >> - dp_netdev_pmd_flush_output_on_port(pmd, p); >> + if (!dp_packet_batch_is_empty(&p->output_pkts) >> + && (force || pmd->ctx.now >= 
p->flush_time)) { >> + output_cnt += dp_netdev_pmd_flush_output_on_port(pmd, p); >> } >> } >> + return output_cnt; >> } >> >> static int >> @@ -3213,7 +3253,7 @@ dp_netdev_process_rxq_port(struct dp_netdev_pmd_thread >> *pmd, >> struct dp_packet_batch batch; >> struct cycle_timer timer; >> int error; >> - int batch_cnt = 0; >> + int batch_cnt = 0, output_cnt = 0; >> uint64_t cycles; >> >> /* Measure duration for polling and processing rx burst. */ >> @@ -3235,7 +3275,7 @@ dp_netdev_process_rxq_port(struct dp_netdev_pmd_thread >> *pmd, >> cycles = cycle_timer_stop(&pmd->perf_stats, &timer); >> dp_netdev_rxq_add_cycles(rxq, RXQ_CYCLES_PROC_CURR, cycles); >> >> - dp_netdev_pmd_flush_output_packets(pmd); >> + output_cnt = dp_netdev_pmd_flush_output_packets(pmd, false); >> } else { >> /* Discard cycles. */ >> cycle_timer_stop(&pmd->perf_stats, &timer); >> @@ -3249,7 +3289,7 @@ dp_netdev_process_rxq_port(struct dp_netdev_pmd_thread >> *pmd, >> >> pmd->ctx.last_rxq = NULL; >> >> - return batch_cnt; >> + return batch_cnt + output_cnt; >> } >> >> static struct tx_port * >> @@ -3872,6 +3912,7 @@ dpif_netdev_run(struct dpif *dpif) >> struct dp_netdev *dp = get_dp_netdev(dpif); >> struct dp_netdev_pmd_thread *non_pmd; >> uint64_t new_tnl_seq; >> + bool need_to_flush = true; >> >> ovs_mutex_lock(&dp->port_mutex); >> non_pmd = dp_netdev_get_pmd(dp, NON_PMD_CORE_ID); >> @@ -3882,13 +3923,22 @@ dpif_netdev_run(struct dpif *dpif) >> int i; >> >> for (i = 0; i < port->n_rxq; i++) { >> - dp_netdev_process_rxq_port(non_pmd, >> - &port->rxqs[i], >> - port->port_no); >> + if (dp_netdev_process_rxq_port(non_pmd, >> + &port->rxqs[i], >> + port->port_no)) { >> + need_to_flush = false; >> + } >> } >> } >> } >> - pmd_thread_ctx_time_update(non_pmd); >> + if (need_to_flush) { >> + /* We didn't receive anything in the process loop. >> + * Check if we need to send something. >> + * There was no time updates on current iteration. 
*/ >> + pmd_thread_ctx_time_update(non_pmd); >> + dp_netdev_pmd_flush_output_packets(non_pmd, false); >> + } >> + >> dpif_netdev_xps_revalidate_pmd(non_pmd, false); >> ovs_mutex_unlock(&dp->non_pmd_mutex); >> >> @@ -3939,6 +3989,8 @@ pmd_free_cached_ports(struct dp_netdev_pmd_thread *pmd) >> { >> struct tx_port *tx_port_cached; >> >> + /* Flush all the queued packets. */ >> + dp_netdev_pmd_flush_output_packets(pmd, true); >> /* Free all used tx queue ids. */ >> dpif_netdev_xps_revalidate_pmd(pmd, true); >> >> @@ -4069,6 +4121,7 @@ reload: >> cycles_counter_update(s); >> for (;;) { >> uint64_t iter_packets = 0; >> + >> pmd_perf_start_iteration(s); >> for (i = 0; i < poll_cnt; i++) { >> process_packets = >> @@ -4077,15 +4130,20 @@ reload: >> iter_packets += process_packets; >> } >> >> + if (!iter_packets) { >> + /* We didn't receive anything in the process loop. >> + * Check if we need to send something. >> + * There was no time updates on current iteration. */ >> + pmd_thread_ctx_time_update(pmd); >> + iter_packets += dp_netdev_pmd_flush_output_packets(pmd, false); >> + } >> + >> if (lc++ > 1024) { >> bool reload; >> >> lc = 0; >> >> coverage_try_clear(); >> - /* It's possible that the time was not updated on current >> - * iteration, if there were no received packets. 
*/ >> - pmd_thread_ctx_time_update(pmd); >> dp_netdev_pmd_try_optimize(pmd, poll_list, poll_cnt); >> if (!ovsrcu_try_quiesce()) { >> emc_cache_slow_sweep(&pmd->flow_cache); >> @@ -4524,6 +4582,7 @@ dp_netdev_configure_pmd(struct dp_netdev_pmd_thread >> *pmd, struct dp_netdev *dp, >> pmd->core_id = core_id; >> pmd->numa_id = numa_id; >> pmd->need_reload = false; >> + pmd->n_output_batches = 0; >> >> ovs_refcount_init(&pmd->ref_cnt); >> latch_init(&pmd->exit_latch); >> @@ -4713,6 +4772,7 @@ dp_netdev_add_port_tx_to_pmd(struct >> dp_netdev_pmd_thread *pmd, >> >> tx->port = port; >> tx->qid = -1; >> + tx->flush_time = 0LL; >> dp_packet_batch_init(&tx->output_pkts); >> >> hmap_insert(&pmd->tx_ports, &tx->node, hash_port_no(tx->port->port_no)); >> @@ -5407,12 +5467,16 @@ dp_execute_cb(void *aux_, struct dp_packet_batch >> *packets_, >> dp_netdev_pmd_flush_output_on_port(pmd, p); >> } >> #endif >> - if (OVS_UNLIKELY(dp_packet_batch_size(&p->output_pkts) >> - + dp_packet_batch_size(packets_) > >> NETDEV_MAX_BURST)) { >> - /* Some packets was generated while input batch processing. >> - * Flush here to avoid overflow. */ >> + if (dp_packet_batch_size(&p->output_pkts) >> + + dp_packet_batch_size(packets_) > NETDEV_MAX_BURST) { >> + /* Flush here to avoid overflow. 
*/ >> dp_netdev_pmd_flush_output_on_port(pmd, p); >> } >> + >> + if (dp_packet_batch_is_empty(&p->output_pkts)) { >> + pmd->n_output_batches++; >> + } >> + >> DP_PACKET_BATCH_FOR_EACH (packet, packets_) { >> p->output_pkts_rxqs[dp_packet_batch_size(&p->output_pkts)] = >> >> pmd->ctx.last_rxq; >> diff --git a/vswitchd/vswitch.xml b/vswitchd/vswitch.xml >> index 58c0ebd..61fb7b1 100644 >> --- a/vswitchd/vswitch.xml >> +++ b/vswitchd/vswitch.xml >> @@ -359,6 +359,22 @@ >> </p> >> </column> >> >> + <column name="other_config" key="tx-flush-interval" >> + type='{"type": "integer", >> + "minInteger": 0, "maxInteger": 1000000}'> >> + <p> >> + Specifies the time in microseconds that a packet can wait in >> output >> + batch for sending i.e. amount of time that packet can spend in an >> + intermediate output queue before sending to netdev. >> + This option can be used to configure balance between throughput >> + and latency. Lower values decreases latency while higher values >> + may be useful to achieve higher performance. >> + </p> >> + <p> >> + Defaults to 0 i.e. instant packet sending (latency optimized). >> + </p> >> + </column> >> + >> <column name="other_config" key="n-handler-threads" >> type='{"type": "integer", "minInteger": 1}'> >> <p> >> -- >> 2.7.4 > > > > _______________________________________________ dev mailing list [email protected] https://mail.openvswitch.org/mailman/listinfo/ovs-dev
