PMD threads poll and process packets from their Rx queues in a round-robin
fashion, so an optimal allocation of Rx queues across the PMD threads is not
guaranteed at all times. The patch series at the following link tries to offer
a uniform distribution of Rx queues across the PMD threads:
https://mail.openvswitch.org/pipermail/ovs-dev/2017-May/332734.html
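For context, each PMD thread simply iterates over the Rx queues assigned to
it; a simplified sketch of the poll loop in pmd_thread_main() (based on the
code touched by this patch, with periodic maintenance and reload handling
elided):

    for (;;) {
        for (i = 0; i < poll_cnt; i++) {
            dp_netdev_process_rxq_port(pmd, poll_list[i].rxq,
                                       poll_list[i].port_no);
        }
        /* ... periodic maintenance and reload handling elided ... */
    }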
However, the user first needs to know how the PMD cycles are distributed
across its Rx queues. This patch extends the existing command
'ovs-appctl dpif-netdev/pmd-rxq-show'
to show the percentage of total PMD thread cycles spent on each port's Rx queues.
Sample 'ovs-appctl dpif-netdev/pmd-rxq-show' output with PMD cycle percentages:
pmd thread numa_id 0 core_id 2:
    isolated : false
    port: dpdk0
        queue-id: 0
        polling cycles(% of total polling cycles):88687867260 (24.40%)
        processing cycles(% of total processing cycles):741227393476 (65.87%)
    port: dpdkvhostuser1
        queue-id: 0
        polling cycles(% of total polling cycles):274003203688 (75.38%)
        processing cycles(% of total processing cycles):384127428352 (34.13%)
pmd thread numa_id 0 core_id 3:
    isolated : false
    port: dpdk1
        queue-id: 0
        polling cycles(% of total polling cycles):96022088320 (26.01%)
        processing cycles(% of total processing cycles):716284951804 (64.07%)
    port: dpdkvhostuser0
        queue-id: 0
        polling cycles(% of total polling cycles):273103986760 (73.98%)
        processing cycles(% of total processing cycles):401626635996 (35.93%)
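Each percentage is the queue's cycle count divided by the owning PMD thread's
count for the same counter type, with both values taken relative to the
baselines saved at the last stats clear. In pseudo-form (illustrative names,
not verbatim from the patch):

    polling %    = rxq_polling_cycles    / pmd_polling_cycles    * 100
    processing % = rxq_processing_cycles / pmd_processing_cycles * 100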
Signed-off-by: Sugesh Chandran <[email protected]>
---
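Note: the per-queue baselines ('cycles_zero') are reset from
pmd_info_clear_stats() (the handler behind 'ovs-appctl
dpif-netdev/pmd-stats-clear') and also whenever a queue is added to or removed
from a PMD thread, so the percentages cover the interval since the last reset.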
lib/dpif-netdev.c | 155 +++++++++++++++++++++++++++++++++++++++++++++---------
1 file changed, 131 insertions(+), 24 deletions(-)
diff --git a/lib/dpif-netdev.c b/lib/dpif-netdev.c
index b50164b..e4fcb2e 100644
--- a/lib/dpif-netdev.c
+++ b/lib/dpif-netdev.c
@@ -332,8 +332,16 @@ enum pmd_cycles_counter_type {
#define XPS_TIMEOUT_MS 500LL
+/* Contained by struct dp_netdev_pmd_thread's 'cycle' member. */
+struct dp_netdev_pmd_cycles {
+ /* Indexed by PMD_CYCLES_*. */
+ atomic_ullong n[PMD_N_CYCLES];
+};
+
/* Contained by struct dp_netdev_port's 'rxqs' member. */
struct dp_netdev_rxq {
+ /* Cycles counters for each queue. */
+ struct dp_netdev_pmd_cycles cycles;
struct dp_netdev_port *port;
struct netdev_rxq *rx;
unsigned core_id; /* Core to which this queue should be
@@ -341,6 +349,7 @@ struct dp_netdev_rxq {
queue doesn't need to be pinned to a
particular core. */
struct dp_netdev_pmd_thread *pmd; /* pmd thread that polls this queue. */
+ uint64_t cycles_zero[PMD_N_CYCLES]; /* cycles to zero out */
};
/* A port in a netdev-based datapath. */
@@ -469,14 +478,8 @@ struct dp_netdev_pmd_stats {
atomic_ullong n[DP_N_STATS];
};
-/* Contained by struct dp_netdev_pmd_thread's 'cycle' member. */
-struct dp_netdev_pmd_cycles {
- /* Indexed by PMD_CYCLES_*. */
- atomic_ullong n[PMD_N_CYCLES];
-};
-
struct polled_queue {
- struct netdev_rxq *rx;
+ struct dp_netdev_rxq *rxq;
odp_port_t port_no;
};
@@ -676,6 +679,10 @@ static inline bool emc_entry_alive(struct emc_entry *ce);
static void emc_clear_entry(struct emc_entry *ce);
static void
+pmd_reset_cycles(struct dp_netdev_pmd_thread *pmd,
+ uint64_t cycles[PMD_N_CYCLES]);
+
+static void
emc_cache_init(struct emc_cache *flow_cache)
{
int i;
@@ -842,9 +849,9 @@ pmd_info_clear_stats(struct ds *reply OVS_UNUSED,
for (i = 0; i < DP_N_STATS; i++) {
pmd->stats_zero[i] = stats[i];
}
- for (i = 0; i < PMD_N_CYCLES; i++) {
- pmd->cycles_zero[i] = cycles[i];
- }
+
+ /* Clear the PMD cycles stats */
+ pmd_reset_cycles(pmd, cycles);
}
static int
@@ -896,7 +903,9 @@ pmd_info_show_rxq(struct ds *reply, struct dp_netdev_pmd_thread *pmd)
if (pmd->core_id != NON_PMD_CORE_ID) {
const char *prev_name = NULL;
struct rxq_poll *list;
- size_t i, n;
+ size_t i, j, n;
+ uint64_t pmd_cycles[PMD_N_CYCLES];
+ uint64_t port_cycles[PMD_N_CYCLES];
ds_put_format(reply,
"pmd thread numa_id %d core_id %u:\n\tisolated : %s\n",
@@ -905,6 +914,19 @@ pmd_info_show_rxq(struct ds *reply, struct dp_netdev_pmd_thread *pmd)
ovs_mutex_lock(&pmd->port_mutex);
sorted_poll_list(pmd, &list, &n);
+
+ for (i = 0; i < ARRAY_SIZE(pmd_cycles); i++) {
+ /* Read the PMD CPU cycles */
+ atomic_read_relaxed(&pmd->cycles.n[i], &pmd_cycles[i]);
+ }
+ for (i = 0; i < PMD_N_CYCLES; i++) {
+ if (pmd_cycles[i] > pmd->cycles_zero[i]) {
+ pmd_cycles[i] -= pmd->cycles_zero[i];
+ } else {
+ pmd_cycles[i] = 0;
+ }
+ }
+
for (i = 0; i < n; i++) {
const char *name = netdev_rxq_get_name(list[i].rxq->rx);
@@ -912,11 +934,38 @@ pmd_info_show_rxq(struct ds *reply, struct dp_netdev_pmd_thread *pmd)
if (prev_name) {
ds_put_cstr(reply, "\n");
}
- ds_put_format(reply, "\tport: %s\tqueue-id:", name);
+ ds_put_format(reply, "\tport: %s\n", name);
}
- ds_put_format(reply, " %d",
+ ds_put_format(reply, "\t\tqueue-id: %d\n",
netdev_rxq_get_queue_id(list[i].rxq->rx));
prev_name = name;
+ if (!pmd_cycles[PMD_CYCLES_POLLING] ||
+ !pmd_cycles[PMD_CYCLES_PROCESSING]) {
+ continue;
+ }
+ for (j = 0; j < ARRAY_SIZE(port_cycles); j++) {
+ /* Read the Rx queue CPU cycles */
+ atomic_read_relaxed(&list[i].rxq->cycles.n[j],
+ &port_cycles[j]);
+ }
+ for (j = 0; j < PMD_N_CYCLES; j++) {
+ if (port_cycles[j] > list[i].rxq->cycles_zero[j]) {
+ port_cycles[j] -= list[i].rxq->cycles_zero[j];
+ } else {
+ port_cycles[j] = 0;
+ }
+ }
+ ds_put_format(reply,
+ "\t\tpolling cycles(%% of total polling cycles):"
+ "%"PRIu64" (%.02f%%)\n"
+ "\t\tprocessing cycles(%% of total processing "
+ "cycles):%"PRIu64" (%.02f%%)\n",
+ port_cycles[PMD_CYCLES_POLLING],
+ port_cycles[PMD_CYCLES_POLLING] /
+ (double) pmd_cycles[PMD_CYCLES_POLLING]* 100,
+ port_cycles[PMD_CYCLES_PROCESSING],
+ port_cycles[PMD_CYCLES_PROCESSING] /
+ (double) pmd_cycles[PMD_CYCLES_PROCESSING]* 100);
}
ovs_mutex_unlock(&pmd->port_mutex);
ds_put_cstr(reply, "\n");
@@ -3076,6 +3125,7 @@ cycles_count_start(struct dp_netdev_pmd_thread *pmd)
/* Stop counting cycles and add them to the counter 'type' */
static inline void
cycles_count_end(struct dp_netdev_pmd_thread *pmd,
+ struct dp_netdev_rxq *rx,
enum pmd_cycles_counter_type type)
OVS_RELEASES(&cycles_counter_fake_mutex)
OVS_NO_THREAD_SAFETY_ANALYSIS
@@ -3083,11 +3133,13 @@ cycles_count_end(struct dp_netdev_pmd_thread *pmd,
unsigned long long interval = cycles_counter() - pmd->last_cycles;
non_atomic_ullong_add(&pmd->cycles.n[type], interval);
+ /* Update the cycles for the specific queue as well */
+ non_atomic_ullong_add(&rx->cycles.n[type], interval);
}
static void
dp_netdev_process_rxq_port(struct dp_netdev_pmd_thread *pmd,
- struct netdev_rxq *rx,
+ struct dp_netdev_rxq *rx,
odp_port_t port_no)
{
struct dp_packet_batch batch;
@@ -3095,19 +3147,19 @@ dp_netdev_process_rxq_port(struct dp_netdev_pmd_thread *pmd,
dp_packet_batch_init(&batch);
cycles_count_start(pmd);
- error = netdev_rxq_recv(rx, &batch);
- cycles_count_end(pmd, PMD_CYCLES_POLLING);
+ error = netdev_rxq_recv(rx->rx, &batch);
+ cycles_count_end(pmd, rx, PMD_CYCLES_POLLING);
if (!error) {
*recirc_depth_get() = 0;
cycles_count_start(pmd);
dp_netdev_input(pmd, &batch, port_no);
- cycles_count_end(pmd, PMD_CYCLES_PROCESSING);
+ cycles_count_end(pmd, rx, PMD_CYCLES_PROCESSING);
} else if (error != EAGAIN && error != EOPNOTSUPP) {
static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(1, 5);
VLOG_ERR_RL(&rl, "error receiving data from %s: %s",
- netdev_rxq_get_name(rx), ovs_strerror(error));
+ netdev_rxq_get_name(rx->rx), ovs_strerror(error));
}
}
@@ -3129,7 +3181,7 @@ static int
port_reconfigure(struct dp_netdev_port *port)
{
struct netdev *netdev = port->netdev;
- int i, err;
+ int i, err, j;
port->need_reconfigure = false;
@@ -3162,6 +3214,11 @@ port_reconfigure(struct dp_netdev_port *port)
if (err) {
return err;
}
+ /* Initialize the rxq stats */
+ for (j = 0 ; j < PMD_N_CYCLES; j++) {
+ atomic_init(&port->rxqs[i].cycles.n[j], 0);
+ port->rxqs[i].cycles_zero[j] = 0;
+ }
port->n_rxq++;
}
@@ -3580,7 +3637,7 @@ dpif_netdev_run(struct dpif *dpif)
int i;
for (i = 0; i < port->n_rxq; i++) {
- dp_netdev_process_rxq_port(non_pmd, port->rxqs[i].rx,
+ dp_netdev_process_rxq_port(non_pmd, &port->rxqs[i],
port->port_no);
}
}
@@ -3687,7 +3744,7 @@ pmd_load_queues_and_ports(struct dp_netdev_pmd_thread *pmd,
i = 0;
HMAP_FOR_EACH (poll, node, &pmd->poll_list) {
- poll_list[i].rx = poll->rxq->rx;
+ poll_list[i].rxq = poll->rxq;
poll_list[i].port_no = poll->rxq->port->port_no;
i++;
}
@@ -3700,6 +3757,54 @@ pmd_load_queues_and_ports(struct dp_netdev_pmd_thread *pmd,
return i;
}
+static void
+pmd_reset_rx_queue_cycles(struct dp_netdev_pmd_thread *pmd)
+{
+ int i, j;
+ struct dp_netdev_rxq *rxq;
+ struct rxq_poll *poll_list;
+ size_t n;
+ uint64_t q_cycles[PMD_N_CYCLES];
+
+ sorted_poll_list(pmd, &poll_list, &n);
+
+ for (i = 0; i < n; i++) {
+ rxq = poll_list[i].rxq;
+ for (j = 0; j < ARRAY_SIZE(q_cycles); j++) {
+ /* Read the Rx queue CPU cycles */
+ atomic_read_relaxed(&rxq->cycles.n[j], &q_cycles[j]);
+ rxq->cycles_zero[j] = q_cycles[j];
+ }
+ }
+
+ free(poll_list);
+}
+
+static void
+pmd_reset_cycles(struct dp_netdev_pmd_thread *pmd,
+ uint64_t cycles[PMD_N_CYCLES])
+{
+ int i;
+
+ for (i = 0; i < PMD_N_CYCLES; i++) {
+ pmd->cycles_zero[i] = cycles[i];
+ }
+ /* Clear the Rx queue stats */
+ pmd_reset_rx_queue_cycles(pmd);
+}
+
+static void
+pmd_reset_cycles_stat(struct dp_netdev_pmd_thread *pmd)
+{
+ uint64_t cycles[PMD_N_CYCLES];
+ int i;
+
+ for (i = 0; i < ARRAY_SIZE(cycles); i++) {
+ atomic_read_relaxed(&pmd->cycles.n[i], &cycles[i]);
+ }
+ pmd_reset_cycles(pmd, cycles);
+}
+
static void *
pmd_thread_main(void *f_)
{
@@ -3723,8 +3828,8 @@ reload:
/* List port/core affinity */
for (i = 0; i < poll_cnt; i++) {
VLOG_DBG("Core %d processing port \'%s\' with queue-id %d\n",
- pmd->core_id, netdev_rxq_get_name(poll_list[i].rx),
- netdev_rxq_get_queue_id(poll_list[i].rx));
+ pmd->core_id, netdev_rxq_get_name(poll_list[i].rxq->rx),
+ netdev_rxq_get_queue_id(poll_list[i].rxq->rx));
}
if (!poll_cnt) {
@@ -3737,7 +3842,7 @@ reload:
for (;;) {
for (i = 0; i < poll_cnt; i++) {
- dp_netdev_process_rxq_port(pmd, poll_list[i].rx,
+ dp_netdev_process_rxq_port(pmd, poll_list[i].rxq,
poll_list[i].port_no);
}
@@ -4326,6 +4431,7 @@ dp_netdev_add_rxq_to_pmd(struct dp_netdev_pmd_thread *pmd,
poll->rxq = rxq;
hmap_insert(&pmd->poll_list, &poll->node, hash);
+ pmd_reset_cycles_stat(pmd);
pmd->need_reload = true;
}
@@ -4339,6 +4445,7 @@ dp_netdev_del_rxq_from_pmd(struct dp_netdev_pmd_thread *pmd,
free(poll);
pmd->need_reload = true;
+ pmd_reset_cycles_stat(pmd);
}
/* Add 'port' to the tx port cache of 'pmd', which must be reloaded for the
--
2.7.4