On 08.09.2019 19:01, Sriram Vatala wrote:
> OVS may be unable to transmit packets for multiple reasons and
> today there is a single counter to track packets dropped due to
> any of those reasons. The most common reason is that a VM is
> unable to read packets fast enough causing the vhostuser port
> transmit queue on the OVS side to become full. This manifests
> as a problem with VNFs not receiving all packets. Having a
> separate drop counter to track packets dropped because the
> transmit queue is full will clearly indicate that the problem
> is on the VM side and not in OVS. Similarly maintaining separate
> counters for all possible drops helps in indicating sensible
> cause for packet drops.
>
> This patch adds custom software stats counters to track packets
> dropped at port level and these counters are displayed along with
> other stats in "ovs-vsctl get interface <iface> statistics"
> command. The detailed stats will be available for both dpdk and
> vhostuser ports.
>
> Signed-off-by: Sriram Vatala <[email protected]>
> ---
> Changes since v7 :
> Defined structure netdev_dpdk_sw_stats and moved all custom stats
> into it.
> Placed a pointer to netdev_dpdk_sw_stats structure in netdev_dpdk
> strucure.
> stats are reported with prefix with netdev_dpdk
> ---
> include/openvswitch/netdev.h | 14 +++
> lib/netdev-dpdk.c | 109 +++++++++++++++---
> utilities/bugtool/automake.mk | 3 +-
> utilities/bugtool/ovs-bugtool-get-port-stats | 25 ++++
> .../plugins/network-status/openvswitch.xml | 1 +
> vswitchd/vswitch.xml | 24 ++++
> 6 files changed, 156 insertions(+), 20 deletions(-)
> create mode 100755 utilities/bugtool/ovs-bugtool-get-port-stats
>
> diff --git a/include/openvswitch/netdev.h b/include/openvswitch/netdev.h
> index 0c10f7b48..c17e6a97d 100644
> --- a/include/openvswitch/netdev.h
> +++ b/include/openvswitch/netdev.h
> @@ -89,6 +89,20 @@ struct netdev_stats {
> uint64_t rx_jabber_errors;
> };
>
> +/* Custom software stats for dpdk ports */
> +struct netdev_dpdk_sw_stats {
> + /* No. of retries when unable to transmit. */
> + uint64_t tx_retries;
> + /* Pkt drops when unable to transmit; Probably Tx queue is full */
> + uint64_t tx_failure_drops;
> + /* Pkt len greater than max dev MTU */
> + uint64_t tx_mtu_exceeded_drops;
> + /* Pkt drops in egress policier processing */
> + uint64_t tx_qos_drops;
> + /* Pkt drops in ingress policier processing */
> + uint64_t rx_qos_drops;
> +};
This should not be in a common header since the only user is netdev-dpdk.c.
Regarding this code itself:
1. s/policier/policer/
2. s/Pkt/Packet/, s/len/length/
3. s/max dev MTU/device MTU/ (MTU already has 'maximum' word).
4. All comments should end with a period.
> +
> /* Structure representation of custom statistics counter */
> struct netdev_custom_counter {
> uint64_t value;
> diff --git a/lib/netdev-dpdk.c b/lib/netdev-dpdk.c
> index bc20d6843..39aab4672 100644
> --- a/lib/netdev-dpdk.c
> +++ b/lib/netdev-dpdk.c
> @@ -413,11 +413,10 @@ struct netdev_dpdk {
>
> PADDED_MEMBERS(CACHE_LINE_SIZE,
> struct netdev_stats stats;
> - /* Custom stat for retries when unable to transmit. */
> - uint64_t tx_retries;
> + struct netdev_dpdk_sw_stats *sw_stats;
> /* Protects stats */
> rte_spinlock_t stats_lock;
> - /* 4 pad bytes here. */
> + /* 36 pad bytes here. */
> );
>
> PADDED_MEMBERS(CACHE_LINE_SIZE,
> @@ -1171,7 +1170,8 @@ common_construct(struct netdev *netdev, dpdk_port_t
> port_no,
> dev->rte_xstats_ids = NULL;
> dev->rte_xstats_ids_size = 0;
>
> - dev->tx_retries = 0;
> + dev->sw_stats = (struct netdev_dpdk_sw_stats *)
> + xcalloc(1,sizeof(struct netdev_dpdk_sw_stats));
This should be:
dev->sw_stats = xzalloc(sizeof *dev->sw_stats);
Or
dev->sw_stats = dpdk_rte_mzalloc(sizeof *dev->sw_stats);
The later variant will require proper error handling as it could return NULL.
See the Documentation/internals/contributing/coding-style.rst for details.
>
> return 0;
> }
> @@ -1357,6 +1357,7 @@ common_destruct(struct netdev_dpdk *dev)
> ovs_list_remove(&dev->list_node);
> free(ovsrcu_get_protected(struct ingress_policer *,
> &dev->ingress_policer));
> + free(dev->sw_stats);
> ovs_mutex_destroy(&dev->mutex);
> }
>
> @@ -2171,6 +2172,7 @@ netdev_dpdk_vhost_rxq_recv(struct netdev_rxq *rxq,
> struct ingress_policer *policer = netdev_dpdk_get_ingress_policer(dev);
> uint16_t nb_rx = 0;
> uint16_t dropped = 0;
> + uint16_t qos_drops = 0;
> int qid = rxq->queue_id * VIRTIO_QNUM + VIRTIO_TXQ;
> int vid = netdev_dpdk_get_vid(dev);
>
> @@ -2202,11 +2204,13 @@ netdev_dpdk_vhost_rxq_recv(struct netdev_rxq *rxq,
> (struct rte_mbuf **) batch->packets,
> nb_rx, true);
> dropped -= nb_rx;
> + qos_drops = dropped;
'dropped' here already counts 'qos_drops' only. I don't think we need an
extra variable here.
> }
>
> rte_spinlock_lock(&dev->stats_lock);
> netdev_dpdk_vhost_update_rx_counters(&dev->stats, batch->packets,
> nb_rx, dropped);
> + dev->sw_stats->rx_qos_drops += qos_drops;
> rte_spinlock_unlock(&dev->stats_lock);
>
> batch->count = nb_rx;
> @@ -2232,6 +2236,7 @@ netdev_dpdk_rxq_recv(struct netdev_rxq *rxq, struct
> dp_packet_batch *batch,
> struct ingress_policer *policer = netdev_dpdk_get_ingress_policer(dev);
> int nb_rx;
> int dropped = 0;
> + int qos_drops = 0;
>
> if (OVS_UNLIKELY(!(dev->flags & NETDEV_UP))) {
> return EAGAIN;
> @@ -2250,12 +2255,14 @@ netdev_dpdk_rxq_recv(struct netdev_rxq *rxq, struct
> dp_packet_batch *batch,
> (struct rte_mbuf **) batch->packets,
> nb_rx, true);
> dropped -= nb_rx;
> + qos_drops = dropped;
Same here.
> }
>
> /* Update stats to reflect dropped packets */
> if (OVS_UNLIKELY(dropped)) {
> rte_spinlock_lock(&dev->stats_lock);
> dev->stats.rx_dropped += dropped;
> + dev->sw_stats->rx_qos_drops += qos_drops;
> rte_spinlock_unlock(&dev->stats_lock);
> }
>
> @@ -2339,6 +2346,10 @@ __netdev_dpdk_vhost_send(struct netdev *netdev, int
> qid,
> struct rte_mbuf **cur_pkts = (struct rte_mbuf **) pkts;
> unsigned int total_pkts = cnt;
> unsigned int dropped = 0;
> + unsigned int tx_failure;
> + unsigned int mtu_drops;
> + unsigned int qos_drops;
> + struct netdev_dpdk_sw_stats *sw_stats = dev->sw_stats;
> int i, retries = 0;
> int max_retries = VHOST_ENQ_RETRY_MIN;
> int vid = netdev_dpdk_get_vid(dev);
> @@ -2356,9 +2367,12 @@ __netdev_dpdk_vhost_send(struct netdev *netdev, int
> qid,
> rte_spinlock_lock(&dev->tx_q[qid].tx_lock);
>
> cnt = netdev_dpdk_filter_packet_len(dev, cur_pkts, cnt);
> + mtu_drops = total_pkts - cnt;
> + qos_drops = cnt;
> /* Check has QoS has been configured for the netdev */
> cnt = netdev_dpdk_qos_run(dev, cur_pkts, cnt, true);
> - dropped = total_pkts - cnt;
> + qos_drops -= cnt;
> + dropped = qos_drops + mtu_drops;
>
> do {
> int vhost_qid = qid * VIRTIO_QNUM + VIRTIO_RXQ;
> @@ -2383,12 +2397,16 @@ __netdev_dpdk_vhost_send(struct netdev *netdev, int
> qid,
> }
> } while (cnt && (retries++ < max_retries));
>
> + tx_failure = cnt;
> rte_spinlock_unlock(&dev->tx_q[qid].tx_lock);
>
> rte_spinlock_lock(&dev->stats_lock);
> netdev_dpdk_vhost_update_tx_counters(&dev->stats, pkts, total_pkts,
> cnt + dropped);
> - dev->tx_retries += MIN(retries, max_retries);
> + sw_stats->tx_retries += MIN(retries, max_retries);
> + sw_stats->tx_failure_drops += tx_failure;
> + sw_stats->tx_mtu_exceeded_drops += mtu_drops;
> + sw_stats->tx_qos_drops += qos_drops;
> rte_spinlock_unlock(&dev->stats_lock);
>
> out:
> @@ -2413,12 +2431,16 @@ dpdk_do_tx_copy(struct netdev *netdev, int qid,
> struct dp_packet_batch *batch)
> struct rte_mbuf *pkts[PKT_ARRAY_SIZE];
> uint32_t cnt = batch_cnt;
> uint32_t dropped = 0;
> + uint32_t tx_failure = 0;
> + uint32_t mtu_drops = 0;
> + uint32_t qos_drops = 0;
> + struct netdev_dpdk_sw_stats *sw_stats = dev->sw_stats;
>
> if (dev->type != DPDK_DEV_VHOST) {
> /* Check if QoS has been configured for this netdev. */
> cnt = netdev_dpdk_qos_run(dev, (struct rte_mbuf **) batch->packets,
> batch_cnt, false);
> - dropped += batch_cnt - cnt;
> + qos_drops = batch_cnt - cnt;
> }
>
> uint32_t txcnt = 0;
> @@ -2431,7 +2453,7 @@ dpdk_do_tx_copy(struct netdev *netdev, int qid, struct
> dp_packet_batch *batch)
> VLOG_WARN_RL(&rl, "Too big size %u max_packet_len %d",
> size, dev->max_packet_len);
>
> - dropped++;
> + mtu_drops++;
> continue;
> }
>
> @@ -2454,13 +2476,17 @@ dpdk_do_tx_copy(struct netdev *netdev, int qid,
> struct dp_packet_batch *batch)
> __netdev_dpdk_vhost_send(netdev, qid, (struct dp_packet **) pkts,
> txcnt);
> } else {
> - dropped += netdev_dpdk_eth_tx_burst(dev, qid, pkts, txcnt);
> + tx_failure = netdev_dpdk_eth_tx_burst(dev, qid, pkts, txcnt);
> }
> }
>
> + dropped += qos_drops + mtu_drops + tx_failure;
> if (OVS_UNLIKELY(dropped)) {
> rte_spinlock_lock(&dev->stats_lock);
> dev->stats.tx_dropped += dropped;
> + sw_stats->tx_failure_drops += tx_failure;
> + sw_stats->tx_mtu_exceeded_drops += mtu_drops;
> + sw_stats->tx_qos_drops += qos_drops;
> rte_spinlock_unlock(&dev->stats_lock);
> }
> }
> @@ -2485,6 +2511,8 @@ netdev_dpdk_send__(struct netdev_dpdk *dev, int qid,
> struct dp_packet_batch *batch,
> bool concurrent_txq)
> {
> + struct netdev_dpdk_sw_stats *sw_stats = dev->sw_stats;
This should be defined under the 'else' branch.
> +
> if (OVS_UNLIKELY(!(dev->flags & NETDEV_UP))) {
> dp_packet_delete_batch(batch, true);
> return;
> @@ -2502,18 +2530,25 @@ netdev_dpdk_send__(struct netdev_dpdk *dev, int qid,
> dp_packet_delete_batch(batch, true);
> } else {
> int tx_cnt, dropped;
> + int tx_failure, mtu_drops, qos_drops;
> int batch_cnt = dp_packet_batch_size(batch);
> struct rte_mbuf **pkts = (struct rte_mbuf **) batch->packets;
>
> tx_cnt = netdev_dpdk_filter_packet_len(dev, pkts, batch_cnt);
> + mtu_drops = batch_cnt - tx_cnt;
> + qos_drops = tx_cnt;
> tx_cnt = netdev_dpdk_qos_run(dev, pkts, tx_cnt, true);
> - dropped = batch_cnt - tx_cnt;
> + qos_drops -= tx_cnt;
>
> - dropped += netdev_dpdk_eth_tx_burst(dev, qid, pkts, tx_cnt);
> + tx_failure = netdev_dpdk_eth_tx_burst(dev, qid, pkts, tx_cnt);
>
> + dropped = tx_failure + mtu_drops + qos_drops;
> if (OVS_UNLIKELY(dropped)) {
> rte_spinlock_lock(&dev->stats_lock);
> dev->stats.tx_dropped += dropped;
> + sw_stats->tx_failure_drops += tx_failure;
> + sw_stats->tx_mtu_exceeded_drops += mtu_drops;
> + sw_stats->tx_qos_drops += qos_drops;
> rte_spinlock_unlock(&dev->stats_lock);
> }
> }
> @@ -2772,6 +2807,17 @@ netdev_dpdk_get_custom_stats(const struct netdev
> *netdev,
> uint32_t i;
> struct netdev_dpdk *dev = netdev_dpdk_cast(netdev);
> int rte_xstats_ret;
> + uint16_t index = 0;
> +
> +#define DPDK_CSTATS \
> + DPDK_CSTAT(tx_failure_drops) \
> + DPDK_CSTAT(tx_mtu_exceeded_drops) \
> + DPDK_CSTAT(tx_qos_drops) \
> + DPDK_CSTAT(rx_qos_drops)
> +
> +#define DPDK_CSTAT(NAME) +1
> + custom_stats->size = DPDK_CSTATS;
> +#undef DPDK_CSTAT
>
> ovs_mutex_lock(&dev->mutex);
>
> @@ -2786,9 +2832,10 @@ netdev_dpdk_get_custom_stats(const struct netdev
> *netdev,
> if (rte_xstats_ret > 0 &&
> rte_xstats_ret <= dev->rte_xstats_ids_size) {
>
> - custom_stats->size = rte_xstats_ret;
> + index = rte_xstats_ret;
> + custom_stats->size += rte_xstats_ret;
> custom_stats->counters =
> - (struct netdev_custom_counter *) xcalloc(rte_xstats_ret,
> + (struct netdev_custom_counter *)
> xcalloc(custom_stats->size,
> sizeof(struct netdev_custom_counter));
>
> for (i = 0; i < rte_xstats_ret; i++) {
> @@ -2802,7 +2849,6 @@ netdev_dpdk_get_custom_stats(const struct netdev
> *netdev,
> VLOG_WARN("Cannot get XSTATS values for port: "DPDK_PORT_ID_FMT,
> dev->port_id);
> custom_stats->counters = NULL;
> - custom_stats->size = 0;
> /* Let's clear statistics cache, so it will be
> * reconfigured */
> netdev_dpdk_clear_xstats(dev);
> @@ -2811,6 +2857,27 @@ netdev_dpdk_get_custom_stats(const struct netdev
> *netdev,
> free(values);
> }
>
> + if (custom_stats->counters == NULL) {
> + custom_stats->counters =
> + (struct netdev_custom_counter *) xcalloc(custom_stats->size,
> + sizeof(struct netdev_custom_counter));
> + }
> +
> + rte_spinlock_lock(&dev->stats_lock);
> + i = index;
> +#define DPDK_CSTAT(NAME) \
> + ovs_strlcpy(custom_stats->counters[i++].name, "netdev_dpdk_"#NAME, \
> + NETDEV_CUSTOM_STATS_NAME_SIZE);
> + DPDK_CSTATS;
> +#undef DPDK_CSTAT
> +
> + i = index;
> +#define DPDK_CSTAT(NAME) \
> + custom_stats->counters[i++].value = dev->sw_stats->NAME;
> + DPDK_CSTATS;
> +#undef DPDK_CSTAT
> + rte_spinlock_unlock(&dev->stats_lock);
> +
Above is still a big code duplication. I think, it's better to rename
netdev_dpdk_vhost_get_custom_stats() to something neutral like
netdev_dpdk_get_sw_custom_stats() and re-use it here. DPDK xstats could
be added by calling xrealloc() on resulted sw custom_stats.
For tracking unsupported statistics (tx_retries), we can use same trick
as bridge.c, i.e. setting UINT64_MAX for unsupported and skip them while
reporting.
What do you think?
I could draft another refactoring patch for this if needed.
> ovs_mutex_unlock(&dev->mutex);
>
> return 0;
> @@ -2823,8 +2890,12 @@ netdev_dpdk_vhost_get_custom_stats(const struct netdev
> *netdev,
> struct netdev_dpdk *dev = netdev_dpdk_cast(netdev);
> int i;
>
> -#define VHOST_CSTATS \
> - VHOST_CSTAT(tx_retries)
> +#define VHOST_CSTATS \
> + VHOST_CSTAT(tx_retries) \
> + VHOST_CSTAT(tx_failure_drops) \
> + VHOST_CSTAT(tx_mtu_exceeded_drops) \
> + VHOST_CSTAT(tx_qos_drops) \
> + VHOST_CSTAT(rx_qos_drops)
>
> #define VHOST_CSTAT(NAME) + 1
> custom_stats->size = VHOST_CSTATS;
> @@ -2832,8 +2903,8 @@ netdev_dpdk_vhost_get_custom_stats(const struct netdev
> *netdev,
> custom_stats->counters = xcalloc(custom_stats->size,
> sizeof *custom_stats->counters);
> i = 0;
> -#define VHOST_CSTAT(NAME) \
> - ovs_strlcpy(custom_stats->counters[i++].name, #NAME, \
> +#define VHOST_CSTAT(NAME) \
> + ovs_strlcpy(custom_stats->counters[i++].name, "netdev_dpdk_"#NAME, \
> NETDEV_CUSTOM_STATS_NAME_SIZE);
> VHOST_CSTATS;
> #undef VHOST_CSTAT
> @@ -2843,7 +2914,7 @@ netdev_dpdk_vhost_get_custom_stats(const struct netdev
> *netdev,
> rte_spinlock_lock(&dev->stats_lock);
> i = 0;
> #define VHOST_CSTAT(NAME) \
> - custom_stats->counters[i++].value = dev->NAME;
> + custom_stats->counters[i++].value = dev->sw_stats->NAME;
> VHOST_CSTATS;
> #undef VHOST_CSTAT
> rte_spinlock_unlock(&dev->stats_lock);
> diff --git a/utilities/bugtool/automake.mk b/utilities/bugtool/automake.mk
> index 40980b367..dda58e0a1 100644
> --- a/utilities/bugtool/automake.mk
> +++ b/utilities/bugtool/automake.mk
> @@ -22,7 +22,8 @@ bugtool_scripts = \
> utilities/bugtool/ovs-bugtool-ovs-bridge-datapath-type \
> utilities/bugtool/ovs-bugtool-ovs-vswitchd-threads-affinity \
> utilities/bugtool/ovs-bugtool-qos-configs \
> - utilities/bugtool/ovs-bugtool-get-dpdk-nic-numa
> + utilities/bugtool/ovs-bugtool-get-dpdk-nic-numa \
> + utilities/bugtool/ovs-bugtool-get-port-stats
>
> scripts_SCRIPTS += $(bugtool_scripts)
>
> diff --git a/utilities/bugtool/ovs-bugtool-get-port-stats
> b/utilities/bugtool/ovs-bugtool-get-port-stats
> new file mode 100755
> index 000000000..0fe175e6b
> --- /dev/null
> +++ b/utilities/bugtool/ovs-bugtool-get-port-stats
> @@ -0,0 +1,25 @@
> +#! /bin/bash
> +
> +# This library is free software; you can redistribute it and/or
> +# modify it under the terms of version 2.1 of the GNU Lesser General
> +# Public License as published by the Free Software Foundation.
> +#
> +# This library is distributed in the hope that it will be useful,
> +# but WITHOUT ANY WARRANTY; without even the implied warranty of
> +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
> +# Lesser General Public License for more details.
> +#
> +# Copyright (C) 2019 Ericsson AB
> +
> +for bridge in `ovs-vsctl -- --real list-br`
> +do
> + echo -e "\nBridge : ${bridge}\n"
> + for iface in `ovs-vsctl list-ifaces ${bridge}`
> + do
> + echo -e "iface : ${iface}"
> + ovs-vsctl get interface ${iface} statistics
> + echo -e "\n"
> + done
> + echo -e "iface : ${bridge}"
> + ovs-vsctl get interface ${bridge} statistics
> +done
> diff --git a/utilities/bugtool/plugins/network-status/openvswitch.xml
> b/utilities/bugtool/plugins/network-status/openvswitch.xml
> index d39867c6e..8c1ec643e 100644
> --- a/utilities/bugtool/plugins/network-status/openvswitch.xml
> +++ b/utilities/bugtool/plugins/network-status/openvswitch.xml
> @@ -40,4 +40,5 @@
> <command label="ovs-ofctl-dump-groups"
> filters="ovs">/usr/share/openvswitch/scripts/ovs-bugtool-ovs-ofctl-loop-over-bridges
> "dump-groups"</command>
> <command label="ovs-ofctl-dump-group-stats" filters="ovs"
> repeat="2">/usr/share/openvswitch/scripts/ovs-bugtool-ovs-ofctl-loop-over-bridges
> "dump-group-stats"</command>
> <command label="get_dpdk_nic_numa"
> filters="ovs">/usr/share/openvswitch/scripts/ovs-bugtool-get-dpdk-nic-numa</command>
> + <command label="get_port_statistics"
> filters="ovs">/usr/share/openvswitch/scripts/ovs-bugtool-get-port-stats</command>
> </collect>
> diff --git a/vswitchd/vswitch.xml b/vswitchd/vswitch.xml
> index 9a743c05b..402f3c8ec 100644
> --- a/vswitchd/vswitch.xml
> +++ b/vswitchd/vswitch.xml
> @@ -3486,6 +3486,30 @@ ovs-vsctl add-port br0 p0 -- set Interface p0
> type=patch options:peer=p1 \
> the above.
> </column>
> </group>
> + <group title="Statistics: Custom Software stats for dpdk ports">
> + <column name="statistics" key="tx_retries">
> + Total number of transmit retries on a vhost-user or
> vhost-user-client
> + interface.
> + </column>
> + <column name="statistics" key="tx_failure_drops">
> + Total number of packets dropped because DPDP transmit API for
> + physical/vhost ports fails to transmit the packets. This happens
> + most likely because the transmit queue is full or has been filled
> + up. There are other reasons as well which are unlikely to happen.
> + </column>
> + <column name="statistics" key="tx_mtu_exceeded_drops">
> + Number of packets dropped due to packet length exceeding the max
> + device MTU.
> + </column>
> + <column name="statistics" key="tx_qos_drops">
> + Total number of packets dropped due to transmission rate exceeding
> + the configured egress policer rate.
> + </column>
> + <column name="statistics" key="rx_qos_drops">
> + Total number of packets dropped due to reception rate exceeding
> + the configured ingress policer rate.
> + </column>
> + </group>
I still think that we should not document these stats here.
> </group>
>
> <group title="Ingress Policing">
>
_______________________________________________
dev mailing list
[email protected]
https://mail.openvswitch.org/mailman/listinfo/ovs-dev