On Wed, Jan 31, 2024 at 1:53 AM Daniel Jurgens <dani...@nvidia.com> wrote:
>
> > From: Michael S. Tsirkin <m...@redhat.com>
> > Sent: Tuesday, January 30, 2024 9:53 AM
> > On Tue, Jan 30, 2024 at 03:50:29PM +0000, Daniel Jurgens wrote:
> > > > From: Michael S. Tsirkin <m...@redhat.com>
> > > > Sent: Tuesday, January 30, 2024 9:42 AM
> > > >
> > > > On Tue, Jan 30, 2024 at 03:40:21PM +0000, Daniel Jurgens wrote:
> > > > > > From: Michael S. Tsirkin <m...@redhat.com>
> > > > > > Sent: Tuesday, January 30, 2024 8:58 AM
> > > > > >
> > > > > > On Tue, Jan 30, 2024 at 08:25:21AM -0600, Daniel Jurgens wrote:
> > > > > > > Add tx queue stop and wake counters; they are useful for
> > > > > > > debugging.
> > > > > > >
> > > > > > >     $ ethtool -S ens5f2 | grep 'tx_stop\|tx_wake'
> > > > > > >     ...
> > > > > > >     tx_queue_1_tx_stop: 16726
> > > > > > >     tx_queue_1_tx_wake: 16726
> > > > > > >     ...
> > > > > > >     tx_queue_8_tx_stop: 1500110
> > > > > > >     tx_queue_8_tx_wake: 1500110
> > > > > > >
> > > > > > > Signed-off-by: Daniel Jurgens <dani...@nvidia.com>
> > > > > > > Reviewed-by: Parav Pandit <pa...@nvidia.com>
> > > > > >
> > > > > > Hmm, isn't one always the same as the other, except when the queue
> > > > > > is stopped? And when it is stopped you can see that in the status?
> > > > > > So how is having two useful?
> > > > >
> > > > > At idle the counters will be the same, unless a tx_timeout occurs.
> > > > > But under load they can be monitored to see which queues are
> > > > > stopped and get an idea of how long they are stopped.
> > > >
> > > > how does it give you the idea of how long they are stopped?
> > >
> > > By serially monitoring the counters you can see stops that persist for
> > > long intervals that are still shorter than the tx_timeout time.
> >
> > Why don't you monitor queue status directly?
>
> How? I don't know of any interface to check if a queue is stopped.
>
> >
> > > >
> > > > > Other net drivers (not all), also have the wake counter.
> > > >
> > > > Examples?
> > >
> > > [danielj@sw-mtx-051 upstream]$ ethtool -i ens2f1np1
> > > driver: mlx5_core
> > > version: 6.7.0+
> > > ...
> > > [danielj@sw-mtx-051 upstream]$ ethtool -S ens2f1np1 | grep wake
> > >      tx_queue_wake: 0
> > >      tx0_wake: 0
> >
[...]
> > Do they have a stop counter too?
>
> Yes:
> [danielj@sw-mtx-051 upstream]$ ethtool -S ens2f1np1 | grep 'stop\|wake'
>      tx_queue_stopped: 0
>      tx_queue_wake: 0
>      tx0_stopped: 0
>      tx0_wake: 0
>      ....

Yes, that's it! As far as I know, only the mlx drivers have those two
counters, but they are very useful for debugging some issues or for
tracking historical changes if we want to.

Thanks,
Jason

>
> >
> > > >
> > > > > In my opinion it makes the stop counter more useful, at little cost.
> > > > >
> > > > > >
> > > > > >
> > > > > > > ---
> > > > > > >  drivers/net/virtio_net.c | 26 ++++++++++++++++++++++++--
> > > > > > >  1 file changed, 24 insertions(+), 2 deletions(-)
> > > > > > >
> > > > > > > diff --git a/drivers/net/virtio_net.c
> > > > > > > b/drivers/net/virtio_net.c index 3cb8aa193884..7e3c31ceaf7e
> > > > > > > 100644
> > > > > > > --- a/drivers/net/virtio_net.c
> > > > > > > +++ b/drivers/net/virtio_net.c
> > > > > > > @@ -88,6 +88,8 @@ struct virtnet_sq_stats {
> > > > > > >     u64_stats_t xdp_tx_drops;
> > > > > > >     u64_stats_t kicks;
> > > > > > >     u64_stats_t tx_timeouts;
> > > > > > > +   u64_stats_t tx_stop;
> > > > > > > +   u64_stats_t tx_wake;
> > > > > > >  };
> > > > > > >
> > > > > > >  struct virtnet_rq_stats {
> > > > > > > @@ -112,6 +114,8 @@ static const struct virtnet_stat_desc
> > > > > > virtnet_sq_stats_desc[] = {
> > > > > > >     { "xdp_tx_drops",       VIRTNET_SQ_STAT(xdp_tx_drops) },
> > > > > > >     { "kicks",              VIRTNET_SQ_STAT(kicks) },
> > > > > > >     { "tx_timeouts",        VIRTNET_SQ_STAT(tx_timeouts) },
> > > > > > > +   { "tx_stop",            VIRTNET_SQ_STAT(tx_stop) },
> > > > > > > +   { "tx_wake",            VIRTNET_SQ_STAT(tx_wake) },
> > > > > > >  };
> > > > > > >
> > > > > > > >  static const struct virtnet_stat_desc virtnet_rq_stats_desc[] = {
> > > > > > > > @@ -843,6 +847,9 @@ static void check_sq_full_and_disable(struct virtnet_info *vi,
> > > > > > >      */
> > > > > > >     if (sq->vq->num_free < 2+MAX_SKB_FRAGS) {
> > > > > > >             netif_stop_subqueue(dev, qnum);
> > > > > > > +           u64_stats_update_begin(&sq->stats.syncp);
> > > > > > > +           u64_stats_inc(&sq->stats.tx_stop);
> > > > > > > +           u64_stats_update_end(&sq->stats.syncp);
> > > > > > >             if (use_napi) {
> > > > > > >                     if
> > (unlikely(!virtqueue_enable_cb_delayed(sq->vq)))
> > > > > > >                             virtqueue_napi_schedule(&sq->napi,
> > sq- vq);
> > > > @@ -851,6 +858,9
> > > > > > >@@  static void check_sq_full_and_disable(struct virtnet_info *vi,
> > > > > > >                     free_old_xmit_skbs(sq, false);
> > > > > > >                     if (sq->vq->num_free >=
> > 2+MAX_SKB_FRAGS) {
> > > > > > >                             netif_start_subqueue(dev, qnum);
> > > > > > > +                           u64_stats_update_begin(&sq-
> > >stats.syncp);
> > > > > > > +                           u64_stats_inc(&sq->stats.tx_wake);
> > > > > > > +                           u64_stats_update_end(&sq-
> > >stats.syncp);
> > > > > > >                             virtqueue_disable_cb(sq->vq);
> > > > > > >                     }
> > > > > > >             }
> > > > > > > @@ -2163,8 +2173,14 @@ static void virtnet_poll_cleantx(struct
> > > > > > receive_queue *rq)
> > > > > > >                     free_old_xmit_skbs(sq, true);
> > > > > > >             } while (unlikely(!virtqueue_enable_cb_delayed(sq-
> > >vq)));
> > > > > > >
> > > > > > > -           if (sq->vq->num_free >= 2 + MAX_SKB_FRAGS)
> > > > > > > +           if (sq->vq->num_free >= 2 + MAX_SKB_FRAGS) {
> > > > > > > +                   if (netif_tx_queue_stopped(txq)) {
> > > > > > > +                           u64_stats_update_begin(&sq-
> > >stats.syncp);
> > > > > > > +                           u64_stats_inc(&sq->stats.tx_wake);
> > > > > > > +                           u64_stats_update_end(&sq-
> > >stats.syncp);
> > > > > > > +                   }
> > > > > > >                     netif_tx_wake_queue(txq);
> > > > > > > +           }
> > > > > > >
> > > > > > >             __netif_tx_unlock(txq);
> > > > > > >     }
> > > > > > > @@ -2310,8 +2326,14 @@ static int virtnet_poll_tx(struct
> > > > > > > napi_struct
> > > > > > *napi, int budget)
> > > > > > >     virtqueue_disable_cb(sq->vq);
> > > > > > >     free_old_xmit_skbs(sq, true);
> > > > > > >
> > > > > > > -   if (sq->vq->num_free >= 2 + MAX_SKB_FRAGS)
> > > > > > > +   if (sq->vq->num_free >= 2 + MAX_SKB_FRAGS) {
> > > > > > > +           if (netif_tx_queue_stopped(txq)) {
> > > > > > > +                   u64_stats_update_begin(&sq->stats.syncp);
> > > > > > > +                   u64_stats_inc(&sq->stats.tx_wake);
> > > > > > > +                   u64_stats_update_end(&sq->stats.syncp);
> > > > > > > +           }
> > > > > > >             netif_tx_wake_queue(txq);
> > > > > > > +   }
> > > > > > >
> > > > > > >     opaque = virtqueue_enable_cb_prepare(sq->vq);
> > > > > > >
> > > > > > > --
> > > > > > > 2.42.0
>
>

Reply via email to