在 2026-01-15 16:21:20,"Michael S. Tsirkin" <[email protected]> 写道:
>On Wed, Jan 14, 2026 at 08:36:27PM +0800, Longjun Tang wrote:
>> From: Longjun Tang <[email protected]>
>> 
>> To facilitate tracking the status of virtqueue during rx/tx and
>> interrupt response, add trace point at  corresponding func.
>> 
>> Also,to avoid perf hured,it olny available under debug condition.
>> 
>> Signed-off-by: Longjun Tang <[email protected]>
>> ---
>> v1:
>> I did some tests with iperf3, as follow:
>> host <---> vm
>> 
>> oringnal:
>> rx pps: 90915  rx bitrate: 28.5 Gbits/s
>> tx pps: 54659   tx bitrate: 26.4 Gbits/s
>> 
>> added trace:
>> rx pps: 76810  rx bitrate: 26.7 Gbits/s
>> tx pps: 54455   tx bitrate: 26.4 Gbits/s
>
>probably because you are reading a lot of cache cold
>data and copying it to stack just to discard it when
>trace is disabled?
>
>what if the tracepoints are much ligher weigh, just recording
>the vq number?

Recording only the vq index does offer some improvement, with RX PPS around 
80,000, 
but there's still a performance hurt compared to the original. Probably only in 
DEBUG mode to use.
>
>
>> Based on the results of the above tests, adding tracing has a
>> performance hurt, especially on the rx side. Therefore, these
>> traces should only be added to the debug virtio_net.
>> 
>> When these traces are needed, uncomment the DEBUG macro definition
>> in driver/net/virtio_net.c.
>> ---
>>  drivers/net/virtio_net.c       |  21 ++++-
>>  drivers/net/virtio_net_trace.h | 124 ++++++++++++++++++++++++++
>>  drivers/virtio/virtio_ring.c   | 155 --------------------------------
>>  include/linux/virtio_ring.h    | 156 +++++++++++++++++++++++++++++++++
>>  4 files changed, 297 insertions(+), 159 deletions(-)
>>  create mode 100644 drivers/net/virtio_net_trace.h
>> 
>> diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c
>> index 22d894101c01..fabd3e8acb36 100644
>> --- a/drivers/net/virtio_net.c
>> +++ b/drivers/net/virtio_net.c
>> @@ -27,6 +27,11 @@
>>  #include <net/netdev_queues.h>
>>  #include <net/xdp_sock_drv.h>
>>  
>> +#if defined(DEBUG)
>> +#define CREATE_TRACE_POINTS
>> +#include "virtio_net_trace.h"
>> +#endif
>> +
>>  static int napi_weight = NAPI_POLL_WEIGHT;
>>  module_param(napi_weight, int, 0444);
>>  
>> @@ -795,7 +800,9 @@ static void skb_xmit_done(struct virtqueue *vq)
>>      unsigned int index = vq2txq(vq);
>>      struct send_queue *sq = &vi->sq[index];
>>      struct napi_struct *napi = &sq->napi;
>> -
>> +#if defined(DEBUG)
>> +    trace_skb_xmit_done(napi, vq);
>> +#endif
>>      /* Suppress further interrupts. */
>>      virtqueue_disable_cb(vq);
>>  
>> @@ -2671,7 +2678,9 @@ static void receive_buf(struct virtnet_info *vi, 
>> struct receive_queue *rq,
>>  
>>      if (unlikely(!skb))
>>              return;
>> -
>> +#if defined(DEBUG)
>> +    trace_receive_buf(rq->vq, skb);
>> +#endif
>>      virtnet_receive_done(vi, rq, skb, flags);
>>  }
>>  
>> @@ -2877,7 +2886,9 @@ static void skb_recv_done(struct virtqueue *rvq)
>>  {
>>      struct virtnet_info *vi = rvq->vdev->priv;
>>      struct receive_queue *rq = &vi->rq[vq2rxq(rvq)];
>> -
>> +#if defined(DEBUG)
>> +    trace_skb_recv_done(&rq->napi, rvq);
>> +#endif
>>      rq->calls++;
>>      virtqueue_napi_schedule(&rq->napi, rvq);
>>  }
>> @@ -3389,7 +3400,9 @@ static netdev_tx_t start_xmit(struct sk_buff *skb, 
>> struct net_device *dev)
>>  
>>      /* timestamp packet in software */
>>      skb_tx_timestamp(skb);
>> -
>> +#if defined(DEBUG)
>> +    trace_start_xmit(sq->vq, skb);
>
>Pls prefix APIs with virtio_net or something.

how about trace_virtio_net_start_xmit ?
>
>> +#endif
>>      /* Try to transmit */
>>      err = xmit_skb(sq, skb, !use_napi);
>>  
>
>> diff --git a/drivers/net/virtio_net_trace.h b/drivers/net/virtio_net_trace.h
>> new file mode 100644
>> index 000000000000..0a008cb8f51d
>> --- /dev/null
>> +++ b/drivers/net/virtio_net_trace.h
>> @@ -0,0 +1,124 @@
>> +/* SPDX-License-Identifier: GPL-2.0 */
>> +#if defined(DEBUG)
>> +
>> +#undef TRACE_SYSTEM
>> +#define TRACE_SYSTEM virtio_net
>> +
>> +#if !defined(_TRACE_VIRTIO_NET_H) || defined(TRACE_HEADER_MULTI_READ)
>> +#define _TRACE_VIRTIO_NET_H
>> +
>> +#include <linux/virtio.h>
>> +#include <linux/tracepoint.h>
>> +#include <linux/virtio_ring.h>
>> +
>> +DECLARE_EVENT_CLASS(virtio_net_rxtx_temp,
>> +
>> +    TP_PROTO(struct virtqueue *vq, const struct sk_buff *skb),
>> +
>> +    TP_ARGS(vq, skb),
>> +
>> +    TP_STRUCT__entry(
>> +            __string(       name,   vq->name        )
>> +            __field(        unsigned int, num_free  )
>> +            __field(        unsigned int, index     )
>> +            __field(        bool, packed_ring       )
>> +            __field(        bool, broken    )
>> +            __field(        bool, event     )
>> +            __field(        u16, last_used_idx      )
>> +            __field(        __virtio16, avail_flags )
>> +            __field(        __virtio16, avail_idx   )
>> +            __field(        __virtio16, used_idx    )
>> +            __field(        const void *, skbaddr   )
>> +    ),
>> +
>> +    TP_fast_assign(
>> +            __entry->skbaddr = skb;
>> +
>> +            __assign_str(name);
>> +            __entry->num_free = vq->num_free;
>> +            __entry->index = vq->index;
>> +
>> +            struct vring_virtqueue *vvq;
>> +
>> +            vvq = container_of_const(vq, struct vring_virtqueue, vq);
>> +
>> +            __entry->packed_ring = vvq->packed_ring;
>> +            __entry->broken = vvq->broken;
>> +            __entry->event = vvq->event;
>> +            if (!vvq->packed_ring) {
>> +                    __entry->last_used_idx = vvq->last_used_idx;
>> +                    __entry->avail_flags = vvq->split.vring.avail->flags;
>> +                    __entry->avail_idx = vvq->split.vring.avail->idx;
>> +                    __entry->used_idx = vvq->split.vring.used->idx;
>> +            }
>> +    ),
>> +
>> +    TP_printk("skbaddr=%p vq=%s num_free=%u index=%u packed=%d broken=%d 
>> event=%d last_used_idx=%u avail_flags=%u avail_idx=%u used_idx=%u",
>> +            __entry->skbaddr, __get_str(name), __entry->num_free, 
>> __entry->index,
>> +            __entry->packed_ring, __entry->broken, __entry->event, 
>> __entry->last_used_idx,
>> +            __entry->avail_flags, __entry->avail_idx, __entry->used_idx)
>> +)
>> +
>> +DEFINE_EVENT(virtio_net_rxtx_temp, receive_buf,
>> +
>> +    TP_PROTO(struct virtqueue *vq, const struct sk_buff *skb),
>> +
>> +    TP_ARGS(vq, skb)
>> +);
>> +
>> +DEFINE_EVENT(virtio_net_rxtx_temp, start_xmit,
>> +
>> +    TP_PROTO(struct virtqueue *vq, const struct sk_buff *skb),
>> +
>> +    TP_ARGS(vq, skb)
>> +);
>> +
>> +DECLARE_EVENT_CLASS(virtio_net_int_temp,
>> +
>> +    TP_PROTO(struct napi_struct *napi, struct virtqueue *vq),
>> +
>> +    TP_ARGS(napi, vq),
>> +
>> +    TP_STRUCT__entry(
>> +            __string(       dev_name,       napi->dev->name )
>> +            __string(       vq_name,        vq->name)
>> +            __field(        u32,    napi_id )
>> +            __field(        int,    weight  )
>> +    ),
>> +
>> +    TP_fast_assign(
>> +            __assign_str(dev_name);
>> +            __assign_str(vq_name);
>> +            __entry->napi_id = napi->napi_id;
>> +            __entry->weight = napi->weight;
>> +    ),
>> +
>> +    TP_printk("dev=%s vq=%s napi_id=%u weight=%d",
>> +            __get_str(dev_name), __get_str(vq_name), __entry->napi_id, 
>> __entry->weight)
>> +)
>> +
>> +DEFINE_EVENT(virtio_net_int_temp, skb_xmit_done,
>> +
>> +    TP_PROTO(struct napi_struct *napi, struct virtqueue *vq),
>> +
>> +    TP_ARGS(napi, vq)
>> +);
>> +
>> +DEFINE_EVENT(virtio_net_int_temp, skb_recv_done,
>> +
>> +    TP_PROTO(struct napi_struct *napi, struct virtqueue *vq),
>> +
>> +    TP_ARGS(napi, vq)
>> +);
>> +
>> +#undef TRACE_INCLUDE_PATH
>> +#define TRACE_INCLUDE_PATH ../../drivers/net/
>> +#undef TRACE_INCLUDE_FILE
>> +#define TRACE_INCLUDE_FILE virtio_net_trace
>> +
>> +#endif /* _TRACE_VIRTIO_NET_H */
>> +
>> +/* This part must be outside protection */
>> +#include <trace/define_trace.h>
>> +
>> +#endif
>> diff --git a/drivers/virtio/virtio_ring.c b/drivers/virtio/virtio_ring.c
>> index ddab68959671..d413ebb42729 100644
>> --- a/drivers/virtio/virtio_ring.c
>> +++ b/drivers/virtio/virtio_ring.c
>> @@ -67,161 +67,6 @@
>>  #define LAST_ADD_TIME_INVALID(vq)
>>  #endif
>>  
>> -struct vring_desc_state_split {
>> -    void *data;                     /* Data for callback. */
>> -
>> -    /* Indirect desc table and extra table, if any. These two will be
>> -     * allocated together. So we won't stress more to the memory allocator.
>> -     */
>> -    struct vring_desc *indir_desc;
>> -};
>> -
>> -struct vring_desc_state_packed {
>> -    void *data;                     /* Data for callback. */
>> -
>> -    /* Indirect desc table and extra table, if any. These two will be
>> -     * allocated together. So we won't stress more to the memory allocator.
>> -     */
>> -    struct vring_packed_desc *indir_desc;
>> -    u16 num;                        /* Descriptor list length. */
>> -    u16 last;                       /* The last desc state in a list. */
>> -};
>> -
>> -struct vring_desc_extra {
>> -    dma_addr_t addr;                /* Descriptor DMA addr. */
>> -    u32 len;                        /* Descriptor length. */
>> -    u16 flags;                      /* Descriptor flags. */
>> -    u16 next;                       /* The next desc state in a list. */
>> -};
>> -
>> -struct vring_virtqueue_split {
>> -    /* Actual memory layout for this queue. */
>> -    struct vring vring;
>> -
>> -    /* Last written value to avail->flags */
>> -    u16 avail_flags_shadow;
>> -
>> -    /*
>> -     * Last written value to avail->idx in
>> -     * guest byte order.
>> -     */
>> -    u16 avail_idx_shadow;
>> -
>> -    /* Per-descriptor state. */
>> -    struct vring_desc_state_split *desc_state;
>> -    struct vring_desc_extra *desc_extra;
>> -
>> -    /* DMA address and size information */
>> -    dma_addr_t queue_dma_addr;
>> -    size_t queue_size_in_bytes;
>> -
>> -    /*
>> -     * The parameters for creating vrings are reserved for creating new
>> -     * vring.
>> -     */
>> -    u32 vring_align;
>> -    bool may_reduce_num;
>> -};
>> -
>> -struct vring_virtqueue_packed {
>> -    /* Actual memory layout for this queue. */
>> -    struct {
>> -            unsigned int num;
>> -            struct vring_packed_desc *desc;
>> -            struct vring_packed_desc_event *driver;
>> -            struct vring_packed_desc_event *device;
>> -    } vring;
>> -
>> -    /* Driver ring wrap counter. */
>> -    bool avail_wrap_counter;
>> -
>> -    /* Avail used flags. */
>> -    u16 avail_used_flags;
>> -
>> -    /* Index of the next avail descriptor. */
>> -    u16 next_avail_idx;
>> -
>> -    /*
>> -     * Last written value to driver->flags in
>> -     * guest byte order.
>> -     */
>> -    u16 event_flags_shadow;
>> -
>> -    /* Per-descriptor state. */
>> -    struct vring_desc_state_packed *desc_state;
>> -    struct vring_desc_extra *desc_extra;
>> -
>> -    /* DMA address and size information */
>> -    dma_addr_t ring_dma_addr;
>> -    dma_addr_t driver_event_dma_addr;
>> -    dma_addr_t device_event_dma_addr;
>> -    size_t ring_size_in_bytes;
>> -    size_t event_size_in_bytes;
>> -};
>> -
>> -struct vring_virtqueue {
>> -    struct virtqueue vq;
>> -
>> -    /* Is this a packed ring? */
>> -    bool packed_ring;
>> -
>> -    /* Is DMA API used? */
>> -    bool use_map_api;
>> -
>> -    /* Can we use weak barriers? */
>> -    bool weak_barriers;
>> -
>> -    /* Other side has made a mess, don't try any more. */
>> -    bool broken;
>> -
>> -    /* Host supports indirect buffers */
>> -    bool indirect;
>> -
>> -    /* Host publishes avail event idx */
>> -    bool event;
>> -
>> -    /* Head of free buffer list. */
>> -    unsigned int free_head;
>> -    /* Number we've added since last sync. */
>> -    unsigned int num_added;
>> -
>> -    /* Last used index  we've seen.
>> -     * for split ring, it just contains last used index
>> -     * for packed ring:
>> -     * bits up to VRING_PACKED_EVENT_F_WRAP_CTR include the last used index.
>> -     * bits from VRING_PACKED_EVENT_F_WRAP_CTR include the used wrap 
>> counter.
>> -     */
>> -    u16 last_used_idx;
>> -
>> -    /* Hint for event idx: already triggered no need to disable. */
>> -    bool event_triggered;
>> -
>> -    union {
>> -            /* Available for split ring */
>> -            struct vring_virtqueue_split split;
>> -
>> -            /* Available for packed ring */
>> -            struct vring_virtqueue_packed packed;
>> -    };
>> -
>> -    /* How to notify other side. FIXME: commonalize hcalls! */
>> -    bool (*notify)(struct virtqueue *vq);
>> -
>> -    /* DMA, allocation, and size information */
>> -    bool we_own_ring;
>> -
>> -    union virtio_map map;
>> -
>> -#ifdef DEBUG
>> -    /* They're supposed to lock for us. */
>> -    unsigned int in_use;
>> -
>> -    /* Figure out if their kicks are too delayed. */
>> -    bool last_add_time_valid;
>> -    ktime_t last_add_time;
>> -#endif
>> -};
>> -
>>  static struct vring_desc_extra *vring_alloc_desc_extra(unsigned int num);
>>  static void vring_free(struct virtqueue *_vq);
>>  
>> diff --git a/include/linux/virtio_ring.h b/include/linux/virtio_ring.h
>> index c97a12c1cda3..1442a02209ec 100644
>> --- a/include/linux/virtio_ring.h
>> +++ b/include/linux/virtio_ring.h
>> @@ -121,4 +121,160 @@ void vring_transport_features(struct virtio_device 
>> *vdev);
>>  irqreturn_t vring_interrupt(int irq, void *_vq);
>>  
>>  u32 vring_notification_data(struct virtqueue *_vq);
>> +
>> +struct vring_desc_state_split {
>> +    void *data;                     /* Data for callback. */
>> +
>> +    /* Indirect desc table and extra table, if any. These two will be
>> +     * allocated together. So we won't stress more to the memory allocator.
>> +     */
>> +    struct vring_desc *indir_desc;
>> +};
>> +
>> +struct vring_desc_state_packed {
>> +    void *data;                     /* Data for callback. */
>> +
>> +    /* Indirect desc table and extra table, if any. These two will be
>> +     * allocated together. So we won't stress more to the memory allocator.
>> +     */
>> +    struct vring_packed_desc *indir_desc;
>> +    u16 num;                        /* Descriptor list length. */
>> +    u16 last;                       /* The last desc state in a list. */
>> +};
>> +
>> +struct vring_desc_extra {
>> +    dma_addr_t addr;                /* Descriptor DMA addr. */
>> +    u32 len;                        /* Descriptor length. */
>> +    u16 flags;                      /* Descriptor flags. */
>> +    u16 next;                       /* The next desc state in a list. */
>> +};
>> +
>> +struct vring_virtqueue_split {
>> +    /* Actual memory layout for this queue. */
>> +    struct vring vring;
>> +
>> +    /* Last written value to avail->flags */
>> +    u16 avail_flags_shadow;
>> +
>> +    /*
>> +     * Last written value to avail->idx in
>> +     * guest byte order.
>> +     */
>> +    u16 avail_idx_shadow;
>> +
>> +    /* Per-descriptor state. */
>> +    struct vring_desc_state_split *desc_state;
>> +    struct vring_desc_extra *desc_extra;
>> +
>> +    /* DMA address and size information */
>> +    dma_addr_t queue_dma_addr;
>> +    size_t queue_size_in_bytes;
>> +
>> +    /*
>> +     * The parameters for creating vrings are reserved for creating new
>> +     * vring.
>> +     */
>> +    u32 vring_align;
>> +    bool may_reduce_num;
>> +};
>> +
>> +struct vring_virtqueue_packed {
>> +    /* Actual memory layout for this queue. */
>> +    struct {
>> +            unsigned int num;
>> +            struct vring_packed_desc *desc;
>> +            struct vring_packed_desc_event *driver;
>> +            struct vring_packed_desc_event *device;
>> +    } vring;
>> +
>> +    /* Driver ring wrap counter. */
>> +    bool avail_wrap_counter;
>> +
>> +    /* Avail used flags. */
>> +    u16 avail_used_flags;
>> +
>> +    /* Index of the next avail descriptor. */
>> +    u16 next_avail_idx;
>> +
>> +    /*
>> +     * Last written value to driver->flags in
>> +     * guest byte order.
>> +     */
>> +    u16 event_flags_shadow;
>> +
>> +    /* Per-descriptor state. */
>> +    struct vring_desc_state_packed *desc_state;
>> +    struct vring_desc_extra *desc_extra;
>> +
>> +    /* DMA address and size information */
>> +    dma_addr_t ring_dma_addr;
>> +    dma_addr_t driver_event_dma_addr;
>> +    dma_addr_t device_event_dma_addr;
>> +    size_t ring_size_in_bytes;
>> +    size_t event_size_in_bytes;
>> +};
>> +
>> +struct vring_virtqueue {
>> +    struct virtqueue vq;
>> +
>> +    /* Is this a packed ring? */
>> +    bool packed_ring;
>> +
>> +    /* Is DMA API used? */
>> +    bool use_map_api;
>> +
>> +    /* Can we use weak barriers? */
>> +    bool weak_barriers;
>> +
>> +    /* Other side has made a mess, don't try any more. */
>> +    bool broken;
>> +
>> +    /* Host supports indirect buffers */
>> +    bool indirect;
>> +
>> +    /* Host publishes avail event idx */
>> +    bool event;
>> +
>> +    /* Head of free buffer list. */
>> +    unsigned int free_head;
>> +    /* Number we've added since last sync. */
>> +    unsigned int num_added;
>> +
>> +    /* Last used index  we've seen.
>> +     * for split ring, it just contains last used index
>> +     * for packed ring:
>> +     * bits up to VRING_PACKED_EVENT_F_WRAP_CTR include the last used index.
>> +     * bits from VRING_PACKED_EVENT_F_WRAP_CTR include the used wrap 
>> counter.
>> +     */
>> +    u16 last_used_idx;
>> +
>> +    /* Hint for event idx: already triggered no need to disable. */
>> +    bool event_triggered;
>> +
>> +    union {
>> +            /* Available for split ring */
>> +            struct vring_virtqueue_split split;
>> +
>> +            /* Available for packed ring */
>> +            struct vring_virtqueue_packed packed;
>> +    };
>> +
>> +    /* How to notify other side. FIXME: commonalize hcalls! */
>> +    bool (*notify)(struct virtqueue *vq);
>> +
>> +    /* DMA, allocation, and size information */
>> +    bool we_own_ring;
>> +
>> +    union virtio_map map;
>> +
>> +#ifdef DEBUG
>> +    /* They're supposed to lock for us. */
>> +    unsigned int in_use;
>> +
>> +    /* Figure out if their kicks are too delayed. */
>> +    bool last_add_time_valid;
>> +    ktime_t last_add_time;
>> +#endif
>> +};
>> +
>
>Not excited about exporting all this stuff from virtio-ring
>internals.

If the vring_virtqueue structure doesn't need to be tracked, this stuff doesn't 
need to be exported.
>
>
>>  #endif /* _LINUX_VIRTIO_RING_H */
>> -- 
>> 2.48.1

Reply via email to