Re: [RFC] [ver3 PATCH 3/6] virtio_net: virtio_net driver changes
On Fri, 2011-11-11 at 18:34 +0530, Krishna Kumar wrote:
> Changes for multiqueue virtio_net driver.
[...]
> @@ -677,25 +730,35 @@ static struct rtnl_link_stats64 *virtnet
>  {
>         struct virtnet_info *vi = netdev_priv(dev);
>         int cpu;
> -       unsigned int start;
>
>         for_each_possible_cpu(cpu) {
> -               struct virtnet_stats __percpu *stats
> -                       = per_cpu_ptr(vi->stats, cpu);
> -               u64 tpackets, tbytes, rpackets, rbytes;
> -
> -               do {
> -                       start = u64_stats_fetch_begin(&stats->syncp);
> -                       tpackets = stats->tx_packets;
> -                       tbytes = stats->tx_bytes;
> -                       rpackets = stats->rx_packets;
> -                       rbytes = stats->rx_bytes;
> -               } while (u64_stats_fetch_retry(&stats->syncp, start));
> -
> -               tot->rx_packets += rpackets;
> -               tot->tx_packets += tpackets;
> -               tot->rx_bytes += rbytes;
> -               tot->tx_bytes += tbytes;
> +               int qpair;
> +
> +               for (qpair = 0; qpair < vi->num_queue_pairs; qpair++) {
> +                       struct virtnet_send_stats __percpu *tx_stat;
> +                       struct virtnet_recv_stats __percpu *rx_stat;

While you're at it, you can drop the per-CPU stats and make them only
per-queue.  There is unlikely to be any benefit in maintaining them
per-CPU while receive and transmit processing is serialised per-queue.

[...]
> +static int invoke_find_vqs(struct virtnet_info *vi)
> +{
> +       vq_callback_t **callbacks;
> +       struct virtqueue **vqs;
> +       int ret = -ENOMEM;
> +       int i, total_vqs;
> +       char **names;
> +
> +       /*
> +        * We expect 1 RX virtqueue followed by 1 TX virtqueue, followed
> +        * by the same 'vi->num_queue_pairs-1' more times, and optionally
> +        * one control virtqueue.
> +        */
> +       total_vqs = vi->num_queue_pairs * 2 +
> +                   virtio_has_feature(vi->vdev, VIRTIO_NET_F_CTRL_VQ);
> +
> +       /* Allocate space for find_vqs parameters */
> +       vqs = kmalloc(total_vqs * sizeof(*vqs), GFP_KERNEL);
> +       callbacks = kmalloc(total_vqs * sizeof(*callbacks), GFP_KERNEL);
> +       names = kmalloc(total_vqs * sizeof(*names), GFP_KERNEL);
> +       if (!vqs || !callbacks || !names)
> +               goto err;
> +
> +       /* Allocate/initialize parameters for recv virtqueues */
> +       for (i = 0; i < vi->num_queue_pairs * 2; i += 2) {
> +               callbacks[i] = skb_recv_done;
> +               names[i] = kasprintf(GFP_KERNEL, "input.%d", i / 2);
> +               if (!names[i])
> +                       goto err;
> +       }
> +
> +       /* Allocate/initialize parameters for send virtqueues */
> +       for (i = 1; i < vi->num_queue_pairs * 2; i += 2) {
> +               callbacks[i] = skb_xmit_done;
> +               names[i] = kasprintf(GFP_KERNEL, "output.%d", i / 2);
> +               if (!names[i])
> +                       goto err;
> +       }
[...]

The RX and TX interrupt names for a multiqueue device should follow the
formats <device>-rx-<index> and <device>-tx-<index>.

Ben.

-- 
Ben Hutchings, Staff Engineer, Solarflare
Not speaking for my employer; that's the marketing department's job.
They asked us to note that Solarflare product names are trademarked.
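For illustration, Ben's naming suggestion might translate into something
like the following variant of the name-building loops in invoke_find_vqs().
This is only a sketch, not code from the patch: virtnet_alloc_vq_names() is
a hypothetical helper, and it assumes vi->dev->name is meaningful at this
point (before register_netdev() it may still be the "eth%d" template), with
the caller freeing any names already allocated on failure, as the original
error path does.

/* Hypothetical helper (not in the patch): build virtqueue names in the
 * <device>-rx-<index> / <device>-tx-<index> style, keeping the
 * rx0, tx0, rx1, tx1, ... ordering that invoke_find_vqs() expects.
 */
static int virtnet_alloc_vq_names(struct virtnet_info *vi, char **names)
{
        int i;

        for (i = 0; i < vi->num_queue_pairs * 2; i++) {
                bool rx = !(i & 1);     /* even slots are RX, odd are TX */

                names[i] = kasprintf(GFP_KERNEL, "%s-%s-%d", vi->dev->name,
                                     rx ? "rx" : "tx", i / 2);
                if (!names[i])
                        return -ENOMEM;
        }
        return 0;
}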
Re: [RFC] [ver3 PATCH 3/6] virtio_net: virtio_net driver changes
On Fri, 2011-11-18 at 08:24 +0200, Sasha Levin wrote:
> On Fri, 2011-11-18 at 01:08 +0000, Ben Hutchings wrote:
> > On Fri, 2011-11-11 at 18:34 +0530, Krishna Kumar wrote:
> > > Changes for multiqueue virtio_net driver.
> > [...]
> > While you're at it, you can drop the per-CPU stats and make them only
> > per-queue.  There is unlikely to be any benefit in maintaining them
> > per-CPU while receive and transmit processing is serialised per-queue.
>
> It allows you to update stats without a lock.

But you'll already be holding a lock related to the queue.

> What's the benefit of having them per queue?

It should save some memory (and a little time when summing stats, though
that's unlikely to matter much).  The important thing is that splitting
up stats per-CPU *and* per-queue is a waste.

Ben.

-- 
Ben Hutchings, Staff Engineer, Solarflare
Not speaking for my employer; that's the marketing department's job.
They asked us to note that Solarflare product names are trademarked.
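To make the per-queue-only suggestion concrete, a get_stats64 loop over
queues might look roughly like the sketch below. It makes two assumptions
that differ from the posted patch: virtnet_send_stats/virtnet_recv_stats
are embedded directly in send_queue/receive_queue rather than allocated
with alloc_percpu(), and only the basic packet/byte counters are shown.

/* Sketch only: per-queue (not per-CPU) stats summed in get_stats64.
 * Assumes 'stats' is an embedded struct in each queue, protected by the
 * queue's own serialisation plus u64_stats_sync for 32-bit readers.
 */
static struct rtnl_link_stats64 *virtnet_stats(struct net_device *dev,
                                               struct rtnl_link_stats64 *tot)
{
        struct virtnet_info *vi = netdev_priv(dev);
        unsigned int start;
        int qpair;

        for (qpair = 0; qpair < vi->num_queue_pairs; qpair++) {
                struct virtnet_send_stats *tx = &vi->sq[qpair]->stats;
                struct virtnet_recv_stats *rx = &vi->rq[qpair]->stats;
                u64 tpackets, tbytes, rpackets, rbytes;

                do {
                        start = u64_stats_fetch_begin(&tx->syncp);
                        tpackets = tx->tx_packets;
                        tbytes = tx->tx_bytes;
                } while (u64_stats_fetch_retry(&tx->syncp, start));

                do {
                        start = u64_stats_fetch_begin(&rx->syncp);
                        rpackets = rx->rx_packets;
                        rbytes = rx->rx_bytes;
                } while (u64_stats_fetch_retry(&rx->syncp, start));

                tot->tx_packets += tpackets;
                tot->tx_bytes += tbytes;
                tot->rx_packets += rpackets;
                tot->rx_bytes += rbytes;
        }

        return tot;
}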
Re: [RFC] [ver3 PATCH 3/6] virtio_net: virtio_net driver changes
On Fri, 2011-11-18 at 18:18 +0200, Sasha Levin wrote:
> On Fri, 2011-11-18 at 15:40 +0000, Ben Hutchings wrote:
> > On Fri, 2011-11-18 at 08:24 +0200, Sasha Levin wrote:
> > > On Fri, 2011-11-18 at 01:08 +0000, Ben Hutchings wrote:
> > > > On Fri, 2011-11-11 at 18:34 +0530, Krishna Kumar wrote:
> > > > > Changes for multiqueue virtio_net driver.
> > > > [...]
> > > > While you're at it, you can drop the per-CPU stats and make them only
> > > > per-queue.  There is unlikely to be any benefit in maintaining them
> > > > per-CPU while receive and transmit processing is serialised per-queue.
> > >
> > > It allows you to update stats without a lock.
> >
> > But you'll already be holding a lock related to the queue.
>
> Right, but now you're holding a queue lock just when playing with the
> queue, we don't hold it when we process the data - which is when we
> usually need to update stats.
[...]

The *stack* is holding the appropriate lock when calling the NAPI poll
function or ndo_start_xmit function.

Ben.

-- 
Ben Hutchings, Staff Engineer, Solarflare
Not speaking for my employer; that's the marketing department's job.
They asked us to note that Solarflare product names are trademarked.
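The update side of that argument, as a sketch: because the stack serialises
ndo_start_xmit per TX queue (and NAPI poll per RX queue), a plain per-queue
counter needs no extra lock, only u64_stats_sync so 64-bit counters read
consistently on 32-bit hosts. As above, this assumes (unlike the patch)
that the stats struct is embedded in the queue rather than per-CPU;
virtnet_update_tx_stats() is a hypothetical name.

/* Sketch only: lockless per-queue update, relying on the stack's
 * per-queue serialisation of ndo_start_xmit (and NAPI poll for RX).
 */
static void virtnet_update_tx_stats(struct send_queue *sq,
                                    const struct sk_buff *skb)
{
        u64_stats_update_begin(&sq->stats.syncp);
        sq->stats.tx_packets++;
        sq->stats.tx_bytes += skb->len;
        u64_stats_update_end(&sq->stats.syncp);
}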
Re: [RFC] [ver3 PATCH 3/6] virtio_net: virtio_net driver changes
On Fri, 2011-11-18 at 15:40 +0000, Ben Hutchings wrote:
> On Fri, 2011-11-18 at 08:24 +0200, Sasha Levin wrote:
> > On Fri, 2011-11-18 at 01:08 +0000, Ben Hutchings wrote:
> > > On Fri, 2011-11-11 at 18:34 +0530, Krishna Kumar wrote:
> > > > Changes for multiqueue virtio_net driver.
> > > [...]
> > > While you're at it, you can drop the per-CPU stats and make them only
> > > per-queue.  There is unlikely to be any benefit in maintaining them
> > > per-CPU while receive and transmit processing is serialised per-queue.
> >
> > It allows you to update stats without a lock.
>
> But you'll already be holding a lock related to the queue.

Right, but now you're holding a queue lock just when playing with the
queue, we don't hold it when we process the data - which is when we
usually need to update stats.

> > What's the benefit of having them per queue?
>
> It should save some memory (and a little time when summing stats, though
> that's unlikely to matter much).  The important thing is that splitting
> up stats per-CPU *and* per-queue is a waste.
>
> Ben.

-- 

Sasha.
Re: [RFC] [ver3 PATCH 3/6] virtio_net: virtio_net driver changes
On Fri, 2011-11-18 at 01:08 +0000, Ben Hutchings wrote:
> On Fri, 2011-11-11 at 18:34 +0530, Krishna Kumar wrote:
> > Changes for multiqueue virtio_net driver.
> [...]
> While you're at it, you can drop the per-CPU stats and make them only
> per-queue.  There is unlikely to be any benefit in maintaining them
> per-CPU while receive and transmit processing is serialised per-queue.

It allows you to update stats without a lock.

What's the benefit of having them per queue?

-- 

Sasha.
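For context, the lock-free per-CPU update Sasha is referring to follows the
existing driver's accounting scheme: each CPU bumps its own copy, with only
u64_stats_sync guarding 64-bit reads on 32-bit hosts. A rough sketch
(virtnet_count_tx() is a hypothetical name; the pre-multiqueue driver does
this inline in its transmit and receive paths):

/* Sketch of the existing lock-free per-CPU accounting: each CPU updates
 * its own virtnet_stats copy obtained via this_cpu_ptr().
 */
static void virtnet_count_tx(struct virtnet_info *vi, unsigned int len)
{
        struct virtnet_stats *stats = this_cpu_ptr(vi->stats);

        u64_stats_update_begin(&stats->syncp);
        stats->tx_packets++;
        stats->tx_bytes += len;
        u64_stats_update_end(&stats->syncp);
}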
[RFC] [ver3 PATCH 3/6] virtio_net: virtio_net driver changes
Changes for multiqueue virtio_net driver.

Signed-off-by: krkum...@in.ibm.com
---
 drivers/net/virtio_net.c   |  688 ---
 include/linux/virtio_net.h |    2
 2 files changed, 481 insertions(+), 209 deletions(-)

diff -ruNp org/drivers/net/virtio_net.c new/drivers/net/virtio_net.c
--- org/drivers/net/virtio_net.c        2011-11-11 16:44:38.0 +0530
+++ new/drivers/net/virtio_net.c        2011-11-11 16:44:59.0 +0530
@@ -40,33 +40,42 @@ module_param(gso, bool, 0444);
 
 #define VIRTNET_SEND_COMMAND_SG_MAX    2
 
-struct virtnet_stats {
+struct virtnet_send_stats {
        struct u64_stats_sync syncp;
        u64 tx_bytes;
        u64 tx_packets;
+};
 
+struct virtnet_recv_stats {
+       struct u64_stats_sync syncp;
        u64 rx_bytes;
        u64 rx_packets;
 };
 
-struct virtnet_info {
-       struct virtio_device *vdev;
-       struct virtqueue *rvq, *svq, *cvq;
-       struct net_device *dev;
-       struct napi_struct napi;
-       unsigned int status;
+/* Internal representation of a send virtqueue */
+struct send_queue {
+       /* Virtqueue associated with this send _queue */
+       struct virtqueue *vq;
 
-       /* Number of input buffers, and max we've ever had. */
-       unsigned int num, max;
+       /* TX: fragments + linear part + virtio header */
+       struct scatterlist sg[MAX_SKB_FRAGS + 2];
 
-       /* I like... big packets and I cannot lie! */
-       bool big_packets;
+       /* Active tx statistics */
+       struct virtnet_send_stats __percpu *stats;
+};
 
-       /* Host will merge rx buffers for big packets (shake it! shake it!) */
-       bool mergeable_rx_bufs;
+/* Internal representation of a receive virtqueue */
+struct receive_queue {
+       /* Virtqueue associated with this receive_queue */
+       struct virtqueue *vq;
+
+       /* Back pointer to the virtnet_info */
+       struct virtnet_info *vi;
 
-       /* Active statistics */
-       struct virtnet_stats __percpu *stats;
+       struct napi_struct napi;
+
+       /* Number of input buffers, and max we've ever had. */
+       unsigned int num, max;
 
        /* Work struct for refilling if we run low on memory. */
        struct delayed_work refill;
@@ -74,9 +83,29 @@ struct virtnet_info {
        /* Chain pages by the private ptr. */
        struct page *pages;
 
-       /* fragments + linear part + virtio header */
-       struct scatterlist rx_sg[MAX_SKB_FRAGS + 2];
-       struct scatterlist tx_sg[MAX_SKB_FRAGS + 2];
+       /* RX: fragments + linear part + virtio header */
+       struct scatterlist sg[MAX_SKB_FRAGS + 2];
+
+       /* Active rx statistics */
+       struct virtnet_recv_stats __percpu *stats;
+};
+
+struct virtnet_info {
+       int num_queue_pairs;            /* # of RX/TX vq pairs */
+
+       struct send_queue **sq;
+       struct receive_queue **rq;
+       struct virtqueue *cvq;
+
+       struct virtio_device *vdev;
+       struct net_device *dev;
+       unsigned int status;
+
+       /* I like... big packets and I cannot lie! */
+       bool big_packets;
+
+       /* Host will merge rx buffers for big packets (shake it! shake it!) */
+       bool mergeable_rx_bufs;
 };
 
 struct skb_vnet_hdr {
@@ -106,22 +135,22 @@ static inline struct skb_vnet_hdr *skb_v
  * private is used to chain pages for big packets, put the whole
  * most recent used list in the beginning for reuse
  */
-static void give_pages(struct virtnet_info *vi, struct page *page)
+static void give_pages(struct receive_queue *rq, struct page *page)
 {
        struct page *end;
 
        /* Find end of list, sew whole thing into vi->pages. */
        for (end = page; end->private; end = (struct page *)end->private);
-       end->private = (unsigned long)vi->pages;
-       vi->pages = page;
+       end->private = (unsigned long)rq->pages;
+       rq->pages = page;
 }
 
-static struct page *get_a_page(struct virtnet_info *vi, gfp_t gfp_mask)
+static struct page *get_a_page(struct receive_queue *rq, gfp_t gfp_mask)
 {
-       struct page *p = vi->pages;
+       struct page *p = rq->pages;
 
        if (p) {
-               vi->pages = (struct page *)p->private;
+               rq->pages = (struct page *)p->private;
                /* clear private here, it is used to chain pages */
                p->private = 0;
        } else
@@ -129,15 +158,16 @@ static struct page *get_a_page(struct vi
        return p;
 }
 
-static void skb_xmit_done(struct virtqueue *svq)
+static void skb_xmit_done(struct virtqueue *vq)
 {
-       struct virtnet_info *vi = svq->vdev->priv;
+       struct virtnet_info *vi = vq->vdev->priv;
+       int qnum = vq->queue_index / 2; /* RX/TX vqs are allocated in pairs */
 
        /* Suppress further interrupts. */
-       virtqueue_disable_cb(svq);
+       virtqueue_disable_cb(vq);
 
        /* We were probably waiting for more output buffers. */
-       netif_wake_queue(vi->dev);
+       netif_wake_subqueue(vi->dev, qnum);
 }
 
 static void set_skb_frag(struct sk_buff *skb, struct
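One convention worth calling out from the skb_xmit_done() hunk above: RX and
TX virtqueues are created in interleaved pairs, so the TX queue number is
derived as vq->queue_index / 2. A small sketch of the implied mapping
(hypothetical helpers, not part of the patch; it assumes the virtqueue's
queue_index is populated by the virtio core changes this series relies on):

/* Hypothetical helpers showing the pairing convention: vqs are laid out
 * as rx0, tx0, rx1, tx1, ..., so even indices are RX and odd are TX.
 */
static inline int vq_to_qpair(const struct virtqueue *vq)
{
        return vq->queue_index / 2;     /* queue pair number for an rx or tx vq */
}

static inline int qpair_to_rx_vq(int qpair)
{
        return qpair * 2;               /* even slot: RX vq of this pair */
}

static inline int qpair_to_tx_vq(int qpair)
{
        return qpair * 2 + 1;           /* odd slot: TX vq of this pair */
}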