We used to queue tx packets in sk_receive_queue. This is less
efficient since it requires a spinlock to synchronize between the
producer and the consumer.
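For reference, the skb_array API this patch switches to (from
include/linux/skb_array.h) is a ptr_ring-backed FIFO that lets a
single producer and a single consumer run without sharing a spinlock.
A minimal usage sketch follows; it is illustrative only, and the
demo_* names are made up for this example:

	#include <linux/skb_array.h>

	static struct skb_array demo_ring;	/* hypothetical ring */

	static int demo_setup(int len)
	{
		/* One-time allocation; len plays the role of tx_queue_len. */
		return skb_array_init(&demo_ring, len, GFP_KERNEL);
	}

	static void demo_xmit(struct sk_buff *skb)
	{
		/* Producer side: produce fails (non-zero) when the ring is
		 * full, so the caller picks the drop policy.
		 */
		if (skb_array_produce(&demo_ring, skb))
			kfree_skb(skb);
	}

	static struct sk_buff *demo_recv(void)
	{
		/* Consumer side: returns NULL when the ring is empty. */
		return skb_array_consume(&demo_ring);
	}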
This patch tries to address this by:

- switch from sk_receive_queue to an skb_array, and resize it when
  tx_queue_len is changed.
- introduce a new proto_ops, peek_len, which is used to peek at the
  skb length.
- implement a tun version of peek_len for vhost_net to use, and convert
  vhost_net to use peek_len where possible (sketched below).
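The shape of the peek_len plumbing, condensed into one illustrative
sketch. The include/linux/net.h and drivers/vhost/net.c hunks fall
outside the excerpt shown below, so this is not the literal diff; the
tun side is also simplified (the real version must additionally guard
against a detached queue), and the vhost fallback path is elided:

	/* include/linux/net.h: new op on struct proto_ops (abridged). */
	struct proto_ops {
		/* ... existing ops ... */
		int (*peek_len)(struct socket *sock);
	};

	/* drivers/net/tun.c: peek the length of the head skb in the ring. */
	static int tun_peek_len(struct socket *sock)
	{
		struct tun_file *tfile = container_of(sock, struct tun_file,
						      socket);

		return skb_array_peek_len(&tfile->tx_array);
	}

	/* drivers/vhost/net.c: prefer peek_len when the socket has one. */
	static int peek_head_len(struct sock *sk)
	{
		struct socket *sock = sk->sk_socket;

		if (sock->ops->peek_len)
			return sock->ops->peek_len(sock);

		/* ... otherwise peek sk_receive_queue under its lock ... */
		return 0;
	}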
Pktgen test shows about 15.3% improvement on guest receiving pps for
small buffers:

Before: ~1300000pps
After : ~1500000pps
Signed-off-by: Jason Wang <jasowang@redhat.com>
---
 drivers/net/tun.c   | 138 ++++++++++++++++++++++++++++++++++++++++++++++++---
 drivers/vhost/net.c |  16 +++++++-
 include/linux/net.h |   1 +
 3 files changed, 146 insertions(+), 9 deletions(-)
diff --git a/drivers/net/tun.c b/drivers/net/tun.c
index 4884802..3be69ea 100644
--- a/drivers/net/tun.c
+++ b/drivers/net/tun.c
@@ -71,6 +71,7 @@
 #include <net/sock.h>
 #include <linux/seq_file.h>
 #include <linux/uio.h>
+#include <linux/skb_array.h>
 
 #include <asm/uaccess.h>
@@ -167,6 +168,7 @@ struct tun_file {
 	};
 	struct list_head next;
 	struct tun_struct *detached;
+	struct skb_array tx_array;
 };
 
 struct tun_flow_entry {
@@ -515,7 +517,11 @@ static struct tun_struct *tun_enable_queue(struct tun_file *tfile)
 
 static void tun_queue_purge(struct tun_file *tfile)
 {
-	skb_queue_purge(&tfile->sk.sk_receive_queue);
+	struct sk_buff *skb;
+
+	while ((skb = skb_array_consume(&tfile->tx_array)) != NULL)
+		kfree_skb(skb);
+
 	skb_queue_purge(&tfile->sk.sk_error_queue);
 }
@@ -560,6 +566,8 @@ static void __tun_detach(struct tun_file *tfile, bool clean)
 			    tun->dev->reg_state == NETREG_REGISTERED)
 				unregister_netdevice(tun->dev);
 		}
+		if (tun)
+			skb_array_cleanup(&tfile->tx_array);
 		sock_put(&tfile->sk);
 	}
 }
@@ -613,6 +621,7 @@ static void tun_detach_all(struct net_device *dev)
 static int tun_attach(struct tun_struct *tun, struct file *file, bool skip_filter)
 {
 	struct tun_file *tfile = file->private_data;
+	struct net_device *dev = tun->dev;
 	int err;
 
 	err = security_tun_dev_attach(tfile->socket.sk, tun->security);
@@ -642,6 +651,13 @@ static int tun_attach(struct tun_struct *tun, struct file *file, bool skip_filte
 		if (!err)
 			goto out;
 	}
+
+	if (!tfile->detached &&
+	    skb_array_init(&tfile->tx_array, dev->tx_queue_len, GFP_KERNEL)) {
+		err = -ENOMEM;
+		goto out;
+	}
+
 	tfile->queue_index = tun->numqueues;
 	tfile->socket.sk->sk_shutdown &= ~RCV_SHUTDOWN;
 	rcu_assign_pointer(tfile->tun, tun);
@@ -891,8 +907,8 @@ static netdev_tx_t tun_net_xmit(struct sk_buff *skb, struct net_device *dev)
 
 	nf_reset(skb);
 
-	/* Enqueue packet */
-	skb_queue_tail(&tfile->socket.sk->sk_receive_queue, skb);
+	if (skb_array_produce(&tfile->tx_array, skb))
+		goto drop;
 
 	/* Notify and wake up reader process */
 	if (tfile->flags & TUN_FASYNC)
@@ -1107,7 +1123,7 @@ static unsigned int tun_chr_poll(struct file *file, poll_table *wait)
 
 	poll_wait(file, sk_sleep(sk), wait);
 
-	if (!skb_queue_empty(&sk->sk_receive_queue))
+	if (!skb_array_empty(&tfile->tx_array))
 		mask |= POLLIN | POLLRDNORM;
 
 	if (sock_writeable(sk) ||
@@ -1426,22 +1442,61 @@ done:
 	return total;
 }
 
+static struct sk_buff *tun_ring_recv(struct tun_file *tfile, int noblock,
+				     int *err)
+{
+	DECLARE_WAITQUEUE(wait, current);
+	struct sk_buff *skb = NULL;
+
+	skb = skb_array_consume(&tfile->tx_array);
+	if (skb)
+		goto out;
+	if (noblock) {
+		*err = -EAGAIN;
+		goto out;
+	}
+
+	add_wait_queue(&tfile->wq.wait, &wait);
+	current->state = TASK_INTERRUPTIBLE;
+
+	while (1) {
+		skb = skb_array_consume(&tfile->tx_array);
+		if (skb)
+			break;
+		if (signal_pending(current)) {
+			*err = -ERESTARTSYS;
+			break;
+		}
+		if (tfile->socket.sk->sk_shutdown & RCV_SHUTDOWN) {
+			*err = -EFAULT;
+			break;
+		}
+
+		schedule();
+	}
+
+	current->state = TASK_RUNNING;
+	remove_wait_queue(&tfile->wq.wait, &wait);
+
+out:
+	return skb;
+}
+
 static ssize_t tun_do_read(struct tun_struct *tun, struct tun_file *tfile,
 			   struct iov_iter *to,
 			   int noblock)
 {
 	struct sk_buff *skb;
 	ssize_t ret;
-	int peeked, err, off = 0;
+	int err;
 
 	tun_debug(KERN_INFO, tun, "tun_do_read\n");
 
 	if (!iov_iter_count(to))
 		return 0;
 
-	/* Read frames from queue */