Re: [Qemu-devel] [net-next RFC PATCH 4/7] tuntap: multiqueue support

2011-08-14 Thread Jason Wang


- Original Message -
 Le vendredi 12 août 2011 à 09:55 +0800, Jason Wang a écrit :
 
 + rxq = skb_get_rxhash(skb);
 + if (rxq) {
 + tfile = rcu_dereference(tun-tfiles[rxq % numqueues]);
 + if (tfile)
 + goto out;
 + }
 
 You can avoid an expensive divide with following trick :
 
 u32 idx = ((u64)rxq * numqueues)  32;
 

Sure.

 
 
  -static struct tun_struct *tun_get(struct file *file)
  +static void tun_detach_all(struct net_device *dev)
   {
  - return __tun_get(file-private_data);
  + struct tun_struct *tun = netdev_priv(dev);
  + struct tun_file *tfile, *tfile_list[MAX_TAP_QUEUES];
  + int i, j = 0;
  +
  + spin_lock(tun_lock);
  +
  + for (i = 0; i  MAX_TAP_QUEUES  tun-numqueues; i++) {
  + tfile = rcu_dereference_protected(tun-tfiles[i],
  + lockdep_is_held(tun_lock));
  + if (tfile) {
  + wake_up_all(tfile-wq.wait);
  + tfile_list[i++] = tfile;
 
 typo here, you want tfile_list[j++] = tfile;
 

Yes, thanks for catching this.

  + rcu_assign_pointer(tun-tfiles[i], NULL);
  + rcu_assign_pointer(tfile-tun, NULL);
  + --tun-numqueues;
  + }
  + }
  + BUG_ON(tun-numqueues != 0);
  + spin_unlock(tun_lock);
  +
  + synchronize_rcu();
  + for(--j; j = 0; j--)
  + sock_put(tfile_list[j]-sk);
   }
 
 
 Could you take a look at net/packet/af_packet.c, to check how David
 did
 the whole fanout thing ?
 
 __fanout_unlink()
 
 Trick is to not leave NULL entries in the tun-tfiles[] array.
 
 It makes things easier in hot path.

Sure I would go to take a look at this.

 
 
 --
 To unsubscribe from this list: send the line unsubscribe netdev in
 the body of a message to majord...@vger.kernel.org
 More majordomo info at http://vger.kernel.org/majordomo-info.html



Re: [Qemu-devel] [net-next RFC PATCH 4/7] tuntap: multiqueue support

2011-08-14 Thread Jason Wang


- Original Message -
 On Fri, Aug 12, 2011 at 09:55:20AM +0800, Jason Wang wrote:
  With the abstraction that each socket were a backend of a
  queue for userspace, this patch adds multiqueue support for
  tap device by allowing multiple sockets to be attached to a
  tap device. Then we could parallize the transmission by put
  them into different socket.
 
  As queue related information were stored in private_data of
  file new, we could simply implement the multiqueue support
  by add an array of pointers to sockets were stored in the
  tap device. Then ioctls may be added to manipulate those
  pointers for adding or removing queues.
 
  In order to let tx path lockless, NETIF_F_LLTX were used for
  multiqueue tap device. And RCU is used for doing
  synchronization between packet handling and system calls
  such as removing queues.
 
  Currently, multiqueue support is limited for tap , but it's
  easy also enable it for tun if we find it was also helpful.
 
 Question below about calls to tun_get_slot().
 
 Thanx, Paul
 
  Signed-off-by: Jason Wang jasow...@redhat.com
  ---
   drivers/net/tun.c | 376
   ++---
   1 files changed, 243 insertions(+), 133 deletions(-)
 
  diff --git a/drivers/net/tun.c b/drivers/net/tun.c
  index 4cd292a..8bc6dff 100644
  --- a/drivers/net/tun.c
  +++ b/drivers/net/tun.c
  @@ -108,6 +108,8 @@ struct tap_filter {
  unsigned char addr[FLT_EXACT_COUNT][ETH_ALEN];
   };
 
  +#define MAX_TAP_QUEUES (NR_CPUS  16 ? NR_CPUS : 16)
  +
   struct tun_file {
  struct sock sk;
  struct socket socket;
  @@ -115,7 +117,7 @@ struct tun_file {
  int vnet_hdr_sz;
  struct tap_filter txflt;
  atomic_t count;
  - struct tun_struct *tun;
  + struct tun_struct __rcu *tun;
  struct net *net;
  struct fasync_struct *fasync;
  unsigned int flags;
  @@ -124,7 +126,8 @@ struct tun_file {
   struct tun_sock;
 
   struct tun_struct {
  - struct tun_file *tfile;
  + struct tun_file *tfiles[MAX_TAP_QUEUES];
  + unsigned int numqueues;
  unsigned int flags;
  uid_t owner;
  gid_t group;
  @@ -139,80 +142,183 @@ struct tun_struct {
   #endif
   };
 
  -static int tun_attach(struct tun_struct *tun, struct file *file)
  +static DEFINE_SPINLOCK(tun_lock);
  +
  +/*
  + * get_slot: return a [unused/occupied] slot in tun-tfiles[]:
  + * - if 'f' is NULL, return the first empty slot;
  + * - otherwise, return the slot this pointer occupies.
  + */
  +static int tun_get_slot(struct tun_struct *tun, struct tun_file
  *tfile)
   {
  - struct tun_file *tfile = file-private_data;
  - int err;
  + int i;
 
  - ASSERT_RTNL();
  + for (i = 0; i  MAX_TAP_QUEUES; i++) {
  + if (rcu_dereference(tun-tfiles[i]) == tfile)
  + return i;
  + }
 
  - netif_tx_lock_bh(tun-dev);
  + /* Should never happen */
  + BUG_ON(1);
  +}
 
  - err = -EINVAL;
  - if (tfile-tun)
  - goto out;
  +/*
  + * tun_get_queue(): calculate the queue index
  + * - if skbs comes from mq nics, we can just borrow
  + * - if not, calculate from the hash
  + */
  +static struct tun_file *tun_get_queue(struct net_device *dev,
  + struct sk_buff *skb)
  +{
  + struct tun_struct *tun = netdev_priv(dev);
  + struct tun_file *tfile = NULL;
  + int numqueues = tun-numqueues;
  + __u32 rxq;
 
  - err = -EBUSY;
  - if (tun-tfile)
  + BUG_ON(!rcu_read_lock_held());
  +
  + if (!numqueues)
  goto out;
 
  - err = 0;
  - tfile-tun = tun;
  - tun-tfile = tfile;
  - netif_carrier_on(tun-dev);
  - dev_hold(tun-dev);
  - sock_hold(tfile-sk);
  - atomic_inc(tfile-count);
  + if (likely(skb_rx_queue_recorded(skb))) {
  + rxq = skb_get_rx_queue(skb);
  +
  + while (unlikely(rxq = numqueues))
  + rxq -= numqueues;
  +
  + tfile = rcu_dereference(tun-tfiles[rxq]);
  + if (tfile)
  + goto out;
  + }
  +
  + /* Check if we can use flow to select a queue */
  + rxq = skb_get_rxhash(skb);
  + if (rxq) {
  + tfile = rcu_dereference(tun-tfiles[rxq % numqueues]);
  + if (tfile)
  + goto out;
  + }
  +
  + /* Everything failed - find first available queue */
  + for (rxq = 0; rxq  MAX_TAP_QUEUES; rxq++) {
  + tfile = rcu_dereference(tun-tfiles[rxq]);
  + if (tfile)
  + break;
  + }
 
   out:
  - netif_tx_unlock_bh(tun-dev);
  - return err;
  + return tfile;
   }
 
  -static void __tun_detach(struct tun_struct *tun)
  +static int tun_detach(struct tun_file *tfile, bool clean)
   {
  - struct tun_file *tfile = tun-tfile;
  - /* Detach from net device */
  - netif_tx_lock_bh(tun-dev);
  - netif_carrier_off(tun-dev);
  - tun-tfile = NULL;
  - netif_tx_unlock_bh(tun-dev);
  -
  - /* Drop read queue */
  - skb_queue_purge(tfile-socket.sk-sk_receive_queue);
  -
  - /* Drop the extra count on the net device */
  - dev_put(tun-dev);
  -}
  + struct tun_struct *tun;
  + struct net_device *dev = NULL;
  + bool destroy = false;
 
  -static void tun_detach(struct tun_struct *tun)
  -{
  - rtnl_lock();
  - __tun_detach(tun);
  - rtnl_unlock();
  -}
  + spin_lock(tun_lock);
 
  

Re: [Qemu-devel] [net-next RFC PATCH 4/7] tuntap: multiqueue support

2011-08-12 Thread Eric Dumazet
Le vendredi 12 août 2011 à 09:55 +0800, Jason Wang a écrit :

+   rxq = skb_get_rxhash(skb);
+   if (rxq) {
+   tfile = rcu_dereference(tun-tfiles[rxq % numqueues]);
+   if (tfile)
+   goto out;
+   }

You can avoid an expensive divide with following trick :

u32 idx = ((u64)rxq * numqueues)  32;



 -static struct tun_struct *tun_get(struct file *file)
 +static void tun_detach_all(struct net_device *dev)
  {
 - return __tun_get(file-private_data);
 + struct tun_struct *tun = netdev_priv(dev);
 + struct tun_file *tfile, *tfile_list[MAX_TAP_QUEUES];
 + int i, j = 0;
 +
 + spin_lock(tun_lock);
 +
 + for (i = 0; i  MAX_TAP_QUEUES  tun-numqueues; i++) {
 + tfile = rcu_dereference_protected(tun-tfiles[i],
 + lockdep_is_held(tun_lock));
 + if (tfile) {
 + wake_up_all(tfile-wq.wait);
 + tfile_list[i++] = tfile;

typo here, you want tfile_list[j++] = tfile;

 + rcu_assign_pointer(tun-tfiles[i], NULL);
 + rcu_assign_pointer(tfile-tun, NULL);
 + --tun-numqueues;
 + }
 + }
 + BUG_ON(tun-numqueues != 0);
 + spin_unlock(tun_lock);
 +
 + synchronize_rcu();
 + for(--j; j = 0; j--)
 + sock_put(tfile_list[j]-sk);
  }
  

Could you take a look at net/packet/af_packet.c, to check how David did
the whole fanout thing ?

__fanout_unlink() 

Trick is to not leave NULL entries in the tun-tfiles[] array.

It makes things easier in hot path.





Re: [Qemu-devel] [net-next RFC PATCH 4/7] tuntap: multiqueue support

2011-08-12 Thread Paul E. McKenney
On Fri, Aug 12, 2011 at 09:55:20AM +0800, Jason Wang wrote:
 With the abstraction that each socket were a backend of a
 queue for userspace, this patch adds multiqueue support for
 tap device by allowing multiple sockets to be attached to a
 tap device. Then we could parallize the transmission by put
 them into different socket.
 
 As queue related information were stored in private_data of
 file new, we could simply implement the multiqueue support
 by add an array of pointers to sockets were stored in the
 tap device. Then ioctls may be added to manipulate those
 pointers for adding or removing queues.
 
 In order to let tx path lockless, NETIF_F_LLTX were used for
 multiqueue tap device. And RCU is used for doing
 synchronization between packet handling and system calls
 such as removing queues.
 
 Currently, multiqueue support is limited for tap , but it's
 easy also enable it for tun if we find it was also helpful.

Question below about calls to tun_get_slot().

Thanx, Paul

 Signed-off-by: Jason Wang jasow...@redhat.com
 ---
  drivers/net/tun.c |  376 
 ++---
  1 files changed, 243 insertions(+), 133 deletions(-)
 
 diff --git a/drivers/net/tun.c b/drivers/net/tun.c
 index 4cd292a..8bc6dff 100644
 --- a/drivers/net/tun.c
 +++ b/drivers/net/tun.c
 @@ -108,6 +108,8 @@ struct tap_filter {
   unsigned char   addr[FLT_EXACT_COUNT][ETH_ALEN];
  };
 
 +#define MAX_TAP_QUEUES (NR_CPUS  16 ? NR_CPUS : 16)
 +
  struct tun_file {
   struct sock sk;
   struct socket socket;
 @@ -115,7 +117,7 @@ struct tun_file {
   int vnet_hdr_sz;
   struct tap_filter txflt;
   atomic_t count;
 - struct tun_struct *tun;
 + struct tun_struct __rcu *tun;
   struct net *net;
   struct fasync_struct *fasync;
   unsigned int flags;
 @@ -124,7 +126,8 @@ struct tun_file {
  struct tun_sock;
 
  struct tun_struct {
 - struct tun_file *tfile;
 + struct tun_file *tfiles[MAX_TAP_QUEUES];
 + unsigned intnumqueues;
   unsigned intflags;
   uid_t   owner;
   gid_t   group;
 @@ -139,80 +142,183 @@ struct tun_struct {
  #endif
  };
 
 -static int tun_attach(struct tun_struct *tun, struct file *file)
 +static DEFINE_SPINLOCK(tun_lock);
 +
 +/*
 + * get_slot: return a [unused/occupied] slot in tun-tfiles[]:
 + * - if 'f' is NULL, return the first empty slot;
 + * - otherwise, return the slot this pointer occupies.
 + */
 +static int tun_get_slot(struct tun_struct *tun, struct tun_file *tfile)
  {
 - struct tun_file *tfile = file-private_data;
 - int err;
 + int i;
 
 - ASSERT_RTNL();
 + for (i = 0; i  MAX_TAP_QUEUES; i++) {
 + if (rcu_dereference(tun-tfiles[i]) == tfile)
 + return i;
 + }
 
 - netif_tx_lock_bh(tun-dev);
 +   /* Should never happen */
 +   BUG_ON(1);
 +}
 
 - err = -EINVAL;
 - if (tfile-tun)
 - goto out;
 +/*
 + * tun_get_queue(): calculate the queue index
 + * - if skbs comes from mq nics, we can just borrow
 + * - if not, calculate from the hash
 + */
 +static struct tun_file *tun_get_queue(struct net_device *dev,
 + struct sk_buff *skb)
 +{
 + struct tun_struct *tun = netdev_priv(dev);
 + struct tun_file *tfile = NULL;
 + int numqueues = tun-numqueues;
 + __u32 rxq;
 
 - err = -EBUSY;
 - if (tun-tfile)
 + BUG_ON(!rcu_read_lock_held());
 +
 + if (!numqueues)
   goto out;
 
 - err = 0;
 - tfile-tun = tun;
 - tun-tfile = tfile;
 - netif_carrier_on(tun-dev);
 - dev_hold(tun-dev);
 - sock_hold(tfile-sk);
 - atomic_inc(tfile-count);
 + if (likely(skb_rx_queue_recorded(skb))) {
 + rxq = skb_get_rx_queue(skb);
 +
 + while (unlikely(rxq = numqueues))
 + rxq -= numqueues;
 +
 + tfile = rcu_dereference(tun-tfiles[rxq]);
 + if (tfile)
 + goto out;
 + }
 +
 + /* Check if we can use flow to select a queue */
 + rxq = skb_get_rxhash(skb);
 + if (rxq) {
 + tfile = rcu_dereference(tun-tfiles[rxq % numqueues]);
 + if (tfile)
 + goto out;
 + }
 +
 + /* Everything failed - find first available queue */
 + for (rxq = 0; rxq  MAX_TAP_QUEUES; rxq++) {
 + tfile = rcu_dereference(tun-tfiles[rxq]);
 + if (tfile)
 + break;
 + }
 
  out:
 - netif_tx_unlock_bh(tun-dev);
 - return err;
 + return tfile;
  }
 
 -static void __tun_detach(struct tun_struct *tun)
 +static int tun_detach(struct tun_file *tfile, bool clean)
  {
 - struct tun_file *tfile = tun-tfile;
 - /* Detach from net device */
 - netif_tx_lock_bh(tun-dev);
 - netif_carrier_off(tun-dev);
 - tun-tfile =