Author: arekm                        Date: Wed Aug 25 17:02:52 2010 GMT
Module: packages                      Tag: HEAD
---- Log message:
- updated imq patch

---- Files affected:
packages/kernel:
   kernel-imq.patch (1.10 -> 1.11) , kernel.spec (1.810 -> 1.811) 

---- Diffs:

================================================================
Index: packages/kernel/kernel-imq.patch
diff -u packages/kernel/kernel-imq.patch:1.10 packages/kernel/kernel-imq.patch:1.11
--- packages/kernel/kernel-imq.patch:1.10       Thu Aug  5 21:52:26 2010
+++ packages/kernel/kernel-imq.patch    Wed Aug 25 19:02:44 2010
@@ -1,7 +1,7 @@
-diff -uNr linux-2.6.34/drivers/net/imq.c linux-2.6.34-imq/drivers/net/imq.c
---- linux-2.6.34/drivers/net/imq.c     1970-01-01 02:00:00.000000000 +0200
-+++ linux-2.6.34-imq/drivers/net/imq.c 2010-06-02 10:05:45.752109073 +0300
-@@ -0,0 +1,635 @@
+diff -uNr linux-2.6.35/drivers/net/imq.c linux-2.6.35-imq-multiqueue-test1/drivers/net/imq.c
+--- linux-2.6.35/drivers/net/imq.c     1970-01-01 02:00:00.000000000 +0200
++++ linux-2.6.35-imq-multiqueue-test1/drivers/net/imq.c        2010-08-15 13:54:30.070063067 +0300
+@@ -0,0 +1,774 @@
 +/*
 + *             Pseudo-driver for the intermediate queue device.
 + *
@@ -51,7 +51,7 @@
 + *             I didn't forget anybody). I apologize again for my lack of time.
 + *
 + *
-+ *             2008/06/17 - 2.6.25 - Changed imq.c to use qdisc_run() instead 
++ *             2008/06/17 - 2.6.25 - Changed imq.c to use qdisc_run() instead
 + *             of qdisc_restart() and moved qdisc_run() to tasklet to avoid
 + *             recursive locking. New initialization routines to fix 'rmmod' not
 + *             working anymore. Used code from ifb.c. (Jussi Kivilinna)
@@ -86,6 +86,22 @@
 + *             2010/02/25 - (Jussi Kivilinna)
 + *              - Port to 2.6.33
 + *
++ *             2010/08/15 - (Jussi Kivilinna)
++ *              - Port to 2.6.35
++ *              - Simplify hook registration by using nf_register_hooks.
++ *              - nf_reinject doesn't need a spinlock around it, therefore remove
++ *                the imq_nf_reinject function. Other nf_reinject users protect
++ *                their own data with spinlocks. With IMQ, however, all the data
++ *                that is needed is stored per skbuff, so no locking is needed.
++ *              - Changed IMQ to use a separate NF_IMQ_QUEUE verdict instead of
++ *                NF_QUEUE; this allows working coexistence of IMQ and other
++ *                NF_QUEUE users.
++ *              - Make IMQ multi-queue. The number of IMQ device queues can be
++ *                increased with the 'numqueues' module parameter. The default
++ *                number of queues is 1, i.e. by default IMQ works as a
++ *                single-queue device. Multi-queue selection is based on the
++ *                IFB multi-queue patch by Changli Gao <[email protected]>.
++ *
 + *           Also, many thanks to Pablo Sebastian Greco for making the initial
 + *           patch and to those who helped with the testing.
 + *
@@ -109,66 +125,81 @@
 +#include <linux/imq.h>
 +#include <net/pkt_sched.h>
 +#include <net/netfilter/nf_queue.h>
++#include <net/sock.h>
++#include <linux/ip.h>
++#include <linux/ipv6.h>
++#include <linux/if_vlan.h>
++#include <linux/if_pppox.h>
++#include <net/ip.h>
++#include <net/ipv6.h>
++
++static int imq_nf_queue(struct nf_queue_entry *entry, unsigned queue_num);
 +
 +static nf_hookfn imq_nf_hook;
 +
-+static struct nf_hook_ops imq_ingress_ipv4 = {
-+      .hook           = imq_nf_hook,
-+      .owner          = THIS_MODULE,
-+      .pf             = PF_INET,
-+      .hooknum        = NF_INET_PRE_ROUTING,
++static struct nf_hook_ops imq_ops[] = {
++      {
++      /* imq_ingress_ipv4 */
++              .hook           = imq_nf_hook,
++              .owner          = THIS_MODULE,
++              .pf             = PF_INET,
++              .hooknum        = NF_INET_PRE_ROUTING,
 +#if defined(CONFIG_IMQ_BEHAVIOR_BA) || defined(CONFIG_IMQ_BEHAVIOR_BB)
-+      .priority       = NF_IP_PRI_MANGLE + 1
++              .priority       = NF_IP_PRI_MANGLE + 1,
 +#else
-+      .priority       = NF_IP_PRI_NAT_DST + 1
++              .priority       = NF_IP_PRI_NAT_DST + 1,
 +#endif
-+};
-+
-+static struct nf_hook_ops imq_egress_ipv4 = {
-+      .hook           = imq_nf_hook,
-+      .owner          = THIS_MODULE,
-+      .pf             = PF_INET,
-+      .hooknum        = NF_INET_POST_ROUTING,
++      },
++      {
++      /* imq_egress_ipv4 */
++              .hook           = imq_nf_hook,
++              .owner          = THIS_MODULE,
++              .pf             = PF_INET,
++              .hooknum        = NF_INET_POST_ROUTING,
 +#if defined(CONFIG_IMQ_BEHAVIOR_AA) || defined(CONFIG_IMQ_BEHAVIOR_BA)
-+      .priority       = NF_IP_PRI_LAST
++              .priority       = NF_IP_PRI_LAST,
 +#else
-+      .priority       = NF_IP_PRI_NAT_SRC - 1
++              .priority       = NF_IP_PRI_NAT_SRC - 1,
 +#endif
-+};
-+
++      },
 +#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
-+static struct nf_hook_ops imq_ingress_ipv6 = {
-+      .hook           = imq_nf_hook,
-+      .owner          = THIS_MODULE,
-+      .pf             = PF_INET6,
-+      .hooknum        = NF_INET_PRE_ROUTING,
++      {
++      /* imq_ingress_ipv6 */
++              .hook           = imq_nf_hook,
++              .owner          = THIS_MODULE,
++              .pf             = PF_INET6,
++              .hooknum        = NF_INET_PRE_ROUTING,
 +#if defined(CONFIG_IMQ_BEHAVIOR_BA) || defined(CONFIG_IMQ_BEHAVIOR_BB)
-+      .priority       = NF_IP6_PRI_MANGLE + 1
++              .priority       = NF_IP6_PRI_MANGLE + 1,
 +#else
-+      .priority       = NF_IP6_PRI_NAT_DST + 1
++              .priority       = NF_IP6_PRI_NAT_DST + 1,
 +#endif
-+};
-+
-+static struct nf_hook_ops imq_egress_ipv6 = {
-+      .hook           = imq_nf_hook,
-+      .owner          = THIS_MODULE,
-+      .pf             = PF_INET6,
-+      .hooknum        = NF_INET_POST_ROUTING,
++      },
++      {
++      /* imq_egress_ipv6 */
++              .hook           = imq_nf_hook,
++              .owner          = THIS_MODULE,
++              .pf             = PF_INET6,
++              .hooknum        = NF_INET_POST_ROUTING,
 +#if defined(CONFIG_IMQ_BEHAVIOR_AA) || defined(CONFIG_IMQ_BEHAVIOR_BA)
-+      .priority       = NF_IP6_PRI_LAST
++              .priority       = NF_IP6_PRI_LAST,
 +#else
-+      .priority       = NF_IP6_PRI_NAT_SRC - 1
++              .priority       = NF_IP6_PRI_NAT_SRC - 1,
 +#endif
-+};
++      },
 +#endif
++};
 +
 +#if defined(CONFIG_IMQ_NUM_DEVS)
-+static unsigned int numdevs = CONFIG_IMQ_NUM_DEVS;
++static int numdevs = CONFIG_IMQ_NUM_DEVS;
 +#else
-+static unsigned int numdevs = IMQ_MAX_DEVS;
++static int numdevs = IMQ_MAX_DEVS;
 +#endif
 +
-+static DEFINE_SPINLOCK(imq_nf_queue_lock);
++#define IMQ_MAX_QUEUES 32
++static int numqueues = 1;
++
++/*static DEFINE_SPINLOCK(imq_nf_queue_lock);*/
 +
 +static struct net_device *imq_devs_cache[IMQ_MAX_DEVS];
 +
@@ -193,49 +224,6 @@
 +      skb_restore_cb(skb); /* kfree backup */
 +}
 +
-+/* locking not needed when called from imq_nf_queue */
-+static void imq_nf_reinject_lockless(struct nf_queue_entry *entry,
-+                                              unsigned int verdict)
-+{
-+      int status;
-+
-+      if (!entry->next_outfn) {
-+              nf_reinject(entry, verdict);
-+              return;
-+      }
-+
-+      status = entry->next_outfn(entry, entry->next_queuenum);
-+      if (status < 0) {
-+              nf_queue_entry_release_refs(entry);
-+              kfree_skb(entry->skb);
-+              kfree(entry);
-+      }
-+}
-+
-+static void imq_nf_reinject(struct nf_queue_entry *entry, unsigned int verdict)
-+{
-+      int status;
-+
-+      if (!entry->next_outfn) {
-+              spin_lock_bh(&imq_nf_queue_lock);
-+              nf_reinject(entry, verdict);
-+              spin_unlock_bh(&imq_nf_queue_lock);
-+              return;
-+      }
-+
-+      rcu_read_lock();
-+      local_bh_disable();
-+      status = entry->next_outfn(entry, entry->next_queuenum);
-+      local_bh_enable();
-+      if (status < 0) {
-+              nf_queue_entry_release_refs(entry);
-+              kfree_skb(entry->skb);
-+              kfree(entry);
-+      }
-+
-+      rcu_read_unlock();
-+}
-+
 +static netdev_tx_t imq_dev_xmit(struct sk_buff *skb, struct net_device *dev)
 +{
 +      struct nf_queue_entry *entry = skb->nf_queue_entry;
@@ -275,17 +263,184 @@
 +      skb->imq_flags = 0;
 +      skb->destructor = NULL;
 +
-+      imq_nf_reinject(entry, NF_ACCEPT);
++      nf_reinject(entry, NF_ACCEPT);
 +
 +      return NETDEV_TX_OK;
 +}
 +
++static u32 imq_hashrnd;
++
++static inline __be16 pppoe_proto(const struct sk_buff *skb)
++{
++      return *((__be16 *)(skb_mac_header(skb) + ETH_HLEN +
++                      sizeof(struct pppoe_hdr)));
++}
++
++static u16 imq_hash(struct net_device *dev, struct sk_buff *skb)
++{
++      unsigned int pull_len;
++      u16 protocol = skb->protocol;
++      u32 addr1, addr2;
++      u32 hash;
++      int ihl = 0;    /* signed: ipv6_skip_exthdr() returns -1 on error */
++      union {
++              u16 in16[2];
++              u32 in32;
++      } ports;
++      u8 ip_proto;
++
++      pull_len = 0;
++
++recheck:
++      switch (protocol) {
++      case htons(ETH_P_8021Q): {
++              if (unlikely(skb_pull(skb, VLAN_HLEN) == NULL))
++                      goto other;
++
++              pull_len += VLAN_HLEN;
++              skb->network_header += VLAN_HLEN;
++
++              protocol = vlan_eth_hdr(skb)->h_vlan_encapsulated_proto;
++              goto recheck;
++      }
++
++      case htons(ETH_P_PPP_SES): {
++              if (unlikely(skb_pull(skb, PPPOE_SES_HLEN) == NULL))
++                      goto other;
++
++              pull_len += PPPOE_SES_HLEN;
++              skb->network_header += PPPOE_SES_HLEN;
++
++              protocol = pppoe_proto(skb);
++              goto recheck;
++      }
++
++      case htons(ETH_P_IP): {
++              const struct iphdr *iph = ip_hdr(skb);
++
++              if (unlikely(!pskb_may_pull(skb, sizeof(struct iphdr))))
++                      goto other;
++
++              addr1 = iph->daddr;
++              addr2 = iph->saddr;
++
++              ip_proto = !(ip_hdr(skb)->frag_off & htons(IP_MF | IP_OFFSET)) ?
++                               iph->protocol : 0;
++              ihl = ip_hdrlen(skb);
++
++              break;
++      }
++#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
++      case htons(ETH_P_IPV6): {
++              const struct ipv6hdr *iph = ipv6_hdr(skb);
++
++              if (unlikely(!pskb_may_pull(skb, sizeof(struct ipv6hdr))))
++                      goto other;
++
++              addr1 = iph->daddr.s6_addr32[3];
++              addr2 = iph->saddr.s6_addr32[3];
++              ihl = ipv6_skip_exthdr(skb, sizeof(struct ipv6hdr), &ip_proto);
++              if (unlikely(ihl < 0))
++                      goto other;
++
++              break;
++      }
++#endif
++      default:
++other:
++              if (pull_len != 0) {
++                      skb_push(skb, pull_len);
++                      skb->network_header -= pull_len;
++              }
++
++              return (u16)(ntohs(protocol) % dev->real_num_tx_queues);
++      }
++
++      if (addr1 > addr2)
++              swap(addr1, addr2);
++
++      switch (ip_proto) {
++      case IPPROTO_TCP:
++      case IPPROTO_UDP:
++      case IPPROTO_DCCP:
++      case IPPROTO_ESP:
++      case IPPROTO_AH:
++      case IPPROTO_SCTP:
++      case IPPROTO_UDPLITE: {
++              if (likely(skb_copy_bits(skb, ihl, &ports.in32, 4) >= 0)) {
++                      if (ports.in16[0] > ports.in16[1])
++                              swap(ports.in16[0], ports.in16[1]);
++                      break;
++              }
++              /* fall-through */
++      }
++      default:
++              ports.in32 = 0;
++              break;
++      }
++
++      if (pull_len != 0) {
++              skb_push(skb, pull_len);
++              skb->network_header -= pull_len;
++      }
++
++      hash = jhash_3words(addr1, addr2, ports.in32, imq_hashrnd ^ ip_proto);
++
++      return (u16)(((u64)hash * dev->real_num_tx_queues) >> 32);
++}
++
++static inline bool sk_tx_queue_recorded(struct sock *sk)
++{
++      return (sk_tx_queue_get(sk) >= 0);
++}
++
++static struct netdev_queue *imq_select_queue(struct net_device *dev,
++                                              struct sk_buff *skb)
++{
++      u16 queue_index = 0;
++      u32 hash;
++
++      if (likely(dev->real_num_tx_queues == 1))
++              goto out;
++
++      /* IMQ can be receiving ingress or egress packets. */
++
++      /* First check if rx_queue is set */
++      if (skb_rx_queue_recorded(skb)) {
++              queue_index = skb_get_rx_queue(skb);
++              goto out;
++      }
++
++      /* Check if socket has tx_queue set */
++      if (sk_tx_queue_recorded(skb->sk)) {
++              queue_index = sk_tx_queue_get(skb->sk);
++              goto out;
++      }
++
++      /* Try to use the socket hash */
++      if (skb->sk && skb->sk->sk_hash) {
++              hash = skb->sk->sk_hash;
++              queue_index =
++                      (u16)(((u64)hash * dev->real_num_tx_queues) >> 32);
++              goto out;
++      }
++
++      /* Generate hash from packet data */
++      queue_index = imq_hash(dev, skb);
++
++out:
++      if (unlikely(queue_index >= dev->real_num_tx_queues))
++              queue_index = (u16)((u32)queue_index % dev->real_num_tx_queues);
++
++      return netdev_get_tx_queue(dev, queue_index);
++}
++
 +static int imq_nf_queue(struct nf_queue_entry *entry, unsigned queue_num)
 +{
 +      struct net_device *dev;
 +      struct sk_buff *skb_orig, *skb, *skb_shared;
 +      struct Qdisc *q;
 +      struct netdev_queue *txq;
++      spinlock_t *root_lock;
 +      int users, index;
 +      int retval = -EINVAL;
 +
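
The queue-mapping expression above, (u16)(((u64)hash * dev->real_num_tx_queues) >> 32),
scales a 32-bit hash into [0, real_num_tx_queues) with a single 64-bit multiply instead
of a modulo. A minimal user-space sketch of the same fixed-point technique follows
(hash_to_queue and num_queues are hypothetical names, not part of the patch):

#include <stdio.h>
#include <stdint.h>

/* The top 32 bits of hash * num_queues give an index uniformly scaled
 * into [0, num_queues) -- the same trick used by imq_hash() above. */
static uint16_t hash_to_queue(uint32_t hash, uint32_t num_queues)
{
	return (uint16_t)(((uint64_t)hash * num_queues) >> 32);
}

int main(void)
{
	const uint32_t hashes[] = { 0x00000000u, 0x7fffffffu, 0xdeadbeefu, 0xffffffffu };
	unsigned i;

	for (i = 0; i < 4; i++)
		printf("hash 0x%08x -> queue %u of 4\n",
		       (unsigned)hashes[i], (unsigned)hash_to_queue(hashes[i], 4));
	return 0;
}

Unlike 'hash % n', the multiply-and-shift form uses the full 32-bit range of the jhash
output and avoids a division on the fast path.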
@@ -307,7 +462,7 @@
 +              /* get device by name and cache result */
 +              snprintf(buf, sizeof(buf), "imq%d", index);
 +              dev = dev_get_by_name(&init_net, buf);
-+              if (!dev) {
++              if (unlikely(!dev)) {
 +                      /* not found ?!*/
 +                      BUG();
 +                      retval = -ENODEV;
@@ -320,7 +475,7 @@
 +
 +      if (unlikely(!(dev->flags & IFF_UP))) {
 +              entry->skb->imq_flags = 0;
-+              imq_nf_reinject_lockless(entry, NF_ACCEPT);
++              nf_reinject(entry, NF_ACCEPT);
 +              retval = 0;
 +              goto out;
 +      }
@@ -333,7 +488,7 @@
 +      if (unlikely(skb->destructor)) {
 +              skb_orig = skb;
 +              skb = skb_clone(skb, GFP_ATOMIC);
-+              if (!skb) {
++              if (unlikely(!skb)) {
 +                      retval = -ENOMEM;
 +                      goto out;
 +              }
@@ -345,13 +500,18 @@
 +      dev->stats.rx_bytes += skb->len;
 +      dev->stats.rx_packets++;
 +
-+      txq = dev_pick_tx(dev, skb);
++      /* Disable softirqs for the lock taken below */
++      rcu_read_lock_bh();
++
++      /* Multi-queue selection */
++      txq = imq_select_queue(dev, skb);
 +
 +      q = rcu_dereference(txq->qdisc);
 +      if (unlikely(!q->enqueue))
 +              goto packet_not_eaten_by_imq_dev;
 +
-+      spin_lock_bh(qdisc_lock(q));
++      root_lock = qdisc_lock(q);
++      spin_lock(root_lock);
 +
 +      users = atomic_read(&skb->users);
 +
@@ -366,10 +526,11 @@
 +              skb->destructor = &imq_skb_destructor;
 +
 +              /* cloned? */
-+              if (skb_orig)
++              if (unlikely(skb_orig))
 +                      kfree_skb(skb_orig); /* free original */
 +
-+              spin_unlock_bh(qdisc_lock(q));
++              spin_unlock(root_lock);
++              rcu_read_unlock_bh();
 +
 +              /* schedule qdisc dequeue */
 +              __netif_schedule(q);
@@ -382,13 +543,15 @@
 +              /* qdisc dropped the packet and decreased the skb reference
 +               * count, so we don't want to try to free it again as that
 +               * would actually destroy the skb. */
-+              spin_unlock_bh(qdisc_lock(q));
++              spin_unlock(root_lock);
 +              goto packet_not_eaten_by_imq_dev;
 +      }
 +
 +packet_not_eaten_by_imq_dev:
++      rcu_read_unlock_bh();
++
 +      /* cloned? restore original */
-+      if (skb_orig) {
++      if (unlikely(skb_orig)) {
 +              kfree_skb(skb);
 +              entry->skb = skb_orig;
 +      }
@@ -397,20 +560,12 @@
 +      return retval;
 +}
 +
-+static struct nf_queue_handler nfqh = {
-+      .name  = "imq",
-+      .outfn = imq_nf_queue,
-+};
-+
 +static unsigned int imq_nf_hook(unsigned int hook, struct sk_buff *pskb,
 +                              const struct net_device *indev,
 +                              const struct net_device *outdev,
 +                              int (*okfn)(struct sk_buff *))
 +{
-+      if (pskb->imq_flags & IMQ_F_ENQUEUE)
-+              return NF_QUEUE;
-+
-+      return NF_ACCEPT;
++      return (pskb->imq_flags & IMQ_F_ENQUEUE) ? NF_IMQ_QUEUE : NF_ACCEPT;
 +}
 +
 +static int imq_close(struct net_device *dev)
@@ -472,43 +627,22 @@
 +      .validate       = imq_validate,
 +};
 +
++static const struct nf_queue_handler imq_nfqh = {
++      .name  = "imq",
++      .outfn = imq_nf_queue,
++};
++
 +static int __init imq_init_hooks(void)
 +{
-+      int err;
-+
-+      nf_register_queue_imq_handler(&nfqh);
-+
-+      err = nf_register_hook(&imq_ingress_ipv4);
-+      if (err)
-+              goto err1;
-+
-+      err = nf_register_hook(&imq_egress_ipv4);
-+      if (err)
-+              goto err2;
++      int ret;
 +
-+#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
-+      err = nf_register_hook(&imq_ingress_ipv6);
-+      if (err)
-+              goto err3;
-+
-+      err = nf_register_hook(&imq_egress_ipv6);
-+      if (err)
-+              goto err4;
-+#endif
++      nf_register_queue_imq_handler(&imq_nfqh);
 +
-+      return 0;
++      ret = nf_register_hooks(imq_ops, ARRAY_SIZE(imq_ops));
++      if (ret < 0)
++              nf_unregister_queue_imq_handler();
 +
-+#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
-+err4:
-+      nf_unregister_hook(&imq_ingress_ipv6);
-+err3:
-+      nf_unregister_hook(&imq_egress_ipv4);
-+#endif
-+err2:
-+      nf_unregister_hook(&imq_ingress_ipv4);
-+err1:
-+      nf_unregister_queue_imq_handler();
-+      return err;
++      return ret;
 +}
 +
 +static int __init imq_init_one(int index)
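
nf_register_hooks(), which imq_init_hooks() above now relies on, registers the whole
nf_hook_ops array in one call and unwinds any partially completed registration itself;
that is what makes the old err1..err4 label ladder unnecessary. A self-contained sketch
of the pattern for 2.6.35-era kernels (the example_* names are hypothetical):

#include <linux/module.h>
#include <linux/netfilter.h>
#include <linux/netfilter_ipv4.h>

static unsigned int example_hook(unsigned int hooknum, struct sk_buff *skb,
				 const struct net_device *in,
				 const struct net_device *out,
				 int (*okfn)(struct sk_buff *))
{
	return NF_ACCEPT;	/* pass everything through */
}

static struct nf_hook_ops example_ops[] = {
	{
		.hook		= example_hook,
		.owner		= THIS_MODULE,
		.pf		= PF_INET,
		.hooknum	= NF_INET_PRE_ROUTING,
		.priority	= NF_IP_PRI_FIRST,
	},
	{
		.hook		= example_hook,
		.owner		= THIS_MODULE,
		.pf		= PF_INET,
		.hooknum	= NF_INET_POST_ROUTING,
		.priority	= NF_IP_PRI_LAST,
	},
};

static int __init example_init(void)
{
	/* On failure, already-registered entries are unregistered for us. */
	return nf_register_hooks(example_ops, ARRAY_SIZE(example_ops));
}

static void __exit example_exit(void)
{
	nf_unregister_hooks(example_ops, ARRAY_SIZE(example_ops));
}

module_init(example_init);
module_exit(example_exit);
MODULE_LICENSE("GPL");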
@@ -516,7 +650,7 @@
 +      struct net_device *dev;
 +      int ret;
 +
-+      dev = alloc_netdev(0, "imq%d", imq_setup);
++      dev = alloc_netdev_mq(0, "imq%d", imq_setup, numqueues);
 +      if (!dev)
 +              return -ENOMEM;
 +
@@ -545,6 +679,14 @@
 +              return -EINVAL;
 +      }
 +
++      if (numqueues < 1 || numqueues > IMQ_MAX_QUEUES) {
++              printk(KERN_ERR "IMQ: numqueues has to be between 1 and %u\n",
++                     IMQ_MAX_QUEUES);
++              return -EINVAL;
++      }
++
++      get_random_bytes(&imq_hashrnd, sizeof(imq_hashrnd));
++
 +      rtnl_lock();
 +      err = __rtnl_link_register(&imq_link_ops);
 +
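
The numqueues range check above pairs with a module parameter declaration that falls in
the trimmed part of the diff. A plausible form is sketched below; the exact permissions
and description string are assumptions, and numqueues itself is declared earlier in the
patch:

module_param(numqueues, int, 0444);	/* sysfs-visible, read-only: assumption */
MODULE_PARM_DESC(numqueues, "number of queues per IMQ device");

With that in place the queue count is fixed at module load time, e.g. 'modprobe imq
numdevs=2 numqueues=4', and values outside 1..IMQ_MAX_QUEUES are rejected by the check
above.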
@@ -584,7 +726,8 @@
 +              return err;
 +      }
 +
-+      printk(KERN_INFO "IMQ driver loaded successfully.\n");
++      printk(KERN_INFO "IMQ driver loaded successfully. "
++              "(numdevs = %d, numqueues = %d)\n", numdevs, numqueues);
 +
 +#if defined(CONFIG_IMQ_BEHAVIOR_BA) || defined(CONFIG_IMQ_BEHAVIOR_BB)
 +      printk(KERN_INFO "\tHooking IMQ before NAT on PREROUTING.\n");
@@ -602,13 +745,7 @@
<<Diff was trimmed, longer than 597 lines>>

---- CVS-web:
http://cvs.pld-linux.org/cgi-bin/cvsweb.cgi/packages/kernel/kernel-imq.patch?r1=1.10&r2=1.11&f=u
http://cvs.pld-linux.org/cgi-bin/cvsweb.cgi/packages/kernel/kernel.spec?r1=1.810&r2=1.811&f=u
