Signed-off-by: Gregory Haskins <[EMAIL PROTECTED]>
---

 drivers/net/Kconfig    |    7 
 drivers/net/Makefile   |    2 
 drivers/net/ioqnet.c   |  637 ++++++++++++++++++++++++++++++++++++++++++++++++
 include/linux/ioqnet.h |   44 +++
 4 files changed, 690 insertions(+), 0 deletions(-)

diff --git a/drivers/net/Kconfig b/drivers/net/Kconfig
index fb99cd4..eb46c07 100644
--- a/drivers/net/Kconfig
+++ b/drivers/net/Kconfig
@@ -2947,6 +2947,13 @@ config NETCONSOLE
        If you want to log kernel messages over the network, enable this.
        See <file:Documentation/networking/netconsole.txt> for details.
 
+config IOQNET
+       tristate "IOQ-based paravirtualized network driver"
+       select IOQ
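+       ---help---
+         A paravirtualized network interface that exchanges packets with
+         the host over IOQ shared-memory rings.  If unsure, say N.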
+
 endif #NETDEVICES
 
 config NETPOLL
diff --git a/drivers/net/Makefile b/drivers/net/Makefile
index a77affa..f1b4916 100644
--- a/drivers/net/Makefile
+++ b/drivers/net/Makefile
@@ -224,6 +224,8 @@ obj-$(CONFIG_ENP2611_MSF_NET) += ixp2000/
 
 obj-$(CONFIG_NETCONSOLE) += netconsole.o
 
+obj-$(CONFIG_IOQNET) += ioqnet.o
+
 obj-$(CONFIG_FS_ENET) += fs_enet/
 
 obj-$(CONFIG_NETXEN_NIC) += netxen/
diff --git a/drivers/net/ioqnet.c b/drivers/net/ioqnet.c
new file mode 100644
index 0000000..5500631
--- /dev/null
+++ b/drivers/net/ioqnet.c
@@ -0,0 +1,637 @@
+/*
+ * ioqnet - A paravirtualized network device based on the IOQ interface
+ *
+ * Copyright (C) 2007 Novell, Gregory Haskins <[EMAIL PROTECTED]>
+ *
+ * Derived from the SNULL example from the book "Linux Device
+ * Drivers" by Alessandro Rubini and Jonathan Corbet, published
+ * by O'Reilly & Associates.
+ */
+
+#include <linux/module.h>
+#include <linux/init.h>
+#include <linux/moduleparam.h>
+
+#include <linux/sched.h>
+#include <linux/kernel.h> /* printk() */
+#include <linux/slab.h> /* kmalloc() */
+#include <linux/errno.h>  /* error codes */
+#include <linux/types.h>  /* size_t */
+#include <linux/interrupt.h> /* mark_bh */
+
+#include <linux/in.h>
+#include <linux/netdevice.h>   /* struct device, and other headers */
+#include <linux/etherdevice.h> /* eth_type_trans */
+#include <linux/ip.h>          /* struct iphdr */
+#include <linux/tcp.h>         /* struct tcphdr */
+#include <linux/skbuff.h>
+#include <linux/ioq.h>
+#include <linux/pvbus.h>
+
+#include <linux/in6.h>
+#include <asm/checksum.h>
+
+#include <linux/ioqnet.h>
+
+MODULE_AUTHOR("Gregory Haskins");
+MODULE_LICENSE("GPL");
+
+#undef PDEBUG             /* undef it, just in case */
+#ifdef IOQNET_DEBUG
+#  define PDEBUG(fmt, args...) printk( KERN_DEBUG "ioqnet: " fmt, ## args)
+#else
+#  define PDEBUG(fmt, args...) /* not debugging: nothing */
+#endif
+
+static int timeout = 5;   /* In jiffies */
+module_param(timeout, int, 0);
+
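+/* Ring lengths, and the number of data pointers carried by each tx descriptor */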
+#define RX_RINGLEN 64
+#define TX_RINGLEN 64
+#define TX_PTRS_PER_DESC 64
+
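+/* An IOQ ring paired with the notifier the host uses to signal us */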
+struct ioqnet_queue {
+       struct ioq              *queue;
+       struct ioq_notifier      notifier;
+};
+
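+/* Guest-side state for one tx ring entry: the owning skb and its pointer list */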
+struct ioqnet_tx_desc {
+       struct sk_buff      *skb;
+       struct ioqnet_tx_ptr data[TX_PTRS_PER_DESC];
+};
+
+struct ioqnet_priv {
+       spinlock_t               lock;
+       struct net_device       *dev;
+       struct net_device_stats  stats;
+       struct ioqnet_queue      rxq;
+       struct ioqnet_queue      txq;
+       struct tasklet_struct    txtask;
+       u64                      pvb_instance;
+};
+
+static int ioqnet_queue_init(struct ioqnet_priv *priv,
+                            struct ioqnet_queue *q,
+                            size_t ringsize,
+                            void (*func)(struct ioq_notifier*))
+{
+       int ret = pvbus_ops->ioqmgr->create(pvbus_ops->ioqmgr,
+                                           &q->queue, ringsize, 0);
+       if (ret < 0)
+               return ret;
+
+       q->notifier.signal = func;
+
+       return 0;
+}
+
+/*
+ * Enable and disable receive interrupts.
+ */
+static void ioqnet_rx_ints(struct net_device *dev, int enable)
+{
+       struct ioqnet_priv *priv = netdev_priv(dev);
+       struct ioq *ioq = priv->rxq.queue;
+       if (enable)
+               ioq_start(ioq, 0);
+       else
+               ioq_stop(ioq, 0);
+}
+
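+/* Attach a freshly allocated skb to an rx descriptor and hand ownership to the host */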
+static void ioqnet_alloc_rx_desc(struct ioq_ring_desc *desc, size_t len)
+{
+       struct sk_buff *skb = dev_alloc_skb(len + 2);
+       BUG_ON(!skb);
+
+       skb_reserve(skb, 2); /* align IP on 16B boundary */
+
+       desc->cookie = (u64)skb;
+       desc->ptr    = (u64)__pa(skb->data);
+       desc->len    = len; /* total length  */
+       desc->alen   = 0;   /* actual length - to be filled in by host */
+
+       mb();
+       desc->valid  = 1;
+       desc->sown   = 1;   /* give ownership to the south */
+       mb();
+}
+
+static void ioqnet_setup_rx(struct ioqnet_priv *priv)
+{
+       struct ioq *ioq = priv->rxq.queue;
+       struct ioq_iterator iter;
+       int ret;
+       int i;
+
+       /*
+        * We want to iterate on the "valid" index.  By default the iterator
+        * will not "autoupdate" which means it will not hypercall the host
+        * with our changes.  This is good, because we are really just
+        * initializing stuff here anyway.  Note that you can always manually
+        * signal the host with ioq_signal() if the autoupdate feature is not
+        * used.
+        */
+       ret = ioq_iter_init(ioq, &iter, ioq_idxtype_valid, 0);
+       BUG_ON(ret < 0);
+
+       /*
+        * Seek to the head of the valid index (which should be our first
+        * item, since the queue is brand-new)
+        */
+       ret = ioq_iter_seek(&iter, ioq_seek_head, 0, 0);
+       BUG_ON(ret < 0);
+
+       /*
+        * Now populate each descriptor with an empty SKB and mark it valid
+        */
+       for (i = 0; i < RX_RINGLEN; ++i) {
+               ioqnet_alloc_rx_desc(iter.desc, priv->dev->mtu + ETH_HLEN);
+
+               /*
+                * This push operation will simultaneously advance the
+                * valid-head index and increment our position in the queue
+                * by one.
+                */
+               ret = ioq_iter_push(&iter, 0);
+               BUG_ON(ret < 0);
+       }
+}
+
+static void ioqnet_setup_tx(struct ioqnet_priv *priv)
+{
+       struct ioq *ioq = priv->txq.queue;
+       struct ioq_iterator iter;
+       int ret;
+       int i;
+
+       /*
+        * We set up the tx descriptors in a similar way to the rx SKBs
+        */
+       ret = ioq_iter_init(ioq, &iter, ioq_idxtype_valid, 0);
+       BUG_ON(ret < 0);
+
+       ret = ioq_iter_seek(&iter, ioq_seek_head, 0, 0);
+       BUG_ON(ret < 0);
+
+       for (i = 0; i < TX_RINGLEN; ++i) {
+               struct ioq_ring_desc  *desc = iter.desc;
+               struct ioqnet_tx_desc *txdesc = kzalloc(sizeof(*txdesc),
+                                                       GFP_KERNEL | GFP_DMA);
+
+               desc->cookie = (u64)txdesc;
+               desc->ptr    = (u64)__pa(&txdesc->data[0]);
+               desc->len    = TX_PTRS_PER_DESC; /* "len" is "count" */
+               desc->alen   = 0;
+               desc->valid  = 0; /* mark it "invalid" since payload empty */
+               desc->sown   = 0; /* retain ownership until "inuse" */
+
+               /*
+                * One big difference from the RX ring is that we do an
+                * "iter++" (seek to the next position) here instead of an
+                * "iter->push()".  That is because we do not want to
+                * advance the valid-index yet: the gap between the valid
+                * head and tail tells us how many tx descriptors are
+                * outstanding versus already completed.
+                */
+               ret = ioq_iter_seek(&iter, ioq_seek_next, 0, 0);
+               BUG_ON(ret < 0);
+       }
+}
+
+/*
+ * Open and close
+ */
+
+int ioqnet_open(struct net_device *dev)
+{
+       netif_start_queue(dev);
+       return 0;
+}
+
+int ioqnet_release(struct net_device *dev)
+{
+       netif_stop_queue(dev);
+       return 0;
+}
+
+/*
+ * Configuration changes (passed on by ifconfig)
+ */
+int ioqnet_config(struct net_device *dev, struct ifmap *map)
+{
+       if (dev->flags & IFF_UP) /* can't act on a running interface */
+               return -EBUSY;
+
+       /* Don't allow changing the I/O address */
+       if (map->base_addr != dev->base_addr) {
+               printk(KERN_WARNING "ioqnet: Can't change I/O address\n");
+               return -EOPNOTSUPP;
+       }
+
+       /* ignore other fields */
+       return 0;
+}
+
+/*
+ * The poll implementation.
+ */
+static int ioqnet_poll(struct net_device *dev, int *budget)
+{
+       int npackets = 0, quota = min(dev->quota, *budget);
+       struct ioqnet_priv *priv = netdev_priv(dev);
+       struct ioq_iterator iter;
+       unsigned long flags;
+       int ret;
+
+       spin_lock_irqsave(&priv->lock, flags);
+
+       /* We want to iterate on the tail of the in-use index */
+       ret = ioq_iter_init(priv->rxq.queue, &iter, ioq_idxtype_inuse, 0);
+       BUG_ON(ret < 0);
+
+       ret = ioq_iter_seek(&iter, ioq_seek_tail, 0, 0);
+       BUG_ON(ret < 0);
+
+       /*
+        * We stop if we have met the quota or there are no more packets.
+        * The EOM is indicated by finding a packet that is still owned by
+        * the south side
+        */
+       while ((npackets < quota) && (!iter.desc->sown)) {
+               struct sk_buff *skb = (struct sk_buff*)iter.desc->cookie;
+
+               skb_put(skb, iter.desc->alen);
+
+               /* Maintain stats */
+               npackets++;
+               priv->stats.rx_packets++;
+               priv->stats.rx_bytes += iter.desc->alen;
+
+               /* Pass the buffer up to the stack */
+               skb->dev      = dev;
+               skb->protocol = eth_type_trans(skb, dev);
+               netif_receive_skb(skb);
+
+               mb();
+
+               /* Grab a new buffer to put in the ring */
+               ioqnet_alloc_rx_desc(iter.desc, dev->mtu + ETH_HLEN);
+
+               /* Advance the in-use tail */
+               ret = ioq_iter_pop(&iter, 0);
+               BUG_ON(ret < 0);
+
+               /* Toggle the lock */
+               spin_unlock_irqrestore(&priv->lock, flags);
+               spin_lock_irqsave(&priv->lock, flags);
+       }
+
+       /*
+        * If we processed all packets, we're done; tell the kernel and
+        * reenable ints
+        */
+       *budget -= npackets;
+       dev->quota -= npackets;
+       if (ioq_empty(priv->rxq.queue, ioq_idxtype_inuse)) {
+               /* FIXME: there is a race with enabling interrupts */
+               netif_rx_complete(dev);
+               ioqnet_rx_ints(dev, 1);
+               ret = 0;
+       } else
+               /* We couldn't process everything. */
+               ret = 1;
+
+       spin_unlock_irqrestore(&priv->lock, flags);
+
+       /* And let the south side know that we changed the rx-queue */
+       ioq_signal(priv->rxq.queue, 0);
+
+       return ret;
+}
+
+/*
+ * Transmit a packet (called by the kernel)
+ */
+int ioqnet_tx_start(struct sk_buff *skb, struct net_device *dev)
+{
+       struct ioqnet_priv    *priv = netdev_priv(dev);
+       struct ioq_iterator    viter;
+       struct ioq_iterator    uiter;
+       struct ioqnet_tx_desc *txdesc;
+       int ret;
+       int i;
+       unsigned long flags;
+
+       if (skb->len < ETH_ZLEN)
+               return -EINVAL;
+
+       spin_lock_irqsave(&priv->lock, flags);
+
+       if (ioq_full(priv->txq.queue, ioq_idxtype_valid)) {
+               /*
+                * We must flow-control the kernel by disabling the queue
+                */
+               spin_unlock_irqrestore(&priv->lock, flags);
+               netif_stop_queue(dev);
+               return NETDEV_TX_BUSY; /* let the stack requeue the skb */
+       }
+
+       /*
+        * We want to iterate on the head of both the "inuse" and "valid" index
+        */
+       ret = ioq_iter_init(priv->txq.queue, &viter, ioq_idxtype_valid, 0);
+       BUG_ON(ret < 0);
+       ret = ioq_iter_init(priv->txq.queue, &uiter, ioq_idxtype_inuse, 0);
+       BUG_ON(ret < 0);
+
+       ret = ioq_iter_seek(&viter, ioq_seek_head, 0, 0);
+       BUG_ON(ret < 0);
+       ret = ioq_iter_seek(&uiter, ioq_seek_head, 0, 0);
+       BUG_ON(ret < 0);
+
+       /* The head pointers should move in lockstep */
+       BUG_ON(uiter.pos != viter.pos);
+
+       dev->trans_start = jiffies; /* save the timestamp */
+       skb_get(skb);               /* add a refcount */
+
+       txdesc = (struct ioqnet_tx_desc*)uiter.desc->cookie;
+
+       for (i = 0; i < 1; ++i) { /* Someday we will support SG */
+               txdesc->data[i].len  = (u64)skb->len;
+               txdesc->data[i].data = (u64)__pa(skb->data);
+
+               uiter.desc->alen++;
+       }
+
+       txdesc->skb        = skb; /* save the skb for future release */
+
+       mb();
+       uiter.desc->valid  = 1;
+       uiter.desc->sown   = 1;        /* give ownership to the south */
+       mb();
+
+       /* Advance both indexes together */
+       ret = ioq_iter_push(&viter, 0);
+       BUG_ON(ret < 0);
+       ret = ioq_iter_push(&uiter, 0);
+       BUG_ON(ret < 0);
+
+       /*
+        * This will signal the south side to consume the packet
+        */
+       ioq_signal(priv->txq.queue, 0);
+
+       spin_unlock_irqrestore(&priv->lock, flags);
+
+       return 0;
+}
+
+/*
+ * called by the tx interrupt handler to indicate that one or more packets
+ * have been consumed
+ */
+void ioqnet_tx_complete(unsigned long data)
+{
+       struct ioqnet_priv *priv = (struct ioqnet_priv*)data;
+       struct ioq_iterator iter;
+       unsigned long flags;
+       int ret;
+
+       spin_lock_irqsave(&priv->lock, flags);
+
+       /* We want to iterate on the tail of the valid index */
+       ret = ioq_iter_init(priv->txq.queue, &iter, ioq_idxtype_valid, 0);
+       BUG_ON(ret < 0);
+
+       ret = ioq_iter_seek(&iter, ioq_seek_tail, 0, 0);
+       BUG_ON(ret < 0);
+
+       /*
+        * We are done once we find the first packet either invalid or still
+        * owned by the south-side
+        */
+       while (iter.desc->valid && !iter.desc->sown) {
+               struct ioqnet_tx_desc *txdesc;
+               struct sk_buff        *skb;
+
+               txdesc  = (struct ioqnet_tx_desc*)iter.desc->cookie;
+               skb     = txdesc->skb;
+
+               /* Maintain stats */
+               priv->stats.tx_packets++;
+               priv->stats.tx_bytes += skb->len;
+
+               /* Reset the descriptor */
+               mb();
+               iter.desc->alen   = 0;
+               iter.desc->valid  = 0;
+               mb();
+
+               dev_kfree_skb(skb);
+
+               /* Advance the valid-index tail */
+               ret = ioq_iter_pop(&iter, 0);
+               BUG_ON(ret < 0);
+
+               /* Toggle the lock */
+               spin_unlock_irqrestore(&priv->lock, flags);
+               spin_lock_irqsave(&priv->lock, flags);
+       }
+
+       /*
+        * If we were previously stopped due to flow control, restart the
+        * processing
+        */
+       if (netif_queue_stopped(priv->dev)
+           && !ioq_full(priv->txq.queue, ioq_idxtype_inuse)) {
+
+               netif_wake_queue(priv->dev);
+       }
+
+       spin_unlock_irqrestore(&priv->lock, flags);
+}
+
+/*
+ * Ioctl commands
+ */
+int ioqnet_ioctl(struct net_device *dev, struct ifreq *rq, int cmd)
+{
+       PDEBUG("ioctl\n");
+       return 0;
+}
+
+/*
+ * Return statistics to the caller
+ */
+struct net_device_stats *ioqnet_stats(struct net_device *dev)
+{
+       struct ioqnet_priv *priv = netdev_priv(dev);
+       return &priv->stats;
+}
+
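+/* The host signalled the rx ring: mask further notifications and switch to polling */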
+static void ioq_rx_notify(struct ioq_notifier *notifier)
+{
+       struct ioqnet_priv *priv;
+       struct net_device  *dev;
+
+       priv = container_of(notifier, struct ioqnet_priv, rxq.notifier);
+       dev = priv->dev;
+
+       ioqnet_rx_ints(dev, 0);  /* Disable further interrupts */
+       netif_rx_schedule(dev);
+}
+
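+/* The host signalled the tx ring: defer completion processing to the tasklet */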
+static void ioq_tx_notify(struct ioq_notifier *notifier)
+{
+       struct ioqnet_priv *priv;
+
+       priv = container_of(notifier, struct ioqnet_priv, txq.notifier);
+
+       tasklet_schedule(&priv->txtask);
+}
+
+/* Perform a hypercall to register/connect our queues */
+static int ioqnet_connect(struct ioqnet_priv *priv)
+{
+       struct ioqnet_connect data = {
+               .rxq = priv->rxq.queue->id,
+               .txq = priv->txq.queue->id,
+       };
+
+       return pvbus_ops->call(priv->pvb_instance, IOQNET_CONNECT,
+                              &data, sizeof(data), 0);
+}
+
+/* Perform a hypercall to get the assigned MAC addr */
+static int ioqnet_query_mac(struct ioqnet_priv *priv)
+{
+       return pvbus_ops->call(priv->pvb_instance,
+                              IOQNET_QUERY_MAC,
+                              priv->dev->dev_addr,
+                              ETH_ALEN, 0);
+}
+
+/*
+ * The init function (sometimes called probe).
+ * It is invoked by alloc_netdev() as the device setup callback
+ */
+void ioqnet_init(struct net_device *dev)
+{
+       struct ioqnet_priv *priv = netdev_priv(dev);
+
+       ether_setup(dev); /* assign some of the fields */
+
+       dev->open              = ioqnet_open;
+       dev->stop              = ioqnet_release;
+       dev->set_config        = ioqnet_config;
+       dev->hard_start_xmit   = ioqnet_tx_start;
+       dev->do_ioctl          = ioqnet_ioctl;
+       dev->get_stats         = ioqnet_stats;
+       dev->poll              = ioqnet_poll;
+       dev->weight            = 2;
+       dev->hard_header_cache = NULL;      /* Disable caching */
+
+       if (ioqnet_query_mac(priv) < 0)
+               printk(KERN_WARNING "ioqnet: could not obtain MAC address for %llu\n",
+                      (unsigned long long)priv->pvb_instance);
+
+       spin_lock_init(&priv->lock);
+       priv->dev = dev;
+       tasklet_init(&priv->txtask, ioqnet_tx_complete, (unsigned long)priv);
+
+       ioqnet_queue_init(priv, &priv->rxq, RX_RINGLEN, ioq_rx_notify);
+       ioqnet_queue_init(priv, &priv->txq, TX_RINGLEN, ioq_tx_notify);
+
+       ioqnet_setup_rx(priv);
+       ioqnet_setup_tx(priv);
+
+       ioqnet_rx_ints(dev, 1);         /* enable receive interrupts */
+       ioq_start(priv->txq.queue, 0);  /* enable transmit interrupts */
+
+       /* Now connect the host side driver to us via the PVBUS/IOQ */
+       if (ioqnet_connect(priv) < 0)
+               printk(KERN_ERR "ioqnet: could not initialize instance %llu\n",
+                      (unsigned long long)priv->pvb_instance);
+
+}
+
+/*
+ * Finally, the module stuff
+ */
+
+void ioqnet_cleanup(void)
+{
+#if 0
+       if (ioqnet_dev) {
+               unregister_netdev(ioqnet_dev);
+               /* FIXME: free memory in rings */
+               /* FIXME: free IOQ interfaces */
+               free_netdev(ioqnet_dev);
+       }
+#endif
+       return;
+}
+
+#define MAX_DEVICES 16
+
+int ioqnet_init_module(void)
+{
+       int ret = -ENOMEM;
+       struct pvbus_dev devs[MAX_DEVICES];
+       size_t count = MAX_DEVICES;
+       int i;
+
+       ret = pvbus_ops->enumerate("ioqnet", devs, &count, 0);
+       if (ret < 0)
+               return ret;
+
+       for (i = 0; i < count; ++i) {
+               struct net_device  *dev;
+               struct pvbus_dev   *pdev = &devs[i];
+               struct ioqnet_priv *priv;
+
+               if (pdev->version != IOQNET_VERSION) {
+                       printk(KERN_DEBUG "ioqnet: skipping instance %llu due "
+                              "to mismatched version (found %d, expected %d)\n",
+                              (unsigned long long)pdev->instance,
+                              pdev->version, IOQNET_VERSION);
+                       continue;
+               }
+
+               dev = alloc_netdev(sizeof(struct ioqnet_priv), "ioq%d",
+                                  ioqnet_init);
+
+               if (!dev)
+                       return -ENOMEM;
+
+               priv = netdev_priv(dev);
+
+               /* the priv area was already zeroed by alloc_netdev() */
+
+               /*
+                * Set the PVB instance id for now.  The rest will be
+                * configured inside the init function
+                */
+               priv->pvb_instance = pdev->instance;
+
+               ret = register_netdev(dev);
+               if (ret < 0) {
+                       printk(KERN_ERR "ioqnet: error %i registering device \"%s\"\n",
+                              ret, dev->name);
+                       free_netdev(dev);
+               }
+       }
+
+       return ret;
+}
+
+
+module_init(ioqnet_init_module);
+module_exit(ioqnet_cleanup);
diff --git a/include/linux/ioqnet.h b/include/linux/ioqnet.h
new file mode 100644
index 0000000..1dff3dd
--- /dev/null
+++ b/include/linux/ioqnet.h
@@ -0,0 +1,44 @@
+/*
+ * Copyright 2007 Novell.  All Rights Reserved.
+ *
+ * IOQ Network Driver
+ *
+ * Author:
+ *      Gregory Haskins <[EMAIL PROTECTED]>
+ *
+ * This file is free software; you can redistribute it and/or modify
+ * it under the terms of version 2 of the GNU General Public License
+ * as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA.
+ */
+
+#ifndef _IOQNET_H
+#define _IOQNET_H
+
+#define IOQNET_VERSION 1
+
+/* IOQNET functions (invoked via pvbus_ops->call()) */
+#define IOQNET_CONNECT     1
+#define IOQNET_QUERY_MAC   2
+
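+/* Payload of an IOQNET_CONNECT call: the ids of the guest's rx and tx rings */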
+struct ioqnet_connect {
+       ioq_id_t rxq;
+       ioq_id_t txq;
+};
+
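+/* A single pointer/length pair referenced by a tx ring descriptor */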
+struct ioqnet_tx_ptr {
+       u64 len;
+       u64 data;
+};
+
+#endif /* _IOQNET_H */

