Signed-off-by: Gregory Haskins <[EMAIL PROTECTED]> --- drivers/net/Kconfig | 4 drivers/net/Makefile | 2 drivers/net/ioqnet.c | 631 ++++++++++++++++++++++++++++++++++++++++++++++++ include/linux/ioqnet.h | 42 +++ 4 files changed, 679 insertions(+), 0 deletions(-)
diff --git a/drivers/net/Kconfig b/drivers/net/Kconfig
index fb99cd4..eb46c07 100644
--- a/drivers/net/Kconfig
+++ b/drivers/net/Kconfig
@@ -2947,6 +2947,13 @@ config NETCONSOLE
 	  If you want to log kernel messages over the network, enable this.
 	  See <file:Documentation/networking/netconsole.txt> for details.
 
+config IOQNET
+	tristate "IOQ based paravirtualized network driver"
+	select IOQ
+	---help---
+	  This driver provides a paravirtualized network interface on top
+	  of the IOQ shared-memory queue transport.  If unsure, say N.
+
 endif #NETDEVICES
 
 config NETPOLL
diff --git a/drivers/net/Makefile b/drivers/net/Makefile
index a77affa..f1b4916 100644
--- a/drivers/net/Makefile
+++ b/drivers/net/Makefile
@@ -224,6 +224,8 @@
 obj-$(CONFIG_ENP2611_MSF_NET) += ixp2000/
 
 obj-$(CONFIG_NETCONSOLE) += netconsole.o
 
+obj-$(CONFIG_IOQNET) += ioqnet.o
+
 obj-$(CONFIG_FS_ENET) += fs_enet/
 
 obj-$(CONFIG_NETXEN_NIC) += netxen/
diff --git a/drivers/net/ioqnet.c b/drivers/net/ioqnet.c
new file mode 100644
index 0000000..5500631
--- /dev/null
+++ b/drivers/net/ioqnet.c
@@ -0,0 +1,631 @@
+/*
+ * ioqnet - A paravirtualized network device based on the IOQ interface
+ *
+ * Copyright (C) 2007 Novell, Gregory Haskins <[EMAIL PROTECTED]>
+ *
+ * Derived from the SNULL example from the book "Linux Device
+ * Drivers" by Alessandro Rubini and Jonathan Corbet, published
+ * by O'Reilly & Associates.
+ */ + +#include <linux/module.h> +#include <linux/init.h> +#include <linux/moduleparam.h> + +#include <linux/sched.h> +#include <linux/kernel.h> /* printk() */ +#include <linux/slab.h> /* kmalloc() */ +#include <linux/errno.h> /* error codes */ +#include <linux/types.h> /* size_t */ +#include <linux/interrupt.h> /* mark_bh */ + +#include <linux/in.h> +#include <linux/netdevice.h> /* struct device, and other headers */ +#include <linux/etherdevice.h> /* eth_type_trans */ +#include <linux/ip.h> /* struct iphdr */ +#include <linux/tcp.h> /* struct tcphdr */ +#include <linux/skbuff.h> +#include <linux/ioq.h> +#include <linux/pvbus.h> + +#include <linux/in6.h> +#include <asm/checksum.h> + +#include <linux/ioqnet.h> + +MODULE_AUTHOR("Gregory Haskins"); +MODULE_LICENSE("GPL"); + +#undef PDEBUG /* undef it, just in case */ +#ifdef IOQNET_DEBUG +# define PDEBUG(fmt, args...) printk( KERN_DEBUG "ioqnet: " fmt, ## args) +#else +# define PDEBUG(fmt, args...) /* not debugging: nothing */ +#endif + +static int timeout = 5; /* In jiffies */ +module_param(timeout, int, 0); + +#define RX_RINGLEN 64 +#define TX_RINGLEN 64 +#define TX_PTRS_PER_DESC 64 + +struct ioqnet_queue { + struct ioq *queue; + struct ioq_notifier notifier; +}; + +struct ioqnet_tx_desc { + struct sk_buff *skb; + struct ioqnet_tx_ptr data[TX_PTRS_PER_DESC]; +}; + +struct ioqnet_priv { + spinlock_t lock; + struct net_device *dev; + struct net_device_stats stats; + struct ioqnet_queue rxq; + struct ioqnet_queue txq; + struct tasklet_struct txtask; + u64 pvb_instance; +}; + +static int ioqnet_queue_init(struct ioqnet_priv *priv, + struct ioqnet_queue *q, + size_t ringsize, + void (*func)(struct ioq_notifier*)) +{ + int ret = pvbus_ops->ioqmgr->create(pvbus_ops->ioqmgr, + &q->queue, ringsize, 0); + if (ret < 0) + return ret; + + q->notifier.signal = func; + + return 0; +} + +/* + * Enable and disable receive interrupts. 
+ */ +static void ioqnet_rx_ints(struct net_device *dev, int enable) +{ + struct ioqnet_priv *priv = netdev_priv(dev); + struct ioq *ioq = priv->rxq.queue; + if (enable) + ioq_start(ioq, 0); + else + ioq_stop(ioq, 0); +} + +static void ioqnet_alloc_rx_desc(struct ioq_ring_desc *desc, size_t len) +{ + struct sk_buff *skb = dev_alloc_skb(len + 2); + BUG_ON(!skb); + + skb_reserve(skb, 2); /* align IP on 16B boundary */ + + desc->cookie = (u64)skb; + desc->ptr = (u64)__pa(skb->data); + desc->len = len; /* total length */ + desc->alen = 0; /* actual length - to be filled in by host */ + + mb(); + desc->valid = 1; + desc->sown = 1; /* give ownership to the south */ + mb(); +} + +static void ioqnet_setup_rx(struct ioqnet_priv *priv) +{ + struct ioq *ioq = priv->rxq.queue; + struct ioq_iterator iter; + int ret; + int i; + + /* + * We want to iterate on the "valid" index. By default the iterator + * will not "autoupdate" which means it will not hypercall the host + * with our changes. This is good, because we are really just + * initializing stuff here anyway. Note that you can always manually + * signal the host with ioq_signal() if the autoupdate feature is not + * used. + */ + ret = ioq_iter_init(ioq, &iter, ioq_idxtype_valid, 0); + BUG_ON(ret < 0); + + /* + * Seek to the head of the valid index (which should be our first + * item, since the queue is brand-new) + */ + ret = ioq_iter_seek(&iter, ioq_seek_head, 0, 0); + BUG_ON(ret < 0); + + /* + * Now populate each descriptor with an empty SKB and mark it valid + */ + for (i = 0; i < RX_RINGLEN; ++i) { + ioqnet_alloc_rx_desc(iter.desc, priv->dev->mtu); + + /* + * This push operation will simultaneously advance the + * valid-head index and increment our position in the queue + * by one. 
+ */ + ret = ioq_iter_push(&iter, 0); + BUG_ON(ret < 0); + } +} + +static void ioqnet_setup_tx(struct ioqnet_priv *priv) +{ + struct ioq *ioq = priv->txq.queue; + struct ioq_iterator iter; + int ret; + int i; + + /* + * We setup the tx-desc in a similar way to how we did the rx SKBs + */ + ret = ioq_iter_init(ioq, &iter, ioq_idxtype_valid, 0); + BUG_ON(ret < 0); + + ret = ioq_iter_seek(&iter, ioq_seek_head, 0, 0); + BUG_ON(ret < 0); + + for (i = 0; i < TX_RINGLEN; ++i) { + struct ioq_ring_desc *desc = iter.desc; + struct ioqnet_tx_desc *txdesc = kzalloc(sizeof(*txdesc), + GFP_KERNEL | GFP_DMA); + + desc->cookie = (u64)txdesc; + desc->ptr = (u64)__pa(&txdesc->data[0]); + desc->len = TX_PTRS_PER_DESC; /* "len" is "count" */ + desc->alen = 0; + desc->valid = 0; /* mark it "invalid" since payload empty */ + desc->sown = 0; /* retain ownership until "inuse" */ + + /* + * One big difference between the RX and TX ring is that + * we are going to do an "iter++" here instead of an + * "iter->push()". That is because we don't want to actually + * advance the valid-index. We use the valid index to + * determine the difference between outstanding consumed and + * outstanding unconsumed packets + */ + ret = ioq_iter_seek(&iter, ioq_seek_next, 0, 0); + BUG_ON(ret < 0); + } +} + +/* + * Open and close + */ + +int ioqnet_open(struct net_device *dev) +{ + netif_start_queue(dev); + return 0; +} + +int ioqnet_release(struct net_device *dev) +{ + netif_stop_queue(dev); + return 0; +} + +/* + * Configuration changes (passed on by ifconfig) + */ +int ioqnet_config(struct net_device *dev, struct ifmap *map) +{ + if (dev->flags & IFF_UP) /* can't act on a running interface */ + return -EBUSY; + + /* Don't allow changing the I/O address */ + if (map->base_addr != dev->base_addr) { + printk(KERN_WARNING "ioqnet: Can't change I/O address\n"); + return -EOPNOTSUPP; + } + + /* ignore other fields */ + return 0; +} + +/* + * The poll implementation. 
+ */
+static int ioqnet_poll(struct net_device *dev, int *budget)
+{
+	int npackets = 0, quota = min(dev->quota, *budget);
+	struct ioqnet_priv *priv = netdev_priv(dev);
+	struct ioq_iterator iter;
+	unsigned long flags;
+	int ret;
+
+	spin_lock_irqsave(&priv->lock, flags);
+
+	/* We want to iterate on the tail of the in-use index */
+	ret = ioq_iter_init(priv->rxq.queue, &iter, ioq_idxtype_inuse, 0);
+	BUG_ON(ret < 0);
+
+	ret = ioq_iter_seek(&iter, ioq_seek_tail, 0, 0);
+	BUG_ON(ret < 0);
+
+	/*
+	 * We stop if we have met the quota or there are no more packets.
+	 * The EOM is indicated by finding a packet that is still owned by
+	 * the south side
+	 */
+	while ((npackets < quota) && (!iter.desc->sown)) {
+		struct sk_buff *skb =
+			(struct sk_buff *)(unsigned long)iter.desc->cookie;
+
+		/*
+		 * The host wrote "alen" bytes at skb->data; extend the tail
+		 * over them.  This must be skb_put(), not skb_push() -
+		 * push moves skb->data *backwards* into the headroom,
+		 * mangling the frame.
+		 */
+		skb_put(skb, iter.desc->alen);
+
+		/* Maintain stats */
+		npackets++;
+		priv->stats.rx_packets++;
+		priv->stats.rx_bytes += iter.desc->alen;
+
+		/* Pass the buffer up to the stack */
+		skb->dev = dev;
+		skb->protocol = eth_type_trans(skb, dev);
+		netif_receive_skb(skb);
+
+		mb();
+
+		/* Grab a new full-frame-sized buffer to put in the ring */
+		ioqnet_alloc_rx_desc(iter.desc, dev->mtu + ETH_HLEN);
+
+		/* Advance the in-use tail */
+		ret = ioq_iter_pop(&iter, 0);
+		BUG_ON(ret < 0);
+
+		/* Briefly drop the lock to bound irq-off latency */
+		spin_unlock_irqrestore(&priv->lock, flags);
+		spin_lock_irqsave(&priv->lock, flags);
+	}
+
+	/*
+	 * If we processed all packets, we're done; tell the kernel and
+	 * reenable ints
+	 */
+	*budget -= npackets;
+	dev->quota -= npackets;
+	if (ioq_empty(priv->rxq.queue, ioq_idxtype_inuse)) {
+		/* FIXME: there is a race with enabling interrupts */
+		netif_rx_complete(dev);
+		ioqnet_rx_ints(dev, 1);
+		ret = 0;
+	} else
+		/* We couldn't process everything */
+		ret = 1;
+
+	spin_unlock_irqrestore(&priv->lock, flags);
+
+	/* And let the south side know that we changed the rx-queue */
+	ioq_signal(priv->rxq.queue, 0);
+
+	return ret;
+}
+
+/*
+ * Transmit a packet (called by the kernel)
+ */
+int ioqnet_tx_start(struct sk_buff *skb, struct net_device *dev)
+{
+	struct ioqnet_priv *priv = netdev_priv(dev);
+	struct ioq_iterator viter;
+	struct ioq_iterator uiter;
+	struct ioqnet_tx_desc *txdesc;
+	unsigned int len;
+	int ret;
+	int i;
+	unsigned long flags;
+
+	/*
+	 * Pad runt frames out to the ethernet minimum.  Returning -EINVAL
+	 * from hard_start_xmit is wrong: any non-zero return makes the
+	 * stack requeue the skb, looping forever.  skb_padto() frees the
+	 * skb on failure, so on error we count a drop and claim success.
+	 */
+	if (skb_padto(skb, ETH_ZLEN)) {
+		priv->stats.tx_dropped++;
+		return 0;
+	}
+	/* skb_padto() zeroes the tailroom but does not grow skb->len */
+	len = max_t(unsigned int, skb->len, ETH_ZLEN);
+
+	spin_lock_irqsave(&priv->lock, flags);
+
+	if (ioq_full(priv->txq.queue, ioq_idxtype_valid)) {
+		/*
+		 * We must flow-control the kernel by disabling the queue.
+		 * Return BUSY (not 0!) so the stack requeues this skb
+		 * instead of silently losing it.
+		 */
+		spin_unlock_irqrestore(&priv->lock, flags);
+		netif_stop_queue(dev);
+		return NETDEV_TX_BUSY;
+	}
+
+	/*
+	 * We want to iterate on the head of both the "inuse" and "valid"
+	 * indices
+	 */
+	ret = ioq_iter_init(priv->txq.queue, &viter, ioq_idxtype_valid, 0);
+	BUG_ON(ret < 0);
+	ret = ioq_iter_init(priv->txq.queue, &uiter, ioq_idxtype_inuse, 0);
+	BUG_ON(ret < 0);
+
+	ret = ioq_iter_seek(&viter, ioq_seek_head, 0, 0);
+	BUG_ON(ret < 0);
+	ret = ioq_iter_seek(&uiter, ioq_seek_head, 0, 0);
+	BUG_ON(ret < 0);
+
+	/* The head pointers should move in lockstep */
+	BUG_ON(uiter.pos != viter.pos);
+
+	dev->trans_start = jiffies;	/* save the timestamp */
+	skb_get(skb);			/* add a refcount */
+
+	txdesc = (struct ioqnet_tx_desc *)(unsigned long)uiter.desc->cookie;
+
+	for (i = 0; i < 1; ++i) {	/* Someday we will support SG */
+		txdesc->data[i].len  = (u64)len;
+		txdesc->data[i].data = (u64)__pa(skb->data);
+
+		uiter.desc->alen++;
+	}
+
+	txdesc->skb = skb;	/* save the skb for future release */
+
+	mb();
+	uiter.desc->valid = 1;
+	uiter.desc->sown  = 1;	/* give ownership to the south */
+	mb();
+
+	/* Advance both indexes together */
+	ret = ioq_iter_push(&viter, 0);
+	BUG_ON(ret < 0);
+	ret = ioq_iter_push(&uiter, 0);
+	BUG_ON(ret < 0);
+
+	/*
+	 * This will signal the south side to consume the packet
+	 */
+	ioq_signal(priv->txq.queue, 0);
+
+	spin_unlock_irqrestore(&priv->lock, flags);
+
+	return 0;
+}
+
+/*
+ * called by the tx tasklet to indicate that one or more packets
+ * have been consumed: reap the descriptors, free the skbs, and credit
+ * the tx statistics
+ */
+void ioqnet_tx_complete(unsigned long data)
+{
+	struct ioqnet_priv *priv = (struct ioqnet_priv *)data;
+	struct ioq_iterator iter;
+	unsigned long flags;
+	int ret;
+
+	spin_lock_irqsave(&priv->lock, flags);
+
+	/*
+	 * We want to iterate on the tail of the *tx* valid index.
+	 * (The original initialized the iterator on rxq here - a
+	 * copy/paste bug; completions live on the tx ring.)
+	 */
+	ret = ioq_iter_init(priv->txq.queue, &iter, ioq_idxtype_valid, 0);
+	BUG_ON(ret < 0);
+
+	ret = ioq_iter_seek(&iter, ioq_seek_tail, 0, 0);
+	BUG_ON(ret < 0);
+
+	/*
+	 * We are done once we find the first packet either invalid or still
+	 * owned by the south-side
+	 */
+	while (iter.desc->valid && !iter.desc->sown) {
+		struct ioqnet_tx_desc *txdesc;
+		struct sk_buff *skb;
+
+		txdesc = (struct ioqnet_tx_desc *)(unsigned long)iter.desc->cookie;
+		skb = txdesc->skb;
+
+		/* Maintain stats */
+		priv->stats.tx_packets++;
+		priv->stats.tx_bytes += skb->len;
+
+		/* Reset the descriptor */
+		mb();
+		iter.desc->alen  = 0;
+		iter.desc->valid = 0;
+		mb();
+
+		dev_kfree_skb(skb);
+
+		/* Advance the valid-index tail */
+		ret = ioq_iter_pop(&iter, 0);
+		BUG_ON(ret < 0);
+
+		/* Briefly drop the lock to bound irq-off latency */
+		spin_unlock_irqrestore(&priv->lock, flags);
+		spin_lock_irqsave(&priv->lock, flags);
+	}
+
+	/*
+	 * If we were previously stopped due to flow control, restart the
+	 * processing
+	 */
+	if (netif_queue_stopped(priv->dev) &&
+	    !ioq_full(priv->txq.queue, ioq_idxtype_inuse))
+		netif_wake_queue(priv->dev);
+
+	spin_unlock_irqrestore(&priv->lock, flags);
+}
+
+/*
+ * Ioctl commands (none supported yet)
+ */
+int ioqnet_ioctl(struct net_device *dev, struct ifreq *rq, int cmd)
+{
+	PDEBUG("ioctl\n");
+	return 0;
+}
+
+/*
+ * Return statistics to the caller
+ */
+struct net_device_stats *ioqnet_stats(struct net_device *dev)
+{
+	struct ioqnet_priv *priv = netdev_priv(dev);
+	return &priv->stats;
+}
+
+static void ioq_rx_notify(struct ioq_notifier *notifier)
+{ + struct ioqnet_priv *priv; + struct net_device *dev; + + priv = container_of(notifier, struct ioqnet_priv, rxq.notifier); + dev = priv->dev; + + ioqnet_rx_ints(dev, 0); /* Disable further interrupts */ + netif_rx_schedule(dev); +} + +static void ioq_tx_notify(struct ioq_notifier *notifier) +{ + struct ioqnet_priv *priv; + + priv = container_of(notifier, struct ioqnet_priv, txq.notifier); + + tasklet_schedule(&priv->txtask); +} + +/* Perform a hypercall to register/connect our queues */ +static int ioqnet_connect(struct ioqnet_priv *priv) +{ + struct ioqnet_connect data = { + .rxq = priv->rxq.queue->id, + .txq = priv->txq.queue->id, + }; + + return pvbus_ops->call(priv->pvb_instance, IOQNET_CONNECT, + &data, sizeof(data), 0); +} + +/* Perform a hypercall to get the assigned MAC addr */ +static int ioqnet_query_mac(struct ioqnet_priv *priv) +{ + return pvbus_ops->call(priv->pvb_instance, + IOQNET_QUERY_MAC, + priv->dev->dev_addr, + ETH_ALEN, 0); +} + +/* + * The init function (sometimes called probe). 
+ * It is invoked by register_netdev() + */ +void ioqnet_init(struct net_device *dev) +{ + struct ioqnet_priv *priv = netdev_priv(dev);; + + ether_setup(dev); /* assign some of the fields */ + + dev->open = ioqnet_open; + dev->stop = ioqnet_release; + dev->set_config = ioqnet_config; + dev->hard_start_xmit = ioqnet_tx_start; + dev->do_ioctl = ioqnet_ioctl; + dev->get_stats = ioqnet_stats; + dev->poll = ioqnet_poll; + dev->weight = 2; + dev->hard_header_cache = NULL; /* Disable caching */ + + if (ioqnet_query_mac(priv) < 0) + printk("IOQNET: Could not obtain MAC address for %lld\n", + priv->pvb_instance); + + spin_lock_init(&priv->lock); + priv->dev = dev; + tasklet_init(&priv->txtask, ioqnet_tx_complete, (unsigned long)priv); + + ioqnet_queue_init(priv, &priv->rxq, RX_RINGLEN, ioq_rx_notify); + ioqnet_queue_init(priv, &priv->txq, TX_RINGLEN, ioq_tx_notify); + + ioqnet_setup_rx(priv); + ioqnet_setup_tx(priv); + + ioqnet_rx_ints(dev, 1); /* enable receive interrupts */ + ioq_start(priv->txq.queue, 0); /* enable transmit interrupts */ + + /* Now connect the host side driver to us via the PVBUS/IOQ */ + if (ioqnet_connect(priv) < 0) + printk("IOQNET: Could not initialize instance %lld\n", + priv->pvb_instance); + +} + +/* + * Finally, the module stuff + */ + +void ioqnet_cleanup(void) +{ +#if 0 + if (ioqnet_dev) { + unregister_netdev(ioqnet_dev); + /* FIXME: free memory in rings */ + /* FIXME: free IOQ interfaces */ + free_netdev(ioqnet_dev); + } +#endif + return; +} + +#define MAX_DEVICES 16 + +int ioqnet_init_module(void) +{ + int ret = -ENOMEM; + struct pvbus_dev devs[MAX_DEVICES]; + size_t count = MAX_DEVICES; + int i; + + ret = pvbus_ops->enumerate("ioqnet", devs, &count, 0); + if (ret < 0) + return ret; + + for (i = 0; i < count; ++i) { + struct net_device *dev; + struct pvbus_dev *pdev = &devs[i]; + struct ioqnet_priv *priv; + + if (pdev->version != IOQNET_VERSION) { + printk(KERN_DEBUG "IOQNET: Skipping instance %lld " \ + "due to mistatched version (found %d, 
" \ + "expected %d)\n", + pdev->instance, pdev->version, IOQNET_VERSION); + continue; + } + + dev = alloc_netdev(sizeof(struct ioqnet_priv), "ioq%d", + ioqnet_init); + + if (!dev) + return -ENOMEM; + + priv = netdev_priv(dev); + + memset(priv, 0, sizeof(*priv)); + + /* + * Set the PVB instance id for now. The rest will be + * configured inside the init function + */ + priv->pvb_instance = pdev->instance; + + ret = register_netdev(dev); + if (ret < 0) { + printk("ioqnet: error %i registering device \"%s\"\n", + ret, dev->name); + free_netdev(dev); + } + } + + return ret; +} + + +module_init(ioqnet_init_module); +module_exit(ioqnet_cleanup); diff --git a/include/linux/ioqnet.h b/include/linux/ioqnet.h new file mode 100644 index 0000000..1dff3dd --- /dev/null +++ b/include/linux/ioqnet.h @@ -0,0 +1,42 @@ +/* + * Copyright 2007 Novell. All Rights Reserved. + * + * IOQ Network Driver + * + * Author: + * Gregory Haskins <[EMAIL PROTECTED]> + * + * This file is free software; you can redistribute it and/or modify + * it under the terms of version 2 of the GNU General Public License + * as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA. 
+ */ + +#ifndef _IOQNET_H +#define _IOQNET_H + +#define IOQNET_VERSION 1 + +/* IOQNET functions (invoked via pvbus_ops->call()) */ +#define IOQNET_CONNECT 1 +#define IOQNET_QUERY_MAC 2 + +struct ioqnet_connect { + ioq_id_t rxq; + ioq_id_t txq; +}; + +struct ioqnet_tx_ptr { + u64 len; + u64 data; +}; + +#endif /* _IOQNET_H */ ------------------------------------------------------------------------- This SF.net email is sponsored by: Splunk Inc. Still grepping through log files to find problems? Stop. Now Search log events and configuration files using AJAX and a browser. Download your FREE copy of Splunk now >> http://get.splunk.com/ _______________________________________________ kvm-devel mailing list kvm-devel@lists.sourceforge.net https://lists.sourceforge.net/lists/listinfo/kvm-devel