plain text document attachment (lguest64-device.patch)
We started working a little bit on the devices for lguest64.
This is still very much a work-in-progress and needs much more work.

Signed-off-by: Steven Rostedt <[EMAIL PROTECTED]>
Signed-off-by: Glauber de Oliveira Costa <[EMAIL PROTECTED]>
Cc: Chris Wright <[EMAIL PROTECTED]>



Index: work-pv/include/asm-x86_64/lguest_device.h
===================================================================
--- /dev/null
+++ work-pv/include/asm-x86_64/lguest_device.h
@@ -0,0 +1,31 @@
+#ifndef _ASM_LGUEST_DEVICE_H
+#define _ASM_LGUEST_DEVICE_H
+/* Everything you need to know about lguest devices. */
+#include <linux/device.h>
+#include <asm/lguest.h>
+#include <asm/lguest_user.h>
+
+struct lguest_device {
+       /* Unique busid; lguest_bus.c also uses it to index lguest_devices[]. */
+       /* By convention, each device can use irq index+1 if it wants to. */
+       unsigned int index;
+
+       struct device dev;      /* Embedded so container_of() can recover us. */
+
+       /* Driver can hang data off here. */
+       void *private;
+};
+
+struct lguest_driver {
+       const char *name;       /* Copied into drv.name by register_lguest_driver(). */
+       struct module *owner;   /* Copied into drv.owner by register_lguest_driver(). */
+       u16 device_type;        /* The bus binds us to devices whose type equals this. */
+       int (*probe)(struct lguest_device *dev);   /* 0 means the device is usable. */
+       void (*remove)(struct lguest_device *dev); /* Optional: checked for NULL. */
+
+       struct device_driver drv;       /* Filled in by register_lguest_driver(). */
+};
+
+extern int register_lguest_driver(struct lguest_driver *drv);
+extern void unregister_lguest_driver(struct lguest_driver *drv);
+#endif /* _ASM_LGUEST_DEVICE_H */
Index: work-pv/arch/x86_64/lguest/lguest_bus.c
===================================================================
--- /dev/null
+++ work-pv/arch/x86_64/lguest/lguest_bus.c
@@ -0,0 +1,180 @@
+#include <linux/init.h>
+#include <linux/bootmem.h>
+#include <asm/lguest_device.h>
+#include <asm/lguest.h>
+#include <asm/io.h>
+
+/* sysfs attributes: each exposes one field of the device's lguest_devices[] slot. */
+static unsigned int slot_of(struct device *_dev)
+{
+       return container_of(_dev, struct lguest_device, dev)->index;
+}
+static ssize_t type_show(struct device *_dev,
+                         struct device_attribute *attr, char *buf)
+{
+       return sprintf(buf, "%hu", lguest_devices[slot_of(_dev)].type);
+}
+static ssize_t features_show(struct device *_dev,
+                             struct device_attribute *attr, char *buf)
+{
+       return sprintf(buf, "%hx", lguest_devices[slot_of(_dev)].features);
+}
+static ssize_t pfn_show(struct device *_dev,
+                        struct device_attribute *attr, char *buf)
+{
+       return sprintf(buf, "%llu", lguest_devices[slot_of(_dev)].pfn);
+}
+static ssize_t status_show(struct device *_dev,
+                           struct device_attribute *attr, char *buf)
+{
+       return sprintf(buf, "%hx", lguest_devices[slot_of(_dev)].status);
+}
+/* Writing "status" stores one short integer parsed with %hi, or fails with -EINVAL. */
+static ssize_t status_store(struct device *_dev, struct device_attribute *attr,
+                            const char *buf, size_t count)
+{
+       int parsed = sscanf(buf, "%hi", &lguest_devices[slot_of(_dev)].status);
+       return parsed == 1 ? (ssize_t)count : -EINVAL;
+}
+static struct device_attribute lguest_dev_attrs[] = {
+       __ATTR_RO(type),
+       __ATTR_RO(features),
+       __ATTR_RO(pfn),
+       __ATTR(status, 0644, status_show, status_store),        /* the only writable one */
+       __ATTR_NULL
+};
+
+/* Bus match callback: non-zero when the driver's device_type equals the slot's type. */
+static int lguest_dev_match(struct device *_dev, struct device_driver *_drv)
+{
+       struct lguest_driver *ldrv = container_of(_drv, struct lguest_driver, drv);
+       unsigned int slot = container_of(_dev, struct lguest_device, dev)->index;
+       return ldrv->device_type == lguest_devices[slot].type;
+}
+
+struct lguest_bus {
+       struct bus_type bus;    /* Bus type that lguest drivers and devices are put on. */
+       struct device dev;      /* Used as the parent of every lguest device. */
+};
+
+static struct lguest_bus lguest_bus = {
+       .bus = {
+               .name  = "lguest",
+               .match = lguest_dev_match,      /* pairs drivers with devices by type */
+               .dev_attrs = lguest_dev_attrs,  /* type, features, pfn, status */
+       },
+       .dev = {
+               .parent = NULL,                 /* this device has no parent itself */
+               .bus_id = "lguest",
+       }
+};
+
+/* Bus probe: flag the slot, run the driver's probe(), flag DRIVER_OK if it gave 0. */
+static int lguest_dev_probe(struct device *_dev)
+{
+       struct lguest_device *ldev = container_of(_dev, struct lguest_device, dev);
+       struct lguest_driver *ldrv = container_of(_dev->driver,
+                                                 struct lguest_driver, drv);
+       int err;
+       lguest_devices[ldev->index].status |= LGUEST_DEVICE_S_DRIVER;
+       err = ldrv->probe(ldev);
+       if (!err)
+               lguest_devices[ldev->index].status |= LGUEST_DEVICE_S_DRIVER_OK;
+       return err;
+}
+
+/* Bus remove: run the driver's optional remove(), then put_device() the device. */
+static int lguest_dev_remove(struct device *_dev)
+{
+       struct lguest_device *ldev = container_of(_dev, struct lguest_device, dev);
+       struct lguest_driver *ldrv = container_of(_dev->driver,
+                                                 struct lguest_driver, drv);
+       if (_dev->driver && ldrv->remove)
+               ldrv->remove(ldev);
+       put_device(_dev);
+       return 0;
+}
+
+/* Put @drv on the lguest bus.  Does nothing and returns 0 if lguest_devices is unset. */
+int register_lguest_driver(struct lguest_driver *drv)
+{
+       struct device_driver *core = &drv->drv;
+       if (!lguest_devices)
+               return 0;
+       core->bus = &lguest_bus.bus;
+       core->name = drv->name;
+       core->owner = drv->owner;
+       core->probe = lguest_dev_probe;
+       core->remove = lguest_dev_remove;
+       return driver_register(core);
+}
+EXPORT_SYMBOL_GPL(register_lguest_driver);
+
+/* Take @drv off the lguest bus.  Mirrors register_lguest_driver(): nothing was
+ * registered, so nothing is undone, when lguest_devices is unset. */
+void unregister_lguest_driver(struct lguest_driver *drv)
+{
+       if (lguest_devices)
+               driver_unregister(&drv->drv);
+}
+EXPORT_SYMBOL_GPL(unregister_lguest_driver);
+
+/* Device release callback: acknowledge removal in the slot, then free the device. */
+static void release_lguest_device(struct device *_dev)
+{
+       struct lguest_device *ldev = container_of(_dev, struct lguest_device, dev);
+       lguest_devices[ldev->index].status |= LGUEST_DEVICE_S_REMOVED_ACK;
+       kfree(ldev);
+}
+
+/* Create and register the struct lguest_device for slot @index.  The slot's status
+ * gets ACKNOWLEDGE up front, and FAILED if allocation or registration fails. */
+static void add_lguest_device(unsigned int index)
+{
+       struct lguest_device *ldev;
+       lguest_devices[index].status |= LGUEST_DEVICE_S_ACKNOWLEDGE;
+       ldev = kmalloc(sizeof(*ldev), GFP_KERNEL);
+       if (!ldev) {
+               printk(KERN_EMERG "Cannot allocate lguest device %u\n", index);
+               goto failed;
+       }
+       memset(&ldev->dev, 0, sizeof(ldev->dev));
+       ldev->index = index;
+       ldev->private = NULL;
+       ldev->dev.parent = &lguest_bus.dev;
+       ldev->dev.bus = &lguest_bus.bus;
+       ldev->dev.release = release_lguest_device;
+       sprintf(ldev->dev.bus_id, "%u", index);
+       if (device_register(&ldev->dev) == 0)
+               return;
+       printk(KERN_EMERG "Cannot register lguest device %u\n", index);
+       kfree(ldev);
+failed:
+       lguest_devices[index].status |= LGUEST_DEVICE_S_FAILED;
+}
+
+/* Add a device for each of the LGUEST_MAX_DEVICES slots that has a non-zero type. */
+static void scan_devices(void)
+{
+       unsigned int slot;
+       for (slot = 0; slot != LGUEST_MAX_DEVICES; ++slot)
+               if (lguest_devices[slot].type != 0)
+                       add_lguest_device(slot);
+}
+
+/* Guest-only init: map the device page, register the bus and its device, scan slots. */
+static int __init lguest_bus_init(void)
+{
+       if (strcmp(paravirt_ops.name, "lguest") != 0)
+               return 0;
+       /* Devices are in page above top of "normal" mem. */
+       lguest_devices = ioremap(max_pfn << PAGE_SHIFT, PAGE_SIZE);
+       if (!lguest_devices)    /* scan_devices() would dereference NULL */
+               return -ENOMEM;
+       if (bus_register(&lguest_bus.bus) != 0
+           || device_register(&lguest_bus.dev) != 0)
+               panic("lguest bus registration failed");
+       scan_devices();
+       return 0;
+}
+postcore_initcall(lguest_bus_init);
Index: work-pv/arch/x86_64/lguest/io.c
===================================================================
--- /dev/null
+++ work-pv/arch/x86_64/lguest/io.c
@@ -0,0 +1,425 @@
+/* Simple I/O model for guests, based on shared memory.
+ * Copyright (C) 2006 Rusty Russell IBM Corporation
+ *
+ *  This program is free software; you can redistribute it and/or modify
+ *  it under the terms of the GNU General Public License as published by
+ *  the Free Software Foundation; either version 2 of the License, or
+ *  (at your option) any later version.
+ *
+ *  This program is distributed in the hope that it will be useful,
+ *  but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *  GNU General Public License for more details.
+ *
+ *  You should have received a copy of the GNU General Public License
+ *  along with this program; if not, write to the Free Software
+ *  Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301 USA
+ */
+#include <linux/types.h>
+#include <linux/futex.h>
+#include <linux/jhash.h>
+#include <linux/mm.h>
+#include <linux/highmem.h>
+#include <linux/uaccess.h>
+#include <asm/lguest.h>
+#include <asm/lguest_user.h>
+#include "lguest.h"
+
+static struct list_head dma_hash[64];  /* bound DMA slots, bucketed by hash(key) */
+
+/* Check each section of @dma up to the first zero length: it must pass
+ * lguest_address_ok() and fit in one page.  Returns 1 if all do; otherwise kills
+ * the guest and returns 0.  FIXME: allow multi-page lengths. */
+static int check_dma_list(struct lguest_guest_info *linfo,
+                               const struct lguest_dma *dma)
+{
+       unsigned int i;
+       for (i = 0; i < LGUEST_MAX_DMA_SECTIONS; i++) {
+               if (!dma->len[i])
+                       return 1;
+               if (!lguest_address_ok(linfo, dma->addr[i]))
+                       goto kill;
+               if (dma->len[i] > PAGE_SIZE)
+                       goto kill;
+               /* We could do over a page, but is it worth it? */
+               if ((dma->addr[i] % PAGE_SIZE) + dma->len[i] > PAGE_SIZE)
+                       goto kill;
+       }
+       return 1;
+kill:
+       kill_guest(linfo, "bad DMA entry: %u@%#lx", dma->len[i], dma->addr[i]);
+       return 0;
+}
+
+/* dma_hash[] bucket for @key: jhash2 over its word and ptr, seeded with its offset. */
+static unsigned int hash(const union futex_key *key)
+{
+       const u32 nwords = (sizeof(key->both.word)+sizeof(key->both.ptr))/4;
+       u32 mixed = jhash2((u32*)&key->both.word, nwords, key->both.offset);
+       return mixed % ARRAY_SIZE(dma_hash);
+}
+
+/* Unbind one slot.  Must hold read lock on dmainfo owner's current->mm->mmap_sem */
+static void unlink_dma(struct lguest_dma_info *dmainfo)
+{
+       BUG_ON(!mutex_is_locked(&lguest_lock)); /* lguest_lock must be held as well */
+       dmainfo->interrupt = 0;                 /* bind_dma treats 0 as "slot is free" */
+       list_del(&dmainfo->list);               /* take it out of its dma_hash[] bucket */
+       drop_futex_key_refs(&dmainfo->key);     /* give back the ref bind_dma kept */
+}
+
+/* Non-zero iff the two futex keys agree on word, ptr and offset. */
+static inline int key_eq(const union futex_key *a, const union futex_key *b)
+{
+       return !(a->both.word != b->both.word || a->both.ptr != b->both.ptr
+                || a->both.offset != b->both.offset);
+}
+
+/* Unlink @linfo's bound slot matching @key and @dmas; returns 1 if found, else 0.
+ * Free slots (interrupt == 0) keep a stale key/dmas and must not be unlinked again. */
+static u32 unbind_dma(struct lguest_guest_info *linfo,
+                     const union futex_key *key, unsigned long dmas)
+{
+       int i;
+       for (i = 0; i < LGUEST_MAX_DMA; i++) {
+               struct lguest_dma_info *slot = &linfo->dma[i];
+               if (slot->interrupt && dmas == slot->dmas && key_eq(key, &slot->key)) {
+                       unlink_dma(slot);
+                       return 1;
+               }
+       }
+       return 0;
+}
+
+/* Bind (@interrupt != 0) or unbind (== 0) @dmas under the key of @addr; 1 on success. */
+u32 bind_dma(struct lguest_guest_info *linfo, unsigned long addr,
+                               unsigned long dmas, u16 numdmas, u8 interrupt)
+{
+       unsigned int i;
+       u32 ret = 0;
+       union futex_key key;
+
+       printk("inside the handler, with args: %lx, %lx, %x, %x\n",
+              addr,dmas,numdmas,interrupt);
+       if (interrupt >= LGUEST_IRQS)
+               return 0;
+       mutex_lock(&lguest_lock);
+       down_read(&current->mm->mmap_sem);
+       printk("Trying to get futex key...  ");
+       if (get_futex_key((u32 __user *)addr, &key) != 0) {
+               kill_guest(linfo, "bad dma address %#lx", addr);
+               goto unlock;
+       }
+       printk("Got it.\n");
+       get_futex_key_refs(&key);
+       if (interrupt == 0)
+               ret = unbind_dma(linfo, &key, dmas);
+       else {
+               for (i = 0; i < LGUEST_MAX_DMA; i++) {
+                       if (linfo->dma[i].interrupt == 0) {     /* first free slot */
+                               linfo->dma[i].dmas = dmas;
+                               linfo->dma[i].num_dmas = numdmas;
+                               linfo->dma[i].next_dma = 0;
+                               linfo->dma[i].key = key;
+                               linfo->dma[i].guest_id = linfo->guest_id;
+                               linfo->dma[i].interrupt = interrupt;
+                               list_add(&linfo->dma[i].list,
+                                        &dma_hash[hash(&key)]);
+                               ret = 1;
+                               printk("Will return, holding a reference\n");
+                               goto unlock;    /* the slot keeps the key reference */
+                       }
+               }
+       }
+       printk("Will return, _without_ a reference\n");
+       drop_futex_key_refs(&key);      /* unbind, or no free slot: ref not kept */
+unlock:
+       up_read(&current->mm->mmap_sem);
+       mutex_unlock(&lguest_lock);
+       return ret;
+}
+/* lhread from another guest: copy @bytes at @addr in @linfo's task into @buf.
+ * On a bad range or short copy, zero @buf, kill the guest and return 0; else 1. */
+static int lhread_other(struct lguest_guest_info *linfo,
+                       void *buf, u32 addr, unsigned bytes)
+{
+       if (addr + bytes >= addr
+           && lguest_address_ok(linfo, addr+bytes)
+           && access_process_vm(linfo->tsk, addr, buf, bytes, 0) == bytes)
+               return 1;
+       memset(buf, 0, bytes);
+       kill_guest(linfo, "bad address in registered DMA struct");
+       return 0;
+}
+
+/* lhwrite to another guest: copy @bytes from @buf to @addr in @linfo's task.
+ * Returns 1 on success; on a bad range or short copy kills the guest, returns 0. */
+static int lhwrite_other(struct lguest_guest_info *linfo, u32 addr,
+                        const void *buf, unsigned bytes)
+{
+       if (addr + bytes >= addr
+           && lguest_address_ok(linfo, addr+bytes)
+           && access_process_vm(linfo->tsk, addr, (void *)buf, bytes, 1) == bytes)
+               return 1;
+
+       kill_guest(linfo, "bad address writing to registered DMA");
+       return 0;
+}
+
+/* Copy from the sender's sections in @src (read with copy_from_user) into the
+ * destination @pages described by @dst, until either list is exhausted.
+ * Returns bytes copied, or 0 if reading the source fails. */
+static u32 copy_data(const struct lguest_dma *src,
+                    const struct lguest_dma *dst,
+                    struct page *pages[])
+{
+       unsigned int totlen = 0, si = 0, di = 0, srcoff = 0, dstoff = 0;
+       void *maddr = NULL;
+
+       while (si < LGUEST_MAX_DMA_SECTIONS && src->len[si]
+              && di < LGUEST_MAX_DMA_SECTIONS && dst->len[di]) {
+               u32 len = min(src->len[si] - srcoff, dst->len[di] - dstoff);
+               /* Resume srcoff bytes in: part of this section may already be copied. */
+               void __user *from = (void __user *)(src->addr[si] + srcoff);
+
+               if (!maddr)
+                       maddr = kmap(pages[di]);
+               /* FIXME: This is not completely portable, since
+                  archs do different things for copy_to_user_page. */
+               if (copy_from_user(maddr + (dst->addr[di] + dstoff)%PAGE_SIZE,
+                                  from, len) != 0) {
+                       totlen = 0;
+                       break;
+               }
+               totlen += len;
+               srcoff += len;
+               dstoff += len;
+               if (srcoff == src->len[si]) {
+                       si++;
+                       srcoff = 0;
+               }
+               if (dstoff == dst->len[di]) {
+                       kunmap(pages[di]);
+                       maddr = NULL;
+                       di++;
+                       dstoff = 0;
+               }
+       }
+
+       if (maddr)
+               kunmap(pages[di]);
+
+       return totlen;
+}
+
+/* Src is us, ie. current.  Validates both lists, pins the destination pages,
+ * copies, and returns what copy_data() copied (0 on any failure). */
+static u32 do_dma(struct lguest_guest_info *srclg,
+                 const struct lguest_dma *src,
+                 struct lguest_guest_info *dstlg, const struct lguest_dma *dst)
+{
+       int i;
+       u32 ret = 0;
+       struct page *pages[LGUEST_MAX_DMA_SECTIONS];
+
+       if (!check_dma_list(dstlg, dst) || !check_dma_list(srclg, src))
+               return 0;
+
+       /* First get the destination pages */
+       for (i = 0; i < LGUEST_MAX_DMA_SECTIONS; i++) {
+               if (dst->len[i] == 0)
+                       break;
+               /* On failure only pages[0..i-1] are pinned; ret stays 0. */
+               if (get_user_pages(dstlg->tsk, dstlg->mm,
+                                  dst->addr[i], 1, 1, 1, pages+i, NULL) != 1)
+                       goto drop_pages;
+       }
+
+       /* Now copy until we run out of src or dst. */
+       ret = copy_data(src, dst, pages);
+
+drop_pages:
+       while (--i >= 0)
+               put_page(pages[i]);
+       return ret;
+}
+
+/* We cache one process to wakeup: helps for batching & wakes outside locks.
+ * Caching a different task first wakes the old one and drops its reference. */
+void set_wakeup_process(struct lguest_guest_info *linfo, struct task_struct *p)
+{
+       struct task_struct *old = linfo->wake;
+       if (old == p)
+               return;
+       if (old) {
+               wake_up_process(old);
+               put_task_struct(old);
+       }
+       linfo->wake = p;
+       if (p)
+               get_task_struct(p);
+}
+
+static int dma_transfer(struct lguest_guest_info *srclg,
+                       unsigned long udma,
+                       struct lguest_dma_info *dst)
+{
+#if 0  /* NOTE: the whole transfer path below is compiled out; only "return 0" runs. */
+       struct lguest_dma dst_dma, src_dma;
+       struct lguest_guest_info *dstlg;
+       u32 i, dma = 0;
+
+       dstlg = &lguests[dst->guest_id];
+       /* Get our dma list. */
+       lhread(srclg, &src_dma, udma, sizeof(src_dma));
+
+       /* We can't deadlock against them dmaing to us, because this
+        * is all under the lguest_lock. */
+       down_read(&dstlg->mm->mmap_sem);
+
+       for (i = 0; i < dst->num_dmas; i++) {
+               dma = (dst->next_dma + i) % dst->num_dmas;
+               if (!lhread_other(dstlg, &dst_dma,
+                                 dst->dmas + dma * sizeof(struct lguest_dma),
+                                 sizeof(dst_dma))) {
+                       goto fail;
+               }
+               if (!dst_dma.used_len)
+                       break;
+       }
+       if (i != dst->num_dmas) {
+               unsigned long used_lenp;
+               unsigned int ret;
+
+               ret = do_dma(srclg, &src_dma, dstlg, &dst_dma);
+               /* Put used length in src. */
+               lhwrite_u32(srclg,
+                           udma+offsetof(struct lguest_dma, used_len), ret);
+               if (ret == 0 && src_dma.len[0] != 0)
+                       goto fail;
+
+               /* Make sure destination sees contents before length. */
+               mb();
+               used_lenp = dst->dmas
+                       + dma * sizeof(struct lguest_dma)
+                       + offsetof(struct lguest_dma, used_len);
+               lhwrite_other(dstlg, used_lenp, &ret, sizeof(ret));
+               dst->next_dma++;
+       }
+       up_read(&dstlg->mm->mmap_sem);
+
+       /* Do this last so dst doesn't simply sleep on lock. */
+       set_bit(dst->interrupt, dstlg->irqs_pending);
+       set_wakeup_process(srclg, dstlg->tsk);
+       return i == dst->num_dmas;
+
+fail:
+       up_read(&dstlg->mm->mmap_sem);
+#endif /* 0 */
+       return 0;       /* send_dma() adds this to "empty", so it never retries for now */
+}
+
+/* Send the DMA described at @udma to whoever is bound to @addr.  Returns 1 if the
+ * mapping is private and the request was left pending for our userspace, else 0. */
+int send_dma(struct lguest_guest_info *linfo, unsigned long addr,
+                                                       unsigned long udma)
+{
+       union futex_key key;
+       int pending = 0, empty = 0;
+
+       printk("inside send_dma, with args: %lx, %lx\n",addr,udma);
+again:
+       mutex_lock(&lguest_lock);
+       down_read(&current->mm->mmap_sem);
+       if (get_futex_key((u32 __user *)addr, &key) != 0) {
+               kill_guest(linfo, "bad sending DMA address");
+               goto unlock;
+       }
+       if (!(key.shared.offset & 1)) {
+               /* Private mapping: tell our userspace. */
+               linfo->dma_is_pending = 1;
+               linfo->pending_dma = udma;
+               linfo->pending_addr = addr;
+               pending = 1;
+       } else {
+               /* Shared mapping?  Look for other guests... */
+               struct lguest_dma_info *info, *next;
+
+               list_for_each_entry_safe(info, next, &dma_hash[hash(&key)], list) {
+                       if (info->guest_id != linfo->guest_id
+                           && key_eq(&key, &info->key)) {
+                               empty += dma_transfer(linfo, udma, info);
+                               break;
+                       }
+               }
+               if (empty == 1) {
+                       /* Give any recipients one chance to restock. */
+                       up_read(&current->mm->mmap_sem);
+                       mutex_unlock(&lguest_lock);
+                       yield();
+                       empty++;
+                       goto again;
+               }
+       }
+unlock:
+       up_read(&current->mm->mmap_sem);
+       mutex_unlock(&lguest_lock);
+       printk("Returning send_dma with pending: %x\n",pending);
+       return pending;
+}
+/* Unlink every DMA slot @linfo still has bound.  Caller must hold lguest_lock. */
+void release_all_dma(struct lguest_guest_info *linfo)
+{
+       struct lguest_dma_info *slot = linfo->dma, *end = slot + LGUEST_MAX_DMA;
+
+       BUG_ON(!mutex_is_locked(&lguest_lock));
+       down_read(&linfo->mm->mmap_sem);
+       for (; slot != end; slot++) {
+               if (slot->interrupt)
+                       unlink_dma(slot);
+       }
+       up_read(&linfo->mm->mmap_sem);
+}
+
+/* Userspace wants a dma buffer from this guest.  On finding this guest's binding for
+ * @addr, sets *@interrupt and returns the last lguest_dma address examined, else 0. */
+unsigned long get_dma_buffer(struct lguest_guest_info *linfo,
+                            unsigned long addr, unsigned long *interrupt)
+{
+       unsigned long ret = 0;
+       union futex_key key;
+       struct lguest_dma_info *info;
+
+       mutex_lock(&lguest_lock);
+       down_read(&current->mm->mmap_sem);
+       if (get_futex_key((u32 __user *)addr, &key) != 0) {
+               kill_guest(linfo, "bad registered DMA buffer");
+               goto unlock;
+       }
+       list_for_each_entry(info, &dma_hash[hash(&key)], list) {
+               struct lguest_dma dma;
+               unsigned int n;
+               if (info->guest_id != linfo->guest_id || !key_eq(&key, &info->key))
+                       continue;
+               for (n = 0; n < info->num_dmas; n++) {
+                       ret = info->dmas + n * sizeof(struct lguest_dma);
+                       lhread(linfo, &dma, ret, sizeof(dma));
+                       if (dma.used_len == 0)
+                               break;
+               }
+               *interrupt = info->interrupt;
+               break;
+       }
+unlock:
+       up_read(&current->mm->mmap_sem);
+       mutex_unlock(&lguest_lock);
+       return ret;
+}
+
+/* Make every dma_hash[] bucket an empty list. */
+void lguest_io_init(void)
+{
+       struct list_head *bucket;
+       for (bucket = dma_hash; bucket < dma_hash + ARRAY_SIZE(dma_hash); bucket++)
+               INIT_LIST_HEAD(bucket);
+}

--

_______________________________________________
Virtualization mailing list
[email protected]
https://lists.osdl.org/mailman/listinfo/virtualization

Reply via email to