On Fri, 2008-06-13 at 17:09 +0300, Avi Kivity wrote:
> Mark McLoughlin wrote:
> > - .ioctl = vring_ioctl,
> > + .unlocked_ioctl = vring_ioctl,
> > + .compat_ioctl = vring_compat_ioctl,
> >
>
> I think you can set compat_ioctl = vring_ioctl (that's what kvm does).
Don't pointer args need the compat_ptr() conversion on s390x?
> > diff --git a/include/linux/vring.h b/include/linux/vring.h
> > index 47c8848..de4125d 100644
> > --- a/include/linux/vring.h
> > +++ b/include/linux/vring.h
> > @@ -21,8 +21,14 @@
> > #include <linux/types.h>
> >
> > /* Ioctl defines. */
> > -#define VRINGSETBASE _IO(0xAD, 0)
> > -#define VRINGSETLIMIT _IO(0xAD, 1)
> > +#define VRINGSETINFO _IO(0xAD, 0)
> > +
> > +struct vring_ioctl_info {
> > + __u16 num_descs;
> >
>
> Padding for 64-bits here, otherwise compat_ioctl breaks.
Nice catch. Fixed below.
Thanks,
Mark.
From: Mark McLoughlin <[EMAIL PROTECTED]>
Subject: vring: Replace mmap() interface with ioctl()
/dev/vring's mmap() interface is a strange creature. It
serves as a way for userland to supply the address of the
already allocated ring descriptors, but causes those pages
to be re-mapped as a natural side effect of the mmap().
This is not an issue for lguest because it does the mmap()
before even starting the guest. However, in the case of kvm,
the guest allocates the ring and informs the host of its
address. If we then mmap() it, we cause it to be remapped
to new pages which the vring driver will then use.
Now, KVM guests don't actually use the ring pages before
informing the host of its address, so we could probably just
invalidate the guest's shadow page table and have the new
pfns picked up. That would be an odd requirement to impose
on the guest ABI, though.
Since the mmap() semantics are so strange, switch to using a
single ioctl() for setting up the ring.
(Against misc:dev_vring.patch and misc:ringfd-base-limit.patch)
Signed-off-by: Mark McLoughlin <[EMAIL PROTECTED]>
Index: linux-2.6/drivers/char/vring.c
===================================================================
--- linux-2.6.orig/drivers/char/vring.c 2008-06-19 09:03:11.000000000 +0100
+++ linux-2.6/drivers/char/vring.c 2008-06-19 09:04:36.000000000 +0100
@@ -22,6 +22,7 @@
#include <linux/mutex.h>
#include <linux/wait.h>
#include <linux/fs.h>
+#include <linux/compat.h>
#include <linux/poll.h>
#include <linux/module.h>
#include <linux/miscdevice.h>
@@ -126,22 +127,21 @@
return 0;
}
-static int vring_mmap(struct file *filp, struct vm_area_struct *vma)
+static long vring_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
{
- unsigned long size, num_descs;
struct vring_info *vr = filp->private_data;
+ void __user *argp = (void __user *)arg;
+ struct vring_ioctl_info info;
+ unsigned long descs;
int err;
- /* We overload mmap's offset to hold the ring number. */
- num_descs = vma->vm_pgoff;
+ if (cmd != VRINGSETINFO)
+ return -ENOTTY;
- /* Must be a power of two, and limit indices to a u16. */
- if (!is_power_of_2(num_descs) || num_descs > 65536)
- return -EINVAL;
+ if (copy_from_user(&info, argp, sizeof(info)))
+ return -EFAULT;
- /* mmap size must be what we expect for such a ring. */
- size = vma->vm_end - vma->vm_start;
- if (size != ALIGN(vring_size(num_descs, PAGE_SIZE), PAGE_SIZE))
+ if (!is_power_of_2(info.num_descs))
return -EINVAL;
/* We only let them map this in one place. */
@@ -151,9 +151,14 @@
goto unlock;
}
- vring_init(&vr->ring, num_descs, (void *)vma->vm_start, PAGE_SIZE);
+ descs = info.descs;
+ vring_init(&vr->ring, info.num_descs, (void *)descs, PAGE_SIZE);
- vr->mask = num_descs - 1;
+ vr->mask = info.num_descs - 1;
+ vr->base = info.base;
+ vr->limit = info.limit;
+ if (vr->limit == 0)
+ vr->limit = -1UL;
err = 0;
unlock:
@@ -161,6 +166,16 @@
return err;
}
+#ifdef CONFIG_COMPAT
+static long vring_compat_ioctl(struct file *filp, unsigned int cmd,
+ unsigned long arg)
+{
+ return vring_ioctl(filp, cmd, (unsigned long)compat_ptr(arg));
+}
+#else
+#define vring_compat_ioctl NULL
+#endif
+
static int vring_open(struct inode *in, struct file *filp)
{
struct vring_info *vr;
@@ -176,32 +191,14 @@
return 0;
}
-static int vring_ioctl(struct inode *in, struct file *filp,
- unsigned int cmd, unsigned long arg)
-{
- struct vring_info *vr = filp->private_data;
-
- switch (cmd) {
- case VRINGSETBASE:
- vr->base = arg;
- break;
- case VRINGSETLIMIT:
- vr->limit = arg;
- break;
- default:
- return -ENOTTY;
- }
- return 0;
-}
-
static const struct file_operations vring_fops = {
.open = vring_open,
.release = vring_release,
- .mmap = vring_mmap,
.read = vring_read,
.write = vring_write,
.poll = vring_poll,
- .ioctl = vring_ioctl,
+ .unlocked_ioctl = vring_ioctl,
+ .compat_ioctl = vring_compat_ioctl,
};
/**
Index: linux-2.6/include/linux/vring.h
===================================================================
--- linux-2.6.orig/include/linux/vring.h 2008-06-19 09:03:11.000000000 +0100
+++ linux-2.6/include/linux/vring.h 2008-06-19 09:05:56.000000000 +0100
@@ -21,8 +21,15 @@
#include <linux/types.h>
/* Ioctl defines. */
-#define VRINGSETBASE _IO(0xAD, 0)
-#define VRINGSETLIMIT _IO(0xAD, 1)
+#define VRINGSETINFO _IO(0xAD, 0)
+
+struct vring_ioctl_info {
+ __u16 num_descs;
+ __u8 padding[6];
+ __u64 descs;
+ __u64 base;
+ __u64 limit;
+};
#ifdef __KERNEL__
--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at http://vger.kernel.org/majordomo-info.html