On Wed, Feb 13, 2013 at 9:49 PM, Scott Wood <[email protected]> wrote:
> Currently, devices that are emulated inside KVM are configured in a
> hardcoded manner based on an assumption that any given architecture
> only has one way to do it. If there's any need to access device state,
> it is done through inflexible one-purpose-only IOCTLs (e.g.
> KVM_GET/SET_LAPIC). Defining new IOCTLs for every little thing is
> cumbersome and depletes a limited numberspace.
>
> This API provides a mechanism to instantiate a device of a certain
> type, returning an ID that can be used to set/get attributes of the
> device. Attributes may include configuration parameters (e.g.
> register base address), device state, operational commands, etc. It
> is similar to the ONE_REG API, except that it acts on devices rather
> than vcpus.
>
> Both device types and individual attributes can be tested without having
> to create the device or get/set the attribute, without the need for
> separately managing enumerated capabilities.
>
> Signed-off-by: Scott Wood <[email protected]>
> ---
> Documentation/virtual/kvm/api.txt | 76 ++++++++++++++++++
> Documentation/virtual/kvm/devices/README | 1 +
> include/linux/kvm_host.h | 21 +++++
> include/uapi/linux/kvm.h | 25 ++++++
> virt/kvm/kvm_main.c | 127
> ++++++++++++++++++++++++++++++
> 5 files changed, 250 insertions(+)
> create mode 100644 Documentation/virtual/kvm/devices/README
>
> diff --git a/Documentation/virtual/kvm/api.txt
> b/Documentation/virtual/kvm/api.txt
> index c2534c3..5bcdb42 100644
> --- a/Documentation/virtual/kvm/api.txt
> +++ b/Documentation/virtual/kvm/api.txt
> @@ -2122,6 +2122,82 @@ header; first `n_valid' valid entries with contents
> from the data
> written, then `n_invalid' invalid entries, invalidating any previously
> valid entries found.
>
> +4.79 KVM_CREATE_DEVICE
> +
> +Capability: KVM_CAP_DEVICE_CTRL
> +Type: vm ioctl
> +Parameters: struct kvm_create_device (in/out)
> +Returns: 0 on success, -1 on error
> +Errors:
> + ENODEV: The device type is unknown or unsupported
> + EEXIST: Device already created, and this type of device may not
> + be instantiated multiple times
> + ENOSPC: Too many devices have been created
> +
> + Other error conditions may be defined by individual device types.
> +
> +Creates an emulated device in the kernel. The returned handle
> +can be used with KVM_SET/GET_DEVICE_ATTR.
> +
> +If the KVM_CREATE_DEVICE_TEST flag is set, only test whether the
> +device type is supported (not necessarily whether it can be created
> +in the current vm).
> +
> +Individual devices should not define flags. Attributes should be used
> +for specifying any behavior that is not implied by the device type
> +number.
> +
> +struct kvm_create_device {
> + __u32 type; /* in: KVM_DEV_TYPE_xxx */
> + __u32 id; /* out: device handle */
> + __u32 flags; /* in: KVM_CREATE_DEVICE_xxx */
> +};
> +
> +4.80 KVM_SET_DEVICE_ATTR/KVM_GET_DEVICE_ATTR
> +
> +Capability: KVM_CAP_DEVICE_CTRL
> +Type: vm ioctl
> +Parameters: struct kvm_device_attr
> +Returns: 0 on success, -1 on error
> +Errors:
> + ENODEV: The device id is invalid
> + ENXIO: The group or attribute is unknown/unsupported for this device
> + EPERM: The attribute cannot (currently) be accessed this way
> + (e.g. read-only attribute, or attribute that only makes
> + sense when the device is in a different state)
> +
> + Other error conditions may be defined by individual device types.
> +
> +Gets/sets a specified piece of device configuration and/or state. The
> +semantics are device-specific except for certain global attributes. See
> +individual device documentation in the "devices" directory. As with
> +ONE_REG, the size of the data transferred is defined by the particular
> +attribute.
> +
> +Attributes in group KVM_DEV_ATTR_COMMON are not device-specific:
> + KVM_DEV_ATTR_TYPE (ro, 32-bit): the device type passed to
> KVM_CREATE_DEVICE
> +
> +struct kvm_device_attr {
> + __u32 dev; /* id from KVM_CREATE_DEVICE */
> + __u32 group; /* KVM_DEV_ATTR_COMMON or device-defined */
> + __u64 attr; /* group-defined */
> + __u64 addr; /* userspace address of attr data */
> +};
> +
> +4.81 KVM_HAS_DEVICE_ATTR
> +
> +Capability: KVM_CAP_DEVICE_CTRL
> +Type: vm ioctl
> +Parameters: struct kvm_device_attr
> +Returns: 0 on success, -1 on error
> +Errors:
> + ENODEV: The device id is invalid
> + ENXIO: The group or attribute is unknown/unsupported for this device
> +
> +Tests whether a device supports a particular attribute. A successful
> +return indicates the attribute is implemented. It does not necessarily
> +indicate that the attribute can be read or written in the device's
> +current state. "addr" is ignored.
>
> 5. The kvm_run structure
> ------------------------
> diff --git a/Documentation/virtual/kvm/devices/README
> b/Documentation/virtual/kvm/devices/README
> new file mode 100644
> index 0000000..34a6983
> --- /dev/null
> +++ b/Documentation/virtual/kvm/devices/README
> @@ -0,0 +1 @@
> +This directory contains specific device bindings for KVM_CAP_DEVICE_CTRL.
> diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
> index 0350e0d..dbaf012 100644
> --- a/include/linux/kvm_host.h
> +++ b/include/linux/kvm_host.h
> @@ -335,6 +335,25 @@ struct kvm_memslots {
> short id_to_index[KVM_MEM_SLOTS_NUM];
> };
>
> +/*
> + * The worst case number of simultaneous devices will likely be very low
> + * (usually zero or one) for the forseeable future. If the worst case
> + * exceeds this, then it can be increased, or we can convert to idr.
> + */
This comment is on the heavy side (if at all needed). If you want to
remind people of idr, just put that in a single line. A define is a
define is a define.
> +#define KVM_MAX_DEVICES 4
> +
> +struct kvm_device {
> + u32 type;
> +
> + int (*set_attr)(struct kvm *kvm, struct kvm_device *dev,
> + struct kvm_device_attr *attr);
> + int (*get_attr)(struct kvm *kvm, struct kvm_device *dev,
> + struct kvm_device_attr *attr);
> + int (*has_attr)(struct kvm *kvm, struct kvm_device *dev,
> + struct kvm_device_attr *attr);
> + void (*destroy)(struct kvm *kvm, struct kvm_device *dev);
> +};
> +
> struct kvm {
> spinlock_t mmu_lock;
> struct mutex slots_lock;
> @@ -385,6 +404,8 @@ struct kvm {
> long mmu_notifier_count;
> #endif
> long tlbs_dirty;
> + struct kvm_device *devices[KVM_MAX_DEVICES];
> + unsigned int num_devices;
> };
>
> #define kvm_err(fmt, ...) \
> diff --git a/include/uapi/linux/kvm.h b/include/uapi/linux/kvm.h
> index 9a2db57..1f348e0 100644
> --- a/include/uapi/linux/kvm.h
> +++ b/include/uapi/linux/kvm.h
> @@ -662,6 +662,7 @@ struct kvm_ppc_smmu_info {
> #define KVM_CAP_PPC_HTAB_FD 84
> #define KVM_CAP_S390_CSS_SUPPORT 85
> #define KVM_CAP_PPC_EPR 86
> +#define KVM_CAP_DEVICE_CTRL 87
>
> #ifdef KVM_CAP_IRQ_ROUTING
>
> @@ -890,6 +891,30 @@ struct kvm_s390_ucas_mapping {
> /* Available with KVM_CAP_PPC_HTAB_FD */
> #define KVM_PPC_GET_HTAB_FD _IOW(KVMIO, 0xaa, struct kvm_get_htab_fd)
>
> +/* Available with KVM_CAP_DEVICE_CTRL */
> +#define KVM_CREATE_DEVICE_TEST 1
> +
> +struct kvm_create_device {
> + __u32 type; /* in: KVM_DEV_TYPE_xxx */
> + __u32 id; /* out: device handle */
> + __u32 flags; /* in: KVM_CREATE_DEVICE_xxx */
> +};
> +
> +struct kvm_device_attr {
> + __u32 dev; /* id from KVM_CREATE_DEVICE */
> + __u32 group; /* KVM_DEV_ATTR_COMMON or device-defined */
> + __u64 attr; /* group-defined */
> + __u64 addr; /* userspace address of attr data */
> +};
> +
> +#define KVM_DEV_ATTR_COMMON 0
> +#define KVM_DEV_ATTR_TYPE 0 /* 32-bit */
> +
> +#define KVM_CREATE_DEVICE _IOWR(KVMIO, 0xac, struct
> kvm_create_device)
> +#define KVM_SET_DEVICE_ATTR _IOW(KVMIO, 0xad, struct kvm_device_attr)
> +#define KVM_GET_DEVICE_ATTR _IOW(KVMIO, 0xae, struct kvm_device_attr)
_IOWR ?
> +#define KVM_HAS_DEVICE_ATTR _IOW(KVMIO, 0xaf, struct kvm_device_attr)
> +
> /*
> * ioctls for vcpu fds
> */
> diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
> index 2e93630..baf8481 100644
> --- a/virt/kvm/kvm_main.c
> +++ b/virt/kvm/kvm_main.c
> @@ -580,6 +580,18 @@ void kvm_free_physmem(struct kvm *kvm)
> kfree(kvm->memslots);
> }
>
> +static void kvm_destroy_devices(struct kvm *kvm)
> +{
> + int i;
> +
> + for (i = 0; i < kvm->num_devices; i++) {
> + kvm->devices[i]->destroy(kvm, kvm->devices[i]);
> + kvm->devices[i] = NULL;
> + }
> +
> + kvm->num_devices = 0;
> +}
> +
> static void kvm_destroy_vm(struct kvm *kvm)
> {
> int i;
> @@ -590,6 +602,7 @@ static void kvm_destroy_vm(struct kvm *kvm)
> list_del(&kvm->vm_list);
> raw_spin_unlock(&kvm_lock);
> kvm_free_irq_routing(kvm);
> + kvm_destroy_devices(kvm);
> for (i = 0; i < KVM_NR_BUSES; i++)
> kvm_io_bus_destroy(kvm->buses[i]);
> kvm_coalesced_mmio_free(kvm);
> @@ -2178,6 +2191,86 @@ out:
> }
> #endif
>
> +static int kvm_ioctl_create_device(struct kvm *kvm,
> + struct kvm_create_device *cd)
> +{
> + struct kvm_device *dev = NULL;
> + bool test = cd->flags & KVM_CREATE_DEVICE_TEST;
> + int id;
> + int r;
> +
> + mutex_lock(&kvm->lock);
> +
> + id = kvm->num_devices;
> + if (id >= KVM_MAX_DEVICES && !test) {
> + r = -ENOSPC;
> + goto out;
> + }
> +
> + switch (cd->type) {
> + default:
> + r = -ENODEV;
> + goto out;
> + }
do we really believe that there will be any arch-generic recognition
of types? shouldn't this be a call to an arch-specific function
instead. Which makes me wonder whether the device type IDs should be
arch specific as well...
> +
> + if (test) {
> + WARN_ON_ONCE(dev);
> + goto out;
> + }
> +
> + if (r == 0) {
> + WARN_ON_ONCE(dev->type != cd->type);
> +
> + kvm->devices[id] = dev;
> + cd->id = id;
> + kvm->num_devices++;
> + }
> +
> +out:
> + mutex_unlock(&kvm->lock);
> + return r;
> +}
> +
> +static int kvm_ioctl_device_attr(struct kvm *kvm, int ioctl,
> + struct kvm_device_attr *attr)
> +{
> + struct kvm_device *dev;
> + int (*accessor)(struct kvm *kvm, struct kvm_device *dev,
> + struct kvm_device_attr *attr);
> +
> + if (attr->dev >= KVM_MAX_DEVICES)
> + return -ENODEV;
> +
> + dev = kvm->devices[attr->dev];
> + if (!dev)
> + return -ENODEV;
> +
> + switch (ioctl) {
> + case KVM_SET_DEVICE_ATTR:
> + if (attr->group == KVM_DEV_ATTR_COMMON &&
> + attr->attr == KVM_DEV_ATTR_TYPE)
> + return -EPERM;
> +
> + accessor = dev->set_attr;
> + break;
> + case KVM_GET_DEVICE_ATTR:
> + if (attr->group == KVM_DEV_ATTR_COMMON &&
> + attr->attr == KVM_DEV_ATTR_TYPE)
> + return dev->type;
> +
> + accessor = dev->get_attr;
> + break;
> + case KVM_HAS_DEVICE_ATTR:
> + accessor = dev->has_attr;
> + break;
> + }
> +
> + if (!accessor)
> + return -EPERM;
> +
> + return accessor(kvm, dev, attr);
> +}
> +
> static long kvm_vm_ioctl(struct file *filp,
> unsigned int ioctl, unsigned long arg)
> {
> @@ -2292,6 +2385,40 @@ static long kvm_vm_ioctl(struct file *filp,
> break;
> }
> #endif
> + case KVM_CREATE_DEVICE: {
> + struct kvm_create_device cd;
> +
> + r = -EFAULT;
> + if (copy_from_user(&cd, argp, sizeof(cd)))
> + goto out;
> +
> + r = kvm_ioctl_create_device(kvm, &cd);
> + if (r)
> + goto out;
> +
> + r = -EFAULT;
> + if (copy_to_user(argp, &cd, sizeof(cd)))
> + goto out;
> +
> + r = 0;
> + break;
> + }
> + case KVM_SET_DEVICE_ATTR:
> + case KVM_GET_DEVICE_ATTR:
> + case KVM_HAS_DEVICE_ATTR: {
> + struct kvm_device_attr attr;
> +
> + r = -EFAULT;
> + if (copy_from_user(&attr, argp, sizeof(attr)))
> + goto out;
> +
> + r = kvm_ioctl_device_attr(kvm, ioctl, &attr);
> + if (r)
> + goto out;
> +
> + r = 0;
> + break;
> + }
> default:
> r = kvm_arch_vm_ioctl(filp, ioctl, arg);
> if (r == -ENOTTY)
> --
> 1.7.9.5
>
>
> --
> To unsubscribe from this list: send the line "unsubscribe kvm" in
> the body of a message to [email protected]
> More majordomo info at http://vger.kernel.org/majordomo-info.html
--
To unsubscribe from this list: send the line "unsubscribe kvm-ppc" in
the body of a message to [email protected]
More majordomo info at http://vger.kernel.org/majordomo-info.html