On 07.03.2013, at 04:29, Paul Mackerras wrote:
> This adds a new ioctl, KVM_SET_IRQ_ARCHITECTURE, which is intended to
> be called by userspace to specify that it wishes the kernel to emulate
> a specific interrupt controller architecture. This doesn't imply the
> creation of any specific device, but does indicate that when vcpus are
> created, space for per-vcpu interrupt controller state should be
> allocated. Having this ioctl enables userspace to defer creation of
> the actual interrupt controller device(s) until after the vcpus are
> created.
>
> The KVM_SET_IRQ_ARCHITECTURE ioctl takes a single 32-bit unsigned int
> as its parameter. Values for this parameter will be defined in
> subsequent patches. The value of 0 means no in-kernel interrupt
> controller emulation.
>
> In order to ensure that this ioctl will fail once any vcpu has been
> created, we use an arch.vcpus_created field to indicate that vcpu
> creation has commenced. We don't use the online_vcpus field because
> that is only incremented after vcpu creation; if we used that there
> would be a window during the first KVM_CREATE_VCPU ioctl where the
> first vcpu had been created but the interrupt architecture could still
> be changed.
>
> Signed-off-by: Paul Mackerras <[email protected]>
Could you please (in a quick and drafty way) try and see if setting the IRQ
arch (using enable_cap) after the vcpu got created would work for you?
That enable_cap would then have to loop through all devices and notify irq
controllers that a new cpu got spawned.
All vcpu local payloads would have to get allocated and initialized outside of
vcpu_create too then.
I don't have a good feeling for how hard this would be and whether locking
would become overly difficult. I think it's fair to restrict the enable_cap to
only work when no other vcpu is running. Of course, not requiring a stopped
machine would make hotplug easier for user space :).
If it turns out to be hard and / or so complex that it's likely to make things
more buggy than is worth, this patch is the way to go IMHO.
Alex
> ---
> So, should this all be done in generic code?
>
> Documentation/virtual/kvm/api.txt | 22 ++++++++++++
> arch/powerpc/include/asm/kvm_host.h | 2 ++
> arch/powerpc/kvm/powerpc.c | 66 +++++++++++++++++++++++++++++++++--
> include/uapi/linux/kvm.h | 3 ++
> 4 files changed, 90 insertions(+), 3 deletions(-)
>
> diff --git a/Documentation/virtual/kvm/api.txt
> b/Documentation/virtual/kvm/api.txt
> index cce500a..d550313 100644
> --- a/Documentation/virtual/kvm/api.txt
> +++ b/Documentation/virtual/kvm/api.txt
> @@ -2126,6 +2126,28 @@ header; first `n_valid' valid entries with contents
> from the data
> written, then `n_invalid' invalid entries, invalidating any previously
> valid entries found.
>
> +4.79 KVM_SET_IRQ_ARCHITECTURE
> +
> +Capability: KVM_CAP_IRQ_ARCH
> +Architecture: powerpc
> +Type: vm ioctl
> +Parameters: Pointer to u32 (in)
> +Returns: 0 on success, -1 on error
> +
> +This is called before any vcpus are created, if in-kernel interrupt
> +controller emulation is desired, to specify what overall interrupt
> +controller architecture should be emulated. Having this called before
> +any vcpus are created allows per-vcpu interrupt controller state to be
> +allocated at vcpu creation time, and allows the creation of the actual
> +interrupt controller device to be deferred until after the vcpus are
> +created.
> +
> +The parameter is a 32-bit unsigned integer specifying the
> +architecture, or the value 0 to specify that no emulation should be
> +done. If the requested architecture is not supported by the kernel,
> +this ioctl returns an EINVAL error. Otherwise, if this ioctl is
> +called after any vcpus have been created, it returns an EBUSY error.
> +
>
> 5. The kvm_run structure
> ------------------------
> diff --git a/arch/powerpc/include/asm/kvm_host.h
> b/arch/powerpc/include/asm/kvm_host.h
> index 8a72d59..e21ea1f 100644
> --- a/arch/powerpc/include/asm/kvm_host.h
> +++ b/arch/powerpc/include/asm/kvm_host.h
> @@ -227,6 +227,8 @@ struct kvm_arch_memory_slot {
> };
>
> struct kvm_arch {
> + unsigned int irq_arch;
> + int vcpus_created;
> unsigned int lpid;
> #ifdef CONFIG_KVM_BOOK3S_64_HV
> unsigned long hpt_virt;
> diff --git a/arch/powerpc/kvm/powerpc.c b/arch/powerpc/kvm/powerpc.c
> index 934413c..b681746 100644
> --- a/arch/powerpc/kvm/powerpc.c
> +++ b/arch/powerpc/kvm/powerpc.c
> @@ -441,10 +441,21 @@ struct kvm_vcpu *kvm_arch_vcpu_create(struct kvm *kvm,
> unsigned int id)
> {
> struct kvm_vcpu *vcpu;
> vcpu = kvmppc_core_vcpu_create(kvm, id);
> - if (!IS_ERR(vcpu)) {
> - vcpu->arch.wqp = &vcpu->wq;
> - kvmppc_create_vcpu_debugfs(vcpu, id);
> + if (IS_ERR(vcpu))
> + goto out;
> +
> + /* Create per-vcpu irq controller state if needed */
> + mutex_lock(&kvm->lock);
> + kvm->arch.vcpus_created = 1;
> + switch (kvm->arch.irq_arch) {
> + default:
> + break;
> }
> + mutex_unlock(&kvm->lock);
> +
> + vcpu->arch.wqp = &vcpu->wq;
> + kvmppc_create_vcpu_debugfs(vcpu, id);
> + out:
> return vcpu;
> }
>
> @@ -459,6 +470,12 @@ void kvm_arch_vcpu_free(struct kvm_vcpu *vcpu)
> hrtimer_cancel(&vcpu->arch.dec_timer);
> tasklet_kill(&vcpu->arch.tasklet);
>
> + /* Destroy per-vcpu irq controller state */
> + switch (vcpu->kvm->arch.irq_arch) {
> + default:
> + break;
> + }
> +
> kvmppc_remove_vcpu_debugfs(vcpu);
> kvmppc_core_vcpu_free(vcpu);
> }
> @@ -996,6 +1013,49 @@ long kvm_arch_vm_ioctl(struct file *filp,
> break;
> }
> #endif /* CONFIG_PPC_BOOK3S_64 */
> +
> + case KVM_IRQ_LINE: {
> + struct kvm *kvm = filp->private_data;
> + struct kvm_irq_level args;
> +
> + r = -EFAULT;
> + if (copy_from_user(&args, argp, sizeof(args)))
> + break;
> + switch (kvm->arch.irq_arch) {
> + default:
> + r = -EINVAL;
> + }
> + break;
> + }
> +
> + case KVM_SET_IRQ_ARCHITECTURE: {
> + struct kvm *kvm = filp->private_data;
> + u32 arch_id;
> +
> + r = -EFAULT;
> + if (get_user(arch_id, (u32 __user *)argp))
> + break;
> + r = 0;
> + switch (arch_id) {
> + case 0: /* no emulation */
> + break;
> + default:
> + r = -EINVAL;
> + break;
> + }
> + if (r)
> + break;
> +
> + /* mutex ensures we don't race with vcpu creation */
> + mutex_lock(&kvm->lock);
> + if (!kvm->arch.vcpus_created)
> + kvm->arch.irq_arch = arch_id;
> + else
> + r = -EBUSY;
> + mutex_unlock(&kvm->lock);
> + break;
> + }
> +
> default:
> r = -ENOTTY;
> }
> diff --git a/include/uapi/linux/kvm.h b/include/uapi/linux/kvm.h
> index 9a2db57..2c0ea1c 100644
> --- a/include/uapi/linux/kvm.h
> +++ b/include/uapi/linux/kvm.h
> @@ -662,6 +662,7 @@ struct kvm_ppc_smmu_info {
> #define KVM_CAP_PPC_HTAB_FD 84
> #define KVM_CAP_S390_CSS_SUPPORT 85
> #define KVM_CAP_PPC_EPR 86
> +#define KVM_CAP_IRQ_ARCH 87
>
> #ifdef KVM_CAP_IRQ_ROUTING
>
> @@ -889,6 +890,8 @@ struct kvm_s390_ucas_mapping {
> #define KVM_ALLOCATE_RMA _IOR(KVMIO, 0xa9, struct kvm_allocate_rma)
> /* Available with KVM_CAP_PPC_HTAB_FD */
> #define KVM_PPC_GET_HTAB_FD _IOW(KVMIO, 0xaa, struct kvm_get_htab_fd)
> +/* Available with KVM_CAP_IRQ_ARCH */
> +#define KVM_SET_IRQ_ARCHITECTURE _IOW(KVMIO, 0xab, __u32)
>
> /*
> * ioctls for vcpu fds
> --
> 1.7.10.4
>
--
To unsubscribe from this list: send the line "unsubscribe kvm-ppc" in
the body of a message to [email protected]
More majordomo info at http://vger.kernel.org/majordomo-info.html