On Sat, Oct 20, 2012 at 6:27 AM, Marc Zyngier <[email protected]> wrote:
> On Sat, 20 Oct 2012 00:14:42 -0400, Christoffer Dall
> <[email protected]> wrote:
>> The vgic virtual cpu and emulated distributor interfaces must
>> be mapped at a given physical address in the guest. This address is
>> provided through the KVM_SET_DEVICE_ADDRESS ioctl, which happens after
>> the KVM_CREATE_IRQCHIP ioctl is called, but before the first VCPU is
>> excuted thorugh KVM_RUN. We create the vgic on KVM_CREATE_IRQCHIP, but
>> query kvm_vgic_ready(kvm), which checks if the vgic.vctrl_base field has
>> been set, before we execute a VCPU, and if it has not been set, we call
>> kvm_vgic_init, which takes care of the remaining setup.
>>
>> We use the IS_VGIC_ADDR_UNDEF() macro, which compares to the
>> VGIC_ADDR_UNDEF constant, to check if an address has been set; it's
>> unlikely that a device will sit on address 0, but since this is a part
>> of main kernel boot procedure if this stuff is enabled in the config,
>> I'm being paranoid.
>>
>> The distributor and vcpu base addresses used to be a per-host setting
>> global for all VMs, but this is not a requirement and when we want to
>> emulate several boards on a single host, we need the flexibility of
>> storing these guest addresses on a per-VM basis.
>>
>> Signed-off-by: Christoffer Dall <[email protected]>
>> ---
>> arch/arm/include/asm/kvm_vgic.h | 21 ++++++++--
>> arch/arm/kvm/arm.c | 10 ++++-
>> arch/arm/kvm/vgic.c | 82
>> +++++++++++++++++++++++++++------------
>> 3 files changed, 84 insertions(+), 29 deletions(-)
>>
>> diff --git a/arch/arm/include/asm/kvm_vgic.h
>> b/arch/arm/include/asm/kvm_vgic.h
>> index a688132..2de167f 100644
>> --- a/arch/arm/include/asm/kvm_vgic.h
>> +++ b/arch/arm/include/asm/kvm_vgic.h
>> @@ -154,13 +154,14 @@ static inline void vgic_bytemap_set_irq_val(struct
>> vgic_bytemap *x,
>> struct vgic_dist {
>> #ifdef CONFIG_KVM_ARM_VGIC
>> spinlock_t lock;
>> + bool ready;
>>
>> /* Virtual control interface mapping */
>> void __iomem *vctrl_base;
>>
>> - /* Distributor mapping in the guest */
>> - unsigned long vgic_dist_base;
>> - unsigned long vgic_dist_size;
>> + /* Distributor and vcpu interface mapping in the guest */
>> + phys_addr_t vgic_dist_base;
>> + phys_addr_t vgic_cpu_base;
>>
>> /* Distributor enabled */
>> u32 enabled;
>> @@ -243,6 +244,7 @@ struct kvm_exit_mmio;
>> #ifdef CONFIG_KVM_ARM_VGIC
>> int kvm_vgic_hyp_init(void);
>> int kvm_vgic_set_addr(struct kvm *kvm, unsigned long type, u64 addr);
>> +int kvm_vgic_create(struct kvm *kvm);
>> int kvm_vgic_init(struct kvm *kvm);
>> void kvm_vgic_vcpu_init(struct kvm_vcpu *vcpu);
>> void kvm_vgic_sync_to_cpu(struct kvm_vcpu *vcpu);
>> @@ -252,8 +254,9 @@ int kvm_vgic_inject_irq(struct kvm *kvm, int cpuid,
>> unsigned int irq_num,
>> int kvm_vgic_vcpu_pending_irq(struct kvm_vcpu *vcpu);
>> bool vgic_handle_mmio(struct kvm_vcpu *vcpu, struct kvm_run *run,
>> struct kvm_exit_mmio *mmio);
>> +bool irqchip_in_kernel(struct kvm *kvm);
>>
>> -#define irqchip_in_kernel(k) (!!((k)->arch.vgic.vctrl_base))
>> +#define vgic_initialized(k) ((k)->arch.vgic.ready)
>> #define
>> vgic_active_irq(v) (atomic_read(&(v)->arch.vgic_cpu.irq_active_count)
> ==
>> 0)
>>
>> #else
>> @@ -267,6 +270,11 @@ static inline int kvm_vgic_set_addr(struct kvm
> *kvm,
>> unsigned long type, u64 add
>> return 0;
>> }
>>
>> +static inline int kvm_vgic_create(struct kvm *kvm)
>> +{
>> + return 0;
>> +}
>> +
>> static inline int kvm_vgic_init(struct kvm *kvm)
>> {
>> return 0;
>> @@ -298,6 +306,11 @@ static inline int irqchip_in_kernel(struct kvm
> *kvm)
>> return 0;
>> }
>>
>> +static inline bool kvm_vgic_initialized(struct kvm *kvm)
>> +{
>> + return true;
>> +}
>> +
>> static inline int vgic_active_irq(struct kvm_vcpu *vcpu)
>> {
>> return 0;
>> diff --git a/arch/arm/kvm/arm.c b/arch/arm/kvm/arm.c
>> index 282794e..d64783e 100644
>> --- a/arch/arm/kvm/arm.c
>> +++ b/arch/arm/kvm/arm.c
>> @@ -636,6 +636,14 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu,
>> struct kvm_run *run)
>> if (unlikely(vcpu->arch.target < 0))
>> return -ENOEXEC;
>>
>> + /* Initalize the VGIC before running the vcpu */
>> + if (unlikely(irqchip_in_kernel(vcpu->kvm) &&
>> + !vgic_initialized(vcpu->kvm))) {
>> + ret = kvm_vgic_init(vcpu->kvm);
>> + if (ret)
>> + return ret;
>> + }
>> +
>> if (run->exit_reason == KVM_EXIT_MMIO) {
>> ret = kvm_handle_mmio_return(vcpu, vcpu->run);
>> if (ret)
>> @@ -889,7 +897,7 @@ long kvm_arch_vm_ioctl(struct file *filp,
>> #ifdef CONFIG_KVM_ARM_VGIC
>> case KVM_CREATE_IRQCHIP: {
>> if (vgic_present)
>> - return kvm_vgic_init(kvm);
>> + return kvm_vgic_create(kvm);
>> else
>> return -EINVAL;
>> }
>> diff --git a/arch/arm/kvm/vgic.c b/arch/arm/kvm/vgic.c
>> index d63b7f8..fa591db 100644
>> --- a/arch/arm/kvm/vgic.c
>> +++ b/arch/arm/kvm/vgic.c
>> @@ -65,12 +65,17 @@
>> * interrupt line to be sampled again.
>> */
>>
>> -/* Temporary hacks, need to be provided by userspace emulation */
>> -#define VGIC_DIST_BASE 0x2c001000
>> +#define VGIC_ADDR_UNDEF (-1)
>> +#define IS_VGIC_ADDR_UNDEF(_x) ((_x) == (typeof(_x))VGIC_ADDR_UNDEF)
>
> This is an awkward construct. You should really know the type of what
> you're checking. Consider:
I don't think it's awkward, but a pretty common construct. I can't
know which type I check when it's used for both a pointer and a
phys_addr_t.
>
> #define VGIC_ADDR_UNDEF ((phys_addr_t)(~0UL)
> #define IS_VGIC_ADDR_UNDEF(x) ((x) == VGIC_ADDR_UNDEF)
>
>> +
>> +
>> #define VGIC_DIST_SIZE 0x1000
>> #define VGIC_CPU_BASE 0x2c002000
>> #define VGIC_CPU_SIZE 0x2000
>>
>> +/* Physical address of vgic virtual cpu interface */
>> +static phys_addr_t vgic_vcpu_base;
>> +
>> /* Virtual control interface base address */
>> static void __iomem *vgic_vctrl_base;
>>
>> @@ -538,7 +543,7 @@ bool vgic_handle_mmio(struct kvm_vcpu *vcpu, struct
>> kvm_run *run, struct kvm_exi
>>
>> if (!irqchip_in_kernel(vcpu->kvm) ||
>> mmio->phys_addr < base ||
>> - (mmio->phys_addr + mmio->len) > (base + dist->vgic_dist_size))
>> + (mmio->phys_addr + mmio->len) > (base + VGIC_DIST_SIZE))
>> return false;
>>
>> range = find_matching_range(vgic_ranges, mmio, base);
>> @@ -1027,7 +1032,7 @@ void kvm_vgic_vcpu_init(struct kvm_vcpu *vcpu)
>> vgic_cpu->vgic_irq_lr_map[i] = LR_EMPTY;
>> }
>>
>> - BUG_ON(!vcpu->kvm->arch.vgic.vctrl_base);
>> + BUG_ON(IS_VGIC_ADDR_UNDEF(vcpu->kvm->arch.vgic.vctrl_base));
>> reg = readl_relaxed(vcpu->kvm->arch.vgic.vctrl_base + GICH_VTR);
>> vgic_cpu->nr_lr = (reg & 0x1f) + 1;
>>
>> @@ -1101,29 +1106,23 @@ out_free_irq:
>>
>> int kvm_vgic_init(struct kvm *kvm)
>> {
>> - int ret, i;
>> - struct resource vcpu_res;
>> + int ret = 0, i;
>>
>> mutex_lock(&kvm->lock);
>>
>> - if (of_address_to_resource(vgic_node, 3, &vcpu_res)) {
>> - kvm_err("Cannot obtain VCPU resource\n");
>> - ret = -ENXIO;
>> + if (vgic_initialized(kvm))
>> goto out;
>> - }
>>
>> - if (atomic_read(&kvm->online_vcpus) || kvm->arch.vgic.vctrl_base) {
>> - ret = -EEXIST;
>> + if (IS_VGIC_ADDR_UNDEF(kvm->arch.vgic.vgic_dist_base) ||
>> + IS_VGIC_ADDR_UNDEF(kvm->arch.vgic.vgic_cpu_base)) {
>> + kvm_err("Need to set vgic cpu and dist addresses first\n");
>> + ret = -ENXIO;
>> goto out;
>> }
>>
>> - spin_lock_init(&kvm->arch.vgic.lock);
>> - kvm->arch.vgic.vctrl_base = vgic_vctrl_base;
>> - kvm->arch.vgic.vgic_dist_base = VGIC_DIST_BASE;
>> - kvm->arch.vgic.vgic_dist_size = VGIC_DIST_SIZE;
>> + ret = kvm_phys_addr_ioremap(kvm, kvm->arch.vgic.vgic_cpu_base,
>> + vgic_vcpu_base, VGIC_CPU_SIZE);
>>
>> - ret = kvm_phys_addr_ioremap(kvm, VGIC_CPU_BASE,
>> - vcpu_res.start, VGIC_CPU_SIZE);
>> if (ret) {
>> kvm_err("Unable to remap VGIC CPU to VCPU\n");
>> goto out;
>> @@ -1132,12 +1131,45 @@ int kvm_vgic_init(struct kvm *kvm)
>> for (i = 32; i < VGIC_NR_IRQS; i += 4)
>> vgic_set_target_reg(kvm, 0, i);
>>
>> + kvm_timer_init(kvm);
>> + kvm->arch.vgic.ready = true;
>> out:
>> mutex_unlock(&kvm->lock);
>> + return ret;
>> +}
>>
>> - if (!ret)
>> - kvm_timer_init(kvm);
>> +bool irqchip_in_kernel(struct kvm *kvm)
>> +{
>> + return !(IS_VGIC_ADDR_UNDEF(vgic_vcpu_base));
>
> Erm... This check is supposed VM specific. Here, you check something that
> will be initialized once and for all when the first VM is run. Use one of
> the vgic *guest* base addresses instead.
>
Ouch, right you are.
(This whole scheme of checking various base addresses for whether
something is initialized or not seems quite brittle, but I don't want
to rework it before a merge)
>> +}
>>
>> +int kvm_vgic_create(struct kvm *kvm)
>> +{
>> + int ret;
>> + struct resource vcpu_res;
>> +
>> + mutex_lock(&kvm->lock);
>> +
>> + if (of_address_to_resource(vgic_node, 3, &vcpu_res)) {
>> + kvm_err("Cannot obtain VCPU resource\n");
>> + ret = -ENXIO;
>> + goto out;
>> + }
>
> This function is called on every VM creation. Consider moving this lookup
> and the vgic_vcpu_base assignment to kvm_vgic_hyp_init().
>
>> + if (atomic_read(&kvm->online_vcpus) || kvm->arch.vgic.vctrl_base) {
>> + ret = -EEXIST;
>> + goto out;
>> + }
>> +
>> + spin_lock_init(&kvm->arch.vgic.lock);
>> + kvm->arch.vgic.vctrl_base = vgic_vctrl_base;
>> + kvm->arch.vgic.vgic_dist_base = VGIC_ADDR_UNDEF;
>> + kvm->arch.vgic.vgic_cpu_base = VGIC_ADDR_UNDEF;
>> +
>> + vgic_vcpu_base = vcpu_res.start;
>> + ret = 0;
>> +out:
>> + mutex_unlock(&kvm->lock);
>> return ret;
>> }
>>
>> @@ -1151,12 +1183,14 @@ int kvm_vgic_set_addr(struct kvm *kvm, unsigned
>> long type, u64 addr)
>> mutex_lock(&kvm->lock);
>> switch (type) {
>> case KVM_VGIC_V2_ADDR_TYPE_DIST:
>> - if (addr != VGIC_DIST_BASE)
>> - return -EINVAL;
>> + if (!IS_VGIC_ADDR_UNDEF(kvm->arch.vgic.vgic_dist_base))
>> + return -EEXIST;
>> + kvm->arch.vgic.vgic_dist_base = addr;
>> break;
>> case KVM_VGIC_V2_ADDR_TYPE_CPU:
>> - if (addr != VGIC_CPU_BASE)
>> - return -EINVAL;
>> + if (!IS_VGIC_ADDR_UNDEF(kvm->arch.vgic.vgic_cpu_base))
>> + return -EEXIST;
>> + kvm->arch.vgic.vgic_cpu_base = addr;
>> break;
>> default:
>> r = -ENODEV;
>
> We need some additional checks here about the validity of the address (at
> least page aligned, and non overlapping with memory or other in-kernel
> device models).
>
since we don't have any other in-kernel device models that are memory
mapped, we should cross that bridge when/if we get there. It will not
affect stability of the host kernel, merely not properly boot a guest
if you supply bogus overlapping io regions.
See patch diff:
diff --git a/arch/arm/include/asm/kvm_vgic.h b/arch/arm/include/asm/kvm_vgic.h
index 2de167f..3c71c79 100644
--- a/arch/arm/include/asm/kvm_vgic.h
+++ b/arch/arm/include/asm/kvm_vgic.h
@@ -256,6 +256,7 @@ bool vgic_handle_mmio(struct kvm_vcpu *vcpu,
struct kvm_run *run,
struct kvm_exit_mmio *mmio);
bool irqchip_in_kernel(struct kvm *kvm);
+#define irqchip_in_kernel(k) ((k)->arch.vgic.vctrl_base)
#define vgic_initialized(k) ((k)->arch.vgic.ready)
#define vgic_active_irq(v)
(atomic_read(&(v)->arch.vgic_cpu.irq_active_count)
== 0)
diff --git a/arch/arm/kvm/mmu.c b/arch/arm/kvm/mmu.c
index 0ab098e..e5ace0e 100644
--- a/arch/arm/kvm/mmu.c
+++ b/arch/arm/kvm/mmu.c
@@ -422,7 +422,7 @@ static void stage2_clear_pte(struct kvm *kvm,
phys_addr_t addr)
}
static void stage2_set_pte(struct kvm *kvm, struct kvm_mmu_memory_cache *cache,
- phys_addr_t addr, const pte_t *new_pte)
+ phys_addr_t addr, const pte_t *new_pte, bool iomap)
{
pgd_t *pgd;
pud_t *pud;
@@ -454,6 +454,9 @@ static void stage2_set_pte(struct kvm *kvm, struct
kvm_mmu_memory_cache *cache,
} else
pte = pte_offset_kernel(pmd, addr);
+ if (iomap && pte_present(old_pte))
+ return -EFAULT;
+
/* Create 2nd stage page table mapping - Level 3 */
old_pte = *pte;
set_pte_ext(pte, *new_pte, 0);
@@ -489,7 +492,7 @@ int kvm_phys_addr_ioremap(struct kvm *kvm,
phys_addr_t guest_ipa,
if (ret)
goto out;
spin_lock(&kvm->mmu_lock);
- stage2_set_pte(kvm, &cache, addr, &pte);
+ stage2_set_pte(kvm, &cache, addr, &pte, true);
spin_unlock(&kvm->mmu_lock);
pfn++;
@@ -565,7 +568,7 @@ static int user_mem_abort(struct kvm_vcpu *vcpu,
phys_addr_t fault_ipa,
pte_val(new_pte) |= L_PTE_S2_RDWR;
kvm_set_pfn_dirty(pfn);
}
- stage2_set_pte(vcpu->kvm, memcache, fault_ipa, &new_pte);
+ stage2_set_pte(vcpu->kvm, memcache, fault_ipa, &new_pte, false);
out_unlock:
spin_unlock(&vcpu->kvm->mmu_lock);
@@ -716,7 +719,7 @@ static void kvm_set_spte_handler(struct kvm *kvm,
gpa_t gpa, void *data)
{
pte_t *pte = (pte_t *)data;
- stage2_set_pte(kvm, NULL, gpa, pte);
+ stage2_set_pte(kvm, NULL, gpa, pte, false);
}
diff --git a/arch/arm/kvm/vgic.c b/arch/arm/kvm/vgic.c
index fa591db..8fe0e70 100644
--- a/arch/arm/kvm/vgic.c
+++ b/arch/arm/kvm/vgic.c
@@ -1054,6 +1054,7 @@ int kvm_vgic_hyp_init(void)
int ret;
unsigned int irq;
struct resource vctrl_res;
+ struct resource vcpu_res;
vgic_node = of_find_compatible_node(NULL, NULL, "arm,cortex-a15-gic");
if (!vgic_node)
@@ -1094,6 +1095,13 @@ int kvm_vgic_hyp_init(void)
kvm_info("%s@%llx IRQ%d\n", vgic_node->name, vctrl_res.start, irq);
on_each_cpu(vgic_init_maintenance_interrupt, &irq, 1);
+ if (of_address_to_resource(vgic_node, 3, &vcpu_res)) {
+ kvm_err("Cannot obtain VCPU resource\n");
+ ret = -ENXIO;
+ goto out_unmap;
+ }
+ vgic_vcpu_base = vcpu_res.start;
+
return 0;
out_unmap:
@@ -1138,24 +1146,12 @@ out:
return ret;
}
-bool irqchip_in_kernel(struct kvm *kvm)
-{
- return !(IS_VGIC_ADDR_UNDEF(vgic_vcpu_base));
-}
-
int kvm_vgic_create(struct kvm *kvm)
{
int ret;
- struct resource vcpu_res;
mutex_lock(&kvm->lock);
- if (of_address_to_resource(vgic_node, 3, &vcpu_res)) {
- kvm_err("Cannot obtain VCPU resource\n");
- ret = -ENXIO;
- goto out;
- }
-
if (atomic_read(&kvm->online_vcpus) || kvm->arch.vgic.vctrl_base) {
ret = -EEXIST;
goto out;
@@ -1166,36 +1162,62 @@ int kvm_vgic_create(struct kvm *kvm)
kvm->arch.vgic.vgic_dist_base = VGIC_ADDR_UNDEF;
kvm->arch.vgic.vgic_cpu_base = VGIC_ADDR_UNDEF;
- vgic_vcpu_base = vcpu_res.start;
ret = 0;
out:
mutex_unlock(&kvm->lock);
return ret;
}
+static bool vgic_ioaddr_overlap(struct kvm *kvm)
+{
+ phys_addr_t dist = kvm->arch.vgic.vgic_dist_base;
+ phys_addr_t cpu = kvm->arch.vgic.vgic_cpu_base;
+
+ if (IS_VGIC_ADDR_UNDEF(dist) || IS_VGIC_ADDR_UNDEF(cpu))
+ return false;
+ if ((dist <= cpu && dist + VGIC_DIST_SIZE > cpu) ||
+ (cpu <= dist && cpu + VGIC_CPU_SIZE > dist))
+ return true;
+ return false;
+}
+
int kvm_vgic_set_addr(struct kvm *kvm, unsigned long type, u64 addr)
{
int r = 0;
+ struct vgic_dist *vgic = &kvm->arch.vgic;
if (addr & ~KVM_PHYS_MASK)
return -E2BIG;
+ if (addr & ~PAGE_MASK)
+ return -EINVAL;
+
mutex_lock(&kvm->lock);
switch (type) {
case KVM_VGIC_V2_ADDR_TYPE_DIST:
- if (!IS_VGIC_ADDR_UNDEF(kvm->arch.vgic.vgic_dist_base))
+ if (!IS_VGIC_ADDR_UNDEF(vgic->vgic_dist_base))
return -EEXIST;
+ if (addr + VGIC_DIST_SIZE < addr)
+ return -EINVAL;
kvm->arch.vgic.vgic_dist_base = addr;
break;
case KVM_VGIC_V2_ADDR_TYPE_CPU:
- if (!IS_VGIC_ADDR_UNDEF(kvm->arch.vgic.vgic_cpu_base))
+ if (!IS_VGIC_ADDR_UNDEF(vgic->vgic_cpu_base))
return -EEXIST;
+ if (addr + VGIC_CPU_SIZE < addr)
+ return -EINVAL;
kvm->arch.vgic.vgic_cpu_base = addr;
break;
default:
r = -ENODEV;
}
+ if (vgic_ioaddr_overlap(kvm)) {
+ kvm->arch.vgic.vgic_dist_base = VGIC_ADDR_UNDEF;
+ kvm->arch.vgic.vgic_cpu_base = VGIC_ADDR_UNDEF;
+ return -EINVAL;
+ }
+
mutex_unlock(&kvm->lock);
return r;
}
--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to [email protected]
More majordomo info at http://vger.kernel.org/majordomo-info.html