Hi all,
Started reading through the git tree at
git://github.com/virtualopensystems/linux-kvm-arm.git (kvm-a15-v6-stage
branch), and noticed some things. I'm learning ARM as I go, so
apologies in advance for any dumb questions.
Psuedo-quoted below:
38049977d26ac3ca35cf5e18e45d0d54224749af wrote:
> diff --git a/arch/arm/mach-vexpress/Kconfig b/arch/arm/mach-vexpress/Kconfig
> index 0aa8542..1d42907 100644
> --- a/arch/arm/mach-vexpress/Kconfig
> +++ b/arch/arm/mach-vexpress/Kconfig
> @@ -39,6 +39,7 @@ config ARCH_VEXPRESS_CA15X4
> depends on VEXPRESS_EXTENDED_MEMORY_MAP
> select CPU_V7
> select ARM_ARCH_TIMER
> + select ARM_VIRT_EXT
> select ARM_GIC
> select ARM_GIC_VPPI
> select HAVE_SMP
> diff --git a/arch/arm/mm/Kconfig b/arch/arm/mm/Kconfig
> index 1a3ca24..5467b28 100644
> --- a/arch/arm/mm/Kconfig
> +++ b/arch/arm/mm/Kconfig
> @@ -640,6 +640,14 @@ config ARM_LPAE
>
> If unsure, say N.
>
> +config ARM_VIRT_EXT
> + bool "Support for ARM Virtualization Extensions"
> + depends on ARM_LPAE
> + help
> + Say Y if you have an ARMv7 processor supporting the ARM hardware
> + Virtualization extensions. KVM depends on this feature and will
> + not run without it being selected.
> +
It's usually a bad idea to SELECT an option which has a prompt, or one
which has dependencies. In this case, ARCH_VEXPRESS_CA15X4 will set
ARM_VIRT_EXT without ARM_LPAE. You need to either select ARM_LPAE in
ARCH_VEXPRESS_CA15X4, or not select ARM_VIRT_EXT and make that depend on
ARM_LPAE and ARCH_VEXPRESS_CA15X4, or just make KVM depends on ARM_LPAE.
> diff --git a/arch/arm/include/asm/kvm_para.h b/arch/arm/include/asm/kvm_para.h
> new file mode 100644
> index 0000000..7ce5f1c
> --- /dev/null
> +++ b/arch/arm/include/asm/kvm_para.h
> @@ -0,0 +1,9 @@
> +#ifndef _ASM_X86_KVM_PARA_H
> +#define _ASM_X86_KVM_PARA_H
I think you mean _ASM_ARM_ here. I know you only did this to see who
was reading carefully :)
> +static inline bool vcpu_mode_priv(struct kvm_vcpu *vcpu)
> +{
> + return ((vcpu_mode(vcpu)) == MODE_USR) ? 0 : 1;
> +}
Why not return vcpu_mode(vcpu) != MODE_USR ? And should MODE_UND be
privileged?
> +#ifndef __ARM_KVM_TRACE_H__
> +#define __ARM_KVM_TRACE_H__
> +
> +#include <linux/types.h>
> +#include <linux/kvm_types.h>
> +#include <linux/kvm_host.h>
> +
> +void __kvm_print_msg(char *_fmt, ...);
> +
> +#define kvm_err(err, fmt, args...) do { \
> + __kvm_print_msg(KERN_ERR "KVM error [%s:%d]: (%d) ", \
> + __func__, __LINE__, err); \
> + __kvm_print_msg(fmt "\n", ##args); \
> +} while (0)
> +
> +#define __kvm_msg(fmt, args...) do { \
> + __kvm_print_msg(KERN_ERR "KVM [%s:%d]: ", __func__, __LINE__); \
> + __kvm_print_msg(fmt, ##args); \
> +} while (0)
> +
> +#define kvm_msg(__fmt, __args...) __kvm_msg(__fmt "\n", ##__args)
> +
> +
> +#define KVMARM_NOT_IMPLEMENTED() \
> +{ \
> + printk(KERN_ERR "KVM not implemented [%s:%d] in %s\n", \
> + __FILE__, __LINE__, __func__); \
> +}
kvm_host.h already has:
#define pr_unimpl(vcpu, fmt, ...) \
pr_err_ratelimited("kvm: %i: cpu%i " fmt, \
current->tgid, (vcpu)->vcpu_id , ## __VA_ARGS__)
#define kvm_printf(kvm, fmt ...) printk(KERN_DEBUG fmt)
#define vcpu_printf(vcpu, fmt...) kvm_printf(vcpu->kvm, fmt)
But that should probably be converted to pr_debug(), which gives you
#ifdef DEBUG and dynamic debug for free.
> +static unsigned long vcpu_reg_offsets[MODE_SYS + 1][16] = {
> + /* FIQ Registers */
> + {
> +
const is preferred these days where possible, so we can put stuff in the
r/o section.
> +/*
> + * Return a pointer to the register number valid in the specified mode of
> + * the virtual CPU.
> + */
> +u32 *kvm_vcpu_reg(struct kvm_vcpu *vcpu, u8 reg_num, u32 mode)
> +{
> + BUG_ON(reg_num > 15);
> + BUG_ON(mode > MODE_SYS);
> +
> + return (u32 *)((void *)&vcpu->arch + vcpu_reg_offsets[mode][reg_num]);
> +}
This is pretty neat! With only three different cases (?) I might have
been tempted to use a switch, but this is definitely nicer.
> +#define VM_STAT(x) (offsetof(struct kvm, stat.x), KVM_STAT_VM)
> +#define VCPU_STAT(x) (offsetof(struct kvm_vcpu, stat.x), KVM_STAT_VCPU)
> +
> +struct kvm_stats_debugfs_item debugfs_entries[] = {
> + { NULL }
> +};
That's weird. I see it's used in x86, not here though.
> +int kvm_arch_vcpu_ioctl_get_regs(struct kvm_vcpu *vcpu, struct kvm_regs
> *regs)
> +{
> + struct kvm_vcpu_regs *vcpu_regs = &vcpu->arch.regs;
> +
> + /*
> + * GPRs and PSRs
> + */
> + memcpy(regs->regs0_7, &(vcpu_regs->usr_regs[0]), sizeof(u32) * 8);
> + memcpy(regs->usr_regs8_12, &(vcpu_regs->usr_regs[8]), sizeof(u32) *
> 5);
> + memcpy(regs->fiq_regs8_12, &(vcpu_regs->fiq_regs[0]), sizeof(u32) *
> 5);
> + regs->reg13[MODE_FIQ] = vcpu_regs->fiq_regs[5];
> + regs->reg14[MODE_FIQ] = vcpu_regs->fiq_regs[6];
> + regs->reg13[MODE_IRQ] = vcpu_regs->irq_regs[0];
> + regs->reg14[MODE_IRQ] = vcpu_regs->irq_regs[1];
> + regs->reg13[MODE_SVC] = vcpu_regs->svc_regs[0];
> + regs->reg14[MODE_SVC] = vcpu_regs->svc_regs[1];
> + regs->reg13[MODE_ABT] = vcpu_regs->abt_regs[0];
> + regs->reg14[MODE_ABT] = vcpu_regs->abt_regs[1];
> + regs->reg13[MODE_UND] = vcpu_regs->und_regs[0];
> + regs->reg14[MODE_UND] = vcpu_regs->und_regs[1];
> + regs->reg13[MODE_USR] = vcpu_regs->usr_regs[0];
> + regs->reg14[MODE_USR] = vcpu_regs->usr_regs[1];
Can we use the vcpu_reg_offsets[] logic here somehow, rather than
open-coding the mapping again? Maybe not worth it?
In 5cbffd9ca63ece23593e11eb0cdb1a937d398a0c:
> -static void identity_mapping_add(pgd_t *pgd, unsigned long addr, unsigned
> long
> +static void __identity_mapping_add(pgd_t *pgd, unsigned long addr,
> + unsigned long end, bool hyp_mapping)
> {
> unsigned long prot, next;
>
> prot = PMD_TYPE_SECT | PMD_SECT_AP_WRITE | PMD_SECT_AF;
> +
> +#ifdef CONFIG_ARM_LPAE
> + if (hyp_mapping)
> + prot |= PMD_SECT_AP1;
> +#endif
> +
> if (cpu_architecture() <= CPU_ARCH_ARMv5TEJ && !cpu_is_xscale())
> prot |= PMD_BIT4;
>
> @@ -75,6 +83,11 @@ static void identity_mapping_add(pgd_t *pgd, unsigned long
> ad
>
> extern char __idmap_text_start[], __idmap_text_end[];
>
> +static void identity_mapping_add(pgd_t *pgd, unsigned long addr, unsigned
> long
> +{
> + __identity_mapping_add(pgd, addr, end, false);
> +}
> +
> static int __init init_static_idmap(void)
> {
> phys_addr_t idmap_start, idmap_end;
Since this only has one, internal caller, introducing this indirection
is a bit weird. How about just changing the prototype of
identity_mapping_add and the one caller?
> +/*
> + * This version actually frees the underlying pmds for all pgds in range and
> + * clear the pgds themselves afterwards.
> + */
> +void hyp_identity_mapping_del(pgd_t *pgd, unsigned long addr, unsigned long
> end
s/del/free/ maybe? Even truncate the names to hyp_idmap_add / hyp_idmap_free?
In 23c09018329da88b36cad96a197420a08c9542f2:
> + /*
> + * Allocate stack pages for Hypervisor-mode
> + */
> + for_each_possible_cpu(cpu) {
> + void *stack_page;
> +
> + stack_page = (void *)__get_free_page(GFP_KERNEL);
> + if (!stack_page) {
> + err = -ENOMEM;
> + goto out_free_pgd;
> + }
This leaks memory on error. You need to free the pages of already-done
cpus. Since free_page() handles 0 address as noop, this is pretty easy
to fix though.
> +static void cpu_set_vector(void *vector)
> +{
> + register unsigned long vector_ptr asm("r0");
> + register unsigned long smc_hyp_nr asm("r7");
> +
> + vector_ptr = (unsigned long)vector;
> + smc_hyp_nr = SMCHYP_HVBAR_W;
> +
> + /*
> + * Set the HVBAR
> + */
> + asm volatile (
> + "smc #0\n\t" : :
> + [vector_ptr] "r" (vector_ptr),
> + [smc_hyp_nr] "r" (smc_hyp_nr));
> +}
Ah, right, the bootloader sets a Secure Mode stub to allow us to change
HVBAR. And this stub only has the SMCHYP_HVBAR_W function so far. Is
there some standard here, or is it just made up? AFAICT it doesn't
indicate failture if you hand it a different function, does it?
What are the alternatives? Can we put the monitor vector somewhere the
OS can change it? That would be nice, because then we can do
*anything*...
> + asm volatile (
> + "hvc #0\n\t" : :
> + [pgd_ptr] "r" (pgd_ptr),
> + [stack_ptr] "r" (hyp_stack_ptr));
This bouncing into PL2 all the time seems a bit awkward. I assume Stuff
Breaks if we try to stay in PL2 all the time?
--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to [email protected]
More majordomo info at http://vger.kernel.org/majordomo-info.html