Re: [PATCH] kvm: x86: svm: remove SVM_EXIT_READ_CR* intercepts
On 03/12/2015 04:20 PM, Radim Krčmář wrote: 2015-03-12 15:17-0500, Joel Schopp: There isn't really a valid reason for kvm to intercept cr* reads on svm hardware. The current kvm code just ends up returning the register There is no need to intercept CR* if the value that the guest should see is equal to what we set there, but that is not always the case: - CR0 might differ from what the guest should see because of lazy fpu Based on our previous conversations I understand why we have to trap the write to the CR0 ts bit for lazy fpu, but don't understand why that should affect a read. I'll take another look at the code to see what I'm missing. You are probably correct in which case I'll modify the patch to only turn off the read intercepts when lazy fpu isn't active. - CR3 isn't intercepted with nested paging and it should differ otherwise - CR4 contains PAE bit when run without nested paging CR2 and CR8 already aren't intercepted, so it looks like only CR0 and CR4 could use some optimizations. I'll send out a v2 with these less aggressive optimizations. -- To unsubscribe from this list: send the line unsubscribe kvm in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
[PATCH] kvm: x86: svm: remove SVM_EXIT_READ_CR* intercepts
There isn't really a valid reason for kvm to intercept cr* reads on svm hardware. The current kvm code just ends up returning the register Signed-off-by: Joel Schopp joel.sch...@amd.com --- arch/x86/kvm/svm.c | 41 - 1 file changed, 4 insertions(+), 37 deletions(-) diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c index cc618c8..c3d10e6 100644 --- a/arch/x86/kvm/svm.c +++ b/arch/x86/kvm/svm.c @@ -1090,9 +1090,6 @@ static void init_vmcb(struct vcpu_svm *svm) svm-vcpu.fpu_active = 1; svm-vcpu.arch.hflags = 0; - set_cr_intercept(svm, INTERCEPT_CR0_READ); - set_cr_intercept(svm, INTERCEPT_CR3_READ); - set_cr_intercept(svm, INTERCEPT_CR4_READ); set_cr_intercept(svm, INTERCEPT_CR0_WRITE); set_cr_intercept(svm, INTERCEPT_CR3_WRITE); set_cr_intercept(svm, INTERCEPT_CR4_WRITE); @@ -1174,7 +1171,6 @@ static void init_vmcb(struct vcpu_svm *svm) control-nested_ctl = 1; clr_intercept(svm, INTERCEPT_INVLPG); clr_exception_intercept(svm, PF_VECTOR); - clr_cr_intercept(svm, INTERCEPT_CR3_READ); clr_cr_intercept(svm, INTERCEPT_CR3_WRITE); save-g_pat = 0x0007040600070406ULL; save-cr3 = 0; @@ -2968,29 +2964,10 @@ static int cr_interception(struct vcpu_svm *svm) kvm_queue_exception(svm-vcpu, UD_VECTOR); return 1; } - } else { /* mov from cr */ - switch (cr) { - case 0: - val = kvm_read_cr0(svm-vcpu); - break; - case 2: - val = svm-vcpu.arch.cr2; - break; - case 3: - val = kvm_read_cr3(svm-vcpu); - break; - case 4: - val = kvm_read_cr4(svm-vcpu); - break; - case 8: - val = kvm_get_cr8(svm-vcpu); - break; - default: - WARN(1, unhandled read from CR%d, cr); - kvm_queue_exception(svm-vcpu, UD_VECTOR); - return 1; - } - kvm_register_write(svm-vcpu, reg, val); + } else { /* mov from cr, should never trap in svm */ + WARN(1, unhandled read from CR%d, cr); + kvm_queue_exception(svm-vcpu, UD_VECTOR); + return 1; } kvm_complete_insn_gp(svm-vcpu, err); @@ -3321,10 +3298,6 @@ static int mwait_interception(struct vcpu_svm *svm) } static int (*const svm_exit_handlers[])(struct vcpu_svm *svm) = { 
- [SVM_EXIT_READ_CR0] = cr_interception, - [SVM_EXIT_READ_CR3] = cr_interception, - [SVM_EXIT_READ_CR4] = cr_interception, - [SVM_EXIT_READ_CR8] = cr_interception, [SVM_EXIT_CR0_SEL_WRITE]= emulate_on_interception, [SVM_EXIT_WRITE_CR0]= cr_interception, [SVM_EXIT_WRITE_CR3]= cr_interception, @@ -4151,11 +4124,9 @@ static const struct __x86_intercept { u32 exit_code; enum x86_intercept_stage stage; } x86_intercept_map[] = { - [x86_intercept_cr_read] = POST_EX(SVM_EXIT_READ_CR0), [x86_intercept_cr_write]= POST_EX(SVM_EXIT_WRITE_CR0), [x86_intercept_clts]= POST_EX(SVM_EXIT_WRITE_CR0), [x86_intercept_lmsw]= POST_EX(SVM_EXIT_WRITE_CR0), - [x86_intercept_smsw]= POST_EX(SVM_EXIT_READ_CR0), [x86_intercept_dr_read] = POST_EX(SVM_EXIT_READ_DR0), [x86_intercept_dr_write]= POST_EX(SVM_EXIT_WRITE_DR0), [x86_intercept_sldt]= POST_EX(SVM_EXIT_LDTR_READ), @@ -4221,10 +4192,6 @@ static int svm_check_intercept(struct kvm_vcpu *vcpu, goto out; switch (icpt_info.exit_code) { - case SVM_EXIT_READ_CR0: - if (info-intercept == x86_intercept_cr_read) - icpt_info.exit_code += info-modrm_reg; - break; case SVM_EXIT_WRITE_CR0: { unsigned long cr0, val; u64 intercept; -- To unsubscribe from this list: send the line unsubscribe kvm in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
[PATCH] x86: svm: use cr_interception for SVM_EXIT_CR0_SEL_WRITE
From: David Kaplan david.kap...@amd.com Another patch in my war on emulate_on_interception() use as a svm exit handler. These were pulled out of a larger patch at the suggestion of Radim Krcmar, see https://lkml.org/lkml/2015/2/25/559 Signed-off-by: David Kaplan david.kap...@amd.com [separated out just cr_interception part from larger removal of INTERCEPT_CR0_WRITE, forward ported, tested] Signed-off-by: Joel Schopp joel.sch...@amd.com --- arch/x86/kvm/svm.c |7 +-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c index d319e0c..57f0240 100644 --- a/arch/x86/kvm/svm.c +++ b/arch/x86/kvm/svm.c @@ -2940,7 +2940,10 @@ static int cr_interception(struct vcpu_svm *svm) return emulate_on_interception(svm); reg = svm-vmcb-control.exit_info_1 SVM_EXITINFO_REG_MASK; - cr = svm-vmcb-control.exit_code - SVM_EXIT_READ_CR0; + if (svm-vmcb-control.exit_code == SVM_EXIT_SEL_CR0_WRITE) + cr = SVM_EXIT_WRITE_CR0 - SVM_EXIT_READ_CR0; + else + cr = svm-vmcb-control.exit_code - SVM_EXIT_READ_CR0; err = 0; if (cr = 16) { /* mov to cr */ @@ -3325,7 +3328,7 @@ static int (*const svm_exit_handlers[])(struct vcpu_svm *svm) = { [SVM_EXIT_READ_CR3] = cr_interception, [SVM_EXIT_READ_CR4] = cr_interception, [SVM_EXIT_READ_CR8] = cr_interception, - [SVM_EXIT_CR0_SEL_WRITE]= emulate_on_interception, + [SVM_EXIT_CR0_SEL_WRITE]= cr_interception, [SVM_EXIT_WRITE_CR0]= cr_interception, [SVM_EXIT_WRITE_CR3]= cr_interception, [SVM_EXIT_WRITE_CR4]= cr_interception, -- To unsubscribe from this list: send the line unsubscribe kvm in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
[PATCH v2] x86: svm: use cr_interception for SVM_EXIT_CR0_SEL_WRITE
From: David Kaplan david.kap...@amd.com Another patch in my war on emulate_on_interception() use as a svm exit handler. These were pulled out of a larger patch at the suggestion of Radim Krcmar, see https://lkml.org/lkml/2015/2/25/559 Changes since v1: * fixed typo introduced after test, retested Signed-off-by: David Kaplan david.kap...@amd.com [separated out just cr_interception part from larger removal of INTERCEPT_CR0_WRITE, forward ported, tested] Signed-off-by: Joel Schopp joel.sch...@amd.com --- arch/x86/kvm/svm.c |7 +-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c index d319e0c..16ad05b 100644 --- a/arch/x86/kvm/svm.c +++ b/arch/x86/kvm/svm.c @@ -2940,7 +2940,10 @@ static int cr_interception(struct vcpu_svm *svm) return emulate_on_interception(svm); reg = svm-vmcb-control.exit_info_1 SVM_EXITINFO_REG_MASK; - cr = svm-vmcb-control.exit_code - SVM_EXIT_READ_CR0; + if (svm-vmcb-control.exit_code == SVM_EXIT_CR0_SEL_WRITE) + cr = SVM_EXIT_WRITE_CR0 - SVM_EXIT_READ_CR0; + else + cr = svm-vmcb-control.exit_code - SVM_EXIT_READ_CR0; err = 0; if (cr = 16) { /* mov to cr */ @@ -3325,7 +3328,7 @@ static int (*const svm_exit_handlers[])(struct vcpu_svm *svm) = { [SVM_EXIT_READ_CR3] = cr_interception, [SVM_EXIT_READ_CR4] = cr_interception, [SVM_EXIT_READ_CR8] = cr_interception, - [SVM_EXIT_CR0_SEL_WRITE]= emulate_on_interception, + [SVM_EXIT_CR0_SEL_WRITE]= cr_interception, [SVM_EXIT_WRITE_CR0]= cr_interception, [SVM_EXIT_WRITE_CR3]= cr_interception, [SVM_EXIT_WRITE_CR4]= cr_interception, -- To unsubscribe from this list: send the line unsubscribe kvm in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [PATCH v3] x86: svm: use kvm_fast_pio_in()
Thank you for your detailed review on several of my patches. +static int complete_fast_pio(struct kvm_vcpu *vcpu) (complete_fast_pio_in()?) If I do a v4 I'll adopt that name. +{ +unsigned long new_rax = kvm_register_read(vcpu, VCPU_REGS_RAX); Shouldn't we handle writes in EAX differently than in AX and AL, because of implicit zero extension. I don't think the implicit zero extension hurts us here, but maybe there is something I'm missing that I need understand. Could you explain this further? + +BUG_ON(!vcpu-arch.pio.count); +BUG_ON(vcpu-arch.pio.count * vcpu-arch.pio.size sizeof(new_rax)); (Looking at it again, a check for 'vcpu-arch.pio.count == 1' would be sufficient.) I prefer the checks that are there now after your last review, especially since surrounded by BUG_ON they only run on debug kernels. + +memcpy(new_rax, vcpu, sizeof(new_rax)); +trace_kvm_pio(KVM_PIO_IN, vcpu-arch.pio.port, vcpu-arch.pio.size, + vcpu-arch.pio.count, vcpu-arch.pio_data); +kvm_register_write(vcpu, VCPU_REGS_RAX, new_rax); +vcpu-arch.pio.count = 0; I think it is better to call emulator_pio_in_emulated directly, like emulator_pio_in_out(vcpu-arch.emulate_ctxt, vcpu-arch.pio.size, vcpu-arch.pio.port, new_rax, 1); kvm_register_write(vcpu, VCPU_REGS_RAX, new_rax); because we know that vcpu-arch.pio.count != 0. I think two extra lines of code in my patch vs your suggestion are worth it to a) reduce execution path length b) increase readability c) avoid breaking the abstraction by not checking the return code d) avoid any future bugs introduced by changes the function that would return a value other than 1. Refactoring could avoid the weird vcpu-ctxt-vcpu conversion. (A better name is always welcome.) The pointer chasing is making me dizzy. I'm not sure why emulator_pio_in_emulated takes a x86_emulate_ctxt when all it does it immediately translate that to a vcpu and never use the x86_emulate_ctxt, why not pass the vcpu in the first place? 
-- To unsubscribe from this list: send the line unsubscribe kvm in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [PATCH v3] x86: svm: use kvm_fast_pio_in()
On 03/03/2015 10:44 AM, Radim Krčmář wrote: 2015-03-02 15:02-0600, Joel Schopp: +int kvm_fast_pio_in(struct kvm_vcpu *vcpu, int size, unsigned short port) +{ +unsigned long val; +int ret = emulator_pio_in_emulated(vcpu-arch.emulate_ctxt, size, + port, val, 1); + Btw. does this return 1 in some scenario? If a function returns a value it is always a good idea to check it and act appropriately. That said... emulator_pio_in_emulated will return 1 if emulator_pio_in_out returns 1 or if vcpu-arch.pio.count != 0 emulator_pio_in_out returns 1 if kernel_pio returns 0 kernel_pio returns 0 if kvm_io_bus_read returns 0 -- To unsubscribe from this list: send the line unsubscribe kvm in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
[PATCH v3] x86: svm: use kvm_fast_pio_in()
From: David Kaplan david.kap...@amd.com We can make the in instruction go faster the same way the out instruction is already. Changes from v2[Joel]: * changed rax from u32 to unsigned long * changed a couple return 0 to BUG_ON() * changed 8 to sizeof(new_rax) * added trace hook * removed redundant clearing of count Changes from v1[Joel] * Added kvm_fast_pio_in() implementation that was left out of v1 Signed-off-by: David Kaplan david.kap...@amd.com [extracted from larger unlrelated patch, forward ported, addressed reviews, tested] Signed-off-by: Joel Schopp joel.sch...@amd.com --- arch/x86/include/asm/kvm_host.h |1 + arch/x86/kvm/svm.c |4 +++- arch/x86/kvm/x86.c | 30 ++ 3 files changed, 34 insertions(+), 1 deletion(-) diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h index a236e39..b976824 100644 --- a/arch/x86/include/asm/kvm_host.h +++ b/arch/x86/include/asm/kvm_host.h @@ -931,6 +931,7 @@ int kvm_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr); struct x86_emulate_ctxt; int kvm_fast_pio_out(struct kvm_vcpu *vcpu, int size, unsigned short port); +int kvm_fast_pio_in(struct kvm_vcpu *vcpu, int size, unsigned short port); void kvm_emulate_cpuid(struct kvm_vcpu *vcpu); int kvm_emulate_halt(struct kvm_vcpu *vcpu); int kvm_emulate_wbinvd(struct kvm_vcpu *vcpu); diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c index d319e0c..f8c906b 100644 --- a/arch/x86/kvm/svm.c +++ b/arch/x86/kvm/svm.c @@ -1899,7 +1899,7 @@ static int io_interception(struct vcpu_svm *svm) ++svm-vcpu.stat.io_exits; string = (io_info SVM_IOIO_STR_MASK) != 0; in = (io_info SVM_IOIO_TYPE_MASK) != 0; - if (string || in) + if (string) return emulate_instruction(vcpu, 0) == EMULATE_DONE; port = io_info 16; @@ -1907,6 +1907,8 @@ static int io_interception(struct vcpu_svm *svm) svm-next_rip = svm-vmcb-control.exit_info_2; skip_emulated_instruction(svm-vcpu); + if (in) + return kvm_fast_pio_in(vcpu, size, port); return kvm_fast_pio_out(vcpu, size, port); } diff --git 
a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index bd7a70b..d05efaf 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -5463,6 +5463,36 @@ int kvm_fast_pio_out(struct kvm_vcpu *vcpu, int size, unsigned short port) } EXPORT_SYMBOL_GPL(kvm_fast_pio_out); +static int complete_fast_pio(struct kvm_vcpu *vcpu) +{ + unsigned long new_rax = kvm_register_read(vcpu, VCPU_REGS_RAX); + + BUG_ON(!vcpu-arch.pio.count); + BUG_ON(vcpu-arch.pio.count * vcpu-arch.pio.size sizeof(new_rax)); + + memcpy(new_rax, vcpu, sizeof(new_rax)); + trace_kvm_pio(KVM_PIO_IN, vcpu-arch.pio.port, vcpu-arch.pio.size, + vcpu-arch.pio.count, vcpu-arch.pio_data); + kvm_register_write(vcpu, VCPU_REGS_RAX, new_rax); + vcpu-arch.pio.count = 0; + return 1; +} + +int kvm_fast_pio_in(struct kvm_vcpu *vcpu, int size, unsigned short port) +{ + unsigned long val; + int ret = emulator_pio_in_emulated(vcpu-arch.emulate_ctxt, size, + port, val, 1); + + if (ret) + kvm_register_write(vcpu, VCPU_REGS_RAX, val); + else + vcpu-arch.complete_userspace_io = complete_fast_pio; + + return ret; +} +EXPORT_SYMBOL_GPL(kvm_fast_pio_in); + static void tsc_bad(void *info) { __this_cpu_write(cpu_tsc_khz, 0); -- To unsubscribe from this list: send the line unsubscribe kvm in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [PATCH v2] x86: svm: use kvm_fast_pio_in()
return emulate_instruction(vcpu, 0) == EMULATE_DONE; port = io_info 16; @@ -1907,6 +1907,8 @@ static int io_interception(struct vcpu_svm *svm) svm-next_rip = svm-vmcb-control.exit_info_2; skip_emulated_instruction(svm-vcpu); + if (in) + return kvm_fast_pio_in(vcpu, size, port); return kvm_fast_pio_out(vcpu, size, port); (kvm_fast_pio() comes to mind.) If you combined them you'd have to have an extra argument to say if it was in or out. You'd then have to have code to parse that. I prefer this way. } diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index bd7a70b..089247c 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -5463,6 +5463,39 @@ int kvm_fast_pio_out(struct kvm_vcpu *vcpu, int size, unsigned short port) } EXPORT_SYMBOL_GPL(kvm_fast_pio_out); +static int complete_fast_pio(struct kvm_vcpu *vcpu) +{ + u32 new_rax = kvm_register_read(vcpu, VCPU_REGS_RAX); u64. Good call. I'll use unsigned long like kvm_fast_pio_out() uses. arch/x86/kvm/x86.c + + if (!vcpu-arch.pio.count) + return 0; + if (vcpu-arch.pio.count * vcpu-arch.pio.size 8) + return 0; sizeof(new_rax). (safer and easier to understand) Both should never happen in KVM code, BUG_ON(). Agreed on both counts. + + memcpy(new_rax, vcpu-arch.pio_data, + vcpu-arch.pio.count * vcpu-arch.pio.size); Use emulator_pio_in_emulated() here, for code sharing. (We want to trace the read here too; it could be better to split the path from emulator_pio_in_emulated() first.) I looked at pulling this out, it was a painful. I'll add the trace hook. + kvm_register_write(vcpu, VCPU_REGS_RAX, new_rax); + + vcpu-arch.pio.count = 0; + return 1; +} + +int kvm_fast_pio_in(struct kvm_vcpu *vcpu, int size, unsigned short port) +{ + unsigned long val; + int ret = emulator_pio_in_emulated(vcpu-arch.emulate_ctxt, size, + port, val, 1); + + if (ret) { + kvm_register_write(vcpu, VCPU_REGS_RAX, val); + vcpu-arch.pio.count = 0; (emulator_pio_in_emulated() sets count to zero if it returns true.) 
will remove = 0 line -- To unsubscribe from this list: send the line unsubscribe kvm in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [PATCH] x86: svm: use kvm_fast_pio_in()
+ if (in) + return kvm_fast_pio_in(vcpu, size, port); Have I missed a patch that defined kvm_fast_pio_in()? Not sure how I managed to leave out the bulk of the patch. Resending v2 momentarily. -- To unsubscribe from this list: send the line unsubscribe kvm in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [PATCH] x86: svm: make wbinvd faster
On 03/02/2015 10:03 AM, Radim Krčmář wrote: 2015-03-02 10:25-0500, Bandan Das: Radim Krčmář rkrc...@redhat.com writes: 2015-03-01 21:29-0500, Bandan Das: Joel Schopp joel.sch...@amd.com writes: +static int wbinvd_interception(struct vcpu_svm *svm) +{ + kvm_emulate_wbinvd(svm-vcpu); + skip_emulated_instruction(svm-vcpu); + return 1; +} Can't we merge this to kvm_emulate_wbinvd, and just call that function directly for both vmx and svm ? kvm_emulate_wbinvd() lives in x86.c and skip_emulated_instruction() is from svm.c/vmx.c: so we'd have to create a new x86 op and change the emulator code as well ... it's probably better like this. There's already one - kvm_x86_ops-skip_emulated_instruction My bad, its usage is inconsistent and I only looked at two close interceptions where it was used ... kvm_emulate_cpuid() calls kvm_x86_ops-skip_emulated_instruction(), while kvm_emulate_halt() and kvm_emulate_hypercall() need an external skip. We do skip the instruction with kvm_emulate(), so automatically skipping the instruction on kvm_emulate_*() makes sense: 1. rename kvm_emulate_halt() and kvm_emulate_wbinvd() to accommodate callers that don't want to skip 2. introduce kvm_emulate_{halt,wbinvd}() and move the skip to to kvm_emulate_{halt,wbinvd,hypercall}() The alternative is to remove kvm_x86_ops-skip_emulated_instruction(): 1. remove skip from kvm_emulate_cpuid() and modify callers 2. move kvm_complete_insn_gp to a header file and use skip_emulated_instruction directly 3. remove unused kvm_x86_ops-skip_emulated_instruction() Which one do you prefer? I prefer renaming them, ie kvm_emulate_wbinvd_noskip(), and making the existing ones, ie kvm_emulate_wbinvd() call the noskip verion and add a skip similar to how wbinvd_interception above does. I can send out a patch later today with that rework. -- To unsubscribe from this list: send the line unsubscribe kvm in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
[PATCH v2] x86: svm: use kvm_fast_pio_in()
From: David Kaplan david.kap...@amd.com We can make the in instruction go faster the same way the out instruction is already. Changes from v1 * Added kvm_fast_pio_in() implementation that was left out of v1 Signed-off-by: David Kaplan david.kap...@amd.com [extracted from larger unlrelated patch, forward ported, tested] Signed-off-by: Joel Schopp joel.sch...@amd.com --- arch/x86/include/asm/kvm_host.h |1 + arch/x86/kvm/svm.c |4 +++- arch/x86/kvm/x86.c | 33 + 3 files changed, 37 insertions(+), 1 deletion(-) diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h index a236e39..b976824 100644 --- a/arch/x86/include/asm/kvm_host.h +++ b/arch/x86/include/asm/kvm_host.h @@ -931,6 +931,7 @@ int kvm_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr); struct x86_emulate_ctxt; int kvm_fast_pio_out(struct kvm_vcpu *vcpu, int size, unsigned short port); +int kvm_fast_pio_in(struct kvm_vcpu *vcpu, int size, unsigned short port); void kvm_emulate_cpuid(struct kvm_vcpu *vcpu); int kvm_emulate_halt(struct kvm_vcpu *vcpu); int kvm_emulate_wbinvd(struct kvm_vcpu *vcpu); diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c index d319e0c..f8c906b 100644 --- a/arch/x86/kvm/svm.c +++ b/arch/x86/kvm/svm.c @@ -1899,7 +1899,7 @@ static int io_interception(struct vcpu_svm *svm) ++svm-vcpu.stat.io_exits; string = (io_info SVM_IOIO_STR_MASK) != 0; in = (io_info SVM_IOIO_TYPE_MASK) != 0; - if (string || in) + if (string) return emulate_instruction(vcpu, 0) == EMULATE_DONE; port = io_info 16; @@ -1907,6 +1907,8 @@ static int io_interception(struct vcpu_svm *svm) svm-next_rip = svm-vmcb-control.exit_info_2; skip_emulated_instruction(svm-vcpu); + if (in) + return kvm_fast_pio_in(vcpu, size, port); return kvm_fast_pio_out(vcpu, size, port); } diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index bd7a70b..089247c 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -5463,6 +5463,39 @@ int kvm_fast_pio_out(struct kvm_vcpu *vcpu, int size, unsigned short port) } 
EXPORT_SYMBOL_GPL(kvm_fast_pio_out); +static int complete_fast_pio(struct kvm_vcpu *vcpu) +{ + u32 new_rax = kvm_register_read(vcpu, VCPU_REGS_RAX); + + if (!vcpu-arch.pio.count) + return 0; + if (vcpu-arch.pio.count * vcpu-arch.pio.size 8) + return 0; + + memcpy(new_rax, vcpu-arch.pio_data, + vcpu-arch.pio.count * vcpu-arch.pio.size); + kvm_register_write(vcpu, VCPU_REGS_RAX, new_rax); + + vcpu-arch.pio.count = 0; + return 1; +} + +int kvm_fast_pio_in(struct kvm_vcpu *vcpu, int size, unsigned short port) +{ + unsigned long val; + int ret = emulator_pio_in_emulated(vcpu-arch.emulate_ctxt, size, + port, val, 1); + + if (ret) { + kvm_register_write(vcpu, VCPU_REGS_RAX, val); + vcpu-arch.pio.count = 0; + } else + vcpu-arch.complete_userspace_io = complete_fast_pio; + + return ret; +} +EXPORT_SYMBOL_GPL(kvm_fast_pio_in); + static void tsc_bad(void *info) { __this_cpu_write(cpu_tsc_khz, 0); -- To unsubscribe from this list: send the line unsubscribe kvm in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
[PATCH v2 0/2] kvm: x86: kvm_emulate_*
Review comments from v1 that used kvm_emulate_wbinvd() pointed out that kvm_emulate_* was inconsistent in using skipping, while kvm_emulate() always skips. The first patch cleans up the existing use while the second patch adds use of the updated version of kvm_emulate_wbinvd() in svm --- Joel Schopp (2): kvm: x86: make kvm_emulate_* consistent x86: svm: make wbinvd faster arch/x86/kvm/svm.c | 11 --- arch/x86/kvm/vmx.c |9 +++-- arch/x86/kvm/x86.c | 23 --- 3 files changed, 31 insertions(+), 12 deletions(-) -- -- To unsubscribe from this list: send the line unsubscribe kvm in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
[PATCH v2 1/2] kvm: x86: make kvm_emulate_* consistent
Currently kvm_emulate() skips the instruction but kvm_emulate_* sometimes don't. The end reult is the caller ends up doing the skip themselves. Let's make them consistant. Signed-off-by: Joel Schopp joel.sch...@amd.com --- arch/x86/kvm/svm.c |2 -- arch/x86/kvm/vmx.c |9 +++-- arch/x86/kvm/x86.c | 23 --- 3 files changed, 23 insertions(+), 11 deletions(-) diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c index d319e0c..0c9e377 100644 --- a/arch/x86/kvm/svm.c +++ b/arch/x86/kvm/svm.c @@ -1929,14 +1929,12 @@ static int nop_on_interception(struct vcpu_svm *svm) static int halt_interception(struct vcpu_svm *svm) { svm-next_rip = kvm_rip_read(svm-vcpu) + 1; - skip_emulated_instruction(svm-vcpu); return kvm_emulate_halt(svm-vcpu); } static int vmmcall_interception(struct vcpu_svm *svm) { svm-next_rip = kvm_rip_read(svm-vcpu) + 3; - skip_emulated_instruction(svm-vcpu); kvm_emulate_hypercall(svm-vcpu); return 1; } diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c index 14c1a18..b7dcd3c 100644 --- a/arch/x86/kvm/vmx.c +++ b/arch/x86/kvm/vmx.c @@ -4995,7 +4995,7 @@ static int handle_rmode_exception(struct kvm_vcpu *vcpu, if (emulate_instruction(vcpu, 0) == EMULATE_DONE) { if (vcpu-arch.halt_request) { vcpu-arch.halt_request = 0; - return kvm_emulate_halt(vcpu); + return kvm_emulate_halt_noskip(vcpu); } return 1; } @@ -5522,13 +5522,11 @@ static int handle_interrupt_window(struct kvm_vcpu *vcpu) static int handle_halt(struct kvm_vcpu *vcpu) { - skip_emulated_instruction(vcpu); return kvm_emulate_halt(vcpu); } static int handle_vmcall(struct kvm_vcpu *vcpu) { - skip_emulated_instruction(vcpu); kvm_emulate_hypercall(vcpu); return 1; } @@ -5559,7 +5557,6 @@ static int handle_rdpmc(struct kvm_vcpu *vcpu) static int handle_wbinvd(struct kvm_vcpu *vcpu) { - skip_emulated_instruction(vcpu); kvm_emulate_wbinvd(vcpu); return 1; } @@ -5898,7 +5895,7 @@ static int handle_invalid_guest_state(struct kvm_vcpu *vcpu) if (vcpu-arch.halt_request) { vcpu-arch.halt_request = 0; - ret = 
kvm_emulate_halt(vcpu); + ret = kvm_emulate_halt_noskip(vcpu); goto out; } @@ -9513,7 +9510,7 @@ static int nested_vmx_run(struct kvm_vcpu *vcpu, bool launch) vmcs12-launch_state = 1; if (vmcs12-guest_activity_state == GUEST_ACTIVITY_HLT) - return kvm_emulate_halt(vcpu); + return kvm_emulate_halt_noskip(vcpu); vmx-nested.nested_run_pending = 1; diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index bd7a70b..96a8333 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -4706,7 +4706,7 @@ static void emulator_invlpg(struct x86_emulate_ctxt *ctxt, ulong address) kvm_mmu_invlpg(emul_to_vcpu(ctxt), address); } -int kvm_emulate_wbinvd(struct kvm_vcpu *vcpu) +int kvm_emulate_wbinvd_noskip(struct kvm_vcpu *vcpu) { if (!need_emulate_wbinvd(vcpu)) return X86EMUL_CONTINUE; @@ -4723,11 +4723,19 @@ int kvm_emulate_wbinvd(struct kvm_vcpu *vcpu) wbinvd(); return X86EMUL_CONTINUE; } + +int kvm_emulate_wbinvd(struct kvm_vcpu *vcpu) +{ + kvm_x86_ops-skip_emulated_instruction(vcpu); + return kvm_emulate_wbinvd_noskip(vcpu); +} EXPORT_SYMBOL_GPL(kvm_emulate_wbinvd); + + static void emulator_wbinvd(struct x86_emulate_ctxt *ctxt) { - kvm_emulate_wbinvd(emul_to_vcpu(ctxt)); + kvm_emulate_wbinvd_noskip(emul_to_vcpu(ctxt)); } int emulator_get_dr(struct x86_emulate_ctxt *ctxt, int dr, unsigned long *dest) @@ -5817,7 +5825,7 @@ void kvm_arch_exit(void) free_percpu(shared_msrs); } -int kvm_emulate_halt(struct kvm_vcpu *vcpu) +int kvm_emulate_halt_noskip(struct kvm_vcpu *vcpu) { ++vcpu-stat.halt_exits; if (irqchip_in_kernel(vcpu-kvm)) { @@ -5828,6 +5836,13 @@ int kvm_emulate_halt(struct kvm_vcpu *vcpu) return 0; } } +EXPORT_SYMBOL_GPL(kvm_emulate_halt_noskip); + +int kvm_emulate_halt(struct kvm_vcpu *vcpu) +{ + kvm_x86_ops-skip_emulated_instruction(vcpu); + return kvm_emulate_halt_noskip(vcpu); +} EXPORT_SYMBOL_GPL(kvm_emulate_halt); int kvm_hv_hypercall(struct kvm_vcpu *vcpu) @@ -5912,6 +5927,8 @@ int kvm_emulate_hypercall(struct kvm_vcpu *vcpu) unsigned long nr, a0, a1, a2, a3, 
ret; int op_64_bit, r = 1; + kvm_x86_ops-skip_emulated_instruction(vcpu); + if (kvm_hv_hypercall_enabled(vcpu-kvm)) return kvm_hv_hypercall(vcpu); -- To unsubscribe from this list: send the line unsubscribe kvm in the body
[PATCH v2 2/2] x86: svm: make wbinvd faster
From: David Kaplan david.kap...@amd.com No need to re-decode WBINVD since we know what it is from the intercept. Signed-off-by: David Kaplan david.kap...@amd.com [extracted from larger unlrelated patch, forward ported, tested] Signed-off-by: Joel Schopp joel.sch...@amd.com --- arch/x86/kvm/svm.c |9 - 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c index 0c9e377..794bca7 100644 --- a/arch/x86/kvm/svm.c +++ b/arch/x86/kvm/svm.c @@ -2774,6 +2774,13 @@ static int skinit_interception(struct vcpu_svm *svm) return 1; } +static int wbinvd_interception(struct vcpu_svm *svm) +{ + kvm_emulate_wbinvd(svm-vcpu); + return 1; +} + + static int xsetbv_interception(struct vcpu_svm *svm) { u64 new_bv = kvm_read_edx_eax(svm-vcpu); @@ -3374,7 +3381,7 @@ static int (*const svm_exit_handlers[])(struct vcpu_svm *svm) = { [SVM_EXIT_STGI] = stgi_interception, [SVM_EXIT_CLGI] = clgi_interception, [SVM_EXIT_SKINIT] = skinit_interception, - [SVM_EXIT_WBINVD] = emulate_on_interception, + [SVM_EXIT_WBINVD] = wbinvd_interception, [SVM_EXIT_MONITOR] = monitor_interception, [SVM_EXIT_MWAIT]= mwait_interception, [SVM_EXIT_XSETBV] = xsetbv_interception, -- To unsubscribe from this list: send the line unsubscribe kvm in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [PATCH v2 1/2] kvm: x86: make kvm_emulate_* consistent
--- diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c @@ -4995,7 +4995,7 @@ static int handle_rmode_exception(struct kvm_vcpu *vcpu, if (emulate_instruction(vcpu, 0) == EMULATE_DONE) { if (vcpu-arch.halt_request) { vcpu-arch.halt_request = 0; - return kvm_emulate_halt(vcpu); + return kvm_emulate_halt_noskip(vcpu); noskip is used without being declared ... it shouldn't compile. I tested on AMD hardware, I thought I had turned on the Intel KVM module as well, but it turns out I hadn't. Will fix in v3. *_noskip makes the usual case harder to undertand: we just want to halt the vcpu, so name it more directly ... like kvm_vcpu_halt()? I like that better. Will make the change in v3. -- To unsubscribe from this list: send the line unsubscribe kvm in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
[PATCH v3 1/2] kvm: x86: make kvm_emulate_* consistent
Currently kvm_emulate() skips the instruction but kvm_emulate_* sometimes don't. The end reult is the caller ends up doing the skip themselves. Let's make them consistant. Signed-off-by: Joel Schopp joel.sch...@amd.com --- arch/x86/include/asm/kvm_host.h |1 + arch/x86/kvm/svm.c |2 -- arch/x86/kvm/vmx.c |9 +++-- arch/x86/kvm/x86.c | 23 --- 4 files changed, 24 insertions(+), 11 deletions(-) diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h index a236e39..bf5a160 100644 --- a/arch/x86/include/asm/kvm_host.h +++ b/arch/x86/include/asm/kvm_host.h @@ -933,6 +933,7 @@ struct x86_emulate_ctxt; int kvm_fast_pio_out(struct kvm_vcpu *vcpu, int size, unsigned short port); void kvm_emulate_cpuid(struct kvm_vcpu *vcpu); int kvm_emulate_halt(struct kvm_vcpu *vcpu); +int kvm_vcpu_halt(struct kvm_vcpu *vcpu); int kvm_emulate_wbinvd(struct kvm_vcpu *vcpu); void kvm_get_segment(struct kvm_vcpu *vcpu, struct kvm_segment *var, int seg); diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c index d319e0c..0c9e377 100644 --- a/arch/x86/kvm/svm.c +++ b/arch/x86/kvm/svm.c @@ -1929,14 +1929,12 @@ static int nop_on_interception(struct vcpu_svm *svm) static int halt_interception(struct vcpu_svm *svm) { svm-next_rip = kvm_rip_read(svm-vcpu) + 1; - skip_emulated_instruction(svm-vcpu); return kvm_emulate_halt(svm-vcpu); } static int vmmcall_interception(struct vcpu_svm *svm) { svm-next_rip = kvm_rip_read(svm-vcpu) + 3; - skip_emulated_instruction(svm-vcpu); kvm_emulate_hypercall(svm-vcpu); return 1; } diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c index 14c1a18..eef7f53 100644 --- a/arch/x86/kvm/vmx.c +++ b/arch/x86/kvm/vmx.c @@ -4995,7 +4995,7 @@ static int handle_rmode_exception(struct kvm_vcpu *vcpu, if (emulate_instruction(vcpu, 0) == EMULATE_DONE) { if (vcpu-arch.halt_request) { vcpu-arch.halt_request = 0; - return kvm_emulate_halt(vcpu); + return kvm_vcpu_halt(vcpu); } return 1; } @@ -5522,13 +5522,11 @@ static int handle_interrupt_window(struct kvm_vcpu 
*vcpu) static int handle_halt(struct kvm_vcpu *vcpu) { - skip_emulated_instruction(vcpu); return kvm_emulate_halt(vcpu); } static int handle_vmcall(struct kvm_vcpu *vcpu) { - skip_emulated_instruction(vcpu); kvm_emulate_hypercall(vcpu); return 1; } @@ -5559,7 +5557,6 @@ static int handle_rdpmc(struct kvm_vcpu *vcpu) static int handle_wbinvd(struct kvm_vcpu *vcpu) { - skip_emulated_instruction(vcpu); kvm_emulate_wbinvd(vcpu); return 1; } @@ -5898,7 +5895,7 @@ static int handle_invalid_guest_state(struct kvm_vcpu *vcpu) if (vcpu-arch.halt_request) { vcpu-arch.halt_request = 0; - ret = kvm_emulate_halt(vcpu); + ret = kvm_vcpu_halt(vcpu); goto out; } @@ -9513,7 +9510,7 @@ static int nested_vmx_run(struct kvm_vcpu *vcpu, bool launch) vmcs12-launch_state = 1; if (vmcs12-guest_activity_state == GUEST_ACTIVITY_HLT) - return kvm_emulate_halt(vcpu); + return kvm_vcpu_halt(vcpu); vmx-nested.nested_run_pending = 1; diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index bd7a70b..6ff90f7 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -4706,7 +4706,7 @@ static void emulator_invlpg(struct x86_emulate_ctxt *ctxt, ulong address) kvm_mmu_invlpg(emul_to_vcpu(ctxt), address); } -int kvm_emulate_wbinvd(struct kvm_vcpu *vcpu) +int kvm_emulate_wbinvd_noskip(struct kvm_vcpu *vcpu) { if (!need_emulate_wbinvd(vcpu)) return X86EMUL_CONTINUE; @@ -4723,11 +4723,19 @@ int kvm_emulate_wbinvd(struct kvm_vcpu *vcpu) wbinvd(); return X86EMUL_CONTINUE; } + +int kvm_emulate_wbinvd(struct kvm_vcpu *vcpu) +{ + kvm_x86_ops-skip_emulated_instruction(vcpu); + return kvm_emulate_wbinvd_noskip(vcpu); +} EXPORT_SYMBOL_GPL(kvm_emulate_wbinvd); + + static void emulator_wbinvd(struct x86_emulate_ctxt *ctxt) { - kvm_emulate_wbinvd(emul_to_vcpu(ctxt)); + kvm_emulate_wbinvd_noskip(emul_to_vcpu(ctxt)); } int emulator_get_dr(struct x86_emulate_ctxt *ctxt, int dr, unsigned long *dest) @@ -5817,7 +5825,7 @@ void kvm_arch_exit(void) free_percpu(shared_msrs); } -int kvm_emulate_halt(struct kvm_vcpu 
*vcpu) +int kvm_vcpu_halt(struct kvm_vcpu *vcpu) { ++vcpu-stat.halt_exits; if (irqchip_in_kernel(vcpu-kvm)) { @@ -5828,6 +5836,13 @@ int kvm_emulate_halt(struct kvm_vcpu *vcpu) return 0; } } +EXPORT_SYMBOL_GPL(kvm_vcpu_halt); + +int
[PATCH v3 2/2] x86: svm: make wbinvd faster
From: David Kaplan david.kap...@amd.com No need to re-decode WBINVD since we know what it is from the intercept. Signed-off-by: David Kaplan david.kap...@amd.com [extracted from larger unrelated patch, forward ported, tested, style cleanup] Signed-off-by: Joel Schopp joel.sch...@amd.com --- arch/x86/kvm/svm.c |8 +++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c index 0c9e377..6fa4222 100644 --- a/arch/x86/kvm/svm.c +++ b/arch/x86/kvm/svm.c @@ -2774,6 +2774,12 @@ static int skinit_interception(struct vcpu_svm *svm) return 1; } +static int wbinvd_interception(struct vcpu_svm *svm) +{ + kvm_emulate_wbinvd(svm-vcpu); + return 1; +} + static int xsetbv_interception(struct vcpu_svm *svm) { u64 new_bv = kvm_read_edx_eax(svm-vcpu); @@ -3374,7 +3380,7 @@ static int (*const svm_exit_handlers[])(struct vcpu_svm *svm) = { [SVM_EXIT_STGI] = stgi_interception, [SVM_EXIT_CLGI] = clgi_interception, [SVM_EXIT_SKINIT] = skinit_interception, - [SVM_EXIT_WBINVD] = emulate_on_interception, + [SVM_EXIT_WBINVD] = wbinvd_interception, [SVM_EXIT_MONITOR] = monitor_interception, [SVM_EXIT_MWAIT]= mwait_interception, [SVM_EXIT_XSETBV] = xsetbv_interception, -- To unsubscribe from this list: send the line unsubscribe kvm in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
[PATCH v3 0/2] kvm: x86: kvm_emulate_*
Review comments from v1 that used kvm_emulate_wbinvd() pointed out that kvm_emulate_* was inconsistent in using skipping, while kvm_emulate() always skips. The first patch cleans up the existing use while the second patch adds use of the updated version of kvm_emulate_wbinvd() in svm. Changes since v2: * fixed email subject line on series short description * renamed kvm_emulate_halt_noskip() to kvm_vcpu_halt() * added header declaration for kvm_vcpu_halt() * squashed blank line --- David Kaplan (1): x86: svm: make wbinvd faster Joel Schopp (1): kvm: x86: make kvm_emulate_* consistent arch/x86/include/asm/kvm_host.h |1 + arch/x86/kvm/svm.c | 10 +++--- arch/x86/kvm/vmx.c |9 +++-- arch/x86/kvm/x86.c | 23 --- 4 files changed, 31 insertions(+), 12 deletions(-) -- -- To unsubscribe from this list: send the line unsubscribe kvm in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
[PATCH] x86: svm: use kvm_fast_pio_in()
From: David Kaplan david.kap...@amd.com We can make the in instruction go faster the same way the out instruction is already. Signed-off-by: David Kaplan david.kap...@amd.com [extracted from larger unrelated patch, forward ported, tested] Signed-off-by: Joel Schopp joel.sch...@amd.com --- arch/x86/kvm/svm.c |4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c index d319e0c..f8c906b 100644 --- a/arch/x86/kvm/svm.c +++ b/arch/x86/kvm/svm.c @@ -1899,7 +1899,7 @@ static int io_interception(struct vcpu_svm *svm) ++svm-vcpu.stat.io_exits; string = (io_info SVM_IOIO_STR_MASK) != 0; in = (io_info SVM_IOIO_TYPE_MASK) != 0; - if (string || in) + if (string) return emulate_instruction(vcpu, 0) == EMULATE_DONE; port = io_info 16; @@ -1907,6 +1907,8 @@ static int io_interception(struct vcpu_svm *svm) svm-next_rip = svm-vmcb-control.exit_info_2; skip_emulated_instruction(svm-vcpu); + if (in) + return kvm_fast_pio_in(vcpu, size, port); return kvm_fast_pio_out(vcpu, size, port); } -- To unsubscribe from this list: send the line unsubscribe kvm in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
[PATCH] x86: svm: use kvm_fast_pio_in()
From: David Kaplan david.kap...@amd.com We can make the in instruction go faster the same way the out instruction is already. Signed-off-by: David Kaplan david.kap...@amd.com [extracted from larger unrelated patch, forward ported, tested] Signed-off-by: Joel Schopp joel.sch...@amd.com --- arch/x86/kvm/svm.c |4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c index d319e0c..f8c906b 100644 --- a/arch/x86/kvm/svm.c +++ b/arch/x86/kvm/svm.c @@ -1899,7 +1899,7 @@ static int io_interception(struct vcpu_svm *svm) ++svm-vcpu.stat.io_exits; string = (io_info SVM_IOIO_STR_MASK) != 0; in = (io_info SVM_IOIO_TYPE_MASK) != 0; - if (string || in) + if (string) return emulate_instruction(vcpu, 0) == EMULATE_DONE; port = io_info 16; @@ -1907,6 +1907,8 @@ static int io_interception(struct vcpu_svm *svm) svm-next_rip = svm-vmcb-control.exit_info_2; skip_emulated_instruction(svm-vcpu); + if (in) + return kvm_fast_pio_in(vcpu, size, port); return kvm_fast_pio_out(vcpu, size, port); } -- To unsubscribe from this list: send the line unsubscribe kvm in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
[PATCH] x86: svm: make wbinvd faster
From: David Kaplan david.kap...@amd.com No need to re-decode WBINVD since we know what it is from the intercept. Signed-off-by: David Kaplan david.kap...@amd.com [extracted from larger unrelated patch, forward ported, tested] Signed-off-by: Joel Schopp joel.sch...@amd.com --- arch/x86/kvm/svm.c | 10 +- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c index d319e0c..86ecd21 100644 --- a/arch/x86/kvm/svm.c +++ b/arch/x86/kvm/svm.c @@ -2776,6 +2776,14 @@ static int skinit_interception(struct vcpu_svm *svm) return 1; } +static int wbinvd_interception(struct vcpu_svm *svm) +{ + kvm_emulate_wbinvd(svm-vcpu); + skip_emulated_instruction(svm-vcpu); + return 1; +} + + static int xsetbv_interception(struct vcpu_svm *svm) { u64 new_bv = kvm_read_edx_eax(svm-vcpu); @@ -3376,7 +3384,7 @@ static int (*const svm_exit_handlers[])(struct vcpu_svm *svm) = { [SVM_EXIT_STGI] = stgi_interception, [SVM_EXIT_CLGI] = clgi_interception, [SVM_EXIT_SKINIT] = skinit_interception, - [SVM_EXIT_WBINVD] = emulate_on_interception, + [SVM_EXIT_WBINVD] = wbinvd_interception, [SVM_EXIT_MONITOR] = monitor_interception, [SVM_EXIT_MWAIT]= mwait_interception, [SVM_EXIT_XSETBV] = xsetbv_interception, -- To unsubscribe from this list: send the line unsubscribe kvm in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [PATCH] x86: svm: don't intercept CR0 TS or MP bit write
On 02/25/2015 02:26 PM, Radim Krčmář wrote: 2015-02-24 15:25-0600, Joel Schopp: - clr_cr_intercept(svm, INTERCEPT_CR0_WRITE); } else { set_cr_intercept(svm, INTERCEPT_CR0_READ); (There is no point in checking fpu_active if cr0s are equal.) - set_cr_intercept(svm, INTERCEPT_CR0_WRITE); KVM uses lazy FPU and the state is undefined before the first access. We set cr0.ts when !svm-vcpu.fpu_active to detect the first access, but if we allow the guest to clear cr0.ts without exiting, it can access FPU with undefined state. Thanks for the valuable feedback. It's apparent I hadn't thought through the interaction with lazy FPU and will need to go back and rethink my approach here. I don't think we can gain much without sacrificing some laziness, like: when a guest with lazy FPU clears CR0.TS, it is going to use that FPU, so we could pre-load FPU in this case and drop the write intercept too; guests that unconditionally clear CR0.TS would perform worse though. It's going to take a lot of time, but two hunks in your patch, that made selective intercept benefit from decode assists, look useful even now. Would you post them separately? I can re-post those separately. They are less useful, though probably still worth doing, on their own because SVM_EXIT_WRITE_CR0 takes precedence over SVM_EXIT_CR0_SEL_WRITE -- To unsubscribe from this list: send the line unsubscribe kvm in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [PATCH] x86: svm: don't intercept CR0 TS or MP bit write
-clr_cr_intercept(svm, INTERCEPT_CR0_WRITE); } else { set_cr_intercept(svm, INTERCEPT_CR0_READ); (There is no point in checking fpu_active if cr0s are equal.) -set_cr_intercept(svm, INTERCEPT_CR0_WRITE); KVM uses lazy FPU and the state is undefined before the first access. We set cr0.ts when !svm-vcpu.fpu_active to detect the first access, but if we allow the guest to clear cr0.ts without exiting, it can access FPU with undefined state. Thanks for the valuable feedback. It's apparent I hadn't thought through the interaction with lazy FPU and will need to go back and rethink my approach here. -- To unsubscribe from this list: send the line unsubscribe kvm in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
[PATCH v2] x86: svm: use kvm_register_write()/read()
From: David Kaplan david.kap...@amd.com KVM has nice wrappers to access the register values, clean up a few places that should use them but currently do not. Signed-off-by: David Kaplan david.kap...@amd.com [forward port and testing] Signed-off-by: Joel Schopp joel.sch...@amd.com --- arch/x86/kvm/svm.c | 19 +-- 1 file changed, 9 insertions(+), 10 deletions(-) diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c index d319e0c..a7d88e4 100644 --- a/arch/x86/kvm/svm.c +++ b/arch/x86/kvm/svm.c @@ -2757,11 +2757,11 @@ static int invlpga_interception(struct vcpu_svm *svm) { struct kvm_vcpu *vcpu = svm-vcpu; - trace_kvm_invlpga(svm-vmcb-save.rip, vcpu-arch.regs[VCPU_REGS_RCX], - vcpu-arch.regs[VCPU_REGS_RAX]); + trace_kvm_invlpga(svm-vmcb-save.rip, kvm_register_read(svm-vcpu, VCPU_REGS_RCX), + kvm_register_read(svm-vcpu, VCPU_REGS_RAX)); /* Let's treat INVLPGA the same as INVLPG (can be optimized!) */ - kvm_mmu_invlpg(vcpu, vcpu-arch.regs[VCPU_REGS_RAX]); + kvm_mmu_invlpg(vcpu, kvm_register_read(svm-vcpu, VCPU_REGS_RAX)); svm-next_rip = kvm_rip_read(svm-vcpu) + 3; skip_emulated_instruction(svm-vcpu); @@ -2770,7 +2770,7 @@ static int invlpga_interception(struct vcpu_svm *svm) static int skinit_interception(struct vcpu_svm *svm) { - trace_kvm_skinit(svm-vmcb-save.rip, svm-vcpu.arch.regs[VCPU_REGS_RAX]); + trace_kvm_skinit(svm-vmcb-save.rip, kvm_register_read(svm-vcpu, VCPU_REGS_RAX)); kvm_queue_exception(svm-vcpu, UD_VECTOR); return 1; @@ -3133,7 +3133,7 @@ static int svm_get_msr(struct kvm_vcpu *vcpu, unsigned ecx, u64 *data) static int rdmsr_interception(struct vcpu_svm *svm) { - u32 ecx = svm-vcpu.arch.regs[VCPU_REGS_RCX]; + u32 ecx = kvm_register_read(svm-vcpu, VCPU_REGS_RCX); u64 data; if (svm_get_msr(svm-vcpu, ecx, data)) { @@ -3142,8 +3142,8 @@ static int rdmsr_interception(struct vcpu_svm *svm) } else { trace_kvm_msr_read(ecx, data); - svm-vcpu.arch.regs[VCPU_REGS_RAX] = data 0x; - svm-vcpu.arch.regs[VCPU_REGS_RDX] = data 32; + kvm_register_write(svm-vcpu, 
VCPU_REGS_RAX, data 0x); + kvm_register_write(svm-vcpu, VCPU_REGS_RDX, data 32); svm-next_rip = kvm_rip_read(svm-vcpu) + 2; skip_emulated_instruction(svm-vcpu); } @@ -3246,9 +3246,8 @@ static int svm_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr) static int wrmsr_interception(struct vcpu_svm *svm) { struct msr_data msr; - u32 ecx = svm-vcpu.arch.regs[VCPU_REGS_RCX]; - u64 data = (svm-vcpu.arch.regs[VCPU_REGS_RAX] -1u) - | ((u64)(svm-vcpu.arch.regs[VCPU_REGS_RDX] -1u) 32); + u32 ecx = kvm_register_read(svm-vcpu, VCPU_REGS_RCX); + u64 data = kvm_read_edx_eax(svm-vcpu); msr.data = data; msr.index = ecx; -- To unsubscribe from this list: send the line unsubscribe kvm in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [PATCH] x86: svm: use kvm_register_write()/read()
On 02/20/2015 02:54 PM, Borislav Petkov wrote: On Fri, Feb 20, 2015 at 12:39:40PM -0600, Joel Schopp wrote: KVM has nice wrappers to access the register values, clean up a few places that should use them but currently do not. Signed-off-by:David Kaplan david.kap...@amd.com Signed-off-by:Joel Schopp joel.sch...@amd.com This SOB chain looks strange. If David is the author, you want to have him in From: at the beginning of the patch. Stuff you did ontop should be in []-braces before your SOB, like this: Will resend with From: line and braces for clarification. Signed-off-by: David Kaplan david.kap...@amd.com [ Did this and that to patch. ] Signed-off-by: Joel Schopp joel.sch...@amd.com -- To unsubscribe from this list: send the line unsubscribe kvm in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
[PATCH] x86: svm: don't intercept CR0 TS or MP bit write
From: David Kaplan david.kap...@amd.com Reduce the number of exits by avoiding exiting when the guest writes TS or MP bits of CR0. INTERCEPT_CR0_WRITE intercepts all writes to CR0 including TS and MP bits. It intercepts these even if INTERCEPT_SELECTIVE_CR0 is set. What we should be doing is setting INTERCEPT_SELECTIVE_CR0 and not setting INTERCEPT_CR0_WRITE. Signed-off-by: David Kaplan david.kap...@amd.com [added remove of clr_cr_intercept in init_vmcb, fixed check in handle_exit, added emulation on interception back in, forward ported, tested] Signed-off-by: Joel Schopp joel.sch...@amd.com --- arch/x86/kvm/svm.c | 13 +++-- 1 file changed, 7 insertions(+), 6 deletions(-) diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c index d319e0c..55822e5 100644 --- a/arch/x86/kvm/svm.c +++ b/arch/x86/kvm/svm.c @@ -1093,7 +1093,6 @@ static void init_vmcb(struct vcpu_svm *svm) set_cr_intercept(svm, INTERCEPT_CR0_READ); set_cr_intercept(svm, INTERCEPT_CR3_READ); set_cr_intercept(svm, INTERCEPT_CR4_READ); - set_cr_intercept(svm, INTERCEPT_CR0_WRITE); set_cr_intercept(svm, INTERCEPT_CR3_WRITE); set_cr_intercept(svm, INTERCEPT_CR4_WRITE); set_cr_intercept(svm, INTERCEPT_CR8_WRITE); @@ -1539,10 +1538,8 @@ static void update_cr0_intercept(struct vcpu_svm *svm) if (gcr0 == *hcr0 svm-vcpu.fpu_active) { clr_cr_intercept(svm, INTERCEPT_CR0_READ); - clr_cr_intercept(svm, INTERCEPT_CR0_WRITE); } else { set_cr_intercept(svm, INTERCEPT_CR0_READ); - set_cr_intercept(svm, INTERCEPT_CR0_WRITE); } } @@ -2940,7 +2937,11 @@ static int cr_interception(struct vcpu_svm *svm) return emulate_on_interception(svm); reg = svm-vmcb-control.exit_info_1 SVM_EXITINFO_REG_MASK; - cr = svm-vmcb-control.exit_code - SVM_EXIT_READ_CR0; + + if (svm-vmcb-control.exit_code == SVM_EXIT_CR0_SEL_WRITE) + cr = 16; + else + cr = svm-vmcb-control.exit_code - SVM_EXIT_READ_CR0; err = 0; if (cr = 16) { /* mov to cr */ @@ -3325,7 +3326,7 @@ static int (*const svm_exit_handlers[])(struct vcpu_svm *svm) = { 
[SVM_EXIT_READ_CR3] = cr_interception, [SVM_EXIT_READ_CR4] = cr_interception, [SVM_EXIT_READ_CR8] = cr_interception, - [SVM_EXIT_CR0_SEL_WRITE]= emulate_on_interception, + [SVM_EXIT_CR0_SEL_WRITE]= cr_interception, [SVM_EXIT_WRITE_CR0]= cr_interception, [SVM_EXIT_WRITE_CR3]= cr_interception, [SVM_EXIT_WRITE_CR4]= cr_interception, @@ -3502,7 +3503,7 @@ static int handle_exit(struct kvm_vcpu *vcpu) struct kvm_run *kvm_run = vcpu-run; u32 exit_code = svm-vmcb-control.exit_code; - if (!is_cr_intercept(svm, INTERCEPT_CR0_WRITE)) + if (!is_cr_intercept(svm, INTERCEPT_SELECTIVE_CR0)) vcpu-arch.cr0 = svm-vmcb-save.cr0; if (npt_enabled) vcpu-arch.cr3 = svm-vmcb-save.cr3; -- To unsubscribe from this list: send the line unsubscribe kvm in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
[PATCH] x86: svm: use kvm_register_write()/read()
KVM has nice wrappers to access the register values, clean up a few places that should use them but currently do not. Signed-off-by:David Kaplan david.kap...@amd.com Signed-off-by:Joel Schopp joel.sch...@amd.com --- arch/x86/kvm/svm.c | 19 +-- 1 file changed, 9 insertions(+), 10 deletions(-) diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c index d319e0c..a7d88e4 100644 --- a/arch/x86/kvm/svm.c +++ b/arch/x86/kvm/svm.c @@ -2757,11 +2757,11 @@ static int invlpga_interception(struct vcpu_svm *svm) { struct kvm_vcpu *vcpu = svm-vcpu; - trace_kvm_invlpga(svm-vmcb-save.rip, vcpu-arch.regs[VCPU_REGS_RCX], - vcpu-arch.regs[VCPU_REGS_RAX]); + trace_kvm_invlpga(svm-vmcb-save.rip, kvm_register_read(svm-vcpu, VCPU_REGS_RCX), + kvm_register_read(svm-vcpu, VCPU_REGS_RAX)); /* Let's treat INVLPGA the same as INVLPG (can be optimized!) */ - kvm_mmu_invlpg(vcpu, vcpu-arch.regs[VCPU_REGS_RAX]); + kvm_mmu_invlpg(vcpu, kvm_register_read(svm-vcpu, VCPU_REGS_RAX)); svm-next_rip = kvm_rip_read(svm-vcpu) + 3; skip_emulated_instruction(svm-vcpu); @@ -2770,7 +2770,7 @@ static int invlpga_interception(struct vcpu_svm *svm) static int skinit_interception(struct vcpu_svm *svm) { - trace_kvm_skinit(svm-vmcb-save.rip, svm-vcpu.arch.regs[VCPU_REGS_RAX]); + trace_kvm_skinit(svm-vmcb-save.rip, kvm_register_read(svm-vcpu, VCPU_REGS_RAX)); kvm_queue_exception(svm-vcpu, UD_VECTOR); return 1; @@ -3133,7 +3133,7 @@ static int svm_get_msr(struct kvm_vcpu *vcpu, unsigned ecx, u64 *data) static int rdmsr_interception(struct vcpu_svm *svm) { - u32 ecx = svm-vcpu.arch.regs[VCPU_REGS_RCX]; + u32 ecx = kvm_register_read(svm-vcpu, VCPU_REGS_RCX); u64 data; if (svm_get_msr(svm-vcpu, ecx, data)) { @@ -3142,8 +3142,8 @@ static int rdmsr_interception(struct vcpu_svm *svm) } else { trace_kvm_msr_read(ecx, data); - svm-vcpu.arch.regs[VCPU_REGS_RAX] = data 0x; - svm-vcpu.arch.regs[VCPU_REGS_RDX] = data 32; + kvm_register_write(svm-vcpu, VCPU_REGS_RAX, data 0x); + kvm_register_write(svm-vcpu, VCPU_REGS_RDX, data 
32); svm-next_rip = kvm_rip_read(svm-vcpu) + 2; skip_emulated_instruction(svm-vcpu); } @@ -3246,9 +3246,8 @@ static int svm_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr) static int wrmsr_interception(struct vcpu_svm *svm) { struct msr_data msr; - u32 ecx = svm-vcpu.arch.regs[VCPU_REGS_RCX]; - u64 data = (svm-vcpu.arch.regs[VCPU_REGS_RAX] -1u) - | ((u64)(svm-vcpu.arch.regs[VCPU_REGS_RDX] -1u) 32); + u32 ecx = kvm_register_read(svm-vcpu, VCPU_REGS_RCX); + u64 data = kvm_read_edx_eax(svm-vcpu); msr.data = data; msr.index = ecx; -- To unsubscribe from this list: send the line unsubscribe kvm in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [PATCH v6] arm64: fix VTTBR_BADDR_MASK
-#define VTTBR_BADDR_SHIFT (VTTBR_X - 1) -#define VTTBR_BADDR_MASK (((1LLU (40 - VTTBR_X)) - 1) VTTBR_BADDR_SHIFT) Actually, after some more thinking, why don't we just make the upper limit of this mask 48-bit always or even 64-bit. That's a physical mask for checking whether the pgd pointer in vttbr is aligned as per the architecture requirements. Given that the pointer is allocated from the platform memory, it's clear that it is within the PA range. So basically you just need a mask to check the bottom alignment based on VTCR_EL2.T0SZ (which should be independent from the PA range). I guess it should be enough as: This sounds fine to me. I would say that there is no harm in re-checking the upper bits, but I agree it is unnecessary. #define VTTBR_BADDR_MASK (~0ULL VTTBR_BADDR_SHIFT) without any other changes to T0SZ. The TCR_EL2.PS setting should be done based on the ID_A64MMFR0_EL1 but you can do this in __do_hyp_init (it looks like this function handles VTCR_EL2.PS already, not sure why it does do it for TCR_EL2 as well). So IMO you only need about a few lines patch. My original patch to fix this problem was one line, so I'm perfectly happy with simplification. But it would be nice if the other reviewers could agree with this approach. With six versions that each addressed all the comments from reviewers I'd like it if the v7 that throws away most of that feedback didn't result in a v8 that puts it all back again. -- To unsubscribe from this list: send the line unsubscribe kvm in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
[PATCH 6] From nobody Wed Sep 3 13:22:18 2014
Subject: [PATCH] From nobody Tue Sep 2 17:49:46 2014 From: Joel Schopp joel.sch...@amd.com To: kvm...@lists.cs.columbia.edu, linux-arm-ker...@lists.infradead.org, kvm@vger.kernel.org Cc: Marc Zyngier marc.zyng...@arm.com, Sungjinn Chung sungjinn.ch...@samsung.com, Christoffer Dall christoffer.d...@linaro.org, Jungseok Lee jays@samsung.com Bcc: ssg.sos.patc...@amd.com Date: Wed, 03 Sep 2014 13:22:18 -0500 Message-ID: 20140903182218.2879.80647.st...@joelaarch64.amd.com User-Agent: StGit/0.17.1-4-g4a0c1 MIME-Version: 1.0 Content-Type: text/plain; charset=utf-8 Content-Transfer-Encoding: 7bit Subject: [PATCH] arm64: fix VTTBR_BADDR_MASK From: Joel Schopp joel.sch...@amd.com To: kvm...@lists.cs.columbia.edu, linux-arm-ker...@lists.infradead.org, kvm@vger.kernel.org Cc: Marc Zyngier marc.zyng...@arm.com, Sungjinn Chung sungjinn.ch...@samsung.com, Christoffer Dall christoffer.d...@linaro.org, Jungseok Lee jays@samsung.com Bcc: ssg.sos.patc...@amd.com Date: Tue, 02 Sep 2014 17:49:46 -0500 Message-ID: 20140902224946.1695.38669.st...@joelaarch64.amd.com User-Agent: StGit/0.17.1-4-g4a0c1 MIME-Version: 1.0 Content-Type: text/plain; charset=utf-8 Content-Transfer-Encoding: 7bit The current VTTBR_BADDR_MASK only masks 39 bits, which is broken on current systems. Rather than just add a bit it seems like a good time to also set things at run-time instead of compile time to accomodate more hardware. This patch sets TCR_EL2.PS, VTCR_EL2.T0SZ and vttbr_baddr_mask in runtime, not compile time. In ARMv8, EL2 physical address size (TCR_EL2.PS) and stage2 input address size (VTCR_EL2.T0SZE) cannot be determined in compile time since they depend on hardware capability. According to Table D4-23 and Table D4-25 in ARM DDI 0487A.b document, vttbr_x is calculated using different fixed values with consideration of T0SZ, granule size and the level of translation tables. Therefore, vttbr_baddr_mask should be determined dynamically. 
Changes since v5: Fixed declaration of vttbr_baddr_mask to not create multiple instances Refactored return codes based on feedback For 32 bit included kvm_arm.h in kvm_mmu.h to explictly pick up VTTBR_BADDR_MASK Changes since v4: More minor cleanups from review Moved some functions into headers Added runtime check in kvm_alloc_stage2_pgd Changes since v3: Another rebase Addressed minor comments from v2 Changes since v2: Rebased on https://git.kernel.org/pub/scm/linux/kernel/git/kvmarm/kvmarm.git next branch Changes since v1: Rebased fix on Jungseok Lee's patch https://lkml.org/lkml/2014/5/12/189 to provide better long term fix. Updated that patch to log error instead of silently fail on unaligned vttbr. Cc: Marc Zyngier marc.zyng...@arm.com Cc: Christoffer Dall christoffer.d...@linaro.org Cc: Sungjinn Chung sungjinn.ch...@samsung.com Signed-off-by: Jungseok Lee jays@samsung.com Signed-off-by: Joel Schopp joel.sch...@amd.com --- arch/arm/include/asm/kvm_mmu.h | 13 +++ arch/arm/kvm/arm.c | 23 +++- arch/arm64/include/asm/kvm_arm.h | 17 ++--- arch/arm64/include/asm/kvm_mmu.h | 75 ++ arch/arm64/kvm/hyp-init.S| 20 +++--- 5 files changed, 126 insertions(+), 22 deletions(-) diff --git a/arch/arm/include/asm/kvm_mmu.h b/arch/arm/include/asm/kvm_mmu.h index 5cc0b0f..03a08bb 100644 --- a/arch/arm/include/asm/kvm_mmu.h +++ b/arch/arm/include/asm/kvm_mmu.h @@ -21,6 +21,7 @@ #include asm/memory.h #include asm/page.h +#include asm/kvm_arm.h /* * We directly use the kernel VA for the HYP, as we can directly share @@ -178,6 +179,18 @@ static inline void coherent_cache_guest_page(struct kvm_vcpu *vcpu, hva_t hva, void stage2_flush_vm(struct kvm *kvm); +static inline int kvm_get_phys_addr_shift(void) +{ + return KVM_PHYS_SHIFT; +} + + +static inline u32 get_vttbr_baddr_mask(void) +{ + return VTTBR_BADDR_MASK; +} + + #endif /* !__ASSEMBLY__ */ #endif /* __ARM_KVM_MMU_H__ */ diff --git a/arch/arm/kvm/arm.c b/arch/arm/kvm/arm.c index a99e0cd..d0fca8f 100644 --- a/arch/arm/kvm/arm.c +++ 
b/arch/arm/kvm/arm.c @@ -37,6 +37,7 @@ #include asm/mman.h #include asm/tlbflush.h #include asm/cacheflush.h +#include asm/cputype.h #include asm/virt.h #include asm/kvm_arm.h #include asm/kvm_asm.h @@ -61,6 +62,12 @@ static atomic64_t kvm_vmid_gen = ATOMIC64_INIT(1); static u8 kvm_next_vmid; static DEFINE_SPINLOCK(kvm_vmid_lock); +#ifdef CONFIG_ARM64 +static u64 vttbr_baddr_mask; +#else +static u32 vttbr_baddr_mask; +#endif + static bool vgic_present; static void kvm_arm_set_running_vcpu(struct kvm_vcpu *vcpu) @@ -429,8 +436,14 @@ static void update_vttbr(struct kvm *kvm) /* update vttbr to be used with the new vmid */ pgd_phys = virt_to_phys(kvm-arch.pgd); vmid = ((u64)(kvm-arch.vmid) VTTBR_VMID_SHIFT) VTTBR_VMID_MASK; - kvm-arch.vttbr = pgd_phys VTTBR_BADDR_MASK; - kvm-arch.vttbr |= vmid; + + /* +* If the VTTBR isn't
Re: [PATCH 6] From nobody Wed Sep 3 13:22:18 2014
My apologies for the bad formatting. Will resend momentarily. On 09/08/2014 06:01 PM, Joel Schopp wrote: Subject: [PATCH] From nobody Tue Sep 2 17:49:46 2014 From: Joel Schopp joel.sch...@amd.com To: kvm...@lists.cs.columbia.edu, linux-arm-ker...@lists.infradead.org, kvm@vger.kernel.org Cc: Marc Zyngier marc.zyng...@arm.com, Sungjinn Chung sungjinn.ch...@samsung.com, Christoffer Dall christoffer.d...@linaro.org, Jungseok Lee jays@samsung.com Bcc: ssg.sos.patc...@amd.com Date: Wed, 03 Sep 2014 13:22:18 -0500 Message-ID: 20140903182218.2879.80647.st...@joelaarch64.amd.com User-Agent: StGit/0.17.1-4-g4a0c1 MIME-Version: 1.0 Content-Type: text/plain; charset=utf-8 Content-Transfer-Encoding: 7bit Subject: [PATCH] arm64: fix VTTBR_BADDR_MASK From: Joel Schopp joel.sch...@amd.com To: kvm...@lists.cs.columbia.edu, linux-arm-ker...@lists.infradead.org, kvm@vger.kernel.org Cc: Marc Zyngier marc.zyng...@arm.com, Sungjinn Chung sungjinn.ch...@samsung.com, Christoffer Dall christoffer.d...@linaro.org, Jungseok Lee jays@samsung.com Bcc: ssg.sos.patc...@amd.com Date: Tue, 02 Sep 2014 17:49:46 -0500 Message-ID: 20140902224946.1695.38669.st...@joelaarch64.amd.com User-Agent: StGit/0.17.1-4-g4a0c1 MIME-Version: 1.0 Content-Type: text/plain; charset=utf-8 Content-Transfer-Encoding: 7bit The current VTTBR_BADDR_MASK only masks 39 bits, which is broken on current systems. Rather than just add a bit it seems like a good time to also set things at run-time instead of compile time to accomodate more hardware. This patch sets TCR_EL2.PS, VTCR_EL2.T0SZ and vttbr_baddr_mask in runtime, not compile time. In ARMv8, EL2 physical address size (TCR_EL2.PS) and stage2 input address size (VTCR_EL2.T0SZE) cannot be determined in compile time since they depend on hardware capability. According to Table D4-23 and Table D4-25 in ARM DDI 0487A.b document, vttbr_x is calculated using different fixed values with consideration of T0SZ, granule size and the level of translation tables. 
Therefore, vttbr_baddr_mask should be determined dynamically. Changes since v5: Fixed declaration of vttbr_baddr_mask to not create multiple instances Refactored return codes based on feedback For 32 bit included kvm_arm.h in kvm_mmu.h to explictly pick up VTTBR_BADDR_MASK Changes since v4: More minor cleanups from review Moved some functions into headers Added runtime check in kvm_alloc_stage2_pgd Changes since v3: Another rebase Addressed minor comments from v2 Changes since v2: Rebased on https://git.kernel.org/pub/scm/linux/kernel/git/kvmarm/kvmarm.git next branch Changes since v1: Rebased fix on Jungseok Lee's patch https://lkml.org/lkml/2014/5/12/189 to provide better long term fix. Updated that patch to log error instead of silently fail on unaligned vttbr. Cc: Marc Zyngier marc.zyng...@arm.com Cc: Christoffer Dall christoffer.d...@linaro.org Cc: Sungjinn Chung sungjinn.ch...@samsung.com Signed-off-by: Jungseok Lee jays@samsung.com Signed-off-by: Joel Schopp joel.sch...@amd.com --- arch/arm/include/asm/kvm_mmu.h | 13 +++ arch/arm/kvm/arm.c | 23 +++- arch/arm64/include/asm/kvm_arm.h | 17 ++--- arch/arm64/include/asm/kvm_mmu.h | 75 ++ arch/arm64/kvm/hyp-init.S| 20 +++--- 5 files changed, 126 insertions(+), 22 deletions(-) diff --git a/arch/arm/include/asm/kvm_mmu.h b/arch/arm/include/asm/kvm_mmu.h index 5cc0b0f..03a08bb 100644 --- a/arch/arm/include/asm/kvm_mmu.h +++ b/arch/arm/include/asm/kvm_mmu.h @@ -21,6 +21,7 @@ #include asm/memory.h #include asm/page.h +#include asm/kvm_arm.h /* * We directly use the kernel VA for the HYP, as we can directly share @@ -178,6 +179,18 @@ static inline void coherent_cache_guest_page(struct kvm_vcpu *vcpu, hva_t hva, void stage2_flush_vm(struct kvm *kvm); +static inline int kvm_get_phys_addr_shift(void) +{ + return KVM_PHYS_SHIFT; +} + + +static inline u32 get_vttbr_baddr_mask(void) +{ + return VTTBR_BADDR_MASK; +} + + #endif /* !__ASSEMBLY__ */ #endif /* __ARM_KVM_MMU_H__ */ diff --git a/arch/arm/kvm/arm.c 
b/arch/arm/kvm/arm.c index a99e0cd..d0fca8f 100644 --- a/arch/arm/kvm/arm.c +++ b/arch/arm/kvm/arm.c @@ -37,6 +37,7 @@ #include asm/mman.h #include asm/tlbflush.h #include asm/cacheflush.h +#include asm/cputype.h #include asm/virt.h #include asm/kvm_arm.h #include asm/kvm_asm.h @@ -61,6 +62,12 @@ static atomic64_t kvm_vmid_gen = ATOMIC64_INIT(1); static u8 kvm_next_vmid; static DEFINE_SPINLOCK(kvm_vmid_lock); +#ifdef CONFIG_ARM64 +static u64 vttbr_baddr_mask; +#else +static u32 vttbr_baddr_mask; +#endif + static bool vgic_present; static void kvm_arm_set_running_vcpu(struct kvm_vcpu *vcpu) @@ -429,8 +436,14 @@ static void update_vttbr(struct kvm *kvm) /* update vttbr to be used with the new vmid
[PATCH v6] arm64: fix VTTBR_BADDR_MASK
The current VTTBR_BADDR_MASK only masks 39 bits, which is broken on current systems. Rather than just add a bit it seems like a good time to also set things at run-time instead of compile time to accommodate more hardware. This patch sets TCR_EL2.PS, VTCR_EL2.T0SZ and vttbr_baddr_mask in runtime, not compile time. In ARMv8, EL2 physical address size (TCR_EL2.PS) and stage2 input address size (VTCR_EL2.T0SZE) cannot be determined in compile time since they depend on hardware capability. According to Table D4-23 and Table D4-25 in ARM DDI 0487A.b document, vttbr_x is calculated using different fixed values with consideration of T0SZ, granule size and the level of translation tables. Therefore, vttbr_baddr_mask should be determined dynamically. Changes since v5: Fixed declaration of vttbr_baddr_mask to not create multiple instances Refactored return codes based on feedback For 32 bit included kvm_arm.h in kvm_mmu.h to explicitly pick up VTTBR_BADDR_MASK Changes since v4: More minor cleanups from review Moved some functions into headers Added runtime check in kvm_alloc_stage2_pgd Changes since v3: Another rebase Addressed minor comments from v2 Changes since v2: Rebased on https://git.kernel.org/pub/scm/linux/kernel/git/kvmarm/kvmarm.git next branch Changes since v1: Rebased fix on Jungseok Lee's patch https://lkml.org/lkml/2014/5/12/189 to provide better long term fix. Updated that patch to log error instead of silently fail on unaligned vttbr. 
Cc: Marc Zyngier marc.zyng...@arm.com Cc: Christoffer Dall christoffer.d...@linaro.org Cc: Sungjinn Chung sungjinn.ch...@samsung.com Signed-off-by: Jungseok Lee jays@samsung.com Signed-off-by: Joel Schopp joel.sch...@amd.com --- arch/arm/include/asm/kvm_mmu.h | 13 +++ arch/arm/kvm/arm.c | 23 +++- arch/arm64/include/asm/kvm_arm.h | 17 ++--- arch/arm64/include/asm/kvm_mmu.h | 75 ++ arch/arm64/kvm/hyp-init.S| 20 +++--- 5 files changed, 126 insertions(+), 22 deletions(-) diff --git a/arch/arm/include/asm/kvm_mmu.h b/arch/arm/include/asm/kvm_mmu.h index 5cc0b0f..03a08bb 100644 --- a/arch/arm/include/asm/kvm_mmu.h +++ b/arch/arm/include/asm/kvm_mmu.h @@ -21,6 +21,7 @@ #include asm/memory.h #include asm/page.h +#include asm/kvm_arm.h /* * We directly use the kernel VA for the HYP, as we can directly share @@ -178,6 +179,18 @@ static inline void coherent_cache_guest_page(struct kvm_vcpu *vcpu, hva_t hva, void stage2_flush_vm(struct kvm *kvm); +static inline int kvm_get_phys_addr_shift(void) +{ + return KVM_PHYS_SHIFT; +} + + +static inline u32 get_vttbr_baddr_mask(void) +{ + return VTTBR_BADDR_MASK; +} + + #endif /* !__ASSEMBLY__ */ #endif /* __ARM_KVM_MMU_H__ */ diff --git a/arch/arm/kvm/arm.c b/arch/arm/kvm/arm.c index a99e0cd..d0fca8f 100644 --- a/arch/arm/kvm/arm.c +++ b/arch/arm/kvm/arm.c @@ -37,6 +37,7 @@ #include asm/mman.h #include asm/tlbflush.h #include asm/cacheflush.h +#include asm/cputype.h #include asm/virt.h #include asm/kvm_arm.h #include asm/kvm_asm.h @@ -61,6 +62,12 @@ static atomic64_t kvm_vmid_gen = ATOMIC64_INIT(1); static u8 kvm_next_vmid; static DEFINE_SPINLOCK(kvm_vmid_lock); +#ifdef CONFIG_ARM64 +static u64 vttbr_baddr_mask; +#else +static u32 vttbr_baddr_mask; +#endif + static bool vgic_present; static void kvm_arm_set_running_vcpu(struct kvm_vcpu *vcpu) @@ -429,8 +436,14 @@ static void update_vttbr(struct kvm *kvm) /* update vttbr to be used with the new vmid */ pgd_phys = virt_to_phys(kvm-arch.pgd); vmid = ((u64)(kvm-arch.vmid) VTTBR_VMID_SHIFT) 
VTTBR_VMID_MASK; - kvm-arch.vttbr = pgd_phys VTTBR_BADDR_MASK; - kvm-arch.vttbr |= vmid; + + /* +* If the VTTBR isn't aligned there is something wrong with the system +* or kernel. +*/ + BUG_ON(pgd_phys ~vttbr_baddr_mask); + + kvm-arch.vttbr = pgd_phys | vmid; spin_unlock(kvm_vmid_lock); } @@ -1015,6 +1028,12 @@ int kvm_arch_init(void *opaque) } } + vttbr_baddr_mask = get_vttbr_baddr_mask(); + if (vttbr_baddr_mask == ~0) { + kvm_err(Cannot set vttbr_baddr_mask\n); + return -EINVAL; + } + cpu_notifier_register_begin(); err = init_hyp_mode(); diff --git a/arch/arm64/include/asm/kvm_arm.h b/arch/arm64/include/asm/kvm_arm.h index cc83520..ff4a4fa 100644 --- a/arch/arm64/include/asm/kvm_arm.h +++ b/arch/arm64/include/asm/kvm_arm.h @@ -95,7 +95,6 @@ /* TCR_EL2 Registers bits */ #define TCR_EL2_TBI(1 20) #define TCR_EL2_PS (7 16) -#define TCR_EL2_PS_40B (2 16) #define TCR_EL2_TG0(1 14) #define TCR_EL2_SH0(3 12) #define TCR_EL2_ORGN0 (3 10) @@ -104,8 +103,6 @@ #define TCR_EL2_MASK (TCR_EL2_TG0 | TCR_EL2_SH0 | \ TCR_EL2_ORGN0 | TCR_EL2_IRGN0 | TCR_EL2_T0SZ) -#define TCR_EL2_FLAGS (TCR_EL2_PS_40B) - /* VTCR_EL2 Registers
[PATCH] KVM: arm64: add gic-400 compatible
Add a one liner to identify the gic-400. It's gicv2 with optional MSI extensions. Cc: Christoffer Dall christoffer.d...@linaro.org Signed-off-by: Joel Schopp joel.sch...@amd.com --- virt/kvm/arm/vgic.c |1 + 1 file changed, 1 insertion(+) diff --git a/virt/kvm/arm/vgic.c b/virt/kvm/arm/vgic.c index 73eba79..e81444e 100644 --- a/virt/kvm/arm/vgic.c +++ b/virt/kvm/arm/vgic.c @@ -1550,6 +1550,7 @@ static struct notifier_block vgic_cpu_nb = { static const struct of_device_id vgic_ids[] = { { .compatible = arm,cortex-a15-gic, .data = vgic_v2_probe, }, + { .compatible = arm,gic-400, .data = vgic_v2_probe, }, { .compatible = arm,gic-v3, .data = vgic_v3_probe, }, {}, }; -- To unsubscribe from this list: send the line unsubscribe kvm in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [PATCH v10 0/6] arm: dirty page logging support for ARMv7
On 08/26/2014 06:51 PM, Mario Smarduch wrote: This patch adds support for ARMv7 dirty page logging. Some functions of dirty page logging have been split into generic and arch specific implementations, details below. Dirty page logging is one of several features required for live migration; live migration has been tested for ARMv7. Any reason not to cc the kvm arm list? I almost missed this patch set. kvm...@lists.cs.columbia.edu -- To unsubscribe from this list: send the line unsubscribe kvm in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [PATCH v5] arm64: fix VTTBR_BADDR_MASK
diff --git a/arch/arm/include/asm/kvm_mmu.h b/arch/arm/include/asm/kvm_mmu.h index 5c7aa3c..73f6ff6 100644 --- a/arch/arm/include/asm/kvm_mmu.h +++ b/arch/arm/include/asm/kvm_mmu.h @@ -166,6 +166,18 @@ static inline void coherent_cache_guest_page(struct kvm_vcpu *vcpu, hva_t hva, void stage2_flush_vm(struct kvm *kvm); +static inline int kvm_get_phys_addr_shift(void) +{ + return KVM_PHYS_SHIFT; +} + +static inline int set_vttbr_baddr_mask(void) +{ + vttbr_baddr_mask = VTTBR_BADDR_MASK; Have you tried compiling this? Apart from the obvious missing definition of the variable, I'm not fond of functions with side-effects hidden in an include file. What is wrong with just returning the mask and letting the common code setting it? I like that change, will do in v6. +#ifdef CONFIG_ARM64_64K_PAGES +static inline int t0sz_to_vttbr_x(int t0sz) +{ + if (t0sz 16 || t0sz 34) { + kvm_err(Cannot support %d-bit address space\n, 64 - t0sz); + return 0; 0 is definitely a bad value for something that is an error case. Consider -EINVAL instead. OK. Also, what if we're in a range that only deals with more levels of page tables than the kernel can deal with (remember we use the kernel page table accessors)? See the new ARM64_VA_BITS and ARM64_PGTABLE_LEVELS symbols that are now available, and use them to validate the range you have. With the simple current tests I can look at them and see they are correct, even if I can't make a scenario to test that they would fail. However, if I add in more complicated checks I'd really like to test them catching the failure cases. Can you describe a case where we can boot a kernel and then have the checks still fail in kvm? 
+ } + return 37 - t0sz; +} +#endif +static inline int kvm_get_phys_addr_shift(void) +{ + int pa_range = read_cpuid(ID_AA64MMFR0_EL1) 0xf; + + switch (pa_range) { + case 0: return 32; + case 1: return 36; + case 2: return 40; + case 3: return 42; + case 4: return 44; + case 5: return 48; + default: + BUG(); + return 0; + } +} + +static u64 vttbr_baddr_mask; Now every compilation unit that includes kvm_mmu.h has an instance of this variable. I doubt that it is the intended effect. The change for the comment farther above to just return the mask and have the common code set it should resolve this as well. + +/** + * set_vttbr_baddr_mask - set mask value for vttbr base address + * + * In ARMv8, vttbr_baddr_mask cannot be determined in compile time since the + * stage2 input address size depends on hardware capability. Thus, we first + * need to read ID_AA64MMFR0_EL1.PARange and then set vttbr_baddr_mask with + * consideration of both the granule size and the level of translation tables. + */ +static inline int set_vttbr_baddr_mask(void) +{ + int t0sz, vttbr_x; + + t0sz = VTCR_EL2_T0SZ(kvm_get_phys_addr_shift()); + vttbr_x = t0sz_to_vttbr_x(t0sz); + if (!vttbr_x) + return -EINVAL; + vttbr_baddr_mask = (((1LLU (48 - vttbr_x)) - 1) (vttbr_x - 1)); I think this can now be written as GENMASK_ULL(48, (vttbr_x - 1)). That does improve readability, I like it. + return 0; +} + #endif /* __ASSEMBLY__ */ #endif /* __ARM64_KVM_MMU_H__ */ diff --git a/arch/arm64/kvm/hyp-init.S b/arch/arm64/kvm/hyp-init.S index d968796..c0f7634 100644 --- a/arch/arm64/kvm/hyp-init.S +++ b/arch/arm64/kvm/hyp-init.S @@ -63,17 +63,21 @@ __do_hyp_init: mrs x4, tcr_el1 ldr x5, =TCR_EL2_MASK and x4, x4, x5 - ldr x5, =TCR_EL2_FLAGS - orr x4, x4, x5 - msr tcr_el2, x4 - - ldr x4, =VTCR_EL2_FLAGS /* * Read the PARange bits from ID_AA64MMFR0_EL1 and set the PS bits in -* VTCR_EL2. +* TCR_EL2 and both PS bits and T0SZ bits in VTCR_EL2. 
*/ mrs x5, ID_AA64MMFR0_EL1 bfi x4, x5, #16, #3 + msr tcr_el2, x4 + + ldr x4, =VTCR_EL2_FLAGS + bfi x4, x5, #16, #3 + and x5, x5, #0xf + adr x6, t0sz + add x6, x6, x5, lsl #2 + ldr w5, [x6] + orr x4, x4, x5 You'll need to validate the T0SZ value, and possibly adjust it so that it is compatible with the addressing capability of the kernel. That probably require a slight change of the hyp-init API. In order to do that I really should test that path, can you think of a way to generate a t0sz value that is incompatible with the kernel, but the kernel still boots so I can load kvm and test it? msr vtcr_el2, x4 mrs x4, mair_el1 @@ -109,6 +113,10 @@ target: /* We're now in the trampoline code, switch page tables */ /* Hello, World! */ eret + +t0sz: + .word VTCR_EL2_T0SZ(32),
Re: [PATCH v4] arm64: fix VTTBR_BADDR_MASK
On 08/19/2014 07:22 AM, Christoffer Dall wrote: On Mon, Aug 18, 2014 at 03:30:58PM -0500, Joel Schopp wrote: #endif /* __ARM_KVM_MMU_H__ */ diff --git a/arch/arm/kvm/mmu.c b/arch/arm/kvm/mmu.c index 16e7994..70f0f02 100644 --- a/arch/arm/kvm/mmu.c +++ b/arch/arm/kvm/mmu.c @@ -521,6 +521,7 @@ int create_hyp_io_mappings(void *from, void *to, phys_addr_t phys_addr) */ int kvm_alloc_stage2_pgd(struct kvm *kvm) { +unsigned int s2_pgds, s2_pgd_order; pgd_t *pgd; if (kvm-arch.pgd != NULL) { @@ -528,10 +529,18 @@ int kvm_alloc_stage2_pgd(struct kvm *kvm) return -EINVAL; } -pgd = (pgd_t *)__get_free_pages(GFP_KERNEL, S2_PGD_ORDER); +s2_pgds = (1 (kvm_get_phys_addr_shift() - PGDIR_SHIFT)); +s2_pgd_order = get_order(s2_pgds * sizeof(pgd_t)); + +pgd = (pgd_t *)__get_free_pages(GFP_KERNEL, s2_pgd_order); if (!pgd) return -ENOMEM; +if ((unsigned long)pgd ~vttbr_baddr_mask) { +kvm_err(Stage-2 pgd not correctly aligned: %p\n, pgd); +return -EFAULT; +} There are two problems that I've found here. The first problem is that vttbr_baddr_mask isn't allocated yet at this point in the code. allocated? you mean assigned? aren't you setting vttbr_baddr_mask in kvm_arch_init()? that's certainly called before kvm_arch_init_vm(). Yes, I mean assigned, at least I got the first letter correct :) All I know is that vttbr_baddr_mask was still zero and checking for zero and calling the set function gave it a value. The second problem is that pgd is a virtual address, ie pgd == 0xfe03bbb4 while the vttbr masks off the high bits for a physical address, ie vttbr_baddr_mask=0x7ffe . Even correcting for those issues I haven't been able to make this check work properly. I'll resend v5 the patch with all the other suggested changes. What are the issues that you face? Iow. what is the alignment of the returned physical address? (You should be able to just to virt_to_phys(pgd) and use that to test for the vttbr_baddr_mask). 
The addresses above are actually from my system, 64K page aligned on a 64K page kernel. I did use virt_to_phys() and the kernel got a null dereference and panicked; I didn't trace down where the panic was occurring. Thanks, -Christoffer -- To unsubscribe from this list: send the line unsubscribe kvm in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [PATCH v5] arm64: fix VTTBR_BADDR_MASK
On 08/19/2014 07:24 AM, Christoffer Dall wrote: On Mon, Aug 18, 2014 at 03:36:04PM -0500, Joel Schopp wrote: The current VTTBR_BADDR_MASK only masks 39 bits, which is broken on current systems. Rather than just add a bit it seems like a good time to also set things at run-time instead of compile time to accomodate more hardware. This patch sets TCR_EL2.PS, VTCR_EL2.T0SZ and vttbr_baddr_mask in runtime, not compile time. In ARMv8, EL2 physical address size (TCR_EL2.PS) and stage2 input address size (VTCR_EL2.T0SZE) cannot be determined in compile time since they depend on hardware capability. According to Table D4-23 and Table D4-25 in ARM DDI 0487A.b document, vttbr_x is calculated using different fixed values with consideration of T0SZ, granule size and the level of translation tables. Therefore, vttbr_baddr_mask should be determined dynamically. Changes since v4: More minor cleanups from review Moved some functions into headers Changes since v3: Another rebase Addressed minor comments from v2 Changes since v2: Rebased on https://git.kernel.org/pub/scm/linux/kernel/git/kvmarm/kvmarm.git next branch Changes since v1: Rebased fix on Jungseok Lee's patch https://lkml.org/lkml/2014/5/12/189 to provide better long term fix. Updated that patch to log error instead of silently fail on unaligned vttbr. 
Cc: Christoffer Dall christoffer.d...@linaro.org Cc: Sungjinn Chung sungjinn.ch...@samsung.com Signed-off-by: Jungseok Lee jays@samsung.com Signed-off-by: Joel Schopp joel.sch...@amd.com --- arch/arm/include/asm/kvm_mmu.h | 12 ++ arch/arm/kvm/arm.c | 17 +++- arch/arm64/include/asm/kvm_arm.h | 17 +--- arch/arm64/include/asm/kvm_mmu.h | 78 ++ arch/arm64/kvm/hyp-init.S| 20 +++--- 5 files changed, 122 insertions(+), 22 deletions(-) diff --git a/arch/arm/include/asm/kvm_mmu.h b/arch/arm/include/asm/kvm_mmu.h index 5c7aa3c..73f6ff6 100644 --- a/arch/arm/include/asm/kvm_mmu.h +++ b/arch/arm/include/asm/kvm_mmu.h @@ -166,6 +166,18 @@ static inline void coherent_cache_guest_page(struct kvm_vcpu *vcpu, hva_t hva, void stage2_flush_vm(struct kvm *kvm); +static inline int kvm_get_phys_addr_shift(void) +{ +return KVM_PHYS_SHIFT; +} + +static inline int set_vttbr_baddr_mask(void) +{ +vttbr_baddr_mask = VTTBR_BADDR_MASK; +return 0; +} + + #endif /* !__ASSEMBLY__ */ #endif /* __ARM_KVM_MMU_H__ */ diff --git a/arch/arm/kvm/arm.c b/arch/arm/kvm/arm.c index 3c82b37..f396eb7 100644 --- a/arch/arm/kvm/arm.c +++ b/arch/arm/kvm/arm.c @@ -37,6 +37,7 @@ #include asm/mman.h #include asm/tlbflush.h #include asm/cacheflush.h +#include asm/cputype.h #include asm/virt.h #include asm/kvm_arm.h #include asm/kvm_asm.h @@ -466,8 +467,14 @@ static void update_vttbr(struct kvm *kvm) /* update vttbr to be used with the new vmid */ pgd_phys = virt_to_phys(kvm-arch.pgd); vmid = ((u64)(kvm-arch.vmid) VTTBR_VMID_SHIFT) VTTBR_VMID_MASK; -kvm-arch.vttbr = pgd_phys VTTBR_BADDR_MASK; -kvm-arch.vttbr |= vmid; + +/* + * If the VTTBR isn't aligned there is something wrong with the system + * or kernel. 
+ */ +BUG_ON(pgd_phys ~vttbr_baddr_mask); + +kvm-arch.vttbr = pgd_phys | vmid; spin_unlock(kvm_vmid_lock); } @@ -1052,6 +1059,12 @@ int kvm_arch_init(void *opaque) } } +err = set_vttbr_baddr_mask(); +if (err) { +kvm_err(Cannot set vttbr_baddr_mask\n); +return -EINVAL; +} + cpu_notifier_register_begin(); err = init_hyp_mode(); diff --git a/arch/arm64/include/asm/kvm_arm.h b/arch/arm64/include/asm/kvm_arm.h index 3d69030..8dbef70 100644 --- a/arch/arm64/include/asm/kvm_arm.h +++ b/arch/arm64/include/asm/kvm_arm.h @@ -94,7 +94,6 @@ /* TCR_EL2 Registers bits */ #define TCR_EL2_TBI (1 20) #define TCR_EL2_PS (7 16) -#define TCR_EL2_PS_40B (2 16) #define TCR_EL2_TG0 (1 14) #define TCR_EL2_SH0 (3 12) #define TCR_EL2_ORGN0 (3 10) @@ -103,8 +102,6 @@ #define TCR_EL2_MASK(TCR_EL2_TG0 | TCR_EL2_SH0 | \ TCR_EL2_ORGN0 | TCR_EL2_IRGN0 | TCR_EL2_T0SZ) -#define TCR_EL2_FLAGS (TCR_EL2_PS_40B) - /* VTCR_EL2 Registers bits */ #define VTCR_EL2_PS_MASK(7 16) #define VTCR_EL2_TG0_MASK (1 14) @@ -119,36 +116,28 @@ #define VTCR_EL2_SL0_MASK (3 6) #define VTCR_EL2_SL0_LVL1 (1 6) #define VTCR_EL2_T0SZ_MASK 0x3f -#define VTCR_EL2_T0SZ_40B 24 +#define VTCR_EL2_T0SZ(bits) (64 - (bits)) #ifdef CONFIG_ARM64_64K_PAGES /* * Stage2 translation configuration: - * 40bits output (PS = 2) - * 40bits input (T0SZ = 24) * 64kB pages (TG0 = 1) * 2 level page tables (SL = 1) */ #define VTCR_EL2_FLAGS (VTCR_EL2_TG0_64K | VTCR_EL2_SH0_INNER
Re: [PATCH v5] arm64: fix VTTBR_BADDR_MASK
The return is a value, not just an error code. Because of this returning an error overloads that value. 0 just seemed like a convenient invalid value to check since a vttbr_x of 0 is invalid, but returning a negative error code would be equally invalid. If this is the only issue it doesn't seem worth respinning the patch for, but I'll change it to -EINVAL if for some reason a v6 is needed. Have you given up on doing the alignment check with the proper size on the pgd allocation for this patch? Yes, I'd rather leave the extra check out of this patch. If I were changing the pgd allocation code I would make sure to add a check, or if there were a static check there now I would update it for the dynamic value from the hardware, but it seems unrelated to add several checks to other parts of the code beyond those already in the patch. I did leave the functions in the headers such that checks like this could be added when someone is updating the code for other reasons, say 4 level page tables. -Joel -- To unsubscribe from this list: send the line unsubscribe kvm in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [PATCH v4] arm64: fix VTTBR_BADDR_MASK
On 08/19/2014 09:37 AM, Christoffer Dall wrote: On Tue, Aug 19, 2014 at 09:05:09AM -0500, Joel Schopp wrote: On 08/19/2014 07:22 AM, Christoffer Dall wrote: On Mon, Aug 18, 2014 at 03:30:58PM -0500, Joel Schopp wrote: #endif /* __ARM_KVM_MMU_H__ */ diff --git a/arch/arm/kvm/mmu.c b/arch/arm/kvm/mmu.c index 16e7994..70f0f02 100644 --- a/arch/arm/kvm/mmu.c +++ b/arch/arm/kvm/mmu.c @@ -521,6 +521,7 @@ int create_hyp_io_mappings(void *from, void *to, phys_addr_t phys_addr) */ int kvm_alloc_stage2_pgd(struct kvm *kvm) { + unsigned int s2_pgds, s2_pgd_order; pgd_t *pgd; if (kvm-arch.pgd != NULL) { @@ -528,10 +529,18 @@ int kvm_alloc_stage2_pgd(struct kvm *kvm) return -EINVAL; } - pgd = (pgd_t *)__get_free_pages(GFP_KERNEL, S2_PGD_ORDER); + s2_pgds = (1 (kvm_get_phys_addr_shift() - PGDIR_SHIFT)); + s2_pgd_order = get_order(s2_pgds * sizeof(pgd_t)); + + pgd = (pgd_t *)__get_free_pages(GFP_KERNEL, s2_pgd_order); if (!pgd) return -ENOMEM; + if ((unsigned long)pgd ~vttbr_baddr_mask) { + kvm_err(Stage-2 pgd not correctly aligned: %p\n, pgd); + return -EFAULT; + } There are two problems that I've found here. The first problem is that vttbr_baddr_mask isn't allocated yet at this point in the code. allocated? you mean assigned? aren't you setting vttbr_baddr_mask in kvm_arch_init()? that's certainly called before kvm_arch_init_vm(). Yes, I mean assigned, at least I got the first letter correct :) All I know is that vttbr_baddr_mask was still zero and checking for zero and calling the set function gave it a value. that sounds weird and wrong. Hum. Mind sticking a few prints in there and figuring out what's causing this? The second problem is that pgd is a virtual address, ie pgd == 0xfe03bbb4 while the vttbr masks off the high bits for a physical address, ie vttbr_baddr_mask=0x7ffe . Even correcting for those issues I haven't been able to make this check work properly. I'll resend v5 the patch with all the other suggested changes. What are the issues that you face? Iow. 
what is the alignment of the returned physical address? (You should be able to just to virt_to_phys(pgd) and use that to test for the vttbr_baddr_mask). The addresses above are actually from my system, 64K page aligned on a 64K page kernel. I did use virt_to_phys() and the kernel got a null dereference and paniced, I didn't trace down where the panic was occuring. virt_to_phys() directly caused the null dereference? That sounds bad! I don't think it was the virt_to_phys() directly causing the null dereference, but again I didn't trace it down. Would you mind trying to trace this down? I'll be happy to provide as much help as I can along the way. I can break the kvm_alloc_stage2_pgd check into a separate patch on top of this one and circle back around to it after I finish another unrelated thing I'm working on. -- To unsubscribe from this list: send the line unsubscribe kvm in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [PATCH v5] arm64: fix VTTBR_BADDR_MASK
hmmm, the point is that we need to ensure that we have a properly aligned allocated PGD, that's what this patch currently addresses, and as you pointed out, the BUG_ON() just before trying to run a VM is not the nicest solution - we should really be dealing with this properly at allocation time. But, if you don't have time to look at that, then ok, I'll have to pick it up myself. However, you are hinting that we do not support 4 levels of page tables, yet you do allow the t0sz_to_vttbr_x function to pass even when using t0sz values only supported under 4 levels of page tables. What is the rationale for that? Minimizing merge conflicts. I figure both are going in within the next month or two. -- To unsubscribe from this list: send the line unsubscribe kvm in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [RFC 0/4] VFIO: PLATFORM: Return device tree info for a platform device node
This RFC's intention is to show what an interface to access device node properties for VFIO_PLATFORM can look like. If a device tree node corresponding to a platform device bound by VFIO_PLATFORM is available, this patch series will allow the user to query the properties associated with this device node. This can be useful for userspace drivers to automatically query parameters related to the device. An API to return data from a device's device tree has been proposed before on these lists. The API proposed here is slightly different. Properties to parse from the device tree are not indexed by a numerical id. The host system doesn't guarantee any specific ordering for the available properties, or that those will remain the same; while this does not happen in practice, there is nothing from the host changing the device nodes during operation. So properties are accessed by property name. The type of the property accessed must also be known by the user. Properties types implemented in this RFC: - VFIO_DEVTREE_ARR_TYPE_STRING (strings separated by the null character) - VFIO_DEVTREE_ARR_TYPE_U32 - VFIO_DEVTREE_ARR_TYPE_U16 - VFIO_DEVTREE_ARR_TYPE_U8 These can all be accessed by the ioctl VFIO_DEVICE_GET_DEVTREE_INFO. A new ioctl was preferred instead of shoehorning the functionality in VFIO_DEVICE_GET_INFO. The structure exchanged looks like this: You'll have to forgive my ignorance on the history. But with the dtc tool already supporting a filesystem representation (--in-format=fs), with the dtc tool source already built into qemu, and having an example implementation of such an interface in /proc/device-tree/ why is an ioctl interface preferred over a filesystem interface? -- To unsubscribe from this list: send the line unsubscribe kvm in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [PATCH v4] arm64: fix VTTBR_BADDR_MASK
#endif /* __ARM_KVM_MMU_H__ */ diff --git a/arch/arm/kvm/mmu.c b/arch/arm/kvm/mmu.c index 16e7994..70f0f02 100644 --- a/arch/arm/kvm/mmu.c +++ b/arch/arm/kvm/mmu.c @@ -521,6 +521,7 @@ int create_hyp_io_mappings(void *from, void *to, phys_addr_t phys_addr) */ int kvm_alloc_stage2_pgd(struct kvm *kvm) { + unsigned int s2_pgds, s2_pgd_order; pgd_t *pgd; if (kvm-arch.pgd != NULL) { @@ -528,10 +529,18 @@ int kvm_alloc_stage2_pgd(struct kvm *kvm) return -EINVAL; } - pgd = (pgd_t *)__get_free_pages(GFP_KERNEL, S2_PGD_ORDER); + s2_pgds = (1 (kvm_get_phys_addr_shift() - PGDIR_SHIFT)); + s2_pgd_order = get_order(s2_pgds * sizeof(pgd_t)); + + pgd = (pgd_t *)__get_free_pages(GFP_KERNEL, s2_pgd_order); if (!pgd) return -ENOMEM; + if ((unsigned long)pgd ~vttbr_baddr_mask) { + kvm_err(Stage-2 pgd not correctly aligned: %p\n, pgd); + return -EFAULT; + } There are two problems that I've found here. The first problem is that vttbr_baddr_mask isn't allocated yet at this point in the code. The second problem is that pgd is a virtual address, ie pgd == 0xfe03bbb4 while the vttbr masks off the high bits for a physical address, ie vttbr_baddr_mask=0x7ffe . Even correcting for those issues I haven't been able to make this check work properly. I'll resend v5 the patch with all the other suggested changes. -- To unsubscribe from this list: send the line unsubscribe kvm in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
[PATCH v5] arm64: fix VTTBR_BADDR_MASK
The current VTTBR_BADDR_MASK only masks 39 bits, which is broken on current systems. Rather than just add a bit it seems like a good time to also set things at run-time instead of compile time to accomodate more hardware. This patch sets TCR_EL2.PS, VTCR_EL2.T0SZ and vttbr_baddr_mask in runtime, not compile time. In ARMv8, EL2 physical address size (TCR_EL2.PS) and stage2 input address size (VTCR_EL2.T0SZE) cannot be determined in compile time since they depend on hardware capability. According to Table D4-23 and Table D4-25 in ARM DDI 0487A.b document, vttbr_x is calculated using different fixed values with consideration of T0SZ, granule size and the level of translation tables. Therefore, vttbr_baddr_mask should be determined dynamically. Changes since v4: More minor cleanups from review Moved some functions into headers Changes since v3: Another rebase Addressed minor comments from v2 Changes since v2: Rebased on https://git.kernel.org/pub/scm/linux/kernel/git/kvmarm/kvmarm.git next branch Changes since v1: Rebased fix on Jungseok Lee's patch https://lkml.org/lkml/2014/5/12/189 to provide better long term fix. Updated that patch to log error instead of silently fail on unaligned vttbr. 
Cc: Christoffer Dall christoffer.d...@linaro.org Cc: Sungjinn Chung sungjinn.ch...@samsung.com Signed-off-by: Jungseok Lee jays@samsung.com Signed-off-by: Joel Schopp joel.sch...@amd.com --- arch/arm/include/asm/kvm_mmu.h | 12 ++ arch/arm/kvm/arm.c | 17 +++- arch/arm64/include/asm/kvm_arm.h | 17 +--- arch/arm64/include/asm/kvm_mmu.h | 78 ++ arch/arm64/kvm/hyp-init.S| 20 +++--- 5 files changed, 122 insertions(+), 22 deletions(-) diff --git a/arch/arm/include/asm/kvm_mmu.h b/arch/arm/include/asm/kvm_mmu.h index 5c7aa3c..73f6ff6 100644 --- a/arch/arm/include/asm/kvm_mmu.h +++ b/arch/arm/include/asm/kvm_mmu.h @@ -166,6 +166,18 @@ static inline void coherent_cache_guest_page(struct kvm_vcpu *vcpu, hva_t hva, void stage2_flush_vm(struct kvm *kvm); +static inline int kvm_get_phys_addr_shift(void) +{ + return KVM_PHYS_SHIFT; +} + +static inline int set_vttbr_baddr_mask(void) +{ + vttbr_baddr_mask = VTTBR_BADDR_MASK; + return 0; +} + + #endif /* !__ASSEMBLY__ */ #endif /* __ARM_KVM_MMU_H__ */ diff --git a/arch/arm/kvm/arm.c b/arch/arm/kvm/arm.c index 3c82b37..f396eb7 100644 --- a/arch/arm/kvm/arm.c +++ b/arch/arm/kvm/arm.c @@ -37,6 +37,7 @@ #include asm/mman.h #include asm/tlbflush.h #include asm/cacheflush.h +#include asm/cputype.h #include asm/virt.h #include asm/kvm_arm.h #include asm/kvm_asm.h @@ -466,8 +467,14 @@ static void update_vttbr(struct kvm *kvm) /* update vttbr to be used with the new vmid */ pgd_phys = virt_to_phys(kvm-arch.pgd); vmid = ((u64)(kvm-arch.vmid) VTTBR_VMID_SHIFT) VTTBR_VMID_MASK; - kvm-arch.vttbr = pgd_phys VTTBR_BADDR_MASK; - kvm-arch.vttbr |= vmid; + + /* +* If the VTTBR isn't aligned there is something wrong with the system +* or kernel. 
+*/ + BUG_ON(pgd_phys ~vttbr_baddr_mask); + + kvm-arch.vttbr = pgd_phys | vmid; spin_unlock(kvm_vmid_lock); } @@ -1052,6 +1059,12 @@ int kvm_arch_init(void *opaque) } } + err = set_vttbr_baddr_mask(); + if (err) { + kvm_err(Cannot set vttbr_baddr_mask\n); + return -EINVAL; + } + cpu_notifier_register_begin(); err = init_hyp_mode(); diff --git a/arch/arm64/include/asm/kvm_arm.h b/arch/arm64/include/asm/kvm_arm.h index 3d69030..8dbef70 100644 --- a/arch/arm64/include/asm/kvm_arm.h +++ b/arch/arm64/include/asm/kvm_arm.h @@ -94,7 +94,6 @@ /* TCR_EL2 Registers bits */ #define TCR_EL2_TBI(1 20) #define TCR_EL2_PS (7 16) -#define TCR_EL2_PS_40B (2 16) #define TCR_EL2_TG0(1 14) #define TCR_EL2_SH0(3 12) #define TCR_EL2_ORGN0 (3 10) @@ -103,8 +102,6 @@ #define TCR_EL2_MASK (TCR_EL2_TG0 | TCR_EL2_SH0 | \ TCR_EL2_ORGN0 | TCR_EL2_IRGN0 | TCR_EL2_T0SZ) -#define TCR_EL2_FLAGS (TCR_EL2_PS_40B) - /* VTCR_EL2 Registers bits */ #define VTCR_EL2_PS_MASK (7 16) #define VTCR_EL2_TG0_MASK (1 14) @@ -119,36 +116,28 @@ #define VTCR_EL2_SL0_MASK (3 6) #define VTCR_EL2_SL0_LVL1 (1 6) #define VTCR_EL2_T0SZ_MASK 0x3f -#define VTCR_EL2_T0SZ_40B 24 +#define VTCR_EL2_T0SZ(bits)(64 - (bits)) #ifdef CONFIG_ARM64_64K_PAGES /* * Stage2 translation configuration: - * 40bits output (PS = 2) - * 40bits input (T0SZ = 24) * 64kB pages (TG0 = 1) * 2 level page tables (SL = 1) */ #define VTCR_EL2_FLAGS (VTCR_EL2_TG0_64K | VTCR_EL2_SH0_INNER | \ VTCR_EL2_ORGN0_WBWA | VTCR_EL2_IRGN0_WBWA | \ -VTCR_EL2_SL0_LVL1 | VTCR_EL2_T0SZ_40B) -#define VTTBR_X(38 - VTCR_EL2_T0SZ_40B
Re: [Qemu-devel] The status about vhost-net on kvm-arm?
we at Virtual Open Systems did some work and tested vhost-net on ARM back in March. The setup was based on: - host kernel with our ioeventfd patches: http://www.spinics.net/lists/kvm-arm/msg08413.html - qemu with the aforementioned patches from Ying-Shiuan Pan https://lists.gnu.org/archive/html/qemu-devel/2014-02/msg00715.html The testbed was ARM Chromebook with Exynos 5250, using a 1Gbps USB3 Ethernet adapter connected to a 1Gbps switch. I can't find the actual numbers but I remember that with multiple streams the gain was clearly seen. Note that it used the minimum required ioeventfd implementation and not irqfd. I guess it is feasible to think that it all can be put together and rebased + the recent irqfd work. One can achieve even better performance (because of the irqfd). Managed to replicate the setup with the old versions we used in March: Single stream from another machine to chromebook with 1Gbps USB3 Ethernet adapter. iperf -c address -P 1 -i 1 -p 5001 -f k -t 10 to HOST: 858316 Kbits/sec to GUEST: 761563 Kbits/sec to GUEST vhost=off: 508150 Kbits/sec 10 parallel streams iperf -c address -P 10 -i 1 -p 5001 -f k -t 10 to HOST: 842420 Kbits/sec to GUEST: 625144 Kbits/sec to GUEST vhost=off: 425276 Kbits/sec I have tested the same cases on a Hisilicon board (Cortex-A15@1G) with Integrated 1Gbps Ethernet adapter. iperf -c address -P 1 -i 1 -p 5001 -f M -t 10 to HOST: 906 Mbits/sec to GUEST: 562 Mbits/sec to GUEST vhost=off: 340 Mbits/sec 10 parallel streams, the performance gets 10% plus: iperf -c address -P 10 -i 1 -p 5001 -f M -t 10 to HOST: 923 Mbits/sec to GUEST: 592 Mbits/sec to GUEST vhost=off: 364 Mbits/sec It's easy to see vhost-net brings great performance improvements, almost 50%+. That's pretty impressive for not even having irqfd. I guess we should renew some effort to get these patches merged upstream.
-- To unsubscribe from this list: send the line unsubscribe kvm in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [PATCH v3] arm64: fix VTTBR_BADDR_MASK
Thanks for the detailed review. the last case would be case 5 and the default case would be a BUG(). I agree with the case, but rather than do a BUG() I'm going to print an error and return -EINVAL. Not worth stopping the host kernel just because kvm is messed up when we can gracefully exit from it. + +/* + * See Table D4-23 and Table D4-25 in ARM DDI 0487A.b to figure out + * the origin of the hardcoded values, 38 and 37. + */ +#ifdef CONFIG_ARM64_64K_PAGES +/* + * 16 = T0SZ = 21 is valid under 3 level of translation tables + * 18 = T0SZ = 34 is valid under 2 level of translation tables + * 31 = T0SZ = 39 is valid under 1 level of transltaion tables + */ so this scheme is with concatenated initial level stage-2 page tables. But we only ever allocate the amount of pages for our pgd according to what the host has, so I think this allocation needs to be locked down more tight, because the host is always using the appropriate amount for 39 bits virtual addresses. I'll narrow the sanity check of the range. I'll narrow it based on a 39 - 48 bit VA host range in anticipation of the 4 level 4k and 3 level 64k host patches going in. +kvm_err(Cannot support %d-bit address space\n, 64 - t0sz); +return -EINVAL; +} +vttbr_x = 37 - t0sz; +#endif +vttbr_baddr_mask = (((1LLU (48 - vttbr_x)) - 1) (vttbr_x - 1)); +#endif This nested ifdef is really quite horrible. Can you either factor these out into some static inlines in arch/arm[64]/include/asm/kvm_mmu.h or provide a per-architecture implementation in a .c file? I'll factor it out in the file to make it more readable and do away with the nested ifdef. My theory on putting things into .h files is to only do it if there is actually another file that uses it. 
+return 0; +} + +/** * update_vttbr - Update the VTTBR with a valid VMID before the guest runs * @kvm The guest that we are about to run * @@ -429,8 +502,16 @@ static void update_vttbr(struct kvm *kvm) /* update vttbr to be used with the new vmid */ pgd_phys = virt_to_phys(kvm-arch.pgd); vmid = ((u64)(kvm-arch.vmid) VTTBR_VMID_SHIFT) VTTBR_VMID_MASK; -kvm-arch.vttbr = pgd_phys VTTBR_BADDR_MASK; -kvm-arch.vttbr |= vmid; + +/* + * If the VTTBR isn't aligned there is something wrong with the system + * or kernel. It is better to just fail and not mask it. But no need + * to panic the host kernel with a BUG_ON(), instead just log the error. + */ These last two sentences are not very helpful, because they don't describe the rationale for what you're doing, only what you are (and are not) doing. I'll reword the comment. That said, I don't think this is doing the right thing. I think you want to refuse running the VM and avoid any stage-2 entried being created if this is not the case (actually, we may want to check this after set_vttbr_baddr_mask() or right aftert allocating the stage-2 pgd), because otherwise I think we may be overwriting memory not belonging to us with concatenated page tables in a 42-bit 4KB system, for example. My experience here was that the hardware actually catches the error on the first instruction load of the guest kernel and does a stage 2 translation abort. However, to be extra safe we could just log the error with the address of the vttbr and then zero out the pgd_phys part of vttbr altogether, leaving only the vmid. The guest would then die of natural causes and we wouldn't have to worry about the outside possibility of memory getting overwritten. I don't like the option of just calling BUG() because stopping the host kernel from running just because we can't run a guest seems a bit extreme. 
On the other hand adding a return code to update_vttbr and checking it (even with unlikely) in the kvm_arch_vcpu_ioctl_run() loop doesn't thrill me either just from a wasted cpu cycles point of view. +if (pgd_phys ~vttbr_baddr_mask) +kvm_err(VTTBR not aligned, expect guest to fail); + +kvm-arch.vttbr = pgd_phys | vmid; spin_unlock(kvm_vmid_lock); } @@ -1015,6 +1096,12 @@ int kvm_arch_init(void *opaque) } } +err = set_vttbr_baddr_mask(); +if (err) { +kvm_err(Cannot set vttbr_baddr_mask\n); +return -EINVAL; +} + cpu_notifier_register_begin(); err = init_hyp_mode(); diff --git a/arch/arm64/include/asm/kvm_arm.h b/arch/arm64/include/asm/kvm_arm.h index cc83520..ff4a4fa 100644 --- a/arch/arm64/include/asm/kvm_arm.h +++ b/arch/arm64/include/asm/kvm_arm.h @@ -95,7 +95,6 @@ /* TCR_EL2 Registers bits */ #define TCR_EL2_TBI (1 20) #define TCR_EL2_PS (7 16) -#define TCR_EL2_PS_40B (2 16) #define TCR_EL2_TG0 (1 14) #define TCR_EL2_SH0 (3 12) #define TCR_EL2_ORGN0 (3 10) @@ -104,8 +103,6 @@ #define TCR_EL2_MASK(TCR_EL2_TG0 | TCR_EL2_SH0 | \
Re: KVM call for agenda for 2014-08-19
On 08/11/2014 08:09 AM, Juan Quintela wrote: Hi Please, send any topic that you are interested in covering. People have complained on the past that I don't cancel the call until the very last minute. So, what do you think that deadline for submitting topics is 23:00UTC on Monday? I like the deadline. Call details: 15:00 CEST 13:00 UTC 09:00 EDT Every two weeks By popular demand, a google calendar public entry with it https://www.google.com/calendar/embed?src=dG9iMXRqcXAzN3Y4ZXZwNzRoMHE4a3BqcXNAZ3JvdXAuY2FsZW5kYXIuZ29vZ2xlLmNvbQ (Let me know if you have any problems with the calendar entry) If you need phone number details, contact me privately Thanks, Juan. -- To unsubscribe from this list: send the line unsubscribe kvm in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html -- To unsubscribe from this list: send the line unsubscribe kvm in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [PATCH v3] arm64: fix VTTBR_BADDR_MASK
That said, I don't think this is doing the right thing. I think you want to refuse running the VM and avoid any stage-2 entries being created if this is not the case (actually, we may want to check this after set_vttbr_baddr_mask() or right after allocating the stage-2 pgd), because otherwise I think we may be overwriting memory not belonging to us with concatenated page tables in a 42-bit 4KB system, for example. My experience here was that the hardware actually catches the error on the first instruction load of the guest kernel and does a stage 2 translation abort. However, to be extra safe we could just log the error with the address of the vttbr and then zero out the pgd_phys part of vttbr altogether, leaving only the vmid. The guest would then die of natural causes and we wouldn't have to worry about the outside possibility of memory getting overwritten. uh, putting zero in the pgd_phys part will just point to random memory if you happen to have memory based at address 0 though, right? I think we should check when we allocate the pgd that it is indeed of the right size and alignment, and if it isn't at this point, it truly is a BUG() and your kernel is terribly busted. If I can't rely on 0 to be an invalid address I can't think of what I could rely on to be invalid. I'll just change this to BUG_ON(pgd_phys & ~vttbr_baddr_mask); and give up on my dream of the host kernel surviving the bug. -- To unsubscribe from this list: send the line unsubscribe kvm in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
[PATCH v4] arm64: fix VTTBR_BADDR_MASK
The current VTTBR_BADDR_MASK only masks 39 bits, which is broken on current systems. Rather than just add a bit it seems like a good time to also set things at run-time instead of compile time to accommodate more hardware. This patch sets TCR_EL2.PS, VTCR_EL2.T0SZ and vttbr_baddr_mask in runtime, not compile time. In ARMv8, EL2 physical address size (TCR_EL2.PS) and stage2 input address size (VTCR_EL2.T0SZE) cannot be determined in compile time since they depend on hardware capability. According to Table D4-23 and Table D4-25 in ARM DDI 0487A.b document, vttbr_x is calculated using different fixed values with consideration of T0SZ, granule size and the level of translation tables. Therefore, vttbr_baddr_mask should be determined dynamically. Changes since v3: Another rebase Addressed minor comments from v2 Changes since v2: Rebased on https://git.kernel.org/pub/scm/linux/kernel/git/kvmarm/kvmarm.git next branch Changes since v1: Rebased fix on Jungseok Lee's patch https://lkml.org/lkml/2014/5/12/189 to provide better long term fix. Updated that patch to log error instead of silently fail on unaligned vttbr. 
Cc: Christoffer Dall christoffer.d...@linaro.org Cc: Sungjinn Chung sungjinn.ch...@samsung.com Signed-off-by: Jungseok Lee jays@samsung.com Signed-off-by: Joel Schopp joel.sch...@amd.com --- arch/arm/kvm/arm.c | 116 +- arch/arm64/include/asm/kvm_arm.h | 17 +- arch/arm64/kvm/hyp-init.S| 20 +-- 3 files changed, 131 insertions(+), 22 deletions(-) diff --git a/arch/arm/kvm/arm.c b/arch/arm/kvm/arm.c index 3c82b37..b4859fa 100644 --- a/arch/arm/kvm/arm.c +++ b/arch/arm/kvm/arm.c @@ -37,6 +37,7 @@ #include asm/mman.h #include asm/tlbflush.h #include asm/cacheflush.h +#include asm/cputype.h #include asm/virt.h #include asm/kvm_arm.h #include asm/kvm_asm.h @@ -61,6 +62,8 @@ static atomic64_t kvm_vmid_gen = ATOMIC64_INIT(1); static u8 kvm_next_vmid; static DEFINE_SPINLOCK(kvm_vmid_lock); +static u64 vttbr_baddr_mask; + static bool vgic_present; static void kvm_arm_set_running_vcpu(struct kvm_vcpu *vcpu) @@ -412,6 +415,103 @@ static bool need_new_vmid_gen(struct kvm *kvm) return unlikely(kvm-arch.vmid_gen != atomic64_read(kvm_vmid_gen)); } + + + /* +* ARMv8 64K architecture limitations: +* 16 = T0SZ = 21 is valid under 3 level of translation tables +* 18 = T0SZ = 34 is valid under 2 level of translation tables +* 31 = T0SZ = 39 is valid under 1 level of transltaion tables +* +* ARMv8 4K architecture limitations: +* 16 = T0SZ = 24 is valid under 4 level of translation tables +* 21 = T0SZ = 30 is valid under 3 level of translation tables +* 30 = T0SZ = 39 is valid under 2 level of translation tables +* +* +* We further limit T0SZ in ARM64 Linux by not supporting 1 level +* translation tables at all, not supporting 2 level translation +* tables with 4k pages, not supporting different levels of translation +* tables in stage 1 vs stage 2, not supporting different page sizes in +* stage 1 vs stage 2, not supporting less than 40 bit address space +* with 64k pages, and not supporting less than 32 bit address space +* with 4K pages. 
+* +* See Table D4-23 and Table D4-25 in ARM DDI 0487A.b to figure out +* the origin of the hardcoded values, 38 and 37. +*/ + +#ifdef CONFIG_ARM64_64K_PAGES +static inline int t0sz_to_vttbr_x(int t0sz){ + if (t0sz 16 || t0sz 24) { + kvm_err(Cannot support %d-bit address space\n, 64 - t0sz); + return -EINVAL; + } + + return 38 - t0sz; +} +#elif CONFIG_ARM64 !CONFIG_ARM64_64K_PAGES +static inline int t0sz_to_vttbr_x(int t0sz){ + if (t0sz 16 || t0sz 32) { + kvm_err(Cannot support %d-bit address space\n, 64 - t0sz); + return -EINVAL; + } + return 37 - t0sz; +} +#endif + + +/** + * set_vttbr_baddr_mask - set mask value for vttbr base address + * + * In ARMv8, vttbr_baddr_mask cannot be determined in compile time since the + * stage2 input address size depends on hardware capability. Thus, we first + * need to read ID_AA64MMFR0_EL1.PARange first and then set vttbr_baddr_mask + * with consideration of both granule size and the level of translation tables. + */ +#ifndef CONFIG_ARM64 +static int set_vttbr_baddr_mask(void) +{ + vttbr_baddr_mask = VTTBR_BADDR_MASK; + return 0; +} +#else +static int set_vttbr_baddr_mask(void) +{ + int pa_range, t0sz, vttbr_x; + + pa_range = read_cpuid(ID_AA64MMFR0_EL1) 0xf; + + switch (pa_range) { + case 0: + t0sz = VTCR_EL2_T0SZ(32); + break; + case 1: + t0sz = VTCR_EL2_T0SZ(36); + break; + case 2: + t0sz = VTCR_EL2_T0SZ(40); + break; + case 3
[RFC PATCH] arm64: KVM: add irqfd support
Depends on Eric Auger's ARM: KVM: add irqfd support patch. Enable vfio of platform devices for ARM64. This patch fixes the ARM64 compile. However this patch has only been compile tested. It seemed worth sharing as it will allow us to carry both the ARM and ARM64 patches together as we do more testing. Cc: Eirc Auger eric.au...@linaro.org Signed-off-by: Joel Schopp joel.sch...@amd.com --- Documentation/virtual/kvm/api.txt |2 +- arch/arm64/include/uapi/asm/kvm.h |4 arch/arm64/kvm/Kconfig|4 +++- arch/arm64/kvm/Makefile |2 +- drivers/vfio/platform/Kconfig |2 +- 5 files changed, 10 insertions(+), 4 deletions(-) diff --git a/Documentation/virtual/kvm/api.txt b/Documentation/virtual/kvm/api.txt index 04310d9..bc64ce9 100644 --- a/Documentation/virtual/kvm/api.txt +++ b/Documentation/virtual/kvm/api.txt @@ -2132,7 +2132,7 @@ into the hash PTE second double word). 4.75 KVM_IRQFD Capability: KVM_CAP_IRQFD -Architectures: x86 s390 arm +Architectures: x86 s390 arm arm64 Type: vm ioctl Parameters: struct kvm_irqfd (in) Returns: 0 on success, -1 on error diff --git a/arch/arm64/include/uapi/asm/kvm.h b/arch/arm64/include/uapi/asm/kvm.h index e633ff8..3df8baa 100644 --- a/arch/arm64/include/uapi/asm/kvm.h +++ b/arch/arm64/include/uapi/asm/kvm.h @@ -180,6 +180,10 @@ struct kvm_arch_memory_slot { /* Highest supported SPI, from VGIC_NR_IRQS */ #define KVM_ARM_IRQ_GIC_MAX127 +/* One single KVM irqchip, ie. the VGIC */ +#define KVM_NR_IRQCHIPS 1 + + /* PSCI interface */ #define KVM_PSCI_FN_BASE 0x95c1ba5e #define KVM_PSCI_FN(n) (KVM_PSCI_FN_BASE + (n)) diff --git a/arch/arm64/kvm/Kconfig b/arch/arm64/kvm/Kconfig index 8ba85e9..cbd3525 100644 --- a/arch/arm64/kvm/Kconfig +++ b/arch/arm64/kvm/Kconfig @@ -26,6 +26,7 @@ config KVM select KVM_ARM_HOST select KVM_ARM_VGIC select KVM_ARM_TIMER + select HAVE_KVM_EVENTFD ---help--- Support hosting virtualized guest machines. 
@@ -50,13 +51,14 @@ config KVM_ARM_MAX_VCPUS config KVM_ARM_VGIC bool depends on KVM_ARM_HOST OF - select HAVE_KVM_IRQCHIP + select HAVE_KVM_IRQFD ---help--- Adds support for a hardware assisted, in-kernel GIC emulation. config KVM_ARM_TIMER bool depends on KVM_ARM_VGIC + select HAVE_KVM_IRQCHIP ---help--- Adds support for the Architected Timers in virtual machines. diff --git a/arch/arm64/kvm/Makefile b/arch/arm64/kvm/Makefile index 72a9fd5..40b9970 100644 --- a/arch/arm64/kvm/Makefile +++ b/arch/arm64/kvm/Makefile @@ -11,7 +11,7 @@ ARM=../../../arch/arm/kvm obj-$(CONFIG_KVM_ARM_HOST) += kvm.o -kvm-$(CONFIG_KVM_ARM_HOST) += $(KVM)/kvm_main.o $(KVM)/coalesced_mmio.o +kvm-$(CONFIG_KVM_ARM_HOST) += $(KVM)/kvm_main.o $(KVM)/coalesced_mmio.o $(KVM)/eventfd.o kvm-$(CONFIG_KVM_ARM_HOST) += $(ARM)/arm.o $(ARM)/mmu.o $(ARM)/mmio.o kvm-$(CONFIG_KVM_ARM_HOST) += $(ARM)/psci.o $(ARM)/perf.o diff --git a/drivers/vfio/platform/Kconfig b/drivers/vfio/platform/Kconfig index c51af17..43ee890 100644 --- a/drivers/vfio/platform/Kconfig +++ b/drivers/vfio/platform/Kconfig @@ -1,6 +1,6 @@ config VFIO_PLATFORM tristate VFIO support for platform devices - depends on VFIO EVENTFD ARM + depends on VFIO EVENTFD (ARM || ARM64) help Support for platform devices with VFIO. This is required to make use of platform devices present on the system using the VFIO -- To unsubscribe from this list: send the line unsubscribe kvm in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: KVM on ARM64
virtio will get you the best performance so why would you want to use something slower? -Joel On 08/07/2014 11:51 AM, Mathew Li wrote: Great. VirtIO works for me. Thanks for your help folks! Is there is any other way to add virtual disk, more like a traditional disk to qemu-system-aarch64? For example IDE disk or SATA disk or maybe as a SCSI disk? On Wed, Aug 6, 2014 at 9:48 AM, Joel Schopp joel.sch...@amd.com wrote: It turns out that after a recent rebase of my kernel and qemu to the latest the problem is fixed. Rather than hunt down what fixed it I'm just accepting the win and moving on. -smp 4 now works. -Joel On 08/06/2014 11:15 AM, Christoffer Dall wrote: On Tue, Aug 5, 2014 at 4:18 PM, Joel Schopp joel.sch...@amd.com wrote: On 08/04/2014 07:35 PM, Mathew Li wrote: Hi, I have a quick question. How do we add a hard disk to the qemu ARM VM? I tried: qemu-system-aarch64 -machine virt -hda disk.img -kernel image -initrd initrd.img qemu-system-aarch64 -machine virt -sd disk.img -kernel image -initrd initrd.img qemu-system-aarch64 -machine virt -mtdblock disk.img -kernel image -initrd initrd.img Nothing seems to work. I am not able to see any disk (i.e. dev/sdX) inside guest OS. I've been running something like this: qemu-system-aarch64 -smp 1 --enable-kvm -nographic -netdev tap,id=t0,ifname=tap0,script=no,downscript=no,vhost=on -device virtio-net-device,netdev=t0,id=nic0 \ -kernel /extra/rootfs/boot/Image -drive file=/extra/rootfs.img,id=fs -device virtio-blk-device,drive=fs -m 512 -M virt -cpu host -append console=ttyAMA0 console=ttyS0 root=/dev/vda On my system -smp 2 or higher hangs in the guest kernel. 
The -smp 2 hang issue is probably due to a missing PSCI v0.2 follow-up patch to QEMU, you can try: https://git.linaro.org/people/christoffer.dall/qemu-arm.git/shortlog/refs/heads/psci2-smp-fix [disclaimer: there may be a better fix somewhere on the qemu list, I haven't kept track the last couple of days] -Christoffer -- To unsubscribe from this list: send the line unsubscribe kvm in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: KVM on ARM64
On 08/07/2014 12:53 PM, Christoffer Dall wrote: Currently we only model a virtual machine board (the -machine type=virt parameter) which has a UART, a flash, an RTC, and a bunch of virtio-mmio channelse. Once we either emulate a real aarch64 board (with whatever peripherals it may have) or add a PCI controller to the virt board, then you can choose whatever storage the real board has or start doing interesting things like plugging in a scsi controller to your PCI controller on the virt board or whatever else you desire. I am very interested in having a PCI controller on the virt board to be able to do some testing of -device pci-assign and -device vfio-pci. I noticed that Alvise Rigo (ccd) had sent some patches out to the qemu-devel list July 11th that seem to add a generic pci controller. But as Joel points out, VirtIO is likely to get you the best performance and is the most convenient method. -Christoffer On Thu, Aug 7, 2014 at 6:51 PM, Mathew Li mathew.li...@gmail.com wrote: Great. VirtIO works for me. Thanks for your help folks! Is there is any other way to add virtual disk, more like a traditional disk to qemu-system-aarch64? For example IDE disk or SATA disk or maybe as a SCSI disk? -- To unsubscribe from this list: send the line unsubscribe kvm in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: KVM on ARM64
It turns out that after a recent rebase of my kernel and qemu to the latest the problem is fixed. Rather than hunt down what fixed it I'm just accepting the win and moving on. -smp 4 now works. -Joel On 08/06/2014 11:15 AM, Christoffer Dall wrote: On Tue, Aug 5, 2014 at 4:18 PM, Joel Schopp joel.sch...@amd.com wrote: On 08/04/2014 07:35 PM, Mathew Li wrote: Hi, I have a quick question. How do we add a hard disk to the qemu ARM VM? I tried: qemu-system-aarch64 -machine virt -hda disk.img -kernel image -initrd initrd.img qemu-system-aarch64 -machine virt -sd disk.img -kernel image -initrd initrd.img qemu-system-aarch64 -machine virt -mtdblock disk.img -kernel image -initrd initrd.img Nothing seems to work. I am not able to see any disk (i.e. dev/sdX) inside guest OS. I've been running something like this: qemu-system-aarch64 -smp 1 --enable-kvm -nographic -netdev tap,id=t0,ifname=tap0,script=no,downscript=no,vhost=on -device virtio-net-device,netdev=t0,id=nic0 \ -kernel /extra/rootfs/boot/Image -drive file=/extra/rootfs.img,id=fs -device virtio-blk-device,drive=fs -m 512 -M virt -cpu host -append console=ttyAMA0 console=ttyS0 root=/dev/vda On my system -smp 2 or higher hangs in the guest kernel. The -smp 2 hang issue is probably due to a missing PSCI v0.2 follow-up patch to QEMU, you can try: https://git.linaro.org/people/christoffer.dall/qemu-arm.git/shortlog/refs/heads/psci2-smp-fix [disclaimer: there may be a better fix somewhere on the qemu list, I haven't kept track the last couple of days] -Christoffer -- To unsubscribe from this list: send the line unsubscribe kvm in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: KVM on ARM64
On 08/04/2014 07:35 PM, Mathew Li wrote: Hi, I have a quick question. How do we add a hard disk to the qemu ARM VM? I tried: qemu-system-aarch64 -machine virt -hda disk.img -kernel image -initrd initrd.img qemu-system-aarch64 -machine virt -sd disk.img -kernel image -initrd initrd.img qemu-system-aarch64 -machine virt -mtdblock disk.img -kernel image -initrd initrd.img Nothing seems to work. I am not able to see any disk (i.e. dev/sdX) inside guest OS. I've been running something like this: qemu-system-aarch64 -smp 1 --enable-kvm -nographic -netdev tap,id=t0,ifname=tap0,script=no,downscript=no,vhost=on -device virtio-net-device,netdev=t0,id=nic0 \ -kernel /extra/rootfs/boot/Image -drive file=/extra/rootfs.img,id=fs -device virtio-blk-device,drive=fs -m 512 -M virt -cpu host -append console=ttyAMA0 console=ttyS0 root=/dev/vda On my system -smp 2 or higher hangs in the guest kernel. -- To unsubscribe from this list: send the line unsubscribe kvm in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
[PATCH v3] arm64: fix VTTBR_BADDR_MASK
The current VTTBR_BADDR_MASK only masks 39 bits, which is broken on current systems. Rather than just add a bit it seems like a good time to also set things at run-time instead of compile time to accommodate more hardware. This patch sets TCR_EL2.PS, VTCR_EL2.T0SZ and vttbr_baddr_mask in runtime, not compile time. In ARMv8, EL2 physical address size (TCR_EL2.PS) and stage2 input address size (VTCR_EL2.T0SZE) cannot be determined in compile time since they depend on hardware capability. According to Table D4-23 and Table D4-25 in ARM DDI 0487A.b document, vttbr_x is calculated using different hard-coded values with consideration of T0SZ, granule size and the level of translation tables. Therefore, vttbr_baddr_mask should be determined dynamically. Changes since v2: Rebased on https://git.kernel.org/pub/scm/linux/kernel/git/kvmarm/kvmarm.git next branch Changes since v1: Rebased fix on Jungseok Lee's patch https://lkml.org/lkml/2014/5/12/189 to provide better long term fix. Updated that patch to log error instead of silently fail on unaligned vttbr. 
Cc: Christoffer Dall christoffer.d...@linaro.org Cc: Sungjinn Chung sungjinn.ch...@samsung.com Signed-off-by: Jungseok Lee jays@samsung.com Signed-off-by: Joel Schopp joel.sch...@amd.com --- arch/arm/kvm/arm.c | 91 +- arch/arm64/include/asm/kvm_arm.h | 17 +-- arch/arm64/kvm/hyp-init.S| 20 ++-- 3 files changed, 106 insertions(+), 22 deletions(-) diff --git a/arch/arm/kvm/arm.c b/arch/arm/kvm/arm.c index d7424ef..d7ca2f5 100644 --- a/arch/arm/kvm/arm.c +++ b/arch/arm/kvm/arm.c @@ -37,6 +37,7 @@ #include asm/mman.h #include asm/tlbflush.h #include asm/cacheflush.h +#include asm/cputype.h #include asm/virt.h #include asm/kvm_arm.h #include asm/kvm_asm.h @@ -61,6 +62,9 @@ static atomic64_t kvm_vmid_gen = ATOMIC64_INIT(1); static u8 kvm_next_vmid; static DEFINE_SPINLOCK(kvm_vmid_lock); +/* VTTBR mask cannot be determined in complie time under ARMv8 */ +static u64 vttbr_baddr_mask; + static bool vgic_present; static void kvm_arm_set_running_vcpu(struct kvm_vcpu *vcpu) @@ -376,6 +380,75 @@ static bool need_new_vmid_gen(struct kvm *kvm) } /** + * set_vttbr_baddr_mask - set mask value for vttbr base address + * + * In ARMv8, vttbr_baddr_mask cannot be determined in compile time since stage2 + * input address size depends on hardware capability. Thus, it is needed to read + * ID_AA64MMFR0_EL1.PARange first and then set vttbr_baddr_mask with + * consideration of both granule size and the level of translation tables. 
+ */ +static int set_vttbr_baddr_mask(void) +{ +#ifndef CONFIG_ARM64 + vttbr_baddr_mask = VTTBR_BADDR_MASK; +#else + int pa_range, t0sz, vttbr_x; + + pa_range = read_cpuid(ID_AA64MMFR0_EL1) 0xf; + + switch (pa_range) { + case 0: + t0sz = VTCR_EL2_T0SZ(32); + break; + case 1: + t0sz = VTCR_EL2_T0SZ(36); + break; + case 2: + t0sz = VTCR_EL2_T0SZ(40); + break; + case 3: + t0sz = VTCR_EL2_T0SZ(42); + break; + case 4: + t0sz = VTCR_EL2_T0SZ(44); + break; + default: + t0sz = VTCR_EL2_T0SZ(48); + } + + /* +* See Table D4-23 and Table D4-25 in ARM DDI 0487A.b to figure out +* the origin of the hardcoded values, 38 and 37. +*/ +#ifdef CONFIG_ARM64_64K_PAGES + /* +* 16 = T0SZ = 21 is valid under 3 level of translation tables +* 18 = T0SZ = 34 is valid under 2 level of translation tables +* 31 = T0SZ = 39 is valid under 1 level of transltaion tables +*/ + if (t0sz = 17) { + kvm_err(Cannot support %d-bit address space\n, 64 - t0sz); + return -EINVAL; + } + vttbr_x = 38 - t0sz; +#else + /* +* 16 = T0SZ = 24 is valid under 4 level of translation tables +* 21 = T0SZ = 30 is valid under 3 level of translation tables +* 30 = T0SZ = 39 is valid under 2 level of translation tables +*/ + if (t0sz = 20) { + kvm_err(Cannot support %d-bit address space\n, 64 - t0sz); + return -EINVAL; + } + vttbr_x = 37 - t0sz; +#endif + vttbr_baddr_mask = (((1LLU (48 - vttbr_x)) - 1) (vttbr_x - 1)); +#endif + return 0; +} + +/** * update_vttbr - Update the VTTBR with a valid VMID before the guest runs * @kvmThe guest that we are about to run * @@ -429,8 +502,16 @@ static void update_vttbr(struct kvm *kvm) /* update vttbr to be used with the new vmid */ pgd_phys = virt_to_phys(kvm-arch.pgd); vmid = ((u64)(kvm-arch.vmid) VTTBR_VMID_SHIFT) VTTBR_VMID_MASK; - kvm-arch.vttbr = pgd_phys VTTBR_BADDR_MASK; - kvm-arch.vttbr |= vmid; + + /* +* If the VTTBR isn't aligned there is something wrong with the system
Re: [PATCH v3] arm64: fix VTTBR_BADDR_MASK
Since this fixes a real problem and didn't make it into 3.16 it would be good if this made it into 3.17. -Joel On 08/04/2014 09:38 AM, Joel Schopp wrote: The current VTTBR_BADDR_MASK only masks 39 bits, which is broken on current systems. Rather than just add a bit it seems like a good time to also set things at run-time instead of compile time to accommodate more hardware. This patch sets TCR_EL2.PS, VTCR_EL2.T0SZ and vttbr_baddr_mask in runtime, not compile time. In ARMv8, EL2 physical address size (TCR_EL2.PS) and stage2 input address size (VTCR_EL2.T0SZE) cannot be determined in compile time since they depend on hardware capability. According to Table D4-23 and Table D4-25 in ARM DDI 0487A.b document, vttbr_x is calculated using different hard-coded values with consideration of T0SZ, granule size and the level of translation tables. Therefore, vttbr_baddr_mask should be determined dynamically. Changes since v2: Rebased on https://git.kernel.org/pub/scm/linux/kernel/git/kvmarm/kvmarm.git next branch Changes since v1: Rebased fix on Jungseok Lee's patch https://lkml.org/lkml/2014/5/12/189 to provide better long term fix. Updated that patch to log error instead of silently fail on unaligned vttbr. 
Cc: Christoffer Dall christoffer.d...@linaro.org Cc: Sungjinn Chung sungjinn.ch...@samsung.com Signed-off-by: Jungseok Lee jays@samsung.com Signed-off-by: Joel Schopp joel.sch...@amd.com --- arch/arm/kvm/arm.c | 91 +- arch/arm64/include/asm/kvm_arm.h | 17 +-- arch/arm64/kvm/hyp-init.S| 20 ++-- 3 files changed, 106 insertions(+), 22 deletions(-) diff --git a/arch/arm/kvm/arm.c b/arch/arm/kvm/arm.c index d7424ef..d7ca2f5 100644 --- a/arch/arm/kvm/arm.c +++ b/arch/arm/kvm/arm.c @@ -37,6 +37,7 @@ #include asm/mman.h #include asm/tlbflush.h #include asm/cacheflush.h +#include asm/cputype.h #include asm/virt.h #include asm/kvm_arm.h #include asm/kvm_asm.h @@ -61,6 +62,9 @@ static atomic64_t kvm_vmid_gen = ATOMIC64_INIT(1); static u8 kvm_next_vmid; static DEFINE_SPINLOCK(kvm_vmid_lock); +/* VTTBR mask cannot be determined in complie time under ARMv8 */ +static u64 vttbr_baddr_mask; + static bool vgic_present; static void kvm_arm_set_running_vcpu(struct kvm_vcpu *vcpu) @@ -376,6 +380,75 @@ static bool need_new_vmid_gen(struct kvm *kvm) } /** + * set_vttbr_baddr_mask - set mask value for vttbr base address + * + * In ARMv8, vttbr_baddr_mask cannot be determined in compile time since stage2 + * input address size depends on hardware capability. Thus, it is needed to read + * ID_AA64MMFR0_EL1.PARange first and then set vttbr_baddr_mask with + * consideration of both granule size and the level of translation tables. 
+ */ +static int set_vttbr_baddr_mask(void) +{ +#ifndef CONFIG_ARM64 + vttbr_baddr_mask = VTTBR_BADDR_MASK; +#else + int pa_range, t0sz, vttbr_x; + + pa_range = read_cpuid(ID_AA64MMFR0_EL1) 0xf; + + switch (pa_range) { + case 0: + t0sz = VTCR_EL2_T0SZ(32); + break; + case 1: + t0sz = VTCR_EL2_T0SZ(36); + break; + case 2: + t0sz = VTCR_EL2_T0SZ(40); + break; + case 3: + t0sz = VTCR_EL2_T0SZ(42); + break; + case 4: + t0sz = VTCR_EL2_T0SZ(44); + break; + default: + t0sz = VTCR_EL2_T0SZ(48); + } + + /* + * See Table D4-23 and Table D4-25 in ARM DDI 0487A.b to figure out + * the origin of the hardcoded values, 38 and 37. + */ +#ifdef CONFIG_ARM64_64K_PAGES + /* + * 16 = T0SZ = 21 is valid under 3 level of translation tables + * 18 = T0SZ = 34 is valid under 2 level of translation tables + * 31 = T0SZ = 39 is valid under 1 level of transltaion tables + */ + if (t0sz = 17) { + kvm_err(Cannot support %d-bit address space\n, 64 - t0sz); + return -EINVAL; + } + vttbr_x = 38 - t0sz; +#else + /* + * 16 = T0SZ = 24 is valid under 4 level of translation tables + * 21 = T0SZ = 30 is valid under 3 level of translation tables + * 30 = T0SZ = 39 is valid under 2 level of translation tables + */ + if (t0sz = 20) { + kvm_err(Cannot support %d-bit address space\n, 64 - t0sz); + return -EINVAL; + } + vttbr_x = 37 - t0sz; +#endif + vttbr_baddr_mask = (((1LLU (48 - vttbr_x)) - 1) (vttbr_x - 1)); +#endif + return 0; +} + +/** * update_vttbr - Update the VTTBR with a valid VMID before the guest runs * @kvm The guest that we are about to run * @@ -429,8 +502,16 @@ static void update_vttbr(struct kvm *kvm) /* update vttbr to be used with the new vmid */ pgd_phys = virt_to_phys(kvm-arch.pgd); vmid = ((u64)(kvm-arch.vmid) VTTBR_VMID_SHIFT
[PATCH] arm64: bump MAX_MASTER_STREAMIDS from 16 to 32
I recently ran into a situation where I needed more than 16 stream ids for an smmu on an AMD SOC, but we are currently limited to 16 by: #define MAX_MASTER_STREAMIDSMAX_PHANDLE_ARGS #define MAX_PHANDLE_ARGS 16 I expect others will run into this in the future as more advanced SOCs start to come out. The resulting one line patch has been tested to fix the problem. Cc: Grant Likely grant.lik...@linaro.org Cc: Rob Herring robh...@kernel.org Cc: Will Deacon will.dea...@arm.com Signed-off-by: Joel Schopp joel.sch...@amd.com --- include/linux/of.h |2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/linux/of.h b/include/linux/of.h index 196b34c..70ad4d9 100644 --- a/include/linux/of.h +++ b/include/linux/of.h @@ -67,7 +67,7 @@ struct device_node { #endif }; -#define MAX_PHANDLE_ARGS 16 +#define MAX_PHANDLE_ARGS 32 struct of_phandle_args { struct device_node *np; int args_count; -- To unsubscribe from this list: send the line unsubscribe kvm in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: Verifying Execution Integrity in Untrusted hypervisors
On 07/25/2014 03:11 PM, Shiva V wrote: Hello, I am exploring on finding a way to ensure runtime integrity of a executable in untrusted hypervisors. In particular, this is my requirements: 1. I have a 2 virtual machines. (A, B). 2. VM-A is running some service (exe) inside it. For example any resource accounting service intended to monitor for VM-B. 3. I need a way to verify run time integrity from VM-B of the executable running inside VM-A. 4. Both the vm's are not privileged vm's and are just normal client virtual machines. 5. Underlying hypervisor is untrusted. If the hypervisor is untrusted you have broken the root of trust and are going to be pretty out of luck. Any solution will require a level below the hypervisor that you trust. An example would be hardware that isolates memory from the hypervisor, ie https://www.google.com/patents/WO2013054528A1?cl=endq=Joel+Schopphl=ensa=Xei=YYPWU6aVJNProATe5IHACQved=0CDMQ6AEwAw Another approach might be to start with something like a TPM and a trusted runtime UEFI. You could then have the guest call UEFI to do measurement with the TPM and use that for remote attestation. With such a method you could probably get to the point that you could measure something in guest memory at run-time, but you would have no assurance it hadn't been modified prior or after and was just temporarily correct, it would be a very point in time measurement. -- To unsubscribe from this list: send the line unsubscribe kvm in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [PATCH] kvm: arm64: vgic: fix hyp panic with 64k pages on juno platform
I can't think of any way of determining whether a particular system gets this right or wrong automatically, which suggests perhaps we need to allow the device tree to specify that the GICV is 64k-page-safe... When we support such systems, I also think we'll need a device-tree change. My main concern right now is stopping the ability to hose the entire machine by trying to instantiate a virtual GIC. ...I don't see how your patch prevents instantiating a VGIC and hosing the machine on a system where the 64K with the GICV registers in it goes [GICV registers] [machine blows up if you read this] 0K 8K 64K True, if such a machine existed, then this patch wouldn't detect it. I don't think we support anything like that in mainline at the moment, but the following additional diff should solve the problem, no? diff --git a/virt/kvm/arm/vgic.c b/virt/kvm/arm/vgic.c index fa9a95b3ed19..476d3bf540a8 100644 --- a/virt/kvm/arm/vgic.c +++ b/virt/kvm/arm/vgic.c @@ -1539,6 +1539,14 @@ int kvm_vgic_hyp_init(void) goto out_unmap; } + if (!PAGE_ALIGNED(resource_size(vcpu_res))) { + kvm_err(GICV size 0x%llx not a multiple of page size 0x%lx\n, + (unsigned long long)resource_size(vcpu_res), + PAGE_SIZE); + ret = -ENXIO; + goto out_unmap; + } + vgic_vcpu_base = vcpu_res.start; kvm_info(%s@%llx IRQ%d\n, vgic_node-name, This would break with my SOC device tree which looks like this. Note this device tree works just fine without checks. gic: interrupt-controller@e1101000 { compatible = arm,gic-400-v2m; #interrupt-cells = 3; #address-cells = 0; interrupt-controller; msi-controller; reg = 0x0 0xe111 0 0x1000, /* gic dist */ 0x0 0xe112f000 0 0x2000, /* gic cpu */ 0x0 0xe114f000 0 0x2000, /* gic virtual ic*/ 0x0 0xe116f000 0 0x2000, /* gic virtual cpu*/ 0x0 0xe118 0 0x1000; /* gic msi */ interrupts = 1 8 0xf04; }; -- To unsubscribe from this list: send the line unsubscribe kvm in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [PATCH] kvm: arm64: vgic: fix hyp panic with 64k pages on juno platform
On 07/25/2014 09:08 AM, Will Deacon wrote: Hi Joel, On Fri, Jul 25, 2014 at 03:02:58PM +0100, Joel Schopp wrote: I can't think of any way of determining whether a particular system gets this right or wrong automatically, which suggests perhaps we need to allow the device tree to specify that the GICV is 64k-page-safe... When we support such systems, I also think we'll need a device-tree change. My main concern right now is stopping the ability to hose the entire machine by trying to instantiate a virtual GIC. ...I don't see how your patch prevents instantiating a VGIC and hosing the machine on a system where the 64K with the GICV registers in it goes [GICV registers] [machine blows up if you read this] 0K 8K 64K True, if such a machine existed, then this patch wouldn't detect it. I don't think we support anything like that in mainline at the moment, but the following additional diff should solve the problem, no? diff --git a/virt/kvm/arm/vgic.c b/virt/kvm/arm/vgic.c index fa9a95b3ed19..476d3bf540a8 100644 --- a/virt/kvm/arm/vgic.c +++ b/virt/kvm/arm/vgic.c @@ -1539,6 +1539,14 @@ int kvm_vgic_hyp_init(void) goto out_unmap; } + if (!PAGE_ALIGNED(resource_size(vcpu_res))) { + kvm_err(GICV size 0x%llx not a multiple of page size 0x%lx\n, + (unsigned long long)resource_size(vcpu_res), + PAGE_SIZE); + ret = -ENXIO; + goto out_unmap; + } + vgic_vcpu_base = vcpu_res.start; kvm_info(%s@%llx IRQ%d\n, vgic_node-name, This would break with my SOC device tree which looks like this. Note this device tree works just fine without checks. 
gic: interrupt-controller@e1101000 { compatible = arm,gic-400-v2m; #interrupt-cells = 3; #address-cells = 0; interrupt-controller; msi-controller; reg = 0x0 0xe111 0 0x1000, /* gic dist */ 0x0 0xe112f000 0 0x2000, /* gic cpu */ 0x0 0xe114f000 0 0x2000, /* gic virtual ic*/ 0x0 0xe116f000 0 0x2000, /* gic virtual cpu*/ 0x0 0xe118 0 0x1000; /* gic msi */ interrupts = 1 8 0xf04; }; I appreciate it may work, but that's only because the kernel is actually using an alias of GICV at 0xe116 by accident. I would say that you're getting away with passing an incorrect description. The problem is that by the spec the size is 0x2000 and was never properly rearchitected from 4K to variable page size support. The workaround is that each of the 4K in the 64K page (16 of them) are really mapped to the same location in hardware. So the contents of 0xe116 is the same as the contents of 0xe116f000. See “Appendix F: GIC-400 and 64KB Translation Granule” in v2.1 of the ARM _Server Base System Architecture_ specification. Now if we were to change the entry to 0x0 0xe116 0 0x2000 it would be obviously broken because the second half would map to a duplicate of the first half. -- To unsubscribe from this list: send the line unsubscribe kvm in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [PATCH] kvm: arm64: vgic: fix hyp panic with 64k pages on juno platform
On 07/25/2014 09:23 AM, Will Deacon wrote: On Fri, Jul 25, 2014 at 03:16:15PM +0100, Joel Schopp wrote: On 07/25/2014 09:08 AM, Will Deacon wrote: This would break with my SOC device tree which looks like this. Note this device tree works just fine without checks. gic: interrupt-controller@e1101000 { compatible = arm,gic-400-v2m; #interrupt-cells = 3; #address-cells = 0; interrupt-controller; msi-controller; reg = 0x0 0xe111 0 0x1000, /* gic dist */ 0x0 0xe112f000 0 0x2000, /* gic cpu */ 0x0 0xe114f000 0 0x2000, /* gic virtual ic*/ 0x0 0xe116f000 0 0x2000, /* gic virtual cpu*/ 0x0 0xe118 0 0x1000; /* gic msi */ interrupts = 1 8 0xf04; }; I appreciate it may work, but that's only because the kernel is actually using an alias of GICV at 0xe116 by accident. I would say that you're getting away with passing an incorrect description. The problem is that by the spec the size is 0x2000 and was never properly rearchitected from 4K to variable page size support. The workaround is that each of the 4K in the 64K page (16 of them) are really mapped to the same location in hardware. So the contents of 0xe116 is the same as the contents of 0xe116f000. See “Appendix F: GIC-400 and 64KB Translation Granule” in v2.1 of the ARM _Server Base System Architecture_ specification. I've read that document, but it's not mandated and I don't think I have a single piece of hardware that actually follows it. Even the CPUs don't seem to perform the aliasing suggesting there (take a look at the A57 and A53 TRMs -- there are reserved regions in there). Not mandated, but it is obviously highly encouraged, and at a minimum valid. The SOC sitting under my desk follows it. Now if we were to change the entry to 0x0 0xe116 0 0x2000 it would be obviously broken because the second half would map to a duplicate of the first half. I think you'd need something like 0x0 0xe116 0 0x2 and we'd have to change the GIC driver to do the right thing. 
What we currently have is unsafe on most hardware, yet happens to work on your system. If you change the GIC driver,kvm, kvmtool, and qemu to do the right thing I'd be happy to change the device tree entry to 0x0 0xe116 0 0x2 or any other value of your choosing. I really have no opinion here on how it should be done other than what is there now currently works and I'd like to have whatever we do in the future continue to work. -- To unsubscribe from this list: send the line unsubscribe kvm in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [PATCH v2] kvm: arm64: vgic: fix hyp panic with 64k pages on juno platform
On 07/25/2014 10:29 AM, Will Deacon wrote: If the physical address of GICV isn't page-aligned, then we end up creating a stage-2 mapping of the page containing it, which causes us to map neighbouring memory locations directly into the guest. As an example, consider a platform with GICV at physical 0x2c02f000 running a 64k-page host kernel. If qemu maps this into the guest at 0x8001, then guest physical addresses 0x8001 - 0x8001efff will map host physical region 0x2c02 - 0x2c02efff. Accesses to these physical regions may cause UNPREDICTABLE behaviour, for example, on the Juno platform this will cause an SError exception to EL3, which brings down the entire physical CPU resulting in RCU stalls / HYP panics / host crashing / wasted weeks of debugging. No denying this is a problem. SBSA recommends that systems alias the 4k GICV across the bounding 64k region, in which case GICV physical could be described as 0x2c02 in the above scenario. The problem with this patch is the gicv is really 8K. The reason you would map at a 60K offset (0xf000), and why we do on our SOC, is so that the 8K gicv would pick up the last 4K from the first page and the first 4K from the next page. With your patch it is impossible to map all 8K of the gicv with 64K pages. My SOC which works fine with kvm now will go to not working with kvm after this patch. This patch fixes the problem by failing the vgic probe if the physical base address or the size of GICV aren't page-aligned. Note that this generated a warning in dmesg about freeing enabled IRQs, so I had to move the IRQ enabling later in the probe. Cc: Christoffer Dall christoffer.d...@linaro.org Cc: Marc Zyngier marc.zyng...@arm.com Cc: Gleb Natapov g...@kernel.org Cc: Paolo Bonzini pbonz...@redhat.com Cc: Joel Schopp joel.sch...@amd.com Cc: Don Dutile ddut...@redhat.com Acked-by: Peter Maydell peter.mayd...@linaro.org Signed-off-by: Will Deacon will.dea...@arm.com --- v1 -v2 : Added size alignment check and Peter's ack. 
Could this go in for 3.16 please? virt/kvm/arm/vgic.c | 24 1 file changed, 20 insertions(+), 4 deletions(-) diff --git a/virt/kvm/arm/vgic.c b/virt/kvm/arm/vgic.c index 56ff9bebb577..476d3bf540a8 100644 --- a/virt/kvm/arm/vgic.c +++ b/virt/kvm/arm/vgic.c @@ -1526,17 +1526,33 @@ int kvm_vgic_hyp_init(void) goto out_unmap; } - kvm_info(%s@%llx IRQ%d\n, vgic_node-name, - vctrl_res.start, vgic_maint_irq); - on_each_cpu(vgic_init_maintenance_interrupt, NULL, 1); - if (of_address_to_resource(vgic_node, 3, vcpu_res)) { kvm_err(Cannot obtain VCPU resource\n); ret = -ENXIO; goto out_unmap; } + + if (!PAGE_ALIGNED(vcpu_res.start)) { + kvm_err(GICV physical address 0x%llx not page aligned\n, + (unsigned long long)vcpu_res.start); + ret = -ENXIO; + goto out_unmap; + } + + if (!PAGE_ALIGNED(resource_size(vcpu_res))) { + kvm_err(GICV size 0x%llx not a multiple of page size 0x%lx\n, + (unsigned long long)resource_size(vcpu_res), + PAGE_SIZE); + ret = -ENXIO; + goto out_unmap; + } + vgic_vcpu_base = vcpu_res.start; + kvm_info(%s@%llx IRQ%d\n, vgic_node-name, + vctrl_res.start, vgic_maint_irq); + on_each_cpu(vgic_init_maintenance_interrupt, NULL, 1); + goto out; out_unmap: -- To unsubscribe from this list: send the line unsubscribe kvm in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [PATCH v2] kvm: arm64: vgic: fix hyp panic with 64k pages on juno platform
On 07/25/2014 11:02 AM, Peter Maydell wrote: On 25 July 2014 16:56, Joel Schopp joel.sch...@amd.com wrote: The problem with this patch is the gicv is really 8K. The reason you would map at a 60K offset (0xf000), and why we do on our SOC, is so that the 8K gicv would pick up the last 4K from the first page and the first 4K from the next page. With your patch it is impossible to map all 8K of the gicv with 64K pages. My SOC which works fine with kvm now will go to not working with kvm after this patch. Your SOC currently works by fluke because the guest doesn't look at the last 4K of the GICC. If you're happy with it continuing to work by fluke you could make your device tree say it had a 64K GICV region with a 64K-aligned base. To make it work not by fluke but actually correctly requires Marc's patchset, at a minimum. Since we aren't actually using the last 4K of the gicv at the moment I suppose I could drop my objections to this patch and change my device tree until Marc's patchset provides a proper solution for the gicv's second 4K that works for everybody. Acked-by: Joel Schopp joel.sch...@amd.com -- To unsubscribe from this list: send the line unsubscribe kvm in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [PATCH] kvm: arm64: vgic: fix hyp panic with 64k pages on juno platform
On 07/24/2014 02:47 PM, Peter Maydell wrote: On 24 July 2014 20:27, Will Deacon will.dea...@arm.com wrote: If the physical address of GICV isn't page-aligned, then we end up creating a stage-2 mapping of the page containing it, which causes us to map neighbouring memory locations directly into the guest. As an example, consider a platform with GICV at physical 0x2c02f000 running a 64k-page host kernel. If qemu maps this into the guest at 0x8001, then guest physical addresses 0x8001 - 0x8001efff will map host physical region 0x2c02 - 0x2c02efff. Accesses to these physical regions may cause UNPREDICTABLE behaviour, for example, on the Juno platform this will cause an SError exception to EL3, which brings down the entire physical CPU resulting in RCU stalls / HYP panics / host crashing / wasted weeks of debugging. This seems to me like a specific problem with Juno rather than an issue with having the GICV at a non-page-aligned start. The requirement to be able to expose host GICV as the guest GICC in a 64K pages system is just nothing else in that 64K page (or pages, if the GICV runs across two pages) is allowed to be unsafe for the guest to touch, which remains true whether the GICV starts at 0K in the 64K page or 60K. SBSA recommends that systems alias the 4k GICV across the bounding 64k region, in which case GICV physical could be described as 0x2c02 in the above scenario. The SBSA make every 4K region in the 64K page be the same thing recommendation is one way of satisfying the requirement that the whole 64K page is safe for the guest to touch. (Making the rest of the page RAZ/WI would be another option I guess.) If your system actually implements the SBSA recommendation then in fact describing the GICV-phys-base as the 64K-aligned address is wrong, because then the register at GICV-base + 4K would not be the first register in the 2nd page of the GICV, it would be another copy of the 1st page. 
This happens to work on Linux guests currently because they don't touch anything in the 2nd page, but for cases like device passthrough IIRC we might well like the guest to use some of the 2nd page registers. So the only correct choice on those systems is to specify the +60K address as the GICV physaddr in the device tree, and use Marc's patchset to allow QEMU/kvmtool to determine the page offset within the 64K page so it can reflect that in the guest's device tree. I have one of those systems specifying +60K address as the GICV physaddr and it works well for me with 64K pages and kvm with both QEMU and kvmtool. I can't think of any way of determining whether a particular system gets this right or wrong automatically, which suggests perhaps we need to allow the device tree to specify that the GICV is 64k-page-safe... I don't have a better solution, despite my lack of enthusiasm for yet another device tree property. -- To unsubscribe from this list: send the line unsubscribe kvm in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [PATCH] kvm: arm64: vgic: fix hyp panic with 64k pages on juno platform
On 07/24/2014 02:55 PM, Will Deacon wrote: On Thu, Jul 24, 2014 at 08:47:23PM +0100, Peter Maydell wrote: On 24 July 2014 20:27, Will Deacon will.dea...@arm.com wrote: If the physical address of GICV isn't page-aligned, then we end up creating a stage-2 mapping of the page containing it, which causes us to map neighbouring memory locations directly into the guest. As an example, consider a platform with GICV at physical 0x2c02f000 running a 64k-page host kernel. If qemu maps this into the guest at 0x8001, then guest physical addresses 0x8001 - 0x8001efff will map host physical region 0x2c02 - 0x2c02efff. Accesses to these physical regions may cause UNPREDICTABLE behaviour, for example, on the Juno platform this will cause an SError exception to EL3, which brings down the entire physical CPU resulting in RCU stalls / HYP panics / host crashing / wasted weeks of debugging. This seems to me like a specific problem with Juno rather than an issue with having the GICV at a non-page-aligned start. The requirement to be able to expose host GICV as the guest GICC in a 64K pages system is just nothing else in that 64K page (or pages, if the GICV runs across two pages) is allowed to be unsafe for the guest to touch, which remains true whether the GICV starts at 0K in the 64K page or 60K. I agree, and for that we would need a new ioctl so we can query the page-offset of the GICV on systems where it is safe. Given that such an ioctl doesn't exist today, I would like to plug the hole in mainline kernels with this patch, we can relax in the future if systems appear where it would be safe to map the entire 64k region. I have such a system. -- To unsubscribe from this list: send the line unsubscribe kvm in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
[PATCH] kvmtool: arm64: fix compilation error
Currently tools/kvm doesn't build on arm64 when gtk3 is present. The error looks like this: LINK lkvm ui/gtk3.o: In function `kvm_gtk_key_press': /extra/sb/linux-kvm/tools/kvm/ui/gtk3.c:201: undefined reference to `kbd_queue' /extra/sb/linux-kvm/tools/kvm/ui/gtk3.c:204: undefined reference to `kbd_queue' /extra/sb/linux-kvm/tools/kvm/ui/gtk3.c:216: undefined reference to `kbd_queue' /extra/sb/linux-kvm/tools/kvm/ui/gtk3.c:217: undefined reference to `kbd_queue' /extra/sb/linux-kvm/tools/kvm/ui/gtk3.c:218: undefined reference to `kbd_queue' ui/gtk3.o:/extra/sb/linux-kvm/tools/kvm/ui/gtk3.c:219: more undefined references to `kbd_queue' follow collect2: error: ld returned 1 exit status make: *** [lkvm] Error 1 The patch below makes the error go away and the resulting lkvm runs on arm64. Cc: Pekka Enberg penb...@kernel.org Signed-off-by: Joel Schopp joel.sch...@amd.com --- tools/kvm/Makefile |2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/kvm/Makefile b/tools/kvm/Makefile index 880d580..fba60f1 100644 --- a/tools/kvm/Makefile +++ b/tools/kvm/Makefile @@ -102,6 +102,7 @@ OBJS+= hw/pci-shmem.o OBJS += kvm-ipc.o OBJS += builtin-sandbox.o OBJS += virtio/mmio.o +OBJS += hw/i8042.o # Translate uname -m into ARCH string ARCH ?= $(shell uname -m | sed -e s/i.86/i386/ -e s/ppc.*/powerpc/ \ @@ -129,7 +130,6 @@ ifeq ($(ARCH),x86) OBJS+= x86/kvm.o OBJS+= x86/kvm-cpu.o OBJS+= x86/mptable.o - OBJS+= hw/i8042.o # Exclude BIOS object files from header dependencies. OTHEROBJS += x86/bios.o OTHEROBJS += x86/bios/bios-rom.o -- To unsubscribe from this list: send the line unsubscribe kvm in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
[PATCH v2] arm64: fix VTTBR_BADDR_MASK
This patch sets TCR_EL2.PS, VTCR_EL2.T0SZ and vttbr_baddr_mask in runtime, not compile time. In ARMv8, EL2 physical address size (TCR_EL2.PS) and stage2 input address size (VTCR_EL2.T0SZ) cannot be determined in compile time since they depend on hardware capability. According to Table D4-23 and Table D4-25 in ARM DDI 0487A.b document, vttbr_x is calculated using different hard-coded values with consideration of T0SZ, granule size and the level of translation tables. Therefore, vttbr_baddr_mask should be determined dynamically. Changes since v1: Rebased fix on Jungseok Lee's patch https://lkml.org/lkml/2014/5/12/189 to provide better long term fix. Updated that patch to log error instead of silently fail on unaligned vttbr. Cc: Christoffer Dall christoffer.d...@linaro.org Cc: Sungjinn Chung sungjinn.ch...@samsung.com Signed-off-by: Jungseok Lee jays@samsung.com Signed-off-by: Joel Schopp joel.sch...@amd.com --- arch/arm/kvm/arm.c | 91 +- arch/arm64/include/asm/kvm_arm.h | 17 +-- arch/arm64/kvm/hyp-init.S| 20 ++-- 3 files changed, 106 insertions(+), 22 deletions(-) diff --git a/arch/arm/kvm/arm.c b/arch/arm/kvm/arm.c index 3c82b37..6d51a53 100644 --- a/arch/arm/kvm/arm.c +++ b/arch/arm/kvm/arm.c @@ -37,6 +37,7 @@ #include asm/mman.h #include asm/tlbflush.h #include asm/cacheflush.h +#include asm/cputype.h #include asm/virt.h #include asm/kvm_arm.h #include asm/kvm_asm.h @@ -61,6 +62,9 @@ static atomic64_t kvm_vmid_gen = ATOMIC64_INIT(1); static u8 kvm_next_vmid; static DEFINE_SPINLOCK(kvm_vmid_lock); +/* VTTBR mask cannot be determined in compile time under ARMv8 */ +static u64 vttbr_baddr_mask; + static bool vgic_present; static void kvm_arm_set_running_vcpu(struct kvm_vcpu *vcpu) @@ -413,6 +417,75 @@ static bool need_new_vmid_gen(struct kvm *kvm) } /** + * set_vttbr_baddr_mask - set mask value for vttbr base address + * + * In ARMv8, vttbr_baddr_mask cannot be determined in compile time since stage2 + * input address size depends on hardware capability. 
Thus, it is needed to read + * ID_AA64MMFR0_EL1.PARange first and then set vttbr_baddr_mask with + * consideration of both granule size and the level of translation tables. + */ +static int set_vttbr_baddr_mask(void) +{ +#ifndef CONFIG_ARM64 + vttbr_baddr_mask = VTTBR_BADDR_MASK; +#else + int pa_range, t0sz, vttbr_x; + + pa_range = read_cpuid(ID_AA64MMFR0_EL1) 0xf; + + switch (pa_range) { + case 0: + t0sz = VTCR_EL2_T0SZ(32); + break; + case 1: + t0sz = VTCR_EL2_T0SZ(36); + break; + case 2: + t0sz = VTCR_EL2_T0SZ(40); + break; + case 3: + t0sz = VTCR_EL2_T0SZ(42); + break; + case 4: + t0sz = VTCR_EL2_T0SZ(44); + break; + default: + t0sz = VTCR_EL2_T0SZ(48); + } + + /* +* See Table D4-23 and Table D4-25 in ARM DDI 0487A.b to figure out +* the origin of the hardcoded values, 38 and 37. +*/ +#ifdef CONFIG_ARM64_64K_PAGES + /* +* 16 = T0SZ = 21 is valid under 3 level of translation tables +* 18 = T0SZ = 34 is valid under 2 level of translation tables +* 31 = T0SZ = 39 is valid under 1 level of translation tables +*/ + if (t0sz = 17) { + kvm_err(Cannot support %d-bit address space\n, 64 - t0sz); + return -EINVAL; + } + vttbr_x = 38 - t0sz; +#else + /* +* 16 = T0SZ = 24 is valid under 4 level of translation tables +* 21 = T0SZ = 30 is valid under 3 level of translation tables +* 30 = T0SZ = 39 is valid under 2 level of translation tables +*/ + if (t0sz = 20) { + kvm_err(Cannot support %d-bit address space\n, 64 - t0sz); + return -EINVAL; + } + vttbr_x = 37 - t0sz; +#endif + vttbr_baddr_mask = (((1LLU (48 - vttbr_x)) - 1) (vttbr_x - 1)); +#endif + return 0; +} + +/** * update_vttbr - Update the VTTBR with a valid VMID before the guest runs * @kvmThe guest that we are about to run * @@ -466,8 +539,16 @@ static void update_vttbr(struct kvm *kvm) /* update vttbr to be used with the new vmid */ pgd_phys = virt_to_phys(kvm-arch.pgd); vmid = ((u64)(kvm-arch.vmid) VTTBR_VMID_SHIFT) VTTBR_VMID_MASK; - kvm-arch.vttbr = pgd_phys VTTBR_BADDR_MASK; - kvm-arch.vttbr |= vmid; + + /* +* 
If the VTTBR isn't aligned there is something wrong with the system +* or kernel. It is better to just fail and not mask it. But no need +* to panic the host kernel with a BUG_ON(), instead just log the error. +*/ + if (pgd_phys ~vttbr_baddr_mask) + kvm_err(VTTBR not aligned, expect guest to fail); + + kvm-arch.vttbr = pgd_phys | vmid
Re: [PATCH] arm64: fix VTTBR_BADDR_MASK
On 07/10/2014 03:25 PM, Christoffer Dall wrote: On Wed, Jul 09, 2014 at 11:17:04AM -0500, Joel Schopp wrote: The current calculation for VTTBR_BADDR_MASK masks only 39 bits and not all 40 bits. That last bit is important as some systems allocate from near the top of the available address space. This patch is necessary to run KVM on an aarch64 SOC I have been testing. Signed-off-by: Joel Schopp joel.sch...@amd.com --- arch/arm64/include/asm/kvm_arm.h |2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm64/include/asm/kvm_arm.h b/arch/arm64/include/asm/kvm_arm.h index 3d69030..b39e93f 100644 --- a/arch/arm64/include/asm/kvm_arm.h +++ b/arch/arm64/include/asm/kvm_arm.h @@ -148,7 +148,7 @@ #endif #define VTTBR_BADDR_SHIFT (VTTBR_X - 1) -#define VTTBR_BADDR_MASK (((1LLU (40 - VTTBR_X)) - 1) VTTBR_BADDR_SHIFT) +#define VTTBR_BADDR_MASK (0xffLLU) /* bits 0-39 */ #define VTTBR_VMID_SHIFT (48LLU) #define VTTBR_VMID_MASK (0xffLLU VTTBR_VMID_SHIFT) While this is obviously fixing a bug, it doesn't feel like the right short-term fix. I'll have to go back and read the definitions of x in BADDR[47:x] for VTTBR_EL2 exactly again, but the intended use of VTTBR_BADDR_MASK (and the only occurence of it in C-code) is to deal with alignment of the allocated pgd. I think there is some confusion. Before VTTBR_BADDR_MASK always evaluated to 0x7fLLU, after the change it always evaluates to 0xffLLU Neither before nor after the patch is it dealing with alignment. Any bits it throws away (bits 40-47) are most significant not least significant. I could have rewritten the macro like: #define VTTBR_BADDR_MASK (((1LLU (40 - VTTBR_X + 1)) - 1) VTTBR_BADDR_SHIFT) to correct the bug but it's my opinion that the existing code is quite obfuscated which is how the bug happened in the first place. It seemed easier to just actually mask the bits in a straightforward and easy to understand manner. 
I even added a comment so nobody has to count the f's ;) -- To unsubscribe from this list: send the line unsubscribe kvm in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [PATCH] arm64: fix VTTBR_BADDR_MASK
On 07/10/2014 04:02 PM, Joel Schopp wrote: On 07/10/2014 03:25 PM, Christoffer Dall wrote: On Wed, Jul 09, 2014 at 11:17:04AM -0500, Joel Schopp wrote: The current calculation for VTTBR_BADDR_MASK masks only 39 bits and not all 40 bits. That last bit is important as some systems allocate from near the top of the available address space. This patch is necessary to run KVM on an aarch64 SOC I have been testing. Signed-off-by: Joel Schopp joel.sch...@amd.com --- arch/arm64/include/asm/kvm_arm.h |2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm64/include/asm/kvm_arm.h b/arch/arm64/include/asm/kvm_arm.h index 3d69030..b39e93f 100644 --- a/arch/arm64/include/asm/kvm_arm.h +++ b/arch/arm64/include/asm/kvm_arm.h @@ -148,7 +148,7 @@ #endif #define VTTBR_BADDR_SHIFT (VTTBR_X - 1) -#define VTTBR_BADDR_MASK (((1LLU (40 - VTTBR_X)) - 1) VTTBR_BADDR_SHIFT) +#define VTTBR_BADDR_MASK (0xffLLU) /* bits 0-39 */ #define VTTBR_VMID_SHIFT (48LLU) #define VTTBR_VMID_MASK (0xffLLU VTTBR_VMID_SHIFT) While this is obviously fixing a bug, it doesn't feel like the right short-term fix. I'll have to go back and read the definitions of x in BADDR[47:x] for VTTBR_EL2 exactly again, but the intended use of VTTBR_BADDR_MASK (and the only occurence of it in C-code) is to deal with alignment of the allocated pgd. I think there is some confusion. Before VTTBR_BADDR_MASK always evaluated to 0x7fLLU, after the change it always evaluates to 0xffLLU Neither before nor after the patch is it dealing with alignment. Any bits it throws away (bits 40-47) are most significant not least significant. I could have rewritten the macro like: #define VTTBR_BADDR_MASK (((1LLU (40 - VTTBR_X + 1)) - 1) VTTBR_BADDR_SHIFT) to correct the bug but it's my opinion that the existing code is quite obfuscated which is how the bug happened in the first place. It seemed easier to just actually mask the bits in a straightforward and easy to understand manner. 
I even added a comment so nobody has to count the fs ;) I hate to reply to my own email correcting myself. But you were correct. I will fix and resend a v2. -- To unsubscribe from this list: send the line unsubscribe kvm in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
[PATCH] arm64: fix VTTBR_BADDR_MASK
The current calculation for VTTBR_BADDR_MASK masks only 39 bits and not all 40 bits. That last bit is important as some systems allocate from near the top of the available address space. This patch is necessary to run KVM on an aarch64 SOC I have been testing. Signed-off-by: Joel Schopp joel.sch...@amd.com --- arch/arm64/include/asm/kvm_arm.h |2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm64/include/asm/kvm_arm.h b/arch/arm64/include/asm/kvm_arm.h index 3d69030..b39e93f 100644 --- a/arch/arm64/include/asm/kvm_arm.h +++ b/arch/arm64/include/asm/kvm_arm.h @@ -148,7 +148,7 @@ #endif #define VTTBR_BADDR_SHIFT (VTTBR_X - 1) -#define VTTBR_BADDR_MASK (((1LLU (40 - VTTBR_X)) - 1) VTTBR_BADDR_SHIFT) +#define VTTBR_BADDR_MASK (0xffLLU) /* bits 0-39 */ #define VTTBR_VMID_SHIFT (48LLU) #define VTTBR_VMID_MASK (0xffLLU VTTBR_VMID_SHIFT) -- To unsubscribe from this list: send the line unsubscribe kvm in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [PATCH v2 9/9] arm64: KVM: vgic: deal with GIC sub-page alignment
On 06/25/2014 10:00 AM, Marc Zyngier wrote: On 25/06/14 15:56, Joel Schopp wrote: On 06/24/2014 05:28 PM, Peter Maydell wrote: On 24 June 2014 20:28, Joel Schopp joel.sch...@amd.com wrote: On 06/19/2014 04:21 AM, Marc Zyngier wrote: The GIC CPU interface is always 4k aligned. If the host is using 64k pages, it is critical to place the guest's GICC interface at the same relative alignment as the host's GICV. Failure to do so results in an impossibility for the guest to deal with interrupts. Add a KVM_DEV_ARM_VGIC_GRP_ADDR_OFFSET attribute for the VGIC, allowing userspace to retrieve the GICV offset in a page. It becomes then trivial to adjust the GICC base address for the guest. Does this mean there is a corresponding patch for qemu? Not as far as I know. It's a bit awkward on the QEMU end because we really want to provide the guest a consistent memory map regardless of the host CPU. So at best we'd probably use it to say sorry, can't run on this CPU/host kernel. I think most arm64 servers are going to run with 64k pages. It seems like a major problem to have qemu not work on these systems. How many of them will be with the GICC *not* 64kB aligned? If I'm reading the Server Base System Architecture v2.2 Appendix F correctly all of them. Here's the relevant quote: In a 64KB translation granule system this means that GICC needs to have its base at 4KB below a 64KB boundary. (That said, if you think you can make QEMU usefully use the information and want to write a QEMU patch I'm not averse to the idea.) I'll have to think about this approach some more, but I'm not opposed to doing the work if I thought it was the right thing to do. kvmtool is probably better placed to take advantage of it since it takes more of a deal with what the host provides you philosophy. kvmtool is fun as a play toy, but in the real world nobody is building clouds using kvmtool, they use kvm with qemu. A play toy? Hmmm. 
Do you realise that most of KVM on arm64 has been written using this play toy? I meant no insult. I really like kvmtool. I'm just saying that the eventual end users of these systems will want to run qemu and not kvmtool. -- To unsubscribe from this list: send the line unsubscribe kvm in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [PATCH v2 9/9] arm64: KVM: vgic: deal with GIC sub-page alignment
On 06/24/2014 05:28 PM, Peter Maydell wrote: On 24 June 2014 20:28, Joel Schopp joel.sch...@amd.com wrote: On 06/19/2014 04:21 AM, Marc Zyngier wrote: The GIC CPU interface is always 4k aligned. If the host is using 64k pages, it is critical to place the guest's GICC interface at the same relative alignment as the host's GICV. Failure to do so results in an impossibility for the guest to deal with interrupts. Add a KVM_DEV_ARM_VGIC_GRP_ADDR_OFFSET attribute for the VGIC, allowing userspace to retrieve the GICV offset in a page. It becomes then trivial to adjust the GICC base address for the guest. Does this mean there is a corresponding patch for qemu? Not as far as I know. It's a bit awkward on the QEMU end because we really want to provide the guest a consistent memory map regardless of the host CPU. So at best we'd probably use it to say sorry, can't run on this CPU/host kernel. I think most arm64 servers are going to run with 64k pages. It seems like a major problem to have qemu not work on these systems. (That said, if you think you can make QEMU usefully use the information and want to write a QEMU patch I'm not averse to the idea.) I'll have to think about this approach some more, but I'm not opposed to doing the work if I thought it was the right thing to do. kvmtool is probably better placed to take advantage of it since it takes more of a deal with what the host provides you philosophy. kvmtool is fun as a play toy, but in the real world nobody is building clouds using kvmtool, they use kvm with qemu. thanks -- PMM -- To unsubscribe from this list: send the line unsubscribe kvm in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [PATCH v2 9/9] arm64: KVM: vgic: deal with GIC sub-page alignment
On 06/25/2014 12:34 PM, Peter Maydell wrote: On 25 June 2014 15:56, Joel Schopp joel.sch...@amd.com wrote: On 06/24/2014 05:28 PM, Peter Maydell wrote: On 24 June 2014 20:28, Joel Schopp joel.sch...@amd.com wrote: Does this mean there is a corresponding patch for qemu? Not as far as I know. It's a bit awkward on the QEMU end because we really want to provide the guest a consistent memory map regardless of the host CPU. So at best we'd probably use it to say sorry, can't run on this CPU/host kernel. I think most arm64 servers are going to run with 64k pages. It seems like a major problem to have qemu not work on these systems. QEMU should already work fine on servers with 64K pages; you just need to have the host offset of the GICV within the 64K page and the guest offset of the GICC within the 64K page be the same (and at the moment both must also be zero, which I believe is true for all of them at the moment except possibly the AEM model; counterexamples welcome). Disclaimer: I haven't personally tested this, but on the other hand I don't think anybody's reported it as not working either. It doesn't work for me. Maybe I'm doing something wrong, but I can't see what. I am unique in that I'm running a gic-400 (gicv2m) on aarch64 hardware with 64k pages. I'm also unique in that my hardware maps each 4K gic entry to a 64K page (aliasing each 4k of gic 16 times in a 64K page, ie the gic virtual ic is at 0xe114 and 0xe1141000 and 0xe1142000, etc). This is inline with appendix F of the server base system architecture. This is inconvenient when the size is 0x2000 (8K). As a result all the offsets in the device tree entries are to the last 4K in the page so that an 8K read will read the last 4k from one page and the first 4k from the next and actually get 8k of the gic. 
gic: interrupt-controller@e1101000 { compatible = arm,gic-400; #interrupt-cells = 3; #address-cells = 0; interrupt-controller; msi-controller; reg = 0x0 0xe111 0 0x1000, /* gic dist */ 0x0 0xe112f000 0 0x2000, /* gic cpu */ 0x0 0xe114f000 0 0x2000, /* gic virtual ic*/ 0x0 0xe116f000 0 0x2000, /* gic virtual cpu*/ 0x0 0xe118 0 0x1000; /* gic msi */ interrupts = 1 8 0xf04; }; My concern here is that if userspace is going to look at 8k starting at the beginning of the page, guest offset 0 in your terminology, (say 0xe114) instead of starting at the last 4k of the page, offset 0xf000 (say 0xe114f000) it is going to get the second 4k wrong by reading 0xe1141000 instead of 0xe115. -- To unsubscribe from this list: send the line unsubscribe kvm in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [PATCH v2 9/9] arm64: KVM: vgic: deal with GIC sub-page alignment
On 06/25/2014 03:45 PM, Peter Maydell wrote: On 25 June 2014 20:34, Joel Schopp joel.sch...@amd.com wrote: It doesn't work for me. Maybe I'm doing something wrong, but I can't see what. I am unique in that I'm running a gic-400 (gicv2m) on aarch64 hardware with 64k pages. I'm also unique in that my hardware maps each 4K gic entry to a 64K page (aliasing each 4k of gic 16 times in a 64K page, ie the gic virtual ic is at 0xe114 and 0xe1141000 and 0xe1142000, etc). This is inline with appendix F of the server base system architecture. This is inconvenient when the size is 0x2000 (8K). As a result all the offsets in the device tree entries are to the last 4K in the page so that an 8K read will read the last 4k from one page and the first 4k from the next and actually get 8k of the gic. gic: interrupt-controller@e1101000 { compatible = arm,gic-400; #interrupt-cells = 3; #address-cells = 0; interrupt-controller; msi-controller; reg = 0x0 0xe111 0 0x1000, /* gic dist */ 0x0 0xe112f000 0 0x2000, /* gic cpu */ 0x0 0xe114f000 0 0x2000, /* gic virtual ic*/ 0x0 0xe116f000 0 0x2000, /* gic virtual cpu*/ 0x0 0xe118 0 0x1000; /* gic msi */ Right, this is the oddball case we don't yet support for 64K pages (though as you say it is a permitted configuration per the SBSA). At least I know I'm not going crazy. interrupts = 1 8 0xf04; }; My concern here is that if userspace is going to look at 8k starting at the beginning of the page, guest offset 0 in your terminology, (say 0xe114) instead of starting at the last 4k of the page, offset 0xf000 (say 0xe114f000) it is going to get the second 4k wrong by reading 0xe1141000 instead of 0xe115. Userspace doesn't actually look at anything in the GICC. It just asks the kernel to put the guest GICC (ie the mapping of the host GICV) at a particular base address which happens to be a multiple of 64K. In this case if the host kernel is using 64K pages then the KVM kernel code ought to say sorry, can't do that when we tell it the base address. 
(That is, it's impossible to give the guest a VM where the GICC it sees is at a 64K boundary on your hardware and host kernel config, and hopefully we report that in a not totally opaque fashion.) The errors I'm seeing look like: from qemu: error: kvm run failed Bad address Aborted (core dumped) from kvm: [ 7931.722965] kvm [1208]: Unsupported fault status: EC=0x20 DFCS=0x14 from kvmtool: from lkvm (kvmtool): Warning: /extra/rootfs/boot/Image is not a bzImage. Trying to load it as a flat binary... Info: Loaded kernel to 0x8008 (10212384 bytes) Info: Placing fdt at 0x8fe0 - 0x8fff Info: virtio-mmio.devices=0x200@0x1:36 KVM_RUN failed: Bad address If you hack QEMU's memory map for the virt board so instead of [VIRT_GIC_CPU] = { 0x801, 0x1 }, we have [VIRT_GIC_CPU] = { 0x801f000, 0x2000 }, No change in result, not to say that this wouldn't work if some other unknown problem were fixed. does it work? If QEMU supported this VGIC_GRP_ADDR_OFFSET query then all it would do would be to change that offset and size. It would be good to know if there are other problems beyond that... (Conveniently, Linux guests won't currently try to look at the second 4K page of their GICC...) That's handy. -- To unsubscribe from this list: send the line unsubscribe kvm in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [PATCH v2 9/9] arm64: KVM: vgic: deal with GIC sub-page alignment
On 06/19/2014 04:21 AM, Marc Zyngier wrote: The GIC CPU interface is always 4k aligned. If the host is using 64k pages, it is critical to place the guest's GICC interface at the same relative alignment as the host's GICV. Failure to do so results in an impossibility for the guest to deal with interrupts. Add a KVM_DEV_ARM_VGIC_GRP_ADDR_OFFSET attribute for the VGIC, allowing userspace to retrieve the GICV offset in a page. It becomes then trivial to adjust the GICC base address for the guest. Does this mean there is a corresponding patch for qemu? Signed-off-by: Marc Zyngier marc.zyngier@arm.com --- arch/arm/include/uapi/asm/kvm.h | 1 + arch/arm64/include/uapi/asm/kvm.h | 1 + virt/kvm/arm/vgic.c | 7 +++ 3 files changed, 9 insertions(+) diff --git a/arch/arm/include/uapi/asm/kvm.h b/arch/arm/include/uapi/asm/kvm.h index 8b51c1a..056b782 100644 --- a/arch/arm/include/uapi/asm/kvm.h +++ b/arch/arm/include/uapi/asm/kvm.h @@ -174,6 +174,7 @@ struct kvm_arch_memory_slot { #define KVM_DEV_ARM_VGIC_OFFSET_SHIFT 0 #define KVM_DEV_ARM_VGIC_OFFSET_MASK (0xffffffffULL << KVM_DEV_ARM_VGIC_OFFSET_SHIFT) #define KVM_DEV_ARM_VGIC_GRP_NR_IRQS 3 +#define KVM_DEV_ARM_VGIC_GRP_ADDR_OFFSET 4 /* KVM_IRQ_LINE irq field index values */ #define KVM_ARM_IRQ_TYPE_SHIFT 24 diff --git a/arch/arm64/include/uapi/asm/kvm.h b/arch/arm64/include/uapi/asm/kvm.h index b5cd6ed..5513de4 100644 --- a/arch/arm64/include/uapi/asm/kvm.h +++ b/arch/arm64/include/uapi/asm/kvm.h @@ -160,6 +160,7 @@ struct kvm_arch_memory_slot { #define KVM_DEV_ARM_VGIC_OFFSET_SHIFT 0 #define KVM_DEV_ARM_VGIC_OFFSET_MASK (0xffffffffULL << KVM_DEV_ARM_VGIC_OFFSET_SHIFT) #define KVM_DEV_ARM_VGIC_GRP_NR_IRQS 3 +#define KVM_DEV_ARM_VGIC_GRP_ADDR_OFFSET 4 /* KVM_IRQ_LINE irq field index values */ #define KVM_ARM_IRQ_TYPE_SHIFT 24 diff --git a/virt/kvm/arm/vgic.c b/virt/kvm/arm/vgic.c index b0cd417..68ac9c6 100644 --- a/virt/kvm/arm/vgic.c +++ b/virt/kvm/arm/vgic.c @@ -2228,6 +2228,12 @@ static int vgic_get_attr(struct kvm_device *dev, struct kvm_device_attr 
*attr) r = put_user(dev->kvm->arch.vgic.nr_irqs, uaddr); break; } + case KVM_DEV_ARM_VGIC_GRP_ADDR_OFFSET: { + u32 __user *uaddr = (u32 __user *)(long)attr->addr; + u32 val = vgic->vcpu_base & ~PAGE_MASK; + r = put_user(val, uaddr); + break; + } } @@ -2265,6 +2271,7 @@ static int vgic_has_attr(struct kvm_device *dev, struct kvm_device_attr *attr) offset = attr->attr & KVM_DEV_ARM_VGIC_OFFSET_MASK; return vgic_has_attr_regs(vgic_cpu_ranges, offset); case KVM_DEV_ARM_VGIC_GRP_NR_IRQS: + case KVM_DEV_ARM_VGIC_GRP_ADDR_OFFSET: return 0; } return -ENXIO; -- To unsubscribe from this list: send the line "unsubscribe kvm" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html