Re: [PATCH] kvm: x86: svm: remove SVM_EXIT_READ_CR* intercepts

2015-03-16 Thread Joel Schopp


On 03/12/2015 04:20 PM, Radim Krčmář wrote:
> 2015-03-12 15:17-0500, Joel Schopp:
>> There isn't really a valid reason for kvm to intercept cr* reads
>> on svm hardware.  The current kvm code just ends up returning
>> the register
> There is no need to intercept CR* if the value that the guest should see
> is equal to what we set there, but that is not always the case:
>  - CR0 might differ from what the guest should see because of lazy fpu
Based on our previous conversations I understand why we have to trap the
write to the CR0 ts bit for lazy fpu, but don't understand why that
should affect a read.  I'll take another look at the code to see what
I'm missing.  You are probably correct, in which case I'll modify the
patch to only turn off the read intercepts when lazy fpu isn't active.

>  - CR3 isn't intercepted with nested paging and it should differ
>    otherwise
>  - CR4 contains PAE bit when run without nested paging
>
> CR2 and CR8 already aren't intercepted, so it looks like only CR0 and
> CR4 could use some optimizations.
I'll send out a v2 with these less aggressive optimizations.
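
For reference, a sketch of what the less aggressive version could look
like: clear the CR0 read intercept only while the guest's CR0 matches the
hardware value and the FPU is not lazily switched out, otherwise keep
intercepting reads.  This is a hypothetical illustration modeled on
update_cr0_intercept() in svm.c, not the posted v2:

	/* Sketch only: keep intercepting CR0 reads whenever the guest would
	 * otherwise see the host-adjusted value (e.g. lazy-FPU CR0.TS);
	 * drop the intercept when the two views agree. */
	static void update_cr0_read_intercept(struct vcpu_svm *svm)
	{
		ulong gcr0 = svm->vcpu.arch.cr0;
		struct vmcb *vmcb = svm->vmcb;

		if (gcr0 == vmcb->save.cr0 && svm->vcpu.fpu_active)
			clr_cr_intercept(svm, INTERCEPT_CR0_READ);
		else
			set_cr_intercept(svm, INTERCEPT_CR0_READ);
	}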


[PATCH] kvm: x86: svm: remove SVM_EXIT_READ_CR* intercepts

2015-03-12 Thread Joel Schopp
There isn't really a valid reason for kvm to intercept cr* reads
on svm hardware.  The current kvm code just ends up returning
the register

Signed-off-by: Joel Schopp joel.sch...@amd.com
---
 arch/x86/kvm/svm.c |   41 ++++-------------------------------------
 1 file changed, 4 insertions(+), 37 deletions(-)

diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c
index cc618c8..c3d10e6 100644
--- a/arch/x86/kvm/svm.c
+++ b/arch/x86/kvm/svm.c
@@ -1090,9 +1090,6 @@ static void init_vmcb(struct vcpu_svm *svm)
	svm->vcpu.fpu_active = 1;
	svm->vcpu.arch.hflags = 0;
 
-	set_cr_intercept(svm, INTERCEPT_CR0_READ);
-	set_cr_intercept(svm, INTERCEPT_CR3_READ);
-	set_cr_intercept(svm, INTERCEPT_CR4_READ);
	set_cr_intercept(svm, INTERCEPT_CR0_WRITE);
	set_cr_intercept(svm, INTERCEPT_CR3_WRITE);
	set_cr_intercept(svm, INTERCEPT_CR4_WRITE);
@@ -1174,7 +1171,6 @@ static void init_vmcb(struct vcpu_svm *svm)
		control->nested_ctl = 1;
		clr_intercept(svm, INTERCEPT_INVLPG);
		clr_exception_intercept(svm, PF_VECTOR);
-		clr_cr_intercept(svm, INTERCEPT_CR3_READ);
		clr_cr_intercept(svm, INTERCEPT_CR3_WRITE);
		save->g_pat = 0x0007040600070406ULL;
		save->cr3 = 0;
@@ -2968,29 +2964,10 @@ static int cr_interception(struct vcpu_svm *svm)
			kvm_queue_exception(&svm->vcpu, UD_VECTOR);
			return 1;
		}
-	} else { /* mov from cr */
-		switch (cr) {
-		case 0:
-			val = kvm_read_cr0(&svm->vcpu);
-			break;
-		case 2:
-			val = svm->vcpu.arch.cr2;
-			break;
-		case 3:
-			val = kvm_read_cr3(&svm->vcpu);
-			break;
-		case 4:
-			val = kvm_read_cr4(&svm->vcpu);
-			break;
-		case 8:
-			val = kvm_get_cr8(&svm->vcpu);
-			break;
-		default:
-			WARN(1, "unhandled read from CR%d", cr);
-			kvm_queue_exception(&svm->vcpu, UD_VECTOR);
-			return 1;
-		}
-		kvm_register_write(&svm->vcpu, reg, val);
+	} else { /* mov from cr, should never trap in svm */
+		WARN(1, "unhandled read from CR%d", cr);
+		kvm_queue_exception(&svm->vcpu, UD_VECTOR);
+		return 1;
	}
	kvm_complete_insn_gp(&svm->vcpu, err);
 
@@ -3321,10 +3298,6 @@ static int mwait_interception(struct vcpu_svm *svm)
 }
 
 static int (*const svm_exit_handlers[])(struct vcpu_svm *svm) = {
-   [SVM_EXIT_READ_CR0] = cr_interception,
-   [SVM_EXIT_READ_CR3] = cr_interception,
-   [SVM_EXIT_READ_CR4] = cr_interception,
-   [SVM_EXIT_READ_CR8] = cr_interception,
[SVM_EXIT_CR0_SEL_WRITE]= emulate_on_interception,
[SVM_EXIT_WRITE_CR0]= cr_interception,
[SVM_EXIT_WRITE_CR3]= cr_interception,
@@ -4151,11 +4124,9 @@ static const struct __x86_intercept {
u32 exit_code;
enum x86_intercept_stage stage;
 } x86_intercept_map[] = {
-   [x86_intercept_cr_read] = POST_EX(SVM_EXIT_READ_CR0),
[x86_intercept_cr_write]= POST_EX(SVM_EXIT_WRITE_CR0),
[x86_intercept_clts]= POST_EX(SVM_EXIT_WRITE_CR0),
[x86_intercept_lmsw]= POST_EX(SVM_EXIT_WRITE_CR0),
-   [x86_intercept_smsw]= POST_EX(SVM_EXIT_READ_CR0),
[x86_intercept_dr_read] = POST_EX(SVM_EXIT_READ_DR0),
[x86_intercept_dr_write]= POST_EX(SVM_EXIT_WRITE_DR0),
[x86_intercept_sldt]= POST_EX(SVM_EXIT_LDTR_READ),
@@ -4221,10 +4192,6 @@ static int svm_check_intercept(struct kvm_vcpu *vcpu,
goto out;
 
switch (icpt_info.exit_code) {
-	case SVM_EXIT_READ_CR0:
-		if (info->intercept == x86_intercept_cr_read)
-			icpt_info.exit_code += info->modrm_reg;
-		break;
case SVM_EXIT_WRITE_CR0: {
unsigned long cr0, val;
u64 intercept;



[PATCH] x86: svm: use cr_interception for SVM_EXIT_CR0_SEL_WRITE

2015-03-06 Thread Joel Schopp
From: David Kaplan david.kap...@amd.com

Another patch in my war on emulate_on_interception() use as an svm exit handler.

These were pulled out of a larger patch at the suggestion of Radim Krcmar, see
https://lkml.org/lkml/2015/2/25/559

Signed-off-by: David Kaplan david.kap...@amd.com
[separated out just cr_interception part from larger removal of
INTERCEPT_CR0_WRITE, forward ported, tested]
Signed-off-by: Joel Schopp joel.sch...@amd.com
---
 arch/x86/kvm/svm.c |7 +--
 1 file changed, 5 insertions(+), 2 deletions(-)

diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c
index d319e0c..57f0240 100644
--- a/arch/x86/kvm/svm.c
+++ b/arch/x86/kvm/svm.c
@@ -2940,7 +2940,10 @@ static int cr_interception(struct vcpu_svm *svm)
		return emulate_on_interception(svm);
 
	reg = svm->vmcb->control.exit_info_1 & SVM_EXITINFO_REG_MASK;
-	cr = svm->vmcb->control.exit_code - SVM_EXIT_READ_CR0;
+	if (svm->vmcb->control.exit_code == SVM_EXIT_SEL_CR0_WRITE)
+		cr = SVM_EXIT_WRITE_CR0 - SVM_EXIT_READ_CR0;
+	else
+		cr = svm->vmcb->control.exit_code - SVM_EXIT_READ_CR0;
 
	err = 0;
	if (cr >= 16) { /* mov to cr */
@@ -3325,7 +3328,7 @@ static int (*const svm_exit_handlers[])(struct vcpu_svm *svm) = {
[SVM_EXIT_READ_CR3] = cr_interception,
[SVM_EXIT_READ_CR4] = cr_interception,
[SVM_EXIT_READ_CR8] = cr_interception,
-   [SVM_EXIT_CR0_SEL_WRITE]= emulate_on_interception,
+   [SVM_EXIT_CR0_SEL_WRITE]= cr_interception,
[SVM_EXIT_WRITE_CR0]= cr_interception,
[SVM_EXIT_WRITE_CR3]= cr_interception,
[SVM_EXIT_WRITE_CR4]= cr_interception,



[PATCH v2] x86: svm: use cr_interception for SVM_EXIT_CR0_SEL_WRITE

2015-03-06 Thread Joel Schopp
From: David Kaplan david.kap...@amd.com

Another patch in my war on emulate_on_interception() use as an svm exit handler.

These were pulled out of a larger patch at the suggestion of Radim Krcmar, see
https://lkml.org/lkml/2015/2/25/559

Changes since v1:
* fixed typo introduced after test, retested

Signed-off-by: David Kaplan david.kap...@amd.com
[separated out just cr_interception part from larger removal of
INTERCEPT_CR0_WRITE, forward ported, tested]
Signed-off-by: Joel Schopp joel.sch...@amd.com
---
 arch/x86/kvm/svm.c |7 +--
 1 file changed, 5 insertions(+), 2 deletions(-)

diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c
index d319e0c..16ad05b 100644
--- a/arch/x86/kvm/svm.c
+++ b/arch/x86/kvm/svm.c
@@ -2940,7 +2940,10 @@ static int cr_interception(struct vcpu_svm *svm)
		return emulate_on_interception(svm);
 
	reg = svm->vmcb->control.exit_info_1 & SVM_EXITINFO_REG_MASK;
-	cr = svm->vmcb->control.exit_code - SVM_EXIT_READ_CR0;
+	if (svm->vmcb->control.exit_code == SVM_EXIT_CR0_SEL_WRITE)
+		cr = SVM_EXIT_WRITE_CR0 - SVM_EXIT_READ_CR0;
+	else
+		cr = svm->vmcb->control.exit_code - SVM_EXIT_READ_CR0;
 
	err = 0;
	if (cr >= 16) { /* mov to cr */
@@ -3325,7 +3328,7 @@ static int (*const svm_exit_handlers[])(struct vcpu_svm *svm) = {
[SVM_EXIT_READ_CR3] = cr_interception,
[SVM_EXIT_READ_CR4] = cr_interception,
[SVM_EXIT_READ_CR8] = cr_interception,
-   [SVM_EXIT_CR0_SEL_WRITE]= emulate_on_interception,
+   [SVM_EXIT_CR0_SEL_WRITE]= cr_interception,
[SVM_EXIT_WRITE_CR0]= cr_interception,
[SVM_EXIT_WRITE_CR3]= cr_interception,
[SVM_EXIT_WRITE_CR4]= cr_interception,
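
For readers following the arithmetic: the SVM exit codes lay CR accesses
out so that exit_code - SVM_EXIT_READ_CR0 yields 0-15 for reads and 16-31
for writes, which is why cr_interception() tests cr >= 16 for "mov to cr".
SVM_EXIT_CR0_SEL_WRITE sits outside that window, so the patch has to remap
it by hand.  A short illustration (exit-code values as defined in
arch/x86/include/uapi/asm/svm.h; commentary, not patch code):

	/* SVM_EXIT_READ_CR0 == 0x000, SVM_EXIT_WRITE_CR0 == 0x010,
	 * SVM_EXIT_CR0_SEL_WRITE == 0x065.  The selective CR0 write exit
	 * is folded onto the ordinary CR0-write slot: */
	cr = SVM_EXIT_WRITE_CR0 - SVM_EXIT_READ_CR0;	/* == 16 */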



Re: [PATCH v3] x86: svm: use kvm_fast_pio_in()

2015-03-03 Thread Joel Schopp
Thank you for your detailed review on several of my patches.

  
>> +static int complete_fast_pio(struct kvm_vcpu *vcpu)
> (complete_fast_pio_in()?)
If I do a v4 I'll adopt that name.
>> +{
>> +	unsigned long new_rax = kvm_register_read(vcpu, VCPU_REGS_RAX);
> Shouldn't we handle writes in EAX differently than in AX and AL, because
> of implicit zero extension.
I don't think the implicit zero extension hurts us here, but maybe there
is something I'm missing that I need to understand.  Could you explain this
further?
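
(For context, the behavior Radim is pointing at: in 64-bit mode a 32-bit
"in" implicitly zero-extends into RAX, while the 8- and 16-bit forms leave
the upper bits untouched.  A hedged sketch of what a completion handler
would need, reusing the pio field names from the patch under review:)

	/* Illustration only, not the posted code:
	 *   in %dx,%al  - writes RAX[7:0],  upper 56 bits preserved
	 *   in %dx,%ax  - writes RAX[15:0], upper 48 bits preserved
	 *   in %dx,%eax - writes RAX[31:0], upper 32 bits zeroed
	 * Copying pio.size bytes over a saved RAX is right for the 1- and
	 * 2-byte cases but misses the zero extension in the 4-byte case: */
	if (vcpu->arch.pio.size == 4)
		new_rax = 0;	/* 32-bit in zero-extends into RAX */
	memcpy(&new_rax, vcpu->arch.pio_data, vcpu->arch.pio.size);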

>> +
>> +	BUG_ON(!vcpu->arch.pio.count);
>> +	BUG_ON(vcpu->arch.pio.count * vcpu->arch.pio.size > sizeof(new_rax));
> (Looking at it again, a check for 'vcpu->arch.pio.count == 1' would be
>  sufficient.)
I prefer the checks that are there now after your last review,
especially since surrounded by BUG_ON they only run on debug kernels.

>> +
>> +	memcpy(&new_rax, vcpu, sizeof(new_rax));
>> +	trace_kvm_pio(KVM_PIO_IN, vcpu->arch.pio.port, vcpu->arch.pio.size,
>> +		      vcpu->arch.pio.count, vcpu->arch.pio_data);
>> +	kvm_register_write(vcpu, VCPU_REGS_RAX, new_rax);
>> +	vcpu->arch.pio.count = 0;
> I think it is better to call emulator_pio_in_emulated directly, like
>
>   emulator_pio_in_out(&vcpu->arch.emulate_ctxt, vcpu->arch.pio.size,
>                       vcpu->arch.pio.port, &new_rax, 1);
>   kvm_register_write(vcpu, VCPU_REGS_RAX, new_rax);
>
> because we know that vcpu->arch.pio.count != 0.
I think two extra lines of code in my patch vs your suggestion are worth
it to a) reduce execution path length b) increase readability c) avoid
breaking the abstraction by not checking the return code d) avoid any
future bugs introduced by changes to the function that would return a
value other than 1.

> Refactoring could avoid the weird vcpu->ctxt->vcpu conversion.
> (A better name is always welcome.)
The pointer chasing is making me dizzy.  I'm not sure why
emulator_pio_in_emulated takes an x86_emulate_ctxt when all it does is
immediately translate that to a vcpu and never use the x86_emulate_ctxt;
why not pass the vcpu in the first place?
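
(For reference, the conversion being questioned is just a container_of()
walk; paraphrased from the x86.c of this era:)

	/* The emulator context is embedded in the vcpu, so translating
	 * back is pure pointer arithmetic. */
	static struct kvm_vcpu *emul_to_vcpu(struct x86_emulate_ctxt *ctxt)
	{
		return container_of(ctxt, struct kvm_vcpu, arch.emulate_ctxt);
	}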



Re: [PATCH v3] x86: svm: use kvm_fast_pio_in()

2015-03-03 Thread Joel Schopp

On 03/03/2015 10:44 AM, Radim Krčmář wrote:
> 2015-03-02 15:02-0600, Joel Schopp:
>> +int kvm_fast_pio_in(struct kvm_vcpu *vcpu, int size, unsigned short port)
>> +{
>> +	unsigned long val;
>> +	int ret = emulator_pio_in_emulated(&vcpu->arch.emulate_ctxt, size,
>> +					   port, &val, 1);
>> +
> Btw. does this return 1 in some scenario?
If a function returns a value it is always a good idea to check it and
act appropriately.  That said...
emulator_pio_in_emulated will return 1 if emulator_pio_in_out returns 1
or if vcpu->arch.pio.count != 0
emulator_pio_in_out returns 1 if kernel_pio returns 0
kernel_pio returns 0 if kvm_io_bus_read returns 0
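
(Spelled out as a call graph, under the same reading of the 2015-era
x86.c sources:)

	/* kvm_fast_pio_in()
	 *   -> emulator_pio_in_emulated()  returns 1 when the data is
	 *                                  already buffered (pio.count != 0)
	 *        -> emulator_pio_in_out()  returns 1 when kernel_pio() == 0
	 *             -> kernel_pio()      returns 0 when kvm_io_bus_read()
	 *                                  satisfied the access in-kernel
	 * A 0 return means userspace must complete the I/O, which is why
	 * kvm_fast_pio_in() arms vcpu->arch.complete_userspace_io. */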




[PATCH v3] x86: svm: use kvm_fast_pio_in()

2015-03-02 Thread Joel Schopp
From: David Kaplan david.kap...@amd.com

We can make the in instruction go faster the same way the out instruction is
already.

Changes from v2[Joel]:
* changed rax from u32 to unsigned long
* changed a couple return 0 to BUG_ON()
* changed 8 to sizeof(new_rax)
* added trace hook
* removed redundant clearing of count
Changes from v1[Joel]
* Added kvm_fast_pio_in() implementation that was left out of v1

Signed-off-by: David Kaplan david.kap...@amd.com
[extracted from larger unrelated patch, forward ported, addressed reviews,
tested]
Signed-off-by: Joel Schopp joel.sch...@amd.com
---
 arch/x86/include/asm/kvm_host.h |1 +
 arch/x86/kvm/svm.c  |4 +++-
 arch/x86/kvm/x86.c  |   30 ++
 3 files changed, 34 insertions(+), 1 deletion(-)

diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index a236e39..b976824 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -931,6 +931,7 @@ int kvm_set_msr(struct kvm_vcpu *vcpu, struct msr_data 
*msr);
 struct x86_emulate_ctxt;
 
 int kvm_fast_pio_out(struct kvm_vcpu *vcpu, int size, unsigned short port);
+int kvm_fast_pio_in(struct kvm_vcpu *vcpu, int size, unsigned short port);
 void kvm_emulate_cpuid(struct kvm_vcpu *vcpu);
 int kvm_emulate_halt(struct kvm_vcpu *vcpu);
 int kvm_emulate_wbinvd(struct kvm_vcpu *vcpu);
diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c
index d319e0c..f8c906b 100644
--- a/arch/x86/kvm/svm.c
+++ b/arch/x86/kvm/svm.c
@@ -1899,7 +1899,7 @@ static int io_interception(struct vcpu_svm *svm)
	++svm->vcpu.stat.io_exits;
	string = (io_info & SVM_IOIO_STR_MASK) != 0;
	in = (io_info & SVM_IOIO_TYPE_MASK) != 0;
-	if (string || in)
+	if (string)
		return emulate_instruction(vcpu, 0) == EMULATE_DONE;
 
	port = io_info >> 16;
@@ -1907,6 +1907,8 @@ static int io_interception(struct vcpu_svm *svm)
	svm->next_rip = svm->vmcb->control.exit_info_2;
	skip_emulated_instruction(&svm->vcpu);
 
+	if (in)
+		return kvm_fast_pio_in(vcpu, size, port);
	return kvm_fast_pio_out(vcpu, size, port);
 }
 
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index bd7a70b..d05efaf 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -5463,6 +5463,36 @@ int kvm_fast_pio_out(struct kvm_vcpu *vcpu, int size, unsigned short port)
 }
 EXPORT_SYMBOL_GPL(kvm_fast_pio_out);
 
+static int complete_fast_pio(struct kvm_vcpu *vcpu)
+{
+	unsigned long new_rax = kvm_register_read(vcpu, VCPU_REGS_RAX);
+
+	BUG_ON(!vcpu->arch.pio.count);
+	BUG_ON(vcpu->arch.pio.count * vcpu->arch.pio.size > sizeof(new_rax));
+
+	memcpy(&new_rax, vcpu, sizeof(new_rax));
+	trace_kvm_pio(KVM_PIO_IN, vcpu->arch.pio.port, vcpu->arch.pio.size,
+		      vcpu->arch.pio.count, vcpu->arch.pio_data);
+	kvm_register_write(vcpu, VCPU_REGS_RAX, new_rax);
+	vcpu->arch.pio.count = 0;
+	return 1;
+}
+
+int kvm_fast_pio_in(struct kvm_vcpu *vcpu, int size, unsigned short port)
+{
+	unsigned long val;
+	int ret = emulator_pio_in_emulated(&vcpu->arch.emulate_ctxt, size,
+					   port, &val, 1);
+
+	if (ret)
+		kvm_register_write(vcpu, VCPU_REGS_RAX, val);
+	else
+		vcpu->arch.complete_userspace_io = complete_fast_pio;
+
+	return ret;
+}
+EXPORT_SYMBOL_GPL(kvm_fast_pio_in);
+
 static void tsc_bad(void *info)
 {
__this_cpu_write(cpu_tsc_khz, 0);



Re: [PATCH v2] x86: svm: use kvm_fast_pio_in()

2015-03-02 Thread Joel Schopp





>> 		return emulate_instruction(vcpu, 0) == EMULATE_DONE;
>>
>> 	port = io_info >> 16;
>> @@ -1907,6 +1907,8 @@ static int io_interception(struct vcpu_svm *svm)
>> 	svm->next_rip = svm->vmcb->control.exit_info_2;
>> 	skip_emulated_instruction(&svm->vcpu);
>>
>> +	if (in)
>> +		return kvm_fast_pio_in(vcpu, size, port);
>> 	return kvm_fast_pio_out(vcpu, size, port);
> (kvm_fast_pio() comes to mind.)
If you combined them you'd have to have an extra argument to say if it
was in or out.  You'd then have to have code to parse that.  I prefer
this way.

>> }
>>
>> diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
>> index bd7a70b..089247c 100644
>> --- a/arch/x86/kvm/x86.c
>> +++ b/arch/x86/kvm/x86.c
>> @@ -5463,6 +5463,39 @@ int kvm_fast_pio_out(struct kvm_vcpu *vcpu, int size, unsigned short port)
>>  }
>>  EXPORT_SYMBOL_GPL(kvm_fast_pio_out);
>>
>> +static int complete_fast_pio(struct kvm_vcpu *vcpu)
>> +{
>> +	u32 new_rax = kvm_register_read(vcpu, VCPU_REGS_RAX);
> u64.
Good call.  I'll use unsigned long like kvm_fast_pio_out() uses.

>> +
>> +	if (!vcpu->arch.pio.count)
>> +		return 0;
>> +	if (vcpu->arch.pio.count * vcpu->arch.pio.size > 8)
>> +		return 0;
> sizeof(new_rax).  (safer and easier to understand)
>
> Both should never happen in KVM code, BUG_ON().
Agreed on both counts.

>> +
>> +	memcpy(&new_rax, vcpu->arch.pio_data,
>> +	       vcpu->arch.pio.count * vcpu->arch.pio.size);
> Use emulator_pio_in_emulated() here, for code sharing.
> (We want to trace the read here too;  it could be better to split
>  the path from emulator_pio_in_emulated() first.)
I looked at pulling this out, it was painful.  I'll add the trace hook.

>> +	kvm_register_write(vcpu, VCPU_REGS_RAX, new_rax);
>> +
>> +	vcpu->arch.pio.count = 0;
>> +	return 1;
>> +}
>> +
>> +int kvm_fast_pio_in(struct kvm_vcpu *vcpu, int size, unsigned short port)
>> +{
>> +	unsigned long val;
>> +	int ret = emulator_pio_in_emulated(&vcpu->arch.emulate_ctxt, size,
>> +					   port, &val, 1);
>> +
>> +	if (ret) {
>> +		kvm_register_write(vcpu, VCPU_REGS_RAX, val);
>> +		vcpu->arch.pio.count = 0;
> (emulator_pio_in_emulated() sets count to zero if it returns true.)
will remove = 0 line


Re: [PATCH] x86: svm: use kvm_fast_pio_in()

2015-03-02 Thread Joel Schopp



>> +	if (in)
>> +		return kvm_fast_pio_in(vcpu, size, port);
> Have I missed a patch that defined kvm_fast_pio_in()?
Not sure how I managed to leave out the bulk of the patch.  Resending v2
momentarily.



Re: [PATCH] x86: svm: make wbinvd faster

2015-03-02 Thread Joel Schopp


On 03/02/2015 10:03 AM, Radim Krčmář wrote:
> 2015-03-02 10:25-0500, Bandan Das:
>> Radim Krčmář rkrc...@redhat.com writes:
>>> 2015-03-01 21:29-0500, Bandan Das:
>>>> Joel Schopp joel.sch...@amd.com writes:
>>>>> +static int wbinvd_interception(struct vcpu_svm *svm)
>>>>> +{
>>>>> +	kvm_emulate_wbinvd(&svm->vcpu);
>>>>> +	skip_emulated_instruction(&svm->vcpu);
>>>>> +	return 1;
>>>>> +}
>>>> Can't we merge this to kvm_emulate_wbinvd, and just call that function
>>>> directly for both vmx and svm ?
>>> kvm_emulate_wbinvd() lives in x86.c and skip_emulated_instruction() is
>>> from svm.c/vmx.c:  so we'd have to create a new x86 op and change the
>>> emulator code as well ... it's probably better like this.
>> There's already one - kvm_x86_ops->skip_emulated_instruction
> My bad, its usage is inconsistent and I only looked at two close
> interceptions where it was used ... kvm_emulate_cpuid() calls
> kvm_x86_ops->skip_emulated_instruction(), while kvm_emulate_halt() and
> kvm_emulate_hypercall() need an external skip.
>
> We do skip the instruction with kvm_emulate(), so automatically
> skipping the instruction on kvm_emulate_*() makes sense:
>   1. rename kvm_emulate_halt() and kvm_emulate_wbinvd() to accommodate
>      callers that don't want to skip
>   2. introduce kvm_emulate_{halt,wbinvd}() and move the skip to
>      kvm_emulate_{halt,wbinvd,hypercall}()
>
> The alternative is to remove kvm_x86_ops->skip_emulated_instruction():
>   1. remove skip from kvm_emulate_cpuid() and modify callers
>   2. move kvm_complete_insn_gp to a header file and use
>      skip_emulated_instruction directly
>   3. remove unused kvm_x86_ops->skip_emulated_instruction()
>
> Which one do you prefer?
I prefer renaming them, ie kvm_emulate_wbinvd_noskip(), and making the
existing ones, ie kvm_emulate_wbinvd(), call the noskip version and add a
skip similar to how wbinvd_interception above does.  I can send out a
patch later today with that rework.



[PATCH v2] x86: svm: use kvm_fast_pio_in()

2015-03-02 Thread Joel Schopp
From: David Kaplan david.kap...@amd.com

We can make the in instruction go faster the same way the out instruction is
already.

Changes from v1
* Added kvm_fast_pio_in() implementation that was left out of v1

Signed-off-by: David Kaplan david.kap...@amd.com
[extracted from larger unrelated patch, forward ported, tested]
Signed-off-by: Joel Schopp joel.sch...@amd.com
---
 arch/x86/include/asm/kvm_host.h |1 +
 arch/x86/kvm/svm.c  |4 +++-
 arch/x86/kvm/x86.c  |   33 +
 3 files changed, 37 insertions(+), 1 deletion(-)

diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index a236e39..b976824 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -931,6 +931,7 @@ int kvm_set_msr(struct kvm_vcpu *vcpu, struct msr_data 
*msr);
 struct x86_emulate_ctxt;
 
 int kvm_fast_pio_out(struct kvm_vcpu *vcpu, int size, unsigned short port);
+int kvm_fast_pio_in(struct kvm_vcpu *vcpu, int size, unsigned short port);
 void kvm_emulate_cpuid(struct kvm_vcpu *vcpu);
 int kvm_emulate_halt(struct kvm_vcpu *vcpu);
 int kvm_emulate_wbinvd(struct kvm_vcpu *vcpu);
diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c
index d319e0c..f8c906b 100644
--- a/arch/x86/kvm/svm.c
+++ b/arch/x86/kvm/svm.c
@@ -1899,7 +1899,7 @@ static int io_interception(struct vcpu_svm *svm)
	++svm->vcpu.stat.io_exits;
	string = (io_info & SVM_IOIO_STR_MASK) != 0;
	in = (io_info & SVM_IOIO_TYPE_MASK) != 0;
-	if (string || in)
+	if (string)
		return emulate_instruction(vcpu, 0) == EMULATE_DONE;
 
	port = io_info >> 16;
@@ -1907,6 +1907,8 @@ static int io_interception(struct vcpu_svm *svm)
	svm->next_rip = svm->vmcb->control.exit_info_2;
	skip_emulated_instruction(&svm->vcpu);
 
+	if (in)
+		return kvm_fast_pio_in(vcpu, size, port);
	return kvm_fast_pio_out(vcpu, size, port);
 }
 
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index bd7a70b..089247c 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -5463,6 +5463,39 @@ int kvm_fast_pio_out(struct kvm_vcpu *vcpu, int size, unsigned short port)
 }
 EXPORT_SYMBOL_GPL(kvm_fast_pio_out);
 
+static int complete_fast_pio(struct kvm_vcpu *vcpu)
+{
+	u32 new_rax = kvm_register_read(vcpu, VCPU_REGS_RAX);
+
+	if (!vcpu->arch.pio.count)
+		return 0;
+	if (vcpu->arch.pio.count * vcpu->arch.pio.size > 8)
+		return 0;
+
+	memcpy(&new_rax, vcpu->arch.pio_data,
+	       vcpu->arch.pio.count * vcpu->arch.pio.size);
+	kvm_register_write(vcpu, VCPU_REGS_RAX, new_rax);
+
+	vcpu->arch.pio.count = 0;
+	return 1;
+}
+
+int kvm_fast_pio_in(struct kvm_vcpu *vcpu, int size, unsigned short port)
+{
+	unsigned long val;
+	int ret = emulator_pio_in_emulated(&vcpu->arch.emulate_ctxt, size,
+					   port, &val, 1);
+
+	if (ret) {
+		kvm_register_write(vcpu, VCPU_REGS_RAX, val);
+		vcpu->arch.pio.count = 0;
+	} else
+		vcpu->arch.complete_userspace_io = complete_fast_pio;
+
+	return ret;
+}
+EXPORT_SYMBOL_GPL(kvm_fast_pio_in);
+
 static void tsc_bad(void *info)
 {
__this_cpu_write(cpu_tsc_khz, 0);



[PATCH v2 0/2] Series short description

2015-03-02 Thread Joel Schopp
Review comments from v1 that used kvm_emulate_wbinvd() pointed out that
kvm_emulate_* was inconsistent in using skipping, while kvm_emulate() always
skips.  The first patch cleans up the existing use while the second patch
adds use of the updated version of kvm_emulate_wbinvd() in svm.

---

Joel Schopp (2):
  kvm: x86: make kvm_emulate_* consistent
  x86: svm: make wbinvd faster


 arch/x86/kvm/svm.c |   11 ---
 arch/x86/kvm/vmx.c |9 +++--
 arch/x86/kvm/x86.c |   23 ---
 3 files changed, 31 insertions(+), 12 deletions(-)



[PATCH v2 1/2] kvm: x86: make kvm_emulate_* consistent

2015-03-02 Thread Joel Schopp
Currently kvm_emulate() skips the instruction but kvm_emulate_* sometimes
don't.  The end result is the caller ends up doing the skip themselves.
Let's make them consistent.

Signed-off-by: Joel Schopp joel.sch...@amd.com
---
 arch/x86/kvm/svm.c |2 --
 arch/x86/kvm/vmx.c |9 +++--
 arch/x86/kvm/x86.c |   23 ---
 3 files changed, 23 insertions(+), 11 deletions(-)

diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c
index d319e0c..0c9e377 100644
--- a/arch/x86/kvm/svm.c
+++ b/arch/x86/kvm/svm.c
@@ -1929,14 +1929,12 @@ static int nop_on_interception(struct vcpu_svm *svm)
 static int halt_interception(struct vcpu_svm *svm)
 {
	svm->next_rip = kvm_rip_read(&svm->vcpu) + 1;
-	skip_emulated_instruction(&svm->vcpu);
	return kvm_emulate_halt(&svm->vcpu);
 }
 
 static int vmmcall_interception(struct vcpu_svm *svm)
 {
	svm->next_rip = kvm_rip_read(&svm->vcpu) + 3;
-	skip_emulated_instruction(&svm->vcpu);
	kvm_emulate_hypercall(&svm->vcpu);
	return 1;
 }
diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index 14c1a18..b7dcd3c 100644
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -4995,7 +4995,7 @@ static int handle_rmode_exception(struct kvm_vcpu *vcpu,
if (emulate_instruction(vcpu, 0) == EMULATE_DONE) {
		if (vcpu->arch.halt_request) {
			vcpu->arch.halt_request = 0;
-   return kvm_emulate_halt(vcpu);
+   return kvm_emulate_halt_noskip(vcpu);
}
return 1;
}
@@ -5522,13 +5522,11 @@ static int handle_interrupt_window(struct kvm_vcpu *vcpu)
 
 static int handle_halt(struct kvm_vcpu *vcpu)
 {
-   skip_emulated_instruction(vcpu);
return kvm_emulate_halt(vcpu);
 }
 
 static int handle_vmcall(struct kvm_vcpu *vcpu)
 {
-   skip_emulated_instruction(vcpu);
kvm_emulate_hypercall(vcpu);
return 1;
 }
@@ -5559,7 +5557,6 @@ static int handle_rdpmc(struct kvm_vcpu *vcpu)
 
 static int handle_wbinvd(struct kvm_vcpu *vcpu)
 {
-   skip_emulated_instruction(vcpu);
kvm_emulate_wbinvd(vcpu);
return 1;
 }
@@ -5898,7 +5895,7 @@ static int handle_invalid_guest_state(struct kvm_vcpu *vcpu)
 
	if (vcpu->arch.halt_request) {
		vcpu->arch.halt_request = 0;
-   ret = kvm_emulate_halt(vcpu);
+   ret = kvm_emulate_halt_noskip(vcpu);
goto out;
}
 
@@ -9513,7 +9510,7 @@ static int nested_vmx_run(struct kvm_vcpu *vcpu, bool launch)
	vmcs12->launch_state = 1;
 
	if (vmcs12->guest_activity_state == GUEST_ACTIVITY_HLT)
-   return kvm_emulate_halt(vcpu);
+   return kvm_emulate_halt_noskip(vcpu);
 
vmx-nested.nested_run_pending = 1;
 
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index bd7a70b..96a8333 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -4706,7 +4706,7 @@ static void emulator_invlpg(struct x86_emulate_ctxt *ctxt, ulong address)
kvm_mmu_invlpg(emul_to_vcpu(ctxt), address);
 }
 
-int kvm_emulate_wbinvd(struct kvm_vcpu *vcpu)
+int kvm_emulate_wbinvd_noskip(struct kvm_vcpu *vcpu)
 {
if (!need_emulate_wbinvd(vcpu))
return X86EMUL_CONTINUE;
@@ -4723,11 +4723,19 @@ int kvm_emulate_wbinvd(struct kvm_vcpu *vcpu)
wbinvd();
return X86EMUL_CONTINUE;
 }
+
+int kvm_emulate_wbinvd(struct kvm_vcpu *vcpu)
+{
+	kvm_x86_ops->skip_emulated_instruction(vcpu);
+   return kvm_emulate_wbinvd_noskip(vcpu);
+}
 EXPORT_SYMBOL_GPL(kvm_emulate_wbinvd);
 
+
+
 static void emulator_wbinvd(struct x86_emulate_ctxt *ctxt)
 {
-   kvm_emulate_wbinvd(emul_to_vcpu(ctxt));
+   kvm_emulate_wbinvd_noskip(emul_to_vcpu(ctxt));
 }
 
 int emulator_get_dr(struct x86_emulate_ctxt *ctxt, int dr, unsigned long *dest)
@@ -5817,7 +5825,7 @@ void kvm_arch_exit(void)
free_percpu(shared_msrs);
 }
 
-int kvm_emulate_halt(struct kvm_vcpu *vcpu)
+int kvm_emulate_halt_noskip(struct kvm_vcpu *vcpu)
 {
	++vcpu->stat.halt_exits;
	if (irqchip_in_kernel(vcpu->kvm)) {
@@ -5828,6 +5836,13 @@ int kvm_emulate_halt(struct kvm_vcpu *vcpu)
return 0;
}
 }
+EXPORT_SYMBOL_GPL(kvm_emulate_halt_noskip);
+
+int kvm_emulate_halt(struct kvm_vcpu *vcpu)
+{
+	kvm_x86_ops->skip_emulated_instruction(vcpu);
+   return kvm_emulate_halt_noskip(vcpu);
+}
 EXPORT_SYMBOL_GPL(kvm_emulate_halt);
 
 int kvm_hv_hypercall(struct kvm_vcpu *vcpu)
@@ -5912,6 +5927,8 @@ int kvm_emulate_hypercall(struct kvm_vcpu *vcpu)
unsigned long nr, a0, a1, a2, a3, ret;
int op_64_bit, r = 1;
 
+	kvm_x86_ops->skip_emulated_instruction(vcpu);
+
	if (kvm_hv_hypercall_enabled(vcpu->kvm))
return kvm_hv_hypercall(vcpu);
 


[PATCH v2 2/2] x86: svm: make wbinvd faster

2015-03-02 Thread Joel Schopp
From: David Kaplan david.kap...@amd.com
No need to re-decode WBINVD since we know what it is from the intercept.

Signed-off-by: David Kaplan david.kap...@amd.com
[extracted from larger unrelated patch, forward ported, tested]
Signed-off-by: Joel Schopp joel.sch...@amd.com
---
 arch/x86/kvm/svm.c |9 -
 1 file changed, 8 insertions(+), 1 deletion(-)

diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c
index 0c9e377..794bca7 100644
--- a/arch/x86/kvm/svm.c
+++ b/arch/x86/kvm/svm.c
@@ -2774,6 +2774,13 @@ static int skinit_interception(struct vcpu_svm *svm)
return 1;
 }
 
+static int wbinvd_interception(struct vcpu_svm *svm)
+{
+	kvm_emulate_wbinvd(&svm->vcpu);
+   return 1;
+}
+
+
 static int xsetbv_interception(struct vcpu_svm *svm)
 {
u64 new_bv = kvm_read_edx_eax(svm-vcpu);
@@ -3374,7 +3381,7 @@ static int (*const svm_exit_handlers[])(struct vcpu_svm *svm) = {
[SVM_EXIT_STGI] = stgi_interception,
[SVM_EXIT_CLGI] = clgi_interception,
[SVM_EXIT_SKINIT]   = skinit_interception,
-   [SVM_EXIT_WBINVD]   = emulate_on_interception,
+   [SVM_EXIT_WBINVD]   = wbinvd_interception,
[SVM_EXIT_MONITOR]  = monitor_interception,
[SVM_EXIT_MWAIT]= mwait_interception,
[SVM_EXIT_XSETBV]   = xsetbv_interception,



Re: [PATCH v2 1/2] kvm: x86: make kvm_emulate_* consistent

2015-03-02 Thread Joel Schopp



> ---
> diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
> @@ -4995,7 +4995,7 @@ static int handle_rmode_exception(struct kvm_vcpu *vcpu,
> 	if (emulate_instruction(vcpu, 0) == EMULATE_DONE) {
> 		if (vcpu->arch.halt_request) {
> 			vcpu->arch.halt_request = 0;
> -			return kvm_emulate_halt(vcpu);
> +			return kvm_emulate_halt_noskip(vcpu);
>
> noskip is used without being declared ... it shouldn't compile.
I tested on AMD hardware, I thought I had turned on the Intel KVM module
as well, but it turns out I hadn't.  Will fix in v3.

> *_noskip makes the usual case harder to understand: we just want to halt
> the vcpu, so name it more directly ... like kvm_vcpu_halt()?
I like that better.  Will make the change in v3.



[PATCH v3 1/2] kvm: x86: make kvm_emulate_* consistent

2015-03-02 Thread Joel Schopp
Currently kvm_emulate() skips the instruction but kvm_emulate_* sometimes
don't.  The end result is the caller ends up doing the skip themselves.
Let's make them consistent.

Signed-off-by: Joel Schopp joel.sch...@amd.com
---
 arch/x86/include/asm/kvm_host.h |1 +
 arch/x86/kvm/svm.c  |2 --
 arch/x86/kvm/vmx.c  |9 +++--
 arch/x86/kvm/x86.c  |   23 ---
 4 files changed, 24 insertions(+), 11 deletions(-)

diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index a236e39..bf5a160 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -933,6 +933,7 @@ struct x86_emulate_ctxt;
 int kvm_fast_pio_out(struct kvm_vcpu *vcpu, int size, unsigned short port);
 void kvm_emulate_cpuid(struct kvm_vcpu *vcpu);
 int kvm_emulate_halt(struct kvm_vcpu *vcpu);
+int kvm_vcpu_halt(struct kvm_vcpu *vcpu);
 int kvm_emulate_wbinvd(struct kvm_vcpu *vcpu);
 
 void kvm_get_segment(struct kvm_vcpu *vcpu, struct kvm_segment *var, int seg);
diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c
index d319e0c..0c9e377 100644
--- a/arch/x86/kvm/svm.c
+++ b/arch/x86/kvm/svm.c
@@ -1929,14 +1929,12 @@ static int nop_on_interception(struct vcpu_svm *svm)
 static int halt_interception(struct vcpu_svm *svm)
 {
	svm->next_rip = kvm_rip_read(&svm->vcpu) + 1;
-	skip_emulated_instruction(&svm->vcpu);
	return kvm_emulate_halt(&svm->vcpu);
 }
 
 static int vmmcall_interception(struct vcpu_svm *svm)
 {
	svm->next_rip = kvm_rip_read(&svm->vcpu) + 3;
-	skip_emulated_instruction(&svm->vcpu);
	kvm_emulate_hypercall(&svm->vcpu);
	return 1;
 }
diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index 14c1a18..eef7f53 100644
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -4995,7 +4995,7 @@ static int handle_rmode_exception(struct kvm_vcpu *vcpu,
if (emulate_instruction(vcpu, 0) == EMULATE_DONE) {
		if (vcpu->arch.halt_request) {
			vcpu->arch.halt_request = 0;
-   return kvm_emulate_halt(vcpu);
+   return kvm_vcpu_halt(vcpu);
}
return 1;
}
@@ -5522,13 +5522,11 @@ static int handle_interrupt_window(struct kvm_vcpu *vcpu)
 
 static int handle_halt(struct kvm_vcpu *vcpu)
 {
-   skip_emulated_instruction(vcpu);
return kvm_emulate_halt(vcpu);
 }
 
 static int handle_vmcall(struct kvm_vcpu *vcpu)
 {
-   skip_emulated_instruction(vcpu);
kvm_emulate_hypercall(vcpu);
return 1;
 }
@@ -5559,7 +5557,6 @@ static int handle_rdpmc(struct kvm_vcpu *vcpu)
 
 static int handle_wbinvd(struct kvm_vcpu *vcpu)
 {
-   skip_emulated_instruction(vcpu);
kvm_emulate_wbinvd(vcpu);
return 1;
 }
@@ -5898,7 +5895,7 @@ static int handle_invalid_guest_state(struct kvm_vcpu *vcpu)
 
	if (vcpu->arch.halt_request) {
		vcpu->arch.halt_request = 0;
-   ret = kvm_emulate_halt(vcpu);
+   ret = kvm_vcpu_halt(vcpu);
goto out;
}
 
@@ -9513,7 +9510,7 @@ static int nested_vmx_run(struct kvm_vcpu *vcpu, bool launch)
	vmcs12->launch_state = 1;
 
	if (vmcs12->guest_activity_state == GUEST_ACTIVITY_HLT)
-   return kvm_emulate_halt(vcpu);
+   return kvm_vcpu_halt(vcpu);
 
vmx-nested.nested_run_pending = 1;
 
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index bd7a70b..6ff90f7 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -4706,7 +4706,7 @@ static void emulator_invlpg(struct x86_emulate_ctxt *ctxt, ulong address)
kvm_mmu_invlpg(emul_to_vcpu(ctxt), address);
 }
 
-int kvm_emulate_wbinvd(struct kvm_vcpu *vcpu)
+int kvm_emulate_wbinvd_noskip(struct kvm_vcpu *vcpu)
 {
if (!need_emulate_wbinvd(vcpu))
return X86EMUL_CONTINUE;
@@ -4723,11 +4723,19 @@ int kvm_emulate_wbinvd(struct kvm_vcpu *vcpu)
wbinvd();
return X86EMUL_CONTINUE;
 }
+
+int kvm_emulate_wbinvd(struct kvm_vcpu *vcpu)
+{
+	kvm_x86_ops->skip_emulated_instruction(vcpu);
+   return kvm_emulate_wbinvd_noskip(vcpu);
+}
 EXPORT_SYMBOL_GPL(kvm_emulate_wbinvd);
 
+
+
 static void emulator_wbinvd(struct x86_emulate_ctxt *ctxt)
 {
-   kvm_emulate_wbinvd(emul_to_vcpu(ctxt));
+   kvm_emulate_wbinvd_noskip(emul_to_vcpu(ctxt));
 }
 
 int emulator_get_dr(struct x86_emulate_ctxt *ctxt, int dr, unsigned long *dest)
@@ -5817,7 +5825,7 @@ void kvm_arch_exit(void)
free_percpu(shared_msrs);
 }
 
-int kvm_emulate_halt(struct kvm_vcpu *vcpu)
+int kvm_vcpu_halt(struct kvm_vcpu *vcpu)
 {
	++vcpu->stat.halt_exits;
	if (irqchip_in_kernel(vcpu->kvm)) {
@@ -5828,6 +5836,13 @@ int kvm_emulate_halt(struct kvm_vcpu *vcpu)
return 0;
}
 }
+EXPORT_SYMBOL_GPL(kvm_vcpu_halt);
+
+int kvm_emulate_halt(struct kvm_vcpu *vcpu)
+{
+	kvm_x86_ops->skip_emulated_instruction(vcpu);
+	return kvm_vcpu_halt(vcpu);
+}
+EXPORT_SYMBOL_GPL(kvm_emulate_halt);

[PATCH v3 2/2] x86: svm: make wbinvd faster

2015-03-02 Thread Joel Schopp
From: David Kaplan david.kap...@amd.com

No need to re-decode WBINVD since we know what it is from the intercept.

Signed-off-by: David Kaplan david.kap...@amd.com
[extracted from larger unrelated patch, forward ported, tested, style cleanup]
Signed-off-by: Joel Schopp joel.sch...@amd.com
---
 arch/x86/kvm/svm.c |8 +++-
 1 file changed, 7 insertions(+), 1 deletion(-)

diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c
index 0c9e377..6fa4222 100644
--- a/arch/x86/kvm/svm.c
+++ b/arch/x86/kvm/svm.c
@@ -2774,6 +2774,12 @@ static int skinit_interception(struct vcpu_svm *svm)
return 1;
 }
 
+static int wbinvd_interception(struct vcpu_svm *svm)
+{
+	kvm_emulate_wbinvd(&svm->vcpu);
+   return 1;
+}
+
 static int xsetbv_interception(struct vcpu_svm *svm)
 {
u64 new_bv = kvm_read_edx_eax(svm-vcpu);
@@ -3374,7 +3380,7 @@ static int (*const svm_exit_handlers[])(struct vcpu_svm *svm) = {
[SVM_EXIT_STGI] = stgi_interception,
[SVM_EXIT_CLGI] = clgi_interception,
[SVM_EXIT_SKINIT]   = skinit_interception,
-   [SVM_EXIT_WBINVD]   = emulate_on_interception,
+   [SVM_EXIT_WBINVD]   = wbinvd_interception,
[SVM_EXIT_MONITOR]  = monitor_interception,
[SVM_EXIT_MWAIT]= mwait_interception,
[SVM_EXIT_XSETBV]   = xsetbv_interception,



[PATCH v3 0/2] kvm: x86: kvm_emulate_*

2015-03-02 Thread Joel Schopp
Review comments from v1 that used kvm_emulate_wbinvd() pointed out that
kvm_emulate_* was inconsistent in using skipping, while kvm_emulate() always
skips.  The first patch cleans up the existing use while the second patch
adds use of the updated version of kvm_emulate_wbinvd() in svm.

Changes since v2:
* fixed email subject line on series short description
* renamed kvm_emulate_halt_noskip() to kvm_vcpu_halt()
* added header declaration for kvm_vcpu_halt()
* squashed blank line 
---

David Kaplan (1):
  x86: svm: make wbinvd faster

Joel Schopp (1):
  kvm: x86: make kvm_emulate_* consistent


 arch/x86/include/asm/kvm_host.h |1 +
 arch/x86/kvm/svm.c  |   10 +++---
 arch/x86/kvm/vmx.c  |9 +++--
 arch/x86/kvm/x86.c  |   23 ---
 4 files changed, 31 insertions(+), 12 deletions(-)



[PATCH] x86: svm: use kvm_fast_pio_in()

2015-02-27 Thread Joel Schopp
From: David Kaplan david.kap...@amd.com

We can make the in instruction go faster the same way the out instruction is
already.

Signed-off-by: David Kaplan david.kap...@amd.com
[extracted from larger unrelated patch, forward ported, tested]
Signed-off-by: Joel Schopp joel.sch...@amd.com
---
 arch/x86/kvm/svm.c |4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c
index d319e0c..f8c906b 100644
--- a/arch/x86/kvm/svm.c
+++ b/arch/x86/kvm/svm.c
@@ -1899,7 +1899,7 @@ static int io_interception(struct vcpu_svm *svm)
	++svm->vcpu.stat.io_exits;
	string = (io_info & SVM_IOIO_STR_MASK) != 0;
	in = (io_info & SVM_IOIO_TYPE_MASK) != 0;
-	if (string || in)
+	if (string)
		return emulate_instruction(vcpu, 0) == EMULATE_DONE;
 
	port = io_info >> 16;
@@ -1907,6 +1907,8 @@ static int io_interception(struct vcpu_svm *svm)
	svm->next_rip = svm->vmcb->control.exit_info_2;
	skip_emulated_instruction(&svm->vcpu);
 
+	if (in)
+		return kvm_fast_pio_in(vcpu, size, port);
	return kvm_fast_pio_out(vcpu, size, port);
 }
 





[PATCH] x86: svm: make wbinvd faster

2015-02-27 Thread Joel Schopp
From: David Kaplan david.kap...@amd.com
No need to re-decode WBINVD since we know what it is from the intercept.

Signed-off-by: David Kaplan david.kap...@amd.com
[extracted from larger unrelated patch, forward ported, tested]
Signed-off-by: Joel Schopp joel.sch...@amd.com
---
 arch/x86/kvm/svm.c |   10 +-
 1 file changed, 9 insertions(+), 1 deletion(-)

diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c
index d319e0c..86ecd21 100644
--- a/arch/x86/kvm/svm.c
+++ b/arch/x86/kvm/svm.c
@@ -2776,6 +2776,14 @@ static int skinit_interception(struct vcpu_svm *svm)
return 1;
 }
 
+static int wbinvd_interception(struct vcpu_svm *svm)
+{
+	kvm_emulate_wbinvd(&svm->vcpu);
+	skip_emulated_instruction(&svm->vcpu);
+   return 1;
+}
+
+
 static int xsetbv_interception(struct vcpu_svm *svm)
 {
u64 new_bv = kvm_read_edx_eax(svm-vcpu);
@@ -3376,7 +3384,7 @@ static int (*const svm_exit_handlers[])(struct vcpu_svm *svm) = {
[SVM_EXIT_STGI] = stgi_interception,
[SVM_EXIT_CLGI] = clgi_interception,
[SVM_EXIT_SKINIT]   = skinit_interception,
-   [SVM_EXIT_WBINVD]   = emulate_on_interception,
+   [SVM_EXIT_WBINVD]   = wbinvd_interception,
[SVM_EXIT_MONITOR]  = monitor_interception,
[SVM_EXIT_MWAIT]= mwait_interception,
[SVM_EXIT_XSETBV]   = xsetbv_interception,



Re: [PATCH] x86: svm: don't intercept CR0 TS or MP bit write

2015-02-25 Thread Joel Schopp

On 02/25/2015 02:26 PM, Radim Krčmář wrote:
> 2015-02-24 15:25-0600, Joel Schopp:
>>>> -	clr_cr_intercept(svm, INTERCEPT_CR0_WRITE);
>>>> 	} else {
>>>> 		set_cr_intercept(svm, INTERCEPT_CR0_READ);
>>> (There is no point in checking fpu_active if cr0s are equal.)
>>>
>>>> -	set_cr_intercept(svm, INTERCEPT_CR0_WRITE);
>>> KVM uses lazy FPU and the state is undefined before the first access.
>>> We set cr0.ts when !svm->vcpu.fpu_active to detect the first access, but
>>> if we allow the guest to clear cr0.ts without exiting, it can access FPU
>>> with undefined state.
>> Thanks for the valuable feedback.  It's apparent I hadn't thought
>> through the interaction with lazy FPU and will need to go back and
>> rethink my approach here.
> I don't think we can gain much without sacrificing some laziness, like:
> when a guest with lazy FPU clears CR0.TS, it is going to use that FPU,
> so we could pre-load FPU in this case and drop the write intercept too;
> guests that unconditionally clear CR0.TS would perform worse though.
>
> It's going to take a lot of time, but two hunks in your patch, that made
> selective intercept benefit from decode assists, look useful even now.
>
> Would you post them separately?
I can re-post those separately.  They are less useful, though probably
still worth doing, on their own because SVM_EXIT_WRITE_CR0 takes
precedence over SVM_EXIT_CR0_SEL_WRITE.
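
(The intercept semantics behind both remarks, summarized informally from
the AMD APM; this is commentary, not code from the patch:)

	/* INTERCEPT_CR0_WRITE     - SVM_EXIT_WRITE_CR0 on every mov to CR0;
	 *                           when both intercepts are armed this is
	 *                           the exit that gets reported.
	 * INTERCEPT_SELECTIVE_CR0 - SVM_EXIT_CR0_SEL_WRITE only when a bit
	 *                           other than CR0.TS or CR0.MP changes.
	 * Clearing the full write intercept while keeping the selective one
	 * therefore lets guest toggles of TS/MP run without an exit. */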


Re: [PATCH] x86: svm: don't intercept CR0 TS or MP bit write

2015-02-24 Thread Joel Schopp

>> -	clr_cr_intercept(svm, INTERCEPT_CR0_WRITE);
>> 	} else {
>> 		set_cr_intercept(svm, INTERCEPT_CR0_READ);
> (There is no point in checking fpu_active if cr0s are equal.)
>
>> -	set_cr_intercept(svm, INTERCEPT_CR0_WRITE);
> KVM uses lazy FPU and the state is undefined before the first access.
> We set cr0.ts when !svm->vcpu.fpu_active to detect the first access, but
> if we allow the guest to clear cr0.ts without exiting, it can access FPU
> with undefined state.
Thanks for the valuable feedback.  It's apparent I hadn't thought
through the interaction with lazy FPU and will need to go back and
rethink my approach here.


[PATCH v2] x86: svm: use kvm_register_write()/read()

2015-02-20 Thread Joel Schopp
From: David Kaplan david.kap...@amd.com

KVM has nice wrappers to access the register values, clean up a few places
that should use them but currently do not.

Signed-off-by: David Kaplan david.kap...@amd.com
[forward port and testing]
Signed-off-by: Joel Schopp joel.sch...@amd.com
---
 arch/x86/kvm/svm.c |   19 +--
 1 file changed, 9 insertions(+), 10 deletions(-)

diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c
index d319e0c..a7d88e4 100644
--- a/arch/x86/kvm/svm.c
+++ b/arch/x86/kvm/svm.c
@@ -2757,11 +2757,11 @@ static int invlpga_interception(struct vcpu_svm *svm)
 {
	struct kvm_vcpu *vcpu = &svm->vcpu;
 
-	trace_kvm_invlpga(svm->vmcb->save.rip, vcpu->arch.regs[VCPU_REGS_RCX],
-			  vcpu->arch.regs[VCPU_REGS_RAX]);
+	trace_kvm_invlpga(svm->vmcb->save.rip, kvm_register_read(&svm->vcpu, VCPU_REGS_RCX),
+			  kvm_register_read(&svm->vcpu, VCPU_REGS_RAX));
 
	/* Let's treat INVLPGA the same as INVLPG (can be optimized!) */
-	kvm_mmu_invlpg(vcpu, vcpu->arch.regs[VCPU_REGS_RAX]);
+	kvm_mmu_invlpg(vcpu, kvm_register_read(&svm->vcpu, VCPU_REGS_RAX));
 
	svm->next_rip = kvm_rip_read(&svm->vcpu) + 3;
	skip_emulated_instruction(&svm->vcpu);
@@ -2770,7 +2770,7 @@ static int invlpga_interception(struct vcpu_svm *svm)
 
 static int skinit_interception(struct vcpu_svm *svm)
 {
-	trace_kvm_skinit(svm->vmcb->save.rip, svm->vcpu.arch.regs[VCPU_REGS_RAX]);
+	trace_kvm_skinit(svm->vmcb->save.rip, kvm_register_read(&svm->vcpu, VCPU_REGS_RAX));
 
	kvm_queue_exception(&svm->vcpu, UD_VECTOR);
return 1;
@@ -3133,7 +3133,7 @@ static int svm_get_msr(struct kvm_vcpu *vcpu, unsigned ecx, u64 *data)
 
 static int rdmsr_interception(struct vcpu_svm *svm)
 {
-	u32 ecx = svm->vcpu.arch.regs[VCPU_REGS_RCX];
+	u32 ecx = kvm_register_read(&svm->vcpu, VCPU_REGS_RCX);
	u64 data;
 
	if (svm_get_msr(&svm->vcpu, ecx, &data)) {
@@ -3142,8 +3142,8 @@ static int rdmsr_interception(struct vcpu_svm *svm)
	} else {
		trace_kvm_msr_read(ecx, data);
 
-		svm->vcpu.arch.regs[VCPU_REGS_RAX] = data & 0xffffffff;
-		svm->vcpu.arch.regs[VCPU_REGS_RDX] = data >> 32;
+		kvm_register_write(&svm->vcpu, VCPU_REGS_RAX, data & 0xffffffff);
+		kvm_register_write(&svm->vcpu, VCPU_REGS_RDX, data >> 32);
		svm->next_rip = kvm_rip_read(&svm->vcpu) + 2;
		skip_emulated_instruction(&svm->vcpu);
	}
@@ -3246,9 +3246,8 @@ static int svm_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr)
 static int wrmsr_interception(struct vcpu_svm *svm)
 {
	struct msr_data msr;
-	u32 ecx = svm->vcpu.arch.regs[VCPU_REGS_RCX];
-	u64 data = (svm->vcpu.arch.regs[VCPU_REGS_RAX] & -1u)
-		| ((u64)(svm->vcpu.arch.regs[VCPU_REGS_RDX] & -1u) << 32);
+	u32 ecx = kvm_register_read(&svm->vcpu, VCPU_REGS_RCX);
+	u64 data = kvm_read_edx_eax(&svm->vcpu);
 
msr.data = data;
msr.index = ecx;



Re: [PATCH] x86: svm: use kvm_register_write()/read()

2015-02-20 Thread Joel Schopp

On 02/20/2015 02:54 PM, Borislav Petkov wrote:
> On Fri, Feb 20, 2015 at 12:39:40PM -0600, Joel Schopp wrote:
>> KVM has nice wrappers to access the register values, clean up a few places
>> that should use them but currently do not.
>>
>> Signed-off-by:David Kaplan david.kap...@amd.com
>> Signed-off-by:Joel Schopp joel.sch...@amd.com
> This SOB chain looks strange. If David is the author, you want to have
> him in From: at the beginning of the patch.
>
> Stuff you did ontop should be in []-braces before your SOB, like this:
Will resend with From: line and braces for clarification.

> Signed-off-by: David Kaplan david.kap...@amd.com
> [ Did this and that to patch. ]
> Signed-off-by: Joel Schopp joel.sch...@amd.com




[PATCH] x86: svm: don't intercept CR0 TS or MP bit write

2015-02-20 Thread Joel Schopp
From: David Kaplan david.kap...@amd.com

Reduce the number of exits by avoiding exiting when the guest writes TS or MP
bits of CR0.  INTERCEPT_CR0_WRITE intercepts all writes to CR0 including TS and
MP bits. It intercepts these even if INTERCEPT_SELECTIVE_CR0 is set.  What we
should be doing is setting INTERCEPT_SELECTIVE_CR0 and not setting
INTERCEPT_CR0_WRITE.

Signed-off-by: David Kaplan david.kap...@amd.com
[added remove of clr_cr_intercept in init_vmcb, fixed check in handle_exit,
added emulation on interception back in, forward ported, tested]
Signed-off-by: Joel Schopp joel.sch...@amd.com
---
 arch/x86/kvm/svm.c |   13 +++--
 1 file changed, 7 insertions(+), 6 deletions(-)

diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c
index d319e0c..55822e5 100644
--- a/arch/x86/kvm/svm.c
+++ b/arch/x86/kvm/svm.c
@@ -1093,7 +1093,6 @@ static void init_vmcb(struct vcpu_svm *svm)
set_cr_intercept(svm, INTERCEPT_CR0_READ);
set_cr_intercept(svm, INTERCEPT_CR3_READ);
set_cr_intercept(svm, INTERCEPT_CR4_READ);
-   set_cr_intercept(svm, INTERCEPT_CR0_WRITE);
set_cr_intercept(svm, INTERCEPT_CR3_WRITE);
set_cr_intercept(svm, INTERCEPT_CR4_WRITE);
set_cr_intercept(svm, INTERCEPT_CR8_WRITE);
@@ -1539,10 +1538,8 @@ static void update_cr0_intercept(struct vcpu_svm *svm)
 
	if (gcr0 == *hcr0 && svm->vcpu.fpu_active) {
clr_cr_intercept(svm, INTERCEPT_CR0_READ);
-   clr_cr_intercept(svm, INTERCEPT_CR0_WRITE);
} else {
set_cr_intercept(svm, INTERCEPT_CR0_READ);
-   set_cr_intercept(svm, INTERCEPT_CR0_WRITE);
}
 }
 
@@ -2940,7 +2937,11 @@ static int cr_interception(struct vcpu_svm *svm)
return emulate_on_interception(svm);
 
	reg = svm->vmcb->control.exit_info_1 & SVM_EXITINFO_REG_MASK;
-	cr = svm->vmcb->control.exit_code - SVM_EXIT_READ_CR0;
+
+	if (svm->vmcb->control.exit_code == SVM_EXIT_CR0_SEL_WRITE)
+		cr = 16;
+	else
+		cr = svm->vmcb->control.exit_code - SVM_EXIT_READ_CR0;
 
	err = 0;
	if (cr >= 16) { /* mov to cr */
@@ -3325,7 +3326,7 @@ static int (*const svm_exit_handlers[])(struct vcpu_svm *svm) = {
[SVM_EXIT_READ_CR3] = cr_interception,
[SVM_EXIT_READ_CR4] = cr_interception,
[SVM_EXIT_READ_CR8] = cr_interception,
-   [SVM_EXIT_CR0_SEL_WRITE]= emulate_on_interception,
+   [SVM_EXIT_CR0_SEL_WRITE]= cr_interception,
[SVM_EXIT_WRITE_CR0]= cr_interception,
[SVM_EXIT_WRITE_CR3]= cr_interception,
[SVM_EXIT_WRITE_CR4]= cr_interception,
@@ -3502,7 +3503,7 @@ static int handle_exit(struct kvm_vcpu *vcpu)
	struct kvm_run *kvm_run = vcpu->run;
	u32 exit_code = svm->vmcb->control.exit_code;
 
-	if (!is_cr_intercept(svm, INTERCEPT_CR0_WRITE))
+	if (!is_cr_intercept(svm, INTERCEPT_SELECTIVE_CR0))
		vcpu->arch.cr0 = svm->vmcb->save.cr0;
	if (npt_enabled)
		vcpu->arch.cr3 = svm->vmcb->save.cr3;



[PATCH] x86: svm: use kvm_register_write()/read()

2015-02-20 Thread Joel Schopp
KVM has nice wrappers to access the register values, clean up a few places
that should use them but currently do not.

Signed-off-by:David Kaplan david.kap...@amd.com
Signed-off-by:Joel Schopp joel.sch...@amd.com
---
 arch/x86/kvm/svm.c |   19 +--
 1 file changed, 9 insertions(+), 10 deletions(-)

diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c
index d319e0c..a7d88e4 100644
--- a/arch/x86/kvm/svm.c
+++ b/arch/x86/kvm/svm.c
@@ -2757,11 +2757,11 @@ static int invlpga_interception(struct vcpu_svm *svm)
 {
	struct kvm_vcpu *vcpu = &svm->vcpu;
 
-	trace_kvm_invlpga(svm->vmcb->save.rip, vcpu->arch.regs[VCPU_REGS_RCX],
-			  vcpu->arch.regs[VCPU_REGS_RAX]);
+	trace_kvm_invlpga(svm->vmcb->save.rip, kvm_register_read(&svm->vcpu, VCPU_REGS_RCX),
+			  kvm_register_read(&svm->vcpu, VCPU_REGS_RAX));
 
	/* Let's treat INVLPGA the same as INVLPG (can be optimized!) */
-	kvm_mmu_invlpg(vcpu, vcpu->arch.regs[VCPU_REGS_RAX]);
+	kvm_mmu_invlpg(vcpu, kvm_register_read(&svm->vcpu, VCPU_REGS_RAX));
 
	svm->next_rip = kvm_rip_read(&svm->vcpu) + 3;
	skip_emulated_instruction(&svm->vcpu);
@@ -2770,7 +2770,7 @@ static int invlpga_interception(struct vcpu_svm *svm)
 
 static int skinit_interception(struct vcpu_svm *svm)
 {
-	trace_kvm_skinit(svm->vmcb->save.rip, svm->vcpu.arch.regs[VCPU_REGS_RAX]);
+	trace_kvm_skinit(svm->vmcb->save.rip, kvm_register_read(&svm->vcpu, VCPU_REGS_RAX));
 
	kvm_queue_exception(&svm->vcpu, UD_VECTOR);
return 1;
@@ -3133,7 +3133,7 @@ static int svm_get_msr(struct kvm_vcpu *vcpu, unsigned ecx, u64 *data)
 
 static int rdmsr_interception(struct vcpu_svm *svm)
 {
-	u32 ecx = svm->vcpu.arch.regs[VCPU_REGS_RCX];
+	u32 ecx = kvm_register_read(&svm->vcpu, VCPU_REGS_RCX);
	u64 data;
 
	if (svm_get_msr(&svm->vcpu, ecx, &data)) {
@@ -3142,8 +3142,8 @@ static int rdmsr_interception(struct vcpu_svm *svm)
	} else {
		trace_kvm_msr_read(ecx, data);
 
-		svm->vcpu.arch.regs[VCPU_REGS_RAX] = data & 0xffffffff;
-		svm->vcpu.arch.regs[VCPU_REGS_RDX] = data >> 32;
+		kvm_register_write(&svm->vcpu, VCPU_REGS_RAX, data & 0xffffffff);
+		kvm_register_write(&svm->vcpu, VCPU_REGS_RDX, data >> 32);
		svm->next_rip = kvm_rip_read(&svm->vcpu) + 2;
		skip_emulated_instruction(&svm->vcpu);
	}
@@ -3246,9 +3246,8 @@ static int svm_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr)
 static int wrmsr_interception(struct vcpu_svm *svm)
 {
	struct msr_data msr;
-	u32 ecx = svm->vcpu.arch.regs[VCPU_REGS_RCX];
-	u64 data = (svm->vcpu.arch.regs[VCPU_REGS_RAX] & -1u)
-		| ((u64)(svm->vcpu.arch.regs[VCPU_REGS_RDX] & -1u) << 32);
+	u32 ecx = kvm_register_read(&svm->vcpu, VCPU_REGS_RCX);
+	u64 data = kvm_read_edx_eax(&svm->vcpu);
 
msr.data = data;
msr.index = ecx;



Re: [PATCH v6] arm64: fix VTTBR_BADDR_MASK

2014-09-22 Thread Joel Schopp

>> -#define VTTBR_BADDR_SHIFT (VTTBR_X - 1)
>> -#define VTTBR_BADDR_MASK  (((1LLU << (40 - VTTBR_X)) - 1) << VTTBR_BADDR_SHIFT)
> Actually, after some more thinking, why don't we just make the upper
> limit of this mask 48-bit always or even 64-bit. That's a physical mask
> for checking whether the pgd pointer in vttbr is aligned as per the
> architecture requirements. Given that the pointer is allocated from the
> platform memory, it's clear that it is within the PA range. So basically
> you just need a mask to check the bottom alignment based on
> VTCR_EL2.T0SZ (which should be independent from the PA range). I guess
> it should be enough as:
This sounds fine to me.  I would say that there is no harm in
re-checking the upper bits, but I agree it is unnecessary.

> #define VTTBR_BADDR_MASK	(~0ULL << VTTBR_BADDR_SHIFT)
>
> without any other changes to T0SZ.
>
> The TCR_EL2.PS setting should be done based on the ID_AA64MMFR0_EL1
> but you can do this in __do_hyp_init (it looks like this function
> handles VTCR_EL2.PS already, not sure why it does do it for TCR_EL2 as
> well).
>
> So IMO you only need about a few lines patch.

My original patch to fix this problem was one line, so I'm perfectly
happy with simplification.  But it would be nice if the other reviewers
could agree with this approach.  With six versions that each addressed
all the comments from reviewers I'd like it if the v7 that throws away
most of that feedback didn't result in a v8 that puts it all back again.
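
(A sketch of the resulting alignment check, assuming a runtime vttbr_x as
in earlier versions of the patch; the names are illustrative, not the
posted v7:)

	/* The VTTBR base address must be aligned to 2^(vttbr_x - 1), so a
	 * mask of all ones above that bit is enough; no upper-bound check
	 * is needed because the pgd comes from platform memory. */
	u64 vttbr_baddr_mask = ~0ULL << (vttbr_x - 1);

	if (pgd_phys & ~vttbr_baddr_mask)
		return -EINVAL;		/* pgd not sufficiently aligned */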


[PATCH v6] arm64: fix VTTBR_BADDR_MASK

2014-09-08 Thread Joel Schopp
From: Joel Schopp joel.sch...@amd.com

The current VTTBR_BADDR_MASK only masks 39 bits, which is broken on current
systems.  Rather than just add a bit, it seems like a good time to also set
things at run time instead of compile time to accommodate more hardware.

This patch sets TCR_EL2.PS, VTCR_EL2.T0SZ and vttbr_baddr_mask at run time,
not compile time.

In ARMv8, the EL2 physical address size (TCR_EL2.PS) and stage2 input address
size (VTCR_EL2.T0SZ) cannot be determined at compile time since they
depend on hardware capability.

According to Table D4-23 and Table D4-25 in ARM DDI 0487A.b document,
vttbr_x is calculated using different fixed values with consideration
of T0SZ, granule size and the level of translation tables. Therefore,
vttbr_baddr_mask should be determined dynamically.

Changes since v5:
Fixed declaration of vttbr_baddr_mask to not create multiple instances
Refactored return codes based on feedback
For 32-bit, included kvm_arm.h in kvm_mmu.h to explicitly pick up VTTBR_BADDR_MASK

Changes since v4:
More minor cleanups from review
Moved some functions into headers
Added runtime check in kvm_alloc_stage2_pgd

Changes since v3:
Another rebase
Addressed minor comments from v2

Changes since v2:
Rebased on https://git.kernel.org/pub/scm/linux/kernel/git/kvmarm/kvmarm.git 
next branch

Changes since v1:
Rebased fix on Jungseok Lee's patch https://lkml.org/lkml/2014/5/12/189 to
provide better long term fix.  Updated that patch to log error instead of
silently fail on unaligned vttbr.

Cc: Marc Zyngier marc.zyng...@arm.com
Cc: Christoffer Dall christoffer.d...@linaro.org
Cc: Sungjinn Chung sungjinn.ch...@samsung.com
Signed-off-by: Jungseok Lee jays@samsung.com
Signed-off-by: Joel Schopp joel.sch...@amd.com
---
 arch/arm/include/asm/kvm_mmu.h   |   13 +++
 arch/arm/kvm/arm.c   |   23 +++-
 arch/arm64/include/asm/kvm_arm.h |   17 ++---
 arch/arm64/include/asm/kvm_mmu.h |   75 ++
 arch/arm64/kvm/hyp-init.S|   20 +++---
 5 files changed, 126 insertions(+), 22 deletions(-)

diff --git a/arch/arm/include/asm/kvm_mmu.h b/arch/arm/include/asm/kvm_mmu.h
index 5cc0b0f..03a08bb 100644
--- a/arch/arm/include/asm/kvm_mmu.h
+++ b/arch/arm/include/asm/kvm_mmu.h
@@ -21,6 +21,7 @@
 
 #include <asm/memory.h>
 #include <asm/page.h>
+#include <asm/kvm_arm.h>
 
 /*
  * We directly use the kernel VA for the HYP, as we can directly share
@@ -178,6 +179,18 @@ static inline void coherent_cache_guest_page(struct 
kvm_vcpu *vcpu, hva_t hva,
 
 void stage2_flush_vm(struct kvm *kvm);
 
+static inline int kvm_get_phys_addr_shift(void)
+{
+   return KVM_PHYS_SHIFT;
+}
+
+
+static inline u32 get_vttbr_baddr_mask(void)
+{
+   return VTTBR_BADDR_MASK;
+}
+
+
 #endif /* !__ASSEMBLY__ */
 
 #endif /* __ARM_KVM_MMU_H__ */
diff --git a/arch/arm/kvm/arm.c b/arch/arm/kvm/arm.c
index a99e0cd..d0fca8f 100644
--- a/arch/arm/kvm/arm.c
+++ b/arch/arm/kvm/arm.c
@@ -37,6 +37,7 @@
 #include <asm/mman.h>
 #include <asm/tlbflush.h>
 #include <asm/cacheflush.h>
+#include <asm/cputype.h>
 #include <asm/virt.h>
 #include <asm/kvm_arm.h>
 #include <asm/kvm_asm.h>
@@ -61,6 +62,12 @@ static atomic64_t kvm_vmid_gen = ATOMIC64_INIT(1);
 static u8 kvm_next_vmid;
 static DEFINE_SPINLOCK(kvm_vmid_lock);
 
+#ifdef CONFIG_ARM64
+static u64 vttbr_baddr_mask;
+#else
+static u32 vttbr_baddr_mask;
+#endif
+
 static bool vgic_present;
 
 static void kvm_arm_set_running_vcpu(struct kvm_vcpu *vcpu)
@@ -429,8 +436,14 @@ static void update_vttbr(struct kvm *kvm)
 	/* update vttbr to be used with the new vmid */
 	pgd_phys = virt_to_phys(kvm->arch.pgd);
 	vmid = ((u64)(kvm->arch.vmid) << VTTBR_VMID_SHIFT) & VTTBR_VMID_MASK;
-	kvm->arch.vttbr = pgd_phys & VTTBR_BADDR_MASK;
-	kvm->arch.vttbr |= vmid;
+
+   /*
+* If the VTTBR isn't

Re: [PATCH 6] From nobody Wed Sep 3 13:22:18 2014

2014-09-08 Thread Joel Schopp
My apologies for the bad formatting.  Will resend momentarily.


[PATCH v6] arm64: fix VTTBR_BADDR_MASK

2014-09-08 Thread Joel Schopp
The current VTTBR_BADDR_MASK only masks 39 bits, which is broken on current
systems.  Rather than just add a bit, it seems like a good time to also set
things at run time instead of compile time to accommodate more hardware.

This patch sets TCR_EL2.PS, VTCR_EL2.T0SZ and vttbr_baddr_mask at run time,
not compile time.

In ARMv8, the EL2 physical address size (TCR_EL2.PS) and stage2 input address
size (VTCR_EL2.T0SZ) cannot be determined at compile time since they
depend on hardware capability.

According to Table D4-23 and Table D4-25 in ARM DDI 0487A.b document,
vttbr_x is calculated using different fixed values with consideration
of T0SZ, granule size and the level of translation tables. Therefore,
vttbr_baddr_mask should be determined dynamically.

Changes since v5:
Fixed declaration of vttbr_baddr_mask to not create multiple instances
Refactored return codes based on feedback
For 32-bit, included kvm_arm.h in kvm_mmu.h to explicitly pick up VTTBR_BADDR_MASK

Changes since v4:
More minor cleanups from review
Moved some functions into headers
Added runtime check in kvm_alloc_stage2_pgd

Changes since v3:
Another rebase
Addressed minor comments from v2

Changes since v2:
Rebased on https://git.kernel.org/pub/scm/linux/kernel/git/kvmarm/kvmarm.git 
next branch

Changes since v1:
Rebased fix on Jungseok Lee's patch https://lkml.org/lkml/2014/5/12/189 to
provide better long term fix.  Updated that patch to log error instead of
silently fail on unaligned vttbr.

Cc: Marc Zyngier marc.zyng...@arm.com
Cc: Christoffer Dall christoffer.d...@linaro.org
Cc: Sungjinn Chung sungjinn.ch...@samsung.com
Signed-off-by: Jungseok Lee jays@samsung.com
Signed-off-by: Joel Schopp joel.sch...@amd.com
---
 arch/arm/include/asm/kvm_mmu.h   |   13 +++
 arch/arm/kvm/arm.c   |   23 +++-
 arch/arm64/include/asm/kvm_arm.h |   17 ++---
 arch/arm64/include/asm/kvm_mmu.h |   75 ++
 arch/arm64/kvm/hyp-init.S|   20 +++---
 5 files changed, 126 insertions(+), 22 deletions(-)

diff --git a/arch/arm/include/asm/kvm_mmu.h b/arch/arm/include/asm/kvm_mmu.h
index 5cc0b0f..03a08bb 100644
--- a/arch/arm/include/asm/kvm_mmu.h
+++ b/arch/arm/include/asm/kvm_mmu.h
@@ -21,6 +21,7 @@
 
 #include <asm/memory.h>
 #include <asm/page.h>
+#include <asm/kvm_arm.h>
 
 /*
  * We directly use the kernel VA for the HYP, as we can directly share
@@ -178,6 +179,18 @@ static inline void coherent_cache_guest_page(struct 
kvm_vcpu *vcpu, hva_t hva,
 
 void stage2_flush_vm(struct kvm *kvm);
 
+static inline int kvm_get_phys_addr_shift(void)
+{
+   return KVM_PHYS_SHIFT;
+}
+
+
+static inline u32 get_vttbr_baddr_mask(void)
+{
+   return VTTBR_BADDR_MASK;
+}
+
+
 #endif /* !__ASSEMBLY__ */
 
 #endif /* __ARM_KVM_MMU_H__ */
diff --git a/arch/arm/kvm/arm.c b/arch/arm/kvm/arm.c
index a99e0cd..d0fca8f 100644
--- a/arch/arm/kvm/arm.c
+++ b/arch/arm/kvm/arm.c
@@ -37,6 +37,7 @@
 #include <asm/mman.h>
 #include <asm/tlbflush.h>
 #include <asm/cacheflush.h>
+#include <asm/cputype.h>
 #include <asm/virt.h>
 #include <asm/kvm_arm.h>
 #include <asm/kvm_asm.h>
@@ -61,6 +62,12 @@ static atomic64_t kvm_vmid_gen = ATOMIC64_INIT(1);
 static u8 kvm_next_vmid;
 static DEFINE_SPINLOCK(kvm_vmid_lock);
 
+#ifdef CONFIG_ARM64
+static u64 vttbr_baddr_mask;
+#else
+static u32 vttbr_baddr_mask;
+#endif
+
 static bool vgic_present;
 
 static void kvm_arm_set_running_vcpu(struct kvm_vcpu *vcpu)
@@ -429,8 +436,14 @@ static void update_vttbr(struct kvm *kvm)
 	/* update vttbr to be used with the new vmid */
 	pgd_phys = virt_to_phys(kvm->arch.pgd);
 	vmid = ((u64)(kvm->arch.vmid) << VTTBR_VMID_SHIFT) & VTTBR_VMID_MASK;
-	kvm->arch.vttbr = pgd_phys & VTTBR_BADDR_MASK;
-	kvm->arch.vttbr |= vmid;
+
+	/*
+	 * If the VTTBR isn't aligned there is something wrong with the system
+	 * or kernel.
+	 */
+	BUG_ON(pgd_phys & ~vttbr_baddr_mask);
+
+	kvm->arch.vttbr = pgd_phys | vmid;
 
spin_unlock(kvm_vmid_lock);
 }
@@ -1015,6 +1028,12 @@ int kvm_arch_init(void *opaque)
}
}
 
+   vttbr_baddr_mask = get_vttbr_baddr_mask();
+   if (vttbr_baddr_mask == ~0) {
+		kvm_err("Cannot set vttbr_baddr_mask\n");
+   return -EINVAL;
+   }
+
cpu_notifier_register_begin();
 
err = init_hyp_mode();
diff --git a/arch/arm64/include/asm/kvm_arm.h b/arch/arm64/include/asm/kvm_arm.h
index cc83520..ff4a4fa 100644
--- a/arch/arm64/include/asm/kvm_arm.h
+++ b/arch/arm64/include/asm/kvm_arm.h
@@ -95,7 +95,6 @@
 /* TCR_EL2 Registers bits */
 #define TCR_EL2_TBI	(1 << 20)
 #define TCR_EL2_PS	(7 << 16)
-#define TCR_EL2_PS_40B	(2 << 16)
 #define TCR_EL2_TG0	(1 << 14)
 #define TCR_EL2_SH0	(3 << 12)
 #define TCR_EL2_ORGN0	(3 << 10)
@@ -104,8 +103,6 @@
 #define TCR_EL2_MASK   (TCR_EL2_TG0 | TCR_EL2_SH0 | \
 TCR_EL2_ORGN0 | TCR_EL2_IRGN0 | TCR_EL2_T0SZ)
 
-#define TCR_EL2_FLAGS  (TCR_EL2_PS_40B)
-
 /* VTCR_EL2 Registers

[PATCH] KVM: arm64: add gic-400 compatible

2014-09-08 Thread Joel Schopp
Add a one-liner to identify the GIC-400.  It's a GICv2 with optional MSI
extensions.

Cc: Christoffer Dall christoffer.d...@linaro.org
Signed-off-by: Joel Schopp joel.sch...@amd.com
---
 virt/kvm/arm/vgic.c |1 +
 1 file changed, 1 insertion(+)

diff --git a/virt/kvm/arm/vgic.c b/virt/kvm/arm/vgic.c
index 73eba79..e81444e 100644
--- a/virt/kvm/arm/vgic.c
+++ b/virt/kvm/arm/vgic.c
@@ -1550,6 +1550,7 @@ static struct notifier_block vgic_cpu_nb = {
 
 static const struct of_device_id vgic_ids[] = {
 	{ .compatible = "arm,cortex-a15-gic", .data = vgic_v2_probe, },
+	{ .compatible = "arm,gic-400", .data = vgic_v2_probe, },
 	{ .compatible = "arm,gic-v3", .data = vgic_v3_probe, },
{},
 };
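
(For context, a sketch of how this table is consumed -- modeled on the vgic
init path of that era; the exact flow and the vgic_probe signature are
approximations:)

	const struct of_device_id *matched;
	struct device_node *vgic_node;

	vgic_node = of_find_matching_node_and_match(NULL, vgic_ids, &matched);
	if (vgic_node)
		vgic_probe = matched->data;	/* vgic_v2_probe for "arm,gic-400" */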



Re: [PATCH v10 0/6] arm: dirty page logging support for ARMv7

2014-08-28 Thread Joel Schopp


On 08/26/2014 06:51 PM, Mario Smarduch wrote:

This patch adds support for ARMv7 dirty page logging. Some functions of dirty
page logging have been split into generic and arch-specific implementations;
details below. Dirty page logging is one of several features required for
live migration, and live migration has been tested for ARMv7.
Any reason not to cc the kvm arm list?  I almost missed this patch set.  
kvm...@lists.cs.columbia.edu



Re: [PATCH v5] arm64: fix VTTBR_BADDR_MASK

2014-08-26 Thread Joel Schopp

 diff --git a/arch/arm/include/asm/kvm_mmu.h b/arch/arm/include/asm/kvm_mmu.h
 index 5c7aa3c..73f6ff6 100644
 --- a/arch/arm/include/asm/kvm_mmu.h
 +++ b/arch/arm/include/asm/kvm_mmu.h
 @@ -166,6 +166,18 @@ static inline void coherent_cache_guest_page(struct 
 kvm_vcpu *vcpu, hva_t hva,

  void stage2_flush_vm(struct kvm *kvm);

 +static inline int kvm_get_phys_addr_shift(void)
 +{
 +   return KVM_PHYS_SHIFT;
 +}
 +
 +static inline int set_vttbr_baddr_mask(void)
 +{
 +   vttbr_baddr_mask = VTTBR_BADDR_MASK;
 Have you tried compiling this?

 Apart from the obvious missing definition of the variable, I'm not fond
 of functions with side-effects hidden in an include file. What is wrong
 with just returning the mask and letting the common code setting it?
I like that change, will do in v6.

 +#ifdef CONFIG_ARM64_64K_PAGES
 +static inline int t0sz_to_vttbr_x(int t0sz)
 +{
 +	if (t0sz < 16 || t0sz > 34) {
 +		kvm_err("Cannot support %d-bit address space\n", 64 - t0sz);
 +   return 0;
 0 is definitely a bad value for something that is an error
 case. Consider -EINVAL instead.
OK.

 Also, what if we're in a range that only deals with more levels of page
 tables than the kernel can deal with (remember we use the kernel page
 table accessors)? See the new ARM64_VA_BITS and ARM64_PGTABLE_LEVELS
 symbols that are now available, and use them to validate the range you
 have.  
With the simple current tests I can look at them and see they are
correct, even if I can't make a scenario to test that they would fail. 
However, if I add in more complicated checks I'd really like to test
them catching the failure cases.  Can you describe a case where we can
boot a kernel and then have the checks still fail in kvm? 


 +   }
 +   return 37 - t0sz;
 +}
 +#endif
 +static inline int kvm_get_phys_addr_shift(void)
 +{
 +	int pa_range = read_cpuid(ID_AA64MMFR0_EL1) & 0xf;
 +
 +   switch (pa_range) {
 +   case 0: return 32;
 +   case 1: return 36;
 +   case 2: return 40;
 +   case 3: return 42;
 +   case 4: return 44;
 +   case 5: return 48;
 +   default:
 +   BUG();
 +   return 0;
 +   }
 +}
 +
 +static u64 vttbr_baddr_mask;
 Now every compilation unit that includes kvm_mmu.h has an instance of
 this variable. I doubt that it is the intended effect.
The change for the comment farther above to just return the mask and
have the common code set it should resolve this as well.


 +
 +/**
 + * set_vttbr_baddr_mask - set mask value for vttbr base address
 + *
 + * In ARMv8, vttbr_baddr_mask cannot be determined at compile time since the
 + * stage2 input address size depends on hardware capability. Thus, we first
 + * need to read ID_AA64MMFR0_EL1.PARange and then set vttbr_baddr_mask with
 + * consideration of both the granule size and the level of translation 
 tables.
 + */
 +static inline int set_vttbr_baddr_mask(void)
 +{
 +   int t0sz, vttbr_x;
 +
 +   t0sz = VTCR_EL2_T0SZ(kvm_get_phys_addr_shift());
 +   vttbr_x = t0sz_to_vttbr_x(t0sz);
 +   if (!vttbr_x)
 +   return -EINVAL;
 +	vttbr_baddr_mask = (((1LLU << (48 - vttbr_x)) - 1) << (vttbr_x - 1));
 I think this can now be written as GENMASK_ULL(48, (vttbr_x - 1)).
That does improve readability, I like it.
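
(A quick sanity check on that conversion -- my own arithmetic, not from the
thread: the open-coded expression sets the (48 - vttbr_x) bits starting at
bit (vttbr_x - 1), i.e. bits (vttbr_x - 1) through 46, so the GENMASK_ULL
bounds deserve a second look when converting:)

#include <linux/bitops.h>

/* equivalent of (((1LLU << (48 - x)) - 1) << (x - 1)) */
static inline u64 vttbr_mask_example(int vttbr_x)
{
	return GENMASK_ULL(46, vttbr_x - 1);
}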


 +   return 0;
 +}
 +
  #endif /* __ASSEMBLY__ */
  #endif /* __ARM64_KVM_MMU_H__ */
 diff --git a/arch/arm64/kvm/hyp-init.S b/arch/arm64/kvm/hyp-init.S
 index d968796..c0f7634 100644
 --- a/arch/arm64/kvm/hyp-init.S
 +++ b/arch/arm64/kvm/hyp-init.S
 @@ -63,17 +63,21 @@ __do_hyp_init:
 mrs x4, tcr_el1
 ldr x5, =TCR_EL2_MASK
 and x4, x4, x5
 -   ldr x5, =TCR_EL2_FLAGS
 -   orr x4, x4, x5
 -   msr tcr_el2, x4
 -
 -   ldr x4, =VTCR_EL2_FLAGS
 /*
  * Read the PARange bits from ID_AA64MMFR0_EL1 and set the PS bits in
 -* VTCR_EL2.
 +* TCR_EL2 and both PS bits and T0SZ bits in VTCR_EL2.
  */
 mrs x5, ID_AA64MMFR0_EL1
 bfi x4, x5, #16, #3
 +   msr tcr_el2, x4
 +
 +   ldr x4, =VTCR_EL2_FLAGS
 +   bfi x4, x5, #16, #3
 +   and x5, x5, #0xf
 +   adr x6, t0sz
 +   add x6, x6, x5, lsl #2
 +   ldr w5, [x6]
 +   orr x4, x4, x5
 You'll need to validate the T0SZ value, and possibly adjust it so that
 it is compatible with the addressing capability of the kernel. That
 probably requires a slight change of the hyp-init API.
In order to do that I really should test that path; can you think of a
way to generate a t0sz value that is incompatible with the kernel, but
the kernel still boots, so I can load kvm and test it?


 msr vtcr_el2, x4

 mrs x4, mair_el1
 @@ -109,6 +113,10 @@ target: /* We're now in the trampoline code, switch 
 page tables */

 /* Hello, World! */
 eret
 +
 +t0sz:
 +   .word   VTCR_EL2_T0SZ(32), 

Re: [PATCH v4] arm64: fix VTTBR_BADDR_MASK

2014-08-19 Thread Joel Schopp

On 08/19/2014 07:22 AM, Christoffer Dall wrote:
 On Mon, Aug 18, 2014 at 03:30:58PM -0500, Joel Schopp wrote:
  #endif /* __ARM_KVM_MMU_H__ */
 diff --git a/arch/arm/kvm/mmu.c b/arch/arm/kvm/mmu.c
 index 16e7994..70f0f02 100644
 --- a/arch/arm/kvm/mmu.c
 +++ b/arch/arm/kvm/mmu.c
 @@ -521,6 +521,7 @@ int create_hyp_io_mappings(void *from, void *to, 
 phys_addr_t phys_addr)
   */
  int kvm_alloc_stage2_pgd(struct kvm *kvm)
  {
 +unsigned int s2_pgds, s2_pgd_order;
  pgd_t *pgd;
  
  if (kvm-arch.pgd != NULL) {
 @@ -528,10 +529,18 @@ int kvm_alloc_stage2_pgd(struct kvm *kvm)
  return -EINVAL;
  }
  
 -pgd = (pgd_t *)__get_free_pages(GFP_KERNEL, S2_PGD_ORDER);
+	s2_pgds = (1 << (kvm_get_phys_addr_shift() - PGDIR_SHIFT));
 +s2_pgd_order = get_order(s2_pgds * sizeof(pgd_t));
 +
 +pgd = (pgd_t *)__get_free_pages(GFP_KERNEL, s2_pgd_order);
  if (!pgd)
  return -ENOMEM;
  
+	if ((unsigned long)pgd & ~vttbr_baddr_mask) {
+		kvm_err("Stage-2 pgd not correctly aligned: %p\n", pgd);
+		return -EFAULT;
 +}


 There are two problems that I've found here.  The first problem is that
 vttbr_baddr_mask isn't allocated yet at this point in the code.
 allocated? you mean assigned?
 aren't you setting vttbr_baddr_mask in kvm_arch_init()?  that's
 certainly called before kvm_arch_init_vm().
Yes, I mean assigned, at least I got the first letter correct :)  All I
know is that vttbr_baddr_mask was still zero and checking for zero and
calling the set function gave it a value.


 The
 second problem is that pgd is a virtual address, ie pgd ==
 0xfe03bbb4 while the vttbr masks off the high bits for a
 physical address, ie vttbr_baddr_mask=0x7ffe .  Even
 correcting for those issues I haven't been able to make this check work
 properly.  I'll resend v5 of the patch with all the other suggested changes.

 What are the issues that you face?  Iow. what is the alignment of the
 returned physical address?

 (You should be able to just to virt_to_phys(pgd) and use that to test
 for the vttbr_baddr_mask).
The addresses above are actually from my system, 64K page aligned on a
64K page kernel.  I did use virt_to_phys() and the kernel got a null
dereference and panicked; I didn't trace down where the panic was occurring.
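
(For reference, the check under discussion, with the virtual/physical mix-up
fixed, would look roughly like this -- a sketch using the v4 names, not
tested; the free_pages() on failure is my addition:)

	phys_addr_t pgd_phys = virt_to_phys(pgd);

	if (pgd_phys & ~vttbr_baddr_mask) {
		kvm_err("Stage-2 pgd not correctly aligned: %pa\n", &pgd_phys);
		free_pages((unsigned long)pgd, s2_pgd_order);
		return -EFAULT;
	}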



 Thanks,
 -Christoffer



Re: [PATCH v5] arm64: fix VTTBR_BADDR_MASK

2014-08-19 Thread Joel Schopp

On 08/19/2014 07:24 AM, Christoffer Dall wrote:
 On Mon, Aug 18, 2014 at 03:36:04PM -0500, Joel Schopp wrote:
 The current VTTBR_BADDR_MASK only masks 39 bits, which is broken on current
 systems.  Rather than just add a bit, it seems like a good time to also set
 things at run time instead of compile time to accommodate more hardware.

 This patch sets TCR_EL2.PS, VTCR_EL2.T0SZ and vttbr_baddr_mask at run time,
 not compile time.

 In ARMv8, the EL2 physical address size (TCR_EL2.PS) and stage2 input address
 size (VTCR_EL2.T0SZ) cannot be determined at compile time since they
 depend on hardware capability.

 According to Table D4-23 and Table D4-25 in ARM DDI 0487A.b document,
 vttbr_x is calculated using different fixed values with consideration
 of T0SZ, granule size and the level of translation tables. Therefore,
 vttbr_baddr_mask should be determined dynamically.

 Changes since v4:
 More minor cleanups from review
 Moved some functions into headers

 Changes since v3:
 Another rebase
 Addressed minor comments from v2

 Changes since v2:
 Rebased on https://git.kernel.org/pub/scm/linux/kernel/git/kvmarm/kvmarm.git 
 next branch

 Changes since v1:
 Rebased fix on Jungseok Lee's patch https://lkml.org/lkml/2014/5/12/189 to
 provide better long term fix.  Updated that patch to log error instead of
 silently fail on unaligned vttbr.

 Cc: Christoffer Dall christoffer.d...@linaro.org
 Cc: Sungjinn Chung sungjinn.ch...@samsung.com
 Signed-off-by: Jungseok Lee jays@samsung.com
 Signed-off-by: Joel Schopp joel.sch...@amd.com
 ---
  arch/arm/include/asm/kvm_mmu.h   |   12 ++
  arch/arm/kvm/arm.c   |   17 +++-
  arch/arm64/include/asm/kvm_arm.h |   17 +---
  arch/arm64/include/asm/kvm_mmu.h |   78 
 ++
  arch/arm64/kvm/hyp-init.S|   20 +++---
  5 files changed, 122 insertions(+), 22 deletions(-)

 diff --git a/arch/arm/include/asm/kvm_mmu.h b/arch/arm/include/asm/kvm_mmu.h
 index 5c7aa3c..73f6ff6 100644
 --- a/arch/arm/include/asm/kvm_mmu.h
 +++ b/arch/arm/include/asm/kvm_mmu.h
 @@ -166,6 +166,18 @@ static inline void coherent_cache_guest_page(struct 
 kvm_vcpu *vcpu, hva_t hva,
  
  void stage2_flush_vm(struct kvm *kvm);
  
 +static inline int kvm_get_phys_addr_shift(void)
 +{
 +return KVM_PHYS_SHIFT;
 +}
 +
 +static inline int set_vttbr_baddr_mask(void)
 +{
 +vttbr_baddr_mask = VTTBR_BADDR_MASK;
 +return 0;
 +}
 +
 +
  #endif  /* !__ASSEMBLY__ */
  
  #endif /* __ARM_KVM_MMU_H__ */
 diff --git a/arch/arm/kvm/arm.c b/arch/arm/kvm/arm.c
 index 3c82b37..f396eb7 100644
 --- a/arch/arm/kvm/arm.c
 +++ b/arch/arm/kvm/arm.c
 @@ -37,6 +37,7 @@
  #include asm/mman.h
  #include asm/tlbflush.h
  #include asm/cacheflush.h
 +#include asm/cputype.h
  #include asm/virt.h
  #include asm/kvm_arm.h
  #include asm/kvm_asm.h
 @@ -466,8 +467,14 @@ static void update_vttbr(struct kvm *kvm)
 	/* update vttbr to be used with the new vmid */
 	pgd_phys = virt_to_phys(kvm->arch.pgd);
 	vmid = ((u64)(kvm->arch.vmid) << VTTBR_VMID_SHIFT) & VTTBR_VMID_MASK;
 -	kvm->arch.vttbr = pgd_phys & VTTBR_BADDR_MASK;
 -	kvm->arch.vttbr |= vmid;
 +
 +	/*
 +	 * If the VTTBR isn't aligned there is something wrong with the system
 +	 * or kernel.
 +	 */
 +	BUG_ON(pgd_phys & ~vttbr_baddr_mask);
 +
 +	kvm->arch.vttbr = pgd_phys | vmid;
  
  spin_unlock(kvm_vmid_lock);
  }
 @@ -1052,6 +1059,12 @@ int kvm_arch_init(void *opaque)
  }
  }
  
 +err = set_vttbr_baddr_mask();
 +if (err) {
 +	kvm_err("Cannot set vttbr_baddr_mask\n");
 +return -EINVAL;
 +}
 +
  cpu_notifier_register_begin();
  
  err = init_hyp_mode();
 diff --git a/arch/arm64/include/asm/kvm_arm.h 
 b/arch/arm64/include/asm/kvm_arm.h
 index 3d69030..8dbef70 100644
 --- a/arch/arm64/include/asm/kvm_arm.h
 +++ b/arch/arm64/include/asm/kvm_arm.h
 @@ -94,7 +94,6 @@
  /* TCR_EL2 Registers bits */
  #define TCR_EL2_TBI	(1 << 20)
  #define TCR_EL2_PS	(7 << 16)
 -#define TCR_EL2_PS_40B	(2 << 16)
  #define TCR_EL2_TG0	(1 << 14)
  #define TCR_EL2_SH0	(3 << 12)
  #define TCR_EL2_ORGN0	(3 << 10)
 @@ -103,8 +102,6 @@
  #define TCR_EL2_MASK(TCR_EL2_TG0 | TCR_EL2_SH0 | \
   TCR_EL2_ORGN0 | TCR_EL2_IRGN0 | TCR_EL2_T0SZ)
  
 -#define TCR_EL2_FLAGS   (TCR_EL2_PS_40B)
 -
  /* VTCR_EL2 Registers bits */
  #define VTCR_EL2_PS_MASK	(7 << 16)
  #define VTCR_EL2_TG0_MASK	(1 << 14)
 @@ -119,36 +116,28 @@
  #define VTCR_EL2_SL0_MASK	(3 << 6)
  #define VTCR_EL2_SL0_LVL1	(1 << 6)
  #define VTCR_EL2_T0SZ_MASK	0x3f
 -#define VTCR_EL2_T0SZ_40B	24
 +#define VTCR_EL2_T0SZ(bits)	(64 - (bits))
  
  #ifdef CONFIG_ARM64_64K_PAGES
  /*
   * Stage2 translation configuration:
 - * 40bits output (PS = 2)
 - * 40bits input  (T0SZ = 24)
   * 64kB pages (TG0 = 1)
   * 2 level page tables (SL = 1)
   */
  #define VTCR_EL2_FLAGS  (VTCR_EL2_TG0_64K | VTCR_EL2_SH0_INNER

Re: [PATCH v5] arm64: fix VTTBR_BADDR_MASK

2014-08-19 Thread Joel Schopp

 The return is a value, not just an error code.  Because of this, returning
 an error overloads that value.  0 just seemed like a convenient invalid
 value to check since a vttbr_x of 0 is invalid, but returning a negative
 error code would be as equally invalid.  If this is the only issue it
 doesn't seem worth respinning the patch for, but I'll change it to
 -EINVAL if for some reason a v6 is needed.
 Have you given up on doing the alignment check with the proper size on
 the pgd allocation for this patch?
Yes, I'd rather leave the extra check out of this patch.  If I were
changing the pgd allocation code I would make sure to add a check, or if
there were a static check there now I would update it for the dynamic
value from the hardware, but it seems unrelated to add several checks to
other parts of the code beyond those already in the patch.  I did leave
the functions in the headers such that checks like this could be added
when someone is updating the code for other reasons, say 4 level page
tables.

-Joel


Re: [PATCH v4] arm64: fix VTTBR_BADDR_MASK

2014-08-19 Thread Joel Schopp

On 08/19/2014 09:37 AM, Christoffer Dall wrote:
 On Tue, Aug 19, 2014 at 09:05:09AM -0500, Joel Schopp wrote:
 On 08/19/2014 07:22 AM, Christoffer Dall wrote:
 On Mon, Aug 18, 2014 at 03:30:58PM -0500, Joel Schopp wrote:
  #endif /* __ARM_KVM_MMU_H__ */
 diff --git a/arch/arm/kvm/mmu.c b/arch/arm/kvm/mmu.c
 index 16e7994..70f0f02 100644
 --- a/arch/arm/kvm/mmu.c
 +++ b/arch/arm/kvm/mmu.c
 @@ -521,6 +521,7 @@ int create_hyp_io_mappings(void *from, void *to, 
 phys_addr_t phys_addr)
   */
  int kvm_alloc_stage2_pgd(struct kvm *kvm)
  {
 +  unsigned int s2_pgds, s2_pgd_order;
pgd_t *pgd;
  
if (kvm-arch.pgd != NULL) {
 @@ -528,10 +529,18 @@ int kvm_alloc_stage2_pgd(struct kvm *kvm)
return -EINVAL;
}
  
 -  pgd = (pgd_t *)__get_free_pages(GFP_KERNEL, S2_PGD_ORDER);
 +	s2_pgds = (1 << (kvm_get_phys_addr_shift() - PGDIR_SHIFT));
 +  s2_pgd_order = get_order(s2_pgds * sizeof(pgd_t));
 +
 +  pgd = (pgd_t *)__get_free_pages(GFP_KERNEL, s2_pgd_order);
if (!pgd)
return -ENOMEM;
  
 +	if ((unsigned long)pgd & ~vttbr_baddr_mask) {
 +		kvm_err("Stage-2 pgd not correctly aligned: %p\n", pgd);
 +		return -EFAULT;
 +  }


 There are two problems that I've found here.  The first problem is that
 vttbr_baddr_mask isn't allocated yet at this point in the code.
 allocated? you mean assigned?
 aren't you setting vttbr_baddr_mask in kvm_arch_init()?  that's
 certainly called before kvm_arch_init_vm().
 Yes, I mean assigned, at least I got the first letter correct :)  All I
 know is that vttbr_baddr_mask was still zero and checking for zero and
 calling the set function gave it a value.
 that sounds weird and wrong.  Hum.  Mind sticking a few prints in
 there and figuring out what's causing this?


 The
 second problem is that pgd is a virtual address, ie pgd ==
 0xfe03bbb4 while the vttbr masks off the high bits for a
 physical address, ie vttbr_baddr_mask=0x7ffe .  Even
 correcting for those issues I haven't been able to make this check work
 properly.  I'll resend v5 the patch with all the other suggested changes.

 What are the issues that you face?  Iow. what is the alignment of the
 returned physical address?

 (You should be able to just to virt_to_phys(pgd) and use that to test
 for the vttbr_baddr_mask).
 The addresses above are actually from my system, 64K page aligned on a
 64K page kernel.  I did use virt_to_phys() and the kernel got a null
 dereference and panicked; I didn't trace down where the panic was occurring.

 virt_to_phys() directly caused the null dereference?  That sounds bad!
I don't think it was the virt_to_phys() directly causing the null
dereference, but again I didn't trace it down.


 Would you mind trying to trace this down?  I'll be happy to provide as
 much help as I can along the way.
I can break the kvm_alloc_stage2_pgd check into a separate patch on top
of this one and circle back around to it after I finish another
unrelated thing I'm working on.


Re: [PATCH v5] arm64: fix VTTBR_BADDR_MASK

2014-08-19 Thread Joel Schopp

 hmmm, the point is that we need to ensure that we have a properly
 aligned allocated PGD, that's what this patch currently addresses, and as
 you pointed out, the BUG_ON() just before trying to run a VM is not the
 nicest solution - we should really be dealing with this properly at
 allocation time.

 But, if you don't have time to look at that, then ok, I'll have to pick
 it up myself.

 However, you are hinting that we do not support 4 levels of page tables,
 yet you do allow the t0sz_to_vttbr_x function to pass even when using
 t0sz values only supported under 4 levels of page tables.  What is
 the rationale for that?
Minimizing merge conflicts. I figure both are going in within the next
month or two.


Re: [RFC 0/4] VFIO: PLATFORM: Return device tree info for a platform device node

2014-08-19 Thread Joel Schopp

 This RFC's intention is to show what an interface to access device node
 properties for VFIO_PLATFORM can look like.

 If a device tree node corresponding to a platform device bound by 
 VFIO_PLATFORM
 is available, this patch series will allow the user to query the properties
 associated with this device node. This can be useful for userspace drivers
 to automatically query parameters related to the device.

 An API to return data from a device's device tree has been proposed before on
 these lists. The API proposed here is slightly different.

 Properties to parse from the device tree are not indexed by a numerical id.
 The host system doesn't guarantee any specific ordering for the available
 properties, or that those will remain the same; while this does not happen in
 practice, there is nothing stopping the host from changing the device nodes
 during operation. So properties are accessed by property name.

 The type of the property accessed must also be known by the user. Properties
 types implemented in this RFC:
 - VFIO_DEVTREE_ARR_TYPE_STRING (strings separated by the null character)
 - VFIO_DEVTREE_ARR_TYPE_U32
 - VFIO_DEVTREE_ARR_TYPE_U16
 - VFIO_DEVTREE_ARR_TYPE_U8

 These can all be access by the ioctl VFIO_DEVICE_GET_DEVTREE_INFO. A new ioctl
 was preferred instead of shoehorning the functionality in 
 VFIO_DEVICE_GET_INFO.
 The structure exchanged looks like this:

You'll have to forgive my ignorance on the history.  But with the dtc
tool already supporting a filesystem representation (--in-format=fs),
with the dtc tool source already built into qemu, and having an example
implementation of such an interface in /proc/device-tree/, why is an
ioctl interface preferred over a filesystem interface?



Re: [PATCH v4] arm64: fix VTTBR_BADDR_MASK

2014-08-18 Thread Joel Schopp

 #endif /* __ARM_KVM_MMU_H__ */
diff --git a/arch/arm/kvm/mmu.c b/arch/arm/kvm/mmu.c
index 16e7994..70f0f02 100644
--- a/arch/arm/kvm/mmu.c
+++ b/arch/arm/kvm/mmu.c
@@ -521,6 +521,7 @@ int create_hyp_io_mappings(void *from, void *to, 
phys_addr_t phys_addr)
  */
 int kvm_alloc_stage2_pgd(struct kvm *kvm)
 {
+   unsigned int s2_pgds, s2_pgd_order;
pgd_t *pgd;
 
if (kvm-arch.pgd != NULL) {
@@ -528,10 +529,18 @@ int kvm_alloc_stage2_pgd(struct kvm *kvm)
return -EINVAL;
}
 
-   pgd = (pgd_t *)__get_free_pages(GFP_KERNEL, S2_PGD_ORDER);
+	s2_pgds = (1 << (kvm_get_phys_addr_shift() - PGDIR_SHIFT));
+   s2_pgd_order = get_order(s2_pgds * sizeof(pgd_t));
+
+   pgd = (pgd_t *)__get_free_pages(GFP_KERNEL, s2_pgd_order);
if (!pgd)
return -ENOMEM;
 
+	if ((unsigned long)pgd & ~vttbr_baddr_mask) {
+		kvm_err("Stage-2 pgd not correctly aligned: %p\n", pgd);
+		return -EFAULT;
+   }


There are two problems that I've found here.  The first problem is that
vttbr_baddr_mask isn't allocated yet at this point in the code.  The
second problem is that pgd is a virtual address, ie pgd ==
0xfe03bbb4 while the vttbr masks off the high bits for a
physical address, ie vttbr_baddr_mask=0x7ffe .  Even
correcting for those issues I haven't been able to make this check work
properly.  I'll resend v5 of the patch with all the other suggested changes.
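
(As a worked example of the sizing in the hunk above -- assuming,
hypothetically, a 4K-page host with PGDIR_SHIFT == 30 and a 40-bit stage-2
input size:)

	/*
	 * s2_pgds      = 1 << (40 - 30)       = 1024 entries
	 * table size   = 1024 * sizeof(pgd_t) = 8192 bytes (8-byte pgd_t)
	 * s2_pgd_order = get_order(8192)      = 1, i.e. two contiguous pages
	 */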



[PATCH v5] arm64: fix VTTBR_BADDR_MASK

2014-08-18 Thread Joel Schopp
The current VTTBR_BADDR_MASK only masks 39 bits, which is broken on current
systems.  Rather than just add a bit, it seems like a good time to also set
things at run time instead of compile time to accommodate more hardware.

This patch sets TCR_EL2.PS, VTCR_EL2.T0SZ and vttbr_baddr_mask at run time,
not compile time.

In ARMv8, the EL2 physical address size (TCR_EL2.PS) and stage2 input address
size (VTCR_EL2.T0SZ) cannot be determined at compile time since they
depend on hardware capability.

According to Table D4-23 and Table D4-25 in ARM DDI 0487A.b document,
vttbr_x is calculated using different fixed values with consideration
of T0SZ, granule size and the level of translation tables. Therefore,
vttbr_baddr_mask should be determined dynamically.

Changes since v4:
More minor cleanups from review
Moved some functions into headers

Changes since v3:
Another rebase
Addressed minor comments from v2

Changes since v2:
Rebased on https://git.kernel.org/pub/scm/linux/kernel/git/kvmarm/kvmarm.git 
next branch

Changes since v1:
Rebased fix on Jungseok Lee's patch https://lkml.org/lkml/2014/5/12/189 to
provide better long term fix.  Updated that patch to log error instead of
silently fail on unaligned vttbr.

Cc: Christoffer Dall christoffer.d...@linaro.org
Cc: Sungjinn Chung sungjinn.ch...@samsung.com
Signed-off-by: Jungseok Lee jays@samsung.com
Signed-off-by: Joel Schopp joel.sch...@amd.com
---
 arch/arm/include/asm/kvm_mmu.h   |   12 ++
 arch/arm/kvm/arm.c   |   17 +++-
 arch/arm64/include/asm/kvm_arm.h |   17 +---
 arch/arm64/include/asm/kvm_mmu.h |   78 ++
 arch/arm64/kvm/hyp-init.S|   20 +++---
 5 files changed, 122 insertions(+), 22 deletions(-)

diff --git a/arch/arm/include/asm/kvm_mmu.h b/arch/arm/include/asm/kvm_mmu.h
index 5c7aa3c..73f6ff6 100644
--- a/arch/arm/include/asm/kvm_mmu.h
+++ b/arch/arm/include/asm/kvm_mmu.h
@@ -166,6 +166,18 @@ static inline void coherent_cache_guest_page(struct 
kvm_vcpu *vcpu, hva_t hva,
 
 void stage2_flush_vm(struct kvm *kvm);
 
+static inline int kvm_get_phys_addr_shift(void)
+{
+   return KVM_PHYS_SHIFT;
+}
+
+static inline int set_vttbr_baddr_mask(void)
+{
+   vttbr_baddr_mask = VTTBR_BADDR_MASK;
+   return 0;
+}
+
+
 #endif /* !__ASSEMBLY__ */
 
 #endif /* __ARM_KVM_MMU_H__ */
diff --git a/arch/arm/kvm/arm.c b/arch/arm/kvm/arm.c
index 3c82b37..f396eb7 100644
--- a/arch/arm/kvm/arm.c
+++ b/arch/arm/kvm/arm.c
@@ -37,6 +37,7 @@
 #include <asm/mman.h>
 #include <asm/tlbflush.h>
 #include <asm/cacheflush.h>
+#include <asm/cputype.h>
 #include <asm/virt.h>
 #include <asm/kvm_arm.h>
 #include <asm/kvm_asm.h>
@@ -466,8 +467,14 @@ static void update_vttbr(struct kvm *kvm)
 	/* update vttbr to be used with the new vmid */
 	pgd_phys = virt_to_phys(kvm->arch.pgd);
 	vmid = ((u64)(kvm->arch.vmid) << VTTBR_VMID_SHIFT) & VTTBR_VMID_MASK;
-	kvm->arch.vttbr = pgd_phys & VTTBR_BADDR_MASK;
-	kvm->arch.vttbr |= vmid;
+
+	/*
+	 * If the VTTBR isn't aligned there is something wrong with the system
+	 * or kernel.
+	 */
+	BUG_ON(pgd_phys & ~vttbr_baddr_mask);
+
+	kvm->arch.vttbr = pgd_phys | vmid;
 
spin_unlock(kvm_vmid_lock);
 }
@@ -1052,6 +1059,12 @@ int kvm_arch_init(void *opaque)
}
}
 
+   err = set_vttbr_baddr_mask();
+   if (err) {
+		kvm_err("Cannot set vttbr_baddr_mask\n");
+   return -EINVAL;
+   }
+
cpu_notifier_register_begin();
 
err = init_hyp_mode();
diff --git a/arch/arm64/include/asm/kvm_arm.h b/arch/arm64/include/asm/kvm_arm.h
index 3d69030..8dbef70 100644
--- a/arch/arm64/include/asm/kvm_arm.h
+++ b/arch/arm64/include/asm/kvm_arm.h
@@ -94,7 +94,6 @@
 /* TCR_EL2 Registers bits */
 #define TCR_EL2_TBI	(1 << 20)
 #define TCR_EL2_PS	(7 << 16)
-#define TCR_EL2_PS_40B	(2 << 16)
 #define TCR_EL2_TG0	(1 << 14)
 #define TCR_EL2_SH0	(3 << 12)
 #define TCR_EL2_ORGN0	(3 << 10)
@@ -103,8 +102,6 @@
 #define TCR_EL2_MASK   (TCR_EL2_TG0 | TCR_EL2_SH0 | \
 TCR_EL2_ORGN0 | TCR_EL2_IRGN0 | TCR_EL2_T0SZ)
 
-#define TCR_EL2_FLAGS  (TCR_EL2_PS_40B)
-
 /* VTCR_EL2 Registers bits */
 #define VTCR_EL2_PS_MASK	(7 << 16)
 #define VTCR_EL2_TG0_MASK	(1 << 14)
@@ -119,36 +116,28 @@
 #define VTCR_EL2_SL0_MASK	(3 << 6)
 #define VTCR_EL2_SL0_LVL1	(1 << 6)
 #define VTCR_EL2_T0SZ_MASK	0x3f
-#define VTCR_EL2_T0SZ_40B	24
+#define VTCR_EL2_T0SZ(bits)	(64 - (bits))
 
 #ifdef CONFIG_ARM64_64K_PAGES
 /*
  * Stage2 translation configuration:
- * 40bits output (PS = 2)
- * 40bits input  (T0SZ = 24)
  * 64kB pages (TG0 = 1)
  * 2 level page tables (SL = 1)
  */
 #define VTCR_EL2_FLAGS (VTCR_EL2_TG0_64K | VTCR_EL2_SH0_INNER | \
 VTCR_EL2_ORGN0_WBWA | VTCR_EL2_IRGN0_WBWA | \
-VTCR_EL2_SL0_LVL1 | VTCR_EL2_T0SZ_40B)
-#define VTTBR_X(38 - VTCR_EL2_T0SZ_40B

Re: [Qemu-devel] The status about vhost-net on kvm-arm?

2014-08-14 Thread Joel Schopp



we at Virtual Open Systems did some work and tested vhost-net on ARM
back in March.
The setup was based on:
  - host kernel with our ioeventfd patches:
http://www.spinics.net/lists/kvm-arm/msg08413.html

- qemu with the aforementioned patches from Ying-Shiuan Pan
https://lists.gnu.org/archive/html/qemu-devel/2014-02/msg00715.html

The testbed was ARM Chromebook with Exynos 5250, using a 1Gbps USB3
Ethernet adapter connected to a 1Gbps switch. I can't find the actual
numbers but I remember that with multiple streams the gain was clearly
seen. Note that it used the minimum required ioeventfd implementation
and not irqfd.

I guess it is feasible to think that it all can be put together and
rebased + the recent irqfd work. One can achieve even better
performance (because of the irqfd).


Managed to replicate the setup with the old versions we used in March:

Single stream from another machine to chromebook with 1Gbps USB3
Ethernet adapter.
iperf -c address -P 1 -i 1 -p 5001 -f k -t 10
to HOST: 858316 Kbits/sec
to GUEST: 761563 Kbits/sec

to GUEST vhost=off: 508150 Kbits/sec

10 parallel streams
iperf -c address -P 10 -i 1 -p 5001 -f k -t 10
to HOST: 842420 Kbits/sec
to GUEST: 625144 Kbits/sec

to GUEST vhost=off: 425276 Kbits/sec

I have tested the same cases on a Hisilicon board (Cortex-A15@1G)
with Integrated 1Gbps Ethernet adapter.

iperf -c address -P 1 -i 1 -p 5001 -f M -t 10
to HOST: 906 Mbits/sec
to GUEST: 562 Mbits/sec
to GUEST vhost=off: 340 Mbits/sec

With 10 parallel streams, the performance gains another 10% or so:
iperf -c address -P 10 -i 1 -p 5001 -f M -t 10
to HOST: 923 Mbits/sec
to GUEST: 592 Mbits/sec
to GUEST vhost=off: 364 Mbits/sec

It's easy to see that vhost-net brings great performance improvements,
almost 50%+.
That's pretty impressive for not even having irqfd.  I guess we should 
renew some effort to get these patches merged upstream.



Re: [PATCH v3] arm64: fix VTTBR_BADDR_MASK

2014-08-11 Thread Joel Schopp
Thanks for the detailed review.
 the last case would be case 5 and the default case would be a BUG().
I agree with the case, but rather than do a BUG() I'm going to print an
error and return -EINVAL.  Not worth stopping the host kernel just
because kvm is messed up when we can gracefully exit from it.

 +
 +/*
 + * See Table D4-23 and Table D4-25 in ARM DDI 0487A.b to figure out
 + * the origin of the hardcoded values, 38 and 37.
 + */
 +#ifdef CONFIG_ARM64_64K_PAGES
 +/*
 + * 16 <= T0SZ <= 21 is valid under 3 levels of translation tables
 + * 18 <= T0SZ <= 34 is valid under 2 levels of translation tables
 + * 31 <= T0SZ <= 39 is valid under 1 level of translation tables
 + */
 so this scheme is with concatenated initial level stage-2 page tables.

 But we only ever allocate the amount of pages for our pgd according to
 what the host has, so I think this allocation needs to be locked down
 more tight, because the host is always using the appropriate amount for
 39 bits virtual addresses.
I'll narrow the sanity check of the range.  I'll narrow it based on a 39
- 48 bit VA host range in anticipation of the 4 level 4k and 3 level 64k
host patches going in.

 +	kvm_err("Cannot support %d-bit address space\n", 64 - t0sz);
 +return -EINVAL;
 +}
 +vttbr_x = 37 - t0sz;
 +#endif
 +	vttbr_baddr_mask = (((1LLU << (48 - vttbr_x)) - 1) << (vttbr_x - 1));
 +#endif
 This nested ifdef is really quite horrible.  Can you either factor these
 out into some static inlines in arch/arm[64]/include/asm/kvm_mmu.h or
 provide a per-architecture implementation in a .c file?
I'll factor it out in the file to make it more readable and do away with
the nested ifdef.  My theory on putting things into .h files is to only
do it if there is actually another file that uses it.

 +return 0;
 +}
 +
 +/**
   * update_vttbr - Update the VTTBR with a valid VMID before the guest runs
   * @kvm The guest that we are about to run
   *
 @@ -429,8 +502,16 @@ static void update_vttbr(struct kvm *kvm)
  /* update vttbr to be used with the new vmid */
 	pgd_phys = virt_to_phys(kvm->arch.pgd);
 	vmid = ((u64)(kvm->arch.vmid) << VTTBR_VMID_SHIFT) & VTTBR_VMID_MASK;
 -	kvm->arch.vttbr = pgd_phys & VTTBR_BADDR_MASK;
 -	kvm->arch.vttbr |= vmid;
 +
 +/*
 + * If the VTTBR isn't aligned there is something wrong with the system
 + * or kernel.  It is better to just fail and not mask it. But no need
 + * to panic the host kernel with a BUG_ON(), instead just log the error.
 + */
 These last two sentences are not very helpful, because they don't
 describe the rationale for what you're doing, only what you are (and are
 not) doing.
I'll reword the comment.

 That said, I don't think this is doing the right thing.  I think you
 want to refuse running the VM and avoid any stage-2 entries being
 created if this is not the case (actually, we may want to check this
 after set_vttbr_baddr_mask() or right aftert allocating the stage-2
 pgd), because otherwise I think we may be overwriting memory not
 belonging to us with concatenated page tables in a 42-bit 4KB system,
 for example.
My experience here was that the hardware actually catches the error on
the first instruction load of the guest kernel and does a stage 2
translation abort.  However, to be extra safe we could just log the
error with the address of the vttbr and then zero out the pgd_phys part
of vttbr altogether, leaving only the vmid.  The guest would then die of
natural causes and we wouldn't have to worry about the outside
possibility of memory getting overwritten.
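
(The log-and-degrade alternative described above would look something like
this -- a sketch of the idea, not what any posted version does:)

	if (unlikely(pgd_phys & ~vttbr_baddr_mask)) {
		kvm_err("VTTBR not aligned, expect guest to fail\n");
		pgd_phys = 0;	/* guest dies of natural causes */
	}
	kvm->arch.vttbr = pgd_phys | vmid;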

I don't like the option of just calling BUG() because stopping the host
kernel from running just because we can't run a guest seems a bit
extreme.  On the other hand adding a return code to update_vttbr and
checking it (even with unlikely) in the kvm_arch_vcpu_ioctl_run() loop
doesn't thrill me either just from a wasted cpu cycles point of view. 

 +	if (pgd_phys & ~vttbr_baddr_mask)
 +		kvm_err("VTTBR not aligned, expect guest to fail");
 +
 +	kvm->arch.vttbr = pgd_phys | vmid;
  
  spin_unlock(kvm_vmid_lock);
  }
 @@ -1015,6 +1096,12 @@ int kvm_arch_init(void *opaque)
  }
  }
  
 +err = set_vttbr_baddr_mask();
 +if (err) {
 +	kvm_err("Cannot set vttbr_baddr_mask\n");
 +return -EINVAL;
 +}
 +
  cpu_notifier_register_begin();
  
  err = init_hyp_mode();
 diff --git a/arch/arm64/include/asm/kvm_arm.h 
 b/arch/arm64/include/asm/kvm_arm.h
 index cc83520..ff4a4fa 100644
 --- a/arch/arm64/include/asm/kvm_arm.h
 +++ b/arch/arm64/include/asm/kvm_arm.h
 @@ -95,7 +95,6 @@
  /* TCR_EL2 Registers bits */
  #define TCR_EL2_TBI	(1 << 20)
  #define TCR_EL2_PS	(7 << 16)
 -#define TCR_EL2_PS_40B	(2 << 16)
  #define TCR_EL2_TG0	(1 << 14)
  #define TCR_EL2_SH0	(3 << 12)
  #define TCR_EL2_ORGN0	(3 << 10)
 @@ -104,8 +103,6 @@
  #define TCR_EL2_MASK(TCR_EL2_TG0 | TCR_EL2_SH0 | \
  

Re: KVM call for agenda for 2014-08-19

2014-08-11 Thread Joel Schopp

On 08/11/2014 08:09 AM, Juan Quintela wrote:
 Hi

 Please, send any topic that you are interested in covering.

 People have complained in the past that I don't cancel the call until
 the very last minute.  So, what do you think: deadline for
 submitting topics is 23:00 UTC on Monday?
I like the deadline. 

 Call details:

  15:00 CEST
  13:00 UTC
  09:00 EDT

 Every two weeks

 By popular demand, a google calendar public entry with it

  
 https://www.google.com/calendar/embed?src=dG9iMXRqcXAzN3Y4ZXZwNzRoMHE4a3BqcXNAZ3JvdXAuY2FsZW5kYXIuZ29vZ2xlLmNvbQ

   (Let me know if you have any problems with the calendar entry)

 If you need phone number details,  contact me privately

 Thanks, Juan.


Re: [PATCH v3] arm64: fix VTTBR_BADDR_MASK

2014-08-11 Thread Joel Schopp

 That said, I don't think this is doing the right thing.  I think you
 want to refuse running the VM and avoid any stage-2 entries being
 created if this is not the case (actually, we may want to check this
 after set_vttbr_baddr_mask() or right aftert allocating the stage-2
 pgd), because otherwise I think we may be overwriting memory not
 belonging to us with concatenated page tables in a 42-bit 4KB system,
 for example.
 My experience here was that the hardware actually catches the error on
 the first instruction load of the guest kernel and does a stage 2
 translation abort.  However, to be extra safe we could just log the
 error with the address of the vttbr and then zero out the pgd_phys part
 of vttbr altogether, leaving only the vmid.  The guest would then die of
 natural causes and we wouldn't have to worry about the outside
 possibility of memory getting overwritten.
 uh, putting zero in the pgd_phys part will just point to random memory
 if you happen to have memory based at address 0 though, right?

 I think we should check when we allocate the pgd that it is indeed of
 the right size and alignment, and if it isn't at this point, it truly is
 a BUG() and your kernel is terribly busted.
If I can't rely on 0 to be an invalid address I can't think of what I
could rely on to be invalid.  I'll just change this to BUG_ON(pgd_phys &
~vttbr_baddr_mask); and give up on my dream of the host kernel surviving
the bug.



[PATCH v4] arm64: fix VTTBR_BADDR_MASK

2014-08-11 Thread Joel Schopp
The current VTTBR_BADDR_MASK only masks 39 bits, which is broken on current
systems.  Rather than just add a bit, it seems like a good time to also set
things at run time instead of compile time to accommodate more hardware.

This patch sets TCR_EL2.PS, VTCR_EL2.T0SZ and vttbr_baddr_mask at run time,
not compile time.

In ARMv8, the EL2 physical address size (TCR_EL2.PS) and stage2 input address
size (VTCR_EL2.T0SZ) cannot be determined at compile time since they
depend on hardware capability.

According to Table D4-23 and Table D4-25 in ARM DDI 0487A.b document,
vttbr_x is calculated using different fixed values with consideration
of T0SZ, granule size and the level of translation tables. Therefore,
vttbr_baddr_mask should be determined dynamically.

Changes since v3:
Another rebase
Addressed minor comments from v2

Changes since v2:
Rebased on https://git.kernel.org/pub/scm/linux/kernel/git/kvmarm/kvmarm.git 
next branch

Changes since v1:
Rebased fix on Jungseok Lee's patch https://lkml.org/lkml/2014/5/12/189 to
provide better long term fix.  Updated that patch to log error instead of
silently fail on unaligned vttbr.

Cc: Christoffer Dall christoffer.d...@linaro.org
Cc: Sungjinn Chung sungjinn.ch...@samsung.com
Signed-off-by: Jungseok Lee jays@samsung.com
Signed-off-by: Joel Schopp joel.sch...@amd.com
---
 arch/arm/kvm/arm.c   |  116 +-
 arch/arm64/include/asm/kvm_arm.h |   17 +-
 arch/arm64/kvm/hyp-init.S|   20 +--
 3 files changed, 131 insertions(+), 22 deletions(-)

diff --git a/arch/arm/kvm/arm.c b/arch/arm/kvm/arm.c
index 3c82b37..b4859fa 100644
--- a/arch/arm/kvm/arm.c
+++ b/arch/arm/kvm/arm.c
@@ -37,6 +37,7 @@
 #include <asm/mman.h>
 #include <asm/tlbflush.h>
 #include <asm/cacheflush.h>
+#include <asm/cputype.h>
 #include <asm/virt.h>
 #include <asm/kvm_arm.h>
 #include <asm/kvm_asm.h>
@@ -61,6 +62,8 @@ static atomic64_t kvm_vmid_gen = ATOMIC64_INIT(1);
 static u8 kvm_next_vmid;
 static DEFINE_SPINLOCK(kvm_vmid_lock);
 
+static u64 vttbr_baddr_mask;
+
 static bool vgic_present;
 
 static void kvm_arm_set_running_vcpu(struct kvm_vcpu *vcpu)
@@ -412,6 +415,103 @@ static bool need_new_vmid_gen(struct kvm *kvm)
 	return unlikely(kvm->arch.vmid_gen != atomic64_read(&kvm_vmid_gen));
 }
 
+
+
+	/*
+	 * ARMv8 64K architecture limitations:
+	 * 16 <= T0SZ <= 21 is valid under 3 levels of translation tables
+	 * 18 <= T0SZ <= 34 is valid under 2 levels of translation tables
+	 * 31 <= T0SZ <= 39 is valid under 1 level of translation tables
+	 *
+	 * ARMv8 4K architecture limitations:
+	 * 16 <= T0SZ <= 24 is valid under 4 levels of translation tables
+	 * 21 <= T0SZ <= 30 is valid under 3 levels of translation tables
+	 * 30 <= T0SZ <= 39 is valid under 2 levels of translation tables
+	 *
+	 * We further limit T0SZ in ARM64 Linux by not supporting 1 level
+	 * translation tables at all, not supporting 2 level translation
+	 * tables with 4k pages, not supporting different levels of translation
+	 * tables in stage 1 vs stage 2, not supporting different page sizes in
+	 * stage 1 vs stage 2, not supporting less than 40 bit address space
+	 * with 64k pages, and not supporting less than 32 bit address space
+	 * with 4K pages.
+	 *
+	 * See Table D4-23 and Table D4-25 in ARM DDI 0487A.b to figure out
+	 * the origin of the hardcoded values, 38 and 37.
+	 */
+
+#ifdef CONFIG_ARM64_64K_PAGES
+static inline int t0sz_to_vttbr_x(int t0sz){
+	if (t0sz < 16 || t0sz > 24) {
+		kvm_err("Cannot support %d-bit address space\n", 64 - t0sz);
+   return -EINVAL;
+   }
+
+   return 38 - t0sz;
+}
+#elif CONFIG_ARM64 && !CONFIG_ARM64_64K_PAGES
+static inline int t0sz_to_vttbr_x(int t0sz){
+	if (t0sz < 16 || t0sz > 32) {
+		kvm_err("Cannot support %d-bit address space\n", 64 - t0sz);
+   return -EINVAL;
+   }
+   return 37 - t0sz;
+}
+#endif
+
+
+/**
+ * set_vttbr_baddr_mask - set mask value for vttbr base address
+ *
+ * In ARMv8, vttbr_baddr_mask cannot be determined at compile time since the
+ * stage2 input address size depends on hardware capability. Thus, we first
+ * need to read ID_AA64MMFR0_EL1.PARange and then set vttbr_baddr_mask
+ * with consideration of both granule size and the level of translation tables.
+ */
+#ifndef CONFIG_ARM64
+static int set_vttbr_baddr_mask(void)
+{
+   vttbr_baddr_mask = VTTBR_BADDR_MASK;
+   return 0;
+}
+#else
+static int set_vttbr_baddr_mask(void)
+{
+  int pa_range, t0sz, vttbr_x;
+
+	pa_range = read_cpuid(ID_AA64MMFR0_EL1) & 0xf;
+
+   switch (pa_range) {
+   case 0:
+   t0sz = VTCR_EL2_T0SZ(32);
+   break;
+   case 1:
+   t0sz = VTCR_EL2_T0SZ(36);
+   break;
+   case 2:
+   t0sz = VTCR_EL2_T0SZ(40);
+   break;
+   case 3

[RFC PATCH] arm64: KVM: add irqfd support

2014-08-11 Thread Joel Schopp
Depends on Eric Auger's ARM: KVM: add irqfd support patch.

Enable VFIO for platform devices on ARM64.  This patch fixes the ARM64 compile.
However, this patch has only been compile tested.  It seemed worth sharing, as
it will allow us to carry both the ARM and ARM64 patches together as we do more
testing.

Cc: Eric Auger eric.au...@linaro.org
Signed-off-by: Joel Schopp joel.sch...@amd.com
---
 Documentation/virtual/kvm/api.txt |2 +-
 arch/arm64/include/uapi/asm/kvm.h |4 
 arch/arm64/kvm/Kconfig|4 +++-
 arch/arm64/kvm/Makefile   |2 +-
 drivers/vfio/platform/Kconfig |2 +-
 5 files changed, 10 insertions(+), 4 deletions(-)

diff --git a/Documentation/virtual/kvm/api.txt 
b/Documentation/virtual/kvm/api.txt
index 04310d9..bc64ce9 100644
--- a/Documentation/virtual/kvm/api.txt
+++ b/Documentation/virtual/kvm/api.txt
@@ -2132,7 +2132,7 @@ into the hash PTE second double word).
 4.75 KVM_IRQFD
 
 Capability: KVM_CAP_IRQFD
-Architectures: x86 s390 arm
+Architectures: x86 s390 arm arm64
 Type: vm ioctl
 Parameters: struct kvm_irqfd (in)
 Returns: 0 on success, -1 on error
diff --git a/arch/arm64/include/uapi/asm/kvm.h b/arch/arm64/include/uapi/asm/kvm.h
index e633ff8..3df8baa 100644
--- a/arch/arm64/include/uapi/asm/kvm.h
+++ b/arch/arm64/include/uapi/asm/kvm.h
@@ -180,6 +180,10 @@ struct kvm_arch_memory_slot {
 /* Highest supported SPI, from VGIC_NR_IRQS */
 #define KVM_ARM_IRQ_GIC_MAX   127
 
+/* One single KVM irqchip, ie. the VGIC */
+#define KVM_NR_IRQCHIPS  1
+
+
 /* PSCI interface */
 #define KVM_PSCI_FN_BASE   0x95c1ba5e
 #define KVM_PSCI_FN(n) (KVM_PSCI_FN_BASE + (n))
diff --git a/arch/arm64/kvm/Kconfig b/arch/arm64/kvm/Kconfig
index 8ba85e9..cbd3525 100644
--- a/arch/arm64/kvm/Kconfig
+++ b/arch/arm64/kvm/Kconfig
@@ -26,6 +26,7 @@ config KVM
select KVM_ARM_HOST
select KVM_ARM_VGIC
select KVM_ARM_TIMER
+   select HAVE_KVM_EVENTFD
---help---
  Support hosting virtualized guest machines.
 
@@ -50,13 +51,14 @@ config KVM_ARM_MAX_VCPUS
 config KVM_ARM_VGIC
bool
depends on KVM_ARM_HOST && OF
-   select HAVE_KVM_IRQCHIP
+   select HAVE_KVM_IRQFD
---help---
  Adds support for a hardware assisted, in-kernel GIC emulation.
 
 config KVM_ARM_TIMER
bool
depends on KVM_ARM_VGIC
+   select HAVE_KVM_IRQCHIP
---help---
  Adds support for the Architected Timers in virtual machines.
 
diff --git a/arch/arm64/kvm/Makefile b/arch/arm64/kvm/Makefile
index 72a9fd5..40b9970 100644
--- a/arch/arm64/kvm/Makefile
+++ b/arch/arm64/kvm/Makefile
@@ -11,7 +11,7 @@ ARM=../../../arch/arm/kvm
 
 obj-$(CONFIG_KVM_ARM_HOST) += kvm.o
 
-kvm-$(CONFIG_KVM_ARM_HOST) += $(KVM)/kvm_main.o $(KVM)/coalesced_mmio.o
+kvm-$(CONFIG_KVM_ARM_HOST) += $(KVM)/kvm_main.o $(KVM)/coalesced_mmio.o $(KVM)/eventfd.o
 kvm-$(CONFIG_KVM_ARM_HOST) += $(ARM)/arm.o $(ARM)/mmu.o $(ARM)/mmio.o
 kvm-$(CONFIG_KVM_ARM_HOST) += $(ARM)/psci.o $(ARM)/perf.o
 
diff --git a/drivers/vfio/platform/Kconfig b/drivers/vfio/platform/Kconfig
index c51af17..43ee890 100644
--- a/drivers/vfio/platform/Kconfig
+++ b/drivers/vfio/platform/Kconfig
@@ -1,6 +1,6 @@
 config VFIO_PLATFORM
tristate "VFIO support for platform devices"
-   depends on VFIO && EVENTFD && ARM
+   depends on VFIO && EVENTFD && (ARM || ARM64)
help
  Support for platform devices with VFIO. This is required to make
  use of platform devices present on the system using the VFIO



Re: KVM on ARM64

2014-08-07 Thread Joel Schopp
virtio will get you the best performance, so why would you want to use
something slower?


-Joel

On 08/07/2014 11:51 AM, Mathew Li wrote:

Great. VirtIO works for me. Thanks for your help folks!

Is there any other way to add a virtual disk, more like a traditional
disk, to qemu-system-aarch64? For example an IDE disk, a SATA disk, or
maybe a SCSI disk?

On Wed, Aug 6, 2014 at 9:48 AM, Joel Schopp joel.sch...@amd.com wrote:

It turns out that after a recent rebase of my kernel and qemu to the
latest the problem is fixed.  Rather than hunt down what fixed it I'm
just accepting the win and moving on. -smp 4 now works.

-Joel

On 08/06/2014 11:15 AM, Christoffer Dall wrote:

On Tue, Aug 5, 2014 at 4:18 PM, Joel Schopp joel.sch...@amd.com wrote:

On 08/04/2014 07:35 PM, Mathew Li wrote:

Hi,

I have a quick question. How do we add a hard disk to the qemu ARM VM?

I tried:

qemu-system-aarch64 -machine virt -hda disk.img -kernel image -initrd initrd.img

qemu-system-aarch64 -machine virt -sd disk.img -kernel image -initrd initrd.img

qemu-system-aarch64 -machine virt -mtdblock disk.img -kernel image
-initrd initrd.img

Nothing seems to work. I am not able to see any disk (i.e. dev/sdX)
inside guest OS.

I've been running something like this:

qemu-system-aarch64 -smp 1 --enable-kvm -nographic -netdev 
tap,id=t0,ifname=tap0,script=no,downscript=no,vhost=on -device 
virtio-net-device,netdev=t0,id=nic0 \
-kernel /extra/rootfs/boot/Image -drive file=/extra/rootfs.img,id=fs -device
virtio-blk-device,drive=fs -m 512 -M virt -cpu host -append "console=ttyAMA0
console=ttyS0 root=/dev/vda"


On my system -smp 2 or higher hangs in the guest kernel.

The -smp 2 hang issue is probably due to a missing PSCI v0.2 follow-up
patch to QEMU, you can try:
https://git.linaro.org/people/christoffer.dall/qemu-arm.git/shortlog/refs/heads/psci2-smp-fix

[disclaimer: there may be a better fix somewhere on the qemu list, I
haven't kept track the last couple of days]

-Christoffer




Re: KVM on ARM64

2014-08-07 Thread Joel Schopp


On 08/07/2014 12:53 PM, Christoffer Dall wrote:

Currently we only model a virtual machine board (the -machine
type=virt parameter) which has a UART, a flash, an RTC, and a bunch of
virtio-mmio channels.

Once we either emulate a real aarch64 board (with whatever peripherals
it may have) or add a PCI controller to the virt board, then you can
choose whatever storage the real board has or start doing interesting
things like plugging in a scsi controller to your PCI controller on
the virt board or whatever else you desire.
I am very interested in having a PCI controller on the virt board to be 
able to do some testing of -device pci-assign and -device vfio-pci.  
I noticed that Alvise Rigo (ccd) had sent some patches out to the 
qemu-devel list July 11th that seem to add a generic pci controller.




But as Joel points out, VirtIO is likely to get you the best
performance and is the most convenient method.

-Christoffer

On Thu, Aug 7, 2014 at 6:51 PM, Mathew Li mathew.li...@gmail.com wrote:

Great. VirtIO works for me. Thanks for your help folks!

Is there any other way to add a virtual disk, more like a traditional
disk, to qemu-system-aarch64? For example an IDE disk, a SATA disk, or
maybe a SCSI disk?





Re: KVM on ARM64

2014-08-06 Thread Joel Schopp
It turns out that after a recent rebase of my kernel and qemu to the
latest the problem is fixed.  Rather than hunt down what fixed it I'm
just accepting the win and moving on. -smp 4 now works.

-Joel

On 08/06/2014 11:15 AM, Christoffer Dall wrote:
 On Tue, Aug 5, 2014 at 4:18 PM, Joel Schopp joel.sch...@amd.com wrote:
 On 08/04/2014 07:35 PM, Mathew Li wrote:
 Hi,

 I have a quick question. How do we add a hard disk to the qemu ARM VM?

 I tried:

 qemu-system-aarch64 -machine virt -hda disk.img -kernel image -initrd 
 initrd.img

 qemu-system-aarch64 -machine virt -sd disk.img -kernel image -initrd 
 initrd.img

 qemu-system-aarch64 -machine virt -mtdblock disk.img -kernel image
 -initrd initrd.img

 Nothing seems to work. I am not able to see any disk (i.e. dev/sdX)
 inside guest OS.
 I've been running something like this:

 qemu-system-aarch64 -smp 1 --enable-kvm -nographic -netdev 
 tap,id=t0,ifname=tap0,script=no,downscript=no,vhost=on -device 
 virtio-net-device,netdev=t0,id=nic0 \
 -kernel /extra/rootfs/boot/Image -drive file=/extra/rootfs.img,id=fs -device
 virtio-blk-device,drive=fs -m 512 -M virt -cpu host -append "console=ttyAMA0
 console=ttyS0 root=/dev/vda"


 On my system -smp 2 or higher hangs in the guest kernel.
 The -smp 2 hang issue is probably due to a missing PSCI v0.2 follow-up
 patch to QEMU, you can try:
 https://git.linaro.org/people/christoffer.dall/qemu-arm.git/shortlog/refs/heads/psci2-smp-fix

 [disclaimer: there may be a better fix somewhere on the qemu list, I
 haven't kept track the last couple of days]

 -Christoffer



Re: KVM on ARM64

2014-08-05 Thread Joel Schopp

On 08/04/2014 07:35 PM, Mathew Li wrote:
 Hi,

 I have a quick question. How do we add a hard disk to the qemu ARM VM?

 I tried:

 qemu-system-aarch64 -machine virt -hda disk.img -kernel image -initrd 
 initrd.img

 qemu-system-aarch64 -machine virt -sd disk.img -kernel image -initrd 
 initrd.img

 qemu-system-aarch64 -machine virt -mtdblock disk.img -kernel image
 -initrd initrd.img

 Nothing seems to work. I am not able to see any disk (i.e. dev/sdX)
 inside guest OS.
I've been running something like this:

qemu-system-aarch64 -smp 1 --enable-kvm -nographic -netdev 
tap,id=t0,ifname=tap0,script=no,downscript=no,vhost=on -device 
virtio-net-device,netdev=t0,id=nic0 \
-kernel /extra/rootfs/boot/Image -drive file=/extra/rootfs.img,id=fs -device
virtio-blk-device,drive=fs -m 512 -M virt -cpu host -append "console=ttyAMA0
console=ttyS0 root=/dev/vda"


On my system -smp 2 or higher hangs in the guest kernel.


[PATCH v3] arm64: fix VTTBR_BADDR_MASK

2014-08-04 Thread Joel Schopp
The current VTTBR_BADDR_MASK only masks 39 bits, which is broken on current
systems.  Rather than just add a bit it seems like a good time to also set
things at run-time instead of compile time to accommodate more hardware.

This patch sets TCR_EL2.PS, VTCR_EL2.T0SZ and vttbr_baddr_mask at runtime,
not compile time.

In ARMv8, the EL2 physical address size (TCR_EL2.PS) and stage2 input address
size (VTCR_EL2.T0SZ) cannot be determined at compile time since they
depend on hardware capability.

According to Table D4-23 and Table D4-25 in ARM DDI 0487A.b document,
vttbr_x is calculated using different hard-coded values with consideration
of T0SZ, granule size and the level of translation tables. Therefore,
vttbr_baddr_mask should be determined dynamically.

Changes since v2:
Rebased on https://git.kernel.org/pub/scm/linux/kernel/git/kvmarm/kvmarm.git 
next branch

Changes since v1:
Rebased fix on Jungseok Lee's patch https://lkml.org/lkml/2014/5/12/189 to
provide better long term fix.  Updated that patch to log error instead of
silently fail on unaligned vttbr.

Cc: Christoffer Dall christoffer.d...@linaro.org
Cc: Sungjinn Chung sungjinn.ch...@samsung.com
Signed-off-by: Jungseok Lee jays@samsung.com
Signed-off-by: Joel Schopp joel.sch...@amd.com
---
 arch/arm/kvm/arm.c   |   91 +-
 arch/arm64/include/asm/kvm_arm.h |   17 +--
 arch/arm64/kvm/hyp-init.S|   20 ++--
 3 files changed, 106 insertions(+), 22 deletions(-)

diff --git a/arch/arm/kvm/arm.c b/arch/arm/kvm/arm.c
index d7424ef..d7ca2f5 100644
--- a/arch/arm/kvm/arm.c
+++ b/arch/arm/kvm/arm.c
@@ -37,6 +37,7 @@
 #include asm/mman.h
 #include asm/tlbflush.h
 #include asm/cacheflush.h
+#include asm/cputype.h
 #include asm/virt.h
 #include asm/kvm_arm.h
 #include asm/kvm_asm.h
@@ -61,6 +62,9 @@ static atomic64_t kvm_vmid_gen = ATOMIC64_INIT(1);
 static u8 kvm_next_vmid;
 static DEFINE_SPINLOCK(kvm_vmid_lock);
 
+/* VTTBR mask cannot be determined at compile time under ARMv8 */
+static u64 vttbr_baddr_mask;
+
 static bool vgic_present;
 
 static void kvm_arm_set_running_vcpu(struct kvm_vcpu *vcpu)
@@ -376,6 +380,75 @@ static bool need_new_vmid_gen(struct kvm *kvm)
 }
 
 /**
+ * set_vttbr_baddr_mask - set mask value for vttbr base address
+ *
+ * In ARMv8, vttbr_baddr_mask cannot be determined at compile time since stage2
+ * input address size depends on hardware capability. Thus, we need to read
+ * ID_AA64MMFR0_EL1.PARange first and then set vttbr_baddr_mask with
+ * consideration of both granule size and the level of translation tables.
+ */
+static int set_vttbr_baddr_mask(void)
+{
+#ifndef CONFIG_ARM64
+   vttbr_baddr_mask = VTTBR_BADDR_MASK;
+#else
+   int pa_range, t0sz, vttbr_x;
+
+   pa_range = read_cpuid(ID_AA64MMFR0_EL1) & 0xf;
+
+   switch (pa_range) {
+   case 0:
+   t0sz = VTCR_EL2_T0SZ(32);
+   break;
+   case 1:
+   t0sz = VTCR_EL2_T0SZ(36);
+   break;
+   case 2:
+   t0sz = VTCR_EL2_T0SZ(40);
+   break;
+   case 3:
+   t0sz = VTCR_EL2_T0SZ(42);
+   break;
+   case 4:
+   t0sz = VTCR_EL2_T0SZ(44);
+   break;
+   default:
+   t0sz = VTCR_EL2_T0SZ(48);
+   }
+
+   /*
+* See Table D4-23 and Table D4-25 in ARM DDI 0487A.b to figure out
+* the origin of the hardcoded values, 38 and 37.
+*/
+#ifdef CONFIG_ARM64_64K_PAGES
+   /*
+* 16 <= T0SZ <= 21 is valid under 3 levels of translation tables
+* 18 <= T0SZ <= 34 is valid under 2 levels of translation tables
+* 31 <= T0SZ <= 39 is valid under 1 level of translation tables
+*/
+   if (t0sz <= 17) {
+   kvm_err("Cannot support %d-bit address space\n", 64 - t0sz);
+   return -EINVAL;
+   }
+   vttbr_x = 38 - t0sz;
+#else
+   /*
+* 16 <= T0SZ <= 24 is valid under 4 levels of translation tables
+* 21 <= T0SZ <= 30 is valid under 3 levels of translation tables
+* 30 <= T0SZ <= 39 is valid under 2 levels of translation tables
+*/
+   if (t0sz <= 20) {
+   kvm_err("Cannot support %d-bit address space\n", 64 - t0sz);
+   return -EINVAL;
+   }
+   vttbr_x = 37 - t0sz;
+#endif
+   vttbr_baddr_mask = (((1LLU << (48 - vttbr_x)) - 1) << (vttbr_x - 1));
+#endif
+   return 0;
+}
+
+/**
  * update_vttbr - Update the VTTBR with a valid VMID before the guest runs
  * @kvmThe guest that we are about to run
  *
@@ -429,8 +502,16 @@ static void update_vttbr(struct kvm *kvm)
/* update vttbr to be used with the new vmid */
pgd_phys = virt_to_phys(kvm->arch.pgd);
vmid = ((u64)(kvm->arch.vmid) << VTTBR_VMID_SHIFT) & VTTBR_VMID_MASK;
-   kvm->arch.vttbr = pgd_phys & VTTBR_BADDR_MASK;
-   kvm->arch.vttbr |= vmid;
+
+   /*
+* If the VTTBR isn't aligned there is something wrong with the system

Re: [PATCH v3] arm64: fix VTTBR_BADDR_MASK

2014-08-04 Thread Joel Schopp
Since this fixes a real problem and didn't make it into 3.16 it would be
good if this made it into 3.17.

-Joel

On 08/04/2014 09:38 AM, Joel Schopp wrote:
 The current VTTBR_BADDR_MASK only masks 39 bits, which is broken on current
 systems.  Rather than just add a bit it seems like a good time to also set
 things at run-time instead of compile time to accommodate more hardware.

 This patch sets TCR_EL2.PS, VTCR_EL2.T0SZ and vttbr_baddr_mask at runtime,
 not compile time.

 In ARMv8, the EL2 physical address size (TCR_EL2.PS) and stage2 input address
 size (VTCR_EL2.T0SZ) cannot be determined at compile time since they
 depend on hardware capability.

 According to Table D4-23 and Table D4-25 in ARM DDI 0487A.b document,
 vttbr_x is calculated using different hard-coded values with consideration
 of T0SZ, granule size and the level of translation tables. Therefore,
 vttbr_baddr_mask should be determined dynamically.

 Changes since v2:
 Rebased on https://git.kernel.org/pub/scm/linux/kernel/git/kvmarm/kvmarm.git 
 next branch

 Changes since v1:
 Rebased fix on Jungseok Lee's patch https://lkml.org/lkml/2014/5/12/189 to
 provide better long term fix.  Updated that patch to log error instead of
 silently fail on unaligned vttbr.

 Cc: Christoffer Dall christoffer.d...@linaro.org
 Cc: Sungjinn Chung sungjinn.ch...@samsung.com
 Signed-off-by: Jungseok Lee jays@samsung.com
 Signed-off-by: Joel Schopp joel.sch...@amd.com
 ---
  arch/arm/kvm/arm.c   |   91 +-
  arch/arm64/include/asm/kvm_arm.h |   17 +--
  arch/arm64/kvm/hyp-init.S|   20 ++--
  3 files changed, 106 insertions(+), 22 deletions(-)

 diff --git a/arch/arm/kvm/arm.c b/arch/arm/kvm/arm.c
 index d7424ef..d7ca2f5 100644
 --- a/arch/arm/kvm/arm.c
 +++ b/arch/arm/kvm/arm.c
 @@ -37,6 +37,7 @@
  #include asm/mman.h
  #include asm/tlbflush.h
  #include asm/cacheflush.h
 +#include asm/cputype.h
  #include asm/virt.h
  #include asm/kvm_arm.h
  #include asm/kvm_asm.h
 @@ -61,6 +62,9 @@ static atomic64_t kvm_vmid_gen = ATOMIC64_INIT(1);
  static u8 kvm_next_vmid;
  static DEFINE_SPINLOCK(kvm_vmid_lock);
  
 +/* VTTBR mask cannot be determined at compile time under ARMv8 */
 +static u64 vttbr_baddr_mask;
 +
  static bool vgic_present;
  
  static void kvm_arm_set_running_vcpu(struct kvm_vcpu *vcpu)
 @@ -376,6 +380,75 @@ static bool need_new_vmid_gen(struct kvm *kvm)
  }
  
  /**
 + * set_vttbr_baddr_mask - set mask value for vttbr base address
 + *
 + * In ARMv8, vttbr_baddr_mask cannot be determined at compile time since stage2
 + * input address size depends on hardware capability. Thus, we need to read
 + * ID_AA64MMFR0_EL1.PARange first and then set vttbr_baddr_mask with
 + * consideration of both granule size and the level of translation tables.
 + */
 +static int set_vttbr_baddr_mask(void)
 +{
 +#ifndef CONFIG_ARM64
 + vttbr_baddr_mask = VTTBR_BADDR_MASK;
 +#else
 + int pa_range, t0sz, vttbr_x;
 +
 + pa_range = read_cpuid(ID_AA64MMFR0_EL1) & 0xf;
 +
 + switch (pa_range) {
 + case 0:
 + t0sz = VTCR_EL2_T0SZ(32);
 + break;
 + case 1:
 + t0sz = VTCR_EL2_T0SZ(36);
 + break;
 + case 2:
 + t0sz = VTCR_EL2_T0SZ(40);
 + break;
 + case 3:
 + t0sz = VTCR_EL2_T0SZ(42);
 + break;
 + case 4:
 + t0sz = VTCR_EL2_T0SZ(44);
 + break;
 + default:
 + t0sz = VTCR_EL2_T0SZ(48);
 + }
 +
 + /*
 +  * See Table D4-23 and Table D4-25 in ARM DDI 0487A.b to figure out
 +  * the origin of the hardcoded values, 38 and 37.
 +  */
 +#ifdef CONFIG_ARM64_64K_PAGES
 + /*
 +  * 16 <= T0SZ <= 21 is valid under 3 levels of translation tables
 +  * 18 <= T0SZ <= 34 is valid under 2 levels of translation tables
 +  * 31 <= T0SZ <= 39 is valid under 1 level of translation tables
 +  */
 + if (t0sz <= 17) {
 + kvm_err("Cannot support %d-bit address space\n", 64 - t0sz);
 + return -EINVAL;
 + }
 + vttbr_x = 38 - t0sz;
 +#else
 + /*
 +  * 16 <= T0SZ <= 24 is valid under 4 levels of translation tables
 +  * 21 <= T0SZ <= 30 is valid under 3 levels of translation tables
 +  * 30 <= T0SZ <= 39 is valid under 2 levels of translation tables
 +  */
 + if (t0sz <= 20) {
 + kvm_err("Cannot support %d-bit address space\n", 64 - t0sz);
 + return -EINVAL;
 + }
 + vttbr_x = 37 - t0sz;
 +#endif
 + vttbr_baddr_mask = (((1LLU << (48 - vttbr_x)) - 1) << (vttbr_x - 1));
 +#endif
 + return 0;
 +}
 +
 +/**
   * update_vttbr - Update the VTTBR with a valid VMID before the guest runs
   * @kvm  The guest that we are about to run
   *
 @@ -429,8 +502,16 @@ static void update_vttbr(struct kvm *kvm)
   /* update vttbr to be used with the new vmid */
   pgd_phys = virt_to_phys(kvm->arch.pgd);
   vmid = ((u64)(kvm->arch.vmid) << VTTBR_VMID_SHIFT

[PATCH] arm64: bump MAX_MASTER_STREAMIDS from 16 to 32

2014-07-31 Thread Joel Schopp
I recently ran into a situation where I needed more than 16 stream ids
for an smmu on an AMD SOC, but we are currently limited to 16 by:

#define MAX_MASTER_STREAMIDS   MAX_PHANDLE_ARGS
#define MAX_PHANDLE_ARGS 16

I expect others will run into this in the future as more advanced SOCs
start to come out. The resulting one line patch has been tested to fix
the problem.

Cc: Grant Likely grant.lik...@linaro.org
Cc: Rob Herring robh...@kernel.org
Cc: Will Deacon will.dea...@arm.com
Signed-off-by: Joel Schopp joel.sch...@amd.com
---
 include/linux/of.h |2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/include/linux/of.h b/include/linux/of.h
index 196b34c..70ad4d9 100644
--- a/include/linux/of.h
+++ b/include/linux/of.h
@@ -67,7 +67,7 @@ struct device_node {
 #endif
 };
 
-#define MAX_PHANDLE_ARGS 16
+#define MAX_PHANDLE_ARGS 32
 struct of_phandle_args {
struct device_node *np;
int args_count;
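
For context, a hedged sketch of how this limit bites in practice. The
"mmu-masters" and "#stream-id-cells" property names follow the 2014 arm-smmu
binding, and the helper name is made up for illustration: stream IDs are
parsed into struct of_phandle_args, whose fixed args[] array is sized by
MAX_PHANDLE_ARGS, so 16 was the per-master ceiling before this bump.

#include <linux/of.h>

/*
 * Illustrative only, not part of the patch: of_parse_phandle_with_args()
 * fills at most MAX_PHANDLE_ARGS argument cells into masterspec.args[].
 */
static int count_master_stream_ids(struct device_node *smmu_np, int index)
{
        struct of_phandle_args masterspec;
        int ret;

        ret = of_parse_phandle_with_args(smmu_np, "mmu-masters",
                                         "#stream-id-cells", index,
                                         &masterspec);
        if (ret)
                return ret;

        return masterspec.args_count; /* capped at MAX_PHANDLE_ARGS */
}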



Re: Verifying Execution Integrity in Untrusted hypervisors

2014-07-28 Thread Joel Schopp

On 07/25/2014 03:11 PM, Shiva V wrote:
 Hello,
 I am exploring ways to ensure the runtime integrity of an executable in
 untrusted hypervisors.

 In particular, these are my requirements:

 1. I have 2 virtual machines (A, B).

 2. VM-A is running some service (exe) inside it, for example any resource
 accounting service intended to monitor VM-B.

 3. I need a way to verify the runtime integrity, from VM-B, of the
 executable running inside VM-A.

 4. Both VMs are unprivileged; they are just normal client virtual machines.

 5. The underlying hypervisor is untrusted.
If the hypervisor is untrusted you have broken the root of trust and are
going to be pretty out of luck.

Any solution will require a level below the hypervisor that you trust.
An example would be hardware that isolates memory from the hypervisor,
i.e.
https://www.google.com/patents/WO2013054528A1?cl=en&dq=Joel+Schopp&hl=en&sa=X&ei=YYPWU6aVJNProATe5IHACQ&ved=0CDMQ6AEwAw

Another approach might be to start with something like a TPM and a
trusted runtime UEFI.  You could then have the guest call UEFI to do
measurement with the TPM and use that for remote attestation.  With such
a method you could probably get to the point that you could measure
something in guest memory at run-time, but you would have no assurance
it hadn't been modified before or after and was just temporarily correct;
it would be a very point-in-time measurement.



Re: [PATCH] kvm: arm64: vgic: fix hyp panic with 64k pages on juno platform

2014-07-25 Thread Joel Schopp

 I can't think of any way of determining whether a particular
 system gets this right or wrong automatically, which suggests
 perhaps we need to allow the device tree to specify that the
 GICV is 64k-page-safe...
 When we support such systems, I also think we'll need a device-tree change.
 My main concern right now is stopping the ability to hose the entire machine
 by trying to instantiate a virtual GIC.
 ...I don't see how your patch prevents instantiating a VGIC
 and hosing the machine on a system where the 64K
 with the GICV registers in it goes
  [GICV registers] [machine blows up if you read this]
  0K  8K 64K
 True, if such a machine existed, then this patch wouldn't detect it. I don't
 think we support anything like that in mainline at the moment, but the
 following additional diff should solve the problem, no?

 diff --git a/virt/kvm/arm/vgic.c b/virt/kvm/arm/vgic.c
 index fa9a95b3ed19..476d3bf540a8 100644
 --- a/virt/kvm/arm/vgic.c
 +++ b/virt/kvm/arm/vgic.c
 @@ -1539,6 +1539,14 @@ int kvm_vgic_hyp_init(void)
 goto out_unmap;
 }
  
 +   if (!PAGE_ALIGNED(resource_size(vcpu_res))) {
 +   kvm_err("GICV size 0x%llx not a multiple of page size 0x%lx\n",
 +   (unsigned long long)resource_size(vcpu_res),
 +   PAGE_SIZE);
 +   ret = -ENXIO;
 +   goto out_unmap;
 +   }
 +
 vgic_vcpu_base = vcpu_res.start;
  
 kvm_info("%s@%llx IRQ%d\n", vgic_node->name,
This would break with my SOC device tree which looks like this.  Note
this device tree works just fine without checks.

gic: interrupt-controller@e1101000 {
compatible = "arm,gic-400-v2m";
#interrupt-cells = <3>;
#address-cells = <0>;
interrupt-controller;
msi-controller;
reg = <0x0 0xe1110000 0 0x1000>, /* gic dist */
  <0x0 0xe112f000 0 0x2000>, /* gic cpu */
  <0x0 0xe114f000 0 0x2000>, /* gic virtual ic*/
  <0x0 0xe116f000 0 0x2000>, /* gic virtual cpu*/
  <0x0 0xe1180000 0 0x1000>; /* gic msi */
interrupts = <1 8 0xf04>;
};



Re: [PATCH] kvm: arm64: vgic: fix hyp panic with 64k pages on juno platform

2014-07-25 Thread Joel Schopp

On 07/25/2014 09:08 AM, Will Deacon wrote:
 Hi Joel,

 On Fri, Jul 25, 2014 at 03:02:58PM +0100, Joel Schopp wrote:
 I can't think of any way of determining whether a particular
 system gets this right or wrong automatically, which suggests
 perhaps we need to allow the device tree to specify that the
 GICV is 64k-page-safe...
 When we support such systems, I also think we'll need a device-tree 
 change.
 My main concern right now is stopping the ability to hose the entire 
 machine
 by trying to instantiate a virtual GIC.
 ...I don't see how your patch prevents instantiating a VGIC
 and hosing the machine on a system where the 64K
 with the GICV registers in it goes
  [GICV registers] [machine blows up if you read this]
  0K  8K 64K
 True, if such a machine existed, then this patch wouldn't detect it. I don't
 think we support anything like that in mainline at the moment, but the
 following additional diff should solve the problem, no?

 diff --git a/virt/kvm/arm/vgic.c b/virt/kvm/arm/vgic.c
 index fa9a95b3ed19..476d3bf540a8 100644
 --- a/virt/kvm/arm/vgic.c
 +++ b/virt/kvm/arm/vgic.c
 @@ -1539,6 +1539,14 @@ int kvm_vgic_hyp_init(void)
 goto out_unmap;
 }
  
 +   if (!PAGE_ALIGNED(resource_size(vcpu_res))) {
 +   kvm_err("GICV size 0x%llx not a multiple of page size 0x%lx\n",
 +   (unsigned long long)resource_size(vcpu_res),
 +   PAGE_SIZE);
 +   ret = -ENXIO;
 +   goto out_unmap;
 +   }
 +
 vgic_vcpu_base = vcpu_res.start;
  
 kvm_info("%s@%llx IRQ%d\n", vgic_node->name,
 This would break with my SOC device tree which looks like this.  Note
 this device tree works just fine without checks.

 gic: interrupt-controller@e1101000 {
 compatible = "arm,gic-400-v2m";
 #interrupt-cells = <3>;
 #address-cells = <0>;
 interrupt-controller;
 msi-controller;
 reg = <0x0 0xe1110000 0 0x1000>, /* gic dist */
   <0x0 0xe112f000 0 0x2000>, /* gic cpu */
   <0x0 0xe114f000 0 0x2000>, /* gic virtual ic*/
   <0x0 0xe116f000 0 0x2000>, /* gic virtual cpu*/
   <0x0 0xe1180000 0 0x1000>; /* gic msi */
 interrupts = <1 8 0xf04>;
 };
 I appreciate it may work, but that's only because the kernel is actually
 using an alias of GICV at 0xe116 by accident. I would say that you're
 getting away with passing an incorrect description.
The problem is that by the spec the size is 0x2000 and was never
properly rearchitected from 4K to variable page size support. The
workaround is that each of the 4K regions in the 64K page (16 of them) is
really mapped to the same location in hardware. So the contents of
0xe1160000 are the same as the contents of 0xe116f000. See “Appendix F:
GIC-400 and 64KB Translation Granule” in v2.1 of the ARM _Server Base
System Architecture_ specification.

Now if we were to change the entry to 0x0 0xe1160000 0 0x2000 it would
be obviously broken because the second half would map to a duplicate of
the first half.
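
For illustration, a minimal user-space sketch (addresses taken from the
device tree above; it assumes the SBSA Appendix F aliasing and a 64K host
page size) showing why the 0xf000 offset works: an 8K GICV window straddles
two 64K pages and picks up exactly one 4K alias from each.

#include <stdio.h>

int main(void)
{
        unsigned long long gicv = 0xe116f000ULL; /* gic virtual cpu base */
        unsigned long long size = 0x2000ULL;     /* 8K GICV */

        /* Print which 64K page and page offset each 4K of GICV lands in */
        for (unsigned long long off = 0; off < size; off += 0x1000)
                printf("GICV+0x%04llx -> 64K page 0x%llx, offset 0x%04llx\n",
                       off, (gicv + off) & ~0xffffULL,
                       (gicv + off) & 0xffffULL);
        return 0;
}

This prints page 0xe1160000 at offset 0xf000 for the first 4K and page
0xe1170000 at offset 0x0 for the second, i.e. one alias from each page,
which together cover the full 8K GICV.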


Re: [PATCH] kvm: arm64: vgic: fix hyp panic with 64k pages on juno platform

2014-07-25 Thread Joel Schopp

On 07/25/2014 09:23 AM, Will Deacon wrote:
 On Fri, Jul 25, 2014 at 03:16:15PM +0100, Joel Schopp wrote:
 On 07/25/2014 09:08 AM, Will Deacon wrote:
 This would break with my SOC device tree which looks like this.  Note
 this device tree works just fine without checks.

 gic: interrupt-controller@e1101000 {
 compatible = "arm,gic-400-v2m";
 #interrupt-cells = <3>;
 #address-cells = <0>;
 interrupt-controller;
 msi-controller;
 reg = <0x0 0xe1110000 0 0x1000>, /* gic dist */
   <0x0 0xe112f000 0 0x2000>, /* gic cpu */
   <0x0 0xe114f000 0 0x2000>, /* gic virtual ic*/
   <0x0 0xe116f000 0 0x2000>, /* gic virtual cpu*/
   <0x0 0xe1180000 0 0x1000>; /* gic msi */
 interrupts = <1 8 0xf04>;
 };
 I appreciate it may work, but that's only because the kernel is actually
 using an alias of GICV at 0xe116 by accident. I would say that you're
 getting away with passing an incorrect description.
 The problem is that by the spec the size is 0x2000 and was never
 properly rearchitected from 4K to variable page size support. The
 workaround is that each of the 4K regions in the 64K page (16 of them) is
 really mapped to the same location in hardware. So the contents of
 0xe1160000 are the same as the contents of 0xe116f000. See “Appendix F:
 GIC-400 and 64KB Translation Granule” in v2.1 of the ARM _Server Base
 System Architecture_ specification.
 I've read that document, but it's not mandated and I don't think I have a
 single piece of hardware that actually follows it. Even the CPUs don't seem
 to perform the aliasing suggesting there (take a look at the A57 and A53
 TRMs -- there are reserved regions in there).
Not mandated, but it is obviously highly encouraged, and at a minimum
valid.  The SOC sitting under my desk follows it.


 Now if we were to change the entry to 0x0 0xe1160000 0 0x2000 it would
 be obviously broken because the second half would map to a duplicate of
 the first half.
 I think you'd need something like 0x0 0xe1160000 0 0x20000 and we'd have
 to change the GIC driver to do the right thing. What we currently have is
 unsafe on most hardware, yet happens to work on your system.
If you change the GIC driver, kvm, kvmtool, and qemu to do the right
thing I'd be happy to change the device tree entry to 0x0 0xe1160000 0
0x20000 or any other value of your choosing.  I really have no opinion
here on how it should be done other than what is there now currently
works and I'd like to have whatever we do in the future continue to work.




Re: [PATCH v2] kvm: arm64: vgic: fix hyp panic with 64k pages on juno platform

2014-07-25 Thread Joel Schopp

On 07/25/2014 10:29 AM, Will Deacon wrote:
 If the physical address of GICV isn't page-aligned, then we end up
 creating a stage-2 mapping of the page containing it, which causes us to
 map neighbouring memory locations directly into the guest.

 As an example, consider a platform with GICV at physical 0x2c02f000
 running a 64k-page host kernel. If qemu maps this into the guest at
 0x80010000, then guest physical addresses 0x80010000 - 0x8001efff will
 map host physical region 0x2c020000 - 0x2c02efff. Accesses to these
 physical regions may cause UNPREDICTABLE behaviour, for example, on the
 Juno platform this will cause an SError exception to EL3, which brings
 down the entire physical CPU resulting in RCU stalls / HYP panics / host
 crashing / wasted weeks of debugging.
No denying this is a problem.

 SBSA recommends that systems alias the 4k GICV across the bounding 64k
 region, in which case GICV physical could be described as 0x2c020000 in
 the above scenario.
The problem with this patch is the gicv is really 8K.  The reason you
would map at a 60K offset (0xf000), and why we do on our SOC, is so that
the 8K gicv would pick up the last 4K from the first page and the first
4K from the next page.  With your patch it is impossible to map all 8K
of the gicv with 64K pages. 

My SOC which works fine with kvm now will go to not working with kvm
after this patch. 


 This patch fixes the problem by failing the vgic probe if the physical
 base address or the size of GICV aren't page-aligned. Note that this
 generated a warning in dmesg about freeing enabled IRQs, so I had to
 move the IRQ enabling later in the probe.

 Cc: Christoffer Dall christoffer.d...@linaro.org
 Cc: Marc Zyngier marc.zyng...@arm.com
 Cc: Gleb Natapov g...@kernel.org
 Cc: Paolo Bonzini pbonz...@redhat.com
 Cc: Joel Schopp joel.sch...@amd.com
 Cc: Don Dutile ddut...@redhat.com
 Acked-by: Peter Maydell peter.mayd...@linaro.org
 Signed-off-by: Will Deacon will.dea...@arm.com
 ---

 v1 -> v2: Added size alignment check and Peter's ack. Could this go in
   for 3.16 please?

  virt/kvm/arm/vgic.c | 24 
  1 file changed, 20 insertions(+), 4 deletions(-)

 diff --git a/virt/kvm/arm/vgic.c b/virt/kvm/arm/vgic.c
 index 56ff9bebb577..476d3bf540a8 100644
 --- a/virt/kvm/arm/vgic.c
 +++ b/virt/kvm/arm/vgic.c
 @@ -1526,17 +1526,33 @@ int kvm_vgic_hyp_init(void)
   goto out_unmap;
   }
  
 - kvm_info("%s@%llx IRQ%d\n", vgic_node->name,
 -  vctrl_res.start, vgic_maint_irq);
 - on_each_cpu(vgic_init_maintenance_interrupt, NULL, 1);
 -
   if (of_address_to_resource(vgic_node, 3, vcpu_res)) {
   kvm_err("Cannot obtain VCPU resource\n");
   ret = -ENXIO;
   goto out_unmap;
   }
 +
 + if (!PAGE_ALIGNED(vcpu_res.start)) {
 + kvm_err("GICV physical address 0x%llx not page aligned\n",
 + (unsigned long long)vcpu_res.start);
 + ret = -ENXIO;
 + goto out_unmap;
 + }
 +
 + if (!PAGE_ALIGNED(resource_size(vcpu_res))) {
 + kvm_err("GICV size 0x%llx not a multiple of page size 0x%lx\n",
 + (unsigned long long)resource_size(vcpu_res),
 + PAGE_SIZE);
 + ret = -ENXIO;
 + goto out_unmap;
 + }
 +
   vgic_vcpu_base = vcpu_res.start;
  
 + kvm_info("%s@%llx IRQ%d\n", vgic_node->name,
 +  vctrl_res.start, vgic_maint_irq);
 + on_each_cpu(vgic_init_maintenance_interrupt, NULL, 1);
 +
   goto out;
  
  out_unmap:



Re: [PATCH v2] kvm: arm64: vgic: fix hyp panic with 64k pages on juno platform

2014-07-25 Thread Joel Schopp

On 07/25/2014 11:02 AM, Peter Maydell wrote:
 On 25 July 2014 16:56, Joel Schopp joel.sch...@amd.com wrote:
 The problem with this patch is the gicv is really 8K.  The reason you
 would map at a 60K offset (0xf000), and why we do on our SOC, is so that
 the 8K gicv would pick up the last 4K from the first page and the first
 4K from the next page.  With your patch it is impossible to map all 8K
 of the gicv with 64K pages.

 My SOC which works fine with kvm now will go to not working with kvm
 after this patch.
 Your SOC currently works by fluke because the guest doesn't
 look at the last 4K of the GICC. If you're happy with it continuing
 to work by fluke you could make your device tree say it had a
 64K GICV region with a 64K-aligned base.

 To make it work not by fluke but actually correctly requires
 Marc's patchset, at a minimum.

Since we aren't actually using the last 4K of the gicv at the moment I suppose
I could drop my objections to this patch and change my device tree until Marc's 
patchset provides a proper solution for the gicv's second 4K that works for 
everybody.

Acked-by: Joel Schopp joel.sch...@amd.com



Re: [PATCH] kvm: arm64: vgic: fix hyp panic with 64k pages on juno platform

2014-07-24 Thread Joel Schopp

On 07/24/2014 02:47 PM, Peter Maydell wrote:
 On 24 July 2014 20:27, Will Deacon will.dea...@arm.com wrote:
 If the physical address of GICV isn't page-aligned, then we end up
 creating a stage-2 mapping of the page containing it, which causes us to
 map neighbouring memory locations directly into the guest.

 As an example, consider a platform with GICV at physical 0x2c02f000
 running a 64k-page host kernel. If qemu maps this into the guest at
 0x80010000, then guest physical addresses 0x80010000 - 0x8001efff will
 map host physical region 0x2c020000 - 0x2c02efff. Accesses to these
 physical regions may cause UNPREDICTABLE behaviour, for example, on the
 Juno platform this will cause an SError exception to EL3, which brings
 down the entire physical CPU resulting in RCU stalls / HYP panics / host
 crashing / wasted weeks of debugging.
 This seems to me like a specific problem with Juno rather than an
 issue with having the GICV at a non-page-aligned start. The
 requirement to be able to expose host GICV as the guest GICC
 in a 64K pages system is just nothing else in that 64K page
 (or pages, if the GICV runs across two pages) is allowed to be
 unsafe for the guest to touch, which remains true whether the
 GICV starts at 0K in the 64K page or 60K.

 SBSA recommends that systems alias the 4k GICV across the bounding 64k
 region, in which case GICV physical could be described as 0x2c020000 in
 the above scenario.
 The SBSA "make every 4K region in the 64K page be the same thing"
 recommendation is one way of satisfying the requirement that the
 whole 64K page is safe for the guest to touch. (Making the rest of
 the page RAZ/WI would be another option I guess.) If your system
 actually implements the SBSA recommendation then in fact
 describing the GICV-phys-base as the 64K-aligned address is wrong,
 because then the register at GICV-base + 4K would not be
 the first register in the 2nd page of the GICV, it would be another
 copy of the 1st page. This happens to work on Linux guests
 currently because they don't touch anything in the 2nd page,
 but for cases like device passthrough IIRC we might well like
 the guest to use some of the 2nd page registers. So the only
 correct choice on those systems is to specify the +60K address
 as the GICV physaddr in the device tree, and use Marc's patchset
 to allow QEMU/kvmtool to determine the page offset within the 64K
 page so it can reflect that in the guest's device tree.
I have one of those systems specifying +60K address as the GICV physaddr
and it works well for me with 64K pages and kvm with both QEMU and kvmtool.


 I can't think of any way of determining whether a particular
 system gets this right or wrong automatically, which suggests
 perhaps we need to allow the device tree to specify that the
 GICV is 64k-page-safe...
I don't have a better solution, despite my lack of enthusiasm for yet
another device tree property.


Re: [PATCH] kvm: arm64: vgic: fix hyp panic with 64k pages on juno platform

2014-07-24 Thread Joel Schopp

On 07/24/2014 02:55 PM, Will Deacon wrote:
 On Thu, Jul 24, 2014 at 08:47:23PM +0100, Peter Maydell wrote:
 On 24 July 2014 20:27, Will Deacon will.dea...@arm.com wrote:
 If the physical address of GICV isn't page-aligned, then we end up
 creating a stage-2 mapping of the page containing it, which causes us to
 map neighbouring memory locations directly into the guest.

 As an example, consider a platform with GICV at physical 0x2c02f000
 running a 64k-page host kernel. If qemu maps this into the guest at
 0x80010000, then guest physical addresses 0x80010000 - 0x8001efff will
 map host physical region 0x2c020000 - 0x2c02efff. Accesses to these
 physical regions may cause UNPREDICTABLE behaviour, for example, on the
 Juno platform this will cause an SError exception to EL3, which brings
 down the entire physical CPU resulting in RCU stalls / HYP panics / host
 crashing / wasted weeks of debugging.
 This seems to me like a specific problem with Juno rather than an
 issue with having the GICV at a non-page-aligned start. The
 requirement to be able to expose host GICV as the guest GICC
 in a 64K pages system is just nothing else in that 64K page
 (or pages, if the GICV runs across two pages) is allowed to be
 unsafe for the guest to touch, which remains true whether the
 GICV starts at 0K in the 64K page or 60K.
 I agree, and for that we would need a new ioctl so we can query the
 page-offset of the GICV on systems where it is safe. Given that such an
 ioctl doesn't exist today, I would like to plug the hole in mainline kernels
 with this patch, we can relax in the future if systems appear where it would
 be safe to map the entire 64k region.
I have such a system. 


[PATCH] kvmtool: arm64: fix compilation error

2014-07-22 Thread Joel Schopp
Currently tools/kvm doesn't build on arm64 when gtk3 is present.  The error 
looks like this:

  LINK lkvm
ui/gtk3.o: In function `kvm_gtk_key_press':
/extra/sb/linux-kvm/tools/kvm/ui/gtk3.c:201: undefined reference to `kbd_queue'
/extra/sb/linux-kvm/tools/kvm/ui/gtk3.c:204: undefined reference to `kbd_queue'
/extra/sb/linux-kvm/tools/kvm/ui/gtk3.c:216: undefined reference to `kbd_queue'
/extra/sb/linux-kvm/tools/kvm/ui/gtk3.c:217: undefined reference to `kbd_queue'
/extra/sb/linux-kvm/tools/kvm/ui/gtk3.c:218: undefined reference to `kbd_queue'
ui/gtk3.o:/extra/sb/linux-kvm/tools/kvm/ui/gtk3.c:219: more undefined 
references to `kbd_queue' follow
collect2: error: ld returned 1 exit status
make: *** [lkvm] Error 1

The patch below makes the error go away and the resulting lkvm runs on arm64.

Cc: Pekka Enberg penb...@kernel.org
Signed-off-by: Joel Schopp joel.sch...@amd.com
---
 tools/kvm/Makefile |2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tools/kvm/Makefile b/tools/kvm/Makefile
index 880d580..fba60f1 100644
--- a/tools/kvm/Makefile
+++ b/tools/kvm/Makefile
@@ -102,6 +102,7 @@ OBJS+= hw/pci-shmem.o
 OBJS   += kvm-ipc.o
 OBJS   += builtin-sandbox.o
 OBJS   += virtio/mmio.o
+OBJS   += hw/i8042.o
 
 # Translate uname -m into ARCH string
 ARCH ?= $(shell uname -m | sed -e s/i.86/i386/ -e s/ppc.*/powerpc/ \
@@ -129,7 +130,6 @@ ifeq ($(ARCH),x86)
OBJS+= x86/kvm.o
OBJS+= x86/kvm-cpu.o
OBJS+= x86/mptable.o
-   OBJS+= hw/i8042.o
 # Exclude BIOS object files from header dependencies.
OTHEROBJS   += x86/bios.o
OTHEROBJS   += x86/bios/bios-rom.o



[PATCH v2] arm64: fix VTTBR_BADDR_MASK

2014-07-11 Thread Joel Schopp
This patch sets TCR_EL2.PS, VTCR_EL2.T0SZ and vttbr_baddr_mask at runtime,
not compile time.

In ARMv8, the EL2 physical address size (TCR_EL2.PS) and stage2 input address
size (VTCR_EL2.T0SZ) cannot be determined at compile time since they
depend on hardware capability.

According to Table D4-23 and Table D4-25 in ARM DDI 0487A.b document,
vttbr_x is calculated using different hard-coded values with consideration
of T0SZ, granule size and the level of translation tables. Therefore,
vttbr_baddr_mask should be determined dynamically.

Changes since v1:
Rebased fix on Jungseok Lee's patch https://lkml.org/lkml/2014/5/12/189 to
provide better long term fix.  Updated that patch to log error instead of
silently fail on unaligned vttbr.

Cc: Christoffer Dall christoffer.d...@linaro.org
Cc: Sungjinn Chung sungjinn.ch...@samsung.com
Signed-off-by: Jungseok Lee jays@samsung.com
Signed-off-by: Joel Schopp joel.sch...@amd.com
---
 arch/arm/kvm/arm.c   |   91 +-
 arch/arm64/include/asm/kvm_arm.h |   17 +--
 arch/arm64/kvm/hyp-init.S|   20 ++--
 3 files changed, 106 insertions(+), 22 deletions(-)

diff --git a/arch/arm/kvm/arm.c b/arch/arm/kvm/arm.c
index 3c82b37..6d51a53 100644
--- a/arch/arm/kvm/arm.c
+++ b/arch/arm/kvm/arm.c
@@ -37,6 +37,7 @@
 #include asm/mman.h
 #include asm/tlbflush.h
 #include asm/cacheflush.h
+#include asm/cputype.h
 #include asm/virt.h
 #include asm/kvm_arm.h
 #include asm/kvm_asm.h
@@ -61,6 +62,9 @@ static atomic64_t kvm_vmid_gen = ATOMIC64_INIT(1);
 static u8 kvm_next_vmid;
 static DEFINE_SPINLOCK(kvm_vmid_lock);
 
+/* VTTBR mask cannot be determined at compile time under ARMv8 */
+static u64 vttbr_baddr_mask;
+
 static bool vgic_present;
 
 static void kvm_arm_set_running_vcpu(struct kvm_vcpu *vcpu)
@@ -413,6 +417,75 @@ static bool need_new_vmid_gen(struct kvm *kvm)
 }
 
 /**
+ * set_vttbr_baddr_mask - set mask value for vttbr base address
+ *
+ * In ARMv8, vttbr_baddr_mask cannot be determined at compile time since stage2
+ * input address size depends on hardware capability. Thus, we need to read
+ * ID_AA64MMFR0_EL1.PARange first and then set vttbr_baddr_mask with
+ * consideration of both granule size and the level of translation tables.
+ */
+static int set_vttbr_baddr_mask(void)
+{
+#ifndef CONFIG_ARM64
+   vttbr_baddr_mask = VTTBR_BADDR_MASK;
+#else
+   int pa_range, t0sz, vttbr_x;
+
+   pa_range = read_cpuid(ID_AA64MMFR0_EL1) & 0xf;
+
+   switch (pa_range) {
+   case 0:
+   t0sz = VTCR_EL2_T0SZ(32);
+   break;
+   case 1:
+   t0sz = VTCR_EL2_T0SZ(36);
+   break;
+   case 2:
+   t0sz = VTCR_EL2_T0SZ(40);
+   break;
+   case 3:
+   t0sz = VTCR_EL2_T0SZ(42);
+   break;
+   case 4:
+   t0sz = VTCR_EL2_T0SZ(44);
+   break;
+   default:
+   t0sz = VTCR_EL2_T0SZ(48);
+   }
+
+   /*
+* See Table D4-23 and Table D4-25 in ARM DDI 0487A.b to figure out
+* the origin of the hardcoded values, 38 and 37.
+*/
+#ifdef CONFIG_ARM64_64K_PAGES
+   /*
+* 16 <= T0SZ <= 21 is valid under 3 levels of translation tables
+* 18 <= T0SZ <= 34 is valid under 2 levels of translation tables
+* 31 <= T0SZ <= 39 is valid under 1 level of translation tables
+*/
+   if (t0sz <= 17) {
+   kvm_err("Cannot support %d-bit address space\n", 64 - t0sz);
+   return -EINVAL;
+   }
+   vttbr_x = 38 - t0sz;
+#else
+   /*
+* 16 <= T0SZ <= 24 is valid under 4 levels of translation tables
+* 21 <= T0SZ <= 30 is valid under 3 levels of translation tables
+* 30 <= T0SZ <= 39 is valid under 2 levels of translation tables
+*/
+   if (t0sz <= 20) {
+   kvm_err("Cannot support %d-bit address space\n", 64 - t0sz);
+   return -EINVAL;
+   }
+   vttbr_x = 37 - t0sz;
+#endif
+   vttbr_baddr_mask = (((1LLU << (48 - vttbr_x)) - 1) << (vttbr_x - 1));
+#endif
+   return 0;
+}
+
+/**
  * update_vttbr - Update the VTTBR with a valid VMID before the guest runs
  * @kvmThe guest that we are about to run
  *
@@ -466,8 +539,16 @@ static void update_vttbr(struct kvm *kvm)
/* update vttbr to be used with the new vmid */
pgd_phys = virt_to_phys(kvm->arch.pgd);
vmid = ((u64)(kvm->arch.vmid) << VTTBR_VMID_SHIFT) & VTTBR_VMID_MASK;
-   kvm->arch.vttbr = pgd_phys & VTTBR_BADDR_MASK;
-   kvm->arch.vttbr |= vmid;
+
+   /*
+* If the VTTBR isn't aligned there is something wrong with the system
+* or kernel.  It is better to just fail and not mask it. But no need
+* to panic the host kernel with a BUG_ON(), instead just log the error.
+*/
+   if (pgd_phys & ~vttbr_baddr_mask)
+   kvm_err("VTTBR not aligned, expect guest to fail");
+
+   kvm->arch.vttbr = pgd_phys | vmid

Re: [PATCH] arm64: fix VTTBR_BADDR_MASK

2014-07-10 Thread Joel Schopp

On 07/10/2014 03:25 PM, Christoffer Dall wrote:
 On Wed, Jul 09, 2014 at 11:17:04AM -0500, Joel Schopp wrote:
 The current calculation for VTTBR_BADDR_MASK masks only 39 bits and not
 all 40 bits.  That last bit is important as some systems allocate
 from near the top of the available address space.

 This patch is necessary to run KVM on an aarch64 SOC I have been testing.

 Signed-off-by: Joel Schopp joel.sch...@amd.com
 ---
  arch/arm64/include/asm/kvm_arm.h |2 +-
  1 file changed, 1 insertion(+), 1 deletion(-)

 diff --git a/arch/arm64/include/asm/kvm_arm.h b/arch/arm64/include/asm/kvm_arm.h
 index 3d69030..b39e93f 100644
 --- a/arch/arm64/include/asm/kvm_arm.h
 +++ b/arch/arm64/include/asm/kvm_arm.h
 @@ -148,7 +148,7 @@
  #endif
  
  #define VTTBR_BADDR_SHIFT (VTTBR_X - 1)
 -#define VTTBR_BADDR_MASK  (((1LLU << (40 - VTTBR_X)) - 1) << VTTBR_BADDR_SHIFT)
 +#define VTTBR_BADDR_MASK  (0xffffffffffLLU)  /* bits 0-39 */
  #define VTTBR_VMID_SHIFT  (48LLU)
 #define VTTBR_VMID_MASK   (0xffLLU << VTTBR_VMID_SHIFT)
  

 While this is obviously fixing a bug, it doesn't feel like the right
 short-term fix.  I'll have to go back and read the definitions of x in
 BADDR[47:x] for VTTBR_EL2 exactly again, but the intended use of
 VTTBR_BADDR_MASK (and the only occurence of it in C-code) is to deal
 with alignment of the allocated pgd.
I think there is some confusion.  Before VTTBR_BADDR_MASK always
evaluated to 0x7fffffffffLLU; after the change it always evaluates to
0xffffffffffLLU.

Neither before nor after the patch is it dealing with alignment.  Any
bits it throws away (bits 40-47) are most significant not least significant.

I could have rewritten the macro like:

#define VTTBR_BADDR_MASK  (((1LLU << (40 - VTTBR_X + 1)) - 1) << VTTBR_BADDR_SHIFT)

to correct the bug but it's my opinion that the existing code is quite
obfuscated which is how the bug happened in the first place.  It seemed
easier to just actually mask the bits in a straightforward and easy to
understand manner.  I even added a comment so nobody has to count the fs ;)
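
For illustration, a minimal user-space sketch (the VTTBR_X value of 5 is
assumed here purely as an example) comparing what the old macro, the
corrected shift-based form, and the flat constant evaluate to:

#include <stdio.h>
#include <inttypes.h>

#define VTTBR_X           5
#define VTTBR_BADDR_SHIFT (VTTBR_X - 1)

int main(void)
{
        /* Old form: one bit short, so bit 39 is never covered */
        uint64_t old_mask = ((1ULL << (40 - VTTBR_X)) - 1) << VTTBR_BADDR_SHIFT;
        /* Corrected shift-based form: covers up through bit 39 */
        uint64_t fixed    = ((1ULL << (40 - VTTBR_X + 1)) - 1) << VTTBR_BADDR_SHIFT;
        /* Flat constant from the patch: bits 0-39 */
        uint64_t flat     = 0xffffffffffULL;

        printf("old:   0x%010" PRIx64 "\n", old_mask); /* bits 4-38 */
        printf("fixed: 0x%010" PRIx64 "\n", fixed);    /* bits 4-39 */
        printf("flat:  0x%010" PRIx64 "\n", flat);     /* bits 0-39 */
        return 0;
}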



Re: [PATCH] arm64: fix VTTBR_BADDR_MASK

2014-07-10 Thread Joel Schopp

On 07/10/2014 04:02 PM, Joel Schopp wrote:
 On 07/10/2014 03:25 PM, Christoffer Dall wrote:
 On Wed, Jul 09, 2014 at 11:17:04AM -0500, Joel Schopp wrote:
 The current calculation for VTTBR_BADDR_MASK masks only 39 bits and not
 all 40 bits.  That last bit is important as some systems allocate
 from near the top of the available address space.

 This patch is necessary to run KVM on an aarch64 SOC I have been testing.

 Signed-off-by: Joel Schopp joel.sch...@amd.com
 ---
  arch/arm64/include/asm/kvm_arm.h |2 +-
  1 file changed, 1 insertion(+), 1 deletion(-)

 diff --git a/arch/arm64/include/asm/kvm_arm.h b/arch/arm64/include/asm/kvm_arm.h
 index 3d69030..b39e93f 100644
 --- a/arch/arm64/include/asm/kvm_arm.h
 +++ b/arch/arm64/include/asm/kvm_arm.h
 @@ -148,7 +148,7 @@
  #endif
  
  #define VTTBR_BADDR_SHIFT (VTTBR_X - 1)
 -#define VTTBR_BADDR_MASK  (((1LLU << (40 - VTTBR_X)) - 1) << VTTBR_BADDR_SHIFT)
 +#define VTTBR_BADDR_MASK  (0xffffffffffLLU)  /* bits 0-39 */
  #define VTTBR_VMID_SHIFT  (48LLU)
 #define VTTBR_VMID_MASK  (0xffLLU << VTTBR_VMID_SHIFT)
  

 While this is obviously fixing a bug, it doesn't feel like the right
 short-term fix.  I'll have to go back and read the definitions of x in
 BADDR[47:x] for VTTBR_EL2 exactly again, but the intended use of
 VTTBR_BADDR_MASK (and the only occurence of it in C-code) is to deal
 with alignment of the allocated pgd.
 I think there is some confusion.  Before VTTBR_BADDR_MASK always
 evaluated to 0x7fffffffffLLU; after the change it always evaluates to
 0xffffffffffLLU.

 Neither before nor after the patch is it dealing with alignment.  Any
 bits it throws away (bits 40-47) are most significant not least significant.

 I could have rewritten the macro like:

 #define VTTBR_BADDR_MASK  (((1LLU << (40 - VTTBR_X + 1)) - 1) << VTTBR_BADDR_SHIFT)

 to correct the bug but it's my opinion that the existing code is quite
 obfuscated which is how the bug happened in the first place.  It seemed
 easier to just actually mask the bits in a straightforward and easy to
 understand manner.  I even added a comment so nobody has to count the fs ;)

I hate to reply to my own email correcting myself.  But you were
correct.  I will fix and resend a v2.


[PATCH] arm64: fix VTTBR_BADDR_MASK

2014-07-09 Thread Joel Schopp
The current calculation for VTTBR_BADDR_MASK masks only 39 bits and not
all 40 bits.  That last bit is important as some systems allocate
from near the top of the available address space.

This patch is necessary to run KVM on an aarch64 SOC I have been testing.

Signed-off-by: Joel Schopp joel.sch...@amd.com
---
 arch/arm64/include/asm/kvm_arm.h |2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/arch/arm64/include/asm/kvm_arm.h b/arch/arm64/include/asm/kvm_arm.h
index 3d69030..b39e93f 100644
--- a/arch/arm64/include/asm/kvm_arm.h
+++ b/arch/arm64/include/asm/kvm_arm.h
@@ -148,7 +148,7 @@
 #endif
 
 #define VTTBR_BADDR_SHIFT (VTTBR_X - 1)
-#define VTTBR_BADDR_MASK  (((1LLU << (40 - VTTBR_X)) - 1) << VTTBR_BADDR_SHIFT)
+#define VTTBR_BADDR_MASK  (0xffffffffffLLU)  /* bits 0-39 */
 #define VTTBR_VMID_SHIFT  (48LLU)
 #define VTTBR_VMID_MASK  (0xffLLU << VTTBR_VMID_SHIFT)
 



Re: [PATCH v2 9/9] arm64: KVM: vgic: deal with GIC sub-page alignment

2014-06-25 Thread Joel Schopp


On 06/25/2014 10:00 AM, Marc Zyngier wrote:

On 25/06/14 15:56, Joel Schopp wrote:

On 06/24/2014 05:28 PM, Peter Maydell wrote:

On 24 June 2014 20:28, Joel Schopp joel.sch...@amd.com wrote:

On 06/19/2014 04:21 AM, Marc Zyngier wrote:

The GIC CPU interface is always 4k aligned. If the host is using
64k pages, it is critical to place the guest's GICC interface at the
same relative alignment as the host's GICV. Failure to do so results
in an impossibility for the guest to deal with interrupts.

Add a KVM_DEV_ARM_VGIC_GRP_ADDR_OFFSET attribute for the VGIC, allowing
userspace to retrieve the GICV offset in a page. It becomes then trivial
to adjust the GICC base address for the guest.

Does this mean there is a corresponding patch for qemu?

Not as far as I know. It's a bit awkward on the QEMU end because
we really want to provide the guest a consistent memory map
regardless of the host CPU. So at best we'd probably use it to
say sorry, can't run on this CPU/host kernel.

I think most arm64 servers are going to run with 64k pages.  It seems
like a major problem to have qemu not work on these systems.

How many of them will be with the GICC *not* 64kB aligned?


If I'm reading the Server Base System Architecture v2.2 Appendix F 
correctly all of them.  Here's the relevant quote: "In a 64KB
translation granule system this means that GICC needs to have its base
at 4KB below a 64KB boundary."



(That said, if you think you can make QEMU usefully use the
information and want to write a QEMU patch I'm not averse
to the idea.)

I'll have to think about this approach some more, but I'm not opposed to
doing the work if I thought it was the right thing to do.


kvmtool is probably better placed to take advantage of it since
it takes more of a deal with what the host provides you
philosophy.

kvmtool is fun as a play toy, but in the real world nobody is building
clouds using kvmtool; they use kvm with qemu.

A play toy? Hmmm. Do you realise that most of KVM on arm64 has been
written using this play toy?


I meant no insult.  I really like kvmtool.  I'm just saying that the 
eventual end users of these systems will want to run qemu and not kvmtool.




Re: [PATCH v2 9/9] arm64: KVM: vgic: deal with GIC sub-page alignment

2014-06-25 Thread Joel Schopp


On 06/24/2014 05:28 PM, Peter Maydell wrote:

On 24 June 2014 20:28, Joel Schopp joel.sch...@amd.com wrote:

On 06/19/2014 04:21 AM, Marc Zyngier wrote:

The GIC CPU interface is always 4k aligned. If the host is using
64k pages, it is critical to place the guest's GICC interface at the
same relative alignment as the host's GICV. Failure to do so results
in an impossibility for the guest to deal with interrupts.

Add a KVM_DEV_ARM_VGIC_GRP_ADDR_OFFSET attribute for the VGIC, allowing
userspace to retrieve the GICV offset in a page. It becomes then trivial
to adjust the GICC base address for the guest.


Does this mean there is a corresponding patch for qemu?

Not as far as I know. It's a bit awkward on the QEMU end because
we really want to provide the guest a consistent memory map
regardless of the host CPU. So at best we'd probably use it to
say sorry, can't run on this CPU/host kernel.
I think most arm64 servers are going to run with 64k pages.  It seems 
like a major problem to have qemu not work on these systems.




(That said, if you think you can make QEMU usefully use the
information and want to write a QEMU patch I'm not averse
to the idea.)
I'll have to think about this approach some more, but I'm not opposed to 
doing the work if I thought it was the right thing to do.




kvmtool is probably better placed to take advantage of it since
it takes more of a deal with what the host provides you
philosophy.
kvmtool is fun as a play toy, but in the real world nobody is building 
clouds using kvmtool; they use kvm with qemu.




thanks
-- PMM




Re: [PATCH v2 9/9] arm64: KVM: vgic: deal with GIC sub-page alignment

2014-06-25 Thread Joel Schopp


On 06/25/2014 12:34 PM, Peter Maydell wrote:

On 25 June 2014 15:56, Joel Schopp joel.sch...@amd.com wrote:

On 06/24/2014 05:28 PM, Peter Maydell wrote:

On 24 June 2014 20:28, Joel Schopp joel.sch...@amd.com wrote:

Does this mean there is a corresponding patch for qemu?

Not as far as I know. It's a bit awkward on the QEMU end because
we really want to provide the guest a consistent memory map
regardless of the host CPU. So at best we'd probably use it to
say sorry, can't run on this CPU/host kernel.

I think most arm64 servers are going to run with 64k pages.  It seems like a
major problem to have qemu not work on these systems.

QEMU should already work fine on servers with 64K pages;
you just need to have the host offset of the GICV within the 64K page
and the guest offset of the GICC within the 64K page be the same
(and at the moment both must also be zero, which I believe is true
for all of them at the moment except possibly the AEM model;
counterexamples welcome). Disclaimer: I haven't personally
tested this, but on the other hand I don't think anybody's
reported it as not working either.


It doesn't work for me.  Maybe I'm doing something wrong, but I can't
see what.  I am unique in that I'm running a gic-400 (gicv2m) on aarch64
hardware with 64k pages.  I'm also unique in that my hardware maps each
4K gic entry to a 64K page (aliasing each 4k of gic 16 times in a 64K
page, ie the gic virtual ic is at 0xe1140000 and 0xe1141000 and
0xe1142000, etc).  This is in line with appendix F of the server base
system architecture.  This is inconvenient when the size is 0x2000
(8K).  As a result all the offsets in the device tree entries are to the
last 4K in the page, so that an 8K read will read the last 4k from one
page and the first 4k from the next and actually get 8k of the gic.



gic: interrupt-controller@e1101000 {
        compatible = "arm,gic-400";
        #interrupt-cells = <3>;
        #address-cells = <0>;
        interrupt-controller;
        msi-controller;
        reg = <0x0 0xe1110000 0 0x1000>, /* gic dist */
              <0x0 0xe112f000 0 0x2000>, /* gic cpu */
              <0x0 0xe114f000 0 0x2000>, /* gic virtual ic */
              <0x0 0xe116f000 0 0x2000>, /* gic virtual cpu */
              <0x0 0xe1180000 0 0x1000>; /* gic msi */

        interrupts = <1 8 0xf04>;
};


My concern here is that if userspace is going to look at 8k starting at
the beginning of the page, guest offset 0 in your terminology (say
0xe1140000), instead of starting at the last 4k of the page, offset
0xf000 (say 0xe114f000), it is going to get the second 4k wrong by
reading 0xe1141000 instead of 0xe1150000.
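
To make the aliasing concrete, here is a minimal C sketch of the address
arithmetic involved; it assumes 64K host pages and the addresses from the
device tree above, and is purely illustrative:

#include <stdio.h>

int main(void)
{
        /* 64K page holding the GICV; each 4K GIC region is aliased
         * 16 times within it, per SBSA appendix F. */
        unsigned long gicv_page = 0xe1140000;
        /* The device tree points at the last 4K of the page... */
        unsigned long gicv = gicv_page + 0xf000;         /* 0xe114f000 */

        /* ...so an 8K access spills into the next page and reads
         * real GIC registers for its second 4K: */
        printf("second 4k: %#lx\n", gicv + 0x1000);      /* 0xe1150000 */

        /* An 8K access from the page base instead hits an alias of
         * the first 4K for its second half: */
        printf("alias:     %#lx\n", gicv_page + 0x1000); /* 0xe1141000 */
        return 0;
}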




Re: [PATCH v2 9/9] arm64: KVM: vgic: deal with GIC sub-page alignment

2014-06-25 Thread Joel Schopp


On 06/25/2014 03:45 PM, Peter Maydell wrote:

On 25 June 2014 20:34, Joel Schopp joel.sch...@amd.com wrote:

It doesn't work for me.  Maybe I'm doing something wrong, but I can't see
what.  I am unique in that I'm running a gic-400 (gicv2m) on aarch64
hardware with 64k pages.  I'm also unique in that my hardware maps each 4K
gic entry to a 64K page (aliasing each 4k of gic 16 times in a 64K page, ie
the gic virtual ic is at 0xe1140000 and 0xe1141000 and 0xe1142000, etc).

This is in line with appendix F of the server base system architecture.  This
is inconvenient when the size is 0x2000 (8K).  As a result all the offsets
in the device tree entries are to the last 4K in the page so that an 8K read
will read the last 4k from one page and the first 4k from the next and
actually get 8k of the gic.


 gic: interrupt-controller@e1101000 {
         compatible = "arm,gic-400";
         #interrupt-cells = <3>;
         #address-cells = <0>;
         interrupt-controller;
         msi-controller;
         reg = <0x0 0xe1110000 0 0x1000>, /* gic dist */
               <0x0 0xe112f000 0 0x2000>, /* gic cpu */
               <0x0 0xe114f000 0 0x2000>, /* gic virtual ic */
               <0x0 0xe116f000 0 0x2000>, /* gic virtual cpu */
               <0x0 0xe1180000 0 0x1000>; /* gic msi */

Right, this is the oddball case we don't yet support for 64K pages
(though as you say it is a permitted configuration per the SBSA).

At least I know I'm not going crazy.



         interrupts = <1 8 0xf04>;
 };


My concern here is that if userspace is going to look at 8k starting at the
beginning of the page, guest offset 0 in your terminology (say 0xe1140000),
instead of starting at the last 4k of the page, offset 0xf000 (say
0xe114f000), it is going to get the second 4k wrong by reading 0xe1141000
instead of 0xe1150000.

Userspace doesn't actually look at anything in the GICC. It just asks
the kernel to put the guest GICC (ie the mapping of the host GICV)
at a particular base address which happens to be a multiple of 64K.
In this case if the host kernel is using 64K pages then the KVM
kernel code ought to say "sorry, can't do that" when we tell it the
base address. (That is, it's impossible to give the guest a VM
where the GICC it sees is at a 64K boundary on your hardware
and host kernel config, and hopefully we report that in a not totally
opaque fashion.)
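
For context, a rough sketch of the userspace side being described here,
using the existing KVM_DEV_ARM_VGIC_GRP_ADDR group; vgic_fd is an assumed
KVM device fd and error handling is elided:

#include <stdint.h>
#include <sys/ioctl.h>
#include <linux/kvm.h>

/* Ask KVM to map the guest GICC (i.e. the host GICV) at gicc_base.
 * On hardware like the above with 64K host pages, the kernel would
 * need to reject a base whose offset within the 64K page differs
 * from the host GICV's. */
static int set_guest_gicc_base(int vgic_fd, uint64_t gicc_base)
{
        struct kvm_device_attr attr = {
                .group = KVM_DEV_ARM_VGIC_GRP_ADDR,
                .attr  = KVM_VGIC_V2_ADDR_TYPE_CPU,
                .addr  = (uint64_t)(unsigned long)&gicc_base,
        };

        return ioctl(vgic_fd, KVM_SET_DEVICE_ATTR, &attr);
}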

The errors I'm seeing look like:
from qemu:
error: kvm run failed Bad address
Aborted (core dumped)

from kvm:
[ 7931.722965] kvm [1208]: Unsupported fault status: EC=0x20 DFCS=0x14

from lkvm (kvmtool):
  Warning: /extra/rootfs/boot/Image is not a bzImage. Trying to load it
as a flat binary...

  Info: Loaded kernel to 0x80080000 (10212384 bytes)
  Info: Placing fdt at 0x8fe00000 - 0x8fffffff
  Info: virtio-mmio.devices=0x200@0x10000:36

KVM_RUN failed: Bad address




If you hack QEMU's memory map for the virt board so instead of
 [VIRT_GIC_CPU] = { 0x8010000, 0x10000 },
we have
 [VIRT_GIC_CPU] = { 0x801f000, 0x2000 },
No change in result, though that's not to say this wouldn't work if some
other unknown problem were fixed.


does it work? If QEMU supported this VGIC_GRP_ADDR_OFFSET
query then all it would do would be to change that offset and size.
It would be good to know if there are other problems beyond that...

(Conveniently, Linux guests won't currently try to look at the second
4K page of their GICC...)

That's handy.


Re: [PATCH v2 9/9] arm64: KVM: vgic: deal with GIC sub-page alignment

2014-06-24 Thread Joel Schopp


On 06/19/2014 04:21 AM, Marc Zyngier wrote:

The GIC CPU interface is always 4k aligned. If the host is using
64k pages, it is critical to place the guest's GICC interface at the
same relative alignment as the host's GICV. Failure to do so makes
it impossible for the guest to deal with interrupts.

Add a KVM_DEV_ARM_VGIC_GRP_ADDR_OFFSET attribute for the VGIC, allowing
userspace to retrieve the GICV offset within a page. It then becomes
trivial to adjust the GICC base address for the guest.


Does this mean there is a corresponding patch for qemu?



Signed-off-by: Marc Zyngier marc.zyng...@arm.com
---
 arch/arm/include/uapi/asm/kvm.h   | 1 +
 arch/arm64/include/uapi/asm/kvm.h | 1 +
 virt/kvm/arm/vgic.c               | 7 +++++++
 3 files changed, 9 insertions(+)

diff --git a/arch/arm/include/uapi/asm/kvm.h b/arch/arm/include/uapi/asm/kvm.h
index 8b51c1a..056b782 100644
--- a/arch/arm/include/uapi/asm/kvm.h
+++ b/arch/arm/include/uapi/asm/kvm.h
@@ -174,6 +174,7 @@ struct kvm_arch_memory_slot {
 #define   KVM_DEV_ARM_VGIC_OFFSET_SHIFT 0
 #define   KVM_DEV_ARM_VGIC_OFFSET_MASK  (0xffffffffULL << KVM_DEV_ARM_VGIC_OFFSET_SHIFT)
 #define KVM_DEV_ARM_VGIC_GRP_NR_IRQS    3
+#define KVM_DEV_ARM_VGIC_GRP_ADDR_OFFSET 4
 
 /* KVM_IRQ_LINE irq field index values */
 #define KVM_ARM_IRQ_TYPE_SHIFT          24
diff --git a/arch/arm64/include/uapi/asm/kvm.h b/arch/arm64/include/uapi/asm/kvm.h
index b5cd6ed..5513de4 100644
--- a/arch/arm64/include/uapi/asm/kvm.h
+++ b/arch/arm64/include/uapi/asm/kvm.h
@@ -160,6 +160,7 @@ struct kvm_arch_memory_slot {
 #define   KVM_DEV_ARM_VGIC_OFFSET_SHIFT 0
 #define   KVM_DEV_ARM_VGIC_OFFSET_MASK  (0xffffffffULL << KVM_DEV_ARM_VGIC_OFFSET_SHIFT)
 #define KVM_DEV_ARM_VGIC_GRP_NR_IRQS    3
+#define KVM_DEV_ARM_VGIC_GRP_ADDR_OFFSET 4
 
 /* KVM_IRQ_LINE irq field index values */
 #define KVM_ARM_IRQ_TYPE_SHIFT          24
diff --git a/virt/kvm/arm/vgic.c b/virt/kvm/arm/vgic.c
index b0cd417..68ac9c6 100644
--- a/virt/kvm/arm/vgic.c
+++ b/virt/kvm/arm/vgic.c
@@ -2228,6 +2228,12 @@ static int vgic_get_attr(struct kvm_device *dev, struct kvm_device_attr *attr)
 		r = put_user(dev->kvm->arch.vgic.nr_irqs, uaddr);
 		break;
 	}
+	case KVM_DEV_ARM_VGIC_GRP_ADDR_OFFSET: {
+		u32 __user *uaddr = (u32 __user *)(long)attr->addr;
+		u32 val = vgic->vcpu_base & ~PAGE_MASK;
+		r = put_user(val, uaddr);
+		break;
+	}
 
 	}
 
@@ -2265,6 +2271,7 @@ static int vgic_has_attr(struct kvm_device *dev, struct kvm_device_attr *attr)
 		offset = attr->attr & KVM_DEV_ARM_VGIC_OFFSET_MASK;
 		return vgic_has_attr_regs(vgic_cpu_ranges, offset);
 	case KVM_DEV_ARM_VGIC_GRP_NR_IRQS:
+	case KVM_DEV_ARM_VGIC_GRP_ADDR_OFFSET:
 		return 0;
 	}
 	return -ENXIO;
