Re: [PATCH 04/12] KVM: x86: Replace call-back set_tsc_khz() with a common function

2015-09-28 Thread Haozhong Zhang
On Mon, Sep 28, 2015 at 08:27:02PM -0700, Eric Northup wrote:
> On Sun, Sep 27, 2015 at 10:38 PM, Haozhong Zhang 
> wrote:
> 
> > Both VMX and SVM propagate virtual_tsc_khz in the same way, so this
> > patch removes the call-back set_tsc_khz() and replaces it with a common
> > function.
> >
> > Signed-off-by: Haozhong Zhang 
> > ---
> >  arch/x86/include/asm/kvm_host.h |  1 -
> >  arch/x86/kvm/svm.c  | 36 
> >  arch/x86/kvm/vmx.c  | 17 -
> >  arch/x86/kvm/x86.c  | 41
> > -
> >  4 files changed, 40 insertions(+), 55 deletions(-)
> >
> > diff --git a/arch/x86/include/asm/kvm_host.h
> > b/arch/x86/include/asm/kvm_host.h
> > index 4f32c68..5a0c435 100644
> > --- a/arch/x86/include/asm/kvm_host.h
> > +++ b/arch/x86/include/asm/kvm_host.h
> > @@ -842,7 +842,6 @@ struct kvm_x86_ops {
> >
> > bool (*has_wbinvd_exit)(void);
> >
> > -   void (*set_tsc_khz)(struct kvm_vcpu *vcpu, u32 user_tsc_khz, bool
> > scale);
> > u64 (*read_tsc_offset)(struct kvm_vcpu *vcpu);
> > void (*write_tsc_offset)(struct kvm_vcpu *vcpu, u64 offset);
> >
> > diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c
> > index 1a333bd..d46dcf3 100644
> > --- a/arch/x86/kvm/svm.c
> > +++ b/arch/x86/kvm/svm.c
> > @@ -1015,41 +1015,6 @@ static void init_sys_seg(struct vmcb_seg *seg,
> > uint32_t type)
> > seg->base = 0;
> >  }
> >
> > -static void svm_set_tsc_khz(struct kvm_vcpu *vcpu, u32 user_tsc_khz, bool
> > scale)
> > -{
> > -   u64 ratio;
> > -   u64 khz;
> > -
> > -   /* Guest TSC same frequency as host TSC? */
> > -   if (!scale) {
> > -   vcpu->arch.tsc_scaling_ratio = TSC_RATIO_DEFAULT;
> > -   return;
> > -   }
> > -
> > -   /* TSC scaling supported? */
> > -   if (!boot_cpu_has(X86_FEATURE_TSCRATEMSR)) {
> > -   if (user_tsc_khz > tsc_khz) {
> > -   vcpu->arch.tsc_catchup = 1;
> > -   vcpu->arch.tsc_always_catchup = 1;
> > -   } else
> > -   WARN(1, "user requested TSC rate below hardware
> > speed\n");
> > -   return;
> > -   }
> > -
> > -   khz = user_tsc_khz;
> > -
> > -   /* TSC scaling required  - calculate ratio */
> > -   ratio = khz << 32;
> > -   do_div(ratio, tsc_khz);
> > -
> > -   if (ratio == 0 || ratio & TSC_RATIO_RSVD) {
> > -   WARN_ONCE(1, "Invalid TSC ratio - virtual-tsc-khz=%u\n",
> > -   user_tsc_khz);
> > -   return;
> > -   }
> > -   vcpu->arch.tsc_scaling_ratio = ratio;
> > -}
> > -
> >  static u64 svm_read_tsc_offset(struct kvm_vcpu *vcpu)
> >  {
> > struct vcpu_svm *svm = to_svm(vcpu);
> > @@ -4507,7 +4472,6 @@ static struct kvm_x86_ops svm_x86_ops = {
> >
> > .has_wbinvd_exit = svm_has_wbinvd_exit,
> >
> > -   .set_tsc_khz = svm_set_tsc_khz,
> > .read_tsc_offset = svm_read_tsc_offset,
> > .write_tsc_offset = svm_write_tsc_offset,
> > .adjust_tsc_offset = svm_adjust_tsc_offset,
> > diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
> > index 6407674..1751537 100644
> > --- a/arch/x86/kvm/vmx.c
> > +++ b/arch/x86/kvm/vmx.c
> > @@ -2255,22 +2255,6 @@ static u64 vmx_read_l1_tsc(struct kvm_vcpu *vcpu,
> > u64 host_tsc)
> > return host_tsc + tsc_offset;
> >  }
> >
> > -/*
> > - * Engage any workarounds for mis-matched TSC rates.  Currently limited to
> > - * software catchup for faster rates on slower CPUs.
> > - */
> > -static void vmx_set_tsc_khz(struct kvm_vcpu *vcpu, u32 user_tsc_khz, bool
> > scale)
> > -{
> > -   if (!scale)
> > -   return;
> > -
> > -   if (user_tsc_khz > tsc_khz) {
> > -   vcpu->arch.tsc_catchup = 1;
> > -   vcpu->arch.tsc_always_catchup = 1;
> > -   } else
> > -   WARN(1, "user requested TSC rate below hardware speed\n");
> > -}
> > -
> >  static u64 vmx_read_tsc_offset(struct kvm_vcpu *vcpu)
> >  {
> > return vmcs_read64(TSC_OFFSET);
> > @@ -10380,7 +10364,6 @@ static struct kvm_x86_ops vmx_x86_ops = {
> >
> > .has_wbinvd_exit = cpu_has_vmx_wbinvd_exit,
> >
> > -   .set_tsc_khz = vmx_set_tsc_khz,
> > .read_tsc_offset = vmx_read_tsc_offset,
> > .write_tsc_offset = vmx_write_tsc_offset,
> > .adjust_tsc_offset = vmx_adjust_tsc_offset,
> > diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
> > index 920c302..e2e1fdb 100644
> > --- a/arch/x86/kvm/x86.c
> > +++ b/arch/x86/kvm/x86.c
> > @@ -1248,6 +1248,45 @@ static u32 adjust_tsc_khz(u32 khz, s32 ppm)
> > return v;
> >  }
> >
> > +static void set_tsc_khz(struct kvm_vcpu *vcpu, u32 user_tsc_khz, bool
> > scale)
> > +{
> > +   u64 ratio, khz;
> > +   s8 shift;
> > +
> > +   /* Guest TSC same frequency as host TSC? */
> > +   if 

Re: [PATCH v4 15/15] KVM: arm: enable trapping of all debug registers

2015-09-28 Thread Zhichao Huang


On 2015/9/3 0:08, Christoffer Dall wrote:
> On Mon, Aug 10, 2015 at 09:26:07PM +0800, Zhichao Huang wrote:
>> Enable trapping of the debug registers unconditionally, allowing guests to
>> use the debug infrastructure.
>>
>> Signed-off-by: Zhichao Huang 
>> Reviewed-by: Christoffer Dall 
>> ---
>>  arch/arm/kvm/interrupts_head.S | 2 +-
>>  1 file changed, 1 insertion(+), 1 deletion(-)
>>
>> diff --git a/arch/arm/kvm/interrupts_head.S b/arch/arm/kvm/interrupts_head.S
>> index 7ad0adf..494991d 100644
>> --- a/arch/arm/kvm/interrupts_head.S
>> +++ b/arch/arm/kvm/interrupts_head.S
>> @@ -792,7 +792,7 @@ ARM_BE8(rev  r6, r6  )
>>   * (hardware reset value is 0) */
>>  .macro set_hdcr operation
>>  mrc p15, 4, r2, c1, c1, 1
>> -ldr r3, =(HDCR_TPM|HDCR_TPMCR)
>> +ldr r3, =(HDCR_TPM|HDCR_TPMCR|HDCR_TDRA|HDCR_TDOSA|HDCR_TDA)
> 
> eh, but I thought we didn't have to trap accesses to the debug registers
> if we switch them, because the guest is likely to be using them?
> 
> Maybe I am getting confused, can you repeat for me exactly when we
> context-switch the registers and when we trap accesses to them?
> 

Since we don't want to world switch the dangerous register (DBGDSCR), we have
to trap accesses all the time, to prevent the guest from writing to the real
register.

> Thanks,
> -Christoffer
> 
>>  .if \operation == vmentry
>>  orr r2, r2, r3  @ Trap some perfmon accesses
>>  .else
>> -- 
>> 1.7.12.4
>>
--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH v4 13/15] KVM: arm: keep track of guest use of the debug registers

2015-09-28 Thread Zhichao Huang


On 2015/9/3 0:01, Christoffer Dall wrote:
> On Mon, Aug 10, 2015 at 09:26:05PM +0800, Zhichao Huang wrote:
>>  
>> -static bool trap_debug32(struct kvm_vcpu *vcpu,
>> +/* Indicate whether the guest has enabled any break/watch points or not. */
>> +static bool guest_debug_in_use(struct kvm_vcpu *vcpu)
>> +{
>> +unsigned int i;
>> +
>> +for (i = 0; i < ARM_MAX_BRP; i++)
>> +if (vcpu->arch.cp14[cp14_DBGBCR0 + i] & 0x1)
>> +return true;
>> +for (i = 0; i < ARM_MAX_WRP; i++)
>> +if (vcpu->arch.cp14[cp14_DBGWCR0 + i] & 0x1)
>> +return true;
>> +
>> +return false;
>> +}
>> +
>> +static bool __trap_debug32(struct kvm_vcpu *vcpu,
>>  const struct coproc_params *p,
>>  const struct coproc_reg *r)
>>  {
>> @@ -232,6 +247,56 @@ static bool trap_debug32(struct kvm_vcpu *vcpu,
>>  return true;
>>  }
>>  

>> +static bool trap_debug32(struct kvm_vcpu *vcpu,
>> +const struct coproc_params *p,
>> +const struct coproc_reg *r)
>> +{
>> +__trap_debug32(vcpu, p, r);
>> +
>> +if (p->is_write) {
>> +if ((vcpu->arch.cp14[r->reg] & 0x1) ||
>> +guest_debug_in_use(vcpu))
>> +vcpu->arch.debug_flags |= KVM_ARM_DEBUG_GUEST_INUSE;
>> +else
>> +vcpu->arch.debug_flags &= ~KVM_ARM_DEBUG_GUEST_INUSE;
> 
> I don't understand this logic, if we are enabling one of the w/b points
> or if there was already an enabled w/b point, then we set the flag, but
> if you disable a single one then you clear the flag?
> 
> It looks to me like you're mixing two approaches here;  either read
> through all the registers whenever you need to know to set the flag or
> not, or you keep track of this on every read/write of the registers.
> 

I did it in the function guest_debug_in_use(), which will read through all the
registers.


> 
> So __trap_debug32 is for the non-control registers and trap-debug32 is
> for the control registers?
> 
> I think specifically naming the control register function
> trap_debug_cr would be cleaner in that case.
> 

OK.

> Thanks,
> -Christoffer
> 
--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH 01/12] KVM: x86: Collect information for setting TSC scaling ratio

2015-09-28 Thread Eric Northup
On Sun, Sep 27, 2015 at 10:38 PM, Haozhong Zhang
 wrote:
>
> The number of bits of the fractional part of the 64-bit TSC scaling
> ratio in VMX and SVM is different. This patch makes the architecture
> code to collect the number of fractional bits and other related
> information into variables that can be accessed in the common code.
>
> Signed-off-by: Haozhong Zhang 
> ---
>  arch/x86/include/asm/kvm_host.h | 8 
>  arch/x86/kvm/svm.c  | 5 +
>  arch/x86/kvm/x86.c  | 8 
>  3 files changed, 21 insertions(+)
>
> diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
> index 2beee03..5b9b86e 100644
> --- a/arch/x86/include/asm/kvm_host.h
> +++ b/arch/x86/include/asm/kvm_host.h
> @@ -965,6 +965,14 @@ extern bool kvm_has_tsc_control;
>  extern u32  kvm_min_guest_tsc_khz;
>  /* maximum supported tsc_khz for guests */
>  extern u32  kvm_max_guest_tsc_khz;
> +/* number of bits of the fractional part of the TSC scaling ratio */
> +extern u8   kvm_tsc_scaling_ratio_frac_bits;
> +/* reserved bits of TSC scaling ratio (SBZ) */
> +extern u64  kvm_tsc_scaling_ratio_rsvd;
> +/* default TSC scaling ratio (= 1.0) */
> +extern u64  kvm_default_tsc_scaling_ratio;
> +/* maximum allowed value of TSC scaling ratio */
> +extern u64  kvm_max_tsc_scaling_ratio;

Do we need all 3 of kvm_max_guest_tsc_khz, kvm_max_tsc_scaling_ratio,
and kvm_tsc_scaling_ratio_rsvd? Only SVM has reserved bits, and they are
used just for complaining if the high bits are set, which can already be
expressed by kvm_max_tsc_scaling_ratio.

kvm_max_tsc_scaling_ratio seems to be write-only.

>
>  enum emulation_result {
> EMULATE_DONE, /* no further processing */
> diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c
> index 94b7d15..eff7db7 100644
> --- a/arch/x86/kvm/svm.c
> +++ b/arch/x86/kvm/svm.c
> @@ -963,7 +963,12 @@ static __init int svm_hardware_setup(void)
> max = min(0x7fffULL, __scale_tsc(tsc_khz, TSC_RATIO_MAX));
>
> kvm_max_guest_tsc_khz = max;
> +
> +   kvm_max_tsc_scaling_ratio = TSC_RATIO_MAX;
> +   kvm_tsc_scaling_ratio_frac_bits = 32;
> +   kvm_tsc_scaling_ratio_rsvd = TSC_RATIO_RSVD;
> }
> +   kvm_default_tsc_scaling_ratio = TSC_RATIO_DEFAULT;
>
> if (nested) {
> printk(KERN_INFO "kvm: Nested Virtualization enabled\n");
> diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
> index 991466b..f888225 100644
> --- a/arch/x86/kvm/x86.c
> +++ b/arch/x86/kvm/x86.c
> @@ -106,6 +106,14 @@ bool kvm_has_tsc_control;
>  EXPORT_SYMBOL_GPL(kvm_has_tsc_control);
>  u32  kvm_max_guest_tsc_khz;
>  EXPORT_SYMBOL_GPL(kvm_max_guest_tsc_khz);
> +u8   kvm_tsc_scaling_ratio_frac_bits;
> +EXPORT_SYMBOL_GPL(kvm_tsc_scaling_ratio_frac_bits);
> +u64  kvm_tsc_scaling_ratio_rsvd;
> +EXPORT_SYMBOL_GPL(kvm_tsc_scaling_ratio_rsvd);
> +u64  kvm_default_tsc_scaling_ratio;
> +EXPORT_SYMBOL_GPL(kvm_default_tsc_scaling_ratio);
> +u64  kvm_max_tsc_scaling_ratio;
> +EXPORT_SYMBOL_GPL(kvm_max_tsc_scaling_ratio);
>
>  /* tsc tolerance in parts per million - default to 1/2 of the NTP threshold 
> */
>  static u32 tsc_tolerance_ppm = 250;
> --
> 2.4.8
>
> --
> To unsubscribe from this list: send the line "unsubscribe kvm" in
> the body of a message to majord...@vger.kernel.org
> More majordomo info at  http://vger.kernel.org/majordomo-info.html
--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH 00/12] KVM: x86: add support for VMX TSC scaling

2015-09-28 Thread Eric Northup
On Sun, Sep 27, 2015 at 10:37 PM, Haozhong Zhang
 wrote:
> This patchset adds support for VMX TSC scaling feature which is
> available on Intel Skylake CPU. The specification of VMX TSC scaling
> can be found at
> http://www.intel.com/content/www/us/en/processors/timestamp-counter-scaling-virtualization-white-paper.html
>
> VMX TSC scaling allows guest TSC which is read by guest rdtsc(p)
> instructions increases in a rate that is customized by the hypervisor
> and can be different than the host TSC rate. Basically, VMX TSC
> scaling adds a 64-bit field called TSC multiplier in VMCS so that, if
> VMX TSC scaling is enabled, TSC read by guest rdtsc(p) instructions
> will be calculated by the following formula:
>
>   guest EDX:EAX = (Host TSC * TSC multiplier) >> 48 + VMX TSC Offset
>
> where, Host TSC = Host MSR_IA32_TSC + Host MSR_IA32_TSC_ADJUST.
>
> This patchset, when cooperating with another QEMU patchset (sent in
> another email "target-i386: save/restore vcpu's TSC rate during
> migration"), allows guest programs observe a consistent TSC rate even
> though they are migrated among machines with different host TSC rates.
>
> VMX TSC scaling shares some common logics with SVM TSC scaling which
> is already supported by KVM. Patch 1 ~ 8 move those common logics from
> SVM code to the common code. Upon them, patch 9 ~ 12 add VMX-specific
> support for VMX TSC scaling.

reviewed-by: Eric Northup 

>
> Haozhong Zhang (12):
>   KVM: x86: Collect information for setting TSC scaling ratio
>   KVM: x86: Add a common TSC scaling ratio field in kvm_vcpu_arch
>   KVM: x86: Add a common TSC scaling function
>   KVM: x86: Replace call-back set_tsc_khz() with a common function
>   KVM: x86: Replace call-back compute_tsc_offset() with a common function
>   KVM: x86: Move TSC scaling logic out of call-back adjust_tsc_offset()
>   KVM: x86: Move TSC scaling logic out of call-back read_l1_tsc()
>   KVM: x86: Use the correct vcpu's TSC rate to compute time scale
>   KVM: VMX: Enable and initialize VMX TSC scaling
>   KVM: VMX: Setup TSC scaling ratio when a vcpu is loaded
>   KVM: VMX: Use a scaled host TSC for guest readings of MSR_IA32_TSC
>   KVM: VMX: Dump TSC multiplier in dump_vmcs()
>
>  arch/x86/include/asm/kvm_host.h |  24 +++
>  arch/x86/include/asm/vmx.h  |   4 +-
>  arch/x86/kvm/lapic.c|   5 +-
>  arch/x86/kvm/svm.c  | 113 +++--
>  arch/x86/kvm/vmx.c  |  60 
>  arch/x86/kvm/x86.c  | 154 
> +---
>  include/linux/kvm_host.h|  21 +-
>  7 files changed, 221 insertions(+), 160 deletions(-)
>
> --
> 2.4.8
>
> --
> To unsubscribe from this list: send the line "unsubscribe kvm" in
> the body of a message to majord...@vger.kernel.org
> More majordomo info at  http://vger.kernel.org/majordomo-info.html
--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH v4 10/15] KVM: arm: implement world switch for debug registers

2015-09-28 Thread Zhichao Huang


On 2015/9/2 22:53, Christoffer Dall wrote:
>> +/* Reads cp14 registers from hardware.
>> + * Writes cp14 registers in-order to the CP14 struct pointed to by r10
>> + *
>> + * Assumes vcpu pointer in vcpu reg
>> + *
>> + * Clobbers r2-r12
>> + */
>> +.macro save_debug_state
>> +read_hw_dbg_num
>> +cp14_read_and_str r10, 4, cp14_DBGBVR0, r11
>> +cp14_read_and_str r10, 5, cp14_DBGBCR0, r11
>> +cp14_read_and_str r10, 6, cp14_DBGWVR0, r12
>> +cp14_read_and_str r10, 7, cp14_DBGWCR0, r12
>> +
>> +/* DBGDSCR reg */
>> +mrc p14, 0, r2, c0, c1, 0
>> +str r2, [r10, #CP14_OFFSET(cp14_DBGDSCRext)]
> 
> so again we're touching the scary register on every world-switch.  Since
> it sounds like we have experience telling us that this can cause
> troubles, I'm wondering if we can get around it by:
> 
> Only ever allow the guest to use debugging registers if we managed to
> enter_monitor_mode on the host, and in that case only allow guest
> debugging with the configuration of DBGDSCR that the host has.
> 
> If the host never managed to enable debugging, the guest probably won't
> succeed either, and we should just trap all guest accesses to the debug
> registers.
> 
> Does this work?
> 

I think it works. Since the register is dangerous, we will try not to
world switch it. It means that the guest will not be able to write the register,
and will always see what the host set. So the guest will not be able to use the
hardware debug feature if the host disables it.

> 
> -Christoffer
> 
--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH 3/3] kvm-all: notice KVM of vcpu's TSC rate after migration

2015-09-28 Thread Haozhong Zhang
On Mon, Sep 28, 2015 at 01:37:34PM -0300, Eduardo Habkost wrote:
> On Mon, Sep 28, 2015 at 01:38:31PM +0800, Haozhong Zhang wrote:
> > When a vcpu is created in KVM, its TSC rate is initially identical to
> > the host TSC rate. If its state is migrated to a vcpu on another
> > machine (target machine) which may uses a different host TSC rate, QEMU
> > on the target machine should notice KVM of the migrated vcpu's TSC
> > rate. In case that KVM on the target machine supports TSC scaling, guest
> > programs running on the migrated vcpu will observe the same TSC rate
> > before and after the migration.
> > 
> > Signed-off-by: Haozhong Zhang 
> > ---
> >  kvm-all.c | 13 +
> >  1 file changed, 13 insertions(+)
> > 
> > diff --git a/kvm-all.c b/kvm-all.c
> > index 0be4615..e8de038 100644
> > --- a/kvm-all.c
> > +++ b/kvm-all.c
> > @@ -1769,6 +1769,19 @@ void kvm_cpu_synchronize_post_reset(CPUState *cpu)
> >  static void do_kvm_cpu_synchronize_post_init(void *arg)
> >  {
> >  CPUState *cpu = arg;
> > +CPUX86State *env = _CPU(cpu)->env;
> > +int r;
> > +
> > +/*
> > + * XXX: KVM_SET_TSC_KHZ must be done before kvm_arch_put_registers().
> 
> Could you explain where this requirement comes from?
>

kvm_arch_put_registers() below will setup vcpu's MSR_IA32_TSC through
KVM ioctl KVM_SET_MSRS. KVM needs to know vcpu's TSC rate so as to
correctly scale the TSC value given by QEMU, especially when vcpu's
TSC rate is different than the host TSC rate (e.g. it's migrated from
another machine w/ different host TSC rate than the current one).

> > + */
> > +r = kvm_check_extension(cpu->kvm_state, KVM_CAP_TSC_CONTROL);
> > +if (r && env->tsc_khz) {
> > +r = kvm_vcpu_ioctl(cpu, KVM_SET_TSC_KHZ, env->tsc_khz);
> > +if (r < 0) {
> > +fprintf(stderr, "KVM_SET_TSC_KHZ failed\n");
> > +}
> > +}
> 
> This is duplicating the existing KVM_SET_TSC_KHZ call at
> kvm_arch_init_vcpu(). I wonder if there's a way to avoid this
> duplication. Should we set TSC KHz only at
> do_kvm_cpu_synchronize_post_init(), and remove the call from
> kvm_arch_init_vcpu()?
>

I'll check if it's safe to remove the call from kvm_arch_init_vcpu().

> Or maybe we shouldn't treat this as VM state, but as configuration, and
> let management configure the TSC frequency explicitly if the user really
> needs it to stay the same during migration.
>
> (CCing libvir-list to see if they have feedback)
>

Thanks for the CC. I'll consider adding a command line option to control
the configuration of the guest TSC frequency.

> -- 
> Eduardo
--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH 01/12] KVM: x86: Collect information for setting TSC scaling ratio

2015-09-28 Thread Haozhong Zhang
On Mon, Sep 28, 2015 at 08:28:57PM -0700, Eric Northup wrote:
> On Sun, Sep 27, 2015 at 10:38 PM, Haozhong Zhang
>  wrote:
> >
> > The number of bits of the fractional part of the 64-bit TSC scaling
> > ratio in VMX and SVM is different. This patch makes the architecture
> > code to collect the number of fractional bits and other related
> > information into variables that can be accessed in the common code.
> >
> > Signed-off-by: Haozhong Zhang 
> > ---
> >  arch/x86/include/asm/kvm_host.h | 8 
> >  arch/x86/kvm/svm.c  | 5 +
> >  arch/x86/kvm/x86.c  | 8 
> >  3 files changed, 21 insertions(+)
> >
> > diff --git a/arch/x86/include/asm/kvm_host.h 
> > b/arch/x86/include/asm/kvm_host.h
> > index 2beee03..5b9b86e 100644
> > --- a/arch/x86/include/asm/kvm_host.h
> > +++ b/arch/x86/include/asm/kvm_host.h
> > @@ -965,6 +965,14 @@ extern bool kvm_has_tsc_control;
> >  extern u32  kvm_min_guest_tsc_khz;
> >  /* maximum supported tsc_khz for guests */
> >  extern u32  kvm_max_guest_tsc_khz;
> > +/* number of bits of the fractional part of the TSC scaling ratio */
> > +extern u8   kvm_tsc_scaling_ratio_frac_bits;
> > +/* reserved bits of TSC scaling ratio (SBZ) */
> > +extern u64  kvm_tsc_scaling_ratio_rsvd;
> > +/* default TSC scaling ratio (= 1.0) */
> > +extern u64  kvm_default_tsc_scaling_ratio;
> > +/* maximum allowed value of TSC scaling ratio */
> > +extern u64  kvm_max_tsc_scaling_ratio;
> 
> Do we need all 3 of kvm_max_guest_tsc_khz, kvm_max_tsc_scaling_ratio,
> and kvm_tsc_scaling_ratio_rsvd (since only SVM has reserved bits - and
> just for complaining if the high bits are set, which can already be
> expressed by max_tsc_scaling ratio)
> 
> kvm_max_tsc_scaling_ratio seems to be write-only.
>

You are right. I'll remove kvm_tsc_scaling_ratio_rsvd and just use
kvm_max_tsc_scaling_ratio to verify TSC scaling ratio in
set_tsc_khz().

> >
> >  enum emulation_result {
> > EMULATE_DONE, /* no further processing */
> > diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c
> > index 94b7d15..eff7db7 100644
> > --- a/arch/x86/kvm/svm.c
> > +++ b/arch/x86/kvm/svm.c
> > @@ -963,7 +963,12 @@ static __init int svm_hardware_setup(void)
> > max = min(0x7fffULL, __scale_tsc(tsc_khz, 
> > TSC_RATIO_MAX));
> >
> > kvm_max_guest_tsc_khz = max;
> > +
> > +   kvm_max_tsc_scaling_ratio = TSC_RATIO_MAX;
> > +   kvm_tsc_scaling_ratio_frac_bits = 32;
> > +   kvm_tsc_scaling_ratio_rsvd = TSC_RATIO_RSVD;
> > }
> > +   kvm_default_tsc_scaling_ratio = TSC_RATIO_DEFAULT;
> >
> > if (nested) {
> > printk(KERN_INFO "kvm: Nested Virtualization enabled\n");
> > diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
> > index 991466b..f888225 100644
> > --- a/arch/x86/kvm/x86.c
> > +++ b/arch/x86/kvm/x86.c
> > @@ -106,6 +106,14 @@ bool kvm_has_tsc_control;
> >  EXPORT_SYMBOL_GPL(kvm_has_tsc_control);
> >  u32  kvm_max_guest_tsc_khz;
> >  EXPORT_SYMBOL_GPL(kvm_max_guest_tsc_khz);
> > +u8   kvm_tsc_scaling_ratio_frac_bits;
> > +EXPORT_SYMBOL_GPL(kvm_tsc_scaling_ratio_frac_bits);
> > +u64  kvm_tsc_scaling_ratio_rsvd;
> > +EXPORT_SYMBOL_GPL(kvm_tsc_scaling_ratio_rsvd);
> > +u64  kvm_default_tsc_scaling_ratio;
> > +EXPORT_SYMBOL_GPL(kvm_default_tsc_scaling_ratio);
> > +u64  kvm_max_tsc_scaling_ratio;
> > +EXPORT_SYMBOL_GPL(kvm_max_tsc_scaling_ratio);
> >
> >  /* tsc tolerance in parts per million - default to 1/2 of the NTP 
> > threshold */
> >  static u32 tsc_tolerance_ppm = 250;
> > --
> > 2.4.8
> >
> > --
> > To unsubscribe from this list: send the line "unsubscribe kvm" in
> > the body of a message to majord...@vger.kernel.org
> > More majordomo info at  http://vger.kernel.org/majordomo-info.html
--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH v4 01/15] KVM: arm: plug guest debug exploit

2015-09-28 Thread Zhichao Huang


On 2015/9/2 19:38, Christoffer Dall wrote:
> 
> I really think that we should read the register, clear the bits you care
> about (MDBGen and HDBGen) and then write back the register.
> 
> So, if I recall correctly, this is to avoid having to set HDCR_TDE
> below?
> 
> Given Will's concerns about touching this register, I'm thinking if we
> shouldn't start with the HDCR_TDE enabled (and a handler in KVM) and
> then see if we want to add this optimization later?
> 
> At the very least, you should do as Will pointed out and predicate
> writes to this register based on whether the reset code in
> hw_breakpoint.c successfully reset the debug regs.  I think checking the
> debug_err_mask variable from the C code and pass this on to the Hyp code
> would be the right way to go.
> 
> But as I said, I think we should just trap debug exceptions to begin
> with (to plug the hole) and then add the more intelligent stuff later.
> 

OK, I will set HDCR_TDE, and ignore all the debug exceptions in
KVM handlers to prevent the guest from messing with the host state.

>> +.endif
>> +
>> +mcr p14, 0, r2, c0, c2, 2   @ DBGDSCR
>>  .endm
>>  
>>  /*
>> @@ -620,7 +633,7 @@ ARM_BE8(rev  r6, r6  )
>>   * (hardware reset value is 0) */
>>  .macro set_hdcr operation
>>  mrc p15, 4, r2, c1, c1, 1
>> -ldr r3, =(HDCR_TPM|HDCR_TPMCR)
>> +ldr r3, =(HDCR_TPM|HDCR_TPMCR|HDCR_TDRA|HDCR_TDOSA|HDCR_TDA)
> 
> 
> 
>>  .if \operation == vmentry
>>  orr r2, r2, r3  @ Trap some perfmon accesses
>>  .else
>> -- 
>> 1.7.12.4
>>
> 
> Thanks,
> -Christoffer
> 
--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH 00/12] KVM: x86: add support for VMX TSC scaling

2015-09-28 Thread Haozhong Zhang
On Mon, Sep 28, 2015 at 09:00:39PM -0700, Eric Northup wrote:
> On Sun, Sep 27, 2015 at 10:37 PM, Haozhong Zhang
>  wrote:
> > This patchset adds support for VMX TSC scaling feature which is
> > available on Intel Skylake CPU. The specification of VMX TSC scaling
> > can be found at
> > http://www.intel.com/content/www/us/en/processors/timestamp-counter-scaling-virtualization-white-paper.html
> >
> > VMX TSC scaling allows guest TSC which is read by guest rdtsc(p)
> > instructions increases in a rate that is customized by the hypervisor
> > and can be different than the host TSC rate. Basically, VMX TSC
> > scaling adds a 64-bit field called TSC multiplier in VMCS so that, if
> > VMX TSC scaling is enabled, TSC read by guest rdtsc(p) instructions
> > will be calculated by the following formula:
> >
> >   guest EDX:EAX = (Host TSC * TSC multiplier) >> 48 + VMX TSC Offset
> >
> > where, Host TSC = Host MSR_IA32_TSC + Host MSR_IA32_TSC_ADJUST.
> >
> > This patchset, when cooperating with another QEMU patchset (sent in
> > another email "target-i386: save/restore vcpu's TSC rate during
> > migration"), allows guest programs observe a consistent TSC rate even
> > though they are migrated among machines with different host TSC rates.
> >
> > VMX TSC scaling shares some common logics with SVM TSC scaling which
> > is already supported by KVM. Patch 1 ~ 8 move those common logics from
> > SVM code to the common code. Upon them, patch 9 ~ 12 add VMX-specific
> > support for VMX TSC scaling.
> 
> reviewed-by: Eric Northup 
>

Thank you for the review!

> >
> > Haozhong Zhang (12):
> >   KVM: x86: Collect information for setting TSC scaling ratio
> >   KVM: x86: Add a common TSC scaling ratio field in kvm_vcpu_arch
> >   KVM: x86: Add a common TSC scaling function
> >   KVM: x86: Replace call-back set_tsc_khz() with a common function
> >   KVM: x86: Replace call-back compute_tsc_offset() with a common function
> >   KVM: x86: Move TSC scaling logic out of call-back adjust_tsc_offset()
> >   KVM: x86: Move TSC scaling logic out of call-back read_l1_tsc()
> >   KVM: x86: Use the correct vcpu's TSC rate to compute time scale
> >   KVM: VMX: Enable and initialize VMX TSC scaling
> >   KVM: VMX: Setup TSC scaling ratio when a vcpu is loaded
> >   KVM: VMX: Use a scaled host TSC for guest readings of MSR_IA32_TSC
> >   KVM: VMX: Dump TSC multiplier in dump_vmcs()
> >
> >  arch/x86/include/asm/kvm_host.h |  24 +++
> >  arch/x86/include/asm/vmx.h  |   4 +-
> >  arch/x86/kvm/lapic.c|   5 +-
> >  arch/x86/kvm/svm.c  | 113 +++--
> >  arch/x86/kvm/vmx.c  |  60 
> >  arch/x86/kvm/x86.c  | 154 
> > +---
> >  include/linux/kvm_host.h|  21 +-
> >  7 files changed, 221 insertions(+), 160 deletions(-)
> >
> > --
> > 2.4.8
> >
> > --
> > To unsubscribe from this list: send the line "unsubscribe kvm" in
> > the body of a message to majord...@vger.kernel.org
> > More majordomo info at  http://vger.kernel.org/majordomo-info.html
--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH 06/12] KVM: x86: Move TSC scaling logic out of call-back adjust_tsc_offset()

2015-09-28 Thread Paolo Bonzini


On 28/09/2015 07:38, Haozhong Zhang wrote:
> +
> +static inline void adjust_tsc_offset_guest(struct kvm_vcpu *vcpu,
> +s64 adjustment)
> +{
> + kvm_x86_ops->adjust_tsc_offset(vcpu, adjustment, false);
> +}
> +
> +static inline void adjust_tsc_offset_host(struct kvm_vcpu *vcpu, s64 
> adjustment)
> +{
> + if (vcpu->arch.tsc_scaling_ratio != kvm_default_tsc_scaling_ratio)
> + WARN_ON(adjustment < 0);
> + adjustment = kvm_scale_tsc(vcpu, (u64) adjustment);
> + kvm_x86_ops->adjust_tsc_offset(vcpu, adjustment, true);
> +}

You can remove the final argument to the callback (and possibly change
the callback's name to adjust_tsc_offset_guest), because it is now unused.

Paolo
--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH 03/12] KVM: x86: Add a common TSC scaling function

2015-09-28 Thread Paolo Bonzini


On 28/09/2015 07:38, Haozhong Zhang wrote:
>  
> -static u64 __scale_tsc(u64 ratio, u64 tsc)
> -{
> - u64 mult, frac, _tsc;
> -
> - mult  = ratio >> 32;
> - frac  = ratio & ((1ULL << 32) - 1);
> -
> - _tsc  = tsc;
> - _tsc *= mult;
> - _tsc += (tsc >> 32) * frac;
> - _tsc += ((tsc & ((1ULL << 32) - 1)) * frac) >> 32;
> -
> - return _tsc;
> -}

This is basically

return mul_u64_u64_shr(ratio, tsc,
   kvm_tsc_scaling_ratio_frac_bits);

except that Linux has no mul_u64_u64_shr function, only mul_u64_u32_shr.

We should implement that function in include/linux/math64.h instead.
For the x86_64 case (or any other CONFIG_ARCH_SUPPORTS_INT128
architecture) we can just write it directly, as is done already for
mul_u64_u32_shr.

For the 32-bit case, here is an implementation of both the
multiplication and the shift, lifted from QEMU:

/*
 * mul64 - full 64 x 64 -> 128-bit unsigned multiplication.
 * @lo: receives the low 64 bits of a * b
 * @hi: receives the high 64 bits of a * b
 * @a:  first factor
 * @b:  second factor
 *
 * Schoolbook multiplication on 32-bit halves.  The halves are extracted
 * with shifts and masks rather than through a union, so the result does
 * not depend on host byte order or on how the endianness macros happen
 * to be defined by the surrounding headers.
 */
static inline void mul64(uint64_t *lo, uint64_t *hi,
                         uint64_t a, uint64_t b)
{
    const uint64_t mask32 = 0xffffffffULL;
    const uint64_t a_lo = a & mask32, a_hi = a >> 32;
    const uint64_t b_lo = b & mask32, b_hi = b >> 32;

    /* Four 32 x 32 -> 64-bit partial products. */
    const uint64_t rl = a_lo * b_lo;    /* weight 2^0  */
    const uint64_t rm = a_lo * b_hi;    /* weight 2^32 */
    const uint64_t rn = a_hi * b_lo;    /* weight 2^32 */
    const uint64_t rh = a_hi * b_hi;    /* weight 2^64 */

    /*
     * Bits 32..63 of the result plus the carry into bit 64.  Each term
     * is below 2^32, so the sum cannot overflow 64 bits.
     */
    const uint64_t mid = (rl >> 32) + (rm & mask32) + (rn & mask32);

    *lo = (mid << 32) | (rl & mask32);
    /* The true high half always fits in 64 bits, so this cannot wrap. */
    *hi = rh + (rm >> 32) + (rn >> 32) + (mid >> 32);
}

/*
 * rshift128() - logical right shift of the 128-bit value *hi:*lo, in place.
 *
 * @lo: low 64 bits, updated with the shifted result
 * @hi: high 64 bits, updated with the shifted result
 * @n:  shift count; 0 leaves the value untouched
 *
 * Only the low six bits of @n are used to shift the high word, so the
 * caller is expected to pass a count below 128.
 */
static inline void rshift128(uint64_t *lo, uint64_t *hi, int n)
{
    uint64_t shifted_hi;

    if (n == 0)
        return;

    shifted_hi = *hi >> (n & 63);

    if (n < 64) {
        /* Bits leaving the high word enter the top of the low word. */
        *lo = (*hi << (64 - n)) | (*lo >> n);
        *hi = shifted_hi;
        return;
    }

    /* Shift of a whole word or more: the low word is lost entirely. */
    *hi = 0;
    *lo = shifted_hi;
}

and you can easily reuse this code in Linux with just uintNN_t types
changed to uNN + some extra cleanups when it's placed in a single function.

Paolo
--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH 3/3] kvm-all: notice KVM of vcpu's TSC rate after migration

2015-09-28 Thread Eduardo Habkost
On Mon, Sep 28, 2015 at 01:38:31PM +0800, Haozhong Zhang wrote:
> When a vcpu is created in KVM, its TSC rate is initially identical to
> the host TSC rate. If its state is migrated to a vcpu on another
> machine (target machine) which may use a different host TSC rate, QEMU
> on the target machine should notify KVM of the migrated vcpu's TSC
> rate. In case that KVM on the target machine supports TSC scaling, guest
> programs running on the migrated vcpu will observe the same TSC rate
> before and after the migration.
> 
> Signed-off-by: Haozhong Zhang 
> ---
>  kvm-all.c | 13 +
>  1 file changed, 13 insertions(+)
> 
> diff --git a/kvm-all.c b/kvm-all.c
> index 0be4615..e8de038 100644
> --- a/kvm-all.c
> +++ b/kvm-all.c
> @@ -1769,6 +1769,19 @@ void kvm_cpu_synchronize_post_reset(CPUState *cpu)
>  static void do_kvm_cpu_synchronize_post_init(void *arg)
>  {
>  CPUState *cpu = arg;
> +CPUX86State *env = &X86_CPU(cpu)->env;
> +int r;
> +
> +/*
> + * XXX: KVM_SET_TSC_KHZ must be done before kvm_arch_put_registers().

Could you explain where this requirement comes from?

> + */
> +r = kvm_check_extension(cpu->kvm_state, KVM_CAP_TSC_CONTROL);
> +if (r && env->tsc_khz) {
> +r = kvm_vcpu_ioctl(cpu, KVM_SET_TSC_KHZ, env->tsc_khz);
> +if (r < 0) {
> +fprintf(stderr, "KVM_SET_TSC_KHZ failed\n");
> +}
> +}

This is duplicating the existing KVM_SET_TSC_KHZ call at
kvm_arch_init_vcpu(). I wonder if there's a way to avoid this
duplication. Should we set TSC KHz only at
do_kvm_cpu_synchronize_post_init(), and remove the call from
kvm_arch_init_vcpu()?

Or maybe we shouldn't treat this as VM state, but as configuration, and
let management configure the TSC frequency explicitly if the user really
needs it to stay the same during migration.

(CCing libvir-list to see if they have feedback)

-- 
Eduardo
--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH][v2] KVM: PPC: e500: Emulate TMCFG0 TMRN register

2015-09-28 Thread Scott Wood
On Fri, 2015-09-25 at 18:02 +0300, Laurentiu Tudor wrote:
> Emulate TMCFG0 TMRN register exposing one HW thread per vcpu.
> 
> Signed-off-by: Mihai Caraman 
> [laurentiu.tu...@freescale.com: rebased on latest kernel, use
>  define instead of hardcoded value, moved code in own function]
> Signed-off-by: Laurentiu Tudor 
> ---
> v2:
>  - moved code in its own function
> Needs this patch: https://patchwork.ozlabs.org/patch/521752/
> 
>  arch/powerpc/include/asm/disassemble.h |  5 +
>  arch/powerpc/kvm/e500_emulate.c| 19 +++
>  2 files changed, 24 insertions(+)

Acked-by: Scott Wood 

-Scott

--
To unsubscribe from this list: send the line "unsubscribe kvm-ppc" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH][v2] KVM: PPC: e500: Emulate TMCFG0 TMRN register

2015-09-28 Thread Scott Wood
On Fri, 2015-09-25 at 18:02 +0300, Laurentiu Tudor wrote:
> Emulate TMCFG0 TMRN register exposing one HW thread per vcpu.
> 
> Signed-off-by: Mihai Caraman 
> [laurentiu.tu...@freescale.com: rebased on latest kernel, use
>  define instead of hardcoded value, moved code in own function]
> Signed-off-by: Laurentiu Tudor 
> ---
> v2:
>  - moved code in its own function
> Needs this patch: https://patchwork.ozlabs.org/patch/521752/
> 
>  arch/powerpc/include/asm/disassemble.h |  5 +
>  arch/powerpc/kvm/e500_emulate.c| 19 +++
>  2 files changed, 24 insertions(+)

Acked-by: Scott Wood 

-Scott

--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH 2/3] target-i386: initialize vcpu's TSC rate to the value from KVM

2015-09-28 Thread Eduardo Habkost
On Mon, Sep 28, 2015 at 01:38:30PM +0800, Haozhong Zhang wrote:
> When creating a vcpu, we initialize its TSC rate to the value from
> KVM (through ioctl KVM_GET_TSC_KHZ).
> 
> Signed-off-by: Haozhong Zhang 
> ---
>  target-i386/kvm.c | 7 +++
>  1 file changed, 7 insertions(+)
> 
> diff --git a/target-i386/kvm.c b/target-i386/kvm.c
> index 7b0ba17..c2b161a 100644
> --- a/target-i386/kvm.c
> +++ b/target-i386/kvm.c
> @@ -751,6 +751,13 @@ int kvm_arch_init_vcpu(CPUState *cs)
>  }
>  }
>  
> +r = kvm_vcpu_ioctl(cs, KVM_GET_TSC_KHZ);
> +if (r < 0) {
> +fprintf(stderr, "KVM_GET_TSC_KHZ failed\n");
> +return r;
> +}
> +env->tsc_khz = r;

You are silently overwriting the tsc_khz value set by the user, why?

-- 
Eduardo
--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


RE: [PATCH v3 5/5] KVM: arm64: Implement vGICv3 CPU interface access

2015-09-28 Thread Pavel Fedin
 Hello!

> So: sorry for the noise, you can just go ahead with that native 64-bit
> sysregs encoding for [SG]ET_ONE_REG as you had before.

 Ok, good. Take v4 then. Some of the issues you've commented on were fixed; some
other things were left as they are (I replied to your comments explaining why).
Let's move on. :)

Kind regards,
Pavel Fedin
Expert Engineer
Samsung Electronics Research center Russia


--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH 1/2] x86: kvmclock: abolish PVCLOCK_COUNTS_FROM_ZERO

2015-09-28 Thread Paolo Bonzini


On 18/09/2015 17:54, Radim Krčmář wrote:
> + kvm_sched_clock_offset = kvm_clock_read();
> + pv_time_ops.sched_clock = kvm_sched_clock_read;
> + set_sched_clock_stable();
> +
> + printk("kvm-clock: using sched offset of %llu cycles\n",

Ok to change this to KERN_DEBUG or KERN_INFO?

Paolo

> + kvm_sched_clock_offset);
> +
--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH v4 0/7] KVM: arm64: Implement API for vGICv3 live migration

2015-09-28 Thread Pavel Fedin
This patchset adds necessary userspace API in order to support vGICv3 live
migration. GICv3 registers are accessed using device attribute ioctls,
similar to GICv2.

v3 => v4:
- Split pure refactoring from anything else
- Documentation brought up to date
- Cleaned up 'mmio' structure usage in vgic_attr_regs_access(),
  use call_range_handler() for 64-bit access handling
- Rebased on new linux-next

v2 => v3:
- KVM_DEV_ARM_VGIC_CPUID_MASK enlarged to 20 bits, allowing more than 256
  CPUs.
- Bug fix: Correctly set mmio->private, necessary for redistributor access.
- Added accessors for ICC_AP0R and ICC_AP1R registers
- Rebased on new linux-next

v1 => v2:
- Do not use generic register get/set API for CPU interface, use only
  device attributes.
- Introduce size specifier for distributor and redistributor register
  accesses, do not assume size any more.
- Lots of refactor and reusable code extraction.
- Added forgotten documentation

Pavel Fedin (7):
  KVM: arm/arm64: Move endianness conversion out of
vgic_attr_regs_access()
  KVM: arm/arm64: Refactor vGIC attributes handling code
  KVM: arm/arm64: Fix the documentation
  KVM: arm64: Implement vGICv3 distributor and redistributor access from
userspace
  KVM: arm64: Refactor system register handlers
  KVM: arm64: Introduce find_reg_by_id()
  Implement vGICv3 CPU interface access

 Documentation/virtual/kvm/devices/arm-vgic.txt |  90 ++-
 arch/arm64/include/uapi/asm/kvm.h  |  11 +-
 arch/arm64/kvm/sys_regs.c  |  83 +++---
 arch/arm64/kvm/sys_regs.h  |   8 +-
 arch/arm64/kvm/sys_regs_generic_v8.c   |   2 +-
 include/linux/irqchip/arm-gic-v3.h |  18 +-
 virt/kvm/arm/vgic-v2-emul.c| 122 ++---
 virt/kvm/arm/vgic-v3-emul.c| 338 -
 virt/kvm/arm/vgic.c|  65 +
 virt/kvm/arm/vgic.h|   4 +
 10 files changed, 571 insertions(+), 170 deletions(-)

-- 
2.4.4

--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH v4 1/7] KVM: arm/arm64: Move endianness conversion out of vgic_attr_regs_access()

2015-09-28 Thread Pavel Fedin
mmio_data_read() and mmio_data_write(), originally used in this function,
are limited only to 32 bits. We are going to refactor this code and
eventually let it do 64-bit I/O for vGICv3. Therefore, our first step is
to get rid of this limitation.

We open up these inlines, which consist of endianness conversion and
masking. Masking is not used here (the mask is set to ~0), so we just move
out the remaining endianness conversion.

Signed-off-by: Pavel Fedin 
---
 virt/kvm/arm/vgic-v2-emul.c | 20 
 1 file changed, 8 insertions(+), 12 deletions(-)

diff --git a/virt/kvm/arm/vgic-v2-emul.c b/virt/kvm/arm/vgic-v2-emul.c
index 1390797..959b9c6 100644
--- a/virt/kvm/arm/vgic-v2-emul.c
+++ b/virt/kvm/arm/vgic-v2-emul.c
@@ -663,7 +663,7 @@ static const struct vgic_io_range vgic_cpu_ranges[] = {
 
 static int vgic_attr_regs_access(struct kvm_device *dev,
 struct kvm_device_attr *attr,
-u32 *reg, bool is_write)
+__le32 *data, bool is_write)
 {
const struct vgic_io_range *r = NULL, *ranges;
phys_addr_t offset;
@@ -671,7 +671,6 @@ static int vgic_attr_regs_access(struct kvm_device *dev,
struct kvm_vcpu *vcpu, *tmp_vcpu;
struct vgic_dist *vgic;
struct kvm_exit_mmio mmio;
-   u32 data;
 
offset = attr->attr & KVM_DEV_ARM_VGIC_OFFSET_MASK;
cpuid = (attr->attr & KVM_DEV_ARM_VGIC_CPUID_MASK) >>
@@ -693,9 +692,7 @@ static int vgic_attr_regs_access(struct kvm_device *dev,
 
mmio.len = 4;
mmio.is_write = is_write;
-   mmio.data = &data;
-   if (is_write)
-   mmio_data_write(&mmio, ~0, *reg);
+   mmio.data = data;
switch (attr->group) {
case KVM_DEV_ARM_VGIC_GRP_DIST_REGS:
mmio.phys_addr = vgic->vgic_dist_base + offset;
@@ -743,9 +740,6 @@ static int vgic_attr_regs_access(struct kvm_device *dev,
offset -= r->base;
r->handle_mmio(vcpu, &mmio, offset);
 
-   if (!is_write)
-   *reg = mmio_data_read(&mmio, ~0);
-
ret = 0;
 out_vgic_unlock:
spin_unlock(&vgic->lock);
@@ -778,11 +772,13 @@ static int vgic_v2_set_attr(struct kvm_device *dev,
case KVM_DEV_ARM_VGIC_GRP_CPU_REGS: {
u32 __user *uaddr = (u32 __user *)(long)attr->addr;
u32 reg;
+   __le32 data;
 
if (get_user(reg, uaddr))
return -EFAULT;
 
-   return vgic_attr_regs_access(dev, attr, &reg, true);
+   data = cpu_to_le32(reg);
+   return vgic_attr_regs_access(dev, attr, &data, true);
}
 
}
@@ -803,12 +799,12 @@ static int vgic_v2_get_attr(struct kvm_device *dev,
case KVM_DEV_ARM_VGIC_GRP_DIST_REGS:
case KVM_DEV_ARM_VGIC_GRP_CPU_REGS: {
u32 __user *uaddr = (u32 __user *)(long)attr->addr;
-   u32 reg = 0;
+   __le32 data = 0;
 
-   ret = vgic_attr_regs_access(dev, attr, &reg, false);
+   ret = vgic_attr_regs_access(dev, attr, &data, false);
if (ret)
return ret;
-   return put_user(reg, uaddr);
+   return put_user(le32_to_cpu(data), uaddr);
}
 
}
-- 
2.4.4

--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH v4 7/7] Implement vGICv3 CPU interface access

2015-09-28 Thread Pavel Fedin
The access is done similar to GICv2, using KVM_DEV_ARM_VGIC_GRP_CPU_REGS
group, however attribute ID encodes corresponding system register. Access
size is always 64 bits.

Since CPU interface state actually affects only a single vCPU, no vGIC
locking is done. Just made sure that the vCPU is not running.

Signed-off-by: Pavel Fedin 
---
 Documentation/virtual/kvm/devices/arm-vgic.txt |  38 +++-
 arch/arm64/include/uapi/asm/kvm.h  |   7 +
 include/linux/irqchip/arm-gic-v3.h |  18 +-
 virt/kvm/arm/vgic-v3-emul.c| 244 +
 4 files changed, 303 insertions(+), 4 deletions(-)

diff --git a/Documentation/virtual/kvm/devices/arm-vgic.txt 
b/Documentation/virtual/kvm/devices/arm-vgic.txt
index 1c570e4..264bba6 100644
--- a/Documentation/virtual/kvm/devices/arm-vgic.txt
+++ b/Documentation/virtual/kvm/devices/arm-vgic.txt
@@ -90,7 +90,7 @@ Groups:
 -EBUSY: One or more VCPUs are running
 -EINVAL: Invalid CPU index supplied
 
-  KVM_DEV_ARM_VGIC_GRP_CPU_REGS
+  KVM_DEV_ARM_VGIC_GRP_CPU_REGS (vGICv2)
   Attributes:
 The attr field of kvm_device_attr encodes two values:
 bits: | 63     40 | 39 ..  32  |  31   0 |
@@ -118,7 +118,41 @@ Groups:
 
   Limitations:
 - Priorities are not implemented, and registers are RAZ/WI
-- Currently only implemented for KVM_DEV_TYPE_ARM_VGIC_V2.
+  Errors:
+-ENXIO: Getting or setting this register is not yet supported
+-EBUSY: One or more VCPUs are running
+-EINVAL: Invalid CPU index supplied
+
+  KVM_DEV_ARM_VGIC_GRP_CPU_REGS (vGICv3)
+  Attributes:
+The attr field of kvm_device_attr encodes the following values:
+bits:   | 63 .. 56 | 55 .. 48 | 47 ... 40 | 39 .. 32 | 31 .. 0 |
+values: |   arch   |   size   | reserved  |  cpu idx |  reg id |
+
+All CPU interface regs are (rw, 64-bit). The only supported size value is
+KVM_REG_SIZE_U64.
+
+Arch, size and reg id fields actually encode system register to be
+accessed. Normally these values are obtained using ARM64_SYS_REG() macro.
+Getting or setting such a register has the same effect as reading or
+writing the register on the actual hardware.
+
+The Active Priorities Registers AP0Rn and AP1Rn are implementation defined,
+so we set a fixed format for our implementation that fits with the model of
+a "GICv3 implementation without the security extensions" which we present
+to the guest. This interface always exposes four registers APR[0-3]
+describing the maximum possible 128 preemption levels. The semantics of the
+register indicates if any interrupts in a given preemption level are in the
+active state by setting the corresponding bit.
+
+Thus, preemption level X has one or more active interrupts if and only if:
+
+  APRn[X mod 32] == 0b1,  where n = X / 32
+
+Bits for undefined preemption levels are RAZ/WI.
+
+  Limitations:
+- Priorities are not implemented, and registers are RAZ/WI
   Errors:
 -ENXIO: Getting or setting this register is not yet supported
 -EBUSY: One or more VCPUs are running
diff --git a/arch/arm64/include/uapi/asm/kvm.h 
b/arch/arm64/include/uapi/asm/kvm.h
index 249954f..7d37ccd 100644
--- a/arch/arm64/include/uapi/asm/kvm.h
+++ b/arch/arm64/include/uapi/asm/kvm.h
@@ -201,6 +201,13 @@ struct kvm_arch_memory_slot {
 #define   KVM_DEV_ARM_VGIC_CPUID_MASK  (0xfULL << 
KVM_DEV_ARM_VGIC_CPUID_SHIFT)
 #define   KVM_DEV_ARM_VGIC_OFFSET_SHIFT0
 #define   KVM_DEV_ARM_VGIC_OFFSET_MASK (0xULL << 
KVM_DEV_ARM_VGIC_OFFSET_SHIFT)
+#define   KVM_DEV_ARM_VGIC_REG_MASK(KVM_REG_SIZE_MASK | \
+KVM_REG_ARM64_SYSREG_OP0_MASK | \
+KVM_REG_ARM64_SYSREG_OP1_MASK | \
+KVM_REG_ARM64_SYSREG_CRN_MASK | \
+KVM_REG_ARM64_SYSREG_CRM_MASK | \
+KVM_REG_ARM64_SYSREG_OP2_MASK)
+
 #define KVM_DEV_ARM_VGIC_GRP_NR_IRQS   3
 #define KVM_DEV_ARM_VGIC_GRP_CTRL  4
 #define   KVM_DEV_ARM_VGIC_CTRL_INIT   0
diff --git a/include/linux/irqchip/arm-gic-v3.h 
b/include/linux/irqchip/arm-gic-v3.h
index 9eeeb95..dbc5c49 100644
--- a/include/linux/irqchip/arm-gic-v3.h
+++ b/include/linux/irqchip/arm-gic-v3.h
@@ -259,8 +259,14 @@
 /*
  * CPU interface registers
  */
-#define ICC_CTLR_EL1_EOImode_drop_dir  (0U << 1)
-#define ICC_CTLR_EL1_EOImode_drop  (1U << 1)
+#define ICC_CTLR_EL1_CBPR_SHIFT0
+#define ICC_CTLR_EL1_EOImode_SHIFT 1
+#define ICC_CTLR_EL1_EOImode_drop_dir  (0U << ICC_CTLR_EL1_EOImode_SHIFT)
+#define ICC_CTLR_EL1_EOImode_drop  (1U << ICC_CTLR_EL1_EOImode_SHIFT)
+#define ICC_CTLR_EL1_PRIbits_MASK  (7U << 8)
+#define ICC_CTLR_EL1_IDbits_MASK   (7U << 11)
+#define ICC_CTLR_EL1_SEIS  (1U << 14)
+#define ICC_CTLR_EL1_A3V   (1U << 15)
 #define 

[PATCH v4 2/7] KVM: arm/arm64: Refactor vGIC attributes handling code

2015-09-28 Thread Pavel Fedin
Separate all implementation-independent code in vgic_attr_regs_access()
and move it to vgic.c. This will allow to reuse this code for vGICv3
implementation.

Signed-off-by: Pavel Fedin 
---
 virt/kvm/arm/vgic-v2-emul.c | 118 +---
 virt/kvm/arm/vgic.c |  64 
 virt/kvm/arm/vgic.h |   4 ++
 3 files changed, 92 insertions(+), 94 deletions(-)

diff --git a/virt/kvm/arm/vgic-v2-emul.c b/virt/kvm/arm/vgic-v2-emul.c
index 959b9c6..b4699c1 100644
--- a/virt/kvm/arm/vgic-v2-emul.c
+++ b/virt/kvm/arm/vgic-v2-emul.c
@@ -661,38 +661,20 @@ static const struct vgic_io_range vgic_cpu_ranges[] = {
},
 };
 
-static int vgic_attr_regs_access(struct kvm_device *dev,
-struct kvm_device_attr *attr,
-__le32 *data, bool is_write)
+static int vgic_v2_attr_regs_access(struct kvm_device *dev,
+   struct kvm_device_attr *attr,
+   __le32 *data, bool is_write)
 {
-   const struct vgic_io_range *r = NULL, *ranges;
+   const struct vgic_io_range *ranges;
phys_addr_t offset;
-   int ret, cpuid, c;
-   struct kvm_vcpu *vcpu, *tmp_vcpu;
-   struct vgic_dist *vgic;
+   int cpuid;
+   struct vgic_dist *vgic = &dev->kvm->arch.vgic;
struct kvm_exit_mmio mmio;
 
offset = attr->attr & KVM_DEV_ARM_VGIC_OFFSET_MASK;
cpuid = (attr->attr & KVM_DEV_ARM_VGIC_CPUID_MASK) >>
KVM_DEV_ARM_VGIC_CPUID_SHIFT;
 
-   mutex_lock(&dev->kvm->lock);
-
-   ret = vgic_init(dev->kvm);
-   if (ret)
-   goto out;
-
-   if (cpuid >= atomic_read(&dev->kvm->online_vcpus)) {
-   ret = -EINVAL;
-   goto out;
-   }
-
-   vcpu = kvm_get_vcpu(dev->kvm, cpuid);
-   vgic = &dev->kvm->arch.vgic;
-
-   mmio.len = 4;
-   mmio.is_write = is_write;
-   mmio.data = data;
switch (attr->group) {
case KVM_DEV_ARM_VGIC_GRP_DIST_REGS:
mmio.phys_addr = vgic->vgic_dist_base + offset;
@@ -703,49 +685,14 @@ static int vgic_attr_regs_access(struct kvm_device *dev,
ranges = vgic_cpu_ranges;
break;
default:
-   BUG();
+   return -ENXIO;
}
-   r = vgic_find_range(ranges, 4, offset);
-
-   if (unlikely(!r || !r->handle_mmio)) {
-   ret = -ENXIO;
-   goto out;
-   }
-
-
-   spin_lock(&vgic->lock);
-
-   /*
-* Ensure that no other VCPU is running by checking the vcpu->cpu
-* field.  If no other VPCUs are running we can safely access the VGIC
-* state, because even if another VPU is run after this point, that
-* VCPU will not touch the vgic state, because it will block on
-* getting the vgic->lock in kvm_vgic_sync_hwstate().
-*/
-   kvm_for_each_vcpu(c, tmp_vcpu, dev->kvm) {
-   if (unlikely(tmp_vcpu->cpu != -1)) {
-   ret = -EBUSY;
-   goto out_vgic_unlock;
-   }
-   }
-
-   /*
-* Move all pending IRQs from the LRs on all VCPUs so the pending
-* state can be properly represented in the register state accessible
-* through this API.
-*/
-   kvm_for_each_vcpu(c, tmp_vcpu, dev->kvm)
-   vgic_unqueue_irqs(tmp_vcpu);
 
-   offset -= r->base;
-   r->handle_mmio(vcpu, &mmio, offset);
+   mmio.len = 4;
+   mmio.is_write = is_write;
+   mmio.data = data;
 
-   ret = 0;
-out_vgic_unlock:
-   spin_unlock(&vgic->lock);
-out:
-   mutex_unlock(&dev->kvm->lock);
-   return ret;
+   return vgic_attr_regs_access(dev, ranges, &mmio, offset, cpuid);
 }
 
 static int vgic_v2_create(struct kvm_device *dev, u32 type)
@@ -761,55 +708,38 @@ static void vgic_v2_destroy(struct kvm_device *dev)
 static int vgic_v2_set_attr(struct kvm_device *dev,
struct kvm_device_attr *attr)
 {
+   u32 __user *uaddr = (u32 __user *)(long)attr->addr;
+   u32 reg;
+   __le32 data;
int ret;
 
ret = vgic_set_common_attr(dev, attr);
if (ret != -ENXIO)
return ret;
 
-   switch (attr->group) {
-   case KVM_DEV_ARM_VGIC_GRP_DIST_REGS:
-   case KVM_DEV_ARM_VGIC_GRP_CPU_REGS: {
-   u32 __user *uaddr = (u32 __user *)(long)attr->addr;
-   u32 reg;
-   __le32 data;
-
-   if (get_user(reg, uaddr))
-   return -EFAULT;
-
-   data = cpu_to_le32(reg);
-   return vgic_attr_regs_access(dev, attr, &data, true);
-   }
-
-   }
+   if (get_user(reg, uaddr))
+   return -EFAULT;
 
-   return -ENXIO;
+   data = cpu_to_le32(reg);
+   return vgic_v2_attr_regs_access(dev, attr, &data, true);
 }
 
 static int vgic_v2_get_attr(struct kvm_device *dev,

[PATCH v4 5/7] KVM: arm64: Refactor system register handlers

2015-09-28 Thread Pavel Fedin
Replace Rt with data pointer in struct sys_reg_params. This will allow to
reuse system register handling code in implementation of vGICv3 CPU
interface access API. Additionally, got rid of "massive hack"
in kvm_handle_cp_64().

Signed-off-by: Pavel Fedin 
---
 arch/arm64/kvm/sys_regs.c| 61 +---
 arch/arm64/kvm/sys_regs.h|  4 +--
 arch/arm64/kvm/sys_regs_generic_v8.c |  2 +-
 3 files changed, 32 insertions(+), 35 deletions(-)

diff --git a/arch/arm64/kvm/sys_regs.c b/arch/arm64/kvm/sys_regs.c
index d03d3af..39db06dd 100644
--- a/arch/arm64/kvm/sys_regs.c
+++ b/arch/arm64/kvm/sys_regs.c
@@ -102,7 +102,7 @@ static bool access_vm_reg(struct kvm_vcpu *vcpu,
 
BUG_ON(!p->is_write);
 
-   val = *vcpu_reg(vcpu, p->Rt);
+   val = *p->val;
if (!p->is_aarch32) {
vcpu_sys_reg(vcpu, r->reg) = val;
} else {
@@ -125,13 +125,10 @@ static bool access_gic_sgi(struct kvm_vcpu *vcpu,
   const struct sys_reg_params *p,
   const struct sys_reg_desc *r)
 {
-   u64 val;
-
if (!p->is_write)
return read_from_write_only(vcpu, p);
 
-   val = *vcpu_reg(vcpu, p->Rt);
-   vgic_v3_dispatch_sgi(vcpu, val);
+   vgic_v3_dispatch_sgi(vcpu, *p->val);
 
return true;
 }
@@ -153,7 +150,7 @@ static bool trap_oslsr_el1(struct kvm_vcpu *vcpu,
if (p->is_write) {
return ignore_write(vcpu, p);
} else {
-   *vcpu_reg(vcpu, p->Rt) = (1 << 3);
+   *p->val = (1 << 3);
return true;
}
 }
@@ -167,7 +164,7 @@ static bool trap_dbgauthstatus_el1(struct kvm_vcpu *vcpu,
} else {
u32 val;
asm volatile("mrs %0, dbgauthstatus_el1" : "=r" (val));
-   *vcpu_reg(vcpu, p->Rt) = val;
+   *p->val = val;
return true;
}
 }
@@ -204,13 +201,13 @@ static bool trap_debug_regs(struct kvm_vcpu *vcpu,
const struct sys_reg_desc *r)
 {
if (p->is_write) {
-   vcpu_sys_reg(vcpu, r->reg) = *vcpu_reg(vcpu, p->Rt);
+   vcpu_sys_reg(vcpu, r->reg) = *p->val;
vcpu->arch.debug_flags |= KVM_ARM64_DEBUG_DIRTY;
} else {
-   *vcpu_reg(vcpu, p->Rt) = vcpu_sys_reg(vcpu, r->reg);
+   *p->val = vcpu_sys_reg(vcpu, r->reg);
}
 
-   trace_trap_reg(__func__, r->reg, p->is_write, *vcpu_reg(vcpu, p->Rt));
+   trace_trap_reg(__func__, r->reg, p->is_write, *p->val);
 
return true;
 }
@@ -228,7 +225,7 @@ static inline void reg_to_dbg(struct kvm_vcpu *vcpu,
  const struct sys_reg_params *p,
  u64 *dbg_reg)
 {
-   u64 val = *vcpu_reg(vcpu, p->Rt);
+   u64 val = *p->val;
 
if (p->is_32bit) {
val &= 0xUL;
@@ -248,7 +245,7 @@ static inline void dbg_to_reg(struct kvm_vcpu *vcpu,
if (p->is_32bit)
val &= 0xUL;
 
-   *vcpu_reg(vcpu, p->Rt) = val;
+   *p->val = val;
 }
 
 static inline bool trap_bvr(struct kvm_vcpu *vcpu,
@@ -697,10 +694,10 @@ static bool trap_dbgidr(struct kvm_vcpu *vcpu,
u64 pfr = read_cpuid(ID_AA64PFR0_EL1);
u32 el3 = !!((pfr >> 12) & 0xf);
 
-   *vcpu_reg(vcpu, p->Rt) = ((((dfr >> 20) & 0xf) << 28) |
- (((dfr >> 12) & 0xf) << 24) |
- (((dfr >> 28) & 0xf) << 20) |
- (6 << 16) | (el3 << 14) | (el3 << 
12));
+   *p->val = ((((dfr >> 20) & 0xf) << 28) |
+  (((dfr >> 12) & 0xf) << 24) |
+  (((dfr >> 28) & 0xf) << 20) |
+  (6 << 16) | (el3 << 14) | (el3 << 12));
return true;
}
 }
@@ -710,10 +707,10 @@ static bool trap_debug32(struct kvm_vcpu *vcpu,
 const struct sys_reg_desc *r)
 {
if (p->is_write) {
-   vcpu_cp14(vcpu, r->reg) = *vcpu_reg(vcpu, p->Rt);
+   vcpu_cp14(vcpu, r->reg) = *p->val;
vcpu->arch.debug_flags |= KVM_ARM64_DEBUG_DIRTY;
} else {
-   *vcpu_reg(vcpu, p->Rt) = vcpu_cp14(vcpu, r->reg);
+   *p->val = vcpu_cp14(vcpu, r->reg);
}
 
return true;
@@ -740,12 +737,12 @@ static inline bool trap_xvr(struct kvm_vcpu *vcpu,
u64 val = *dbg_reg;
 
val &= 0xUL;
-   val |= *vcpu_reg(vcpu, p->Rt) << 32;
+   val |= *p->val << 32;
*dbg_reg = val;
 
vcpu->arch.debug_flags |= KVM_ARM64_DEBUG_DIRTY;
} else {
-   *vcpu_reg(vcpu, p->Rt) = *dbg_reg >> 32;
+   *p->val = *dbg_reg >> 32;
}
 
trace_trap_reg(__func__, rd->reg, p->is_write, 

[PATCH v4 6/7] KVM: arm64: Introduce find_reg_by_id()

2015-09-28 Thread Pavel Fedin
In order to implement vGICv3 CPU interface access, we will need to perform
table lookup of system registers. We would need both index_to_params() and
find_reg() exported for that purpose, but instead we export a single
function which combines them both.

Signed-off-by: Pavel Fedin 
Reviewed-by: Andre Przywara 
---
 arch/arm64/kvm/sys_regs.c | 22 +++---
 arch/arm64/kvm/sys_regs.h |  4 
 2 files changed, 19 insertions(+), 7 deletions(-)

diff --git a/arch/arm64/kvm/sys_regs.c b/arch/arm64/kvm/sys_regs.c
index 39db06dd..713b4fa 100644
--- a/arch/arm64/kvm/sys_regs.c
+++ b/arch/arm64/kvm/sys_regs.c
@@ -1276,6 +1276,17 @@ static bool index_to_params(u64 id, struct 
sys_reg_params *params)
}
 }
 
+const struct sys_reg_desc *find_reg_by_id(u64 id,
+ struct sys_reg_params *params,
+ const struct sys_reg_desc table[],
+ unsigned int num)
+{
+   if (!index_to_params(id, params))
+   return NULL;
+
+   return find_reg(params, table, num);
+}
+
 /* Decode an index value, and find the sys_reg_desc entry. */
 static const struct sys_reg_desc *index_to_sys_reg_desc(struct kvm_vcpu *vcpu,
u64 id)
@@ -1403,10 +1414,8 @@ static int get_invariant_sys_reg(u64 id, void __user 
*uaddr)
struct sys_reg_params params;
const struct sys_reg_desc *r;
 
-   if (!index_to_params(id, &params))
-   return -ENOENT;
-
-   r = find_reg(&params, invariant_sys_regs, ARRAY_SIZE(invariant_sys_regs));
+   r = find_reg_by_id(id, &params, invariant_sys_regs,
+  ARRAY_SIZE(invariant_sys_regs));
if (!r)
return -ENOENT;
 
@@ -1420,9 +1429,8 @@ static int set_invariant_sys_reg(u64 id, void __user 
*uaddr)
int err;
u64 val = 0; /* Make sure high bits are 0 for 32-bit regs */
 
-   if (!index_to_params(id, &params))
-   return -ENOENT;
-   r = find_reg(&params, invariant_sys_regs, ARRAY_SIZE(invariant_sys_regs));
+   r = find_reg_by_id(id, &params, invariant_sys_regs,
+  ARRAY_SIZE(invariant_sys_regs));
if (!r)
return -ENOENT;
 
diff --git a/arch/arm64/kvm/sys_regs.h b/arch/arm64/kvm/sys_regs.h
index 3267518..0646108 100644
--- a/arch/arm64/kvm/sys_regs.h
+++ b/arch/arm64/kvm/sys_regs.h
@@ -136,6 +136,10 @@ static inline int cmp_sys_reg(const struct sys_reg_desc 
*i1,
return i1->Op2 - i2->Op2;
 }
 
+const struct sys_reg_desc *find_reg_by_id(u64 id,
+ struct sys_reg_params *params,
+ const struct sys_reg_desc table[],
+ unsigned int num);
 
 #define Op0(_x).Op0 = _x
 #define Op1(_x).Op1 = _x
-- 
2.4.4

--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH v4 4/7] KVM: arm64: Implement vGICv3 distributor and redistributor access from userspace

2015-09-28 Thread Pavel Fedin
The access is done similar to vGICv2, using KVM_DEV_ARM_VGIC_GRP_DIST_REGS
and KVM_DEV_ARM_VGIC_GRP_REDIST_REGS with KVM_SET_DEVICE_ATTR and
KVM_GET_DEVICE_ATTR ioctls. Since GICv3 can handle large number of CPUs,
KVM_DEV_ARM_VGIC_CPUID_MASK has been extended to 20 bits. This is enough
for 1048576 CPUs.

Some registers are 64-bit wide according to the specification.
KVM_DEV_ARM_VGIC_64BIT flag is introduced, allowing to perform full 64-bit
accesses. vgic_attr_regs_access() has also been fixed up in order to be
able to perform 64-bit accesses correctly.

Signed-off-by: Pavel Fedin 
---
 Documentation/virtual/kvm/devices/arm-vgic.txt | 36 --
 arch/arm64/include/uapi/asm/kvm.h  |  4 +-
 virt/kvm/arm/vgic-v3-emul.c| 94 ++
 virt/kvm/arm/vgic.c|  5 +-
 4 files changed, 118 insertions(+), 21 deletions(-)

diff --git a/Documentation/virtual/kvm/devices/arm-vgic.txt 
b/Documentation/virtual/kvm/devices/arm-vgic.txt
index 4727829..1c570e4 100644
--- a/Documentation/virtual/kvm/devices/arm-vgic.txt
+++ b/Documentation/virtual/kvm/devices/arm-vgic.txt
@@ -43,10 +43,13 @@ Groups:
   KVM_DEV_ARM_VGIC_GRP_DIST_REGS
   Attributes:
 The attr field of kvm_device_attr encodes two values:
-bits: | 63     40 | 39 ..  32  |  31   0 |
-values:   |reserved   |  cpu idx   |  offset |
+bits: |  63  | 62 .. 52 | 51 ..  32  |  31   0 |
+values:   | size | reserved |  cpu idx   |  offset |
 
-All distributor regs are (rw, 32-bit)
+All distributor regs can be accessed as (rw, 32-bit)
+For GICv3 some registers are actually (rw, 64-bit) according to the
+specification. In order to perform full 64-bit access 'size' bit should be
+set to 1. KVM_DEV_ARM_VGIC_64BIT flag value is provided for this purpose.
 
 The offset is relative to the "Distributor base address" as defined in the
 GICv2 specs.  Getting or setting such a register has the same effect as
@@ -54,9 +57,34 @@ Groups:
 index is specified with cpu idx field.  Note that most distributor fields
 are not banked, but return the same value regardless of the cpu idx used to
 access the register.
+
+  Limitations:
+- Priorities are not implemented, and registers are RAZ/WI
+  Errors:
+-ENXIO: Getting or setting this register is not yet supported
+-EBUSY: One or more VCPUs are running
+-EINVAL: Invalid CPU index supplied
+
+  KVM_DEV_ARM_VGIC_GRP_REDIST_REGS
+  Attributes:
+The attr field of kvm_device_attr encodes two values:
+bits: |  63  | 62 .. 52 | 51 ..  32  |  31   0 |
+values:   | size | reserved |  cpu idx   |  offset |
+
+All redistributor regs can be accessed as (rw, 32-bit)
+For GICv3 some registers are actually (rw, 64-bit) according to the
+specification. In order to perform full 64-bit access 'size' bit should be
+set to 1. KVM_DEV_ARM_VGIC_64BIT flag value is provided for this purpose.
+
+The offset is relative to the "Redistributor base address" as defined in
+the GICv3 specs.  Getting or setting such a register has the same effect as
+reading or writing the register on the actual hardware from the cpu whose
+index is specified with cpu idx field. Note that most distributor fields
+are not banked, but return the same value regardless of the cpu idx used to
+access the register.
+
   Limitations:
 - Priorities are not implemented, and registers are RAZ/WI
-- Currently only implemented for KVM_DEV_TYPE_ARM_VGIC_V2.
   Errors:
 -ENXIO: Getting or setting this register is not yet supported
 -EBUSY: One or more VCPUs are running
diff --git a/arch/arm64/include/uapi/asm/kvm.h 
b/arch/arm64/include/uapi/asm/kvm.h
index 0cd7b59..249954f 100644
--- a/arch/arm64/include/uapi/asm/kvm.h
+++ b/arch/arm64/include/uapi/asm/kvm.h
@@ -196,13 +196,15 @@ struct kvm_arch_memory_slot {
 #define KVM_DEV_ARM_VGIC_GRP_ADDR  0
 #define KVM_DEV_ARM_VGIC_GRP_DIST_REGS 1
 #define KVM_DEV_ARM_VGIC_GRP_CPU_REGS  2
+#define   KVM_DEV_ARM_VGIC_64BIT   (1ULL << 63)
 #define   KVM_DEV_ARM_VGIC_CPUID_SHIFT 32
-#define   KVM_DEV_ARM_VGIC_CPUID_MASK  (0xffULL << 
KVM_DEV_ARM_VGIC_CPUID_SHIFT)
+#define   KVM_DEV_ARM_VGIC_CPUID_MASK  (0xfffffULL << KVM_DEV_ARM_VGIC_CPUID_SHIFT)
 #define   KVM_DEV_ARM_VGIC_OFFSET_SHIFT0
 #define   KVM_DEV_ARM_VGIC_OFFSET_MASK (0xffffffffULL << KVM_DEV_ARM_VGIC_OFFSET_SHIFT)
 #define KVM_DEV_ARM_VGIC_GRP_NR_IRQS   3
 #define KVM_DEV_ARM_VGIC_GRP_CTRL  4
 #define   KVM_DEV_ARM_VGIC_CTRL_INIT   0
+#define KVM_DEV_ARM_VGIC_GRP_REDIST_REGS 5
 
 /* KVM_IRQ_LINE irq field index values */
 #define KVM_ARM_IRQ_TYPE_SHIFT 24
diff --git a/virt/kvm/arm/vgic-v3-emul.c b/virt/kvm/arm/vgic-v3-emul.c
index e661e7f..ce797bd 100644
--- a/virt/kvm/arm/vgic-v3-emul.c
+++ b/virt/kvm/arm/vgic-v3-emul.c
@@ -39,6 +39,7 @@
 #include 
 #include 
 

[PATCH v4 3/7] KVM: arm/arm64: Fix the documentation

2015-09-28 Thread Pavel Fedin
During refactoring we noticed some mistakes in the documentation.
Correct them.

Signed-off-by: Pavel Fedin 
---
 Documentation/virtual/kvm/devices/arm-vgic.txt | 18 ++
 1 file changed, 10 insertions(+), 8 deletions(-)

diff --git a/Documentation/virtual/kvm/devices/arm-vgic.txt 
b/Documentation/virtual/kvm/devices/arm-vgic.txt
index 3fb9054..4727829 100644
--- a/Documentation/virtual/kvm/devices/arm-vgic.txt
+++ b/Documentation/virtual/kvm/devices/arm-vgic.txt
@@ -44,28 +44,29 @@ Groups:
   Attributes:
 The attr field of kvm_device_attr encodes two values:
 bits: | 63     40 | 39 ..  32  |  31   0 |
-values:   |reserved   |   cpu id   |  offset |
+values:   |reserved   |  cpu idx   |  offset |
 
 All distributor regs are (rw, 32-bit)
 
 The offset is relative to the "Distributor base address" as defined in the
 GICv2 specs.  Getting or setting such a register has the same effect as
-reading or writing the register on the actual hardware from the cpu
-specified with cpu id field.  Note that most distributor fields are not
-banked, but return the same value regardless of the cpu id used to access
-the register.
+reading or writing the register on the actual hardware from the cpu whose
+index is specified with cpu idx field.  Note that most distributor fields
+are not banked, but return the same value regardless of the cpu idx used to
+access the register.
   Limitations:
 - Priorities are not implemented, and registers are RAZ/WI
 - Currently only implemented for KVM_DEV_TYPE_ARM_VGIC_V2.
   Errors:
--ENODEV: Getting or setting this register is not yet supported
+-ENXIO: Getting or setting this register is not yet supported
 -EBUSY: One or more VCPUs are running
+-EINVAL: Invalid CPU index supplied
 
   KVM_DEV_ARM_VGIC_GRP_CPU_REGS
   Attributes:
 The attr field of kvm_device_attr encodes two values:
 bits: | 63     40 | 39 ..  32  |  31   0 |
-values:   |reserved   |   cpu id   |  offset |
+values:   |reserved   |  cpu idx   |  offset |
 
 All CPU interface regs are (rw, 32-bit)
 
@@ -91,8 +92,9 @@ Groups:
 - Priorities are not implemented, and registers are RAZ/WI
 - Currently only implemented for KVM_DEV_TYPE_ARM_VGIC_V2.
   Errors:
--ENODEV: Getting or setting this register is not yet supported
+-ENXIO: Getting or setting this register is not yet supported
 -EBUSY: One or more VCPUs are running
+-EINVAL: Invalid CPU index supplied
 
   KVM_DEV_ARM_VGIC_GRP_NR_IRQS
   Attributes:
-- 
2.4.4

--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH v3 4/4] scsi: provide UAPI version of scsi/sg.h and scsi/scsi_ioctl.h

2015-09-28 Thread kbuild test robot
Hi Paolo,

[auto build test results on v4.3-rc2 -- if it's inappropriate base, please 
ignore]

config: i386-randconfig-a0-201538 (attached as .config)
reproduce:
  git checkout d88f2083643f6dfacba14b2e95217dc6e0a4be37
  # save the attached .config to linux build tree
  make ARCH=i386 

All warnings (new ones prefixed by >>):

>> ./usr/include/linux/scsi_ioctl.h:49: found __[us]{8,16,32,64} type without 
>> #include <linux/types.h>

---
0-DAY kernel test infrastructureOpen Source Technology Center
https://lists.01.org/pipermail/kbuild-all   Intel Corporation


.config.gz
Description: Binary data


Re: [PATCH v3 2/4] scsi: cleanup scsi/scsi_ioctl.h

2015-09-28 Thread Douglas Gilbert

On 15-09-25 11:27 AM, Paolo Bonzini wrote:

SCSI_REMOVAL_* goes together with other SCSI command constants in
include/scsi/scsi.h.  It is also used outside the implementation
of the ioctls (and it is not part of the user API).

scsi_fctargaddress/Scsi_FCTargAddress has had no in-tree use since
commit ca61f10ab2b8 ("[SCSI] remove broken driver cpqfc", 2005-10-29).
Remove it, just in time for the tenth anniversary of its demise.

Cc: James Bottomley 
Cc: Christoph Hellwig 
Cc: linux-s...@vger.kernel.org
Reviewed-by: Bart Van Assche 
Signed-off-by: Paolo Bonzini 


Acked-by: Douglas Gilbert 

--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH v3 1/4] scsi: remove old-style type names from sg.h

2015-09-28 Thread Douglas Gilbert

On 15-09-25 11:27 AM, Paolo Bonzini wrote:

These will not be exported by the new linux/sg.h header, and scsi/sg.h will
not have any user API after linux/sg.h is created.  Since they have no
user in the kernel, they can be zapped.

Cc: James Bottomley 
Cc: Christoph Hellwig 
Cc: linux-s...@vger.kernel.org
Reviewed-by: Bart Van Assche 
Signed-off-by: Paolo Bonzini 


Acked-by: Douglas Gilbert 

--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH v3 4/4] scsi: provide UAPI version of scsi/sg.h and scsi/scsi_ioctl.h

2015-09-28 Thread Douglas Gilbert

On 15-09-25 11:27 AM, Paolo Bonzini wrote:

Provide a UAPI version of the header in the kernel, making it easier
for interested projects to use an up-to-date version of the header.

The new headers are placed under uapi/linux/ so as not to conflict
with the glibc-provided headers in /usr/include/scsi.

/dev/sgN default values are implementation aspects, and are moved to
drivers/scsi/sg.c instead (together with e.g. SG_ALLOW_DIO_DEF).
However, SG_SCATTER_SZ is used by Wine so it is kept in linux/sg.h
SG_MAX_QUEUE could also be useful.

struct scsi_ioctl_command and struct scsi_idlun used to be under
"#ifdef __KERNEL__", but they are actually useful for userspace as
well.  Add them to the new header.

Cc: James Bottomley 
Cc: Christoph Hellwig 
Cc: linux-s...@vger.kernel.org
Cc: Bart Van Assche 
Signed-off-by: Paolo Bonzini 


Acked-by: Douglas Gilbert 
--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH v3 3/4] scsi: move all obsolete ioctls to scsi_ioctl.h

2015-09-28 Thread Douglas Gilbert

On 15-09-25 11:27 AM, Paolo Bonzini wrote:

Some are in scsi.h.  Keep them together in preparation for exposing them
in UAPI headers.

Cc: James Bottomley 
Cc: Christoph Hellwig 
Cc: linux-s...@vger.kernel.org
Reviewed-by: Bart Van Assche 
Signed-off-by: Paolo Bonzini 


Acked-by: Douglas Gilbert 


--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH] x86: Use entire page for the per-cpu GDT only if paravirt-enabled

2015-09-28 Thread Ingo Molnar

* Denys Vlasenko  wrote:

> On 09/26/2015 09:50 PM, H. Peter Anvin wrote:
> > NAK.  We really should map the GDT read-only on all 64 bit systems,
> > since we can't hide the address from SLDT.  Same with the IDT.
> 
> Sorry, I don't understand your point.

So the problem is that right now the SGDT instruction (which is unprivileged) 
leaks the real address of the kernel image:

 fomalhaut:~> ./sgdt 
 SGDT: 88303fd89000 / 007f

that '88303fd89000' is a kernel address.

 fomalhaut:~> cat sgdt.c 
 #include 
 #include 

 int main(void)
 {
 struct gdt_desc {
 unsigned short  limit;
 unsigned long   addr;
 } __attribute__((packed)) gdt_desc = { -1, -1 };

 asm volatile("sgdt %0": "=m" (gdt_desc));

 printf("SGDT: %016lx / %04x\n", gdt_desc.addr, gdt_desc.limit);

 return 0;
 }

Your observation in the changelog and your patch:

> >> It is page-sized because of paravirt. [...]

... conflicts with the intention to mark (remap) the primary GDT address 
read-only 
on native kernels as well.

So what we should do instead is to use the page alignment properly and remap 
the 
GDT to a read-only location, and load that one.

This would have a couple of advantages:

 - This would give kernel address randomization more teeth on x86.

 - An additional advantage would be that rootkits overwriting the GDT would 
have 
   a bit more work to do.

 - A third advantage would be that for NUMA systems we could 'mirror' the GDT 
into
   node-local memory and load those. This makes GDT load cache-misses a bit less
   expensive.

The IDT is already remapped:

 fomalhaut:~> ./sidt 
 Sidt: ff57b000 / 0fff
 fomalhaut:~> cat sidt.c
 #include 
 #include 

 int main(void)
 {
 struct idt_desc {
 unsigned short  limit;
 unsigned long   addr;
 } __attribute__((packed)) idt_desc = { -1, -1 };

 asm volatile("sidt %0": "=m" (idt_desc));

 printf("Sidt: %016lx / %04x\n", idt_desc.addr, idt_desc.limit);

 return 0;
 }

Thanks,

Ingo
--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [RFC PATCH 0/2] kvmclock: fix ABI breakage from PVCLOCK_COUNTS_FROM_ZERO.

2015-09-28 Thread Paolo Bonzini


On 18/09/2015 17:54, Radim Krčmář wrote:
> This patch series will be disabling PVCLOCK_COUNTS_FROM_ZERO flag and is
> RFC because I haven't explored many potential problems or tested it.
> 
> [1/2] uses a different algorithm in the guest to start counting from 0.
> [2/2] stops exposing PVCLOCK_COUNTS_FROM_ZERO in the hypervisor.
> 
> A viable alternative would be to implement opt-in features in kvm clock.
> 
> And because we probably only broke one old user (the infamous SLES 10), a
> workaround like this is also possible: (but I'd rather not do that)

Thanks,

applying 2/2 for 4.4 and 1/2 for 4.3.

Paolo

> diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
> index a60bdbccff51..ae9049248aaf 100644
> --- a/arch/x86/kvm/x86.c
> +++ b/arch/x86/kvm/x86.c
> @@ -2007,7 +2007,8 @@ int kvm_set_msr_common(struct kvm_vcpu *vcpu, struct 
> msr_data *msr_info)
>  
>   ka->boot_vcpu_runs_old_kvmclock = tmp;
>  
> - ka->kvmclock_offset = -get_kernel_ns();
> + if (!ka->boot_vcpu_runs_old_kvmclock)
> + ka->kvmclock_offset = -get_kernel_ns();
>   }
>  
>   vcpu->arch.time = data;
> 
> 
> Radim Krčmář (2):
>   x86: kvmclock: abolish PVCLOCK_COUNTS_FROM_ZERO
>   Revert "KVM: x86: zero kvmclock_offset when vcpu0 initializes kvmclock
> system MSR"
> 
>  arch/x86/include/asm/pvclock-abi.h |  1 +
>  arch/x86/kernel/kvmclock.c | 46 
> +-
>  arch/x86/kvm/x86.c |  4 
>  3 files changed, 36 insertions(+), 15 deletions(-)
> 
--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[kvm:queue 20/31] arch/x86/kvm/vmx.c:2502:78: warning: left shift count >= width of type

2015-09-28 Thread kbuild test robot
tree:   https://git.kernel.org/pub/scm/virt/kvm/kvm.git queue
head:   0e67722dd743e8218b4fc7924a1ab4ffda781517
commit: 8de182e8f597f4ec81a6fca865fb709c0753ca53 [20/31] KVM: nVMX: expose VPID 
capability to L1
config: i386-randconfig-x006-201539 (attached as .config)
reproduce:
  git checkout 8de182e8f597f4ec81a6fca865fb709c0753ca53
  # save the attached .config to linux build tree
  make ARCH=i386 

All warnings (new ones prefixed by >>):

   arch/x86/kvm/vmx.c: In function 'nested_vmx_setup_ctls_msrs':
>> arch/x86/kvm/vmx.c:2502:78: warning: left shift count >= width of type 
>> [-Wshift-count-overflow]
  vmx->nested.nested_vmx_ept_vpid_caps |= (unsigned 
long)vmx_capability.vpid << 32;

 ^

vim +2502 arch/x86/kvm/vmx.c

  2486  SECONDARY_EXEC_XSAVES;
  2487  
  2488  if (enable_ept | enable_vpid) {
  2489  /* nested EPT: emulate EPT also to L1 */
  2490  vmx->nested.nested_vmx_secondary_ctls_high |=
  2491  SECONDARY_EXEC_ENABLE_EPT;
  2492  vmx->nested.nested_vmx_ept_vpid_caps = 
VMX_EPT_PAGE_WALK_4_BIT |
  2493   VMX_EPTP_WB_BIT | VMX_EPT_2MB_PAGE_BIT |
  2494   VMX_EPT_INVEPT_BIT;
  2495  vmx->nested.nested_vmx_ept_vpid_caps &= 
vmx_capability.ept;
  2496  /*
  2497   * For nested guests, we don't do anything specific
  2498   * for single context invalidation. Hence, only 
advertise
  2499   * support for global context invalidation.
  2500   */
  2501  vmx->nested.nested_vmx_ept_vpid_caps |= 
VMX_EPT_EXTENT_GLOBAL_BIT;
> 2502  vmx->nested.nested_vmx_ept_vpid_caps |= (unsigned 
> long)vmx_capability.vpid << 32;
  2503  } else
  2504  vmx->nested.nested_vmx_ept_vpid_caps = 0;
  2505  
  2506  if (enable_unrestricted_guest)
  2507  vmx->nested.nested_vmx_secondary_ctls_high |=
  2508  SECONDARY_EXEC_UNRESTRICTED_GUEST;
  2509  
  2510  /* miscellaneous data */

---
0-DAY kernel test infrastructureOpen Source Technology Center
https://lists.01.org/pipermail/kbuild-all   Intel Corporation


.config.gz
Description: Binary data


Re: [PATCH] KVM: nVMX: expose VPID capability to L1

2015-09-28 Thread Paolo Bonzini


On 24/09/2015 08:51, Wanpeng Li wrote:
>   /*
>* For nested guests, we don't do anything specific
>* for single context invalidation. Hence, only advertise
>* support for global context invalidation.
>*/
> - vmx->nested.nested_vmx_ept_caps |= VMX_EPT_EXTENT_GLOBAL_BIT;
> + vmx->nested.nested_vmx_ept_vpid_caps |= 
> VMX_EPT_EXTENT_GLOBAL_BIT;
> + vmx->nested.nested_vmx_ept_vpid_caps |= (unsigned 
> long)vmx_capability.vpid << 32;

Hi Wanpeng, the comment above is about invept, but the same applies
to invvpid.  We can set only VMX_VPID_EXTENT_GLOBAL_CONTEXT_BIT.

Paolo
--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [RFC PATCH v3] os-android: Add support to android platform

2015-09-28 Thread Paolo Bonzini


On 24/09/2015 15:21, Houcheng Lin wrote:
> +if [ "$android" = "yes" ] ; then
> +  LIBS="-lglib-2.0 -lgthread-2.0 -lz -lpixman-1 -lintl -liconv -lc $LIBS"
> +  libs_qga="-lglib-2.0 -lgthread-2.0 -lz -lpixman-1 -lintl -liconv -lc"
> +fi

This change should not be necessary.

> +#define getdtablesize qemu_getdtablesize

Please instead replace all occurrences of getdtablesize with
qemu_getdtablesize.

> 
> +#ifdef CONFIG_ANDROID
> +#include "sysemu/os-android.h"
> +#endif
> +

Please replace this with

#include 

#ifndef IOV_MAX
#define IOV_MAX 1024
#endif

and get rid of os-android.h.

> 
> +#if defined(CONFIG_ANDROID)
> +char pty_buf[PATH_MAX];
> +#define ptsname(fd) pty_buf
> +#endif
>  const char *slave;
>  int mfd = -1, sfd = -1;
>  
> @@ -67,17 +72,21 @@ static int openpty(int *amaster, int *aslave, char *name,
>  
>  if (grantpt(mfd) == -1 || unlockpt(mfd) == -1)
>  goto err;
> -
> +#if defined(CONFIG_ANDROID)
> +if (ptsname_r(mfd, pty_buf, PATH_MAX) < 0)
> +goto err;
> +#endif
>  if ((slave = ptsname(mfd)) == NULL)
>  goto err;
>  


Better:

#if defined(CONFIG_ANDROID)
char slave[PATH_MAX];
#else
const char *slave;
#endif

...

#if defined(CONFIG_ANDROID)
if (ptsname_r(mfd, slave, PATH_MAX) < 0)
goto err;
#else
if ((slave = ptsname(mfd)) == NULL)
goto err;
#endif
--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


custom screen resolution

2015-09-28 Thread Jason Zhu

Hi There,

I'm running Android-x86 on KVM and I need to set a custom screen 
resolution like 480x800. It's not a standard resolution in vga.


Is there a way to do it?

Thanks in advance!
Jason

--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


RE: [PATCH v9 00/18] Add VT-d Posted-Interrupts support - including prerequisite series

2015-09-28 Thread Wu, Feng


> -Original Message-
> From: Paolo Bonzini [mailto:pbonz...@redhat.com]
> Sent: Friday, September 25, 2015 7:15 PM
> To: Wu, Feng; alex.william...@redhat.com; j...@8bytes.org;
> mtosa...@redhat.com
> Cc: eric.au...@linaro.org; kvm@vger.kernel.org;
> io...@lists.linux-foundation.org; linux-ker...@vger.kernel.org
> Subject: Re: [PATCH v9 00/18] Add VT-d Posted-Interrupts support - including
> prerequisite series
> 
> 
> 
> On 25/09/2015 03:49, Wu, Feng wrote:
> > Hi Paolo,
> >
> > Thanks for your review on this series! I'd like to confirm this series (plus
> > the patch fixing the compilation error) is okay to you and I don't need to
> > do extra things for it, right?
> 
> Yes, can you check if branch vtd-pi of
> git://git.kernel.org/pub/scm/virt/kvm/kvm.git works for you?  If so I'll
> merge it.

Thanks a lot for creating branch for vt-d pi. However, I cannot launch guests
with this tree. I encountered the following kernel dump, and I find that the
problematic commit is " 2260b1cde0b5472ab70ad0764b10095372e41913 "

KVM: x86: put vcpu_create under kvm->srcu critical section

This is needed in case vcpu_create wants to access the memslots array.
Fixes this lockdep splat:

After removing this commit from the tree, my VT-d patch-set works fine.


Kernel dump:
[  221.978182] BUG: unable to handle kernel NULL pointer dereference at 
  (null)
[  221.986085] IP: [] kvm_arch_vcpu_create+0x30/0x90 [kvm]
[  221.993102] PGD 0
[  221.995148] Oops:  [#1] SMP
[  221.998440] Modules linked in: bnep rfcomm bluetooth ax88179_178a usbnet 
intel_rapl mii snd_hda_codec_hdmi iosf_mbi x86_pkg_temp_thermal nouveau 
intel_powerclamp snd_hda_intel snd_hda_codec coretemp kvm_intel snd_hda_core 
kvm snd_hwdep snd_pcm crct10dif_pclmul crc32_pclmul snd_seq_midi 
ghash_clmulni_intel mxm_wmi snd_seq_midi_event snd_rawmidi video snd_seq ttm 
aesni_intel aes_x86_64 lrw gf128mul drm_kms_helper snd_seq_device binfmt_misc 
snd_timer glue_helper ablk_helper drm cryptd fb_sys_fops snd syscopyarea 
sysfillrect sb_edac soundcore sysimgblt mei_me parport_pc edac_core ppdev mei 
shpchp lp lpc_ich mac_hid parport acpi_power_meter wmi ixgbe igb i2c_algo_bit 
hid_generic usbhid ptp ahci hid libahci pps_core mdio
[  222.063533] CPU: 4 PID: 3384 Comm: qemu-system-x86 Not tainted 4.3.0-rc1+ #6
[  222.070612] Hardware name: Intel Corp. GRANGEVILLE/GRANTLEY, BIOS 
GNVDCRB1.86B.0020.V07.1409241147 09/24/2014
[  222.080764] task: 88006e7c8000 ti: 8800714a8000 task.ti: 
8800714a8000
[  222.088283] RIP: 0010:[]  [] 
kvm_arch_vcpu_create+0x30/0x90 [kvm]
[  222.097680] RSP: 0018:8800714abde0  EFLAGS: 00010246
[  222.103153] RAX:  RBX: 88016f28c000 RCX: 
[  222.110407] RDX:  RSI:  RDI: 88016f28c000
[  222.117659] RBP: 8800714abdf8 R08: 0001 R09: 0040
[  222.124824] R10: 880077e86438 R11: 880163e06880 R12: 88016f28c000
[  222.132150] R13:  R14: ae41 R15: 
[  222.139405] FS:  7f43fd7ec700() GS:88017870() 
knlGS:
[  222.147629] CS:  0010 DS:  ES:  CR0: 80050033
[  222.153471] CR2:  CR3: 00017074b000 CR4: 003426e0
[  222.160726] DR0:  DR1:  DR2: 
[  222.167979] DR3:  DR6: fffe0ff0 DR7: 0400
[  222.175231] Stack:
[  222.177277]   88016f28c000  
8800714abea0
[  222.184870]  c0355b17 0008 8800714abe28 
810aba32
[  222.192444]  880178816e40 8800714abe40 810a4f44 
880178816e40
[  222.200017] Call Trace:
[  222.202522]  [] kvm_vm_ioctl+0x277/0x6e0 [kvm]
[  222.208633]  [] ? put_prev_task_fair+0x22/0x40
[  222.214741]  [] ? pick_next_task_idle+0x14/0x30
[  222.220942]  [] do_vfs_ioctl+0x2ba/0x490
[  222.226523]  [] ? __do_page_fault+0x1ba/0x410
[  222.232546]  [] SyS_ioctl+0x79/0x90
[  222.237684]  [] ? syscall_return_slowpath+0x55/0x150
[  222.244323]  [] entry_SYSCALL_64_fastpath+0x16/0x75
[  222.250869] Code: 55 48 89 e5 41 55 41 54 53 41 89 f5 48 89 fb e8 27 61 cb 
c0 85 c0 74 13 8b 83 f0 09 00 00 85 c0 74 09 80 3d 53 2e 04 00 00 74 40 <48> 8b 
04 25 00 00 00 00 48 8d 78 48 e8 7f c4 d6 c0 41 89 c4 48
[  222.270790] RIP  [] kvm_arch_vcpu_create+0x30/0x90 [kvm]
[  222.277813]  RSP 
[  222.281359] CR2: 
[  222.290421] ---[ end trace 957f5a39692fe6c7 ]---
root@feng-bdw-de-pi:~/workspace/tools# dmesg > ~/dmesg.log
root@feng-bdw-de-pi:~/workspace/tools# vim ~/dmesg.log
[  221.998440] Modules linked in: bnep rfcomm bluetooth ax88179_178a usbnet 
intel_rapl mii snd_hda_codec_hdmi iosf_mbi x86_pkg_temp_thermal nouveau 
intel_powerclamp snd_hda_intel snd_hda_codec coretemp kvm_intel snd_hda_core 
kvm snd_hwdep snd_pcm crct10dif_pclmul crc32_pclmul snd_seq_midi 
ghash_clmulni_intel mxm_wmi snd_seq_midi_event 

[FYI PATCH 19/20] KVM: nVMX: nested VPID emulation

2015-09-28 Thread Paolo Bonzini
From: Wanpeng Li 

VPID is used to tag address space and avoid a TLB flush. Currently L0 uses
the same VPID to run L1 and all its guests. KVM flushes VPID when switching
between L1 and L2.

This patch advertises VPID to the L1 hypervisor, then address space of L1
and L2 can be separately treated and avoid TLB flush when switching between
L1 and L2. For each nested vmentry, if vpid12 is changed, reuse shadow vpid
w/ an invvpid.

Performance:

run lmbench on L2 w/ 3.5 kernel.

Context switching - times in microseconds - smaller is better
-
Host OS  2p/0K 2p/16K 2p/64K 8p/16K 8p/64K 16p/16K 16p/64K
 ctxsw  ctxsw  ctxsw ctxsw  ctxsw   ctxsw   ctxsw
- - -- -- -- -- -- --- ---
kernelLinux 3.5.0-1 1.2200 1.3700 1.4500 4.7800 2.3300 5.6 2.88000  
nested VPID
kernelLinux 3.5.0-1 1.2600 1.4300 1.5600   12.7   12.9 3.49000 7.46000  
vanilla

Reviewed-by: Jan Kiszka 
Suggested-by: Wincy Van 
Signed-off-by: Wanpeng Li 
[Handle the case where vpid02 cannot be allocated. - Paolo]
Signed-off-by: Paolo Bonzini 
---
 arch/x86/kvm/vmx.c | 39 ---
 1 file changed, 32 insertions(+), 7 deletions(-)

diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index bb7ae30caa13..75f3ee01f59b 100644
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -424,6 +424,9 @@ struct nested_vmx {
/* to migrate it to L2 if VM_ENTRY_LOAD_DEBUG_CONTROLS is off */
u64 vmcs01_debugctl;
 
+   u16 vpid02;
+   u16 last_vpid;
+
u32 nested_vmx_procbased_ctls_low;
u32 nested_vmx_procbased_ctls_high;
u32 nested_vmx_true_procbased_ctls_low;
@@ -1157,6 +1160,11 @@ static inline bool 
nested_cpu_has_virt_x2apic_mode(struct vmcs12 *vmcs12)
return nested_cpu_has2(vmcs12, SECONDARY_EXEC_VIRTUALIZE_X2APIC_MODE);
 }
 
+static inline bool nested_cpu_has_vpid(struct vmcs12 *vmcs12)
+{
+   return nested_cpu_has2(vmcs12, SECONDARY_EXEC_ENABLE_VPID);
+}
+
 static inline bool nested_cpu_has_apic_reg_virt(struct vmcs12 *vmcs12)
 {
return nested_cpu_has2(vmcs12, SECONDARY_EXEC_APIC_REGISTER_VIRT);
@@ -2471,6 +2479,7 @@ static void nested_vmx_setup_ctls_msrs(struct vcpu_vmx 
*vmx)
SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES |
SECONDARY_EXEC_RDTSCP |
SECONDARY_EXEC_VIRTUALIZE_X2APIC_MODE |
+   SECONDARY_EXEC_ENABLE_VPID |
SECONDARY_EXEC_APIC_REGISTER_VIRT |
SECONDARY_EXEC_VIRTUAL_INTR_DELIVERY |
SECONDARY_EXEC_WBINVD_EXITING |
@@ -6670,6 +6679,7 @@ static void free_nested(struct vcpu_vmx *vmx)
return;
 
vmx->nested.vmxon = false;
+   free_vpid(vmx->nested.vpid02);
nested_release_vmcs12(vmx);
if (enable_shadow_vmcs)
free_vmcs(vmx->nested.current_shadow_vmcs);
@@ -7199,7 +7209,7 @@ static int handle_invvpid(struct kvm_vcpu *vcpu)
case VMX_VPID_EXTENT_INDIVIDUAL_ADDR:
case VMX_VPID_EXTENT_SINGLE_CONTEXT:
case VMX_VPID_EXTENT_ALL_CONTEXT:
-   vmx_flush_tlb(vcpu);
+   __vmx_flush_tlb(vcpu, to_vmx(vcpu)->nested.vpid02);
nested_vmx_succeed(vcpu);
break;
default:
@@ -8575,8 +8585,10 @@ static struct kvm_vcpu *vmx_create_vcpu(struct kvm *kvm, 
unsigned int id)
goto free_vmcs;
}
 
-   if (nested)
+   if (nested) {
nested_vmx_setup_ctls_msrs(vmx);
+   vmx->nested.vpid02 = allocate_vpid();
+   }
 
vmx->nested.posted_intr_nv = -1;
vmx->nested.current_vmptr = -1ull;
@@ -8597,6 +8609,7 @@ static struct kvm_vcpu *vmx_create_vcpu(struct kvm *kvm, 
unsigned int id)
return &vmx->vcpu;
 
 free_vmcs:
+   free_vpid(vmx->nested.vpid02);
free_loaded_vmcs(vmx->loaded_vmcs);
 free_msrs:
kfree(vmx->guest_msrs);
@@ -9458,12 +9471,24 @@ static void prepare_vmcs02(struct kvm_vcpu *vcpu, 
struct vmcs12 *vmcs12)
 
if (enable_vpid) {
/*
-* Trivially support vpid by letting L2s share their parent
-* L1's vpid. TODO: move to a more elaborate solution, giving
-* each L2 its own vpid and exposing the vpid feature to L1.
+* There is no direct mapping between vpid02 and vpid12, the
+* vpid02 is per-vCPU for L0 and reused while the value of
+* vpid12 is changed w/ one invvpid during nested vmentry.
+* The vpid12 is allocated by L1 for L2, so it will not
+* influence global bitmap(for vpid01 and vpid02 allocation)
+* even if spawn a lot of nested vCPUs.
 */
-   

[FYI PATCH 20/20] KVM: vmx: disable posted interrupts if no local APIC

2015-09-28 Thread Paolo Bonzini
Uniprocessor 32-bit randconfigs can disable the local APIC, and posted
interrupts require reserving a vector on the LAPIC, so they are
incompatible.

Signed-off-by: Paolo Bonzini 
---
 arch/x86/kvm/vmx.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index 75f3ee01f59b..353b91b744d7 100644
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -986,7 +986,8 @@ static inline bool cpu_has_vmx_virtual_intr_delivery(void)
 
 static inline bool cpu_has_vmx_posted_intr(void)
 {
-   return vmcs_config.pin_based_exec_ctrl & PIN_BASED_POSTED_INTR;
+   return IS_ENABLED(CONFIG_X86_LOCAL_APIC) &&
+   vmcs_config.pin_based_exec_ctrl & PIN_BASED_POSTED_INTR;
 }
 
 static inline bool cpu_has_vmx_apicv(void)
-- 
1.8.3.1

--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[FYI PATCH 04/20] KVM: x86: introduce lapic_in_kernel

2015-09-28 Thread Paolo Bonzini
Avoid pointer chasing and memory barriers, and simplify the code
when split irqchip (LAPIC in kernel, IOAPIC/PIC in userspace)
is introduced.

Signed-off-by: Paolo Bonzini 
---
 arch/x86/kvm/irq.c   |  6 +++---
 arch/x86/kvm/irq.h   |  8 
 arch/x86/kvm/lapic.c |  4 ++--
 arch/x86/kvm/mmu.c   |  2 +-
 arch/x86/kvm/svm.c   |  4 ++--
 arch/x86/kvm/vmx.c   | 46 --
 arch/x86/kvm/x86.c   | 18 +-
 7 files changed, 45 insertions(+), 43 deletions(-)

diff --git a/arch/x86/kvm/irq.c b/arch/x86/kvm/irq.c
index c0dad893dc59..b653ae202c8e 100644
--- a/arch/x86/kvm/irq.c
+++ b/arch/x86/kvm/irq.c
@@ -57,7 +57,7 @@ static int kvm_cpu_has_extint(struct kvm_vcpu *v)
  */
 int kvm_cpu_has_injectable_intr(struct kvm_vcpu *v)
 {
-   if (!irqchip_in_kernel(v->kvm))
+   if (!lapic_in_kernel(v))
return v->arch.interrupt.pending;
 
if (kvm_cpu_has_extint(v))
@@ -75,7 +75,7 @@ int kvm_cpu_has_injectable_intr(struct kvm_vcpu *v)
  */
 int kvm_cpu_has_interrupt(struct kvm_vcpu *v)
 {
-   if (!irqchip_in_kernel(v->kvm))
+   if (!lapic_in_kernel(v))
return v->arch.interrupt.pending;
 
if (kvm_cpu_has_extint(v))
@@ -103,7 +103,7 @@ int kvm_cpu_get_interrupt(struct kvm_vcpu *v)
 {
int vector;
 
-   if (!irqchip_in_kernel(v->kvm))
+   if (!lapic_in_kernel(v))
return v->arch.interrupt.nr;
 
vector = kvm_cpu_get_extint(v);
diff --git a/arch/x86/kvm/irq.h b/arch/x86/kvm/irq.h
index 3d782a2c336a..9e6e7e04de98 100644
--- a/arch/x86/kvm/irq.h
+++ b/arch/x86/kvm/irq.h
@@ -92,6 +92,14 @@ static inline int irqchip_in_kernel(struct kvm *kvm)
return vpic != NULL;
 }
 
+static inline int lapic_in_kernel(struct kvm_vcpu *vcpu)
+{
+   /* Same as irqchip_in_kernel(vcpu->kvm), but with less
+* pointer chasing and no unnecessary memory barriers.
+*/
+   return vcpu->arch.apic != NULL;
+}
+
 void kvm_pic_reset(struct kvm_kpic_state *s);
 
 void kvm_inject_pending_timer_irqs(struct kvm_vcpu *vcpu);
diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c
index c568d69c7060..c4bcc86d6dc4 100644
--- a/arch/x86/kvm/lapic.c
+++ b/arch/x86/kvm/lapic.c
@@ -1985,7 +1985,7 @@ int kvm_x2apic_msr_write(struct kvm_vcpu *vcpu, u32 msr, 
u64 data)
struct kvm_lapic *apic = vcpu->arch.apic;
u32 reg = (msr - APIC_BASE_MSR) << 4;
 
-   if (!irqchip_in_kernel(vcpu->kvm) || !apic_x2apic_mode(apic))
+   if (!lapic_in_kernel(vcpu) || !apic_x2apic_mode(apic))
return 1;
 
if (reg == APIC_ICR2)
@@ -2002,7 +2002,7 @@ int kvm_x2apic_msr_read(struct kvm_vcpu *vcpu, u32 msr, 
u64 *data)
struct kvm_lapic *apic = vcpu->arch.apic;
u32 reg = (msr - APIC_BASE_MSR) << 4, low, high = 0;
 
-   if (!irqchip_in_kernel(vcpu->kvm) || !apic_x2apic_mode(apic))
+   if (!lapic_in_kernel(vcpu) || !apic_x2apic_mode(apic))
return 1;
 
if (reg == APIC_DFR || reg == APIC_ICR2) {
diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c
index ff606f507913..c3f39aa9b9cb 100644
--- a/arch/x86/kvm/mmu.c
+++ b/arch/x86/kvm/mmu.c
@@ -3427,7 +3427,7 @@ static int kvm_arch_setup_async_pf(struct kvm_vcpu *vcpu, 
gva_t gva, gfn_t gfn)
 
 static bool can_do_async_pf(struct kvm_vcpu *vcpu)
 {
-   if (unlikely(!irqchip_in_kernel(vcpu->kvm) ||
+   if (unlikely(!lapic_in_kernel(vcpu) ||
 kvm_event_needs_reinjection(vcpu)))
return false;
 
diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c
index 98889c882ced..89d278a0ad37 100644
--- a/arch/x86/kvm/svm.c
+++ b/arch/x86/kvm/svm.c
@@ -3154,7 +3154,7 @@ static int cr8_write_interception(struct vcpu_svm *svm)
u8 cr8_prev = kvm_get_cr8(&svm->vcpu);
/* instruction emulation calls kvm_set_cr8() */
r = cr_interception(svm);
-   if (irqchip_in_kernel(svm->vcpu.kvm))
+   if (lapic_in_kernel(&svm->vcpu))
return r;
if (cr8_prev <= kvm_get_cr8(&svm->vcpu))
return r;
@@ -3409,7 +3409,7 @@ static int interrupt_window_interception(struct vcpu_svm 
*svm)
 * If the user space waits to inject interrupts, exit as soon as
 * possible
 */
-   if (!irqchip_in_kernel(svm->vcpu.kvm) &&
+   if (!lapic_in_kernel(&svm->vcpu) &&
kvm_run->request_interrupt_window &&
!kvm_cpu_has_interrupt(&svm->vcpu)) {
kvm_run->exit_reason = KVM_EXIT_IRQ_WINDOW_OPEN;
diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index 32a38494dd6f..d5b87be89631 100644
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -809,7 +809,6 @@ static void kvm_cpu_vmxon(u64 addr);
 static void kvm_cpu_vmxoff(void);
 static bool vmx_mpx_supported(void);
 static bool vmx_xsaves_supported(void);
-static int vmx_vm_has_apicv(struct kvm *kvm);
 static int vmx_cpu_uses_apicv(struct kvm_vcpu *vcpu);
 static int vmx_set_tss_addr(struct kvm *kvm, unsigned int addr);
 

[FYI PATCH 00/20] kvm/queue will be merged soon to kvm/next

2015-09-28 Thread Paolo Bonzini

Hi all,

these are the patches that will be merged Real Soon Now(tm) to kvm/next.
Since all of them have been lying around for a while, I am sending them
out again for everyone's information.

Thanks,

Paolo

Andrey Smetanin (3):
  kvm/x86: Hyper-V HV_X64_MSR_RESET msr
  kvm/x86: Hyper-V HV_X64_MSR_VP_INDEX export for QEMU.
  kvm/x86: Hyper-V HV_X64_MSR_VP_RUNTIME support

Jason Wang (3):
  kvm: use kmalloc() instead of kzalloc() during iodev
register/unregister
  kvm: add tracepoint for fast mmio
  kvm: add capability for any-length ioeventfds

Paolo Bonzini (6):
  KVM: x86: set TMR when the interrupt is accepted
  KVM: x86: store IOAPIC-handled vectors in each VCPU
  KVM: x86: replace vm_has_apicv hook with cpu_uses_apicv
  KVM: x86: introduce lapic_in_kernel
  KVM: x86: unify handling of interrupt window
  KVM: vmx: disable posted interrupts if no local APIC

Steve Rutherford (4):
  KVM: x86: Split the APIC from the rest of IRQCHIP.
  KVM: x86: Add KVM exit for IOAPIC EOIs
  KVM: x86: Add EOI exit bitmap inference
  KVM: x86: Add support for local interrupt requests from userspace

Wanpeng Li (4):
  KVM: VMX: adjust interface to allocate/free_vpid
  KVM: VMX: introduce __vmx_flush_tlb to handle specific vpid
  KVM: nVMX: emulate the INVVPID instruction
  KVM: nVMX: nested VPID emulation

 Documentation/virtual/kvm/api.txt  |  52 ++--
 arch/x86/include/asm/kvm_host.h|  12 ++-
 arch/x86/include/asm/vmx.h |   1 +
 arch/x86/include/uapi/asm/hyperv.h |   6 ++
 arch/x86/kvm/hyperv.c  |  31 ++-
 arch/x86/kvm/i8254.c   |   4 +-
 arch/x86/kvm/ioapic.c  |  27 +-
 arch/x86/kvm/ioapic.h  |  15 ++--
 arch/x86/kvm/irq.c |  40 ++---
 arch/x86/kvm/irq.h |  27 +-
 arch/x86/kvm/irq_comm.c|  51 ++-
 arch/x86/kvm/lapic.c   |  64 +-
 arch/x86/kvm/lapic.h   |   5 +-
 arch/x86/kvm/mmu.c |   2 +-
 arch/x86/kvm/svm.c |  21 +
 arch/x86/kvm/trace.h   |  18 
 arch/x86/kvm/vmx.c | 168 +++--
 arch/x86/kvm/x86.c | 126 +---
 include/linux/kvm_host.h   |  21 -
 include/uapi/linux/kvm.h   |   7 ++
 kernel/sched/cputime.c |   2 +
 virt/kvm/eventfd.c |   4 +-
 virt/kvm/irqchip.c |  12 +--
 virt/kvm/kvm_main.c|   5 +-
 24 files changed, 516 insertions(+), 205 deletions(-)

-- 
1.8.3.1

--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[FYI PATCH 01/20] KVM: x86: set TMR when the interrupt is accepted

2015-09-28 Thread Paolo Bonzini
Do not compute TMR in advance.  Instead, set the TMR just before the interrupt
is accepted into the IRR.  This limits the coupling between IOAPIC and LAPIC.

Signed-off-by: Paolo Bonzini 
---
 arch/x86/kvm/ioapic.c |  9 ++---
 arch/x86/kvm/ioapic.h |  3 +--
 arch/x86/kvm/lapic.c  | 19 ++-
 arch/x86/kvm/lapic.h  |  1 -
 arch/x86/kvm/x86.c|  5 +
 5 files changed, 14 insertions(+), 23 deletions(-)

diff --git a/arch/x86/kvm/ioapic.c b/arch/x86/kvm/ioapic.c
index 856f79105bb5..eaf4ec38d980 100644
--- a/arch/x86/kvm/ioapic.c
+++ b/arch/x86/kvm/ioapic.c
@@ -246,8 +246,7 @@ static void update_handled_vectors(struct kvm_ioapic 
*ioapic)
smp_wmb();
 }
 
-void kvm_ioapic_scan_entry(struct kvm_vcpu *vcpu, u64 *eoi_exit_bitmap,
-   u32 *tmr)
+void kvm_ioapic_scan_entry(struct kvm_vcpu *vcpu, u64 *eoi_exit_bitmap)
 {
struct kvm_ioapic *ioapic = vcpu->kvm->arch.vioapic;
union kvm_ioapic_redirect_entry *e;
@@ -260,13 +259,9 @@ void kvm_ioapic_scan_entry(struct kvm_vcpu *vcpu, u64 
*eoi_exit_bitmap,
kvm_irq_has_notifier(ioapic->kvm, KVM_IRQCHIP_IOAPIC, 
index) ||
index == RTC_GSI) {
if (kvm_apic_match_dest(vcpu, NULL, 0,
-   e->fields.dest_id, e->fields.dest_mode)) {
+   e->fields.dest_id, e->fields.dest_mode))
__set_bit(e->fields.vector,
(unsigned long *)eoi_exit_bitmap);
-   if (e->fields.trig_mode == IOAPIC_LEVEL_TRIG)
-   __set_bit(e->fields.vector,
-   (unsigned long *)tmr);
-   }
}
}
spin_unlock(>lock);
diff --git a/arch/x86/kvm/ioapic.h b/arch/x86/kvm/ioapic.h
index ca0b0b4e6256..3dbd0e2aac4e 100644
--- a/arch/x86/kvm/ioapic.h
+++ b/arch/x86/kvm/ioapic.h
@@ -120,7 +120,6 @@ int kvm_irq_delivery_to_apic(struct kvm *kvm, struct 
kvm_lapic *src,
struct kvm_lapic_irq *irq, unsigned long *dest_map);
 int kvm_get_ioapic(struct kvm *kvm, struct kvm_ioapic_state *state);
 int kvm_set_ioapic(struct kvm *kvm, struct kvm_ioapic_state *state);
-void kvm_ioapic_scan_entry(struct kvm_vcpu *vcpu, u64 *eoi_exit_bitmap,
-   u32 *tmr);
+void kvm_ioapic_scan_entry(struct kvm_vcpu *vcpu, u64 *eoi_exit_bitmap);
 
 #endif
diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c
index 8d9013c5e1ee..5693dd9fc163 100644
--- a/arch/x86/kvm/lapic.c
+++ b/arch/x86/kvm/lapic.c
@@ -551,15 +551,6 @@ static void pv_eoi_clr_pending(struct kvm_vcpu *vcpu)
__clear_bit(KVM_APIC_PV_EOI_PENDING, >arch.apic_attention);
 }
 
-void kvm_apic_update_tmr(struct kvm_vcpu *vcpu, u32 *tmr)
-{
-   struct kvm_lapic *apic = vcpu->arch.apic;
-   int i;
-
-   for (i = 0; i < 8; i++)
-   apic_set_reg(apic, APIC_TMR + 0x10 * i, tmr[i]);
-}
-
 static void apic_update_ppr(struct kvm_lapic *apic)
 {
u32 tpr, isrv, ppr, old_ppr;
@@ -781,6 +772,9 @@ static int __apic_accept_irq(struct kvm_lapic *apic, int 
delivery_mode,
case APIC_DM_LOWEST:
vcpu->arch.apic_arb_prio++;
case APIC_DM_FIXED:
+   if (unlikely(trig_mode && !level))
+   break;
+
/* FIXME add logic for vcpu on reset */
if (unlikely(!apic_enabled(apic)))
break;
@@ -790,6 +784,13 @@ static int __apic_accept_irq(struct kvm_lapic *apic, int 
delivery_mode,
if (dest_map)
__set_bit(vcpu->vcpu_id, dest_map);
 
+   if (apic_test_vector(vector, apic->regs + APIC_TMR) != 
!!trig_mode) {
+   if (trig_mode)
+   apic_set_vector(vector, apic->regs + APIC_TMR);
+   else
+   apic_clear_vector(vector, apic->regs + 
APIC_TMR);
+   }
+
if (kvm_x86_ops->deliver_posted_interrupt)
kvm_x86_ops->deliver_posted_interrupt(vcpu, vector);
else {
diff --git a/arch/x86/kvm/lapic.h b/arch/x86/kvm/lapic.h
index 764037991d26..eb46d6bcaa75 100644
--- a/arch/x86/kvm/lapic.h
+++ b/arch/x86/kvm/lapic.h
@@ -57,7 +57,6 @@ void kvm_lapic_set_base(struct kvm_vcpu *vcpu, u64 value);
 u64 kvm_lapic_get_base(struct kvm_vcpu *vcpu);
 void kvm_apic_set_version(struct kvm_vcpu *vcpu);
 
-void kvm_apic_update_tmr(struct kvm_vcpu *vcpu, u32 *tmr);
 void __kvm_apic_update_irr(u32 *pir, void *regs);
 void kvm_apic_update_irr(struct kvm_vcpu *vcpu, u32 *pir);
 int kvm_apic_set_irq(struct kvm_vcpu *vcpu, struct kvm_lapic_irq *irq,
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 991466bf8dee..c0ffc636fc8b 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -6148,17 +6148,14 @@ static void 

Re: [PATCH v9 00/18] Add VT-d Posted-Interrupts support - including prerequisite series

2015-09-28 Thread Paolo Bonzini


On 28/09/2015 12:14, Wu, Feng wrote:
> Thanks a lot for creating branch for vt-d pi. However, I cannot launch guests
> with this tree. I encountered the following kernel dump, and I find that the
> problematic commit is " 2260b1cde0b5472ab70ad0764b10095372e41913 "
> 
> KVM: x86: put vcpu_create under kvm->srcu critical section
> 
> This is needed in case vcpu_create wants to access the memslots array.
> Fixes this lockdep splat:
> 
> After removing this commit from the tree, my VT-d patch-set works fine.

Great, thanks.  The above commit had already been reverted.

I'm sorting out the kbuild reports, and then will merge VT-d PI.

Paolo

> 
> Kernel dump:
> [  221.978182] BUG: unable to handle kernel NULL pointer dereference at   
> (null)
> [  221.986085] IP: [] kvm_arch_vcpu_create+0x30/0x90 [kvm]
> [  221.993102] PGD 0
> [  221.995148] Oops:  [#1] SMP
> [  221.998440] Modules linked in: bnep rfcomm bluetooth ax88179_178a usbnet 
> intel_rapl mii snd_hda_codec_hdmi iosf_mbi x86_pkg_temp_thermal nouveau 
> intel_powerclamp snd_hda_intel snd_hda_codec coretemp kvm_intel snd_hda_core 
> kvm snd_hwdep snd_pcm crct10dif_pclmul crc32_pclmul snd_seq_midi 
> ghash_clmulni_intel mxm_wmi snd_seq_midi_event snd_rawmidi video snd_seq ttm 
> aesni_intel aes_x86_64 lrw gf128mul drm_kms_helper snd_seq_device binfmt_misc 
> snd_timer glue_helper ablk_helper drm cryptd fb_sys_fops snd syscopyarea 
> sysfillrect sb_edac soundcore sysimgblt mei_me parport_pc edac_core ppdev mei 
> shpchp lp lpc_ich mac_hid parport acpi_power_meter wmi ixgbe igb i2c_algo_bit 
> hid_generic usbhid ptp ahci hid libahci pps_core mdio
> [  222.063533] CPU: 4 PID: 3384 Comm: qemu-system-x86 Not tainted 4.3.0-rc1+ 
> #6
> [  222.070612] Hardware name: Intel Corp. GRANGEVILLE/GRANTLEY, BIOS 
> GNVDCRB1.86B.0020.V07.1409241147 09/24/2014
> [  222.080764] task: 88006e7c8000 ti: 8800714a8000 task.ti: 
> 8800714a8000
> [  222.088283] RIP: 0010:[]  [] 
> kvm_arch_vcpu_create+0x30/0x90 [kvm]
> [  222.097680] RSP: 0018:8800714abde0  EFLAGS: 00010246
> [  222.103153] RAX:  RBX: 88016f28c000 RCX: 
> 
> [  222.110407] RDX:  RSI:  RDI: 
> 88016f28c000
> [  222.117659] RBP: 8800714abdf8 R08: 0001 R09: 
> 0040
> [  222.124824] R10: 880077e86438 R11: 880163e06880 R12: 
> 88016f28c000
> [  222.132150] R13:  R14: ae41 R15: 
> 
> [  222.139405] FS:  7f43fd7ec700() GS:88017870() 
> knlGS:
> [  222.147629] CS:  0010 DS:  ES:  CR0: 80050033
> [  222.153471] CR2:  CR3: 00017074b000 CR4: 
> 003426e0
> [  222.160726] DR0:  DR1:  DR2: 
> 
> [  222.167979] DR3:  DR6: fffe0ff0 DR7: 
> 0400
> [  222.175231] Stack:
> [  222.177277]   88016f28c000  
> 8800714abea0
> [  222.184870]  c0355b17 0008 8800714abe28 
> 810aba32
> [  222.192444]  880178816e40 8800714abe40 810a4f44 
> 880178816e40
> [  222.200017] Call Trace:
> [  222.202522]  [] kvm_vm_ioctl+0x277/0x6e0 [kvm]
> [  222.208633]  [] ? put_prev_task_fair+0x22/0x40
> [  222.214741]  [] ? pick_next_task_idle+0x14/0x30
> [  222.220942]  [] do_vfs_ioctl+0x2ba/0x490
> [  222.226523]  [] ? __do_page_fault+0x1ba/0x410
> [  222.232546]  [] SyS_ioctl+0x79/0x90
> [  222.237684]  [] ? syscall_return_slowpath+0x55/0x150
> [  222.244323]  [] entry_SYSCALL_64_fastpath+0x16/0x75
> [  222.250869] Code: 55 48 89 e5 41 55 41 54 53 41 89 f5 48 89 fb e8 27 61 cb 
> c0 85 c0 74 13 8b 83 f0 09 00 00 85 c0 74 09 80 3d 53 2e 04 00 00 74 40 <48> 
> 8b 04 25 00 00 00 00 48 8d 78 48 e8 7f c4 d6 c0 41 89 c4 48
> [  222.270790] RIP  [] kvm_arch_vcpu_create+0x30/0x90 [kvm]
> [  222.277813]  RSP 
> [  222.281359] CR2: 
> [  222.290421] ---[ end trace 957f5a39692fe6c7 ]---
> root@feng-bdw-de-pi:~/workspace/tools# dmesg > ~/dmesg.log
> root@feng-bdw-de-pi:~/workspace/tools# vim ~/dmesg.log
> [  221.998440] Modules linked in: bnep rfcomm bluetooth ax88179_178a usbnet 
> intel_rapl mii snd_hda_codec_hdmi iosf_mbi x86_pkg_temp_thermal nouveau 
> intel_powerclamp snd_hda_intel snd_hda_codec coretemp kvm_intel snd_hda_core 
> kvm snd_hwdep snd_pcm crct10dif_pclmul crc32_pclmul snd_seq_midi 
> ghash_clmulni_intel mxm_wmi snd_seq_midi_event snd_rawmidi video snd_seq ttm 
> aesni_intel aes_x86_64 lrw gf128mul drm_kms_helper snd_seq_device binfmt_misc 
> snd_timer glue_helper ablk_helper drm cryptd fb_sys_fops snd syscopyarea 
> sysfillrect sb_edac soundcore sysimgblt mei_me parport_pc edac_core ppdev mei 
> shpchp lp lpc_ich mac_hid parport acpi_power_meter wmi ixgbe igb i2c_algo_bit 
> hid_generic usbhid ptp ahci hid libahci pps_core mdio
> [  222.063533] CPU: 4 PID: 3384 Comm: qemu-system-x86 Not 

[PATCH] x86: x2apic: make stub functions available even if !CONFIG_X86_LOCAL_APIC

2015-09-28 Thread Paolo Bonzini
Some CONFIG_X86_X2APIC functions, especially x2apic_enabled(), are not
declared if !CONFIG_X86_LOCAL_APIC.  However, the same stubs that work
for !CONFIG_X86_X2APIC are okay even if there is no local APIC support
at all.

Avoid the introduction of #ifdefs by moving the x2apic declarations
completely outside the CONFIG_X86_LOCAL_APIC block.  (Unfortunately,
diff generation messes up the actual change that this patch makes).
There is no semantic change because CONFIG_X86_X2APIC depends on
CONFIG_X86_LOCAL_APIC.

Reported-by: Fengguang Wu 
Cc: Feng Wu 
Signed-off-by: Paolo Bonzini 
---
Failures are only visible with pending KVM changes to 4.4.
The patch introducing them is at

https://git.kernel.org/cgit/virt/kvm/kvm.git/commit/?h=vtd-pi&id=43ef83157e66f80a491202e5d65b225ad93d920a
and the corresponding specification is section 9.12 of

http://www.intel.com/content/dam/www/public/us/en/documents/product-specifications/vt-directed-io-spec.pdf
[Posted Interrupt Descriptor (PID)].

 arch/x86/include/asm/apic.h | 110 ++--
 1 file changed, 55 insertions(+), 55 deletions(-)

diff --git a/arch/x86/include/asm/apic.h b/arch/x86/include/asm/apic.h
index ebf6d5e5668c..a30316bf801a 100644
--- a/arch/x86/include/asm/apic.h
+++ b/arch/x86/include/asm/apic.h
@@ -115,6 +115,59 @@ static inline bool apic_is_x2apic_enabled(void)
return msr & X2APIC_ENABLE;
 }
 
+extern void enable_IR_x2apic(void);
+
+extern int get_physical_broadcast(void);
+
+extern int lapic_get_maxlvt(void);
+extern void clear_local_APIC(void);
+extern void disconnect_bsp_APIC(int virt_wire_setup);
+extern void disable_local_APIC(void);
+extern void lapic_shutdown(void);
+extern void sync_Arb_IDs(void);
+extern void init_bsp_APIC(void);
+extern void setup_local_APIC(void);
+extern void init_apic_mappings(void);
+void register_lapic_address(unsigned long address);
+extern void setup_boot_APIC_clock(void);
+extern void setup_secondary_APIC_clock(void);
+extern int APIC_init_uniprocessor(void);
+
+#ifdef CONFIG_X86_64
+static inline int apic_force_enable(unsigned long addr)
+{
+   return -1;
+}
+#else
+extern int apic_force_enable(unsigned long addr);
+#endif
+
+extern int apic_bsp_setup(bool upmode);
+extern void apic_ap_setup(void);
+
+/*
+ * On 32bit this is mach-xxx local
+ */
+#ifdef CONFIG_X86_64
+extern int apic_is_clustered_box(void);
+#else
+static inline int apic_is_clustered_box(void)
+{
+   return 0;
+}
+#endif
+
+extern int setup_APIC_eilvt(u8 lvt_off, u8 vector, u8 msg_type, u8 mask);
+
+#else /* !CONFIG_X86_LOCAL_APIC */
+static inline void lapic_shutdown(void) { }
+#define local_apic_timer_c2_ok 1
+static inline void init_apic_mappings(void) { }
+static inline void disable_local_APIC(void) { }
+# define setup_boot_APIC_clock x86_init_noop
+# define setup_secondary_APIC_clock x86_init_noop
+#endif /* !CONFIG_X86_LOCAL_APIC */
+
 #ifdef CONFIG_X86_X2APIC
 /*
  * Make previous memory operations globally visible before
@@ -186,67 +239,14 @@ static inline int x2apic_enabled(void)
 }
 
 #define x2apic_supported() (cpu_has_x2apic)
-#else
+#else /* !CONFIG_X86_X2APIC */
 static inline void check_x2apic(void) { }
 static inline void x2apic_setup(void) { }
 static inline int x2apic_enabled(void) { return 0; }
 
 #define x2apic_mode(0)
 #definex2apic_supported()  (0)
-#endif
-
-extern void enable_IR_x2apic(void);
-
-extern int get_physical_broadcast(void);
-
-extern int lapic_get_maxlvt(void);
-extern void clear_local_APIC(void);
-extern void disconnect_bsp_APIC(int virt_wire_setup);
-extern void disable_local_APIC(void);
-extern void lapic_shutdown(void);
-extern void sync_Arb_IDs(void);
-extern void init_bsp_APIC(void);
-extern void setup_local_APIC(void);
-extern void init_apic_mappings(void);
-void register_lapic_address(unsigned long address);
-extern void setup_boot_APIC_clock(void);
-extern void setup_secondary_APIC_clock(void);
-extern int APIC_init_uniprocessor(void);
-
-#ifdef CONFIG_X86_64
-static inline int apic_force_enable(unsigned long addr)
-{
-   return -1;
-}
-#else
-extern int apic_force_enable(unsigned long addr);
-#endif
-
-extern int apic_bsp_setup(bool upmode);
-extern void apic_ap_setup(void);
-
-/*
- * On 32bit this is mach-xxx local
- */
-#ifdef CONFIG_X86_64
-extern int apic_is_clustered_box(void);
-#else
-static inline int apic_is_clustered_box(void)
-{
-   return 0;
-}
-#endif
-
-extern int setup_APIC_eilvt(u8 lvt_off, u8 vector, u8 msg_type, u8 mask);
-
-#else /* !CONFIG_X86_LOCAL_APIC */
-static inline void lapic_shutdown(void) { }
-#define local_apic_timer_c2_ok 1
-static inline void init_apic_mappings(void) { }
-static inline void disable_local_APIC(void) { }
-# define setup_boot_APIC_clock x86_init_noop
-# define setup_secondary_APIC_clock x86_init_noop
-#endif /* !CONFIG_X86_LOCAL_APIC */
+#endif /* 

[FYI PATCH 13/20] kvm/x86: Hyper-V HV_X64_MSR_RESET msr

2015-09-28 Thread Paolo Bonzini
From: Andrey Smetanin 

The HV_X64_MSR_RESET MSR is used by Hyper-V based Windows guests
to ask the hypervisor to reset the guest VM.

Necessary to support loading of winhv.sys in guest, which in turn is
required to support Windows VMBus.

Signed-off-by: Andrey Smetanin 
Reviewed-by: Roman Kagan 
Signed-off-by: Denis V. Lunev 
CC: Paolo Bonzini 
CC: Gleb Natapov 
Signed-off-by: Paolo Bonzini 
---
 arch/x86/include/uapi/asm/hyperv.h |  3 +++
 arch/x86/kvm/hyperv.c  | 10 ++
 arch/x86/kvm/x86.c |  7 +++
 include/linux/kvm_host.h   |  1 +
 4 files changed, 21 insertions(+)

diff --git a/arch/x86/include/uapi/asm/hyperv.h 
b/arch/x86/include/uapi/asm/hyperv.h
index f0412c50c47b..dab584bf7ddf 100644
--- a/arch/x86/include/uapi/asm/hyperv.h
+++ b/arch/x86/include/uapi/asm/hyperv.h
@@ -153,6 +153,9 @@
 /* MSR used to provide vcpu index */
 #define HV_X64_MSR_VP_INDEX0x4002
 
+/* MSR used to reset the guest OS. */
+#define HV_X64_MSR_RESET   0x4003
+
 /* MSR used to read the per-partition time reference counter */
 #define HV_X64_MSR_TIME_REF_COUNT  0x4020
 
diff --git a/arch/x86/kvm/hyperv.c b/arch/x86/kvm/hyperv.c
index a8160d2ae362..0ad11a232474 100644
--- a/arch/x86/kvm/hyperv.c
+++ b/arch/x86/kvm/hyperv.c
@@ -41,6 +41,7 @@ static bool kvm_hv_msr_partition_wide(u32 msr)
case HV_X64_MSR_TIME_REF_COUNT:
case HV_X64_MSR_CRASH_CTL:
case HV_X64_MSR_CRASH_P0 ... HV_X64_MSR_CRASH_P4:
+   case HV_X64_MSR_RESET:
r = true;
break;
}
@@ -163,6 +164,12 @@ static int kvm_hv_set_msr_pw(struct kvm_vcpu *vcpu, u32 
msr, u64 data,
 data);
case HV_X64_MSR_CRASH_CTL:
return kvm_hv_msr_set_crash_ctl(vcpu, data, host);
+   case HV_X64_MSR_RESET:
+   if (data == 1) {
+   vcpu_debug(vcpu, "hyper-v reset requested\n");
+   kvm_make_request(KVM_REQ_HV_RESET, vcpu);
+   }
+   break;
default:
vcpu_unimpl(vcpu, "Hyper-V uhandled wrmsr: 0x%x data 0x%llx\n",
msr, data);
@@ -241,6 +248,9 @@ static int kvm_hv_get_msr_pw(struct kvm_vcpu *vcpu, u32 
msr, u64 *pdata)
 pdata);
case HV_X64_MSR_CRASH_CTL:
return kvm_hv_msr_get_crash_ctl(vcpu, pdata);
+   case HV_X64_MSR_RESET:
+   data = 0;
+   break;
default:
vcpu_unimpl(vcpu, "Hyper-V unhandled rdmsr: 0x%x\n", msr);
return 1;
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index b015ab357980..7e207de0a13f 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -952,6 +952,7 @@ static u32 emulated_msrs[] = {
HV_X64_MSR_TIME_REF_COUNT, HV_X64_MSR_REFERENCE_TSC,
HV_X64_MSR_CRASH_P0, HV_X64_MSR_CRASH_P1, HV_X64_MSR_CRASH_P2,
HV_X64_MSR_CRASH_P3, HV_X64_MSR_CRASH_P4, HV_X64_MSR_CRASH_CTL,
+   HV_X64_MSR_RESET,
HV_X64_MSR_APIC_ASSIST_PAGE, MSR_KVM_ASYNC_PF_EN, MSR_KVM_STEAL_TIME,
MSR_KVM_PV_EOI_EN,
 
@@ -6325,6 +6326,12 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu)
r = 0;
goto out;
}
+   if (kvm_check_request(KVM_REQ_HV_RESET, vcpu)) {
+   vcpu->run->exit_reason = KVM_EXIT_SYSTEM_EVENT;
+   vcpu->run->system_event.type = KVM_SYSTEM_EVENT_RESET;
+   r = 0;
+   goto out;
+   }
}
 
if (kvm_check_request(KVM_REQ_EVENT, vcpu) || req_int_win) {
diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
index a73d60d474dd..f3bd0fd0795e 100644
--- a/include/linux/kvm_host.h
+++ b/include/linux/kvm_host.h
@@ -141,6 +141,7 @@ static inline bool is_error_page(struct page *page)
 #define KVM_REQ_SMI   26
 #define KVM_REQ_HV_CRASH  27
 #define KVM_REQ_IOAPIC_EOI_EXIT   28
+#define KVM_REQ_HV_RESET  29
 
 #define KVM_USERSPACE_IRQ_SOURCE_ID0
 #define KVM_IRQFD_RESAMPLE_IRQ_SOURCE_ID   1
-- 
1.8.3.1


--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[FYI PATCH 16/20] KVM: VMX: adjust interface to allocate/free_vpid

2015-09-28 Thread Paolo Bonzini
From: Wanpeng Li 

Adjust allocate/free_vpid so that they can be reused for the nested vpid.

Signed-off-by: Wanpeng Li 
Signed-off-by: Paolo Bonzini 
---
 arch/x86/kvm/vmx.c | 25 -
 1 file changed, 12 insertions(+), 13 deletions(-)

diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index b2d619ab3d07..656074153f49 100644
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -4155,29 +4155,28 @@ static int alloc_identity_pagetable(struct kvm *kvm)
return r;
 }
 
-static void allocate_vpid(struct vcpu_vmx *vmx)
+static int allocate_vpid(void)
 {
int vpid;
 
-   vmx->vpid = 0;
if (!enable_vpid)
-   return;
+   return 0;
spin_lock(_vpid_lock);
vpid = find_first_zero_bit(vmx_vpid_bitmap, VMX_NR_VPIDS);
-   if (vpid < VMX_NR_VPIDS) {
-   vmx->vpid = vpid;
+   if (vpid < VMX_NR_VPIDS)
__set_bit(vpid, vmx_vpid_bitmap);
-   }
+   else
+   vpid = 0;
spin_unlock(_vpid_lock);
+   return vpid;
 }
 
-static void free_vpid(struct vcpu_vmx *vmx)
+static void free_vpid(int vpid)
 {
-   if (!enable_vpid)
+   if (!enable_vpid || vpid == 0)
return;
spin_lock(_vpid_lock);
-   if (vmx->vpid != 0)
-   __clear_bit(vmx->vpid, vmx_vpid_bitmap);
+   __clear_bit(vpid, vmx_vpid_bitmap);
spin_unlock(_vpid_lock);
 }
 
@@ -8483,7 +8482,7 @@ static void vmx_free_vcpu(struct kvm_vcpu *vcpu)
 
if (enable_pml)
vmx_disable_pml(vmx);
-   free_vpid(vmx);
+   free_vpid(vmx->vpid);
leave_guest_mode(vcpu);
vmx_load_vmcs01(vcpu);
free_nested(vmx);
@@ -8502,7 +8501,7 @@ static struct kvm_vcpu *vmx_create_vcpu(struct kvm *kvm, 
unsigned int id)
if (!vmx)
return ERR_PTR(-ENOMEM);
 
-   allocate_vpid(vmx);
+   vmx->vpid = allocate_vpid();
 
err = kvm_vcpu_init(>vcpu, kvm, id);
if (err)
@@ -8578,7 +8577,7 @@ free_msrs:
 uninit_vcpu:
kvm_vcpu_uninit(>vcpu);
 free_vcpu:
-   free_vpid(vmx);
+   free_vpid(vmx->vpid);
kmem_cache_free(kvm_vcpu_cache, vmx);
return ERR_PTR(err);
 }
-- 
1.8.3.1


--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[FYI PATCH 18/20] KVM: nVMX: emulate the INVVPID instruction

2015-09-28 Thread Paolo Bonzini
From: Wanpeng Li 

Add the INVVPID instruction emulation.

Reviewed-by: Wincy Van 
Signed-off-by: Wanpeng Li 
Signed-off-by: Paolo Bonzini 
---
 arch/x86/include/asm/vmx.h |  1 +
 arch/x86/kvm/vmx.c | 23 ++-
 2 files changed, 23 insertions(+), 1 deletion(-)

diff --git a/arch/x86/include/asm/vmx.h b/arch/x86/include/asm/vmx.h
index 448b7ca61aee..53c9567a29d1 100644
--- a/arch/x86/include/asm/vmx.h
+++ b/arch/x86/include/asm/vmx.h
@@ -397,6 +397,7 @@ enum vmcs_field {
 #define IDENTITY_PAGETABLE_PRIVATE_MEMSLOT (KVM_USER_MEM_SLOTS + 2)
 
 #define VMX_NR_VPIDS   (1 << 16)
+#define VMX_VPID_EXTENT_INDIVIDUAL_ADDR0
 #define VMX_VPID_EXTENT_SINGLE_CONTEXT 1
 #define VMX_VPID_EXTENT_ALL_CONTEXT2
 
diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index e1cb4aa026a2..bb7ae30caa13 100644
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -7186,7 +7186,28 @@ static int handle_invept(struct kvm_vcpu *vcpu)
 
 static int handle_invvpid(struct kvm_vcpu *vcpu)
 {
-   kvm_queue_exception(vcpu, UD_VECTOR);
+   u32 vmx_instruction_info;
+   unsigned long type;
+
+   if (!nested_vmx_check_permission(vcpu))
+   return 1;
+
+   vmx_instruction_info = vmcs_read32(VMX_INSTRUCTION_INFO);
+   type = kvm_register_readl(vcpu, (vmx_instruction_info >> 28) & 0xf);
+
+   switch (type) {
+   case VMX_VPID_EXTENT_INDIVIDUAL_ADDR:
+   case VMX_VPID_EXTENT_SINGLE_CONTEXT:
+   case VMX_VPID_EXTENT_ALL_CONTEXT:
+   vmx_flush_tlb(vcpu);
+   nested_vmx_succeed(vcpu);
+   break;
+   default:
+   nested_vmx_failInvalid(vcpu);
+   break;
+   }
+
+   skip_emulated_instruction(vcpu);
return 1;
 }
 
-- 
1.8.3.1


--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[FYI PATCH 15/20] kvm/x86: Hyper-V HV_X64_MSR_VP_RUNTIME support

2015-09-28 Thread Paolo Bonzini
From: Andrey Smetanin 

The HV_X64_MSR_VP_RUNTIME MSR is used by the guest to get
"the time the virtual processor consumes running guest code,
and the time the associated logical processor spends running
hypervisor code on behalf of that guest."

Calculation of this time is performed by task_cputime_adjusted()
for the vcpu task.

Necessary to support loading of winhv.sys in guest, which in turn is
required to support Windows VMBus.

Signed-off-by: Andrey Smetanin 
Reviewed-by: Roman Kagan 
Signed-off-by: Denis V. Lunev 
CC: Paolo Bonzini 
CC: Gleb Natapov 
Signed-off-by: Paolo Bonzini 
---
 arch/x86/include/asm/kvm_host.h|  1 +
 arch/x86/include/uapi/asm/hyperv.h |  3 +++
 arch/x86/kvm/hyperv.c  | 21 +++--
 arch/x86/kvm/x86.c |  1 +
 kernel/sched/cputime.c |  2 ++
 5 files changed, 26 insertions(+), 2 deletions(-)

diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index 76a5b30979b3..d064cb2e19e8 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -374,6 +374,7 @@ struct kvm_mtrr {
 /* Hyper-V per vcpu emulation context */
 struct kvm_vcpu_hv {
u64 hv_vapic;
+   s64 runtime_offset;
 };
 
 struct kvm_vcpu_arch {
diff --git a/arch/x86/include/uapi/asm/hyperv.h 
b/arch/x86/include/uapi/asm/hyperv.h
index dab584bf7ddf..2677a0aac2cc 100644
--- a/arch/x86/include/uapi/asm/hyperv.h
+++ b/arch/x86/include/uapi/asm/hyperv.h
@@ -156,6 +156,9 @@
 /* MSR used to reset the guest OS. */
 #define HV_X64_MSR_RESET   0x4003
 
+/* MSR used to provide vcpu runtime in 100ns units */
+#define HV_X64_MSR_VP_RUNTIME  0x4010
+
 /* MSR used to read the per-partition time reference counter */
 #define HV_X64_MSR_TIME_REF_COUNT  0x4020
 
diff --git a/arch/x86/kvm/hyperv.c b/arch/x86/kvm/hyperv.c
index 0ad11a232474..62cf8c915e95 100644
--- a/arch/x86/kvm/hyperv.c
+++ b/arch/x86/kvm/hyperv.c
@@ -178,7 +178,16 @@ static int kvm_hv_set_msr_pw(struct kvm_vcpu *vcpu, u32 
msr, u64 data,
return 0;
 }
 
-static int kvm_hv_set_msr(struct kvm_vcpu *vcpu, u32 msr, u64 data)
+/* Calculate cpu time spent by current task in 100ns units */
+static u64 current_task_runtime_100ns(void)
+{
+   cputime_t utime, stime;
+
+   task_cputime_adjusted(current, , );
+   return div_u64(cputime_to_nsecs(utime + stime), 100);
+}
+
+static int kvm_hv_set_msr(struct kvm_vcpu *vcpu, u32 msr, u64 data, bool host)
 {
struct kvm_vcpu_hv *hv = >arch.hyperv;
 
@@ -212,6 +221,11 @@ static int kvm_hv_set_msr(struct kvm_vcpu *vcpu, u32 msr, 
u64 data)
return kvm_hv_vapic_msr_write(vcpu, APIC_ICR, data);
case HV_X64_MSR_TPR:
return kvm_hv_vapic_msr_write(vcpu, APIC_TASKPRI, data);
+   case HV_X64_MSR_VP_RUNTIME:
+   if (!host)
+   return 1;
+   hv->runtime_offset = data - current_task_runtime_100ns();
+   break;
default:
vcpu_unimpl(vcpu, "Hyper-V uhandled wrmsr: 0x%x data 0x%llx\n",
msr, data);
@@ -287,6 +301,9 @@ static int kvm_hv_get_msr(struct kvm_vcpu *vcpu, u32 msr, 
u64 *pdata)
case HV_X64_MSR_APIC_ASSIST_PAGE:
data = hv->hv_vapic;
break;
+   case HV_X64_MSR_VP_RUNTIME:
+   data = current_task_runtime_100ns() + hv->runtime_offset;
+   break;
default:
vcpu_unimpl(vcpu, "Hyper-V unhandled rdmsr: 0x%x\n", msr);
return 1;
@@ -305,7 +322,7 @@ int kvm_hv_set_msr_common(struct kvm_vcpu *vcpu, u32 msr, 
u64 data, bool host)
mutex_unlock(>kvm->lock);
return r;
} else
-   return kvm_hv_set_msr(vcpu, msr, data);
+   return kvm_hv_set_msr(vcpu, msr, data, host);
 }
 
 int kvm_hv_get_msr_common(struct kvm_vcpu *vcpu, u32 msr, u64 *pdata)
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 5bc598de28a7..3e4d03220650 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -954,6 +954,7 @@ static u32 emulated_msrs[] = {
HV_X64_MSR_CRASH_P3, HV_X64_MSR_CRASH_P4, HV_X64_MSR_CRASH_CTL,
HV_X64_MSR_RESET,
HV_X64_MSR_VP_INDEX,
+   HV_X64_MSR_VP_RUNTIME,
HV_X64_MSR_APIC_ASSIST_PAGE, MSR_KVM_ASYNC_PF_EN, MSR_KVM_STEAL_TIME,
MSR_KVM_PV_EOI_EN,
 
diff --git a/kernel/sched/cputime.c b/kernel/sched/cputime.c
index 8cbc3db671df..26a54461bf59 100644
--- a/kernel/sched/cputime.c
+++ b/kernel/sched/cputime.c
@@ -444,6 +444,7 @@ void task_cputime_adjusted(struct task_struct *p, cputime_t 
*ut, cputime_t *st)
*ut = p->utime;
*st = p->stime;
 }
+EXPORT_SYMBOL_GPL(task_cputime_adjusted);
 
 void thread_group_cputime_adjusted(struct task_struct *p, cputime_t *ut, 

[FYI PATCH 17/20] KVM: VMX: introduce __vmx_flush_tlb to handle specific vpid

2015-09-28 Thread Paolo Bonzini
From: Wanpeng Li 

Introduce __vmx_flush_tlb() to handle specific vpid.

Signed-off-by: Wanpeng Li 
Signed-off-by: Paolo Bonzini 
---
 arch/x86/kvm/vmx.c | 21 +
 1 file changed, 13 insertions(+), 8 deletions(-)

diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index 656074153f49..e1cb4aa026a2 100644
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -1337,13 +1337,13 @@ static void loaded_vmcs_clear(struct loaded_vmcs 
*loaded_vmcs)
 __loaded_vmcs_clear, loaded_vmcs, 1);
 }
 
-static inline void vpid_sync_vcpu_single(struct vcpu_vmx *vmx)
+static inline void vpid_sync_vcpu_single(int vpid)
 {
-   if (vmx->vpid == 0)
+   if (vpid == 0)
return;
 
if (cpu_has_vmx_invvpid_single())
-   __invvpid(VMX_VPID_EXTENT_SINGLE_CONTEXT, vmx->vpid, 0);
+   __invvpid(VMX_VPID_EXTENT_SINGLE_CONTEXT, vpid, 0);
 }
 
 static inline void vpid_sync_vcpu_global(void)
@@ -1352,10 +1352,10 @@ static inline void vpid_sync_vcpu_global(void)
__invvpid(VMX_VPID_EXTENT_ALL_CONTEXT, 0, 0);
 }
 
-static inline void vpid_sync_context(struct vcpu_vmx *vmx)
+static inline void vpid_sync_context(int vpid)
 {
if (cpu_has_vmx_invvpid_single())
-   vpid_sync_vcpu_single(vmx);
+   vpid_sync_vcpu_single(vpid);
else
vpid_sync_vcpu_global();
 }
@@ -3441,9 +3441,9 @@ static void exit_lmode(struct kvm_vcpu *vcpu)
 
 #endif
 
-static void vmx_flush_tlb(struct kvm_vcpu *vcpu)
+static inline void __vmx_flush_tlb(struct kvm_vcpu *vcpu, int vpid)
 {
-   vpid_sync_context(to_vmx(vcpu));
+   vpid_sync_context(vpid);
if (enable_ept) {
if (!VALID_PAGE(vcpu->arch.mmu.root_hpa))
return;
@@ -3451,6 +3451,11 @@ static void vmx_flush_tlb(struct kvm_vcpu *vcpu)
}
 }
 
+static void vmx_flush_tlb(struct kvm_vcpu *vcpu)
+{
+   __vmx_flush_tlb(vcpu, to_vmx(vcpu)->vpid);
+}
+
 static void vmx_decache_cr0_guest_bits(struct kvm_vcpu *vcpu)
 {
ulong cr0_guest_owned_bits = vcpu->arch.cr0_guest_owned_bits;
@@ -4784,7 +4789,7 @@ static void vmx_vcpu_reset(struct kvm_vcpu *vcpu, bool 
init_event)
vmx_fpu_activate(vcpu);
update_exception_bitmap(vcpu);
 
-   vpid_sync_context(vmx);
+   vpid_sync_context(vmx->vpid);
 }
 
 /*
-- 
1.8.3.1


--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[FYI PATCH 12/20] kvm: add capability for any-length ioeventfds

2015-09-28 Thread Paolo Bonzini
From: Jason Wang 

Cc: Gleb Natapov 
Cc: Paolo Bonzini 
Signed-off-by: Jason Wang 
Signed-off-by: Paolo Bonzini 
---
 Documentation/virtual/kvm/api.txt | 6 +-
 include/uapi/linux/kvm.h  | 1 +
 virt/kvm/eventfd.c| 4 +---
 virt/kvm/kvm_main.c   | 1 +
 4 files changed, 8 insertions(+), 4 deletions(-)

diff --git a/Documentation/virtual/kvm/api.txt 
b/Documentation/virtual/kvm/api.txt
index e3e9c41721a2..34cc068e81ea 100644
--- a/Documentation/virtual/kvm/api.txt
+++ b/Documentation/virtual/kvm/api.txt
@@ -1604,7 +1604,7 @@ provided event instead of triggering an exit.
 struct kvm_ioeventfd {
__u64 datamatch;
__u64 addr;/* legal pio/mmio address */
-   __u32 len; /* 1, 2, 4, or 8 bytes*/
+   __u32 len; /* 0, 1, 2, 4, or 8 bytes*/
__s32 fd;
__u32 flags;
__u8  pad[36];
@@ -1627,6 +1627,10 @@ to the registered address is equal to datamatch in 
struct kvm_ioeventfd.
 For virtio-ccw devices, addr contains the subchannel id and datamatch the
 virtqueue index.
 
+With KVM_CAP_IOEVENTFD_ANY_LENGTH, a zero length ioeventfd is allowed, and
+the kernel will ignore the length of guest write and may get a faster vmexit.
+The speedup may only apply to specific architectures, but the ioeventfd will
+work anyway.
 
 4.60 KVM_DIRTY_TLB
 
diff --git a/include/uapi/linux/kvm.h b/include/uapi/linux/kvm.h
index 12e3afbf0f47..03f3618612aa 100644
--- a/include/uapi/linux/kvm.h
+++ b/include/uapi/linux/kvm.h
@@ -830,6 +830,7 @@ struct kvm_ppc_smmu_info {
 #define KVM_CAP_GUEST_DEBUG_HW_BPS 119
 #define KVM_CAP_GUEST_DEBUG_HW_WPS 120
 #define KVM_CAP_SPLIT_IRQCHIP 121
+#define KVM_CAP_IOEVENTFD_ANY_LENGTH 122
 
 #ifdef KVM_CAP_IRQ_ROUTING
 
diff --git a/virt/kvm/eventfd.c b/virt/kvm/eventfd.c
index 79db45336e3a..ac89299b8699 100644
--- a/virt/kvm/eventfd.c
+++ b/virt/kvm/eventfd.c
@@ -914,9 +914,7 @@ kvm_assign_ioeventfd(struct kvm *kvm, struct kvm_ioeventfd 
*args)
return -EINVAL;
 
/* ioeventfd with no length can't be combined with DATAMATCH */
-   if (!args->len &&
-   args->flags & (KVM_IOEVENTFD_FLAG_PIO |
-  KVM_IOEVENTFD_FLAG_DATAMATCH))
+   if (!args->len && (args->flags & KVM_IOEVENTFD_FLAG_DATAMATCH))
return -EINVAL;
 
ret = kvm_assign_ioeventfd_idx(kvm, bus_idx, args);
diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
index 23116dcb2129..afd7ae6aec65 100644
--- a/virt/kvm/kvm_main.c
+++ b/virt/kvm/kvm_main.c
@@ -2718,6 +2718,7 @@ static long kvm_vm_ioctl_check_extension_generic(struct 
kvm *kvm, long arg)
case KVM_CAP_IRQFD:
case KVM_CAP_IRQFD_RESAMPLE:
 #endif
+   case KVM_CAP_IOEVENTFD_ANY_LENGTH:
case KVM_CAP_CHECK_EXTENSION_VM:
return 1;
 #ifdef CONFIG_HAVE_KVM_IRQ_ROUTING
-- 
1.8.3.1


--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[FYI PATCH 14/20] kvm/x86: Hyper-V HV_X64_MSR_VP_INDEX export for QEMU.

2015-09-28 Thread Paolo Bonzini
From: Andrey Smetanin 

Insert Hyper-V HV_X64_MSR_VP_INDEX into the list of emulated MSRs,
so QEMU can set the HV_X64_MSR_VP_INDEX_AVAILABLE bit in the Hyper-V
features cpuid correctly. The KVM emulation part is already in place.

Necessary to support loading of winhv.sys in guest, which in turn is
required to support Windows VMBus.

Signed-off-by: Andrey Smetanin 
Reviewed-by: Roman Kagan 
Signed-off-by: Denis V. Lunev 
CC: Paolo Bonzini 
CC: Gleb Natapov 
Signed-off-by: Paolo Bonzini 
---
 arch/x86/kvm/x86.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 7e207de0a13f..5bc598de28a7 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -953,6 +953,7 @@ static u32 emulated_msrs[] = {
HV_X64_MSR_CRASH_P0, HV_X64_MSR_CRASH_P1, HV_X64_MSR_CRASH_P2,
HV_X64_MSR_CRASH_P3, HV_X64_MSR_CRASH_P4, HV_X64_MSR_CRASH_CTL,
HV_X64_MSR_RESET,
+   HV_X64_MSR_VP_INDEX,
HV_X64_MSR_APIC_ASSIST_PAGE, MSR_KVM_ASYNC_PF_EN, MSR_KVM_STEAL_TIME,
MSR_KVM_PV_EOI_EN,
 
-- 
1.8.3.1


--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[FYI PATCH 08/20] KVM: x86: Add EOI exit bitmap inference

2015-09-28 Thread Paolo Bonzini
From: Steve Rutherford 

In order to support a userspace IOAPIC interacting with an in kernel
APIC, the EOI exit bitmaps need to be configurable.

If the IOAPIC is in userspace (i.e. the irqchip has been split), the
EOI exit bitmaps will be set whenever the GSI Routes are configured.
In particular, the low MSI routes are reservable for userspace
IOAPICs. For these MSI routes, the EOI Exit bit corresponding to the
destination vector of the route will be set for the destination VCPU.

The intention is for the userspace IOAPICs to use the reservable MSI
routes to inject interrupts into the guest.

This is a slight abuse of the notion of an MSI Route, given that MSIs
classically bypass the IOAPIC. It might be worthwhile to add an
additional route type to improve clarity.

Compile tested for Intel x86.

Signed-off-by: Steve Rutherford 
Signed-off-by: Paolo Bonzini 
---
 Documentation/virtual/kvm/api.txt |  9 ++---
 arch/x86/include/asm/kvm_host.h   |  1 +
 arch/x86/kvm/ioapic.h |  2 ++
 arch/x86/kvm/irq_comm.c   | 42 +++
 arch/x86/kvm/lapic.c  |  3 +--
 arch/x86/kvm/x86.c|  9 -
 include/linux/kvm_host.h  | 17 
 virt/kvm/irqchip.c| 12 ++-
 8 files changed, 79 insertions(+), 16 deletions(-)

diff --git a/Documentation/virtual/kvm/api.txt 
b/Documentation/virtual/kvm/api.txt
index 0d14bf5db534..89e71648d748 100644
--- a/Documentation/virtual/kvm/api.txt
+++ b/Documentation/virtual/kvm/api.txt
@@ -3642,7 +3642,7 @@ KVM handlers should exit to userspace with rc = -EREMOTE.
 7.5 KVM_CAP_SPLIT_IRQCHIP
 
 Architectures: x86
-Parameters: None
+Parameters: args[0] - number of routes reserved for userspace IOAPICs
 Returns: 0 on success, -1 on error
 
 Create a local apic for each processor in the kernel. This can be used
@@ -3650,8 +3650,11 @@ instead of KVM_CREATE_IRQCHIP if the userspace VMM 
wishes to emulate the
 IOAPIC and PIC (and also the PIT, even though this has to be enabled
 separately).
 
-This supersedes KVM_CREATE_IRQCHIP, creating only local APICs, but no in kernel
-IOAPIC or PIC. This also enables in kernel routing of interrupt requests.
+This capability also enables in kernel routing of interrupt requests;
+when KVM_CAP_SPLIT_IRQCHIP only routes of KVM_IRQ_ROUTING_MSI type are
+used in the IRQ routing table.  The first args[0] MSI routes are reserved
+for the IOAPIC pins.  Whenever the LAPIC receives an EOI for these routes,
+a KVM_EXIT_IOAPIC_EOI vmexit will be reported to userspace.
 
 Fails if VCPU has already been created, or if the irqchip is already in the
 kernel (i.e. KVM_CREATE_IRQCHIP has already been called).
diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index af09fa1d1be7..7a5f9debbcd8 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -688,6 +688,7 @@ struct kvm_arch {
u64 disabled_quirks;
 
bool irqchip_split;
+   u8 nr_reserved_ioapic_pins;
 };
 
 struct kvm_vm_stat {
diff --git a/arch/x86/kvm/ioapic.h b/arch/x86/kvm/ioapic.h
index a8842c0dee73..084617d37c74 100644
--- a/arch/x86/kvm/ioapic.h
+++ b/arch/x86/kvm/ioapic.h
@@ -9,6 +9,7 @@ struct kvm;
 struct kvm_vcpu;
 
 #define IOAPIC_NUM_PINS  KVM_IOAPIC_NUM_PINS
+#define MAX_NR_RESERVED_IOAPIC_PINS KVM_MAX_IRQ_ROUTES
 #define IOAPIC_VERSION_ID 0x11 /* IOAPIC version */
 #define IOAPIC_EDGE_TRIG  0
 #define IOAPIC_LEVEL_TRIG 1
@@ -121,5 +122,6 @@ int kvm_irq_delivery_to_apic(struct kvm *kvm, struct 
kvm_lapic *src,
 int kvm_get_ioapic(struct kvm *kvm, struct kvm_ioapic_state *state);
 int kvm_set_ioapic(struct kvm *kvm, struct kvm_ioapic_state *state);
 void kvm_ioapic_scan_entry(struct kvm_vcpu *vcpu, u64 *eoi_exit_bitmap);
+void kvm_scan_ioapic_routes(struct kvm_vcpu *vcpu, u64 *eoi_exit_bitmap);
 
 #endif
diff --git a/arch/x86/kvm/irq_comm.c b/arch/x86/kvm/irq_comm.c
index 67f6b62a6814..177460998bb0 100644
--- a/arch/x86/kvm/irq_comm.c
+++ b/arch/x86/kvm/irq_comm.c
@@ -335,3 +335,45 @@ int kvm_setup_empty_irq_routing(struct kvm *kvm)
 {
return kvm_set_irq_routing(kvm, empty_routing, 0, 0);
 }
+
+void kvm_arch_irq_routing_update(struct kvm *kvm)
+{
+   if (ioapic_in_kernel(kvm) || !irqchip_in_kernel(kvm))
+   return;
+   kvm_make_scan_ioapic_request(kvm);
+}
+
+void kvm_scan_ioapic_routes(struct kvm_vcpu *vcpu, u64 *eoi_exit_bitmap)
+{
+   struct kvm *kvm = vcpu->kvm;
+   struct kvm_kernel_irq_routing_entry *entry;
+   struct kvm_irq_routing_table *table;
+   u32 i, nr_ioapic_pins;
+   int idx;
+
+   /* kvm->irq_routing must be read after clearing
+* KVM_SCAN_IOAPIC. */
+   smp_mb();
+   idx = srcu_read_lock(&kvm->irq_srcu);
+   table = srcu_dereference(kvm->irq_routing, &kvm->irq_srcu);
+   nr_ioapic_pins = min_t(u32, table->nr_rt_entries,
+  

[FYI PATCH 09/20] KVM: x86: Add support for local interrupt requests from userspace

2015-09-28 Thread Paolo Bonzini
From: Steve Rutherford 

In order to enable userspace PIC support, the userspace PIC needs to
be able to inject local interrupts even when the APICs are in the
kernel.

KVM_INTERRUPT now supports sending local interrupts to an APIC when
APICs are in the kernel.

The ready_for_interrupt_request flag is now only set when the CPU/APIC
will immediately accept and inject an interrupt (i.e. APIC has not
masked the PIC).

When the PIC wishes to initiate an INTA cycle with, say, CPU0, it
kicks CPU0 out of the guest, and rendezvous with CPU0 once it arrives
in userspace.

When the CPU/APIC unmasks the PIC, a KVM_EXIT_IRQ_WINDOW_OPEN is
triggered, so that userspace has a chance to inject a PIC interrupt
if it had been pending.

Overall, this design can lead to a small number of spurious userspace
rendezvous. In particular, whenever the PIC transitions from low to
high while it is masked and whenever the PIC becomes unmasked while
it is low.

Note: this does not buffer more than one local interrupt in the
kernel, so the VMM needs to enter the guest in order to complete
interrupt injection before injecting an additional interrupt.

Compiles for x86.

Can pass the KVM Unit Tests.

Signed-off-by: Steve Rutherford 
Signed-off-by: Paolo Bonzini 
---
 Documentation/virtual/kvm/api.txt | 14 +
 arch/x86/include/asm/kvm_host.h   |  1 +
 arch/x86/kvm/irq.c| 32 +++--
 arch/x86/kvm/irq.h|  8 
 arch/x86/kvm/x86.c| 42 ++-
 5 files changed, 78 insertions(+), 19 deletions(-)

diff --git a/Documentation/virtual/kvm/api.txt 
b/Documentation/virtual/kvm/api.txt
index 89e71648d748..e3e9c41721a2 100644
--- a/Documentation/virtual/kvm/api.txt
+++ b/Documentation/virtual/kvm/api.txt
@@ -401,10 +401,9 @@ Capability: basic
 Architectures: x86, ppc, mips
 Type: vcpu ioctl
 Parameters: struct kvm_interrupt (in)
-Returns: 0 on success, -1 on error
+Returns: 0 on success, negative on failure.
 
-Queues a hardware interrupt vector to be injected.  This is only
-useful if in-kernel local APIC or equivalent is not used.
+Queues a hardware interrupt vector to be injected.
 
 /* for KVM_INTERRUPT */
 struct kvm_interrupt {
@@ -414,7 +413,14 @@ struct kvm_interrupt {
 
 X86:
 
-Note 'irq' is an interrupt vector, not an interrupt pin or line.
+Returns: 0 on success,
+-EEXIST if an interrupt is already enqueued
+-EINVAL if the irq number is invalid
+-ENXIO if the PIC is in the kernel
+-EFAULT if the pointer is invalid
+
+Note 'irq' is an interrupt vector, not an interrupt pin or line. This
+ioctl is useful if the in-kernel PIC is not used.
 
 PPC:
 
diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index 7a5f9debbcd8..76a5b30979b3 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -576,6 +576,7 @@ struct kvm_vcpu_arch {
} pv;
 
int pending_ioapic_eoi;
+   int pending_external_vector;
 };
 
 struct kvm_lpage_info {
diff --git a/arch/x86/kvm/irq.c b/arch/x86/kvm/irq.c
index b653ae202c8e..097060e33bd6 100644
--- a/arch/x86/kvm/irq.c
+++ b/arch/x86/kvm/irq.c
@@ -38,14 +38,27 @@ int kvm_cpu_has_pending_timer(struct kvm_vcpu *vcpu)
 EXPORT_SYMBOL(kvm_cpu_has_pending_timer);
 
 /*
+ * check if there is a pending userspace external interrupt
+ */
+static int pending_userspace_extint(struct kvm_vcpu *v)
+{
+   return v->arch.pending_external_vector != -1;
+}
+
+/*
  * check if there is pending interrupt from
  * non-APIC source without intack.
  */
 static int kvm_cpu_has_extint(struct kvm_vcpu *v)
 {
-   if (kvm_apic_accept_pic_intr(v))
-   return pic_irqchip(v->kvm)->output; /* PIC */
-   else
+   u8 accept = kvm_apic_accept_pic_intr(v);
+
+   if (accept) {
+   if (irqchip_split(v->kvm))
+   return pending_userspace_extint(v);
+   else
+   return pic_irqchip(v->kvm)->output;
+   } else
return 0;
 }
 
@@ -91,9 +104,16 @@ EXPORT_SYMBOL_GPL(kvm_cpu_has_interrupt);
  */
 static int kvm_cpu_get_extint(struct kvm_vcpu *v)
 {
-   if (kvm_cpu_has_extint(v))
-   return kvm_pic_read_irq(v->kvm); /* PIC */
-   return -1;
+   if (kvm_cpu_has_extint(v)) {
+   if (irqchip_split(v->kvm)) {
+   int vector = v->arch.pending_external_vector;
+
+   v->arch.pending_external_vector = -1;
+   return vector;
+   } else
+   return kvm_pic_read_irq(v->kvm); /* PIC */
+   } else
+   return -1;
 }
 
 /*
diff --git a/arch/x86/kvm/irq.h b/arch/x86/kvm/irq.h
index 2f9703dcd913..ae5c78f2337d 100644
--- a/arch/x86/kvm/irq.h
+++ b/arch/x86/kvm/irq.h
@@ -83,6 +83,14 @@ static inline struct kvm_pic *pic_irqchip(struct kvm 

[FYI PATCH 11/20] kvm: add tracepoint for fast mmio

2015-09-28 Thread Paolo Bonzini
From: Jason Wang 

Cc: Gleb Natapov 
Cc: Paolo Bonzini 
Signed-off-by: Jason Wang 
Signed-off-by: Paolo Bonzini 
---
 arch/x86/kvm/trace.h | 18 ++
 arch/x86/kvm/vmx.c   |  1 +
 arch/x86/kvm/x86.c   |  1 +
 3 files changed, 20 insertions(+)

diff --git a/arch/x86/kvm/trace.h b/arch/x86/kvm/trace.h
index 4eae7c35ddf5..ce4abe333c39 100644
--- a/arch/x86/kvm/trace.h
+++ b/arch/x86/kvm/trace.h
@@ -129,6 +129,24 @@ TRACE_EVENT(kvm_pio,
 );
 
 /*
+ * Tracepoint for fast mmio.
+ */
+TRACE_EVENT(kvm_fast_mmio,
+   TP_PROTO(u64 gpa),
+   TP_ARGS(gpa),
+
+   TP_STRUCT__entry(
+   __field(u64,gpa)
+   ),
+
+   TP_fast_assign(
+   __entry->gpa= gpa;
+   ),
+
+   TP_printk("fast mmio at gpa 0x%llx", __entry->gpa)
+);
+
+/*
  * Tracepoint for cpuid.
  */
 TRACE_EVENT(kvm_cpuid,
diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index 90605f70a7d0..b2d619ab3d07 100644
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -5756,6 +5756,7 @@ static int handle_ept_misconfig(struct kvm_vcpu *vcpu)
gpa = vmcs_read64(GUEST_PHYSICAL_ADDRESS);
if (!kvm_io_bus_write(vcpu, KVM_FAST_MMIO_BUS, gpa, 0, NULL)) {
skip_emulated_instruction(vcpu);
+   trace_kvm_fast_mmio(gpa);
return 1;
}
 
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 1264748e1cc4..b015ab357980 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -8074,6 +8074,7 @@ bool kvm_arch_has_noncoherent_dma(struct kvm *kvm)
 EXPORT_SYMBOL_GPL(kvm_arch_has_noncoherent_dma);
 
 EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_exit);
+EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_fast_mmio);
 EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_inj_virq);
 EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_page_fault);
 EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_msr);
-- 
1.8.3.1


--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [Qemu-devel] [PATCH 3/3] Target-ppc: Remove unnecessary variable

2015-09-28 Thread Paolo Bonzini


On 26/09/2015 18:15, Eric Blake wrote:
> On 09/25/2015 02:37 AM, Shraddha Barke wrote:
>> Compress lines and remove the variable.
>>
> 
>> +++ b/target-ppc/kvm.c
>> @@ -1782,8 +1782,7 @@ uint32_t kvmppc_get_tbfreq(void)
>>  
>>  ns++;
>>  
>> -retval = atoi(ns);
>> -return retval;
>> +return atoi(ns);
> 
> atoi() is lousy; it cannot properly detect user input errors.  This
> should probably be converted to use the appropriate qemu_strtol variant
> instead.

But it's more or less okay here, it's parsing /proc/cpuinfo.

Paolo
--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


RE: [PATCH v9 00/18] Add VT-d Posted-Interrupts support - including prerequisite series

2015-09-28 Thread Wu, Feng


> -Original Message-
> From: Paolo Bonzini [mailto:pbonz...@redhat.com]
> Sent: Monday, September 28, 2015 6:19 PM
> To: Wu, Feng; alex.william...@redhat.com; j...@8bytes.org;
> mtosa...@redhat.com
> Cc: eric.au...@linaro.org; kvm@vger.kernel.org;
> io...@lists.linux-foundation.org; linux-ker...@vger.kernel.org
> Subject: Re: [PATCH v9 00/18] Add VT-d Posted-Interrupts support - including
> prerequisite series
> 
> 
> 
> On 28/09/2015 12:14, Wu, Feng wrote:
> > Thanks a lot for creating branch for vt-d pi. However, I cannot launch 
> > guests
> > with this tree. I encountered the following kernel dump, and I find that the
> > problematic commit is " 2260b1cde0b5472ab70ad0764b10095372e41913 "
> >
> > KVM: x86: put vcpu_create under kvm->srcu critical section
> >
> > This is needed in case vcpu_create wants to access the memslots array.
> > Fixes this lockdep splat:
> >
> > After removing this commit from the tree, my VT-d patch-set works fine.
> 
> Great, thanks.  The above commit had already been reverted.
> 
> I'm sorting out the kbuild reports, and then will merge VT-d PI.

Thanks a lot for making this happen!

Thanks,
Feng

> 
> Paolo
> 
> >
> > Kernel dump:
> > [  221.978182] BUG: unable to handle kernel NULL pointer dereference at
> (null)
> > [  221.986085] IP: [] kvm_arch_vcpu_create+0x30/0x90
> [kvm]
> > [  221.993102] PGD 0
> > [  221.995148] Oops:  [#1] SMP
> > [  221.998440] Modules linked in: bnep rfcomm bluetooth ax88179_178a
> usbnet intel_rapl mii snd_hda_codec_hdmi iosf_mbi x86_pkg_temp_thermal
> nouveau intel_powerclamp snd_hda_intel snd_hda_codec coretemp kvm_intel
> snd_hda_core kvm snd_hwdep snd_pcm crct10dif_pclmul crc32_pclmul
> snd_seq_midi ghash_clmulni_intel mxm_wmi snd_seq_midi_event snd_rawmidi
> video snd_seq ttm aesni_intel aes_x86_64 lrw gf128mul drm_kms_helper
> snd_seq_device binfmt_misc snd_timer glue_helper ablk_helper drm cryptd
> fb_sys_fops snd syscopyarea sysfillrect sb_edac soundcore sysimgblt mei_me
> parport_pc edac_core ppdev mei shpchp lp lpc_ich mac_hid parport
> acpi_power_meter wmi ixgbe igb i2c_algo_bit hid_generic usbhid ptp ahci hid
> libahci pps_core mdio
> > [  222.063533] CPU: 4 PID: 3384 Comm: qemu-system-x86 Not tainted
> 4.3.0-rc1+ #6
> > [  222.070612] Hardware name: Intel Corp. GRANGEVILLE/GRANTLEY, BIOS
> GNVDCRB1.86B.0020.V07.1409241147 09/24/2014
> > [  222.080764] task: 88006e7c8000 ti: 8800714a8000 task.ti:
> 8800714a8000
> > [  222.088283] RIP: 0010:[]  []
> kvm_arch_vcpu_create+0x30/0x90 [kvm]
> > [  222.097680] RSP: 0018:8800714abde0  EFLAGS: 00010246
> > [  222.103153] RAX:  RBX: 88016f28c000 RCX:
> 
> > [  222.110407] RDX:  RSI:  RDI:
> 88016f28c000
> > [  222.117659] RBP: 8800714abdf8 R08: 0001 R09:
> 0040
> > [  222.124824] R10: 880077e86438 R11: 880163e06880 R12:
> 88016f28c000
> > [  222.132150] R13:  R14: ae41 R15:
> 
> > [  222.139405] FS:  7f43fd7ec700() GS:88017870()
> knlGS:
> > [  222.147629] CS:  0010 DS:  ES:  CR0: 80050033
> > [  222.153471] CR2:  CR3: 00017074b000 CR4:
> 003426e0
> > [  222.160726] DR0:  DR1:  DR2:
> 
> > [  222.167979] DR3:  DR6: fffe0ff0 DR7:
> 0400
> > [  222.175231] Stack:
> > [  222.177277]   88016f28c000 
> 8800714abea0
> > [  222.184870]  c0355b17 0008 8800714abe28
> 810aba32
> > [  222.192444]  880178816e40 8800714abe40 810a4f44
> 880178816e40
> > [  222.200017] Call Trace:
> > [  222.202522]  [] kvm_vm_ioctl+0x277/0x6e0 [kvm]
> > [  222.208633]  [] ? put_prev_task_fair+0x22/0x40
> > [  222.214741]  [] ? pick_next_task_idle+0x14/0x30
> > [  222.220942]  [] do_vfs_ioctl+0x2ba/0x490
> > [  222.226523]  [] ? __do_page_fault+0x1ba/0x410
> > [  222.232546]  [] SyS_ioctl+0x79/0x90
> > [  222.237684]  [] ? syscall_return_slowpath+0x55/0x150
> > [  222.244323]  []
> entry_SYSCALL_64_fastpath+0x16/0x75
> > [  222.250869] Code: 55 48 89 e5 41 55 41 54 53 41 89 f5 48 89 fb e8 27 61
> cb c0 85 c0 74 13 8b 83 f0 09 00 00 85 c0 74 09 80 3d 53 2e 04 00 00 74 40 
> <48>
> 8b 04 25 00 00 00 00 48 8d 78 48 e8 7f c4 d6 c0 41 89 c4 48
> > [  222.270790] RIP  [] kvm_arch_vcpu_create+0x30/0x90
> [kvm]
> > [  222.277813]  RSP 
> > [  222.281359] CR2: 
> > [  222.290421] ---[ end trace 957f5a39692fe6c7 ]---
> > root@feng-bdw-de-pi:~/workspace/tools# dmesg > ~/dmesg.log
> > root@feng-bdw-de-pi:~/workspace/tools# vim ~/dmesg.log
> > [  221.998440] Modules linked in: bnep rfcomm bluetooth ax88179_178a
> usbnet intel_rapl mii snd_hda_codec_hdmi iosf_mbi x86_pkg_temp_thermal
> nouveau intel_powerclamp snd_hda_intel snd_hda_codec coretemp kvm_intel
> snd_hda_core kvm 

[FYI PATCH 07/20] KVM: x86: Add KVM exit for IOAPIC EOIs

2015-09-28 Thread Paolo Bonzini
From: Steve Rutherford 

Adds KVM_EXIT_IOAPIC_EOI which allows the kernel to EOI
level-triggered IOAPIC interrupts.

Uses a per VCPU exit bitmap to decide whether or not the IOAPIC needs
to be informed (which is identical to the EOI_EXIT_BITMAP field used
by modern x86 processors, but can also be used to elide kvm IOAPIC EOI
exits on older processors).

[Note: A prototype using ResampleFDs found that decoupling the EOI
from the VCPU's thread made it possible for the VCPU to not see a
recent EOI after reentering the guest. This does not match real
hardware.]

Compile tested for Intel x86.

Signed-off-by: Steve Rutherford 
Signed-off-by: Paolo Bonzini 
---
 Documentation/virtual/kvm/api.txt | 12 
 arch/x86/include/asm/kvm_host.h   |  2 ++
 arch/x86/kvm/lapic.c  | 24 +---
 arch/x86/kvm/x86.c| 11 +++
 include/linux/kvm_host.h  |  2 +-
 include/uapi/linux/kvm.h  |  5 +
 6 files changed, 48 insertions(+), 8 deletions(-)

diff --git a/Documentation/virtual/kvm/api.txt 
b/Documentation/virtual/kvm/api.txt
index 43e0816d0de1..0d14bf5db534 100644
--- a/Documentation/virtual/kvm/api.txt
+++ b/Documentation/virtual/kvm/api.txt
@@ -3309,6 +3309,18 @@ Valid values for 'type' are:
to ignore the request, or to gather VM memory core dump and/or
reset/shutdown of the VM.
 
+   /* KVM_EXIT_IOAPIC_EOI */
+   struct {
+   __u8 vector;
+   } eoi;
+
+Indicates that the VCPU's in-kernel local APIC received an EOI for a
+level-triggered IOAPIC interrupt.  This exit only triggers when the
+IOAPIC is implemented in userspace (i.e. KVM_CAP_SPLIT_IRQCHIP is enabled);
+the userspace IOAPIC should process the EOI and retrigger the interrupt if
+it is still asserted.  Vector is the LAPIC interrupt vector for which the
+EOI was received.
+
/* Fix the size of the union. */
char padding[256];
};
diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index befcf555bddc..af09fa1d1be7 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -574,6 +574,8 @@ struct kvm_vcpu_arch {
struct {
bool pv_unhalted;
} pv;
+
+   int pending_ioapic_eoi;
 };
 
 struct kvm_lpage_info {
diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c
index e05946c36b87..ef70f6f3a37a 100644
--- a/arch/x86/kvm/lapic.c
+++ b/arch/x86/kvm/lapic.c
@@ -877,15 +877,25 @@ static bool kvm_ioapic_handles_vector(struct kvm_lapic 
*apic, int vector)
 
 static void kvm_ioapic_send_eoi(struct kvm_lapic *apic, int vector)
 {
-   if (kvm_ioapic_handles_vector(apic, vector)) {
-   int trigger_mode;
-   if (apic_test_vector(vector, apic->regs + APIC_TMR))
-   trigger_mode = IOAPIC_LEVEL_TRIG;
-   else
-   trigger_mode = IOAPIC_EDGE_TRIG;
+   int trigger_mode;
+
+   /* Eoi the ioapic only if the ioapic handles the vector. */
+   if (!kvm_ioapic_handles_vector(apic, vector))
+   return;
 
-   kvm_ioapic_update_eoi(apic->vcpu, vector, trigger_mode);
+   /* Request a KVM exit to inform the userspace IOAPIC. */
+   if (irqchip_split(apic->vcpu->kvm)) {
+   apic->vcpu->arch.pending_ioapic_eoi = vector;
+   kvm_make_request(KVM_REQ_IOAPIC_EOI_EXIT, apic->vcpu);
+   return;
}
+
+   if (apic_test_vector(vector, apic->regs + APIC_TMR))
+   trigger_mode = IOAPIC_LEVEL_TRIG;
+   else
+   trigger_mode = IOAPIC_EDGE_TRIG;
+
+   kvm_ioapic_update_eoi(apic->vcpu, vector, trigger_mode);
 }
 
 static int apic_set_eoi(struct kvm_lapic *apic)
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index b6fbda026708..07604e205c99 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -6275,6 +6275,17 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu)
kvm_pmu_handle_event(vcpu);
if (kvm_check_request(KVM_REQ_PMI, vcpu))
kvm_pmu_deliver_pmi(vcpu);
+   if (kvm_check_request(KVM_REQ_IOAPIC_EOI_EXIT, vcpu)) {
+   BUG_ON(vcpu->arch.pending_ioapic_eoi > 255);
+   if (test_bit(vcpu->arch.pending_ioapic_eoi,
+(void *) vcpu->arch.eoi_exit_bitmap)) {
+   vcpu->run->exit_reason = KVM_EXIT_IOAPIC_EOI;
+   vcpu->run->eoi.vector =
+   vcpu->arch.pending_ioapic_eoi;
+   r = 0;
+   goto out;
+   }
+   }
if (kvm_check_request(KVM_REQ_SCAN_IOAPIC, vcpu))
vcpu_scan_ioapic(vcpu);
if 

[FYI PATCH 06/20] KVM: x86: Split the APIC from the rest of IRQCHIP.

2015-09-28 Thread Paolo Bonzini
From: Steve Rutherford 

First patch in a series which enables the relocation of the
PIC/IOAPIC to userspace.

Adds capability KVM_CAP_SPLIT_IRQCHIP.

KVM_CAP_SPLIT_IRQCHIP enables the construction of LAPICs without the
rest of the irqchip.

Compile tested for x86.

Signed-off-by: Steve Rutherford 
Suggested-by: Andrew Honig 
Signed-off-by: Paolo Bonzini 
---
 Documentation/virtual/kvm/api.txt | 17 +
 arch/x86/include/asm/kvm_host.h   |  2 ++
 arch/x86/kvm/i8254.c  |  4 +++-
 arch/x86/kvm/ioapic.h |  8 
 arch/x86/kvm/irq.h| 11 ++-
 arch/x86/kvm/irq_comm.c   |  9 -
 arch/x86/kvm/lapic.c  |  6 --
 arch/x86/kvm/x86.c| 23 +--
 include/linux/kvm_host.h  |  1 +
 include/uapi/linux/kvm.h  |  1 +
 10 files changed, 75 insertions(+), 7 deletions(-)

diff --git a/Documentation/virtual/kvm/api.txt 
b/Documentation/virtual/kvm/api.txt
index d9ecceea5a02..43e0816d0de1 100644
--- a/Documentation/virtual/kvm/api.txt
+++ b/Documentation/virtual/kvm/api.txt
@@ -3627,6 +3627,23 @@ struct {
 
 KVM handlers should exit to userspace with rc = -EREMOTE.
 
+7.5 KVM_CAP_SPLIT_IRQCHIP
+
+Architectures: x86
+Parameters: None
+Returns: 0 on success, -1 on error
+
+Create a local apic for each processor in the kernel. This can be used
+instead of KVM_CREATE_IRQCHIP if the userspace VMM wishes to emulate the
+IOAPIC and PIC (and also the PIT, even though this has to be enabled
+separately).
+
+This supersedes KVM_CREATE_IRQCHIP, creating only local APICs, but no in kernel
+IOAPIC or PIC. This also enables in kernel routing of interrupt requests.
+
+Fails if VCPU has already been created, or if the irqchip is already in the
+kernel (i.e. KVM_CREATE_IRQCHIP has already been called).
+
 
 8. Other capabilities.
 --
diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index a0ef289d5a86..befcf555bddc 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -684,6 +684,8 @@ struct kvm_arch {
u32 bsp_vcpu_id;
 
u64 disabled_quirks;
+
+   bool irqchip_split;
 };
 
 struct kvm_vm_stat {
diff --git a/arch/x86/kvm/i8254.c b/arch/x86/kvm/i8254.c
index f90952f64e79..08116ff227cc 100644
--- a/arch/x86/kvm/i8254.c
+++ b/arch/x86/kvm/i8254.c
@@ -35,6 +35,7 @@
 #include 
 #include 
 
+#include "ioapic.h"
 #include "irq.h"
 #include "i8254.h"
 #include "x86.h"
@@ -333,7 +334,8 @@ static void create_pit_timer(struct kvm *kvm, u32 val, int 
is_period)
struct kvm_kpit_state *ps = &kvm->arch.vpit->pit_state;
s64 interval;
 
-   if (!irqchip_in_kernel(kvm) || ps->flags & KVM_PIT_FLAGS_HPET_LEGACY)
+   if (!ioapic_in_kernel(kvm) ||
+   ps->flags & KVM_PIT_FLAGS_HPET_LEGACY)
return;
 
interval = muldiv64(val, NSEC_PER_SEC, KVM_PIT_FREQ);
diff --git a/arch/x86/kvm/ioapic.h b/arch/x86/kvm/ioapic.h
index bf36d66a1951..a8842c0dee73 100644
--- a/arch/x86/kvm/ioapic.h
+++ b/arch/x86/kvm/ioapic.h
@@ -97,6 +97,14 @@ static inline struct kvm_ioapic *ioapic_irqchip(struct kvm 
*kvm)
return kvm->arch.vioapic;
 }
 
+static inline int ioapic_in_kernel(struct kvm *kvm)
+{
+   int ret;
+
+   ret = (ioapic_irqchip(kvm) != NULL);
+   return ret;
+}
+
 void kvm_rtc_eoi_tracking_restore_one(struct kvm_vcpu *vcpu);
 bool kvm_apic_match_dest(struct kvm_vcpu *vcpu, struct kvm_lapic *source,
int short_hand, unsigned int dest, int dest_mode);
diff --git a/arch/x86/kvm/irq.h b/arch/x86/kvm/irq.h
index 9e6e7e04de98..2f9703dcd913 100644
--- a/arch/x86/kvm/irq.h
+++ b/arch/x86/kvm/irq.h
@@ -83,13 +83,22 @@ static inline struct kvm_pic *pic_irqchip(struct kvm *kvm)
return kvm->arch.vpic;
 }
 
+static inline int irqchip_split(struct kvm *kvm)
+{
+   return kvm->arch.irqchip_split;
+}
+
 static inline int irqchip_in_kernel(struct kvm *kvm)
 {
struct kvm_pic *vpic = pic_irqchip(kvm);
+   bool ret;
+
+   ret = (vpic != NULL);
+   ret |= irqchip_split(kvm);
 
/* Read vpic before kvm->irq_routing.  */
smp_rmb();
-   return vpic != NULL;
+   return ret;
 }
 
 static inline int lapic_in_kernel(struct kvm_vcpu *vcpu)
diff --git a/arch/x86/kvm/irq_comm.c b/arch/x86/kvm/irq_comm.c
index 9efff9e5b58c..67f6b62a6814 100644
--- a/arch/x86/kvm/irq_comm.c
+++ b/arch/x86/kvm/irq_comm.c
@@ -208,7 +208,7 @@ void kvm_free_irq_source_id(struct kvm *kvm, int 
irq_source_id)
goto unlock;
}
clear_bit(irq_source_id, &kvm->arch.irq_sources_bitmap);
-   if (!irqchip_in_kernel(kvm))
+   if (!ioapic_in_kernel(kvm))
goto unlock;
 
kvm_ioapic_clear_all(kvm->arch.vioapic, irq_source_id);
@@ -328,3 +328,10 @@ int kvm_setup_default_irq_routing(struct kvm *kvm)
return 

[FYI PATCH 05/20] KVM: x86: unify handling of interrupt window

2015-09-28 Thread Paolo Bonzini
The interrupt window is currently checked twice, once in vmx.c/svm.c and
once in dm_request_for_irq_injection.  The only difference is the extra
check for kvm_arch_interrupt_allowed in dm_request_for_irq_injection,
and the different return value (EINTR/KVM_EXIT_INTR for vmx.c/svm.c vs.
0/KVM_EXIT_IRQ_WINDOW_OPEN for dm_request_for_irq_injection).

However, dm_request_for_irq_injection is basically dead code!  Revive it
by removing the checks in vmx.c and svm.c's vmexit handlers, and
fixing the returned values for the dm_request_for_irq_injection case.

Signed-off-by: Paolo Bonzini 
---
 arch/x86/kvm/svm.c | 13 -
 arch/x86/kvm/vmx.c | 11 ---
 arch/x86/kvm/x86.c |  4 ++--
 3 files changed, 2 insertions(+), 26 deletions(-)

diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c
index 89d278a0ad37..7d7bbe2651d2 100644
--- a/arch/x86/kvm/svm.c
+++ b/arch/x86/kvm/svm.c
@@ -3398,24 +3398,11 @@ static int msr_interception(struct vcpu_svm *svm)
 
 static int interrupt_window_interception(struct vcpu_svm *svm)
 {
-   struct kvm_run *kvm_run = svm->vcpu.run;
-
kvm_make_request(KVM_REQ_EVENT, &svm->vcpu);
svm_clear_vintr(svm);
svm->vmcb->control.int_ctl &= ~V_IRQ_MASK;
mark_dirty(svm->vmcb, VMCB_INTR);
++svm->vcpu.stat.irq_window_exits;
-   /*
-* If the user space waits to inject interrupts, exit as soon as
-* possible
-*/
-   if (!lapic_in_kernel(&svm->vcpu) &&
-   kvm_run->request_interrupt_window &&
-   !kvm_cpu_has_interrupt(&svm->vcpu)) {
-   kvm_run->exit_reason = KVM_EXIT_IRQ_WINDOW_OPEN;
-   return 0;
-   }
-
return 1;
 }
 
diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index d5b87be89631..90605f70a7d0 100644
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -5524,17 +5524,6 @@ static int handle_interrupt_window(struct kvm_vcpu *vcpu)
kvm_make_request(KVM_REQ_EVENT, vcpu);
 
++vcpu->stat.irq_window_exits;
-
-   /*
-* If the user space waits to inject interrupts, exit as soon as
-* possible
-*/
-   if (!lapic_in_kernel(vcpu) &&
-   vcpu->run->request_interrupt_window &&
-   !kvm_cpu_has_interrupt(vcpu)) {
-   vcpu->run->exit_reason = KVM_EXIT_IRQ_WINDOW_OPEN;
-   return 0;
-   }
return 1;
 }
 
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index e25bc4e2b7a9..080aaa7a4d91 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -6473,8 +6473,8 @@ static int vcpu_run(struct kvm_vcpu *vcpu)
kvm_inject_pending_timer_irqs(vcpu);
 
if (dm_request_for_irq_injection(vcpu)) {
-   r = -EINTR;
-   vcpu->run->exit_reason = KVM_EXIT_INTR;
+   r = 0;
+   vcpu->run->exit_reason = KVM_EXIT_IRQ_WINDOW_OPEN;
++vcpu->stat.request_irq_exits;
break;
}
-- 
1.8.3.1


--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[FYI PATCH 02/20] KVM: x86: store IOAPIC-handled vectors in each VCPU

2015-09-28 Thread Paolo Bonzini
We can reuse the algorithm that computes the EOI exit bitmap to figure
out which vectors are handled by the IOAPIC.  The only difference
between the two is for edge-triggered interrupts other than IRQ8
that have no notifiers active; however, the IOAPIC does not have to
do anything special for these interrupts anyway.

This again limits the interactions between the IOAPIC and the LAPIC,
making it easier to move the former to userspace.

Inspired by a patch from Steve Rutherford.

Signed-off-by: Paolo Bonzini 
---
 arch/x86/include/asm/kvm_host.h |  3 ++-
 arch/x86/kvm/ioapic.c   | 18 ++
 arch/x86/kvm/ioapic.h   |  8 
 arch/x86/kvm/lapic.c| 10 --
 arch/x86/kvm/svm.c  |  2 +-
 arch/x86/kvm/vmx.c  |  3 ++-
 arch/x86/kvm/x86.c  |  8 +++-
 7 files changed, 18 insertions(+), 34 deletions(-)

diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index 2beee0382088..33609c2c743b 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -396,6 +396,7 @@ struct kvm_vcpu_arch {
u64 efer;
u64 apic_base;
struct kvm_lapic *apic;/* kernel irqchip context */
+   u64 eoi_exit_bitmap[4];
unsigned long apic_attention;
int32_t apic_arb_prio;
int mp_state;
@@ -822,7 +823,7 @@ struct kvm_x86_ops {
int (*vm_has_apicv)(struct kvm *kvm);
void (*hwapic_irr_update)(struct kvm_vcpu *vcpu, int max_irr);
void (*hwapic_isr_update)(struct kvm *kvm, int isr);
-   void (*load_eoi_exitmap)(struct kvm_vcpu *vcpu, u64 *eoi_exit_bitmap);
+   void (*load_eoi_exitmap)(struct kvm_vcpu *vcpu);
void (*set_virtual_x2apic_mode)(struct kvm_vcpu *vcpu, bool set);
void (*set_apic_access_page_addr)(struct kvm_vcpu *vcpu, hpa_t hpa);
void (*deliver_posted_interrupt)(struct kvm_vcpu *vcpu, int vector);
diff --git a/arch/x86/kvm/ioapic.c b/arch/x86/kvm/ioapic.c
index eaf4ec38d980..2dcda0f188ba 100644
--- a/arch/x86/kvm/ioapic.c
+++ b/arch/x86/kvm/ioapic.c
@@ -233,19 +233,6 @@ static void kvm_ioapic_inject_all(struct kvm_ioapic 
*ioapic, unsigned long irr)
 }
 
 
-static void update_handled_vectors(struct kvm_ioapic *ioapic)
-{
-   DECLARE_BITMAP(handled_vectors, 256);
-   int i;
-
-   memset(handled_vectors, 0, sizeof(handled_vectors));
-   for (i = 0; i < IOAPIC_NUM_PINS; ++i)
-   __set_bit(ioapic->redirtbl[i].fields.vector, handled_vectors);
-   memcpy(ioapic->handled_vectors, handled_vectors,
-  sizeof(handled_vectors));
-   smp_wmb();
-}
-
 void kvm_ioapic_scan_entry(struct kvm_vcpu *vcpu, u64 *eoi_exit_bitmap)
 {
struct kvm_ioapic *ioapic = vcpu->kvm->arch.vioapic;
@@ -310,7 +297,6 @@ static void ioapic_write_indirect(struct kvm_ioapic 
*ioapic, u32 val)
e->bits |= (u32) val;
e->fields.remote_irr = 0;
}
-   update_handled_vectors(ioapic);
mask_after = e->fields.mask;
if (mask_before != mask_after)
kvm_fire_mask_notifiers(ioapic->kvm, 
KVM_IRQCHIP_IOAPIC, index, mask_after);
@@ -594,7 +580,6 @@ static void kvm_ioapic_reset(struct kvm_ioapic *ioapic)
ioapic->id = 0;
memset(ioapic->irq_eoi, 0x00, IOAPIC_NUM_PINS);
rtc_irq_eoi_tracking_reset(ioapic);
-   update_handled_vectors(ioapic);
 }
 
 static const struct kvm_io_device_ops ioapic_mmio_ops = {
@@ -623,8 +608,10 @@ int kvm_ioapic_init(struct kvm *kvm)
if (ret < 0) {
kvm->arch.vioapic = NULL;
kfree(ioapic);
+   return ret;
}
 
+   kvm_vcpu_request_scan_ioapic(kvm);
return ret;
 }
 
@@ -661,7 +648,6 @@ int kvm_set_ioapic(struct kvm *kvm, struct kvm_ioapic_state 
*state)
memcpy(ioapic, state, sizeof(struct kvm_ioapic_state));
ioapic->irr = 0;
ioapic->irr_delivered = 0;
-   update_handled_vectors(ioapic);
kvm_vcpu_request_scan_ioapic(kvm);
kvm_ioapic_inject_all(ioapic, state->irr);
spin_unlock(&ioapic->lock);
diff --git a/arch/x86/kvm/ioapic.h b/arch/x86/kvm/ioapic.h
index 3dbd0e2aac4e..bf36d66a1951 100644
--- a/arch/x86/kvm/ioapic.h
+++ b/arch/x86/kvm/ioapic.h
@@ -73,7 +73,6 @@ struct kvm_ioapic {
struct kvm *kvm;
void (*ack_notifier)(void *opaque, int irq);
spinlock_t lock;
-   DECLARE_BITMAP(handled_vectors, 256);
struct rtc_status rtc_status;
struct delayed_work eoi_inject;
u32 irq_eoi[IOAPIC_NUM_PINS];
@@ -98,13 +97,6 @@ static inline struct kvm_ioapic *ioapic_irqchip(struct kvm 
*kvm)
return kvm->arch.vioapic;
 }
 
-static inline bool kvm_ioapic_handles_vector(struct kvm *kvm, int vector)
-{
-   struct kvm_ioapic *ioapic = kvm->arch.vioapic;
-   smp_rmb();
-   return test_bit(vector, ioapic->handled_vectors);
-}
-
 void 

[FYI PATCH 03/20] KVM: x86: replace vm_has_apicv hook with cpu_uses_apicv

2015-09-28 Thread Paolo Bonzini
This will avoid an unnecessary trip to ->kvm and from there to the VPIC.

Signed-off-by: Paolo Bonzini 
---
 arch/x86/include/asm/kvm_host.h | 2 +-
 arch/x86/kvm/irq.c  | 2 +-
 arch/x86/kvm/lapic.c| 4 ++--
 arch/x86/kvm/lapic.h| 4 ++--
 arch/x86/kvm/svm.c  | 4 ++--
 arch/x86/kvm/vmx.c  | 8 +++-
 6 files changed, 15 insertions(+), 9 deletions(-)

diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index 33609c2c743b..a0ef289d5a86 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -820,7 +820,7 @@ struct kvm_x86_ops {
void (*enable_nmi_window)(struct kvm_vcpu *vcpu);
void (*enable_irq_window)(struct kvm_vcpu *vcpu);
void (*update_cr8_intercept)(struct kvm_vcpu *vcpu, int tpr, int irr);
-   int (*vm_has_apicv)(struct kvm *kvm);
+   int (*cpu_uses_apicv)(struct kvm_vcpu *vcpu);
void (*hwapic_irr_update)(struct kvm_vcpu *vcpu, int max_irr);
void (*hwapic_isr_update)(struct kvm *kvm, int isr);
void (*load_eoi_exitmap)(struct kvm_vcpu *vcpu);
diff --git a/arch/x86/kvm/irq.c b/arch/x86/kvm/irq.c
index a1ec6a50a05a..c0dad893dc59 100644
--- a/arch/x86/kvm/irq.c
+++ b/arch/x86/kvm/irq.c
@@ -63,7 +63,7 @@ int kvm_cpu_has_injectable_intr(struct kvm_vcpu *v)
if (kvm_cpu_has_extint(v))
return 1;
 
-   if (kvm_apic_vid_enabled(v->kvm))
+   if (kvm_vcpu_apic_vid_enabled(v))
return 0;
 
return kvm_apic_has_interrupt(v) != -1; /* LAPIC */
diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c
index 4c30fb0a48a1..c568d69c7060 100644
--- a/arch/x86/kvm/lapic.c
+++ b/arch/x86/kvm/lapic.c
@@ -390,7 +390,7 @@ static inline void apic_clear_irr(int vec, struct kvm_lapic 
*apic)
 
vcpu = apic->vcpu;
 
-   if (unlikely(kvm_apic_vid_enabled(vcpu->kvm))) {
+   if (unlikely(kvm_vcpu_apic_vid_enabled(vcpu))) {
/* try to update RVI */
apic_clear_vector(vec, apic->regs + APIC_IRR);
kvm_make_request(KVM_REQ_EVENT, vcpu);
@@ -1622,7 +1622,7 @@ void kvm_lapic_reset(struct kvm_vcpu *vcpu, bool 
init_event)
apic_set_reg(apic, APIC_ISR + 0x10 * i, 0);
apic_set_reg(apic, APIC_TMR + 0x10 * i, 0);
}
-   apic->irr_pending = kvm_apic_vid_enabled(vcpu->kvm);
+   apic->irr_pending = kvm_vcpu_apic_vid_enabled(vcpu);
apic->isr_count = kvm_x86_ops->hwapic_isr_update ? 1 : 0;
apic->highest_isr_cache = -1;
update_divide_count(apic);
diff --git a/arch/x86/kvm/lapic.h b/arch/x86/kvm/lapic.h
index eb46d6bcaa75..7259d272416f 100644
--- a/arch/x86/kvm/lapic.h
+++ b/arch/x86/kvm/lapic.h
@@ -143,9 +143,9 @@ static inline int apic_x2apic_mode(struct kvm_lapic *apic)
return apic->vcpu->arch.apic_base & X2APIC_ENABLE;
 }
 
-static inline bool kvm_apic_vid_enabled(struct kvm *kvm)
+static inline bool kvm_vcpu_apic_vid_enabled(struct kvm_vcpu *vcpu)
 {
-   return kvm_x86_ops->vm_has_apicv(kvm);
+   return kvm_x86_ops->cpu_uses_apicv(vcpu);
 }
 
 static inline bool kvm_apic_has_events(struct kvm_vcpu *vcpu)
diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c
index 79612964e8f1..98889c882ced 100644
--- a/arch/x86/kvm/svm.c
+++ b/arch/x86/kvm/svm.c
@@ -3763,7 +3763,7 @@ static void svm_set_virtual_x2apic_mode(struct kvm_vcpu 
*vcpu, bool set)
return;
 }
 
-static int svm_vm_has_apicv(struct kvm *kvm)
+static int svm_cpu_uses_apicv(struct kvm_vcpu *vcpu)
 {
return 0;
 }
@@ -4524,7 +4524,7 @@ static struct kvm_x86_ops svm_x86_ops = {
.enable_irq_window = enable_irq_window,
.update_cr8_intercept = update_cr8_intercept,
.set_virtual_x2apic_mode = svm_set_virtual_x2apic_mode,
-   .vm_has_apicv = svm_vm_has_apicv,
+   .cpu_uses_apicv = svm_cpu_uses_apicv,
.load_eoi_exitmap = svm_load_eoi_exitmap,
.sync_pir_to_irr = svm_sync_pir_to_irr,
 
diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index 82ea70a8c9e7..32a38494dd6f 100644
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -810,6 +810,7 @@ static void kvm_cpu_vmxoff(void);
 static bool vmx_mpx_supported(void);
 static bool vmx_xsaves_supported(void);
 static int vmx_vm_has_apicv(struct kvm *kvm);
+static int vmx_cpu_uses_apicv(struct kvm_vcpu *vcpu);
 static int vmx_set_tss_addr(struct kvm *kvm, unsigned int addr);
 static void vmx_set_segment(struct kvm_vcpu *vcpu,
struct kvm_segment *var, int seg);
@@ -4337,6 +4338,11 @@ static int vmx_vm_has_apicv(struct kvm *kvm)
return enable_apicv && irqchip_in_kernel(kvm);
 }
 
+static int vmx_cpu_uses_apicv(struct kvm_vcpu *vcpu)
+{
+   return vmx_vm_has_apicv(vcpu->kvm);
+}
+
 static int vmx_complete_nested_posted_interrupt(struct kvm_vcpu *vcpu)
 {
struct vcpu_vmx *vmx = to_vmx(vcpu);
@@ -10357,7 +10363,7 @@ static struct kvm_x86_ops vmx_x86_ops = {

Re: [PATCH 2/3] target-i386: initialize vcpu's TSC rate to the value from KVM

2015-09-28 Thread Haozhong Zhang
On Tue, Sep 29, 2015 at 09:23:39AM +0800, Haozhong Zhang wrote:
> On Mon, Sep 28, 2015 at 01:17:44PM -0300, Eduardo Habkost wrote:
> > On Mon, Sep 28, 2015 at 01:38:30PM +0800, Haozhong Zhang wrote:
> > > When creating a vcpu, we initialize its TSC rate to the value from
> > > KVM (through ioctl KVM_GET_TSC_KHZ).
> > > 
> > > Signed-off-by: Haozhong Zhang 
> > > ---
> > >  target-i386/kvm.c | 7 +++
> > >  1 file changed, 7 insertions(+)
> > > 
> > > diff --git a/target-i386/kvm.c b/target-i386/kvm.c
> > > index 7b0ba17..c2b161a 100644
> > > --- a/target-i386/kvm.c
> > > +++ b/target-i386/kvm.c
> > > @@ -751,6 +751,13 @@ int kvm_arch_init_vcpu(CPUState *cs)
> > >  }
> > >  }
> > >  
> > > +r = kvm_vcpu_ioctl(cs, KVM_GET_TSC_KHZ);
> > > +if (r < 0) {
> > > +fprintf(stderr, "KVM_GET_TSC_KHZ failed\n");
> > > +return r;
> > > +}
> > > +env->tsc_khz = r;
> > 
> > You are silently overwriting the tsc_khz value set by the user, why?
> >
> 
> Oh, I need to check if user has provided tsc_khz, and if so then just
> use the user-provided value. So I'll replace it with code like
> 
> if (env->tsc_khz) {
> kvm_vcpu_ioctl(cs, KVM_SET_TSC_KHZ, env->tsc_khz);

I just noticed that this line duplicates code several lines above. Only the
else branch is needed.

> } else {
> r = kvm_vcpu_ioctl(cs, KVM_GET_TSC_KHZ);
> if (r < 0) {
> fprintf(stderr, "KVM_GET_TSC_KHZ failed\n");
> return r;
> }
> env->tsc_khz = r;
> }
> 
> - Haozhong
> 
> > -- 
> > Eduardo
> 
> --
> To unsubscribe from this list: send the line "unsubscribe kvm" in
> the body of a message to majord...@vger.kernel.org
> More majordomo info at  http://vger.kernel.org/majordomo-info.html
- Haozhong Zhang
--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH 06/12] KVM: x86: Move TSC scaling logic out of call-back adjust_tsc_offset()

2015-09-28 Thread Haozhong Zhang
On Mon, Sep 28, 2015 at 10:14:19PM +0200, Paolo Bonzini wrote:
> 
> 
> On 28/09/2015 07:38, Haozhong Zhang wrote:
> > +
> > +static inline void adjust_tsc_offset_guest(struct kvm_vcpu *vcpu,
> > +  s64 adjustment)
> > +{
> > +   kvm_x86_ops->adjust_tsc_offset(vcpu, adjustment, false);
> > +}
> > +
> > +static inline void adjust_tsc_offset_host(struct kvm_vcpu *vcpu, s64 
> > adjustment)
> > +{
> > +   if (vcpu->arch.tsc_scaling_ratio != kvm_default_tsc_scaling_ratio)
> > +   WARN_ON(adjustment < 0);
> > +   adjustment = kvm_scale_tsc(vcpu, (u64) adjustment);
> > +   kvm_x86_ops->adjust_tsc_offset(vcpu, adjustment, true);
> > +}
> 
> You can remove the final argument to the callback (and possibly change
> the callback's name to adjust_tsc_offset_guest), because it is now unused.
> 
> Paolo

Thanks! will do it.

- Haozhong
--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH 03/12] KVM: x86: Add a common TSC scaling function

2015-09-28 Thread Haozhong Zhang
On Mon, Sep 28, 2015 at 10:12:37PM +0200, Paolo Bonzini wrote:
> 
> 
> On 28/09/2015 07:38, Haozhong Zhang wrote:
> >  
> > -static u64 __scale_tsc(u64 ratio, u64 tsc)
> > -{
> > -   u64 mult, frac, _tsc;
> > -
> > -   mult  = ratio >> 32;
> > -   frac  = ratio & ((1ULL << 32) - 1);
> > -
> > -   _tsc  = tsc;
> > -   _tsc *= mult;
> > -   _tsc += (tsc >> 32) * frac;
> > -   _tsc += ((tsc & ((1ULL << 32) - 1)) * frac) >> 32;
> > -
> > -   return _tsc;
> > -}
> 
> This is basically
> 
>   return mul_u64_u64_shr(ratio, tsc,
>  kvm_tsc_scaling_ratio_frac_bits);
> 
> except that Linux has no mul_u64_u64_shr function, only mul_u64_u32_shr.
> 
> We should implement that function in include/linux/math64.h instead.
> For the x86_64 case (or any other CONFIG_ARCH_SUPPORTS_INT128
> architecture) we can just write it directly, as is done already for
> mul_u64_u32_shr.
> 
> For the 32-bit case, here is an implementation of both the
> multiplication and the shift, lifted from QEMU:
> 
> static inline void mul64(uint64_t *lo, uint64_t *hi,
>  uint64_t a, uint64_t b)
> {
> typedef union {
> uint64_t ll;
> struct {
> #ifdef __BIG_ENDIAN
> uint32_t high, low;
> #else
> uint32_t low, high;
> #endif
> } l;
> } LL;
> LL rl, rm, rn, rh, a0, b0;
> uint64_t c;
> 
> a0.ll = a;
> b0.ll = b;
> 
> rl.ll = (uint64_t)a0.l.low * b0.l.low;
> rm.ll = (uint64_t)a0.l.low * b0.l.high;
> rn.ll = (uint64_t)a0.l.high * b0.l.low;
> rh.ll = (uint64_t)a0.l.high * b0.l.high;
> 
> c = (uint64_t)rl.l.high + rm.l.low + rn.l.low;
> rl.l.high = c;
> c >>= 32;
> c = c + rm.l.high + rn.l.high + rh.l.low;
> rh.l.low = c;
> rh.l.high += (uint32_t)(c >> 32);
> 
> *lo = rl.ll;
> *hi = rh.ll;
> }
> 
> static inline void rshift128(uint64_t *lo, uint64_t *hi, int n)
> {
> uint64_t h;
> if (!n) {
> return;
> }
> h = *hi >> (n & 63);
> if (n >= 64) {
> *hi = 0;
> *lo = h;
> } else {
> *lo = (*lo >> n) | (*hi << (64 - n));
> *hi = h;
> }
> }
> 
> and you can easily reuse this code in Linux with just the uintNN_t types
> changed to uNN + some extra cleanups when it's placed in a single function.
> 
> Paolo

Thanks! I'll add mul_u64_u64_shr() and replace __scale_tsc().

- Haozhong
--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH] KVM: nVMX: expose VPID capability to L1

2015-09-28 Thread Wanpeng Li

On 9/28/15 8:05 PM, Paolo Bonzini wrote:


On 24/09/2015 08:51, Wanpeng Li wrote:

/*
 * For nested guests, we don't do anything specific
 * for single context invalidation. Hence, only advertise
 * support for global context invalidation.
 */
-   vmx->nested.nested_vmx_ept_caps |= VMX_EPT_EXTENT_GLOBAL_BIT;
+   vmx->nested.nested_vmx_ept_vpid_caps |= 
VMX_EPT_EXTENT_GLOBAL_BIT;
+   vmx->nested.nested_vmx_ept_vpid_caps |= (unsigned 
long)vmx_capability.vpid << 32;

Hi Wanpeng, the comment above is about invept, but the same
applies to invvpid.  We can set only VMX_VPID_EXTENT_GLOBAL_CONTEXT_BIT.


Agreed. I see the patch is already in kvm/queue; should I send out
another patch, or can you adjust it for me? :-)


Regards,
Wanpeng Li
--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH 2/3] target-i386: initialize vcpu's TSC rate to the value from KVM

2015-09-28 Thread Haozhong Zhang
On Mon, Sep 28, 2015 at 01:17:44PM -0300, Eduardo Habkost wrote:
> On Mon, Sep 28, 2015 at 01:38:30PM +0800, Haozhong Zhang wrote:
> > When creating a vcpu, we initialize its TSC rate to the value from
> > KVM (through ioctl KVM_GET_TSC_KHZ).
> > 
> > Signed-off-by: Haozhong Zhang 
> > ---
> >  target-i386/kvm.c | 7 +++
> >  1 file changed, 7 insertions(+)
> > 
> > diff --git a/target-i386/kvm.c b/target-i386/kvm.c
> > index 7b0ba17..c2b161a 100644
> > --- a/target-i386/kvm.c
> > +++ b/target-i386/kvm.c
> > @@ -751,6 +751,13 @@ int kvm_arch_init_vcpu(CPUState *cs)
> >  }
> >  }
> >  
> > +r = kvm_vcpu_ioctl(cs, KVM_GET_TSC_KHZ);
> > +if (r < 0) {
> > +fprintf(stderr, "KVM_GET_TSC_KHZ failed\n");
> > +return r;
> > +}
> > +env->tsc_khz = r;
> 
> You are silently overwriting the tsc_khz value set by the user, why?
>

Oh, I need to check if user has provided tsc_khz, and if so then just
use the user-provided value. So I'll replace it with code like

if (env->tsc_khz) {
kvm_vcpu_ioctl(cs, KVM_SET_TSC_KHZ, env->tsc_khz);
} else {
r = kvm_vcpu_ioctl(cs, KVM_GET_TSC_KHZ);
if (r < 0) {
fprintf(stderr, "KVM_GET_TSC_KHZ failed\n");
return r;
}
env->tsc_khz = r;
}

- Haozhong

> -- 
> Eduardo

--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH] x86: Use entire page for the per-cpu GDT only if paravirt-enabled

2015-09-28 Thread Denys Vlasenko
On 09/28/2015 09:58 AM, Ingo Molnar wrote:
> 
> * Denys Vlasenko  wrote:
> 
>> On 09/26/2015 09:50 PM, H. Peter Anvin wrote:
>>> NAK.  We really should map the GDT read-only on all 64 bit systems,
>>> since we can't hide the address from SLDT.  Same with the IDT.
>>
>> Sorry, I don't understand your point.
> 
> So the problem is that right now the SGDT instruction (which is unprivileged) 
> leaks the real address of the kernel image:
> 
>  fomalhaut:~> ./sgdt 
>  SGDT: 88303fd89000 / 007f
> 
> that '88303fd89000' is a kernel address.

Thank you.
I do know that SGDT and friends are unprivileged on x86
and thus they allow userspace (and guest kernels in paravirt)
to learn things they don't need to know.

I don't see how making GDT page-aligned and page-sized
changes anything in this regard. SGDT will still work,
and still leak GDT address.

> Your observation in the changelog and your patch:
> 
>> It is page-sized because of paravirt. [...]
> 
> ... conflicts with the intention to mark (remap) the primary GDT address read-only
> on native kernels as well.
> 
> So what we should do instead is to use the page alignment properly and remap the
> GDT to a read-only location, and load that one.

If we had a small GDT (i.e. what my patch does), we could still remap the entire page
that contains the small GDT, and simply not care that some other data is also visible
through that RO page.

> This would have a couple of advantages:
> 
>  - This would give kernel address randomization more teeth on x86.
> 
>  - An additional advantage would be that rootkits overwriting the GDT would have
>    a bit more work to do.
> 
>  - A third advantage would be that for NUMA systems we could 'mirror' the GDT into
>    node-local memory and load those. This makes GDT load cache-misses a bit less
>    expensive.

GDT is per-cpu. Isn't per-cpu memory already NUMA-local?

--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html