On Sun, Sep 15, 2013 at 8:31 PM, Gleb Natapov <[email protected]> wrote:
> On Fri, Sep 06, 2013 at 10:04:51AM +0800, Arthur Chunqi Li wrote:
>> This patch contains the following two changes:
>> 1. Fix a bug in nested preemption timer support. If a vmexit L2->L0
>> occurs for a reason that is not emulated by L1, the preemption timer
>> value should be saved on such exits.
>> 2. Add support of "Save VMX-preemption timer value" VM-Exit controls
>> to nVMX.
>>
>> With this patch, nested VMX preemption timer features are fully
>> supported.
>>
>> Signed-off-by: Arthur Chunqi Li <[email protected]>
>> ---
>> ChangeLog to v3:
>> Move nested_adjust_preemption_timer to the last possible place, just before vmentry.
>> Some minor changes.
>>
>> arch/x86/include/uapi/asm/msr-index.h | 1 +
>> arch/x86/kvm/vmx.c | 49
>> +++++++++++++++++++++++++++++++--
>> 2 files changed, 48 insertions(+), 2 deletions(-)
>>
>> diff --git a/arch/x86/include/uapi/asm/msr-index.h
>> b/arch/x86/include/uapi/asm/msr-index.h
>> index bb04650..b93e09a 100644
>> --- a/arch/x86/include/uapi/asm/msr-index.h
>> +++ b/arch/x86/include/uapi/asm/msr-index.h
>> @@ -536,6 +536,7 @@
>>
>> /* MSR_IA32_VMX_MISC bits */
>> #define MSR_IA32_VMX_MISC_VMWRITE_SHADOW_RO_FIELDS (1ULL << 29)
>> +#define MSR_IA32_VMX_MISC_PREEMPTION_TIMER_SCALE 0x1F
>> /* AMD-V MSRs */
>>
>> #define MSR_VM_CR 0xc0010114
>> diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
>> index 1f1da43..f364d16 100644
>> --- a/arch/x86/kvm/vmx.c
>> +++ b/arch/x86/kvm/vmx.c
>> @@ -374,6 +374,8 @@ struct nested_vmx {
>> */
>> struct page *apic_access_page;
>> u64 msr_ia32_feature_control;
>> + /* Set if vmexit is L2->L1 */
>> + bool nested_vmx_exit;
> I do not see why this is needed; see below.
>
>> };
>>
>> #define POSTED_INTR_ON 0
>> @@ -2204,7 +2206,17 @@ static __init void nested_vmx_setup_ctls_msrs(void)
>> #ifdef CONFIG_X86_64
>> VM_EXIT_HOST_ADDR_SPACE_SIZE |
>> #endif
>> - VM_EXIT_LOAD_IA32_PAT | VM_EXIT_SAVE_IA32_PAT;
>> + VM_EXIT_LOAD_IA32_PAT | VM_EXIT_SAVE_IA32_PAT |
>> + VM_EXIT_SAVE_VMX_PREEMPTION_TIMER;
>> + if (!(nested_vmx_pinbased_ctls_high &
>> + PIN_BASED_VMX_PREEMPTION_TIMER) ||
>> + !(nested_vmx_exit_ctls_high &
>> + VM_EXIT_SAVE_VMX_PREEMPTION_TIMER)) {
>> + nested_vmx_exit_ctls_high &=
>> + (~VM_EXIT_SAVE_VMX_PREEMPTION_TIMER);
>> + nested_vmx_pinbased_ctls_high &=
>> + (~PIN_BASED_VMX_PREEMPTION_TIMER);
>> + }
>> nested_vmx_exit_ctls_high |= (VM_EXIT_ALWAYSON_WITHOUT_TRUE_MSR |
>> VM_EXIT_LOAD_IA32_EFER);
>>
>> @@ -6707,6 +6719,24 @@ static void vmx_get_exit_info(struct kvm_vcpu *vcpu,
>> u64 *info1, u64 *info2)
>> *info2 = vmcs_read32(VM_EXIT_INTR_INFO);
>> }
>>
>> +static void nested_adjust_preemption_timer(struct kvm_vcpu *vcpu)
>> +{
>> + u64 delta_tsc_l1;
>> + u32 preempt_val_l1, preempt_val_l2, preempt_scale;
>> +
>> + preempt_scale = native_read_msr(MSR_IA32_VMX_MISC) &
>> + MSR_IA32_VMX_MISC_PREEMPTION_TIMER_SCALE;
>> + preempt_val_l2 = vmcs_read32(VMX_PREEMPTION_TIMER_VALUE);
>> + delta_tsc_l1 = kvm_x86_ops->read_l1_tsc(vcpu,
>> + native_read_tsc()) - vcpu->arch.last_guest_tsc;
>> + preempt_val_l1 = delta_tsc_l1 >> preempt_scale;
>> + if (preempt_val_l2 <= preempt_val_l1)
>> + preempt_val_l2 = 0;
>> + else
>> + preempt_val_l2 -= preempt_val_l1;
>> + vmcs_write32(VMX_PREEMPTION_TIMER_VALUE, preempt_val_l2);
>> +}
>> +
>> /*
>> * The guest has exited. See if we can fix it or if we need userspace
>> * assistance.
>> @@ -6736,9 +6766,11 @@ static int vmx_handle_exit(struct kvm_vcpu *vcpu)
>> vmx->nested.nested_run_pending = 0;
>>
>> if (is_guest_mode(vcpu) && nested_vmx_exit_handled(vcpu)) {
>> + vmx->nested.nested_vmx_exit = true;
>> nested_vmx_vmexit(vcpu);
>> return 1;
>> }
>> + vmx->nested.nested_vmx_exit = false;
>>
>> if (exit_reason & VMX_EXIT_REASONS_FAILED_VMENTRY) {
>> vcpu->run->exit_reason = KVM_EXIT_FAIL_ENTRY;
>> @@ -7132,6 +7164,8 @@ static void __noclone vmx_vcpu_run(struct kvm_vcpu
>> *vcpu)
>> debugctlmsr = get_debugctlmsr();
>>
>> vmx->__launched = vmx->loaded_vmcs->launched;
>> + if (is_guest_mode(vcpu) && !(vmx->nested.nested_vmx_exit))
> How can is_guest_mode() and nested_vmx_exit both be true? The only place
> nested_vmx_exit is set to true is just before the call to
> nested_vmx_vmexit(). The first thing nested_vmx_vmexit() does is make
> is_guest_mode() false. To enter guest mode again, at least one other
> vmexit from L1 to L0 is needed, at which point nested_vmx_exit will be
> reset to false again.
>
> If you want to avoid calling nested_adjust_preemption_timer() during
> vmlaunch/vmresume emulation (and it looks like this is what you are
> trying to achieve here) you can check nested_run_pending.
Besides vmlaunch/vmresume emulation, no exit from L2->L1 should
call nested_adjust_preemption_timer(); this function is only used to
adjust the preemption timer on an L2->L0->L2 round trip. Can
nested_run_pending distinguish this case?
Arthur
>
>
>> + nested_adjust_preemption_timer(vcpu);
>> asm(
>> /* Store host registers */
>> "push %%" _ASM_DX "; push %%" _ASM_BP ";"
>> @@ -7518,6 +7552,7 @@ static void prepare_vmcs02(struct kvm_vcpu *vcpu,
>> struct vmcs12 *vmcs12)
>> {
>> struct vcpu_vmx *vmx = to_vmx(vcpu);
>> u32 exec_control;
>> + u32 exit_control;
>>
>> vmcs_write16(GUEST_ES_SELECTOR, vmcs12->guest_es_selector);
>> vmcs_write16(GUEST_CS_SELECTOR, vmcs12->guest_cs_selector);
>> @@ -7691,7 +7726,10 @@ static void prepare_vmcs02(struct kvm_vcpu *vcpu,
>> struct vmcs12 *vmcs12)
>> * we should use its exit controls. Note that VM_EXIT_LOAD_IA32_EFER
>> * bits are further modified by vmx_set_efer() below.
>> */
>> - vmcs_write32(VM_EXIT_CONTROLS, vmcs_config.vmexit_ctrl);
>> + exit_control = vmcs_config.vmexit_ctrl;
>> + if (vmcs12->pin_based_vm_exec_control & PIN_BASED_VMX_PREEMPTION_TIMER)
>> + exit_control |= VM_EXIT_SAVE_VMX_PREEMPTION_TIMER;
>> + vmcs_write32(VM_EXIT_CONTROLS, exit_control);
>>
>> /* vmcs12's VM_ENTRY_LOAD_IA32_EFER and VM_ENTRY_IA32E_MODE are
>> * emulated by vmx_set_efer(), below.
>> @@ -8090,6 +8128,13 @@ static void prepare_vmcs12(struct kvm_vcpu *vcpu,
>> struct vmcs12 *vmcs12)
>> vmcs12->guest_pending_dbg_exceptions =
>> vmcs_readl(GUEST_PENDING_DBG_EXCEPTIONS);
>>
>> + if ((vmcs12->pin_based_vm_exec_control &
>> + PIN_BASED_VMX_PREEMPTION_TIMER) &&
>> + (vmcs12->vm_exit_controls &
>> + VM_EXIT_SAVE_VMX_PREEMPTION_TIMER))
>> + vmcs12->vmx_preemption_timer_value =
>> + vmcs_read32(VMX_PREEMPTION_TIMER_VALUE);
>> +
>> /*
>> * In some cases (usually, nested EPT), L2 is allowed to change its
>> * own CR3 without exiting. If it has changed it, we must keep it.
>> --
>> 1.7.9.5
>
> --
> Gleb.
--
Arthur Chunqi Li
Department of Computer Science
School of EECS
Peking University
Beijing, China
--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to [email protected]
More majordomo info at http://vger.kernel.org/majordomo-info.html