2015-01-28 10:54+0800, Kai Huang:
> This patch adds PML support in VMX. A new module parameter 'enable_pml' is 
> added

(+module_param_named(pml, enable_pml, bool, S_IRUGO);)

> to allow user to enable/disable it manually.
> 
> Signed-off-by: Kai Huang <kai.hu...@linux.intel.com>
> ---
> diff --git a/arch/x86/kvm/trace.h b/arch/x86/kvm/trace.h
> index 587149b..a139977 100644
> --- a/arch/x86/kvm/trace.h
> +++ b/arch/x86/kvm/trace.h
> @@ -846,6 +846,24 @@ TRACE_EVENT(kvm_track_tsc,
> +TRACE_EVENT(kvm_pml_full,
> +     TP_PROTO(unsigned int vcpu_id),
> +     TP_ARGS(vcpu_id),
> +
> +     TP_STRUCT__entry(
> +             __field(        unsigned int,   vcpu_id                 )
> +     ),
> +
> +     TP_fast_assign(
> +             __entry->vcpu_id                = vcpu_id;
> +     ),
> +
> +     TP_printk("vcpu %d: PML full", __entry->vcpu_id)
> +);
> +
>  #endif /* CONFIG_X86_64 */

You have it protected by CONFIG_X86_64, but use it unconditionally.

(Also, we can get all this information from kvm_exit tracepoint;
 I'd just drop it.)

> diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
> index c987374..de5ce82 100644
> --- a/arch/x86/kvm/vmx.c
> +++ b/arch/x86/kvm/vmx.c
> @@ -516,6 +519,10 @@ struct vcpu_vmx {
> +     /* Support for PML */
> +#define PML_ENTITY_NUM               512

(Readers of this struct likely aren't interested in this number, so it
 would be nicer to define it outside.  I thought it is here to hint the
 amount of allocated pages, but PML_ENTITY_NUM / 512 isn't obvious.)

> +     struct page *pml_pg;
> @@ -4355,6 +4368,9 @@ static u32 vmx_secondary_exec_control(struct vcpu_vmx 
> *vmx)
>          a current VMCS12
>       */
>       exec_control &= ~SECONDARY_EXEC_SHADOW_VMCS;
> +     /* PML is enabled/disabled in creating/destorying vcpu */
> +     exec_control &= ~SECONDARY_EXEC_ENABLE_PML;

What is the harm of enabling it here?

(SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES seems similar and does it.)

> +
>       return exec_control;
>  }
> @@ -6971,6 +7001,31 @@ static bool vmx_test_pir(struct kvm_vcpu *vcpu, int 
> vector)
> +static int handle_pml_full(struct kvm_vcpu *vcpu)
> +{
> +     unsigned long exit_qualification;
> +
> +     trace_kvm_pml_full(vcpu->vcpu_id);
> +
> +     exit_qualification = vmcs_readl(EXIT_QUALIFICATION);
> +
> +     /*
> +      * PML buffer FULL happened while executing iret from NMI,
> +      * "blocked by NMI" bit has to be set before next VM entry.
> +      */
> +     if (!(to_vmx(vcpu)->idt_vectoring_info & VECTORING_INFO_VALID_MASK) &&
> +                     cpu_has_virtual_nmis() &&
> +                     (exit_qualification & INTR_INFO_UNBLOCK_NMI))
> +             vmcs_set_bits(GUEST_INTERRUPTIBILITY_INFO,
> +                             GUEST_INTR_STATE_NMI);

Relevant part of the specification is pasted from 27.2.2 (Information
for VM Exits Due to Vectored Events), which makes me think that this bit
is mirrored to IDT-vectoring information field ...

Isn't vmx_recover_nmi_blocking() enough?

(I see the same code in handle_ept_violation(), but wasn't that needed
 just because of a hardware error?)

> +
> +     /*
> +      * PML buffer already flushed at beginning of VMEXIT. Nothing to do
> +      * here.., and there's no userspace involvement needed for PML.
> +      */
> +     return 1;
> +}
> @@ -7325,6 +7381,89 @@ static void vmx_get_exit_info(struct kvm_vcpu *vcpu, 
> u64 *info1, u64 *info2)
> +static int vmx_enable_pml(struct vcpu_vmx *vmx)
> +{
> +     struct page *pml_pg;
> +     u32 exec_control;
> +
> +     pml_pg = alloc_page(GFP_KERNEL | __GFP_ZERO);
> +     if (!pml_pg)
> +             return -ENOMEM;
> +
> +     vmx->pml_pg = pml_pg;

(It's safe to use vmx->pml_pg directly.)

> +
> +     vmcs_write64(PML_ADDRESS, page_to_phys(vmx->pml_pg));
> +     vmcs_write16(GUEST_PML_INDEX, PML_ENTITY_NUM - 1);
> +
> +     exec_control = vmcs_read32(SECONDARY_VM_EXEC_CONTROL);
> +     exec_control |= SECONDARY_EXEC_ENABLE_PML;
> +     vmcs_write32(SECONDARY_VM_EXEC_CONTROL, exec_control);
> +
> +     return 0;
> +}
> +static void vmx_flush_pml_buffer(struct vcpu_vmx *vmx)
> +{
> +     struct kvm *kvm = vmx->vcpu.kvm;
> +     u64 *pml_buf;
> +     u16 pml_idx;
> +
> +     pml_idx = vmcs_read16(GUEST_PML_INDEX);
> +
> +     /* Do nothing if PML buffer is empty */
> +     if (pml_idx == (PML_ENTITY_NUM - 1))
> +             return;
> +
> +     /* PML index always points to next available PML buffer entity */
> +     if (pml_idx >= PML_ENTITY_NUM)
> +             pml_idx = 0;
> +     else
> +             pml_idx++;

If the pml_idx is >= PML_ENTITY_NUM and < 0xffff, the log is empty,
so it would be better to use just 'pml_idx++' and unsigned modulo.

(Could also 'assert(pml_idx < PML_ENTITY_NUM)' then.)

> +
> +     pml_buf = page_address(vmx->pml_pg);
> +     for (; pml_idx < PML_ENTITY_NUM; pml_idx++) {
> +             u64 gpa;
> +
> +             gpa = pml_buf[pml_idx];
> +             WARN_ON(gpa & (PAGE_SIZE - 1));
> +             mark_page_dirty(kvm, gpa >> PAGE_SHIFT);
> +     }
> +
> +     /* reset PML index */
> +     vmcs_write16(GUEST_PML_INDEX, PML_ENTITY_NUM - 1);
> +}
> @@ -9492,6 +9655,31 @@ static void vmx_sched_in(struct kvm_vcpu *vcpu, int 
> cpu)
> +static void vmx_slot_enable_log_dirty(struct kvm *kvm,
> +                                  struct kvm_memory_slot *slot)
> +{
> +     kvm_mmu_slot_leaf_clear_dirty(kvm, slot);

(New slot contains dirty pages?)

> +     kvm_mmu_slot_largepage_remove_write_access(kvm, slot);
> +}

Thanks.
--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

Reply via email to