[Qemu-devel] [PATCH] i386/kvm: add support for Hyper-V TLB flush
Add support for Hyper-V TLB flush which recently got added to KVM. Just like regular Hyper-V we announce HV_EX_PROCESSOR_MASKS_RECOMMENDED regardless of how many vCPUs we have. Windows is 'smart' and uses less expensive non-EX Hypercall whenever possible (when it wants to flush TLB for all vCPUs or the maximum vCPU index in the vCPU set requires flushing is less than 64). Signed-off-by: Vitaly Kuznetsov --- linux-headers/linux/kvm.h | 1 + target/i386/cpu.c | 1 + target/i386/cpu.h | 1 + target/i386/hyperv-proto.h | 1 + target/i386/kvm.c | 15 ++- 5 files changed, 18 insertions(+), 1 deletion(-) diff --git a/linux-headers/linux/kvm.h b/linux-headers/linux/kvm.h index cdb148e959..a52a685690 100644 --- a/linux-headers/linux/kvm.h +++ b/linux-headers/linux/kvm.h @@ -948,6 +948,7 @@ struct kvm_ppc_resize_hpt { #define KVM_CAP_S390_BPB 152 #define KVM_CAP_GET_MSR_FEATURES 153 #define KVM_CAP_HYPERV_EVENTFD 154 +#define KVM_CAP_HYPERV_TLBFLUSH 155 #ifdef KVM_CAP_IRQ_ROUTING diff --git a/target/i386/cpu.c b/target/i386/cpu.c index 94260412e2..756cc78808 100644 --- a/target/i386/cpu.c +++ b/target/i386/cpu.c @@ -5151,6 +5151,7 @@ static Property x86_cpu_properties[] = { DEFINE_PROP_BOOL("hv-stimer", X86CPU, hyperv_stimer, false), DEFINE_PROP_BOOL("hv-frequencies", X86CPU, hyperv_frequencies, false), DEFINE_PROP_BOOL("hv-reenlightenment", X86CPU, hyperv_reenlightenment, false), +DEFINE_PROP_BOOL("hv-tlbflush", X86CPU, hyperv_tlbflush, false), DEFINE_PROP_BOOL("check", X86CPU, check_cpuid, true), DEFINE_PROP_BOOL("enforce", X86CPU, enforce_cpuid, false), DEFINE_PROP_BOOL("kvm", X86CPU, expose_kvm, true), diff --git a/target/i386/cpu.h b/target/i386/cpu.h index 664504610e..738d695331 100644 --- a/target/i386/cpu.h +++ b/target/i386/cpu.h @@ -1363,6 +1363,7 @@ struct X86CPU { bool hyperv_stimer; bool hyperv_frequencies; bool hyperv_reenlightenment; +bool hyperv_tlbflush; bool check_cpuid; bool enforce_cpuid; bool expose_kvm; diff --git a/target/i386/hyperv-proto.h 
b/target/i386/hyperv-proto.h index 93352ebd2a..d6d5a79293 100644 --- a/target/i386/hyperv-proto.h +++ b/target/i386/hyperv-proto.h @@ -58,6 +58,7 @@ #define HV_APIC_ACCESS_RECOMMENDED (1u << 3) #define HV_SYSTEM_RESET_RECOMMENDED (1u << 4) #define HV_RELAXED_TIMING_RECOMMENDED (1u << 5) +#define HV_EX_PROCESSOR_MASKS_RECOMMENDED (1u << 11) /* * Basic virtualized MSRs diff --git a/target/i386/kvm.c b/target/i386/kvm.c index 44f70733e7..91c8e952c8 100644 --- a/target/i386/kvm.c +++ b/target/i386/kvm.c @@ -585,7 +585,8 @@ static bool hyperv_enabled(X86CPU *cpu) cpu->hyperv_runtime || cpu->hyperv_synic || cpu->hyperv_stimer || -cpu->hyperv_reenlightenment); +cpu->hyperv_reenlightenment || +cpu->hyperv_tlbflush); } static int kvm_arch_set_tsc_khz(CPUState *cs) @@ -823,6 +824,18 @@ int kvm_arch_init_vcpu(CPUState *cs) if (cpu->hyperv_vapic) { c->eax |= HV_APIC_ACCESS_RECOMMENDED; } +if (cpu->hyperv_tlbflush) { +if (kvm_check_extension(cs->kvm_state, +KVM_CAP_HYPERV_TLBFLUSH) <= 0) { +fprintf(stderr, "Hyper-V TLB flush support " +"(requested by 'hv-tlbflush' cpu flag) " +" is not supported by kernel\n"); +return -ENOSYS; +} +c->eax |= HV_REMOTE_TLB_FLUSH_RECOMMENDED; +c->eax |= HV_EX_PROCESSOR_MASKS_RECOMMENDED; +} + c->ebx = cpu->hyperv_spinlock_attempts; c = _data.entries[cpuid_i++]; -- 2.14.4
Re: [Qemu-devel] [PATCH v5 0/1] i386/kvm: TSC page clocksource for Hyper-V-on-KVM fixes
Vitaly Kuznetsov <vkuzn...@redhat.com> writes: > Changes since v4: > - Rebase on top of Roman's patches. > - Drop PATCH2 as it is no longer needed (after adding explicit > hv_frequencies). > > Previously, Ladi was working on enabling TSC page clocksource for nested > Hyper-V-on-KVM workloads. He found out that if Hyper-V frequency MSRs are > exposed to L1 as well as INVTSC flag Hyper-V enables TSC page clocksource > to its guests. Qemu doesn't pass INVTSC by default as it is a migration > blocker. > > I found out there's a different way to make Hyper-V like us: expose > Reenlightenment MSRs to it. KVM doesn't fully support the feature as > we're still unable to migrate nested environments but rudimentary support > we have there is enough. > > Enable Hyper-V reenlightenment MSRs to make things work. > Gentle ping :-) With Roman's Reviewed-by:, is there anything else needed from me to get this accepted? Thanks! -- Vitaly
[Qemu-devel] [PATCH 2/2] i386/kvm: lower requirements for Hyper-V frequency MSRs exposure
Requiring tsc_is_stable_and_known() is too restrictive: even without INVTSC nested Hyper-V-on-KVM enables TSC pages for its guests e.g. when Reenlightenment MSRs are present. Presence of frequency MSRs doesn't mean these frequencies are stable; it just means they're available for reading. Signed-off-by: Vitaly Kuznetsov <vkuzn...@redhat.com> --- target/i386/kvm.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/target/i386/kvm.c b/target/i386/kvm.c index 21e06deaf1..43c521f61a 100644 --- a/target/i386/kvm.c +++ b/target/i386/kvm.c @@ -646,7 +646,7 @@ static int hyperv_handle_properties(CPUState *cs) env->features[FEAT_HYPERV_EAX] |= HV_TIME_REF_COUNT_AVAILABLE; env->features[FEAT_HYPERV_EAX] |= HV_REFERENCE_TSC_AVAILABLE; -if (has_msr_hv_frequencies && tsc_is_stable_and_known(env)) { +if (has_msr_hv_frequencies && env->tsc_khz) { env->features[FEAT_HYPERV_EAX] |= HV_ACCESS_FREQUENCY_MSRS; env->features[FEAT_HYPERV_EDX] |= HV_FREQUENCY_MSRS_AVAILABLE; } -- 2.14.3
[Qemu-devel] [PATCH 1/2] i386/kvm: add support for Hyper-V reenlightenment MSRs
KVM recently gained support for Hyper-V Reenlightenment MSRs which are required to make KVM-on-Hyper-V enable TSC page clocksource to its guests when INVTSC is not passed to it (and it is not passed by default in Qemu as it effectively blocks migration). Signed-off-by: Vitaly Kuznetsov <vkuzn...@redhat.com> --- target/i386/cpu.h | 3 +++ target/i386/hyperv-proto.h | 9 - target/i386/kvm.c | 33 + 3 files changed, 44 insertions(+), 1 deletion(-) diff --git a/target/i386/cpu.h b/target/i386/cpu.h index faf39ec1ce..502b535be2 100644 --- a/target/i386/cpu.h +++ b/target/i386/cpu.h @@ -1152,6 +1152,9 @@ typedef struct CPUX86State { uint64_t msr_hv_synic_sint[HV_SINT_COUNT]; uint64_t msr_hv_stimer_config[HV_STIMER_COUNT]; uint64_t msr_hv_stimer_count[HV_STIMER_COUNT]; +uint64_t msr_hv_reenlightenment_control; +uint64_t msr_hv_tsc_emulation_control; +uint64_t msr_hv_tsc_emulation_status; /* exception/interrupt handling */ int error_code; diff --git a/target/i386/hyperv-proto.h b/target/i386/hyperv-proto.h index cb4d7f2b7a..93352ebd2a 100644 --- a/target/i386/hyperv-proto.h +++ b/target/i386/hyperv-proto.h @@ -35,7 +35,7 @@ #define HV_RESET_AVAILABLE (1u << 7) #define HV_REFERENCE_TSC_AVAILABLE (1u << 9) #define HV_ACCESS_FREQUENCY_MSRS (1u << 11) - +#define HV_ACCESS_REENLIGHTENMENTS_CONTROL (1u << 13) /* * HV_CPUID_FEATURES.EDX bits @@ -129,6 +129,13 @@ #define HV_X64_MSR_CRASH_CTL0x4105 #define HV_CRASH_CTL_NOTIFY (1ull << 63) +/* + * Reenlightenment notification MSRs + */ +#define HV_X64_MSR_REENLIGHTENMENT_CONTROL 0x4106 +#define HV_X64_MSR_TSC_EMULATION_CONTROL0x4107 +#define HV_X64_MSR_TSC_EMULATION_STATUS 0x4108 + /* * Hypercall status code */ diff --git a/target/i386/kvm.c b/target/i386/kvm.c index ad4b159b28..21e06deaf1 100644 --- a/target/i386/kvm.c +++ b/target/i386/kvm.c @@ -90,6 +90,7 @@ static bool has_msr_hv_runtime; static bool has_msr_hv_synic; static bool has_msr_hv_stimer; static bool has_msr_hv_frequencies; +static bool has_msr_hv_reenlightenment; static 
bool has_msr_xss; static bool has_msr_spec_ctrl; @@ -649,6 +650,11 @@ static int hyperv_handle_properties(CPUState *cs) env->features[FEAT_HYPERV_EAX] |= HV_ACCESS_FREQUENCY_MSRS; env->features[FEAT_HYPERV_EDX] |= HV_FREQUENCY_MSRS_AVAILABLE; } + +if (has_msr_hv_reenlightenment) { +env->features[FEAT_HYPERV_EAX] |= +HV_ACCESS_REENLIGHTENMENTS_CONTROL; +} } if (cpu->hyperv_crash && has_msr_hv_crash) { env->features[FEAT_HYPERV_EDX] |= HV_GUEST_CRASH_MSR_AVAILABLE; @@ -1154,6 +1160,9 @@ static int kvm_get_supported_msrs(KVMState *s) case HV_X64_MSR_TSC_FREQUENCY: has_msr_hv_frequencies = true; break; +case HV_X64_MSR_REENLIGHTENMENT_CONTROL: +has_msr_hv_reenlightenment = true; +break; case MSR_IA32_SPEC_CTRL: has_msr_spec_ctrl = true; break; @@ -1713,6 +1722,15 @@ static int kvm_put_msrs(X86CPU *cpu, int level) if (cpu->hyperv_time) { kvm_msr_entry_add(cpu, HV_X64_MSR_REFERENCE_TSC, env->msr_hv_tsc); + +if (has_msr_hv_reenlightenment) { +kvm_msr_entry_add(cpu, HV_X64_MSR_REENLIGHTENMENT_CONTROL, + env->msr_hv_reenlightenment_control); +kvm_msr_entry_add(cpu, HV_X64_MSR_TSC_EMULATION_CONTROL, + env->msr_hv_tsc_emulation_control); +kvm_msr_entry_add(cpu, HV_X64_MSR_TSC_EMULATION_STATUS, + env->msr_hv_tsc_emulation_status); +} } } if (cpu->hyperv_vapic) { @@ -2053,6 +2071,12 @@ static int kvm_get_msrs(X86CPU *cpu) } if (cpu->hyperv_time) { kvm_msr_entry_add(cpu, HV_X64_MSR_REFERENCE_TSC, 0); + +if (has_msr_hv_reenlightenment) { +kvm_msr_entry_add(cpu, HV_X64_MSR_REENLIGHTENMENT_CONTROL, 0); +kvm_msr_entry_add(cpu, HV_X64_MSR_TSC_EMULATION_CONTROL, 0); +kvm_msr_entry_add(cpu, HV_X64_MSR_TSC_EMULATION_STATUS, 0); +} } if (has_msr_hv_crash) { int j; @@ -2294,6 +2318,15 @@ static int kvm_get_msrs(X86CPU *cpu) env->msr_hv_stimer_count[(index - HV_X64_MSR_STIMER0_COUNT)/2] = msrs[i].data; break; +case HV_X64_MSR_REENLIGHTENMENT_CONTROL: +env->msr_hv_reenlightenment_control = msrs[i].data; +break; +case HV_
[Qemu-devel] [PATCH 0/2] i386/kvm: TSC page clocksource for Hyper-V-on-KVM fixes
Previously, Ladi was working on enabling TSC page clocksource for nested Hyper-V-on-KVM workloads. He found out that if Hyper-V frequency MSRs are exposed to L1 as well as INVTSC flag Hyper-V enables TSC page clocksource to its guests. Qemu doesn't pass INVTSC by default as it is a migration blocker. I found out there's a different way to make Hyper-V like us: expose Reenlightenment MSRs to it. KVM doesn't fully support the feature as we're still unable to migrate nested environments but rudimentary support we have there (kvm/queue only currently) is enough. Enable Hyper-V reenlightenment MSRs and expose frequency MSRs even without INVTSC to make things work. [My first patches for Qemu, please be nice :-) ] Vitaly Kuznetsov (2): i386/kvm: add support for Hyper-V reenlightenment MSRs i386/kvm: lower requirements for Hyper-V frequency MSRs exposure target/i386/cpu.h | 3 +++ target/i386/hyperv-proto.h | 9 - target/i386/kvm.c | 35 ++- 3 files changed, 45 insertions(+), 2 deletions(-) -- 2.14.3
[Qemu-devel] [PATCH v5 0/1] i386/kvm: TSC page clocksource for Hyper-V-on-KVM fixes
Changes since v4: - Rebase on top of Roman's patches. - Drop PATCH2 as it is no longer needed (after adding explicit hv_frequencies). Previously, Ladi was working on enabling TSC page clocksource for nested Hyper-V-on-KVM workloads. He found out that if Hyper-V frequency MSRs are exposed to L1 as well as INVTSC flag Hyper-V enables TSC page clocksource to its guests. Qemu doesn't pass INVTSC by default as it is a migration blocker. I found out there's a different way to make Hyper-V like us: expose Reenlightenment MSRs to it. KVM doesn't fully support the feature as we're still unable to migrate nested environments but rudimentary support we have there is enough. Enable Hyper-V reenlightenment MSRs to make things work. Vitaly Kuznetsov (1): i386/kvm: add support for Hyper-V reenlightenment MSRs target/i386/cpu.c | 4 +++- target/i386/cpu.h | 4 target/i386/hyperv-proto.h | 9 - target/i386/kvm.c | 39 ++- target/i386/machine.c | 24 5 files changed, 77 insertions(+), 3 deletions(-) -- 2.14.3
[Qemu-devel] [PATCH v5 1/1] i386/kvm: add support for Hyper-V reenlightenment MSRs
KVM recently gained support for Hyper-V Reenlightenment MSRs which are required to make KVM-on-Hyper-V enable TSC page clocksource to its guests when INVTSC is not passed to it (and it is not passed by default in Qemu as it effectively blocks migration). Signed-off-by: Vitaly Kuznetsov <vkuzn...@redhat.com> --- Changes since v4: - Rebase on top of Roman's patches. --- target/i386/cpu.c | 4 +++- target/i386/cpu.h | 4 target/i386/hyperv-proto.h | 9 - target/i386/kvm.c | 39 ++- target/i386/machine.c | 24 5 files changed, 77 insertions(+), 3 deletions(-) diff --git a/target/i386/cpu.c b/target/i386/cpu.c index 1a6b082b6f..e0e7a16d21 100644 --- a/target/i386/cpu.c +++ b/target/i386/cpu.c @@ -409,7 +409,8 @@ static FeatureWordInfo feature_word_info[FEATURE_WORDS] = { NULL /* hv_vpindex_access */, NULL /* hv_msr_reset_access */, NULL /* hv_msr_stats_access */, NULL /* hv_reftsc_access */, NULL /* hv_msr_idle_access */, NULL /* hv_msr_frequency_access */, -NULL, NULL, NULL, NULL, +NULL /* hv_msr_debug_access */, NULL /* hv_msr_reenlightenment_access */, +NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, @@ -4762,6 +4763,7 @@ static Property x86_cpu_properties[] = { DEFINE_PROP_BOOL("hv-synic", X86CPU, hyperv_synic, false), DEFINE_PROP_BOOL("hv-stimer", X86CPU, hyperv_stimer, false), DEFINE_PROP_BOOL("hv-frequencies", X86CPU, hyperv_frequencies, false), +DEFINE_PROP_BOOL("hv-reenlightenment", X86CPU, hyperv_reenlightenment, false), DEFINE_PROP_BOOL("check", X86CPU, check_cpuid, true), DEFINE_PROP_BOOL("enforce", X86CPU, enforce_cpuid, false), DEFINE_PROP_BOOL("kvm", X86CPU, expose_kvm, true), diff --git a/target/i386/cpu.h b/target/i386/cpu.h index 1b219fafc4..b58b779bff 100644 --- a/target/i386/cpu.h +++ b/target/i386/cpu.h @@ -1174,6 +1174,9 @@ typedef struct CPUX86State { uint64_t msr_hv_synic_sint[HV_SINT_COUNT]; uint64_t msr_hv_stimer_config[HV_STIMER_COUNT]; uint64_t msr_hv_stimer_count[HV_STIMER_COUNT]; +uint64_t 
msr_hv_reenlightenment_control; +uint64_t msr_hv_tsc_emulation_control; +uint64_t msr_hv_tsc_emulation_status; uint64_t msr_rtit_ctrl; uint64_t msr_rtit_status; @@ -1297,6 +1300,7 @@ struct X86CPU { bool hyperv_synic; bool hyperv_stimer; bool hyperv_frequencies; +bool hyperv_reenlightenment; bool check_cpuid; bool enforce_cpuid; bool expose_kvm; diff --git a/target/i386/hyperv-proto.h b/target/i386/hyperv-proto.h index cb4d7f2b7a..93352ebd2a 100644 --- a/target/i386/hyperv-proto.h +++ b/target/i386/hyperv-proto.h @@ -35,7 +35,7 @@ #define HV_RESET_AVAILABLE (1u << 7) #define HV_REFERENCE_TSC_AVAILABLE (1u << 9) #define HV_ACCESS_FREQUENCY_MSRS (1u << 11) - +#define HV_ACCESS_REENLIGHTENMENTS_CONTROL (1u << 13) /* * HV_CPUID_FEATURES.EDX bits @@ -129,6 +129,13 @@ #define HV_X64_MSR_CRASH_CTL0x4105 #define HV_CRASH_CTL_NOTIFY (1ull << 63) +/* + * Reenlightenment notification MSRs + */ +#define HV_X64_MSR_REENLIGHTENMENT_CONTROL 0x4106 +#define HV_X64_MSR_TSC_EMULATION_CONTROL0x4107 +#define HV_X64_MSR_TSC_EMULATION_STATUS 0x4108 + /* * Hypercall status code */ diff --git a/target/i386/kvm.c b/target/i386/kvm.c index 6c49954e68..da4b19 100644 --- a/target/i386/kvm.c +++ b/target/i386/kvm.c @@ -90,6 +90,7 @@ static bool has_msr_hv_runtime; static bool has_msr_hv_synic; static bool has_msr_hv_stimer; static bool has_msr_hv_frequencies; +static bool has_msr_hv_reenlightenment; static bool has_msr_xss; static bool has_msr_spec_ctrl; static bool has_msr_smi_count; @@ -583,7 +584,8 @@ static bool hyperv_enabled(X86CPU *cpu) cpu->hyperv_vpindex || cpu->hyperv_runtime || cpu->hyperv_synic || -cpu->hyperv_stimer); +cpu->hyperv_stimer || +cpu->hyperv_reenlightenment); } static int kvm_arch_set_tsc_khz(CPUState *cs) @@ -669,6 +671,16 @@ static int hyperv_handle_properties(CPUState *cs) } env->features[FEAT_HYPERV_EDX] |= HV_GUEST_CRASH_MSR_AVAILABLE; } +if (cpu->hyperv_reenlightenment) { +if (!has_msr_hv_reenlightenment) { +fprintf(stderr, +"Hyper-V Reenlightenment MSRs " 
+"(requested by 'hv-reenlightenment' cpu flag) " +"are not supported by kernel\n"); +return -ENOSYS; +} +env->features[FEAT_HYPERV_EAX] |= HV_ACCESS_REENLIGHTENMENTS_CONTROL; +
[Qemu-devel] [PATCH v2 2/2] i386/kvm: lower requirements for Hyper-V frequency MSRs exposure
Requiring tsc_is_stable_and_known() is too restrictive: even without INVTSC nested Hyper-V-on-KVM enables TSC pages for its guests e.g. when Reenlightenment MSRs are present. Presence of frequency MSRs doesn't mean these frequencies are stable; it just means they're available for reading. Signed-off-by: Vitaly Kuznetsov <vkuzn...@redhat.com> --- target/i386/kvm.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/target/i386/kvm.c b/target/i386/kvm.c index accf50eac3..a6d1210f46 100644 --- a/target/i386/kvm.c +++ b/target/i386/kvm.c @@ -650,7 +650,7 @@ static int hyperv_handle_properties(CPUState *cs) env->features[FEAT_HYPERV_EAX] |= HV_TIME_REF_COUNT_AVAILABLE; env->features[FEAT_HYPERV_EAX] |= HV_REFERENCE_TSC_AVAILABLE; -if (has_msr_hv_frequencies && tsc_is_stable_and_known(env)) { +if (has_msr_hv_frequencies && env->tsc_khz) { env->features[FEAT_HYPERV_EAX] |= HV_ACCESS_FREQUENCY_MSRS; env->features[FEAT_HYPERV_EDX] |= HV_FREQUENCY_MSRS_AVAILABLE; } -- 2.14.3
Re: [Qemu-devel] [PATCH 2/2] i386/kvm: lower requirements for Hyper-V frequency MSRs exposure
Paolo Bonzini <pbonz...@redhat.com> writes: > On 16/03/2018 16:05, Vitaly Kuznetsov wrote: >>>> >>>> -if (has_msr_hv_frequencies && tsc_is_stable_and_known(env)) { >>>> +if (has_msr_hv_frequencies && env->tsc_khz) { >>> Should this be >>> >>> ((env->tsc_khz && has_msr_hv_reenlightenment) || >>> tsc_is_stable_and_known(env)) >>> >>> so that you don't regress on older kernels? >>> >> I don't actually see where the regression might come from: frequency >> MSRs are supported regardless or reenlightenment/invtsc and there's >> nothing wrong with exposing them but I may be missing something.. > > On older kernel without re-enlightenment support, you don't want to > expose the frequency MSRs unless invtsc is on, right? > Actually no, I think it's OK to expose frequency MSRs even when we don't have invtsc and don't support re-enlightenment. Nested Hyper-V won't pass stable TSC pages to its guests unless it sees either invtsc or reenlightenment. So as long as we have something to put to these MSRs (env->tsc_khz) I *think* we can expose them. I may actually be missing the reason why Ladi put tsc_is_stable_and_known() here. In case we're running Windows (and not Hyper-V) as a guest KVM will update TSC page on migration. And genuine Hyper-V also exposes these MSRs without exposing INVTSC flag by default. -- Vitaly
[Qemu-devel] [PATCH v2 0/2] i386/kvm: TSC page clocksource for Hyper-V-on-KVM fixes
Changes since v1: - add vmstate_msr_hyperv_reenlightenment subsection to vmstate_x86_cpu [Paolo Bonzini] - rebase. Previously, Ladi was working on enabling TSC page clocksource for nested Hyper-V-on-KVM workloads. He found out that if Hyper-V frequency MSRs are exposed to L1 as well as INVTSC flag Hyper-V enables TSC page clocksource to its guests. Qemu doesn't pass INVTSC by default as it is a migration blocker. I found out there's a different way to make Hyper-V like us: expose Reenlightenment MSRs to it. KVM doesn't fully support the feature as we're still unable to migrate nested environments but rudimentary support we have there (kvm/queue only currently) is enough. Enable Hyper-V reenlightenment MSRs and expose frequency MSRs even without INVTSC to make things work. Vitaly Kuznetsov (2): i386/kvm: add support for Hyper-V reenlightenment MSRs i386/kvm: lower requirements for Hyper-V frequency MSRs exposure target/i386/cpu.h | 3 +++ target/i386/hyperv-proto.h | 9 - target/i386/kvm.c | 35 ++- target/i386/machine.c | 24 4 files changed, 69 insertions(+), 2 deletions(-) -- 2.14.3
[Qemu-devel] [PATCH v2 1/2] i386/kvm: add support for Hyper-V reenlightenment MSRs
KVM recently gained support for Hyper-V Reenlightenment MSRs which are required to make KVM-on-Hyper-V enable TSC page clocksource to its guests when INVTSC is not passed to it (and it is not passed by default in Qemu as it effectively blocks migration). Signed-off-by: Vitaly Kuznetsov <vkuzn...@redhat.com> --- Changes since v1: - add vmstate_msr_hyperv_reenlightenment subsection to vmstate_x86_cpu [Paolo Bonzini] --- target/i386/cpu.h | 3 +++ target/i386/hyperv-proto.h | 9 - target/i386/kvm.c | 33 + target/i386/machine.c | 24 4 files changed, 68 insertions(+), 1 deletion(-) diff --git a/target/i386/cpu.h b/target/i386/cpu.h index 2e2bab5ff3..0b1b556a56 100644 --- a/target/i386/cpu.h +++ b/target/i386/cpu.h @@ -1174,6 +1174,9 @@ typedef struct CPUX86State { uint64_t msr_hv_synic_sint[HV_SINT_COUNT]; uint64_t msr_hv_stimer_config[HV_STIMER_COUNT]; uint64_t msr_hv_stimer_count[HV_STIMER_COUNT]; +uint64_t msr_hv_reenlightenment_control; +uint64_t msr_hv_tsc_emulation_control; +uint64_t msr_hv_tsc_emulation_status; uint64_t msr_rtit_ctrl; uint64_t msr_rtit_status; diff --git a/target/i386/hyperv-proto.h b/target/i386/hyperv-proto.h index cb4d7f2b7a..93352ebd2a 100644 --- a/target/i386/hyperv-proto.h +++ b/target/i386/hyperv-proto.h @@ -35,7 +35,7 @@ #define HV_RESET_AVAILABLE (1u << 7) #define HV_REFERENCE_TSC_AVAILABLE (1u << 9) #define HV_ACCESS_FREQUENCY_MSRS (1u << 11) - +#define HV_ACCESS_REENLIGHTENMENTS_CONTROL (1u << 13) /* * HV_CPUID_FEATURES.EDX bits @@ -129,6 +129,13 @@ #define HV_X64_MSR_CRASH_CTL0x4105 #define HV_CRASH_CTL_NOTIFY (1ull << 63) +/* + * Reenlightenment notification MSRs + */ +#define HV_X64_MSR_REENLIGHTENMENT_CONTROL 0x4106 +#define HV_X64_MSR_TSC_EMULATION_CONTROL0x4107 +#define HV_X64_MSR_TSC_EMULATION_STATUS 0x4108 + /* * Hypercall status code */ diff --git a/target/i386/kvm.c b/target/i386/kvm.c index d23fff12f5..accf50eac3 100644 --- a/target/i386/kvm.c +++ b/target/i386/kvm.c @@ -90,6 +90,7 @@ static bool has_msr_hv_runtime; static 
bool has_msr_hv_synic; static bool has_msr_hv_stimer; static bool has_msr_hv_frequencies; +static bool has_msr_hv_reenlightenment; static bool has_msr_xss; static bool has_msr_spec_ctrl; static bool has_msr_smi_count; @@ -653,6 +654,11 @@ static int hyperv_handle_properties(CPUState *cs) env->features[FEAT_HYPERV_EAX] |= HV_ACCESS_FREQUENCY_MSRS; env->features[FEAT_HYPERV_EDX] |= HV_FREQUENCY_MSRS_AVAILABLE; } + +if (has_msr_hv_reenlightenment) { +env->features[FEAT_HYPERV_EAX] |= +HV_ACCESS_REENLIGHTENMENTS_CONTROL; +} } if (cpu->hyperv_crash && has_msr_hv_crash) { env->features[FEAT_HYPERV_EDX] |= HV_GUEST_CRASH_MSR_AVAILABLE; @@ -1185,6 +1191,9 @@ static int kvm_get_supported_msrs(KVMState *s) case HV_X64_MSR_TSC_FREQUENCY: has_msr_hv_frequencies = true; break; +case HV_X64_MSR_REENLIGHTENMENT_CONTROL: +has_msr_hv_reenlightenment = true; +break; case MSR_IA32_SPEC_CTRL: has_msr_spec_ctrl = true; break; @@ -1747,6 +1756,15 @@ static int kvm_put_msrs(X86CPU *cpu, int level) if (cpu->hyperv_time) { kvm_msr_entry_add(cpu, HV_X64_MSR_REFERENCE_TSC, env->msr_hv_tsc); + +if (has_msr_hv_reenlightenment) { +kvm_msr_entry_add(cpu, HV_X64_MSR_REENLIGHTENMENT_CONTROL, + env->msr_hv_reenlightenment_control); +kvm_msr_entry_add(cpu, HV_X64_MSR_TSC_EMULATION_CONTROL, + env->msr_hv_tsc_emulation_control); +kvm_msr_entry_add(cpu, HV_X64_MSR_TSC_EMULATION_STATUS, + env->msr_hv_tsc_emulation_status); +} } } if (cpu->hyperv_vapic) { @@ -2109,6 +2127,12 @@ static int kvm_get_msrs(X86CPU *cpu) } if (cpu->hyperv_time) { kvm_msr_entry_add(cpu, HV_X64_MSR_REFERENCE_TSC, 0); + +if (has_msr_hv_reenlightenment) { +kvm_msr_entry_add(cpu, HV_X64_MSR_REENLIGHTENMENT_CONTROL, 0); +kvm_msr_entry_add(cpu, HV_X64_MSR_TSC_EMULATION_CONTROL, 0); +kvm_msr_entry_add(cpu, HV_X64_MSR_TSC_EMULATION_STATUS, 0); +} } if (has_msr_hv_crash) { int j; @@ -2367,6 +2391,15 @@ static int kvm_get_msrs(X86CPU *cpu) env->msr_hv_stimer_count[(index - HV_X64_MSR_STIMER0_COUNT)/2]
Re: [Qemu-devel] [PATCH 2/2] i386/kvm: lower requirements for Hyper-V frequency MSRs exposure
Paolo Bonzini <pbonz...@redhat.com> writes: > On 12/03/2018 16:12, Vitaly Kuznetsov wrote: >> >> -if (has_msr_hv_frequencies && tsc_is_stable_and_known(env)) { >> +if (has_msr_hv_frequencies && env->tsc_khz) { > > Should this be > > ((env->tsc_khz && has_msr_hv_reenlightenment) || > tsc_is_stable_and_known(env)) > > so that you don't regress on older kernels? > I don't actually see where the regression might come from: frequency MSRs are supported regardless of reenlightenment/invtsc and there's nothing wrong with exposing them but I may be missing something.. -- Vitaly
Re: [Qemu-devel] [PATCH 1/2] i386/kvm: add support for Hyper-V reenlightenment MSRs
Paolo Bonzini <pbonz...@redhat.com> writes: > On 12/03/2018 16:12, Vitaly Kuznetsov wrote: [snip] >> @@ -2294,6 +2318,15 @@ static int kvm_get_msrs(X86CPU *cpu) >> env->msr_hv_stimer_count[(index - HV_X64_MSR_STIMER0_COUNT)/2] = >> msrs[i].data; >> break; >> +case HV_X64_MSR_REENLIGHTENMENT_CONTROL: >> +env->msr_hv_reenlightenment_control = msrs[i].data; >> +break; >> +case HV_X64_MSR_TSC_EMULATION_CONTROL: >> +env->msr_hv_tsc_emulation_control = msrs[i].data; >> +break; >> +case HV_X64_MSR_TSC_EMULATION_STATUS: >> +env->msr_hv_tsc_emulation_status = msrs[i].data; >> +break; >> case MSR_MTRRdefType: >> env->mtrr_deftype = msrs[i].data; >> break; >> > > Doesn't this also need a new subsection in target/i386/machine.c? > Actually yes, missed that completely! Thanks! -- Vitaly
[Qemu-devel] [PATCH v4 2/2] i386/kvm: expose Hyper-V frequency MSRs with reenlightenment
We can also expose Hyper-V frequency MSRs when reenlightenment feature is enabled and TSC frequency is known, Hyper-V on KVM will provide stable TSC page clocksources to its guests. Signed-off-by: Vitaly Kuznetsov <vkuzn...@redhat.com> --- - Expose frequency MSRs only when either INVTSC or Reenlightenment is provided [Paolo Bonzini] --- target/i386/kvm.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/target/i386/kvm.c b/target/i386/kvm.c index 75f4e1d69e..2c3c19d690 100644 --- a/target/i386/kvm.c +++ b/target/i386/kvm.c @@ -651,7 +651,8 @@ static int hyperv_handle_properties(CPUState *cs) env->features[FEAT_HYPERV_EAX] |= HV_TIME_REF_COUNT_AVAILABLE; env->features[FEAT_HYPERV_EAX] |= HV_REFERENCE_TSC_AVAILABLE; -if (has_msr_hv_frequencies && tsc_is_stable_and_known(env)) { +if (has_msr_hv_frequencies && env->tsc_khz && +(tsc_is_stable_and_known(env) || cpu->hyperv_reenlightenment)) { env->features[FEAT_HYPERV_EAX] |= HV_ACCESS_FREQUENCY_MSRS; env->features[FEAT_HYPERV_EDX] |= HV_FREQUENCY_MSRS_AVAILABLE; } -- 2.14.3
[Qemu-devel] [PATCH v4 0/2] i386/kvm: TSC page clocksource for Hyper-V-on-KVM fixes
Changes since v3: - check cpu->hyperv_reenlightenment instead of has_msr_reenlightenment [Eduardo Habkost, Roman Kagan] - expose frequency MSRs only when either INVTSC or Reenlightenment is provided [Paolo Bonzini] (I'm not doing 'hv_frequency' property for now as the discussion around it seems to be inconclusive.) Previously, Ladi was working on enabling TSC page clocksource for nested Hyper-V-on-KVM workloads. He found out that if Hyper-V frequency MSRs are exposed to L1 as well as INVTSC flag Hyper-V enables TSC page clocksource to its guests. Qemu doesn't pass INVTSC by default as it is a migration blocker. I found out there's a different way to make Hyper-V like us: expose Reenlightenment MSRs to it. KVM doesn't fully support the feature as we're still unable to migrate nested environments but rudimentary support we have there (kvm/queue currently) is enough. Enable Hyper-V reenlightenment MSRs and expose frequency MSRs even without INVTSC to make things work. Vitaly Kuznetsov (2): i386/kvm: add support for Hyper-V reenlightenment MSRs i386/kvm: expose Hyper-V frequency MSRs with reenlightenment target/i386/cpu.c | 4 +++- target/i386/cpu.h | 4 target/i386/hyperv-proto.h | 9 - target/i386/kvm.c | 41 +++-- target/i386/machine.c | 24 5 files changed, 78 insertions(+), 4 deletions(-) -- 2.14.3
[Qemu-devel] [PATCH v4 1/2] i386/kvm: add support for Hyper-V reenlightenment MSRs
KVM recently gained support for Hyper-V Reenlightenment MSRs which are required to make KVM-on-Hyper-V enable TSC page clocksource to its guests when INVTSC is not passed to it (and it is not passed by default in Qemu as it effectively blocks migration). Signed-off-by: Vitaly Kuznetsov <vkuzn...@redhat.com> --- Changes since v3: - check cpu->hyperv_reenlightenment instead of has_msr_reenlightenment [Eduardo Habkost, Roman Kagan] --- target/i386/cpu.c | 4 +++- target/i386/cpu.h | 4 target/i386/hyperv-proto.h | 9 - target/i386/kvm.c | 38 +- target/i386/machine.c | 24 5 files changed, 76 insertions(+), 3 deletions(-) diff --git a/target/i386/cpu.c b/target/i386/cpu.c index 6bb4ce8719..02579f8234 100644 --- a/target/i386/cpu.c +++ b/target/i386/cpu.c @@ -407,7 +407,8 @@ static FeatureWordInfo feature_word_info[FEATURE_WORDS] = { NULL /* hv_vpindex_access */, NULL /* hv_msr_reset_access */, NULL /* hv_msr_stats_access */, NULL /* hv_reftsc_access */, NULL /* hv_msr_idle_access */, NULL /* hv_msr_frequency_access */, -NULL, NULL, NULL, NULL, +NULL /* hv_msr_debug_access */, NULL /* hv_msr_reenlightenment_access */, +NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, @@ -4764,6 +4765,7 @@ static Property x86_cpu_properties[] = { DEFINE_PROP_BOOL("hv-runtime", X86CPU, hyperv_runtime, false), DEFINE_PROP_BOOL("hv-synic", X86CPU, hyperv_synic, false), DEFINE_PROP_BOOL("hv-stimer", X86CPU, hyperv_stimer, false), +DEFINE_PROP_BOOL("hv-reenlightenment", X86CPU, hyperv_reenlightenment, false), DEFINE_PROP_BOOL("check", X86CPU, check_cpuid, true), DEFINE_PROP_BOOL("enforce", X86CPU, enforce_cpuid, false), DEFINE_PROP_BOOL("kvm", X86CPU, expose_kvm, true), diff --git a/target/i386/cpu.h b/target/i386/cpu.h index 2e2bab5ff3..98eed72937 100644 --- a/target/i386/cpu.h +++ b/target/i386/cpu.h @@ -1174,6 +1174,9 @@ typedef struct CPUX86State { uint64_t msr_hv_synic_sint[HV_SINT_COUNT]; uint64_t msr_hv_stimer_config[HV_STIMER_COUNT]; uint64_t 
msr_hv_stimer_count[HV_STIMER_COUNT]; +uint64_t msr_hv_reenlightenment_control; +uint64_t msr_hv_tsc_emulation_control; +uint64_t msr_hv_tsc_emulation_status; uint64_t msr_rtit_ctrl; uint64_t msr_rtit_status; @@ -1296,6 +1299,7 @@ struct X86CPU { bool hyperv_runtime; bool hyperv_synic; bool hyperv_stimer; +bool hyperv_reenlightenment; bool check_cpuid; bool enforce_cpuid; bool expose_kvm; diff --git a/target/i386/hyperv-proto.h b/target/i386/hyperv-proto.h index cb4d7f2b7a..93352ebd2a 100644 --- a/target/i386/hyperv-proto.h +++ b/target/i386/hyperv-proto.h @@ -35,7 +35,7 @@ #define HV_RESET_AVAILABLE (1u << 7) #define HV_REFERENCE_TSC_AVAILABLE (1u << 9) #define HV_ACCESS_FREQUENCY_MSRS (1u << 11) - +#define HV_ACCESS_REENLIGHTENMENTS_CONTROL (1u << 13) /* * HV_CPUID_FEATURES.EDX bits @@ -129,6 +129,13 @@ #define HV_X64_MSR_CRASH_CTL0x4105 #define HV_CRASH_CTL_NOTIFY (1ull << 63) +/* + * Reenlightenment notification MSRs + */ +#define HV_X64_MSR_REENLIGHTENMENT_CONTROL 0x4106 +#define HV_X64_MSR_TSC_EMULATION_CONTROL0x4107 +#define HV_X64_MSR_TSC_EMULATION_STATUS 0x4108 + /* * Hypercall status code */ diff --git a/target/i386/kvm.c b/target/i386/kvm.c index d23fff12f5..75f4e1d69e 100644 --- a/target/i386/kvm.c +++ b/target/i386/kvm.c @@ -90,6 +90,7 @@ static bool has_msr_hv_runtime; static bool has_msr_hv_synic; static bool has_msr_hv_stimer; static bool has_msr_hv_frequencies; +static bool has_msr_hv_reenlightenment; static bool has_msr_xss; static bool has_msr_spec_ctrl; static bool has_msr_smi_count; @@ -583,7 +584,8 @@ static bool hyperv_enabled(X86CPU *cpu) cpu->hyperv_vpindex || cpu->hyperv_runtime || cpu->hyperv_synic || -cpu->hyperv_stimer); +cpu->hyperv_stimer || +cpu->hyperv_reenlightenment); } static int kvm_arch_set_tsc_khz(CPUState *cs) @@ -654,6 +656,14 @@ static int hyperv_handle_properties(CPUState *cs) env->features[FEAT_HYPERV_EDX] |= HV_FREQUENCY_MSRS_AVAILABLE; } } +if (cpu->hyperv_reenlightenment) { +if (!has_msr_hv_reenlightenment) { 
+fprintf(stderr, +"Hyper-V Reenlightenment is not supported by kernel\n"); +return -ENOSYS; +} +env->features[FEAT_HYPERV_EAX] |= HV_ACCESS_REENLIGHTENMENTS_CONTROL; +} if (cpu->hyperv_crash && has_msr_hv
Re: [Qemu-devel] [PATCH v3 1/2] i386/kvm: add support for Hyper-V reenlightenment MSRs
Marcelo Tosatti <mtosa...@redhat.com> writes: > On Tue, Mar 20, 2018 at 06:34:59PM +0100, Vitaly Kuznetsov wrote: >> KVM recently gained support for Hyper-V Reenlightenment MSRs which are >> required to make KVM-on-Hyper-V enable TSC page clocksource to its guests >> when INVTSC is not passed to it (and it is not passed by default in Qemu >> as it effectively blocks migration). > > Hi Vitaly, > > From Microsoft's documentation: > > "An L1 hypervisor can request to be notified when its partition is > migrated. This capability is enumerated in CPUID as > AccessReenlightenmentControls privilege (see 2.4.10)." > > The L0 hypervisor exposes a synthetic MSR > (HV_X64_MSR_REENLIGHTENMENT_CONTROL) that may be used by the L1 > hypervisor to configure an interrupt vector and target processor. The L0 > hypervisor will inject an interrupt with the specified vector after each > migration. > > What prevents a guest from setting the enable bit, and expect > to receive an interrupt, if the reenlightenment MSRs are exposed ? > This is actually desired: Hyper-V on KVM will set this bit and expect to receive an interrupt. Currently, we don't send it because we don't migrate nested workloads but eventually, when we learn how to do this in KVM, sending an interrupt and doing TSC access emulation will be required. Normal Windows on KVM won't use the feature as it doesn't need it: upon migration we update TSC page in KVM and readings from it stay correct. -- Vitaly
Re: [Qemu-devel] [PATCH v2 1/2] i386/kvm: add support for Hyper-V reenlightenment MSRs
Roman Kagan <rka...@virtuozzo.com> writes: > On Fri, Mar 16, 2018 at 06:00:19PM +0100, Vitaly Kuznetsov wrote: >> KVM recently gained support for Hyper-V Reenlightenment MSRs which are >> required to make KVM-on-Hyper-V enable TSC page clocksource to its guests >> when INVTSC is not passed to it (and it is not passed by default in Qemu >> as it effectively blocks migration). >> >> Signed-off-by: Vitaly Kuznetsov <vkuzn...@redhat.com> >> --- >> Changes since v1: >> - add vmstate_msr_hyperv_reenlightenment subsection to vmstate_x86_cpu >> [Paolo Bonzini] >> --- >> target/i386/cpu.h | 3 +++ >> target/i386/hyperv-proto.h | 9 - >> target/i386/kvm.c | 33 + >> target/i386/machine.c | 24 >> 4 files changed, 68 insertions(+), 1 deletion(-) >> >> diff --git a/target/i386/cpu.h b/target/i386/cpu.h >> index 2e2bab5ff3..0b1b556a56 100644 >> --- a/target/i386/cpu.h >> +++ b/target/i386/cpu.h >> @@ -1174,6 +1174,9 @@ typedef struct CPUX86State { >> uint64_t msr_hv_synic_sint[HV_SINT_COUNT]; >> uint64_t msr_hv_stimer_config[HV_STIMER_COUNT]; >> uint64_t msr_hv_stimer_count[HV_STIMER_COUNT]; >> +uint64_t msr_hv_reenlightenment_control; >> +uint64_t msr_hv_tsc_emulation_control; >> +uint64_t msr_hv_tsc_emulation_status; >> >> uint64_t msr_rtit_ctrl; >> uint64_t msr_rtit_status; >> diff --git a/target/i386/hyperv-proto.h b/target/i386/hyperv-proto.h >> index cb4d7f2b7a..93352ebd2a 100644 >> --- a/target/i386/hyperv-proto.h >> +++ b/target/i386/hyperv-proto.h >> @@ -35,7 +35,7 @@ >> #define HV_RESET_AVAILABLE (1u << 7) >> #define HV_REFERENCE_TSC_AVAILABLE (1u << 9) >> #define HV_ACCESS_FREQUENCY_MSRS (1u << 11) >> - >> +#define HV_ACCESS_REENLIGHTENMENTS_CONTROL (1u << 13) >> >> /* >> * HV_CPUID_FEATURES.EDX bits >> @@ -129,6 +129,13 @@ >> #define HV_X64_MSR_CRASH_CTL0x4105 >> #define HV_CRASH_CTL_NOTIFY (1ull << 63) >> >> +/* >> + * Reenlightenment notification MSRs >> + */ >> +#define HV_X64_MSR_REENLIGHTENMENT_CONTROL 0x4106 >> +#define HV_X64_MSR_TSC_EMULATION_CONTROL0x4107 >> 
+#define HV_X64_MSR_TSC_EMULATION_STATUS 0x4108 >> + >> /* >> * Hypercall status code >> */ >> diff --git a/target/i386/kvm.c b/target/i386/kvm.c >> index d23fff12f5..accf50eac3 100644 >> --- a/target/i386/kvm.c >> +++ b/target/i386/kvm.c >> @@ -90,6 +90,7 @@ static bool has_msr_hv_runtime; >> static bool has_msr_hv_synic; >> static bool has_msr_hv_stimer; >> static bool has_msr_hv_frequencies; >> +static bool has_msr_hv_reenlightenment; >> static bool has_msr_xss; >> static bool has_msr_spec_ctrl; >> static bool has_msr_smi_count; >> @@ -653,6 +654,11 @@ static int hyperv_handle_properties(CPUState *cs) >> env->features[FEAT_HYPERV_EAX] |= HV_ACCESS_FREQUENCY_MSRS; >> env->features[FEAT_HYPERV_EDX] |= HV_FREQUENCY_MSRS_AVAILABLE; >> } >> + >> +if (has_msr_hv_reenlightenment) { >> +env->features[FEAT_HYPERV_EAX] |= >> +HV_ACCESS_REENLIGHTENMENTS_CONTROL; >> +} > > Can you please add a matching comment to the definition of > feature_word_info[FEAT_HYPERV_EAX].feat_names[]? > Sure, missed that. > Also there appears to be no cpu property to turn this on/off, does it? > It's enabled based only on the support in the KVM it's running against. > So I guess we may have a problem migrating between the hosts with > different KVM versions, one supporting it and the other not. Currently nested workloads don't migrate so I decided to take the opportunity and squeeze the new feature in without adding a new hv_reenlightenment cpu property (which would have to be added to libvirt at least). > (This is also a problem with has_msr_hv_frequencies, and is in general a > long-standing issue of hv_* properties being done differently from the > rest of CPUID features.) Suggestions? (To be honest I don't really like us adding new hv_* property for every new Hyper-V feature we support. I doubt anyone needs 'partial' Hyper-V emulation. It would be nice to have a single versioned 'hv' feature implying everything. We may then forbid migrations to older hv versions. 
But I don't really know the history of why we decided to go with a separate hv_* for every feature we add). -- Vitaly
Re: [Qemu-devel] [PATCH v3 2/2] i386/kvm: lower requirements for Hyper-V frequency MSRs exposure
Roman Kagan <rka...@virtuozzo.com> writes: > On Tue, Mar 20, 2018 at 06:35:00PM +0100, Vitaly Kuznetsov wrote: >> Requiring tsc_is_stable_and_known() is too restrictive: even without INVTCS >> nested Hyper-V-on-KVM enables TSC pages for its guests e.g. when >> Reenlightenment MSRs are present. Presence of frequency MSRs doesn't mean >> these frequencies are stable, it just means they're available for reading. >> >> Signed-off-by: Vitaly Kuznetsov <vkuzn...@redhat.com> >> --- >> target/i386/kvm.c | 2 +- >> 1 file changed, 1 insertion(+), 1 deletion(-) >> >> diff --git a/target/i386/kvm.c b/target/i386/kvm.c >> index 7d9f9ca0b1..74fc3d3b2c 100644 >> --- a/target/i386/kvm.c >> +++ b/target/i386/kvm.c >> @@ -651,7 +651,7 @@ static int hyperv_handle_properties(CPUState *cs) >> env->features[FEAT_HYPERV_EAX] |= HV_TIME_REF_COUNT_AVAILABLE; >> env->features[FEAT_HYPERV_EAX] |= HV_REFERENCE_TSC_AVAILABLE; >> >> -if (has_msr_hv_frequencies && tsc_is_stable_and_known(env)) { >> +if (has_msr_hv_frequencies && env->tsc_khz) { >> env->features[FEAT_HYPERV_EAX] |= HV_ACCESS_FREQUENCY_MSRS; >> env->features[FEAT_HYPERV_EDX] |= HV_FREQUENCY_MSRS_AVAILABLE; >> } > > I suggest that we add a corresponding cpu property here, too. The guest > may legitimately rely on these msrs when it sees the support in CPUID, > and migrating from a kernel with the feature supported (4.14+) to an > older one will make it crash. > This can be arranged, but what happens to people who use these features today? Assuming they also passed 'invtsc' they have stable TSC page clocksource already (when Hyper-V role is enabled) but when we start requesting a new 'hv_frequency' cpu property they'll suddenly lose what they have... -- Vitaly
Re: [Qemu-devel] [PATCH v4 2/2] i386/kvm: expose Hyper-V frequency MSRs with reenlightenment
Eduardo Habkost <ehabk...@redhat.com> writes: > On Thu, Mar 22, 2018 at 02:13:58PM +0100, Vitaly Kuznetsov wrote: >> We can also expose Hyper-V frequency MSRs when reenlightenment feature is >> enabled and TSC frequency is known, Hyper-V on KVM will provide stable TSC >> page clocksources to its guests. >> >> Signed-off-by: Vitaly Kuznetsov <vkuzn...@redhat.com> >> --- >> - Expose frequency MSRs only when either INVTSC or Reenlightenment is >> provided [Paolo Bonzini] >> --- >> target/i386/kvm.c | 3 ++- >> 1 file changed, 2 insertions(+), 1 deletion(-) >> >> diff --git a/target/i386/kvm.c b/target/i386/kvm.c >> index 75f4e1d69e..2c3c19d690 100644 >> --- a/target/i386/kvm.c >> +++ b/target/i386/kvm.c >> @@ -651,7 +651,8 @@ static int hyperv_handle_properties(CPUState *cs) >> env->features[FEAT_HYPERV_EAX] |= HV_TIME_REF_COUNT_AVAILABLE; >> env->features[FEAT_HYPERV_EAX] |= HV_REFERENCE_TSC_AVAILABLE; >> >> -if (has_msr_hv_frequencies && tsc_is_stable_and_known(env)) { >> +if (has_msr_hv_frequencies && env->tsc_khz && > > Why is the check for env->tsc_khz necessary? > > Are there known circumstances where HV_X64_MSR_TSC_FREQUENCY will be supported > by KVM but ioctl(KVM_GET_TSC_KHZ) will return 0, or this is just for extra > safety? > Yes, I didn't experiment with passing '0' to Windows but in general it doesn't sound like a good idea. >> +(tsc_is_stable_and_known(env) || cpu->hyperv_reenlightenment)) { >> env->features[FEAT_HYPERV_EAX] |= HV_ACCESS_FREQUENCY_MSRS; >> env->features[FEAT_HYPERV_EDX] |= HV_FREQUENCY_MSRS_AVAILABLE; >> } >> -- >> 2.14.3 >> -- Vitaly
Re: [Qemu-devel] [PATCH v3 2/2] i386/kvm: lower requirements for Hyper-V frequency MSRs exposure
Paolo Bonzini <pbonz...@redhat.com> writes: > On 20/03/2018 18:35, Vitaly Kuznetsov wrote: >> +if (has_msr_hv_frequencies && env->tsc_khz) { >> env->features[FEAT_HYPERV_EAX] |= HV_ACCESS_FREQUENCY_MSRS; >> env->features[FEAT_HYPERV_EDX] |= HV_FREQUENCY_MSRS_AVAILABLE; >> } > > Since you have added cpu->hyperv_reenlightenment, I'd rather change this > so that we don't make the "license to change guest ABI across migration" > apply more. We can exploit the fact that Windows doesn't even use the > MSRs unless either invtsc or re-enlightenment is present. Something > like this: > >if (has_msr_hv_frequencies && env->tsc_khz && > (tsc_is_stable_and_known(env) || > cpu->hyperv_reenlightenment)) > > will make the MSRs visible in all useful cases, without having to add > yet another knob. > Can be arranged, of course. (What I'm worried about with all our hv_* knobs is that the more of them we have, the easier it is to assemble some Frankenstein which won't look like any existing Hyper-V version; we're probably not doing a very good job testing all possible hv_* combinations as people probably use 'all or nothing'. In case we end up finding a bug in Windows with some weird hv_* combination it's unlikely Microsoft will bother fixing it as it doesn't reproduce on any existing Hyper-V version. That said, it would be great to eventually have something like 'hv_ws2012r2' property making us look exactly the way real WS2012R2 looks. Unfortunately, I'm unsure about a path to get there). > (Don't worry, this backwards-compatibility stuff is the hardest part. > I'm so happy that Eduardo is the one maintaining it :)). I feel the pain :-) Thanks for the reviews! -- Vitaly
[Qemu-devel] [PATCH v3 1/2] i386/kvm: add support for Hyper-V reenlightenment MSRs
KVM recently gained support for Hyper-V Reenlightenment MSRs which are required to make KVM-on-Hyper-V enable TSC page clocksource to its guests when INVTSC is not passed to it (and it is not passed by default in Qemu as it effectively blocks migration). Signed-off-by: Vitaly Kuznetsov <vkuzn...@redhat.com> --- Changes since v2: - add hv-reenlightenment CPU property [Roman Kagan, Paolo Bonzini] - add a comment to feature_word_info [Roman Kagan] --- target/i386/cpu.c | 4 +++- target/i386/cpu.h | 4 target/i386/hyperv-proto.h | 9 - target/i386/kvm.c | 39 ++- target/i386/machine.c | 24 5 files changed, 77 insertions(+), 3 deletions(-) diff --git a/target/i386/cpu.c b/target/i386/cpu.c index 6bb4ce8719..02579f8234 100644 --- a/target/i386/cpu.c +++ b/target/i386/cpu.c @@ -407,7 +407,8 @@ static FeatureWordInfo feature_word_info[FEATURE_WORDS] = { NULL /* hv_vpindex_access */, NULL /* hv_msr_reset_access */, NULL /* hv_msr_stats_access */, NULL /* hv_reftsc_access */, NULL /* hv_msr_idle_access */, NULL /* hv_msr_frequency_access */, -NULL, NULL, NULL, NULL, +NULL /* hv_msr_debug_access */, NULL /* hv_msr_reenlightenment_access */, +NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, @@ -4764,6 +4765,7 @@ static Property x86_cpu_properties[] = { DEFINE_PROP_BOOL("hv-runtime", X86CPU, hyperv_runtime, false), DEFINE_PROP_BOOL("hv-synic", X86CPU, hyperv_synic, false), DEFINE_PROP_BOOL("hv-stimer", X86CPU, hyperv_stimer, false), +DEFINE_PROP_BOOL("hv-reenlightenment", X86CPU, hyperv_reenlightenment, false), DEFINE_PROP_BOOL("check", X86CPU, check_cpuid, true), DEFINE_PROP_BOOL("enforce", X86CPU, enforce_cpuid, false), DEFINE_PROP_BOOL("kvm", X86CPU, expose_kvm, true), diff --git a/target/i386/cpu.h b/target/i386/cpu.h index 2e2bab5ff3..98eed72937 100644 --- a/target/i386/cpu.h +++ b/target/i386/cpu.h @@ -1174,6 +1174,9 @@ typedef struct CPUX86State { uint64_t msr_hv_synic_sint[HV_SINT_COUNT]; uint64_t msr_hv_stimer_config[HV_STIMER_COUNT]; 
uint64_t msr_hv_stimer_count[HV_STIMER_COUNT]; +uint64_t msr_hv_reenlightenment_control; +uint64_t msr_hv_tsc_emulation_control; +uint64_t msr_hv_tsc_emulation_status; uint64_t msr_rtit_ctrl; uint64_t msr_rtit_status; @@ -1296,6 +1299,7 @@ struct X86CPU { bool hyperv_runtime; bool hyperv_synic; bool hyperv_stimer; +bool hyperv_reenlightenment; bool check_cpuid; bool enforce_cpuid; bool expose_kvm; diff --git a/target/i386/hyperv-proto.h b/target/i386/hyperv-proto.h index cb4d7f2b7a..93352ebd2a 100644 --- a/target/i386/hyperv-proto.h +++ b/target/i386/hyperv-proto.h @@ -35,7 +35,7 @@ #define HV_RESET_AVAILABLE (1u << 7) #define HV_REFERENCE_TSC_AVAILABLE (1u << 9) #define HV_ACCESS_FREQUENCY_MSRS (1u << 11) - +#define HV_ACCESS_REENLIGHTENMENTS_CONTROL (1u << 13) /* * HV_CPUID_FEATURES.EDX bits @@ -129,6 +129,13 @@ #define HV_X64_MSR_CRASH_CTL0x4105 #define HV_CRASH_CTL_NOTIFY (1ull << 63) +/* + * Reenlightenment notification MSRs + */ +#define HV_X64_MSR_REENLIGHTENMENT_CONTROL 0x4106 +#define HV_X64_MSR_TSC_EMULATION_CONTROL0x4107 +#define HV_X64_MSR_TSC_EMULATION_STATUS 0x4108 + /* * Hypercall status code */ diff --git a/target/i386/kvm.c b/target/i386/kvm.c index d23fff12f5..7d9f9ca0b1 100644 --- a/target/i386/kvm.c +++ b/target/i386/kvm.c @@ -90,6 +90,7 @@ static bool has_msr_hv_runtime; static bool has_msr_hv_synic; static bool has_msr_hv_stimer; static bool has_msr_hv_frequencies; +static bool has_msr_hv_reenlightenment; static bool has_msr_xss; static bool has_msr_spec_ctrl; static bool has_msr_smi_count; @@ -583,7 +584,8 @@ static bool hyperv_enabled(X86CPU *cpu) cpu->hyperv_vpindex || cpu->hyperv_runtime || cpu->hyperv_synic || -cpu->hyperv_stimer); +cpu->hyperv_stimer || +cpu->hyperv_reenlightenment); } static int kvm_arch_set_tsc_khz(CPUState *cs) @@ -654,6 +656,14 @@ static int hyperv_handle_properties(CPUState *cs) env->features[FEAT_HYPERV_EDX] |= HV_FREQUENCY_MSRS_AVAILABLE; } } +if (cpu->hyperv_reenlightenment) { +if (!has_msr_hv_reenlightenment) { 
+fprintf(stderr, +"Hyper-V Reenlightenment is not supported by kernel\n"); +return -ENOSYS; +} +env->features[FEAT_HYPERV_EAX] |= HV_ACCESS_REENLIGHTENMENTS_CONTROL; +} if (cpu->hyperv_crash && has_msr_hv
[Qemu-devel] [PATCH v3 0/2] i386/kvm: TSC page clocksource for Hyper-V-on-KVM fixes
Changes since v2: - add hv-reenlightenment CPU property [Roman Kagan, Paolo Bonzini] - add a comment to feature_word_info [Roman Kagan] Previously, Ladi was working on enabling TSC page clocksource for nested Hyper-V-on-KVM workloads. He found out that if Hyper-V frequency MSRs are exposed to L1 together with the INVTSC flag, Hyper-V enables TSC page clocksource to its guests. Qemu doesn't pass INVTSC by default as it is a migration blocker. I found out there's a different way to make Hyper-V like us: expose Reenlightenment MSRs to it. KVM doesn't fully support the feature as we're still unable to migrate nested environments, but the rudimentary support we have there (kvm/queue only currently) is enough. Enable Hyper-V reenlightenment MSRs and expose frequency MSRs even without INVTSC to make things work. Vitaly Kuznetsov (2): i386/kvm: add support for Hyper-V reenlightenment MSRs i386/kvm: lower requirements for Hyper-V frequency MSRs exposure target/i386/cpu.c | 4 +++- target/i386/cpu.h | 4 target/i386/hyperv-proto.h | 9 - target/i386/kvm.c | 41 +++-- target/i386/machine.c | 24 5 files changed, 78 insertions(+), 4 deletions(-) -- 2.14.3
[Qemu-devel] [PATCH v3 2/2] i386/kvm: lower requirements for Hyper-V frequency MSRs exposure
Requiring tsc_is_stable_and_known() is too restrictive: even without INVTSC, nested Hyper-V-on-KVM enables TSC pages for its guests, e.g. when Reenlightenment MSRs are present. Presence of frequency MSRs doesn't mean these frequencies are stable, it just means they're available for reading. Signed-off-by: Vitaly Kuznetsov <vkuzn...@redhat.com> --- target/i386/kvm.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/target/i386/kvm.c b/target/i386/kvm.c index 7d9f9ca0b1..74fc3d3b2c 100644 --- a/target/i386/kvm.c +++ b/target/i386/kvm.c @@ -651,7 +651,7 @@ static int hyperv_handle_properties(CPUState *cs) env->features[FEAT_HYPERV_EAX] |= HV_TIME_REF_COUNT_AVAILABLE; env->features[FEAT_HYPERV_EAX] |= HV_REFERENCE_TSC_AVAILABLE; -if (has_msr_hv_frequencies && tsc_is_stable_and_known(env)) { +if (has_msr_hv_frequencies && env->tsc_khz) { env->features[FEAT_HYPERV_EAX] |= HV_ACCESS_FREQUENCY_MSRS; env->features[FEAT_HYPERV_EDX] |= HV_FREQUENCY_MSRS_AVAILABLE; } -- 2.14.3
Re: [Qemu-devel] [PATCH] i386/kvm: add support for Hyper-V TLB flush
Vitaly Kuznetsov writes: > Add support for Hyper-V TLB flush which recently got added to KVM. > > Just like regular Hyper-V we announce HV_EX_PROCESSOR_MASKS_RECOMMENDED > regardless of how many vCPUs we have. Windows is 'smart' and uses less > expensive non-EX Hypercall whenever possible (when it wants to flush TLB > for all vCPUs or the maximum vCPU index in the vCPU set requires flushing > is less than 64). > Ping? -- Vitaly
[Qemu-devel] [PATCH 2/2] i386/kvm: add support for Hyper-V IPI send
Hyper-V PV IPI support is merged to KVM, enable the feature in Qemu. When enabled, this allows Windows guests to send IPIs to other vCPUs with a single hypercall even when there are >64 vCPUs in the request. Signed-off-by: Vitaly Kuznetsov --- target/i386/cpu.c | 1 + target/i386/cpu.h | 1 + target/i386/hyperv-proto.h | 1 + target/i386/kvm.c | 14 +- 4 files changed, 16 insertions(+), 1 deletion(-) diff --git a/target/i386/cpu.c b/target/i386/cpu.c index c88876dfe3..32ea041c06 100644 --- a/target/i386/cpu.c +++ b/target/i386/cpu.c @@ -5564,6 +5564,7 @@ static Property x86_cpu_properties[] = { DEFINE_PROP_BOOL("hv-frequencies", X86CPU, hyperv_frequencies, false), DEFINE_PROP_BOOL("hv-reenlightenment", X86CPU, hyperv_reenlightenment, false), DEFINE_PROP_BOOL("hv-tlbflush", X86CPU, hyperv_tlbflush, false), +DEFINE_PROP_BOOL("hv-ipi", X86CPU, hyperv_ipi, false), DEFINE_PROP_BOOL("check", X86CPU, check_cpuid, true), DEFINE_PROP_BOOL("enforce", X86CPU, enforce_cpuid, false), DEFINE_PROP_BOOL("kvm", X86CPU, expose_kvm, true), diff --git a/target/i386/cpu.h b/target/i386/cpu.h index 730c06f80a..caa1544b2e 100644 --- a/target/i386/cpu.h +++ b/target/i386/cpu.h @@ -1382,6 +1382,7 @@ struct X86CPU { bool hyperv_frequencies; bool hyperv_reenlightenment; bool hyperv_tlbflush; +bool hyperv_ipi; bool check_cpuid; bool enforce_cpuid; bool expose_kvm; diff --git a/target/i386/hyperv-proto.h b/target/i386/hyperv-proto.h index d6d5a79293..87f36d14e8 100644 --- a/target/i386/hyperv-proto.h +++ b/target/i386/hyperv-proto.h @@ -58,6 +58,7 @@ #define HV_APIC_ACCESS_RECOMMENDED (1u << 3) #define HV_SYSTEM_RESET_RECOMMENDED (1u << 4) #define HV_RELAXED_TIMING_RECOMMENDED (1u << 5) +#define HV_CLUSTER_IPI_RECOMMENDED (1u << 10) #define HV_EX_PROCESSOR_MASKS_RECOMMENDED (1u << 11) /* diff --git a/target/i386/kvm.c b/target/i386/kvm.c index dc4047b02f..795aa52938 100644 --- a/target/i386/kvm.c +++ b/target/i386/kvm.c @@ -608,7 +608,8 @@ static bool hyperv_enabled(X86CPU *cpu) cpu->hyperv_synic 
|| cpu->hyperv_stimer || cpu->hyperv_reenlightenment || -cpu->hyperv_tlbflush); +cpu->hyperv_tlbflush || +cpu->hyperv_ipi); } static int kvm_arch_set_tsc_khz(CPUState *cs) @@ -888,6 +889,17 @@ int kvm_arch_init_vcpu(CPUState *cs) c->eax |= HV_REMOTE_TLB_FLUSH_RECOMMENDED; c->eax |= HV_EX_PROCESSOR_MASKS_RECOMMENDED; } +if (cpu->hyperv_ipi) { +if (kvm_check_extension(cs->kvm_state, +KVM_CAP_HYPERV_SEND_IPI) <= 0) { +fprintf(stderr, "Hyper-V IPI send support " +"(requested by 'hv-ipi' cpu flag) " +" is not supported by kernel\n"); +return -ENOSYS; +} +c->eax |= HV_CLUSTER_IPI_RECOMMENDED; +c->eax |= HV_EX_PROCESSOR_MASKS_RECOMMENDED; +} c->ebx = cpu->hyperv_spinlock_attempts; -- 2.17.1
[Qemu-devel] [PATCH 1/2] linux-headers: update to pre-4.20 (kvm/queue)
This brings us three new defines: KVM_CAP_ARM_INJECT_SERROR_ESR, KVM_CAP_MSR_PLATFORM_INFO and KVM_CAP_HYPERV_SEND_IPI. Signed-off-by: Vitaly Kuznetsov --- linux-headers/linux/kvm.h | 7 +-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/linux-headers/linux/kvm.h b/linux-headers/linux/kvm.h index 66790724f1..043f1e58b6 100644 --- a/linux-headers/linux/kvm.h +++ b/linux-headers/linux/kvm.h @@ -9,7 +9,7 @@ */ #include - +#include #include #include @@ -481,7 +481,7 @@ struct kvm_dirty_log { __u32 slot; __u32 padding1; union { - void *dirty_bitmap; /* one bit per page */ + void __user *dirty_bitmap; /* one bit per page */ __u64 padding2; }; }; @@ -951,6 +951,9 @@ struct kvm_ppc_resize_hpt { #define KVM_CAP_HYPERV_TLBFLUSH 155 #define KVM_CAP_S390_HPAGE_1M 156 #define KVM_CAP_NESTED_STATE 157 +#define KVM_CAP_ARM_INJECT_SERROR_ESR 158 +#define KVM_CAP_MSR_PLATFORM_INFO 159 +#define KVM_CAP_HYPERV_SEND_IPI 160 #ifdef KVM_CAP_IRQ_ROUTING -- 2.17.1
[Qemu-devel] [PATCH 0/2] i386/kvm: enable Hyper-V PV IPI mechanism
Hyper-V PV IPI enlightenment was merged to KVM (kvm/queue currently). Support the feature enablement in Qemu. Vitaly Kuznetsov (2): linux-headers: update to pre-4.20 (kvm/queue) i386/kvm: add support for Hyper-V IPI send linux-headers/linux/kvm.h | 7 +-- target/i386/cpu.c | 1 + target/i386/cpu.h | 1 + target/i386/hyperv-proto.h | 1 + target/i386/kvm.c | 14 +- 5 files changed, 21 insertions(+), 3 deletions(-) -- 2.17.1
Re: [Qemu-devel] [PATCH V6 1/5] target/i386 : add coalesced pio support
Peng Hao writes: > diff --git a/linux-headers/linux/kvm.h b/linux-headers/linux/kvm.h > index 83ba4eb..b5d4289 100644 > --- a/linux-headers/linux/kvm.h > +++ b/linux-headers/linux/kvm.h > @@ -420,13 +420,19 @@ struct kvm_run { > struct kvm_coalesced_mmio_zone { > __u64 addr; > __u32 size; > - __u32 pad; > + union { > + __u32 pad; > + __u32 pio; > + }; > }; I was just passing by, but what's the point in keeping 'pad'? > > struct kvm_coalesced_mmio { > __u64 phys_addr; > __u32 len; > + unino { 'union' I would guess? How does it compile? > __u32 pad; > + __u32 pio; > + }; > __u8 data[8]; > }; > > @@ -953,6 +959,7 @@ struct kvm_ppc_resize_hpt { > #define KVM_CAP_NESTED_STATE 157 > #define KVM_CAP_ARM_INJECT_SERROR_ESR 158 > #define KVM_CAP_MSR_PLATFORM_INFO 159 > +#define KVM_CAP_COALESCED_PIO 160 > > #ifdef KVM_CAP_IRQ_ROUTING -- Vitaly
Re: [Qemu-devel] [PATCH 2/2] x86: hv_evmcs CPU flag support
Roman Kagan writes: > On Fri, Oct 19, 2018 at 01:14:32PM +0200, Vitaly Kuznetsov wrote: >> --- a/target/i386/kvm.c >> +++ b/target/i386/kvm.c >> @@ -798,6 +798,7 @@ int kvm_arch_init_vcpu(CPUState *cs) >> uint32_t unused; >> struct kvm_cpuid_entry2 *c; >> uint32_t signature[3]; >> +uint16_t evmcs_version; >> int kvm_base = KVM_CPUID_SIGNATURE; >> int r; >> Error *local_err = NULL; >> @@ -841,7 +842,7 @@ int kvm_arch_init_vcpu(CPUState *cs) >> memset(signature, 0, 12); >> memcpy(signature, cpu->hyperv_vendor_id, len); >> } >> -c->eax = HV_CPUID_MIN; >> +c->eax = cpu->hyperv_evmcs ? HV_CPUID_MIN_NESTED : HV_CPUID_MIN; > > > I think these two aren't meant to be used on the hypervisor side. My > understanding is that HV_CPUID_MIN is only there as a reminder that the > real Hyper-V exposes at least that many hypervisor-specific leaves so > the guest can rely on that. So I'd rather use directly > HV_CPUID_IMPLEMENT_LIMITS : HV_CPUID_NESTED_FEATURES, and not introduce > HV_CPUID_MIN_NESTED. Makes sense, will do v2. > Maybe better yet is to update this field with the > maximum value while populating HV_* leaves: > > if (hyperv_enabled(cpu)) { > uint32_t *cpuid_4000_eax; > c = _data.entries[cpuid_i++]; > c->function = HV_CPUID_VENDOR_AND_MAX_FUNCTIONS; > cpuid_4000_eax = >eax; > *cpuid_4000_eax = c->function; > > > > c = _data.entries[cpuid_i++]; > c->function = HV_CPUID_...; > *cpuid_4000_eax = max(*cpuid_4000_eax, c->function); > > but I think it can be done later and doesn't need to hold this patch. > > Another question related to this: are the guests OK with leaves > 0x4006..0x4009 missing? They seem to be, however, after you've asked I'm leaning towards zeroing them 'just in case'. -- Vitaly
[Qemu-devel] [PATCH v2 1/2] linux-headers: update
Update to kvm/next commit 1e58e5e59148 ("KVM: VMX: enable nested virtualization by default"). kvm_put_vcpu_events() needs to be fixed as 'pad' was renamed to 'pending' in 'struct kvm_vcpu_events' Signed-off-by: Vitaly Kuznetsov --- linux-headers/asm-powerpc/kvm.h | 1 + linux-headers/asm-x86/kvm.h | 8 ++-- linux-headers/linux/kvm.h | 16 ++-- target/i386/kvm.c | 2 +- 4 files changed, 22 insertions(+), 5 deletions(-) diff --git a/linux-headers/asm-powerpc/kvm.h b/linux-headers/asm-powerpc/kvm.h index 1b32b56a03..8c876c166e 100644 --- a/linux-headers/asm-powerpc/kvm.h +++ b/linux-headers/asm-powerpc/kvm.h @@ -634,6 +634,7 @@ struct kvm_ppc_cpu_char { #define KVM_REG_PPC_DEC_EXPIRY (KVM_REG_PPC | KVM_REG_SIZE_U64 | 0xbe) #define KVM_REG_PPC_ONLINE (KVM_REG_PPC | KVM_REG_SIZE_U32 | 0xbf) +#define KVM_REG_PPC_PTCR (KVM_REG_PPC | KVM_REG_SIZE_U64 | 0xc0) /* Transactional Memory checkpointed state: * This is all GPRs, all VSX regs and a subset of SPRs diff --git a/linux-headers/asm-x86/kvm.h b/linux-headers/asm-x86/kvm.h index fd23d5778e..dabfcf7c39 100644 --- a/linux-headers/asm-x86/kvm.h +++ b/linux-headers/asm-x86/kvm.h @@ -288,6 +288,7 @@ struct kvm_reinject_control { #define KVM_VCPUEVENT_VALID_SIPI_VECTOR0x0002 #define KVM_VCPUEVENT_VALID_SHADOW 0x0004 #define KVM_VCPUEVENT_VALID_SMM0x0008 +#define KVM_VCPUEVENT_VALID_PAYLOAD0x0010 /* Interrupt shadow states */ #define KVM_X86_SHADOW_INT_MOV_SS 0x01 @@ -299,7 +300,7 @@ struct kvm_vcpu_events { __u8 injected; __u8 nr; __u8 has_error_code; - __u8 pad; + __u8 pending; __u32 error_code; } exception; struct { @@ -322,7 +323,9 @@ struct kvm_vcpu_events { __u8 smm_inside_nmi; __u8 latched_init; } smi; - __u32 reserved[9]; + __u8 reserved[27]; + __u8 exception_has_payload; + __u64 exception_payload; }; /* for KVM_GET/SET_DEBUGREGS */ @@ -381,6 +384,7 @@ struct kvm_sync_regs { #define KVM_STATE_NESTED_GUEST_MODE0x0001 #define KVM_STATE_NESTED_RUN_PENDING 0x0002 +#define KVM_STATE_NESTED_EVMCS 0x0004 #define 
KVM_STATE_NESTED_SMM_GUEST_MODE0x0001 #define KVM_STATE_NESTED_SMM_VMXON 0x0002 diff --git a/linux-headers/linux/kvm.h b/linux-headers/linux/kvm.h index 83ba4eb571..f11a7eb49c 100644 --- a/linux-headers/linux/kvm.h +++ b/linux-headers/linux/kvm.h @@ -420,13 +420,19 @@ struct kvm_run { struct kvm_coalesced_mmio_zone { __u64 addr; __u32 size; - __u32 pad; + union { + __u32 pad; + __u32 pio; + }; }; struct kvm_coalesced_mmio { __u64 phys_addr; __u32 len; - __u32 pad; + union { + __u32 pad; + __u32 pio; + }; __u8 data[8]; }; @@ -719,6 +725,7 @@ struct kvm_ppc_one_seg_page_size { #define KVM_PPC_PAGE_SIZES_REAL0x0001 #define KVM_PPC_1T_SEGMENTS0x0002 +#define KVM_PPC_NO_HASH0x0004 struct kvm_ppc_smmu_info { __u64 flags; @@ -953,6 +960,11 @@ struct kvm_ppc_resize_hpt { #define KVM_CAP_NESTED_STATE 157 #define KVM_CAP_ARM_INJECT_SERROR_ESR 158 #define KVM_CAP_MSR_PLATFORM_INFO 159 +#define KVM_CAP_PPC_NESTED_HV 160 +#define KVM_CAP_HYPERV_SEND_IPI 161 +#define KVM_CAP_COALESCED_PIO 162 +#define KVM_CAP_HYPERV_ENLIGHTENED_VMCS 163 +#define KVM_CAP_EXCEPTION_PAYLOAD 164 #ifdef KVM_CAP_IRQ_ROUTING diff --git a/target/i386/kvm.c b/target/i386/kvm.c index dc4047b02f..a46ad102d8 100644 --- a/target/i386/kvm.c +++ b/target/i386/kvm.c @@ -2686,7 +2686,7 @@ static int kvm_put_vcpu_events(X86CPU *cpu, int level) events.exception.nr = env->exception_injected; events.exception.has_error_code = env->has_error_code; events.exception.error_code = env->error_code; -events.exception.pad = 0; +events.exception.pending = 0; events.interrupt.injected = (env->interrupt_injected >= 0); events.interrupt.nr = env->interrupt_injected; -- 2.17.2
[Qemu-devel] [PATCH v2 0/2] i386/kvm: add support for Hyper-V Enlightened VMCS
Changes since v1 [Roman Kagan]: - Throw away HV_CPUID_MIN_NESTED. - Create zeroed 0x4006-0x4009 CPUID leaves. Hyper-V Enlightened VMCS feature was merged to KVM, enable it in Qemu. The feature gives us a significant performance boost for Hyper-V on KVM deployments. The first patch of the series is posted for completeness only. Vitaly Kuznetsov (2): linux-headers: update x86: hv_evmcs CPU flag support linux-headers/asm-powerpc/kvm.h | 1 + linux-headers/asm-x86/kvm.h | 8 ++-- linux-headers/linux/kvm.h | 16 ++-- target/i386/cpu.c | 1 + target/i386/cpu.h | 1 + target/i386/hyperv-proto.h | 2 ++ target/i386/kvm.c | 32 +--- 7 files changed, 54 insertions(+), 7 deletions(-) -- 2.17.2
[Qemu-devel] [PATCH v2 2/2] x86: hv_evmcs CPU flag support
Adds a new CPU flag to enable the Enlightened VMCS KVM feature. QEMU enables KVM_CAP_HYPERV_ENLIGHTENED_VMCS and gets back the version to be advertised in lower 16 bits of CPUID.0x400A:EAX. Suggested-by: Ladi Prosek Signed-off-by: Vitaly Kuznetsov --- Changes since v1: - Throw away HV_CPUID_MIN_NESTED. - Create zeroed 0x4006-0x4009 CPUID leaves. --- target/i386/cpu.c | 1 + target/i386/cpu.h | 1 + target/i386/hyperv-proto.h | 2 ++ target/i386/kvm.c | 30 -- 4 files changed, 32 insertions(+), 2 deletions(-) diff --git a/target/i386/cpu.c b/target/i386/cpu.c index c88876dfe3..5c0e84fb99 100644 --- a/target/i386/cpu.c +++ b/target/i386/cpu.c @@ -5564,6 +5564,7 @@ static Property x86_cpu_properties[] = { DEFINE_PROP_BOOL("hv-frequencies", X86CPU, hyperv_frequencies, false), DEFINE_PROP_BOOL("hv-reenlightenment", X86CPU, hyperv_reenlightenment, false), DEFINE_PROP_BOOL("hv-tlbflush", X86CPU, hyperv_tlbflush, false), +DEFINE_PROP_BOOL("hv-evmcs", X86CPU, hyperv_evmcs, false), DEFINE_PROP_BOOL("check", X86CPU, check_cpuid, true), DEFINE_PROP_BOOL("enforce", X86CPU, enforce_cpuid, false), DEFINE_PROP_BOOL("kvm", X86CPU, expose_kvm, true), diff --git a/target/i386/cpu.h b/target/i386/cpu.h index 730c06f80a..013d953b57 100644 --- a/target/i386/cpu.h +++ b/target/i386/cpu.h @@ -1382,6 +1382,7 @@ struct X86CPU { bool hyperv_frequencies; bool hyperv_reenlightenment; bool hyperv_tlbflush; +bool hyperv_evmcs; bool check_cpuid; bool enforce_cpuid; bool expose_kvm; diff --git a/target/i386/hyperv-proto.h b/target/i386/hyperv-proto.h index d6d5a79293..1e6800ba43 100644 --- a/target/i386/hyperv-proto.h +++ b/target/i386/hyperv-proto.h @@ -18,6 +18,7 @@ #define HV_CPUID_FEATURES 0x4003 #define HV_CPUID_ENLIGHTMENT_INFO 0x4004 #define HV_CPUID_IMPLEMENT_LIMITS 0x4005 +#define HV_CPUID_NESTED_FEATURES 0x400A #define HV_CPUID_MIN 0x4005 #define HV_CPUID_MAX 0x4000 #define HV_HYPERVISOR_PRESENT_BIT 0x8000 @@ -59,6 +60,7 @@ #define HV_SYSTEM_RESET_RECOMMENDED (1u << 4) #define 
HV_RELAXED_TIMING_RECOMMENDED (1u << 5) #define HV_EX_PROCESSOR_MASKS_RECOMMENDED (1u << 11) +#define HV_ENLIGHTENED_VMCS_RECOMMENDED (1u << 14) /* * Basic virtualized MSRs diff --git a/target/i386/kvm.c b/target/i386/kvm.c index a46ad102d8..6f10abcf6f 100644 --- a/target/i386/kvm.c +++ b/target/i386/kvm.c @@ -798,6 +798,7 @@ int kvm_arch_init_vcpu(CPUState *cs) uint32_t unused; struct kvm_cpuid_entry2 *c; uint32_t signature[3]; +uint16_t evmcs_version; int kvm_base = KVM_CPUID_SIGNATURE; int r; Error *local_err = NULL; @@ -841,7 +842,8 @@ int kvm_arch_init_vcpu(CPUState *cs) memset(signature, 0, 12); memcpy(signature, cpu->hyperv_vendor_id, len); } -c->eax = HV_CPUID_MIN; +c->eax = cpu->hyperv_evmcs ? +HV_CPUID_NESTED_FEATURES : HV_CPUID_IMPLEMENT_LIMITS; c->ebx = signature[0]; c->ecx = signature[1]; c->edx = signature[2]; @@ -888,7 +890,16 @@ int kvm_arch_init_vcpu(CPUState *cs) c->eax |= HV_REMOTE_TLB_FLUSH_RECOMMENDED; c->eax |= HV_EX_PROCESSOR_MASKS_RECOMMENDED; } - +if (cpu->hyperv_evmcs) { +if (kvm_vcpu_enable_cap(cs, KVM_CAP_HYPERV_ENLIGHTENED_VMCS, 0, +(uintptr_t)_version)) { +fprintf(stderr, "Hyper-V Enlightened VMCS " +"(requested by 'hv-evmcs' cpu flag) " +"is not supported by kernel\n"); +return -ENOSYS; +} +c->eax |= HV_ENLIGHTENED_VMCS_RECOMMENDED; +} c->ebx = cpu->hyperv_spinlock_attempts; c = _data.entries[cpuid_i++]; @@ -899,6 +910,21 @@ int kvm_arch_init_vcpu(CPUState *cs) kvm_base = KVM_CPUID_SIGNATURE_NEXT; has_msr_hv_hypercall = true; + +if (cpu->hyperv_evmcs) { +__u32 function; + +/* Create zeroed 0x4006..0x4009 leaves */ +for (function = HV_CPUID_IMPLEMENT_LIMITS + 1; + function < HV_CPUID_NESTED_FEATURES; function++) { +c = _data.entries[cpuid_i++]; +c->function = function; +} + +c = _data.entries[cpuid_i++]; +c->function = HV_CPUID_NESTED_FEATURES; +c->eax = evmcs_version; +} } if (cpu->expose_kvm) { -- 2.17.2
Re: [Qemu-devel] [PATCH v2 2/2] x86: hv_evmcs CPU flag support
Roman Kagan writes: > On Mon, Oct 22, 2018 at 06:55:06PM +0200, Vitaly Kuznetsov wrote: >> Adds a new CPU flag to enable the Enlightened VMCS KVM feature. >> QEMU enables KVM_CAP_HYPERV_ENLIGHTENED_VMCS and gets back the >> version to be advertised in lower 16 bits of CPUID.0x400A:EAX. >> >> Suggested-by: Ladi Prosek >> Signed-off-by: Vitaly Kuznetsov >> --- >> Changes since v1: >> - Throw away HV_CPUID_MIN_NESTED. >> - Create zeroed 0x4006-0x4009 CPUID leaves. >> --- >> target/i386/cpu.c | 1 + >> target/i386/cpu.h | 1 + >> target/i386/hyperv-proto.h | 2 ++ >> target/i386/kvm.c | 30 -- >> 4 files changed, 32 insertions(+), 2 deletions(-) > > Reviewed-by: Roman Kagan With soft freeze date being today, would it still be possible to squeeze this one in? Thanks! -- Vitaly
[Qemu-devel] [PATCH 0/2] i386/kvm: add support for Hyper-V Enlightened VMCS
Hyper-V Enlightened VMCS feature was merged to KVM, enable it in Qemu. The feature gives us a significant performance boost for Hyper-V on KVM deployments. The first patch of the series is posted for completeness only. Vitaly Kuznetsov (2): linux-headers: update x86: hv_evmcs CPU flag support linux-headers/asm-powerpc/kvm.h | 1 + linux-headers/asm-x86/kvm.h | 8 ++-- linux-headers/linux/kvm.h | 16 ++-- target/i386/cpu.c | 1 + target/i386/cpu.h | 1 + target/i386/hyperv-proto.h | 3 +++ target/i386/kvm.c | 22 +++--- 7 files changed, 45 insertions(+), 7 deletions(-) -- 2.17.2
[Qemu-devel] [PATCH 1/2] linux-headers: update
Update to kvm/next commit 1e58e5e59148 ("KVM: VMX: enable nested virtualization by default"). kvm_put_vcpu_events() needs to be fixed as 'pad' was renamed to 'pending' in 'struct kvm_vcpu_events' Signed-off-by: Vitaly Kuznetsov --- linux-headers/asm-powerpc/kvm.h | 1 + linux-headers/asm-x86/kvm.h | 8 ++-- linux-headers/linux/kvm.h | 16 ++-- target/i386/kvm.c | 2 +- 4 files changed, 22 insertions(+), 5 deletions(-) diff --git a/linux-headers/asm-powerpc/kvm.h b/linux-headers/asm-powerpc/kvm.h index 1b32b56a03..8c876c166e 100644 --- a/linux-headers/asm-powerpc/kvm.h +++ b/linux-headers/asm-powerpc/kvm.h @@ -634,6 +634,7 @@ struct kvm_ppc_cpu_char { #define KVM_REG_PPC_DEC_EXPIRY (KVM_REG_PPC | KVM_REG_SIZE_U64 | 0xbe) #define KVM_REG_PPC_ONLINE (KVM_REG_PPC | KVM_REG_SIZE_U32 | 0xbf) +#define KVM_REG_PPC_PTCR (KVM_REG_PPC | KVM_REG_SIZE_U64 | 0xc0) /* Transactional Memory checkpointed state: * This is all GPRs, all VSX regs and a subset of SPRs diff --git a/linux-headers/asm-x86/kvm.h b/linux-headers/asm-x86/kvm.h index fd23d5778e..dabfcf7c39 100644 --- a/linux-headers/asm-x86/kvm.h +++ b/linux-headers/asm-x86/kvm.h @@ -288,6 +288,7 @@ struct kvm_reinject_control { #define KVM_VCPUEVENT_VALID_SIPI_VECTOR0x0002 #define KVM_VCPUEVENT_VALID_SHADOW 0x0004 #define KVM_VCPUEVENT_VALID_SMM0x0008 +#define KVM_VCPUEVENT_VALID_PAYLOAD0x0010 /* Interrupt shadow states */ #define KVM_X86_SHADOW_INT_MOV_SS 0x01 @@ -299,7 +300,7 @@ struct kvm_vcpu_events { __u8 injected; __u8 nr; __u8 has_error_code; - __u8 pad; + __u8 pending; __u32 error_code; } exception; struct { @@ -322,7 +323,9 @@ struct kvm_vcpu_events { __u8 smm_inside_nmi; __u8 latched_init; } smi; - __u32 reserved[9]; + __u8 reserved[27]; + __u8 exception_has_payload; + __u64 exception_payload; }; /* for KVM_GET/SET_DEBUGREGS */ @@ -381,6 +384,7 @@ struct kvm_sync_regs { #define KVM_STATE_NESTED_GUEST_MODE0x0001 #define KVM_STATE_NESTED_RUN_PENDING 0x0002 +#define KVM_STATE_NESTED_EVMCS 0x0004 #define 
KVM_STATE_NESTED_SMM_GUEST_MODE0x0001 #define KVM_STATE_NESTED_SMM_VMXON 0x0002 diff --git a/linux-headers/linux/kvm.h b/linux-headers/linux/kvm.h index 83ba4eb571..f11a7eb49c 100644 --- a/linux-headers/linux/kvm.h +++ b/linux-headers/linux/kvm.h @@ -420,13 +420,19 @@ struct kvm_run { struct kvm_coalesced_mmio_zone { __u64 addr; __u32 size; - __u32 pad; + union { + __u32 pad; + __u32 pio; + }; }; struct kvm_coalesced_mmio { __u64 phys_addr; __u32 len; - __u32 pad; + union { + __u32 pad; + __u32 pio; + }; __u8 data[8]; }; @@ -719,6 +725,7 @@ struct kvm_ppc_one_seg_page_size { #define KVM_PPC_PAGE_SIZES_REAL0x0001 #define KVM_PPC_1T_SEGMENTS0x0002 +#define KVM_PPC_NO_HASH0x0004 struct kvm_ppc_smmu_info { __u64 flags; @@ -953,6 +960,11 @@ struct kvm_ppc_resize_hpt { #define KVM_CAP_NESTED_STATE 157 #define KVM_CAP_ARM_INJECT_SERROR_ESR 158 #define KVM_CAP_MSR_PLATFORM_INFO 159 +#define KVM_CAP_PPC_NESTED_HV 160 +#define KVM_CAP_HYPERV_SEND_IPI 161 +#define KVM_CAP_COALESCED_PIO 162 +#define KVM_CAP_HYPERV_ENLIGHTENED_VMCS 163 +#define KVM_CAP_EXCEPTION_PAYLOAD 164 #ifdef KVM_CAP_IRQ_ROUTING diff --git a/target/i386/kvm.c b/target/i386/kvm.c index dc4047b02f..a46ad102d8 100644 --- a/target/i386/kvm.c +++ b/target/i386/kvm.c @@ -2686,7 +2686,7 @@ static int kvm_put_vcpu_events(X86CPU *cpu, int level) events.exception.nr = env->exception_injected; events.exception.has_error_code = env->has_error_code; events.exception.error_code = env->error_code; -events.exception.pad = 0; +events.exception.pending = 0; events.interrupt.injected = (env->interrupt_injected >= 0); events.interrupt.nr = env->interrupt_injected; -- 2.17.2
[Qemu-devel] [PATCH 2/2] x86: hv_evmcs CPU flag support
Adds a new CPU flag to enable the Enlightened VMCS KVM feature. QEMU enables KVM_CAP_HYPERV_ENLIGHTENED_VMCS and gets back the version to be advertised in lower 16 bits of CPUID.0x400A:EAX. Suggested-by: Ladi Prosek Signed-off-by: Vitaly Kuznetsov --- target/i386/cpu.c | 1 + target/i386/cpu.h | 1 + target/i386/hyperv-proto.h | 3 +++ target/i386/kvm.c | 20 ++-- 4 files changed, 23 insertions(+), 2 deletions(-) diff --git a/target/i386/cpu.c b/target/i386/cpu.c index c88876dfe3..5c0e84fb99 100644 --- a/target/i386/cpu.c +++ b/target/i386/cpu.c @@ -5564,6 +5564,7 @@ static Property x86_cpu_properties[] = { DEFINE_PROP_BOOL("hv-frequencies", X86CPU, hyperv_frequencies, false), DEFINE_PROP_BOOL("hv-reenlightenment", X86CPU, hyperv_reenlightenment, false), DEFINE_PROP_BOOL("hv-tlbflush", X86CPU, hyperv_tlbflush, false), +DEFINE_PROP_BOOL("hv-evmcs", X86CPU, hyperv_evmcs, false), DEFINE_PROP_BOOL("check", X86CPU, check_cpuid, true), DEFINE_PROP_BOOL("enforce", X86CPU, enforce_cpuid, false), DEFINE_PROP_BOOL("kvm", X86CPU, expose_kvm, true), diff --git a/target/i386/cpu.h b/target/i386/cpu.h index 730c06f80a..013d953b57 100644 --- a/target/i386/cpu.h +++ b/target/i386/cpu.h @@ -1382,6 +1382,7 @@ struct X86CPU { bool hyperv_frequencies; bool hyperv_reenlightenment; bool hyperv_tlbflush; +bool hyperv_evmcs; bool check_cpuid; bool enforce_cpuid; bool expose_kvm; diff --git a/target/i386/hyperv-proto.h b/target/i386/hyperv-proto.h index d6d5a79293..fcb0c416f8 100644 --- a/target/i386/hyperv-proto.h +++ b/target/i386/hyperv-proto.h @@ -18,7 +18,9 @@ #define HV_CPUID_FEATURES 0x4003 #define HV_CPUID_ENLIGHTMENT_INFO 0x4004 #define HV_CPUID_IMPLEMENT_LIMITS 0x4005 +#define HV_CPUID_NESTED_FEATURES 0x400A #define HV_CPUID_MIN 0x4005 +#define HV_CPUID_MIN_NESTED 0x400A #define HV_CPUID_MAX 0x4000 #define HV_HYPERVISOR_PRESENT_BIT 0x8000 @@ -59,6 +61,7 @@ #define HV_SYSTEM_RESET_RECOMMENDED (1u << 4) #define HV_RELAXED_TIMING_RECOMMENDED (1u << 5) #define 
HV_EX_PROCESSOR_MASKS_RECOMMENDED (1u << 11) +#define HV_ENLIGHTENED_VMCS_RECOMMENDED (1u << 14) /* * Basic virtualized MSRs diff --git a/target/i386/kvm.c b/target/i386/kvm.c index a46ad102d8..8e383e7197 100644 --- a/target/i386/kvm.c +++ b/target/i386/kvm.c @@ -798,6 +798,7 @@ int kvm_arch_init_vcpu(CPUState *cs) uint32_t unused; struct kvm_cpuid_entry2 *c; uint32_t signature[3]; +uint16_t evmcs_version; int kvm_base = KVM_CPUID_SIGNATURE; int r; Error *local_err = NULL; @@ -841,7 +842,7 @@ int kvm_arch_init_vcpu(CPUState *cs) memset(signature, 0, 12); memcpy(signature, cpu->hyperv_vendor_id, len); } -c->eax = HV_CPUID_MIN; +c->eax = cpu->hyperv_evmcs ? HV_CPUID_MIN_NESTED : HV_CPUID_MIN; c->ebx = signature[0]; c->ecx = signature[1]; c->edx = signature[2]; @@ -888,7 +889,16 @@ int kvm_arch_init_vcpu(CPUState *cs) c->eax |= HV_REMOTE_TLB_FLUSH_RECOMMENDED; c->eax |= HV_EX_PROCESSOR_MASKS_RECOMMENDED; } - +if (cpu->hyperv_evmcs) { +if (kvm_vcpu_enable_cap(cs, KVM_CAP_HYPERV_ENLIGHTENED_VMCS, 0, +(uintptr_t)_version)) { +fprintf(stderr, "Hyper-V Enlightened VMCS " +"(requested by 'hv-evmcs' cpu flag) " +"is not supported by kernel\n"); +return -ENOSYS; +} +c->eax |= HV_ENLIGHTENED_VMCS_RECOMMENDED; +} c->ebx = cpu->hyperv_spinlock_attempts; c = _data.entries[cpuid_i++]; @@ -899,6 +909,12 @@ int kvm_arch_init_vcpu(CPUState *cs) kvm_base = KVM_CPUID_SIGNATURE_NEXT; has_msr_hv_hypercall = true; + +if (cpu->hyperv_evmcs) { +c = _data.entries[cpuid_i++]; +c->function = HV_CPUID_NESTED_FEATURES; +c->eax = evmcs_version; +} } if (cpu->expose_kvm) { -- 2.17.2
[Qemu-devel] [PATCH v2 1/2] linux-headers: update to pre-4.20 (kvm/queue)
This brings us three new defines: KVM_CAP_ARM_INJECT_SERROR_ESR, KVM_CAP_MSR_PLATFORM_INFO and KVM_CAP_HYPERV_SEND_IPI. Signed-off-by: Vitaly Kuznetsov --- linux-headers/linux/kvm.h | 3 +++ 1 file changed, 3 insertions(+) diff --git a/linux-headers/linux/kvm.h b/linux-headers/linux/kvm.h index 66790724f1..711f5a2c8e 100644 --- a/linux-headers/linux/kvm.h +++ b/linux-headers/linux/kvm.h @@ -951,6 +951,9 @@ struct kvm_ppc_resize_hpt { #define KVM_CAP_HYPERV_TLBFLUSH 155 #define KVM_CAP_S390_HPAGE_1M 156 #define KVM_CAP_NESTED_STATE 157 +#define KVM_CAP_ARM_INJECT_SERROR_ESR 158 +#define KVM_CAP_MSR_PLATFORM_INFO 159 +#define KVM_CAP_HYPERV_SEND_IPI 160 #ifdef KVM_CAP_IRQ_ROUTING -- 2.17.1
[Qemu-devel] [PATCH v2 0/2] i386/kvm: enable Hyper-V PV IPI mechanism
Hyper-V PV IPI enlightenment was merged to KVM (kvm/queue currently). Support the feature enablement in Qemu. Changes since v1 [Roman Kagan]: - Strip kvm.h with 'make headers_install' before putting to Qemu. - Add Reviewed-by: to PATCH2 Vitaly Kuznetsov (2): linux-headers: update to pre-4.20 (kvm/queue) i386/kvm: add support for Hyper-V IPI send linux-headers/linux/kvm.h | 3 +++ target/i386/cpu.c | 1 + target/i386/cpu.h | 1 + target/i386/hyperv-proto.h | 1 + target/i386/kvm.c | 14 +- 5 files changed, 19 insertions(+), 1 deletion(-) -- 2.17.1
Re: [Qemu-devel] [PATCH 1/2] linux-headers: update to pre-4.20 (kvm/queue)
Roman Kagan writes: > On Mon, Oct 08, 2018 at 01:27:25PM +0200, Vitaly Kuznetsov wrote: >> This brings us three new defines: KVM_CAP_ARM_INJECT_SERROR_ESR, >> KVM_CAP_MSR_PLATFORM_INFO and KVM_CAP_HYPERV_SEND_IPI. >> >> Signed-off-by: Vitaly Kuznetsov >> --- >> linux-headers/linux/kvm.h | 7 +-- >> 1 file changed, 5 insertions(+), 2 deletions(-) >> >> diff --git a/linux-headers/linux/kvm.h b/linux-headers/linux/kvm.h >> index 66790724f1..043f1e58b6 100644 >> --- a/linux-headers/linux/kvm.h >> +++ b/linux-headers/linux/kvm.h >> @@ -9,7 +9,7 @@ >> */ >> >> #include >> - >> +#include >> #include >> #include >> >> @@ -481,7 +481,7 @@ struct kvm_dirty_log { >> __u32 slot; >> __u32 padding1; >> union { >> -void *dirty_bitmap; /* one bit per page */ >> +void __user *dirty_bitmap; /* one bit per page */ > > Hmm, I thought "__user" and friends are supposed to get stripped while > "make headers_install". ... and of course I forgot about this, will send v2, thanks! > Does it build like this? depends on what you've done to your build system) -- Vitaly
Re: [Qemu-devel] [PATCH 1/2] linux-headers: update to pre-4.20 (kvm/queue)
Roman Kagan writes: > On Tue, Oct 09, 2018 at 03:01:12PM +0200, Vitaly Kuznetsov wrote: >> Roman Kagan writes: >> >> > On Mon, Oct 08, 2018 at 01:27:25PM +0200, Vitaly Kuznetsov wrote: >> >> This brings us three new defines: KVM_CAP_ARM_INJECT_SERROR_ESR, >> >> KVM_CAP_MSR_PLATFORM_INFO and KVM_CAP_HYPERV_SEND_IPI. >> >> >> >> Signed-off-by: Vitaly Kuznetsov >> >> --- >> >> linux-headers/linux/kvm.h | 7 +-- >> >> 1 file changed, 5 insertions(+), 2 deletions(-) >> >> >> >> diff --git a/linux-headers/linux/kvm.h b/linux-headers/linux/kvm.h >> >> index 66790724f1..043f1e58b6 100644 >> >> --- a/linux-headers/linux/kvm.h >> >> +++ b/linux-headers/linux/kvm.h >> >> @@ -9,7 +9,7 @@ >> >> */ >> >> >> >> #include >> >> - >> >> +#include >> >> #include >> >> #include >> >> >> >> @@ -481,7 +481,7 @@ struct kvm_dirty_log { >> >> __u32 slot; >> >> __u32 padding1; >> >> union { >> >> - void *dirty_bitmap; /* one bit per page */ >> >> + void __user *dirty_bitmap; /* one bit per page */ >> > >> > Hmm, I thought "__user" and friends are supposed to get stripped while >> > "make headers_install". >> >> ... and of course I forgot about this, will send v2, thanks! > > Are you trying to say you didn't follow the regular linux header > harvesting process, which is to use scripts/update-linux-headers.sh > (which does 'make headers_install' and then applies further sanitizing, > so that problems like this wouldn't have popped up)? I think you > should, unless this is an interim patchset pending the merge of the > corresponding KVM changes in the mainline kernel. Honestly I didn't know about the standard process but looking at the output: $ scripts/update-linux-headers.sh PATH/TO/LINUX ... 
$ git diff --stat include/standard-headers/linux/input.h | 9 + linux-headers/asm-arm/kvm.h| 13 + linux-headers/asm-arm64/kvm.h | 13 + linux-headers/asm-s390/kvm.h | 2 ++ linux-headers/asm-x86/kvm.h| 1 + linux-headers/linux/kvm.h | 3 +++ linux-headers/linux/vfio.h | 2 ++ linux-headers/linux/vhost.h| 2 +- 8 files changed, 40 insertions(+), 5 deletions(-) I'm not sure if it would make sense to do this instead: the definition I need is currently in kvm/queue and I'm not exactly sure that updating other headers from there (e.g. linux/input.h) makes any sense. -- Vitaly
[Qemu-devel] [PATCH v2 2/2] i386/kvm: add support for Hyper-V IPI send
Hyper-V PV IPI support is merged to KVM, enable the feature in Qemu. When enabled, this allows Windows guests to send IPIs to other vCPUs with a single hypercall even when there are >64 vCPUs in the request. Signed-off-by: Vitaly Kuznetsov Reviewed-by: Roman Kagan --- target/i386/cpu.c | 1 + target/i386/cpu.h | 1 + target/i386/hyperv-proto.h | 1 + target/i386/kvm.c | 14 +- 4 files changed, 16 insertions(+), 1 deletion(-) diff --git a/target/i386/cpu.c b/target/i386/cpu.c index c88876dfe3..32ea041c06 100644 --- a/target/i386/cpu.c +++ b/target/i386/cpu.c @@ -5564,6 +5564,7 @@ static Property x86_cpu_properties[] = { DEFINE_PROP_BOOL("hv-frequencies", X86CPU, hyperv_frequencies, false), DEFINE_PROP_BOOL("hv-reenlightenment", X86CPU, hyperv_reenlightenment, false), DEFINE_PROP_BOOL("hv-tlbflush", X86CPU, hyperv_tlbflush, false), +DEFINE_PROP_BOOL("hv-ipi", X86CPU, hyperv_ipi, false), DEFINE_PROP_BOOL("check", X86CPU, check_cpuid, true), DEFINE_PROP_BOOL("enforce", X86CPU, enforce_cpuid, false), DEFINE_PROP_BOOL("kvm", X86CPU, expose_kvm, true), diff --git a/target/i386/cpu.h b/target/i386/cpu.h index 730c06f80a..caa1544b2e 100644 --- a/target/i386/cpu.h +++ b/target/i386/cpu.h @@ -1382,6 +1382,7 @@ struct X86CPU { bool hyperv_frequencies; bool hyperv_reenlightenment; bool hyperv_tlbflush; +bool hyperv_ipi; bool check_cpuid; bool enforce_cpuid; bool expose_kvm; diff --git a/target/i386/hyperv-proto.h b/target/i386/hyperv-proto.h index d6d5a79293..87f36d14e8 100644 --- a/target/i386/hyperv-proto.h +++ b/target/i386/hyperv-proto.h @@ -58,6 +58,7 @@ #define HV_APIC_ACCESS_RECOMMENDED (1u << 3) #define HV_SYSTEM_RESET_RECOMMENDED (1u << 4) #define HV_RELAXED_TIMING_RECOMMENDED (1u << 5) +#define HV_CLUSTER_IPI_RECOMMENDED (1u << 10) #define HV_EX_PROCESSOR_MASKS_RECOMMENDED (1u << 11) /* diff --git a/target/i386/kvm.c b/target/i386/kvm.c index dc4047b02f..795aa52938 100644 --- a/target/i386/kvm.c +++ b/target/i386/kvm.c @@ -608,7 +608,8 @@ static bool hyperv_enabled(X86CPU 
*cpu) cpu->hyperv_synic || cpu->hyperv_stimer || cpu->hyperv_reenlightenment || -cpu->hyperv_tlbflush); +cpu->hyperv_tlbflush || +cpu->hyperv_ipi); } static int kvm_arch_set_tsc_khz(CPUState *cs) @@ -888,6 +889,17 @@ int kvm_arch_init_vcpu(CPUState *cs) c->eax |= HV_REMOTE_TLB_FLUSH_RECOMMENDED; c->eax |= HV_EX_PROCESSOR_MASKS_RECOMMENDED; } +if (cpu->hyperv_ipi) { +if (kvm_check_extension(cs->kvm_state, +KVM_CAP_HYPERV_SEND_IPI) <= 0) { +fprintf(stderr, "Hyper-V IPI send support " +"(requested by 'hv-ipi' cpu flag) " +" is not supported by kernel\n"); +return -ENOSYS; +} +c->eax |= HV_CLUSTER_IPI_RECOMMENDED; +c->eax |= HV_EX_PROCESSOR_MASKS_RECOMMENDED; +} c->ebx = cpu->hyperv_spinlock_attempts; -- 2.17.1
[Qemu-devel] [PATCH] i386: Enable NPT and NRIPSAVE for AMD CPUs
Modern AMD CPUs support NPT and NRIPSAVE features and KVM exposes these when present. NRIPSAVE appeared somewhere in Opteron_G3 lifetime (e.g. QuadCore AMD Opteron 2378 has it but QuadCore AMD Opteron HE 2344 doesn't), NPT was introduced a bit earlier. Add the FEAT_SVM leaf to Opteron_G4/G5 and EPYC/EPYC-IBPB cpu models. Signed-off-by: Vitaly Kuznetsov --- target/i386/cpu.c | 8 1 file changed, 8 insertions(+) diff --git a/target/i386/cpu.c b/target/i386/cpu.c index 677a3bd5fb..eb8f9079a6 100644 --- a/target/i386/cpu.c +++ b/target/i386/cpu.c @@ -2766,6 +2766,8 @@ static X86CPUDefinition builtin_x86_defs[] = { CPUID_EXT3_3DNOWPREFETCH | CPUID_EXT3_MISALIGNSSE | CPUID_EXT3_SSE4A | CPUID_EXT3_ABM | CPUID_EXT3_SVM | CPUID_EXT3_LAHF_LM, +.features[FEAT_SVM] = +CPUID_SVM_NPT | CPUID_SVM_NRIPSAVE, /* no xsaveopt! */ .xlevel = 0x801A, .model_id = "AMD Opteron 62xx class CPU", @@ -2797,6 +2799,8 @@ static X86CPUDefinition builtin_x86_defs[] = { CPUID_EXT3_3DNOWPREFETCH | CPUID_EXT3_MISALIGNSSE | CPUID_EXT3_SSE4A | CPUID_EXT3_ABM | CPUID_EXT3_SVM | CPUID_EXT3_LAHF_LM, +.features[FEAT_SVM] = +CPUID_SVM_NPT | CPUID_SVM_NRIPSAVE, /* no xsaveopt! */ .xlevel = 0x801A, .model_id = "AMD Opteron 63xx class CPU", @@ -2843,6 +2847,8 @@ static X86CPUDefinition builtin_x86_defs[] = { CPUID_XSAVE_XGETBV1, .features[FEAT_6_EAX] = CPUID_6_EAX_ARAT, +.features[FEAT_SVM] = +CPUID_SVM_NPT | CPUID_SVM_NRIPSAVE, .xlevel = 0x801E, .model_id = "AMD EPYC Processor", .cache_info = _cache_info, @@ -2891,6 +2897,8 @@ static X86CPUDefinition builtin_x86_defs[] = { CPUID_XSAVE_XGETBV1, .features[FEAT_6_EAX] = CPUID_6_EAX_ARAT, +.features[FEAT_SVM] = +CPUID_SVM_NPT | CPUID_SVM_NRIPSAVE, .xlevel = 0x801E, .model_id = "AMD EPYC Processor (with IBPB)", .cache_info = _cache_info, -- 2.20.1
[Qemu-devel] [PATCH v2] i386: Enable NPT and NRIPSAVE for AMD CPUs
Modern AMD CPUs support NPT and NRIPSAVE features and KVM exposes these when present. NRIPSAVE appeared somewhere in Opteron_G3 lifetime (e.g. QuadCore AMD Opteron 2378 has it but QuadCore AMD Opteron HE 2344 doesn't), NPT was introduced a bit earlier. Add the FEAT_SVM leaf to Opteron_G4/G5 and EPYC/EPYC-IBPB cpu models. Signed-off-by: Vitaly Kuznetsov --- Changes since v1: - add npt=off,nrip-save=off to pc_compat_3_1 [Eduardo Habkost] --- hw/i386/pc.c | 8 target/i386/cpu.c | 8 2 files changed, 16 insertions(+) diff --git a/hw/i386/pc.c b/hw/i386/pc.c index 73d688f842..9fc80a9f19 100644 --- a/hw/i386/pc.c +++ b/hw/i386/pc.c @@ -114,7 +114,15 @@ GlobalProperty pc_compat_3_1[] = { { "intel-iommu", "dma-drain", "off" }, { "Opteron_G3" "-" TYPE_X86_CPU, "rdtscp", "off" }, { "Opteron_G4" "-" TYPE_X86_CPU, "rdtscp", "off" }, +{ "Opteron_G4" "-" TYPE_X86_CPU, "npt", "off" }, +{ "Opteron_G4" "-" TYPE_X86_CPU, "nrip-save", "off" }, { "Opteron_G5" "-" TYPE_X86_CPU, "rdtscp", "off" }, +{ "Opteron_G5" "-" TYPE_X86_CPU, "npt", "off" }, +{ "Opteron_G5" "-" TYPE_X86_CPU, "nrip-save", "off" }, +{ "EPYC" "-" TYPE_X86_CPU, "npt", "off" }, +{ "EPYC" "-" TYPE_X86_CPU, "nrip-save", "off" }, +{ "EPYC-IBPB" "-" TYPE_X86_CPU, "npt", "off" }, +{ "EPYC-IBPB" "-" TYPE_X86_CPU, "nrip-save", "off" }, { "Skylake-Client" "-" TYPE_X86_CPU, "mpx", "on" }, { "Skylake-Client-IBRS" "-" TYPE_X86_CPU, "mpx", "on" }, { "Skylake-Server" "-" TYPE_X86_CPU, "mpx", "on" }, diff --git a/target/i386/cpu.c b/target/i386/cpu.c index 2f5412592d..d170637599 100644 --- a/target/i386/cpu.c +++ b/target/i386/cpu.c @@ -2801,6 +2801,8 @@ static X86CPUDefinition builtin_x86_defs[] = { CPUID_EXT3_3DNOWPREFETCH | CPUID_EXT3_MISALIGNSSE | CPUID_EXT3_SSE4A | CPUID_EXT3_ABM | CPUID_EXT3_SVM | CPUID_EXT3_LAHF_LM, +.features[FEAT_SVM] = +CPUID_SVM_NPT | CPUID_SVM_NRIPSAVE, /* no xsaveopt! 
*/ .xlevel = 0x801A, .model_id = "AMD Opteron 62xx class CPU", @@ -2831,6 +2833,8 @@ static X86CPUDefinition builtin_x86_defs[] = { CPUID_EXT3_3DNOWPREFETCH | CPUID_EXT3_MISALIGNSSE | CPUID_EXT3_SSE4A | CPUID_EXT3_ABM | CPUID_EXT3_SVM | CPUID_EXT3_LAHF_LM, +.features[FEAT_SVM] = +CPUID_SVM_NPT | CPUID_SVM_NRIPSAVE, /* no xsaveopt! */ .xlevel = 0x801A, .model_id = "AMD Opteron 63xx class CPU", @@ -2877,6 +2881,8 @@ static X86CPUDefinition builtin_x86_defs[] = { CPUID_XSAVE_XGETBV1, .features[FEAT_6_EAX] = CPUID_6_EAX_ARAT, +.features[FEAT_SVM] = +CPUID_SVM_NPT | CPUID_SVM_NRIPSAVE, .xlevel = 0x801E, .model_id = "AMD EPYC Processor", .cache_info = _cache_info, @@ -2925,6 +2931,8 @@ static X86CPUDefinition builtin_x86_defs[] = { CPUID_XSAVE_XGETBV1, .features[FEAT_6_EAX] = CPUID_6_EAX_ARAT, +.features[FEAT_SVM] = +CPUID_SVM_NPT | CPUID_SVM_NRIPSAVE, .xlevel = 0x801E, .model_id = "AMD EPYC Processor (with IBPB)", .cache_info = _cache_info, -- 2.20.1
[Qemu-devel] [PATCH RFC] i386: Enable NPT and NRIPSAVE for Epyc CPUs
Epyc CPUs support NPT and NRIPSAVE features and KVM exposes these when present. Add them to EPYC and EPYC-IBPB cpu models. Signed-off-by: Vitaly Kuznetsov --- - RFC part: I'm not sure when these features first appeared, we may want to modify some Opteron_* models too. --- target/i386/cpu.c | 4 1 file changed, 4 insertions(+) diff --git a/target/i386/cpu.c b/target/i386/cpu.c index 677a3bd5fb..0a10fbeccc 100644 --- a/target/i386/cpu.c +++ b/target/i386/cpu.c @@ -2843,6 +2843,8 @@ static X86CPUDefinition builtin_x86_defs[] = { CPUID_XSAVE_XGETBV1, .features[FEAT_6_EAX] = CPUID_6_EAX_ARAT, +.features[FEAT_SVM] = +CPUID_SVM_NPT | CPUID_SVM_NRIPSAVE, .xlevel = 0x801E, .model_id = "AMD EPYC Processor", .cache_info = _cache_info, @@ -2891,6 +2893,8 @@ static X86CPUDefinition builtin_x86_defs[] = { CPUID_XSAVE_XGETBV1, .features[FEAT_6_EAX] = CPUID_6_EAX_ARAT, +.features[FEAT_SVM] = +CPUID_SVM_NPT | CPUID_SVM_NRIPSAVE, .xlevel = 0x801E, .model_id = "AMD EPYC Processor (with IBPB)", .cache_info = _cache_info, -- 2.19.2
Re: [Qemu-devel] [PATCH] i386/kvm: expose HV_CPUID_ENLIGHTMENT_INFO.EAX and HV_CPUID_NESTED_FEATURES.EAX as feature words
Eduardo Habkost writes: > On Wed, Dec 19, 2018 at 06:25:06PM +0100, Vitaly Kuznetsov wrote: >> Eduardo Habkost writes: >> >> > On Mon, Dec 03, 2018 at 03:17:06PM +0100, Vitaly Kuznetsov wrote: >> >> Eduardo Habkost writes: >> > [...] >> >> > But note that we might still be able to move the existing >> >> > "hyperv_*" features to feature_word_info[].feat_names. We just >> >> > need to keep the same semantics (e.g. enable >> >> > HV_HYPERCALL_AVAILABLE automatically when some features are set). >> >> > >> >> > Maybe we can make some of the feature properties read-only. This >> >> > way we can give them meaningful names for debugging and error >> >> > messages, even if we don't want to make them configurable >> >> > directly. >> >> >> >> I'd suggest (if there are no objections of course) we do this separately >> >> from this patch. [...] >> > >> > Agreed. >> > >> >> Paolo, Eduardo, >> >> in case there are no concerns here, could you please pick this patch up? >> Thanks! > > Queued, thanks! > > Can you please send the comment you wrote about feat_names as a > follow-up patch? Oops, sorry, I just realized I promised to send out v2 with it and apparently never did. Will send out a follow-up patch shortly. Thanks! -- Vitaly
[Qemu-devel] [PATCH] i386/kvm: add a comment explaining why .feat_names are commented out for Hyper-V feature bits
Hyper-V .feat_names are, unlike hardware features, commented out and it is not obvious why we do that. Document the current status quo. Signed-off-by: Vitaly Kuznetsov --- target/i386/cpu.c | 7 +++ 1 file changed, 7 insertions(+) diff --git a/target/i386/cpu.c b/target/i386/cpu.c index 677a3bd5fb..18a486c5a0 100644 --- a/target/i386/cpu.c +++ b/target/i386/cpu.c @@ -929,6 +929,13 @@ static FeatureWordInfo feature_word_info[FEATURE_WORDS] = { */ .no_autoenable_flags = ~0U, }, +/* + * .feat_names are commented out for Hyper-V enlightenments because we + * don't want to have two different ways for enabling them on QEMU command + * line. Some features (e.g. "hyperv_time", "hyperv_vapic", ...) require + * enabling several feature bits simultaneously, exposing these bits + * individually may just confuse guests. + */ [FEAT_HYPERV_EAX] = { .type = CPUID_FEATURE_WORD, .feat_names = { -- 2.19.2
Re: [Qemu-devel] [PATCH RFC] i386: Enable NPT and NRIPSAVE for Epyc CPUs
Vitaly Kuznetsov writes: > Epyc CPUs support NPT and NRIPSAVE features and KVM exposes these when > present. Add them to EPYC and EPYC-IBPB cpu models. > > Signed-off-by: Vitaly Kuznetsov > --- > - RFC part: I'm not sure when these features first appeared, we may want to > modify some Opteron_* models too. According to http://instlatx64.atw.hu/ data (thanks to Radim!) NRIPSAVE appeared somewhere in Opteron_G3 lifetime (e.g. QuadCore AMD Opteron 2378 has it, QuadCore AMD Opteron HE 2344 doesn't), NPT was introduced a bit earlier. To be on the safe side we can probably add NPT and NRIPSAVE to Opteron_G4 and Opteron_G5 too and leave Opteron_G3 as it is. -- Vitaly
Re: [Qemu-devel] [PATCH] i386/kvm: expose HV_CPUID_ENLIGHTMENT_INFO.EAX and HV_CPUID_NESTED_FEATURES.EAX as feature words
Eduardo Habkost writes: >>[...] Some time ago when merging direct mode stimers for KVM >> Paolo suggested we stop adding capabilities to KVM for each individual >> feature and replace them with something like KVM_GET_SUPPORTED_HV_CPUID >> ioctl returning all Hyper-V related feature words. When this is done we >> can reconsider how Qemu discovers Hyper-V related KVM features and as >> part of this work we can take a closer look at feature words and >> feat_names. > > Why a separate ioctl instead of extending GET_SUPPORTED_CPUID? Unfortunately both KVM and Hyper-V use feature leaves 0x40000000, 0x40000001 (so it's up to the userspace - qemu in our case - what to expose to the guest) and GET_SUPPORTED_CPUID already returns KVM's. Not sure this can be changed (to e.g. returning these leaves twice with different flags) without breaking userspace. New ioctl is safer. -- Vitaly
Re: [Qemu-devel] [PATCH] i386/kvm: expose HV_CPUID_ENLIGHTMENT_INFO.EAX and HV_CPUID_NESTED_FEATURES.EAX as feature words
Eduardo Habkost writes: > On Thu, Nov 29, 2018 at 12:51:55PM +0100, Vitaly Kuznetsov wrote: >> Paolo Bonzini writes: >> >> > On 26/11/18 14:59, Vitaly Kuznetsov wrote: >> >> It was found that QMP users of QEMU (e.g. libvirt) may need >> >> HV_CPUID_ENLIGHTMENT_INFO.EAX/HV_CPUID_NESTED_FEATURES.EAX information. In >> >> particular, 'hv_tlbflush' and 'hv_evmcs' enlightenments are only exposed >> >> in >> >> HV_CPUID_ENLIGHTMENT_INFO.EAX. >> >> >> >> HV_CPUID_NESTED_FEATURES.EAX is exposed for two reasons: convenience >> >> (we don't need to export it from hyperv_handle_properties() and as >> >> future-proof for Enlightened MSR-Bitmap, PV EPT invalidation and >> >> direct virtual flush features. >> >> >> >> Signed-off-by: Vitaly Kuznetsov >> > >> > Can you add a comment to feature_word_info, explaining why the >> > feat_names are not set? >> >> I had to do some code archeology to make sure I understand, I think it >> goes back to >> >> http://lists.gnu.org/archive/html/qemu-devel/2016-06/msg06579.html >> >> So the comment (probably added before FEAT_HYPERV_EAX definition) would >> be >> >> ".feat_names are commented out for Hyper-V enlightenments because we >> don't want to have two different ways for enabling them on QEMU command >> line. Some features (e.g. "hyperv_time", "hyperv_vapic", ...) require >> enabling several feature bits simultaneously, exposing these bits >> individually may just confuse guests." >> >> Would do? > > That's an accurate description. > Thanks, I'll send v2 out with it. > But note that we might still be able to move the existing > "hyperv_*" features to feature_word_info[].feat_names. We just > need to keep the same semantics (e.g. enable > HV_HYPERCALL_AVAILABLE automatically when some features are set). > > Maybe we can make some of the feature properties read-only. This > way we can give them meaningful names for debugging and error > messages, even if we don't want to make them configurable > directly. 
I'd suggest (if there are no objections of course) we do this separately from this patch. Some time ago when merging direct mode stimers for KVM Paolo suggested we stop adding capabilities to KVM for each individual feature and replace them with something like KVM_GET_SUPPORTED_HV_CPUID ioctl returning all Hyper-V related feature words. When this is done we can reconsider how Qemu discovers Hyper-V related KVM features and as part of this work we can take a closer look at feature words and feat_names. -- Vitaly
[Qemu-devel] [PATCH v2] i386/kvm: expose HV_CPUID_ENLIGHTMENT_INFO.EAX and HV_CPUID_NESTED_FEATURES.EAX as feature words
It was found that QMP users of QEMU (e.g. libvirt) may need HV_CPUID_ENLIGHTMENT_INFO.EAX/HV_CPUID_NESTED_FEATURES.EAX information. In particular, 'hv_tlbflush' and 'hv_evmcs' enlightenments are only exposed in HV_CPUID_ENLIGHTMENT_INFO.EAX. HV_CPUID_NESTED_FEATURES.EAX is exposed for two reasons: convenience (we don't need to export it from hyperv_handle_properties() and as future-proof for Enlightened MSR-Bitmap, PV EPT invalidation and direct virtual flush features. While on it, add a comment explaining why .feat_names are currently commented out for Hyper-V feature words. Signed-off-by: Vitaly Kuznetsov Reviewed-by: Roman Kagan --- Changes since v1: - Add a comment explaining why .feat_names are currently commented out for Hyper-V feature words. [Paolo Bonzini] - Add Roman's R-b tag. --- target/i386/cpu.c | 37 + target/i386/cpu.h | 2 ++ target/i386/kvm.c | 85 +-- 3 files changed, 84 insertions(+), 40 deletions(-) diff --git a/target/i386/cpu.c b/target/i386/cpu.c index f81d35e1f9..6685c8e5f6 100644 --- a/target/i386/cpu.c +++ b/target/i386/cpu.c @@ -929,6 +929,13 @@ static FeatureWordInfo feature_word_info[FEATURE_WORDS] = { */ .no_autoenable_flags = ~0U, }, +/* + * .feat_names are commented out for Hyper-V enlightenments because we + * don't want to have two different ways for enabling them on QEMU command + * line. Some features (e.g. "hyperv_time", "hyperv_vapic", ...) require + * enabling several feature bits simultaneously, exposing these bits + * individually may just confuse guests. 
+ */ [FEAT_HYPERV_EAX] = { .type = CPUID_FEATURE_WORD, .feat_names = { @@ -980,6 +987,36 @@ static FeatureWordInfo feature_word_info[FEATURE_WORDS] = { }, .cpuid = { .eax = 0x4003, .reg = R_EDX, }, }, +[FEAT_HV_RECOMM_EAX] = { +.type = CPUID_FEATURE_WORD, +.feat_names = { +NULL /* hv_recommend_pv_as_switch */, +NULL /* hv_recommend_pv_tlbflush_local */, +NULL /* hv_recommend_pv_tlbflush_remote */, +NULL /* hv_recommend_msr_apic_access */, +NULL /* hv_recommend_msr_reset */, +NULL /* hv_recommend_relaxed_timing */, +NULL /* hv_recommend_dma_remapping */, +NULL /* hv_recommend_int_remapping */, +NULL /* hv_recommend_x2apic_msrs */, +NULL /* hv_recommend_autoeoi_deprecation */, +NULL /* hv_recommend_pv_ipi */, +NULL /* hv_recommend_ex_hypercalls */, +NULL /* hv_hypervisor_is_nested */, +NULL /* hv_recommend_int_mbec */, +NULL /* hv_recommend_evmcs */, +NULL, +NULL, NULL, NULL, NULL, +NULL, NULL, NULL, NULL, +NULL, NULL, NULL, NULL, +NULL, NULL, NULL, NULL, +}, +.cpuid = { .eax = 0x4004, .reg = R_EAX, }, +}, +[FEAT_HV_NESTED_EAX] = { +.type = CPUID_FEATURE_WORD, +.cpuid = { .eax = 0x400A, .reg = R_EAX, }, +}, [FEAT_SVM] = { .type = CPUID_FEATURE_WORD, .feat_names = { diff --git a/target/i386/cpu.h b/target/i386/cpu.h index 9c52d0cbeb..dd881510ac 100644 --- a/target/i386/cpu.h +++ b/target/i386/cpu.h @@ -497,6 +497,8 @@ typedef enum FeatureWord { FEAT_HYPERV_EAX,/* CPUID[4000_0003].EAX */ FEAT_HYPERV_EBX,/* CPUID[4000_0003].EBX */ FEAT_HYPERV_EDX,/* CPUID[4000_0003].EDX */ +FEAT_HV_RECOMM_EAX, /* CPUID[4000_0004].EAX */ +FEAT_HV_NESTED_EAX, /* CPUID[4000_000A].EAX */ FEAT_SVM, /* CPUID[8000_000A].EDX */ FEAT_XSAVE, /* CPUID[EAX=0xd,ECX=1].EAX */ FEAT_6_EAX, /* CPUID[6].EAX */ diff --git a/target/i386/kvm.c b/target/i386/kvm.c index f524e7d929..b4d2b40a40 100644 --- a/target/i386/kvm.c +++ b/target/i386/kvm.c @@ -797,6 +797,48 @@ static int hyperv_handle_properties(CPUState *cs) } env->features[FEAT_HYPERV_EAX] |= HV_SYNTIMERS_AVAILABLE; } +if 
(cpu->hyperv_relaxed_timing) { +env->features[FEAT_HV_RECOMM_EAX] |= HV_RELAXED_TIMING_RECOMMENDED; +} +if (cpu->hyperv_vapic) { +env->features[FEAT_HV_RECOMM_EAX] |= HV_APIC_ACCESS_RECOMMENDED; +} +if (cpu->hyperv_tlbflush) { +if (kvm_check_extension(cs->kvm_state, +KVM_CAP_HYPERV_TLBFLUSH) <= 0) { +fprintf(stderr, "Hyper-V TLB flush support " +"(requested by 'hv-tlbflush' cpu flag) " +" is not supported by kernel\n"); +return -ENOSYS; +} +env->features[FEAT_HV_RECOMM_EAX] |= HV_REMOTE_TLB_FLUSH_RECOMMENDED; +env->features[FEAT_HV_RECOMM_EAX] |= HV_EX_PROCESSOR_MASKS_RECOMMENDED; +} +if (cpu->hyperv_ipi) { +if (kvm_chec
Re: [Qemu-devel] [PATCH] i386/kvm: expose HV_CPUID_ENLIGHTMENT_INFO.EAX and HV_CPUID_NESTED_FEATURES.EAX as feature words
Paolo Bonzini writes: > On 26/11/18 14:59, Vitaly Kuznetsov wrote: >> It was found that QMP users of QEMU (e.g. libvirt) may need >> HV_CPUID_ENLIGHTMENT_INFO.EAX/HV_CPUID_NESTED_FEATURES.EAX information. In >> particular, 'hv_tlbflush' and 'hv_evmcs' enlightenments are only exposed in >> HV_CPUID_ENLIGHTMENT_INFO.EAX. >> >> HV_CPUID_NESTED_FEATURES.EAX is exposed for two reasons: convenience >> (we don't need to export it from hyperv_handle_properties() and as >> future-proof for Enlightened MSR-Bitmap, PV EPT invalidation and >> direct virtual flush features. >> >> Signed-off-by: Vitaly Kuznetsov > > Can you add a comment to feature_word_info, explaining why the > feat_names are not set? I had to do some code archeology to make sure I understand; I think it goes back to http://lists.gnu.org/archive/html/qemu-devel/2016-06/msg06579.html So the comment (probably added before FEAT_HYPERV_EAX definition) would be ".feat_names are commented out for Hyper-V enlightenments because we don't want to have two different ways for enabling them on QEMU command line. Some features (e.g. "hyperv_time", "hyperv_vapic", ...) require enabling several feature bits simultaneously, exposing these bits individually may just confuse guests." Would that do? -- Vitaly
[Qemu-devel] [PATCH] i386/kvm: expose HV_CPUID_ENLIGHTMENT_INFO.EAX and HV_CPUID_NESTED_FEATURES.EAX as feature words
It was found that QMP users of QEMU (e.g. libvirt) may need HV_CPUID_ENLIGHTMENT_INFO.EAX/HV_CPUID_NESTED_FEATURES.EAX information. In particular, 'hv_tlbflush' and 'hv_evmcs' enlightenments are only exposed in HV_CPUID_ENLIGHTMENT_INFO.EAX. HV_CPUID_NESTED_FEATURES.EAX is exposed for two reasons: convenience (we don't need to export it from hyperv_handle_properties()) and as future-proofing for Enlightened MSR-Bitmap, PV EPT invalidation and direct virtual flush features. Signed-off-by: Vitaly Kuznetsov --- target/i386/cpu.c | 30 + target/i386/cpu.h | 2 ++ target/i386/kvm.c | 85 +-- 3 files changed, 77 insertions(+), 40 deletions(-) diff --git a/target/i386/cpu.c b/target/i386/cpu.c index f81d35e1f9..8306670e09 100644 --- a/target/i386/cpu.c +++ b/target/i386/cpu.c @@ -980,6 +980,36 @@ static FeatureWordInfo feature_word_info[FEATURE_WORDS] = { }, .cpuid = { .eax = 0x40000003, .reg = R_EDX, }, }, +[FEAT_HV_RECOMM_EAX] = { +.type = CPUID_FEATURE_WORD, +.feat_names = { +NULL /* hv_recommend_pv_as_switch */, +NULL /* hv_recommend_pv_tlbflush_local */, +NULL /* hv_recommend_pv_tlbflush_remote */, +NULL /* hv_recommend_msr_apic_access */, +NULL /* hv_recommend_msr_reset */, +NULL /* hv_recommend_relaxed_timing */, +NULL /* hv_recommend_dma_remapping */, +NULL /* hv_recommend_int_remapping */, +NULL /* hv_recommend_x2apic_msrs */, +NULL /* hv_recommend_autoeoi_deprecation */, +NULL /* hv_recommend_pv_ipi */, +NULL /* hv_recommend_ex_hypercalls */, +NULL /* hv_hypervisor_is_nested */, +NULL /* hv_recommend_int_mbec */, +NULL /* hv_recommend_evmcs */, +NULL, +NULL, NULL, NULL, NULL, +NULL, NULL, NULL, NULL, +NULL, NULL, NULL, NULL, +NULL, NULL, NULL, NULL, +}, +.cpuid = { .eax = 0x40000004, .reg = R_EAX, }, +}, +[FEAT_HV_NESTED_EAX] = { +.type = CPUID_FEATURE_WORD, +.cpuid = { .eax = 0x4000000A, .reg = R_EAX, }, +}, [FEAT_SVM] = { .type = CPUID_FEATURE_WORD, .feat_names = { diff --git a/target/i386/cpu.h b/target/i386/cpu.h index 9c52d0cbeb..dd881510ac 100644 --- a/target/i386/cpu.h +++ 
b/target/i386/cpu.h @@ -497,6 +497,8 @@ typedef enum FeatureWord { FEAT_HYPERV_EAX,/* CPUID[4000_0003].EAX */ FEAT_HYPERV_EBX,/* CPUID[4000_0003].EBX */ FEAT_HYPERV_EDX,/* CPUID[4000_0003].EDX */ +FEAT_HV_RECOMM_EAX, /* CPUID[4000_0004].EAX */ +FEAT_HV_NESTED_EAX, /* CPUID[4000_000A].EAX */ FEAT_SVM, /* CPUID[8000_000A].EDX */ FEAT_XSAVE, /* CPUID[EAX=0xd,ECX=1].EAX */ FEAT_6_EAX, /* CPUID[6].EAX */ diff --git a/target/i386/kvm.c b/target/i386/kvm.c index f524e7d929..b4d2b40a40 100644 --- a/target/i386/kvm.c +++ b/target/i386/kvm.c @@ -797,6 +797,48 @@ static int hyperv_handle_properties(CPUState *cs) } env->features[FEAT_HYPERV_EAX] |= HV_SYNTIMERS_AVAILABLE; } +if (cpu->hyperv_relaxed_timing) { +env->features[FEAT_HV_RECOMM_EAX] |= HV_RELAXED_TIMING_RECOMMENDED; +} +if (cpu->hyperv_vapic) { +env->features[FEAT_HV_RECOMM_EAX] |= HV_APIC_ACCESS_RECOMMENDED; +} +if (cpu->hyperv_tlbflush) { +if (kvm_check_extension(cs->kvm_state, +KVM_CAP_HYPERV_TLBFLUSH) <= 0) { +fprintf(stderr, "Hyper-V TLB flush support " +"(requested by 'hv-tlbflush' cpu flag) " +" is not supported by kernel\n"); +return -ENOSYS; +} +env->features[FEAT_HV_RECOMM_EAX] |= HV_REMOTE_TLB_FLUSH_RECOMMENDED; +env->features[FEAT_HV_RECOMM_EAX] |= HV_EX_PROCESSOR_MASKS_RECOMMENDED; +} +if (cpu->hyperv_ipi) { +if (kvm_check_extension(cs->kvm_state, +KVM_CAP_HYPERV_SEND_IPI) <= 0) { +fprintf(stderr, "Hyper-V IPI send support " +"(requested by 'hv-ipi' cpu flag) " +" is not supported by kernel\n"); +return -ENOSYS; +} +env->features[FEAT_HV_RECOMM_EAX] |= HV_CLUSTER_IPI_RECOMMENDED; +env->features[FEAT_HV_RECOMM_EAX] |= HV_EX_PROCESSOR_MASKS_RECOMMENDED; +} +if (cpu->hyperv_evmcs) { +uint16_t evmcs_version; + +if (kvm_vcpu_enable_cap(cs, KVM_CAP_HYPERV_ENLIGHTENED_VMCS, 0, +(uintptr_t)_version)) { +fprintf(stderr, "Hyper-V Enlightened VMCS " +"(requested by 'hv-evmcs' cpu flag) " +"is not supported by kernel\n"); +
Re: [Qemu-devel] [PATCH] i386/kvm: expose HV_CPUID_ENLIGHTMENT_INFO.EAX and HV_CPUID_NESTED_FEATURES.EAX as feature words
Vitaly Kuznetsov writes: > Eduardo Habkost writes: > >> On Wed, Dec 19, 2018 at 06:25:06PM +0100, Vitaly Kuznetsov wrote: >>> Eduardo Habkost writes: >>> >>> > On Mon, Dec 03, 2018 at 03:17:06PM +0100, Vitaly Kuznetsov wrote: >>> >> Eduardo Habkost writes: >>> > [...] >>> >> > But note that we might still be able to move the existing >>> >> > "hyperv_*" features to feature_word_info[].feat_names. We just >>> >> > need to keep the same semantics (e.g. enable >>> >> > HV_HYPERCALL_AVAILABLE automatically when some features are set). >>> >> > >>> >> > Maybe we can make some of the feature properties read-only. This >>> >> > way we can give them meaningful names for debugging and error >>> >> > messages, even if we don't want to make them configurable >>> >> > directly. >>> >> >>> >> I'd suggest (if there are no objections of course) we do this separately >>> >> from this patch. [...] >>> > >>> > Agreed. >>> > >>> >>> Paolo, Eduardo, >>> >>> in case there are no concerns here, could you please pick this patch up? >>> Thanks! >> >> Queued, thanks! >> >> Can you please send the comment you wrote about feat_names as a >> follow-up patch? > > Oops, sorry, I just realized I promised to send out v2 with it and > apparently never did. Will send out a follow-up patch shortly. Thanks! Hey Eduardo, any news about the fate of this patch? (Correcting myself: there was a v2 with the comment included: https://lists.gnu.org/archive/html/qemu-devel/2018-12/msg00355.html but I also sent the follow-up patch you requested separately: https://lists.gnu.org/archive/html/qemu-devel/2018-12/msg05463.html ) -- Vitaly
Re: [Qemu-devel] [PATCH] i386/kvm: expose HV_CPUID_ENLIGHTMENT_INFO.EAX and HV_CPUID_NESTED_FEATURES.EAX as feature words
Eduardo Habkost writes: > On Mon, Dec 03, 2018 at 03:17:06PM +0100, Vitaly Kuznetsov wrote: >> Eduardo Habkost writes: > [...] >> > But note that we might still be able to move the existing >> > "hyperv_*" features to feature_word_info[].feat_names. We just >> > need to keep the same semantics (e.g. enable >> > HV_HYPERCALL_AVAILABLE automatically when some features are set). >> > >> > Maybe we can make some of the feature properties read-only. This >> > way we can give them meaningful names for debugging and error >> > messages, even if we don't want to make them configurable >> > directly. >> >> I'd suggest (if there are no objections of course) we do this separately >> from this patch. [...] > > Agreed. > Paolo, Eduardo, in case there are no concerns here, could you please pick this patch up? Thanks! -- Vitaly
Re: [Qemu-devel] [PATCH RFC 4/8] i386/kvm: Implement 'hv-all' pass-through mode
Roman Kagan writes: > On Fri, Jan 25, 2019 at 02:46:42PM +0100, Vitaly Kuznetsov wrote: >> Roman Kagan writes: >> >> > On Fri, Jan 25, 2019 at 12:41:51PM +0100, Vitaly Kuznetsov wrote: >> >> In many case we just want to give Windows guests all currently supported >> >> Hyper-V enlightenments and that's where this new mode may come handy. We >> >> pass through what was returned by KVM_GET_SUPPORTED_HV_CPUID. >> > >> > How is the compatibility ensured on migration between kernels reporting >> > different feature sets? >> >> AFAIU we don't change anything in this regard (or, my intention was to >> not change anything): hv-all is converted to the individual hv-* >> properties (hv_cpuid_check_and_set()) actually sets cpu->hyperv_* flags >> according to what's supported by kernel so when we migrate we will >> require all these features supported. > > Migration relies on the upper layer to run the destination QEMU with the > identical command line (except for -incoming) as the source, and QEMU is > then supposed to set up identical environment in the target VM as was in > the source, or refuse to start if that's impossible. (If I'm > misunderstanding this Dave (cc-d) may want to correct me.) > > AFAICS this hv-all attribute will enable different feature sets > depending on the kernel it's run on, so the migration between different > kernels will appear to succeed, but the guest may suddenly encounter an > incompatible change in the environment. With 'hv-all' I'm trying to achieve behavior similar to '-cpu host' and AFAIK these VMs are migratable 'at your own risk' (if you do it directly from qemu). Libvirt (or whatever upper layer), however, would do CPU feature comparison and in case you have fewer features on the destination host than you had on the source one, it will forbid the migration. I think if this also works for Hyper-V features then we're fine. (Dave, feel free to tell me I'm completely wrong with my assumptions.) -- Vitaly
[Qemu-devel] [PATCH v2] ioapic: allow buggy guests mishandling level-triggered interrupts to make progress
It was found that Hyper-V 2016 on KVM in some configurations (q35 machine + piix4-usb-uhci) hangs on boot. Root-cause was that one of Hyper-V level-triggered interrupt handler performs EOI before fixing the cause of the interrupt. This results in IOAPIC keep re-raising the level-triggered interrupt after EOI because irq-line remains asserted. Gory details: https://www.spinics.net/lists/kvm/msg184484.html (the whole thread). Turns out we were dealing with similar issues before; in-kernel IOAPIC implementation has commit 184564efae4d ("kvm: ioapic: conditionally delay irq delivery duringeoi broadcast") which describes a very similar issue. Steal the idea from the above mentioned commit for IOAPIC implementation in QEMU. SUCCESSIVE_IRQ_MAX_COUNT, delay and the comment are borrowed as well. Signed-off-by: Vitaly Kuznetsov --- Changes since v1: - timer_mod() -> timer_mod_anticipate() [Paolo Bonzini] - Massaged changelog [Liran Alon] - Make implementation look like in-kernel one [Liran Alon] --- hw/intc/ioapic.c | 57 --- hw/intc/trace-events | 1 + include/hw/i386/ioapic_internal.h | 3 ++ 3 files changed, 56 insertions(+), 5 deletions(-) diff --git a/hw/intc/ioapic.c b/hw/intc/ioapic.c index 9d75f84d3b..9fb8dd3450 100644 --- a/hw/intc/ioapic.c +++ b/hw/intc/ioapic.c @@ -139,6 +139,15 @@ static void ioapic_service(IOAPICCommonState *s) } } +#define SUCCESSIVE_IRQ_MAX_COUNT 1 + +static void ioapic_timer(void *opaque) +{ +IOAPICCommonState *s = opaque; + +ioapic_service(s); +} + static void ioapic_set_irq(void *opaque, int vector, int level) { IOAPICCommonState *s = opaque; @@ -222,13 +231,40 @@ void ioapic_eoi_broadcast(int vector) } for (n = 0; n < IOAPIC_NUM_PINS; n++) { entry = s->ioredtbl[n]; -if ((entry & IOAPIC_LVT_REMOTE_IRR) -&& (entry & IOAPIC_VECTOR_MASK) == vector) { -trace_ioapic_clear_remote_irr(n, vector); -s->ioredtbl[n] = entry & ~IOAPIC_LVT_REMOTE_IRR; -if (!(entry & IOAPIC_LVT_MASKED) && (s->irr & (1 << n))) { + +if (((entry & IOAPIC_VECTOR_MASK) != 
vector) || +!(entry & IOAPIC_LVT_REMOTE_IRR)) { +continue; +} + +trace_ioapic_clear_remote_irr(n, vector); +s->ioredtbl[n] = entry & ~IOAPIC_LVT_REMOTE_IRR; + +if (((entry >> IOAPIC_LVT_TRIGGER_MODE_SHIFT) & 1) != +IOAPIC_TRIGGER_LEVEL) { +continue; +} + +if (!(entry & IOAPIC_LVT_MASKED) && (s->irr & (1 << n))) { +++s->irq_eoi[vector]; +if (s->irq_eoi[vector] >= SUCCESSIVE_IRQ_MAX_COUNT) { +/* + * Real hardware does not deliver the interrupt immediately + * during eoi broadcast, and this lets a buggy guest make + * slow progress even if it does not correctly handle a + * level-triggered interrupt. Emulate this behavior if we + * detect an interrupt storm. + */ +s->irq_eoi[vector] = 0; +timer_mod_anticipate(s->timer, + qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) + + NANOSECONDS_PER_SECOND / 100); +trace_ioapic_eoi_delayed_reassert(vector); +} else { ioapic_service(s); } +} else { +s->irq_eoi[vector] = 0; } } } @@ -401,6 +437,8 @@ static void ioapic_realize(DeviceState *dev, Error **errp) memory_region_init_io(>io_memory, OBJECT(s), _io_ops, s, "ioapic", 0x1000); +s->timer = timer_new_ns(QEMU_CLOCK_VIRTUAL, ioapic_timer, s); + qdev_init_gpio_in(dev, ioapic_set_irq, IOAPIC_NUM_PINS); ioapics[ioapic_no] = s; @@ -408,6 +446,14 @@ static void ioapic_realize(DeviceState *dev, Error **errp) qemu_add_machine_init_done_notifier(>machine_done); } +static void ioapic_unrealize(DeviceState *dev, Error **errp) +{ +IOAPICCommonState *s = IOAPIC_COMMON(dev); + +timer_del(s->timer); +timer_free(s->timer); +} + static Property ioapic_properties[] = { DEFINE_PROP_UINT8("version", IOAPICCommonState, version, IOAPIC_VER_DEF), DEFINE_PROP_END_OF_LIST(), @@ -419,6 +465,7 @@ static void ioapic_class_init(ObjectClass *klass, void *data) DeviceClass *dc = DEVICE_CLASS(klass); k->realize = ioapic_realize; +k->unrealize = ioapic_unrealize; /* * If APIC is in kernel, we need to update the kernel cache after * migration, otherwise first 2
[Qemu-devel] [PATCH 6/8] i386/kvm: hv-stimer requires hv-time and hv-synic
Synthetic timers operate in hv-time time and Windows won't use these without SynIC. Signed-off-by: Vitaly Kuznetsov --- target/i386/kvm.c | 6 ++ 1 file changed, 6 insertions(+) diff --git a/target/i386/kvm.c b/target/i386/kvm.c index 9edf76e473..524ee28e9c 100644 --- a/target/i386/kvm.c +++ b/target/i386/kvm.c @@ -1186,6 +1186,12 @@ static int hyperv_handle_properties(CPUState *cs, if (cpu->hyperv_evmcs && !cpu->hyperv_vapic) { r |= hv_report_missing_dep(cpu, "hv-evmcs", "hv-vapic"); } +if (cpu->hyperv_stimer && !cpu->hyperv_synic) { +r |= hv_report_missing_dep(cpu, "hv-stimer", "hv-synic"); +} +if (cpu->hyperv_stimer && !cpu->hyperv_time) { +r |= hv_report_missing_dep(cpu, "hv-stimer", "hv-time"); +} /* Not exposed by KVM but needed to make CPU hotplug in Windows work */ env->features[FEAT_HYPERV_EDX] |= HV_CPU_DYNAMIC_PARTITIONING_AVAILABLE; -- 2.20.1
Re: [Qemu-devel] [PATCH 4/8] i386/kvm: implement 'hv-all' pass-through mode
"Dr. David Alan Gilbert" writes: > Yep, that's probably safest; although if you recorded the features used > in the migration stream you could check for those on the destination and > if they mismatch complain then. > There is no clear use-case for hv-all other than development at this moment; as Daniel previously stated we may never support it in libvirt. I decided to take the easiest path first and think about migration later, when we understand why we would want to migrate such guests. In theory, yes, we may compare Hyper-V feature words on source and destination and allow migration when the former is a subset of the latter. -- Vitaly
[Qemu-devel] [PATCH 7/8] i386/kvm: hv-tlbflush/ipi require hv-vpindex
The corresponding hypercalls require using VP indexes. Signed-off-by: Vitaly Kuznetsov --- target/i386/kvm.c | 6 ++ 1 file changed, 6 insertions(+) diff --git a/target/i386/kvm.c b/target/i386/kvm.c index 524ee28e9c..976c1d570f 100644 --- a/target/i386/kvm.c +++ b/target/i386/kvm.c @@ -1192,6 +1192,12 @@ static int hyperv_handle_properties(CPUState *cs, if (cpu->hyperv_stimer && !cpu->hyperv_time) { r |= hv_report_missing_dep(cpu, "hv-stimer", "hv-time"); } +if (cpu->hyperv_tlbflush && !cpu->hyperv_vpindex) { +r |= hv_report_missing_dep(cpu, "hv-tlbflush", "hv-vpindex"); +} +if (cpu->hyperv_ipi && !cpu->hyperv_vpindex) { +r |= hv_report_missing_dep(cpu, "hv-ipi", "hv-vpindex"); +} /* Not exposed by KVM but needed to make CPU hotplug in Windows work */ env->features[FEAT_HYPERV_EDX] |= HV_CPU_DYNAMIC_PARTITIONING_AVAILABLE; -- 2.20.1
[Qemu-devel] [PATCH 4/8] i386/kvm: implement 'hv-all' pass-through mode
In many case we just want to give Windows guests all currently supported Hyper-V enlightenments and that's where this new mode may come handy. We pass through what was returned by KVM_GET_SUPPORTED_HV_CPUID. hv_cpuid_check_and_set() is modified to also set cpu->hyperv_* flags as we may want to check them later (and we actually do for hv_runtime, hv_synic,...). 'hv-all' is a development only feature, a migration blocker is added to prevent issues while migrating between hosts with different feature sets. Signed-off-by: Vitaly Kuznetsov --- docs/hyperv.txt | 10 target/i386/cpu.c | 1 + target/i386/cpu.h | 1 + target/i386/kvm.c | 148 +- 4 files changed, 132 insertions(+), 28 deletions(-) diff --git a/docs/hyperv.txt b/docs/hyperv.txt index 397f2517b8..d1299aba81 100644 --- a/docs/hyperv.txt +++ b/docs/hyperv.txt @@ -174,6 +174,16 @@ without the feature to find out if enabling it is beneficial. Requires: hv-vapic +4. Development features + +In some cases (e.g. during development) it may make sense to use QEMU in +'pass-through' mode and give Windows guests all enlightenments currently +supported by KVM. This pass-through mode is enabled by "hv-all" CPU flag. +Note: enabling this flag effectively prevents migration as supported features +may differ between target and destination. +Note: "hv-all" doesn't include 'hv-evmcs', it needs to be enabled explicitly. + + 4. 
Useful links Hyper-V Top Level Functional specification and other information: diff --git a/target/i386/cpu.c b/target/i386/cpu.c index d6bb57d210..4e01ad076e 100644 --- a/target/i386/cpu.c +++ b/target/i386/cpu.c @@ -5785,6 +5785,7 @@ static Property x86_cpu_properties[] = { DEFINE_PROP_BOOL("hv-tlbflush", X86CPU, hyperv_tlbflush, false), DEFINE_PROP_BOOL("hv-evmcs", X86CPU, hyperv_evmcs, false), DEFINE_PROP_BOOL("hv-ipi", X86CPU, hyperv_ipi, false), +DEFINE_PROP_BOOL("hv-all", X86CPU, hyperv_all, false), DEFINE_PROP_BOOL("check", X86CPU, check_cpuid, true), DEFINE_PROP_BOOL("enforce", X86CPU, enforce_cpuid, false), DEFINE_PROP_BOOL("kvm", X86CPU, expose_kvm, true), diff --git a/target/i386/cpu.h b/target/i386/cpu.h index 83fb522554..9cd3a8bc2f 100644 --- a/target/i386/cpu.h +++ b/target/i386/cpu.h @@ -1396,6 +1396,7 @@ struct X86CPU { bool hyperv_tlbflush; bool hyperv_evmcs; bool hyperv_ipi; +bool hyperv_all; bool check_cpuid; bool enforce_cpuid; bool expose_kvm; diff --git a/target/i386/kvm.c b/target/i386/kvm.c index 63031358ae..af45241adb 100644 --- a/target/i386/kvm.c +++ b/target/i386/kvm.c @@ -656,7 +656,8 @@ static bool hyperv_enabled(X86CPU *cpu) cpu->hyperv_stimer || cpu->hyperv_reenlightenment || cpu->hyperv_tlbflush || -cpu->hyperv_ipi); +cpu->hyperv_ipi || +cpu->hyperv_all); } static int kvm_arch_set_tsc_khz(CPUState *cs) @@ -1004,14 +1005,15 @@ static int hv_cpuid_get_fw(struct kvm_cpuid2 *cpuid, int fw, uint32_t *r) } static int hv_cpuid_check_and_set(CPUState *cs, struct kvm_cpuid2 *cpuid, - const char *name, bool flag) + const char *name, bool *flag) { X86CPU *cpu = X86_CPU(cs); CPUX86State *env = >env; uint32_t r, fw, bits;; int i, j; +bool present; -if (!flag) { +if (!*flag && !cpu->hyperv_all) { return 0; } @@ -1020,6 +1022,7 @@ static int hv_cpuid_check_and_set(CPUState *cs, struct kvm_cpuid2 *cpuid, continue; } +present = true; for (j = 0; j < ARRAY_SIZE(kvm_hyperv_properties[i].flags); j++) { fw = kvm_hyperv_properties[i].flags[j].fw; bits = 
kvm_hyperv_properties[i].flags[j].bits; @@ -1029,17 +1032,26 @@ static int hv_cpuid_check_and_set(CPUState *cs, struct kvm_cpuid2 *cpuid, } if (hv_cpuid_get_fw(cpuid, fw, ) || (r & bits) != bits) { -fprintf(stderr, -"Hyper-V %s (requested by '%s' cpu flag) " -"is not supported by kernel\n", -kvm_hyperv_properties[i].desc, -kvm_hyperv_properties[i].name); -return 1; +if (*flag) { +fprintf(stderr, +"Hyper-V %s (requested by '%s' cpu flag) " +"is not supported by kernel\n", +kvm_hyperv_properties[i].desc, +kvm_hyperv_properties[i].name); +return 1; +} else { +present = false; +break; +
[Qemu-devel] [PATCH 1/8] i386/kvm: add support for KVM_GET_SUPPORTED_HV_CPUID
KVM now supports reporting supported Hyper-V features through CPUID (KVM_GET_SUPPORTED_HV_CPUID ioctl). Going forward, this is going to be the only way to announce new functionality and this has already happened with Direct Mode stimers. While we could just support KVM_GET_SUPPORTED_HV_CPUID for new features, it seems to be beneficial to use it for all Hyper-V enlightenments when possible. This way we can implement 'hv-all' pass-through mode giving the guest all supported Hyper-V features even when QEMU knows nothing about them. Implementation-wise we create a new kvm_hyperv_properties structure defining Hyper-V features, get_supported_hv_cpuid()/ get_supported_hv_cpuid_legacy() returning the supported CPUID set and a bit over-engineered hv_cpuid_check_and_set() which we will also be used to set cpu->hyperv_* properties for 'hv-all' mode. Signed-off-by: Vitaly Kuznetsov --- target/i386/kvm.c | 487 +++--- 1 file changed, 372 insertions(+), 115 deletions(-) diff --git a/target/i386/kvm.c b/target/i386/kvm.c index 3b29ce5c0d..9abee81998 100644 --- a/target/i386/kvm.c +++ b/target/i386/kvm.c @@ -700,141 +700,360 @@ static bool tsc_is_stable_and_known(CPUX86State *env) || env->user_tsc_khz; } -static int hyperv_handle_properties(CPUState *cs) +static struct { +const char *name; +const char *desc; +struct { +uint32_t fw; +uint32_t bits; +} flags[2]; +} kvm_hyperv_properties[] = { +{ +.name = "hv-relaxed", +.desc = "relaxed timing", +.flags = { +{.fw = FEAT_HYPERV_EAX, + .bits = HV_HYPERCALL_AVAILABLE}, +{.fw = FEAT_HV_RECOMM_EAX, + .bits = HV_RELAXED_TIMING_RECOMMENDED} +} +}, +{ +.name = "hv-vapic", +.desc = "virtual APIC", +.flags = { +{.fw = FEAT_HYPERV_EAX, + .bits = HV_HYPERCALL_AVAILABLE | HV_APIC_ACCESS_AVAILABLE}, +{.fw = FEAT_HV_RECOMM_EAX, + .bits = HV_APIC_ACCESS_RECOMMENDED} +} +}, +{ +.name = "hv-time", +.desc = "clocksources", +.flags = { +{.fw = FEAT_HYPERV_EAX, + .bits = HV_HYPERCALL_AVAILABLE | HV_TIME_REF_COUNT_AVAILABLE | + HV_REFERENCE_TSC_AVAILABLE}, 
+{0} +} +}, +{ +.name = "hv-frequencies", +.desc = "frequency MSRs", +.flags = { +{.fw = FEAT_HYPERV_EAX, + .bits = HV_ACCESS_FREQUENCY_MSRS}, +{.fw = FEAT_HYPERV_EDX, + .bits = HV_FREQUENCY_MSRS_AVAILABLE} +} +}, +{ +.name = "hv-crash", +.desc = "crash MSRs", +.flags = { +{.fw = FEAT_HYPERV_EDX, + .bits = HV_GUEST_CRASH_MSR_AVAILABLE}, +{0} +} +}, +{ +.name = "hv-reenlightenment", +.desc = "Reenlightenment MSRs", +.flags = { +{.fw = FEAT_HYPERV_EAX, + .bits = HV_ACCESS_REENLIGHTENMENTS_CONTROL}, +{0} +} +}, +{ +.name = "hv-reset", +.desc = "reset MSR", +.flags = { +{.fw = FEAT_HYPERV_EAX, + .bits = HV_RESET_AVAILABLE}, +{0} +} +}, +{ +.name = "hv-vpindex", +.desc = "VP_INDEX MSR", +.flags = { +{.fw = FEAT_HYPERV_EAX, + .bits = HV_VP_INDEX_AVAILABLE}, +{0} +} +}, +{ +.name = "hv-runtime", +.desc = "VP_RUNTIME MSR", +.flags = { +{.fw = FEAT_HYPERV_EAX, + .bits = HV_VP_RUNTIME_AVAILABLE}, +{0} +} +}, +{ +.name = "hv-synic", +.desc = "SynIC", +.flags = { +{.fw = FEAT_HYPERV_EAX, + .bits = HV_SYNIC_AVAILABLE}, +{0} +} +}, +{ +.name = "hv-stimer", +.desc = "timers", +.flags = { +{.fw = FEAT_HYPERV_EAX, + .bits = HV_SYNTIMERS_AVAILABLE}, +{0} +} +}, +{ +.name = "hv-tlbflush", +.desc = "TLB flush support", +.flags = { +{.fw = FEAT_HV_RECOMM_EAX, + .bits = HV_REMOTE_TLB_FLUSH_RECOMMENDED | + HV_EX_PROCESSOR_MASKS_RECOMMENDED}, +{0} +} +}, +{ +.name = "hv-ipi", +.desc = "IPI send support", +.flags = { +{.fw = FEAT_HV_RECOMM_EAX, + .bits = HV_CLUSTER_IPI_RECOMMENDED | + HV_EX_PROCESSOR_MASKS_RECOMMENDED}, +{0} +} +}, +}; + +static struct kvm_cpuid2 *try_g
[Qemu-devel] [PATCH 3/8] i386/kvm: document existing Hyper-V enlightenments
Currently, there is no doc describing hv-* CPU flags, people are encouraged to get the information from Microsoft Hyper-V Top Level Functional specification (TLFS). There is, however, a bit of QEMU specifics. Signed-off-by: Vitaly Kuznetsov --- docs/hyperv.txt | 180 1 file changed, 180 insertions(+) create mode 100644 docs/hyperv.txt diff --git a/docs/hyperv.txt b/docs/hyperv.txt new file mode 100644 index 00..397f2517b8 --- /dev/null +++ b/docs/hyperv.txt @@ -0,0 +1,180 @@ +Hyper-V Enlightenments +== + + +1. Description +=== +In some cases when implementing a hardware interface in software is slow, KVM +implements its own paravirtualized interfaces. This works well for Linux as +guest support for such features is added simultaneously with the feature itself. +It may, however, be hard-to-impossible to add support for these interfaces to +proprietary OSes, namely, Microsoft Windows. + +KVM on x86 implements Hyper-V Enlightenments for Windows guests. These features +make Windows and Hyper-V guests think they're running on top of a Hyper-V +compatible hypervisor and use Hyper-V specific features. + + +2. Setup += +No Hyper-V enlightenments are enabled by default by either KVM or QEMU. In +QEMU, individual enlightenments can be enabled through CPU flags, e.g: + + qemu-system-x86_64 --enable-kvm --cpu host,hv_relaxed,hv_vpindex,hv_time, ... + +Sometimes there are dependencies between enlightenments, QEMU is supposed to +check that the supplied configuration is sane. + +When any set of the Hyper-V enlightenments is enabled, QEMU changes hypervisor +identification (CPUID 0x40000000..0x4000000A) to Hyper-V. KVM identification +and features are kept in leaves 0x40000100..0x40000101. + + +3. Existing enlightenments +=== + +3.1. hv-relaxed + +This feature tells guest OS to disable watchdog timeouts as it is running on a +hypervisor. It is known that some Windows versions will do this even when they +see 'hypervisor' CPU flag. + +3.2. 
hv-vapic +== +Provides so-called VP Assist page MSR to guest allowing it to work with APIC +more efficiently. In particular, this enlightenment allows paravirtualized +(exit-less) EOI processing. + +3.3. hv-spinlocks=xxx +== +Enables paravirtualized spinlocks. The parameter indicates how many times +spinlock acquisition should be attempted before indicating the situation to the +hypervisor. A special value 0xffffffff indicates "never to retry". + +3.4. hv-vpindex + +Provides HV_X64_MSR_VP_INDEX (0x40000002) MSR to the guest which has Virtual +processor index information. This enlightenment makes sense in conjunction with +hv-synic, hv-stimer and other enlightenments which require the guest to know its +Virtual Processor indices (e.g. when VP index needs to be passed in a +hypercall). + +3.5. hv-runtime + +Provides HV_X64_MSR_VP_RUNTIME (0x40000010) MSR to the guest. The MSR keeps the +virtual processor run time in 100ns units. This gives guest operating system an +idea of how much time was 'stolen' from it (when the virtual CPU was preempted +to perform some other work). + +3.6. hv-crash +== +Provides HV_X64_MSR_CRASH_P0..HV_X64_MSR_CRASH_P5 (0x40000100..0x40000105) and +HV_X64_MSR_CRASH_CTL (0x40000105) MSRs to the guest. These MSRs are written to +by the guest when it crashes, HV_X64_MSR_CRASH_P0..HV_X64_MSR_CRASH_P5 MSRs +contain additional crash information. This information is outputted in QEMU log +and through QAPI. +Note: unlike under genuine Hyper-V, write to HV_X64_MSR_CRASH_CTL causes guest +to shutdown. This effectively blocks crash dump generation by Windows. + +3.7. hv-time += +Enables two Hyper-V-specific clocksources available to the guest: MSR-based +Hyper-V clocksource (HV_X64_MSR_TIME_REF_COUNT, 0x40000020) and Reference TSC +page (enabled via MSR HV_X64_MSR_REFERENCE_TSC, 0x40000021). Both clocksources +are per-guest, Reference TSC page clocksource allows for exit-less time stamp +readings. Using this enlightenment leads to significant speedup of all timestamp +related operations. 
+ +3.8. hv-synic +== +Enables Hyper-V Synthetic interrupt controller - an extension of a local APIC. +When enabled, this enlightenment provides additional communication facilities +to the guest: SynIC messages and Events. This is a pre-requisite for +implementing VMBus devices (not yet in QEMU). Additionally, this enlightenment +is needed to enable Hyper-V synthetic timers. SynIC is controlled through MSRs +HV_X64_MSR_SCONTROL..HV_X64_MSR_EOM (0x40000080..0x40000084) and +HV_X64_MSR_SINT0..HV_X64_MSR_SINT15 (0x40000090..0x4000009F) + +Requires: hv-vpindex + +3.9. hv-stimer +=== +Enables Hyper-V synthetic timers. There are four synthetic timers per virtual +CPU controlled through HV_X64_MSR_STIMER0_CONFIG..HV_X64_MSR_STI
[Qemu-devel] [PATCH 8/8] i386/kvm: add support for Direct Mode for Hyper-V synthetic timers
Hyper-V on KVM can only use Synthetic timers with Direct Mode (opting for an interrupt instead of VMBus message). This new capability is only announced in KVM_GET_SUPPORTED_HV_CPUID. Signed-off-by: Vitaly Kuznetsov --- docs/hyperv.txt| 10 ++ target/i386/cpu.c | 1 + target/i386/cpu.h | 1 + target/i386/hyperv-proto.h | 1 + target/i386/kvm.c | 15 +++ 5 files changed, 28 insertions(+) diff --git a/docs/hyperv.txt b/docs/hyperv.txt index d1299aba81..e1988ab648 100644 --- a/docs/hyperv.txt +++ b/docs/hyperv.txt @@ -173,6 +173,16 @@ without the feature to find out if enabling it is beneficial. Requires: hv-vapic +3.17. hv-stimer-direct +=== +Hyper-V specification allows synthetic timer operation in two modes: "classic", +when expiration event is delivered as SynIC message and "direct", when the event +is delivered via normal interrupt. It is known that nested Hyper-V can only +use synthetic timers in direct mode and thus 'hv-stimer-direct' needs to be +enabled. + +Requires: hv-vpindex, hv-synic, hv-time, hv-stimer + 4. 
Development features diff --git a/target/i386/cpu.c b/target/i386/cpu.c index 4e01ad076e..ea3843ed0c 100644 --- a/target/i386/cpu.c +++ b/target/i386/cpu.c @@ -5785,6 +5785,7 @@ static Property x86_cpu_properties[] = { DEFINE_PROP_BOOL("hv-tlbflush", X86CPU, hyperv_tlbflush, false), DEFINE_PROP_BOOL("hv-evmcs", X86CPU, hyperv_evmcs, false), DEFINE_PROP_BOOL("hv-ipi", X86CPU, hyperv_ipi, false), +DEFINE_PROP_BOOL("hv-stimer-direct", X86CPU, hyperv_stimer_direct, false), DEFINE_PROP_BOOL("hv-all", X86CPU, hyperv_all, false), DEFINE_PROP_BOOL("check", X86CPU, check_cpuid, true), DEFINE_PROP_BOOL("enforce", X86CPU, enforce_cpuid, false), diff --git a/target/i386/cpu.h b/target/i386/cpu.h index 9cd3a8bc2f..3f1ada3e39 100644 --- a/target/i386/cpu.h +++ b/target/i386/cpu.h @@ -1396,6 +1396,7 @@ struct X86CPU { bool hyperv_tlbflush; bool hyperv_evmcs; bool hyperv_ipi; +bool hyperv_stimer_direct; bool hyperv_all; bool check_cpuid; bool enforce_cpuid; diff --git a/target/i386/hyperv-proto.h b/target/i386/hyperv-proto.h index c0272b3a01..cffac10b45 100644 --- a/target/i386/hyperv-proto.h +++ b/target/i386/hyperv-proto.h @@ -49,6 +49,7 @@ #define HV_GUEST_IDLE_STATE_AVAILABLE (1u << 5) #define HV_FREQUENCY_MSRS_AVAILABLE (1u << 8) #define HV_GUEST_CRASH_MSR_AVAILABLE(1u << 10) +#define HV_STIMER_DIRECT_MODE_AVAILABLE (1u << 19) /* * HV_CPUID_ENLIGHTMENT_INFO.EAX bits diff --git a/target/i386/kvm.c b/target/i386/kvm.c index 976c1d570f..f3c73b7c4e 100644 --- a/target/i386/kvm.c +++ b/target/i386/kvm.c @@ -657,6 +657,7 @@ static bool hyperv_enabled(X86CPU *cpu) cpu->hyperv_reenlightenment || cpu->hyperv_tlbflush || cpu->hyperv_ipi || +cpu->hyperv_stimer_direct || cpu->hyperv_all); } @@ -832,6 +833,15 @@ static struct { {0} } }, +{ +.name = "hv-stimer-direct", +.desc = "direct mode timers", +.flags = { +{.fw = FEAT_HYPERV_EDX, + .bits = HV_STIMER_DIRECT_MODE_AVAILABLE}, +{0} +} +}, }; static struct kvm_cpuid2 *try_get_hv_cpuid(CPUState *cs, int max) @@ -1178,6 +1188,8 @@ static 
int hyperv_handle_properties(CPUState *cs, r |= hv_cpuid_check_and_set(cs, cpuid, "hv-tlbflush", &cpu->hyperv_tlbflush); r |= hv_cpuid_check_and_set(cs, cpuid, "hv-ipi", &cpu->hyperv_ipi); +r |= hv_cpuid_check_and_set(cs, cpuid, "hv-stimer-direct", +&cpu->hyperv_stimer_direct); /* Dependencies */ if (cpu->hyperv_synic && !cpu->hyperv_synic_kvm_only && @@ -1198,6 +1210,9 @@ static int hyperv_handle_properties(CPUState *cs, if (cpu->hyperv_ipi && !cpu->hyperv_vpindex) { r |= hv_report_missing_dep(cpu, "hv-ipi", "hv-vpindex"); } +if (cpu->hyperv_stimer_direct && !cpu->hyperv_stimer) { +r |= hv_report_missing_dep(cpu, "hv-stimer-direct", "hv-stimer"); +} /* Not exposed by KVM but needed to make CPU hotplug in Windows work */ env->features[FEAT_HYPERV_EDX] |= HV_CPU_DYNAMIC_PARTITIONING_AVAILABLE; -- 2.20.1
[Qemu-devel] [PATCH 0/8] i386/kvm/hyper-v: refactor and implement 'hv-stimer-direct' and 'hv-all' enlightenments
It has been a while since my 'RFC' and I apologize for that. Changes since RFC: - 'hv-all' is a migration blocker. - all existing and newly added Hyper-V enlightenments are somewhat documented. Original description: The recently introduced Direct Mode for Hyper-V synthetic timers enlightenment is only exposed through KVM_GET_SUPPORTED_HV_CPUID ioctl. Take the opportunity and re-implement the way we handle Hyper-V enlightenments in QEMU, add support for hv-stimer-direct and 'hv-all' pass-through mode, add missing dependencies between enlightenments. Vitaly Kuznetsov (8): i386/kvm: add support for KVM_GET_SUPPORTED_HV_CPUID i386/kvm: move Hyper-V CPUID filling to hyperv_handle_properties() i386/kvm: document existing Hyper-V enlightenments i386/kvm: implement 'hv-all' pass-through mode i386/kvm: hv-evmcs requires hv-vapic i386/kvm: hv-stimer requires hv-time and hv-synic i386/kvm: hv-tlbflush/ipi require hv-vpindex i386/kvm: add support for Direct Mode for Hyper-V synthetic timers docs/hyperv.txt| 200 ++ target/i386/cpu.c | 2 + target/i386/cpu.h | 2 + target/i386/hyperv-proto.h | 1 + target/i386/kvm.c | 764 - 5 files changed, 785 insertions(+), 184 deletions(-) create mode 100644 docs/hyperv.txt -- 2.20.1
[Qemu-devel] [PATCH 5/8] i386/kvm: hv-evmcs requires hv-vapic
Enlightened VMCS is enabled by writing to a field in the VP assist page, which in turn requires virtual APIC. Signed-off-by: Vitaly Kuznetsov --- target/i386/kvm.c | 5 - 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/target/i386/kvm.c b/target/i386/kvm.c index af45241adb..9edf76e473 100644 --- a/target/i386/kvm.c +++ b/target/i386/kvm.c @@ -1083,7 +1083,7 @@ static int hv_report_missing_dep(X86CPU *cpu, const char *name, return 1; } -if (cpu->hyperv_all) { +if (cpu->hyperv_all && strcmp(name, "hv-evmcs")) { fprintf(stderr, "Hyper-V %s (requested by 'hv-all' cpu flag) " "requires %s (is not supported by kernel)\n", kvm_hyperv_properties[i].desc, kvm_hyperv_properties[j].desc); @@ -1183,6 +1183,9 @@ static int hyperv_handle_properties(CPUState *cs, if (cpu->hyperv_synic && !cpu->hyperv_synic_kvm_only && !cpu->hyperv_vpindex) r |= hv_report_missing_dep(cpu, "hv-synic", "hv-vpindex"); +if (cpu->hyperv_evmcs && !cpu->hyperv_vapic) { +r |= hv_report_missing_dep(cpu, "hv-evmcs", "hv-vapic"); +} /* Not exposed by KVM but needed to make CPU hotplug in Windows work */ env->features[FEAT_HYPERV_EDX] |= HV_CPU_DYNAMIC_PARTITIONING_AVAILABLE; -- 2.20.1
[Qemu-devel] [PATCH 2/8] i386/kvm: move Hyper-V CPUID filling to hyperv_handle_properties()
Let's consolidate Hyper-V features handling in hyperv_handle_properties(). The change is necessary to support pass-through 'hv-all' mode as we'll be just copying CPUIDs from KVM instead of filling them in. Signed-off-by: Vitaly Kuznetsov --- target/i386/kvm.c | 163 +- 1 file changed, 90 insertions(+), 73 deletions(-) diff --git a/target/i386/kvm.c b/target/i386/kvm.c index 9abee81998..63031358ae 100644 --- a/target/i386/kvm.c +++ b/target/i386/kvm.c @@ -1047,13 +1047,25 @@ static int hv_cpuid_check_and_set(CPUState *cs, struct kvm_cpuid2 *cpuid, return 1; } -static int hyperv_handle_properties(CPUState *cs) +/* + * Fill in Hyper-V CPUIDs. Returns the number of entries filled in cpuid_ent in + * case of success, errno < 0 in case of failure and 0 when no Hyper-V + * extentions are enabled. + */ +static int hyperv_handle_properties(CPUState *cs, +struct kvm_cpuid_entry2 *cpuid_ent) { X86CPU *cpu = X86_CPU(cs); CPUX86State *env = >env; struct kvm_cpuid2 *cpuid; +struct kvm_cpuid_entry2 *c; +uint32_t signature[3]; +uint32_t cpuid_i = 0; int r = 0; +if (!hyperv_enabled(cpu)) +return 0; + if (cpu->hyperv_evmcs) { uint16_t evmcs_version; @@ -1104,9 +1116,80 @@ static int hyperv_handle_properties(CPUState *cs) /* Not exposed by KVM but needed to make CPU hotplug in Windows work */ env->features[FEAT_HYPERV_EDX] |= HV_CPU_DYNAMIC_PARTITIONING_AVAILABLE; +if (r) { +r = -ENOSYS; +goto free; +} + +c = _ent[cpuid_i++]; +c->function = HV_CPUID_VENDOR_AND_MAX_FUNCTIONS; +if (!cpu->hyperv_vendor_id) { +memcpy(signature, "Microsoft Hv", 12); +} else { +size_t len = strlen(cpu->hyperv_vendor_id); + +if (len > 12) { +error_report("hv-vendor-id truncated to 12 characters"); +len = 12; +} +memset(signature, 0, 12); +memcpy(signature, cpu->hyperv_vendor_id, len); +} +c->eax = cpu->hyperv_evmcs ? 
+HV_CPUID_NESTED_FEATURES : HV_CPUID_IMPLEMENT_LIMITS; +c->ebx = signature[0]; +c->ecx = signature[1]; +c->edx = signature[2]; + +c = _ent[cpuid_i++]; +c->function = HV_CPUID_INTERFACE; +memcpy(signature, "Hv#1\0\0\0\0\0\0\0\0", 12); +c->eax = signature[0]; +c->ebx = 0; +c->ecx = 0; +c->edx = 0; + +c = _ent[cpuid_i++]; +c->function = HV_CPUID_VERSION; +c->eax = 0x1bbc; +c->ebx = 0x00060001; + +c = _ent[cpuid_i++]; +c->function = HV_CPUID_FEATURES; +c->eax = env->features[FEAT_HYPERV_EAX]; +c->ebx = env->features[FEAT_HYPERV_EBX]; +c->edx = env->features[FEAT_HYPERV_EDX]; + +c = _ent[cpuid_i++]; +c->function = HV_CPUID_ENLIGHTMENT_INFO; +c->eax = env->features[FEAT_HV_RECOMM_EAX]; +c->ebx = cpu->hyperv_spinlock_attempts; + +c = _ent[cpuid_i++]; +c->function = HV_CPUID_IMPLEMENT_LIMITS; +c->eax = cpu->hv_max_vps; +c->ebx = 0x40; + +if (cpu->hyperv_evmcs) { +__u32 function; + +/* Create zeroed 0x4006..0x4009 leaves */ +for (function = HV_CPUID_IMPLEMENT_LIMITS + 1; + function < HV_CPUID_NESTED_FEATURES; function++) { +c = _ent[cpuid_i++]; +c->function = function; +} + +c = _ent[cpuid_i++]; +c->function = HV_CPUID_NESTED_FEATURES; +c->eax = env->features[FEAT_HV_NESTED_EAX]; +} +r = cpuid_i; + +free: g_free(cpuid); -return r ? -ENOSYS : 0; +return r; } static int hyperv_init_vcpu(X86CPU *cpu) @@ -1215,79 +1298,13 @@ int kvm_arch_init_vcpu(CPUState *cs) } /* Paravirtualization CPUIDs */ -if (hyperv_enabled(cpu)) { -c = _data.entries[cpuid_i++]; -c->function = HV_CPUID_VENDOR_AND_MAX_FUNCTIONS; -if (!cpu->hyperv_vendor_id) { -memcpy(signature, "Microsoft Hv", 12); -} else { -size_t len = strlen(cpu->hyperv_vendor_id); - -if (len > 12) { -error_report("hv-vendor-id truncated to 12 characters"); -len = 12; -} -memset(signature, 0, 12); -memcpy(signature, cpu->hyperv_vendor_id, len); -} -c->eax = cpu->hyperv_evmcs ? 
-HV_CPUID_NESTED_FEATURES : HV_CPUID_IMPLEMENT_LIMITS; -c->ebx = signature[0]; -c->ecx = signature[1]; -c->edx = signature[2]; - -c = _data.entries[cpuid_i++]; -c->function = HV_CPUID_INTERFACE; -memcpy(signature, "Hv#1\0\0\0\0\0\0\0\0", 12); -c->eax = signature[0]; -c->ebx = 0; -c->ecx = 0; -c->edx = 0
Re: [Qemu-devel] [PATCH 3/8] i386/kvm: document existing Hyper-V enlightenments
Roman Kagan writes: > On Fri, Mar 29, 2019 at 03:18:27PM +0100, Vitaly Kuznetsov wrote: >> Currently, there is no doc describing hv-* CPU flags, people are >> encouraged to get the information from Microsoft Hyper-V Top Level >> Functional specification (TLFS). There is, however, a bit of QEMU >> specifics. > > This is appreciated a lot, thanks for doing this! > >> Signed-off-by: Vitaly Kuznetsov >> --- >> docs/hyperv.txt | 180 >> 1 file changed, 180 insertions(+) >> create mode 100644 docs/hyperv.txt >> >> diff --git a/docs/hyperv.txt b/docs/hyperv.txt >> new file mode 100644 >> index 00..397f2517b8 >> --- /dev/null >> +++ b/docs/hyperv.txt >> @@ -0,0 +1,180 @@ >> +Hyper-V Enlightenments >> +== >> + >> + >> +1. Description >> +=== >> +In some cases when implementing a hardware interface in software is slow, >> KVM >> +implements its own paravirtualized interfaces. This works well for Linux as >> +guest support for such features is added simultaneously with the feature >> itself. >> +It may, however, be hard-to-impossible to add support for these interfaces >> to >> +proprietary OSes, namely, Microsoft Windows. >> + >> +KVM on x86 implements Hyper-V Enlightenments for Windows guests. These >> features >> +make Windows and Hyper-V guests think they're running on top of a Hyper-V >> +compatible hypervisor and use Hyper-V specific features. >> + >> + >> +2. Setup >> += >> +No Hyper-V enlightenments are enabled by default by either KVM or QEMU. In >> +QEMU, individual enlightenments can be enabled through CPU flags, e.g: >> + >> + qemu-system-x86_64 --enable-kvm --cpu host,hv_relaxed,hv_vpindex,hv_time, >> ... >> + >> +Sometimes there are dependencies between enlightenments, QEMU is supposed to >> +check that the supplied configuration is sane. >> + >> +When any set of the Hyper-V enlightenments is enabled, QEMU changes >> hypervisor >> +identification (CPUID 0x4000..0x400A) to Hyper-V. KVM identification >> +and features are kept in leaves 0x4100..0x4101. 
>> + >> + >> +3. Existing enlightenments >> +=== >> + >> +3.1. hv-relaxed >> + >> +This feature tells guest OS to disable watchdog timeouts as it is running >> on a >> +hypervisor. It is known that some Windows versions will do this even when >> they >> +see 'hypervisor' CPU flag. >> + >> +3.2. hv-vapic >> +== >> +Provides so-called VP Assist page MSR to guest allowing it to work with APIC >> +more efficiently. In particular, this enlightenment allows paravirtualized >> +(exit-less) EOI processing. >> + >> +3.3. hv-spinlocks=xxx >> +== >> +Enables paravirtualized spinlocks. The parameter indicates how many times >> +spinlock acquisition should be attempted before indicating the situation to >> the >> +hypervisor. A special value 0xffffffff indicates "never to retry". >> + >> +3.4. hv-vpindex >> + >> +Provides HV_X64_MSR_VP_INDEX (0x40000002) MSR to the guest which has Virtual >> +processor index information. This enlightenment makes sense in conjunction >> with >> +hv-synic, hv-stimer and other enlightenments which require the guest to >> know its >> +Virtual Processor indices (e.g. when VP index needs to be passed in a >> +hypercall). >> + >> +3.5. hv-runtime >> + >> +Provides HV_X64_MSR_VP_RUNTIME (0x40000010) MSR to the guest. The MSR keeps >> the >> +virtual processor run time in 100ns units. This gives guest operating >> system an >> +idea of how much time was 'stolen' from it (when the virtual CPU was >> preempted >> +to perform some other work). >> + >> +3.6. hv-crash >> +== >> +Provides HV_X64_MSR_CRASH_P0..HV_X64_MSR_CRASH_P5 (0x40000100..0x40000105) >> and >> +HV_X64_MSR_CRASH_CTL (0x40000105) MSRs to the guest. These MSRs are written >> to >> +by the guest when it crashes, HV_X64_MSR_CRASH_P0..HV_X64_MSR_CRASH_P5 MSRs >> +contain additional crash information. This information is outputted in QEMU >> log >> +and through QAPI. >> +Note: unlike under genuine Hyper-V, write to HV_X64_MSR_CRASH_CTL causes >> guest >> +to shutdown. 
This effectively blocks crash dump generation by Windows. > > Hmm, why? > This was wr
Re: [Qemu-devel] [PATCH 4/8] i386/kvm: implement 'hv-all' pass-through mode
Roman Kagan writes: > On Fri, Mar 29, 2019 at 03:18:28PM +0100, Vitaly Kuznetsov wrote: >> In many cases we just want to give Windows guests all currently supported >> Hyper-V enlightenments and that's where this new mode may come in handy. We >> pass through what was returned by KVM_GET_SUPPORTED_HV_CPUID. > > The only one out of those "many cases" I can think of is when you've > developed a new hyperv feature in the kernel and you want to test it > with a version of QEMU that's not aware of it. Are there any others? > I can recall the following case I had: benchmarking Windows guest performance with different kernels, trying to get the best number. As these kernels supported different sets of hv-* enlightenments, I had to do non-trivial work to figure out what's supported and adjust the QEMU command line accordingly. It would've been much easier with 'hv-all'. >> >> hv_cpuid_check_and_set() is modified to also set cpu->hyperv_* flags as >> we may want to check them later (and we actually do for hv_runtime, >> hv_synic,...). >> >> 'hv-all' is a development only feature, a migration blocker is added to >> prevent issues while migrating between hosts with different feature sets. >> >> Signed-off-by: Vitaly Kuznetsov >> --- >> docs/hyperv.txt | 10 >> target/i386/cpu.c | 1 + >> target/i386/cpu.h | 1 + >> target/i386/kvm.c | 148 +- >> 4 files changed, 132 insertions(+), 28 deletions(-) >> >> diff --git a/docs/hyperv.txt b/docs/hyperv.txt >> index 397f2517b8..d1299aba81 100644 >> --- a/docs/hyperv.txt >> +++ b/docs/hyperv.txt >> @@ -174,6 +174,16 @@ without the feature to find out if enabling it is >> beneficial. >> Requires: hv-vapic >> >> >> +4. Development features >> + >> +In some cases (e.g. during development) it may make sense to use QEMU in >> +'pass-through' mode and give Windows guests all enlightenments currently >> +supported by KVM. This pass-through mode is enabled by "hv-all" CPU flag. 
>> +Note: enabling this flag effectively prevents migration as supported >> features >> +may differ between target and destination. > > I find 'hv-passthrough' a more adequate name for this. Sure, will adjust. > >> +Note: "hv-all" doesn't include 'hv-evmcs', it needs to be enabled >> explicitly. > > This is extremely confusing, when some features are more equal than > others. I think it'd make more sense instead to support filtering out > some features, like in "hv-passthrough,hv-evmcs=off". hv-evmcs is probably the only enlightenment which is not an obvious 'win': when enabled, some features (e.g. posted interrupts) are getting disabled. But as 'hv-all' is now a developer-only feature I see no problem with enabling evmcs too. -- Vitaly
Re: [Qemu-devel] [PATCH] ioapic: allow buggy guests mishandling level-triggered interrupts to make progress
Liran Alon writes: >> On 1 Apr 2019, at 16:36, Vitaly Kuznetsov wrote: >> >> It was found that Hyper-V 2016 on KVM in some configurations (q35 machine + >> piix4-usb-uhci) hangs on boot. Trace analysis led us to the conclusion that >> it is mishandling level-triggered interrupt performing EOI without fixing >> the root cause. > > I would rephrase as: > It was found that Hyper-V 2016 on KVM in some configurations (q35 machine + > piix4-usb-uhci) hangs on boot. > Root-cause was that one of Hyper-V level-triggered interrupt handler performs > EOI before fixing the root-cause. > This results in IOAPIC keep re-raising the level-triggered interrupt > after EOI because irq-line remains asserted. Ok, thanks for the suggestion. > >> This causes immediate re-assertion and L2 VM (which is >> supposedly expected to fix the cause of the interrupt) is not making any >> progress. > > I don’t know why you assume this. > From the trace we have examined, it seems that the EOI is performed by > Hyper-V and not it’s guest > This means that the handler for this level-triggered interrupt is on > Hyper-V and not it’s guest. If you let it run (with e.g. this patch or by setting preemtion timer > 0) you'll see that MMIO write fixing the cause of the interrupt is happening from L2: (qemu) info pci: Bus 0, device 4, function 0: USB controller: PCI device 8086:7112 PCI subsystem 1af4:1100 IRQ 23. BAR4: I/O at 0x6060 [0x607f]. id "" ... 
538597.212494: kvm_exit: reason VMRESUME rip 0xf80004250115 info 0 0 538597.212499: kvm_entry:vcpu 0 538597.212506: kvm_exit: reason IO_INSTRUCTION rip 0xf80e02ac6a27 info 60620009 0 538597.212507: kvm_nested_vmexit:rip f80e02ac6a27 reason IO_INSTRUCTION info1 60620009 info2 0 int_info 0 int_info_err 0 538597.212509: kvm_fpu: unload 538597.212511: kvm_userspace_exit: reason KVM_EXIT_IO (2) 538597.212516: kvm_fpu: load 538597.212518: kvm_pio: pio_read at 0x6062 size 2 count 1 val 0x1 538597.212519: kvm_entry:vcpu 0 538597.212523: kvm_exit: reason IO_INSTRUCTION rip 0xf80e02ac6a61 info 60640009 0 538597.212523: kvm_nested_vmexit:rip f80e02ac6a61 reason IO_INSTRUCTION info1 60640009 info2 0 int_info 0 int_info_err 0 538597.212524: kvm_fpu: unload 538597.212525: kvm_userspace_exit: reason KVM_EXIT_IO (2) 538597.212528: kvm_fpu: load 538597.212528: kvm_pio: pio_read at 0x6064 size 2 count 1 val 0xf ... and this happens after EOI from L1. > >> >> Gory details: >> https://urldefense.proofpoint.com/v2/url?u=https-3A__www.spinics.net_lists_kvm_msg184484.html=DwIDAg=RoP1YumCXCgaWHvlZYR8PZh8Bv7qIrMUB65eapI_JnE=Jk6Q8nNzkQ6LJ6g42qARkg6ryIDGQr-yKXPNGZbpTx0=Q0Ico0Nb_DGRDrDgXkjkRr-xjzIbOLteVOhDJXBD_pU=d_H4_-qzqGvyi8X7g_KA0hZ5a8zjfHQhe1BhLPIokcA= > > Maybe worth to note that one should read the entire thread to understand the > analysis. > Sure. >> >> Turns out we were dealing with similar issues before; in-kernel IOAPIC >> implementation has commit 184564efae4d ("kvm: ioapic: conditionally delay >> irq delivery duringeoi broadcast") which describes a very similar issue. >> >> Steal the idea from the above mentioned commit for IOAPIC implementation in >> QEMU. SUCCESSIVE_IRQ_MAX_COUNT, delay and the comment are borrowed as well. 
>> >> Signed-off-by: Vitaly Kuznetsov >> --- >> hw/intc/ioapic.c | 43 ++- >> hw/intc/trace-events | 1 + >> include/hw/i386/ioapic_internal.h | 3 +++ >> 3 files changed, 46 insertions(+), 1 deletion(-) >> >> diff --git a/hw/intc/ioapic.c b/hw/intc/ioapic.c >> index 9d75f84d3b..daf45cc8a8 100644 >> --- a/hw/intc/ioapic.c >> +++ b/hw/intc/ioapic.c >> @@ -139,6 +139,15 @@ static void ioapic_service(IOAPICCommonState *s) >>} >> } >> >> +#define SUCCESSIVE_IRQ_MAX_COUNT 1 >> + >> +static void ioapic_timer(void *opaque) >> +{ >> +IOAPICCommonState *s = opaque; >> + >> +ioapic_service(s); >> +} >> + >> static void ioapic_set_irq(void *opaque, int vector, int level) >> { >>IOAPICCommonState *s = opaque; >> @@ -227,7 +236,28 @@ void ioapic_eoi_broadcast(int vector) >>trace_ioapic_clear_remote_irr(n, vector); >>s->ioredtbl[n] = entry & ~IOAPIC_LVT_REMOTE_IRR; > > This clear of remote-irr should happen only for level-triggered interrupts. > So we can make the code here more structured like KVM’s > __kvm_ioapic_update_eoi(). > It als
Re: [Qemu-devel] [PATCH] ioapic: allow buggy guests mishandling level-triggered interrupts to make progress
Paolo Bonzini writes: > On 01/04/19 15:36, Vitaly Kuznetsov wrote: ... >> static void ioapic_set_irq(void *opaque, int vector, int level) >> { >> IOAPICCommonState *s = opaque; >> @@ -227,7 +236,28 @@ void ioapic_eoi_broadcast(int vector) >> trace_ioapic_clear_remote_irr(n, vector); >> s->ioredtbl[n] = entry & ~IOAPIC_LVT_REMOTE_IRR; >> if (!(entry & IOAPIC_LVT_MASKED) && (s->irr & (1 << n))) { >> -ioapic_service(s); >> +bool level = ((entry >> IOAPIC_LVT_TRIGGER_MODE_SHIFT) >> & 1) >> +== IOAPIC_TRIGGER_LEVEL; >> + >> +++s->irq_reassert[vector]; >> +if (!level || >> +s->irq_reassert[vector] < SUCCESSIVE_IRQ_MAX_COUNT) >> { >> +ioapic_service(s); >> +} else { >> +/* >> + * Real hardware does not deliver the interrupt >> + * immediately during eoi broadcast, and this lets a >> + * buggy guest make slow progress even if it does >> not >> + * correctly handle a level-triggered interrupt. >> Emulate >> + * this behavior if we detect an interrupt storm. >> + */ >> +trace_ioapic_eoi_delayed_reassert(vector); >> +timer_mod(s->timer, >> + qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) + >> + NANOSECONDS_PER_SECOND / 100); > > Should this be done only if the timer isn't pending? Hm, maybe ... but how can this happen? To get here we need remote IRR bit and we clear it so someone needs to re-set it. The source probably won't be doing this (it is a level-triggered interrupt and it is already pending - why re-asserting?) but even if it does ioapic_service(s) will be called and when our timer fires we will just do nothing (consecutive ioapic_service() doesn't hurt). -- Vitaly
Re: [Qemu-devel] [PATCH] ioapic: allow buggy guests mishandling level-triggered interrupts to make progress
Liran Alon writes: >> On 1 Apr 2019, at 18:58, Vitaly Kuznetsov wrote: >> >> Liran Alon writes: >> >>>> On 1 Apr 2019, at 16:36, Vitaly Kuznetsov wrote: >>>> >>>> It was found that Hyper-V 2016 on KVM in some configurations (q35 machine + >>>> piix4-usb-uhci) hangs on boot. Trace analysis led us to the conclusion that >>>> it is mishandling level-triggered interrupt performing EOI without fixing >>>> the root cause. >>> >>> I would rephrase as: >>> It was found that Hyper-V 2016 on KVM in some configurations (q35 machine + >>> piix4-usb-uhci) hangs on boot. >>> Root-cause was that one of Hyper-V level-triggered interrupt handler >>> performs EOI before fixing the root-cause. >>> This results in IOAPIC keep re-raising the level-triggered interrupt >>> after EOI because irq-line remains asserted. >> >> Ok, thanks for the suggestion. >> >>> >>>> This causes immediate re-assertion and L2 VM (which is >>>> supposedly expected to fix the cause of the interrupt) is not making any >>>> progress. >>> >>> I don’t know why you assume this. >>> From the trace we have examined, it seems that the EOI is performed by >>> Hyper-V and not it’s guest >>> This means that the handler for this level-triggered interrupt is on >>> Hyper-V and not it’s guest. >> >> If you let it run (with e.g. this patch or by setting preemtion timer > >> 0) you'll see that MMIO write fixing the cause of the interrupt is >> happening from L2: >> >> (qemu) info pci: >> >> Bus 0, device 4, function 0: >>USB controller: PCI device 8086:7112 >> PCI subsystem 1af4:1100 >> IRQ 23. >> BAR4: I/O at 0x6060 [0x607f]. >> id "" >> >> ... 
>> 538597.212494: kvm_exit: reason VMRESUME rip 0xf80004250115 >> info 0 0 >> 538597.212499: kvm_entry:vcpu 0 >> 538597.212506: kvm_exit: reason IO_INSTRUCTION rip >> 0xf80e02ac6a27 info 60620009 0 >> 538597.212507: kvm_nested_vmexit:rip f80e02ac6a27 reason >> IO_INSTRUCTION info1 60620009 info2 0 int_info 0 int_info_err 0 >> 538597.212509: kvm_fpu: unload >> 538597.212511: kvm_userspace_exit: reason KVM_EXIT_IO (2) >> 538597.212516: kvm_fpu: load >> 538597.212518: kvm_pio: pio_read at 0x6062 size 2 count 1 val >> 0x1 >> 538597.212519: kvm_entry:vcpu 0 >> 538597.212523: kvm_exit: reason IO_INSTRUCTION rip >> 0xf80e02ac6a61 info 60640009 0 >> 538597.212523: kvm_nested_vmexit:rip f80e02ac6a61 reason >> IO_INSTRUCTION info1 60640009 info2 0 int_info 0 int_info_err 0 >> 538597.212524: kvm_fpu: unload >> 538597.212525: kvm_userspace_exit: reason KVM_EXIT_IO (2) >> 538597.212528: kvm_fpu: load >> 538597.212528: kvm_pio: pio_read at 0x6064 size 2 count 1 val >> 0xf >> ... >> >> and this happens after EOI from L1. > > I see that the L2 guest is doing I/O read to the device BAR4 but do these > reads lower the irq-line? > I would expect a write to lower the irq-line. > > Looking at uhci_port_read(), it seems that offset 0x02 and 0x04 just return a > value. Doesn’t lower irq-line. > (Even though offset 0x04 returns the "interrupt enable register”). > In contrast, looking at uhci_port_write(), it seems that writing to either > offset 0x02 or 0x04 could lower the irq-line. > So you should look for pio_write to port 0x6062 or 0x6064 to see who > is actually responsible for lowering the irq-line. Sorry, I probably like trimming traces too much. 
Writes happen too: [005] 538597.212532: kvm_exit: reason IO_INSTRUCTION rip 0xf80e02ac6a8f info 60620001 0 [005] 538597.212533: kvm_nested_vmexit:rip f80e02ac6a8f reason IO_INSTRUCTION info1 60620001 info2 0 int_info 0 int_info_err 0 [005] 538597.212534: kvm_pio: pio_write at 0x6062 size 2 count 1 val 0x1 [005] 538597.212534: kvm_fpu: unload [005] 538597.212535: kvm_userspace_exit: reason KVM_EXIT_IO (2) [005] 538597.212543: kvm_fpu: load [005] 538597.212544: kvm_entry:vcpu 0 [005] 538597.212547: kvm_exit: reason IO_INSTRUCTION rip 0xf80e02ac6a9c info 60640001 0 [005] 538597.212548: kvm_nested_vmexit:rip f80e02ac6a9c reason IO_INSTRUCTION info1 60640001 info2 0 int_info 0 int_info_err 0 [005] 538597.212548: kvm_pio: pio_write at 0x6064 size 2 count 1 val 0x0 [005] 538597.212549: kvm_fpu: unload [005] 538597.212550: kvm_userspace_exit: reason KVM_EXIT_IO (2) and this likely lowers the line. I honestly have no idea how this all works on real hw but the comment in kernel ioapic says something about non-immediate delivery of the reasserted interrupt. True or not, this gives me some peace of mind :-) -- Vitaly
[Qemu-devel] [PATCH] ioapic: allow buggy guests mishandling level-triggered interrupts to make progress
It was found that Hyper-V 2016 on KVM in some configurations (q35 machine + piix4-usb-uhci) hangs on boot. Trace analysis led us to the conclusion that it is mishandling level-triggered interrupt performing EOI without fixing the root cause. This causes immediate re-assertion and L2 VM (which is supposedly expected to fix the cause of the interrupt) is not making any progress. Gory details: https://www.spinics.net/lists/kvm/msg184484.html Turns out we were dealing with similar issues before; in-kernel IOAPIC implementation has commit 184564efae4d ("kvm: ioapic: conditionally delay irq delivery duringeoi broadcast") which describes a very similar issue. Steal the idea from the above mentioned commit for IOAPIC implementation in QEMU. SUCCESSIVE_IRQ_MAX_COUNT, delay and the comment are borrowed as well. Signed-off-by: Vitaly Kuznetsov --- hw/intc/ioapic.c | 43 ++- hw/intc/trace-events | 1 + include/hw/i386/ioapic_internal.h | 3 +++ 3 files changed, 46 insertions(+), 1 deletion(-) diff --git a/hw/intc/ioapic.c b/hw/intc/ioapic.c index 9d75f84d3b..daf45cc8a8 100644 --- a/hw/intc/ioapic.c +++ b/hw/intc/ioapic.c @@ -139,6 +139,15 @@ static void ioapic_service(IOAPICCommonState *s) } } +#define SUCCESSIVE_IRQ_MAX_COUNT 1 + +static void ioapic_timer(void *opaque) +{ +IOAPICCommonState *s = opaque; + +ioapic_service(s); +} + static void ioapic_set_irq(void *opaque, int vector, int level) { IOAPICCommonState *s = opaque; @@ -227,7 +236,28 @@ void ioapic_eoi_broadcast(int vector) trace_ioapic_clear_remote_irr(n, vector); s->ioredtbl[n] = entry & ~IOAPIC_LVT_REMOTE_IRR; if (!(entry & IOAPIC_LVT_MASKED) && (s->irr & (1 << n))) { -ioapic_service(s); +bool level = ((entry >> IOAPIC_LVT_TRIGGER_MODE_SHIFT) & 1) +== IOAPIC_TRIGGER_LEVEL; + +++s->irq_reassert[vector]; +if (!level || +s->irq_reassert[vector] < SUCCESSIVE_IRQ_MAX_COUNT) { +ioapic_service(s); +} else { +/* + * Real hardware does not deliver the interrupt + * immediately during eoi broadcast, and this lets a + * buggy 
guest make slow progress even if it does not + * correctly handle a level-triggered interrupt. Emulate + * this behavior if we detect an interrupt storm. + */ +trace_ioapic_eoi_delayed_reassert(vector); +timer_mod(s->timer, + qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) + + NANOSECONDS_PER_SECOND / 100); +} +} else { +s->irq_reassert[vector] = 0; } } } @@ -401,6 +431,8 @@ static void ioapic_realize(DeviceState *dev, Error **errp) memory_region_init_io(&s->io_memory, OBJECT(s), &ioapic_io_ops, s, "ioapic", 0x1000); +s->timer = timer_new_ns(QEMU_CLOCK_VIRTUAL, ioapic_timer, s); + qdev_init_gpio_in(dev, ioapic_set_irq, IOAPIC_NUM_PINS); ioapics[ioapic_no] = s; @@ -408,6 +440,14 @@ static void ioapic_realize(DeviceState *dev, Error **errp) qemu_add_machine_init_done_notifier(&s->machine_done); } +static void ioapic_unrealize(DeviceState *dev, Error **errp) +{ +IOAPICCommonState *s = IOAPIC_COMMON(dev); + +timer_del(s->timer); +timer_free(s->timer); +} + static Property ioapic_properties[] = { DEFINE_PROP_UINT8("version", IOAPICCommonState, version, IOAPIC_VER_DEF), DEFINE_PROP_END_OF_LIST(), @@ -419,6 +459,7 @@ static void ioapic_class_init(ObjectClass *klass, void *data) DeviceClass *dc = DEVICE_CLASS(klass); k->realize = ioapic_realize; +k->unrealize = ioapic_unrealize; /* * If APIC is in kernel, we need to update the kernel cache after * migration, otherwise first 24 gsi routes will be invalid. 
diff --git a/hw/intc/trace-events b/hw/intc/trace-events index a28bdce925..90c9d07c1a 100644 --- a/hw/intc/trace-events +++ b/hw/intc/trace-events @@ -25,6 +25,7 @@ apic_mem_writel(uint64_t addr, uint32_t val) "0x%"PRIx64" = 0x%08x" ioapic_set_remote_irr(int n) "set remote irr for pin %d" ioapic_clear_remote_irr(int n, int vector) "clear remote irr for pin %d vector %d" ioapic_eoi_broadcast(int vector) "EOI broadcast for vector %d" +ioapic_eoi_delayed_reassert(int vector) "delayed reassert on EOI broadcast for vector %d" ioapic_mem_read(uint8_t addr, uint8_t regsel, uint8_t size, uint32_t val) "ioapic
[Qemu-devel] [Bug 1813165] Re: KVM internal error. Suberror: 1 emulation failure
Thank you David, I see the issue now. -- You received this bug notification because you are a member of qemu- devel-ml, which is subscribed to QEMU. https://bugs.launchpad.net/bugs/1813165 Title: KVM internal error. Suberror: 1 emulation failure Status in QEMU: New Bug description: Hello Devs. Having problems getting VM to run with qemu 3.1.0. I should mention it's a nested configuration. 2019-01-24 13:46:08.648+: starting up libvirt version: 4.10.0, qemu version: 3.1.0, kernel: 4.14.94, hostname: one LC_ALL=C PATH=/bin:/sbin:/bin:/sbin:/usr/bin:/usr/sbin:/usr/bin:/usr/sbin:/usr/local/bin:/usr/local/sbin:/opt/bin HOME=/root USER=root QEMU_AUDIO_DRV=none /usr/bin/kvm -name guest=one-266,debug-threads=on -S -object secret,id=masterKey0,format=raw,file=/var/lib/libvirt/qemu/domain-1-one-266/master-key.aes -machine pc-i440fx-2.9,accel=kvm,usb=off,dump-guest-core=off -cpu Skylake-Client-IBRS,ss=on,hypervisor=on,tsc_adjust=on,clflushopt=on,ssbd=on,xsaves=on,pdpe1gb=on -m 1024 -realtime mlock=off -smp 2,sockets=2,cores=1,threads=1 -uuid b219b45d-a2f0-4128-a948-8673a7abf968 -no-user-config -nodefaults -chardev socket,id=charmonitor,fd=21,server,nowait -mon chardev=charmonitor,id=monitor,mode=control -rtc base=utc -no-shutdown -boot strict=on -device piix3-usb-uhci,id=usb,bus=pci.0,addr=0x1.0x2 -drive file=/var/lib/one//datastores/0/266/disk.0,format=qcow2,if=none,id=drive-virtio-disk0,cache=none -device virtio-blk-pci,scsi=off,bus=pci.0,addr=0x4,drive=drive-virtio-disk0,id=virtio-disk0,bootindex=1,write-cache=on -drive file=/var/lib/one//datastores/0/266/disk.1,format=raw,if=none,id=drive-ide0-0-0,readonly=on -device ide-cd,bus=ide.0,unit=0,drive=drive-ide0-0-0,id=ide0-0-0 -netdev tap,fd=23,id=hostnet0 -device rtl8139,netdev=hostnet0,id=net0,mac=02:00:00:76:69:85,bus=pci.0,addr=0x3 -chardev pty,id=charserial0 -device isa-serial,chardev=charserial0,id=serial0 -vnc 0.0.0.0:266 -device cirrus-vga,id=video0,bus=pci.0,addr=0x2 -device 
virtio-balloon-pci,id=balloon0,bus=pci.0,addr=0x5 -sandbox on,obsolete=deny,elevateprivileges=deny,spawn=deny,resourcecontrol=deny -msg timestamp=on char device redirected to /dev/pts/1 (label charserial0) KVM internal error. Suberror: 1 emulation failure EAX=0001 EBX=000f7c2c ECX=0001 EDX=0001 ESI=6a26 EDI=3ffbdc48 EBP=69e6 ESP=000a8000 EIP=000fd057 EFL=00010016 [AP-] CPL=0 II=0 A20=1 SMM=1 HLT=0 ES =0010 00c09300 CS = 0fff 00809b00 SS =0010 00c09300 DS =0010 00c09300 FS =0010 00c09300 GS =0010 00c09300 LDT= 8200 TR = 8b00 GDT= 10387cfe fe6c IDT= 0010387c 3810 CR0=0010 CR2= CR3= CR4= DR0= DR1= DR2= DR3= DR6=fffecffc DR7=0e1e0400 EFER= Code=cb 66 ba 4d d0 0f 00 e9 c8 fe bc 00 80 0a 00 e8 31 3a ff ff <0f> aa fa fc 66 ba 66 d0 0f 00 e9 b1 fe f3 90 f0 0f ba 2d ac 3b 0f 00 00 72 f3 8b 25 a8 3b 2019-01-24T13:47:39.383366Z kvm: terminating on signal 15 from pid 2708 (/usr/sbin/libvirtd) Someone has an idea whats going wrong here? thanks and cheers t. To manage notifications about this bug go to: https://bugs.launchpad.net/qemu/+bug/1813165/+subscriptions
[Qemu-devel] [Bug 1813165] Re: KVM internal error. Suberror: 1 emulation failure
Thomas, Albert, David, I'm having a hard time trying to reproduce the issue in my environment; could you please provide your qemu command lines for both L0 and L1? It would also be great if you could try to come up with some 'minimal' configuration (my guess is that in L1 having just "qemu-system-x86_64 -machine q35,smm=on,accel=kvm -cpu host -vnc :0" would do). Thanks! -- You received this bug notification because you are a member of qemu-devel-ml, which is subscribed to QEMU. https://bugs.launchpad.net/bugs/1813165 Title: KVM internal error. Suberror: 1 emulation failure Status in QEMU: New Bug description: Hello Devs. Having problems getting VM to run with qemu 3.1.0. I should mention it's a nested configuration. 2019-01-24 13:46:08.648+: starting up libvirt version: 4.10.0, qemu version: 3.1.0, kernel: 4.14.94, hostname: one LC_ALL=C PATH=/bin:/sbin:/bin:/sbin:/usr/bin:/usr/sbin:/usr/bin:/usr/sbin:/usr/local/bin:/usr/local/sbin:/opt/bin HOME=/root USER=root QEMU_AUDIO_DRV=none /usr/bin/kvm -name guest=one-266,debug-threads=on -S -object secret,id=masterKey0,format=raw,file=/var/lib/libvirt/qemu/domain-1-one-266/master-key.aes -machine pc-i440fx-2.9,accel=kvm,usb=off,dump-guest-core=off -cpu Skylake-Client-IBRS,ss=on,hypervisor=on,tsc_adjust=on,clflushopt=on,ssbd=on,xsaves=on,pdpe1gb=on -m 1024 -realtime mlock=off -smp 2,sockets=2,cores=1,threads=1 -uuid b219b45d-a2f0-4128-a948-8673a7abf968 -no-user-config -nodefaults -chardev socket,id=charmonitor,fd=21,server,nowait -mon chardev=charmonitor,id=monitor,mode=control -rtc base=utc -no-shutdown -boot strict=on -device piix3-usb-uhci,id=usb,bus=pci.0,addr=0x1.0x2 -drive file=/var/lib/one//datastores/0/266/disk.0,format=qcow2,if=none,id=drive-virtio-disk0,cache=none -device virtio-blk-pci,scsi=off,bus=pci.0,addr=0x4,drive=drive-virtio-disk0,id=virtio-disk0,bootindex=1,write-cache=on -drive file=/var/lib/one//datastores/0/266/disk.1,format=raw,if=none,id=drive-ide0-0-0,readonly=on -device 
ide-cd,bus=ide.0,unit=0,drive=drive-ide0-0-0,id=ide0-0-0 -netdev tap,fd=23,id=hostnet0 -device rtl8139,netdev=hostnet0,id=net0,mac=02:00:00:76:69:85,bus=pci.0,addr=0x3 -chardev pty,id=charserial0 -device isa-serial,chardev=charserial0,id=serial0 -vnc 0.0.0.0:266 -device cirrus-vga,id=video0,bus=pci.0,addr=0x2 -device virtio-balloon-pci,id=balloon0,bus=pci.0,addr=0x5 -sandbox on,obsolete=deny,elevateprivileges=deny,spawn=deny,resourcecontrol=deny -msg timestamp=on char device redirected to /dev/pts/1 (label charserial0) KVM internal error. Suberror: 1 emulation failure EAX=0001 EBX=000f7c2c ECX=0001 EDX=0001 ESI=6a26 EDI=3ffbdc48 EBP=69e6 ESP=000a8000 EIP=000fd057 EFL=00010016 [AP-] CPL=0 II=0 A20=1 SMM=1 HLT=0 ES =0010 00c09300 CS = 0fff 00809b00 SS =0010 00c09300 DS =0010 00c09300 FS =0010 00c09300 GS =0010 00c09300 LDT= 8200 TR = 8b00 GDT= 10387cfe fe6c IDT= 0010387c 3810 CR0=0010 CR2= CR3= CR4= DR0= DR1= DR2= DR3= DR6=fffecffc DR7=0e1e0400 EFER= Code=cb 66 ba 4d d0 0f 00 e9 c8 fe bc 00 80 0a 00 e8 31 3a ff ff <0f> aa fa fc 66 ba 66 d0 0f 00 e9 b1 fe f3 90 f0 0f ba 2d ac 3b 0f 00 00 72 f3 8b 25 a8 3b 2019-01-24T13:47:39.383366Z kvm: terminating on signal 15 from pid 2708 (/usr/sbin/libvirtd) Someone has an idea whats going wrong here? thanks and cheers t. To manage notifications about this bug go to: https://bugs.launchpad.net/qemu/+bug/1813165/+subscriptions
[Qemu-devel] [Bug 1813165] Re: KVM internal error. Suberror: 1 emulation failure
I sent a patch which is supposed to fix the issue: https://marc.info/?l=kvm&m=155085391830663&w=2 it would be great if someone could give it a spin! -- You received this bug notification because you are a member of qemu-devel-ml, which is subscribed to QEMU. https://bugs.launchpad.net/bugs/1813165 Title: KVM internal error. Suberror: 1 emulation failure Status in QEMU: New Bug description: Hello Devs. Having problems getting VM to run with qemu 3.1.0. I should mention it's a nested configuration. 2019-01-24 13:46:08.648+: starting up libvirt version: 4.10.0, qemu version: 3.1.0, kernel: 4.14.94, hostname: one LC_ALL=C PATH=/bin:/sbin:/bin:/sbin:/usr/bin:/usr/sbin:/usr/bin:/usr/sbin:/usr/local/bin:/usr/local/sbin:/opt/bin HOME=/root USER=root QEMU_AUDIO_DRV=none /usr/bin/kvm -name guest=one-266,debug-threads=on -S -object secret,id=masterKey0,format=raw,file=/var/lib/libvirt/qemu/domain-1-one-266/master-key.aes -machine pc-i440fx-2.9,accel=kvm,usb=off,dump-guest-core=off -cpu Skylake-Client-IBRS,ss=on,hypervisor=on,tsc_adjust=on,clflushopt=on,ssbd=on,xsaves=on,pdpe1gb=on -m 1024 -realtime mlock=off -smp 2,sockets=2,cores=1,threads=1 -uuid b219b45d-a2f0-4128-a948-8673a7abf968 -no-user-config -nodefaults -chardev socket,id=charmonitor,fd=21,server,nowait -mon chardev=charmonitor,id=monitor,mode=control -rtc base=utc -no-shutdown -boot strict=on -device piix3-usb-uhci,id=usb,bus=pci.0,addr=0x1.0x2 -drive file=/var/lib/one//datastores/0/266/disk.0,format=qcow2,if=none,id=drive-virtio-disk0,cache=none -device virtio-blk-pci,scsi=off,bus=pci.0,addr=0x4,drive=drive-virtio-disk0,id=virtio-disk0,bootindex=1,write-cache=on -drive file=/var/lib/one//datastores/0/266/disk.1,format=raw,if=none,id=drive-ide0-0-0,readonly=on -device ide-cd,bus=ide.0,unit=0,drive=drive-ide0-0-0,id=ide0-0-0 -netdev tap,fd=23,id=hostnet0 -device rtl8139,netdev=hostnet0,id=net0,mac=02:00:00:76:69:85,bus=pci.0,addr=0x3 -chardev pty,id=charserial0 -device isa-serial,chardev=charserial0,id=serial0 -vnc 
0.0.0.0:266 -device cirrus-vga,id=video0,bus=pci.0,addr=0x2 -device virtio-balloon-pci,id=balloon0,bus=pci.0,addr=0x5 -sandbox on,obsolete=deny,elevateprivileges=deny,spawn=deny,resourcecontrol=deny -msg timestamp=on char device redirected to /dev/pts/1 (label charserial0) KVM internal error. Suberror: 1 emulation failure EAX=0001 EBX=000f7c2c ECX=0001 EDX=0001 ESI=6a26 EDI=3ffbdc48 EBP=69e6 ESP=000a8000 EIP=000fd057 EFL=00010016 [AP-] CPL=0 II=0 A20=1 SMM=1 HLT=0 ES =0010 00c09300 CS = 0fff 00809b00 SS =0010 00c09300 DS =0010 00c09300 FS =0010 00c09300 GS =0010 00c09300 LDT= 8200 TR = 8b00 GDT= 10387cfe fe6c IDT= 0010387c 3810 CR0=0010 CR2= CR3= CR4= DR0= DR1= DR2= DR3= DR6=fffecffc DR7=0e1e0400 EFER= Code=cb 66 ba 4d d0 0f 00 e9 c8 fe bc 00 80 0a 00 e8 31 3a ff ff <0f> aa fa fc 66 ba 66 d0 0f 00 e9 b1 fe f3 90 f0 0f ba 2d ac 3b 0f 00 00 72 f3 8b 25 a8 3b 2019-01-24T13:47:39.383366Z kvm: terminating on signal 15 from pid 2708 (/usr/sbin/libvirtd) Someone has an idea whats going wrong here? thanks and cheers t. To manage notifications about this bug go to: https://bugs.launchpad.net/qemu/+bug/1813165/+subscriptions
[Qemu-devel] [Bug 1813165] Re: KVM internal error. Suberror: 1 emulation failure
Ack, thanks for the bisect! It seems something was overlooked when we did host/guest mmu split. I'll try to investigate. -- You received this bug notification because you are a member of qemu- devel-ml, which is subscribed to QEMU. https://bugs.launchpad.net/bugs/1813165 Title: KVM internal error. Suberror: 1 emulation failure Status in QEMU: New Bug description: Hello Devs. Having problems getting VM to run with qemu 3.1.0. I should mention it's a nested configuration. 2019-01-24 13:46:08.648+: starting up libvirt version: 4.10.0, qemu version: 3.1.0, kernel: 4.14.94, hostname: one LC_ALL=C PATH=/bin:/sbin:/bin:/sbin:/usr/bin:/usr/sbin:/usr/bin:/usr/sbin:/usr/local/bin:/usr/local/sbin:/opt/bin HOME=/root USER=root QEMU_AUDIO_DRV=none /usr/bin/kvm -name guest=one-266,debug-threads=on -S -object secret,id=masterKey0,format=raw,file=/var/lib/libvirt/qemu/domain-1-one-266/master-key.aes -machine pc-i440fx-2.9,accel=kvm,usb=off,dump-guest-core=off -cpu Skylake-Client-IBRS,ss=on,hypervisor=on,tsc_adjust=on,clflushopt=on,ssbd=on,xsaves=on,pdpe1gb=on -m 1024 -realtime mlock=off -smp 2,sockets=2,cores=1,threads=1 -uuid b219b45d-a2f0-4128-a948-8673a7abf968 -no-user-config -nodefaults -chardev socket,id=charmonitor,fd=21,server,nowait -mon chardev=charmonitor,id=monitor,mode=control -rtc base=utc -no-shutdown -boot strict=on -device piix3-usb-uhci,id=usb,bus=pci.0,addr=0x1.0x2 -drive file=/var/lib/one//datastores/0/266/disk.0,format=qcow2,if=none,id=drive-virtio-disk0,cache=none -device virtio-blk-pci,scsi=off,bus=pci.0,addr=0x4,drive=drive-virtio-disk0,id=virtio-disk0,bootindex=1,write-cache=on -drive file=/var/lib/one//datastores/0/266/disk.1,format=raw,if=none,id=drive-ide0-0-0,readonly=on -device ide-cd,bus=ide.0,unit=0,drive=drive-ide0-0-0,id=ide0-0-0 -netdev tap,fd=23,id=hostnet0 -device rtl8139,netdev=hostnet0,id=net0,mac=02:00:00:76:69:85,bus=pci.0,addr=0x3 -chardev pty,id=charserial0 -device isa-serial,chardev=charserial0,id=serial0 -vnc 0.0.0.0:266 -device 
cirrus-vga,id=video0,bus=pci.0,addr=0x2 -device virtio-balloon-pci,id=balloon0,bus=pci.0,addr=0x5 -sandbox on,obsolete=deny,elevateprivileges=deny,spawn=deny,resourcecontrol=deny -msg timestamp=on char device redirected to /dev/pts/1 (label charserial0) KVM internal error. Suberror: 1 emulation failure EAX=0001 EBX=000f7c2c ECX=0001 EDX=0001 ESI=6a26 EDI=3ffbdc48 EBP=69e6 ESP=000a8000 EIP=000fd057 EFL=00010016 [AP-] CPL=0 II=0 A20=1 SMM=1 HLT=0 ES =0010 00c09300 CS = 0fff 00809b00 SS =0010 00c09300 DS =0010 00c09300 FS =0010 00c09300 GS =0010 00c09300 LDT= 8200 TR = 8b00 GDT= 10387cfe fe6c IDT= 0010387c 3810 CR0=0010 CR2= CR3= CR4= DR0= DR1= DR2= DR3= DR6=fffecffc DR7=0e1e0400 EFER= Code=cb 66 ba 4d d0 0f 00 e9 c8 fe bc 00 80 0a 00 e8 31 3a ff ff <0f> aa fa fc 66 ba 66 d0 0f 00 e9 b1 fe f3 90 f0 0f ba 2d ac 3b 0f 00 00 72 f3 8b 25 a8 3b 2019-01-24T13:47:39.383366Z kvm: terminating on signal 15 from pid 2708 (/usr/sbin/libvirtd) Someone has an idea whats going wrong here? thanks and cheers t. To manage notifications about this bug go to: https://bugs.launchpad.net/qemu/+bug/1813165/+subscriptions
Re: [Qemu-devel] [PATCH RFC 4/8] i386/kvm: Implement 'hv-all' pass-through mode
Eduardo Habkost writes: > > If libvirt is involved, it's much simpler and safer to use > something like <cpu mode='host-model'/>, which generates a > migration-safe CPU configuration based on the current host. Live > migration support with "-cpu host" is only useful for experiments > and carefully controlled environments. > > Is there a real need to make hv-all migratable? What would be > the use case, exactly? If there's no clear use case, I would > recommend making it a migration blocker. There's no clear use-case; I noticed that we keep adding Hyper-V enlightenments and these make Windows' life on KVM easier so we recommend enabling them all (and, with an exception for hv-evmcs, which I also don't enable with hv-all, I'm unaware of cases which would require disabling certain Hyper-V enlightenments). hv-all is mostly a convenience feature. I plan to take a look at 'host-model' to see if we can borrow some ideas from there (that would actually be ideal - build a set of 'hv-*' enlightenments based on capabilities of the current host) but I'm also not totally against keeping it the way it is and making it a migration blocker for the time being (and making it a 'developer-only' feature). -- Vitaly
Re: [Qemu-devel] [PATCH RFC 4/8] i386/kvm: Implement 'hv-all' pass-through mode
"Dr. David Alan Gilbert" writes: > I'm not sure what the equivalent bear traps are in the Hyper-V world, > but I'd be surprised if there weren't any; for example what happens > when someone upgrades one of their hosts to some minor version that > adds/removes a feature? Here we're talking about Hyper-V emulation in KVM, features only get added there, but even if it gets removed it will be detected by libvirt ... > > Also, how does libvirt figure out that the features are actually the > same - does it need a bunch of detection code? ... as I *think* it compares Feature CPUID words (and all Hyper-V features which we enable with hv-all are there). -- Vitaly
[Qemu-devel] [PATCH RFC 3/8] i386/kvm: move Hyper-V CPUID filling to hyperv_handle_properties()
Let's consolidate Hyper-V features handling in hyperv_handle_properties(). The change is necessary to support pass-through 'hv-all' mode as we'll be just copying CPUIDs from KVM instead of filling them in. Signed-off-by: Vitaly Kuznetsov --- target/i386/kvm.c | 163 +- 1 file changed, 90 insertions(+), 73 deletions(-) diff --git a/target/i386/kvm.c b/target/i386/kvm.c index dc4574daff..ed55040d9e 100644 --- a/target/i386/kvm.c +++ b/target/i386/kvm.c @@ -1038,13 +1038,25 @@ static int hv_cpuid_check_and_set(CPUState *cs, struct kvm_cpuid2 *cpuid, return 1; } -static int hyperv_handle_properties(CPUState *cs) +/* + * Fill in Hyper-V CPUIDs. Returns the number of entries filled in cpuid_ent in + * case of success, errno < 0 in case of failure and 0 when no Hyper-V + * extentions are enabled. + */ +static int hyperv_handle_properties(CPUState *cs, +struct kvm_cpuid_entry2 *cpuid_ent) { X86CPU *cpu = X86_CPU(cs); CPUX86State *env = >env; struct kvm_cpuid2 *cpuid; +struct kvm_cpuid_entry2 *c; +uint32_t signature[3]; +uint32_t cpuid_i = 0; int r = 0; +if (!hyperv_enabled(cpu)) +return 0; + if (cpu->hyperv_evmcs) { uint16_t evmcs_version; @@ -1095,9 +1107,80 @@ static int hyperv_handle_properties(CPUState *cs) /* Not exposed by KVM but needed to make CPU hotplug in Windows work */ env->features[FEAT_HYPERV_EDX] |= HV_CPU_DYNAMIC_PARTITIONING_AVAILABLE; +if (r) { +r = -ENOSYS; +goto free; +} + +c = _ent[cpuid_i++]; +c->function = HV_CPUID_VENDOR_AND_MAX_FUNCTIONS; +if (!cpu->hyperv_vendor_id) { +memcpy(signature, "Microsoft Hv", 12); +} else { +size_t len = strlen(cpu->hyperv_vendor_id); + +if (len > 12) { +error_report("hv-vendor-id truncated to 12 characters"); +len = 12; +} +memset(signature, 0, 12); +memcpy(signature, cpu->hyperv_vendor_id, len); +} +c->eax = cpu->hyperv_evmcs ? 
+HV_CPUID_NESTED_FEATURES : HV_CPUID_IMPLEMENT_LIMITS; +c->ebx = signature[0]; +c->ecx = signature[1]; +c->edx = signature[2]; + +c = _ent[cpuid_i++]; +c->function = HV_CPUID_INTERFACE; +memcpy(signature, "Hv#1\0\0\0\0\0\0\0\0", 12); +c->eax = signature[0]; +c->ebx = 0; +c->ecx = 0; +c->edx = 0; + +c = _ent[cpuid_i++]; +c->function = HV_CPUID_VERSION; +c->eax = 0x1bbc; +c->ebx = 0x00060001; + +c = _ent[cpuid_i++]; +c->function = HV_CPUID_FEATURES; +c->eax = env->features[FEAT_HYPERV_EAX]; +c->ebx = env->features[FEAT_HYPERV_EBX]; +c->edx = env->features[FEAT_HYPERV_EDX]; + +c = _ent[cpuid_i++]; +c->function = HV_CPUID_ENLIGHTMENT_INFO; +c->eax = env->features[FEAT_HV_RECOMM_EAX]; +c->ebx = cpu->hyperv_spinlock_attempts; + +c = _ent[cpuid_i++]; +c->function = HV_CPUID_IMPLEMENT_LIMITS; +c->eax = cpu->hv_max_vps; +c->ebx = 0x40; + +if (cpu->hyperv_evmcs) { +__u32 function; + +/* Create zeroed 0x4006..0x4009 leaves */ +for (function = HV_CPUID_IMPLEMENT_LIMITS + 1; + function < HV_CPUID_NESTED_FEATURES; function++) { +c = _ent[cpuid_i++]; +c->function = function; +} + +c = _ent[cpuid_i++]; +c->function = HV_CPUID_NESTED_FEATURES; +c->eax = env->features[FEAT_HV_NESTED_EAX]; +} +r = cpuid_i; + +free: g_free(cpuid); -return r ? -ENOSYS : 0; +return r; } static int hyperv_init_vcpu(X86CPU *cpu) @@ -1206,79 +1289,13 @@ int kvm_arch_init_vcpu(CPUState *cs) } /* Paravirtualization CPUIDs */ -if (hyperv_enabled(cpu)) { -c = _data.entries[cpuid_i++]; -c->function = HV_CPUID_VENDOR_AND_MAX_FUNCTIONS; -if (!cpu->hyperv_vendor_id) { -memcpy(signature, "Microsoft Hv", 12); -} else { -size_t len = strlen(cpu->hyperv_vendor_id); - -if (len > 12) { -error_report("hv-vendor-id truncated to 12 characters"); -len = 12; -} -memset(signature, 0, 12); -memcpy(signature, cpu->hyperv_vendor_id, len); -} -c->eax = cpu->hyperv_evmcs ? 
-HV_CPUID_NESTED_FEATURES : HV_CPUID_IMPLEMENT_LIMITS; -c->ebx = signature[0]; -c->ecx = signature[1]; -c->edx = signature[2]; - -c = _data.entries[cpuid_i++]; -c->function = HV_CPUID_INTERFACE; -memcpy(signature, "Hv#1\0\0\0\0\0\0\0\0", 12); -c->eax = signature[0]; -c->ebx = 0; -c->ecx = 0; -c->edx = 0
[Qemu-devel] [PATCH RFC 4/8] i386/kvm: Implement 'hv-all' pass-through mode
In many cases we just want to give Windows guests all currently supported Hyper-V enlightenments and that's where this new mode may come in handy. We pass through what was returned by KVM_GET_SUPPORTED_HV_CPUID. hv_cpuid_check_and_set() is modified to also set cpu->hyperv_* flags as we may want to check them later (and we actually do for hv_runtime, hv_synic,...). Signed-off-by: Vitaly Kuznetsov --- target/i386/cpu.c | 1 + target/i386/cpu.h | 1 + target/i386/kvm.c | 133 -- 3 files changed, 107 insertions(+), 28 deletions(-) diff --git a/target/i386/cpu.c b/target/i386/cpu.c index 2f5412592d..b776be5223 100644 --- a/target/i386/cpu.c +++ b/target/i386/cpu.c @@ -5771,6 +5771,7 @@ static Property x86_cpu_properties[] = { DEFINE_PROP_BOOL("hv-tlbflush", X86CPU, hyperv_tlbflush, false), DEFINE_PROP_BOOL("hv-evmcs", X86CPU, hyperv_evmcs, false), DEFINE_PROP_BOOL("hv-ipi", X86CPU, hyperv_ipi, false), +DEFINE_PROP_BOOL("hv-all", X86CPU, hyperv_all, false), DEFINE_PROP_BOOL("check", X86CPU, check_cpuid, true), DEFINE_PROP_BOOL("enforce", X86CPU, enforce_cpuid, false), DEFINE_PROP_BOOL("kvm", X86CPU, expose_kvm, true), diff --git a/target/i386/cpu.h b/target/i386/cpu.h index 59656a70e6..9b5c2715cc 100644 --- a/target/i386/cpu.h +++ b/target/i386/cpu.h @@ -1397,6 +1397,7 @@ struct X86CPU { bool hyperv_tlbflush; bool hyperv_evmcs; bool hyperv_ipi; +bool hyperv_all; bool check_cpuid; bool enforce_cpuid; bool expose_kvm; diff --git a/target/i386/kvm.c b/target/i386/kvm.c index ed55040d9e..b373b4ac06 100644 --- a/target/i386/kvm.c +++ b/target/i386/kvm.c @@ -647,7 +647,8 @@ static bool hyperv_enabled(X86CPU *cpu) cpu->hyperv_stimer || cpu->hyperv_reenlightenment || cpu->hyperv_tlbflush || -cpu->hyperv_ipi); +cpu->hyperv_ipi || +cpu->hyperv_all); } static int kvm_arch_set_tsc_khz(CPUState *cs) @@ -995,14 +996,15 @@ static int hv_cpuid_get_fw(struct kvm_cpuid2 *cpuid, int fw, uint32_t *r) } static int hv_cpuid_check_and_set(CPUState *cs, struct kvm_cpuid2 *cpuid, - const char *name, 
bool flag) + const char *name, bool *flag) { X86CPU *cpu = X86_CPU(cs); CPUX86State *env = >env; uint32_t r, fw, bits;; int i, j; +bool present; -if (!flag) { +if (!*flag && !cpu->hyperv_all) { return 0; } @@ -1011,6 +1013,7 @@ static int hv_cpuid_check_and_set(CPUState *cs, struct kvm_cpuid2 *cpuid, continue; } +present = true; for (j = 0; j < ARRAY_SIZE(kvm_hyperv_properties[i].flags); j++) { fw = kvm_hyperv_properties[i].flags[j].fw; bits = kvm_hyperv_properties[i].flags[j].bits; @@ -1020,17 +1023,26 @@ static int hv_cpuid_check_and_set(CPUState *cs, struct kvm_cpuid2 *cpuid, } if (hv_cpuid_get_fw(cpuid, fw, ) || (r & bits) != bits) { -fprintf(stderr, -"Hyper-V %s (requested by '%s' cpu flag) " -"is not supported by kernel\n", -kvm_hyperv_properties[i].desc, -kvm_hyperv_properties[i].name); -return 1; +if (*flag) { +fprintf(stderr, +"Hyper-V %s (requested by '%s' cpu flag) " +"is not supported by kernel\n", +kvm_hyperv_properties[i].desc, +kvm_hyperv_properties[i].name); +return 1; +} else { +present = false; +break; +} } env->features[fw] |= bits; } +if (cpu->hyperv_all && present) { +*flag = true; +} + return 0; } @@ -1038,6 +1050,43 @@ static int hv_cpuid_check_and_set(CPUState *cs, struct kvm_cpuid2 *cpuid, return 1; } +static int hv_report_missing_dep(X86CPU *cpu, const char *name, + const char *dep_name) +{ +int i, j, nprops = sizeof(kvm_hyperv_properties); + +for (i = 0; i < nprops; i++) { +if (!strcmp(kvm_hyperv_properties[i].name, name)) { +break; +} +} +for (j = 0; j < nprops; j++) { +if (!strcmp(kvm_hyperv_properties[j].name, dep_name)) { +break; +} +} + +/* + * Internal error: either feature or its dependency is not in + * kvm_hyperv_properties! + */ +if (i == nprops || j == nprops) { +return 1; +} + +if (cpu->hyperv_all) { +fprintf(stderr, "
[Qemu-devel] [PATCH RFC 7/8] i386/kvm: hv-tlbflush/ipi require hv-vpindex
The corresponding hypercalls require using VP indexes. Signed-off-by: Vitaly Kuznetsov --- target/i386/kvm.c | 6 ++ 1 file changed, 6 insertions(+) diff --git a/target/i386/kvm.c b/target/i386/kvm.c index 7461bf05dd..14d74ca9c7 100644 --- a/target/i386/kvm.c +++ b/target/i386/kvm.c @@ -1183,6 +1183,12 @@ static int hyperv_handle_properties(CPUState *cs, if (cpu->hyperv_stimer && !cpu->hyperv_time) { r |= hv_report_missing_dep(cpu, "hv-stimer", "hv-time"); } +if (cpu->hyperv_tlbflush && !cpu->hyperv_vpindex) { +r |= hv_report_missing_dep(cpu, "hv-tlbflush", "hv-vpindex"); +} +if (cpu->hyperv_ipi && !cpu->hyperv_vpindex) { +r |= hv_report_missing_dep(cpu, "hv-ipi", "hv-vpindex"); +} /* Not exposed by KVM but needed to make CPU hotplug in Windows work */ env->features[FEAT_HYPERV_EDX] |= HV_CPU_DYNAMIC_PARTITIONING_AVAILABLE; -- 2.20.1
[Qemu-devel] [PATCH RFC 1/8] Update linux headers (5.0-rc2)
Signed-off-by: Vitaly Kuznetsov --- include/standard-headers/drm/drm_fourcc.h | 63 + include/standard-headers/linux/ethtool.h | 19 +- .../linux/input-event-codes.h | 19 + include/standard-headers/linux/pci_regs.h |1 + .../standard-headers/linux/virtio_balloon.h |8 + include/standard-headers/linux/virtio_blk.h | 54 + .../standard-headers/linux/virtio_config.h|3 + include/standard-headers/linux/virtio_gpu.h | 18 + include/standard-headers/linux/virtio_ring.h | 52 + linux-headers/asm-arm/unistd-common.h |1 + linux-headers/asm-arm64/unistd.h |1 + linux-headers/asm-generic/unistd.h| 10 +- linux-headers/asm-mips/sgidefs.h |8 - linux-headers/asm-mips/unistd.h | 1074 + linux-headers/asm-mips/unistd_n64.h | 334 + linux-headers/asm-mips/unistd_o32.h | 374 ++ linux-headers/asm-powerpc/unistd.h| 389 +- linux-headers/asm-powerpc/unistd_32.h | 381 ++ linux-headers/asm-powerpc/unistd_64.h | 372 ++ linux-headers/linux/kvm.h | 29 + linux-headers/linux/vfio.h| 92 ++ linux-headers/linux/vhost.h | 113 +- linux-headers/linux/vhost_types.h | 128 ++ scripts/update-linux-headers.sh | 10 +- 24 files changed, 1963 insertions(+), 1590 deletions(-) create mode 100644 linux-headers/asm-mips/unistd_n64.h create mode 100644 linux-headers/asm-mips/unistd_o32.h create mode 100644 linux-headers/asm-powerpc/unistd_32.h create mode 100644 linux-headers/asm-powerpc/unistd_64.h create mode 100644 linux-headers/linux/vhost_types.h diff --git a/include/standard-headers/drm/drm_fourcc.h b/include/standard-headers/drm/drm_fourcc.h index b53f8d7c8c..44490607f9 100644 --- a/include/standard-headers/drm/drm_fourcc.h +++ b/include/standard-headers/drm/drm_fourcc.h @@ -29,11 +29,50 @@ extern "C" { #endif +/** + * DOC: overview + * + * In the DRM subsystem, framebuffer pixel formats are described using the + * fourcc codes defined in `include/uapi/drm/drm_fourcc.h`. 
In addition to the + * fourcc code, a Format Modifier may optionally be provided, in order to + * further describe the buffer's format - for example tiling or compression. + * + * Format Modifiers + * + * + * Format modifiers are used in conjunction with a fourcc code, forming a + * unique fourcc:modifier pair. This format:modifier pair must fully define the + * format and data layout of the buffer, and should be the only way to describe + * that particular buffer. + * + * Having multiple fourcc:modifier pairs which describe the same layout should + * be avoided, as such aliases run the risk of different drivers exposing + * different names for the same data format, forcing userspace to understand + * that they are aliases. + * + * Format modifiers may change any property of the buffer, including the number + * of planes and/or the required allocation size. Format modifiers are + * vendor-namespaced, and as such the relationship between a fourcc code and a + * modifier is specific to the modifer being used. For example, some modifiers + * may preserve meaning - such as number of planes - from the fourcc code, + * whereas others may not. + * + * Vendors should document their modifier usage in as much detail as + * possible, to ensure maximum compatibility across devices, drivers and + * applications. 
+ * + * The authoritative list of format modifier codes is found in + * `include/uapi/drm/drm_fourcc.h` + */ + #define fourcc_code(a, b, c, d) ((uint32_t)(a) | ((uint32_t)(b) << 8) | \ ((uint32_t)(c) << 16) | ((uint32_t)(d) << 24)) #define DRM_FORMAT_BIG_ENDIAN (1<<31) /* format is big endian instead of little endian */ +/* Reserve 0 for the invalid format specifier */ +#define DRM_FORMAT_INVALID 0 + /* color index */ #define DRM_FORMAT_C8 fourcc_code('C', '8', ' ', ' ') /* [7:0] C */ @@ -111,6 +150,21 @@ extern "C" { #define DRM_FORMAT_VYUYfourcc_code('V', 'Y', 'U', 'Y') /* [31:0] Y1:Cb0:Y0:Cr0 8:8:8:8 little endian */ #define DRM_FORMAT_AYUVfourcc_code('A', 'Y', 'U', 'V') /* [31:0] A:Y:Cb:Cr 8:8:8:8 little endian */ +#define DRM_FORMAT_XYUVfourcc_code('X', 'Y', 'U', 'V') /* [31:0] X:Y:Cb:Cr 8:8:8:8 little endian */ + +/* + * packed YCbCr420 2x2 tiled formats + * first 64 bits will contain Y,Cb,Cr components for a 2x2 tile + */ +/* [63:0] A3:A2:Y3:0:Cr0:0:Y2:0:A1:A0:Y1:0:Cb0:0:Y0:0 1:1:8:2:8:2:8:2:1:1:8:2:8:2:8:2 little endian */ +#define DRM_FORMAT_Y0L0fourcc_code('Y', '0', 'L', '0') +/* [63:0] X3:X2:Y3:0:Cr0:0:Y2:0:X1:X0:Y1:0:Cb0:0:Y0:0 1:1:8:2:8:2:8:2:1:1:8:2:8:2:8:2 little endian */ +#define DRM_FORMAT_X0L0fourcc_code('X', '0', 'L
[Qemu-devel] [PATCH RFC 2/8] i386/kvm: add support for KVM_GET_SUPPORTED_HV_CPUID
KVM now supports reporting supported Hyper-V features through CPUID (KVM_GET_SUPPORTED_HV_CPUID ioctl). Going forward, this is going to be the only way to announce new functionality and this has already happened with Direct Mode stimers. While we could just support KVM_GET_SUPPORTED_HV_CPUID for new features, it seems to be beneficial to use it for all Hyper-V enlightenments when possible. This way we can implement 'hv-all' pass-through mode giving the guest all supported Hyper-V features even when QEMU knows nothing about them. Implementation-wise we create a new kvm_hyperv_properties structure defining Hyper-V features, get_supported_hv_cpuid()/ get_supported_hv_cpuid_legacy() returning the supported CPUID set and a bit over-engineered hv_cpuid_check_and_set() which will also be used to set cpu->hyperv_* properties for 'hv-all' mode. Signed-off-by: Vitaly Kuznetsov --- target/i386/kvm.c | 487 +++--- 1 file changed, 372 insertions(+), 115 deletions(-) diff --git a/target/i386/kvm.c b/target/i386/kvm.c index 9af4542fb8..dc4574daff 100644 --- a/target/i386/kvm.c +++ b/target/i386/kvm.c @@ -691,141 +691,360 @@ static bool tsc_is_stable_and_known(CPUX86State *env) || env->user_tsc_khz; } -static int hyperv_handle_properties(CPUState *cs) +static struct { +const char *name; +const char *desc; +struct { +uint32_t fw; +uint32_t bits; +} flags[2]; +} kvm_hyperv_properties[] = { +{ +.name = "hv-relaxed", +.desc = "relaxed timing", +.flags = { +{.fw = FEAT_HYPERV_EAX, + .bits = HV_HYPERCALL_AVAILABLE}, +{.fw = FEAT_HV_RECOMM_EAX, + .bits = HV_RELAXED_TIMING_RECOMMENDED} +} +}, +{ +.name = "hv-vapic", +.desc = "virtual APIC", +.flags = { +{.fw = FEAT_HYPERV_EAX, + .bits = HV_HYPERCALL_AVAILABLE | HV_APIC_ACCESS_AVAILABLE}, +{.fw = FEAT_HV_RECOMM_EAX, + .bits = HV_APIC_ACCESS_RECOMMENDED} +} +}, +{ +.name = "hv-time", +.desc = "clocksources", +.flags = { +{.fw = FEAT_HYPERV_EAX, + .bits = HV_HYPERCALL_AVAILABLE | HV_TIME_REF_COUNT_AVAILABLE | + HV_REFERENCE_TSC_AVAILABLE}, 
+{0} +} +}, +{ +.name = "hv-frequencies", +.desc = "frequency MSRs", +.flags = { +{.fw = FEAT_HYPERV_EAX, + .bits = HV_ACCESS_FREQUENCY_MSRS}, +{.fw = FEAT_HYPERV_EDX, + .bits = HV_FREQUENCY_MSRS_AVAILABLE} +} +}, +{ +.name = "hv-crash", +.desc = "crash MSRs", +.flags = { +{.fw = FEAT_HYPERV_EDX, + .bits = HV_GUEST_CRASH_MSR_AVAILABLE}, +{0} +} +}, +{ +.name = "hv-reenlightenment", +.desc = "Reenlightenment MSRs", +.flags = { +{.fw = FEAT_HYPERV_EAX, + .bits = HV_ACCESS_REENLIGHTENMENTS_CONTROL}, +{0} +} +}, +{ +.name = "hv-reset", +.desc = "reset MSR", +.flags = { +{.fw = FEAT_HYPERV_EAX, + .bits = HV_RESET_AVAILABLE}, +{0} +} +}, +{ +.name = "hv-vpindex", +.desc = "VP_INDEX MSR", +.flags = { +{.fw = FEAT_HYPERV_EAX, + .bits = HV_VP_INDEX_AVAILABLE}, +{0} +} +}, +{ +.name = "hv-runtime", +.desc = "VP_RUNTIME MSR", +.flags = { +{.fw = FEAT_HYPERV_EAX, + .bits = HV_VP_RUNTIME_AVAILABLE}, +{0} +} +}, +{ +.name = "hv-synic", +.desc = "SynIC", +.flags = { +{.fw = FEAT_HYPERV_EAX, + .bits = HV_SYNIC_AVAILABLE}, +{0} +} +}, +{ +.name = "hv-stimer", +.desc = "timers", +.flags = { +{.fw = FEAT_HYPERV_EAX, + .bits = HV_SYNTIMERS_AVAILABLE}, +{0} +} +}, +{ +.name = "hv-tlbflush", +.desc = "TLB flush support", +.flags = { +{.fw = FEAT_HV_RECOMM_EAX, + .bits = HV_REMOTE_TLB_FLUSH_RECOMMENDED | + HV_EX_PROCESSOR_MASKS_RECOMMENDED}, +{0} +} +}, +{ +.name = "hv-ipi", +.desc = "IPI send support", +.flags = { +{.fw = FEAT_HV_RECOMM_EAX, + .bits = HV_CLUSTER_IPI_RECOMMENDED | + HV_EX_PROCESSOR_MASKS_RECOMMENDED}, +{0} +} +}, +}; + +static struct kvm_cpuid2 *try_g
[Qemu-devel] [PATCH RFC 8/8] i386/kvm: add support for Direct Mode for Hyper-V synthetic timers
Hyper-V on KVM can only use Synthetic timers with Direct Mode (opting for an interrupt instead of VMBus message). This new capability is only announced in KVM_GET_SUPPORTED_HV_CPUID. Signed-off-by: Vitaly Kuznetsov --- target/i386/cpu.c | 1 + target/i386/cpu.h | 1 + target/i386/hyperv-proto.h | 1 + target/i386/kvm.c | 15 +++ 4 files changed, 18 insertions(+) diff --git a/target/i386/cpu.c b/target/i386/cpu.c index b776be5223..986cbe88dd 100644 --- a/target/i386/cpu.c +++ b/target/i386/cpu.c @@ -5771,6 +5771,7 @@ static Property x86_cpu_properties[] = { DEFINE_PROP_BOOL("hv-tlbflush", X86CPU, hyperv_tlbflush, false), DEFINE_PROP_BOOL("hv-evmcs", X86CPU, hyperv_evmcs, false), DEFINE_PROP_BOOL("hv-ipi", X86CPU, hyperv_ipi, false), +DEFINE_PROP_BOOL("hv-stimer-direct", X86CPU, hyperv_stimer_direct, false), DEFINE_PROP_BOOL("hv-all", X86CPU, hyperv_all, false), DEFINE_PROP_BOOL("check", X86CPU, check_cpuid, true), DEFINE_PROP_BOOL("enforce", X86CPU, enforce_cpuid, false), diff --git a/target/i386/cpu.h b/target/i386/cpu.h index 9b5c2715cc..9716cd89d7 100644 --- a/target/i386/cpu.h +++ b/target/i386/cpu.h @@ -1397,6 +1397,7 @@ struct X86CPU { bool hyperv_tlbflush; bool hyperv_evmcs; bool hyperv_ipi; +bool hyperv_stimer_direct; bool hyperv_all; bool check_cpuid; bool enforce_cpuid; diff --git a/target/i386/hyperv-proto.h b/target/i386/hyperv-proto.h index c0272b3a01..cffac10b45 100644 --- a/target/i386/hyperv-proto.h +++ b/target/i386/hyperv-proto.h @@ -49,6 +49,7 @@ #define HV_GUEST_IDLE_STATE_AVAILABLE (1u << 5) #define HV_FREQUENCY_MSRS_AVAILABLE (1u << 8) #define HV_GUEST_CRASH_MSR_AVAILABLE(1u << 10) +#define HV_STIMER_DIRECT_MODE_AVAILABLE (1u << 19) /* * HV_CPUID_ENLIGHTMENT_INFO.EAX bits diff --git a/target/i386/kvm.c b/target/i386/kvm.c index 14d74ca9c7..e7d0f4d3fe 100644 --- a/target/i386/kvm.c +++ b/target/i386/kvm.c @@ -648,6 +648,7 @@ static bool hyperv_enabled(X86CPU *cpu) cpu->hyperv_reenlightenment || cpu->hyperv_tlbflush || cpu->hyperv_ipi || 
+cpu->hyperv_stimer_direct || cpu->hyperv_all); } @@ -823,6 +824,15 @@ static struct { {0} } }, +{ +.name = "hv-stimer-direct", +.desc = "direct mode timers", +.flags = { +{.fw = FEAT_HYPERV_EDX, + .bits = HV_STIMER_DIRECT_MODE_AVAILABLE}, +{0} +} +}, }; static struct kvm_cpuid2 *try_get_hv_cpuid(CPUState *cs, int max) @@ -1169,6 +1179,8 @@ static int hyperv_handle_properties(CPUState *cs, r |= hv_cpuid_check_and_set(cs, cpuid, "hv-tlbflush", >hyperv_tlbflush); r |= hv_cpuid_check_and_set(cs, cpuid, "hv-ipi", >hyperv_ipi); +r |= hv_cpuid_check_and_set(cs, cpuid, "hv-stimer-direct", +>hyperv_stimer_direct); /* Dependencies */ if (cpu->hyperv_synic && !cpu->hyperv_synic_kvm_only && @@ -1189,6 +1201,9 @@ static int hyperv_handle_properties(CPUState *cs, if (cpu->hyperv_ipi && !cpu->hyperv_vpindex) { r |= hv_report_missing_dep(cpu, "hv-ipi", "hv-vpindex"); } +if (cpu->hyperv_stimer_direct && !cpu->hyperv_stimer) { +r |= hv_report_missing_dep(cpu, "hv-stimer-direct", "hv-stimer"); +} /* Not exposed by KVM but needed to make CPU hotplug in Windows work */ env->features[FEAT_HYPERV_EDX] |= HV_CPU_DYNAMIC_PARTITIONING_AVAILABLE; -- 2.20.1
[Qemu-devel] [PATCH RFC 0/8] i386/kvm/hyper-v: refactor and implement 'hv-stimer-direct' and 'hv-all' enlightenments
The recently introduced Direct Mode for Hyper-V synthetic timers enlightenment is only exposed through KVM_GET_SUPPORTED_HV_CPUID ioctl. Take the opportunity and re-implement the way we handle Hyper-V enlightenments in QEMU, add support for hv-stimer-direct and 'hv-all' pass-through mode, add missing dependencies between enlightenments. RFC: we could've been more conservative and only use KVM_GET_SUPPORTED_HV_CPUID for new enlightenments. I'd also like to know what you think about the 'pass-through' approach taken to implement 'hv-all' mode: we could've kept QEMU filling in signature, vendor,... but we take CPUIDs passed by KVM 'as-is'. Vitaly Kuznetsov (8): Update linux headers (5.0-rc2) i386/kvm: add support for KVM_GET_SUPPORTED_HV_CPUID i386/kvm: move Hyper-V CPUID filling to hyperv_handle_properties() i386/kvm: Implement 'hv-all' pass-through mode i386/kvm: hv-evmcs requires hv-vapic i386/kvm: hv-stimer requires hv-time and hv-synic i386/kvm: hv-tlbflush/ipi require hv-vpindex i386/kvm: add support for Direct Mode for Hyper-V synthetic timers include/standard-headers/drm/drm_fourcc.h | 63 + include/standard-headers/linux/ethtool.h | 19 +- .../linux/input-event-codes.h | 19 + include/standard-headers/linux/pci_regs.h |1 + .../standard-headers/linux/virtio_balloon.h |8 + include/standard-headers/linux/virtio_blk.h | 54 + .../standard-headers/linux/virtio_config.h|3 + include/standard-headers/linux/virtio_gpu.h | 18 + include/standard-headers/linux/virtio_ring.h | 52 + linux-headers/asm-arm/unistd-common.h |1 + linux-headers/asm-arm64/unistd.h |1 + linux-headers/asm-generic/unistd.h| 10 +- linux-headers/asm-mips/sgidefs.h |8 - linux-headers/asm-mips/unistd.h | 1074 + linux-headers/asm-mips/unistd_n64.h | 334 + linux-headers/asm-mips/unistd_o32.h | 374 ++ linux-headers/asm-powerpc/unistd.h| 389 +- linux-headers/asm-powerpc/unistd_32.h | 381 ++ linux-headers/asm-powerpc/unistd_64.h | 372 ++ linux-headers/linux/kvm.h | 29 + linux-headers/linux/vfio.h| 92 ++ 
linux-headers/linux/vhost.h | 113 +- linux-headers/linux/vhost_types.h | 128 ++ scripts/update-linux-headers.sh | 10 +- target/i386/cpu.c |2 + target/i386/cpu.h |2 + target/i386/hyperv-proto.h|1 + target/i386/kvm.c | 749 +--- 28 files changed, 2533 insertions(+), 1774 deletions(-) create mode 100644 linux-headers/asm-mips/unistd_n64.h create mode 100644 linux-headers/asm-mips/unistd_o32.h create mode 100644 linux-headers/asm-powerpc/unistd_32.h create mode 100644 linux-headers/asm-powerpc/unistd_64.h create mode 100644 linux-headers/linux/vhost_types.h -- 2.20.1
[Qemu-devel] [PATCH RFC 6/8] i386/kvm: hv-stimer requires hv-time and hv-synic
Synthetic timers operate in hv-time time and Windows won't use these without SynIC. Signed-off-by: Vitaly Kuznetsov --- target/i386/kvm.c | 6 ++ 1 file changed, 6 insertions(+) diff --git a/target/i386/kvm.c b/target/i386/kvm.c index 674c5dc185..7461bf05dd 100644 --- a/target/i386/kvm.c +++ b/target/i386/kvm.c @@ -1177,6 +1177,12 @@ static int hyperv_handle_properties(CPUState *cs, if (cpu->hyperv_evmcs && !cpu->hyperv_vapic) { r |= hv_report_missing_dep(cpu, "hv-evmcs", "hv-vapic"); } +if (cpu->hyperv_stimer && !cpu->hyperv_synic) { +r |= hv_report_missing_dep(cpu, "hv-stimer", "hv-synic"); +} +if (cpu->hyperv_stimer && !cpu->hyperv_time) { +r |= hv_report_missing_dep(cpu, "hv-stimer", "hv-time"); +} /* Not exposed by KVM but needed to make CPU hotplug in Windows work */ env->features[FEAT_HYPERV_EDX] |= HV_CPU_DYNAMIC_PARTITIONING_AVAILABLE; -- 2.20.1
[Qemu-devel] [PATCH RFC 5/8] i386/kvm: hv-evmcs requires hv-vapic
Enlightened VMCS is enabled by writing to a field in VP assist page and these require virtual APIC. Signed-off-by: Vitaly Kuznetsov --- target/i386/kvm.c | 5 - 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/target/i386/kvm.c b/target/i386/kvm.c index b373b4ac06..674c5dc185 100644 --- a/target/i386/kvm.c +++ b/target/i386/kvm.c @@ -1074,7 +1074,7 @@ static int hv_report_missing_dep(X86CPU *cpu, const char *name, return 1; } -if (cpu->hyperv_all) { +if (cpu->hyperv_all && strcmp(name, "hv-evmcs")) { fprintf(stderr, "Hyper-V %s (requested by 'hv-all' cpu flag) " "requires %s (is not supported by kernel)\n", kvm_hyperv_properties[i].desc, kvm_hyperv_properties[j].desc); @@ -1174,6 +1174,9 @@ static int hyperv_handle_properties(CPUState *cs, if (cpu->hyperv_synic && !cpu->hyperv_synic_kvm_only && !cpu->hyperv_vpindex) r |= hv_report_missing_dep(cpu, "hv-synic", "hv-vpindex"); +if (cpu->hyperv_evmcs && !cpu->hyperv_vapic) { +r |= hv_report_missing_dep(cpu, "hv-evmcs", "hv-vapic"); +} /* Not exposed by KVM but needed to make CPU hotplug in Windows work */ env->features[FEAT_HYPERV_EDX] |= HV_CPU_DYNAMIC_PARTITIONING_AVAILABLE; -- 2.20.1
Re: [Qemu-devel] [PATCH RFC 4/8] i386/kvm: Implement 'hv-all' pass-through mode
Roman Kagan writes: > On Fri, Jan 25, 2019 at 12:41:51PM +0100, Vitaly Kuznetsov wrote: >> In many case we just want to give Windows guests all currently supported >> Hyper-V enlightenments and that's where this new mode may come handy. We >> pass through what was returned by KVM_GET_SUPPORTED_HV_CPUID. > > How is the compatibility ensured on migration between kernels reporting > different feature sets? AFAIU we don't change anything in this regard (or, my intention was to not change anything): hv-all is converted to the individual hv-* properties (hv_cpuid_check_and_set() actually sets cpu->hyperv_* flags according to what's supported by the kernel), so when we migrate we will require all these features to be supported. I'll check that my expectations actually match reality; thanks for the reminder! -- Vitaly
Re: [Qemu-devel] [PATCH RFC 0/8] i386/kvm/hyper-v: refactor and implement 'hv-stimer-direct' and 'hv-all' enlightenments
no-re...@patchew.org writes: > === OUTPUT BEGIN === > 1/8 Checking commit 345a0718e21e (Update linux headers (5.0-rc2)) > WARNING: added, moved or deleted file(s), does MAINTAINERS need updating? > #1646: > new file mode 100644 > > ERROR: code indent should never use tabs > #3980: FILE: scripts/update-linux-headers.sh:126: > +^Icp "$tmpdir/include/asm/unistd_n64.h" "$output/linux-headers/asm-mips/"$ > > WARNING: line over 80 characters > #3983: FILE: scripts/update-linux-headers.sh:129: > +cp "$tmpdir/include/asm/unistd_32.h" > "$output/linux-headers/asm-powerpc/" > > WARNING: line over 80 characters > #3984: FILE: scripts/update-linux-headers.sh:130: > + cp "$tmpdir/include/asm/unistd_64.h" > "$output/linux-headers/asm-powerpc/" > > ERROR: code indent should never use tabs > #3984: FILE: scripts/update-linux-headers.sh:130: > +^Icp "$tmpdir/include/asm/unistd_64.h" "$output/linux-headers/asm-powerpc/"$ > I saw these before submitting, however, these are linux headers and we take them as-is with scripts/update-linux-headers.sh: I don't think it is worth it to modify these headers just to silence patchew. -- Vitaly
[Qemu-devel] [PATCH v2 6/9] i386/kvm: hv-stimer requires hv-time and hv-synic
Synthetic timers operate in hv-time time and Windows won't use these without SynIC. Add .dependencies field to kvm_hyperv_properties[] and a generic mechanism to check dependencies between features. Signed-off-by: Vitaly Kuznetsov --- target/i386/kvm.c | 23 +++ 1 file changed, 19 insertions(+), 4 deletions(-) diff --git a/target/i386/kvm.c b/target/i386/kvm.c index e876dc6118..d8b83031a5 100644 --- a/target/i386/kvm.c +++ b/target/i386/kvm.c @@ -690,6 +690,7 @@ static struct { uint32_t fw; uint32_t bits; } flags[2]; +uint64_t dependencies; } kvm_hyperv_properties[] = { [HYPERV_FEAT_RELAXED] = { .desc = "relaxed timing (hv-relaxed)", @@ -757,7 +758,8 @@ static struct { .flags = { {.fw = FEAT_HYPERV_EAX, .bits = HV_SYNTIMERS_AVAILABLE} -} +}, +.dependencies = BIT(HYPERV_FEAT_SYNIC) | BIT(HYPERV_FEAT_TIME) }, [HYPERV_FEAT_FREQUENCIES] = { .desc = "frequency MSRs (hv-frequencies)", @@ -987,12 +989,25 @@ static int hv_cpuid_check_and_set(CPUState *cs, struct kvm_cpuid2 *cpuid, X86CPU *cpu = X86_CPU(cs); CPUX86State *env = >env; uint32_t r, fw, bits; -int i; +uint64_t deps; +int i, dep_feat = 0; if (!hyperv_feat_enabled(cpu, feature) && !cpu->hyperv_passthrough) { return 0; } +deps = kvm_hyperv_properties[feature].dependencies; +while ((dep_feat = find_next_bit(, 64, dep_feat)) < 64) { +if (!(hyperv_feat_enabled(cpu, dep_feat))) { +fprintf(stderr, +"Hyper-V %s requires Hyper-V %s\n", +kvm_hyperv_properties[feature].desc, +kvm_hyperv_properties[dep_feat].desc); +return 1; +} +dep_feat++; +} + for (i = 0; i < ARRAY_SIZE(kvm_hyperv_properties[feature].flags); i++) { fw = kvm_hyperv_properties[feature].flags[i].fw; bits = kvm_hyperv_properties[feature].flags[i].bits; @@ -1108,11 +1123,11 @@ static int hyperv_handle_properties(CPUState *cs, r |= hv_cpuid_check_and_set(cs, cpuid, HYPERV_FEAT_EVMCS); r |= hv_cpuid_check_and_set(cs, cpuid, HYPERV_FEAT_IPI); -/* Dependencies */ +/* Additional dependencies not covered by kvm_hyperv_properties[] */ if (hyperv_feat_enabled(cpu, 
HYPERV_FEAT_SYNIC) && !cpu->hyperv_synic_kvm_only && !hyperv_feat_enabled(cpu, HYPERV_FEAT_VPINDEX)) { -fprintf(stderr, "Hyper-V %s requires %s\n", +fprintf(stderr, "Hyper-V %s requires Hyper-V %s\n", kvm_hyperv_properties[HYPERV_FEAT_SYNIC].desc, kvm_hyperv_properties[HYPERV_FEAT_VPINDEX].desc); r |= 1; -- 2.20.1
[Qemu-devel] [PATCH v2 0/9] i386/kvm/hyper-v: refactor and implement 'hv-stimer-direct' and 'hv-passthrough' enlightenments
It has been a while since my 'v1' and I (again) apologize for that. Changes since v1: - Existing Hyper-V properties are converted from BOOL to BIT64, this allows us to express dependencies between them in a more natural way as well as simplifies search in kvm_hyperv_properties [Roman Kagan] (hope I got the idea right, but in any case this should be an improvement). PATCH1 added to the series. - 'hv-all' renamed to 'hv-passthrough' [Roman Kagan] - minor changes mostly related to support the addition of PATCH1. Original description: The recently introduced Direct Mode for Hyper-V synthetic timers enlightenment is only exposed through KVM_GET_SUPPORTED_HV_CPUID ioctl. Take the opportunity and re-implement the way we handle Hyper-V enlightenments in QEMU, add support for hv-stimer-direct and 'hv-all' pass-through mode, add missing dependencies between enlightenments. Vitaly Kuznetsov (9): i386/kvm: convert hyperv enlightenments properties from bools to bits i386/kvm: add support for KVM_GET_SUPPORTED_HV_CPUID i386/kvm: move Hyper-V CPUID filling to hyperv_handle_properties() i386/kvm: document existing Hyper-V enlightenments i386/kvm: implement 'hv-passthrough' mode i386/kvm: hv-stimer requires hv-time and hv-synic i386/kvm: hv-tlbflush/ipi require hv-vpindex i386/kvm: hv-evmcs requires hv-vapic i386/kvm: add support for Direct Mode for Hyper-V synthetic timers docs/hyperv.txt| 201 ++ hw/i386/pc.c | 3 +- target/i386/cpu.c | 47 ++- target/i386/cpu.h | 39 +- target/i386/hyperv-proto.h | 1 + target/i386/hyperv.c | 2 +- target/i386/kvm.c | 770 ++--- target/i386/machine.c | 2 +- 8 files changed, 813 insertions(+), 252 deletions(-) create mode 100644 docs/hyperv.txt -- 2.20.1
[Qemu-devel] [PATCH v2 7/9] i386/kvm: hv-tlbflush/ipi require hv-vpindex
The corresponding hypercalls require using VP indexes. Signed-off-by: Vitaly Kuznetsov --- target/i386/kvm.c | 6 -- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/target/i386/kvm.c b/target/i386/kvm.c index d8b83031a5..7fc97b749e 100644 --- a/target/i386/kvm.c +++ b/target/i386/kvm.c @@ -783,7 +783,8 @@ static struct { {.fw = FEAT_HV_RECOMM_EAX, .bits = HV_REMOTE_TLB_FLUSH_RECOMMENDED | HV_EX_PROCESSOR_MASKS_RECOMMENDED} -} +}, +.dependencies = BIT(HYPERV_FEAT_VPINDEX) }, [HYPERV_FEAT_EVMCS] = { .desc = "enlightened VMCS (hv-evmcs)", @@ -798,7 +799,8 @@ static struct { {.fw = FEAT_HV_RECOMM_EAX, .bits = HV_CLUSTER_IPI_RECOMMENDED | HV_EX_PROCESSOR_MASKS_RECOMMENDED} -} +}, +.dependencies = BIT(HYPERV_FEAT_VPINDEX) }, }; -- 2.20.1
[Qemu-devel] [PATCH v2 3/9] i386/kvm: move Hyper-V CPUID filling to hyperv_handle_properties()
Let's consolidate Hyper-V features handling in hyperv_handle_properties(). The change is necessary to support 'hv-passthrough' mode as we'll be just copying CPUIDs from KVM instead of filling them in. Signed-off-by: Vitaly Kuznetsov --- target/i386/kvm.c | 163 +- 1 file changed, 90 insertions(+), 73 deletions(-) diff --git a/target/i386/kvm.c b/target/i386/kvm.c index 6ead422efa..2b13757441 100644 --- a/target/i386/kvm.c +++ b/target/i386/kvm.c @@ -1014,13 +1014,25 @@ static int hv_cpuid_check_and_set(CPUState *cs, struct kvm_cpuid2 *cpuid, return 0; } -static int hyperv_handle_properties(CPUState *cs) +/* + * Fill in Hyper-V CPUIDs. Returns the number of entries filled in cpuid_ent in + * case of success, errno < 0 in case of failure and 0 when no Hyper-V + * extentions are enabled. + */ +static int hyperv_handle_properties(CPUState *cs, +struct kvm_cpuid_entry2 *cpuid_ent) { X86CPU *cpu = X86_CPU(cs); CPUX86State *env = >env; struct kvm_cpuid2 *cpuid; +struct kvm_cpuid_entry2 *c; +uint32_t signature[3]; +uint32_t cpuid_i = 0; int r = 0; +if (!hyperv_enabled(cpu)) +return 0; + if (hyperv_feat_enabled(cpu, HYPERV_FEAT_EVMCS)) { uint16_t evmcs_version; @@ -1069,9 +1081,80 @@ static int hyperv_handle_properties(CPUState *cs) /* Not exposed by KVM but needed to make CPU hotplug in Windows work */ env->features[FEAT_HYPERV_EDX] |= HV_CPU_DYNAMIC_PARTITIONING_AVAILABLE; +if (r) { +r = -ENOSYS; +goto free; +} + +c = _ent[cpuid_i++]; +c->function = HV_CPUID_VENDOR_AND_MAX_FUNCTIONS; +if (!cpu->hyperv_vendor_id) { +memcpy(signature, "Microsoft Hv", 12); +} else { +size_t len = strlen(cpu->hyperv_vendor_id); + +if (len > 12) { +error_report("hv-vendor-id truncated to 12 characters"); +len = 12; +} +memset(signature, 0, 12); +memcpy(signature, cpu->hyperv_vendor_id, len); +} +c->eax = hyperv_feat_enabled(cpu, HYPERV_FEAT_EVMCS) ? 
+HV_CPUID_NESTED_FEATURES : HV_CPUID_IMPLEMENT_LIMITS; +c->ebx = signature[0]; +c->ecx = signature[1]; +c->edx = signature[2]; + +c = _ent[cpuid_i++]; +c->function = HV_CPUID_INTERFACE; +memcpy(signature, "Hv#1\0\0\0\0\0\0\0\0", 12); +c->eax = signature[0]; +c->ebx = 0; +c->ecx = 0; +c->edx = 0; + +c = _ent[cpuid_i++]; +c->function = HV_CPUID_VERSION; +c->eax = 0x1bbc; +c->ebx = 0x00060001; + +c = _ent[cpuid_i++]; +c->function = HV_CPUID_FEATURES; +c->eax = env->features[FEAT_HYPERV_EAX]; +c->ebx = env->features[FEAT_HYPERV_EBX]; +c->edx = env->features[FEAT_HYPERV_EDX]; + +c = _ent[cpuid_i++]; +c->function = HV_CPUID_ENLIGHTMENT_INFO; +c->eax = env->features[FEAT_HV_RECOMM_EAX]; +c->ebx = cpu->hyperv_spinlock_attempts; + +c = _ent[cpuid_i++]; +c->function = HV_CPUID_IMPLEMENT_LIMITS; +c->eax = cpu->hv_max_vps; +c->ebx = 0x40; + +if (hyperv_feat_enabled(cpu, HYPERV_FEAT_EVMCS)) { +__u32 function; + +/* Create zeroed 0x4006..0x4009 leaves */ +for (function = HV_CPUID_IMPLEMENT_LIMITS + 1; + function < HV_CPUID_NESTED_FEATURES; function++) { +c = _ent[cpuid_i++]; +c->function = function; +} + +c = _ent[cpuid_i++]; +c->function = HV_CPUID_NESTED_FEATURES; +c->eax = env->features[FEAT_HV_NESTED_EAX]; +} +r = cpuid_i; + +free: g_free(cpuid); -return r ? -ENOSYS : 0; +return r; } static int hyperv_init_vcpu(X86CPU *cpu) @@ -1180,79 +1263,13 @@ int kvm_arch_init_vcpu(CPUState *cs) } /* Paravirtualization CPUIDs */ -if (hyperv_enabled(cpu)) { -c = _data.entries[cpuid_i++]; -c->function = HV_CPUID_VENDOR_AND_MAX_FUNCTIONS; -if (!cpu->hyperv_vendor_id) { -memcpy(signature, "Microsoft Hv", 12); -} else { -size_t len = strlen(cpu->hyperv_vendor_id); - -if (len > 12) { -error_report("hv-vendor-id truncated to 12 characters"); -len = 12; -} -memset(signature, 0, 12); -memcpy(signature, cpu->hyperv_vendor_id, len); -} -c->eax = hyperv_feat_enabled(cpu, HYPERV_FEAT_EVMCS) ? 
-HV_CPUID_NESTED_FEATURES : HV_CPUID_IMPLEMENT_LIMITS; -c->ebx = signature[0]; -c->ecx = signature[1]; -c->edx = signature[2]; - -c = _data.entries[cpuid_i++]; -c->function = HV_CPUID_INTERFACE; -memcpy(signature, "Hv#1\0\0\0\0\0\0\0\0", 12); -c->eax = s
[Qemu-devel] [PATCH v2 2/9] i386/kvm: add support for KVM_GET_SUPPORTED_HV_CPUID
KVM now supports reporting supported Hyper-V features through CPUID (KVM_GET_SUPPORTED_HV_CPUID ioctl). Going forward, this is going to be the only way to announce new functionality and this has already happened with Direct Mode stimers. While we could just support KVM_GET_SUPPORTED_HV_CPUID for new features, it seems to be beneficial to use it for all Hyper-V enlightenments when possible. This way we can implement 'hv-all' pass-through mode giving the guest all supported Hyper-V features even when QEMU knows nothing about them. Implementation-wise we create a new kvm_hyperv_properties structure defining Hyper-V features, get_supported_hv_cpuid()/ get_supported_hv_cpuid_legacy() returning the supported CPUID set and a bit over-engineered hv_cpuid_check_and_set() which will also be used to set cpu->hyperv_* properties for 'hv-all' mode. Signed-off-by: Vitaly Kuznetsov --- target/i386/kvm.c | 474 ++ 1 file changed, 356 insertions(+), 118 deletions(-) diff --git a/target/i386/kvm.c b/target/i386/kvm.c index 3daac1e4f4..6ead422efa 100644 --- a/target/i386/kvm.c +++ b/target/i386/kvm.c @@ -684,156 +684,394 @@ static bool tsc_is_stable_and_known(CPUX86State *env) || env->user_tsc_khz; } -static int hyperv_handle_properties(CPUState *cs) +static struct { +const char *desc; +struct { +uint32_t fw; +uint32_t bits; +} flags[2]; +} kvm_hyperv_properties[] = { +[HYPERV_FEAT_RELAXED] = { +.desc = "relaxed timing (hv-relaxed)", +.flags = { +{.fw = FEAT_HYPERV_EAX, + .bits = HV_HYPERCALL_AVAILABLE}, +{.fw = FEAT_HV_RECOMM_EAX, + .bits = HV_RELAXED_TIMING_RECOMMENDED} +} +}, +[HYPERV_FEAT_VAPIC] = { +.desc = "virtual APIC (hv-vapic)", +.flags = { +{.fw = FEAT_HYPERV_EAX, + .bits = HV_HYPERCALL_AVAILABLE | HV_APIC_ACCESS_AVAILABLE}, +{.fw = FEAT_HV_RECOMM_EAX, + .bits = HV_APIC_ACCESS_RECOMMENDED} +} +}, +[HYPERV_FEAT_TIME] = { +.desc = "clocksources (hv-time)", +.flags = { +{.fw = FEAT_HYPERV_EAX, + .bits = HV_HYPERCALL_AVAILABLE | HV_TIME_REF_COUNT_AVAILABLE | + 
HV_REFERENCE_TSC_AVAILABLE} +} +}, +[HYPERV_FEAT_CRASH] = { +.desc = "crash MSRs (hv-crash)", +.flags = { +{.fw = FEAT_HYPERV_EDX, + .bits = HV_GUEST_CRASH_MSR_AVAILABLE} +} +}, +[HYPERV_FEAT_RESET] = { +.desc = "reset MSR (hv-reset)", +.flags = { +{.fw = FEAT_HYPERV_EAX, + .bits = HV_RESET_AVAILABLE} +} +}, +[HYPERV_FEAT_VPINDEX] = { +.desc = "VP_INDEX MSR (hv-vpindex)", +.flags = { +{.fw = FEAT_HYPERV_EAX, + .bits = HV_VP_INDEX_AVAILABLE} +} +}, +[HYPERV_FEAT_RUNTIME] = { +.desc = "VP_RUNTIME MSR (hv-runtime)", +.flags = { +{.fw = FEAT_HYPERV_EAX, + .bits = HV_VP_RUNTIME_AVAILABLE} +} +}, +[HYPERV_FEAT_SYNIC] = { +.desc = "synthetic interrupt controller (hv-synic)", +.flags = { +{.fw = FEAT_HYPERV_EAX, + .bits = HV_SYNIC_AVAILABLE} +} +}, +[HYPERV_FEAT_STIMER] = { +.desc = "synthetic timers (hv-stimer)", +.flags = { +{.fw = FEAT_HYPERV_EAX, + .bits = HV_SYNTIMERS_AVAILABLE} +} +}, +[HYPERV_FEAT_FREQUENCIES] = { +.desc = "frequency MSRs (hv-frequencies)", +.flags = { +{.fw = FEAT_HYPERV_EAX, + .bits = HV_ACCESS_FREQUENCY_MSRS}, +{.fw = FEAT_HYPERV_EDX, + .bits = HV_FREQUENCY_MSRS_AVAILABLE} +} +}, +[HYPERV_FEAT_REENLIGHTENMENT] = { +.desc = "reenlightenment MSRs (hv-reenlightenment)", +.flags = { +{.fw = FEAT_HYPERV_EAX, + .bits = HV_ACCESS_REENLIGHTENMENTS_CONTROL} +} +}, +[HYPERV_FEAT_TLBFLUSH] = { +.desc = "paravirtualized TLB flush (hv-tlbflush)", +.flags = { +{.fw = FEAT_HV_RECOMM_EAX, + .bits = HV_REMOTE_TLB_FLUSH_RECOMMENDED | + HV_EX_PROCESSOR_MASKS_RECOMMENDED} +} +}, +[HYPERV_FEAT_EVMCS] = { +.desc = "enlightened VMCS (hv-evmcs)", +.flags = { +{.fw = FEAT_HV_RECOMM_EAX, + .bits = HV_ENLIGHTENED_VMCS_RECOMMENDED} +} +}, +[HYPERV_FEAT_IPI] = { +.desc = "paravirtualized IPI (hv-ipi)", +.flags = { +{.fw = FEAT_HV_RECOMM_EAX, + .bits = HV_CLUSTER_IPI_RECOMMENDED | + HV_EX_PROCESSOR_MASKS_RECOMMENDED} +} +}, +}; + +static struct kvm_cpuid2 *try_g
[Qemu-devel] [PATCH v2 1/9] i386/kvm: convert hyperv enlightenments properties from bools to bits
Representing Hyper-V properties as bits will allow us to check features and dependencies between them in a natural way. Suggested-by: Roman Kagan Signed-off-by: Vitaly Kuznetsov --- hw/i386/pc.c | 3 +- target/i386/cpu.c | 44 +++ target/i386/cpu.h | 37 +++ target/i386/hyperv.c | 2 +- target/i386/kvm.c | 83 ++- target/i386/machine.c | 2 +- 6 files changed, 91 insertions(+), 80 deletions(-) diff --git a/hw/i386/pc.c b/hw/i386/pc.c index d98b737b8f..77c479e667 100644 --- a/hw/i386/pc.c +++ b/hw/i386/pc.c @@ -2387,7 +2387,8 @@ static void pc_cpu_pre_plug(HotplugHandler *hotplug_dev, } cpu->thread_id = topo.smt_id; -if (cpu->hyperv_vpindex && !kvm_hv_vpindex_settable()) { +if (hyperv_feat_enabled(cpu, HYPERV_FEAT_VPINDEX) && +!kvm_hv_vpindex_settable()) { error_setg(errp, "kernel doesn't allow setting HyperV VP_INDEX"); return; } diff --git a/target/i386/cpu.c b/target/i386/cpu.c index 722c5514d4..9530b28d42 100644 --- a/target/i386/cpu.c +++ b/target/i386/cpu.c @@ -5819,21 +5819,37 @@ static Property x86_cpu_properties[] = { #endif DEFINE_PROP_INT32("node-id", X86CPU, node_id, CPU_UNSET_NUMA_NODE_ID), DEFINE_PROP_BOOL("pmu", X86CPU, enable_pmu, false), + { .name = "hv-spinlocks", .info = _prop_spinlocks }, -DEFINE_PROP_BOOL("hv-relaxed", X86CPU, hyperv_relaxed_timing, false), -DEFINE_PROP_BOOL("hv-vapic", X86CPU, hyperv_vapic, false), -DEFINE_PROP_BOOL("hv-time", X86CPU, hyperv_time, false), -DEFINE_PROP_BOOL("hv-crash", X86CPU, hyperv_crash, false), -DEFINE_PROP_BOOL("hv-reset", X86CPU, hyperv_reset, false), -DEFINE_PROP_BOOL("hv-vpindex", X86CPU, hyperv_vpindex, false), -DEFINE_PROP_BOOL("hv-runtime", X86CPU, hyperv_runtime, false), -DEFINE_PROP_BOOL("hv-synic", X86CPU, hyperv_synic, false), -DEFINE_PROP_BOOL("hv-stimer", X86CPU, hyperv_stimer, false), -DEFINE_PROP_BOOL("hv-frequencies", X86CPU, hyperv_frequencies, false), -DEFINE_PROP_BOOL("hv-reenlightenment", X86CPU, hyperv_reenlightenment, false), -DEFINE_PROP_BOOL("hv-tlbflush", X86CPU, hyperv_tlbflush, false), 
-DEFINE_PROP_BOOL("hv-evmcs", X86CPU, hyperv_evmcs, false), -DEFINE_PROP_BOOL("hv-ipi", X86CPU, hyperv_ipi, false), +DEFINE_PROP_BIT64("hv-relaxed", X86CPU, hyperv_features, + HYPERV_FEAT_RELAXED, 0), +DEFINE_PROP_BIT64("hv-vapic", X86CPU, hyperv_features, + HYPERV_FEAT_VAPIC, 0), +DEFINE_PROP_BIT64("hv-time", X86CPU, hyperv_features, + HYPERV_FEAT_TIME, 0), +DEFINE_PROP_BIT64("hv-crash", X86CPU, hyperv_features, + HYPERV_FEAT_CRASH, 0), +DEFINE_PROP_BIT64("hv-reset", X86CPU, hyperv_features, + HYPERV_FEAT_RESET, 0), +DEFINE_PROP_BIT64("hv-vpindex", X86CPU, hyperv_features, + HYPERV_FEAT_VPINDEX, 0), +DEFINE_PROP_BIT64("hv-runtime", X86CPU, hyperv_features, + HYPERV_FEAT_RUNTIME, 0), +DEFINE_PROP_BIT64("hv-synic", X86CPU, hyperv_features, + HYPERV_FEAT_SYNIC, 0), +DEFINE_PROP_BIT64("hv-stimer", X86CPU, hyperv_features, + HYPERV_FEAT_STIMER, 0), +DEFINE_PROP_BIT64("hv-frequencies", X86CPU, hyperv_features, + HYPERV_FEAT_FREQUENCIES, 0), +DEFINE_PROP_BIT64("hv-reenlightenment", X86CPU, hyperv_features, + HYPERV_FEAT_REENLIGHTENMENT, 0), +DEFINE_PROP_BIT64("hv-tlbflush", X86CPU, hyperv_features, + HYPERV_FEAT_TLBFLUSH, 0), +DEFINE_PROP_BIT64("hv-evmcs", X86CPU, hyperv_features, + HYPERV_FEAT_EVMCS, 0), +DEFINE_PROP_BIT64("hv-ipi", X86CPU, hyperv_features, + HYPERV_FEAT_IPI, 0), + DEFINE_PROP_BOOL("check", X86CPU, check_cpuid, true), DEFINE_PROP_BOOL("enforce", X86CPU, enforce_cpuid, false), DEFINE_PROP_BOOL("kvm", X86CPU, expose_kvm, true), diff --git a/target/i386/cpu.h b/target/i386/cpu.h index 0128910661..11fa9e643e 100644 --- a/target/i386/cpu.h +++ b/target/i386/cpu.h @@ -743,6 +743,22 @@ typedef uint32_t FeatureWordArray[FEATURE_WORDS]; #define MSR_ARCH_CAP_SKIP_L1DFL_VMENTRY (1U << 3) #define MSR_ARCH_CAP_SSB_NO (1U << 4) +/* Supported Hyper-V Enlightenments */ +#define HYPERV_FEAT_RELAXED 0 +#define HYPERV_FEAT_VAPIC 1 +#define HYPERV_FEAT_TIME2 +#define HYPERV_FEAT_CRASH 3 +#define HYPERV_FEAT_RESET
[Qemu-devel] [PATCH v2 5/9] i386/kvm: implement 'hv-passthrough' mode
In many cases we just want to give Windows guests all currently supported Hyper-V enlightenments and that's where this new mode may come in handy. We pass through what was returned by KVM_GET_SUPPORTED_HV_CPUID. hv_cpuid_check_and_set() is modified to also set cpu->hyperv_* flags as we may want to check them later (and we actually do for hv_runtime, hv_synic,...). 'hv-passthrough' is a development-only feature; a migration blocker is added to prevent issues while migrating between hosts with different feature sets. Signed-off-by: Vitaly Kuznetsov --- docs/hyperv.txt | 10 ++ target/i386/cpu.c | 1 + target/i386/cpu.h | 1 + target/i386/kvm.c | 89 +++ 4 files changed, 87 insertions(+), 14 deletions(-) diff --git a/docs/hyperv.txt b/docs/hyperv.txt index c423e0fca2..beadb2d0d4 100644 --- a/docs/hyperv.txt +++ b/docs/hyperv.txt @@ -175,6 +175,16 @@ without the feature to find out if enabling it is beneficial. Requires: hv-vapic +4. Development features + +In some cases (e.g. during development) it may make sense to use QEMU in +'pass-through' mode and give Windows guests all enlightenments currently +supported by KVM. This pass-through mode is enabled by "hv-passthrough" CPU +flag. +Note: enabling this flag effectively prevents migration as supported features +may differ between source and destination. + + 4. 
Useful links Hyper-V Top Level Functional specification and other information: diff --git a/target/i386/cpu.c b/target/i386/cpu.c index 9530b28d42..063551ef55 100644 --- a/target/i386/cpu.c +++ b/target/i386/cpu.c @@ -5849,6 +5849,7 @@ static Property x86_cpu_properties[] = { HYPERV_FEAT_EVMCS, 0), DEFINE_PROP_BIT64("hv-ipi", X86CPU, hyperv_features, HYPERV_FEAT_IPI, 0), +DEFINE_PROP_BOOL("hv-passthrough", X86CPU, hyperv_passthrough, false), DEFINE_PROP_BOOL("check", X86CPU, check_cpuid, true), DEFINE_PROP_BOOL("enforce", X86CPU, enforce_cpuid, false), diff --git a/target/i386/cpu.h b/target/i386/cpu.h index 11fa9e643e..1f1f8969b4 100644 --- a/target/i386/cpu.h +++ b/target/i386/cpu.h @@ -1401,6 +1401,7 @@ struct X86CPU { char *hyperv_vendor_id; bool hyperv_synic_kvm_only; uint64_t hyperv_features; +bool hyperv_passthrough; bool check_cpuid; bool enforce_cpuid; diff --git a/target/i386/kvm.c b/target/i386/kvm.c index 2b13757441..e876dc6118 100644 --- a/target/i386/kvm.c +++ b/target/i386/kvm.c @@ -640,7 +640,7 @@ static bool hyperv_enabled(X86CPU *cpu) CPUState *cs = CPU(cpu); return kvm_check_extension(cs->kvm_state, KVM_CAP_HYPERV) > 0 && ((cpu->hyperv_spinlock_attempts != HYPERV_SPINLOCK_NEVER_RETRY) || - cpu->hyperv_features); + cpu->hyperv_features || cpu->hyperv_passthrough); } static int kvm_arch_set_tsc_khz(CPUState *cs) @@ -986,10 +986,10 @@ static int hv_cpuid_check_and_set(CPUState *cs, struct kvm_cpuid2 *cpuid, { X86CPU *cpu = X86_CPU(cs); CPUX86State *env = >env; -uint32_t r, fw, bits;; +uint32_t r, fw, bits; int i; -if (!hyperv_feat_enabled(cpu, feature)) { +if (!hyperv_feat_enabled(cpu, feature) && !cpu->hyperv_passthrough) { return 0; } @@ -1002,15 +1002,23 @@ static int hv_cpuid_check_and_set(CPUState *cs, struct kvm_cpuid2 *cpuid, } if (hv_cpuid_get_fw(cpuid, fw, ) || (r & bits) != bits) { -fprintf(stderr, -"Hyper-V %s is not supported by kernel\n", -kvm_hyperv_properties[feature].desc); -return 1; +if (hyperv_feat_enabled(cpu, feature)) { 
+fprintf(stderr, +"Hyper-V %s is not supported by kernel\n", +kvm_hyperv_properties[feature].desc); +return 1; +} else { +return 0; +} } env->features[fw] |= bits; } +if (cpu->hyperv_passthrough) { +cpu->hyperv_features |= BIT(feature); +} + return 0; } @@ -1028,22 +1036,29 @@ static int hyperv_handle_properties(CPUState *cs, struct kvm_cpuid_entry2 *c; uint32_t signature[3]; uint32_t cpuid_i = 0; -int r = 0; +int r; if (!hyperv_enabled(cpu)) return 0; -if (hyperv_feat_enabled(cpu, HYPERV_FEAT_EVMCS)) { +if (hyperv_feat_enabled(cpu, HYPERV_FEAT_EVMCS) || +cpu->hyperv_passthrough) { uint16_t evmcs_version; -if (kvm_vcpu_enable_cap(cs, KVM_CAP_HYPERV_ENLIGHTENED_VMCS, 0, -(uintptr_t)_version)) { +r = kvm_vcpu_enable_cap(cs, KVM_CAP_HYPERV_ENLIGHTENED_VMCS, 0, +(uintptr_t)_version); + +
[Qemu-devel] [PATCH v2 8/9] i386/kvm: hv-evmcs requires hv-vapic
Enlightened VMCS is enabled by writing to a field in VP assist page and these require virtual APIC. Signed-off-by: Vitaly Kuznetsov --- target/i386/kvm.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/target/i386/kvm.c b/target/i386/kvm.c index 7fc97b749e..7ae2f63f72 100644 --- a/target/i386/kvm.c +++ b/target/i386/kvm.c @@ -791,7 +791,8 @@ static struct { .flags = { {.fw = FEAT_HV_RECOMM_EAX, .bits = HV_ENLIGHTENED_VMCS_RECOMMENDED} -} +}, +.dependencies = BIT(HYPERV_FEAT_VAPIC) }, [HYPERV_FEAT_IPI] = { .desc = "paravirtualized IPI (hv-ipi)", -- 2.20.1
[Qemu-devel] [PATCH v2 4/9] i386/kvm: document existing Hyper-V enlightenments
Currently, there is no doc describing hv-* CPU flags, people are encouraged to get the information from Microsoft Hyper-V Top Level Functional specification (TLFS). There is, however, a bit of QEMU specifics. Signed-off-by: Vitaly Kuznetsov --- docs/hyperv.txt | 181 1 file changed, 181 insertions(+) create mode 100644 docs/hyperv.txt diff --git a/docs/hyperv.txt b/docs/hyperv.txt new file mode 100644 index 00..c423e0fca2 --- /dev/null +++ b/docs/hyperv.txt @@ -0,0 +1,181 @@ +Hyper-V Enlightenments +== + + +1. Description +=== +In some cases when implementing a hardware interface in software is slow, KVM +implements its own paravirtualized interfaces. This works well for Linux as +guest support for such features is added simultaneously with the feature itself. +It may, however, be hard-to-impossible to add support for these interfaces to +proprietary OSes, namely, Microsoft Windows. + +KVM on x86 implements Hyper-V Enlightenments for Windows guests. These features +make Windows and Hyper-V guests think they're running on top of a Hyper-V +compatible hypervisor and use Hyper-V specific features. + + +2. Setup += +No Hyper-V enlightenments are enabled by default by either KVM or QEMU. In +QEMU, individual enlightenments can be enabled through CPU flags, e.g: + + qemu-system-x86_64 --enable-kvm --cpu host,hv_relaxed,hv_vpindex,hv_time, ... + +Sometimes there are dependencies between enlightenments, QEMU is supposed to +check that the supplied configuration is sane. + +When any set of the Hyper-V enlightenments is enabled, QEMU changes hypervisor +identification (CPUID 0x40000000..0x4000000A) to Hyper-V. KVM identification +and features are kept in leaves 0x40000100..0x40000101. + + +3. Existing enlightenments +=== + +3.1. hv-relaxed + +This feature tells guest OS to disable watchdog timeouts as it is running on a +hypervisor. It is known that some Windows versions will do this even when they +see 'hypervisor' CPU flag. + +3.2. 
hv-vapic +== +Provides so-called VP Assist page MSR to guest allowing it to work with APIC +more efficiently. In particular, this enlightenment allows paravirtualized +(exit-less) EOI processing. + +3.3. hv-spinlocks=xxx +== +Enables paravirtualized spinlocks. The parameter indicates how many times +spinlock acquisition should be attempted before indicating the situation to the +hypervisor. A special value 0xffffffff indicates "never to retry". + +3.4. hv-vpindex + +Provides HV_X64_MSR_VP_INDEX (0x40000002) MSR to the guest which has Virtual +processor index information. This enlightenment makes sense in conjunction with +hv-synic, hv-stimer and other enlightenments which require the guest to know its +Virtual Processor indices (e.g. when VP index needs to be passed in a +hypercall). + +3.5. hv-runtime + +Provides HV_X64_MSR_VP_RUNTIME (0x40000010) MSR to the guest. The MSR keeps the +virtual processor run time in 100ns units. This gives guest operating system an +idea of how much time was 'stolen' from it (when the virtual CPU was preempted +to perform some other work). + +3.6. hv-crash +== +Provides HV_X64_MSR_CRASH_P0..HV_X64_MSR_CRASH_P5 (0x40000100..0x40000105) and +HV_X64_MSR_CRASH_CTL (0x40000105) MSRs to the guest. These MSRs are written to +by the guest when it crashes, HV_X64_MSR_CRASH_P0..HV_X64_MSR_CRASH_P5 MSRs +contain additional crash information. This information is outputted in QEMU log +and through QAPI. +Note: unlike under genuine Hyper-V, write to HV_X64_MSR_CRASH_CTL causes guest +to shutdown. This effectively blocks crash dump generation by Windows. + +3.7. hv-time += +Enables two Hyper-V-specific clocksources available to the guest: MSR-based +Hyper-V clocksource (HV_X64_MSR_TIME_REF_COUNT, 0x40000020) and Reference TSC +page (enabled via MSR HV_X64_MSR_REFERENCE_TSC, 0x40000021). Both clocksources +are per-guest, Reference TSC page clocksource allows for exit-less time stamp +readings. Using this enlightenment leads to significant speedup of all timestamp +related operations. 
+ +3.8. hv-synic +== +Enables Hyper-V Synthetic interrupt controller - an extension of a local APIC. +When enabled, this enlightenment provides additional communication facilities +to the guest: SynIC messages and Events. This is a pre-requisite for +implementing VMBus devices (not yet in QEMU). Additionally, this enlightenment +is needed to enable Hyper-V synthetic timers. SynIC is controlled through MSRs +HV_X64_MSR_SCONTROL..HV_X64_MSR_EOM (0x40000080..0x40000084) and +HV_X64_MSR_SINT0..HV_X64_MSR_SINT15 (0x40000090..0x4000009F) + +Requires: hv-vpindex + +3.9. hv-stimer +=== +Enables Hyper-V synthetic timers. There are four synthetic timers per virtual +CPU controlled through HV_X64_MSR_STIMER0_CONFIG..HV_X64_MSR_STI
[Qemu-devel] [PATCH v2 9/9] i386/kvm: add support for Direct Mode for Hyper-V synthetic timers
Hyper-V on KVM can only use Synthetic timers with Direct Mode (opting for an interrupt instead of VMBus message). This new capability is only announced in KVM_GET_SUPPORTED_HV_CPUID. Signed-off-by: Vitaly Kuznetsov --- docs/hyperv.txt| 10 ++ target/i386/cpu.c | 2 ++ target/i386/cpu.h | 1 + target/i386/hyperv-proto.h | 1 + target/i386/kvm.c | 9 + 5 files changed, 23 insertions(+) diff --git a/docs/hyperv.txt b/docs/hyperv.txt index beadb2d0d4..8fdf25c829 100644 --- a/docs/hyperv.txt +++ b/docs/hyperv.txt @@ -174,6 +174,16 @@ without the feature to find out if enabling it is beneficial. Requires: hv-vapic +3.17. hv-stimer-direct +=== +Hyper-V specification allows synthetic timer operation in two modes: "classic", +when expiration event is delivered as SynIC message and "direct", when the event +is delivered via normal interrupt. It is known that nested Hyper-V can only +use synthetic timers in direct mode and thus 'hv-stimer-direct' needs to be +enabled. + +Requires: hv-vpindex, hv-synic, hv-time, hv-stimer + 4. 
Development features diff --git a/target/i386/cpu.c b/target/i386/cpu.c index 063551ef55..3cfd85758c 100644 --- a/target/i386/cpu.c +++ b/target/i386/cpu.c @@ -5849,6 +5849,8 @@ static Property x86_cpu_properties[] = { HYPERV_FEAT_EVMCS, 0), DEFINE_PROP_BIT64("hv-ipi", X86CPU, hyperv_features, HYPERV_FEAT_IPI, 0), +DEFINE_PROP_BIT64("hv-stimer-direct", X86CPU, hyperv_features, + HYPERV_FEAT_STIMER_DIRECT, 0), DEFINE_PROP_BOOL("hv-passthrough", X86CPU, hyperv_passthrough, false), DEFINE_PROP_BOOL("check", X86CPU, check_cpuid, true), diff --git a/target/i386/cpu.h b/target/i386/cpu.h index 1f1f8969b4..0b6b781ecb 100644 --- a/target/i386/cpu.h +++ b/target/i386/cpu.h @@ -758,6 +758,7 @@ typedef uint32_t FeatureWordArray[FEATURE_WORDS]; #define HYPERV_FEAT_TLBFLUSH 11 #define HYPERV_FEAT_EVMCS 12 #define HYPERV_FEAT_IPI 13 +#define HYPERV_FEAT_STIMER_DIRECT 14 #ifndef HYPERV_SPINLOCK_NEVER_RETRY #define HYPERV_SPINLOCK_NEVER_RETRY 0xFFFFFFFF diff --git a/target/i386/hyperv-proto.h b/target/i386/hyperv-proto.h index c0272b3a01..cffac10b45 100644 --- a/target/i386/hyperv-proto.h +++ b/target/i386/hyperv-proto.h @@ -49,6 +49,7 @@ #define HV_GUEST_IDLE_STATE_AVAILABLE (1u << 5) #define HV_FREQUENCY_MSRS_AVAILABLE (1u << 8) #define HV_GUEST_CRASH_MSR_AVAILABLE (1u << 10) +#define HV_STIMER_DIRECT_MODE_AVAILABLE (1u << 19) /* * HV_CPUID_ENLIGHTMENT_INFO.EAX bits diff --git a/target/i386/kvm.c b/target/i386/kvm.c index 7ae2f63f72..fb29a3057b 100644 --- a/target/i386/kvm.c +++ b/target/i386/kvm.c @@ -803,6 +803,14 @@ static struct { }, .dependencies = BIT(HYPERV_FEAT_VPINDEX) }, +[HYPERV_FEAT_STIMER_DIRECT] = { +.desc = "direct mode synthetic timers (hv-stimer-direct)", +.flags = { +{.fw = FEAT_HYPERV_EDX, + .bits = HV_STIMER_DIRECT_MODE_AVAILABLE} +}, +.dependencies = BIT(HYPERV_FEAT_STIMER) +}, }; static struct kvm_cpuid2 *try_get_hv_cpuid(CPUState *cs, int max) @@ -1125,6 +1133,7 @@ static int hyperv_handle_properties(CPUState *cs, r |= hv_cpuid_check_and_set(cs, cpuid, 
HYPERV_FEAT_TLBFLUSH); r |= hv_cpuid_check_and_set(cs, cpuid, HYPERV_FEAT_EVMCS); r |= hv_cpuid_check_and_set(cs, cpuid, HYPERV_FEAT_IPI); +r |= hv_cpuid_check_and_set(cs, cpuid, HYPERV_FEAT_STIMER_DIRECT); /* Additional dependencies not covered by kvm_hyperv_properties[] */ if (hyperv_feat_enabled(cpu, HYPERV_FEAT_SYNIC) && -- 2.20.1