VCPU TSC is not cleared by a warm reset (*), which leaves some types of Linux
 guests (non-pvops guests and those with the kernel parameter no-kvmclock set)
vulnerable to the overflow in cyc2ns_offset fixed by upstream commit
9993bc635d01a6ee7f6b833b4ee65ce7c06350b1 ("sched/x86: Fix overflow in
cyc2ns_offset").

To put it in a nutshell, if such a Linux guest without the patch above applied
has been up more than 208 days and attempts a warm reset chances are that
the newly booted kernel will panic or hang.

(*) Intel Xeon E5 processors show the same broken behavior due to
    the errata "TSC is Not Affected by Warm Reset" (Intel® Xeon®
    Processor E5 Family Specification Update - August 2013): "The
    TSC (Time Stamp Counter MSR 10H) should be cleared on
    reset. Due to this erratum the TSC is not affected by warm
    reset."

Cc: Will Auld <will.a...@intel.com>
Cc: Marcelo Tosatti <mtosa...@redhat.com>
Signed-off-by: Fernando Luis Vazquez Cao <ferna...@oss.ntt.co.jp>
---

diff -urNp qemu-orig/target-i386/cpu.c qemu/target-i386/cpu.c
--- qemu-orig/target-i386/cpu.c 2013-11-28 07:02:45.000000000 +0900
+++ qemu/target-i386/cpu.c      2013-12-05 21:45:19.980156320 +0900
@@ -2446,6 +2446,9 @@ static void x86_cpu_reset(CPUState *s)
     cpu_breakpoint_remove_all(env, BP_CPU);
     cpu_watchpoint_remove_all(env, BP_CPU);
 
+    env->tsc_adjust = 0;
+    env->tsc = 0;
+
 #if !defined(CONFIG_USER_ONLY)
     /* We hard-wire the BSP to the first CPU. */
     if (s->cpu_index == 0) {
diff -urNp qemu-orig/target-i386/kvm.c qemu/target-i386/kvm.c
--- qemu-orig/target-i386/kvm.c 2013-11-28 07:02:45.000000000 +0900
+++ qemu/target-i386/kvm.c      2013-12-05 21:45:28.900200552 +0900
@@ -1139,22 +1139,20 @@ static int kvm_put_msrs(X86CPU *cpu, int
         kvm_msr_entry_set(&msrs[n++], MSR_LSTAR, env->lstar);
     }
 #endif
-    if (level == KVM_PUT_FULL_STATE) {
+    /*
+     * The following MSRs have side effects on the guest or are too heavy
+     * for normal writeback. Limit them to reset or full state updates.
+     */
+    if (level >= KVM_PUT_RESET_STATE) {
         /*
          * KVM is yet unable to synchronize TSC values of multiple VCPUs on
          * writeback. Until this is fixed, we only write the offset to SMP
          * guests after migration, desynchronizing the VCPUs, but avoiding
          * huge jump-backs that would occur without any writeback at all.
          */
-        if (smp_cpus == 1 || env->tsc != 0) {
+        if (smp_cpus == 1 || env->tsc != 0 || level == KVM_PUT_RESET_STATE) {
             kvm_msr_entry_set(&msrs[n++], MSR_IA32_TSC, env->tsc);
         }
-    }
-    /*
-     * The following MSRs have side effects on the guest or are too heavy
-     * for normal writeback. Limit them to reset or full state updates.
-     */
-    if (level >= KVM_PUT_RESET_STATE) {
         kvm_msr_entry_set(&msrs[n++], MSR_KVM_SYSTEM_TIME,
                           env->system_time_msr);
         kvm_msr_entry_set(&msrs[n++], MSR_KVM_WALL_CLOCK, env->wall_clock_msr);



Reply via email to