Linus,

Please pull the latest x86-fpu-for-linus git tree from:

   git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip.git x86-fpu-for-linus

   # HEAD: 064e6a8ba61a751625478f656c6f76a6f37a009e Merge branch 'linus' into x86/fpu, to resolve conflicts

The main changes in this cycle were:

 - Do a large round of simplifications after all CPUs do 'eager' FPU context
   switching in v4.9: remove CR0 twiddling, remove leftover eager/lazy bits,
   etc. (Andy Lutomirski)

 - More FPU code simplifications: remove struct fpu::counter, clarify
   nomenclature, remove unnecessary arguments/functions and better structure
   the code. (Rik van Riel) A stand-alone sketch of the resulting bookkeeping
   follows below.
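
   That sketch (plain userspace C, not kernel code, and not part of the pull)
   models the "register state valid" bookkeeping the series converges on: a
   task's FPU registers are considered live on a CPU only while both the
   per-CPU owner pointer (fpu_fpregs_owner_ctx in the patches) and the task's
   fpu->last_cpu field still match, and either side can be invalidated on its
   own. All toy_* names are made up for illustration.

   /* Toy model of fpregs_state_valid() and the new invalidation helpers. */
   #include <stdio.h>
   #include <stdbool.h>

   #define TOY_NR_CPUS 2

   struct toy_fpu {
           int last_cpu;                    /* models fpu->last_cpu */
   };

   /* models the per-CPU fpu_fpregs_owner_ctx pointer */
   static struct toy_fpu *toy_fpregs_owner[TOY_NR_CPUS];

   static bool toy_fpregs_state_valid(struct toy_fpu *fpu, int cpu)
   {
           return toy_fpregs_owner[cpu] == fpu && fpu->last_cpu == cpu;
   }

   /* CPU-side invalidation: this CPU's registers got used for something else */
   static void toy_cpu_invalidate_fpregs_state(int cpu)
   {
           toy_fpregs_owner[cpu] = NULL;
   }

   /* task-side invalidation: the task's in-memory FPU state was modified */
   static void toy_fpu_invalidate_fpregs_state(struct toy_fpu *fpu)
   {
           fpu->last_cpu = -1;
   }

   int main(void)
   {
           struct toy_fpu task = { .last_cpu = 0 };

           toy_fpregs_owner[0] = &task;    /* task's registers live on CPU 0 */
           printf("valid: %d\n", toy_fpregs_state_valid(&task, 0));  /* 1 */

           toy_cpu_invalidate_fpregs_state(0);
           printf("valid: %d\n", toy_fpregs_state_valid(&task, 0));  /* 0 */

           toy_fpu_invalidate_fpregs_state(&task);
           return 0;
   }

   When both checks still hold at context-switch time, switch_fpu_finish() in
   the patch below can skip the copy_kernel_to_fpregs() reload entirely.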

  out-of-topic modifications in x86-fpu-for-linus:
  --------------------------------------------------
  drivers/char/hw_random/via-rng.c   # 5a83d60c074d: x86/fpu: Remove irq_ts_save() and irq_ts_restore()
  drivers/crypto/padlock-aes.c       # 5a83d60c074d: x86/fpu: Remove irq_ts_save() and irq_ts_restore()
  drivers/crypto/padlock-sha.c       # 5a83d60c074d: x86/fpu: Remove irq_ts_save() and irq_ts_restore()
  drivers/lguest/hypercalls.c        # cd95ea81f256: x86/fpu, lguest: Remove CR0.TS support
  drivers/lguest/lg.h                # cd95ea81f256: x86/fpu, lguest: Remove CR0.TS support
  drivers/lguest/x86/core.c          # cd95ea81f256: x86/fpu, lguest: Remove CR0.TS support
  include/linux/kvm_host.h           # 3d42de25d290: x86/fpu, kvm: Remove KVM vcpu->fpu_counter

 Thanks,

        Ingo

------------------>
Andy Lutomirski (13):
      x86/crypto, x86/fpu: Remove X86_FEATURE_EAGER_FPU #ifdef from the crc32c code
      x86/fpu: Hard-disable lazy FPU mode
      x86/fpu: Remove the XFEATURE_MASK_EAGER/LAZY distinction
      x86/fpu: Remove use_eager_fpu()
      x86/fpu: Finish excising 'eagerfpu'
      x86/fpu: Get rid of two redundant clts() calls
      x86/fpu: Stop saving and restoring CR0.TS in fpu__init_check_bugs()
      x86/fpu: Remove irq_ts_save() and irq_ts_restore()
      x86/fpu, kvm: Remove host CR0.TS manipulation
      x86/fpu, lguest: Remove CR0.TS support
      x86/fpu: Handle #NM without FPU emulation as an error
      x86/fpu: Remove stts()
      x86/fpu: Remove clts()

Rik van Riel (7):
      x86/fpu: Remove struct fpu::counter
      x86/fpu, kvm: Remove KVM vcpu->fpu_counter
      x86/fpu: Rename lazy restore functions to "register state valid"
      x86/fpu: Remove __fpregs_(de)activate()
      x86/fpu: Split old & new FPU code paths
      x86/fpu: Remove 'cpu' argument from __cpu_invalidate_fpregs_state()
      x86/fpu: Split old_fpu & new_fpu handling into separate functions


 Documentation/kernel-parameters.txt      |   6 --
 arch/x86/crypto/crc32c-intel_glue.c      |  22 +----
 arch/x86/include/asm/cpufeatures.h       |   1 -
 arch/x86/include/asm/fpu/api.h           |  10 ---
 arch/x86/include/asm/fpu/internal.h      | 139 ++++++++++---------------------
 arch/x86/include/asm/fpu/types.h         |  34 --------
 arch/x86/include/asm/fpu/xstate.h        |  17 ++--
 arch/x86/include/asm/lguest_hcall.h      |   1 -
 arch/x86/include/asm/paravirt.h          |   5 --
 arch/x86/include/asm/paravirt_types.h    |   2 -
 arch/x86/include/asm/special_insns.h     |  13 ---
 arch/x86/include/asm/trace/fpu.h         |   5 +-
 arch/x86/kernel/fpu/bugs.c               |   7 --
 arch/x86/kernel/fpu/core.c               |  74 ++--------------
 arch/x86/kernel/fpu/init.c               | 107 +-----------------------
 arch/x86/kernel/fpu/signal.c             |   8 +-
 arch/x86/kernel/fpu/xstate.c             |   9 --
 arch/x86/kernel/paravirt.c               |   1 -
 arch/x86/kernel/paravirt_patch_32.c      |   2 -
 arch/x86/kernel/paravirt_patch_64.c      |   2 -
 arch/x86/kernel/process_32.c             |   5 +-
 arch/x86/kernel/process_64.c             |   5 +-
 arch/x86/kernel/smpboot.c                |   2 +-
 arch/x86/kernel/traps.c                  |  20 ++++-
 arch/x86/kvm/cpuid.c                     |   4 +-
 arch/x86/kvm/vmx.c                       |  12 +--
 arch/x86/kvm/x86.c                       |  19 +----
 arch/x86/lguest/boot.c                   |  29 ++-----
 arch/x86/mm/pkeys.c                      |   3 +-
 arch/x86/xen/enlighten.c                 |  13 ---
 drivers/char/hw_random/via-rng.c         |   8 +-
 drivers/crypto/padlock-aes.c             |  23 +----
 drivers/crypto/padlock-sha.c             |  18 ----
 drivers/lguest/hypercalls.c              |   4 -
 drivers/lguest/lg.h                      |   1 -
 drivers/lguest/x86/core.c                |  19 +----
 include/linux/kvm_host.h                 |   1 -
 tools/arch/x86/include/asm/cpufeatures.h |   1 -
 38 files changed, 105 insertions(+), 547 deletions(-)

diff --git a/Documentation/kernel-parameters.txt b/Documentation/kernel-parameters.txt
index 37babf91f2cb..459b301137c2 100644
--- a/Documentation/kernel-parameters.txt
+++ b/Documentation/kernel-parameters.txt
@@ -1074,12 +1074,6 @@ bytes respectively. Such letter suffixes can also be entirely omitted.
        nopku           [X86] Disable Memory Protection Keys CPU feature found
                        in some Intel CPUs.
 
-       eagerfpu=       [X86]
-                       on      enable eager fpu restore
-                       off     disable eager fpu restore
-                       auto    selects the default scheme, which automatically
-                               enables eagerfpu restore for xsaveopt.
-
        module.async_probe [KNL]
                        Enable asynchronous probe on this module.
 
diff --git a/arch/x86/crypto/crc32c-intel_glue.c b/arch/x86/crypto/crc32c-intel_glue.c
index 0857b1a1de3b..c194d5717ae5 100644
--- a/arch/x86/crypto/crc32c-intel_glue.c
+++ b/arch/x86/crypto/crc32c-intel_glue.c
@@ -48,26 +48,13 @@
 #ifdef CONFIG_X86_64
 /*
  * use carryless multiply version of crc32c when buffer
- * size is >= 512 (when eager fpu is enabled) or
- * >= 1024 (when eager fpu is disabled) to account
+ * size is >= 512 to account
  * for fpu state save/restore overhead.
  */
-#define CRC32C_PCL_BREAKEVEN_EAGERFPU  512
-#define CRC32C_PCL_BREAKEVEN_NOEAGERFPU        1024
+#define CRC32C_PCL_BREAKEVEN   512
 
 asmlinkage unsigned int crc_pcl(const u8 *buffer, int len,
                                unsigned int crc_init);
-static int crc32c_pcl_breakeven = CRC32C_PCL_BREAKEVEN_EAGERFPU;
-#if defined(X86_FEATURE_EAGER_FPU)
-#define set_pcl_breakeven_point()                                      \
-do {                                                                   \
-       if (!use_eager_fpu())                                           \
-               crc32c_pcl_breakeven = CRC32C_PCL_BREAKEVEN_NOEAGERFPU; \
-} while (0)
-#else
-#define set_pcl_breakeven_point()                                      \
-       (crc32c_pcl_breakeven = CRC32C_PCL_BREAKEVEN_NOEAGERFPU)
-#endif
 #endif /* CONFIG_X86_64 */
 
 static u32 crc32c_intel_le_hw_byte(u32 crc, unsigned char const *data, size_t length)
@@ -190,7 +177,7 @@ static int crc32c_pcl_intel_update(struct shash_desc *desc, const u8 *data,
         * use faster PCL version if datasize is large enough to
         * overcome kernel fpu state save/restore overhead
         */
-       if (len >= crc32c_pcl_breakeven && irq_fpu_usable()) {
+       if (len >= CRC32C_PCL_BREAKEVEN && irq_fpu_usable()) {
                kernel_fpu_begin();
                *crcp = crc_pcl(data, len, *crcp);
                kernel_fpu_end();
@@ -202,7 +189,7 @@ static int crc32c_pcl_intel_update(struct shash_desc *desc, const u8 *data,
 static int __crc32c_pcl_intel_finup(u32 *crcp, const u8 *data, unsigned int len,
                                u8 *out)
 {
-       if (len >= crc32c_pcl_breakeven && irq_fpu_usable()) {
+       if (len >= CRC32C_PCL_BREAKEVEN && irq_fpu_usable()) {
                kernel_fpu_begin();
                *(__le32 *)out = ~cpu_to_le32(crc_pcl(data, len, *crcp));
                kernel_fpu_end();
@@ -261,7 +248,6 @@ static int __init crc32c_intel_mod_init(void)
                alg.update = crc32c_pcl_intel_update;
                alg.finup = crc32c_pcl_intel_finup;
                alg.digest = crc32c_pcl_intel_digest;
-               set_pcl_breakeven_point();
        }
 #endif
        return crypto_register_shash(&alg);
diff --git a/arch/x86/include/asm/cpufeatures.h b/arch/x86/include/asm/cpufeatures.h
index a39629206864..cddd5d06e1cb 100644
--- a/arch/x86/include/asm/cpufeatures.h
+++ b/arch/x86/include/asm/cpufeatures.h
@@ -104,7 +104,6 @@
 #define X86_FEATURE_EXTD_APICID        ( 3*32+26) /* has extended APICID (8 bits) */
 #define X86_FEATURE_AMD_DCM     ( 3*32+27) /* multi-node processor */
 #define X86_FEATURE_APERFMPERF ( 3*32+28) /* APERFMPERF */
-#define X86_FEATURE_EAGER_FPU  ( 3*32+29) /* "eagerfpu" Non lazy FPU restore */
 #define X86_FEATURE_NONSTOP_TSC_S3 ( 3*32+30) /* TSC doesn't stop in S3 state */
 
 /* Intel-defined CPU features, CPUID level 0x00000001 (ecx), word 4 */
diff --git a/arch/x86/include/asm/fpu/api.h b/arch/x86/include/asm/fpu/api.h
index 1429a7c736db..0877ae018fc9 100644
--- a/arch/x86/include/asm/fpu/api.h
+++ b/arch/x86/include/asm/fpu/api.h
@@ -27,16 +27,6 @@ extern void kernel_fpu_end(void);
 extern bool irq_fpu_usable(void);
 
 /*
- * Some instructions like VIA's padlock instructions generate a spurious
- * DNA fault but don't modify SSE registers. And these instructions
- * get used from interrupt context as well. To prevent these kernel instructions
- * in interrupt context interacting wrongly with other user/kernel fpu usage, we
- * should use them only in the context of irq_ts_save/restore()
- */
-extern int  irq_ts_save(void);
-extern void irq_ts_restore(int TS_state);
-
-/*
  * Query the presence of one or more xfeatures. Works on any legacy CPU as well.
  *
  * If 'feature_name' is set then put a human-readable description of
diff --git a/arch/x86/include/asm/fpu/internal.h b/arch/x86/include/asm/fpu/internal.h
index 2737366ea583..d4a684997497 100644
--- a/arch/x86/include/asm/fpu/internal.h
+++ b/arch/x86/include/asm/fpu/internal.h
@@ -60,11 +60,6 @@ extern u64 fpu__get_supported_xfeatures_mask(void);
 /*
  * FPU related CPU feature flag helper routines:
  */
-static __always_inline __pure bool use_eager_fpu(void)
-{
-       return static_cpu_has(X86_FEATURE_EAGER_FPU);
-}
-
 static __always_inline __pure bool use_xsaveopt(void)
 {
        return static_cpu_has(X86_FEATURE_XSAVEOPT);
@@ -484,42 +479,42 @@ extern int copy_fpstate_to_sigframe(void __user *buf, void __user *fp, int size)
 DECLARE_PER_CPU(struct fpu *, fpu_fpregs_owner_ctx);
 
 /*
- * Must be run with preemption disabled: this clears the fpu_fpregs_owner_ctx,
- * on this CPU.
+ * The in-register FPU state for an FPU context on a CPU is assumed to be
+ * valid if the fpu->last_cpu matches the CPU, and the fpu_fpregs_owner_ctx
+ * matches the FPU.
  *
- * This will disable any lazy FPU state restore of the current FPU state,
- * but if the current thread owns the FPU, it will still be saved by.
+ * If the FPU register state is valid, the kernel can skip restoring the
+ * FPU state from memory.
+ *
+ * Any code that clobbers the FPU registers or updates the in-memory
+ * FPU state for a task MUST let the rest of the kernel know that the
+ * FPU registers are no longer valid for this task.
+ *
+ * Either one of these invalidation functions is enough. Invalidate
+ * a resource you control: CPU if using the CPU for something else
+ * (with preemption disabled), FPU for the current task, or a task that
+ * is prevented from running by the current task.
  */
-static inline void __cpu_disable_lazy_restore(unsigned int cpu)
+static inline void __cpu_invalidate_fpregs_state(void)
 {
-       per_cpu(fpu_fpregs_owner_ctx, cpu) = NULL;
+       __this_cpu_write(fpu_fpregs_owner_ctx, NULL);
 }
 
-static inline int fpu_want_lazy_restore(struct fpu *fpu, unsigned int cpu)
-{
-       return fpu == this_cpu_read_stable(fpu_fpregs_owner_ctx) && cpu == fpu->last_cpu;
-}
-
-
-/*
- * Wrap lazy FPU TS handling in a 'hw fpregs activation/deactivation'
- * idiom, which is then paired with the sw-flag (fpregs_active) later on:
- */
-
-static inline void __fpregs_activate_hw(void)
+static inline void __fpu_invalidate_fpregs_state(struct fpu *fpu)
 {
-       if (!use_eager_fpu())
-               clts();
+       fpu->last_cpu = -1;
 }
 
-static inline void __fpregs_deactivate_hw(void)
+static inline int fpregs_state_valid(struct fpu *fpu, unsigned int cpu)
 {
-       if (!use_eager_fpu())
-               stts();
+       return fpu == this_cpu_read_stable(fpu_fpregs_owner_ctx) && cpu == fpu->last_cpu;
 }
 
-/* Must be paired with an 'stts' (fpregs_deactivate_hw()) after! */
-static inline void __fpregs_deactivate(struct fpu *fpu)
+/*
+ * These generally need preemption protection to work,
+ * do try to avoid using these on their own:
+ */
+static inline void fpregs_deactivate(struct fpu *fpu)
 {
        WARN_ON_FPU(!fpu->fpregs_active);
 
@@ -528,8 +523,7 @@ static inline void __fpregs_deactivate(struct fpu *fpu)
        trace_x86_fpu_regs_deactivated(fpu);
 }
 
-/* Must be paired with a 'clts' (fpregs_activate_hw()) before! */
-static inline void __fpregs_activate(struct fpu *fpu)
+static inline void fpregs_activate(struct fpu *fpu)
 {
        WARN_ON_FPU(fpu->fpregs_active);
 
@@ -554,51 +548,19 @@ static inline int fpregs_active(void)
 }
 
 /*
- * Encapsulate the CR0.TS handling together with the
- * software flag.
- *
- * These generally need preemption protection to work,
- * do try to avoid using these on their own.
- */
-static inline void fpregs_activate(struct fpu *fpu)
-{
-       __fpregs_activate_hw();
-       __fpregs_activate(fpu);
-}
-
-static inline void fpregs_deactivate(struct fpu *fpu)
-{
-       __fpregs_deactivate(fpu);
-       __fpregs_deactivate_hw();
-}
-
-/*
  * FPU state switching for scheduling.
  *
  * This is a two-stage process:
  *
- *  - switch_fpu_prepare() saves the old state and
- *    sets the new state of the CR0.TS bit. This is
- *    done within the context of the old process.
+ *  - switch_fpu_prepare() saves the old state.
+ *    This is done within the context of the old process.
  *
  *  - switch_fpu_finish() restores the new state as
  *    necessary.
  */
-typedef struct { int preload; } fpu_switch_t;
-
-static inline fpu_switch_t
-switch_fpu_prepare(struct fpu *old_fpu, struct fpu *new_fpu, int cpu)
+static inline void
+switch_fpu_prepare(struct fpu *old_fpu, int cpu)
 {
-       fpu_switch_t fpu;
-
-       /*
-        * If the task has used the math, pre-load the FPU on xsave processors
-        * or if the past 5 consecutive context-switches used math.
-        */
-       fpu.preload = static_cpu_has(X86_FEATURE_FPU) &&
-                     new_fpu->fpstate_active &&
-                     (use_eager_fpu() || new_fpu->counter > 5);
-
        if (old_fpu->fpregs_active) {
                if (!copy_fpregs_to_fpstate(old_fpu))
                        old_fpu->last_cpu = -1;
@@ -608,29 +570,8 @@ switch_fpu_prepare(struct fpu *old_fpu, struct fpu *new_fpu, int cpu)
                /* But leave fpu_fpregs_owner_ctx! */
                old_fpu->fpregs_active = 0;
                trace_x86_fpu_regs_deactivated(old_fpu);
-
-               /* Don't change CR0.TS if we just switch! */
-               if (fpu.preload) {
-                       new_fpu->counter++;
-                       __fpregs_activate(new_fpu);
-                       trace_x86_fpu_regs_activated(new_fpu);
-                       prefetch(&new_fpu->state);
-               } else {
-                       __fpregs_deactivate_hw();
-               }
-       } else {
-               old_fpu->counter = 0;
+       } else
                old_fpu->last_cpu = -1;
-               if (fpu.preload) {
-                       new_fpu->counter++;
-                       if (fpu_want_lazy_restore(new_fpu, cpu))
-                               fpu.preload = 0;
-                       else
-                               prefetch(&new_fpu->state);
-                       fpregs_activate(new_fpu);
-               }
-       }
-       return fpu;
 }
 
 /*
@@ -638,15 +579,19 @@ switch_fpu_prepare(struct fpu *old_fpu, struct fpu *new_fpu, int cpu)
  */
 
 /*
- * By the time this gets called, we've already cleared CR0.TS and
- * given the process the FPU if we are going to preload the FPU
- * state - all we need to do is to conditionally restore the register
- * state itself.
+ * Set up the userspace FPU context for the new task, if the task
+ * has used the FPU.
  */
-static inline void switch_fpu_finish(struct fpu *new_fpu, fpu_switch_t fpu_switch)
+static inline void switch_fpu_finish(struct fpu *new_fpu, int cpu)
 {
-       if (fpu_switch.preload)
-               copy_kernel_to_fpregs(&new_fpu->state);
+       bool preload = static_cpu_has(X86_FEATURE_FPU) &&
+                      new_fpu->fpstate_active;
+
+       if (preload) {
+               if (!fpregs_state_valid(new_fpu, cpu))
+                       copy_kernel_to_fpregs(&new_fpu->state);
+               fpregs_activate(new_fpu);
+       }
 }
 
 /*
diff --git a/arch/x86/include/asm/fpu/types.h b/arch/x86/include/asm/fpu/types.h
index 48df486b02f9..3c80f5b9c09d 100644
--- a/arch/x86/include/asm/fpu/types.h
+++ b/arch/x86/include/asm/fpu/types.h
@@ -322,17 +322,6 @@ struct fpu {
        unsigned char                   fpregs_active;
 
        /*
-        * @counter:
-        *
-        * This counter contains the number of consecutive context switches
-        * during which the FPU stays used. If this is over a threshold, the
-        * lazy FPU restore logic becomes eager, to save the trap overhead.
-        * This is an unsigned char so that after 256 iterations the counter
-        * wraps and the context switch behavior turns lazy again; this is to
-        * deal with bursty apps that only use the FPU for a short time:
-        */
-       unsigned char                   counter;
-       /*
         * @state:
         *
         * In-memory copy of all FPU registers that we save/restore
@@ -340,29 +329,6 @@ struct fpu {
         * the registers in the FPU are more recent than this state
         * copy. If the task context-switches away then they get
         * saved here and represent the FPU state.
-        *
-        * After context switches there may be a (short) time period
-        * during which the in-FPU hardware registers are unchanged
-        * and still perfectly match this state, if the tasks
-        * scheduled afterwards are not using the FPU.
-        *
-        * This is the 'lazy restore' window of optimization, which
-        * we track though 'fpu_fpregs_owner_ctx' and 'fpu->last_cpu'.
-        *
-        * We detect whether a subsequent task uses the FPU via setting
-        * CR0::TS to 1, which causes any FPU use to raise a #NM fault.
-        *
-        * During this window, if the task gets scheduled again, we
-        * might be able to skip having to do a restore from this
-        * memory buffer to the hardware registers - at the cost of
-        * incurring the overhead of #NM fault traps.
-        *
-        * Note that on modern CPUs that support the XSAVEOPT (or other
-        * optimized XSAVE instructions), we don't use #NM traps anymore,
-        * as the hardware can track whether FPU registers need saving
-        * or not. On such CPUs we activate the non-lazy ('eagerfpu')
-        * logic, which unconditionally saves/restores all FPU state
-        * across context switches. (if FPU state exists.)
         */
        union fpregs_state              state;
        /*
diff --git a/arch/x86/include/asm/fpu/xstate.h b/arch/x86/include/asm/fpu/xstate.h
index 430bacf73074..1b2799e0699a 100644
--- a/arch/x86/include/asm/fpu/xstate.h
+++ b/arch/x86/include/asm/fpu/xstate.h
@@ -21,21 +21,16 @@
 /* Supervisor features */
 #define XFEATURE_MASK_SUPERVISOR (XFEATURE_MASK_PT)
 
-/* Supported features which support lazy state saving */
-#define XFEATURE_MASK_LAZY     (XFEATURE_MASK_FP | \
+/* All currently supported features */
+#define XCNTXT_MASK            (XFEATURE_MASK_FP | \
                                 XFEATURE_MASK_SSE | \
                                 XFEATURE_MASK_YMM | \
                                 XFEATURE_MASK_OPMASK | \
                                 XFEATURE_MASK_ZMM_Hi256 | \
-                                XFEATURE_MASK_Hi16_ZMM)
-
-/* Supported features which require eager state saving */
-#define XFEATURE_MASK_EAGER    (XFEATURE_MASK_BNDREGS | \
-                                XFEATURE_MASK_BNDCSR | \
-                                XFEATURE_MASK_PKRU)
-
-/* All currently supported features */
-#define XCNTXT_MASK    (XFEATURE_MASK_LAZY | XFEATURE_MASK_EAGER)
+                                XFEATURE_MASK_Hi16_ZMM  | \
+                                XFEATURE_MASK_PKRU | \
+                                XFEATURE_MASK_BNDREGS | \
+                                XFEATURE_MASK_BNDCSR)
 
 #ifdef CONFIG_X86_64
 #define REX_PREFIX     "0x48, "
diff --git a/arch/x86/include/asm/lguest_hcall.h b/arch/x86/include/asm/lguest_hcall.h
index ef01fef3eebc..6c119cfae218 100644
--- a/arch/x86/include/asm/lguest_hcall.h
+++ b/arch/x86/include/asm/lguest_hcall.h
@@ -9,7 +9,6 @@
 #define LHCALL_FLUSH_TLB       5
 #define LHCALL_LOAD_IDT_ENTRY  6
 #define LHCALL_SET_STACK       7
-#define LHCALL_TS              8
 #define LHCALL_SET_CLOCKEVENT  9
 #define LHCALL_HALT            10
 #define LHCALL_SET_PMD         13
diff --git a/arch/x86/include/asm/paravirt.h b/arch/x86/include/asm/paravirt.h
index ce932812f142..f1fb4dbe9a3e 100644
--- a/arch/x86/include/asm/paravirt.h
+++ b/arch/x86/include/asm/paravirt.h
@@ -41,11 +41,6 @@ static inline void set_debugreg(unsigned long val, int reg)
        PVOP_VCALL2(pv_cpu_ops.set_debugreg, reg, val);
 }
 
-static inline void clts(void)
-{
-       PVOP_VCALL0(pv_cpu_ops.clts);
-}
-
 static inline unsigned long read_cr0(void)
 {
        return PVOP_CALL0(unsigned long, pv_cpu_ops.read_cr0);
diff --git a/arch/x86/include/asm/paravirt_types.h b/arch/x86/include/asm/paravirt_types.h
index 0f400c0e4979..545426aa61ef 100644
--- a/arch/x86/include/asm/paravirt_types.h
+++ b/arch/x86/include/asm/paravirt_types.h
@@ -103,8 +103,6 @@ struct pv_cpu_ops {
        unsigned long (*get_debugreg)(int regno);
        void (*set_debugreg)(int regno, unsigned long value);
 
-       void (*clts)(void);
-
        unsigned long (*read_cr0)(void);
        void (*write_cr0)(unsigned long);
 
diff --git a/arch/x86/include/asm/special_insns.h b/arch/x86/include/asm/special_insns.h
index 19a2224f9e16..12af3e35edfa 100644
--- a/arch/x86/include/asm/special_insns.h
+++ b/arch/x86/include/asm/special_insns.h
@@ -6,11 +6,6 @@
 
 #include <asm/nops.h>
 
-static inline void native_clts(void)
-{
-       asm volatile("clts");
-}
-
 /*
  * Volatile isn't enough to prevent the compiler from reordering the
  * read/write functions for the control registers and messing everything up.
@@ -208,16 +203,8 @@ static inline void load_gs_index(unsigned selector)
 
 #endif
 
-/* Clear the 'TS' bit */
-static inline void clts(void)
-{
-       native_clts();
-}
-
 #endif/* CONFIG_PARAVIRT */
 
-#define stts() write_cr0(read_cr0() | X86_CR0_TS)
-
 static inline void clflush(volatile void *__p)
 {
        asm volatile("clflush %0" : "+m" (*(volatile char __force *)__p));
diff --git a/arch/x86/include/asm/trace/fpu.h b/arch/x86/include/asm/trace/fpu.h
index 9217ab1f5bf6..342e59789fcd 100644
--- a/arch/x86/include/asm/trace/fpu.h
+++ b/arch/x86/include/asm/trace/fpu.h
@@ -14,7 +14,6 @@ DECLARE_EVENT_CLASS(x86_fpu,
                __field(struct fpu *, fpu)
                __field(bool, fpregs_active)
                __field(bool, fpstate_active)
-               __field(int, counter)
                __field(u64, xfeatures)
                __field(u64, xcomp_bv)
                ),
@@ -23,17 +22,15 @@ DECLARE_EVENT_CLASS(x86_fpu,
                __entry->fpu            = fpu;
                __entry->fpregs_active  = fpu->fpregs_active;
                __entry->fpstate_active = fpu->fpstate_active;
-               __entry->counter        = fpu->counter;
                if (boot_cpu_has(X86_FEATURE_OSXSAVE)) {
                        __entry->xfeatures = fpu->state.xsave.header.xfeatures;
                        __entry->xcomp_bv  = fpu->state.xsave.header.xcomp_bv;
                }
        ),
-       TP_printk("x86/fpu: %p fpregs_active: %d fpstate_active: %d counter: %d xfeatures: %llx xcomp_bv: %llx",
+       TP_printk("x86/fpu: %p fpregs_active: %d fpstate_active: %d xfeatures: %llx xcomp_bv: %llx",
                        __entry->fpu,
                        __entry->fpregs_active,
                        __entry->fpstate_active,
-                       __entry->counter,
                        __entry->xfeatures,
                        __entry->xcomp_bv
        )
diff --git a/arch/x86/kernel/fpu/bugs.c b/arch/x86/kernel/fpu/bugs.c
index aad34aafc0e0..d913047f832c 100644
--- a/arch/x86/kernel/fpu/bugs.c
+++ b/arch/x86/kernel/fpu/bugs.c
@@ -23,17 +23,12 @@ static double __initdata y = 3145727.0;
  */
 void __init fpu__init_check_bugs(void)
 {
-       u32 cr0_saved;
        s32 fdiv_bug;
 
        /* kernel_fpu_begin/end() relies on patched alternative instructions. */
        if (!boot_cpu_has(X86_FEATURE_FPU))
                return;
 
-       /* We might have CR0::TS set already, clear it: */
-       cr0_saved = read_cr0();
-       write_cr0(cr0_saved & ~X86_CR0_TS);
-
        kernel_fpu_begin();
 
        /*
@@ -56,8 +51,6 @@ void __init fpu__init_check_bugs(void)
 
        kernel_fpu_end();
 
-       write_cr0(cr0_saved);
-
        if (fdiv_bug) {
                set_cpu_bug(&boot_cpu_data, X86_BUG_FDIV);
                pr_warn("Hmm, FPU with FDIV bug\n");
diff --git a/arch/x86/kernel/fpu/core.c b/arch/x86/kernel/fpu/core.c
index ebb4e95fbd74..e4e97a5355ce 100644
--- a/arch/x86/kernel/fpu/core.c
+++ b/arch/x86/kernel/fpu/core.c
@@ -58,27 +58,9 @@ static bool kernel_fpu_disabled(void)
        return this_cpu_read(in_kernel_fpu);
 }
 
-/*
- * Were we in an interrupt that interrupted kernel mode?
- *
- * On others, we can do a kernel_fpu_begin/end() pair *ONLY* if that
- * pair does nothing at all: the thread must not have fpu (so
- * that we don't try to save the FPU state), and TS must
- * be set (so that the clts/stts pair does nothing that is
- * visible in the interrupted kernel thread).
- *
- * Except for the eagerfpu case when we return true; in the likely case
- * the thread has FPU but we are not going to set/clear TS.
- */
 static bool interrupted_kernel_fpu_idle(void)
 {
-       if (kernel_fpu_disabled())
-               return false;
-
-       if (use_eager_fpu())
-               return true;
-
-       return !current->thread.fpu.fpregs_active && (read_cr0() & X86_CR0_TS);
+       return !kernel_fpu_disabled();
 }
 
 /*
@@ -125,8 +107,7 @@ void __kernel_fpu_begin(void)
                 */
                copy_fpregs_to_fpstate(fpu);
        } else {
-               this_cpu_write(fpu_fpregs_owner_ctx, NULL);
-               __fpregs_activate_hw();
+               __cpu_invalidate_fpregs_state();
        }
 }
 EXPORT_SYMBOL(__kernel_fpu_begin);
@@ -137,8 +118,6 @@ void __kernel_fpu_end(void)
 
        if (fpu->fpregs_active)
                copy_kernel_to_fpregs(&fpu->state);
-       else
-               __fpregs_deactivate_hw();
 
        kernel_fpu_enable();
 }
@@ -159,35 +138,6 @@ void kernel_fpu_end(void)
 EXPORT_SYMBOL_GPL(kernel_fpu_end);
 
 /*
- * CR0::TS save/restore functions:
- */
-int irq_ts_save(void)
-{
-       /*
-        * If in process context and not atomic, we can take a spurious DNA fault.
-        * Otherwise, doing clts() in process context requires disabling preemption
-        * or some heavy lifting like kernel_fpu_begin()
-        */
-       if (!in_atomic())
-               return 0;
-
-       if (read_cr0() & X86_CR0_TS) {
-               clts();
-               return 1;
-       }
-
-       return 0;
-}
-EXPORT_SYMBOL_GPL(irq_ts_save);
-
-void irq_ts_restore(int TS_state)
-{
-       if (TS_state)
-               stts();
-}
-EXPORT_SYMBOL_GPL(irq_ts_restore);
-
-/*
  * Save the FPU state (mark it for reload if necessary):
  *
  * This only ever gets called for the current task.
@@ -200,10 +150,7 @@ void fpu__save(struct fpu *fpu)
        trace_x86_fpu_before_save(fpu);
        if (fpu->fpregs_active) {
                if (!copy_fpregs_to_fpstate(fpu)) {
-                       if (use_eager_fpu())
-                               copy_kernel_to_fpregs(&fpu->state);
-                       else
-                               fpregs_deactivate(fpu);
+                       copy_kernel_to_fpregs(&fpu->state);
                }
        }
        trace_x86_fpu_after_save(fpu);
@@ -247,7 +194,6 @@ EXPORT_SYMBOL_GPL(fpstate_init);
 
 int fpu__copy(struct fpu *dst_fpu, struct fpu *src_fpu)
 {
-       dst_fpu->counter = 0;
        dst_fpu->fpregs_active = 0;
        dst_fpu->last_cpu = -1;
 
@@ -260,8 +206,7 @@ int fpu__copy(struct fpu *dst_fpu, struct fpu *src_fpu)
         * Don't let 'init optimized' areas of the XSAVE area
         * leak into the child task:
         */
-       if (use_eager_fpu())
-               memset(&dst_fpu->state.xsave, 0, fpu_kernel_xstate_size);
+       memset(&dst_fpu->state.xsave, 0, fpu_kernel_xstate_size);
 
        /*
         * Save current FPU registers directly into the child
@@ -283,10 +228,7 @@ int fpu__copy(struct fpu *dst_fpu, struct fpu *src_fpu)
                memcpy(&src_fpu->state, &dst_fpu->state,
                       fpu_kernel_xstate_size);
 
-               if (use_eager_fpu())
-                       copy_kernel_to_fpregs(&src_fpu->state);
-               else
-                       fpregs_deactivate(src_fpu);
+               copy_kernel_to_fpregs(&src_fpu->state);
        }
        preempt_enable();
 
@@ -366,7 +308,7 @@ void fpu__activate_fpstate_write(struct fpu *fpu)
 
        if (fpu->fpstate_active) {
                /* Invalidate any lazy state: */
-               fpu->last_cpu = -1;
+               __fpu_invalidate_fpregs_state(fpu);
        } else {
                fpstate_init(&fpu->state);
                trace_x86_fpu_init_state(fpu);
@@ -409,7 +351,7 @@ void fpu__current_fpstate_write_begin(void)
         * ensures we will not be lazy and skip a XRSTOR in the
         * future.
         */
-       fpu->last_cpu = -1;
+       __fpu_invalidate_fpregs_state(fpu);
 }
 
 /*
@@ -459,7 +401,6 @@ void fpu__restore(struct fpu *fpu)
        trace_x86_fpu_before_restore(fpu);
        fpregs_activate(fpu);
        copy_kernel_to_fpregs(&fpu->state);
-       fpu->counter++;
        trace_x86_fpu_after_restore(fpu);
        kernel_fpu_enable();
 }
@@ -477,7 +418,6 @@ EXPORT_SYMBOL_GPL(fpu__restore);
 void fpu__drop(struct fpu *fpu)
 {
        preempt_disable();
-       fpu->counter = 0;
 
        if (fpu->fpregs_active) {
                /* Ignore delayed exceptions from user space */
diff --git a/arch/x86/kernel/fpu/init.c b/arch/x86/kernel/fpu/init.c
index 2f2b8c7ccb85..60dece392b3a 100644
--- a/arch/x86/kernel/fpu/init.c
+++ b/arch/x86/kernel/fpu/init.c
@@ -10,18 +10,6 @@
 #include <linux/init.h>
 
 /*
- * Initialize the TS bit in CR0 according to the style of context-switches
- * we are using:
- */
-static void fpu__init_cpu_ctx_switch(void)
-{
-       if (!boot_cpu_has(X86_FEATURE_EAGER_FPU))
-               stts();
-       else
-               clts();
-}
-
-/*
  * Initialize the registers found in all CPUs, CR0 and CR4:
  */
 static void fpu__init_cpu_generic(void)
@@ -58,7 +46,6 @@ void fpu__init_cpu(void)
 {
        fpu__init_cpu_generic();
        fpu__init_cpu_xstate();
-       fpu__init_cpu_ctx_switch();
 }
 
 /*
@@ -233,82 +220,16 @@ static void __init fpu__init_system_xstate_size_legacy(void)
 }
 
 /*
- * FPU context switching strategies:
- *
- * Against popular belief, we don't do lazy FPU saves, due to the
- * task migration complications it brings on SMP - we only do
- * lazy FPU restores.
- *
- * 'lazy' is the traditional strategy, which is based on setting
- * CR0::TS to 1 during context-switch (instead of doing a full
- * restore of the FPU state), which causes the first FPU instruction
- * after the context switch (whenever it is executed) to fault - at
- * which point we lazily restore the FPU state into FPU registers.
- *
- * Tasks are of course under no obligation to execute FPU instructions,
- * so it can easily happen that another context-switch occurs without
- * a single FPU instruction being executed. If we eventually switch
- * back to the original task (that still owns the FPU) then we have
- * not only saved the restores along the way, but we also have the
- * FPU ready to be used for the original task.
- *
- * 'lazy' is deprecated because it's almost never a performance win
- * and it's much more complicated than 'eager'.
- *
- * 'eager' switching is by default on all CPUs, there we switch the FPU
- * state during every context switch, regardless of whether the task
- * has used FPU instructions in that time slice or not. This is done
- * because modern FPU context saving instructions are able to optimize
- * state saving and restoration in hardware: they can detect both
- * unused and untouched FPU state and optimize accordingly.
- *
- * [ Note that even in 'lazy' mode we might optimize context switches
- *   to use 'eager' restores, if we detect that a task is using the FPU
- *   frequently. See the fpu->counter logic in fpu/internal.h for that. ]
- */
-static enum { ENABLE, DISABLE } eagerfpu = ENABLE;
-
-/*
  * Find supported xfeatures based on cpu features and command-line input.
  * This must be called after fpu__init_parse_early_param() is called and
  * xfeatures_mask is enumerated.
  */
 u64 __init fpu__get_supported_xfeatures_mask(void)
 {
-       /* Support all xfeatures known to us */
-       if (eagerfpu != DISABLE)
-               return XCNTXT_MASK;
-
-       /* Warning of xfeatures being disabled for no eagerfpu mode */
-       if (xfeatures_mask & XFEATURE_MASK_EAGER) {
-               pr_err("x86/fpu: eagerfpu switching disabled, disabling the following xstate features: 0x%llx.\n",
-                       xfeatures_mask & XFEATURE_MASK_EAGER);
-       }
-
-       /* Return a mask that masks out all features requiring eagerfpu mode */
-       return ~XFEATURE_MASK_EAGER;
+       return XCNTXT_MASK;
 }
 
-/*
- * Disable features dependent on eagerfpu.
- */
-static void __init fpu__clear_eager_fpu_features(void)
-{
-       setup_clear_cpu_cap(X86_FEATURE_MPX);
-}
-
-/*
- * Pick the FPU context switching strategy:
- *
- * When eagerfpu is AUTO or ENABLE, we ensure it is ENABLE if either of
- * the following is true:
- *
- * (1) the cpu has xsaveopt, as it has the optimization and doing eager
- *     FPU switching has a relatively low cost compared to a plain xsave;
- * (2) the cpu has xsave features (e.g. MPX) that depend on eager FPU
- *     switching. Should the kernel boot with noxsaveopt, we support MPX
- *     with eager FPU switching at a higher cost.
- */
+/* Legacy code to initialize eager fpu mode. */
 static void __init fpu__init_system_ctx_switch(void)
 {
        static bool on_boot_cpu __initdata = 1;
@@ -317,17 +238,6 @@ static void __init fpu__init_system_ctx_switch(void)
        on_boot_cpu = 0;
 
        WARN_ON_FPU(current->thread.fpu.fpstate_active);
-
-       if (boot_cpu_has(X86_FEATURE_XSAVEOPT) && eagerfpu != DISABLE)
-               eagerfpu = ENABLE;
-
-       if (xfeatures_mask & XFEATURE_MASK_EAGER)
-               eagerfpu = ENABLE;
-
-       if (eagerfpu == ENABLE)
-               setup_force_cpu_cap(X86_FEATURE_EAGER_FPU);
-
-       printk(KERN_INFO "x86/fpu: Using '%s' FPU context switches.\n", eagerfpu == ENABLE ? "eager" : "lazy");
 }
 
 /*
@@ -336,11 +246,6 @@ static void __init fpu__init_system_ctx_switch(void)
  */
 static void __init fpu__init_parse_early_param(void)
 {
-       if (cmdline_find_option_bool(boot_command_line, "eagerfpu=off")) {
-               eagerfpu = DISABLE;
-               fpu__clear_eager_fpu_features();
-       }
-
        if (cmdline_find_option_bool(boot_command_line, "no387"))
                setup_clear_cpu_cap(X86_FEATURE_FPU);
 
@@ -375,14 +280,6 @@ void __init fpu__init_system(struct cpuinfo_x86 *c)
         */
        fpu__init_cpu();
 
-       /*
-        * But don't leave CR0::TS set yet, as some of the FPU setup
-        * methods depend on being able to execute FPU instructions
-        * that will fault on a set TS, such as the FXSAVE in
-        * fpu__init_system_mxcsr().
-        */
-       clts();
-
        fpu__init_system_generic();
        fpu__init_system_xstate_size_legacy();
        fpu__init_system_xstate();
diff --git a/arch/x86/kernel/fpu/signal.c b/arch/x86/kernel/fpu/signal.c
index a184c210efba..83c23c230b4c 100644
--- a/arch/x86/kernel/fpu/signal.c
+++ b/arch/x86/kernel/fpu/signal.c
@@ -340,11 +340,9 @@ static int __fpu__restore_sig(void __user *buf, void __user *buf_fx, int size)
                }
 
                fpu->fpstate_active = 1;
-               if (use_eager_fpu()) {
-                       preempt_disable();
-                       fpu__restore(fpu);
-                       preempt_enable();
-               }
+               preempt_disable();
+               fpu__restore(fpu);
+               preempt_enable();
 
                return err;
        } else {
diff --git a/arch/x86/kernel/fpu/xstate.c b/arch/x86/kernel/fpu/xstate.c
index 095ef7ddd6ae..c7c11cc988b7 100644
--- a/arch/x86/kernel/fpu/xstate.c
+++ b/arch/x86/kernel/fpu/xstate.c
@@ -890,15 +890,6 @@ int arch_set_user_pkey_access(struct task_struct *tsk, int pkey,
         */
        if (!boot_cpu_has(X86_FEATURE_OSPKE))
                return -EINVAL;
-       /*
-        * For most XSAVE components, this would be an arduous task:
-        * brining fpstate up to date with fpregs, updating fpstate,
-        * then re-populating fpregs.  But, for components that are
-        * never lazily managed, we can just access the fpregs
-        * directly.  PKRU is never managed lazily, so we can just
-        * manipulate it directly.  Make sure it stays that way.
-        */
-       WARN_ON_ONCE(!use_eager_fpu());
 
        /* Set the bits we need in PKRU:  */
        if (init_val & PKEY_DISABLE_ACCESS)
diff --git a/arch/x86/kernel/paravirt.c b/arch/x86/kernel/paravirt.c
index bbf3d5933eaa..a1bfba0f7234 100644
--- a/arch/x86/kernel/paravirt.c
+++ b/arch/x86/kernel/paravirt.c
@@ -328,7 +328,6 @@ __visible struct pv_cpu_ops pv_cpu_ops = {
        .cpuid = native_cpuid,
        .get_debugreg = native_get_debugreg,
        .set_debugreg = native_set_debugreg,
-       .clts = native_clts,
        .read_cr0 = native_read_cr0,
        .write_cr0 = native_write_cr0,
        .read_cr4 = native_read_cr4,
diff --git a/arch/x86/kernel/paravirt_patch_32.c b/arch/x86/kernel/paravirt_patch_32.c
index 920c6ae08592..d3f7f14bb328 100644
--- a/arch/x86/kernel/paravirt_patch_32.c
+++ b/arch/x86/kernel/paravirt_patch_32.c
@@ -8,7 +8,6 @@ DEF_NATIVE(pv_cpu_ops, iret, "iret");
 DEF_NATIVE(pv_mmu_ops, read_cr2, "mov %cr2, %eax");
 DEF_NATIVE(pv_mmu_ops, write_cr3, "mov %eax, %cr3");
 DEF_NATIVE(pv_mmu_ops, read_cr3, "mov %cr3, %eax");
-DEF_NATIVE(pv_cpu_ops, clts, "clts");
 
 #if defined(CONFIG_PARAVIRT_SPINLOCKS)
 DEF_NATIVE(pv_lock_ops, queued_spin_unlock, "movb $0, (%eax)");
@@ -48,7 +47,6 @@ unsigned native_patch(u8 type, u16 clobbers, void *ibuf,
                PATCH_SITE(pv_mmu_ops, read_cr2);
                PATCH_SITE(pv_mmu_ops, read_cr3);
                PATCH_SITE(pv_mmu_ops, write_cr3);
-               PATCH_SITE(pv_cpu_ops, clts);
 #if defined(CONFIG_PARAVIRT_SPINLOCKS)
                case PARAVIRT_PATCH(pv_lock_ops.queued_spin_unlock):
                        if (pv_is_native_spin_unlock()) {
diff --git a/arch/x86/kernel/paravirt_patch_64.c b/arch/x86/kernel/paravirt_patch_64.c
index bb3840cedb4f..915a4c0b217c 100644
--- a/arch/x86/kernel/paravirt_patch_64.c
+++ b/arch/x86/kernel/paravirt_patch_64.c
@@ -10,7 +10,6 @@ DEF_NATIVE(pv_mmu_ops, read_cr2, "movq %cr2, %rax");
 DEF_NATIVE(pv_mmu_ops, read_cr3, "movq %cr3, %rax");
 DEF_NATIVE(pv_mmu_ops, write_cr3, "movq %rdi, %cr3");
 DEF_NATIVE(pv_mmu_ops, flush_tlb_single, "invlpg (%rdi)");
-DEF_NATIVE(pv_cpu_ops, clts, "clts");
 DEF_NATIVE(pv_cpu_ops, wbinvd, "wbinvd");
 
 DEF_NATIVE(pv_cpu_ops, usergs_sysret64, "swapgs; sysretq");
@@ -58,7 +57,6 @@ unsigned native_patch(u8 type, u16 clobbers, void *ibuf,
                PATCH_SITE(pv_mmu_ops, read_cr2);
                PATCH_SITE(pv_mmu_ops, read_cr3);
                PATCH_SITE(pv_mmu_ops, write_cr3);
-               PATCH_SITE(pv_cpu_ops, clts);
                PATCH_SITE(pv_mmu_ops, flush_tlb_single);
                PATCH_SITE(pv_cpu_ops, wbinvd);
 #if defined(CONFIG_PARAVIRT_SPINLOCKS)
diff --git a/arch/x86/kernel/process_32.c b/arch/x86/kernel/process_32.c
index bd7be8efdc4c..7dc8c9c3d801 100644
--- a/arch/x86/kernel/process_32.c
+++ b/arch/x86/kernel/process_32.c
@@ -232,11 +232,10 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p)
        struct fpu *next_fpu = &next->fpu;
        int cpu = smp_processor_id();
        struct tss_struct *tss = &per_cpu(cpu_tss, cpu);
-       fpu_switch_t fpu_switch;
 
        /* never put a printk in __switch_to... printk() calls wake_up*() indirectly */
 
-       fpu_switch = switch_fpu_prepare(prev_fpu, next_fpu, cpu);
+       switch_fpu_prepare(prev_fpu, cpu);
 
        /*
         * Save away %gs. No need to save %fs, as it was saved on the
@@ -295,7 +294,7 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p)
        if (prev->gs | next->gs)
                lazy_load_gs(next->gs);
 
-       switch_fpu_finish(next_fpu, fpu_switch);
+       switch_fpu_finish(next_fpu, cpu);
 
        this_cpu_write(current_task, next_p);
 
diff --git a/arch/x86/kernel/process_64.c b/arch/x86/kernel/process_64.c
index b3760b3c1ca0..9c3a7b04e59e 100644
--- a/arch/x86/kernel/process_64.c
+++ b/arch/x86/kernel/process_64.c
@@ -265,9 +265,8 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p)
        int cpu = smp_processor_id();
        struct tss_struct *tss = &per_cpu(cpu_tss, cpu);
        unsigned prev_fsindex, prev_gsindex;
-       fpu_switch_t fpu_switch;
 
-       fpu_switch = switch_fpu_prepare(prev_fpu, next_fpu, cpu);
+       switch_fpu_prepare(prev_fpu, cpu);
 
        /* We must save %fs and %gs before load_TLS() because
         * %fs and %gs may be cleared by load_TLS().
@@ -417,7 +416,7 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p)
                prev->gsbase = 0;
        prev->gsindex = prev_gsindex;
 
-       switch_fpu_finish(next_fpu, fpu_switch);
+       switch_fpu_finish(next_fpu, cpu);
 
        /*
         * Switch the PDA and FPU contexts.
diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c
index 42f5eb7b4f6c..d29c85250108 100644
--- a/arch/x86/kernel/smpboot.c
+++ b/arch/x86/kernel/smpboot.c
@@ -1111,7 +1111,7 @@ int native_cpu_up(unsigned int cpu, struct task_struct *tidle)
                return err;
 
        /* the FPU context is blank, nobody can own it */
-       __cpu_disable_lazy_restore(cpu);
+       per_cpu(fpu_fpregs_owner_ctx, cpu) = NULL;
 
        common_cpu_up(cpu, tidle);
 
diff --git a/arch/x86/kernel/traps.c b/arch/x86/kernel/traps.c
index bd4e3d4d3625..bf0c6d049080 100644
--- a/arch/x86/kernel/traps.c
+++ b/arch/x86/kernel/traps.c
@@ -853,6 +853,8 @@ do_spurious_interrupt_bug(struct pt_regs *regs, long error_code)
 dotraplinkage void
 do_device_not_available(struct pt_regs *regs, long error_code)
 {
+       unsigned long cr0;
+
        RCU_LOCKDEP_WARN(!rcu_is_watching(), "entry code didn't wake RCU");
 
 #ifdef CONFIG_MATH_EMULATION
@@ -866,10 +868,20 @@ do_device_not_available(struct pt_regs *regs, long error_code)
                return;
        }
 #endif
-       fpu__restore(&current->thread.fpu); /* interrupts still off */
-#ifdef CONFIG_X86_32
-       cond_local_irq_enable(regs);
-#endif
+
+       /* This should not happen. */
+       cr0 = read_cr0();
+       if (WARN(cr0 & X86_CR0_TS, "CR0.TS was set")) {
+               /* Try to fix it up and carry on. */
+               write_cr0(cr0 & ~X86_CR0_TS);
+       } else {
+               /*
+                * Something terrible happened, and we're better off trying
+                * to kill the task than getting stuck in a never-ending
+                * loop of #NM faults.
+                */
+               die("unexpected #NM exception", regs, error_code);
+       }
 }
 NOKPROBE_SYMBOL(do_device_not_available);
 
diff --git a/arch/x86/kvm/cpuid.c b/arch/x86/kvm/cpuid.c
index afa7bbb596cd..0aefb626fa8f 100644
--- a/arch/x86/kvm/cpuid.c
+++ b/arch/x86/kvm/cpuid.c
@@ -16,7 +16,6 @@
 #include <linux/export.h>
 #include <linux/vmalloc.h>
 #include <linux/uaccess.h>
-#include <asm/fpu/internal.h> /* For use_eager_fpu.  Ugh! */
 #include <asm/user.h>
 #include <asm/fpu/xstate.h>
 #include "cpuid.h"
@@ -114,8 +113,7 @@ int kvm_update_cpuid(struct kvm_vcpu *vcpu)
        if (best && (best->eax & (F(XSAVES) | F(XSAVEC))))
                best->ebx = xstate_required_size(vcpu->arch.xcr0, true);
 
-       if (use_eager_fpu())
-               kvm_x86_ops->fpu_activate(vcpu);
+       kvm_x86_ops->fpu_activate(vcpu);
 
        /*
         * The existing code assumes virtual address is 48-bit in the canonical
diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index 5382b82462fc..3980da515fd0 100644
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -2145,12 +2145,6 @@ static void __vmx_load_host_state(struct vcpu_vmx *vmx)
 #endif
        if (vmx->host_state.msr_host_bndcfgs)
                wrmsrl(MSR_IA32_BNDCFGS, vmx->host_state.msr_host_bndcfgs);
-       /*
-        * If the FPU is not active (through the host task or
-        * the guest vcpu), then restore the cr0.TS bit.
-        */
-       if (!fpregs_active() && !vmx->vcpu.guest_fpu_loaded)
-               stts();
        load_gdt(this_cpu_ptr(&host_gdt));
 }
 
@@ -4845,9 +4839,11 @@ static void vmx_set_constant_host_state(struct vcpu_vmx *vmx)
        u32 low32, high32;
        unsigned long tmpl;
        struct desc_ptr dt;
-       unsigned long cr4;
+       unsigned long cr0, cr4;
 
-       vmcs_writel(HOST_CR0, read_cr0() & ~X86_CR0_TS);  /* 22.2.3 */
+       cr0 = read_cr0();
+       WARN_ON(cr0 & X86_CR0_TS);
+       vmcs_writel(HOST_CR0, cr0);  /* 22.2.3 */
        vmcs_writel(HOST_CR3, read_cr3());  /* 22.2.3  FIXME: shadow tables */
 
        /* Save the most likely value for this task's CR4 in the VMCS. */
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 04c5d96b1d67..56900f3e7bcf 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -5081,11 +5081,6 @@ static void emulator_get_fpu(struct x86_emulate_ctxt *ctxt)
 {
        preempt_disable();
        kvm_load_guest_fpu(emul_to_vcpu(ctxt));
-       /*
-        * CR0.TS may reference the host fpu state, not the guest fpu state,
-        * so it may be clear at this point.
-        */
-       clts();
 }
 
 static void emulator_put_fpu(struct x86_emulate_ctxt *ctxt)
@@ -7407,25 +7402,13 @@ void kvm_load_guest_fpu(struct kvm_vcpu *vcpu)
 
 void kvm_put_guest_fpu(struct kvm_vcpu *vcpu)
 {
-       if (!vcpu->guest_fpu_loaded) {
-               vcpu->fpu_counter = 0;
+       if (!vcpu->guest_fpu_loaded)
                return;
-       }
 
        vcpu->guest_fpu_loaded = 0;
        copy_fpregs_to_fpstate(&vcpu->arch.guest_fpu);
        __kernel_fpu_end();
        ++vcpu->stat.fpu_reload;
-       /*
-        * If using eager FPU mode, or if the guest is a frequent user
-        * of the FPU, just leave the FPU active for next time.
-        * Every 255 times fpu_counter rolls over to 0; a guest that uses
-        * the FPU in bursts will revert to loading it on demand.
-        */
-       if (!use_eager_fpu()) {
-               if (++vcpu->fpu_counter < 5)
-                       kvm_make_request(KVM_REQ_DEACTIVATE_FPU, vcpu);
-       }
        trace_kvm_fpu(0);
 }
 
diff --git a/arch/x86/lguest/boot.c b/arch/x86/lguest/boot.c
index 25da5bc8d83d..4ca0d78adcf0 100644
--- a/arch/x86/lguest/boot.c
+++ b/arch/x86/lguest/boot.c
@@ -497,38 +497,24 @@ static void lguest_cpuid(unsigned int *ax, unsigned int *bx,
  * a whole series of functions like read_cr0() and write_cr0().
  *
  * We start with cr0.  cr0 allows you to turn on and off all kinds of basic
- * features, but Linux only really cares about one: the horrifically-named Task
- * Switched (TS) bit at bit 3 (ie. 8)
+ * features, but the only cr0 bit that Linux ever used at runtime was the
+ * horrifically-named Task Switched (TS) bit at bit 3 (ie. 8)
  *
  * What does the TS bit do?  Well, it causes the CPU to trap (interrupt 7) if
  * the floating point unit is used.  Which allows us to restore FPU state
- * lazily after a task switch, and Linux uses that gratefully, but wouldn't a
- * name like "FPUTRAP bit" be a little less cryptic?
+ * lazily after a task switch if we wanted to, but wouldn't a name like
+ * "FPUTRAP bit" be a little less cryptic?
  *
- * We store cr0 locally because the Host never changes it.  The Guest sometimes
- * wants to read it and we'd prefer not to bother the Host unnecessarily.
+ * Fortunately, Linux keeps it simple and doesn't use TS, so we can ignore
+ * cr0.
  */
-static unsigned long current_cr0;
 static void lguest_write_cr0(unsigned long val)
 {
-       lazy_hcall1(LHCALL_TS, val & X86_CR0_TS);
-       current_cr0 = val;
 }
 
 static unsigned long lguest_read_cr0(void)
 {
-       return current_cr0;
-}
-
-/*
- * Intel provided a special instruction to clear the TS bit for people too cool
- * to use write_cr0() to do it.  This "clts" instruction is faster, because all
- * the vowels have been optimized out.
- */
-static void lguest_clts(void)
-{
-       lazy_hcall1(LHCALL_TS, 0);
-       current_cr0 &= ~X86_CR0_TS;
+       return 0;
 }
 
 /*
@@ -1432,7 +1418,6 @@ __init void lguest_init(void)
        pv_cpu_ops.load_tls = lguest_load_tls;
        pv_cpu_ops.get_debugreg = lguest_get_debugreg;
        pv_cpu_ops.set_debugreg = lguest_set_debugreg;
-       pv_cpu_ops.clts = lguest_clts;
        pv_cpu_ops.read_cr0 = lguest_read_cr0;
        pv_cpu_ops.write_cr0 = lguest_write_cr0;
        pv_cpu_ops.read_cr4 = lguest_read_cr4;
diff --git a/arch/x86/mm/pkeys.c b/arch/x86/mm/pkeys.c
index f88ce0e5efd9..2dab69a706ec 100644
--- a/arch/x86/mm/pkeys.c
+++ b/arch/x86/mm/pkeys.c
@@ -141,8 +141,7 @@ u32 init_pkru_value = PKRU_AD_KEY( 1) | PKRU_AD_KEY( 2) | PKRU_AD_KEY( 3) |
  * Called from the FPU code when creating a fresh set of FPU
  * registers.  This is called from a very specific context where
  * we know the FPU regstiers are safe for use and we can use PKRU
- * directly.  The fact that PKRU is only available when we are
- * using eagerfpu mode makes this possible.
+ * directly.
  */
 void copy_init_pkru_to_fpregs(void)
 {
diff --git a/arch/x86/xen/enlighten.c b/arch/x86/xen/enlighten.c
index bdd855685403..ced7027b3fbc 100644
--- a/arch/x86/xen/enlighten.c
+++ b/arch/x86/xen/enlighten.c
@@ -980,17 +980,6 @@ static void xen_io_delay(void)
 {
 }
 
-static void xen_clts(void)
-{
-       struct multicall_space mcs;
-
-       mcs = xen_mc_entry(0);
-
-       MULTI_fpu_taskswitch(mcs.mc, 0);
-
-       xen_mc_issue(PARAVIRT_LAZY_CPU);
-}
-
 static DEFINE_PER_CPU(unsigned long, xen_cr0_value);
 
 static unsigned long xen_read_cr0(void)
@@ -1233,8 +1222,6 @@ static const struct pv_cpu_ops xen_cpu_ops __initconst = {
        .set_debugreg = xen_set_debugreg,
        .get_debugreg = xen_get_debugreg,
 
-       .clts = xen_clts,
-
        .read_cr0 = xen_read_cr0,
        .write_cr0 = xen_write_cr0,
 
diff --git a/drivers/char/hw_random/via-rng.c b/drivers/char/hw_random/via-rng.c
--- a/drivers/char/hw_random/via-rng.c
+++ b/drivers/char/hw_random/via-rng.c
@@ -70,21 +70,17 @@ enum {
  * until we have 4 bytes, thus returning a u32 at a time,
  * instead of the current u8-at-a-time.
  *
- * Padlock instructions can generate a spurious DNA fault, so
- * we have to call them in the context of irq_ts_save/restore()
+ * Padlock instructions can generate a spurious DNA fault, but the
+ * kernel doesn't use CR0.TS, so this doesn't matter.
  */
 
 static inline u32 xstore(u32 *addr, u32 edx_in)
 {
        u32 eax_out;
-       int ts_state;
-
-       ts_state = irq_ts_save();
 
        asm(".byte 0x0F,0xA7,0xC0 /* xstore %%edi (addr=%0) */"
                : "=m" (*addr), "=a" (eax_out), "+d" (edx_in), "+D" (addr));
 
-       irq_ts_restore(ts_state);
        return eax_out;
 }
 
diff --git a/drivers/crypto/padlock-aes.c b/drivers/crypto/padlock-aes.c
index 441e86b23571..b3869748cc6b 100644
--- a/drivers/crypto/padlock-aes.c
+++ b/drivers/crypto/padlock-aes.c
@@ -183,8 +183,8 @@ static inline void padlock_store_cword(struct cword *cword)
 
 /*
  * While the padlock instructions don't use FP/SSE registers, they
- * generate a spurious DNA fault when cr0.ts is '1'. These instructions
- * should be used only inside the irq_ts_save/restore() context
+ * generate a spurious DNA fault when CR0.TS is '1'.  Fortunately,
+ * the kernel doesn't use CR0.TS.
  */
 
 static inline void rep_xcrypt_ecb(const u8 *input, u8 *output, void *key,
@@ -298,24 +298,18 @@ static inline u8 *padlock_xcrypt_cbc(const u8 *input, u8 *output, void *key,
 static void aes_encrypt(struct crypto_tfm *tfm, u8 *out, const u8 *in)
 {
        struct aes_ctx *ctx = aes_ctx(tfm);
-       int ts_state;
 
        padlock_reset_key(&ctx->cword.encrypt);
-       ts_state = irq_ts_save();
        ecb_crypt(in, out, ctx->E, &ctx->cword.encrypt, 1);
-       irq_ts_restore(ts_state);
        padlock_store_cword(&ctx->cword.encrypt);
 }
 
 static void aes_decrypt(struct crypto_tfm *tfm, u8 *out, const u8 *in)
 {
        struct aes_ctx *ctx = aes_ctx(tfm);
-       int ts_state;
 
        padlock_reset_key(&ctx->cword.encrypt);
-       ts_state = irq_ts_save();
        ecb_crypt(in, out, ctx->D, &ctx->cword.decrypt, 1);
-       irq_ts_restore(ts_state);
        padlock_store_cword(&ctx->cword.encrypt);
 }
 
@@ -346,14 +340,12 @@ static int ecb_aes_encrypt(struct blkcipher_desc *desc,
        struct aes_ctx *ctx = blk_aes_ctx(desc->tfm);
        struct blkcipher_walk walk;
        int err;
-       int ts_state;
 
        padlock_reset_key(&ctx->cword.encrypt);
 
        blkcipher_walk_init(&walk, dst, src, nbytes);
        err = blkcipher_walk_virt(desc, &walk);
 
-       ts_state = irq_ts_save();
        while ((nbytes = walk.nbytes)) {
                padlock_xcrypt_ecb(walk.src.virt.addr, walk.dst.virt.addr,
                                   ctx->E, &ctx->cword.encrypt,
@@ -361,7 +353,6 @@ static int ecb_aes_encrypt(struct blkcipher_desc *desc,
                nbytes &= AES_BLOCK_SIZE - 1;
                err = blkcipher_walk_done(desc, &walk, nbytes);
        }
-       irq_ts_restore(ts_state);
 
        padlock_store_cword(&ctx->cword.encrypt);
 
@@ -375,14 +366,12 @@ static int ecb_aes_decrypt(struct blkcipher_desc *desc,
        struct aes_ctx *ctx = blk_aes_ctx(desc->tfm);
        struct blkcipher_walk walk;
        int err;
-       int ts_state;
 
        padlock_reset_key(&ctx->cword.decrypt);
 
        blkcipher_walk_init(&walk, dst, src, nbytes);
        err = blkcipher_walk_virt(desc, &walk);
 
-       ts_state = irq_ts_save();
        while ((nbytes = walk.nbytes)) {
                padlock_xcrypt_ecb(walk.src.virt.addr, walk.dst.virt.addr,
                                   ctx->D, &ctx->cword.decrypt,
@@ -390,7 +379,6 @@ static int ecb_aes_decrypt(struct blkcipher_desc *desc,
                nbytes &= AES_BLOCK_SIZE - 1;
                err = blkcipher_walk_done(desc, &walk, nbytes);
        }
-       irq_ts_restore(ts_state);
 
        padlock_store_cword(&ctx->cword.encrypt);
 
@@ -425,14 +413,12 @@ static int cbc_aes_encrypt(struct blkcipher_desc *desc,
        struct aes_ctx *ctx = blk_aes_ctx(desc->tfm);
        struct blkcipher_walk walk;
        int err;
-       int ts_state;
 
        padlock_reset_key(&ctx->cword.encrypt);
 
        blkcipher_walk_init(&walk, dst, src, nbytes);
        err = blkcipher_walk_virt(desc, &walk);
 
-       ts_state = irq_ts_save();
        while ((nbytes = walk.nbytes)) {
                u8 *iv = padlock_xcrypt_cbc(walk.src.virt.addr,
                                            walk.dst.virt.addr, ctx->E,
@@ -442,7 +428,6 @@ static int cbc_aes_encrypt(struct blkcipher_desc *desc,
                nbytes &= AES_BLOCK_SIZE - 1;
                err = blkcipher_walk_done(desc, &walk, nbytes);
        }
-       irq_ts_restore(ts_state);
 
        padlock_store_cword(&ctx->cword.decrypt);
 
@@ -456,14 +441,12 @@ static int cbc_aes_decrypt(struct blkcipher_desc *desc,
        struct aes_ctx *ctx = blk_aes_ctx(desc->tfm);
        struct blkcipher_walk walk;
        int err;
-       int ts_state;
 
        padlock_reset_key(&ctx->cword.encrypt);
 
        blkcipher_walk_init(&walk, dst, src, nbytes);
        err = blkcipher_walk_virt(desc, &walk);
 
-       ts_state = irq_ts_save();
        while ((nbytes = walk.nbytes)) {
                padlock_xcrypt_cbc(walk.src.virt.addr, walk.dst.virt.addr,
                                   ctx->D, walk.iv, &ctx->cword.decrypt,
@@ -472,8 +455,6 @@ static int cbc_aes_decrypt(struct blkcipher_desc *desc,
                err = blkcipher_walk_done(desc, &walk, nbytes);
        }
 
-       irq_ts_restore(ts_state);
-
        padlock_store_cword(&ctx->cword.encrypt);
 
        return err;
diff --git a/drivers/crypto/padlock-sha.c b/drivers/crypto/padlock-sha.c
index 8c5f90647b7a..bc72d20c32c3 100644
--- a/drivers/crypto/padlock-sha.c
+++ b/drivers/crypto/padlock-sha.c
@@ -89,7 +89,6 @@ static int padlock_sha1_finup(struct shash_desc *desc, const u8 *in,
        struct sha1_state state;
        unsigned int space;
        unsigned int leftover;
-       int ts_state;
        int err;
 
        dctx->fallback.flags = desc->flags & CRYPTO_TFM_REQ_MAY_SLEEP;
@@ -120,14 +119,11 @@ static int padlock_sha1_finup(struct shash_desc *desc, const u8 *in,
 
        memcpy(result, &state.state, SHA1_DIGEST_SIZE);
 
-       /* prevent taking the spurious DNA fault with padlock. */
-       ts_state = irq_ts_save();
        asm volatile (".byte 0xf3,0x0f,0xa6,0xc8" /* rep xsha1 */
                      : \
                      : "c"((unsigned long)state.count + count), \
                        "a"((unsigned long)state.count), \
                        "S"(in), "D"(result));
-       irq_ts_restore(ts_state);
 
        padlock_output_block((uint32_t *)result, (uint32_t *)out, 5);
 
@@ -155,7 +151,6 @@ static int padlock_sha256_finup(struct shash_desc *desc, const u8 *in,
        struct sha256_state state;
        unsigned int space;
        unsigned int leftover;
-       int ts_state;
        int err;
 
        dctx->fallback.flags = desc->flags & CRYPTO_TFM_REQ_MAY_SLEEP;
@@ -186,14 +181,11 @@ static int padlock_sha256_finup(struct shash_desc *desc, const u8 *in,
 
        memcpy(result, &state.state, SHA256_DIGEST_SIZE);
 
-       /* prevent taking the spurious DNA fault with padlock. */
-       ts_state = irq_ts_save();
        asm volatile (".byte 0xf3,0x0f,0xa6,0xd0" /* rep xsha256 */
                      : \
                      : "c"((unsigned long)state.count + count), \
                        "a"((unsigned long)state.count), \
                        "S"(in), "D"(result));
-       irq_ts_restore(ts_state);
 
        padlock_output_block((uint32_t *)result, (uint32_t *)out, 8);
 
@@ -312,7 +304,6 @@ static int padlock_sha1_update_nano(struct shash_desc *desc,
        u8 buf[128 + PADLOCK_ALIGNMENT - STACK_ALIGN] __attribute__
                ((aligned(STACK_ALIGN)));
        u8 *dst = PTR_ALIGN(&buf[0], PADLOCK_ALIGNMENT);
-       int ts_state;
 
        partial = sctx->count & 0x3f;
        sctx->count += len;
@@ -328,23 +319,19 @@ static int padlock_sha1_update_nano(struct shash_desc *desc,
                        memcpy(sctx->buffer + partial, data,
                                done + SHA1_BLOCK_SIZE);
                        src = sctx->buffer;
-                       ts_state = irq_ts_save();
                        asm volatile (".byte 0xf3,0x0f,0xa6,0xc8"
                        : "+S"(src), "+D"(dst) \
                        : "a"((long)-1), "c"((unsigned long)1));
-                       irq_ts_restore(ts_state);
                        done += SHA1_BLOCK_SIZE;
                        src = data + done;
                }
 
                /* Process the left bytes from the input data */
                if (len - done >= SHA1_BLOCK_SIZE) {
-                       ts_state = irq_ts_save();
                        asm volatile (".byte 0xf3,0x0f,0xa6,0xc8"
                        : "+S"(src), "+D"(dst)
                        : "a"((long)-1),
                        "c"((unsigned long)((len - done) / SHA1_BLOCK_SIZE)));
-                       irq_ts_restore(ts_state);
                        done += ((len - done) - (len - done) % SHA1_BLOCK_SIZE);
                        src = data + done;
                }
@@ -401,7 +388,6 @@ static int padlock_sha256_update_nano(struct shash_desc *desc, const u8 *data,
        u8 buf[128 + PADLOCK_ALIGNMENT - STACK_ALIGN] __attribute__
                ((aligned(STACK_ALIGN)));
        u8 *dst = PTR_ALIGN(&buf[0], PADLOCK_ALIGNMENT);
-       int ts_state;
 
        partial = sctx->count & 0x3f;
        sctx->count += len;
@@ -417,23 +403,19 @@ static int padlock_sha256_update_nano(struct shash_desc *desc, const u8 *data,
                        memcpy(sctx->buf + partial, data,
                                done + SHA256_BLOCK_SIZE);
                        src = sctx->buf;
-                       ts_state = irq_ts_save();
                        asm volatile (".byte 0xf3,0x0f,0xa6,0xd0"
                        : "+S"(src), "+D"(dst)
                        : "a"((long)-1), "c"((unsigned long)1));
-                       irq_ts_restore(ts_state);
                        done += SHA256_BLOCK_SIZE;
                        src = data + done;
                }
 
                /* Process the left bytes from input data*/
                if (len - done >= SHA256_BLOCK_SIZE) {
-                       ts_state = irq_ts_save();
                        asm volatile (".byte 0xf3,0x0f,0xa6,0xd0"
                        : "+S"(src), "+D"(dst)
                        : "a"((long)-1),
                        "c"((unsigned long)((len - done) / 64)));
-                       irq_ts_restore(ts_state);
                        done += ((len - done) - (len - done) % 64);
                        src = data + done;
                }
diff --git a/drivers/lguest/hypercalls.c b/drivers/lguest/hypercalls.c
index 19a32280731d..601f81c04873 100644
--- a/drivers/lguest/hypercalls.c
+++ b/drivers/lguest/hypercalls.c
@@ -109,10 +109,6 @@ static void do_hcall(struct lg_cpu *cpu, struct hcall_args *args)
        case LHCALL_SET_CLOCKEVENT:
                guest_set_clockevent(cpu, args->arg1);
                break;
-       case LHCALL_TS:
-               /* This sets the TS flag, as we saw used in run_guest(). */
-               cpu->ts = args->arg1;
-               break;
        case LHCALL_HALT:
                /* Similarly, this sets the halted flag for run_guest(). */
                cpu->halted = 1;
diff --git a/drivers/lguest/lg.h b/drivers/lguest/lg.h
index 69b3814afd2f..2356a2318034 100644
--- a/drivers/lguest/lg.h
+++ b/drivers/lguest/lg.h
@@ -43,7 +43,6 @@ struct lg_cpu {
        struct mm_struct *mm;   /* == tsk->mm, but that becomes NULL on exit */
 
        u32 cr2;
-       int ts;
        u32 esp1;
        u16 ss1;
 
diff --git a/drivers/lguest/x86/core.c b/drivers/lguest/x86/core.c
index 6e9042e3d2a9..743253fc638f 100644
--- a/drivers/lguest/x86/core.c
+++ b/drivers/lguest/x86/core.c
@@ -247,14 +247,6 @@ unsigned long *lguest_arch_regptr(struct lg_cpu *cpu, size_t reg_off, bool any)
 void lguest_arch_run_guest(struct lg_cpu *cpu)
 {
        /*
-        * Remember the awfully-named TS bit?  If the Guest has asked to set it
-        * we set it now, so we can trap and pass that trap to the Guest if it
-        * uses the FPU.
-        */
-       if (cpu->ts && fpregs_active())
-               stts();
-
-       /*
         * SYSENTER is an optimized way of doing system calls.  We can't allow
         * it because it always jumps to privilege level 0.  A normal Guest
         * won't try it because we don't advertise it in CPUID, but a malicious
@@ -282,10 +274,6 @@ void lguest_arch_run_guest(struct lg_cpu *cpu)
         if (boot_cpu_has(X86_FEATURE_SEP))
                wrmsr(MSR_IA32_SYSENTER_CS, __KERNEL_CS, 0);
 
-       /* Clear the host TS bit if it was set above. */
-       if (cpu->ts && fpregs_active())
-               clts();
-
        /*
         * If the Guest page faulted, then the cr2 register will tell us the
         * bad virtual address.  We have to grab this now, because once we
@@ -421,12 +409,7 @@ void lguest_arch_handle_trap(struct lg_cpu *cpu)
                        kill_guest(cpu, "Writing cr2");
                break;
        case 7: /* We've intercepted a Device Not Available fault. */
-               /*
-                * If the Guest doesn't want to know, we already restored the
-                * Floating Point Unit, so we just continue without telling it.
-                */
-               if (!cpu->ts)
-                       return;
+               /* No special handling is needed here. */
                break;
        case 32 ... 255:
                /* This might be a syscall. */
diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
index 01c0b9cc3915..cfc212d1cd60 100644
--- a/include/linux/kvm_host.h
+++ b/include/linux/kvm_host.h
@@ -224,7 +224,6 @@ struct kvm_vcpu {
 
        int fpu_active;
        int guest_fpu_loaded, guest_xcr0_loaded;
-       unsigned char fpu_counter;
        struct swait_queue_head wq;
        struct pid *pid;
        int sigset_active;
diff --git a/tools/arch/x86/include/asm/cpufeatures.h b/tools/arch/x86/include/asm/cpufeatures.h
index a39629206864..cddd5d06e1cb 100644
--- a/tools/arch/x86/include/asm/cpufeatures.h
+++ b/tools/arch/x86/include/asm/cpufeatures.h
@@ -104,7 +104,6 @@
 #define X86_FEATURE_EXTD_APICID        ( 3*32+26) /* has extended APICID (8 bits) */
 #define X86_FEATURE_AMD_DCM     ( 3*32+27) /* multi-node processor */
 #define X86_FEATURE_APERFMPERF ( 3*32+28) /* APERFMPERF */
-#define X86_FEATURE_EAGER_FPU  ( 3*32+29) /* "eagerfpu" Non lazy FPU restore */
 #define X86_FEATURE_NONSTOP_TSC_S3 ( 3*32+30) /* TSC doesn't stop in S3 state */
 
 /* Intel-defined CPU features, CPUID level 0x00000001 (ecx), word 4 */
