After changing the argument of __raw_xsave_addr() from a mask to number Dave suggested to check if it makes sense to do the same for get_xsave_addr(). As it turns out it does. Only get_xsave_addr() needs the mask to check if the requested feature is part of what is support/saved and then uses the number again. The shift operation is cheaper compared to "find last bit set". Also, the feature number uses less opcode space compared to the mask :)
Make get_xsave_addr() argument a xfeature number instead of mask and fix up its callers. As part of this use xfeature_nr and xfeature_mask consistently. This results in changes to the kvm code as: feature -> xfeature_mask index -> xfeature_nr Suggested-by: Dave Hansen <dave.han...@linux.intel.com> Signed-off-by: Sebastian Andrzej Siewior <bige...@linutronix.de> --- arch/x86/include/asm/fpu/xstate.h | 4 ++-- arch/x86/kernel/fpu/xstate.c | 23 +++++++++++------------ arch/x86/kernel/traps.c | 2 +- arch/x86/kvm/x86.c | 28 ++++++++++++++-------------- arch/x86/mm/mpx.c | 6 +++--- 5 files changed, 31 insertions(+), 32 deletions(-) diff --git a/arch/x86/include/asm/fpu/xstate.h b/arch/x86/include/asm/fpu/xstate.h index 48581988d78c7..fbe41f808e5d8 100644 --- a/arch/x86/include/asm/fpu/xstate.h +++ b/arch/x86/include/asm/fpu/xstate.h @@ -46,8 +46,8 @@ extern void __init update_regset_xstate_info(unsigned int size, u64 xstate_mask); void fpu__xstate_clear_all_cpu_caps(void); -void *get_xsave_addr(struct xregs_state *xsave, int xstate); -const void *get_xsave_field_ptr(int xstate_field); +void *get_xsave_addr(struct xregs_state *xsave, int xfeature_nr); +const void *get_xsave_field_ptr(int xfeature_nr); int using_compacted_format(void); int copy_xstate_to_kernel(void *kbuf, struct xregs_state *xsave, unsigned int offset, unsigned int size); int copy_xstate_to_user(void __user *ubuf, struct xregs_state *xsave, unsigned int offset, unsigned int size); diff --git a/arch/x86/kernel/fpu/xstate.c b/arch/x86/kernel/fpu/xstate.c index 3dfe3627deaf6..375226055a413 100644 --- a/arch/x86/kernel/fpu/xstate.c +++ b/arch/x86/kernel/fpu/xstate.c @@ -832,15 +832,15 @@ static void *__raw_xsave_addr(struct xregs_state *xsave, int xfeature_nr) * * Inputs: * xstate: the thread's storage area for all FPU data - * xstate_feature: state which is defined in xsave.h (e.g. - * XFEATURE_MASK_FP, XFEATURE_MASK_SSE, etc...) + * xfeature_nr: state which is defined in xsave.h (e.g. XFEATURE_FP, + * XFEATURE_SSE, etc...) * Output: * address of the state in the xsave area, or NULL if the * field is not present in the xsave buffer. */ -void *get_xsave_addr(struct xregs_state *xsave, int xstate_feature) +void *get_xsave_addr(struct xregs_state *xsave, int xfeature_nr) { - int xfeature_nr; + u64 xfeature_mask = 1ULL << xfeature_nr; /* * Do we even *have* xsave state? */ @@ -852,11 +852,11 @@ void *get_xsave_addr(struct xregs_state *xsave, int xstate_feature) * have not enabled. Remember that pcntxt_mask is * what we write to the XCR0 register. */ - WARN_ONCE(!(xfeatures_mask & xstate_feature), + WARN_ONCE(!(xfeatures_mask & xfeature_mask), "get of unsupported state"); /* * This assumes the last 'xsave*' instruction to - * have requested that 'xstate_feature' be saved. + * have requested that 'xfeature_mask' be saved. * If it did not, we might be seeing and old value * of the field in the buffer. * @@ -865,10 +865,9 @@ void *get_xsave_addr(struct xregs_state *xsave, int xstate_feature) * or because the "init optimization" caused it * to not be saved. */ - if (!(xsave->header.xfeatures & xstate_feature)) + if (!(xsave->header.xfeatures & xfeature_mask)) return NULL; - xfeature_nr = fls64(xstate_feature) - 1; return __raw_xsave_addr(xsave, xfeature_nr); } EXPORT_SYMBOL_GPL(get_xsave_addr); @@ -884,13 +883,13 @@ EXPORT_SYMBOL_GPL(get_xsave_addr); * Note that this only works on the current task. * * Inputs: - * @xsave_state: state which is defined in xsave.h (e.g. XFEATURE_MASK_FP, - * XFEATURE_MASK_SSE, etc...) + * @xfeature_nr: state which is defined in xsave.h (e.g. XFEATURE_FP, + * XFEATURE_SSE, etc...) * Output: * address of the state in the xsave area or NULL if the state * is not present or is in its 'init state'. */ -const void *get_xsave_field_ptr(int xsave_state) +const void *get_xsave_field_ptr(int xfeature_nr) { struct fpu *fpu = ¤t->thread.fpu; @@ -900,7 +899,7 @@ const void *get_xsave_field_ptr(int xsave_state) */ fpu__save(fpu); - return get_xsave_addr(&fpu->state.xsave, xsave_state); + return get_xsave_addr(&fpu->state.xsave, xfeature_nr); } #ifdef CONFIG_ARCH_HAS_PKEYS diff --git a/arch/x86/kernel/traps.c b/arch/x86/kernel/traps.c index 9b7c4ca8f0a73..626853b2ac344 100644 --- a/arch/x86/kernel/traps.c +++ b/arch/x86/kernel/traps.c @@ -455,7 +455,7 @@ dotraplinkage void do_bounds(struct pt_regs *regs, long error_code) * which is all zeros which indicates MPX was not * responsible for the exception. */ - bndcsr = get_xsave_field_ptr(XFEATURE_MASK_BNDCSR); + bndcsr = get_xsave_field_ptr(XFEATURE_BNDCSR); if (!bndcsr) goto exit_trap; diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 5cd5647120f2b..75301b439b6e4 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -3650,15 +3650,15 @@ static void fill_xsave(u8 *dest, struct kvm_vcpu *vcpu) */ valid = xstate_bv & ~XFEATURE_MASK_FPSSE; while (valid) { - u64 feature = valid & -valid; - int index = fls64(feature) - 1; - void *src = get_xsave_addr(xsave, feature); + u64 xfeature_mask = valid & -valid; + int xfeature_nr = fls64(xfeature_mask) - 1; + void *src = get_xsave_addr(xsave, xfeature_nr); if (src) { u32 size, offset, ecx, edx; - cpuid_count(XSTATE_CPUID, index, + cpuid_count(XSTATE_CPUID, xfeature_nr, &size, &offset, &ecx, &edx); - if (feature == XFEATURE_MASK_PKRU) + if (xfeature_nr == XFEATURE_PKRU) memcpy(dest + offset, &vcpu->arch.pkru, sizeof(vcpu->arch.pkru)); else @@ -3666,7 +3666,7 @@ static void fill_xsave(u8 *dest, struct kvm_vcpu *vcpu) } - valid -= feature; + valid -= xfeature_mask; } } @@ -3693,22 +3693,22 @@ static void load_xsave(struct kvm_vcpu *vcpu, u8 *src) */ valid = xstate_bv & ~XFEATURE_MASK_FPSSE; while (valid) { - u64 feature = valid & -valid; - int index = fls64(feature) - 1; - void *dest = get_xsave_addr(xsave, feature); + u64 xfeature_mask = valid & -valid; + int xfeature_nr = fls64(xfeature_mask) - 1; + void *dest = get_xsave_addr(xsave, xfeature_nr); if (dest) { u32 size, offset, ecx, edx; - cpuid_count(XSTATE_CPUID, index, + cpuid_count(XSTATE_CPUID, xfeature_nr, &size, &offset, &ecx, &edx); - if (feature == XFEATURE_MASK_PKRU) + if (xfeature_nr == XFEATURE_PKRU) memcpy(&vcpu->arch.pkru, src + offset, sizeof(vcpu->arch.pkru)); else memcpy(dest, src + offset, size); } - valid -= feature; + valid -= xfeature_mask; } } @@ -8704,11 +8704,11 @@ void kvm_vcpu_reset(struct kvm_vcpu *vcpu, bool init_event) if (init_event) kvm_put_guest_fpu(vcpu); mpx_state_buffer = get_xsave_addr(&vcpu->arch.guest_fpu.state.xsave, - XFEATURE_MASK_BNDREGS); + XFEATURE_BNDREGS); if (mpx_state_buffer) memset(mpx_state_buffer, 0, sizeof(struct mpx_bndreg_state)); mpx_state_buffer = get_xsave_addr(&vcpu->arch.guest_fpu.state.xsave, - XFEATURE_MASK_BNDCSR); + XFEATURE_BNDCSR); if (mpx_state_buffer) memset(mpx_state_buffer, 0, sizeof(struct mpx_bndcsr)); if (init_event) diff --git a/arch/x86/mm/mpx.c b/arch/x86/mm/mpx.c index 2385538e80656..bcd55cc3050c2 100644 --- a/arch/x86/mm/mpx.c +++ b/arch/x86/mm/mpx.c @@ -142,7 +142,7 @@ int mpx_fault_info(struct mpx_fault_info *info, struct pt_regs *regs) goto err_out; } /* get bndregs field from current task's xsave area */ - bndregs = get_xsave_field_ptr(XFEATURE_MASK_BNDREGS); + bndregs = get_xsave_field_ptr(XFEATURE_BNDREGS); if (!bndregs) { err = -EINVAL; goto err_out; @@ -190,7 +190,7 @@ static __user void *mpx_get_bounds_dir(void) * The bounds directory pointer is stored in a register * only accessible if we first do an xsave. */ - bndcsr = get_xsave_field_ptr(XFEATURE_MASK_BNDCSR); + bndcsr = get_xsave_field_ptr(XFEATURE_BNDCSR); if (!bndcsr) return MPX_INVALID_BOUNDS_DIR; @@ -376,7 +376,7 @@ static int do_mpx_bt_fault(void) const struct mpx_bndcsr *bndcsr; struct mm_struct *mm = current->mm; - bndcsr = get_xsave_field_ptr(XFEATURE_MASK_BNDCSR); + bndcsr = get_xsave_field_ptr(XFEATURE_BNDCSR); if (!bndcsr) return -EINVAL; /* -- 2.19.1