Re: [PATCH v2] Issue 64-bit versions of *XSAVE* for 64-bit amd64 programs
The same bug was fixed in FreeBSD here: "amd64: Store full 64bit of FIP/FDP for 64bit processes when using XSAVE." https://github.com/freebsd/freebsd/commit/62a9018a8878533432500e5cb89f9bd07fd9ef14 Kamil Rytarowski CTO, Moritz Systems www.moritz.systems pt., 16 paź 2020 o 15:26 Michał Górny napisał(a): > > When calling FXSAVE, XSAVE, FXRSTOR, ... for 64-bit programs on amd64 > use the 64-suffixed variant in order to include the complete FIP/FDP > registers in the x87 area. > > The difference between the two variants is that the FXSAVE64 (new) > variant represents FIP/FDP as 64-bit fields (union fp_addr.fa_64), > while the legacy FXSAVE variant uses split fields: 32-bit offset, > 16-bit segment and 16-bit reserved field (union fp_addr.fa_32). > The latter implies that the actual addresses are truncated to 32 bits > which is insufficient in modern programs. > > The change is applied only to 64-bit programs on amd64. Plain i386 > and compat32 continue using plain FXSAVE. Similarly, NVMM is not > changed as I am not familiar with that code. > > This is a potentially breaking change. However, I don't think it likely > to actually break anything because the data provided by the old variant > were not meaningful (because of the truncated pointer). 
> --- > sys/arch/x86/include/cpufunc.h | 76 ++ > sys/arch/x86/include/fpu.h | 4 +- > sys/arch/x86/x86/fpu.c | 30 ++ > sys/dev/nvmm/x86/nvmm_x86_svm.c| 6 +- > sys/dev/nvmm/x86/nvmm_x86_vmx.c| 6 +- > tests/lib/libc/sys/t_ptrace_x86_wait.h | 2 - > 6 files changed, 105 insertions(+), 19 deletions(-) > > diff --git a/sys/arch/x86/include/cpufunc.h b/sys/arch/x86/include/cpufunc.h > index 38534c305544..dd8b0c7dc375 100644 > --- a/sys/arch/x86/include/cpufunc.h > +++ b/sys/arch/x86/include/cpufunc.h > @@ -485,6 +485,82 @@ xrstor(const void *addr, uint64_t mask) > ); > } > > +#ifdef __x86_64__ > +static inline void > +fxsave64(void *addr) > +{ > + uint8_t *area = addr; > + > + __asm volatile ( > + "fxsave64 %[area]" > + : [area] "=m" (*area) > + : > + : "memory" > + ); > +} > + > +static inline void > +fxrstor64(const void *addr) > +{ > + const uint8_t *area = addr; > + > + __asm volatile ( > + "fxrstor64 %[area]" > + : > + : [area] "m" (*area) > + : "memory" > + ); > +} > + > +static inline void > +xsave64(void *addr, uint64_t mask) > +{ > + uint8_t *area = addr; > + uint32_t low, high; > + > + low = mask; > + high = mask >> 32; > + __asm volatile ( > + "xsave64 %[area]" > + : [area] "=m" (*area) > + : "a" (low), "d" (high) > + : "memory" > + ); > +} > + > +static inline void > +xsaveopt64(void *addr, uint64_t mask) > +{ > + uint8_t *area = addr; > + uint32_t low, high; > + > + low = mask; > + high = mask >> 32; > + __asm volatile ( > + "xsaveopt64 %[area]" > + : [area] "=m" (*area) > + : "a" (low), "d" (high) > + : "memory" > + ); > +} > + > +static inline void > +xrstor64(const void *addr, uint64_t mask) > +{ > + const uint8_t *area = addr; > + uint32_t low, high; > + > + low = mask; > + high = mask >> 32; > + __asm volatile ( > + "xrstor64 %[area]" > + : > + : [area] "m" (*area), "a" (low), "d" (high) > + : "memory" > + ); > +} > +#endif > + > /* > -- */ > > #ifdef XENPV > diff --git a/sys/arch/x86/include/fpu.h b/sys/arch/x86/include/fpu.h > index 
3255a8ca39e0..bdd86abfdd39 100644 > --- a/sys/arch/x86/include/fpu.h > +++ b/sys/arch/x86/include/fpu.h > @@ -14,8 +14,8 @@ struct trapframe; > void fpuinit(struct cpu_info *); > void fpuinit_mxcsr_mask(void); > > -void fpu_area_save(void *, uint64_t); > -void fpu_area_restore(const void *, uint64_t); > +void fpu_area_save(void *, uint64_t, bool); > +void fpu_area_restore(const void *, uint64_t, bool); > > void fpu_save(void); > > diff --git a/sys/arch/x86/x86/fpu.c b/sys/arch/x86/x86/fpu.c > index baff8d008299..e16a64f9ff8a 100644 > --- a/sys/arch/x86/x86/fpu.c > +++ b/sys/arch/x86/x86/fpu.c > @@ -156,7 +156,7 @@ fpu_save_lwp(struct lwp *l) > s = splvm(); > if (l->l_md.md_flags & MDL_FPU_IN_CPU) { > KASSERT((l->l_flag & LW_SYSTEM) == 0); > - fpu_area_save(area, x86_xsave_features); > + fpu_area_save(area, x86_xsave_features, !(l->l_proc->p_flag & > PK_32)); > l->l_md.md_flags &= ~MDL_FPU_IN_CPU; > } > splx(s); > @@ -246,21 +246,27 @@ fpu_errata_amd(void) > fldummy(); > } > > +#ifdef __x86_64__ > +#define XS64(x)
[PATCH v2] Issue 64-bit versions of *XSAVE* for 64-bit amd64 programs
When calling FXSAVE, XSAVE, FXRSTOR, ... for 64-bit programs on amd64 use the 64-suffixed variant in order to include the complete FIP/FDP registers in the x87 area. The difference between the two variants is that the FXSAVE64 (new) variant represents FIP/FDP as 64-bit fields (union fp_addr.fa_64), while the legacy FXSAVE variant uses split fields: 32-bit offset, 16-bit segment and 16-bit reserved field (union fp_addr.fa_32). The latter implies that the actual addresses are truncated to 32 bits which is insufficient in modern programs. The change is applied only to 64-bit programs on amd64. Plain i386 and compat32 continue using plain FXSAVE. Similarly, NVMM is not changed as I am not familiar with that code. This is a potentially breaking change. However, I don't think it likely to actually break anything because the data provided by the old variant were not meaningful (because of the truncated pointer). --- sys/arch/x86/include/cpufunc.h | 76 ++ sys/arch/x86/include/fpu.h | 4 +- sys/arch/x86/x86/fpu.c | 30 ++ sys/dev/nvmm/x86/nvmm_x86_svm.c| 6 +- sys/dev/nvmm/x86/nvmm_x86_vmx.c| 6 +- tests/lib/libc/sys/t_ptrace_x86_wait.h | 2 - 6 files changed, 105 insertions(+), 19 deletions(-) diff --git a/sys/arch/x86/include/cpufunc.h b/sys/arch/x86/include/cpufunc.h index 38534c305544..dd8b0c7dc375 100644 --- a/sys/arch/x86/include/cpufunc.h +++ b/sys/arch/x86/include/cpufunc.h @@ -485,6 +485,82 @@ xrstor(const void *addr, uint64_t mask) ); } +#ifdef __x86_64__ +static inline void +fxsave64(void *addr) +{ + uint8_t *area = addr; + + __asm volatile ( + "fxsave64 %[area]" + : [area] "=m" (*area) + : + : "memory" + ); +} + +static inline void +fxrstor64(const void *addr) +{ + const uint8_t *area = addr; + + __asm volatile ( + "fxrstor64 %[area]" + : + : [area] "m" (*area) + : "memory" + ); +} + +static inline void +xsave64(void *addr, uint64_t mask) +{ + uint8_t *area = addr; + uint32_t low, high; + + low = mask; + high = mask >> 32; + __asm volatile ( + "xsave64 %[area]" + : 
[area] "=m" (*area) + : "a" (low), "d" (high) + : "memory" + ); +} + +static inline void +xsaveopt64(void *addr, uint64_t mask) +{ + uint8_t *area = addr; + uint32_t low, high; + + low = mask; + high = mask >> 32; + __asm volatile ( + "xsaveopt64 %[area]" + : [area] "=m" (*area) + : "a" (low), "d" (high) + : "memory" + ); +} + +static inline void +xrstor64(const void *addr, uint64_t mask) +{ + const uint8_t *area = addr; + uint32_t low, high; + + low = mask; + high = mask >> 32; + __asm volatile ( + "xrstor64 %[area]" + : + : [area] "m" (*area), "a" (low), "d" (high) + : "memory" + ); +} +#endif + /* -- */ #ifdef XENPV diff --git a/sys/arch/x86/include/fpu.h b/sys/arch/x86/include/fpu.h index 3255a8ca39e0..bdd86abfdd39 100644 --- a/sys/arch/x86/include/fpu.h +++ b/sys/arch/x86/include/fpu.h @@ -14,8 +14,8 @@ struct trapframe; void fpuinit(struct cpu_info *); void fpuinit_mxcsr_mask(void); -void fpu_area_save(void *, uint64_t); -void fpu_area_restore(const void *, uint64_t); +void fpu_area_save(void *, uint64_t, bool); +void fpu_area_restore(const void *, uint64_t, bool); void fpu_save(void); diff --git a/sys/arch/x86/x86/fpu.c b/sys/arch/x86/x86/fpu.c index baff8d008299..e16a64f9ff8a 100644 --- a/sys/arch/x86/x86/fpu.c +++ b/sys/arch/x86/x86/fpu.c @@ -156,7 +156,7 @@ fpu_save_lwp(struct lwp *l) s = splvm(); if (l->l_md.md_flags & MDL_FPU_IN_CPU) { KASSERT((l->l_flag & LW_SYSTEM) == 0); - fpu_area_save(area, x86_xsave_features); + fpu_area_save(area, x86_xsave_features, !(l->l_proc->p_flag & PK_32)); l->l_md.md_flags &= ~MDL_FPU_IN_CPU; } splx(s); @@ -246,21 +246,27 @@ fpu_errata_amd(void) fldummy(); } +#ifdef __x86_64__ +#define XS64(x) (is_64bit ? 
x##64 : x) +#else +#define XS64(x) x +#endif + void -fpu_area_save(void *area, uint64_t xsave_features) +fpu_area_save(void *area, uint64_t xsave_features, bool is_64bit) { switch (x86_fpu_save) { case FPU_SAVE_FSAVE: fnsave(area); break; case FPU_SAVE_FXSAVE: - fxsave(area); + XS64(fxsave)(area); break; case FPU_SAVE_XSAVE: - xsave(area, xsave_features); + XS64(xsave)(area, xsave_features); break; case