On 5/30/19 3:00 AM, Jan Kiszka wrote:
> On 21.05.19 16:34, Ralf Ramsauer wrote:
>> Recent gcc versions emit SSE instructions for 32-bit inmates (e.g., in
>> hex2str or cmdline_parse routines). Inmates aren't able to execute those
>> instructions as SSE is not enabled and will crash.
>>
>> Enabling SSE is the same code for 32 and 64 bit x86 and straight
>> forward: Lookup SSE availability via cpuid and enable OSFXSR in cr4. If
>> SSE is not available, stop the inmate.
>>
>> If AVX is available, activate it (XCR0).
>>
>> Lookup features that need no explicit activation.
>>
>> Signed-off-by: Ralf Ramsauer <[email protected]>
>> ---
>> inmates/lib/x86/Makefile | 6 +-
>> inmates/lib/x86/cpu-features.c | 103 +++++++++++++++++++++++++++++
>> inmates/lib/x86/header-32.S | 9 ++-
>> inmates/lib/x86/header-64.S | 9 ++-
>> inmates/lib/x86/include/asm/regs.h | 36 ++++++++++
>> 5 files changed, 158 insertions(+), 5 deletions(-)
>> create mode 100644 inmates/lib/x86/cpu-features.c
>>
>> diff --git a/inmates/lib/x86/Makefile b/inmates/lib/x86/Makefile
>> index e474ffd0..ed3b04d5 100644
>> --- a/inmates/lib/x86/Makefile
>> +++ b/inmates/lib/x86/Makefile
>> @@ -40,7 +40,7 @@ include $(INMATES_LIB)/Makefile.lib
>>
>> always := lib.a lib32.a
>>
>> -TARGETS := header-common.o ioapic.o printk.o setup.o smp.o uart.o
>> +TARGETS := cpu-features.o header-common.o ioapic.o printk.o setup.o
>> smp.o uart.o
>> TARGETS += ../alloc.o ../pci.o ../string.o ../cmdline.o ../setup.o
>> TARGETS += ../uart-8250.o ../printk.o
>> TARGETS_32_ONLY := header-32.o
>> @@ -57,6 +57,10 @@ $(obj)/lib32.a: $(addprefix $(obj)/,$(lib32-y))
>>
>> targets += header-32.o
>>
>> +# Code of this object is called before SSE/AVX is available. Ensure
>> that the
>> +# compiler won't generate unsupported instructions for this file.
>> +CFLAGS_cpu-features.o += -mno-sse
>> +
>> $(obj)/%-32.o: c_flags += -m32
>> $(obj)/%-32.o: $(src)/%.c
>> $(call if_changed_rule,cc_o_c)
>> diff --git a/inmates/lib/x86/cpu-features.c
>> b/inmates/lib/x86/cpu-features.c
>> new file mode 100644
>> index 00000000..9cf98543
>> --- /dev/null
>> +++ b/inmates/lib/x86/cpu-features.c
>> @@ -0,0 +1,103 @@
>> +/*
>> + * Jailhouse, a Linux-based partitioning hypervisor
>> + *
>> + * Copyright (c) OTH Regensburg, 2019
>> + *
>> + * Authors:
>> + * Ralf Ramsauer <[email protected]>
>> + *
>> + * This work is licensed under the terms of the GNU GPL, version 2. See
>> + * the COPYING file in the top-level directory.
>> + *
>> + * Alternatively, you can use or redistribute this file under the
>> following
>> + * BSD license:
>> + *
>> + * Redistribution and use in source and binary forms, with or without
>> + * modification, are permitted provided that the following conditions
>> + * are met:
>> + *
>> + * 1. Redistributions of source code must retain the above copyright
>> + * notice, this list of conditions and the following disclaimer.
>> + *
>> + * 2. Redistributions in binary form must reproduce the above copyright
>> + * notice, this list of conditions and the following disclaimer in
>> the
>> + * documentation and/or other materials provided with the
>> distribution.
>> + *
>> + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND
>> CONTRIBUTORS "AS IS"
>> + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
>> TO, THE
>> + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
>> PURPOSE
>> + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR
>> CONTRIBUTORS BE
>> + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
>> + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
>> + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
>> BUSINESS
>> + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
>> WHETHER IN
>> + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
>> OTHERWISE)
>> + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
>> ADVISED OF
>> + * THE POSSIBILITY OF SUCH DAMAGE.
>> + */
>> +
>> +#include <inmate.h>
>> +#include <asm/regs.h>
>> +
>> +/* Must only be called from assembler via jmp */
>> +void arch_init_features(void);
>> +
>> +struct x86_cpu_features x86_cpu_features
>> __attribute__((section(".data")));
>> +
>> +/*
>> + * We arrive here very early, and we don't have a stack. Take care.
>> + *
>> + * Every booting CPU will call this function. We make the assumption
>> that all
>> + * CPUs have the same feature set. So we don't need any locks when
>> writing to
>> + * x86_cpu_features.
Any comments on this? Is it okay that this code will run on each CPU,
even if only required on the primary CPU?
>> + */
>> +void __attribute__((naked, noreturn, section(".boot")))
>
> What's "naked" supposed to do here? The compilers says it will ignore it.
Huh? Does it produce a warning?
naked ensures that there's no pro- and epilogue sequences. And as the
comment states: We won't have a stack that early.
BTW: That raises a question: Where do we set the stack for secondary
CPUs on x86?
>
>> +arch_init_features(void)
>> +{
>> + register u64 features;
>> +
>> + features = cpuid_edx(X86_CPUID_FEATURES, 0);
>> + /* Check availability of FPU */
>> + x86_cpu_features.fpu = !!(features & X86_FEATURE_FPU);
>> +
>> + /* Discover and enable FXSR */
>> + if (features & X86_FEATURE_FXSR) {
>> + write_cr4(read_cr4() | X86_CR4_OSFXSR);
>> + x86_cpu_features.fxsr = true;
>> + }
BTW: Another point that I realised in the meanwhile: AFAICT, the
hypervisor will very likely hand over with OSFXSR enabled as it is set
by the cr4_required1 mask.
Nevertheless, it should be properly set.
>> +
>> + /* Check availability of SSE */
>> + x86_cpu_features.sse = !!(features & X86_FEATURE_SSE);
>> + x86_cpu_features.sse2 = !!(features & X86_FEATURE_SSE2);
>> +
>> + /* ECX hides the rest */
>> + features = cpuid_ecx(X86_CPUID_FEATURES, 0);
>> + x86_cpu_features.sse3 = !!(features & X86_FEATURE_SSE3);
>> + x86_cpu_features.sse4_1 = !!(features & X86_FEATURE_SSE4_1);
>> + x86_cpu_features.sse4_2 = !!(features & X86_FEATURE_SSE4_2);
>> + x86_cpu_features.pclmulqdq = !!(features & X86_FEATURE_PCLMULQDQ);
>> +
>> + if (features & X86_FEATURE_XSAVE) {
>> + /* Enable XSAVE related instructions */
>> + write_cr4(read_cr4() | X86_CR4_OSXSAVE);
>> + x86_cpu_features.xsave = true;
>> +
>> + /*
>> + * Intel SDM 13.2: A bit can be set in XCR0 if and only if the
>> + * corresponding bit is set in this bitmap. Every processor
>> + * that supports the XSAVE feature set will set EAX[0] (x87
>> + * state) and EAX[1] (SSE state).
>> + *
>> + * We can always safely write SSE + FP, but only set AVX if
>> + * available.
>> + */
>> +
>> + features = cpuid_edax(X86_CPUID_XSTATE, 0);
>> + write_xcr0(read_xcr0() | (features & X86_XCR0_AVX) | \
>> + X86_XCR0_SSE | X86_XCR0_X87);
>> + x86_cpu_features.avx = !!(features & X86_XCR0_AVX);
>> + }
>> +
>> + /* hand control back to assembler */
>> + asm volatile("jmp x86_start\t\n");
>
> Hacky... Is there no stack yet to do proper call/ret?
See above, no stack. But I agree on hacky. :-)
>
> But if you want to / have to use a jmp here, you need to tell the
> compiler that
> this will never "return" (__builtin_unreachable).
What's the difference between noreturn and builtin_unreachable?
The noreturn attribute makes sure that - besides the epilogue, we won't
have a ret.
Thanks
Ralf
>
> Jan
>
>> +}
>> diff --git a/inmates/lib/x86/header-32.S b/inmates/lib/x86/header-32.S
>> index 30b3f5e3..ca9f77f9 100644
>> --- a/inmates/lib/x86/header-32.S
>> +++ b/inmates/lib/x86/header-32.S
>> @@ -63,6 +63,13 @@ start32:
>> mov %eax,%es
>> mov %eax,%ss
>>
>> + /* Temporarily hand over to C. Note that we don't have a valid
>> stack. */
>> + jmp arch_init_features
>> +
>> + /* C will jmp back to x86_start */
>> + .globl x86_start
>> +x86_start:
>> +
>> xor %ebx,%ebx
>> xchg ap_entry,%ebx
>> or %ebx,%ebx
>> @@ -74,7 +81,7 @@ start32:
>> cmp $SMP_MAX_CPUS,%edi
>> jae stop
>>
>> - mov $0x01,%eax
>> + mov $X86_CPUID_FEATURES, %eax
>> cpuid
>> shr $24,%ebx
>> mov %bl,smp_cpu_ids(%edi)
>> diff --git a/inmates/lib/x86/header-64.S b/inmates/lib/x86/header-64.S
>> index 2c4caace..53b13173 100644
>> --- a/inmates/lib/x86/header-64.S
>> +++ b/inmates/lib/x86/header-64.S
>> @@ -63,10 +63,13 @@ start32:
>> mov $(X86_CR0_PG | X86_CR0_WP | X86_CR0_PE),%eax
>> mov %eax,%cr0
>>
>> - ljmpl $INMATE_CS64,$start64
>> + /* Temporarily hand over to C. Note that we don't have a valid
>> stack. */
>> + ljmpl $INMATE_CS64, $arch_init_features
>>
>> + /* C will jmp back to x86_start. We're now in 64-bit mode. */
>> .code64
>> -start64:
>> + .globl x86_start
>> +x86_start:
>> xor %rbx,%rbx
>> xchg ap_entry,%rbx
>> or %rbx,%rbx
>> @@ -78,7 +81,7 @@ start64:
>> cmp $SMP_MAX_CPUS,%edi
>> jae stop
>>
>> - mov $0x01,%eax
>> + mov $X86_CPUID_FEATURES, %eax
>> cpuid
>> shr $24,%ebx
>> mov %bl,smp_cpu_ids(%edi)
>> diff --git a/inmates/lib/x86/include/asm/regs.h
>> b/inmates/lib/x86/include/asm/regs.h
>> index 85da043b..905d03ee 100644
>> --- a/inmates/lib/x86/include/asm/regs.h
>> +++ b/inmates/lib/x86/include/asm/regs.h
>> @@ -42,15 +42,51 @@
>>
>> #define X86_CR4_PAE 0x00000020
>> #define X86_CR4_PSE 0x00000010
>> +#define X86_CR4_OSFXSR 0x00000200
>> +#define X86_CR4_OSXSAVE 0x00040000
>> +
>> +#define X86_XCR0_X87 (1 << 0)
>> +#define X86_XCR0_SSE (1 << 1)
>> +#define X86_XCR0_AVX (1 << 2)
>>
>> #define MSR_EFER 0xc0000080
>> #define EFER_LME 0x00000100
>>
>> +#define X86_CPUID_FEATURES 0x00000001 /* Processor Info and
>> Feature Bits */
>> +/* Feature bits in EDX */
>> +# define X86_FEATURE_FPU (1 << 0) /* The processor contains an
>> x87 FPU. */
>> +# define X86_FEATURE_FXSR (1 << 24) /* FXSAVE/FXRSTOR,
>> CR4.OSFXSR */
>> +# define X86_FEATURE_SSE (1 << 25) /* The processor supports SSE */
>> +# define X86_FEATURE_SSE2 (1 << 26) /* The processor supports SSE2 */
>> +/* Feature bits in ECX */
>> +# define X86_FEATURE_SSE3 (1 << 0) /* The processor supports SSE3 */
>> +# define X86_FEATURE_PCLMULQDQ (1 << 1) /* The processor supports
>> PCLMULQDQ */
>> +# define X86_FEATURE_SSE4_1 (1 << 19) /* The processor supports
>> SSE4.1 */
>> +# define X86_FEATURE_SSE4_2 (1 << 20) /* The processor supports
>> SSE4.2 */
>> +# define X86_FEATURE_XSAVE (1 << 26) /* XSAVE/..., CR4.OSXSAVE */
>> +
>> +#define X86_CPUID_XSTATE 0x0000000d /* Extended state features */
>> +
>> #define MSR_MTRR_DEF_TYPE 0x000002ff
>> #define MTRR_ENABLE 0x00000800
>>
>> #ifndef __ASSEMBLY__
>>
>> +struct x86_cpu_features {
>> + bool avx:1;
>> + bool sse:1;
>> + bool sse2:1;
>> + bool sse3:1;
>> + bool sse4_1:1;
>> + bool sse4_2:1;
>> + bool fpu:1;
>> + bool xsave:1;
>> + bool fxsr:1;
>> + bool pclmulqdq:1;
>> +};
>> +
>> +extern struct x86_cpu_features x86_cpu_features;
>> +
>> static unsigned long __force_order;
>>
>> static inline unsigned long read_cr4(void)
>>
>
--
You received this message because you are subscribed to the Google Groups
"Jailhouse" group.
To unsubscribe from this group and stop receiving emails from it, send an email
to [email protected].
To view this discussion on the web visit
https://groups.google.com/d/msgid/jailhouse-dev/c6d1eef6-0730-68b2-8fc2-5fd2b3c5b81e%40oth-regensburg.de.
For more options, visit https://groups.google.com/d/optout.