On 5/30/19 3:00 AM, Jan Kiszka wrote:
> On 21.05.19 16:34, Ralf Ramsauer wrote:
>> Recent gcc versions emit SSE instructions for 32-bit inmates (e.g., in
>> hex2str or cmdline_parse routines). Inmates aren't able to execute those
>> instructions as SSE is not enabled and will crash.
>>
>> Enabling SSE is the same code for 32 and 64 bit x86 and straight
>> forward: Lookup SSE availability via cpuid and enable OSFXSR in cr4. If
>> SSE is not available, stop the inmate.
>>
>> If AVX is available, activate it (XCR0).
>>
>> Lookup features that need no explicit activation.
>>
>> Signed-off-by: Ralf Ramsauer <[email protected]>
>> ---
>>   inmates/lib/x86/Makefile           |   6 +-
>>   inmates/lib/x86/cpu-features.c     | 103 +++++++++++++++++++++++++++++
>>   inmates/lib/x86/header-32.S        |   9 ++-
>>   inmates/lib/x86/header-64.S        |   9 ++-
>>   inmates/lib/x86/include/asm/regs.h |  36 ++++++++++
>>   5 files changed, 158 insertions(+), 5 deletions(-)
>>   create mode 100644 inmates/lib/x86/cpu-features.c
>>
>> diff --git a/inmates/lib/x86/Makefile b/inmates/lib/x86/Makefile
>> index e474ffd0..ed3b04d5 100644
>> --- a/inmates/lib/x86/Makefile
>> +++ b/inmates/lib/x86/Makefile
>> @@ -40,7 +40,7 @@ include $(INMATES_LIB)/Makefile.lib
>>
>>   always := lib.a lib32.a
>>
>> -TARGETS := header-common.o ioapic.o printk.o setup.o smp.o uart.o
>> +TARGETS := cpu-features.o header-common.o ioapic.o printk.o setup.o
>> smp.o uart.o
>>   TARGETS += ../alloc.o ../pci.o ../string.o ../cmdline.o ../setup.o
>>   TARGETS += ../uart-8250.o ../printk.o
>>   TARGETS_32_ONLY := header-32.o
>> @@ -57,6 +57,10 @@ $(obj)/lib32.a: $(addprefix $(obj)/,$(lib32-y))
>>
>>   targets += header-32.o
>>
>> +# Code of this object is called before SSE/AVX is available. Ensure
>> that the
>> +# compiler won't generate unsupported instructions for this file.
>> +CFLAGS_cpu-features.o += -mno-sse
>> +
>>   $(obj)/%-32.o: c_flags += -m32
>>   $(obj)/%-32.o: $(src)/%.c
>>       $(call if_changed_rule,cc_o_c)
>> diff --git a/inmates/lib/x86/cpu-features.c
>> b/inmates/lib/x86/cpu-features.c
>> new file mode 100644
>> index 00000000..9cf98543
>> --- /dev/null
>> +++ b/inmates/lib/x86/cpu-features.c
>> @@ -0,0 +1,103 @@
>> +/*
>> + * Jailhouse, a Linux-based partitioning hypervisor
>> + *
>> + * Copyright (c) OTH Regensburg, 2019
>> + *
>> + * Authors:
>> + *  Ralf Ramsauer <[email protected]>
>> + *
>> + * This work is licensed under the terms of the GNU GPL, version 2.  See
>> + * the COPYING file in the top-level directory.
>> + *
>> + * Alternatively, you can use or redistribute this file under the
>> following
>> + * BSD license:
>> + *
>> + * Redistribution and use in source and binary forms, with or without
>> + * modification, are permitted provided that the following conditions
>> + * are met:
>> + *
>> + * 1. Redistributions of source code must retain the above copyright
>> + *    notice, this list of conditions and the following disclaimer.
>> + *
>> + * 2. Redistributions in binary form must reproduce the above copyright
>> + *    notice, this list of conditions and the following disclaimer in
>> the
>> + *    documentation and/or other materials provided with the
>> distribution.
>> + *
>> + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND
>> CONTRIBUTORS "AS IS"
>> + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
>> TO, THE
>> + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
>> PURPOSE
>> + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR
>> CONTRIBUTORS BE
>> + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
>> + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
>> + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
>> BUSINESS
>> + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
>> WHETHER IN
>> + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
>> OTHERWISE)
>> + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
>> ADVISED OF
>> + * THE POSSIBILITY OF SUCH DAMAGE.
>> + */
>> +
>> +#include <inmate.h>
>> +#include <asm/regs.h>
>> +
>> +/* Must only be called from assembler via jmp */
>> +void arch_init_features(void);
>> +
>> +struct x86_cpu_features x86_cpu_features
>> __attribute__((section(".data")));
>> +
>> +/*
>> + * We arrive here very early, and we don't have a stack. Take care.
>> + *
>> + * Every booting CPU will call this function. We make the assumption
>> that all
>> + * CPUs have the same feature set. So we don't need any locks when
>> writing to
>> + * x86_cpu_features.

Any comments on this? Is it okay that this code will run on each CPU,
even if only required on the primary CPU?

>> + */
>> +void __attribute__((naked, noreturn, section(".boot")))
> 
> What's "naked" supposed to do here? The compilers says it will ignore it.

Huh? Does it produce a warning?

naked ensures that there are no prologue and epilogue sequences. And as the
comment states: we won't have a stack that early.

BTW: That raises a question: Where do we set the stack for secondary
CPUs on x86?

> 
>> +arch_init_features(void)
>> +{
>> +    register u64 features;
>> +
>> +    features = cpuid_edx(X86_CPUID_FEATURES, 0);
>> +    /* Check availability of FPU */
>> +    x86_cpu_features.fpu = !!(features & X86_FEATURE_FPU);
>> +
>> +    /* Discover and enable FXSR */
>> +    if (features & X86_FEATURE_FXSR) {
>> +        write_cr4(read_cr4() | X86_CR4_OSFXSR);
>> +        x86_cpu_features.fxsr = true;
>> +    }

BTW: Another point that I realised in the meanwhile: AFAICT, the
hypervisor will very likely hand over with OSFXSR enabled as it is set
by the cr4_required1 mask.

Nevertheless, it should be properly set.

>> +
>> +    /* Check availability of SSE */
>> +    x86_cpu_features.sse = !!(features & X86_FEATURE_SSE);
>> +    x86_cpu_features.sse2 = !!(features & X86_FEATURE_SSE2);
>> +
>> +    /* ECX hides the rest */
>> +    features = cpuid_ecx(X86_CPUID_FEATURES, 0);
>> +    x86_cpu_features.sse3 = !!(features & X86_FEATURE_SSE3);
>> +    x86_cpu_features.sse4_1 = !!(features & X86_FEATURE_SSE4_1);
>> +    x86_cpu_features.sse4_2 = !!(features & X86_FEATURE_SSE4_2);
>> +    x86_cpu_features.pclmulqdq = !!(features & X86_FEATURE_PCLMULQDQ);
>> +
>> +    if (features & X86_FEATURE_XSAVE) {
>> +        /* Enable XSAVE related instructions */
>> +        write_cr4(read_cr4() | X86_CR4_OSXSAVE);
>> +        x86_cpu_features.xsave = true;
>> +
>> +        /*
>> +         * Intel SDM 13.2: A bit can be set in XCR0 if and only if the
>> +         * corresponding bit is set in this bitmap.  Every processor
>> +         * that supports the XSAVE feature set will set EAX[0] (x87
>> +         * state) and EAX[1] (SSE state).
>> +         *
>> +         * We can always safely write SSE + FP, but only set AVX if
>> +         * available.
>> +         */
>> +
>> +        features = cpuid_edax(X86_CPUID_XSTATE, 0);
>> +        write_xcr0(read_xcr0() | (features & X86_XCR0_AVX) | \
>> +               X86_XCR0_SSE | X86_XCR0_X87);
>> +        x86_cpu_features.avx = !!(features & X86_XCR0_AVX);
>> +    }
>> +
>> +    /* hand control back to assembler */
>> +    asm volatile("jmp x86_start\t\n");
> 
> Hacky... Is there no stack yet to do proper call/ret?

See above, no stack. But I agree on hacky. :-)

> 
> But if you want to / have to use a jmp here, you need to tell the
> compiler that
> this will never "return" (__builtin_unreachable).

What's the difference between noreturn and builtin_unreachable?

The noreturn attribute makes sure that, apart from the epilogue, no ret
will be emitted.

Thanks
  Ralf

> 
> Jan
> 
>> +}
>> diff --git a/inmates/lib/x86/header-32.S b/inmates/lib/x86/header-32.S
>> index 30b3f5e3..ca9f77f9 100644
>> --- a/inmates/lib/x86/header-32.S
>> +++ b/inmates/lib/x86/header-32.S
>> @@ -63,6 +63,13 @@ start32:
>>       mov %eax,%es
>>       mov %eax,%ss
>>
>> +    /* Temporarily hand over to C. Note that we don't have a valid
>> stack. */
>> +    jmp arch_init_features
>> +
>> +    /* C will jmp back to x86_start */
>> +    .globl x86_start
>> +x86_start:
>> +
>>       xor %ebx,%ebx
>>       xchg ap_entry,%ebx
>>       or %ebx,%ebx
>> @@ -74,7 +81,7 @@ start32:
>>       cmp $SMP_MAX_CPUS,%edi
>>       jae stop
>>
>> -    mov $0x01,%eax
>> +    mov $X86_CPUID_FEATURES, %eax
>>       cpuid
>>       shr $24,%ebx
>>       mov %bl,smp_cpu_ids(%edi)
>> diff --git a/inmates/lib/x86/header-64.S b/inmates/lib/x86/header-64.S
>> index 2c4caace..53b13173 100644
>> --- a/inmates/lib/x86/header-64.S
>> +++ b/inmates/lib/x86/header-64.S
>> @@ -63,10 +63,13 @@ start32:
>>       mov $(X86_CR0_PG | X86_CR0_WP | X86_CR0_PE),%eax
>>       mov %eax,%cr0
>>
>> -    ljmpl $INMATE_CS64,$start64
>> +    /* Temporarily hand over to C. Note that we don't have a valid
>> stack. */
>> +    ljmpl $INMATE_CS64, $arch_init_features
>>
>> +    /* C will jmp back to x86_start. We're now in 64-bit mode. */
>>       .code64
>> -start64:
>> +    .globl x86_start
>> +x86_start:
>>       xor %rbx,%rbx
>>       xchg ap_entry,%rbx
>>       or %rbx,%rbx
>> @@ -78,7 +81,7 @@ start64:
>>       cmp $SMP_MAX_CPUS,%edi
>>       jae stop
>>
>> -    mov $0x01,%eax
>> +    mov $X86_CPUID_FEATURES, %eax
>>       cpuid
>>       shr $24,%ebx
>>       mov %bl,smp_cpu_ids(%edi)
>> diff --git a/inmates/lib/x86/include/asm/regs.h
>> b/inmates/lib/x86/include/asm/regs.h
>> index 85da043b..905d03ee 100644
>> --- a/inmates/lib/x86/include/asm/regs.h
>> +++ b/inmates/lib/x86/include/asm/regs.h
>> @@ -42,15 +42,51 @@
>>
>>   #define X86_CR4_PAE        0x00000020
>>   #define X86_CR4_PSE        0x00000010
>> +#define X86_CR4_OSFXSR        0x00000200
>> +#define X86_CR4_OSXSAVE        0x00040000
>> +
>> +#define X86_XCR0_X87        (1 << 0)
>> +#define X86_XCR0_SSE        (1 << 1)
>> +#define X86_XCR0_AVX        (1 << 2)
>>
>>   #define MSR_EFER        0xc0000080
>>   #define EFER_LME        0x00000100
>>
>> +#define X86_CPUID_FEATURES    0x00000001 /* Processor Info and
>> Feature Bits */
>> +/* Feature bits in EDX */
>> +# define X86_FEATURE_FPU    (1 << 0)  /* The processor contains an
>> x87 FPU. */
>> +# define X86_FEATURE_FXSR       (1 << 24) /* FXSAVE/FXRSTOR,
>> CR4.OSFXSR */
>> +# define X86_FEATURE_SSE    (1 << 25) /* The processor supports SSE */
>> +# define X86_FEATURE_SSE2    (1 << 26) /* The processor supports SSE2 */
>> +/* Feature bits in ECX */
>> +# define X86_FEATURE_SSE3    (1 << 0)  /* The processor supports SSE3 */
>> +# define X86_FEATURE_PCLMULQDQ    (1 << 1)  /* The processor supports
>> PCLMULQDQ */
>> +# define X86_FEATURE_SSE4_1    (1 << 19) /* The processor supports
>> SSE4.1 */
>> +# define X86_FEATURE_SSE4_2    (1 << 20) /* The processor supports
>> SSE4.2 */
>> +# define X86_FEATURE_XSAVE    (1 << 26) /* XSAVE/..., CR4.OSXSAVE */
>> +
>> +#define X86_CPUID_XSTATE    0x0000000d /* Extended state features */
>> +
>>   #define MSR_MTRR_DEF_TYPE    0x000002ff
>>   #define MTRR_ENABLE        0x00000800
>>
>>   #ifndef __ASSEMBLY__
>>
>> +struct x86_cpu_features {
>> +    bool avx:1;
>> +    bool sse:1;
>> +    bool sse2:1;
>> +    bool sse3:1;
>> +    bool sse4_1:1;
>> +    bool sse4_2:1;
>> +    bool fpu:1;
>> +    bool xsave:1;
>> +    bool fxsr:1;
>> +    bool pclmulqdq:1;
>> +};
>> +
>> +extern struct x86_cpu_features x86_cpu_features;
>> +
>>   static unsigned long __force_order;
>>
>>   static inline unsigned long read_cr4(void)
>>
> 

-- 
You received this message because you are subscribed to the Google Groups 
"Jailhouse" group.
To unsubscribe from this group and stop receiving emails from it, send an email 
to [email protected].
To view this discussion on the web visit 
https://groups.google.com/d/msgid/jailhouse-dev/c6d1eef6-0730-68b2-8fc2-5fd2b3c5b81e%40oth-regensburg.de.
For more options, visit https://groups.google.com/d/optout.

Reply via email to