Recent gcc versions emit SSE instructions for 32-bit inmates (e.g., in
hex2str or cmdline_parse routines). Inmates are not able to execute those
instructions, as SSE is not enabled, and will crash.

Enabling SSE is the same code for 32 and 64 bit x86 and
straightforward: Look up SSE availability via cpuid and enable OSFXSR in
cr4. If SSE is not available, stop the inmate.

If AVX is available, activate it (XCR0).

Look up features that need no explicit activation.

Signed-off-by: Ralf Ramsauer <[email protected]>
---
 inmates/lib/x86/Makefile           |   6 +-
 inmates/lib/x86/cpu-features.c     | 103 +++++++++++++++++++++++++++++
 inmates/lib/x86/header-32.S        |   9 ++-
 inmates/lib/x86/header-64.S        |   9 ++-
 inmates/lib/x86/include/asm/regs.h |  36 ++++++++++
 5 files changed, 158 insertions(+), 5 deletions(-)
 create mode 100644 inmates/lib/x86/cpu-features.c

diff --git a/inmates/lib/x86/Makefile b/inmates/lib/x86/Makefile
index e474ffd0..ed3b04d5 100644
--- a/inmates/lib/x86/Makefile
+++ b/inmates/lib/x86/Makefile
@@ -40,7 +40,7 @@ include $(INMATES_LIB)/Makefile.lib
 
 always := lib.a lib32.a
 
-TARGETS := header-common.o ioapic.o printk.o setup.o smp.o uart.o
+TARGETS := cpu-features.o header-common.o ioapic.o printk.o setup.o smp.o uart.o
 TARGETS += ../alloc.o ../pci.o ../string.o ../cmdline.o ../setup.o
 TARGETS += ../uart-8250.o ../printk.o
 TARGETS_32_ONLY := header-32.o
@@ -57,6 +57,10 @@ $(obj)/lib32.a: $(addprefix $(obj)/,$(lib32-y))
 
 targets += header-32.o
 
+# Code of this object is called before SSE/AVX is available. Ensure that the
+# compiler won't generate unsupported instructions for this file.
+CFLAGS_cpu-features.o += -mno-sse
+
 $(obj)/%-32.o: c_flags += -m32
 $(obj)/%-32.o: $(src)/%.c
        $(call if_changed_rule,cc_o_c)
diff --git a/inmates/lib/x86/cpu-features.c b/inmates/lib/x86/cpu-features.c
new file mode 100644
index 00000000..9cf98543
--- /dev/null
+++ b/inmates/lib/x86/cpu-features.c
@@ -0,0 +1,103 @@
+/*
+ * Jailhouse, a Linux-based partitioning hypervisor
+ *
+ * Copyright (c) OTH Regensburg, 2019
+ *
+ * Authors:
+ *  Ralf Ramsauer <[email protected]>
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2.  See
+ * the COPYING file in the top-level directory.
+ *
+ * Alternatively, you can use or redistribute this file under the following
+ * BSD license:
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ *
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
+ * THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include <inmate.h>
+#include <asm/regs.h>
+
+/* Must only be called from assembler via jmp */
+void arch_init_features(void);
+
+struct x86_cpu_features x86_cpu_features __attribute__((section(".data")));
+
+/*
+ * We arrive here very early, and we don't have a stack. Take care.
+ *
+ * Every booting CPU will call this function. We make the assumption that all
+ * CPUs have the same feature set. So we don't need any locks when writing to
+ * x86_cpu_features.
+ */
+void __attribute__((naked, noreturn, section(".boot")))
+arch_init_features(void)
+{
+       register u64 features;
+
+       features = cpuid_edx(X86_CPUID_FEATURES, 0);
+       /* Check availability of FPU */
+       x86_cpu_features.fpu = !!(features & X86_FEATURE_FPU);
+
+       /* Discover and enable FXSR */
+       if (features & X86_FEATURE_FXSR) {
+               write_cr4(read_cr4() | X86_CR4_OSFXSR);
+               x86_cpu_features.fxsr = true;
+       }
+
+       /* Check availability of SSE */
+       x86_cpu_features.sse = !!(features & X86_FEATURE_SSE);
+       x86_cpu_features.sse2 = !!(features & X86_FEATURE_SSE2);
+
+       /* ECX hides the rest */
+       features = cpuid_ecx(X86_CPUID_FEATURES, 0);
+       x86_cpu_features.sse3 = !!(features & X86_FEATURE_SSE3);
+       x86_cpu_features.sse4_1 = !!(features & X86_FEATURE_SSE4_1);
+       x86_cpu_features.sse4_2 = !!(features & X86_FEATURE_SSE4_2);
+       x86_cpu_features.pclmulqdq = !!(features & X86_FEATURE_PCLMULQDQ);
+
+       if (features & X86_FEATURE_XSAVE) {
+               /* Enable XSAVE related instructions */
+               write_cr4(read_cr4() | X86_CR4_OSXSAVE);
+               x86_cpu_features.xsave = true;
+
+               /*
+                * Intel SDM 13.2: A bit can be set in XCR0 if and only if the
+                * corresponding bit is set in this bitmap.  Every processor
+                * that supports the XSAVE feature set will set EAX[0] (x87
+                * state) and EAX[1] (SSE state).
+                *
+                * We can always safely write SSE + FP, but only set AVX if
+                * available.
+                */
+
+               features = cpuid_edax(X86_CPUID_XSTATE, 0);
+               write_xcr0(read_xcr0() | (features & X86_XCR0_AVX) | \
+                          X86_XCR0_SSE | X86_XCR0_X87);
+               x86_cpu_features.avx = !!(features & X86_XCR0_AVX);
+       }
+
+       /* hand control back to assembler */
+       asm volatile("jmp x86_start\t\n");
+}
diff --git a/inmates/lib/x86/header-32.S b/inmates/lib/x86/header-32.S
index 30b3f5e3..ca9f77f9 100644
--- a/inmates/lib/x86/header-32.S
+++ b/inmates/lib/x86/header-32.S
@@ -63,6 +63,13 @@ start32:
        mov %eax,%es
        mov %eax,%ss
 
+       /* Temporarily hand over to C. Note that we don't have a valid stack. */
+       jmp arch_init_features
+
+       /* C will jmp back to x86_start */
+       .globl x86_start
+x86_start:
+
        xor %ebx,%ebx
        xchg ap_entry,%ebx
        or %ebx,%ebx
@@ -74,7 +81,7 @@ start32:
        cmp $SMP_MAX_CPUS,%edi
        jae stop
 
-       mov $0x01,%eax
+       mov $X86_CPUID_FEATURES, %eax
        cpuid
        shr $24,%ebx
        mov %bl,smp_cpu_ids(%edi)
diff --git a/inmates/lib/x86/header-64.S b/inmates/lib/x86/header-64.S
index 2c4caace..53b13173 100644
--- a/inmates/lib/x86/header-64.S
+++ b/inmates/lib/x86/header-64.S
@@ -63,10 +63,13 @@ start32:
        mov $(X86_CR0_PG | X86_CR0_WP | X86_CR0_PE),%eax
        mov %eax,%cr0
 
-       ljmpl $INMATE_CS64,$start64
+       /* Temporarily hand over to C. Note that we don't have a valid stack. */
+       ljmpl $INMATE_CS64, $arch_init_features
 
+       /* C will jmp back to x86_start. We're now in 64-bit mode. */
        .code64
-start64:
+       .globl x86_start
+x86_start:
        xor %rbx,%rbx
        xchg ap_entry,%rbx
        or %rbx,%rbx
@@ -78,7 +81,7 @@ start64:
        cmp $SMP_MAX_CPUS,%edi
        jae stop
 
-       mov $0x01,%eax
+       mov $X86_CPUID_FEATURES, %eax
        cpuid
        shr $24,%ebx
        mov %bl,smp_cpu_ids(%edi)
diff --git a/inmates/lib/x86/include/asm/regs.h b/inmates/lib/x86/include/asm/regs.h
index 85da043b..905d03ee 100644
--- a/inmates/lib/x86/include/asm/regs.h
+++ b/inmates/lib/x86/include/asm/regs.h
@@ -42,15 +42,51 @@
 
 #define X86_CR4_PAE            0x00000020
 #define X86_CR4_PSE            0x00000010
+#define X86_CR4_OSFXSR         0x00000200
+#define X86_CR4_OSXSAVE                0x00040000
+
+#define X86_XCR0_X87           (1 << 0)
+#define X86_XCR0_SSE           (1 << 1)
+#define X86_XCR0_AVX           (1 << 2)
 
 #define MSR_EFER               0xc0000080
 #define EFER_LME               0x00000100
 
+#define X86_CPUID_FEATURES     0x00000001 /* Processor Info and Feature Bits */
+/* Feature bits in EDX */
+# define X86_FEATURE_FPU       (1 << 0)  /* The processor contains an x87 FPU. */
+# define X86_FEATURE_FXSR       (1 << 24) /* FXSAVE/FXRSTOR, CR4.OSFXSR */
+# define X86_FEATURE_SSE       (1 << 25) /* The processor supports SSE */
+# define X86_FEATURE_SSE2      (1 << 26) /* The processor supports SSE2 */
+/* Feature bits in ECX */
+# define X86_FEATURE_SSE3      (1 << 0)  /* The processor supports SSE3 */
+# define X86_FEATURE_PCLMULQDQ (1 << 1)  /* The processor supports PCLMULQDQ */
+# define X86_FEATURE_SSE4_1    (1 << 19) /* The processor supports SSE4.1 */
+# define X86_FEATURE_SSE4_2    (1 << 20) /* The processor supports SSE4.2 */
+# define X86_FEATURE_XSAVE     (1 << 26) /* XSAVE/..., CR4.OSXSAVE */
+
+#define X86_CPUID_XSTATE       0x0000000d /* Extended state features */
+
 #define MSR_MTRR_DEF_TYPE      0x000002ff
 #define MTRR_ENABLE            0x00000800
 
 #ifndef __ASSEMBLY__
 
+struct x86_cpu_features {
+       bool avx:1;
+       bool sse:1;
+       bool sse2:1;
+       bool sse3:1;
+       bool sse4_1:1;
+       bool sse4_2:1;
+       bool fpu:1;
+       bool xsave:1;
+       bool fxsr:1;
+       bool pclmulqdq:1;
+};
+
+extern struct x86_cpu_features x86_cpu_features;
+
 static unsigned long __force_order;
 
 static inline unsigned long read_cr4(void)
-- 
2.21.0

-- 
You received this message because you are subscribed to the Google Groups 
"Jailhouse" group.
To unsubscribe from this group and stop receiving emails from it, send an email 
to [email protected].
To view this discussion on the web visit 
https://groups.google.com/d/msgid/jailhouse-dev/20190521143423.17734-5-ralf.ramsauer%40oth-regensburg.de.
For more options, visit https://groups.google.com/d/optout.

Reply via email to