On 2017-08-22 18:57, Gustavo Lima Chaves wrote:
> This is a first take on the TODO-list entry
>
> - whitelist-based MSR access [v1.0]
>
> *for Intel architecture*. All the architectural MSRs were given a look
> before the coding started: they were categorized, for ease of finding
> things out when one needs to refer back to them and, for the ones that
> were absolutely necessary (at least given the needs of a typical x86
> Linux build, on both root and inmate cell contexts), access without
> VM-exits was granted. Some actual model specific entries are there as
> well, noticed while testing with our baremetal hardware.
>
> Machine-check exception, thermal event interrupts and others, at least
> on IA, can commonly have scope broader than current core only (e.g. the
> whole package). We tried our best to only give access to registers in
> that domain that would not impact other cores in any hazardous way,
> e.g. enable/disable some MCE errors. We only made such accesses possible
> because Linux relies on them. Currently we're doing nothing on writes
> for these problematic cases and everything seems to run just fine on the
> inmates.
>
> A lot of Linux requirements regarding MSR access could be checked when
> destroying other inmates (or disabling the hypervisor altogether), when
> the CPUs go back to the root cell and it has to bring them online
> again—a lot of MSR interaction happens at those routines. The rest of
> the required MSRs could be checked running Linux as inmate.
>
> The whitelist is structured so as to make additions/corrections as easy
> as possible.
>
> Signed-off-by: Gustavo Lima Chaves <[email protected]>
> ---
> hypervisor/arch/x86/include/asm/processor.h | 12 ++
> hypervisor/arch/x86/vcpu.c | 41 ++++++
> hypervisor/arch/x86/vmx.c | 218
> +++++++++++++++++++++++-----
> 3 files changed, 236 insertions(+), 35 deletions(-)
>
> diff --git a/hypervisor/arch/x86/include/asm/processor.h
> b/hypervisor/arch/x86/include/asm/processor.h
> index a658039..3236ab7 100644
> --- a/hypervisor/arch/x86/include/asm/processor.h
> +++ b/hypervisor/arch/x86/include/asm/processor.h
> @@ -72,12 +72,24 @@
>
> #define MSR_IA32_APICBASE 0x0000001b
> #define MSR_IA32_FEATURE_CONTROL 0x0000003a
> +#define MSR_IA32_MCG_CTL 0x0000017b
> #define MSR_IA32_PAT 0x00000277
> #define MSR_IA32_MTRR_DEF_TYPE 0x000002ff
> #define MSR_IA32_SYSENTER_CS 0x00000174
> #define MSR_IA32_SYSENTER_ESP 0x00000175
> #define MSR_IA32_SYSENTER_EIP 0x00000176
> +#define MSR_IA32_PERF_CTL 0x00000199
> +#define MSR_IA32_THERM_INTERRUPT 0x0000019b
> +#define MSR_IA32_MISC_ENABLE 0x000001a0
> +#define MSR_OFFCORE_RSP_0 0x000001a6
> +#define MSR_OFFCORE_RSP_1 0x000001a7
> +#define MSR_IA32_PACKAGE_THERM_INTERRUPT 0x000001b2
> +#define MSR_IA32_MC0_CTL2 0x00000280
> +#define MSR_IA32_MC31_CTL2 0x0000029f
> +#define MSR_IA32_FIXED_CTR_CTRL 0x0000038d
> #define MSR_IA32_PERF_GLOBAL_CTRL 0x0000038f
> +#define MSR_IA32_MC0_CTL 0x00000400
> +#define MSR_IA32_MC28_MISC 0x00000473
> #define MSR_IA32_VMX_BASIC 0x00000480
> #define MSR_IA32_VMX_PINBASED_CTLS 0x00000481
> #define MSR_IA32_VMX_PROCBASED_CTLS 0x00000482
> diff --git a/hypervisor/arch/x86/vcpu.c b/hypervisor/arch/x86/vcpu.c
> index 638d166..c9e541d 100644
> --- a/hypervisor/arch/x86/vcpu.c
> +++ b/hypervisor/arch/x86/vcpu.c
> @@ -35,6 +35,7 @@ static u8 __attribute__((aligned(PAGE_SIZE)))
> parking_code[PAGE_SIZE] = {
> };
>
> struct paging_structures parking_pt;
> +static u32 misc_enable_reserved_bits = 0x1;
Why a variable? Seems constant.
>
> int vcpu_early_init(void)
> {
> @@ -325,6 +326,46 @@ bool vcpu_handle_msr_write(void)
> vcpu_vendor_set_guest_pat((val & MTRR_ENABLE) ?
> cpu_data->pat : 0);
> break;
> + case MSR_IA32_MISC_ENABLE:
> + /* Check for Fast-Strings Enable bit only set */
> + val = get_wrmsr_value(&cpu_data->guest_regs);
> + if ((misc_enable_reserved_bits & val)
> + != misc_enable_reserved_bits) {
> + printk("FATAL: Invalid value on MSR_IA32_MISC_ENABLE "
> + "write: %lx\n", val);
> + return false;
> + }
> + /* No-op for the following, for they may affect things
> + * on a granularity bigger than originating core
> + * and/or they will be handled later on open TODO
> + * entries (e.g. MCE processing and managed
> + * forwarding) */
> + case MSR_IA32_MCG_CTL:
> + /* Enables/disables MCE reporting (globally) */
> + case MSR_IA32_MC0_CTL ... MSR_IA32_MC28_MISC:
> + /* Control signaling of MC for errors produced by a
> + * particular hardware unit */
> + case MSR_IA32_MC0_CTL2 ... MSR_IA32_MC31_CTL2:
> + /* Programming interface to use corrected MC error
> + * signaling */
> + case MSR_IA32_PERF_CTL:
> + /* Used to temporarily disable opportunistic processor
> + * performance operation, but may affect the whole
> + * system */
> + case MSR_IA32_FIXED_CTR_CTRL:
> + /* Control for fixed-function performance counters. May be
> + * unique per package. */
> + case MSR_IA32_THERM_INTERRUPT:
> + case MSR_IA32_PACKAGE_THERM_INTERRUPT:
> + /* Management of thermal events. The non-package
> + * variant may still be unique on some
> + * micro-architectures */
> + case MSR_OFFCORE_RSP_0:
> + case MSR_OFFCORE_RSP_1:
> + /* These offcore counters have information on shared
> + * resources, so we'd better block at least writing on
> + * them */
Do none of these ignored writes cause trouble for Linux when it reads them
back? Does KVM do something more for any of them?
> + break;
> default:
> panic_printk("FATAL: Unhandled MSR write: %lx\n",
> cpu_data->guest_regs.rcx);
> diff --git a/hypervisor/arch/x86/vmx.c b/hypervisor/arch/x86/vmx.c
> index 0a6e0ce..0868901 100644
> --- a/hypervisor/arch/x86/vmx.c
> +++ b/hypervisor/arch/x86/vmx.c
> @@ -34,48 +34,196 @@ static const struct segment invalid_seg = {
> .access_rights = 0x10000
> };
>
> -/* bit cleared: direct access allowed */
> -// TODO: convert to whitelist
> +/* MSR access whitelist: each bit *set* (LSB 0) will cause a VM-exit,
> + * so all explicit registers listed out of "denial slices" are safe to
> + * be accessed in the given mode without Jailhouse's intervention */
> static u8 __attribute__((aligned(PAGE_SIZE))) msr_bitmap[][0x2000/8] = {
> - [ VMX_MSR_BMP_0000_READ ] = {
> - [ 0/8 ... 0x26f/8 ] = 0,
> - [ 0x270/8 ... 0x277/8 ] = 0x80, /* 0x277 */
> - [ 0x278/8 ... 0x2f7/8 ] = 0,
> - [ 0x2f8/8 ... 0x2ff/8 ] = 0x80, /* 0x2ff */
> - [ 0x300/8 ... 0x7ff/8 ] = 0,
> - [ 0x800/8 ... 0x807/8 ] = 0x0c, /* 0x802, 0x803 */
> - [ 0x808/8 ... 0x80f/8 ] = 0xa5, /* 0x808, 0x80a, 0x80d, 0x80f
> */
> - [ 0x810/8 ... 0x817/8 ] = 0xff, /* 0x810 - 0x817 */
> - [ 0x818/8 ... 0x81f/8 ] = 0xff, /* 0x818 - 0x81f */
> - [ 0x820/8 ... 0x827/8 ] = 0xff, /* 0x820 - 0x827 */
> - [ 0x828/8 ... 0x82f/8 ] = 0x81, /* 0x828, 0x82f */
> - [ 0x830/8 ... 0x837/8 ] = 0xfd, /* 0x830, 0x832 - 0x837 */
> - [ 0x838/8 ... 0x83f/8 ] = 0x43, /* 0x838, 0x839, 0x83e */
> - [ 0x840/8 ... 0x1fff/8 ] = 0,
> + [ VMX_MSR_BMP_0000_READ ] = {
> + /* Deny everything first */
> + [ 0x0/8 ... 0x1fff/8 ] = 0xff,
Hmm, that will prevent us from enabling -Werror=override-init, like we
did for configs. Not yet sure if that is bad or acceptable. Is
overriding like this well-defined behaviour in C?
> +
> + /* Platform ID/Machine Info/Feature Control */
> + [ 0x10/8 ... 0x17/8 ] = 0x7f, /* 0x17-IA32_PLATFORM_ID */
> + [ 0x38/8 ... 0x3f/8 ] = 0xbf, /* 0x3a-IA32_FEATURE_CONTROL
> */
> + [ 0xc8/8 ... 0xcf/8 ] = 0xbf, /* 0xce-MSR_PLATFORM_INFO */
> +
> + /* APIC */
> + [ 0x18/8 ... 0x1f/8 ] = 0xf7, /* 0x01b-IA32_APIC_BASE */
> + [ 0x6e0/8 ... 0x6e7/8 ] = 0xfe, /* 0x6e0-IA32_TSC_DEADLINE */
> +
> + /* Microcode update (Linux will restore it) */
> + [ 0x78/8 ... 0x7f/8 ] = 0xfd, /* 0x79-IA32_BIOS_UPDT_TRIG */
> + [ 0x88/8 ... 0x8f/8 ] = 0xf7, /* 0x8b-IA32_BIOS_SIGN_ID */
> +
> + /* Machine-Check Exception */
> + [ 0/8 ... 0xf/8 ] = 0xfc, /* 0x0-IA32_P5_MC_ADDR,
> + * 0x1-IA32_P5_MC_TYPE */
> + [ 0x178/8 ... 0x17f/8 ] = 0xf9, /* 0x179-IA32_MCG_CAP,
> + * 0x17a-IA32_MCG_STATUS */
> + [ 0x280/8 ... 0x29f/8 ] = 0, /* 0x280-IA32_MC0_CTL2,
> + * [...]
> + * 0x29f-IA32_MC31_CTL2 */
> + [ 0x400/8 ... 0x46f/8 ] = 0, /* 0x400-IA32_MC0_CTL,
> + * [...]
> + * 0x46f-IA32_MC27_MISC */
> + [ 0x470/8 ... 0x477/8 ] = 0xf0, /* 0x470-IA32_MC28_CTL,
> + * 0x471-IA32_MC28_STATUS
> + * 0x472-IA32_MC28_ADDR
> + * 0x473-IA32_MC28_MISC */
> +
> + /* Fast System Call */
> + [ 0x170/8 ... 0x177/8 ] = 0x8f, /* 0x174-IA32_SYSENTER_CS,
> + * 0x175-IA32_SYSENTER_ESP,
> + * 0x176-IA32_SYSENTER_EIP */
> +
> + /* Performance/thermal Control */
> + [ 0xe0/8 ... 0xe7/8 ] = 0x7f, /* 0xe7-IA32_MPERF */
> + [ 0xe8/8 ... 0xef/8 ] = 0xfe, /* 0xe8-IA32_APERF */
> + [ 0x198/8 ... 0x19f/8 ] = 0xf5, /* 0x199-IA32_PERF_CTL,
> + * 0x19b-IA32_THERM_INTERRUPT
> */
> + [ 0x1b0/8 ... 0x1b7/8 ] = 0xfa, /*
> 0x1b0-IA32_ENERGY_PERF_BIAS,
> + *
> 0x1b2-IA32_PACKAGE_THERM_INTERRUPT */
> +
> + /* Performance Monitoring (and misc) */
> + [ 0x180/8 ... 0x187/8 ] = 0x3f, /* 0x186-IA32_PERFEVTSEL0,
> + * 0x187-IA32_PERFEVTSEL1 */
> + [ 0x188/8 ... 0x18f/8 ] = 0xfc, /* 0x188-IA32_PERFEVTSEL2,
> + * 0x189-IA32_PERFEVTSEL3 */
> + [ 0x1a0/8 ... 0x1a7/8 ] = 0x3e, /* 0x1a0-IA32_MISC_ENABLE,
> + * 0x1a6-MSR_OFFCORE_RSP_0,
> + * 0x1a7-MSR_OFFCORE_RSP_1 */
> + [ 0x1c8/8 ... 0x1cf/8 ] = 0xfd, /* 0x1c9-MSR_LASTBRANCH_TOS */
> + [ 0x340/8 ... 0x347/8 ] = 0xdf, /*
> 0x345-IA32_PERF_CAPABILITIES */
> + [ 0x388/8 ... 0x38f/8 ] = 0xdf, /* 0x38d-IA32_FIXED_CTR_CTRL
> */
> + [ 0x4c0/8 ... 0x4c7/8 ] = 0x0, /* 0x4c1-IA32_A_PMC0,
> + * 0x4c2-IA32_A_PMC1,
> + * 0x4c3-IA32_A_PMC2,
> + * 0x4c4-IA32_A_PMC3,
> + * 0x4c5-IA32_A_PMC4,
> + * 0x4c6-IA32_A_PMC5,
> + * 0x4c7-IA32_A_PMC6 */
> + [ 0x4c8/8 ... 0x4cf/8 ] = 0xfe, /* 0x4c8-IA32_A_PMC7 */
> + [ 0x570/8 ... 0x577/8 ] = 0xfe, /* 0x570-IA32_RTIT_CTL */
> + [ 0x680/8 ... 0x69f/8 ] = 0x0, /*
> 0x680-MSR_LASTBRANCH_0_FROM_IP,
> + * [...]
> + *
> 0x69f-MSR_LASTBRANCH_31_FROM_IP */
> + [ 0x6c0/8 ... 0x6df/8 ] = 0x0, /*
> 0x6c0-MSR_LASTBRANCH_0_TO_IP,
> + * [...]
> + *
> 0x6df-MSR_LASTBRANCH_31_TO_IP */
> +
> + /* MTRRs (Memory Type Range Registers) */
> + [ 0xf8/8 ... 0xff/8 ] = 0xbf, /* 0xfe-IA32_MTRRCAP */
> + [ 0x200/8 ... 0x20f/8 ] = 0, /* 0x200-IA32_MTRR_PHYSBASE0,
> + * 0x201-IA32_MTRR_PHYSMASK0,
> + * [...]
> + * 0x20e-IA32_MTRR_PHYSBASE7,
> + * 0x20f-IA32_MTRR_PHYSMASK7 */
> + [ 0x210/8 ... 0x217/8 ] = 0xf0, /* 0x210-IA32_MTRR_PHYSBASE8,
> + * 0x211-IA32_MTRR_PHYSMASK8,
> + * 0x212-IA32_MTRR_PHYSBASE9,
> + * 0x213-IA32_MTRR_PHYSMASK9
> */
> + [ 0x250/8 ... 0x257/8 ] = 0xfe, /*
> 0x250-IA32_MTRR_FIX64K_00000 */
> + [ 0x258/8 ... 0x25f/8 ] = 0xfc, /*
> 0x258-IA32_MTRR_FIX16K_80000,
> + *
> 0x259-IA32_MTRR_FIX16K_A0000 */
> + [ 0x268/8 ... 0x26f/8 ] = 0, /* 0x268-IA32_MTRR_FIX4K_C0000,
> + * [...]
> + * 0x26f-IA32_MTRR_FIX4K_F8000 */
> + [ 0x270/8 ... 0x277/8 ] = 0x7f, /* 0x277-IA32_PAT */
> + [ 0x2f8/8 ... 0x2ff/8 ] = 0x7f, /* 0x2ff-IA32_MTRR_DEF_TYPE */
> +
> + /* Virtualization */
> + [ 0x480/8 ... 0x48f/8 ] = 0, /* 0x480-IA32_VMX_BASIC,
> + * [...]
> + * 0x48f-IA32_VMX_TRUE_EXIT_CTLS
> */
> + [ 0x490/8 ... 0x497/8 ] = 0xfc, /*
> 0x490-IA32_VMX_TRUE_ENTRY_CTLS,
> + * 0x491-IA32_VMX_VMFUNC */
> },
> [ VMX_MSR_BMP_C000_READ ] = {
> - [ 0/8 ... 0x1fff/8 ] = 0,
> + /* Deny everything first */
> + [ 0/8 ... 0x1fff/8 ] = 0xff,
> +
> + /* Fast System Call + EFER + TSC_AUX */
> + [ 0x80/8 ... 0x87/8 ] = 0xe0, /* 0xc0000080-IA32_EFER,
> + * 0xc0000081-IA32_STAR,
> + * 0xc0000082-IA32_LSTAR,
> + * 0xc0000083-IA32_CSTAR,
> + * 0xc0000084-IA32_FMASK */
> + [ 0x100/8 ... 0x107/8 ] = 0xf0, /* 0xc0000100-IA32_FS_BASE,
> + * 0xc0000101-IA32_GS_BASE
> + *
> 0xc0000102-IA32_KERNEL_GS_BASE,
> + * 0xc0000103-IA32_TSC_AUX */
> },
> [ VMX_MSR_BMP_0000_WRITE ] = {
> - [ 0/8 ... 0x17/8 ] = 0,
> - [ 0x18/8 ... 0x1f/8 ] = 0x08, /* 0x01b */
> - [ 0x20/8 ... 0x1ff/8 ] = 0,
> - [ 0x200/8 ... 0x277/8 ] = 0xff, /* 0x200 - 0x277 */
> - [ 0x278/8 ... 0x2f7/8 ] = 0,
> - [ 0x2f8/8 ... 0x2ff/8 ] = 0x80, /* 0x2ff */
> - [ 0x300/8 ... 0x387/8 ] = 0,
> - [ 0x388/8 ... 0x38f/8 ] = 0x80, /* 0x38f */
> - [ 0x390/8 ... 0x7ff/8 ] = 0,
> - [ 0x808/8 ... 0x80f/8 ] = 0x89, /* 0x808, 0x80b, 0x80f */
> - [ 0x810/8 ... 0x827/8 ] = 0,
> - [ 0x828/8 ... 0x82f/8 ] = 0x81, /* 0x828, 0x82f */
> - [ 0x830/8 ... 0x837/8 ] = 0xfd, /* 0x830, 0x832 - 0x837 */
> - [ 0x838/8 ... 0x83f/8 ] = 0xc1, /* 0x838, 0x83e, 0x83f */
> - [ 0x840/8 ... 0xd8f/8 ] = 0xff, /* esp. 0xc80 - 0xd8f */
> - [ 0xd90/8 ... 0x1fff/8 ] = 0,
> + /* Deny everything first */
> + [ 0/8 ... 0x1fff/8 ] = 0xff,
> +
> + /* APIC */
> + [ 0x18/8 ... 0x1f/8 ] = 0xf7, /* 0x01b-IA32_APIC_BASE */
> + [ 0x6e0/8 ... 0x6e7/8 ] = 0xfe, /* 0x6e0-IA32_TSC_DEADLINE */
> +
> + /* Microcode update (Linux will restore it) */
> + [ 0x78/8 ... 0x7f/8 ] = 0xfd, /* 0x79-IA32_BIOS_UPDT_TRIG */
> + [ 0x88/8 ... 0x8f/8 ] = 0xf7, /* 0x8b-IA32_BIOS_SIGN_ID */
> +
> + /* Fast System Call */
> + [ 0x170/8 ... 0x177/8 ] = 0x8f, /* 0x174-IA32_SYSENTER_CS,
> + * 0x175-IA32_SYSENTER_ESP,
> + * 0x176-IA32_SYSENTER_EIP */
> +
> + /* Performance Monitoring */
> + [ 0x180/8 ... 0x187/8 ] = 0x3f, /* 0x186-IA32_PERFEVTSEL0,
> + * 0x187-IA32_PERFEVTSEL1 */
> + [ 0x188/8 ... 0x18f/8 ] = 0xfc, /* 0x188-IA32_PERFEVTSEL2,
> + * 0x189-IA32_PERFEVTSEL3 */
> + [ 0x1c8/8 ... 0x1cf/8 ] = 0xfd, /* 0x1c9-MSR_LASTBRANCH_TOS */
> + [ 0x4c0/8 ... 0x4c7/8 ] = 0x0, /* 0x4c1-IA32_A_PMC0,
> + * 0x4c2-IA32_A_PMC1,
> + * 0x4c3-IA32_A_PMC2,
> + * 0x4c4-IA32_A_PMC3,
> + * 0x4c5-IA32_A_PMC4,
> + * 0x4c6-IA32_A_PMC5,
> + * 0x4c7-IA32_A_PMC6 */
> + [ 0x4c8/8 ... 0x4cf/8 ] = 0xfe, /* 0x4c8-IA32_A_PMC7 */
> + [ 0x570/8 ... 0x577/8 ] = 0xfe, /* 0x570-IA32_RTIT_CTL */
> + [ 0x680/8 ... 0x69f/8 ] = 0x0, /*
> 0x680-MSR_LASTBRANCH_0_FROM_IP,
> + * [...]
> + *
> 0x69f-MSR_LASTBRANCH_31_FROM_IP */
> + [ 0x6c0/8 ... 0x6df/8 ] = 0x0, /*
> 0x6c0-MSR_LASTBRANCH_0_TO_IP,
> + * [...]
> + *
> 0x6df-MSR_LASTBRANCH_31_TO_IP */
> },
> [ VMX_MSR_BMP_C000_WRITE ] = {
> - [ 0/8 ... 0x1fff/8 ] = 0,
> + /* Deny everything first */
> + [ 0x10/8 ... 0x1fff/8 ] = 0xff,
> +
> + /* Fast System Call + EFER + TSC_AUX */
> + [ 0x80/8 ... 0x87/8 ] = 0xe0, /* 0xc0000080-IA32_EFER,
> + * 0xc0000081-IA32_STAR,
> + * 0xc0000082-IA32_LSTAR,
> + * 0xc0000083-IA32_CSTAR,
> + * 0xc0000084-IA32_FMASK */
> + [ 0x100/8 ... 0x107/8 ] = 0xf0, /* 0xc0000100-IA32_FS_BASE,
> + * 0xc0000101-IA32_GS_BASE
> + *
> 0xc0000102-IA32_KERNEL_GS_BASE,
> + * 0xc0000103-IA32_TSC_AUX */
> +
> + /* MTRRs (Memory Type Range Registers) */
> + [ 0xf8/8 ... 0xff/8 ] = 0xbf, /* 0xfe-IA32_MTRRCAP */
> + [ 0x200/8 ... 0x20f/8 ] = 0, /* 0x200-IA32_MTRR_PHYSBASE0,
> + * 0x201-IA32_MTRR_PHYSMASK0,
> + * [...]
> + * 0x20e-IA32_MTRR_PHYSBASE7,
> + * 0x20f-IA32_MTRR_PHYSMASK7 */
> + [ 0x210/8 ... 0x217/8 ] = 0xf0, /* 0x210-IA32_MTRR_PHYSBASE8,
> + * 0x211-IA32_MTRR_PHYSMASK8,
> + * 0x212-IA32_MTRR_PHYSBASE9,
> + * 0x213-IA32_MTRR_PHYSMASK9
> */
> + [ 0x250/8 ... 0x257/8 ] = 0xfe, /*
> 0x250-IA32_MTRR_FIX64K_00000 */
> + [ 0x258/8 ... 0x25f/8 ] = 0xfc, /*
> 0x258-IA32_MTRR_FIX16K_80000,
> + *
> 0x259-IA32_MTRR_FIX16K_A0000 */
> + [ 0x268/8 ... 0x26f/8 ] = 0, /* 0x268-IA32_MTRR_FIX4K_C0000,
> + * [...]
> + * 0x26f-IA32_MTRR_FIX4K_F8000 */
> },
> };
>
>
Thanks for the hard work! "Just a few lines" of reconfiguration, but
figuring out all the requirements was surely not that easy.
We need to give this a try.
Jan
--
You received this message because you are subscribed to the Google Groups
"Jailhouse" group.
To unsubscribe from this group and stop receiving emails from it, send an email
to [email protected].
For more options, visit https://groups.google.com/d/optout.