On 06/04/18 08:52, Juergen Gross wrote:
> Instead of switching XPTI globally on or off add a per-domain flag for
> that purpose. This allows to modify the xpti boot parameter to support
> running dom0 without Meltdown mitigations. Using "xpti=nodom0" as boot
> parameter will achieve that.
>
> Move the xpti boot parameter handling to xen/arch/x86/pv/domain.c as
> it is pv-domain specific.
>
> Signed-off-by: Juergen Gross <jgr...@suse.com>
> Reviewed-by: Jan Beulich <jbeul...@suse.com>
> ---
> V3:
> - latch get_cpu_info() return value in variable (Jan Beulich)
> - call always xpti_domain_init() for pv dom0 (Jan Beulich)
> - add __init annotations (Jan Beulich)
> - drop per domain XPTI message (Jan Beulich)
> - document xpti=default support (Jan Beulich)
> - move domain xpti flag into a padding hole (Jan Beulich)
> ---
>  docs/misc/xen-command-line.markdown | 10 ++++-
>  xen/arch/x86/domctl.c               |  4 ++
>  xen/arch/x86/mm.c                   | 12 +++++-
>  xen/arch/x86/pv/dom0_build.c        |  3 ++
>  xen/arch/x86/pv/domain.c            | 77 
> +++++++++++++++++++++++++++++++++++++
>  xen/arch/x86/setup.c                | 20 +---------
>  xen/arch/x86/smpboot.c              |  4 +-
>  xen/arch/x86/x86_64/entry.S         |  2 +
>  xen/include/asm-x86/current.h       |  3 +-
>  xen/include/asm-x86/domain.h        |  3 ++
>  xen/include/asm-x86/pv/domain.h     |  4 ++
>  11 files changed, 118 insertions(+), 24 deletions(-)
>
> diff --git a/docs/misc/xen-command-line.markdown 
> b/docs/misc/xen-command-line.markdown
> index b353352adf..79be9a6ba5 100644
> --- a/docs/misc/xen-command-line.markdown
> +++ b/docs/misc/xen-command-line.markdown
> @@ -1955,7 +1955,7 @@ clustered mode.  The default, given no hint from the 
> **FADT**, is cluster
>  mode.
>  
>  ### xpti
> -> `= <boolean>`
> +> `= default | nodom0 | <boolean>`
>  
>  > Default: `false` on AMD hardware
>  > Default: `true` everywhere else
> @@ -1963,6 +1963,14 @@ mode.
>  Override default selection of whether to isolate 64-bit PV guest page
>  tables.
>  
> +`true` activates page table isolation even on AMD hardware.
> +
> +`false` deactivates page table isolation on all systems.
> +
> +`default` sets the default behaviour.
> +
> +`nodom0` deactivates page table isolation for dom0. 

Thinking more about this, a simple set of options like this is
insufficiently expressive.  What if I'd like to run domU's but not dom0
with XPTI on AMD hardware?

How about `= List of [ <boolean>, dom0=bool ]`

which allows for selecting dom0 independently of the general default. 
It also avoids the somewhat odd "nodom0" which doesn't match our
prevailing style of "no-" prefixes for boolean options.

Also, the text should note that the dom0 option is ignored for non-PV
dom0's.

> +
>  ### xsave
>  > `= <boolean>`
>  
> diff --git a/xen/arch/x86/domctl.c b/xen/arch/x86/domctl.c
> index 8fbbf3aeb3..0704f398c7 100644
> --- a/xen/arch/x86/domctl.c
> +++ b/xen/arch/x86/domctl.c
> @@ -24,6 +24,7 @@
>  #include <asm/hvm/hvm.h>
>  #include <asm/hvm/support.h>
>  #include <asm/processor.h>
> +#include <asm/pv/domain.h>
>  #include <asm/acpi.h> /* for hvm_acpi_power_button */
>  #include <xen/hypercall.h> /* for arch_do_domctl */
>  #include <xsm/xsm.h>
> @@ -610,6 +611,9 @@ long arch_do_domctl(
>              ret = switch_compat(d);
>          else
>              ret = -EINVAL;
> +
> +        if ( ret == 0 )
> +            xpti_domain_init(d);
>          break;
>  
>      case XEN_DOMCTL_get_address_size:
> diff --git a/xen/arch/x86/mm.c b/xen/arch/x86/mm.c
> index cf2ccb07e6..131433af9b 100644
> --- a/xen/arch/x86/mm.c
> +++ b/xen/arch/x86/mm.c
> @@ -505,13 +505,21 @@ void make_cr3(struct vcpu *v, mfn_t mfn)
>  
>  void write_ptbase(struct vcpu *v)
>  {
> -    if ( this_cpu(root_pgt) && is_pv_vcpu(v) && !is_pv_32bit_vcpu(v) )
> +    struct cpu_info *cpu_info = get_cpu_info();
> +
> +    if ( is_pv_vcpu(v) && v->domain->arch.pv_domain.xpti )
>      {
> -        get_cpu_info()->root_pgt_changed = true;
> +        cpu_info->root_pgt_changed = true;
> +        cpu_info->pv_cr3 = __pa(this_cpu(root_pgt));
>          asm volatile ( "mov %0, %%cr3" : : "r" (v->arch.cr3) : "memory" );
>      }
>      else
> +    {
> +        /* Make sure to clear xen_cr3 before pv_cr3; write_cr3() serializes. 
> */
> +        cpu_info->xen_cr3 = 0;
>          write_cr3(v->arch.cr3);
> +        cpu_info->pv_cr3 = 0;
> +    }
>  }
>  
>  /*
> diff --git a/xen/arch/x86/pv/dom0_build.c b/xen/arch/x86/pv/dom0_build.c
> index 0bd2f1bf90..77186c19bd 100644
> --- a/xen/arch/x86/pv/dom0_build.c
> +++ b/xen/arch/x86/pv/dom0_build.c
> @@ -19,6 +19,7 @@
>  #include <asm/dom0_build.h>
>  #include <asm/guest.h>
>  #include <asm/page.h>
> +#include <asm/pv/domain.h>
>  #include <asm/pv/mm.h>
>  #include <asm/setup.h>
>  
> @@ -707,6 +708,8 @@ int __init dom0_construct_pv(struct domain *d,
>              cpu = p->processor;
>      }
>  
> +    xpti_domain_init(d);
> +
>      d->arch.paging.mode = 0;
>  
>      /* Set up CR3 value for write_ptbase */
> diff --git a/xen/arch/x86/pv/domain.c b/xen/arch/x86/pv/domain.c
> index 42522a2db3..2bef9c48bc 100644
> --- a/xen/arch/x86/pv/domain.c
> +++ b/xen/arch/x86/pv/domain.c
> @@ -9,6 +9,8 @@
>  #include <xen/lib.h>
>  #include <xen/sched.h>
>  
> +#include <asm/cpufeature.h>
> +#include <asm/msr-index.h>
>  #include <asm/pv/domain.h>
>  
>  /* Override macros from asm/page.h to make them work with mfn_t */
> @@ -17,6 +19,81 @@
>  #undef page_to_mfn
>  #define page_to_mfn(pg) _mfn(__page_to_mfn(pg))
>  
> +static __read_mostly enum {
> +    XPTI_DEFAULT,
> +    XPTI_ON,
> +    XPTI_OFF,
> +    XPTI_NODOM0
> +} opt_xpti = XPTI_DEFAULT;
> +
> +static __init int parse_xpti(const char *s)
> +{
> +    int rc = 0;
> +
> +    switch ( parse_bool(s, NULL) )
> +    {
> +    case 0:
> +        opt_xpti = XPTI_OFF;
> +        break;
> +    case 1:
> +        opt_xpti = XPTI_ON;
> +        break;
> +    default:
> +        if ( !strcmp(s, "default") )
> +            opt_xpti = XPTI_DEFAULT;
> +        else if ( !strcmp(s, "nodom0") )
> +            opt_xpti = XPTI_NODOM0;
> +        else
> +            rc = -EINVAL;
> +        break;
> +    }
> +
> +    return rc;
> +}
> +custom_param("xpti", parse_xpti);
> +
> +void __init xpti_init(void)
> +{
> +    uint64_t caps = 0;
> +
> +    if ( boot_cpu_data.x86_vendor == X86_VENDOR_AMD )
> +        caps = ARCH_CAPABILITIES_RDCL_NO;
> +    else if ( boot_cpu_has(X86_FEATURE_ARCH_CAPS) )
> +        rdmsrl(MSR_ARCH_CAPABILITIES, caps);
> +
> +    if ( opt_xpti != XPTI_ON && (caps & ARCH_CAPABILITIES_RDCL_NO) )
> +        opt_xpti = XPTI_OFF;
> +    else if ( opt_xpti == XPTI_DEFAULT )
> +        opt_xpti = XPTI_ON;
> +
> +    if ( opt_xpti == XPTI_OFF )
> +        setup_force_cpu_cap(X86_FEATURE_NO_XPTI);
> +    else
> +        setup_clear_cpu_cap(X86_FEATURE_NO_XPTI);
> +}

Hmm - its probably arguable, but I'd say that init work for XPTI really
does belong in spec_ctrl.c rather than pv/domain.c.  This would also
avoid having both a toplevel xpti_init() and
init_speculation_migrations() call from __start_xen().

> +
> +void xpti_domain_init(struct domain *d)
> +{
> +    if ( !is_pv_domain(d) || is_pv_32bit_domain(d) )
> +        return;

Nice, but nil pois.  You've ended up with the same bug as my patch has,
despite attempting to fix the 32bit case.  This 32bit check causes an
early exit and leaves xpti set.

The least invasive way to fix is to have the switch statement fill in a
local xpti boolean, then have

d->arch.pv_domain.xpti = xpti && is_pv_32bit_domain(d);

at the end.

> +
> +    switch ( opt_xpti )
> +    {
> +    case XPTI_OFF:
> +        break;
> +    case XPTI_ON:
> +        d->arch.pv_domain.xpti = true;
> +        break;
> +    case XPTI_NODOM0:
> +        d->arch.pv_domain.xpti = d->domain_id != 0 &&
> +                                 d->domain_id != hardware_domid;
> +        break;

Newlines after breaks please.

This wants to be is_hardware_domain(d), but that is liable to cause
problems when constructing dom0.  Instead, I'd suggest just the latter
half of conditional, as hardware_domid is 0 until dom0 chooses to build
a second hardware domain.

~Andrew

> +    default:
> +        ASSERT_UNREACHABLE();
> +        break;
> +    }
> +}
> +
>  static void noreturn continue_nonidle_domain(struct vcpu *v)
>  {
>      check_wakeup_from_wait();
> diff --git a/xen/arch/x86/setup.c b/xen/arch/x86/setup.c
> index 02673d9512..e83d3b4f07 100644
> --- a/xen/arch/x86/setup.c
> +++ b/xen/arch/x86/setup.c
> @@ -53,6 +53,7 @@
>  #include <asm/cpuid.h>
>  #include <asm/spec_ctrl.h>
>  #include <asm/guest.h>
> +#include <asm/pv/domain.h>
>  
>  /* opt_nosmp: If true, secondary processors are ignored. */
>  static bool __initdata opt_nosmp;
> @@ -169,9 +170,6 @@ static int __init parse_smap_param(const char *s)
>  }
>  custom_param("smap", parse_smap_param);
>  
> -static int8_t __initdata opt_xpti = -1;
> -boolean_param("xpti", opt_xpti);
> -
>  bool __read_mostly acpi_disabled;
>  bool __initdata acpi_force;
>  static char __initdata acpi_param[10] = "";
> @@ -1546,21 +1544,7 @@ void __init noreturn __start_xen(unsigned long mbi_p)
>  
>      cr4_pv32_mask = mmu_cr4_features & XEN_CR4_PV32_BITS;
>  
> -    if ( opt_xpti < 0 )
> -    {
> -        uint64_t caps = 0;
> -
> -        if ( boot_cpu_data.x86_vendor == X86_VENDOR_AMD )
> -            caps = ARCH_CAPABILITIES_RDCL_NO;
> -        else if ( boot_cpu_has(X86_FEATURE_ARCH_CAPS) )
> -            rdmsrl(MSR_ARCH_CAPABILITIES, caps);
> -
> -        opt_xpti = !(caps & ARCH_CAPABILITIES_RDCL_NO);
> -    }
> -    if ( opt_xpti )
> -        setup_clear_cpu_cap(X86_FEATURE_NO_XPTI);
> -    else
> -        setup_force_cpu_cap(X86_FEATURE_NO_XPTI);
> +    xpti_init();
>  
>      if ( cpu_has_fsgsbase )
>          set_in_cr4(X86_CR4_FSGSBASE);
> diff --git a/xen/arch/x86/smpboot.c b/xen/arch/x86/smpboot.c
> index b0b72ca544..346a8e8a3f 100644
> --- a/xen/arch/x86/smpboot.c
> +++ b/xen/arch/x86/smpboot.c
> @@ -331,7 +331,7 @@ void start_secondary(void *unused)
>      spin_debug_disable();
>  
>      get_cpu_info()->xen_cr3 = 0;
> -    get_cpu_info()->pv_cr3 = this_cpu(root_pgt) ? __pa(this_cpu(root_pgt)) : 
> 0;
> +    get_cpu_info()->pv_cr3 = 0;
>  
>      load_system_tables();
>  
> @@ -1050,7 +1050,7 @@ void __init smp_prepare_cpus(unsigned int max_cpus)
>          panic("Error %d setting up PV root page table\n", rc);
>      if ( per_cpu(root_pgt, 0) )
>      {
> -        get_cpu_info()->pv_cr3 = __pa(per_cpu(root_pgt, 0));
> +        get_cpu_info()->pv_cr3 = 0;
>  
>          /*
>           * All entry points which may need to switch page tables have to 
> start
> diff --git a/xen/arch/x86/x86_64/entry.S b/xen/arch/x86/x86_64/entry.S
> index 30c9da5446..5026cfa490 100644
> --- a/xen/arch/x86/x86_64/entry.S
> +++ b/xen/arch/x86/x86_64/entry.S
> @@ -129,6 +129,8 @@ restore_all_guest:
>          mov   VCPU_cr3(%rbx), %r9
>          GET_STACK_END(dx)
>          mov   STACK_CPUINFO_FIELD(pv_cr3)(%rdx), %rax
> +        test  %rax, %rax
> +        jz    .Lrag_keep_cr3
>          cmpb  $0, STACK_CPUINFO_FIELD(root_pgt_changed)(%rdx)
>          je    .Lrag_copy_done
>          movb  $0, STACK_CPUINFO_FIELD(root_pgt_changed)(%rdx)
> diff --git a/xen/include/asm-x86/current.h b/xen/include/asm-x86/current.h
> index f2491b4423..b2475783f8 100644
> --- a/xen/include/asm-x86/current.h
> +++ b/xen/include/asm-x86/current.h
> @@ -44,7 +44,8 @@ struct cpu_info {
>      /*
>       * Of the two following fields the latter is being set to the CR3 value
>       * to be used on the given pCPU for loading whenever 64-bit PV guest
> -     * context is being entered. The value never changes once set.
> +     * context is being entered. A value of zero indicates no setting of CR3
> +     * is to be performed.
>       * The former is the value to restore when re-entering Xen, if any. IOW
>       * its value being zero means there's nothing to restore. However, its
>       * value can also be negative, indicating to the exit-to-Xen code that
> diff --git a/xen/include/asm-x86/domain.h b/xen/include/asm-x86/domain.h
> index a12ae47f1b..ed4199931a 100644
> --- a/xen/include/asm-x86/domain.h
> +++ b/xen/include/asm-x86/domain.h
> @@ -253,6 +253,9 @@ struct pv_domain
>  
>      atomic_t nr_l4_pages;
>  
> +    /* XPTI active? */
> +    bool xpti;
> +
>      /* map_domain_page() mapping cache. */
>      struct mapcache_domain mapcache;
>  
> diff --git a/xen/include/asm-x86/pv/domain.h b/xen/include/asm-x86/pv/domain.h
> index 5e34176939..911e5dc07f 100644
> --- a/xen/include/asm-x86/pv/domain.h
> +++ b/xen/include/asm-x86/pv/domain.h
> @@ -27,6 +27,8 @@ void pv_vcpu_destroy(struct vcpu *v);
>  int pv_vcpu_initialise(struct vcpu *v);
>  void pv_domain_destroy(struct domain *d);
>  int pv_domain_initialise(struct domain *d);
> +void xpti_init(void);
> +void xpti_domain_init(struct domain *d);
>  
>  #else  /* !CONFIG_PV */
>  
> @@ -36,6 +38,8 @@ static inline void pv_vcpu_destroy(struct vcpu *v) {}
>  static inline int pv_vcpu_initialise(struct vcpu *v) { return -EOPNOTSUPP; }
>  static inline void pv_domain_destroy(struct domain *d) {}
>  static inline int pv_domain_initialise(struct domain *d) { return 
> -EOPNOTSUPP; }
> +static inline void xpti_init(void) {}
> +static inline void xpti_domain_init(struct domain *d) {}
>  
>  #endif       /* CONFIG_PV */
>  


_______________________________________________
Xen-devel mailing list
Xen-devel@lists.xenproject.org
https://lists.xenproject.org/mailman/listinfo/xen-devel

Reply via email to