Re: [Xen-devel] [PATCH] viridian: fix the HvFlushVirtualAddress/List hypercall implementation

2019-02-14 Thread Paul Durrant
> -----Original Message-----
> From: Jan Beulich [mailto:jbeul...@suse.com]
> Sent: 14 February 2019 10:43
> To: Paul Durrant 
> Cc: Andrew Cooper ; Roger Pau Monne
> ; Wei Liu ; xen-devel <de...@lists.xenproject.org>
> Subject: Re: [PATCH] viridian: fix the HvFlushVirtualAddress/List
> hypercall implementation
> 
> >>> On 13.02.19 at 16:39, <paul.durr...@citrix.com> wrote:
> > --- a/xen/arch/x86/hvm/hvm.c
> > +++ b/xen/arch/x86/hvm/hvm.c
> > @@ -3964,45 +3964,72 @@ static void hvm_s3_resume(struct domain *d)
> >  }
> >  }
> >
> > -static int hvmop_flush_tlb_all(void)
> > +static DEFINE_PER_CPU(cpumask_t, flush_cpumask);
> 
> It's not outright unacceptable (after all you delete the other one from
> viridian.c), but this amounts to half a kb of per-CPU data in a
> 4095-CPU build. Is there a reason not to use cpumask_scratch,
> given that the local CPU isn't undergoing any scheduling actions?

Only that I was unaware of it! Yes, that would work perfectly well.
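
Something like the following, perhaps (a minimal sketch; I'm assuming
cpumask_scratch is reachable from hvm.c via the usual this_cpu() accessor -
the exact macro/plumbing would need checking against sched-if.h for v2):

    bool hvm_flush_vcpu_tlb(bool (*flush_vcpu)(void *ctxt, struct vcpu *v),
                            void *ctxt)
    {
        /* Re-use the existing per-CPU scratch mask rather than defining
         * another cpumask_t of per-CPU data. */
        cpumask_t *mask = &this_cpu(cpumask_scratch);
        struct domain *d = current->domain;
        struct vcpu *v;
        ...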

> 
> If it's going to stay, my minimal request would be for it to be
> moved into the (only) function using it.
> 
> > +bool hvm_flush_vcpu_tlb(bool (*flush_vcpu)(void *ctxt, struct vcpu *v),
> > +void *ctxt)
> >  {
> > +cpumask_t *mask = &this_cpu(flush_cpumask);
> >  struct domain *d = current->domain;
> >  struct vcpu *v;
> >
> > -if ( !is_hvm_domain(d) )
> > -return -EINVAL;
> > -
> >  /* Avoid deadlock if more than one vcpu tries this at the same time. */
> >  if ( !spin_trylock(&d->hypercall_deadlock_mutex) )
> > -return -ERESTART;
> > +return false;
> >
> >  /* Pause all other vcpus. */
> >  for_each_vcpu ( d, v )
> > -if ( v != current )
> > +if ( v != current && flush_vcpu(ctxt, v) )
> >  vcpu_pause_nosync(v);
> >
> > +cpumask_clear(mask);
> > +
> >  /* Now that all VCPUs are signalled to deschedule, we wait... */
> >  for_each_vcpu ( d, v )
> > -if ( v != current )
> > -while ( !vcpu_runnable(v) && v->is_running )
> > -cpu_relax();
> > +{
> > +unsigned int cpu;
> > +
> > +if ( v == current || !flush_vcpu(ctxt, v) )
> > +continue;
> > +
> > +while ( !vcpu_runnable(v) && v->is_running )
> > +cpu_relax();
> > +
> > +cpu = read_atomic(&v->dirty_cpu);
> > +if ( is_vcpu_dirty_cpu(cpu) )
> > +__cpumask_set_cpu(cpu, mask);
> > +}
> >
> >  /* All other vcpus are paused, safe to unlock now. */
> >  spin_unlock(&d->hypercall_deadlock_mutex);
> >
> >  /* Flush paging-mode soft state (e.g., va->gfn cache; PAE PDPE cache). */
> >  for_each_vcpu ( d, v )
> > -paging_update_cr3(v, false);
> > +if ( flush_vcpu(ctxt, v) )
> > +paging_update_cr3(v, false);
> >
> > -/* Flush all dirty TLBs. */
> > -flush_tlb_mask(d->dirty_cpumask);
> > +/* Flush TLBs on all CPUs with dirty vcpu state. */
> > +flush_tlb_mask(mask);
> 
> The logic above skips the local CPU when accumulating into mask.

Ah yes, that's an oversight.

> Previously there was no such special case, and the local CPU is
> guaranteed to have its bit set in d->dirty_cpumask.
> 
> I also think you'd better delay latching dirty state as much as
> possible, as the chances then grow for the dirty state to be gone
> by the time you evaluate it. By moving the latching into the loop
> right after dropping the lock, you'd deal with both issues at the
> same time.
> 

Yes... exactly what I was thinking after your previous comment.
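
So, for v2, the tail of hvm_flush_vcpu_tlb() would end up looking roughly
like this (only re-arranging code already in the patch: latch dirty state
in the loop following the unlock, and don't skip the current vCPU there so
the local CPU's bit gets set as well):

    /* All other vcpus are paused, safe to unlock now. */
    spin_unlock(&d->hypercall_deadlock_mutex);

    cpumask_clear(mask);

    /* Flush paging-mode soft state (e.g., va->gfn cache; PAE PDPE cache),
     * latching dirty vcpu state as late as possible. */
    for_each_vcpu ( d, v )
    {
        unsigned int cpu;

        if ( !flush_vcpu(ctxt, v) )
            continue;

        paging_update_cr3(v, false);

        cpu = read_atomic(&v->dirty_cpu);
        if ( is_vcpu_dirty_cpu(cpu) )
            __cpumask_set_cpu(cpu, mask);
    }

    /* Flush TLBs on all CPUs with dirty vcpu state. */
    flush_tlb_mask(mask);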

> > --- a/xen/arch/x86/hvm/viridian/viridian.c
> > +++ b/xen/arch/x86/hvm/viridian/viridian.c
> > @@ -430,7 +430,12 @@ void viridian_domain_deinit(struct domain *d)
> >  viridian_vcpu_deinit(v);
> >  }
> >
> > -static DEFINE_PER_CPU(cpumask_t, ipi_cpumask);
> > +static bool need_flush(void *ctxt, struct vcpu *v)
> > +{
> > +uint64_t vcpu_mask = *(uint64_t *)ctxt;
> > +
> > +return vcpu_mask & (1ul << v->vcpu_id);
> > +}
> 
> I've been puzzled by similar code before - the Viridian interface
> continues to be limited to 64 CPUs?

Yes, in these hypercalls (and other similar ones). As I said in the commit 
comment, there are newer variants (having an 'Ex' suffix) that use a structure 
called an HV_VP_SET to allow more than 64 vcpus (by breaking cpus down into 
'banks' of 64).

> If so, as an unrelated
> change, shouldn't we deny turning on support for it on guests
> with more vCPU-s? Iirc Windows goes south in such a case.

I think Windows really should not be issuing these older hypercalls if the VM 
has more than 64 vCPUs, but I guess there's still a risk it might use one if all 
the vCPU indices for the particular hypercall happened to be <64. The right 
thing to do, of course, is to add support for the new variants and I'll get to 
that a.s.a.p.

  Paul

> 
> Jan
> 


Re: [Xen-devel] [PATCH] viridian: fix the HvFlushVirtualAddress/List hypercall implementation

2019-02-14 Thread Jan Beulich
>>> On 13.02.19 at 16:39, <paul.durr...@citrix.com> wrote:
> --- a/xen/arch/x86/hvm/hvm.c
> +++ b/xen/arch/x86/hvm/hvm.c
> @@ -3964,45 +3964,72 @@ static void hvm_s3_resume(struct domain *d)
>  }
>  }
>  
> -static int hvmop_flush_tlb_all(void)
> +static DEFINE_PER_CPU(cpumask_t, flush_cpumask);

It's not outright unacceptable (after all you delete the other one from
viridian.c), but this amounts to half a kb of per-CPU data in a
4095-CPU build. Is there a reason not to use cpumask_scratch,
given that the local CPU isn't undergoing any scheduling actions?

If it's going to stay, my minimal request would be for it to be
moved into the (only) function using it.

> +bool hvm_flush_vcpu_tlb(bool (*flush_vcpu)(void *ctxt, struct vcpu *v),
> +void *ctxt)
>  {
> +cpumask_t *mask = &this_cpu(flush_cpumask);
>  struct domain *d = current->domain;
>  struct vcpu *v;
>  
> -if ( !is_hvm_domain(d) )
> -return -EINVAL;
> -
>  /* Avoid deadlock if more than one vcpu tries this at the same time. */
>  if ( !spin_trylock(&d->hypercall_deadlock_mutex) )
> -return -ERESTART;
> +return false;
>  
>  /* Pause all other vcpus. */
>  for_each_vcpu ( d, v )
> -if ( v != current )
> +if ( v != current && flush_vcpu(ctxt, v) )
>  vcpu_pause_nosync(v);
>  
> +cpumask_clear(mask);
> +
>  /* Now that all VCPUs are signalled to deschedule, we wait... */
>  for_each_vcpu ( d, v )
> -if ( v != current )
> -while ( !vcpu_runnable(v) && v->is_running )
> -cpu_relax();
> +{
> +unsigned int cpu;
> +
> +if ( v == current || !flush_vcpu(ctxt, v) )
> +continue;
> +
> +while ( !vcpu_runnable(v) && v->is_running )
> +cpu_relax();
> +
> +cpu = read_atomic(&v->dirty_cpu);
> +if ( is_vcpu_dirty_cpu(cpu) )
> +__cpumask_set_cpu(cpu, mask);
> +}
>  
>  /* All other vcpus are paused, safe to unlock now. */
>  spin_unlock(&d->hypercall_deadlock_mutex);
>  
>  /* Flush paging-mode soft state (e.g., va->gfn cache; PAE PDPE cache). */
>  for_each_vcpu ( d, v )
> -paging_update_cr3(v, false);
> +if ( flush_vcpu(ctxt, v) )
> +paging_update_cr3(v, false);
>  
> -/* Flush all dirty TLBs. */
> -flush_tlb_mask(d->dirty_cpumask);
> +/* Flush TLBs on all CPUs with dirty vcpu state. */
> +flush_tlb_mask(mask);

The logic above skips the local CPU when accumulating into mask.
Previously there was no such special case, and the local CPU is
guaranteed to have its bit set in d->dirty_cpumask.

I also think you'd better delay latching dirty state as much as
possible, as the chances then grow for the dirty state to be gone
by the time you evaluate it. By moving the latching into the loop
right after dropping the lock, you'd deal with both issues at the
same time.

> --- a/xen/arch/x86/hvm/viridian/viridian.c
> +++ b/xen/arch/x86/hvm/viridian/viridian.c
> @@ -430,7 +430,12 @@ void viridian_domain_deinit(struct domain *d)
>  viridian_vcpu_deinit(v);
>  }
>  
> -static DEFINE_PER_CPU(cpumask_t, ipi_cpumask);
> +static bool need_flush(void *ctxt, struct vcpu *v)
> +{
> +uint64_t vcpu_mask = *(uint64_t *)ctxt;
> +
> +return vcpu_mask & (1ul << v->vcpu_id);
> +}

I've been puzzled by similar code before - the Viridian interface
continues to be limited to 64 CPUs? If so, as an unrelated
change, shouldn't we deny turning on support for it on guests
with more vCPU-s? Iirc Windows goes south in such a case.

Jan




Re: [Xen-devel] [PATCH] viridian: fix the HvFlushVirtualAddress/List hypercall implementation

2019-02-13 Thread Paul Durrant
Apologies if you get multiple copies of this but I seem to be having problems 
sending to the list.

  Paul

> -----Original Message-----
> From: Paul Durrant [mailto:paul.durr...@citrix.com]
> Sent: 13 February 2019 16:03
> To: Paul Durrant 
> Cc: Jan Beulich ; Andrew Cooper
> ; Wei Liu ; Roger Pau
> Monne 
> Subject: [PATCH] viridian: fix the HvFlushVirtualAddress/List hypercall
> implementation
> 
> The current code uses hvm_asid_flush_vcpu() but this is insufficient for
> a guest running in shadow mode, which results in guest crashes early in
> boot if the 'hcall_remote_tlb_flush' option is enabled.
> 
> This patch, instead of open coding a new flush algorithm, adapts the one
> already used by the HVMOP_flush_tlbs Xen hypercall. The implementation is
> modified to allow flushing the TLBs of only a subset of a domain's vCPUs,
> with a callback function determining whether or not a given vCPU requires
> flushing. This mechanism was chosen because, while the currently
> implemented viridian hypercalls specify a vCPU mask, there are newer
> variants which specify a sparse HV_VP_SET; using a callback avoids
> exposing details of this outside of the viridian subsystem if and when
> those newer variants are implemented.
> 
> NOTE: Use of the common flush function requires that the hypercalls are
>   restartable and so, with this patch applied, viridian_hypercall()
>   can now return HVM_HCALL_preempted. This is safe as no modification
>   to struct cpu_user_regs is done before the return.
> 
> Signed-off-by: Paul Durrant 
> ---
> Cc: Jan Beulich 
> Cc: Andrew Cooper 
> Cc: Wei Liu 
> Cc: "Roger Pau Monné" 
> ---
>  xen/arch/x86/hvm/hvm.c   | 55 +---
>  xen/arch/x86/hvm/viridian/viridian.c | 41 +
>  xen/include/asm-x86/hvm/hvm.h|  2 +
>  3 files changed, 53 insertions(+), 45 deletions(-)
> 
> diff --git a/xen/arch/x86/hvm/hvm.c b/xen/arch/x86/hvm/hvm.c
> index 410623d437..5de2c2aec7 100644
> --- a/xen/arch/x86/hvm/hvm.c
> +++ b/xen/arch/x86/hvm/hvm.c
> @@ -3964,45 +3964,72 @@ static void hvm_s3_resume(struct domain *d)
>  }
>  }
> 
> -static int hvmop_flush_tlb_all(void)
> +static DEFINE_PER_CPU(cpumask_t, flush_cpumask);
> +
> +bool hvm_flush_vcpu_tlb(bool (*flush_vcpu)(void *ctxt, struct vcpu *v),
> +void *ctxt)
>  {
> +cpumask_t *mask = &this_cpu(flush_cpumask);
>  struct domain *d = current->domain;
>  struct vcpu *v;
> 
> -if ( !is_hvm_domain(d) )
> -return -EINVAL;
> -
>  /* Avoid deadlock if more than one vcpu tries this at the same time. */
>  if ( !spin_trylock(&d->hypercall_deadlock_mutex) )
> -return -ERESTART;
> +return false;
> 
>  /* Pause all other vcpus. */
>  for_each_vcpu ( d, v )
> -if ( v != current )
> +if ( v != current && flush_vcpu(ctxt, v) )
>  vcpu_pause_nosync(v);
> 
> +cpumask_clear(mask);
> +
>  /* Now that all VCPUs are signalled to deschedule, we wait... */
>  for_each_vcpu ( d, v )
> -if ( v != current )
> -while ( !vcpu_runnable(v) && v->is_running )
> -cpu_relax();
> +{
> +unsigned int cpu;
> +
> +if ( v == current || !flush_vcpu(ctxt, v) )
> +continue;
> +
> +while ( !vcpu_runnable(v) && v->is_running )
> +cpu_relax();
> +
> +cpu = read_atomic(&v->dirty_cpu);
> +if ( is_vcpu_dirty_cpu(cpu) )
> +__cpumask_set_cpu(cpu, mask);
> +}
> 
>  /* All other vcpus are paused, safe to unlock now. */
>  spin_unlock(&d->hypercall_deadlock_mutex);
> 
>  /* Flush paging-mode soft state (e.g., va->gfn cache; PAE PDPE cache). */
>  for_each_vcpu ( d, v )
> -paging_update_cr3(v, false);
> +if ( flush_vcpu(ctxt, v) )
> +paging_update_cr3(v, false);
> 
> -/* Flush all dirty TLBs. */
> -flush_tlb_mask(d->dirty_cpumask);
> +/* Flush TLBs on all CPUs with dirty vcpu state. */
> +flush_tlb_mask(mask);
> 
>  /* Done. */
>  for_each_vcpu ( d, v )
> -if ( v != current )
> +if ( v != current && flush_vcpu(ctxt, v) )
>  vcpu_unpause(v);
> 
> -return 0;
> +return true;
> +}
> +
> +static bool always_flush(void *ctxt, struct vcpu *v)
> +{
> +return true;
> +}
> +
> +static int hvmop_flush_tlb_all(void)
> +{
> +if ( !is_hvm_domain(current->domain) )
> +return -EINVAL;
> +
> +return hvm_flush_vcpu_tlb(always_flush, NULL) ? 0 : -ERESTART;
>  }
> 
>  static int hvmop_set_evtchn_upcall_vector(
> diff --git a/xen/arch/x86/hvm/viridian/viridian.c
> b/xen/arch/x86/hvm/viridian/viridian.c
> index c78b2918d9..6bb702f27e 100644
> --- a/xen/arch/x86/hvm/viridian/viridian.c
> +++ b/xen/arch/x86/hvm/viridian/viridian.c
> @@ -430,7 +430,12 @@ void viridian_domain_deinit(struct domain *d)
>  viridian_vcpu_deinit(v);
>  }
> 
> -static DEFINE_PER_CPU(cpumask_t, ipi_cpumask);
>