Re: [Xen-devel] [PATCH v2 3/3] x86/hyperv: L0 assisted TLB flush
On Fri, Feb 14, 2020 at 04:42:47PM +, Michael Kelley wrote: > From: Wei Liu On Behalf Of Wei Liu Sent: Friday, > February 14, 2020 4:35 AM > > > > Implement L0 assisted TLB flush for Xen on Hyper-V. It takes advantage > > of several hypercalls: > > > > * HVCALL_FLUSH_VIRTUAL_ADDRESS_LIST > > * HVCALL_FLUSH_VIRTUAL_ADDRESS_LIST_EX > > * HVCALL_FLUSH_VIRTUAL_ADDRESS_SPACE > > * HVCALL_FLUSH_VIRTUAL_ADDRESS_SPACE_EX > > > > Pick the most efficient hypercalls available. > > > > Signed-off-by: Wei Liu > > --- > > v2: > > 1. Address Roger and Jan's comments re types etc. > > 2. Fix pointer arithmetic. > > 3. Misc improvement to code. > > --- > > xen/arch/x86/guest/hyperv/Makefile | 1 + > > xen/arch/x86/guest/hyperv/private.h | 9 ++ > > xen/arch/x86/guest/hyperv/tlb.c | 172 +++- > > xen/arch/x86/guest/hyperv/util.c| 74 > > 4 files changed, 255 insertions(+), 1 deletion(-) > > create mode 100644 xen/arch/x86/guest/hyperv/util.c > > > > diff --git a/xen/arch/x86/guest/hyperv/Makefile > > b/xen/arch/x86/guest/hyperv/Makefile > > index 18902c33e9..0e39410968 100644 > > --- a/xen/arch/x86/guest/hyperv/Makefile > > +++ b/xen/arch/x86/guest/hyperv/Makefile > > @@ -1,2 +1,3 @@ > > obj-y += hyperv.o > > obj-y += tlb.o > > +obj-y += util.o > > diff --git a/xen/arch/x86/guest/hyperv/private.h > > b/xen/arch/x86/guest/hyperv/private.h > > index 509bedaafa..79a77930a0 100644 > > --- a/xen/arch/x86/guest/hyperv/private.h > > +++ b/xen/arch/x86/guest/hyperv/private.h > > @@ -24,12 +24,21 @@ > > > > #include > > #include > > +#include > > > > DECLARE_PER_CPU(void *, hv_input_page); > > DECLARE_PER_CPU(void *, hv_vp_assist); > > DECLARE_PER_CPU(unsigned int, hv_vp_index); > > > > +static inline unsigned int hv_vp_index(unsigned int cpu) > > +{ > > +return per_cpu(hv_vp_index, cpu); > > +} > > + > > int hyperv_flush_tlb(const cpumask_t *mask, const void *va, > > unsigned int flags); > > > > +/* Returns number of banks, -ev if error */ > > +int cpumask_to_vpset(struct hv_vpset *vpset, const 
cpumask_t *mask); > > + > > #endif /* __XEN_HYPERV_PRIVIATE_H__ */ > > diff --git a/xen/arch/x86/guest/hyperv/tlb.c > > b/xen/arch/x86/guest/hyperv/tlb.c > > index 48f527229e..f68e14f151 100644 > > --- a/xen/arch/x86/guest/hyperv/tlb.c > > +++ b/xen/arch/x86/guest/hyperv/tlb.c > > @@ -19,15 +19,185 @@ > > * Copyright (c) 2020 Microsoft. > > */ > > > > +#include > > #include > > #include > > > > +#include > > +#include > > +#include > > + > > #include "private.h" > > > > +/* > > + * It is possible to encode up to 4096 pages using the lower 12 bits > > + * in an element of gva_list > > + */ > > +#define HV_TLB_FLUSH_UNIT (4096 * PAGE_SIZE) > > + > > +static unsigned int fill_gva_list(uint64_t *gva_list, const void *va, > > + unsigned int order) > > +{ > > +unsigned long start = (unsigned long)va; > > +unsigned long end = start + (PAGE_SIZE << order) - 1; > > +unsigned int n = 0; > > + > > +do { > > +unsigned long remain = end - start; > > The calculated value here isn't actually the remaining bytes in the > range to flush -- it's one less than the remaining bytes in the range > to flush because of the -1 in the calculation of 'end'. That difference > will mess up the comparison below against HV_TLB_FLUSH_UNIT > in the case that there are exactly 4096 page remaining to be > flushed. It should take the "=" case, but won't. Also, the > '-1' in 'remain - 1' in the else clause becomes unneeded, and > the 'start = end' assignment then propagates the error. > > In the parallel code in Linux, if you follow the call sequence to get to > fill_gav_list(), the 'end' argument is really the address of the first byte > of the first page that isn't in the flush range (i.e., one beyond the true > 'end') and so is a bit misnamed. > > I think the calculation of 'end' should drop the -1, and perhaps 'end' > should be renamed. Thanks for the detailed review. Let me fix this. Wei. 
> > Michael > _______________________________________________ Xen-devel mailing list Xen-devel@lists.xenproject.org https://lists.xenproject.org/mailman/listinfo/xen-devel
Re: [Xen-devel] [PATCH v2 3/3] x86/hyperv: L0 assisted TLB flush
From: Wei Liu On Behalf Of Wei Liu Sent: Friday, February 14, 2020 4:35 AM > > Implement L0 assisted TLB flush for Xen on Hyper-V. It takes advantage > of several hypercalls: > > * HVCALL_FLUSH_VIRTUAL_ADDRESS_LIST > * HVCALL_FLUSH_VIRTUAL_ADDRESS_LIST_EX > * HVCALL_FLUSH_VIRTUAL_ADDRESS_SPACE > * HVCALL_FLUSH_VIRTUAL_ADDRESS_SPACE_EX > > Pick the most efficient hypercalls available. > > Signed-off-by: Wei Liu > --- > v2: > 1. Address Roger and Jan's comments re types etc. > 2. Fix pointer arithmetic. > 3. Misc improvement to code. > --- > xen/arch/x86/guest/hyperv/Makefile | 1 + > xen/arch/x86/guest/hyperv/private.h | 9 ++ > xen/arch/x86/guest/hyperv/tlb.c | 172 +++- > xen/arch/x86/guest/hyperv/util.c| 74 > 4 files changed, 255 insertions(+), 1 deletion(-) > create mode 100644 xen/arch/x86/guest/hyperv/util.c > > diff --git a/xen/arch/x86/guest/hyperv/Makefile > b/xen/arch/x86/guest/hyperv/Makefile > index 18902c33e9..0e39410968 100644 > --- a/xen/arch/x86/guest/hyperv/Makefile > +++ b/xen/arch/x86/guest/hyperv/Makefile > @@ -1,2 +1,3 @@ > obj-y += hyperv.o > obj-y += tlb.o > +obj-y += util.o > diff --git a/xen/arch/x86/guest/hyperv/private.h > b/xen/arch/x86/guest/hyperv/private.h > index 509bedaafa..79a77930a0 100644 > --- a/xen/arch/x86/guest/hyperv/private.h > +++ b/xen/arch/x86/guest/hyperv/private.h > @@ -24,12 +24,21 @@ > > #include > #include > +#include > > DECLARE_PER_CPU(void *, hv_input_page); > DECLARE_PER_CPU(void *, hv_vp_assist); > DECLARE_PER_CPU(unsigned int, hv_vp_index); > > +static inline unsigned int hv_vp_index(unsigned int cpu) > +{ > +return per_cpu(hv_vp_index, cpu); > +} > + > int hyperv_flush_tlb(const cpumask_t *mask, const void *va, > unsigned int flags); > > +/* Returns number of banks, -ev if error */ > +int cpumask_to_vpset(struct hv_vpset *vpset, const cpumask_t *mask); > + > #endif /* __XEN_HYPERV_PRIVIATE_H__ */ > diff --git a/xen/arch/x86/guest/hyperv/tlb.c b/xen/arch/x86/guest/hyperv/tlb.c > index 48f527229e..f68e14f151 100644 
> --- a/xen/arch/x86/guest/hyperv/tlb.c > +++ b/xen/arch/x86/guest/hyperv/tlb.c > @@ -19,15 +19,185 @@ > * Copyright (c) 2020 Microsoft. > */ > > +#include > #include > #include > > +#include > +#include > +#include > + > #include "private.h" > > +/* > + * It is possible to encode up to 4096 pages using the lower 12 bits > + * in an element of gva_list > + */ > +#define HV_TLB_FLUSH_UNIT (4096 * PAGE_SIZE) > + > +static unsigned int fill_gva_list(uint64_t *gva_list, const void *va, > + unsigned int order) > +{ > +unsigned long start = (unsigned long)va; > +unsigned long end = start + (PAGE_SIZE << order) - 1; > +unsigned int n = 0; > + > +do { > +unsigned long remain = end - start; The calculated value here isn't actually the remaining bytes in the range to flush -- it's one less than the remaining bytes in the range to flush because of the -1 in the calculation of 'end'. That difference will mess up the comparison below against HV_TLB_FLUSH_UNIT in the case that there are exactly 4096 page remaining to be flushed. It should take the "=" case, but won't. Also, the '-1' in 'remain - 1' in the else clause becomes unneeded, and the 'start = end' assignment then propagates the error. In the parallel code in Linux, if you follow the call sequence to get to fill_gav_list(), the 'end' argument is really the address of the first byte of the first page that isn't in the flush range (i.e., one beyond the true 'end') and so is a bit misnamed. I think the calculation of 'end' should drop the -1, and perhaps 'end' should be renamed. 
Michael > + > +gva_list[n] = start & PAGE_MASK; > + > +/* > + * Use lower 12 bits to encode the number of additional pages > + * to flush > + */ > +if ( remain >= HV_TLB_FLUSH_UNIT ) > +{ > +gva_list[n] |= ~PAGE_MASK; > +start += HV_TLB_FLUSH_UNIT; > +} > +else if ( remain ) > +{ > +gva_list[n] |= (remain - 1) >> PAGE_SHIFT; > +start = end; > +} > + > +n++; > +} while ( start < end ); > + > +return n; > +} > + ___ Xen-devel mailing list Xen-devel@lists.xenproject.org https://lists.xenproject.org/mailman/listinfo/xen-devel
Re: [Xen-devel] [PATCH v2 3/3] x86/hyperv: L0 assisted TLB flush
On Fri, Feb 14, 2020 at 03:42:17PM +0100, Roger Pau Monné wrote: [...] > > #endif /* __XEN_HYPERV_PRIVIATE_H__ */ > > diff --git a/xen/arch/x86/guest/hyperv/tlb.c > > b/xen/arch/x86/guest/hyperv/tlb.c > > index 48f527229e..f68e14f151 100644 > > --- a/xen/arch/x86/guest/hyperv/tlb.c > > +++ b/xen/arch/x86/guest/hyperv/tlb.c > > @@ -19,15 +19,185 @@ > > * Copyright (c) 2020 Microsoft. > > */ > > > > +#include > > #include > > #include > > > > +#include > > +#include > > +#include > > + > > #include "private.h" > > > > +/* > > + * It is possible to encode up to 4096 pages using the lower 12 bits > > + * in an element of gva_list > > + */ > > +#define HV_TLB_FLUSH_UNIT (4096 * PAGE_SIZE) > > + > > +static unsigned int fill_gva_list(uint64_t *gva_list, const void *va, > > + unsigned int order) > > +{ > > +unsigned long start = (unsigned long)va; > > +unsigned long end = start + (PAGE_SIZE << order) - 1; > > +unsigned int n = 0; > > + > > +do { > > +unsigned long remain = end - start; > > + > > +gva_list[n] = start & PAGE_MASK; > > + > > +/* > > + * Use lower 12 bits to encode the number of additional pages > > + * to flush > > + */ > > +if ( remain >= HV_TLB_FLUSH_UNIT ) > > +{ > > +gva_list[n] |= ~PAGE_MASK; > > +start += HV_TLB_FLUSH_UNIT; > > +} > > +else if ( remain ) > > remain is always going to be > 0, since the loop condition is end > > start, and hence this can be a plain else. Ack. 
> > > +{ > > +gva_list[n] |= (remain - 1) >> PAGE_SHIFT; > > +start = end; > > +} > > + > > +n++; > > +} while ( start < end ); > > + > > +return n; > > +} > > + > > +static uint64_t flush_tlb_ex(const cpumask_t *mask, const void *va, > > + unsigned int flags) > > +{ > > +struct hv_tlb_flush_ex *flush = this_cpu(hv_input_page); > > +int nr_banks; > > +unsigned int max_gvas, order = flags & FLUSH_ORDER_MASK; > > +uint64_t ret; > > + > > +if ( !flush || local_irq_is_enabled() ) > > +{ > > +ASSERT_UNREACHABLE(); > > +return ~0ULL; > > +} > > + > > +if ( !(ms_hyperv.hints & HV_X64_EX_PROCESSOR_MASKS_RECOMMENDED) ) > > +return ~0ULL; > > + > > +flush->address_space = 0; > > +flush->flags = HV_FLUSH_ALL_VIRTUAL_ADDRESS_SPACES; > > +if ( !(flags & FLUSH_TLB_GLOBAL) ) > > +flush->flags |= HV_FLUSH_NON_GLOBAL_MAPPINGS_ONLY; > > + > > +nr_banks = cpumask_to_vpset(>hv_vp_set, mask); > > +if ( nr_banks < 0 ) > > +return ~0ULL; > > It would be nice to propagate the error code from cpumask_to_vpset, > but since the function can also return HyperV error codes this doesn't > make much sense. > > > + > > +max_gvas = > > +(PAGE_SIZE - sizeof(*flush) - nr_banks * > > + sizeof(flush->hv_vp_set.bank_contents[0])) / > > +sizeof(uint64_t); /* gva is represented as uint64_t */ > > + > > +/* > > + * Flush the entire address space if va is NULL or if there is not > > + * enough space for gva_list. > > + */ > > +if ( !va || (PAGE_SIZE << order) / HV_TLB_FLUSH_UNIT > max_gvas ) > > +ret = hv_do_rep_hypercall(HVCALL_FLUSH_VIRTUAL_ADDRESS_SPACE_EX, 0, > > + nr_banks, virt_to_maddr(flush), 0); > > You could just return hv_do_rep_hypercall(...); here, which will avoid > the else branch below and the indentation. Ack. 
> > > +else > > +{ > > +uint64_t *gva_list = > > +(uint64_t *)flush + sizeof(*flush) / sizeof(uint64_t) + > > nr_banks; > > +unsigned int gvas = fill_gva_list(gva_list, va, order); > > + > > +BUILD_BUG_ON(sizeof(*flush) % sizeof(uint64_t)); > > + > > +ret = hv_do_rep_hypercall(HVCALL_FLUSH_VIRTUAL_ADDRESS_LIST_EX, > > + gvas, nr_banks, virt_to_maddr(flush), 0); > > +} > > + > > +return ret; > > +} > > + > > int hyperv_flush_tlb(const cpumask_t *mask, const void *va, > > unsigned int flags) > > { > > -return -EOPNOTSUPP; > > +unsigned long irq_flags; > > +struct hv_tlb_flush *flush = this_cpu(hv_input_page); > > +unsigned int max_gvas, order = flags & FLUSH_ORDER_MASK; > > +uint64_t ret; > > + > > +ASSERT(flush); > > +ASSERT(!cpumask_empty(mask)); > > I would also turn this into an if ( ... ) { ASSERT; return -EFOO; } Ack. > > > + > > +local_irq_save(irq_flags); > > + > > +flush->address_space = 0; > > +flush->flags = HV_FLUSH_ALL_VIRTUAL_ADDRESS_SPACES; > > +flush->processor_mask = 0; > > +if ( !(flags & FLUSH_TLB_GLOBAL) ) > > +flush->flags |= HV_FLUSH_NON_GLOBAL_MAPPINGS_ONLY; > > + > > +if ( cpumask_equal(mask, _online_map) ) > > +flush->flags |= HV_FLUSH_ALL_PROCESSORS; > > +else > > +{ > > +
Re: [Xen-devel] [PATCH v2 3/3] x86/hyperv: L0 assisted TLB flush
On Fri, Feb 14, 2020 at 12:34:30PM +, Wei Liu wrote: > Implement L0 assisted TLB flush for Xen on Hyper-V. It takes advantage > of several hypercalls: > > * HVCALL_FLUSH_VIRTUAL_ADDRESS_LIST > * HVCALL_FLUSH_VIRTUAL_ADDRESS_LIST_EX > * HVCALL_FLUSH_VIRTUAL_ADDRESS_SPACE > * HVCALL_FLUSH_VIRTUAL_ADDRESS_SPACE_EX > > Pick the most efficient hypercalls available. > > Signed-off-by: Wei Liu Thanks! LGTM, I've just got a couple of comments below. > --- > v2: > 1. Address Roger and Jan's comments re types etc. > 2. Fix pointer arithmetic. > 3. Misc improvement to code. > --- > xen/arch/x86/guest/hyperv/Makefile | 1 + > xen/arch/x86/guest/hyperv/private.h | 9 ++ > xen/arch/x86/guest/hyperv/tlb.c | 172 +++- > xen/arch/x86/guest/hyperv/util.c| 74 > 4 files changed, 255 insertions(+), 1 deletion(-) > create mode 100644 xen/arch/x86/guest/hyperv/util.c > > diff --git a/xen/arch/x86/guest/hyperv/Makefile > b/xen/arch/x86/guest/hyperv/Makefile > index 18902c33e9..0e39410968 100644 > --- a/xen/arch/x86/guest/hyperv/Makefile > +++ b/xen/arch/x86/guest/hyperv/Makefile > @@ -1,2 +1,3 @@ > obj-y += hyperv.o > obj-y += tlb.o > +obj-y += util.o > diff --git a/xen/arch/x86/guest/hyperv/private.h > b/xen/arch/x86/guest/hyperv/private.h > index 509bedaafa..79a77930a0 100644 > --- a/xen/arch/x86/guest/hyperv/private.h > +++ b/xen/arch/x86/guest/hyperv/private.h > @@ -24,12 +24,21 @@ > > #include > #include > +#include > > DECLARE_PER_CPU(void *, hv_input_page); > DECLARE_PER_CPU(void *, hv_vp_assist); > DECLARE_PER_CPU(unsigned int, hv_vp_index); > > +static inline unsigned int hv_vp_index(unsigned int cpu) > +{ > +return per_cpu(hv_vp_index, cpu); > +} > + > int hyperv_flush_tlb(const cpumask_t *mask, const void *va, > unsigned int flags); > > +/* Returns number of banks, -ev if error */ > +int cpumask_to_vpset(struct hv_vpset *vpset, const cpumask_t *mask); > + > #endif /* __XEN_HYPERV_PRIVIATE_H__ */ > diff --git a/xen/arch/x86/guest/hyperv/tlb.c b/xen/arch/x86/guest/hyperv/tlb.c > 
index 48f527229e..f68e14f151 100644 > --- a/xen/arch/x86/guest/hyperv/tlb.c > +++ b/xen/arch/x86/guest/hyperv/tlb.c > @@ -19,15 +19,185 @@ > * Copyright (c) 2020 Microsoft. > */ > > +#include > #include > #include > > +#include > +#include > +#include > + > #include "private.h" > > +/* > + * It is possible to encode up to 4096 pages using the lower 12 bits > + * in an element of gva_list > + */ > +#define HV_TLB_FLUSH_UNIT (4096 * PAGE_SIZE) > + > +static unsigned int fill_gva_list(uint64_t *gva_list, const void *va, > + unsigned int order) > +{ > +unsigned long start = (unsigned long)va; > +unsigned long end = start + (PAGE_SIZE << order) - 1; > +unsigned int n = 0; > + > +do { > +unsigned long remain = end - start; > + > +gva_list[n] = start & PAGE_MASK; > + > +/* > + * Use lower 12 bits to encode the number of additional pages > + * to flush > + */ > +if ( remain >= HV_TLB_FLUSH_UNIT ) > +{ > +gva_list[n] |= ~PAGE_MASK; > +start += HV_TLB_FLUSH_UNIT; > +} > +else if ( remain ) remain is always going to be > 0, since the loop condition is end > start, and hence this can be a plain else. 
> +{ > +gva_list[n] |= (remain - 1) >> PAGE_SHIFT; > +start = end; > +} > + > +n++; > +} while ( start < end ); > + > +return n; > +} > + > +static uint64_t flush_tlb_ex(const cpumask_t *mask, const void *va, > + unsigned int flags) > +{ > +struct hv_tlb_flush_ex *flush = this_cpu(hv_input_page); > +int nr_banks; > +unsigned int max_gvas, order = flags & FLUSH_ORDER_MASK; > +uint64_t ret; > + > +if ( !flush || local_irq_is_enabled() ) > +{ > +ASSERT_UNREACHABLE(); > +return ~0ULL; > +} > + > +if ( !(ms_hyperv.hints & HV_X64_EX_PROCESSOR_MASKS_RECOMMENDED) ) > +return ~0ULL; > + > +flush->address_space = 0; > +flush->flags = HV_FLUSH_ALL_VIRTUAL_ADDRESS_SPACES; > +if ( !(flags & FLUSH_TLB_GLOBAL) ) > +flush->flags |= HV_FLUSH_NON_GLOBAL_MAPPINGS_ONLY; > + > +nr_banks = cpumask_to_vpset(>hv_vp_set, mask); > +if ( nr_banks < 0 ) > +return ~0ULL; It would be nice to propagate the error code from cpumask_to_vpset, but since the function can also return HyperV error codes this doesn't make much sense. > + > +max_gvas = > +(PAGE_SIZE - sizeof(*flush) - nr_banks * > + sizeof(flush->hv_vp_set.bank_contents[0])) / > +sizeof(uint64_t); /* gva is represented as uint64_t */ > + > +/* > + * Flush the entire address space if va is NULL or if there is not > + * enough space for gva_list. > + */ > +if ( !va || (PAGE_SIZE << order) / HV_TLB_FLUSH_UNIT > max_gvas )
[Xen-devel] [PATCH v2 3/3] x86/hyperv: L0 assisted TLB flush
Implement L0 assisted TLB flush for Xen on Hyper-V. It takes advantage of several hypercalls: * HVCALL_FLUSH_VIRTUAL_ADDRESS_LIST * HVCALL_FLUSH_VIRTUAL_ADDRESS_LIST_EX * HVCALL_FLUSH_VIRTUAL_ADDRESS_SPACE * HVCALL_FLUSH_VIRTUAL_ADDRESS_SPACE_EX Pick the most efficient hypercalls available. Signed-off-by: Wei Liu --- v2: 1. Address Roger and Jan's comments re types etc. 2. Fix pointer arithmetic. 3. Misc improvement to code. --- xen/arch/x86/guest/hyperv/Makefile | 1 + xen/arch/x86/guest/hyperv/private.h | 9 ++ xen/arch/x86/guest/hyperv/tlb.c | 172 +++- xen/arch/x86/guest/hyperv/util.c| 74 4 files changed, 255 insertions(+), 1 deletion(-) create mode 100644 xen/arch/x86/guest/hyperv/util.c diff --git a/xen/arch/x86/guest/hyperv/Makefile b/xen/arch/x86/guest/hyperv/Makefile index 18902c33e9..0e39410968 100644 --- a/xen/arch/x86/guest/hyperv/Makefile +++ b/xen/arch/x86/guest/hyperv/Makefile @@ -1,2 +1,3 @@ obj-y += hyperv.o obj-y += tlb.o +obj-y += util.o diff --git a/xen/arch/x86/guest/hyperv/private.h b/xen/arch/x86/guest/hyperv/private.h index 509bedaafa..79a77930a0 100644 --- a/xen/arch/x86/guest/hyperv/private.h +++ b/xen/arch/x86/guest/hyperv/private.h @@ -24,12 +24,21 @@ #include #include +#include DECLARE_PER_CPU(void *, hv_input_page); DECLARE_PER_CPU(void *, hv_vp_assist); DECLARE_PER_CPU(unsigned int, hv_vp_index); +static inline unsigned int hv_vp_index(unsigned int cpu) +{ +return per_cpu(hv_vp_index, cpu); +} + int hyperv_flush_tlb(const cpumask_t *mask, const void *va, unsigned int flags); +/* Returns number of banks, -ev if error */ +int cpumask_to_vpset(struct hv_vpset *vpset, const cpumask_t *mask); + #endif /* __XEN_HYPERV_PRIVIATE_H__ */ diff --git a/xen/arch/x86/guest/hyperv/tlb.c b/xen/arch/x86/guest/hyperv/tlb.c index 48f527229e..f68e14f151 100644 --- a/xen/arch/x86/guest/hyperv/tlb.c +++ b/xen/arch/x86/guest/hyperv/tlb.c @@ -19,15 +19,185 @@ * Copyright (c) 2020 Microsoft. 
*/ +#include #include #include +#include +#include +#include + #include "private.h" +/* + * It is possible to encode up to 4096 pages using the lower 12 bits + * in an element of gva_list + */ +#define HV_TLB_FLUSH_UNIT (4096 * PAGE_SIZE) + +static unsigned int fill_gva_list(uint64_t *gva_list, const void *va, + unsigned int order) +{ +unsigned long start = (unsigned long)va; +unsigned long end = start + (PAGE_SIZE << order) - 1; +unsigned int n = 0; + +do { +unsigned long remain = end - start; + +gva_list[n] = start & PAGE_MASK; + +/* + * Use lower 12 bits to encode the number of additional pages + * to flush + */ +if ( remain >= HV_TLB_FLUSH_UNIT ) +{ +gva_list[n] |= ~PAGE_MASK; +start += HV_TLB_FLUSH_UNIT; +} +else if ( remain ) +{ +gva_list[n] |= (remain - 1) >> PAGE_SHIFT; +start = end; +} + +n++; +} while ( start < end ); + +return n; +} + +static uint64_t flush_tlb_ex(const cpumask_t *mask, const void *va, + unsigned int flags) +{ +struct hv_tlb_flush_ex *flush = this_cpu(hv_input_page); +int nr_banks; +unsigned int max_gvas, order = flags & FLUSH_ORDER_MASK; +uint64_t ret; + +if ( !flush || local_irq_is_enabled() ) +{ +ASSERT_UNREACHABLE(); +return ~0ULL; +} + +if ( !(ms_hyperv.hints & HV_X64_EX_PROCESSOR_MASKS_RECOMMENDED) ) +return ~0ULL; + +flush->address_space = 0; +flush->flags = HV_FLUSH_ALL_VIRTUAL_ADDRESS_SPACES; +if ( !(flags & FLUSH_TLB_GLOBAL) ) +flush->flags |= HV_FLUSH_NON_GLOBAL_MAPPINGS_ONLY; + +nr_banks = cpumask_to_vpset(>hv_vp_set, mask); +if ( nr_banks < 0 ) +return ~0ULL; + +max_gvas = +(PAGE_SIZE - sizeof(*flush) - nr_banks * + sizeof(flush->hv_vp_set.bank_contents[0])) / +sizeof(uint64_t); /* gva is represented as uint64_t */ + +/* + * Flush the entire address space if va is NULL or if there is not + * enough space for gva_list. 
+ */ +if ( !va || (PAGE_SIZE << order) / HV_TLB_FLUSH_UNIT > max_gvas ) +ret = hv_do_rep_hypercall(HVCALL_FLUSH_VIRTUAL_ADDRESS_SPACE_EX, 0, + nr_banks, virt_to_maddr(flush), 0); +else +{ +uint64_t *gva_list = +(uint64_t *)flush + sizeof(*flush) / sizeof(uint64_t) + nr_banks; +unsigned int gvas = fill_gva_list(gva_list, va, order); + +BUILD_BUG_ON(sizeof(*flush) % sizeof(uint64_t)); + +ret = hv_do_rep_hypercall(HVCALL_FLUSH_VIRTUAL_ADDRESS_LIST_EX, + gvas, nr_banks, virt_to_maddr(flush), 0); +} + +return ret; +} + int hyperv_flush_tlb(const cpumask_t *mask, const void *va, unsigned int flags) { -