[tip:x86/hyperv] x86/hyperv: Add functions to allocate/deallocate page for Hyper-V
Commit-ID: 8c3e44bde7fd1b8291515f046008225711ac7beb Gitweb: https://git.kernel.org/tip/8c3e44bde7fd1b8291515f046008225711ac7beb Author: Maya Nakamura AuthorDate: Fri, 12 Jul 2019 08:21:25 + Committer: Thomas Gleixner CommitDate: Mon, 22 Jul 2019 11:06:45 +0200 x86/hyperv: Add functions to allocate/deallocate page for Hyper-V Introduce two new functions, hv_alloc_hyperv_page() and hv_free_hyperv_page(), to allocate/deallocate memory with the size and alignment that Hyper-V expects as a page. Although currently they are not used, they are ready to be used to allocate/deallocate memory on x86 when their ARM64 counterparts are implemented, keeping symmetry between architectures with potentially different guest page sizes. Signed-off-by: Maya Nakamura Signed-off-by: Thomas Gleixner Reviewed-by: Michael Kelley Reviewed-by: Vitaly Kuznetsov Link: https://lore.kernel.org/lkml/alpine.deb.2.21.1906272334560.32...@nanos.tec.linutronix.de/ Link: https://lore.kernel.org/lkml/87muindr9c@vitty.brq.redhat.com/ Link: https://lkml.kernel.org/r/706b2e71eb3e587b5f8801e50f090fae2a00e35d.1562916939.git.m.maya.nakam...@gmail.com --- arch/x86/hyperv/hv_init.c | 14 ++ arch/x86/include/asm/mshyperv.h | 5 - 2 files changed, 18 insertions(+), 1 deletion(-) diff --git a/arch/x86/hyperv/hv_init.c b/arch/x86/hyperv/hv_init.c index 0d258688c8cf..d314cf1e15fd 100644 --- a/arch/x86/hyperv/hv_init.c +++ b/arch/x86/hyperv/hv_init.c @@ -37,6 +37,20 @@ EXPORT_SYMBOL_GPL(hyperv_pcpu_input_arg); u32 hv_max_vp_index; EXPORT_SYMBOL_GPL(hv_max_vp_index); +void *hv_alloc_hyperv_page(void) +{ + BUILD_BUG_ON(PAGE_SIZE != HV_HYP_PAGE_SIZE); + + return (void *)__get_free_page(GFP_KERNEL); +} +EXPORT_SYMBOL_GPL(hv_alloc_hyperv_page); + +void hv_free_hyperv_page(unsigned long addr) +{ + free_page(addr); +} +EXPORT_SYMBOL_GPL(hv_free_hyperv_page); + static int hv_cpu_init(unsigned int cpu) { u64 msr_vp_index; diff --git a/arch/x86/include/asm/mshyperv.h b/arch/x86/include/asm/mshyperv.h index 2ef31cc8c529..f4138aeb4280 100644 --- a/arch/x86/include/asm/mshyperv.h +++ b/arch/x86/include/asm/mshyperv.h @@ -218,7 +218,8 @@ static inline struct hv_vp_assist_page *hv_get_vp_assist_page(unsigned int cpu) void __init hyperv_init(void); void hyperv_setup_mmu_ops(void); - +void *hv_alloc_hyperv_page(void); +void hv_free_hyperv_page(unsigned long addr); void hyperv_reenlightenment_intr(struct pt_regs *regs); void set_hv_tscchange_cb(void (*cb)(void)); void clear_hv_tscchange_cb(void); @@ -241,6 +242,8 @@ static inline void hv_apic_init(void) {} #else /* CONFIG_HYPERV */ static inline void hyperv_init(void) {} static inline void hyperv_setup_mmu_ops(void) {} +static inline void *hv_alloc_hyperv_page(void) { return NULL; } +static inline void hv_free_hyperv_page(unsigned long addr) {} static inline void set_hv_tscchange_cb(void (*cb)(void)) {} static inline void clear_hv_tscchange_cb(void) {} static inline void hyperv_stop_tsc_emulation(void) {};
[tip:x86/hyperv] drivers: hv: vmbus: Replace page definition with Hyper-V specific one
Commit-ID: 83527ef7abf7c02c33a90b00f0954db35415adbd Gitweb: https://git.kernel.org/tip/83527ef7abf7c02c33a90b00f0954db35415adbd Author: Maya Nakamura AuthorDate: Fri, 12 Jul 2019 08:25:18 + Committer: Thomas Gleixner CommitDate: Mon, 22 Jul 2019 11:06:45 +0200 drivers: hv: vmbus: Replace page definition with Hyper-V specific one Replace PAGE_SIZE with HV_HYP_PAGE_SIZE because the guest page size may not be 4096 on all architectures and Hyper-V always runs with a page size of 4096. Signed-off-by: Maya Nakamura Signed-off-by: Thomas Gleixner Reviewed-by: Michael Kelley Reviewed-by: Vitaly Kuznetsov Acked-by: Sasha Levin Link: https://lkml.kernel.org/r/0d9e80ecabcc950dc279fdd2e39bea4060123ba4.1562916939.git.m.maya.nakam...@gmail.com --- drivers/hv/hyperv_vmbus.h | 8 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/hv/hyperv_vmbus.h b/drivers/hv/hyperv_vmbus.h index 362e70e9d145..6bf64cb6e31a 100644 --- a/drivers/hv/hyperv_vmbus.h +++ b/drivers/hv/hyperv_vmbus.h @@ -192,11 +192,11 @@ int hv_ringbuffer_read(struct vmbus_channel *channel, u64 *requestid, bool raw); /* - * Maximum channels is determined by the size of the interrupt page - * which is PAGE_SIZE. 1/2 of PAGE_SIZE is for send endpoint interrupt - * and the other is receive endpoint interrupt + * The Maximum number of channels (16348) is determined by the size of the + * interrupt page, which is HV_HYP_PAGE_SIZE. 1/2 of HV_HYP_PAGE_SIZE is to + * send endpoint interrupts, and the other is to receive endpoint interrupts. */ -#define MAX_NUM_CHANNELS ((PAGE_SIZE >> 1) << 3) /* 16348 channels */ +#define MAX_NUM_CHANNELS ((HV_HYP_PAGE_SIZE >> 1) << 3) /* The value here must be in multiple of 32 */ /* TODO: Need to make this configurable */
[tip:x86/hyperv] x86/hyperv: Create and use Hyper-V page definitions
Commit-ID: fcd3f6222a4ece735d0b3ffb93f646eff693aa69 Gitweb: https://git.kernel.org/tip/fcd3f6222a4ece735d0b3ffb93f646eff693aa69 Author: Maya Nakamura AuthorDate: Fri, 12 Jul 2019 08:14:47 + Committer: Thomas Gleixner CommitDate: Mon, 22 Jul 2019 11:06:44 +0200 x86/hyperv: Create and use Hyper-V page definitions Define HV_HYP_PAGE_SHIFT, HV_HYP_PAGE_SIZE, and HV_HYP_PAGE_MASK because the Linux guest page size and hypervisor page size concepts are different, even though they happen to be the same value on x86. Also, replace PAGE_SIZE with HV_HYP_PAGE_SIZE. Signed-off-by: Maya Nakamura Signed-off-by: Thomas Gleixner Reviewed-by: Michael Kelley Reviewed-by: Vitaly Kuznetsov Link: https://lkml.kernel.org/r/e95111629abf65d016e983f72494cbf110ce605f.1562916939.git.m.maya.nakam...@gmail.com --- arch/x86/include/asm/hyperv-tlfs.h | 12 +++- 1 file changed, 11 insertions(+), 1 deletion(-) diff --git a/arch/x86/include/asm/hyperv-tlfs.h b/arch/x86/include/asm/hyperv-tlfs.h index af78cd72b8f3..7a2705694f5b 100644 --- a/arch/x86/include/asm/hyperv-tlfs.h +++ b/arch/x86/include/asm/hyperv-tlfs.h @@ -12,6 +12,16 @@ #include #include +/* + * While not explicitly listed in the TLFS, Hyper-V always runs with a page size + * of 4096. These definitions are used when communicating with Hyper-V using + * guest physical pages and guest physical page addresses, since the guest page + * size may not be 4096 on all architectures. + */ +#define HV_HYP_PAGE_SHIFT 12 +#define HV_HYP_PAGE_SIZE BIT(HV_HYP_PAGE_SHIFT) +#define HV_HYP_PAGE_MASK (~(HV_HYP_PAGE_SIZE - 1)) + /* * The below CPUID leaves are present if VersionAndFeatures.HypervisorPresent * is set by CPUID(HvCpuIdFunctionVersionAndFeatures). @@ -847,7 +857,7 @@ union hv_gpa_page_range { * count is equal with how many entries of union hv_gpa_page_range can * be populated into the input parameter page. */ -#define HV_MAX_FLUSH_REP_COUNT ((PAGE_SIZE - 2 * sizeof(u64)) /\ +#define HV_MAX_FLUSH_REP_COUNT ((HV_HYP_PAGE_SIZE - 2 * sizeof(u64)) / \ sizeof(union hv_gpa_page_range)) struct hv_guest_mapping_flush_list {
[PATCH v4 5/5] Input: hv: Remove dependencies on PAGE_SIZE for ring buffer
Define the ring buffer size as a constant expression because it should not depend on the guest page size. Signed-off-by: Maya Nakamura Reviewed-by: Michael Kelley --- drivers/input/serio/hyperv-keyboard.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/input/serio/hyperv-keyboard.c b/drivers/input/serio/hyperv-keyboard.c index 8e457e50f837..88ae7c2ac3c8 100644 --- a/drivers/input/serio/hyperv-keyboard.c +++ b/drivers/input/serio/hyperv-keyboard.c @@ -75,8 +75,8 @@ struct synth_kbd_keystroke { #define HK_MAXIMUM_MESSAGE_SIZE 256 -#define KBD_VSC_SEND_RING_BUFFER_SIZE (10 * PAGE_SIZE) -#define KBD_VSC_RECV_RING_BUFFER_SIZE (10 * PAGE_SIZE) +#define KBD_VSC_SEND_RING_BUFFER_SIZE (40 * 1024) +#define KBD_VSC_RECV_RING_BUFFER_SIZE (40 * 1024) #define XTKBD_EMUL0 0xe0 #define XTKBD_EMUL1 0xe1 -- 2.17.1
[PATCH v4 4/5] HID: hv: Remove dependencies on PAGE_SIZE for ring buffer
Define the ring buffer size as a constant expression because it should not depend on the guest page size. Signed-off-by: Maya Nakamura Reviewed-by: Michael Kelley --- drivers/hid/hid-hyperv.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/hid/hid-hyperv.c b/drivers/hid/hid-hyperv.c index 7795831d37c2..cc5b09b87ab0 100644 --- a/drivers/hid/hid-hyperv.c +++ b/drivers/hid/hid-hyperv.c @@ -104,8 +104,8 @@ struct synthhid_input_report { #pragma pack(pop) -#define INPUTVSC_SEND_RING_BUFFER_SIZE (10*PAGE_SIZE) -#define INPUTVSC_RECV_RING_BUFFER_SIZE (10*PAGE_SIZE) +#define INPUTVSC_SEND_RING_BUFFER_SIZE (40 * 1024) +#define INPUTVSC_RECV_RING_BUFFER_SIZE (40 * 1024) enum pipe_prot_msg_type { -- 2.17.1
[PATCH v4 3/5] hv: vmbus: Replace page definition with Hyper-V specific one
Replace PAGE_SIZE with HV_HYP_PAGE_SIZE because the guest page size may not be 4096 on all architectures and Hyper-V always runs with a page size of 4096. Signed-off-by: Maya Nakamura Reviewed-by: Michael Kelley Reviewed-by: Vitaly Kuznetsov --- drivers/hv/hyperv_vmbus.h | 8 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/hv/hyperv_vmbus.h b/drivers/hv/hyperv_vmbus.h index 362e70e9d145..019469c3cbca 100644 --- a/drivers/hv/hyperv_vmbus.h +++ b/drivers/hv/hyperv_vmbus.h @@ -192,11 +192,11 @@ int hv_ringbuffer_read(struct vmbus_channel *channel, u64 *requestid, bool raw); /* - * Maximum channels is determined by the size of the interrupt page - * which is PAGE_SIZE. 1/2 of PAGE_SIZE is for send endpoint interrupt - * and the other is receive endpoint interrupt + * Maximum channels, 16348, is determined by the size of the interrupt page, + * which is HV_HYP_PAGE_SIZE. 1/2 of HV_HYP_PAGE_SIZE is to send endpoint + * interrupt, and the other is to receive endpoint interrupt. */ -#define MAX_NUM_CHANNELS ((PAGE_SIZE >> 1) << 3) /* 16348 channels */ +#define MAX_NUM_CHANNELS ((HV_HYP_PAGE_SIZE >> 1) << 3) /* The value here must be in multiple of 32 */ /* TODO: Need to make this configurable */ -- 2.17.1
[PATCH v4 2/5] x86: hv: Add functions to allocate/deallocate page for Hyper-V
Introduce two new functions, hv_alloc_hyperv_page() and hv_free_hyperv_page(), to allocate/deallocate memory with the size and alignment that Hyper-V expects as a page. Although currently they are not used, they are ready to be used to allocate/deallocate memory on x86 when their ARM64 counterparts are implemented, keeping symmetry between architectures with potentially different guest page sizes. Link: https://lore.kernel.org/lkml/alpine.deb.2.21.1906272334560.32...@nanos.tec.linutronix.de/ Link: https://lore.kernel.org/lkml/87muindr9c@vitty.brq.redhat.com/ Signed-off-by: Maya Nakamura Reviewed-by: Michael Kelley Reviewed-by: Vitaly Kuznetsov --- arch/x86/hyperv/hv_init.c | 14 ++ arch/x86/include/asm/mshyperv.h | 5 - 2 files changed, 18 insertions(+), 1 deletion(-) diff --git a/arch/x86/hyperv/hv_init.c b/arch/x86/hyperv/hv_init.c index 0e033ef11a9f..e8960a83add7 100644 --- a/arch/x86/hyperv/hv_init.c +++ b/arch/x86/hyperv/hv_init.c @@ -37,6 +37,20 @@ EXPORT_SYMBOL_GPL(hyperv_pcpu_input_arg); u32 hv_max_vp_index; EXPORT_SYMBOL_GPL(hv_max_vp_index); +void *hv_alloc_hyperv_page(void) +{ + BUILD_BUG_ON(PAGE_SIZE != HV_HYP_PAGE_SIZE); + + return (void *)__get_free_page(GFP_KERNEL); +} +EXPORT_SYMBOL_GPL(hv_alloc_hyperv_page); + +void hv_free_hyperv_page(unsigned long addr) +{ + free_page(addr); +} +EXPORT_SYMBOL_GPL(hv_free_hyperv_page); + static int hv_cpu_init(unsigned int cpu) { u64 msr_vp_index; diff --git a/arch/x86/include/asm/mshyperv.h b/arch/x86/include/asm/mshyperv.h index 2a793bf6ebb0..32ec9df39a99 100644 --- a/arch/x86/include/asm/mshyperv.h +++ b/arch/x86/include/asm/mshyperv.h @@ -218,7 +218,8 @@ static inline struct hv_vp_assist_page *hv_get_vp_assist_page(unsigned int cpu) void __init hyperv_init(void); void hyperv_setup_mmu_ops(void); - +void *hv_alloc_hyperv_page(void); +void hv_free_hyperv_page(unsigned long addr); void hyperv_reenlightenment_intr(struct pt_regs *regs); void set_hv_tscchange_cb(void (*cb)(void)); void clear_hv_tscchange_cb(void); @@ -241,6 +242,8 @@ static inline void hv_apic_init(void) {} #else /* CONFIG_HYPERV */ static inline void hyperv_init(void) {} static inline void hyperv_setup_mmu_ops(void) {} +static inline void *hv_alloc_hyperv_page(void) { return NULL; } +static inline void hv_free_hyperv_page(unsigned long addr) {} static inline void set_hv_tscchange_cb(void (*cb)(void)) {} static inline void clear_hv_tscchange_cb(void) {} static inline void hyperv_stop_tsc_emulation(void) {}; -- 2.17.1
[PATCH v4 1/5] x86: hv: hyperv-tlfs.h: Create and use Hyper-V page definitions
Define HV_HYP_PAGE_SHIFT, HV_HYP_PAGE_SIZE, and HV_HYP_PAGE_MASK because the Linux guest page size and hypervisor page size concepts are different, even though they happen to be the same value on x86. Also, replace PAGE_SIZE with HV_HYP_PAGE_SIZE. Signed-off-by: Maya Nakamura Reviewed-by: Michael Kelley Reviewed-by: Vitaly Kuznetsov --- arch/x86/include/asm/hyperv-tlfs.h | 12 +++- 1 file changed, 11 insertions(+), 1 deletion(-) diff --git a/arch/x86/include/asm/hyperv-tlfs.h b/arch/x86/include/asm/hyperv-tlfs.h index af78cd72b8f3..7a2705694f5b 100644 --- a/arch/x86/include/asm/hyperv-tlfs.h +++ b/arch/x86/include/asm/hyperv-tlfs.h @@ -12,6 +12,16 @@ #include #include +/* + * While not explicitly listed in the TLFS, Hyper-V always runs with a page size + * of 4096. These definitions are used when communicating with Hyper-V using + * guest physical pages and guest physical page addresses, since the guest page + * size may not be 4096 on all architectures. + */ +#define HV_HYP_PAGE_SHIFT 12 +#define HV_HYP_PAGE_SIZE BIT(HV_HYP_PAGE_SHIFT) +#define HV_HYP_PAGE_MASK (~(HV_HYP_PAGE_SIZE - 1)) + /* * The below CPUID leaves are present if VersionAndFeatures.HypervisorPresent * is set by CPUID(HvCpuIdFunctionVersionAndFeatures). @@ -847,7 +857,7 @@ union hv_gpa_page_range { * count is equal with how many entries of union hv_gpa_page_range can * be populated into the input parameter page. */ -#define HV_MAX_FLUSH_REP_COUNT ((PAGE_SIZE - 2 * sizeof(u64)) /\ +#define HV_MAX_FLUSH_REP_COUNT ((HV_HYP_PAGE_SIZE - 2 * sizeof(u64)) / \ sizeof(union hv_gpa_page_range)) struct hv_guest_mapping_flush_list { -- 2.17.1
[PATCH v4 0/5] hv: Remove dependencies on guest page size
The Linux guest page size and hypervisor page size concepts are different, even though they happen to be the same value on x86. Hyper-V code mixes up the two, so this patchset begins to address that by creating and using a set of Hyper-V specific page definitions. A major benefit of those new definitions is that they support non-x86 architectures, such as ARM64, that use different page sizes. On ARM64, the guest page size may not be 4096, and Hyper-V always runs with a page size of 4096. In this patchset, the first two patches lay the foundation for the others, creating definitions and preparing for allocation of memory with the size and alignment that Hyper-V expects as a page. Patch 3 applies the page size definition where the guest VM and Hyper-V communicate, and where the code intends to use the Hyper-V page size. The last two patches set the ring buffer size to a fixed value, removing the dependency on the guest page size. This is the initial set of changes to the Hyper-V code, and future patches will make additional changes using the same foundation, for example, replace __vmalloc() and related functions when Hyper-V pages are intended. Changes in v4 (all apply to patch 2 only): - Remove file name from the subject. - Include prototypes of two new functions. - Add another Link tag. Changes in v3: - Simplify expression for BUILD_BUG_ON() in patch 2. - Add Link and Reviewed-by tags. Change in v2: - Replace patch 2 with a new one. Maya Nakamura (5): x86: hv: hyperv-tlfs.h: Create and use Hyper-V page definitions x86: hv: Add functions to allocate/deallocate page for Hyper-V hv: vmbus: Replace page definition with Hyper-V specific one HID: hv: Remove dependencies on PAGE_SIZE for ring buffer Input: hv: Remove dependencies on PAGE_SIZE for ring buffer arch/x86/hyperv/hv_init.c | 14 ++ arch/x86/include/asm/hyperv-tlfs.h| 12 +++- arch/x86/include/asm/mshyperv.h | 5 - drivers/hid/hid-hyperv.c | 4 ++-- drivers/hv/hyperv_vmbus.h | 8 drivers/input/serio/hyperv-keyboard.c | 4 ++-- 6 files changed, 37 insertions(+), 10 deletions(-) -- 2.17.1
Re: [PATCH v3 2/5] x86: hv: hv_init.c: Add functions to allocate/deallocate page for Hyper-V
On Thu, Jun 27, 2019 at 11:38:14PM +0200, Thomas Gleixner wrote: > Maya, > > On Tue, 18 Jun 2019, Maya Nakamura wrote: > > > diff --git a/arch/x86/hyperv/hv_init.c b/arch/x86/hyperv/hv_init.c > > index 0e033ef11a9f..e8960a83add7 100644 > > --- a/arch/x86/hyperv/hv_init.c > > +++ b/arch/x86/hyperv/hv_init.c > > @@ -37,6 +37,20 @@ EXPORT_SYMBOL_GPL(hyperv_pcpu_input_arg); > > u32 hv_max_vp_index; > > EXPORT_SYMBOL_GPL(hv_max_vp_index); > > > > +void *hv_alloc_hyperv_page(void) > > +{ > > + BUILD_BUG_ON(PAGE_SIZE != HV_HYP_PAGE_SIZE); > > + > > + return (void *)__get_free_page(GFP_KERNEL); > > +} > > +EXPORT_SYMBOL_GPL(hv_alloc_hyperv_page); > > + > > +void hv_free_hyperv_page(unsigned long addr) > > +{ > > + free_page(addr); > > +} > > +EXPORT_SYMBOL_GPL(hv_free_hyperv_page); > > These functions need to be declared in a header file. > > Thanks, > > tglx > Thank you for pointing that out, Thomas. I will resubmit my patch set to include a header file with the function prototypes.
[PATCH v3 1/5] x86: hv: hyperv-tlfs.h: Create and use Hyper-V page definitions
Define HV_HYP_PAGE_SHIFT, HV_HYP_PAGE_SIZE, and HV_HYP_PAGE_MASK because the Linux guest page size and hypervisor page size concepts are different, even though they happen to be the same value on x86. Also, replace PAGE_SIZE with HV_HYP_PAGE_SIZE. Signed-off-by: Maya Nakamura Reviewed-by: Michael Kelley Reviewed-by: Vitaly Kuznetsov --- arch/x86/include/asm/hyperv-tlfs.h | 12 +++- 1 file changed, 11 insertions(+), 1 deletion(-) diff --git a/arch/x86/include/asm/hyperv-tlfs.h b/arch/x86/include/asm/hyperv-tlfs.h index af78cd72b8f3..4097edf552b3 100644 --- a/arch/x86/include/asm/hyperv-tlfs.h +++ b/arch/x86/include/asm/hyperv-tlfs.h @@ -12,6 +12,16 @@ #include #include +/* + * While not explicitly listed in the TLFS, Hyper-V always runs with a page size + * of 4096. These definitions are used when communicating with Hyper-V using + * guest physical pages and guest physical page addresses, since the guest page + * size may not be 4096 on all architectures. + */ +#define HV_HYP_PAGE_SHIFT 12 +#define HV_HYP_PAGE_SIZE BIT(HV_HYP_PAGE_SHIFT) +#define HV_HYP_PAGE_MASK (~(HV_HYP_PAGE_SIZE - 1)) + /* * The below CPUID leaves are present if VersionAndFeatures.HypervisorPresent * is set by CPUID(HvCpuIdFunctionVersionAndFeatures). @@ -847,7 +857,7 @@ union hv_gpa_page_range { * count is equal with how many entries of union hv_gpa_page_range can * be populated into the input parameter page. */ -#define HV_MAX_FLUSH_REP_COUNT ((PAGE_SIZE - 2 * sizeof(u64)) /\ +#define HV_MAX_FLUSH_REP_COUNT ((HV_HYP_PAGE_SIZE - 2 * sizeof(u64)) / \ sizeof(union hv_gpa_page_range)) struct hv_guest_mapping_flush_list { -- 2.17.1
[PATCH v3 3/5] hv: vmbus: Replace page definition with Hyper-V specific one
Replace PAGE_SIZE with HV_HYP_PAGE_SIZE because the guest page size may not be 4096 on all architectures and Hyper-V always runs with a page size of 4096. Signed-off-by: Maya Nakamura Reviewed-by: Michael Kelley Reviewed-by: Vitaly Kuznetsov --- drivers/hv/hyperv_vmbus.h | 8 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/hv/hyperv_vmbus.h b/drivers/hv/hyperv_vmbus.h index 362e70e9d145..019469c3cbca 100644 --- a/drivers/hv/hyperv_vmbus.h +++ b/drivers/hv/hyperv_vmbus.h @@ -192,11 +192,11 @@ int hv_ringbuffer_read(struct vmbus_channel *channel, u64 *requestid, bool raw); /* - * Maximum channels is determined by the size of the interrupt page - * which is PAGE_SIZE. 1/2 of PAGE_SIZE is for send endpoint interrupt - * and the other is receive endpoint interrupt + * Maximum channels, 16348, is determined by the size of the interrupt page, + * which is HV_HYP_PAGE_SIZE. 1/2 of HV_HYP_PAGE_SIZE is to send endpoint + * interrupt, and the other is to receive endpoint interrupt. */ -#define MAX_NUM_CHANNELS ((PAGE_SIZE >> 1) << 3) /* 16348 channels */ +#define MAX_NUM_CHANNELS ((HV_HYP_PAGE_SIZE >> 1) << 3) /* The value here must be in multiple of 32 */ /* TODO: Need to make this configurable */ -- 2.17.1
[PATCH v3 0/5] hv: Remove dependencies on guest page size
The Linux guest page size and hypervisor page size concepts are different, even though they happen to be the same value on x86. Hyper-V code mixes up the two, so this patchset begins to address that by creating and using a set of Hyper-V specific page definitions. A major benefit of those new definitions is that they support non-x86 architectures, such as ARM64, that use different page sizes. On ARM64, the guest page size may not be 4096, and Hyper-V always runs with a page size of 4096. In this patchset, the first two patches lay the foundation for the others, creating definitions and preparing for allocation of memory with the size and alignment that Hyper-V expects as a page. Patch 3 applies the page size definition where the guest VM and Hyper-V communicate, and where the code intends to use the Hyper-V page size. The last two patches set the ring buffer size to a fixed value, removing the dependency on the guest page size. This is the initial set of changes to the Hyper-V code, and future patches will make additional changes using the same foundation, for example, replace __vmalloc() and related functions when Hyper-V pages are intended. Changes in v3: - [PATCH v2 2/5] Simplify expression for BUILD_BUG_ON(). - Add Link and Reviewed-by tags. Change in v2: - [PATCH 2/5] Replace with a new patch. Maya Nakamura (5): x86: hv: hyperv-tlfs.h: Create and use Hyper-V page definitions x86: hv: hv_init.c: Add functions to allocate/deallocate page for Hyper-V hv: vmbus: Replace page definition with Hyper-V specific one HID: hv: Remove dependencies on PAGE_SIZE for ring buffer Input: hv: Remove dependencies on PAGE_SIZE for ring buffer arch/x86/hyperv/hv_init.c | 14 ++ arch/x86/include/asm/hyperv-tlfs.h| 12 +++- drivers/hid/hid-hyperv.c | 4 ++-- drivers/hv/hyperv_vmbus.h | 8 drivers/input/serio/hyperv-keyboard.c | 4 ++-- 5 files changed, 33 insertions(+), 9 deletions(-) -- 2.17.1
[PATCH v3 5/5] Input: hv: Remove dependencies on PAGE_SIZE for ring buffer
Define the ring buffer size as a constant expression because it should not depend on the guest page size. Signed-off-by: Maya Nakamura Reviewed-by: Michael Kelley --- drivers/input/serio/hyperv-keyboard.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/input/serio/hyperv-keyboard.c b/drivers/input/serio/hyperv-keyboard.c index 8e457e50f837..88ae7c2ac3c8 100644 --- a/drivers/input/serio/hyperv-keyboard.c +++ b/drivers/input/serio/hyperv-keyboard.c @@ -75,8 +75,8 @@ struct synth_kbd_keystroke { #define HK_MAXIMUM_MESSAGE_SIZE 256 -#define KBD_VSC_SEND_RING_BUFFER_SIZE (10 * PAGE_SIZE) -#define KBD_VSC_RECV_RING_BUFFER_SIZE (10 * PAGE_SIZE) +#define KBD_VSC_SEND_RING_BUFFER_SIZE (40 * 1024) +#define KBD_VSC_RECV_RING_BUFFER_SIZE (40 * 1024) #define XTKBD_EMUL0 0xe0 #define XTKBD_EMUL1 0xe1 -- 2.17.1
[PATCH v3 2/5] x86: hv: hv_init.c: Add functions to allocate/deallocate page for Hyper-V
Introduce two new functions, hv_alloc_hyperv_page() and hv_free_hyperv_page(), to allocate/deallocate memory with the size and alignment that Hyper-V expects as a page. Although currently they are not used, they are ready to be used to allocate/deallocate memory on x86 when their ARM64 counterparts are implemented, keeping symmetry between architectures with potentially different guest page sizes. Link: https://lore.kernel.org/lkml/87muindr9c@vitty.brq.redhat.com/ Signed-off-by: Maya Nakamura Reviewed-by: Michael Kelley Reviewed-by: Vitaly Kuznetsov --- arch/x86/hyperv/hv_init.c | 14 ++ 1 file changed, 14 insertions(+) diff --git a/arch/x86/hyperv/hv_init.c b/arch/x86/hyperv/hv_init.c index 0e033ef11a9f..e8960a83add7 100644 --- a/arch/x86/hyperv/hv_init.c +++ b/arch/x86/hyperv/hv_init.c @@ -37,6 +37,20 @@ EXPORT_SYMBOL_GPL(hyperv_pcpu_input_arg); u32 hv_max_vp_index; EXPORT_SYMBOL_GPL(hv_max_vp_index); +void *hv_alloc_hyperv_page(void) +{ + BUILD_BUG_ON(PAGE_SIZE != HV_HYP_PAGE_SIZE); + + return (void *)__get_free_page(GFP_KERNEL); +} +EXPORT_SYMBOL_GPL(hv_alloc_hyperv_page); + +void hv_free_hyperv_page(unsigned long addr) +{ + free_page(addr); +} +EXPORT_SYMBOL_GPL(hv_free_hyperv_page); + static int hv_cpu_init(unsigned int cpu) { u64 msr_vp_index; -- 2.17.1
[PATCH v3 4/5] HID: hv: Remove dependencies on PAGE_SIZE for ring buffer
Define the ring buffer size as a constant expression because it should not depend on the guest page size. Signed-off-by: Maya Nakamura Reviewed-by: Michael Kelley --- drivers/hid/hid-hyperv.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/hid/hid-hyperv.c b/drivers/hid/hid-hyperv.c index 7795831d37c2..cc5b09b87ab0 100644 --- a/drivers/hid/hid-hyperv.c +++ b/drivers/hid/hid-hyperv.c @@ -104,8 +104,8 @@ struct synthhid_input_report { #pragma pack(pop) -#define INPUTVSC_SEND_RING_BUFFER_SIZE (10*PAGE_SIZE) -#define INPUTVSC_RECV_RING_BUFFER_SIZE (10*PAGE_SIZE) +#define INPUTVSC_SEND_RING_BUFFER_SIZE (40 * 1024) +#define INPUTVSC_RECV_RING_BUFFER_SIZE (40 * 1024) enum pipe_prot_msg_type { -- 2.17.1
Re: [PATCH v2 2/5] x86: hv: hv_init.c: Add functions to allocate/deallocate page for Hyper-V
On Wed, Jun 12, 2019 at 12:36:47PM +0200, Vitaly Kuznetsov wrote: > Maya Nakamura writes: > > > Introduce two new functions, hv_alloc_hyperv_page() and > > hv_free_hyperv_page(), to allocate/deallocate memory with the size and > > alignment that Hyper-V expects as a page. Although currently they are > > not used, they are ready to be used to allocate/deallocate memory on x86 > > when their ARM64 counterparts are implemented, keeping symmetry between > > architectures with potentially different guest page sizes. > > > > Signed-off-by: Maya Nakamura > > --- > > arch/x86/hyperv/hv_init.c | 14 ++ > > 1 file changed, 14 insertions(+) > > > > diff --git a/arch/x86/hyperv/hv_init.c b/arch/x86/hyperv/hv_init.c > > index e4ba467a9fc6..84baf0e9a2d4 100644 > > --- a/arch/x86/hyperv/hv_init.c > > +++ b/arch/x86/hyperv/hv_init.c > > @@ -98,6 +98,20 @@ EXPORT_SYMBOL_GPL(hyperv_pcpu_input_arg); > > u32 hv_max_vp_index; > > EXPORT_SYMBOL_GPL(hv_max_vp_index); > > > > +void *hv_alloc_hyperv_page(void) > > +{ > > + BUILD_BUG_ON(!(PAGE_SIZE == HV_HYP_PAGE_SIZE)); > > (nit) > > PAGE_SIZE != HV_HYP_PAGE_SIZE ? > > > + > > + return (void *)__get_free_page(GFP_KERNEL); > > +} > > +EXPORT_SYMBOL_GPL(hv_alloc_hyperv_page); > > + > > +void hv_free_hyperv_page(unsigned long addr) > > +{ > > + free_page(addr); > > +} > > +EXPORT_SYMBOL_GPL(hv_free_hyperv_page); > > + > > static int hv_cpu_init(unsigned int cpu) > > { > > u64 msr_vp_index; > > Reviewed-by: Vitaly Kuznetsov > > -- > Vitaly Agreed. I will resubmit the patch set with this correction.
[PATCH v2 5/5] Input: hv: Remove dependencies on PAGE_SIZE for ring buffer
Define the ring buffer size as a constant expression because it should not depend on the guest page size. Signed-off-by: Maya Nakamura --- drivers/input/serio/hyperv-keyboard.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/input/serio/hyperv-keyboard.c b/drivers/input/serio/hyperv-keyboard.c index 7935e52b5435..a3480dbfadd2 100644 --- a/drivers/input/serio/hyperv-keyboard.c +++ b/drivers/input/serio/hyperv-keyboard.c @@ -83,8 +83,8 @@ struct synth_kbd_keystroke { #define HK_MAXIMUM_MESSAGE_SIZE 256 -#define KBD_VSC_SEND_RING_BUFFER_SIZE (10 * PAGE_SIZE) -#define KBD_VSC_RECV_RING_BUFFER_SIZE (10 * PAGE_SIZE) +#define KBD_VSC_SEND_RING_BUFFER_SIZE (40 * 1024) +#define KBD_VSC_RECV_RING_BUFFER_SIZE (40 * 1024) #define XTKBD_EMUL0 0xe0 #define XTKBD_EMUL1 0xe1 -- 2.17.1
[PATCH v2 4/5] HID: hv: Remove dependencies on PAGE_SIZE for ring buffer
Define the ring buffer size as a constant expression because it should not depend on the guest page size. Signed-off-by: Maya Nakamura --- drivers/hid/hid-hyperv.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/hid/hid-hyperv.c b/drivers/hid/hid-hyperv.c index d3311d714d35..e8b154fa38e2 100644 --- a/drivers/hid/hid-hyperv.c +++ b/drivers/hid/hid-hyperv.c @@ -112,8 +112,8 @@ struct synthhid_input_report { #pragma pack(pop) -#define INPUTVSC_SEND_RING_BUFFER_SIZE (10*PAGE_SIZE) -#define INPUTVSC_RECV_RING_BUFFER_SIZE (10*PAGE_SIZE) +#define INPUTVSC_SEND_RING_BUFFER_SIZE (40 * 1024) +#define INPUTVSC_RECV_RING_BUFFER_SIZE (40 * 1024) enum pipe_prot_msg_type { -- 2.17.1
[PATCH v2 3/5] hv: vmbus: Replace page definition with Hyper-V specific one
Replace PAGE_SIZE with HV_HYP_PAGE_SIZE because the guest page size may not be 4096 on all architectures and Hyper-V always runs with a page size of 4096. Signed-off-by: Maya Nakamura --- drivers/hv/hyperv_vmbus.h | 8 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/hv/hyperv_vmbus.h b/drivers/hv/hyperv_vmbus.h index e5467b821f41..5489b061d261 100644 --- a/drivers/hv/hyperv_vmbus.h +++ b/drivers/hv/hyperv_vmbus.h @@ -208,11 +208,11 @@ int hv_ringbuffer_read(struct vmbus_channel *channel, u64 *requestid, bool raw); /* - * Maximum channels is determined by the size of the interrupt page - * which is PAGE_SIZE. 1/2 of PAGE_SIZE is for send endpoint interrupt - * and the other is receive endpoint interrupt + * Maximum channels, 16348, is determined by the size of the interrupt page, + * which is HV_HYP_PAGE_SIZE. 1/2 of HV_HYP_PAGE_SIZE is to send endpoint + * interrupt, and the other is to receive endpoint interrupt. */ -#define MAX_NUM_CHANNELS ((PAGE_SIZE >> 1) << 3) /* 16348 channels */ +#define MAX_NUM_CHANNELS ((HV_HYP_PAGE_SIZE >> 1) << 3) /* The value here must be in multiple of 32 */ /* TODO: Need to make this configurable */ -- 2.17.1
[PATCH v2 2/5] x86: hv: hv_init.c: Add functions to allocate/deallocate page for Hyper-V
Introduce two new functions, hv_alloc_hyperv_page() and hv_free_hyperv_page(), to allocate/deallocate memory with the size and alignment that Hyper-V expects as a page. Although currently they are not used, they are ready to be used to allocate/deallocate memory on x86 when their ARM64 counterparts are implemented, keeping symmetry between architectures with potentially different guest page sizes. Signed-off-by: Maya Nakamura --- arch/x86/hyperv/hv_init.c | 14 ++ 1 file changed, 14 insertions(+) diff --git a/arch/x86/hyperv/hv_init.c b/arch/x86/hyperv/hv_init.c index e4ba467a9fc6..84baf0e9a2d4 100644 --- a/arch/x86/hyperv/hv_init.c +++ b/arch/x86/hyperv/hv_init.c @@ -98,6 +98,20 @@ EXPORT_SYMBOL_GPL(hyperv_pcpu_input_arg); u32 hv_max_vp_index; EXPORT_SYMBOL_GPL(hv_max_vp_index); +void *hv_alloc_hyperv_page(void) +{ + BUILD_BUG_ON(!(PAGE_SIZE == HV_HYP_PAGE_SIZE)); + + return (void *)__get_free_page(GFP_KERNEL); +} +EXPORT_SYMBOL_GPL(hv_alloc_hyperv_page); + +void hv_free_hyperv_page(unsigned long addr) +{ + free_page(addr); +} +EXPORT_SYMBOL_GPL(hv_free_hyperv_page); + static int hv_cpu_init(unsigned int cpu) { u64 msr_vp_index; -- 2.17.1
[PATCH v2 1/5] x86: hv: hyperv-tlfs.h: Create and use Hyper-V page definitions
Define HV_HYP_PAGE_SHIFT, HV_HYP_PAGE_SIZE, and HV_HYP_PAGE_MASK because the Linux guest page size and hypervisor page size concepts are different, even though they happen to be the same value on x86. Also, replace PAGE_SIZE with HV_HYP_PAGE_SIZE. Signed-off-by: Maya Nakamura --- arch/x86/include/asm/hyperv-tlfs.h | 12 +++- 1 file changed, 11 insertions(+), 1 deletion(-) diff --git a/arch/x86/include/asm/hyperv-tlfs.h b/arch/x86/include/asm/hyperv-tlfs.h index cdf44aa9a501..44bd68aefd00 100644 --- a/arch/x86/include/asm/hyperv-tlfs.h +++ b/arch/x86/include/asm/hyperv-tlfs.h @@ -12,6 +12,16 @@ #include #include +/* + * While not explicitly listed in the TLFS, Hyper-V always runs with a page size + * of 4096. These definitions are used when communicating with Hyper-V using + * guest physical pages and guest physical page addresses, since the guest page + * size may not be 4096 on all architectures. + */ +#define HV_HYP_PAGE_SHIFT 12 +#define HV_HYP_PAGE_SIZE BIT(HV_HYP_PAGE_SHIFT) +#define HV_HYP_PAGE_MASK (~(HV_HYP_PAGE_SIZE - 1)) + /* * The below CPUID leaves are present if VersionAndFeatures.HypervisorPresent * is set by CPUID(HvCpuIdFunctionVersionAndFeatures). @@ -841,7 +851,7 @@ union hv_gpa_page_range { * count is equal with how many entries of union hv_gpa_page_range can * be populated into the input parameter page. */ -#define HV_MAX_FLUSH_REP_COUNT ((PAGE_SIZE - 2 * sizeof(u64)) /\ +#define HV_MAX_FLUSH_REP_COUNT ((HV_HYP_PAGE_SIZE - 2 * sizeof(u64)) / \ sizeof(union hv_gpa_page_range)) struct hv_guest_mapping_flush_list { -- 2.17.1
[PATCH v2 0/5] hv: Remove dependencies on guest page size
The Linux guest page size and hypervisor page size concepts are different, even though they happen to be the same value on x86. Hyper-V code mixes up the two, so this patchset begins to address that by creating and using a set of Hyper-V specific page definitions. A major benefit of those new definitions is that they support non-x86 architectures, such as ARM64, that use different page sizes. On ARM64, the guest page size may not be 4096, and Hyper-V always runs with a page size of 4096. In this patchset, the first two patches lay the foundation for the others, creating definitions and preparing for allocation of memory with the size and alignment that Hyper-V expects as a page. Patch 3 applies the page size definition where the guest VM and Hyper-V communicate, and where the code intends to use the Hyper-V page size. The last two patches set the ring buffer size to a fixed value, removing the dependency on the guest page size. This is the initial set of changes to the Hyper-V code, and future patches will make additional changes using the same foundation, for example, replace __vmalloc() and related functions when Hyper-V pages are intended. Changes in v2: - [PATCH 2/5] Replace with a new patch. Maya Nakamura (5): x86: hv: hyperv-tlfs.h: Create and use Hyper-V page definitions x86: hv: hv_init.c: Add functions to allocate/deallocate page for Hyper-V hv: vmbus: Replace page definition with Hyper-V specific one HID: hv: Remove dependencies on PAGE_SIZE for ring buffer Input: hv: Remove dependencies on PAGE_SIZE for ring buffer arch/x86/hyperv/hv_init.c | 14 ++ arch/x86/include/asm/hyperv-tlfs.h| 12 +++- drivers/hid/hid-hyperv.c | 4 ++-- drivers/hv/hyperv_vmbus.h | 8 drivers/input/serio/hyperv-keyboard.c | 4 ++-- 5 files changed, 33 insertions(+), 9 deletions(-) -- 2.17.1
Re: [PATCH 2/6] x86: hv: hv_init.c: Replace alloc_page() with kmem_cache_alloc()
On Fri, Apr 12, 2019 at 09:52:47AM +0200, Vitaly Kuznetsov wrote: > Maya Nakamura writes: > > > On Fri, Apr 05, 2019 at 01:31:02PM +0200, Vitaly Kuznetsov wrote: > >> Maya Nakamura writes: > >> > >> > @@ -98,18 +99,20 @@ EXPORT_SYMBOL_GPL(hyperv_pcpu_input_arg); > >> > u32 hv_max_vp_index; > >> > EXPORT_SYMBOL_GPL(hv_max_vp_index); > >> > > >> > +struct kmem_cache *cachep; > >> > +EXPORT_SYMBOL_GPL(cachep); > >> > + > >> > static int hv_cpu_init(unsigned int cpu) > >> > { > >> > u64 msr_vp_index; > >> > struct hv_vp_assist_page **hvp = > >> > &hv_vp_assist_page[smp_processor_id()]; > >> > void **input_arg; > >> > -struct page *pg; > >> > > >> > input_arg = (void **)this_cpu_ptr(hyperv_pcpu_input_arg); > >> > -pg = alloc_page(GFP_KERNEL); > >> > -if (unlikely(!pg)) > >> > +*input_arg = kmem_cache_alloc(cachep, GFP_KERNEL); > >> > >> I'm not sure use of kmem_cache is justified here: pages we allocate are > >> not cache-line and all these allocations are supposed to persist for the > >> lifetime of the guest. In case you think that even on x86 it will be > >> possible to see PAGE_SIZE != HV_HYP_PAGE_SIZE you can use alloc_pages() > >> instead. > >> > > Thank you for your feedback, Vitaly! > > > > Will you please tell me how cache-line relates to kmem_cache? > > > > I understand that alloc_pages() would work when PAGE_SIZE <= > > HV_HYP_PAGE_SIZE, but I think that it would not work if PAGE_SIZE > > > HV_HYP_PAGE_SIZE. > > Sorry, my bad: I meant to say "not cache-like" (these allocations are > not 'cache') but the typo made it completely incomprehensible. No worries! Thank you for sharing your thoughts with me, Vitaly. Do you know of any alternatives to kmem_cache that can allocate memory in a specified size (different than a guest page size) with alignment? Memory allocated by alloc_page() is aligned but limited to the guest page size, and kmalloc() can allocate a particular size but it seems that it does not guarantee alignment. I am asking this while considering the changes for architecture independent code. > >> Also, in case the idea is to generalize stuff, what will happen if > >> PAGE_SIZE > HV_HYP_PAGE_SIZE? Who will guarantee proper alignment? > >> > >> I think we can leave hypercall arguments, vp_assist and similar pages > >> alone for now: the code is not going to be shared among architectures > >> anyways. > >> > > About the alignment, kmem_cache_create() aligns memory with its third > > parameter, offset. > > Yes, I know, I was trying to think about a (hypothetical) situation when > page sizes differ: what would be the memory alignment requirements from > the hypervisor for e.g. hypercall arguments? In case it's always > HV_HYP_PAGE_SIZE we're good but could it be PAGE_SIZE (for e.g. TLB > flush hypercall)? I don't know. For x86 this discussion probably makes > no sense. I'm, however, struggling to understand what benefit we will > get from the change. Maybe just leave it as-is for now and fix > arch-independent code only? And later, if we decide to generalize this > code, make another approach? (Not insisting, just a suggestion) Thank you for the suggestion, Vitaly! The introduction of HV_HYP_PAGE_SIZE is weighing the assumption of the future page size—it can be bigger based on the general trend, not smaller, which is a reasonable assumption, I think. > >> > @@ -338,7 +349,10 @@ void __init hyperv_init(void) > >> > guest_id = generate_guest_id(0, LINUX_VERSION_CODE, 0); > >> > wrmsrl(HV_X64_MSR_GUEST_OS_ID, guest_id); > >> > > >> > -hv_hypercall_pg = __vmalloc(PAGE_SIZE, GFP_KERNEL, > >> > PAGE_KERNEL_RX); > >> > +hv_hypercall_pg = kmem_cache_alloc(cachep, GFP_KERNEL); > >> > +if (hv_hypercall_pg) > >> > +set_memory_x((unsigned long)hv_hypercall_pg, 1); > >> > >> _RX is not writeable, right? > >> > > Yes, you are correct. I should use set_memory_ro() in addition to > > set_memory_x(). > > > >> > @@ -416,6 +431,7 @@ void hyperv_cleanup(void) > >> > * let hypercall operations fail safely rather than > >> > * panic the kernel for using invalid hypercall page > >> > */ > >> > +kmem_cache_free(cachep, hv_hypercall_pg); > >> > >> Please don't do that: hyperv_cleanup() is called on kexec/kdump and > >> we're trying to do the bare minimum to allow next kernel to boot. Doing > >> excessive work here will likely lead to consequent problems (we're > >> already crashing the case it's kdump!). > >> > > Thank you for the explanation! I will remove that. > > > > -- > Vitaly
Re: [PATCH 2/6] x86: hv: hv_init.c: Replace alloc_page() with kmem_cache_alloc()
On Fri, Apr 05, 2019 at 01:31:02PM +0200, Vitaly Kuznetsov wrote: > Maya Nakamura writes: > > > @@ -98,18 +99,20 @@ EXPORT_SYMBOL_GPL(hyperv_pcpu_input_arg); > > u32 hv_max_vp_index; > > EXPORT_SYMBOL_GPL(hv_max_vp_index); > > > > +struct kmem_cache *cachep; > > +EXPORT_SYMBOL_GPL(cachep); > > + > > static int hv_cpu_init(unsigned int cpu) > > { > > u64 msr_vp_index; > > struct hv_vp_assist_page **hvp = &hv_vp_assist_page[smp_processor_id()]; > > void **input_arg; > > - struct page *pg; > > > > input_arg = (void **)this_cpu_ptr(hyperv_pcpu_input_arg); > > - pg = alloc_page(GFP_KERNEL); > > - if (unlikely(!pg)) > > + *input_arg = kmem_cache_alloc(cachep, GFP_KERNEL); > > I'm not sure use of kmem_cache is justified here: pages we allocate are > not cache-line and all these allocations are supposed to persist for the > lifetime of the guest. In case you think that even on x86 it will be > possible to see PAGE_SIZE != HV_HYP_PAGE_SIZE you can use alloc_pages() > instead. > Thank you for your feedback, Vitaly! Will you please tell me how cache-line relates to kmem_cache? I understand that alloc_pages() would work when PAGE_SIZE <= HV_HYP_PAGE_SIZE, but I think that it would not work if PAGE_SIZE > HV_HYP_PAGE_SIZE. > Also, in case the idea is to generalize stuff, what will happen if > PAGE_SIZE > HV_HYP_PAGE_SIZE? Who will guarantee proper alignment? > > I think we can leave hypercall arguments, vp_assist and similar pages > alone for now: the code is not going to be shared among architectures > anyways. > About the alignment, kmem_cache_create() aligns memory with its third parameter, offset. > > @@ -338,7 +349,10 @@ void __init hyperv_init(void) > > guest_id = generate_guest_id(0, LINUX_VERSION_CODE, 0); > > wrmsrl(HV_X64_MSR_GUEST_OS_ID, guest_id); > > > > - hv_hypercall_pg = __vmalloc(PAGE_SIZE, GFP_KERNEL, PAGE_KERNEL_RX); > > + hv_hypercall_pg = kmem_cache_alloc(cachep, GFP_KERNEL); > > + if (hv_hypercall_pg) > > + set_memory_x((unsigned long)hv_hypercall_pg, 1); > > _RX is not writeable, right? > Yes, you are correct. I should use set_memory_ro() in addition to set_memory_x(). > > @@ -416,6 +431,7 @@ void hyperv_cleanup(void) > > * let hypercall operations fail safely rather than > > * panic the kernel for using invalid hypercall page > > */ > > + kmem_cache_free(cachep, hv_hypercall_pg); > > Please don't do that: hyperv_cleanup() is called on kexec/kdump and > we're trying to do the bare minimum to allow next kernel to boot. Doing > excessive work here will likely lead to consequent problems (we're > already crashing the case it's kdump!). > Thank you for the explanation! I will remove that.
[PATCH 6/6] Input: hv: Remove dependencies on PAGE_SIZE for ring buffer
Define the ring buffer size as a constant expression because it should not depend on the guest page size. Signed-off-by: Maya Nakamura --- drivers/input/serio/hyperv-keyboard.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/input/serio/hyperv-keyboard.c b/drivers/input/serio/hyperv-keyboard.c index a8b9be3e28db..e1b0feeaeb91 100644 --- a/drivers/input/serio/hyperv-keyboard.c +++ b/drivers/input/serio/hyperv-keyboard.c @@ -83,8 +83,8 @@ struct synth_kbd_keystroke { #define HK_MAXIMUM_MESSAGE_SIZE 256 -#define KBD_VSC_SEND_RING_BUFFER_SIZE (10 * PAGE_SIZE) -#define KBD_VSC_RECV_RING_BUFFER_SIZE (10 * PAGE_SIZE) +#define KBD_VSC_SEND_RING_BUFFER_SIZE (40 * 1024) +#define KBD_VSC_RECV_RING_BUFFER_SIZE (40 * 1024) #define XTKBD_EMUL0 0xe0 #define XTKBD_EMUL1 0xe1 -- 2.17.1
[PATCH 5/6] HID: hv: Remove dependencies on PAGE_SIZE for ring buffer
Define the ring buffer size as a constant expression because it should not depend on the guest page size. Signed-off-by: Maya Nakamura --- drivers/hid/hid-hyperv.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/hid/hid-hyperv.c b/drivers/hid/hid-hyperv.c index 704049e62d58..4d0b63bf8b33 100644 --- a/drivers/hid/hid-hyperv.c +++ b/drivers/hid/hid-hyperv.c @@ -112,8 +112,8 @@ struct synthhid_input_report { #pragma pack(pop) -#define INPUTVSC_SEND_RING_BUFFER_SIZE (10*PAGE_SIZE) -#define INPUTVSC_RECV_RING_BUFFER_SIZE (10*PAGE_SIZE) +#define INPUTVSC_SEND_RING_BUFFER_SIZE (40 * 1024) +#define INPUTVSC_RECV_RING_BUFFER_SIZE (40 * 1024) enum pipe_prot_msg_type { -- 2.17.1
[PATCH 4/6] x86: hv: mmu.c: Replace page definitions with Hyper-V specific ones
Replace PAGE_SHIFT, PAGE_SIZE, and PAGE_MASK with HV_HYP_PAGE_SHIFT, HV_HYP_PAGE_SIZE, and HV_HYP_PAGE_MASK, respectively, because the guest page size and hypervisor page size concepts are different, even though they happen to be the same value on x86. Signed-off-by: Maya Nakamura --- arch/x86/hyperv/mmu.c | 15 --- 1 file changed, 8 insertions(+), 7 deletions(-) diff --git a/arch/x86/hyperv/mmu.c b/arch/x86/hyperv/mmu.c index e65d7fe6489f..175f6dcc7362 100644 --- a/arch/x86/hyperv/mmu.c +++ b/arch/x86/hyperv/mmu.c @@ -15,7 +15,7 @@ #include /* Each gva in gva_list encodes up to 4096 pages to flush */ -#define HV_TLB_FLUSH_UNIT (4096 * PAGE_SIZE) +#define HV_TLB_FLUSH_UNIT (4096 * HV_HYP_PAGE_SIZE) static u64 hyperv_flush_tlb_others_ex(const struct cpumask *cpus, const struct flush_tlb_info *info); @@ -32,15 +32,15 @@ static inline int fill_gva_list(u64 gva_list[], int offset, do { diff = end > cur ? end - cur : 0; - gva_list[gva_n] = cur & PAGE_MASK; + gva_list[gva_n] = cur & HV_HYP_PAGE_MASK; /* * Lower 12 bits encode the number of additional * pages to flush (in addition to the 'cur' page). */ if (diff >= HV_TLB_FLUSH_UNIT) - gva_list[gva_n] |= ~PAGE_MASK; + gva_list[gva_n] |= ~HV_HYP_PAGE_MASK; else if (diff) - gva_list[gva_n] |= (diff - 1) >> PAGE_SHIFT; + gva_list[gva_n] |= (diff - 1) >> HV_HYP_PAGE_SHIFT; cur += HV_TLB_FLUSH_UNIT; gva_n++; @@ -129,7 +129,8 @@ static void hyperv_flush_tlb_others(const struct cpumask *cpus, * We can flush not more than max_gvas with one hypercall. Flush the * whole address space if we were asked to do more. */ - max_gvas = (PAGE_SIZE - sizeof(*flush)) / sizeof(flush->gva_list[0]); + max_gvas = (HV_HYP_PAGE_SIZE - sizeof(*flush)) / + sizeof(flush->gva_list[0]); if (info->end == TLB_FLUSH_ALL) { flush->flags |= HV_FLUSH_NON_GLOBAL_MAPPINGS_ONLY; @@ -200,9 +201,9 @@ static u64 hyperv_flush_tlb_others_ex(const struct cpumask *cpus, * whole address space if we were asked to do more. */ max_gvas = - (PAGE_SIZE - sizeof(*flush) - nr_bank * + (HV_HYP_PAGE_SIZE - sizeof(*flush) - nr_bank * sizeof(flush->hv_vp_set.bank_contents[0])) / - sizeof(flush->gva_list[0]); +sizeof(flush->gva_list[0]); if (info->end == TLB_FLUSH_ALL) { flush->flags |= HV_FLUSH_NON_GLOBAL_MAPPINGS_ONLY; -- 2.17.1
[PATCH 3/6] hv: vmbus: Replace page definition with Hyper-V specific one
Replace PAGE_SIZE with HV_HYP_PAGE_SIZE because the guest page size may not be 4096 on all architectures and Hyper-V always runs with a page size of 4096. Signed-off-by: Maya Nakamura --- drivers/hv/hyperv_vmbus.h | 8 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/hv/hyperv_vmbus.h b/drivers/hv/hyperv_vmbus.h index a94aab94e0b5..00ad573870e9 100644 --- a/drivers/hv/hyperv_vmbus.h +++ b/drivers/hv/hyperv_vmbus.h @@ -207,11 +207,11 @@ int hv_ringbuffer_read(struct vmbus_channel *channel, u64 *requestid, bool raw); /* - * Maximum channels is determined by the size of the interrupt page - * which is PAGE_SIZE. 1/2 of PAGE_SIZE is for send endpoint interrupt - * and the other is receive endpoint interrupt + * Maximum channels, 16348, is determined by the size of the interrupt page, + * which is HV_HYP_PAGE_SIZE. 1/2 of HV_HYP_PAGE_SIZE is to send endpoint + * interrupt, and the other is to receive endpoint interrupt. */ -#define MAX_NUM_CHANNELS ((PAGE_SIZE >> 1) << 3) /* 16348 channels */ +#define MAX_NUM_CHANNELS ((HV_HYP_PAGE_SIZE >> 1) << 3) /* The value here must be in multiple of 32 */ /* TODO: Need to make this configurable */ -- 2.17.1
[PATCH 2/6] x86: hv: hv_init.c: Replace alloc_page() with kmem_cache_alloc()
Switch from the function that allocates a single Linux guest page to a different one to use a Hyper-V page because the guest page size and hypervisor page size concepts are different, even though they happen to be the same value on x86. Signed-off-by: Maya Nakamura --- arch/x86/hyperv/hv_init.c | 38 +++--- 1 file changed, 27 insertions(+), 11 deletions(-) diff --git a/arch/x86/hyperv/hv_init.c b/arch/x86/hyperv/hv_init.c index e4ba467a9fc6..5f946135aa18 100644 --- a/arch/x86/hyperv/hv_init.c +++ b/arch/x86/hyperv/hv_init.c @@ -31,6 +31,7 @@ #include #include #include +#include #ifdef CONFIG_HYPERV_TSCPAGE @@ -98,18 +99,20 @@ EXPORT_SYMBOL_GPL(hyperv_pcpu_input_arg); u32 hv_max_vp_index; EXPORT_SYMBOL_GPL(hv_max_vp_index); +struct kmem_cache *cachep; +EXPORT_SYMBOL_GPL(cachep); + static int hv_cpu_init(unsigned int cpu) { u64 msr_vp_index; struct hv_vp_assist_page **hvp = &hv_vp_assist_page[smp_processor_id()]; void **input_arg; - struct page *pg; input_arg = (void **)this_cpu_ptr(hyperv_pcpu_input_arg); - pg = alloc_page(GFP_KERNEL); - if (unlikely(!pg)) + *input_arg = kmem_cache_alloc(cachep, GFP_KERNEL); + + if (unlikely(!*input_arg)) return -ENOMEM; - *input_arg = page_address(pg); hv_get_vp_index(msr_vp_index); @@ -122,14 +125,12 @@ static int hv_cpu_init(unsigned int cpu) return 0; if (!*hvp) - *hvp = __vmalloc(PAGE_SIZE, GFP_KERNEL, PAGE_KERNEL); + *hvp = kmem_cache_alloc(cachep, GFP_KERNEL); if (*hvp) { u64 val; - val = vmalloc_to_pfn(*hvp); - val = (val << HV_X64_MSR_VP_ASSIST_PAGE_ADDRESS_SHIFT) | - HV_X64_MSR_VP_ASSIST_PAGE_ENABLE; + val = virt_to_phys(*hvp) | HV_X64_MSR_VP_ASSIST_PAGE_ENABLE; wrmsrl(HV_X64_MSR_VP_ASSIST_PAGE, val); } @@ -233,17 +234,22 @@ static int hv_cpu_die(unsigned int cpu) unsigned long flags; void **input_arg; void *input_pg = NULL; + struct hv_vp_assist_page **hvp = &hv_vp_assist_page[cpu]; local_irq_save(flags); input_arg = (void **)this_cpu_ptr(hyperv_pcpu_input_arg); input_pg = *input_arg; *input_arg = NULL; local_irq_restore(flags); - free_page((unsigned long)input_pg); + kmem_cache_free(cachep, input_pg); + input_pg = NULL; if (hv_vp_assist_page && hv_vp_assist_page[cpu]) wrmsrl(HV_X64_MSR_VP_ASSIST_PAGE, 0); + kmem_cache_free(cachep, *hvp); + *hvp = NULL; + if (hv_reenlightenment_cb == NULL) return 0; @@ -325,6 +331,11 @@ void __init hyperv_init(void) goto free_vp_index; } + cachep = kmem_cache_create("hyperv_pages", HV_HYP_PAGE_SIZE, + HV_HYP_PAGE_SIZE, 0, NULL); + if (!cachep) + goto free_vp_assist_page; + cpuhp = cpuhp_setup_state(CPUHP_AP_ONLINE_DYN, "x86/hyperv_init:online", hv_cpu_init, hv_cpu_die); if (cpuhp < 0) @@ -338,7 +349,10 @@ void __init hyperv_init(void) guest_id = generate_guest_id(0, LINUX_VERSION_CODE, 0); wrmsrl(HV_X64_MSR_GUEST_OS_ID, guest_id); - hv_hypercall_pg = __vmalloc(PAGE_SIZE, GFP_KERNEL, PAGE_KERNEL_RX); + hv_hypercall_pg = kmem_cache_alloc(cachep, GFP_KERNEL); + if (hv_hypercall_pg) + set_memory_x((unsigned long)hv_hypercall_pg, 1); + if (hv_hypercall_pg == NULL) { wrmsrl(HV_X64_MSR_GUEST_OS_ID, 0); goto remove_cpuhp_state; @@ -346,7 +360,8 @@ void __init hyperv_init(void) rdmsrl(HV_X64_MSR_HYPERCALL, hypercall_msr.as_uint64); hypercall_msr.enable = 1; - hypercall_msr.guest_physical_address = vmalloc_to_pfn(hv_hypercall_pg); + hypercall_msr.guest_physical_address = virt_to_phys(hv_hypercall_pg) >> + HV_HYP_PAGE_SHIFT; wrmsrl(HV_X64_MSR_HYPERCALL, hypercall_msr.as_uint64); hv_apic_init(); @@ -416,6 +431,7 @@ void hyperv_cleanup(void) * let hypercall operations fail safely rather than * panic the kernel for using invalid hypercall page */ + kmem_cache_free(cachep, hv_hypercall_pg); hv_hypercall_pg = NULL; /* Reset the hypercall page */ -- 2.17.1
[PATCH 1/6] x86: hv: hyperv-tlfs.h: Create and use Hyper-V page definitions
Define HV_HYP_PAGE_SHIFT, HV_HYP_PAGE_SIZE, and HV_HYP_PAGE_MASK because the Linux guest page size and hypervisor page size concepts are different, even though they happen to be the same value on x86. Also, replace PAGE_SIZE with HV_HYP_PAGE_SIZE. Signed-off-by: Maya Nakamura --- arch/x86/include/asm/hyperv-tlfs.h | 12 +++- 1 file changed, 11 insertions(+), 1 deletion(-) diff --git a/arch/x86/include/asm/hyperv-tlfs.h b/arch/x86/include/asm/hyperv-tlfs.h index cdf44aa9a501..44bd68aefd00 100644 --- a/arch/x86/include/asm/hyperv-tlfs.h +++ b/arch/x86/include/asm/hyperv-tlfs.h @@ -12,6 +12,16 @@ #include #include +/* + * While not explicitly listed in the TLFS, Hyper-V always runs with a page size + * of 4096. These definitions are used when communicating with Hyper-V using + * guest physical pages and guest physical page addresses, since the guest page + * size may not be 4096 on all architectures. + */ +#define HV_HYP_PAGE_SHIFT 12 +#define HV_HYP_PAGE_SIZE BIT(HV_HYP_PAGE_SHIFT) +#define HV_HYP_PAGE_MASK (~(HV_HYP_PAGE_SIZE - 1)) + /* * The below CPUID leaves are present if VersionAndFeatures.HypervisorPresent * is set by CPUID(HvCpuIdFunctionVersionAndFeatures). @@ -841,7 +851,7 @@ union hv_gpa_page_range { * count is equal with how many entries of union hv_gpa_page_range can * be populated into the input parameter page. */ -#define HV_MAX_FLUSH_REP_COUNT ((PAGE_SIZE - 2 * sizeof(u64)) /\ +#define HV_MAX_FLUSH_REP_COUNT ((HV_HYP_PAGE_SIZE - 2 * sizeof(u64)) / \ sizeof(union hv_gpa_page_range)) struct hv_guest_mapping_flush_list { -- 2.17.1
[PATCH 0/6] hv: Remove dependencies on guest page size
The Linux guest page size and hypervisor page size concepts are different, even though they happen to be the same value on x86. Hyper-V code mixes up the two, so this patchset begins to address that by creating and using a set of Hyper-V specific page definitions. A major benefit of those new definitions is that they support non-x86 architectures, such as ARM64, that use different page sizes. On ARM64, the guest page size may not be 4096, and Hyper-V always runs with a page size of 4096. In this patchset, the first two patches lay the foundation for the others, creating definitions and preparing for memory allocations of Hyper-V pages. Subsequent patches apply the definitions where the guest VM and Hyper-V communicate, and where the code intends to use the Hyper-V page size. The last two patches set the ring buffer size to a fixed value, removing the dependencies on the guest page size. This is the initial set of changes to the Hyper-V code, and future patches will make additional changes using the same foundation, for example, replace __vmalloc() and related functions when Hyper-V pages are intended. Maya Nakamura (6): x86: hv: hyperv-tlfs.h: Create and use Hyper-V page definitions x86: hv: hv_init.c: Replace alloc_page() with kmem_cache_alloc() hv: vmbus: Replace page definition with Hyper-V specific one x86: hv: mmu.c: Replace page definitions with Hyper-V specific ones HID: hv: Remove dependencies on PAGE_SIZE for ring buffer Input: hv: Remove dependencies on PAGE_SIZE for ring buffer arch/x86/hyperv/hv_init.c | 38 +++ arch/x86/hyperv/mmu.c | 15 ++- arch/x86/include/asm/hyperv-tlfs.h| 12 - drivers/hid/hid-hyperv.c | 4 +-- drivers/hv/hyperv_vmbus.h | 8 +++--- drivers/input/serio/hyperv-keyboard.c | 4 +-- 6 files changed, 54 insertions(+), 27 deletions(-) -- 2.17.1
[PATCH v5 3/3] PCI: hv: Refactor hv_irq_unmask() to use cpumask_to_vpset()
Remove the duplicate implementation of cpumask_to_vpset() and use the shared implementation. Export hv_max_vp_index, which is required by cpumask_to_vpset(). Signed-off-by: Maya Nakamura Reviewed-by: Michael Kelley Reviewed-by: Vitaly Kuznetsov Tested-by: Vitaly Kuznetsov --- Changes in v5: - Revise the commit message to clarify the changes based on feedback. - Add the Reviewed-by and Tested-by tags. Changes in v4: - Replace GFP_KERNEL with GFP_ATOMIC for alloc_cpumask_var(). - Update the commit message. Changes in v3: - Modify to catch all failures from cpumask_to_vpset(). - Correct the v2 change log about the commit message. Changes in v2: - Remove unnecessary nr_bank initialization. - Delete two unnecessary dev_err()'s. - Unlock before returning. - Update the commit message. arch/x86/hyperv/hv_init.c | 1 + drivers/pci/controller/pci-hyperv.c | 38 + 2 files changed, 18 insertions(+), 21 deletions(-) diff --git a/arch/x86/hyperv/hv_init.c b/arch/x86/hyperv/hv_init.c index 7abb09e2eeb8..7f2eed1fc81b 100644 --- a/arch/x86/hyperv/hv_init.c +++ b/arch/x86/hyperv/hv_init.c @@ -96,6 +96,7 @@ void __percpu **hyperv_pcpu_input_arg; EXPORT_SYMBOL_GPL(hyperv_pcpu_input_arg); u32 hv_max_vp_index; +EXPORT_SYMBOL_GPL(hv_max_vp_index); static int hv_cpu_init(unsigned int cpu) { diff --git a/drivers/pci/controller/pci-hyperv.c b/drivers/pci/controller/pci-hyperv.c index d71695db1ba0..95441a35eceb 100644 --- a/drivers/pci/controller/pci-hyperv.c +++ b/drivers/pci/controller/pci-hyperv.c @@ -391,8 +391,6 @@ struct hv_interrupt_entry { u32 data; }; -#define HV_VP_SET_BANK_COUNT_MAX 5 /* current implementation limit */ - /* * flags for hv_device_interrupt_target.flags */ @@ -908,12 +906,12 @@ static void hv_irq_unmask(struct irq_data *data) struct retarget_msi_interrupt *params; struct hv_pcibus_device *hbus; struct cpumask *dest; + cpumask_var_t tmp; struct pci_bus *pbus; struct pci_dev *pdev; unsigned long flags; u32 var_size = 0; - int cpu_vmbus; - int cpu; + int cpu, nr_bank; u64 res; dest = irq_data_get_effective_affinity_mask(data); @@ -953,29 +951,27 @@ static void hv_irq_unmask(struct irq_data *data) */ params->int_target.flags |= HV_DEVICE_INTERRUPT_TARGET_PROCESSOR_SET; - params->int_target.vp_set.valid_bank_mask = - (1ull << HV_VP_SET_BANK_COUNT_MAX) - 1; + + if (!alloc_cpumask_var(&tmp, GFP_ATOMIC)) { + res = 1; + goto exit_unlock; + } + + cpumask_and(tmp, dest, cpu_online_mask); + nr_bank = cpumask_to_vpset(¶ms->int_target.vp_set, tmp); + free_cpumask_var(tmp); + + if (nr_bank <= 0) { + res = 1; + goto exit_unlock; + } /* * var-sized hypercall, var-size starts after vp_mask (thus * vp_set.format does not count, but vp_set.valid_bank_mask * does). */ - var_size = 1 + HV_VP_SET_BANK_COUNT_MAX; - - for_each_cpu_and(cpu, dest, cpu_online_mask) { - cpu_vmbus = hv_cpu_number_to_vp_number(cpu); - - if (cpu_vmbus >= HV_VP_SET_BANK_COUNT_MAX * 64) { - dev_err(&hbus->hdev->device, - "too high CPU %d", cpu_vmbus); - res = 1; - goto exit_unlock; - } - - params->int_target.vp_set.bank_contents[cpu_vmbus / 64] |= - (1ULL << (cpu_vmbus & 63)); - } + var_size = 1 + nr_bank; } else { for_each_cpu_and(cpu, dest, cpu_online_mask) { params->int_target.vp_mask |= -- 2.17.1
[PATCH v5 2/3] PCI: hv: Replace hv_vp_set with hv_vpset
Remove a duplicate definition of VP set (hv_vp_set) and use the common definition (hv_vpset) that is used in other places. Change the order of the members in struct hv_pcibus_device so that the declaration of retarget_msi_interrupt_params is the last member. Struct hv_vpset, which contains a flexible array, is nested two levels deep in struct hv_pcibus_device via retarget_msi_interrupt_params. Add a comment that retarget_msi_interrupt_params should be the last member of struct hv_pcibus_device. Signed-off-by: Maya Nakamura Reviewed-by: Michael Kelley Reviewed-by: Vitaly Kuznetsov Tested-by: Vitaly Kuznetsov --- Changes in v5: - Remove the code added in v4. - Delete the v4 code change related comment from the commit message. - Add the Reviewed-by and Tested-by tags. Changes in v4: - Add __aligned(8) to struct retarget_msi_interrupt. - Update the commit message. Change in v3: - Correct the v2 change log. Change in v2: - Update the commit message. drivers/pci/controller/pci-hyperv.c | 25 - 1 file changed, 12 insertions(+), 13 deletions(-) diff --git a/drivers/pci/controller/pci-hyperv.c b/drivers/pci/controller/pci-hyperv.c index 73862eef09ec..d71695db1ba0 100644 --- a/drivers/pci/controller/pci-hyperv.c +++ b/drivers/pci/controller/pci-hyperv.c @@ -393,12 +393,6 @@ struct hv_interrupt_entry { #define HV_VP_SET_BANK_COUNT_MAX 5 /* current implementation limit */ -struct hv_vp_set { - u64 format; /* 0 (HvGenericSetSparse4k) */ - u64 valid_banks; - u64 masks[HV_VP_SET_BANK_COUNT_MAX]; -}; - /* * flags for hv_device_interrupt_target.flags */ @@ -410,7 +404,7 @@ struct hv_device_interrupt_target { u32 flags; union { u64 vp_mask; - struct hv_vp_set vp_set; + struct hv_vpset vp_set; }; }; @@ -460,12 +454,16 @@ struct hv_pcibus_device { struct msi_controller msi_chip; struct irq_domain *irq_domain; - /* hypercall arg, must not cross page boundary */ - struct retarget_msi_interrupt retarget_msi_interrupt_params; - spinlock_t retarget_msi_interrupt_lock; struct workqueue_struct *wq; + + /* hypercall arg, must not cross page boundary */ + struct retarget_msi_interrupt retarget_msi_interrupt_params; + + /* +* Don't put anything here: retarget_msi_interrupt_params must be last +*/ }; /* @@ -955,12 +953,13 @@ static void hv_irq_unmask(struct irq_data *data) */ params->int_target.flags |= HV_DEVICE_INTERRUPT_TARGET_PROCESSOR_SET; - params->int_target.vp_set.valid_banks = + params->int_target.vp_set.valid_bank_mask = (1ull << HV_VP_SET_BANK_COUNT_MAX) - 1; /* * var-sized hypercall, var-size starts after vp_mask (thus -* vp_set.format does not count, but vp_set.valid_banks does). +* vp_set.format does not count, but vp_set.valid_bank_mask +* does). */ var_size = 1 + HV_VP_SET_BANK_COUNT_MAX; @@ -974,7 +973,7 @@ static void hv_irq_unmask(struct irq_data *data) goto exit_unlock; } - params->int_target.vp_set.masks[cpu_vmbus / 64] |= + params->int_target.vp_set.bank_contents[cpu_vmbus / 64] |= (1ULL << (cpu_vmbus & 63)); } } else { -- 2.17.1
[PATCH v5 1/3] PCI: hv: Add __aligned(8) to struct retarget_msi_interrupt
Because Hyper-V requires that hypercall arguments be aligned on an 8 byte boundary, add __aligned(8) to struct retarget_msi_interrupt. Link: https://lore.kernel.org/lkml/87k1hlqlby@vitty.brq.redhat.com/ Signed-off-by: Maya Nakamura --- drivers/pci/controller/pci-hyperv.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/pci/controller/pci-hyperv.c b/drivers/pci/controller/pci-hyperv.c index 9ba4d12c179c..73862eef09ec 100644 --- a/drivers/pci/controller/pci-hyperv.c +++ b/drivers/pci/controller/pci-hyperv.c @@ -420,7 +420,7 @@ struct retarget_msi_interrupt { struct hv_interrupt_entry int_entry; u64 reserved2; struct hv_device_interrupt_target int_target; -} __packed; +} __packed __aligned(8); /* * Driver specific state. -- 2.17.1
[PATCH v5 0/3] PCI: hv: Refactor hv_irq_unmask() to use hv_vpset and cpumask_to_vpset()
This patchset removes a duplicate definition of VP set (hv_vp_set) and uses the common definition (hv_vpset) that is used in other places. It changes the order of the members in struct hv_pcibus_device due to flexible array in hv_vpset. It also removes the duplicate implementation of cpumask_to_vpset(), uses the shared implementation, and exports hv_max_vp_index, which is required by cpumask_to_vpset(). It adds __aligned(8) to struct retarget_msi_interrupt because Hyper-V requires that hypercall arguments be aligned on an 8 byte boundary. This is now a new patch, separate from one of the previous patches. Maya Nakamura (3): PCI: hv: Add __aligned(8) to struct retarget_msi_interrupt PCI: hv: Replace hv_vp_set with hv_vpset PCI: hv: Refactor hv_irq_unmask() to use cpumask_to_vpset() arch/x86/hyperv/hv_init.c | 1 + drivers/pci/controller/pci-hyperv.c | 61 + 2 files changed, 29 insertions(+), 33 deletions(-) -- 2.17.1
[PATCH v4 2/2] PCI: hv: Refactor hv_irq_unmask() to use cpumask_to_vpset()
Remove the duplicate implementation of cpumask_to_vpset() and use the shared implementation. Export hv_max_vp_index, which is required by cpumask_to_vpset(). Apply changes to hv_irq_unmask() based on feedback. Based on Vitaly's finding, use GFP_ATOMIC instead of GFP_KERNEL for alloc_cpumask_var() because hv_irq_unmask() runs while a spinlock is held. Signed-off-by: Maya Nakamura --- Changes in v4: - Replace GFP_KERNEL with GFP_ATOMIC for alloc_cpumask_var(). - Update the commit message. Changes in v3: - Modify to catch all failures from cpumask_to_vpset(). - Correct the v2 change log about the commit message. Changes in v2: - Remove unnecessary nr_bank initialization. - Delete two unnecessary dev_err()'s. - Unlock before returning. - Update the commit message. arch/x86/hyperv/hv_init.c | 1 + drivers/pci/controller/pci-hyperv.c | 38 + 2 files changed, 18 insertions(+), 21 deletions(-) diff --git a/arch/x86/hyperv/hv_init.c b/arch/x86/hyperv/hv_init.c index 7abb09e2eeb8..7f2eed1fc81b 100644 --- a/arch/x86/hyperv/hv_init.c +++ b/arch/x86/hyperv/hv_init.c @@ -96,6 +96,7 @@ void __percpu **hyperv_pcpu_input_arg; EXPORT_SYMBOL_GPL(hyperv_pcpu_input_arg); u32 hv_max_vp_index; +EXPORT_SYMBOL_GPL(hv_max_vp_index); static int hv_cpu_init(unsigned int cpu) { diff --git a/drivers/pci/controller/pci-hyperv.c b/drivers/pci/controller/pci-hyperv.c index d71695db1ba0..95441a35eceb 100644 --- a/drivers/pci/controller/pci-hyperv.c +++ b/drivers/pci/controller/pci-hyperv.c @@ -391,8 +391,6 @@ struct hv_interrupt_entry { u32 data; }; -#define HV_VP_SET_BANK_COUNT_MAX 5 /* current implementation limit */ - /* * flags for hv_device_interrupt_target.flags */ @@ -908,12 +906,12 @@ static void hv_irq_unmask(struct irq_data *data) struct retarget_msi_interrupt *params; struct hv_pcibus_device *hbus; struct cpumask *dest; + cpumask_var_t tmp; struct pci_bus *pbus; struct pci_dev *pdev; unsigned long flags; u32 var_size = 0; - int cpu_vmbus; - int cpu; + int cpu, nr_bank; u64 res; dest = irq_data_get_effective_affinity_mask(data); @@ -953,29 +951,27 @@ static void hv_irq_unmask(struct irq_data *data) */ params->int_target.flags |= HV_DEVICE_INTERRUPT_TARGET_PROCESSOR_SET; - params->int_target.vp_set.valid_bank_mask = - (1ull << HV_VP_SET_BANK_COUNT_MAX) - 1; + + if (!alloc_cpumask_var(&tmp, GFP_ATOMIC)) { + res = 1; + goto exit_unlock; + } + + cpumask_and(tmp, dest, cpu_online_mask); + nr_bank = cpumask_to_vpset(¶ms->int_target.vp_set, tmp); + free_cpumask_var(tmp); + + if (nr_bank <= 0) { + res = 1; + goto exit_unlock; + } /* * var-sized hypercall, var-size starts after vp_mask (thus * vp_set.format does not count, but vp_set.valid_bank_mask * does). */ - var_size = 1 + HV_VP_SET_BANK_COUNT_MAX; - - for_each_cpu_and(cpu, dest, cpu_online_mask) { - cpu_vmbus = hv_cpu_number_to_vp_number(cpu); - - if (cpu_vmbus >= HV_VP_SET_BANK_COUNT_MAX * 64) { - dev_err(&hbus->hdev->device, - "too high CPU %d", cpu_vmbus); - res = 1; - goto exit_unlock; - } - - params->int_target.vp_set.bank_contents[cpu_vmbus / 64] |= - (1ULL << (cpu_vmbus & 63)); - } + var_size = 1 + nr_bank; } else { for_each_cpu_and(cpu, dest, cpu_online_mask) { params->int_target.vp_mask |= -- 2.17.1
[PATCH v4 1/2] PCI: hv: Replace hv_vp_set with hv_vpset
Remove a duplicate definition of VP set (hv_vp_set) and use the common definition (hv_vpset) that is used in other places. Change the order of the members in struct hv_pcibus_device so that the declaration of retarget_msi_interrupt_params is the last member. Struct hv_vpset, which contains a flexible array, is nested two levels deep in struct hv_pcibus_device via retarget_msi_interrupt_params. Add a comment that retarget_msi_interrupt_params should be the last member of struct hv_pcibus_device. Based on Vitaly's finding, add __aligned(8) to struct retarget_msi_interrupt because Hyper-V requires that hypercall arguments be aligned on an 8 byte boundary. Signed-off-by: Maya Nakamura --- Changes in v4: - Add __aligned(8) to struct retarget_msi_interrupt. - Update the commit message. Change in v3: - Correct the v2 change log. Change in v2: - Update the commit message. drivers/pci/controller/pci-hyperv.c | 27 +-- 1 file changed, 13 insertions(+), 14 deletions(-) diff --git a/drivers/pci/controller/pci-hyperv.c b/drivers/pci/controller/pci-hyperv.c index 9ba4d12c179c..d71695db1ba0 100644 --- a/drivers/pci/controller/pci-hyperv.c +++ b/drivers/pci/controller/pci-hyperv.c @@ -393,12 +393,6 @@ struct hv_interrupt_entry { #define HV_VP_SET_BANK_COUNT_MAX 5 /* current implementation limit */ -struct hv_vp_set { - u64 format; /* 0 (HvGenericSetSparse4k) */ - u64 valid_banks; - u64 masks[HV_VP_SET_BANK_COUNT_MAX]; -}; - /* * flags for hv_device_interrupt_target.flags */ @@ -410,7 +404,7 @@ struct hv_device_interrupt_target { u32 flags; union { u64 vp_mask; - struct hv_vp_set vp_set; + struct hv_vpset vp_set; }; }; @@ -420,7 +414,7 @@ struct retarget_msi_interrupt { struct hv_interrupt_entry int_entry; u64 reserved2; struct hv_device_interrupt_target int_target; -} __packed; +} __packed __aligned(8); /* * Driver specific state. @@ -460,12 +454,16 @@ struct hv_pcibus_device { struct msi_controller msi_chip; struct irq_domain *irq_domain; - /* hypercall arg, must not cross page boundary */ - struct retarget_msi_interrupt retarget_msi_interrupt_params; - spinlock_t retarget_msi_interrupt_lock; struct workqueue_struct *wq; + + /* hypercall arg, must not cross page boundary */ + struct retarget_msi_interrupt retarget_msi_interrupt_params; + + /* +* Don't put anything here: retarget_msi_interrupt_params must be last +*/ }; /* @@ -955,12 +953,13 @@ static void hv_irq_unmask(struct irq_data *data) */ params->int_target.flags |= HV_DEVICE_INTERRUPT_TARGET_PROCESSOR_SET; - params->int_target.vp_set.valid_banks = + params->int_target.vp_set.valid_bank_mask = (1ull << HV_VP_SET_BANK_COUNT_MAX) - 1; /* * var-sized hypercall, var-size starts after vp_mask (thus -* vp_set.format does not count, but vp_set.valid_banks does). +* vp_set.format does not count, but vp_set.valid_bank_mask +* does). */ var_size = 1 + HV_VP_SET_BANK_COUNT_MAX; @@ -974,7 +973,7 @@ static void hv_irq_unmask(struct irq_data *data) goto exit_unlock; } - params->int_target.vp_set.masks[cpu_vmbus / 64] |= + params->int_target.vp_set.bank_contents[cpu_vmbus / 64] |= (1ULL << (cpu_vmbus & 63)); } } else { -- 2.17.1
[PATCH v4 0/2] PCI: hv: Refactor hv_irq_unmask() to use hv_vpset and cpumask_to_vpset()
This patchset removes a duplicate definition of VP set (hv_vp_set) and uses the common definition (hv_vpset) that is used in other places. It changes the order of the members in struct hv_pcibus_device due to flexible array in hv_vpset. It also removes the duplicate implementation of cpumask_to_vpset(), uses the shared implementation, and exports hv_max_vp_index, which is required by cpumask_to_vpset(). Based on Vitaly's findings, two changes were applied: replace GFP_KERNEL with GFP_ATOMIC for alloc_cpumask_var() because hv_irq_unmask() runs while a spinlock is held, and add __aligned(8) to struct retarget_msi_interrupt because Hyper-V requires that hypercall arguments be aligned on an 8 byte boundary. Vitaly, thank you for finding the issues, and Lorenzo and Michael, thank you for your guidance and support! Maya Nakamura (2): PCI: hv: Replace hv_vp_set with hv_vpset PCI: hv: Refactor hv_irq_unmask() to use cpumask_to_vpset() arch/x86/hyperv/hv_init.c | 1 + drivers/pci/controller/pci-hyperv.c | 61 + 2 files changed, 29 insertions(+), 33 deletions(-) -- 2.17.1
[PATCH v3 2/2] PCI: hv: Refactor hv_irq_unmask() to use cpumask_to_vpset()
Remove the duplicate implementation of cpumask_to_vpset() and use the shared implementation. Export hv_max_vp_index, which is required by cpumask_to_vpset(). Apply changes to hv_irq_unmask() based on feedback. Signed-off-by: Maya Nakamura --- Changes in v3: - Modify to catch all failures from cpumask_to_vpset(). - Correct the v2 change log about the commit message. Changes in v2: - Remove unnecessary nr_bank initialization. - Delete two unnecessary dev_err()'s. - Unlock before returning. - Update the commit message. arch/x86/hyperv/hv_init.c | 1 + drivers/pci/controller/pci-hyperv.c | 38 + 2 files changed, 18 insertions(+), 21 deletions(-) diff --git a/arch/x86/hyperv/hv_init.c b/arch/x86/hyperv/hv_init.c index 7abb09e2eeb8..7f2eed1fc81b 100644 --- a/arch/x86/hyperv/hv_init.c +++ b/arch/x86/hyperv/hv_init.c @@ -96,6 +96,7 @@ void __percpu **hyperv_pcpu_input_arg; EXPORT_SYMBOL_GPL(hyperv_pcpu_input_arg); u32 hv_max_vp_index; +EXPORT_SYMBOL_GPL(hv_max_vp_index); static int hv_cpu_init(unsigned int cpu) { diff --git a/drivers/pci/controller/pci-hyperv.c b/drivers/pci/controller/pci-hyperv.c index da8b58d8630d..a78def332bbc 100644 --- a/drivers/pci/controller/pci-hyperv.c +++ b/drivers/pci/controller/pci-hyperv.c @@ -391,8 +391,6 @@ struct hv_interrupt_entry { u32 data; }; -#define HV_VP_SET_BANK_COUNT_MAX 5 /* current implementation limit */ - /* * flags for hv_device_interrupt_target.flags */ @@ -908,12 +906,12 @@ static void hv_irq_unmask(struct irq_data *data) struct retarget_msi_interrupt *params; struct hv_pcibus_device *hbus; struct cpumask *dest; + cpumask_var_t tmp; struct pci_bus *pbus; struct pci_dev *pdev; unsigned long flags; u32 var_size = 0; - int cpu_vmbus; - int cpu; + int cpu, nr_bank; u64 res; dest = irq_data_get_effective_affinity_mask(data); @@ -953,29 +951,27 @@ static void hv_irq_unmask(struct irq_data *data) */ params->int_target.flags |= HV_DEVICE_INTERRUPT_TARGET_PROCESSOR_SET; - params->int_target.vp_set.valid_bank_mask = - (1ull << HV_VP_SET_BANK_COUNT_MAX) - 1; + + if (!alloc_cpumask_var(&tmp, GFP_KERNEL)) { + res = 1; + goto exit_unlock; + } + + cpumask_and(tmp, dest, cpu_online_mask); + nr_bank = cpumask_to_vpset(¶ms->int_target.vp_set, tmp); + free_cpumask_var(tmp); + + if (nr_bank <= 0) { + res = 1; + goto exit_unlock; + } /* * var-sized hypercall, var-size starts after vp_mask (thus * vp_set.format does not count, but vp_set.valid_bank_mask * does). */ - var_size = 1 + HV_VP_SET_BANK_COUNT_MAX; - - for_each_cpu_and(cpu, dest, cpu_online_mask) { - cpu_vmbus = hv_cpu_number_to_vp_number(cpu); - - if (cpu_vmbus >= HV_VP_SET_BANK_COUNT_MAX * 64) { - dev_err(&hbus->hdev->device, - "too high CPU %d", cpu_vmbus); - res = 1; - goto exit_unlock; - } - - params->int_target.vp_set.bank_contents[cpu_vmbus / 64] |= - (1ULL << (cpu_vmbus & 63)); - } + var_size = 1 + nr_bank; } else { for_each_cpu_and(cpu, dest, cpu_online_mask) { params->int_target.vp_mask |= -- 2.17.1
[PATCH v3 1/2] PCI: hv: Replace hv_vp_set with hv_vpset
Remove a duplicate definition of VP set (hv_vp_set) and use the common definition (hv_vpset) that is used in other places. Change the order of the members in struct hv_pcibus_device so that the declaration of retarget_msi_interrupt_params is the last member. Struct hv_vpset, which contains a flexible array, is nested two levels deep in struct hv_pcibus_device via retarget_msi_interrupt_params. Add a comment that retarget_msi_interrupt_params should be the last member of struct hv_pcibus_device. Signed-off-by: Maya Nakamura --- Change in v3: - Correct the v2 change log. Change in v2: - Update the commit message. drivers/pci/controller/pci-hyperv.c | 25 - 1 file changed, 12 insertions(+), 13 deletions(-) diff --git a/drivers/pci/controller/pci-hyperv.c b/drivers/pci/controller/pci-hyperv.c index 9ba4d12c179c..da8b58d8630d 100644 --- a/drivers/pci/controller/pci-hyperv.c +++ b/drivers/pci/controller/pci-hyperv.c @@ -393,12 +393,6 @@ struct hv_interrupt_entry { #define HV_VP_SET_BANK_COUNT_MAX 5 /* current implementation limit */ -struct hv_vp_set { - u64 format; /* 0 (HvGenericSetSparse4k) */ - u64 valid_banks; - u64 masks[HV_VP_SET_BANK_COUNT_MAX]; -}; - /* * flags for hv_device_interrupt_target.flags */ @@ -410,7 +404,7 @@ struct hv_device_interrupt_target { u32 flags; union { u64 vp_mask; - struct hv_vp_set vp_set; + struct hv_vpset vp_set; }; }; @@ -460,12 +454,16 @@ struct hv_pcibus_device { struct msi_controller msi_chip; struct irq_domain *irq_domain; - /* hypercall arg, must not cross page boundary */ - struct retarget_msi_interrupt retarget_msi_interrupt_params; - spinlock_t retarget_msi_interrupt_lock; struct workqueue_struct *wq; + + /* hypercall arg, must not cross page boundary */ + struct retarget_msi_interrupt retarget_msi_interrupt_params; + + /* +* Don't put anything here: retarget_msi_interrupt_params must be last +*/ }; /* @@ -955,12 +953,13 @@ static void hv_irq_unmask(struct irq_data *data) */ params->int_target.flags |= HV_DEVICE_INTERRUPT_TARGET_PROCESSOR_SET; - params->int_target.vp_set.valid_banks = + params->int_target.vp_set.valid_bank_mask = (1ull << HV_VP_SET_BANK_COUNT_MAX) - 1; /* * var-sized hypercall, var-size starts after vp_mask (thus -* vp_set.format does not count, but vp_set.valid_banks does). +* vp_set.format does not count, but vp_set.valid_bank_mask +* does). */ var_size = 1 + HV_VP_SET_BANK_COUNT_MAX; @@ -974,7 +973,7 @@ static void hv_irq_unmask(struct irq_data *data) goto exit_unlock; } - params->int_target.vp_set.masks[cpu_vmbus / 64] |= + params->int_target.vp_set.bank_contents[cpu_vmbus / 64] |= (1ULL << (cpu_vmbus & 63)); } } else { -- 2.17.1
[PATCH v3 0/2] PCI: hv: Refactor hv_irq_unmask() to use hv_vpset and cpumask_to_vpset()
This patchset removes a duplicate definition of VP set (hv_vp_set) and uses the common definition (hv_vpset) that is used in other places. It changes the order of the members in struct hv_pcibus_device due to flexible array in hv_vpset. It also removes the duplicate implementation of cpumask_to_vpset(), uses the shared implementation, and exports hv_max_vp_index, which is required by cpumask_to_vpset(). Maya Nakamura (2): PCI: hv: Replace hv_vp_set with hv_vpset PCI: hv: Refactor hv_irq_unmask() to use cpumask_to_vpset() arch/x86/hyperv/hv_init.c | 1 + drivers/pci/controller/pci-hyperv.c | 59 + 2 files changed, 28 insertions(+), 32 deletions(-) -- 2.17.1
Re: [PATCH v2 2/2] PCI: hv: Refactor hv_irq_unmask() to use cpumask_to_vpset()
On Sun, Jan 27, 2019 at 05:22:06AM +, Michael Kelley wrote: > From: Maya Nakamura Sent: Saturday, January 26, > 2019 12:55 AM > > > > @@ -953,29 +951,27 @@ static void hv_irq_unmask(struct irq_data *data) > > */ > > params->int_target.flags |= > > HV_DEVICE_INTERRUPT_TARGET_PROCESSOR_SET; > > - params->int_target.vp_set.valid_bank_mask = > > - (1ull << HV_VP_SET_BANK_COUNT_MAX) - 1; > > + > > + if (!alloc_cpumask_var(&tmp, GFP_KERNEL)) { > > + res = 1; > > + goto exit_unlock; > > + } > > + > > + cpumask_and(tmp, dest, cpu_online_mask); > > + nr_bank = cpumask_to_vpset(¶ms->int_target.vp_set, tmp); > > + free_cpumask_var(tmp); > > + > > + if (!nr_bank) { > > There are two failures cases in cpumask_to_vpset(). One case returns > 0, and the other case returns -1. The above test only catches the 0 > failure case. Need to modify the test to catch both cases. > > Michael > Thank you for your feedback. I will correct it in v3. Maya > > + res = 1; > > + goto exit_unlock; > > + } > > >
Re: [PATCH v2 1/2] PCI: hv: Replace hv_vp_set with hv_vpset
On Sun, Jan 27, 2019 at 05:11:48AM +, Michael Kelley wrote: > From: Maya Nakamura Sent: Saturday, January 26, > 2019 12:52 AM > > > > Remove a duplicate definition of VP set (hv_vp_set) and use the common > > definition (hv_vpset) that is used in other places. > > > > Change the order of the members in struct hv_pcibus_device so that the > > declaration of retarget_msi_interrupt_params is the last member. Struct > > hv_vpset, which contains a flexible array, is nested two levels deep in > > struct hv_pcibus_device via retarget_msi_interrupt_params. > > > > Add a comment that retarget_msi_interrupt_params should be the last member > > of struct hv_pcibus_device. > > > > Signed-off-by: Maya Nakamura > > --- > > Change in v2: > > - None > > > > Right -- there was no code change. But it's customary to note that > you updated the commit message. > Thank you for your feedback. I will edit the change log in v3. > Reviewed-by: Michael Kelley
[PATCH v2 2/2] PCI: hv: Refactor hv_irq_unmask() to use cpumask_to_vpset()
Remove the duplicate implementation of cpumask_to_vpset() and use the shared implementation. Export hv_max_vp_index, which is required by cpumask_to_vpset(). Apply changes to hv_irq_unmask() based on feedback. Signed-off-by: Maya Nakamura --- Changes in v2: - Remove unnecessary nr_bank initialization. - Delete two unnecessary dev_err()'s. - Unlock before returning. arch/x86/hyperv/hv_init.c | 1 + drivers/pci/controller/pci-hyperv.c | 38 + 2 files changed, 18 insertions(+), 21 deletions(-) diff --git a/arch/x86/hyperv/hv_init.c b/arch/x86/hyperv/hv_init.c index 7abb09e2eeb8..7f2eed1fc81b 100644 --- a/arch/x86/hyperv/hv_init.c +++ b/arch/x86/hyperv/hv_init.c @@ -96,6 +96,7 @@ void __percpu **hyperv_pcpu_input_arg; EXPORT_SYMBOL_GPL(hyperv_pcpu_input_arg); u32 hv_max_vp_index; +EXPORT_SYMBOL_GPL(hv_max_vp_index); static int hv_cpu_init(unsigned int cpu) { diff --git a/drivers/pci/controller/pci-hyperv.c b/drivers/pci/controller/pci-hyperv.c index da8b58d8630d..d74c0f111fd0 100644 --- a/drivers/pci/controller/pci-hyperv.c +++ b/drivers/pci/controller/pci-hyperv.c @@ -391,8 +391,6 @@ struct hv_interrupt_entry { u32 data; }; -#define HV_VP_SET_BANK_COUNT_MAX 5 /* current implementation limit */ - /* * flags for hv_device_interrupt_target.flags */ @@ -908,12 +906,12 @@ static void hv_irq_unmask(struct irq_data *data) struct retarget_msi_interrupt *params; struct hv_pcibus_device *hbus; struct cpumask *dest; + cpumask_var_t tmp; struct pci_bus *pbus; struct pci_dev *pdev; unsigned long flags; u32 var_size = 0; - int cpu_vmbus; - int cpu; + int cpu, nr_bank; u64 res; dest = irq_data_get_effective_affinity_mask(data); @@ -953,29 +951,27 @@ static void hv_irq_unmask(struct irq_data *data) */ params->int_target.flags |= HV_DEVICE_INTERRUPT_TARGET_PROCESSOR_SET; - params->int_target.vp_set.valid_bank_mask = - (1ull << HV_VP_SET_BANK_COUNT_MAX) - 1; + + if (!alloc_cpumask_var(&tmp, GFP_KERNEL)) { + res = 1; + goto exit_unlock; + } + + cpumask_and(tmp, dest, cpu_online_mask); + nr_bank = cpumask_to_vpset(¶ms->int_target.vp_set, tmp); + free_cpumask_var(tmp); + + if (!nr_bank) { + res = 1; + goto exit_unlock; + } /* * var-sized hypercall, var-size starts after vp_mask (thus * vp_set.format does not count, but vp_set.valid_bank_mask * does). */ - var_size = 1 + HV_VP_SET_BANK_COUNT_MAX; - - for_each_cpu_and(cpu, dest, cpu_online_mask) { - cpu_vmbus = hv_cpu_number_to_vp_number(cpu); - - if (cpu_vmbus >= HV_VP_SET_BANK_COUNT_MAX * 64) { - dev_err(&hbus->hdev->device, - "too high CPU %d", cpu_vmbus); - res = 1; - goto exit_unlock; - } - - params->int_target.vp_set.bank_contents[cpu_vmbus / 64] |= - (1ULL << (cpu_vmbus & 63)); - } + var_size = 1 + nr_bank; } else { for_each_cpu_and(cpu, dest, cpu_online_mask) { params->int_target.vp_mask |= -- 2.17.1
[PATCH v2 1/2] PCI: hv: Replace hv_vp_set with hv_vpset
Remove a duplicate definition of VP set (hv_vp_set) and use the common definition (hv_vpset) that is used in other places. Change the order of the members in struct hv_pcibus_device so that the declaration of retarget_msi_interrupt_params is the last member. Struct hv_vpset, which contains a flexible array, is nested two levels deep in struct hv_pcibus_device via retarget_msi_interrupt_params. Add a comment that retarget_msi_interrupt_params should be the last member of struct hv_pcibus_device. Signed-off-by: Maya Nakamura --- Change in v2: - None drivers/pci/controller/pci-hyperv.c | 25 - 1 file changed, 12 insertions(+), 13 deletions(-) diff --git a/drivers/pci/controller/pci-hyperv.c b/drivers/pci/controller/pci-hyperv.c index 9ba4d12c179c..da8b58d8630d 100644 --- a/drivers/pci/controller/pci-hyperv.c +++ b/drivers/pci/controller/pci-hyperv.c @@ -393,12 +393,6 @@ struct hv_interrupt_entry { #define HV_VP_SET_BANK_COUNT_MAX 5 /* current implementation limit */ -struct hv_vp_set { - u64 format; /* 0 (HvGenericSetSparse4k) */ - u64 valid_banks; - u64 masks[HV_VP_SET_BANK_COUNT_MAX]; -}; - /* * flags for hv_device_interrupt_target.flags */ @@ -410,7 +404,7 @@ struct hv_device_interrupt_target { u32 flags; union { u64 vp_mask; - struct hv_vp_set vp_set; + struct hv_vpset vp_set; }; }; @@ -460,12 +454,16 @@ struct hv_pcibus_device { struct msi_controller msi_chip; struct irq_domain *irq_domain; - /* hypercall arg, must not cross page boundary */ - struct retarget_msi_interrupt retarget_msi_interrupt_params; - spinlock_t retarget_msi_interrupt_lock; struct workqueue_struct *wq; + + /* hypercall arg, must not cross page boundary */ + struct retarget_msi_interrupt retarget_msi_interrupt_params; + + /* +* Don't put anything here: retarget_msi_interrupt_params must be last +*/ }; /* @@ -955,12 +953,13 @@ static void hv_irq_unmask(struct irq_data *data) */ params->int_target.flags |= HV_DEVICE_INTERRUPT_TARGET_PROCESSOR_SET; - params->int_target.vp_set.valid_banks = + params->int_target.vp_set.valid_bank_mask = (1ull << HV_VP_SET_BANK_COUNT_MAX) - 1; /* * var-sized hypercall, var-size starts after vp_mask (thus -* vp_set.format does not count, but vp_set.valid_banks does). +* vp_set.format does not count, but vp_set.valid_bank_mask +* does). */ var_size = 1 + HV_VP_SET_BANK_COUNT_MAX; @@ -974,7 +973,7 @@ static void hv_irq_unmask(struct irq_data *data) goto exit_unlock; } - params->int_target.vp_set.masks[cpu_vmbus / 64] |= + params->int_target.vp_set.bank_contents[cpu_vmbus / 64] |= (1ULL << (cpu_vmbus & 63)); } } else { -- 2.17.1
[PATCH v2 0/2] PCI: hv: Refactor hv_irq_unmask() to use hv_vpset and cpumask_to_vpset()
This patchset removes a duplicate definition of VP set (hv_vp_set) and uses the common definition (hv_vpset) that is used in other places. It changes the order of the members in struct hv_pcibus_device due to flexible array in hv_vpset. It also removes the duplicate implementation of cpumask_to_vpset(), uses the shared implementation, and exports hv_max_vp_index, which is required by cpumask_to_vpset(). Maya Nakamura (2): PCI: hv: Replace hv_vp_set with hv_vpset PCI: hv: Refactor hv_irq_unmask() to use cpumask_to_vpset() arch/x86/hyperv/hv_init.c | 1 + drivers/pci/controller/pci-hyperv.c | 59 + 2 files changed, 28 insertions(+), 32 deletions(-) -- 2.17.1
Re: [PATCH 1/2] PCI: hv: Replace hv_vp_set with hv_vpset
On Thu, Jan 24, 2019 at 02:12:13PM +0100, Vitaly Kuznetsov wrote: > Maya Nakamura writes: > > > @@ -460,12 +454,16 @@ struct hv_pcibus_device { > > struct msi_controller msi_chip; > > struct irq_domain *irq_domain; > > > > - /* hypercall arg, must not cross page boundary */ > > - struct retarget_msi_interrupt retarget_msi_interrupt_params; > > - > > spinlock_t retarget_msi_interrupt_lock; > > > > struct workqueue_struct *wq; > > + > > + /* hypercall arg, must not cross page boundary */ > > + struct retarget_msi_interrupt retarget_msi_interrupt_params; > > + > > + /* > > +* Don't put anything here: retarget_msi_interrupt_params must be last > > +*/ > > This change seems to be unrelated and not anyhow described in the commit > message - or did I miss something? > Thank you for pointing that out. I will add why it was moved in v2. Maya > > -- > Vitaly
Re: [PATCH 2/2] PCI: hv: Refactor hv_irq_unmask() to use cpumask_to_vpset()
On Thu, Jan 24, 2019 at 03:29:18PM +0300, Dan Carpenter wrote: > On Wed, Jan 23, 2019 at 01:02:12PM -0800, Maya Nakamura wrote: > > @@ -908,12 +906,12 @@ static void hv_irq_unmask(struct irq_data *data) > > struct retarget_msi_interrupt *params; > > struct hv_pcibus_device *hbus; > > struct cpumask *dest; > > + cpumask_var_t tmp; > > struct pci_bus *pbus; > > struct pci_dev *pdev; > > unsigned long flags; > > u32 var_size = 0; > > - int cpu_vmbus; > > - int cpu; > > + int cpu, nr_bank = 0; > ^ > No need to initialize this to a bogus value. It's misleading and it > turns off GCC's uninitialized variable warning so it can lead to bugs. > Thank you for pointing that out. I will remove its initialization in v2. > > @@ -953,29 +951,28 @@ static void hv_irq_unmask(struct irq_data *data) > > */ > > params->int_target.flags |= > > HV_DEVICE_INTERRUPT_TARGET_PROCESSOR_SET; > > - params->int_target.vp_set.valid_bank_mask = > > - (1ull << HV_VP_SET_BANK_COUNT_MAX) - 1; > > + > > + if (!alloc_cpumask_var(&tmp, GFP_KERNEL)) { > > + dev_err(&hbus->hdev->device, "out of memory"); > > > No need for this error message. alloc_cpumask_var() already has better > debug messages built in. > I will delete this dev_err() in v2. > > + return; > > We can't return directly. We need to unlock first. > I will unlock in v2. > > + if (!nr_bank) { > > + dev_err(&hbus->hdev->device, "too high CPU"); > > This error message is not useful. > I will delete this dev_err() in v2. Maya > > regards, > dan carpenter >
[PATCH 2/2] PCI: hv: Refactor hv_irq_unmask() to use cpumask_to_vpset()
Remove the duplicate implementation of cpumask_to_vpset() and use the shared implementation. Export hv_max_vp_index, which is required by cpumask_to_vpset(). Signed-off-by: Maya Nakamura --- arch/x86/hyperv/hv_init.c | 1 + drivers/pci/controller/pci-hyperv.c | 39 + 2 files changed, 19 insertions(+), 21 deletions(-) diff --git a/arch/x86/hyperv/hv_init.c b/arch/x86/hyperv/hv_init.c index 7abb09e2eeb8..7f2eed1fc81b 100644 --- a/arch/x86/hyperv/hv_init.c +++ b/arch/x86/hyperv/hv_init.c @@ -96,6 +96,7 @@ void __percpu **hyperv_pcpu_input_arg; EXPORT_SYMBOL_GPL(hyperv_pcpu_input_arg); u32 hv_max_vp_index; +EXPORT_SYMBOL_GPL(hv_max_vp_index); static int hv_cpu_init(unsigned int cpu) { diff --git a/drivers/pci/controller/pci-hyperv.c b/drivers/pci/controller/pci-hyperv.c index da8b58d8630d..d879458d441f 100644 --- a/drivers/pci/controller/pci-hyperv.c +++ b/drivers/pci/controller/pci-hyperv.c @@ -391,8 +391,6 @@ struct hv_interrupt_entry { u32 data; }; -#define HV_VP_SET_BANK_COUNT_MAX 5 /* current implementation limit */ - /* * flags for hv_device_interrupt_target.flags */ @@ -908,12 +906,12 @@ static void hv_irq_unmask(struct irq_data *data) struct retarget_msi_interrupt *params; struct hv_pcibus_device *hbus; struct cpumask *dest; + cpumask_var_t tmp; struct pci_bus *pbus; struct pci_dev *pdev; unsigned long flags; u32 var_size = 0; - int cpu_vmbus; - int cpu; + int cpu, nr_bank = 0; u64 res; dest = irq_data_get_effective_affinity_mask(data); @@ -953,29 +951,28 @@ static void hv_irq_unmask(struct irq_data *data) */ params->int_target.flags |= HV_DEVICE_INTERRUPT_TARGET_PROCESSOR_SET; - params->int_target.vp_set.valid_bank_mask = - (1ull << HV_VP_SET_BANK_COUNT_MAX) - 1; + + if (!alloc_cpumask_var(&tmp, GFP_KERNEL)) { + dev_err(&hbus->hdev->device, "out of memory"); + return; + } + + cpumask_and(tmp, dest, cpu_online_mask); + nr_bank = cpumask_to_vpset(¶ms->int_target.vp_set, tmp); + free_cpumask_var(tmp); + + if (!nr_bank) { + dev_err(&hbus->hdev->device, "too high CPU"); + res = 1; + goto exit_unlock; + } /* * var-sized hypercall, var-size starts after vp_mask (thus * vp_set.format does not count, but vp_set.valid_bank_mask * does). */ - var_size = 1 + HV_VP_SET_BANK_COUNT_MAX; - - for_each_cpu_and(cpu, dest, cpu_online_mask) { - cpu_vmbus = hv_cpu_number_to_vp_number(cpu); - - if (cpu_vmbus >= HV_VP_SET_BANK_COUNT_MAX * 64) { - dev_err(&hbus->hdev->device, - "too high CPU %d", cpu_vmbus); - res = 1; - goto exit_unlock; - } - - params->int_target.vp_set.bank_contents[cpu_vmbus / 64] |= - (1ULL << (cpu_vmbus & 63)); - } + var_size = 1 + nr_bank; } else { for_each_cpu_and(cpu, dest, cpu_online_mask) { params->int_target.vp_mask |= -- 2.17.1
[PATCH 1/2] PCI: hv: Replace hv_vp_set with hv_vpset
Remove a duplicate definition of VP set (hv_vp_set) and use the common definition (hv_vpset) that is used in other places. Signed-off-by: Maya Nakamura --- drivers/pci/controller/pci-hyperv.c | 25 - 1 file changed, 12 insertions(+), 13 deletions(-) diff --git a/drivers/pci/controller/pci-hyperv.c b/drivers/pci/controller/pci-hyperv.c index 9ba4d12c179c..da8b58d8630d 100644 --- a/drivers/pci/controller/pci-hyperv.c +++ b/drivers/pci/controller/pci-hyperv.c @@ -393,12 +393,6 @@ struct hv_interrupt_entry { #define HV_VP_SET_BANK_COUNT_MAX 5 /* current implementation limit */ -struct hv_vp_set { - u64 format; /* 0 (HvGenericSetSparse4k) */ - u64 valid_banks; - u64 masks[HV_VP_SET_BANK_COUNT_MAX]; -}; - /* * flags for hv_device_interrupt_target.flags */ @@ -410,7 +404,7 @@ struct hv_device_interrupt_target { u32 flags; union { u64 vp_mask; - struct hv_vp_set vp_set; + struct hv_vpset vp_set; }; }; @@ -460,12 +454,16 @@ struct hv_pcibus_device { struct msi_controller msi_chip; struct irq_domain *irq_domain; - /* hypercall arg, must not cross page boundary */ - struct retarget_msi_interrupt retarget_msi_interrupt_params; - spinlock_t retarget_msi_interrupt_lock; struct workqueue_struct *wq; + + /* hypercall arg, must not cross page boundary */ + struct retarget_msi_interrupt retarget_msi_interrupt_params; + + /* +* Don't put anything here: retarget_msi_interrupt_params must be last +*/ }; /* @@ -955,12 +953,13 @@ static void hv_irq_unmask(struct irq_data *data) */ params->int_target.flags |= HV_DEVICE_INTERRUPT_TARGET_PROCESSOR_SET; - params->int_target.vp_set.valid_banks = + params->int_target.vp_set.valid_bank_mask = (1ull << HV_VP_SET_BANK_COUNT_MAX) - 1; /* * var-sized hypercall, var-size starts after vp_mask (thus -* vp_set.format does not count, but vp_set.valid_banks does). +* vp_set.format does not count, but vp_set.valid_bank_mask +* does). */ var_size = 1 + HV_VP_SET_BANK_COUNT_MAX; @@ -974,7 +973,7 @@ static void hv_irq_unmask(struct irq_data *data) goto exit_unlock; } - params->int_target.vp_set.masks[cpu_vmbus / 64] |= + params->int_target.vp_set.bank_contents[cpu_vmbus / 64] |= (1ULL << (cpu_vmbus & 63)); } } else { -- 2.17.1
[PATCH 0/2] PCI: hv: Refactor hv_irq_unmask() to use hv_vpset and cpumask_to_vpset()
This patchset removes a duplicate definition of VP set (hv_vp_set) and uses the common definition (hv_vpset) that is used in other places. It also removes the duplicate implementation of cpumask_to_vpset() and uses the shared implementation. Finally, it exports hv_max_vp_index, which is required by cpumask_to_vpset(). Maya Nakamura (2): PCI: hv: Replace hv_vp_set with hv_vpset PCI: hv: Refactor hv_irq_unmask() to use cpumask_to_vpset() arch/x86/hyperv/hv_init.c | 1 + drivers/pci/controller/pci-hyperv.c | 60 ++--- 2 files changed, 29 insertions(+), 32 deletions(-) -- 2.17.1