From: Nuno Das Neves <[email protected]> Sent: Friday, November 
20, 2020 4:31 PM
> To: [email protected]
> Cc: [email protected]; [email protected]; 
> Michael Kelley
> <[email protected]>; [email protected]; Sunil Muthuswamy
> <[email protected]>; [email protected]; 
> [email protected];
> Lillian Grassin-Drake <[email protected]>; KY Srinivasan
> <[email protected]>
> Subject: [RFC PATCH 15/18] virt/mshv: get and set vp state ioctls
> 
> Introduce ioctls for getting and setting guest vcpu emulated LAPIC
> state, and xsave data.
> 
> Signed-off-by: Nuno Das Neves <[email protected]>
> ---
>  Documentation/virt/mshv/api.rst         |   8 +
>  arch/x86/include/uapi/asm/hyperv-tlfs.h |  59 ++++++
>  include/asm-generic/hyperv-tlfs.h       |  41 ++++
>  include/uapi/asm-generic/hyperv-tlfs.h  |  28 +++
>  include/uapi/linux/mshv.h               |  13 ++
>  virt/mshv/mshv_main.c                   | 262 ++++++++++++++++++++++++
>  6 files changed, 411 insertions(+)
> 
> diff --git a/Documentation/virt/mshv/api.rst b/Documentation/virt/mshv/api.rst
> index 694f978131f9..7fd75f248eff 100644
> --- a/Documentation/virt/mshv/api.rst
> +++ b/Documentation/virt/mshv/api.rst
> @@ -140,4 +140,12 @@ Assert interrupts in partitions that use Microsoft 
> Hypervisor's
> internal
>  emulated LAPIC. This must be enabled on partition creation with the flag:
>  HV_PARTITION_CREATION_FLAG_LAPIC_ENABLED
> 
> +3.9 MSHV_GET_VP_STATE and MSHV_SET_VP_STATE
> +--------------------------
> +:Type: vp ioctl
> +:Parameters: struct mshv_vp_state
> +:Returns: 0 on success
> +
> +Get/set various vp state. Currently these can be used to get and set
> +emulated LAPIC state, and xsave data.
> 
> diff --git a/arch/x86/include/uapi/asm/hyperv-tlfs.h 
> b/arch/x86/include/uapi/asm/hyperv-
> tlfs.h
> index 5478d4943bfc..78758aedf23e 100644
> --- a/arch/x86/include/uapi/asm/hyperv-tlfs.h
> +++ b/arch/x86/include/uapi/asm/hyperv-tlfs.h
> @@ -1051,4 +1051,63 @@ union hv_interrupt_control {
>       __u64 as_uint64;
>  };
> 
> +struct hv_local_interrupt_controller_state {
> +     __u32 apic_id;
> +     __u32 apic_version;
> +     __u32 apic_ldr;
> +     __u32 apic_dfr;
> +     __u32 apic_spurious;
> +     __u32 apic_isr[8];
> +     __u32 apic_tmr[8];
> +     __u32 apic_irr[8];
> +     __u32 apic_esr;
> +     __u32 apic_icr_high;
> +     __u32 apic_icr_low;
> +     __u32 apic_lvt_timer;
> +     __u32 apic_lvt_thermal;
> +     __u32 apic_lvt_perfmon;
> +     __u32 apic_lvt_lint0;
> +     __u32 apic_lvt_lint1;
> +     __u32 apic_lvt_error;
> +     __u32 apic_lvt_cmci;
> +     __u32 apic_error_status;
> +     __u32 apic_initial_count;
> +     __u32 apic_counter_value;
> +     __u32 apic_divide_configuration;
> +     __u32 apic_remote_read;
> +};
> +
> +#define HV_XSAVE_DATA_NO_XMM_REGISTERS 1
> +
> +union hv_x64_xsave_xfem_register {
> +     __u64 as_uint64;
> +     struct {
> +             __u32 low_uint32;
> +             __u32 high_uint32;
> +     };
> +     struct {
> +             __u64 legacy_x87: 1;
> +             __u64 legacy_sse: 1;
> +             __u64 avx: 1;
> +             __u64 mpx_bndreg: 1;
> +             __u64 mpx_bndcsr: 1;
> +             __u64 avx_512_op_mask: 1;
> +             __u64 avx_512_zmmhi: 1;
> +             __u64 avx_512_zmm16_31: 1;
> +             __u64 rsvd8_9: 2;
> +             __u64 pasid: 1;
> +             __u64 cet_u: 1;
> +             __u64 cet_s: 1;
> +             __u64 rsvd13_16: 4;
> +             __u64 xtile_cfg: 1;
> +             __u64 xtile_data: 1;
> +             __u64 rsvd19_63: 45;
> +     };
> +};
> +
> +struct hv_vp_state_data_xsave {
> +     __u64 flags;
> +     union hv_x64_xsave_xfem_register states;
> +};
> +
>  #endif
> diff --git a/include/asm-generic/hyperv-tlfs.h 
> b/include/asm-generic/hyperv-tlfs.h
> index 2cd46241c545..4bc59a0344ce 100644
> --- a/include/asm-generic/hyperv-tlfs.h
> +++ b/include/asm-generic/hyperv-tlfs.h
> @@ -167,6 +167,9 @@ struct ms_hyperv_tsc_page {
>  #define HVCALL_ASSERT_VIRTUAL_INTERRUPT              0x0094
>  #define HVCALL_FLUSH_GUEST_PHYSICAL_ADDRESS_SPACE 0x00af
>  #define HVCALL_FLUSH_GUEST_PHYSICAL_ADDRESS_LIST 0x00b0
> +#define HVCALL_MAP_VP_STATE_PAGE                     0x00e1
> +#define HVCALL_GET_VP_STATE                          0x00e3
> +#define HVCALL_SET_VP_STATE                          0x00e4
> 
>  #define HV_FLUSH_ALL_PROCESSORS                      BIT(0)
>  #define HV_FLUSH_ALL_VIRTUAL_ADDRESS_SPACES  BIT(1)
> @@ -796,4 +799,42 @@ struct hv_assert_virtual_interrupt {
>       u16 rsvd_z1;
>  };
> 
> +struct hv_vp_state_data {
> +     enum hv_get_set_vp_state_type type;
> +     u32 rsvd;
> +     struct hv_vp_state_data_xsave xsave;
> +
> +};
> +
> +struct hv_get_vp_state_in {
> +     u64 partition_id;
> +     u32 vp_index;
> +     u8 input_vtl;
> +     u8 rsvd0;
> +     u16 rsvd1;
> +     struct hv_vp_state_data state_data;
> +     u64 output_data_pfns[];
> +};
> +
> +union hv_get_vp_state_out {
> +     struct hv_local_interrupt_controller_state interrupt_controller_state;
> +     /* Not supported yet */
> +     /* struct hv_synthetic_timers_state synthetic_timers_state; */
> +};
> +
> +union hv_input_set_vp_state_data {
> +     u64 pfns;
> +     u8 bytes;
> +};
> +
> +struct hv_set_vp_state_in {
> +     u64 partition_id;
> +     u32 vp_index;
> +     u8 input_vtl;
> +     u8 rsvd0;
> +     u16 rsvd1;
> +     struct hv_vp_state_data state_data;
> +     union hv_input_set_vp_state_data data[];
> +};
> +
>  #endif
> diff --git a/include/uapi/asm-generic/hyperv-tlfs.h 
> b/include/uapi/asm-generic/hyperv-
> tlfs.h
> index e87389054b68..b3c84c69b73f 100644
> --- a/include/uapi/asm-generic/hyperv-tlfs.h
> +++ b/include/uapi/asm-generic/hyperv-tlfs.h
> @@ -64,4 +64,32 @@ struct hv_message {
>  #define HV_MAP_GPA_EXECUTABLE           0xC
>  #define HV_MAP_GPA_PERMISSIONS_MASK     0xF
> 
> +/*
> + * For getting and setting VP state, there are two options based on the 
> state type:
> + *
> + *     1.) Data that is accessed by PFNs in the input hypercall page. This 
> is used
> + *         for state which may not fit into the hypercall pages.
> + *     2.) Data that is accessed directly in the input\output hypercall 
> pages.
> + *         This is used for state that will always fit into the hypercall 
> pages.
> + *
> + * In the future this could be dynamic based on the size if needed.
> + *
> + * Note these hypercalls have an 8-byte aligned variable header size as per 
> the tlfs
> + */
> +
> +#define HV_GET_SET_VP_STATE_TYPE_PFN BIT(31)
> +
> +enum hv_get_set_vp_state_type {
> +     HV_GET_SET_VP_STATE_LOCAL_INTERRUPT_CONTROLLER_STATE = 0,
> +
> +     HV_GET_SET_VP_STATE_XSAVE               = 1 |
> HV_GET_SET_VP_STATE_TYPE_PFN,
> +     /* Synthetic message page */
> +     HV_GET_SET_VP_STATE_SIM_PAGE            = 2 |
> HV_GET_SET_VP_STATE_TYPE_PFN,
> +     /* Synthetic interrupt event flags page. */
> +     HV_GET_SET_VP_STATE_SIEF_PAGE           = 3 |
> HV_GET_SET_VP_STATE_TYPE_PFN,
> +
> +     /* Synthetic timers. */
> +     HV_GET_SET_VP_STATE_SYNTHETIC_TIMERS    = 4,
> +};
> +
>  #endif
> diff --git a/include/uapi/linux/mshv.h b/include/uapi/linux/mshv.h
> index faed9d065bb7..ae0bb64bbec3 100644
> --- a/include/uapi/linux/mshv.h
> +++ b/include/uapi/linux/mshv.h
> @@ -53,6 +53,17 @@ struct mshv_assert_interrupt {
>       __u32 vector;
>  };
> 
> +struct mshv_vp_state {
> +     enum hv_get_set_vp_state_type type;
> +     struct hv_vp_state_data_xsave xsave; /* only for xsave request */
> +
> +     __u64 buf_size; /* If xsave, must be page-aligned */
> +     union {
> +             struct hv_local_interrupt_controller_state *lapic;
> +             __u8 *bytes; /* Xsave data. must be page-aligned */
> +     } buf;
> +};
> +
>  #define MSHV_IOCTL 0xB8
> 
>  /* mshv device */
> @@ -70,5 +81,7 @@ struct mshv_assert_interrupt {
>  #define MSHV_GET_VP_REGISTERS   _IOWR(MSHV_IOCTL, 0x05, struct
> mshv_vp_registers)
>  #define MSHV_SET_VP_REGISTERS   _IOW(MSHV_IOCTL, 0x06, struct 
> mshv_vp_registers)
>  #define MSHV_RUN_VP          _IOR(MSHV_IOCTL, 0x07, struct hv_message)
> +#define MSHV_GET_VP_STATE    _IOWR(MSHV_IOCTL, 0x0A, struct mshv_vp_state)
> +#define MSHV_SET_VP_STATE    _IOWR(MSHV_IOCTL, 0x0B, struct mshv_vp_state)
> 
>  #endif
> diff --git a/virt/mshv/mshv_main.c b/virt/mshv/mshv_main.c
> index 9cf236ade50a..70172d9488de 100644
> --- a/virt/mshv/mshv_main.c
> +++ b/virt/mshv/mshv_main.c
> @@ -864,6 +864,262 @@ mshv_vp_ioctl_set_regs(struct mshv_vp *vp, void __user
> *user_args)
>       return ret;
>  }
> 
> +static int
> +hv_call_get_vp_state(u32 vp_index,
> +                  u64 partition_id,
> +                  enum hv_get_set_vp_state_type type,
> +                  struct hv_vp_state_data_xsave xsave,
> +                 /* Choose between pages and ret_output */
> +                  u64 page_count,
> +                  struct page **pages,
> +                  union hv_get_vp_state_out *ret_output)
> +{
> +     struct hv_get_vp_state_in *input;
> +     union hv_get_vp_state_out *output;
> +     int status;
> +     int i;
> +     u64 control;
> +     unsigned long flags;
> +     int ret = 0;
> +
> +     if (sizeof(*input) + (page_count * sizeof(u64)) > PAGE_SIZE)
> +             return -EINVAL;

Nit: Stylistically, this is handled differently from the BATCH_SIZE
macros, which do essentially the same thing: calculate how many
entries will fit in the input page. Also, use HV_HYP_PAGE_SIZE here
rather than PAGE_SIZE.

> +
> +     if (!page_count && !ret_output)
> +             return -EINVAL;
> +
> +     do {
> +             local_irq_save(flags);
> +             input = (struct hv_get_vp_state_in *)
> +                             (*this_cpu_ptr(hyperv_pcpu_input_arg));
> +             output = (union hv_get_vp_state_out *)
> +                             (*this_cpu_ptr(hyperv_pcpu_output_arg));
> +             memset(input, 0, sizeof(*input));
> +             memset(output, 0, sizeof(*output));
> +
> +             input->partition_id = partition_id;
> +             input->vp_index = vp_index;
> +             input->state_data.type = type;
> +             memcpy(&input->state_data.xsave, &xsave, sizeof(xsave));
> +             for (i = 0; i < page_count; i++)
> +                     input->output_data_pfns[i] =
> +                             page_to_pfn(pages[i]) & HV_MAP_GPA_MASK;
> +
> +             control = (HVCALL_GET_VP_STATE) |
> +                       (page_count << HV_HYPERCALL_VARHEAD_OFFSET);
> +
> +             status = hv_do_hypercall(control, input, output) &
> +                      HV_HYPERCALL_RESULT_MASK;
> +
> +             if (status != HV_STATUS_INSUFFICIENT_MEMORY) {
> +                     if (status != HV_STATUS_SUCCESS)
> +                             pr_err("%s: %s\n", __func__,
> +                                    hv_status_to_string(status));
> +                     else if (ret_output)
> +                             memcpy(ret_output, output, sizeof(*output));
> +
> +                     local_irq_restore(flags);
> +                     ret = -hv_status_to_errno(status);
> +                     break;
> +             }
> +             local_irq_restore(flags);
> +
> +             ret = hv_call_deposit_pages(NUMA_NO_NODE,
> +                                         partition_id, 1);
> +     } while (!ret);
> +
> +     return ret;
> +}
> +
> +static int
> +hv_call_set_vp_state(u32 vp_index,
> +                  u64 partition_id,
> +                  enum hv_get_set_vp_state_type type,
> +                  struct hv_vp_state_data_xsave xsave,
> +                 /* Choose between pages and bytes */
> +                  u64 page_count,
> +                  struct page **pages,
> +                  u32 num_bytes,
> +                  u8 *bytes)
> +{
> +     struct hv_set_vp_state_in *input;
> +     int status;
> +     int i;
> +     u64 control;
> +     unsigned long flags;
> +     int ret = 0;
> +     u16 varhead_sz;
> +
> +     if (sizeof(*input) + (page_count * sizeof(u64)) > PAGE_SIZE)

Same comment as above.

> +             return -EINVAL;
> +     if (sizeof(*input) + num_bytes > PAGE_SIZE)

Use HV_HYP_PAGE_SIZE.

> +             return -EINVAL;
> +
> +     if (num_bytes)
> +             /* round up to 8 and divide by 8 */
> +             varhead_sz = (num_bytes + 7) >> 3;
> +     else if (page_count)
> +             varhead_sz =  page_count;
> +     else
> +             return -EINVAL;
> +
> +     do {
> +             local_irq_save(flags);
> +             input = (struct hv_set_vp_state_in *)
> +                             (*this_cpu_ptr(hyperv_pcpu_input_arg));
> +             memset(input, 0, sizeof(*input));
> +
> +             input->partition_id = partition_id;
> +             input->vp_index = vp_index;
> +             input->state_data.type = type;
> +             memcpy(&input->state_data.xsave, &xsave, sizeof(xsave));
> +             if (num_bytes) {
> +                     memcpy((u8 *)input->data, bytes, num_bytes);
> +             } else {
> +                     for (i = 0; i < page_count; i++)
> +                             input->data[i].pfns =
> +                                     page_to_pfn(pages[i]) & HV_MAP_GPA_MASK;

Same comment as in the earlier patch about HV_MAP_GPA_MASK. Also, this
doesn't work if PAGE_SIZE != HV_HYP_PAGE_SIZE, though it may be fine not
to handle that case for now.

> +             }
> +
> +             control = (HVCALL_SET_VP_STATE) |
> +                       (varhead_sz << HV_HYPERCALL_VARHEAD_OFFSET);
> +
> +             status = hv_do_hypercall(control, input, NULL) &
> +                      HV_HYPERCALL_RESULT_MASK;
> +
> +             if (status != HV_STATUS_INSUFFICIENT_MEMORY) {
> +                     if (status != HV_STATUS_SUCCESS)
> +                             pr_err("%s: %s\n", __func__,
> +                                    hv_status_to_string(status));
> +
> +                     local_irq_restore(flags);
> +                     ret = -hv_status_to_errno(status);
> +                     break;
> +             }
> +             local_irq_restore(flags);
> +
> +             ret = hv_call_deposit_pages(NUMA_NO_NODE,
> +                                         partition_id, 1);
> +     } while (!ret);
> +
> +     return ret;
> +}
> +
> +static long
> +mshv_vp_ioctl_get_set_state_pfn(struct mshv_vp *vp,
> +                             struct mshv_vp_state *args,
> +                             bool is_set)
> +{
> +     u64 page_count, remaining;
> +     int completed;
> +     struct page **pages;
> +     long ret;
> +     unsigned long u_buf;
> +
> +     /* Buffer must be page aligned */
> +     if (args->buf_size & (PAGE_SIZE - 1) ||
> +         (u64)args->buf.bytes & (PAGE_SIZE - 1))
> +             return -EINVAL;

Use the PAGE_ALIGNED() macro.

> +
> +     if (!access_ok(args->buf.bytes, args->buf_size))
> +             return -EFAULT;
> +
> +     /* Pin user pages so hypervisor can copy directly to them */
> +     page_count = args->buf_size >> PAGE_SHIFT;
> +     pages = kcalloc(page_count, sizeof(struct page *), GFP_KERNEL);
> +     if (!pages)
> +             return -ENOMEM;
> +
> +     remaining = page_count;
> +     u_buf = (unsigned long)args->buf.bytes;
> +     while (remaining) {
> +             completed = pin_user_pages_fast(
> +                             u_buf,
> +                             remaining,
> +                             FOLL_WRITE,
> +                             &pages[page_count - remaining]);
> +             if (completed < 0) {
> +                     pr_err("%s: failed to pin user pages error %i\n",
> +                            __func__, completed);
> +                     ret = completed;
> +                     goto unpin_pages;
> +             }
> +             remaining -= completed;
> +             u_buf += completed * PAGE_SIZE;
> +     }
> +
> +     if (is_set)
> +             ret = hv_call_set_vp_state(vp->index,
> +                                        vp->partition->id,
> +                                        args->type, args->xsave,
> +                                        page_count, pages,
> +                                        0, NULL);
> +     else
> +             ret = hv_call_get_vp_state(vp->index,
> +                                        vp->partition->id,
> +                                        args->type, args->xsave,
> +                                        page_count, pages,
> +                                        NULL);
> +
> +unpin_pages:
> +     unpin_user_pages(pages, page_count - remaining);
> +     kfree(pages);
> +     return ret;
> +}
> +
> +static long
> +mshv_vp_ioctl_get_set_state(struct mshv_vp *vp, void __user *user_args, bool 
> is_set)
> +{
> +     struct mshv_vp_state args;
> +     long ret = 0;
> +     union hv_get_vp_state_out vp_state;
> +
> +     if (copy_from_user(&args, user_args, sizeof(args)))
> +             return -EFAULT;
> +
> +     /* For now just support these */
> +     if (args.type != HV_GET_SET_VP_STATE_LOCAL_INTERRUPT_CONTROLLER_STATE &&
> +         args.type != HV_GET_SET_VP_STATE_XSAVE)
> +             return -EINVAL;
> +
> +     /* If we need to pin pfns, delegate to helper */
> +     if (args.type & HV_GET_SET_VP_STATE_TYPE_PFN)
> +             return mshv_vp_ioctl_get_set_state_pfn(vp, &args, is_set);
> +
> +     if (args.buf_size < sizeof(vp_state))
> +             return -EINVAL;
> +
> +     if (is_set) {
> +             if (copy_from_user(
> +                             &vp_state,
> +                             args.buf.lapic,
> +                             sizeof(vp_state)))
> +                     return -EFAULT;
> +
> +             return hv_call_set_vp_state(vp->index,
> +                                         vp->partition->id,
> +                                         args.type, args.xsave,
> +                                         0, NULL,
> +                                         sizeof(vp_state),
> +                                         (u8 *)&vp_state);
> +     }
> +
> +     ret = hv_call_get_vp_state(vp->index,
> +                                vp->partition->id,
> +                                args.type, args.xsave,
> +                                0, NULL,
> +                                &vp_state);
> +
> +     if (ret)
> +             return ret;
> +
> +     if (copy_to_user(args.buf.lapic,
> +                      &vp_state.interrupt_controller_state,
> +                      sizeof(vp_state.interrupt_controller_state)))
> +             return -EFAULT;
> +
> +     return 0;
> +}
> 
>  static long
>  mshv_vp_ioctl(struct file *filp, unsigned int ioctl, unsigned long arg)
> @@ -884,6 +1140,12 @@ mshv_vp_ioctl(struct file *filp, unsigned int ioctl, 
> unsigned long
> arg)
>       case MSHV_SET_VP_REGISTERS:
>               r = mshv_vp_ioctl_set_regs(vp, (void __user *)arg);
>               break;
> +     case MSHV_GET_VP_STATE:
> +             r = mshv_vp_ioctl_get_set_state(vp, (void __user *)arg, false);
> +             break;
> +     case MSHV_SET_VP_STATE:
> +             r = mshv_vp_ioctl_get_set_state(vp, (void __user *)arg, true);
> +             break;
>       default:
>               r = -ENOTTY;
>               break;
> --
> 2.25.1

Reply via email to