On 2/8/2021 11:45 AM, Michael Kelley wrote:
> From: Nuno Das Neves <[email protected]> Sent: Friday, 
> November 20, 2020 4:30 PM
>>
>> Introduce ioctls for mapping and unmapping regions of guest memory.
>>
>> Uses a table of memory 'slots' similar to KVM, but the slot
>> number is not visible to userspace.
>>
>> For now, this simple implementation requires each new mapping to be
>> disjoint - the underlying hypercalls have no such restriction, and
>> implicitly overwrite any mappings on the pages in the specified regions.
>>
>> Co-developed-by: Lillian Grassin-Drake <[email protected]>
>> Signed-off-by: Lillian Grassin-Drake <[email protected]>
>> Signed-off-by: Nuno Das Neves <[email protected]>
>> ---
>>  Documentation/virt/mshv/api.rst        |  15 ++
>>  include/asm-generic/hyperv-tlfs.h      |  15 ++
>>  include/linux/mshv.h                   |  14 ++
>>  include/uapi/asm-generic/hyperv-tlfs.h |   9 +
>>  include/uapi/linux/mshv.h              |  15 ++
>>  virt/mshv/mshv_main.c                  | 322 ++++++++++++++++++++++++-
>>  6 files changed, 388 insertions(+), 2 deletions(-)
>>
>> diff --git a/Documentation/virt/mshv/api.rst 
>> b/Documentation/virt/mshv/api.rst
>> index ce651a1738e0..530efc29d354 100644
>> --- a/Documentation/virt/mshv/api.rst
>> +++ b/Documentation/virt/mshv/api.rst
>> @@ -72,3 +72,18 @@ it is open - this ioctl can only be called once per open.
>>  This ioctl creates a guest partition, returning a file descriptor to use as 
>> a
>>  handle for partition ioctls.
>>
>> +3.3 MSHV_MAP_GUEST_MEMORY and MSHV_UNMAP_GUEST_MEMORY
>> +-----------------------------------------------------
>> +:Type: partition ioctl
>> +:Parameters: struct mshv_user_mem_region
>> +:Returns: 0 on success
>> +
>> +Create a mapping from a region of process memory to a region of physical 
>> memory
>> +in a guest partition.
> 
> Just to be super explicit:
> 
> Create a mapping from memory in the user space of the calling process (running
> in the root partition) to a region of guest physical memory in a guest 
> partition.
> 

Thanks, yes this is clearer.

>> +
>> +Mappings must be disjoint in process address space and guest address space.
>> +
>> +Note: In the current implementation, this memory is pinned to stop the pages
>> +being moved by linux and subsequently clobbered by the hypervisor. So the 
>> region
>> +is backed by physical memory.
> 
> Again to be super explicit:
> 
> Note: In the current implementation, this memory is pinned to real physical
> memory to stop the pages being moved by Linux in the root partition,
> and subsequently being clobbered by the hypervisor.  So the region is backed
> by real physical memory.
> 

Yep, I'll update this also.

>> +
>> diff --git a/include/asm-generic/hyperv-tlfs.h 
>> b/include/asm-generic/hyperv-tlfs.h
>> index 2a49503b7396..6e5072e29897 100644
>> --- a/include/asm-generic/hyperv-tlfs.h
>> +++ b/include/asm-generic/hyperv-tlfs.h
>> @@ -149,6 +149,8 @@ struct ms_hyperv_tsc_page {
>>  #define HVCALL_GET_PARTITION_ID                     0x0046
>>  #define HVCALL_DEPOSIT_MEMORY                       0x0048
>>  #define HVCALL_WITHDRAW_MEMORY                      0x0049
>> +#define HVCALL_MAP_GPA_PAGES                        0x004b
>> +#define HVCALL_UNMAP_GPA_PAGES                      0x004c
>>  #define HVCALL_CREATE_VP                    0x004e
>>  #define HVCALL_GET_VP_REGISTERS                     0x0050
>>  #define HVCALL_SET_VP_REGISTERS                     0x0051
>> @@ -827,4 +829,17 @@ struct hv_delete_partition {
>>      u64 partition_id;
>>  };
>>
>> +struct hv_map_gpa_pages {
>> +    u64 target_partition_id;
>> +    u64 target_gpa_base;
>> +    u32 map_flags;
> 
> Is there a reserved 32 bit field here?  Hyper-V always aligns
> things on 64 bit boundaries.
> 

The hypervisor code uses implicit padding here, and I copied it directly.
Yes, it should be 8 byte aligned. I will insert a padding field and add 
__packed.

>> +    u64 source_gpa_page_list[];
>> +};
>> +
>> +struct hv_unmap_gpa_pages {
>> +    u64 target_partition_id;
>> +    u64 target_gpa_base;
>> +    u32 unmap_flags;
> 
> Is there a reserved 32 bit field here?  Hyper-V always aligns
> things on 64 bit boundaries.
> 

ditto as above.

>> +};
> 
> Add __packed to the above two structs after sorting out
> the alignment issues.
> 

Yep.

>> +
>>  #endif
>> diff --git a/include/linux/mshv.h b/include/linux/mshv.h
>> index fc4f35089b2c..91a742f37440 100644
>> --- a/include/linux/mshv.h
>> +++ b/include/linux/mshv.h
>> @@ -7,13 +7,27 @@
>>   */
>>
>>  #include <linux/spinlock.h>
>> +#include <linux/mutex.h>
>>  #include <uapi/linux/mshv.h>
>>
>>  #define MSHV_MAX_PARTITIONS         128
>> +#define MSHV_MAX_MEM_REGIONS                64
>> +
>> +struct mshv_mem_region {
>> +    u64 size; /* bytes */
>> +    u64 guest_pfn;
>> +    u64 userspace_addr; /* start of the userspace allocated memory */
>> +    struct page **pages;
>> +};
>>
>>  struct mshv_partition {
>>      u64 id;
>>      refcount_t ref_count;
>> +    struct mutex mutex;
>> +    struct {
>> +            u32 count;
>> +            struct mshv_mem_region slots[MSHV_MAX_MEM_REGIONS];
>> +    } regions;
>>  };
>>
>>  struct mshv {
>> diff --git a/include/uapi/asm-generic/hyperv-tlfs.h 
>> b/include/uapi/asm-generic/hyperv-
>> tlfs.h
>> index 7a858226a9c5..e7b09b9f00de 100644
>> --- a/include/uapi/asm-generic/hyperv-tlfs.h
>> +++ b/include/uapi/asm-generic/hyperv-tlfs.h
>> @@ -12,4 +12,13 @@
>>  #define HV_PARTITION_CREATION_FLAG_GPA_SUPER_PAGES_ENABLED          BIT(4)
>>  #define HV_PARTITION_CREATION_FLAG_LAPIC_ENABLED                    BIT(13)
>>
>> +/* HV Map GPA (Guest Physical Address) Flags */
>> +#define HV_MAP_GPA_PERMISSIONS_NONE     0x0
>> +#define HV_MAP_GPA_READABLE             0x1
>> +#define HV_MAP_GPA_WRITABLE             0x2
>> +#define HV_MAP_GPA_KERNEL_EXECUTABLE    0x4
>> +#define HV_MAP_GPA_USER_EXECUTABLE      0x8
>> +#define HV_MAP_GPA_EXECUTABLE           0xC
>> +#define HV_MAP_GPA_PERMISSIONS_MASK     0xF
>> +
>>  #endif
>> diff --git a/include/uapi/linux/mshv.h b/include/uapi/linux/mshv.h
>> index 4f8da9a6fde2..47be03ef4e86 100644
>> --- a/include/uapi/linux/mshv.h
>> +++ b/include/uapi/linux/mshv.h
>> @@ -18,10 +18,25 @@ struct mshv_create_partition {
>>      struct hv_partition_creation_properties partition_creation_properties;
>>  };
>>
>> +/*
>> + * Mappings can't overlap in GPA space or userspace
>> + * To unmap, these fields must match an existing mapping
>> + */
>> +struct mshv_user_mem_region {
>> +    __u64 size;             /* bytes */
>> +    __u64 guest_pfn;
>> +    __u64 userspace_addr;   /* start of the userspace allocated memory */
>> +    __u32 flags;            /* ignored on unmap */
>> +};
>> +
>>  #define MSHV_IOCTL 0xB8
>>
>>  /* mshv device */
>>  #define MSHV_REQUEST_VERSION        _IOW(MSHV_IOCTL, 0x00, __u32)
>>  #define MSHV_CREATE_PARTITION       _IOW(MSHV_IOCTL, 0x01, struct 
>> mshv_create_partition)
>>
>> +/* partition device */
>> +#define MSHV_MAP_GUEST_MEMORY       _IOW(MSHV_IOCTL, 0x02, struct 
>> mshv_user_mem_region)
>> +#define MSHV_UNMAP_GUEST_MEMORY     _IOW(MSHV_IOCTL, 0x03, struct 
>> mshv_user_mem_region)
>> +
>>  #endif
>> diff --git a/virt/mshv/mshv_main.c b/virt/mshv/mshv_main.c
>> index 162a1bb42a4a..ce480598e67f 100644
>> --- a/virt/mshv/mshv_main.c
>> +++ b/virt/mshv/mshv_main.c
>> @@ -60,6 +60,10 @@ static struct miscdevice mshv_dev = {
>>
>>  #define HV_WITHDRAW_BATCH_SIZE      (PAGE_SIZE / sizeof(u64))
>>  #define HV_INIT_PARTITION_DEPOSIT_PAGES 208
>> +#define HV_MAP_GPA_MASK             (0x0000000FFFFFFFFFULL)
>> +#define HV_MAP_GPA_BATCH_SIZE       \
>> +            (PAGE_SIZE / sizeof(struct hv_map_gpa_pages) / sizeof(u64))
> 
> Hmmm. Shouldn't this be:
> 
>       ((HV_HYP_PAGE_SIZE - sizeof(struct hv_map_gpa_pages))/sizeof(u64))
> 
> 

Yes! Not sure how that happened.

>> +#define PIN_PAGES_BATCH_SIZE        (0x10000000 / PAGE_SIZE)
>>
>>  static int
>>  hv_call_withdraw_memory(u64 count, int node, u64 partition_id)
>> @@ -245,16 +249,318 @@ hv_call_delete_partition(u64 partition_id)
>>      return -hv_status_to_errno(status);
>>  }
>>
>> +static int
>> +hv_call_map_gpa_pages(u64 partition_id,
>> +                  u64 gpa_target,
>> +                  u64 page_count, u32 flags,
>> +                  struct page **pages)
>> +{
>> +    struct hv_map_gpa_pages *input_page;
>> +    int status;
>> +    int i;
>> +    struct page **p;
>> +    u32 completed = 0;
>> +    u64 hypercall_status;
>> +    unsigned long remaining = page_count;
>> +    int rep_count;
>> +    unsigned long irq_flags;
>> +    int ret = 0;
>> +
>> +    while (remaining) {
>> +
>> +            rep_count = min(remaining, HV_MAP_GPA_BATCH_SIZE);
>> +
>> +            local_irq_save(irq_flags);
>> +            input_page = (struct hv_map_gpa_pages *)(*this_cpu_ptr(
>> +                    hyperv_pcpu_input_arg));
>> +
>> +            input_page->target_partition_id = partition_id;
>> +            input_page->target_gpa_base = gpa_target;
>> +            input_page->map_flags = flags;
>> +
>> +            for (i = 0, p = pages; i < rep_count; i++, p++)
>> +                    input_page->source_gpa_page_list[i] =
>> +                            page_to_pfn(*p) & HV_MAP_GPA_MASK;
> 
> The masking seems a bit weird.  The mask allows for up to 64G page frames,
> which is 256 Tbytes of total physical memory, which is probably the current
> Hyper-V limit on memory size (48 bit physical address space, though 52 bit
> physical address spaces are coming).  So the masking shouldn't ever be doing
> anything.   And if it was doing something, that probably should be treated as
> an error rather than simply dropping the high bits.

Good point - It looks like the mask isn't needed.

> 
> Note that this code does not handle the case where PAGE_SIZE !=
> HV_HYP_PAGE_SIZE.  But maybe we'll never run the root partition with a
> page size other than 4K.
> 

For now on x86 it won't happen, but maybe on ARM?
It shouldn't be hard to support this case, especially since
PAGE_SIZE >= HV_HYP_PAGE_SIZE. Do you think we need it in this patch set?

>> +            hypercall_status = hv_do_rep_hypercall(
>> +                    HVCALL_MAP_GPA_PAGES, rep_count, 0, input_page, NULL);
>> +            local_irq_restore(irq_flags);
>> +
>> +            status = hypercall_status & HV_HYPERCALL_RESULT_MASK;
>> +            completed = (hypercall_status & HV_HYPERCALL_REP_COMP_MASK) >>
>> +                            HV_HYPERCALL_REP_COMP_OFFSET;
>> +
>> +            if (status == HV_STATUS_INSUFFICIENT_MEMORY) {
>> +                    ret = hv_call_deposit_pages(NUMA_NO_NODE,
>> +                                                partition_id, 256);
> 
> Why adding 256 pages?  I'm just contrasting with other places that add
> 1 page at a time.  Maybe a comment to explain ....
> 

Empirically determined. I'll add a #define and comment.

>> +                    if (ret)
>> +                            break;
>> +            } else if (status != HV_STATUS_SUCCESS) {
>> +                    pr_err("%s: completed %llu out of %llu, %s\n",
>> +                           __func__,
>> +                           page_count - remaining, page_count,
>> +                           hv_status_to_string(status));
>> +                    ret = -hv_status_to_errno(status);
>> +                    break;
>> +            }
>> +
>> +            pages += completed;
>> +            remaining -= completed;
>> +            gpa_target += completed;
>> +    }
>> +
>> +    if (ret && completed) {
> 
> Is the above the right test?  Completed could be zero from the most
> recent iteration, but still could be partially succeeded based on a previous
> successful iteration.   I think this needs to check whether remaining equals
> page_count.
> 

You're right; I'll change it to (ret && remaining < page_count)

>> +            pr_err("%s: Partially succeeded; mapped regions may be in 
>> invalid state",
>> +                   __func__);
>> +            ret = -EBADFD;
>> +    }
>> +
>> +    return ret;
>> +}
>> +
>> +static int
>> +hv_call_unmap_gpa_pages(u64 partition_id,
>> +                    u64 gpa_target,
>> +                    u64 page_count, u32 flags)
>> +{
>> +    struct hv_unmap_gpa_pages *input_page;
>> +    int status;
>> +    int ret = 0;
>> +    u32 completed = 0;
>> +    u64 hypercall_status;
>> +    unsigned long remaining = page_count;
>> +    int rep_count;
>> +    unsigned long irq_flags;
>> +
>> +    local_irq_save(irq_flags);
>> +    input_page = (struct hv_unmap_gpa_pages *)(*this_cpu_ptr(
>> +            hyperv_pcpu_input_arg));
>> +
>> +    input_page->target_partition_id = partition_id;
>> +    input_page->target_gpa_base = gpa_target;
>> +    input_page->unmap_flags = flags;
>> +
>> +    while (remaining) {
>> +            rep_count = min(remaining, HV_MAP_GPA_BATCH_SIZE);
>> +            hypercall_status = hv_do_rep_hypercall(
>> +                    HVCALL_UNMAP_GPA_PAGES, rep_count, 0, input_page, NULL);
> 
> Similarly, this code doesn't handle PAGE_SIZE != HV_HYP_PAGE_SIZE.
> 

As above - do we need this for this patch set? This won't happen on x86.

>> +            status = hypercall_status & HV_HYPERCALL_RESULT_MASK;
>> +            completed = (hypercall_status & HV_HYPERCALL_REP_COMP_MASK) >>
>> +                            HV_HYPERCALL_REP_COMP_OFFSET;
>> +            if (status != HV_STATUS_SUCCESS) {
>> +                    pr_err("%s: completed %llu out of %llu, %s\n",
>> +                           __func__,
>> +                           page_count - remaining, page_count,
>> +                           hv_status_to_string(status));
>> +                    ret = -hv_status_to_errno(status);
>> +                    break;
>> +            }
>> +
>> +            remaining -= completed;
>> +            gpa_target += completed;
>> +            input_page->target_gpa_base = gpa_target;
>> +    }
>> +    local_irq_restore(irq_flags);
> 
> I have some concern about holding interrupts disabled for this long.
> 

How about I move the interrupt enabling/disabling inside the loop? i.e.:
        while (remaining) {
                local_irq_save(irq_flags);
                input_page = (struct hv_unmap_gpa_pages *)(*this_cpu_ptr(
                        hyperv_pcpu_input_arg));

                input_page->target_partition_id = partition_id;
                input_page->target_gpa_base = gpa_target;
                input_page->unmap_flags = flags;
                rep_count = min(remaining, HV_MAP_GPA_BATCH_SIZE);
                status = hv_do_rep_hypercall(
                        HVCALL_UNMAP_GPA_PAGES, rep_count, 0, input_page, NULL);
                local_irq_restore(irq_flags);

                completed = (status & HV_HYPERCALL_REP_COMP_MASK) >>
                                HV_HYPERCALL_REP_COMP_OFFSET;
                status &= HV_HYPERCALL_RESULT_MASK;
                if (status != HV_STATUS_SUCCESS) {
                        pr_err("%s: completed %llu out of %llu, %s\n",
                               __func__,
                               page_count - remaining, page_count,
                               hv_status_to_string(status));
                        ret = hv_status_to_errno(status);
                        break;
                }

                remaining -= completed;
                gpa_target += completed;
        }


>> +
>> +    if (ret && completed) {
> 
> Same comment as before.
> 

Ditto as above.

>> +            pr_err("%s: Partially succeeded; mapped regions may be in 
>> invalid state",
>> +                   __func__);
>> +            ret = -EBADFD;
>> +    }
>> +
>> +    return ret;
>> +}
>> +
>> +static long
>> +mshv_partition_ioctl_map_memory(struct mshv_partition *partition,
>> +                            struct mshv_user_mem_region __user *user_mem)
>> +{
>> +    struct mshv_user_mem_region mem;
>> +    struct mshv_mem_region *region;
>> +    int completed;
>> +    unsigned long remaining, batch_size;
>> +    int i;
>> +    struct page **pages;
>> +    u64 page_count, user_start, user_end, gpfn_start, gpfn_end;
>> +    u64 region_page_count, region_user_start, region_user_end;
>> +    u64 region_gpfn_start, region_gpfn_end;
>> +    long ret = 0;
>> +
>> +    /* Check we have enough slots*/
>> +    if (partition->regions.count == MSHV_MAX_MEM_REGIONS) {
>> +            pr_err("%s: not enough memory region slots\n", __func__);
>> +            return -ENOSPC;
>> +    }
>> +
>> +    if (copy_from_user(&mem, user_mem, sizeof(mem)))
>> +            return -EFAULT;
>> +
>> +    if (!mem.size ||
>> +        mem.size & (PAGE_SIZE - 1) ||
>> +        mem.userspace_addr & (PAGE_SIZE - 1) ||
> 
> There's a PAGE_ALIGNED macro that expresses exactly what
> each of the previous two tests is doing.
> 

Since these need to be HV_HYP_PAGE_SIZE aligned, I will add a
HV_HYP_PAGE_ALIGNED macro for this.

>> +        !access_ok(mem.userspace_addr, mem.size))
>> +            return -EINVAL;
>> +
>> +    /* Reject overlapping regions */
>> +    page_count = mem.size >> PAGE_SHIFT;
>> +    user_start = mem.userspace_addr;
>> +    user_end = mem.userspace_addr + mem.size;
>> +    gpfn_start = mem.guest_pfn;
>> +    gpfn_end = mem.guest_pfn + page_count;
>> +    for (i = 0; i < MSHV_MAX_MEM_REGIONS; ++i) {
>> +            region = &partition->regions.slots[i];
>> +            if (!region->size)
>> +                    continue;
>> +            region_page_count = region->size >> PAGE_SHIFT;
>> +            region_user_start = region->userspace_addr;
>> +            region_user_end = region->userspace_addr + region->size;
>> +            region_gpfn_start = region->guest_pfn;
>> +            region_gpfn_end = region->guest_pfn + region_page_count;
>> +
>> +            if (!(
>> +                 (user_end <= region_user_start) ||
>> +                 (region_user_end <= user_start))) {
>> +                    return -EEXIST;
>> +            }
>> +            if (!(
>> +                 (gpfn_end <= region_gpfn_start) ||
>> +                 (region_gpfn_end <= gpfn_start))) {
>> +                    return -EEXIST;
> 
> You could apply De Morgan's theorem to the conditions
> in each "if" statement and get rid of the "!".  That might make
> these slightly easier to understand, but I have no strong
> preference.
> 

I agree, I think that would be a bit clearer. I will change it.

>> +            }
>> +    }
>> +
>> +    /* Pin the userspace pages */
>> +    pages = vzalloc(sizeof(struct page *) * page_count);
>> +    if (!pages)
>> +            return -ENOMEM;
>> +
>> +    remaining = page_count;
>> +    while (remaining) {
>> +            /*
>> +             * We need to batch this, as pin_user_pages_fast with the
>> +             * FOLL_LONGTERM flag does a big temporary allocation
>> +             * of contiguous memory
>> +             */
>> +            batch_size = min(remaining, PIN_PAGES_BATCH_SIZE);
>> +            completed = pin_user_pages_fast(
>> +                            mem.userspace_addr +
>> +                                    (page_count - remaining) * PAGE_SIZE,
>> +                            batch_size,
>> +                            FOLL_WRITE | FOLL_LONGTERM,
>> +                            &pages[page_count - remaining]);
>> +            if (completed < 0) {
>> +                    pr_err("%s: failed to pin user pages error %i\n",
>> +                           __func__,
>> +                           completed);
>> +                    ret = completed;
>> +                    goto err_unpin_pages;
>> +            }
>> +            remaining -= completed;
>> +    }
>> +
>> +    /* Map the pages to GPA pages */
>> +    ret = hv_call_map_gpa_pages(partition->id, mem.guest_pfn,
>> +                                page_count, mem.flags, pages);
>> +    if (ret)
>> +            goto err_unpin_pages;
>> +
>> +    /* Install the new region */
>> +    for (i = 0; i < MSHV_MAX_MEM_REGIONS; ++i) {
>> +            if (!partition->regions.slots[i].size) {
>> +                    region = &partition->regions.slots[i];
>> +                    break;
>> +            }
>> +    }
>> +    region->pages = pages;
>> +    region->size = mem.size;
>> +    region->guest_pfn = mem.guest_pfn;
>> +    region->userspace_addr = mem.userspace_addr;
>> +
>> +    partition->regions.count++;
>> +
>> +    return 0;
>> +
>> +err_unpin_pages:
>> +    unpin_user_pages(pages, page_count - remaining);
>> +    vfree(pages);
>> +
>> +    return ret;
>> +}
>> +
>> +static long
>> +mshv_partition_ioctl_unmap_memory(struct mshv_partition *partition,
>> +                              struct mshv_user_mem_region __user *user_mem)
>> +{
>> +    struct mshv_user_mem_region mem;
>> +    struct mshv_mem_region *region_ptr;
>> +    int i;
>> +    u64 page_count;
>> +    long ret;
>> +
>> +    if (!partition->regions.count)
>> +            return -EINVAL;
>> +
>> +    if (copy_from_user(&mem, user_mem, sizeof(mem)))
>> +            return -EFAULT;
>> +
>> +    /* Find matching region */
>> +    for (i = 0; i < MSHV_MAX_MEM_REGIONS; ++i) {
>> +            if (!partition->regions.slots[i].size)
>> +                    continue;
>> +            region_ptr = &partition->regions.slots[i];
>> +            if (region_ptr->userspace_addr == mem.userspace_addr &&
>> +                region_ptr->size == mem.size &&
>> +                region_ptr->guest_pfn == mem.guest_pfn)
>> +                    break;
>> +    }
>> +
>> +    if (i == MSHV_MAX_MEM_REGIONS)
>> +            return -EINVAL;
>> +
>> +    page_count = region_ptr->size >> PAGE_SHIFT;
>> +    ret = hv_call_unmap_gpa_pages(partition->id, region_ptr->guest_pfn,
>> +                                  page_count, 0);
>> +    if (ret)
>> +            return ret;
>> +
>> +    unpin_user_pages(region_ptr->pages, page_count);
>> +    vfree(region_ptr->pages);
>> +    memset(region_ptr, 0, sizeof(*region_ptr));
>> +    partition->regions.count--;
>> +
>> +    return 0;
>> +}
>> +
>>  static long
>>  mshv_partition_ioctl(struct file *filp, unsigned int ioctl, unsigned long 
>> arg)
>>  {
>> -    return -ENOTTY;
>> +    struct mshv_partition *partition = filp->private_data;
>> +    long ret;
>> +
>> +    if (mutex_lock_killable(&partition->mutex))
>> +            return -EINTR;
>> +
>> +    switch (ioctl) {
>> +    case MSHV_MAP_GUEST_MEMORY:
>> +            ret = mshv_partition_ioctl_map_memory(partition,
>> +                                                    (void __user *)arg);
>> +            break;
>> +    case MSHV_UNMAP_GUEST_MEMORY:
>> +            ret = mshv_partition_ioctl_unmap_memory(partition,
>> +                                                    (void __user *)arg);
>> +            break;
>> +    default:
>> +            ret = -ENOTTY;
>> +    }
>> +
>> +    mutex_unlock(&partition->mutex);
>> +    return ret;
>>  }
>>
>>  static void
>>  destroy_partition(struct mshv_partition *partition)
>>  {
>> -    unsigned long flags;
>> +    unsigned long flags, page_count;
>> +    struct mshv_mem_region *region;
>>      int i;
>>
>>      /* Remove from list of partitions */
>> @@ -286,6 +592,16 @@ destroy_partition(struct mshv_partition *partition)
>>
>>      hv_call_delete_partition(partition->id);
>>
>> +    /* Remove regions and unpin the pages */
>> +    for (i = 0; i < MSHV_MAX_MEM_REGIONS; ++i) {
>> +            region = &partition->regions.slots[i];
>> +            if (!region->size)
>> +                    continue;
>> +            page_count = region->size >> PAGE_SHIFT;
>> +            unpin_user_pages(region->pages, page_count);
>> +            vfree(region->pages);
>> +    }
>> +
>>      kfree(partition);
>>  }
>>
>> @@ -353,6 +669,8 @@ mshv_ioctl_create_partition(void __user *user_arg)
>>      if (!partition)
>>              return -ENOMEM;
>>
>> +    mutex_init(&partition->mutex);
>> +
>>      fd = get_unused_fd_flags(O_CLOEXEC);
>>      if (fd < 0) {
>>              ret = fd;
>> --
>> 2.25.1

Reply via email to