Device Coherent type uses device memory that is coherently accesible by
the CPU. This could be shown as SP (special purpose) memory range
at the BIOS-e820 memory enumeration. If no SP memory is supported in
system, this could be faked by setting CONFIG_EFI_FAKE_MEMMAP.

Currently, test_hmm only supports two different SP ranges of at least
256MB size. This could be specified in the kernel parameter variable
efi_fake_mem. Ex. Two SP ranges of 1GB starting at 0x100000000 &
0x140000000 physical address. Ex.
efi_fake_mem=1G@0x100000000:0x40000,1G@0x140000000:0x40000

Signed-off-by: Alex Sierra <alex.sie...@amd.com>
---
 lib/test_hmm.c      | 191 +++++++++++++++++++++++++++++++++-----------
 lib/test_hmm_uapi.h |  15 ++--
 2 files changed, 153 insertions(+), 53 deletions(-)

diff --git a/lib/test_hmm.c b/lib/test_hmm.c
index 45334df28d7b..9e2cc0cd4412 100644
--- a/lib/test_hmm.c
+++ b/lib/test_hmm.c
@@ -471,6 +471,7 @@ static int dmirror_allocate_chunk(struct dmirror_device 
*mdevice,
        unsigned long pfn_first;
        unsigned long pfn_last;
        void *ptr;
+       int ret = -ENOMEM;
 
        devmem = kzalloc(sizeof(*devmem), GFP_KERNEL);
        if (!devmem)
@@ -553,7 +554,7 @@ static int dmirror_allocate_chunk(struct dmirror_device 
*mdevice,
        }
        spin_unlock(&mdevice->lock);
 
-       return true;
+       return 0;
 
 err_release:
        mutex_unlock(&mdevice->devmem_lock);
@@ -562,7 +563,7 @@ static int dmirror_allocate_chunk(struct dmirror_device 
*mdevice,
 err_devmem:
        kfree(devmem);
 
-       return false;
+       return ret;
 }
 
 static struct page *dmirror_devmem_alloc_page(struct dmirror_device *mdevice)
@@ -571,8 +572,10 @@ static struct page *dmirror_devmem_alloc_page(struct 
dmirror_device *mdevice)
        struct page *rpage;
 
        /*
-        * This is a fake device so we alloc real system memory to store
-        * our device memory.
+        * For ZONE_DEVICE private type, this is a fake device so we alloc real
+        * system memory to store our device memory.
+        * For ZONE_DEVICE coherent type we use the actual dpage to store the 
data
+        * and ignore rpage.
         */
        rpage = alloc_page(GFP_HIGHUSER);
        if (!rpage)
@@ -623,12 +626,17 @@ static void dmirror_migrate_alloc_and_copy(struct 
migrate_vma *args,
                 * unallocated pte_none() or read-only zero page.
                 */
                spage = migrate_pfn_to_page(*src);
+               if (spage && is_zone_device_page(spage))
+                       pr_err("page already in device spage pfn: 0x%lx\n",
+                               page_to_pfn(spage));
+               WARN_ON(spage && is_zone_device_page(spage));
 
                dpage = dmirror_devmem_alloc_page(mdevice);
                if (!dpage)
                        continue;
 
-               rpage = dpage->zone_device_data;
+               rpage = is_device_private_page(dpage) ? dpage->zone_device_data 
:
+                                                       dpage;
                if (spage)
                        copy_highpage(rpage, spage);
                else
@@ -640,8 +648,11 @@ static void dmirror_migrate_alloc_and_copy(struct 
migrate_vma *args,
                 * the simulated device memory and that page holds the pointer
                 * to the mirror.
                 */
+               rpage = dpage->zone_device_data;
                rpage->zone_device_data = dmirror;
 
+               pr_debug("migrating from sys to dev pfn src: 0x%lx pfn dst: 
0x%lx\n",
+                        page_to_pfn(spage), page_to_pfn(dpage));
                *dst = migrate_pfn(page_to_pfn(dpage)) |
                            MIGRATE_PFN_LOCKED;
                if ((*src & MIGRATE_PFN_WRITE) ||
@@ -721,10 +732,13 @@ static int dmirror_migrate_finalize_and_map(struct 
migrate_vma *args,
                        continue;
 
                /*
-                * Store the page that holds the data so the page table
-                * doesn't have to deal with ZONE_DEVICE private pages.
+                * For ZONE_DEVICE private pages we store the page that
+                * holds the data so the page table doesn't have to deal it.
+                * For ZONE_DEVICE coherent pages we store the actual page, 
since
+                * the CPU has coherent access to the page.
                 */
-               entry = dpage->zone_device_data;
+               entry = is_device_private_page(dpage) ? dpage->zone_device_data 
:
+                                                       dpage;
                if (*dst & MIGRATE_PFN_WRITE)
                        entry = xa_tag_pointer(entry, DPT_XA_TAG_WRITE);
                entry = xa_store(&dmirror->pt, pfn, entry, GFP_ATOMIC);
@@ -804,8 +818,110 @@ static int dmirror_exclusive(struct dmirror *dmirror,
        return ret;
 }
 
-static int dmirror_migrate(struct dmirror *dmirror,
-                          struct hmm_dmirror_cmd *cmd)
+static vm_fault_t dmirror_devmem_fault_alloc_and_copy(struct migrate_vma *args,
+                                                     struct dmirror *dmirror)
+{
+       const unsigned long *src = args->src;
+       unsigned long *dst = args->dst;
+       unsigned long start = args->start;
+       unsigned long end = args->end;
+       unsigned long addr;
+
+       for (addr = start; addr < end; addr += PAGE_SIZE,
+                                      src++, dst++) {
+               struct page *dpage, *spage;
+
+               spage = migrate_pfn_to_page(*src);
+               if (!spage || !(*src & MIGRATE_PFN_MIGRATE))
+                       continue;
+
+               WARN_ON(!is_device_page(spage));
+               spage = is_device_private_page(spage) ? spage->zone_device_data 
:
+                                                       spage;
+               dpage = alloc_page_vma(GFP_HIGHUSER_MOVABLE, args->vma, addr);
+               if (!dpage)
+                       continue;
+               pr_debug("migrating from dev to sys pfn src: 0x%lx pfn dst: 
0x%lx\n",
+                        page_to_pfn(spage), page_to_pfn(dpage));
+
+               lock_page(dpage);
+               xa_erase(&dmirror->pt, addr >> PAGE_SHIFT);
+               copy_highpage(dpage, spage);
+               *dst = migrate_pfn(page_to_pfn(dpage)) | MIGRATE_PFN_LOCKED;
+               if (*src & MIGRATE_PFN_WRITE)
+                       *dst |= MIGRATE_PFN_WRITE;
+       }
+       return 0;
+}
+
+static int dmirror_migrate_to_system(struct dmirror *dmirror,
+                                    struct hmm_dmirror_cmd *cmd)
+{
+       unsigned long start, end, addr;
+       unsigned long size = cmd->npages << PAGE_SHIFT;
+       struct mm_struct *mm = dmirror->notifier.mm;
+       struct vm_area_struct *vma;
+       unsigned long src_pfns[64];
+       unsigned long dst_pfns[64];
+       struct migrate_vma args;
+       unsigned long next;
+       int ret;
+
+       start = cmd->addr;
+       end = start + size;
+       if (end < start)
+               return -EINVAL;
+
+       /* Since the mm is for the mirrored process, get a reference first. */
+       if (!mmget_not_zero(mm))
+               return -EINVAL;
+
+       mmap_read_lock(mm);
+       for (addr = start; addr < end; addr = next) {
+               vma = find_vma(mm, addr);
+               if (!vma || addr < vma->vm_start ||
+                   !(vma->vm_flags & VM_READ)) {
+                       ret = -EINVAL;
+                       goto out;
+               }
+               next = min(end, addr + (ARRAY_SIZE(src_pfns) << PAGE_SHIFT));
+               if (next > vma->vm_end)
+                       next = vma->vm_end;
+
+               args.vma = vma;
+               args.src = src_pfns;
+               args.dst = dst_pfns;
+               args.start = addr;
+               args.end = next;
+               args.pgmap_owner = dmirror->mdevice;
+               args.flags = (dmirror->mdevice->zone_device_type ==
+                             HMM_DMIRROR_MEMORY_DEVICE_PRIVATE) ?
+                             MIGRATE_VMA_SELECT_DEVICE_PRIVATE :
+                             MIGRATE_VMA_SELECT_DEVICE_COHERENT;
+
+               ret = migrate_vma_setup(&args);
+               if (ret)
+                       goto out;
+
+               pr_debug("Migrating from device mem to sys mem\n");
+               dmirror_devmem_fault_alloc_and_copy(&args, dmirror);
+
+               migrate_vma_pages(&args);
+               migrate_vma_finalize(&args);
+       }
+       mmap_read_unlock(mm);
+       mmput(mm);
+
+       return ret;
+
+out:
+       mmap_read_unlock(mm);
+       mmput(mm);
+       return ret;
+}
+
+static int dmirror_migrate_to_device(struct dmirror *dmirror,
+                               struct hmm_dmirror_cmd *cmd)
 {
        unsigned long start, end, addr;
        unsigned long size = cmd->npages << PAGE_SHIFT;
@@ -849,6 +965,7 @@ static int dmirror_migrate(struct dmirror *dmirror,
                if (ret)
                        goto out;
 
+               pr_debug("Migrating from sys mem to device mem\n");
                dmirror_migrate_alloc_and_copy(&args, dmirror);
                migrate_vma_pages(&args);
                dmirror_migrate_finalize_and_map(&args, dmirror);
@@ -857,7 +974,7 @@ static int dmirror_migrate(struct dmirror *dmirror,
        mmap_read_unlock(mm);
        mmput(mm);
 
-       /* Return the migrated data for verification. */
+       /* Return the migrated data for verification. only for pages in device 
zone */
        ret = dmirror_bounce_init(&bounce, start, size);
        if (ret)
                return ret;
@@ -894,9 +1011,15 @@ static void dmirror_mkentry(struct dmirror *dmirror, 
struct hmm_range *range,
        }
 
        page = hmm_pfn_to_page(entry);
-       if (is_device_private_page(page)) {
-               /* Is the page migrated to this device or some other? */
-               if (dmirror->mdevice == dmirror_page_to_device(page))
+       if (is_device_page(page)) {
+               /* Is page ZONE_DEVICE coherent? */
+               if (!is_device_private_page(page))
+                       *perm = HMM_DMIRROR_PROT_DEV_COHERENT;
+               /*
+                * Is page ZONE_DEVICE private migrated to
+                * this device or some other?
+                */
+               else if (dmirror->mdevice == dmirror_page_to_device(page))
                        *perm = HMM_DMIRROR_PROT_DEV_PRIVATE_LOCAL;
                else
                        *perm = HMM_DMIRROR_PROT_DEV_PRIVATE_REMOTE;
@@ -1096,8 +1219,12 @@ static long dmirror_fops_unlocked_ioctl(struct file 
*filp,
                ret = dmirror_write(dmirror, &cmd);
                break;
 
-       case HMM_DMIRROR_MIGRATE:
-               ret = dmirror_migrate(dmirror, &cmd);
+       case HMM_DMIRROR_MIGRATE_TO_DEV:
+               ret = dmirror_migrate_to_device(dmirror, &cmd);
+               break;
+
+       case HMM_DMIRROR_MIGRATE_TO_SYS:
+               ret = dmirror_migrate_to_system(dmirror, &cmd);
                break;
 
        case HMM_DMIRROR_EXCLUSIVE:
@@ -1152,38 +1279,6 @@ static void dmirror_devmem_free(struct page *page)
        spin_unlock(&mdevice->lock);
 }
 
-static vm_fault_t dmirror_devmem_fault_alloc_and_copy(struct migrate_vma *args,
-                                                     struct dmirror *dmirror)
-{
-       const unsigned long *src = args->src;
-       unsigned long *dst = args->dst;
-       unsigned long start = args->start;
-       unsigned long end = args->end;
-       unsigned long addr;
-
-       for (addr = start; addr < end; addr += PAGE_SIZE,
-                                      src++, dst++) {
-               struct page *dpage, *spage;
-
-               spage = migrate_pfn_to_page(*src);
-               if (!spage || !(*src & MIGRATE_PFN_MIGRATE))
-                       continue;
-               spage = spage->zone_device_data;
-
-               dpage = alloc_page_vma(GFP_HIGHUSER_MOVABLE, args->vma, addr);
-               if (!dpage)
-                       continue;
-
-               lock_page(dpage);
-               xa_erase(&dmirror->pt, addr >> PAGE_SHIFT);
-               copy_highpage(dpage, spage);
-               *dst = migrate_pfn(page_to_pfn(dpage)) | MIGRATE_PFN_LOCKED;
-               if (*src & MIGRATE_PFN_WRITE)
-                       *dst |= MIGRATE_PFN_WRITE;
-       }
-       return 0;
-}
-
 static vm_fault_t dmirror_devmem_fault(struct vm_fault *vmf)
 {
        struct migrate_vma args;
diff --git a/lib/test_hmm_uapi.h b/lib/test_hmm_uapi.h
index 77f81e6314eb..af15c35a90f4 100644
--- a/lib/test_hmm_uapi.h
+++ b/lib/test_hmm_uapi.h
@@ -19,6 +19,7 @@
  * @npages: (in) number of pages to read/write
  * @cpages: (out) number of pages copied
  * @faults: (out) number of device page faults seen
+ * @zone_device_type: (out) zone device memory type
  */
 struct hmm_dmirror_cmd {
        __u64           addr;
@@ -32,11 +33,12 @@ struct hmm_dmirror_cmd {
 /* Expose the address space of the calling process through hmm device file */
 #define HMM_DMIRROR_READ               _IOWR('H', 0x00, struct hmm_dmirror_cmd)
 #define HMM_DMIRROR_WRITE              _IOWR('H', 0x01, struct hmm_dmirror_cmd)
-#define HMM_DMIRROR_MIGRATE            _IOWR('H', 0x02, struct hmm_dmirror_cmd)
-#define HMM_DMIRROR_SNAPSHOT           _IOWR('H', 0x03, struct hmm_dmirror_cmd)
-#define HMM_DMIRROR_EXCLUSIVE          _IOWR('H', 0x04, struct hmm_dmirror_cmd)
-#define HMM_DMIRROR_CHECK_EXCLUSIVE    _IOWR('H', 0x05, struct hmm_dmirror_cmd)
-#define HMM_DMIRROR_GET_MEM_DEV_TYPE   _IOWR('H', 0x06, struct hmm_dmirror_cmd)
+#define HMM_DMIRROR_MIGRATE_TO_DEV     _IOWR('H', 0x02, struct hmm_dmirror_cmd)
+#define HMM_DMIRROR_MIGRATE_TO_SYS     _IOWR('H', 0x03, struct hmm_dmirror_cmd)
+#define HMM_DMIRROR_SNAPSHOT           _IOWR('H', 0x04, struct hmm_dmirror_cmd)
+#define HMM_DMIRROR_EXCLUSIVE          _IOWR('H', 0x05, struct hmm_dmirror_cmd)
+#define HMM_DMIRROR_CHECK_EXCLUSIVE    _IOWR('H', 0x06, struct hmm_dmirror_cmd)
+#define HMM_DMIRROR_GET_MEM_DEV_TYPE   _IOWR('H', 0x07, struct hmm_dmirror_cmd)
 
 /*
  * Values returned in hmm_dmirror_cmd.ptr for HMM_DMIRROR_SNAPSHOT.
@@ -51,6 +53,8 @@ struct hmm_dmirror_cmd {
  *                                     device the ioctl() is made
  * HMM_DMIRROR_PROT_DEV_PRIVATE_REMOTE: Migrated device private page on some
  *                                     other device
+ * HMM_DMIRROR_PROT_DEV_COHERENT: Migrate device coherent page on the device
+ *                               the ioctl() is made
  */
 enum {
        HMM_DMIRROR_PROT_ERROR                  = 0xFF,
@@ -62,6 +66,7 @@ enum {
        HMM_DMIRROR_PROT_ZERO                   = 0x10,
        HMM_DMIRROR_PROT_DEV_PRIVATE_LOCAL      = 0x20,
        HMM_DMIRROR_PROT_DEV_PRIVATE_REMOTE     = 0x30,
+       HMM_DMIRROR_PROT_DEV_COHERENT           = 0x40,
 };
 
 enum {
-- 
2.32.0

Reply via email to