[HMM-v25 03/19] mm/hmm/mirror: mirror process address space on device with HMM helpers v3

2017-08-16 Thread Jérôme Glisse
This is a heterogeneous memory management (HMM) process address space
mirroring. In a nutshell this provides an API to mirror a process address
space on a device. This boils down to keeping the CPU and device page tables
synchronized (we assume that both device and CPU are cache coherent, as
PCIe devices can be).

This patch provides a simple API for device drivers to achieve address
space mirroring, thus avoiding the need for each device driver to grow its
own CPU page table walker and CPU page table synchronization mechanism.

This is useful for NVidia GPU >= Pascal, Mellanox IB >= mlx5 and more
hardware in the future.

Changed since v2:
  - s/device unaddressable/device private/
Changed since v1:
  - Kconfig logic (depend on x86-64 and use ARCH_HAS pattern)

Signed-off-by: Jérôme Glisse 
Signed-off-by: Evgeny Baskakov 
Signed-off-by: John Hubbard 
Signed-off-by: Mark Hairgrove 
Signed-off-by: Sherry Cheung 
Signed-off-by: Subhash Gutti 
---
 include/linux/hmm.h | 110 ++
 mm/Kconfig  |  12 
 mm/hmm.c| 168 +++-
 3 files changed, 275 insertions(+), 15 deletions(-)

diff --git a/include/linux/hmm.h b/include/linux/hmm.h
index ca60595ce784..61c707970aa6 100644
--- a/include/linux/hmm.h
+++ b/include/linux/hmm.h
@@ -72,6 +72,7 @@
 
 #if IS_ENABLED(CONFIG_HMM)
 
+struct hmm;
 
 /*
  * hmm_pfn_t - HMM uses its own pfn type to keep several flags per page
@@ -134,6 +135,115 @@ static inline hmm_pfn_t hmm_pfn_t_from_pfn(unsigned long pfn)
 }
 
 
+#if IS_ENABLED(CONFIG_HMM_MIRROR)
+/*
+ * Mirroring: how to synchronize device page table with CPU page table.
+ *
+ * A device driver that is participating in HMM mirroring must always
+ * synchronize with CPU page table updates. For this, device drivers can either
+ * directly use mmu_notifier APIs or they can use the hmm_mirror API. Device
+ * drivers can decide to register one mirror per device per process, or just
+ * one mirror per process for a group of devices. The pattern is:
+ *
+ *  int device_bind_address_space(..., struct mm_struct *mm, ...)
+ *  {
+ *  struct device_address_space *das;
+ *
+ *  // Device driver specific initialization, and allocation of das
+ *  // which contains an hmm_mirror struct as one of its fields.
+ *  ...
+ *
+ *  ret = hmm_mirror_register(&das->mirror, mm, &device_mirror_ops);
+ *  if (ret) {
+ *  // Cleanup on error
+ *  return ret;
+ *  }
+ *
+ *  // Other device driver specific initialization
+ *  ...
+ *  }
+ *
+ * Once an hmm_mirror is registered for an address space, the device driver
+ * will get callbacks through sync_cpu_device_pagetables() operation (see
+ * hmm_mirror_ops struct).
+ *
+ * Device driver must not free the struct containing the hmm_mirror struct
+ * before calling hmm_mirror_unregister(). The expected usage is to do that
+ * when the device driver is unbinding from an address space.
+ *
+ *
+ *  void device_unbind_address_space(struct device_address_space *das)
+ *  {
+ *  // Device driver specific cleanup
+ *  ...
+ *
+ *  hmm_mirror_unregister(&das->mirror);
+ *
+ *  // Other device driver specific cleanup, and now das can be freed
+ *  ...
+ *  }
+ */
+
+struct hmm_mirror;
+
+/*
+ * enum hmm_update_type - type of update
+ * @HMM_UPDATE_INVALIDATE: invalidate range (no indication as to why)
+ */
+enum hmm_update_type {
+   HMM_UPDATE_INVALIDATE,
+};
+
+/*
+ * struct hmm_mirror_ops - HMM mirror device operations callback
+ *
+ * @update: callback to update range on a device
+ */
+struct hmm_mirror_ops {
+   /* sync_cpu_device_pagetables() - synchronize page tables
+*
+* @mirror: pointer to struct hmm_mirror
+* @update_type: type of update that occurred to the CPU page table
+* @start: virtual start address of the range to update
+* @end: virtual end address of the range to update
+*
+* This callback ultimately originates from mmu_notifiers when the CPU
+* page table is updated. The device driver must update its page table
+* in response to this callback. The update argument tells what action
+* to perform.
+*
+* The device driver must not return from this callback until the device
+* page tables are completely updated (TLBs flushed, etc); this is a
+* synchronous call.
+*/
+   void (*sync_cpu_device_pagetables)(struct hmm_mirror *mirror,
+  enum hmm_update_type update_type,
+  unsigned long start,
+  unsigned long end);
+};
+
+/*
+ * struct hmm_mirror - mirror struct for a device driver
+ *
+ * @hmm: pointer 

[HMM-v25 03/19] mm/hmm/mirror: mirror process address space on device with HMM helpers v3

2017-08-16 Thread Jérôme Glisse
This is a heterogeneous memory management (HMM) process address space
mirroring. In a nutshell this provides an API to mirror a process address
space on a device. This boils down to keeping the CPU and device page tables
synchronized (we assume that both device and CPU are cache coherent, as
PCIe devices can be).

This patch provides a simple API for device drivers to achieve address
space mirroring, thus avoiding the need for each device driver to grow its
own CPU page table walker and CPU page table synchronization mechanism.

This is useful for NVidia GPU >= Pascal, Mellanox IB >= mlx5 and more
hardware in the future.

Changed since v2:
  - s/device unaddressable/device private/
Changed since v1:
  - Kconfig logic (depend on x86-64 and use ARCH_HAS pattern)

Signed-off-by: Jérôme Glisse 
Signed-off-by: Evgeny Baskakov 
Signed-off-by: John Hubbard 
Signed-off-by: Mark Hairgrove 
Signed-off-by: Sherry Cheung 
Signed-off-by: Subhash Gutti 
---
 include/linux/hmm.h | 110 ++
 mm/Kconfig  |  12 
 mm/hmm.c| 168 +++-
 3 files changed, 275 insertions(+), 15 deletions(-)

diff --git a/include/linux/hmm.h b/include/linux/hmm.h
index ca60595ce784..61c707970aa6 100644
--- a/include/linux/hmm.h
+++ b/include/linux/hmm.h
@@ -72,6 +72,7 @@
 
 #if IS_ENABLED(CONFIG_HMM)
 
+struct hmm;
 
 /*
  * hmm_pfn_t - HMM uses its own pfn type to keep several flags per page
@@ -134,6 +135,115 @@ static inline hmm_pfn_t hmm_pfn_t_from_pfn(unsigned long pfn)
 }
 
 
+#if IS_ENABLED(CONFIG_HMM_MIRROR)
+/*
+ * Mirroring: how to synchronize device page table with CPU page table.
+ *
+ * A device driver that is participating in HMM mirroring must always
+ * synchronize with CPU page table updates. For this, device drivers can either
+ * directly use mmu_notifier APIs or they can use the hmm_mirror API. Device
+ * drivers can decide to register one mirror per device per process, or just
+ * one mirror per process for a group of devices. The pattern is:
+ *
+ *  int device_bind_address_space(..., struct mm_struct *mm, ...)
+ *  {
+ *  struct device_address_space *das;
+ *
+ *  // Device driver specific initialization, and allocation of das
+ *  // which contains an hmm_mirror struct as one of its fields.
+ *  ...
+ *
+ *  ret = hmm_mirror_register(&das->mirror, mm, &device_mirror_ops);
+ *  if (ret) {
+ *  // Cleanup on error
+ *  return ret;
+ *  }
+ *
+ *  // Other device driver specific initialization
+ *  ...
+ *  }
+ *
+ * Once an hmm_mirror is registered for an address space, the device driver
+ * will get callbacks through sync_cpu_device_pagetables() operation (see
+ * hmm_mirror_ops struct).
+ *
+ * Device driver must not free the struct containing the hmm_mirror struct
+ * before calling hmm_mirror_unregister(). The expected usage is to do that
+ * when the device driver is unbinding from an address space.
+ *
+ *
+ *  void device_unbind_address_space(struct device_address_space *das)
+ *  {
+ *  // Device driver specific cleanup
+ *  ...
+ *
+ *  hmm_mirror_unregister(&das->mirror);
+ *
+ *  // Other device driver specific cleanup, and now das can be freed
+ *  ...
+ *  }
+ */
+
+struct hmm_mirror;
+
+/*
+ * enum hmm_update_type - type of update
+ * @HMM_UPDATE_INVALIDATE: invalidate range (no indication as to why)
+ */
+enum hmm_update_type {
+   HMM_UPDATE_INVALIDATE,
+};
+
+/*
+ * struct hmm_mirror_ops - HMM mirror device operations callback
+ *
+ * @update: callback to update range on a device
+ */
+struct hmm_mirror_ops {
+   /* sync_cpu_device_pagetables() - synchronize page tables
+*
+* @mirror: pointer to struct hmm_mirror
+* @update_type: type of update that occurred to the CPU page table
+* @start: virtual start address of the range to update
+* @end: virtual end address of the range to update
+*
+* This callback ultimately originates from mmu_notifiers when the CPU
+* page table is updated. The device driver must update its page table
+* in response to this callback. The update argument tells what action
+* to perform.
+*
+* The device driver must not return from this callback until the device
+* page tables are completely updated (TLBs flushed, etc); this is a
+* synchronous call.
+*/
+   void (*sync_cpu_device_pagetables)(struct hmm_mirror *mirror,
+  enum hmm_update_type update_type,
+  unsigned long start,
+  unsigned long end);
+};
+
+/*
+ * struct hmm_mirror - mirror struct for a device driver
+ *
+ * @hmm: pointer to struct hmm (which is unique per mm_struct)
+ * @ops: device driver callback for HMM mirror operations
+ * @list: for list