Re: [PATCH 2/6] mm/device-public-memory: device memory cache coherent with CPU v4

2017-07-13 Thread Balbir Singh
On Thu, 2017-07-13 at 17:15 -0400, Jérôme Glisse wrote:
> Platforms with an advanced system bus (like CAPI or CCIX) allow device
> memory to be accessible from the CPU in a cache-coherent fashion. Add
> a new type of ZONE_DEVICE to represent such memory. The use cases
> are the same as for the un-addressable device memory but without
> all the corner cases.
> 
> Changed since v3:
>   - s/public/public (going back)
> Changed since v2:
>   - s/public/public
>   - add proper include in migrate.c and drop useless #if/#endif
> Changed since v1:
>   - Kconfig and #if/#else cleanup
> 
> Signed-off-by: Jérôme Glisse 
> Cc: Balbir Singh 
> Cc: Aneesh Kumar 
> Cc: Paul E. McKenney 
> Cc: Benjamin Herrenschmidt 
> Cc: Dan Williams 
> Cc: Ross Zwisler 
> ---

Acked-by: Balbir Singh 



Re: [PATCH 2/6] mm/device-public-memory: device memory cache coherent with CPU v4

2017-07-13 Thread Balbir Singh
On Thu, 2017-07-13 at 17:15 -0400, Jérôme Glisse wrote:
> Platforms with an advanced system bus (like CAPI or CCIX) allow device
> memory to be accessible from the CPU in a cache-coherent fashion. Add
> a new type of ZONE_DEVICE to represent such memory. The use cases
> are the same as for the un-addressable device memory but without
> all the corner cases.
> 
> Changed since v3:
>   - s/public/public (going back)
> Changed since v2:
>   - s/public/public
>   - add proper include in migrate.c and drop useless #if/#endif
> Changed since v1:
>   - Kconfig and #if/#else cleanup
> 
> Signed-off-by: Jérôme Glisse 
> Cc: Balbir Singh 
> Cc: Aneesh Kumar 
> Cc: Paul E. McKenney 
> Cc: Benjamin Herrenschmidt 
> Cc: Dan Williams 
> Cc: Ross Zwisler 
> ---

Acked-by: Balbir Singh 



[PATCH 2/6] mm/device-public-memory: device memory cache coherent with CPU v4

2017-07-13 Thread Jérôme Glisse
Platforms with an advanced system bus (like CAPI or CCIX) allow device
memory to be accessible from the CPU in a cache-coherent fashion. Add
a new type of ZONE_DEVICE to represent such memory. The use cases
are the same as for the un-addressable device memory but without
all the corner cases.

Changed since v3:
  - s/public/public (going back)
Changed since v2:
  - s/public/public
  - add proper include in migrate.c and drop useless #if/#endif
Changed since v1:
  - Kconfig and #if/#else cleanup

Signed-off-by: Jérôme Glisse 
Cc: Balbir Singh 
Cc: Aneesh Kumar 
Cc: Paul E. McKenney 
Cc: Benjamin Herrenschmidt 
Cc: Dan Williams 
Cc: Ross Zwisler 
---
 fs/proc/task_mmu.c   |  2 +-
 include/linux/hmm.h  |  4 ++--
 include/linux/ioport.h   |  1 +
 include/linux/memremap.h | 21 ++
 include/linux/mm.h   | 20 ++---
 kernel/memremap.c| 15 -
 mm/Kconfig   | 11 ++
 mm/gup.c |  7 ++
 mm/hmm.c |  4 ++--
 mm/madvise.c |  2 +-
 mm/memory.c  | 46 +-
 mm/migrate.c | 57 ++--
 mm/swap.c| 11 ++
 13 files changed, 156 insertions(+), 45 deletions(-)

diff --git a/fs/proc/task_mmu.c b/fs/proc/task_mmu.c
index 957b6ea80d5f..1f38f2c7cc34 100644
--- a/fs/proc/task_mmu.c
+++ b/fs/proc/task_mmu.c
@@ -1182,7 +1182,7 @@ static pagemap_entry_t pte_to_pagemap_entry(struct pagemapread *pm,
if (pm->show_pfn)
frame = pte_pfn(pte);
flags |= PM_PRESENT;
-   page = vm_normal_page(vma, addr, pte);
+   page = _vm_normal_page(vma, addr, pte, true);
if (pte_soft_dirty(pte))
flags |= PM_SOFT_DIRTY;
} else if (is_swap_pte(pte)) {
diff --git a/include/linux/hmm.h b/include/linux/hmm.h
index 458d0d6d82f3..a40288309fd2 100644
--- a/include/linux/hmm.h
+++ b/include/linux/hmm.h
@@ -327,7 +327,7 @@ int hmm_vma_fault(struct vm_area_struct *vma,
 #endif /* IS_ENABLED(CONFIG_HMM_MIRROR) */
 
 
-#if IS_ENABLED(CONFIG_DEVICE_PRIVATE)
+#if IS_ENABLED(CONFIG_DEVICE_PRIVATE) ||  IS_ENABLED(CONFIG_DEVICE_PUBLIC)
 struct hmm_devmem;
 
 struct page *hmm_vma_alloc_locked_page(struct vm_area_struct *vma,
@@ -443,7 +443,7 @@ struct hmm_device {
  */
 struct hmm_device *hmm_device_new(void *drvdata);
 void hmm_device_put(struct hmm_device *hmm_device);
-#endif /* IS_ENABLED(CONFIG_DEVICE_PRIVATE) */
+#endif /* CONFIG_DEVICE_PRIVATE || CONFIG_DEVICE_PUBLIC */
 
 
 /* Below are for HMM internal use only! Not to be used by device driver! */
diff --git a/include/linux/ioport.h b/include/linux/ioport.h
index 3a4f69137bc2..f5cf32e80041 100644
--- a/include/linux/ioport.h
+++ b/include/linux/ioport.h
@@ -131,6 +131,7 @@ enum {
IORES_DESC_PERSISTENT_MEMORY= 4,
IORES_DESC_PERSISTENT_MEMORY_LEGACY = 5,
IORES_DESC_DEVICE_PRIVATE_MEMORY= 6,
+   IORES_DESC_DEVICE_PUBLIC_MEMORY = 7,
 };
 
 /* helpers to define resources */
diff --git a/include/linux/memremap.h b/include/linux/memremap.h
index ae5ff92f72b4..c7b4c75ae3f8 100644
--- a/include/linux/memremap.h
+++ b/include/linux/memremap.h
@@ -57,10 +57,18 @@ static inline struct vmem_altmap *to_vmem_altmap(unsigned long memmap_start)
  *
  * A more complete discussion of unaddressable memory may be found in
  * include/linux/hmm.h and Documentation/vm/hmm.txt.
+ *
+ * MEMORY_DEVICE_PUBLIC:
+ * Device memory that is cache coherent from device and CPU point of view. This
+ * is used on platforms that have an advanced system bus (like CAPI or CCIX). A
+ * driver can hotplug the device memory using ZONE_DEVICE and with that memory
+ * type. Any page of a process can be migrated to such memory. However, no one
+ * should be allowed to pin such memory so that it can always be evicted.
  */
 enum memory_type {
MEMORY_DEVICE_HOST = 0,
MEMORY_DEVICE_PRIVATE,
+   MEMORY_DEVICE_PUBLIC,
 };
 
 /*
@@ -92,6 +100,8 @@ enum memory_type {
  * The page_free() callback is called once the page refcount reaches 1
  * (ZONE_DEVICE pages never reach 0 refcount unless there is a refcount bug.
  * This allows the device driver to implement its own memory management.)
+ *
+ * For MEMORY_DEVICE_PUBLIC only the page_free() callback matters.
  */
 typedef int (*dev_page_fault_t)(struct vm_area_struct *vma,
unsigned long addr,
@@ -134,6 +144,12 @@ static inline bool is_device_private_page(const struct page *page)
return is_zone_device_page(page) &&
page->pgmap->type == MEMORY_DEVICE_PRIVATE;
 }
+
+static inline bool is_device_public_page(const struct page *page)
+{
+   return 

[PATCH 2/6] mm/device-public-memory: device memory cache coherent with CPU v4

2017-07-13 Thread Jérôme Glisse
Platforms with an advanced system bus (like CAPI or CCIX) allow device
memory to be accessible from the CPU in a cache-coherent fashion. Add
a new type of ZONE_DEVICE to represent such memory. The use cases
are the same as for the un-addressable device memory but without
all the corner cases.

Changed since v3:
  - s/public/public (going back)
Changed since v2:
  - s/public/public
  - add proper include in migrate.c and drop useless #if/#endif
Changed since v1:
  - Kconfig and #if/#else cleanup

Signed-off-by: Jérôme Glisse 
Cc: Balbir Singh 
Cc: Aneesh Kumar 
Cc: Paul E. McKenney 
Cc: Benjamin Herrenschmidt 
Cc: Dan Williams 
Cc: Ross Zwisler 
---
 fs/proc/task_mmu.c   |  2 +-
 include/linux/hmm.h  |  4 ++--
 include/linux/ioport.h   |  1 +
 include/linux/memremap.h | 21 ++
 include/linux/mm.h   | 20 ++---
 kernel/memremap.c| 15 -
 mm/Kconfig   | 11 ++
 mm/gup.c |  7 ++
 mm/hmm.c |  4 ++--
 mm/madvise.c |  2 +-
 mm/memory.c  | 46 +-
 mm/migrate.c | 57 ++--
 mm/swap.c| 11 ++
 13 files changed, 156 insertions(+), 45 deletions(-)

diff --git a/fs/proc/task_mmu.c b/fs/proc/task_mmu.c
index 957b6ea80d5f..1f38f2c7cc34 100644
--- a/fs/proc/task_mmu.c
+++ b/fs/proc/task_mmu.c
@@ -1182,7 +1182,7 @@ static pagemap_entry_t pte_to_pagemap_entry(struct pagemapread *pm,
if (pm->show_pfn)
frame = pte_pfn(pte);
flags |= PM_PRESENT;
-   page = vm_normal_page(vma, addr, pte);
+   page = _vm_normal_page(vma, addr, pte, true);
if (pte_soft_dirty(pte))
flags |= PM_SOFT_DIRTY;
} else if (is_swap_pte(pte)) {
diff --git a/include/linux/hmm.h b/include/linux/hmm.h
index 458d0d6d82f3..a40288309fd2 100644
--- a/include/linux/hmm.h
+++ b/include/linux/hmm.h
@@ -327,7 +327,7 @@ int hmm_vma_fault(struct vm_area_struct *vma,
 #endif /* IS_ENABLED(CONFIG_HMM_MIRROR) */
 
 
-#if IS_ENABLED(CONFIG_DEVICE_PRIVATE)
+#if IS_ENABLED(CONFIG_DEVICE_PRIVATE) ||  IS_ENABLED(CONFIG_DEVICE_PUBLIC)
 struct hmm_devmem;
 
 struct page *hmm_vma_alloc_locked_page(struct vm_area_struct *vma,
@@ -443,7 +443,7 @@ struct hmm_device {
  */
 struct hmm_device *hmm_device_new(void *drvdata);
 void hmm_device_put(struct hmm_device *hmm_device);
-#endif /* IS_ENABLED(CONFIG_DEVICE_PRIVATE) */
+#endif /* CONFIG_DEVICE_PRIVATE || CONFIG_DEVICE_PUBLIC */
 
 
 /* Below are for HMM internal use only! Not to be used by device driver! */
diff --git a/include/linux/ioport.h b/include/linux/ioport.h
index 3a4f69137bc2..f5cf32e80041 100644
--- a/include/linux/ioport.h
+++ b/include/linux/ioport.h
@@ -131,6 +131,7 @@ enum {
IORES_DESC_PERSISTENT_MEMORY= 4,
IORES_DESC_PERSISTENT_MEMORY_LEGACY = 5,
IORES_DESC_DEVICE_PRIVATE_MEMORY= 6,
+   IORES_DESC_DEVICE_PUBLIC_MEMORY = 7,
 };
 
 /* helpers to define resources */
diff --git a/include/linux/memremap.h b/include/linux/memremap.h
index ae5ff92f72b4..c7b4c75ae3f8 100644
--- a/include/linux/memremap.h
+++ b/include/linux/memremap.h
@@ -57,10 +57,18 @@ static inline struct vmem_altmap *to_vmem_altmap(unsigned long memmap_start)
  *
  * A more complete discussion of unaddressable memory may be found in
  * include/linux/hmm.h and Documentation/vm/hmm.txt.
+ *
+ * MEMORY_DEVICE_PUBLIC:
+ * Device memory that is cache coherent from device and CPU point of view. This
+ * is used on platforms that have an advanced system bus (like CAPI or CCIX). A
+ * driver can hotplug the device memory using ZONE_DEVICE and with that memory
+ * type. Any page of a process can be migrated to such memory. However, no one
+ * should be allowed to pin such memory so that it can always be evicted.
  */
 enum memory_type {
MEMORY_DEVICE_HOST = 0,
MEMORY_DEVICE_PRIVATE,
+   MEMORY_DEVICE_PUBLIC,
 };
 
 /*
@@ -92,6 +100,8 @@ enum memory_type {
  * The page_free() callback is called once the page refcount reaches 1
  * (ZONE_DEVICE pages never reach 0 refcount unless there is a refcount bug.
  * This allows the device driver to implement its own memory management.)
+ *
+ * For MEMORY_DEVICE_PUBLIC only the page_free() callback matters.
  */
 typedef int (*dev_page_fault_t)(struct vm_area_struct *vma,
unsigned long addr,
@@ -134,6 +144,12 @@ static inline bool is_device_private_page(const struct page *page)
return is_zone_device_page(page) &&
page->pgmap->type == MEMORY_DEVICE_PRIVATE;
 }
+
+static inline bool is_device_public_page(const struct page *page)
+{
+   return is_zone_device_page(page) &&
+   page->pgmap->type == MEMORY_DEVICE_PUBLIC;
+}
 #else
 static inline void *devm_memremap_pages(struct device *dev,
struct