Re: [PATCH 2/6] mm/device-public-memory: device memory cache coherent with CPU v4
On Thu, 2017-07-13 at 17:15 -0400, Jérôme Glisse wrote: > Platforms with an advanced system bus (like CAPI or CCIX) allow device > memory to be accessible from CPU in a cache coherent fashion. Add > a new type of ZONE_DEVICE to represent such memory. The use cases > are the same as for the un-addressable device memory but without > all the corner cases. > > Changed since v3: > - s/public/public (going back) > Changed since v2: > - s/public/public > - add proper include in migrate.c and drop useless #if/#endif > Changed since v1: > - Kconfig and #if/#else cleanup > > Signed-off-by: Jérôme Glisse > Cc: Balbir Singh > Cc: Aneesh Kumar > Cc: Paul E. McKenney > Cc: Benjamin Herrenschmidt > Cc: Dan Williams > Cc: Ross Zwisler > --- Acked-by: Balbir Singh
Re: [PATCH 2/6] mm/device-public-memory: device memory cache coherent with CPU v4
On Thu, 2017-07-13 at 17:15 -0400, Jérôme Glisse wrote: > Platforms with an advanced system bus (like CAPI or CCIX) allow device > memory to be accessible from CPU in a cache coherent fashion. Add > a new type of ZONE_DEVICE to represent such memory. The use cases > are the same as for the un-addressable device memory but without > all the corner cases. > > Changed since v3: > - s/public/public (going back) > Changed since v2: > - s/public/public > - add proper include in migrate.c and drop useless #if/#endif > Changed since v1: > - Kconfig and #if/#else cleanup > > Signed-off-by: Jérôme Glisse > Cc: Balbir Singh > Cc: Aneesh Kumar > Cc: Paul E. McKenney > Cc: Benjamin Herrenschmidt > Cc: Dan Williams > Cc: Ross Zwisler > --- Acked-by: Balbir Singh
[PATCH 2/6] mm/device-public-memory: device memory cache coherent with CPU v4
Platforms with an advanced system bus (like CAPI or CCIX) allow device memory to be accessible from CPU in a cache coherent fashion. Add a new type of ZONE_DEVICE to represent such memory. The use cases are the same as for the un-addressable device memory but without all the corner cases. Changed since v3: - s/public/public (going back) Changed since v2: - s/public/public - add proper include in migrate.c and drop useless #if/#endif Changed since v1: - Kconfig and #if/#else cleanup Signed-off-by: Jérôme Glisse Cc: Balbir Singh Cc: Aneesh Kumar Cc: Paul E. McKenney Cc: Benjamin Herrenschmidt Cc: Dan Williams Cc: Ross Zwisler --- fs/proc/task_mmu.c | 2 +- include/linux/hmm.h | 4 ++-- include/linux/ioport.h | 1 + include/linux/memremap.h | 21 ++ include/linux/mm.h | 20 ++--- kernel/memremap.c| 15 - mm/Kconfig | 11 ++ mm/gup.c | 7 ++ mm/hmm.c | 4 ++-- mm/madvise.c | 2 +- mm/memory.c | 46 +- mm/migrate.c | 57 ++-- mm/swap.c| 11 ++ 13 files changed, 156 insertions(+), 45 deletions(-) diff --git a/fs/proc/task_mmu.c b/fs/proc/task_mmu.c index 957b6ea80d5f..1f38f2c7cc34 100644 --- a/fs/proc/task_mmu.c +++ b/fs/proc/task_mmu.c @@ -1182,7 +1182,7 @@ static pagemap_entry_t pte_to_pagemap_entry(struct pagemapread *pm, if (pm->show_pfn) frame = pte_pfn(pte); flags |= PM_PRESENT; - page = vm_normal_page(vma, addr, pte); + page = _vm_normal_page(vma, addr, pte, true); if (pte_soft_dirty(pte)) flags |= PM_SOFT_DIRTY; } else if (is_swap_pte(pte)) { diff --git a/include/linux/hmm.h b/include/linux/hmm.h index 458d0d6d82f3..a40288309fd2 100644 --- a/include/linux/hmm.h +++ b/include/linux/hmm.h @@ -327,7 +327,7 @@ int hmm_vma_fault(struct vm_area_struct *vma, #endif /* IS_ENABLED(CONFIG_HMM_MIRROR) */ -#if IS_ENABLED(CONFIG_DEVICE_PRIVATE) +#if IS_ENABLED(CONFIG_DEVICE_PRIVATE) || IS_ENABLED(CONFIG_DEVICE_PUBLIC) struct hmm_devmem; struct page *hmm_vma_alloc_locked_page(struct vm_area_struct *vma, @@ -443,7 +443,7 @@ struct hmm_device { */ struct hmm_device *hmm_device_new(void *drvdata); 
void hmm_device_put(struct hmm_device *hmm_device); -#endif /* IS_ENABLED(CONFIG_DEVICE_PRIVATE) */ +#endif /* CONFIG_DEVICE_PRIVATE || CONFIG_DEVICE_PUBLIC */ /* Below are for HMM internal use only! Not to be used by device driver! */ diff --git a/include/linux/ioport.h b/include/linux/ioport.h index 3a4f69137bc2..f5cf32e80041 100644 --- a/include/linux/ioport.h +++ b/include/linux/ioport.h @@ -131,6 +131,7 @@ enum { IORES_DESC_PERSISTENT_MEMORY= 4, IORES_DESC_PERSISTENT_MEMORY_LEGACY = 5, IORES_DESC_DEVICE_PRIVATE_MEMORY= 6, + IORES_DESC_DEVICE_PUBLIC_MEMORY = 7, }; /* helpers to define resources */ diff --git a/include/linux/memremap.h b/include/linux/memremap.h index ae5ff92f72b4..c7b4c75ae3f8 100644 --- a/include/linux/memremap.h +++ b/include/linux/memremap.h @@ -57,10 +57,18 @@ static inline struct vmem_altmap *to_vmem_altmap(unsigned long memmap_start) * * A more complete discussion of unaddressable memory may be found in * include/linux/hmm.h and Documentation/vm/hmm.txt. + * + * MEMORY_DEVICE_PUBLIC: + * Device memory that is cache coherent from device and CPU point of view. This + * is use on platform that have an advance system bus (like CAPI or CCIX). A + * driver can hotplug the device memory using ZONE_DEVICE and with that memory + * type. Any page of a process can be migrated to such memory. However no one + * should be allow to pin such memory so that it can always be evicted. */ enum memory_type { MEMORY_DEVICE_HOST = 0, MEMORY_DEVICE_PRIVATE, + MEMORY_DEVICE_PUBLIC, }; /* @@ -92,6 +100,8 @@ enum memory_type { * The page_free() callback is called once the page refcount reaches 1 * (ZONE_DEVICE pages never reach 0 refcount unless there is a refcount bug. * This allows the device driver to implement its own memory management.) + * + * For MEMORY_DEVICE_PUBLIC only the page_free() callback matter. 
*/ typedef int (*dev_page_fault_t)(struct vm_area_struct *vma, unsigned long addr, @@ -134,6 +144,12 @@ static inline bool is_device_private_page(const struct page *page) return is_zone_device_page(page) && page->pgmap->type == MEMORY_DEVICE_PRIVATE; } + +static inline bool is_device_public_page(const struct page *page) +{ + return
[PATCH 2/6] mm/device-public-memory: device memory cache coherent with CPU v4
Platforms with an advanced system bus (like CAPI or CCIX) allow device memory to be accessible from CPU in a cache coherent fashion. Add a new type of ZONE_DEVICE to represent such memory. The use cases are the same as for the un-addressable device memory but without all the corner cases. Changed since v3: - s/public/public (going back) Changed since v2: - s/public/public - add proper include in migrate.c and drop useless #if/#endif Changed since v1: - Kconfig and #if/#else cleanup Signed-off-by: Jérôme Glisse Cc: Balbir Singh Cc: Aneesh Kumar Cc: Paul E. McKenney Cc: Benjamin Herrenschmidt Cc: Dan Williams Cc: Ross Zwisler --- fs/proc/task_mmu.c | 2 +- include/linux/hmm.h | 4 ++-- include/linux/ioport.h | 1 + include/linux/memremap.h | 21 ++ include/linux/mm.h | 20 ++--- kernel/memremap.c| 15 - mm/Kconfig | 11 ++ mm/gup.c | 7 ++ mm/hmm.c | 4 ++-- mm/madvise.c | 2 +- mm/memory.c | 46 +- mm/migrate.c | 57 ++-- mm/swap.c| 11 ++ 13 files changed, 156 insertions(+), 45 deletions(-) diff --git a/fs/proc/task_mmu.c b/fs/proc/task_mmu.c index 957b6ea80d5f..1f38f2c7cc34 100644 --- a/fs/proc/task_mmu.c +++ b/fs/proc/task_mmu.c @@ -1182,7 +1182,7 @@ static pagemap_entry_t pte_to_pagemap_entry(struct pagemapread *pm, if (pm->show_pfn) frame = pte_pfn(pte); flags |= PM_PRESENT; - page = vm_normal_page(vma, addr, pte); + page = _vm_normal_page(vma, addr, pte, true); if (pte_soft_dirty(pte)) flags |= PM_SOFT_DIRTY; } else if (is_swap_pte(pte)) { diff --git a/include/linux/hmm.h b/include/linux/hmm.h index 458d0d6d82f3..a40288309fd2 100644 --- a/include/linux/hmm.h +++ b/include/linux/hmm.h @@ -327,7 +327,7 @@ int hmm_vma_fault(struct vm_area_struct *vma, #endif /* IS_ENABLED(CONFIG_HMM_MIRROR) */ -#if IS_ENABLED(CONFIG_DEVICE_PRIVATE) +#if IS_ENABLED(CONFIG_DEVICE_PRIVATE) || IS_ENABLED(CONFIG_DEVICE_PUBLIC) struct hmm_devmem; struct page *hmm_vma_alloc_locked_page(struct vm_area_struct *vma, @@ -443,7 +443,7 @@ struct hmm_device { */ struct hmm_device *hmm_device_new(void *drvdata); 
void hmm_device_put(struct hmm_device *hmm_device); -#endif /* IS_ENABLED(CONFIG_DEVICE_PRIVATE) */ +#endif /* CONFIG_DEVICE_PRIVATE || CONFIG_DEVICE_PUBLIC */ /* Below are for HMM internal use only! Not to be used by device driver! */ diff --git a/include/linux/ioport.h b/include/linux/ioport.h index 3a4f69137bc2..f5cf32e80041 100644 --- a/include/linux/ioport.h +++ b/include/linux/ioport.h @@ -131,6 +131,7 @@ enum { IORES_DESC_PERSISTENT_MEMORY= 4, IORES_DESC_PERSISTENT_MEMORY_LEGACY = 5, IORES_DESC_DEVICE_PRIVATE_MEMORY= 6, + IORES_DESC_DEVICE_PUBLIC_MEMORY = 7, }; /* helpers to define resources */ diff --git a/include/linux/memremap.h b/include/linux/memremap.h index ae5ff92f72b4..c7b4c75ae3f8 100644 --- a/include/linux/memremap.h +++ b/include/linux/memremap.h @@ -57,10 +57,18 @@ static inline struct vmem_altmap *to_vmem_altmap(unsigned long memmap_start) * * A more complete discussion of unaddressable memory may be found in * include/linux/hmm.h and Documentation/vm/hmm.txt. + * + * MEMORY_DEVICE_PUBLIC: + * Device memory that is cache coherent from device and CPU point of view. This + * is use on platform that have an advance system bus (like CAPI or CCIX). A + * driver can hotplug the device memory using ZONE_DEVICE and with that memory + * type. Any page of a process can be migrated to such memory. However no one + * should be allow to pin such memory so that it can always be evicted. */ enum memory_type { MEMORY_DEVICE_HOST = 0, MEMORY_DEVICE_PRIVATE, + MEMORY_DEVICE_PUBLIC, }; /* @@ -92,6 +100,8 @@ enum memory_type { * The page_free() callback is called once the page refcount reaches 1 * (ZONE_DEVICE pages never reach 0 refcount unless there is a refcount bug. * This allows the device driver to implement its own memory management.) + * + * For MEMORY_DEVICE_PUBLIC only the page_free() callback matter. 
*/ typedef int (*dev_page_fault_t)(struct vm_area_struct *vma, unsigned long addr, @@ -134,6 +144,12 @@ static inline bool is_device_private_page(const struct page *page) return is_zone_device_page(page) && page->pgmap->type == MEMORY_DEVICE_PRIVATE; } + +static inline bool is_device_public_page(const struct page *page) +{ + return is_zone_device_page(page) && + page->pgmap->type == MEMORY_DEVICE_PUBLIC; +} #else static inline void *devm_memremap_pages(struct device *dev, struct