Re: [PATCH 5/6] mm/memcontrol: support MEMORY_DEVICE_PRIVATE and MEMORY_DEVICE_PUBLIC v3

2017-07-17 Thread Balbir Singh
On Thu, 2017-07-13 at 17:15 -0400, Jérôme Glisse wrote:
> HMM pages (private or public device pages) are ZONE_DEVICE page and
> thus need special handling when it comes to lru or refcount. This
> patch make sure that memcontrol properly handle those when it face
> them. Those pages are use like regular pages in a process address
> space either as anonymous page or as file back page. So from memcg
> point of view we want to handle them like regular page for now at
> least.
> 
> Changed since v2:
>   - s/host/public
> Changed since v1:
>   - s/public/host
>   - add comments explaining how device memory behave and why
> 
> Signed-off-by: Jérôme Glisse 
> Cc: Johannes Weiner 
> Cc: Michal Hocko 
> Cc: Vladimir Davydov 
> Cc: cgro...@vger.kernel.org
> ---

Acked-by: Balbir Singh 



Re: [PATCH 5/6] mm/memcontrol: support MEMORY_DEVICE_PRIVATE and MEMORY_DEVICE_PUBLIC v3

2017-07-17 Thread Balbir Singh
On Thu, 2017-07-13 at 17:15 -0400, Jérôme Glisse wrote:
> HMM pages (private or public device pages) are ZONE_DEVICE page and
> thus need special handling when it comes to lru or refcount. This
> patch make sure that memcontrol properly handle those when it face
> them. Those pages are use like regular pages in a process address
> space either as anonymous page or as file back page. So from memcg
> point of view we want to handle them like regular page for now at
> least.
> 
> Changed since v2:
>   - s/host/public
> Changed since v1:
>   - s/public/host
>   - add comments explaining how device memory behave and why
> 
> Signed-off-by: Jérôme Glisse 
> Cc: Johannes Weiner 
> Cc: Michal Hocko 
> Cc: Vladimir Davydov 
> Cc: cgro...@vger.kernel.org
> ---

Acked-by: Balbir Singh 



[PATCH 5/6] mm/memcontrol: support MEMORY_DEVICE_PRIVATE and MEMORY_DEVICE_PUBLIC v3

2017-07-13 Thread Jérôme Glisse
HMM pages (private or public device pages) are ZONE_DEVICE page and
thus need special handling when it comes to lru or refcount. This
patch make sure that memcontrol properly handle those when it face
them. Those pages are use like regular pages in a process address
space either as anonymous page or as file back page. So from memcg
point of view we want to handle them like regular page for now at
least.

Changed since v2:
  - s/host/public
Changed since v1:
  - s/public/host
  - add comments explaining how device memory behave and why

Signed-off-by: Jérôme Glisse 
Cc: Johannes Weiner 
Cc: Michal Hocko 
Cc: Vladimir Davydov 
Cc: cgro...@vger.kernel.org
---
 kernel/memremap.c |  2 ++
 mm/memcontrol.c   | 63 ++-
 2 files changed, 60 insertions(+), 5 deletions(-)

diff --git a/kernel/memremap.c b/kernel/memremap.c
index 25c098151ed2..4d74b4a4f8f5 100644
--- a/kernel/memremap.c
+++ b/kernel/memremap.c
@@ -479,6 +479,8 @@ void put_zone_device_private_or_public_page(struct page 
*page)
__ClearPageActive(page);
__ClearPageWaiters(page);
 
+   mem_cgroup_uncharge(page);
+
page->pgmap->page_free(page, page->pgmap->data);
}
else if (!count)
diff --git a/mm/memcontrol.c b/mm/memcontrol.c
index c709fdceac13..858842a741bf 100644
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -4391,12 +4391,13 @@ enum mc_target_type {
MC_TARGET_NONE = 0,
MC_TARGET_PAGE,
MC_TARGET_SWAP,
+   MC_TARGET_DEVICE,
 };
 
 static struct page *mc_handle_present_pte(struct vm_area_struct *vma,
unsigned long addr, pte_t ptent)
 {
-   struct page *page = vm_normal_page(vma, addr, ptent);
+   struct page *page = _vm_normal_page(vma, addr, ptent, true);
 
if (!page || !page_mapped(page))
return NULL;
@@ -4407,13 +4408,20 @@ static struct page *mc_handle_present_pte(struct 
vm_area_struct *vma,
if (!(mc.flags & MOVE_FILE))
return NULL;
}
-   if (!get_page_unless_zero(page))
+   if (is_device_public_page(page)) {
+   /*
+* MEMORY_DEVICE_PUBLIC means ZONE_DEVICE page and which have a
+* refcount of 1 when free (unlike normal page)
+*/
+   if (!page_ref_add_unless(page, 1, 1))
+   return NULL;
+   } else if (!get_page_unless_zero(page))
return NULL;
 
return page;
 }
 
-#ifdef CONFIG_SWAP
+#if defined(CONFIG_SWAP) || defined(CONFIG_DEVICE_PRIVATE)
 static struct page *mc_handle_swap_pte(struct vm_area_struct *vma,
pte_t ptent, swp_entry_t *entry)
 {
@@ -4422,6 +4430,23 @@ static struct page *mc_handle_swap_pte(struct 
vm_area_struct *vma,
 
if (!(mc.flags & MOVE_ANON) || non_swap_entry(ent))
return NULL;
+
+   /*
+* Handle MEMORY_DEVICE_PRIVATE which are ZONE_DEVICE page belonging to
+* a device and because they are not accessible by CPU they are store
+* as special swap entry in the CPU page table.
+*/
+   if (is_device_private_entry(ent)) {
+   page = device_private_entry_to_page(ent);
+   /*
+* MEMORY_DEVICE_PRIVATE means ZONE_DEVICE page and which have
+* a refcount of 1 when free (unlike normal page)
+*/
+   if (!page_ref_add_unless(page, 1, 1))
+   return NULL;
+   return page;
+   }
+
/*
 * Because lookup_swap_cache() updates some statistics counter,
 * we call find_get_page() with swapper_space directly.
@@ -4582,6 +4607,13 @@ static int mem_cgroup_move_account(struct page *page,
  *   2(MC_TARGET_SWAP): if the swap entry corresponding to this pte is a
  * target for charge migration. if @target is not NULL, the entry is stored
  * in target->ent.
+ *   3(MC_TARGET_DEVICE): like MC_TARGET_PAGE  but page is MEMORY_DEVICE_PUBLIC
+ * or MEMORY_DEVICE_PRIVATE (so ZONE_DEVICE page and thus not on the lru).
+ * For now we such page is charge like a regular page would be as for all
+ * intent and purposes it is just special memory taking the place of a
+ * regular page. See Documentations/vm/hmm.txt and include/linux/hmm.h for
+ * more informations on this type of memory how it is use and why it is
+ * charge like this.
  *
  * Called with pte lock held.
  */
@@ -4610,6 +4642,9 @@ static enum mc_target_type get_mctgt_type(struct 
vm_area_struct *vma,
 */
if (page->mem_cgroup == mc.from) {
ret = MC_TARGET_PAGE;
+   if (is_device_private_page(page) ||
+   is_device_public_page(page))
+ 

[PATCH 5/6] mm/memcontrol: support MEMORY_DEVICE_PRIVATE and MEMORY_DEVICE_PUBLIC v3

2017-07-13 Thread Jérôme Glisse
HMM pages (private or public device pages) are ZONE_DEVICE page and
thus need special handling when it comes to lru or refcount. This
patch make sure that memcontrol properly handle those when it face
them. Those pages are use like regular pages in a process address
space either as anonymous page or as file back page. So from memcg
point of view we want to handle them like regular page for now at
least.

Changed since v2:
  - s/host/public
Changed since v1:
  - s/public/host
  - add comments explaining how device memory behave and why

Signed-off-by: Jérôme Glisse 
Cc: Johannes Weiner 
Cc: Michal Hocko 
Cc: Vladimir Davydov 
Cc: cgro...@vger.kernel.org
---
 kernel/memremap.c |  2 ++
 mm/memcontrol.c   | 63 ++-
 2 files changed, 60 insertions(+), 5 deletions(-)

diff --git a/kernel/memremap.c b/kernel/memremap.c
index 25c098151ed2..4d74b4a4f8f5 100644
--- a/kernel/memremap.c
+++ b/kernel/memremap.c
@@ -479,6 +479,8 @@ void put_zone_device_private_or_public_page(struct page 
*page)
__ClearPageActive(page);
__ClearPageWaiters(page);
 
+   mem_cgroup_uncharge(page);
+
page->pgmap->page_free(page, page->pgmap->data);
}
else if (!count)
diff --git a/mm/memcontrol.c b/mm/memcontrol.c
index c709fdceac13..858842a741bf 100644
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -4391,12 +4391,13 @@ enum mc_target_type {
MC_TARGET_NONE = 0,
MC_TARGET_PAGE,
MC_TARGET_SWAP,
+   MC_TARGET_DEVICE,
 };
 
 static struct page *mc_handle_present_pte(struct vm_area_struct *vma,
unsigned long addr, pte_t ptent)
 {
-   struct page *page = vm_normal_page(vma, addr, ptent);
+   struct page *page = _vm_normal_page(vma, addr, ptent, true);
 
if (!page || !page_mapped(page))
return NULL;
@@ -4407,13 +4408,20 @@ static struct page *mc_handle_present_pte(struct 
vm_area_struct *vma,
if (!(mc.flags & MOVE_FILE))
return NULL;
}
-   if (!get_page_unless_zero(page))
+   if (is_device_public_page(page)) {
+   /*
+* MEMORY_DEVICE_PUBLIC means ZONE_DEVICE page and which have a
+* refcount of 1 when free (unlike normal page)
+*/
+   if (!page_ref_add_unless(page, 1, 1))
+   return NULL;
+   } else if (!get_page_unless_zero(page))
return NULL;
 
return page;
 }
 
-#ifdef CONFIG_SWAP
+#if defined(CONFIG_SWAP) || defined(CONFIG_DEVICE_PRIVATE)
 static struct page *mc_handle_swap_pte(struct vm_area_struct *vma,
pte_t ptent, swp_entry_t *entry)
 {
@@ -4422,6 +4430,23 @@ static struct page *mc_handle_swap_pte(struct 
vm_area_struct *vma,
 
if (!(mc.flags & MOVE_ANON) || non_swap_entry(ent))
return NULL;
+
+   /*
+* Handle MEMORY_DEVICE_PRIVATE which are ZONE_DEVICE page belonging to
+* a device and because they are not accessible by CPU they are store
+* as special swap entry in the CPU page table.
+*/
+   if (is_device_private_entry(ent)) {
+   page = device_private_entry_to_page(ent);
+   /*
+* MEMORY_DEVICE_PRIVATE means ZONE_DEVICE page and which have
+* a refcount of 1 when free (unlike normal page)
+*/
+   if (!page_ref_add_unless(page, 1, 1))
+   return NULL;
+   return page;
+   }
+
/*
 * Because lookup_swap_cache() updates some statistics counter,
 * we call find_get_page() with swapper_space directly.
@@ -4582,6 +4607,13 @@ static int mem_cgroup_move_account(struct page *page,
  *   2(MC_TARGET_SWAP): if the swap entry corresponding to this pte is a
  * target for charge migration. if @target is not NULL, the entry is stored
  * in target->ent.
+ *   3(MC_TARGET_DEVICE): like MC_TARGET_PAGE  but page is MEMORY_DEVICE_PUBLIC
+ * or MEMORY_DEVICE_PRIVATE (so ZONE_DEVICE page and thus not on the lru).
+ * For now we such page is charge like a regular page would be as for all
+ * intent and purposes it is just special memory taking the place of a
+ * regular page. See Documentations/vm/hmm.txt and include/linux/hmm.h for
+ * more informations on this type of memory how it is use and why it is
+ * charge like this.
  *
  * Called with pte lock held.
  */
@@ -4610,6 +4642,9 @@ static enum mc_target_type get_mctgt_type(struct 
vm_area_struct *vma,
 */
if (page->mem_cgroup == mc.from) {
ret = MC_TARGET_PAGE;
+   if (is_device_private_page(page) ||
+   is_device_public_page(page))
+   ret = MC_TARGET_DEVICE;
if (target)